From db6210cb199dac929c326aa2e643420a6a480c3c Mon Sep 17 00:00:00 2001 From: a Date: Thu, 15 Dec 2022 01:42:48 -0600 Subject: [PATCH] Add missing CORS header and support .jrd extension Also layout roadmap in README --- README.md | 117 +++++++++++++++++++++++++++++++++++++++++--- pyproject.toml | 2 +- webfinger/io.py | 12 ++++- webfinger/lookup.py | 43 +++++++++------- 4 files changed, 149 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index f17d302..e08e49f 100644 --- a/README.md +++ b/README.md @@ -7,16 +7,17 @@ Simple WebFinger server that returns static resources. Written with Python and F ### Add static resources - Create a `resource/` folder. This will map to the `?resource=` query parameter. -- Put a .json file in there; anything before the .json will be served via that `resource` query parameter. +- Put a .jrd file in there; anything before the .jrd will be served via that `resource` query parameter. - Currently, the "subject" will be ignored (as the `resource` will be used directly). - Otherwise, `links` and `aliases` and `properties` will be served normally. +- Symlinks will be resolved as well. Try using a URI as a symlink and put it in `aliases` too! 
-Example `acct:a@trwnh.com.json` +Example `acct:a@trwnh.com.jrd` ```json { "subject": "acct:a@trwnh.com", - "aliases": ["https://trwnh.com/actors/7057bc10-db1c-4ebe-9e00-22cf04be4e5e", "https://trwnh.com/~a"], + "aliases": ["https://ap.trwnh.com/actors/7057bc10-db1c-4ebe-9e00-22cf04be4e5e", "https://trwnh.com/~a", "acct:trwnh@ap.trwnh.com"], "links": [ { "rel": "self", @@ -45,6 +46,8 @@ pdm install pdm run uvicorn webfinger:app --port 7033 ``` +You can also run `pdm run start` or `pdm run python -m webfinger` + #### ...with virtualenv ```bash @@ -54,10 +57,112 @@ pip install -r requirements.txt uvicorn webfinger:app --port 7033 ``` -### Post-run +You can also run `python -m webfinger` -proxy `/.well-known/webfinger` to localhost:7033 +### Post-run proxy or redirect + +You probably want to proxy `/.well-known/webfinger` to localhost:7033 or otherwise host a webfinger service externally and redirect to that instead. + +For simple deployments on a single domain, you probably want to set up a reverse proxy to the running Uvicorn process (by default, this will be hosted at `localhost:7033`). This will allow you to serve WebFinger via your own domain, without a trailing port. Since WebFinger specifies HTTPS requests, your reverse proxy must support HTTPS. + +Example nginx proxy_pass to localhost: + +``` +location /.well-known/webfinger { + proxy_pass http://localhost:7033; +} +``` + +It's recommended that you do any redirects to external services with an HTTP 307 Temporary Redirect. Why not HTTP 302? This is because HTTP 302 does not usually preserve the HTTP method -- some browsers (especially older ones) will treat 302 as 303, issuing a GET request to the new Location regardless of the original request's method. 
+ +Example nginx redirect to external service: + +``` +location /.well-known/webfinger { + add_header Access-Control-Allow-Origin *; + return 307 https://webfinger.example$request_uri; +} +``` ## Development -`pdm run uvicorn webfinger:app --reload` \ No newline at end of file +`pdm run dev` or `uvicorn webfinger:app --reload` + +### TODO + +#### some api for managing JRD documents + +basic create/update/delete? + +#### MAYBE support authorization and private attributes? + +```text +As with all web resources, access to the WebFinger resource could +require authentication. Further, failure to provide required +credentials might result in the server forbidding access or providing +a different response than had the client authenticated with the +server. + +Likewise, a WebFinger resource MAY provide different responses to +different clients based on other factors, such as whether the client +is inside or outside a corporate network. As a concrete example, a +query performed on the internal corporate network might return link +relations to employee pictures, whereas link relations for employee +pictures might not be provided to external entities. +``` + +- [ ] idk if i'm gonna do this but maybe if you authenticate as the owner of the JRD then you can see things like phone numbers and email addresses? + +```text +Systems or services that expose personal data via WebFinger MUST +provide an interface by which users can select which data elements +are exposed through the WebFinger interface. For example, social +networking sites might allow users to mark certain data as "public" +and then utilize that marking as a means of determining what +information to expose via WebFinger. The information published via +WebFinger would thus comprise only the information marked as public +by the user. Further, the user has the ability to remove information +from publication via WebFinger by removing this marking. 
+ +WebFinger MUST NOT be used to provide any personal data unless +publishing that data via WebFinger by the relevant service was +explicitly authorized by the person whose information is being +shared. Publishing one's personal data within an access-controlled +or otherwise limited environment on the Internet does not equate to +providing implicit authorization of further publication of that data +via WebFinger. +``` + +- [ ] if the above gets done, then there should be a dashboard to choose which information is public and which is not. this probably breaks static serving though, or at least private info should be stored in a database and merged into the final response + +#### MAYBE rate limit and proxy webfinger requests + +```text +It is RECOMMENDED that implementers of WebFinger server software take +steps to mitigate abuse, including malicious over-use of the server +and harvesting of user information. Although there is no mechanism +that can guarantee that publicly accessible WebFinger databases won't +be harvested, rate-limiting by IP address will prevent or at least +dramatically slow harvest by private individuals without access to +botnets or other distributed systems. The reason these mitigation +strategies are not mandatory is that the correct choice of mitigation +strategy (if any) depends greatly on the context. Implementers +should not construe this as meaning that they do not need to consider +whether to use a mitigation strategy, and if so, what strategy to +use. + +WebFinger client developers should also be aware of potential abuse +by spammers or those phishing for information about users. As an +example, suppose a mail client was configured to automatically +perform a WebFinger query on the sender of each received mail +message. If a spammer sent an email using a unique identifier in the +'From' header, then when the WebFinger query was performed, the +spammer would be able to associate the request with a particular +user's email address. 
This would provide information to the spammer, +including the user's IP address, the fact the user just checked +email, what kind of WebFinger client the user utilized, and so on. +For this reason, it is strongly advised that clients not perform +WebFinger queries unless authorized by the user to do so. +``` + +- [ ] if/when i add a REST API, the api could have a method to do a lookup proxied through the server. this would allow applications to not leak user IPs so long as they actually use the REST API \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index ea18351..ef9c4a5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "webfinger" -version = "2" +version = "1" description = "Simple WebFinger server that returns static resources. Written with Python and FastAPI." authors = [ {name = "a", email = "a@trwnh.com"}, diff --git a/webfinger/io.py b/webfinger/io.py index 8d11f69..b847f5a 100644 --- a/webfinger/io.py +++ b/webfinger/io.py @@ -27,11 +27,21 @@ def get_jrd(resource: str) -> dict[str, str | list[str] | dict[str,str]]: # Get a filename for the resource document. dir = env.get("RESOURCE_DIR") or "resource" filename = resource - path = f"{dir}/{filename}.json" + path = f"{dir}/{filename}.jrd" # If we can't get a file, try percent-encoding if not Path(path).is_file(): filename = url_encode(resource) + path = f"{dir}/{filename}.jrd" + + # Try plain JSON + if not Path(path).is_file(): + filename = resource + path = f"{dir}/{filename}.json" + + # Try plain JSON and percent-encoding + if not Path(path).is_file(): + filename = resource path = f"{dir}/{filename}.json" # Open the file and load the JSON as a dictionary. 
diff --git a/webfinger/lookup.py b/webfinger/lookup.py index 87d0ce2..6bbbcfd 100644 --- a/webfinger/lookup.py +++ b/webfinger/lookup.py @@ -9,13 +9,16 @@ from webfinger.io import get_jrd router = APIRouter( - prefix="/.well-known/webfinger" -) + prefix="/.well-known/webfinger", + ) -## String literals +# Define required CORS header per RFC7033 -MEDIA_TYPE = "application/jrd+json" +HEADERS = {'Access-Control-Allow-Origin': '*'} +# if env.get("CORS_ALLOW_ORIGIN"): +# allowed_origins = env.get("CORS_ALLOW_ORIGIN") +# headers.update({'Access-Control-Allow-Origin': allowed_origins}) ## Pydantic models for OpenAPI schema @@ -62,7 +65,7 @@ class JRD(BaseModel): } class JRDResponse(Response): - media_type = MEDIA_TYPE + media_type = "application/jrd+json" ## Example responses for OpenAPI schema @@ -76,7 +79,7 @@ RESPONSES = { "description": "OK: The resource you requested was found, and has the returned resource document.", "model": JRD, "content": { - MEDIA_TYPE: { + "application/jrd+json": { "example": json.loads(JRD.Config.schema_extra['example']), }, }, @@ -118,8 +121,7 @@ RESPONSES = { response_model = JRD, response_class = JRDResponse, responses = {**RESPONSES}, - -) + ) async def lookup( resource: str = Query( None, @@ -138,23 +140,26 @@ async def lookup( # If no resource is given, then show a basic hint. if not resource: return PlainTextResponse( - content=RESOURCE_NOT_PROVIDED, + content = RESOURCE_NOT_PROVIDED, status_code=400, - ) + headers = HEADERS, + ) # Otherwise, try to read the resource document. 
try: jrd = get_jrd(resource) except FileNotFoundError: # JRD file does not exist return PlainTextResponse( - content=RESOURCE_NOT_FOUND, - status_code=404, - ) + content = RESOURCE_NOT_FOUND, + status_code = 404, + headers = HEADERS, + ) except: # JRD file could not be read or parsed return PlainTextResponse( - content=RESOURCE_NOT_PARSED, - status_code=500, - ) + content = RESOURCE_NOT_PARSED, + status_code = 500, + headers = HEADERS, + ) # Obtain values from the resource document. subject: str = jrd.get('subject', None) or resource @@ -174,4 +179,8 @@ async def lookup( "links": links, } content = {k: v for k, v in content.items() if v} # remove null values - return JSONResponse(content=content, media_type=MEDIA_TYPE) \ No newline at end of file + return JSONResponse( + content = content, + media_type = "application/jrd+json", + headers = HEADERS, + ) \ No newline at end of file