# webmention/src/webmention/main.py
from fastapi import FastAPI, Request
from httpx import AsyncClient
import validators
from bs4 import BeautifulSoup


app = FastAPI()
client = AsyncClient()


@app.post("/")
async def receive_webmention(request: Request):
"""
Receive a POST with `source` and `target` parameters.
Verify the parameters, and then queue for processing.
There are three possible responses on success:
- 200 OK, if processed synchronously (not recommended)
- 201 Created, with Location header pointing to a status page
- 202 Accepted, if async and there is no status page.
There are also possible error responses:
- 400 Bad Request, before fetching the `source`
- `target` not found
- `target` does not accept Webmentions
- `source` was malformed or has unsupported scheme
- 400 Bad Request, after fetching the `source`
- `source` not found
- `source` does not contain a link to `target`
- 500 Internal Server Error, if anything fails on our end
"""
pass
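    # A minimal sketch of the flow described above. Assumptions: the parameters
    # arrive form-encoded, and processing happens asynchronously with no status
    # page, so the success response is 202 Accepted.
    from fastapi import Response  # local import so the sketch stays self-contained

    form = await request.form()
    source = str(form.get("source", ""))
    target = str(form.get("target", ""))
    if await is_bad_request(source, target):
        return Response(status_code=400)
    # TODO: queue verify_webmention(source, target) for background processing
    # instead of doing all the work inline.
    return Response(status_code=202)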


async def verify_webmention(source: str, target: str):
    """
    MUST: Perform a GET on the `source`, following redirects.
    (Limit redirects to some arbitrary limit.)
    SHOULD: Include an Accept header for preferred content types.

    Verify that the `source` mentions the `target`:
    - HTML5
      - href = <target>
      - src = <target>
    - JSON
      - any property = <target>
    - plaintext
      - search for the string <target>
    - others?
      - [at your discretion]
    """
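    # A sketch of the checks listed above, assuming a boolean return value and
    # that redirects are capped by the client's `max_redirects` setting
    # (httpx defaults to 20).
    response = await client.get(
        source,
        follow_redirects=True,
        headers={"Accept": "text/html, application/json, text/plain"},
    )
    content_type = response.headers.get("content-type", "")
    if "html" in content_type:
        # HTML5: any element whose href or src equals the target.
        soup = BeautifulSoup(response.text, "html.parser")
        return bool(soup.find_all(href=target) or soup.find_all(src=target))
    if "json" in content_type:
        # JSON: any property, at any depth, whose value equals the target.
        def mentions(value) -> bool:
            if isinstance(value, dict):
                return any(mentions(v) for v in value.values())
            if isinstance(value, list):
                return any(mentions(v) for v in value)
            return value == target

        return mentions(response.json())
    # Plaintext (and anything else): search for the target string.
    return target in response.text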


async def is_bad_request(source: str, target: str) -> bool:
    """
    Return True if the request is a 400 Bad Request:

    - before fetching the `source`
      - `target` not found
      - `target` does not accept Webmentions
      - `source` was malformed or has an unsupported scheme
    - after fetching the `source`
      - `source` not found
      - `source` does not contain a link to `target`
    """
    # Before fetching
    if not source.startswith(("http://", "https://")):
        # For now, we only support the http(s) web.
        return True
    if not validators.url(source):
        return True
    if await is_not_found(target):
        return True
    if await does_not_accept_webmentions(target):
        # Ideally this would be a 403, but that's a spec violation .-.
        return True
    # After fetching
    response = await client.get(source)
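    # The "after fetching" cases from the docstring. Assumption: a plain
    # substring search is enough to decide whether `source` links to `target`;
    # verify_webmention does the finer-grained check later.
    if response.status_code == 404:
        # `source` not found
        return True
    if target not in response.text:
        # `source` does not contain a link to `target`
        return True
    return False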


async def is_not_found(url: str) -> bool:
    """
    Determine if the resource is "not found".
    """
    response = await client.get(url)
    # httpx status codes are ints, so compare against 404, not "404".
    return response.status_code == 404


async def does_not_accept_webmentions(url: str) -> bool:
    """
    Determine if the resource does not accept Webmentions.
    """
    # Try to find a Link header first
    response = await client.head(url)
    link = response.links.get("webmention")
    if link:
        return False
    # Then try to find a <link> or <a> with rel="webmention"
    response = await client.get(url)
    link = BeautifulSoup(response.content, "html.parser").find(
        ["a", "link"],
        attrs={
            "rel": "webmention",
            "href": True,  # this is an extra check I'm adding for sanity
        },
    )
    if await valid_webmention_endpoint(link):
        return False
    # By this point, no endpoint was found
    return True


async def valid_webmention_endpoint(link) -> bool:
    """
    Determine if a <link>/<a> tag found by BeautifulSoup (or None, if nothing
    was found) points at a usable Webmention endpoint.
    """
    if (
        link
        and link["href"]
        and link["href"].startswith(("http://", "https://"))
        and validators.url(link["href"])
    ):
        return True
    return False
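

# Note: the Webmention spec allows the advertised endpoint to be a relative
# URL, which the startswith() check above rejects. A helper along these lines
# (hypothetical, not wired in yet) could resolve the href against the page it
# was found on before validating it.
def resolve_webmention_endpoint(page_url: str, href: str) -> str:
    """Resolve a possibly-relative endpoint href against the page URL."""
    from urllib.parse import urljoin  # stdlib; kept local to this helper

    return urljoin(page_url, href)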