More cleanup
This commit is contained in:
@@ -4,9 +4,9 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def strtobool(s: str) -> bool:
|
||||
if s in ['y', 'yes', 'true', 'on', '1']:
|
||||
if s in ["y", "yes", "true", "on", "1"]:
|
||||
return True
|
||||
if s in ['n', 'no', 'false', 'off', '0']:
|
||||
if s in ["n", "no", "false", "off", "0"]:
|
||||
return False
|
||||
|
||||
raise ValueError(f'cannot convert {s} to bool')
|
||||
raise ValueError(f"cannot convert {s} to bool")
|
||||
|
@@ -1,68 +0,0 @@
|
||||
from typing import Any
|
||||
from typing import Dict
|
||||
from typing import List
|
||||
from typing import Optional
|
||||
|
||||
import requests
|
||||
|
||||
from .errors import RecursionLimitExceededError
|
||||
from .errors import UnexpectedActivityTypeError
|
||||
|
||||
|
||||
def _do_req(url: str, headers: Dict[str, str]) -> Dict[str, Any]:
|
||||
resp = requests.get(url, headers=headers)
|
||||
resp.raise_for_status()
|
||||
return resp.json()
|
||||
|
||||
|
||||
def parse_collection(
|
||||
payload: Optional[Dict[str, Any]] = None,
|
||||
url: Optional[str] = None,
|
||||
user_agent: Optional[str] = None,
|
||||
level: int = 0,
|
||||
do_req: Any = _do_req,
|
||||
) -> List[str]:
|
||||
"""Resolve/fetch a `Collection`/`OrderedCollection`."""
|
||||
if level > 3:
|
||||
raise RecursionLimitExceededError('recursion limit exceeded')
|
||||
|
||||
# Go through all the pages
|
||||
headers = {'Accept': 'application/activity+json'}
|
||||
if user_agent:
|
||||
headers['User-Agent'] = user_agent
|
||||
|
||||
out: List[str] = []
|
||||
if url:
|
||||
payload = do_req(url, headers)
|
||||
if not payload:
|
||||
raise ValueError('must at least prove a payload or an URL')
|
||||
|
||||
if payload['type'] in ['Collection', 'OrderedCollection']:
|
||||
if 'orderedItems' in payload:
|
||||
return payload['orderedItems']
|
||||
if 'items' in payload:
|
||||
return payload['items']
|
||||
if 'first' in payload:
|
||||
if 'orderedItems' in payload['first']:
|
||||
out.extend(payload['first']['orderedItems'])
|
||||
if 'items' in payload['first']:
|
||||
out.extend(payload['first']['items'])
|
||||
n = payload['first'].get('next')
|
||||
if n:
|
||||
out.extend(parse_collection(url=n, user_agent=user_agent, level=level+1, do_req=do_req))
|
||||
return out
|
||||
|
||||
while payload:
|
||||
if payload['type'] in ['CollectionPage', 'OrderedCollectionPage']:
|
||||
if 'orderedItems' in payload:
|
||||
out.extend(payload['orderedItems'])
|
||||
if 'items' in payload:
|
||||
out.extend(payload['items'])
|
||||
n = payload.get('next')
|
||||
if n is None:
|
||||
break
|
||||
payload = do_req(n, headers)
|
||||
else:
|
||||
raise UnexpectedActivityTypeError('unexpected activity type {}'.format(payload['type']))
|
||||
|
||||
return out
|
@@ -17,7 +17,7 @@ class NotAnActorError(Exception):
|
||||
|
||||
class ActorService(object):
|
||||
def __init__(self, user_agent, col, actor_id, actor_data, instances):
|
||||
logger.debug(f'Initializing ActorService user_agent={user_agent}')
|
||||
logger.debug(f"Initializing ActorService user_agent={user_agent}")
|
||||
self._user_agent = user_agent
|
||||
self._col = col
|
||||
self._in_mem = {actor_id: actor_data}
|
||||
@@ -25,57 +25,70 @@ class ActorService(object):
|
||||
self._known_instances = set()
|
||||
|
||||
def _fetch(self, actor_url):
|
||||
logger.debug(f'fetching remote object {actor_url}')
|
||||
logger.debug(f"fetching remote object {actor_url}")
|
||||
|
||||
check_url(actor_url)
|
||||
|
||||
resp = requests.get(actor_url, headers={
|
||||
'Accept': 'application/activity+json',
|
||||
'User-Agent': self._user_agent,
|
||||
})
|
||||
resp = requests.get(
|
||||
actor_url,
|
||||
headers={
|
||||
"Accept": "application/activity+json",
|
||||
"User-Agent": self._user_agent,
|
||||
},
|
||||
)
|
||||
if resp.status_code == 404:
|
||||
raise ActivityNotFoundError(f'{actor_url} cannot be fetched, 404 not found error')
|
||||
raise ActivityNotFoundError(
|
||||
f"{actor_url} cannot be fetched, 404 not found error"
|
||||
)
|
||||
|
||||
resp.raise_for_status()
|
||||
return resp.json()
|
||||
|
||||
def get(self, actor_url, reload_cache=False):
|
||||
logger.info(f'get actor {actor_url} (reload_cache={reload_cache})')
|
||||
logger.info(f"get actor {actor_url} (reload_cache={reload_cache})")
|
||||
|
||||
if actor_url in self._in_mem:
|
||||
return self._in_mem[actor_url]
|
||||
|
||||
instance = urlparse(actor_url)._replace(path='', query='', fragment='').geturl()
|
||||
instance = urlparse(actor_url)._replace(path="", query="", fragment="").geturl()
|
||||
if instance not in self._known_instances:
|
||||
self._known_instances.add(instance)
|
||||
if not self._instances.find_one({'instance': instance}):
|
||||
self._instances.insert({'instance': instance, 'first_object': actor_url})
|
||||
if not self._instances.find_one({"instance": instance}):
|
||||
self._instances.insert(
|
||||
{"instance": instance, "first_object": actor_url}
|
||||
)
|
||||
|
||||
if reload_cache:
|
||||
actor = self._fetch(actor_url)
|
||||
self._in_mem[actor_url] = actor
|
||||
self._col.update({'actor_id': actor_url}, {'$set': {'cached_response': actor}}, upsert=True)
|
||||
self._col.update(
|
||||
{"actor_id": actor_url},
|
||||
{"$set": {"cached_response": actor}},
|
||||
upsert=True,
|
||||
)
|
||||
return actor
|
||||
|
||||
cached_actor = self._col.find_one({'actor_id': actor_url})
|
||||
cached_actor = self._col.find_one({"actor_id": actor_url})
|
||||
if cached_actor:
|
||||
return cached_actor['cached_response']
|
||||
return cached_actor["cached_response"]
|
||||
|
||||
actor = self._fetch(actor_url)
|
||||
if not 'type' in actor:
|
||||
if not "type" in actor:
|
||||
raise NotAnActorError(None)
|
||||
if actor['type'] != 'Person':
|
||||
if actor["type"] != "Person":
|
||||
raise NotAnActorError(actor)
|
||||
|
||||
self._col.update({'actor_id': actor_url}, {'$set': {'cached_response': actor}}, upsert=True)
|
||||
self._col.update(
|
||||
{"actor_id": actor_url}, {"$set": {"cached_response": actor}}, upsert=True
|
||||
)
|
||||
self._in_mem[actor_url] = actor
|
||||
return actor
|
||||
|
||||
def get_public_key(self, actor_url, reload_cache=False):
|
||||
profile = self.get(actor_url, reload_cache=reload_cache)
|
||||
pub = profile['publicKey']
|
||||
return pub['id'], RSA.importKey(pub['publicKeyPem'])
|
||||
pub = profile["publicKey"]
|
||||
return pub["id"], RSA.importKey(pub["publicKeyPem"])
|
||||
|
||||
def get_inbox_url(self, actor_url, reload_cache=False):
|
||||
profile = self.get(actor_url, reload_cache=reload_cache)
|
||||
return profile.get('inbox')
|
||||
return profile.get("inbox")
|
||||
|
@@ -1,65 +0,0 @@
|
||||
import re
|
||||
import typing
|
||||
from typing import Any
|
||||
from typing import Dict
|
||||
from typing import List
|
||||
from typing import Optional
|
||||
from typing import Tuple
|
||||
from typing import Type
|
||||
from typing import Union
|
||||
|
||||
from bleach.linkifier import Linker
|
||||
from markdown import markdown
|
||||
|
||||
from config import ACTOR_SERVICE
|
||||
from config import BASE_URL
|
||||
from config import ID
|
||||
from config import USERNAME
|
||||
from utils.webfinger import get_actor_url
|
||||
|
||||
|
||||
def set_attrs(attrs, new=False):
|
||||
attrs[(None, u'target')] = u'_blank'
|
||||
attrs[(None, u'class')] = u'external'
|
||||
attrs[(None, u'rel')] = u'noopener'
|
||||
attrs[(None, u'title')] = attrs[(None, u'href')]
|
||||
return attrs
|
||||
|
||||
|
||||
LINKER = Linker(callbacks=[set_attrs])
|
||||
HASHTAG_REGEX = re.compile(r"(#[\d\w\.]+)")
|
||||
MENTION_REGEX = re.compile(r"@[\d\w_.+-]+@[\d\w-]+\.[\d\w\-.]+")
|
||||
|
||||
|
||||
def hashtagify(content: str) -> Tuple[str, List[Dict[str, str]]]:
|
||||
tags = []
|
||||
for hashtag in re.findall(HASHTAG_REGEX, content):
|
||||
tag = hashtag[1:]
|
||||
link = f'<a href="{BASE_URL}/tags/{tag}" class="mention hashtag" rel="tag">#<span>{tag}</span></a>'
|
||||
tags.append(dict(href=f'{BASE_URL}/tags/{tag}', name=hashtag, type='Hashtag'))
|
||||
content = content.replace(hashtag, link)
|
||||
return content, tags
|
||||
|
||||
|
||||
def mentionify(content: str) -> Tuple[str, List[Dict[str, str]]]:
|
||||
tags = []
|
||||
for mention in re.findall(MENTION_REGEX, content):
|
||||
_, username, domain = mention.split('@')
|
||||
actor_url = get_actor_url(mention)
|
||||
p = ACTOR_SERVICE.get(actor_url)
|
||||
print(p)
|
||||
tags.append(dict(type='Mention', href=p['id'], name=mention))
|
||||
link = f'<span class="h-card"><a href="{p["url"]}" class="u-url mention">@<span>{username}</span></a></span>'
|
||||
content = content.replace(mention, link)
|
||||
return content, tags
|
||||
|
||||
|
||||
def parse_markdown(content: str) -> Tuple[str, List[Dict[str, str]]]:
|
||||
tags = []
|
||||
content = LINKER.linkify(content)
|
||||
content, hashtag_tags = hashtagify(content)
|
||||
tags.extend(hashtag_tags)
|
||||
content, mention_tags = mentionify(content)
|
||||
tags.extend(mention_tags)
|
||||
content = markdown(content)
|
||||
return content, tags
|
@@ -1,37 +0,0 @@
|
||||
|
||||
class Error(Exception):
|
||||
status_code = 400
|
||||
|
||||
def __init__(self, message, status_code=None, payload=None):
|
||||
Exception.__init__(self)
|
||||
self.message = message
|
||||
if status_code is not None:
|
||||
self.status_code = status_code
|
||||
self.payload = payload
|
||||
|
||||
def to_dict(self):
|
||||
rv = dict(self.payload or ())
|
||||
rv['message'] = self.message
|
||||
return rv
|
||||
|
||||
def __repr__(self):
|
||||
return f'{self.__class__.__qualname__}({self.message!r}, payload={self.payload!r}, status_code={self.status_code})'
|
||||
|
||||
|
||||
class NotFromOutboxError(Error):
|
||||
pass
|
||||
|
||||
class ActivityNotFoundError(Error):
|
||||
status_code = 404
|
||||
|
||||
|
||||
class BadActivityError(Error):
|
||||
pass
|
||||
|
||||
|
||||
class RecursionLimitExceededError(BadActivityError):
|
||||
pass
|
||||
|
||||
|
||||
class UnexpectedActivityTypeError(BadActivityError):
|
||||
pass
|
@@ -1,95 +0,0 @@
|
||||
"""Implements HTTP signature for Flask requests.
|
||||
|
||||
Mastodon instances won't accept requests that are not signed using this scheme.
|
||||
|
||||
"""
|
||||
import base64
|
||||
import hashlib
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
from typing import Dict
|
||||
from typing import Optional
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from Crypto.Hash import SHA256
|
||||
from Crypto.Signature import PKCS1_v1_5
|
||||
from flask import request
|
||||
from requests.auth import AuthBase
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _build_signed_string(signed_headers: str, method: str, path: str, headers: Any, body_digest: str) -> str:
|
||||
out = []
|
||||
for signed_header in signed_headers.split(' '):
|
||||
if signed_header == '(request-target)':
|
||||
out.append('(request-target): '+method.lower()+' '+path)
|
||||
elif signed_header == 'digest':
|
||||
out.append('digest: '+body_digest)
|
||||
else:
|
||||
out.append(signed_header+': '+headers[signed_header])
|
||||
return '\n'.join(out)
|
||||
|
||||
|
||||
def _parse_sig_header(val: Optional[str]) -> Optional[Dict[str, str]]:
|
||||
if not val:
|
||||
return None
|
||||
out = {}
|
||||
for data in val.split(','):
|
||||
k, v = data.split('=', 1)
|
||||
out[k] = v[1:len(v)-1]
|
||||
return out
|
||||
|
||||
|
||||
def _verify_h(signed_string, signature, pubkey):
|
||||
signer = PKCS1_v1_5.new(pubkey)
|
||||
digest = SHA256.new()
|
||||
digest.update(signed_string.encode('utf-8'))
|
||||
return signer.verify(digest, signature)
|
||||
|
||||
|
||||
def _body_digest() -> str:
|
||||
h = hashlib.new('sha256')
|
||||
h.update(request.data)
|
||||
return 'SHA-256='+base64.b64encode(h.digest()).decode('utf-8')
|
||||
|
||||
|
||||
def verify_request(actor_service) -> bool:
|
||||
hsig = _parse_sig_header(request.headers.get('Signature'))
|
||||
if not hsig:
|
||||
logger.debug('no signature in header')
|
||||
return False
|
||||
logger.debug(f'hsig={hsig}')
|
||||
signed_string = _build_signed_string(hsig['headers'], request.method, request.path, request.headers, _body_digest())
|
||||
_, rk = actor_service.get_public_key(hsig['keyId'])
|
||||
return _verify_h(signed_string, base64.b64decode(hsig['signature']), rk)
|
||||
|
||||
|
||||
class HTTPSigAuth(AuthBase):
|
||||
def __init__(self, keyid, privkey):
|
||||
self.keyid = keyid
|
||||
self.privkey = privkey
|
||||
|
||||
def __call__(self, r):
|
||||
logger.info(f'keyid={self.keyid}')
|
||||
host = urlparse(r.url).netloc
|
||||
bh = hashlib.new('sha256')
|
||||
bh.update(r.body.encode('utf-8'))
|
||||
bodydigest = 'SHA-256='+base64.b64encode(bh.digest()).decode('utf-8')
|
||||
date = datetime.utcnow().strftime('%a, %d %b %Y %H:%M:%S GMT')
|
||||
r.headers.update({'Digest': bodydigest, 'Date': date})
|
||||
r.headers.update({'Host': host})
|
||||
sigheaders = '(request-target) user-agent host date digest content-type'
|
||||
to_be_signed = _build_signed_string(sigheaders, r.method, r.path_url, r.headers, bodydigest)
|
||||
signer = PKCS1_v1_5.new(self.privkey)
|
||||
digest = SHA256.new()
|
||||
digest.update(to_be_signed.encode('utf-8'))
|
||||
sig = base64.b64encode(signer.sign(digest))
|
||||
sig = sig.decode('utf-8')
|
||||
headers = {
|
||||
'Signature': f'keyId="{self.keyid}",algorithm="rsa-sha256",headers="{sigheaders}",signature="{sig}"'
|
||||
}
|
||||
logger.info(f'signed request headers={headers}')
|
||||
r.headers.update(headers)
|
||||
return r
|
@@ -1,69 +0,0 @@
|
||||
import base64
|
||||
import hashlib
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
from typing import Dict
|
||||
|
||||
from Crypto.Hash import SHA256
|
||||
from Crypto.Signature import PKCS1_v1_5
|
||||
from pyld import jsonld
|
||||
|
||||
# cache the downloaded "schemas", otherwise the library is super slow
|
||||
# (https://github.com/digitalbazaar/pyld/issues/70)
|
||||
_CACHE: Dict[str, Any] = {}
|
||||
LOADER = jsonld.requests_document_loader()
|
||||
|
||||
def _caching_document_loader(url: str) -> Any:
|
||||
if url in _CACHE:
|
||||
return _CACHE[url]
|
||||
resp = LOADER(url)
|
||||
_CACHE[url] = resp
|
||||
return resp
|
||||
|
||||
jsonld.set_document_loader(_caching_document_loader)
|
||||
|
||||
|
||||
def options_hash(doc):
|
||||
doc = dict(doc['signature'])
|
||||
for k in ['type', 'id', 'signatureValue']:
|
||||
if k in doc:
|
||||
del doc[k]
|
||||
doc['@context'] = 'https://w3id.org/identity/v1'
|
||||
normalized = jsonld.normalize(doc, {'algorithm': 'URDNA2015', 'format': 'application/nquads'})
|
||||
h = hashlib.new('sha256')
|
||||
h.update(normalized.encode('utf-8'))
|
||||
return h.hexdigest()
|
||||
|
||||
|
||||
def doc_hash(doc):
|
||||
doc = dict(doc)
|
||||
if 'signature' in doc:
|
||||
del doc['signature']
|
||||
normalized = jsonld.normalize(doc, {'algorithm': 'URDNA2015', 'format': 'application/nquads'})
|
||||
h = hashlib.new('sha256')
|
||||
h.update(normalized.encode('utf-8'))
|
||||
return h.hexdigest()
|
||||
|
||||
|
||||
def verify_signature(doc, pubkey):
|
||||
to_be_signed = options_hash(doc) + doc_hash(doc)
|
||||
signature = doc['signature']['signatureValue']
|
||||
signer = PKCS1_v1_5.new(pubkey)
|
||||
digest = SHA256.new()
|
||||
digest.update(to_be_signed.encode('utf-8'))
|
||||
return signer.verify(digest, base64.b64decode(signature))
|
||||
|
||||
|
||||
def generate_signature(doc, privkey):
|
||||
options = {
|
||||
'type': 'RsaSignature2017',
|
||||
'creator': doc['actor'] + '#main-key',
|
||||
'created': datetime.utcnow().replace(microsecond=0).isoformat() + 'Z',
|
||||
}
|
||||
doc['signature'] = options
|
||||
to_be_signed = options_hash(doc) + doc_hash(doc)
|
||||
signer = PKCS1_v1_5.new(privkey)
|
||||
digest = SHA256.new()
|
||||
digest.update(to_be_signed.encode('utf-8'))
|
||||
sig = base64.b64encode(signer.sign(digest))
|
||||
options['signatureValue'] = sig.decode('utf-8')
|
@@ -16,53 +16,100 @@ class ObjectService(object):
|
||||
self._known_instances = set()
|
||||
|
||||
def _fetch_remote(self, object_id):
|
||||
print(f'fetch remote {object_id}')
|
||||
print(f"fetch remote {object_id}")
|
||||
check_url(object_id)
|
||||
resp = requests.get(object_id, headers={
|
||||
'Accept': 'application/activity+json',
|
||||
'User-Agent': self._user_agent,
|
||||
})
|
||||
resp = requests.get(
|
||||
object_id,
|
||||
headers={
|
||||
"Accept": "application/activity+json",
|
||||
"User-Agent": self._user_agent,
|
||||
},
|
||||
)
|
||||
if resp.status_code == 404:
|
||||
raise ActivityNotFoundError(f'{object_id} cannot be fetched, 404 error not found')
|
||||
raise ActivityNotFoundError(
|
||||
f"{object_id} cannot be fetched, 404 error not found"
|
||||
)
|
||||
|
||||
resp.raise_for_status()
|
||||
return resp.json()
|
||||
|
||||
def _fetch(self, object_id):
|
||||
instance = urlparse(object_id)._replace(path='', query='', fragment='').geturl()
|
||||
instance = urlparse(object_id)._replace(path="", query="", fragment="").geturl()
|
||||
if instance not in self._known_instances:
|
||||
self._known_instances.add(instance)
|
||||
if not self._instances.find_one({'instance': instance}):
|
||||
self._instances.insert({'instance': instance, 'first_object': object_id})
|
||||
if not self._instances.find_one({"instance": instance}):
|
||||
self._instances.insert(
|
||||
{"instance": instance, "first_object": object_id}
|
||||
)
|
||||
|
||||
obj = self._inbox.find_one({'$or': [{'remote_id': object_id}, {'type': 'Create', 'activity.object.id': object_id}]})
|
||||
obj = self._inbox.find_one(
|
||||
{
|
||||
"$or": [
|
||||
{"remote_id": object_id},
|
||||
{"type": "Create", "activity.object.id": object_id},
|
||||
]
|
||||
}
|
||||
)
|
||||
if obj:
|
||||
if obj['remote_id'] == object_id:
|
||||
return obj['activity']
|
||||
return obj['activity']['object']
|
||||
if obj["remote_id"] == object_id:
|
||||
return obj["activity"]
|
||||
return obj["activity"]["object"]
|
||||
|
||||
obj = self._outbox.find_one({'$or': [{'remote_id': object_id}, {'type': 'Create', 'activity.object.id': object_id}]})
|
||||
obj = self._outbox.find_one(
|
||||
{
|
||||
"$or": [
|
||||
{"remote_id": object_id},
|
||||
{"type": "Create", "activity.object.id": object_id},
|
||||
]
|
||||
}
|
||||
)
|
||||
if obj:
|
||||
if obj['remote_id'] == object_id:
|
||||
return obj['activity']
|
||||
return obj['activity']['object']
|
||||
if obj["remote_id"] == object_id:
|
||||
return obj["activity"]
|
||||
return obj["activity"]["object"]
|
||||
|
||||
return self._fetch_remote(object_id)
|
||||
|
||||
def get(self, object_id, reload_cache=False, part_of_stream=False, announce_published=None):
|
||||
def get(
|
||||
self,
|
||||
object_id,
|
||||
reload_cache=False,
|
||||
part_of_stream=False,
|
||||
announce_published=None,
|
||||
):
|
||||
if reload_cache:
|
||||
obj = self._fetch(object_id)
|
||||
self._col.update({'object_id': object_id}, {'$set': {'cached_object': obj, 'meta.part_of_stream': part_of_stream, 'meta.announce_published': announce_published}}, upsert=True)
|
||||
self._col.update(
|
||||
{"object_id": object_id},
|
||||
{
|
||||
"$set": {
|
||||
"cached_object": obj,
|
||||
"meta.part_of_stream": part_of_stream,
|
||||
"meta.announce_published": announce_published,
|
||||
}
|
||||
},
|
||||
upsert=True,
|
||||
)
|
||||
return obj
|
||||
|
||||
cached_object = self._col.find_one({'object_id': object_id})
|
||||
cached_object = self._col.find_one({"object_id": object_id})
|
||||
if cached_object:
|
||||
print(f'ObjectService: {cached_object}')
|
||||
return cached_object['cached_object']
|
||||
print(f"ObjectService: {cached_object}")
|
||||
return cached_object["cached_object"]
|
||||
|
||||
obj = self._fetch(object_id)
|
||||
|
||||
self._col.update({'object_id': object_id}, {'$set': {'cached_object': obj, 'meta.part_of_stream': part_of_stream, 'meta.announce_published': announce_published}}, upsert=True)
|
||||
self._col.update(
|
||||
{"object_id": object_id},
|
||||
{
|
||||
"$set": {
|
||||
"cached_object": obj,
|
||||
"meta.part_of_stream": part_of_stream,
|
||||
"meta.announce_published": announce_published,
|
||||
}
|
||||
},
|
||||
upsert=True,
|
||||
)
|
||||
# print(f'ObjectService: {obj}')
|
||||
|
||||
return obj
|
||||
|
@@ -1,37 +1,34 @@
|
||||
import ipaddress
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import opengraph
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from .urlutils import check_url
|
||||
from .urlutils import is_url_valid
|
||||
from little_boxes.urlutils import check_url
|
||||
from little_boxes.urlutils import is_url_valid
|
||||
|
||||
|
||||
def links_from_note(note):
|
||||
tags_href= set()
|
||||
for t in note.get('tag', []):
|
||||
h = t.get('href')
|
||||
tags_href = set()
|
||||
for t in note.get("tag", []):
|
||||
h = t.get("href")
|
||||
if h:
|
||||
# TODO(tsileo): fetch the URL for Actor profile, type=mention
|
||||
tags_href.add(h)
|
||||
|
||||
links = set()
|
||||
soup = BeautifulSoup(note['content'])
|
||||
for link in soup.find_all('a'):
|
||||
h = link.get('href')
|
||||
if h.startswith('http') and h not in tags_href and is_url_valid(h):
|
||||
soup = BeautifulSoup(note["content"])
|
||||
for link in soup.find_all("a"):
|
||||
h = link.get("href")
|
||||
if h.startswith("http") and h not in tags_href and is_url_valid(h):
|
||||
links.add(h)
|
||||
|
||||
return links
|
||||
|
||||
|
||||
def fetch_og_metadata(user_agent, col, remote_id):
|
||||
doc = col.find_one({'remote_id': remote_id})
|
||||
doc = col.find_one({"remote_id": remote_id})
|
||||
if not doc:
|
||||
raise ValueError
|
||||
note = doc['activity']['object']
|
||||
note = doc["activity"]["object"]
|
||||
print(note)
|
||||
links = links_from_note(note)
|
||||
if not links:
|
||||
@@ -40,9 +37,11 @@ def fetch_og_metadata(user_agent, col, remote_id):
|
||||
htmls = []
|
||||
for l in links:
|
||||
check_url(l)
|
||||
r = requests.get(l, headers={'User-Agent': user_agent})
|
||||
r = requests.get(l, headers={"User-Agent": user_agent})
|
||||
r.raise_for_status()
|
||||
htmls.append(r.text)
|
||||
links_og_metadata = [dict(opengraph.OpenGraph(html=html)) for html in htmls]
|
||||
col.update_one({'remote_id': remote_id}, {'$set': {'meta.og_metadata': links_og_metadata}})
|
||||
col.update_one(
|
||||
{"remote_id": remote_id}, {"$set": {"meta.og_metadata": links_og_metadata}}
|
||||
)
|
||||
return len(links)
|
||||
|
@@ -1,47 +0,0 @@
|
||||
import ipaddress
|
||||
import logging
|
||||
import os
|
||||
import socket
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from . import strtobool
|
||||
from .errors import Error
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class InvalidURLError(Error):
|
||||
pass
|
||||
|
||||
|
||||
def is_url_valid(url: str) -> bool:
|
||||
parsed = urlparse(url)
|
||||
if parsed.scheme not in ['http', 'https']:
|
||||
return False
|
||||
|
||||
# XXX in debug mode, we want to allow requests to localhost to test the federation with local instances
|
||||
debug_mode = strtobool(os.getenv('MICROBLOGPUB_DEBUG', 'false'))
|
||||
if debug_mode:
|
||||
return True
|
||||
|
||||
if parsed.hostname in ['localhost']:
|
||||
return False
|
||||
|
||||
try:
|
||||
ip_address = socket.getaddrinfo(parsed.hostname, parsed.port or 80)[0][4][0]
|
||||
except socket.gaierror:
|
||||
logger.exception(f'failed to lookup url {url}')
|
||||
return False
|
||||
|
||||
if ipaddress.ip_address(ip_address).is_private:
|
||||
logger.info(f'rejecting private URL {url}')
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
|
||||
def check_url(url: str) -> None:
|
||||
if not is_url_valid(url):
|
||||
raise InvalidURLError(f'"{url}" is invalid')
|
||||
|
||||
return None
|
@@ -1,75 +0,0 @@
|
||||
import logging
|
||||
from typing import Any
|
||||
from typing import Dict
|
||||
from typing import Optional
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import requests
|
||||
|
||||
from .urlutils import check_url
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def webfinger(resource: str) -> Optional[Dict[str, Any]]:
|
||||
"""Mastodon-like WebFinger resolution to retrieve the activity stream Actor URL.
|
||||
"""
|
||||
logger.info(f'performing webfinger resolution for {resource}')
|
||||
protos = ['https', 'http']
|
||||
if resource.startswith('http://'):
|
||||
protos.reverse()
|
||||
host = urlparse(resource).netloc
|
||||
elif resource.startswith('https://'):
|
||||
host = urlparse(resource).netloc
|
||||
else:
|
||||
if resource.startswith('acct:'):
|
||||
resource = resource[5:]
|
||||
if resource.startswith('@'):
|
||||
resource = resource[1:]
|
||||
_, host = resource.split('@', 1)
|
||||
resource='acct:'+resource
|
||||
|
||||
# Security check on the url (like not calling localhost)
|
||||
check_url(f'https://{host}')
|
||||
|
||||
for i, proto in enumerate(protos):
|
||||
try:
|
||||
url = f'{proto}://{host}/.well-known/webfinger'
|
||||
resp = requests.get(
|
||||
url,
|
||||
{'resource': resource}
|
||||
)
|
||||
except requests.ConnectionError:
|
||||
# If we tried https first and the domain is "http only"
|
||||
if i == 0:
|
||||
continue
|
||||
break
|
||||
if resp.status_code == 404:
|
||||
return None
|
||||
resp.raise_for_status()
|
||||
return resp.json()
|
||||
|
||||
|
||||
def get_remote_follow_template(resource: str) -> Optional[str]:
|
||||
data = webfinger(resource)
|
||||
if data is None:
|
||||
return None
|
||||
for link in data['links']:
|
||||
if link.get('rel') == 'http://ostatus.org/schema/1.0/subscribe':
|
||||
return link.get('template')
|
||||
return None
|
||||
|
||||
|
||||
def get_actor_url(resource: str) -> Optional[str]:
|
||||
"""Mastodon-like WebFinger resolution to retrieve the activity stream Actor URL.
|
||||
|
||||
Returns:
|
||||
the Actor URL or None if the resolution failed.
|
||||
"""
|
||||
data = webfinger(resource)
|
||||
if data is None:
|
||||
return None
|
||||
for link in data['links']:
|
||||
if link.get('rel') == 'self' and link.get('type') == 'application/activity+json':
|
||||
return link.get('href')
|
||||
return None
|
Reference in New Issue
Block a user