@@ -4,9 +4,9 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def strtobool(s: str) -> bool:
    """Convert a textual flag to a bool.

    Matching ignores case and surrounding whitespace, so values such as
    ``"True"`` or ``" on "`` coming from environment variables are accepted.

    Args:
        s: the text to convert.

    Returns:
        True for y/yes/true/on/1, False for n/no/false/off/0.

    Raises:
        ValueError: if `s` is not one of the recognised spellings.
    """
    normalized = s.strip().lower()
    if normalized in ("y", "yes", "true", "on", "1"):
        return True
    if normalized in ("n", "no", "false", "off", "0"):
        return False

    # Report the value exactly as it was received to ease debugging.
    raise ValueError(f"cannot convert {s} to bool")
|
||||
|
@@ -1,65 +0,0 @@
|
||||
from typing import Optional, Dict, List, Any
|
||||
|
||||
import requests
|
||||
|
||||
from .errors import RecursionLimitExceededError
|
||||
from .errors import UnexpectedActivityTypeError
|
||||
|
||||
|
||||
def _do_req(url: str, headers: Dict[str, str]) -> Dict[str, Any]:
    """GET `url` with `headers` and return the decoded JSON body.

    Raises whatever `raise_for_status()` raises for an error status code.
    """
    response = requests.get(url, headers=headers)
    response.raise_for_status()
    return response.json()
|
||||
|
||||
|
||||
def parse_collection(
    payload: Optional[Dict[str, Any]] = None,
    url: Optional[str] = None,
    user_agent: Optional[str] = None,
    level: int = 0,
    do_req: Any = _do_req,
) -> List[str]:
    """Resolve/fetch a `Collection`/`OrderedCollection`.

    Args:
        payload: an already-fetched collection (or collection page) document.
        url: URL to fetch the document from; when given it replaces `payload`.
        user_agent: optional `User-Agent` header value for outgoing requests.
        level: current recursion depth (callers normally leave it at 0).
        do_req: callable `(url, headers) -> dict` used to fetch documents.

    Returns:
        The items found in the collection and in the pages that were followed.

    Raises:
        RecursionLimitExceededError: if `level` is greater than 3.
        ValueError: if neither a payload nor an URL yields a document.
        UnexpectedActivityTypeError: if a document that should be a page has
            another type.
    """
    if level > 3:
        raise RecursionLimitExceededError('recursion limit exceeded')

    headers = {'Accept': 'application/activity+json'}
    if user_agent:
        headers['User-Agent'] = user_agent

    out: List[str] = []
    if url:
        payload = do_req(url, headers)
    if not payload:
        raise ValueError('must at least provide a payload or an URL')

    if payload['type'] in ['Collection', 'OrderedCollection']:
        if 'orderedItems' in payload:
            return payload['orderedItems']
        if 'items' in payload:
            return payload['items']
        if 'first' in payload:
            first = payload['first']
            if isinstance(first, str):
                # `first` can be a bare link to the first page instead of an
                # embedded page object: fetch it rather than indexing a string.
                out.extend(parse_collection(url=first, user_agent=user_agent, level=level+1, do_req=do_req))
                return out
            if 'orderedItems' in first:
                out.extend(first['orderedItems'])
            if 'items' in first:
                out.extend(first['items'])
            n = first.get('next')
            if n:
                out.extend(parse_collection(url=n, user_agent=user_agent, level=level+1, do_req=do_req))
            return out

    # Go through all the pages
    while payload:
        if payload['type'] in ['CollectionPage', 'OrderedCollectionPage']:
            if 'orderedItems' in payload:
                out.extend(payload['orderedItems'])
            if 'items' in payload:
                out.extend(payload['items'])
            n = payload.get('next')
            if n is None:
                break
            payload = do_req(n, headers)
        else:
            raise UnexpectedActivityTypeError('unexpected activity type {}'.format(payload['type']))

    return out
|
@@ -1,81 +0,0 @@
|
||||
import logging
|
||||
|
||||
import requests
|
||||
from urllib.parse import urlparse
|
||||
from Crypto.PublicKey import RSA
|
||||
|
||||
from .urlutils import check_url
|
||||
from .errors import ActivityNotFoundError
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class NotAnActorError(Exception):
    """Exception that carries the offending activity on `self.activity`."""

    def __init__(self, activity):
        # The payload (possibly None) is kept so handlers can inspect it.
        self.activity = activity
|
||||
|
||||
|
||||
class ActorService(object):
    """Fetches actor documents and caches them in memory and in `col`.

    `col` and `instances` are used through `find_one`/`update`/`insert`
    (presumably MongoDB collections -- confirm against the caller).
    """

    def __init__(self, user_agent, col, actor_id, actor_data, instances):
        logger.debug(f'Initializing ActorService user_agent={user_agent}')
        self._user_agent = user_agent
        self._col = col
        self._in_mem = {actor_id: actor_data}
        # The actor supplied by the caller is never refreshed over HTTP.
        self._pinned_actor_id = actor_id
        self._instances = instances
        self._known_instances = set()

    def _fetch(self, actor_url):
        """Fetch `actor_url` over HTTP and return the decoded JSON document.

        Raises:
            ActivityNotFoundError: when the server answers 404.
        """
        logger.debug(f'fetching remote object {actor_url}')

        check_url(actor_url)

        resp = requests.get(actor_url, headers={
            'Accept': 'application/activity+json',
            'User-Agent': self._user_agent,
        })
        if resp.status_code == 404:
            raise ActivityNotFoundError(f'{actor_url} cannot be fetched, 404 not found error')

        resp.raise_for_status()
        return resp.json()

    def get(self, actor_url, reload_cache=False):
        """Return the actor document for `actor_url`.

        Lookup order: in-memory cache, then (unless `reload_cache`) the
        persistent cache in `col`, then a remote fetch.

        Raises:
            NotAnActorError: on the non-reload path, when the fetched document
                has no `type` or a `type` other than `Person`.
        """
        logger.info(f'get actor {actor_url} (reload_cache={reload_cache})')

        # A reload request must bypass the in-memory copy, otherwise any actor
        # fetched once in this process could never be refreshed. The actor
        # given at construction time stays authoritative.
        if actor_url in self._in_mem and (not reload_cache or actor_url == self._pinned_actor_id):
            return self._in_mem[actor_url]

        # Record the instance (scheme + host) the first time it is seen.
        instance = urlparse(actor_url)._replace(path='', query='', fragment='').geturl()
        if instance not in self._known_instances:
            self._known_instances.add(instance)
            if not self._instances.find_one({'instance': instance}):
                self._instances.insert({'instance': instance, 'first_object': actor_url})

        if reload_cache:
            actor = self._fetch(actor_url)
            self._in_mem[actor_url] = actor
            self._col.update({'actor_id': actor_url}, {'$set': {'cached_response': actor}}, upsert=True)
            return actor

        cached_actor = self._col.find_one({'actor_id': actor_url})
        if cached_actor:
            return cached_actor['cached_response']

        actor = self._fetch(actor_url)
        if 'type' not in actor:
            raise NotAnActorError(None)
        if actor['type'] != 'Person':
            raise NotAnActorError(actor)

        self._col.update({'actor_id': actor_url}, {'$set': {'cached_response': actor}}, upsert=True)
        self._in_mem[actor_url] = actor
        return actor

    def get_public_key(self, actor_url, reload_cache=False):
        """Return `(key id, imported RSA key)` from the actor's `publicKey`."""
        profile = self.get(actor_url, reload_cache=reload_cache)
        pub = profile['publicKey']
        return pub['id'], RSA.importKey(pub['publicKeyPem'])

    def get_inbox_url(self, actor_url, reload_cache=False):
        """Return the actor's `inbox` value, or None when it has none."""
        profile = self.get(actor_url, reload_cache=reload_cache)
        return profile.get('inbox')
|
@@ -1,58 +0,0 @@
|
||||
import typing
|
||||
import re
|
||||
|
||||
from bleach.linkifier import Linker
|
||||
from markdown import markdown
|
||||
|
||||
from utils.webfinger import get_actor_url
|
||||
from config import USERNAME, BASE_URL, ID
|
||||
from config import ACTOR_SERVICE
|
||||
|
||||
from typing import List, Optional, Tuple, Dict, Any, Union, Type
|
||||
|
||||
|
||||
def set_attrs(attrs, new=False):
    """Linkifier callback: decorate a link's attributes and return them.

    Sets `target`, `class` and `rel`, and copies `href` into `title`.
    `attrs` is mutated in place; `new` is accepted but not used.
    """
    attrs.update({
        (None, u'target'): u'_blank',
        (None, u'class'): u'external',
        (None, u'rel'): u'noopener',
    })
    # The tooltip shows the link target.
    attrs[(None, u'title')] = attrs[(None, u'href')]
    return attrs
|
||||
|
||||
|
||||
# Linkifier whose links are all post-processed by `set_attrs`.
LINKER = Linker(callbacks=[set_attrs])
# "#" followed by one or more digits, word characters or dots.
HASHTAG_REGEX = re.compile(r"(#[\d\w\.]+)")
# "@user@host.tld"-shaped mentions.
MENTION_REGEX = re.compile(r"@[\d\w_.+-]+@[\d\w-]+\.[\d\w\-.]+")
|
||||
|
||||
|
||||
def hashtagify(content: str) -> Tuple[str, List[Dict[str, str]]]:
    """Replace every hashtag in `content` with a link to its tag page.

    Returns:
        The rewritten content and one `Hashtag` tag dict per occurrence.
    """
    tags: List[Dict[str, str]] = []

    def _link(match) -> str:
        hashtag = match.group(0)
        tag = hashtag[1:]
        tags.append(dict(href=f'{BASE_URL}/tags/{tag}', name=hashtag, type='Hashtag'))
        return f'<a href="{BASE_URL}/tags/{tag}" class="mention hashtag" rel="tag">#<span>{tag}</span></a>'

    # Substituting match by match (instead of str.replace on the whole text)
    # keeps a tag that is a prefix of another one (#foo / #foobar) from
    # corrupting the longer tag.
    content = HASHTAG_REGEX.sub(_link, content)
    return content, tags
|
||||
|
||||
|
||||
def mentionify(content: str) -> Tuple[str, List[Dict[str, str]]]:
    """Replace every resolvable `@user@domain` mention with an h-card link.

    Each mention is resolved with `get_actor_url` and the actor is loaded via
    `ACTOR_SERVICE.get`. Mentions that cannot be resolved are left untouched.

    Returns:
        The rewritten content and one `Mention` tag dict per linked mention.
    """
    tags: List[Dict[str, str]] = []

    def _link(match) -> str:
        mention = match.group(0)
        _, username, _domain = mention.split('@')
        actor_url = get_actor_url(mention)
        if actor_url is None:
            # Resolution failed: keep the plain text instead of crashing.
            return mention
        p = ACTOR_SERVICE.get(actor_url)
        tags.append(dict(type='Mention', href=p['id'], name=mention))
        return f'<span class="h-card"><a href="{p["url"]}" class="u-url mention">@<span>{username}</span></a></span>'

    # Substituting match by match avoids rewriting a longer mention that merely
    # starts with a shorter one.
    content = MENTION_REGEX.sub(_link, content)
    return content, tags
|
||||
|
||||
|
||||
def parse_markdown(content: str) -> Tuple[str, List[Dict[str, str]]]:
    """Linkify, hashtagify, mentionify and then render `content` as Markdown.

    Returns:
        The rendered content and the hashtag tags followed by the mention tags.
    """
    linked = LINKER.linkify(content)
    with_hashtags, hashtag_tags = hashtagify(linked)
    with_mentions, mention_tags = mentionify(with_hashtags)
    rendered = markdown(with_mentions)
    return rendered, [*hashtag_tags, *mention_tags]
|
@@ -1,37 +0,0 @@
|
||||
|
||||
class Error(Exception):
    """Base error carrying a message, an HTTP-style status code and a payload."""

    # Default status code; subclasses or the constructor may override it.
    status_code = 400

    def __init__(self, message, status_code=None, payload=None):
        # Hand the message to Exception so str(e) and e.args are meaningful
        # in logs and tracebacks.
        super().__init__(message)
        self.message = message
        if status_code is not None:
            self.status_code = status_code
        self.payload = payload

    def to_dict(self):
        """Return the payload as a dict with the message under `message`."""
        rv = dict(self.payload or ())
        rv['message'] = self.message
        return rv

    def __repr__(self):
        return f'{self.__class__.__qualname__}({self.message!r}, payload={self.payload!r}, status_code={self.status_code})'
|
||||
|
||||
|
||||
class NotFromOutboxError(Error):
    """`Error` subclass; inherits the default status code."""
|
||||
|
||||
class ActivityNotFoundError(Error):
    """`Error` subclass whose default status code is 404."""

    status_code = 404
|
||||
|
||||
|
||||
class BadActivityError(Error):
    """`Error` subclass; inherits the default status code."""
|
||||
|
||||
|
||||
class RecursionLimitExceededError(BadActivityError):
    """`BadActivityError` subclass for an exceeded recursion limit."""
|
||||
|
||||
|
||||
class UnexpectedActivityTypeError(BadActivityError):
    """`BadActivityError` subclass for an unexpected activity type."""
|
@@ -1,94 +0,0 @@
|
||||
"""Implements HTTP signature for Flask requests.
|
||||
|
||||
Mastodon instances won't accept requests that are not signed using this scheme.
|
||||
|
||||
"""
|
||||
from datetime import datetime
|
||||
from urllib.parse import urlparse
|
||||
from typing import Any, Dict, Optional
|
||||
import base64
|
||||
import hashlib
|
||||
import logging
|
||||
|
||||
from flask import request
|
||||
from requests.auth import AuthBase
|
||||
|
||||
from Crypto.Signature import PKCS1_v1_5
|
||||
from Crypto.Hash import SHA256
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _build_signed_string(signed_headers: str, method: str, path: str, headers: Any, body_digest: str) -> str:
|
||||
out = []
|
||||
for signed_header in signed_headers.split(' '):
|
||||
if signed_header == '(request-target)':
|
||||
out.append('(request-target): '+method.lower()+' '+path)
|
||||
elif signed_header == 'digest':
|
||||
out.append('digest: '+body_digest)
|
||||
else:
|
||||
out.append(signed_header+': '+headers[signed_header])
|
||||
return '\n'.join(out)
|
||||
|
||||
|
||||
def _parse_sig_header(val: Optional[str]) -> Optional[Dict[str, str]]:
|
||||
if not val:
|
||||
return None
|
||||
out = {}
|
||||
for data in val.split(','):
|
||||
k, v = data.split('=', 1)
|
||||
out[k] = v[1:len(v)-1]
|
||||
return out
|
||||
|
||||
|
||||
def _verify_h(signed_string, signature, pubkey):
    """Check `signature` over the SHA-256 of `signed_string` using `pubkey`.

    Returns whatever the PKCS1_v1_5 verifier's `verify` returns.
    """
    message_hash = SHA256.new()
    message_hash.update(signed_string.encode('utf-8'))
    verifier = PKCS1_v1_5.new(pubkey)
    return verifier.verify(message_hash, signature)
|
||||
|
||||
|
||||
def _body_digest() -> str:
    """Return `SHA-256=<base64 digest>` for the current request's body."""
    hasher = hashlib.new('sha256')
    hasher.update(request.data)
    encoded = base64.b64encode(hasher.digest()).decode('utf-8')
    return 'SHA-256=' + encoded
|
||||
|
||||
|
||||
def verify_request(actor_service) -> bool:
    """Verify the HTTP signature of the current Flask request.

    Args:
        actor_service: object whose `get_public_key(key_id)` returns a
            `(key id, public key)` pair.

    Returns:
        False when the `Signature` header is missing or malformed, or when a
        header it lists is absent from the request; otherwise the result of
        the signature check.
    """
    hsig = _parse_sig_header(request.headers.get('Signature'))
    if not hsig:
        logger.debug('no signature in header')
        return False
    logger.debug(f'hsig={hsig}')

    # The header is attacker-controlled: missing fields or bad base64 mean
    # "not verified", not a server error.
    try:
        signed_headers = hsig['headers']
        key_id = hsig['keyId']
        signature = base64.b64decode(hsig['signature'])
    except (KeyError, ValueError):
        logger.debug('malformed signature header')
        return False

    try:
        signed_string = _build_signed_string(signed_headers, request.method, request.path, request.headers, _body_digest())
    except KeyError:
        # A header listed as signed is not present on the request.
        logger.debug('signed header missing from request')
        return False

    _, rk = actor_service.get_public_key(key_id)
    return _verify_h(signed_string, signature, rk)
|
||||
|
||||
|
||||
class HTTPSigAuth(AuthBase):
    """`requests` auth hook that signs outgoing requests.

    It sets the `Digest`, `Date` and `Host` headers and adds a `Signature`
    header computed with `privkey` over
    `(request-target) user-agent host date digest content-type`.
    """

    def __init__(self, keyid, privkey):
        self.keyid = keyid
        self.privkey = privkey

    def __call__(self, r):
        logger.info(f'keyid={self.keyid}')
        host = urlparse(r.url).netloc

        # A prepared request body may be None (no body), str or bytes.
        body = r.body
        if body is None:
            body = b''
        elif isinstance(body, str):
            body = body.encode('utf-8')
        bh = hashlib.new('sha256')
        bh.update(body)
        bodydigest = 'SHA-256='+base64.b64encode(bh.digest()).decode('utf-8')

        date = datetime.utcnow().strftime('%a, %d %b %Y %H:%M:%S GMT')
        r.headers.update({'Digest': bodydigest, 'Date': date})
        r.headers.update({'Host': host})

        # The request must already carry User-Agent and Content-Type headers.
        sigheaders = '(request-target) user-agent host date digest content-type'
        to_be_signed = _build_signed_string(sigheaders, r.method, r.path_url, r.headers, bodydigest)
        signer = PKCS1_v1_5.new(self.privkey)
        digest = SHA256.new()
        digest.update(to_be_signed.encode('utf-8'))
        sig = base64.b64encode(signer.sign(digest))
        sig = sig.decode('utf-8')
        headers = {
            'Signature': f'keyId="{self.keyid}",algorithm="rsa-sha256",headers="{sigheaders}",signature="{sig}"'
        }
        logger.info(f'signed request headers={headers}')
        r.headers.update(headers)
        return r
|
54
utils/key.py
54
utils/key.py
@@ -1,22 +1,22 @@
|
||||
import os
|
||||
import binascii
|
||||
|
||||
from Crypto.PublicKey import RSA
|
||||
import os
|
||||
from typing import Callable
|
||||
|
||||
KEY_DIR = os.path.join(
|
||||
os.path.dirname(os.path.abspath(__file__)), '..', 'config'
|
||||
)
|
||||
from little_boxes.key import Key
|
||||
|
||||
KEY_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "config")
|
||||
|
||||
|
||||
def _new_key() -> str:
|
||||
return binascii.hexlify(os.urandom(32)).decode('utf-8')
|
||||
return binascii.hexlify(os.urandom(32)).decode("utf-8")
|
||||
|
||||
|
||||
def get_secret_key(name: str, new_key: Callable[[], str] = _new_key) -> str:
|
||||
key_path = os.path.join(KEY_DIR, f'{name}.key')
|
||||
"""Loads or generates a cryptographic key."""
|
||||
key_path = os.path.join(KEY_DIR, f"{name}.key")
|
||||
if not os.path.exists(key_path):
|
||||
k = new_key()
|
||||
with open(key_path, 'w+') as f:
|
||||
with open(key_path, "w+") as f:
|
||||
f.write(k)
|
||||
return k
|
||||
|
||||
@@ -24,23 +24,19 @@ def get_secret_key(name: str, new_key: Callable[[], str] = _new_key) -> str:
|
||||
return f.read()
|
||||
|
||||
|
||||
class Key(object):
    """RSA key pair stored as `key_<user>_<domain>.pem` under `KEY_DIR`.

    Dots in `user` and `domain` are replaced with underscores in the file
    name. An existing file is loaded; otherwise a new key is generated and
    written, unless `create` is false.
    """

    DEFAULT_KEY_SIZE = 2048

    def __init__(self, user: str, domain: str, create: bool = True) -> None:
        safe_user = user.replace('.', '_')
        safe_domain = domain.replace('.', '_')
        key_path = os.path.join(KEY_DIR, f'key_{safe_user}_{safe_domain}.pem')

        if not os.path.isfile(key_path):
            if not create:
                raise Exception('must init private key first')
            generated = RSA.generate(self.DEFAULT_KEY_SIZE)
            self.privkey_pem = generated.exportKey('PEM').decode('utf-8')
            self.pubkey_pem = generated.publickey().exportKey('PEM').decode('utf-8')
            with open(key_path, 'w') as f:
                f.write(self.privkey_pem)
            self.privkey = generated
            return

        with open(key_path) as f:
            self.privkey_pem = f.read()
        self.privkey = RSA.importKey(self.privkey_pem)
        self.pubkey_pem = self.privkey.publickey().exportKey('PEM').decode('utf-8')
|
||||
def get_key(owner: str, user: str, domain: str) -> Key:
    """Load or generate an RSA key.

    The private key lives in `key_<user>_<domain>.pem` under `KEY_DIR` (dots
    in `user` and `domain` become underscores). An existing file is loaded
    into the key; otherwise a new key is created and its PEM written there.
    """
    k = Key(owner)
    user = user.replace(".", "_")
    domain = domain.replace(".", "_")
    key_path = os.path.join(KEY_DIR, f"key_{user}_{domain}.pem")
    if os.path.isfile(key_path):
        with open(key_path) as f:
            privkey_pem = f.read()
            k.load(privkey_pem)
    else:
        k.new()
        with open(key_path, "w") as f:
            f.write(k.privkey_pem)

    return k
|
||||
|
@@ -1,70 +0,0 @@
|
||||
from pyld import jsonld
|
||||
import hashlib
|
||||
from datetime import datetime
|
||||
|
||||
from Crypto.Signature import PKCS1_v1_5
|
||||
from Crypto.Hash import SHA256
|
||||
import base64
|
||||
|
||||
from typing import Any, Dict
|
||||
|
||||
|
||||
# cache the downloaded "schemas", otherwise the library is super slow
|
||||
# (https://github.com/digitalbazaar/pyld/issues/70)
|
||||
_CACHE: Dict[str, Any] = {}
|
||||
LOADER = jsonld.requests_document_loader()
|
||||
|
||||
def _caching_document_loader(url: str) -> Any:
    """Return the document for `url`, calling `LOADER` only on a cache miss."""
    try:
        return _CACHE[url]
    except KeyError:
        pass
    document = LOADER(url)
    _CACHE[url] = document
    return document
|
||||
|
||||
jsonld.set_document_loader(_caching_document_loader)
|
||||
|
||||
|
||||
def options_hash(doc):
    """Return the SHA-256 hex digest of the normalized signature options.

    The options are `doc['signature']` without `type`, `id` and
    `signatureValue`, with `@context` set to the identity context.
    """
    excluded = ('type', 'id', 'signatureValue')
    options = {key: value for key, value in dict(doc['signature']).items() if key not in excluded}
    options['@context'] = 'https://w3id.org/identity/v1'
    normalized = jsonld.normalize(options, {'algorithm': 'URDNA2015', 'format': 'application/nquads'})
    hasher = hashlib.new('sha256')
    hasher.update(normalized.encode('utf-8'))
    return hasher.hexdigest()
|
||||
|
||||
|
||||
def doc_hash(doc):
    """Return the SHA-256 hex digest of `doc` normalized without `signature`.

    Works on a shallow copy, so the caller's document is left untouched.
    """
    unsigned = dict(doc)
    unsigned.pop('signature', None)
    normalized = jsonld.normalize(unsigned, {'algorithm': 'URDNA2015', 'format': 'application/nquads'})
    hasher = hashlib.new('sha256')
    hasher.update(normalized.encode('utf-8'))
    return hasher.hexdigest()
|
||||
|
||||
|
||||
def verify_signature(doc, pubkey):
    """Verify `doc['signature']['signatureValue']` against `pubkey`.

    The signed material is the options hash followed by the document hash.
    Returns whatever the PKCS1_v1_5 verifier's `verify` returns.
    """
    to_be_signed = options_hash(doc) + doc_hash(doc)
    raw_signature = base64.b64decode(doc['signature']['signatureValue'])
    message_hash = SHA256.new()
    message_hash.update(to_be_signed.encode('utf-8'))
    verifier = PKCS1_v1_5.new(pubkey)
    return verifier.verify(message_hash, raw_signature)
|
||||
|
||||
|
||||
def generate_signature(doc, privkey):
    """Sign `doc` in place by filling in `doc['signature']`.

    The options record the signature type, a creator derived from
    `doc['actor']`, and the current UTC time. They are attached to `doc`
    before hashing, and `signatureValue` is added once the signature is known.
    """
    created = datetime.utcnow().replace(microsecond=0).isoformat() + 'Z'
    options = dict(
        type='RsaSignature2017',
        creator=doc['actor'] + '#main-key',
        created=created,
    )
    doc['signature'] = options

    to_be_signed = options_hash(doc) + doc_hash(doc)
    message_hash = SHA256.new()
    message_hash.update(to_be_signed.encode('utf-8'))
    raw_signature = PKCS1_v1_5.new(privkey).sign(message_hash)
    options['signatureValue'] = base64.b64encode(raw_signature).decode('utf-8')
|
@@ -1,67 +1,21 @@
|
||||
import requests
|
||||
from urllib.parse import urlparse
|
||||
import logging
|
||||
|
||||
from .urlutils import check_url
|
||||
from .errors import ActivityNotFoundError
|
||||
from little_boxes.activitypub import get_backend
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ObjectService(object):
    """In-memory cache in front of the backend's `fetch_iri`."""

    def __init__(self):
        logger.debug("Initializing ObjectService")
        self._cache = {}

    def get(self, iri, reload_cache=False):
        """Return the object identified by `iri`.

        A cached copy is returned unless `reload_cache` is true; otherwise the
        object is fetched through `get_backend().fetch_iri` and cached.
        """
        logger.info(f"get object {iri} (reload_cache={reload_cache})")

        if not reload_cache and iri in self._cache:
            return self._cache[iri]

        obj = get_backend().fetch_iri(iri)
        self._cache[iri] = obj
        return obj
|
||||
|
@@ -1,36 +1,34 @@
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import ipaddress
|
||||
import opengraph
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from .urlutils import is_url_valid, check_url
|
||||
from little_boxes.urlutils import check_url
|
||||
from little_boxes.urlutils import is_url_valid
|
||||
|
||||
|
||||
def links_from_note(note):
    """Return the set of valid external links found in a note's content.

    Links whose `href` already appears in the note's `tag` entries are
    skipped, as are anchors without an `href`.
    """
    tags_href = set()
    for t in note.get("tag", []):
        h = t.get("href")
        if h:
            # TODO(tsileo): fetch the URL for Actor profile, type=mention
            tags_href.add(h)

    links = set()
    soup = BeautifulSoup(note["content"])
    for link in soup.find_all("a"):
        h = link.get("href")
        if not h:
            # An <a> without href yields None, which has no startswith().
            continue
        if h.startswith("http") and h not in tags_href and is_url_valid(h):
            links.add(h)

    return links
|
||||
|
||||
|
||||
def fetch_og_metadata(user_agent, col, remote_id):
|
||||
doc = col.find_one({'remote_id': remote_id})
|
||||
doc = col.find_one({"remote_id": remote_id})
|
||||
if not doc:
|
||||
raise ValueError
|
||||
note = doc['activity']['object']
|
||||
note = doc["activity"]["object"]
|
||||
print(note)
|
||||
links = links_from_note(note)
|
||||
if not links:
|
||||
@@ -39,9 +37,11 @@ def fetch_og_metadata(user_agent, col, remote_id):
|
||||
htmls = []
|
||||
for l in links:
|
||||
check_url(l)
|
||||
r = requests.get(l, headers={'User-Agent': user_agent})
|
||||
r = requests.get(l, headers={"User-Agent": user_agent})
|
||||
r.raise_for_status()
|
||||
htmls.append(r.text)
|
||||
links_og_metadata = [dict(opengraph.OpenGraph(html=html)) for html in htmls]
|
||||
col.update_one({'remote_id': remote_id}, {'$set': {'meta.og_metadata': links_og_metadata}})
|
||||
col.update_one(
|
||||
{"remote_id": remote_id}, {"$set": {"meta.og_metadata": links_og_metadata}}
|
||||
)
|
||||
return len(links)
|
||||
|
@@ -1,47 +0,0 @@
|
||||
import logging
|
||||
import os
|
||||
import socket
|
||||
import ipaddress
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from . import strtobool
|
||||
from .errors import Error
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class InvalidURLError(Error):
    """`Error` subclass for an invalid URL."""
|
||||
|
||||
|
||||
def is_url_valid(url: str) -> bool:
    """Tell whether `url` is an http(s) URL that is safe to request.

    Outside debug mode a URL is rejected when it has no host, targets
    `localhost`, has an invalid port, cannot be resolved, or resolves to at
    least one private address.
    """
    parsed = urlparse(url)
    if parsed.scheme not in ['http', 'https']:
        return False

    # XXX in debug mode, we want to allow requests to localhost to test the federation with local instances
    debug_mode = strtobool(os.getenv('MICROBLOGPUB_DEBUG', 'false'))
    if debug_mode:
        return True

    hostname = parsed.hostname
    if not hostname or hostname in ['localhost']:
        return False

    try:
        # `.port` raises ValueError for a non-numeric or out-of-range port.
        port = parsed.port or 80
    except ValueError:
        logger.info(f'rejecting URL with invalid port {url}')
        return False

    try:
        addrinfos = socket.getaddrinfo(hostname, port)
    except socket.gaierror:
        logger.exception(f'failed to lookup url {url}')
        return False

    if not addrinfos:
        return False

    # Check every resolved address, not just the first: a host name may
    # resolve to a mix of public and private addresses.
    for addrinfo in addrinfos:
        # Drop a possible IPv6 zone id ("fe80::1%eth0") before parsing.
        ip_address = addrinfo[4][0].split('%', 1)[0]
        if ipaddress.ip_address(ip_address).is_private:
            logger.info(f'rejecting private URL {url}')
            return False

    return True
|
||||
|
||||
|
||||
def check_url(url: str) -> None:
    """Raise `InvalidURLError` unless `is_url_valid(url)` holds."""
    if is_url_valid(url):
        return None

    raise InvalidURLError(f'"{url}" is invalid')
|
@@ -1,75 +0,0 @@
|
||||
from urllib.parse import urlparse
|
||||
from typing import Dict, Any
|
||||
from typing import Optional
|
||||
import logging
|
||||
|
||||
import requests
|
||||
|
||||
from .urlutils import check_url
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def webfinger(resource: str) -> Optional[Dict[str, Any]]:
    """Mastodon-like WebFinger resolution to retrieve the activity stream Actor URL.

    `resource` may be an http(s) URL, or an account written as
    `user@host`, `@user@host` or `acct:user@host`. HTTPS is tried first,
    unless `resource` is an `http://` URL, in which case HTTP is tried first.

    Returns:
        The decoded WebFinger document, or None when the server answers 404
        or cannot be reached over either protocol.
    """
    logger.info(f'performing webfinger resolution for {resource}')
    protos = ['https', 'http']
    if resource.startswith('http://'):
        protos.reverse()
        host = urlparse(resource).netloc
    elif resource.startswith('https://'):
        host = urlparse(resource).netloc
    else:
        if resource.startswith('acct:'):
            resource = resource[5:]
        if resource.startswith('@'):
            resource = resource[1:]
        _, host = resource.split('@', 1)
        resource = 'acct:' + resource

    # Security check on the url (like not calling localhost)
    check_url(f'https://{host}')

    resp = None
    for proto in protos:
        url = f'{proto}://{host}/.well-known/webfinger'
        try:
            resp = requests.get(
                url,
                {'resource': resource}
            )
        except requests.ConnectionError:
            # The domain may only be reachable over the other protocol.
            continue
        # Stop at the first protocol that answers instead of querying both.
        break

    if resp is None:
        logger.info(f'webfinger resolution failed for {resource}: host unreachable')
        return None
    if resp.status_code == 404:
        return None
    resp.raise_for_status()
    return resp.json()
|
||||
|
||||
|
||||
def get_remote_follow_template(resource: str) -> Optional[str]:
    """Return the OStatus subscribe template advertised for `resource`.

    Returns:
        The `template` of the first link whose `rel` is
        `http://ostatus.org/schema/1.0/subscribe`, or None when the WebFinger
        lookup returns nothing or no such link exists.
    """
    data = webfinger(resource)
    if data is None:
        return None
    # `links` is optional in a WebFinger document.
    for link in data.get('links', []):
        if link.get('rel') == 'http://ostatus.org/schema/1.0/subscribe':
            return link.get('template')
    return None
|
||||
|
||||
|
||||
def get_actor_url(resource: str) -> Optional[str]:
    """Mastodon-like WebFinger resolution to retrieve the activity stream Actor URL.

    Returns:
        the Actor URL or None if the resolution failed.
    """
    data = webfinger(resource)
    if data is None:
        return None
    # `links` is optional in a WebFinger document.
    for link in data.get('links', []):
        if link.get('rel') == 'self' and link.get('type') == 'application/activity+json':
            return link.get('href')
    return None
|
Reference in New Issue
Block a user