Switch to Little boxes, fixes #1 (#8)

This commit is contained in:
Thomas Sileo
2018-06-19 00:10:19 +02:00
committed by GitHub
parent 070e39bdfe
commit 8d5f4a8e98
20 changed files with 1529 additions and 2640 deletions

View File

@@ -4,9 +4,9 @@ logger = logging.getLogger(__name__)
def strtobool(s: str) -> bool:
if s in ['y', 'yes', 'true', 'on', '1']:
if s in ["y", "yes", "true", "on", "1"]:
return True
if s in ['n', 'no', 'false', 'off', '0']:
if s in ["n", "no", "false", "off", "0"]:
return False
raise ValueError(f'cannot convert {s} to bool')
raise ValueError(f"cannot convert {s} to bool")

View File

@@ -1,65 +0,0 @@
from typing import Optional, Dict, List, Any
import requests
from .errors import RecursionLimitExceededError
from .errors import UnexpectedActivityTypeError
def _do_req(url: str, headers: Dict[str, str]) -> Dict[str, Any]:
    """GET `url` with `headers` and return the decoded JSON body.

    Whatever `raise_for_status()` raises for an error response propagates.
    """
    resp = requests.get(url, headers=headers)
    resp.raise_for_status()
    return resp.json()


def parse_collection(
    payload: Optional[Dict[str, Any]] = None,
    url: Optional[str] = None,
    user_agent: Optional[str] = None,
    level: int = 0,
    do_req: Any = _do_req,
) -> List[str]:
    """Resolve/fetch a `Collection`/`OrderedCollection`.

    Args:
        payload: an already-fetched collection (or collection page) document.
        url: where to fetch the document from; when given, it replaces `payload`.
        user_agent: optional `User-Agent` header sent with every request.
        level: current recursion depth (callers normally leave it at 0).
        do_req: callable `(url, headers) -> dict` used to fetch documents.

    Returns:
        The items of the collection, following `next` links page by page.

    Raises:
        RecursionLimitExceededError: when `level` is greater than 3.
        ValueError: when neither a payload nor a URL yields a document.
        UnexpectedActivityTypeError: when a page is not a (Ordered)CollectionPage.
    """
    if level > 3:
        raise RecursionLimitExceededError('recursion limit exceeded')

    headers = {'Accept': 'application/activity+json'}
    if user_agent:
        headers['User-Agent'] = user_agent

    if url:
        payload = do_req(url, headers)
    if not payload:
        raise ValueError('must at least prove a payload or an URL')

    out: List[str] = []

    if payload['type'] in ['Collection', 'OrderedCollection']:
        # Items embedded directly in the collection win over paging.
        if 'orderedItems' in payload:
            return payload['orderedItems']
        if 'items' in payload:
            return payload['items']
        if 'first' in payload:
            first = payload['first']
            if isinstance(first, str):
                # `first` is only a link to the first page: fetch it and let
                # the page loop below (in the recursive call) walk the rest.
                out.extend(parse_collection(
                    url=first, user_agent=user_agent, level=level + 1, do_req=do_req,
                ))
            else:
                if 'orderedItems' in first:
                    out.extend(first['orderedItems'])
                if 'items' in first:
                    out.extend(first['items'])
                n = first.get('next')
                if n:
                    out.extend(parse_collection(
                        url=n, user_agent=user_agent, level=level + 1, do_req=do_req,
                    ))
        return out

    # Not a collection: it must be a page; go through all the pages.
    while payload:
        if payload['type'] not in ['CollectionPage', 'OrderedCollectionPage']:
            raise UnexpectedActivityTypeError('unexpected activity type {}'.format(payload['type']))
        if 'orderedItems' in payload:
            out.extend(payload['orderedItems'])
        if 'items' in payload:
            out.extend(payload['items'])
        n = payload.get('next')
        if n is None:
            break
        payload = do_req(n, headers)

    return out

View File

@@ -1,81 +0,0 @@
import logging
import requests
from urllib.parse import urlparse
from Crypto.PublicKey import RSA
from .urlutils import check_url
from .errors import ActivityNotFoundError
logger = logging.getLogger(__name__)
class NotAnActorError(Exception):
    """Exception carrying the activity that was rejected.

    The offending document (or `None`) is kept on the `activity` attribute.
    """

    def __init__(self, activity) -> None:
        # Deliberately no super().__init__ call, exactly as before.
        self.activity = activity
class ActorService(object):
    """Resolve actor documents, caching them in memory and in `col`.

    The local actor (`actor_id`/`actor_data`) is seeded into the in-memory
    cache and is never fetched over HTTP.
    """

    def __init__(self, user_agent, col, actor_id, actor_data, instances):
        logger.debug(f'Initializing ActorService user_agent={user_agent}')
        self._user_agent = user_agent
        self._col = col
        self._local_actor_id = actor_id
        self._in_mem = {actor_id: actor_data}
        self._instances = instances
        self._known_instances = set()

    def _fetch(self, actor_url):
        """Fetch `actor_url` over HTTP and return the decoded JSON document.

        Raises:
            ActivityNotFoundError: on a 404 response.
        """
        logger.debug(f'fetching remote object {actor_url}')

        check_url(actor_url)

        resp = requests.get(actor_url, headers={
            'Accept': 'application/activity+json',
            'User-Agent': self._user_agent,
        })
        if resp.status_code == 404:
            raise ActivityNotFoundError(f'{actor_url} cannot be fetched, 404 not found error')

        resp.raise_for_status()
        return resp.json()

    def _register_instance(self, actor_url):
        """Record the instance (scheme + host) of `actor_url` the first time it is seen."""
        instance = urlparse(actor_url)._replace(path='', query='', fragment='').geturl()
        if instance in self._known_instances:
            return
        self._known_instances.add(instance)
        if not self._instances.find_one({'instance': instance}):
            self._instances.insert({'instance': instance, 'first_object': actor_url})

    def get(self, actor_url, reload_cache=False):
        """Return the actor document for `actor_url`.

        Lookup order: in-memory cache, then `col`, then a remote fetch.
        With `reload_cache=True` both caches are bypassed and refreshed
        (previously an actor already held in memory was never reloaded).

        Raises:
            NotAnActorError: when the fetched document has no `type` or is
                not a `Person`; nothing is cached in that case.
        """
        logger.info(f'get actor {actor_url} (reload_cache={reload_cache})')

        # The local actor only exists in memory; there is nothing to reload.
        if actor_url == self._local_actor_id:
            return self._in_mem[actor_url]
        if not reload_cache and actor_url in self._in_mem:
            return self._in_mem[actor_url]

        self._register_instance(actor_url)

        if not reload_cache:
            cached_actor = self._col.find_one({'actor_id': actor_url})
            if cached_actor:
                return cached_actor['cached_response']

        actor = self._fetch(actor_url)
        # Validate on every fetch so a reload cannot poison the caches.
        if 'type' not in actor:
            raise NotAnActorError(None)
        if actor['type'] != 'Person':
            raise NotAnActorError(actor)

        self._col.update({'actor_id': actor_url}, {'$set': {'cached_response': actor}}, upsert=True)
        self._in_mem[actor_url] = actor
        return actor

    def get_public_key(self, actor_url, reload_cache=False):
        """Return `(key id, imported RSA key)` from the actor's `publicKey`."""
        profile = self.get(actor_url, reload_cache=reload_cache)
        pub = profile['publicKey']
        return pub['id'], RSA.importKey(pub['publicKeyPem'])

    def get_inbox_url(self, actor_url, reload_cache=False):
        """Return the actor's `inbox` value, or None when it has none."""
        profile = self.get(actor_url, reload_cache=reload_cache)
        return profile.get('inbox')

View File

@@ -1,58 +0,0 @@
import typing
import re
from bleach.linkifier import Linker
from markdown import markdown
from utils.webfinger import get_actor_url
from config import USERNAME, BASE_URL, ID
from config import ACTOR_SERVICE
from typing import List, Optional, Tuple, Dict, Any, Union, Type
def set_attrs(attrs, new=False):
    """Linkifier callback marking a link as external.

    Sets `target`, `class` and `rel`, and copies `href` into `title`.
    `attrs` is mutated and returned; `new` is accepted but not used.
    """
    attrs[(None, u'target')] = u'_blank'
    attrs[(None, u'class')] = u'external'
    attrs[(None, u'rel')] = u'noopener'
    # An anchor without href has nothing to show as title; previously this
    # raised KeyError.
    href = attrs.get((None, u'href'))
    if href is not None:
        attrs[(None, u'title')] = href
    return attrs
# Linkifier shared by parse_markdown; every link goes through set_attrs.
LINKER = Linker(callbacks=[set_attrs])
# A "#" followed by one or more word characters, digits or dots (captured with the "#").
HASHTAG_REGEX = re.compile(r"(#[\d\w\.]+)")
# A fully qualified mention of the form "@user@domain.tld".
MENTION_REGEX = re.compile(r"@[\d\w_.+-]+@[\d\w-]+\.[\d\w\-.]+")
def hashtagify(content: str) -> Tuple[str, List[Dict[str, str]]]:
    """Turn every `#hashtag` in `content` into a link to its tag page.

    Returns:
        `(content, tags)` where `tags` holds one `Hashtag` dict per distinct
        hashtag, in order of first appearance.
    """
    tags: List[Dict[str, str]] = []
    seen = set()

    def _link(match):
        hashtag = match.group(1)
        tag = hashtag[1:]
        if hashtag not in seen:
            seen.add(hashtag)
            tags.append(dict(href=f'{BASE_URL}/tags/{tag}', name=hashtag, type='Hashtag'))
        return f'<a href="{BASE_URL}/tags/{tag}" class="mention hashtag" rel="tag">#<span>{tag}</span></a>'

    # One regex pass replaces each match exactly where it stands. The old
    # str.replace loop rewrote "#foo" inside "#foobar" as well.
    return HASHTAG_REGEX.sub(_link, content), tags
def mentionify(content: str) -> Tuple[str, List[Dict[str, str]]]:
    """Turn every `@user@domain` mention in `content` into an h-card link.

    Each distinct mention is resolved once through `get_actor_url` and
    `ACTOR_SERVICE`. A mention that cannot be resolved is left as plain text.

    Returns:
        `(content, tags)` where `tags` holds one `Mention` dict per distinct
        resolved mention.
    """
    from html import escape

    tags: List[Dict[str, str]] = []
    links: Dict[str, Optional[str]] = {}  # mention -> replacement HTML, or None

    def _link(match):
        mention = match.group(0)
        if mention not in links:
            links[mention] = None
            actor_url = get_actor_url(mention)
            if actor_url:
                p = ACTOR_SERVICE.get(actor_url)
                _, username, _domain = mention.split('@')
                # The profile comes from a remote server: escape it before
                # putting it into an attribute. Fall back to the id when the
                # actor has no `url`.
                href = escape(p.get('url') or p['id'])
                tags.append(dict(type='Mention', href=p['id'], name=mention))
                links[mention] = (
                    f'<span class="h-card"><a href="{href}" class="u-url mention">'
                    f'@<span>{username}</span></a></span>'
                )
        return links[mention] or mention

    return MENTION_REGEX.sub(_link, content), tags
def parse_markdown(content: str) -> Tuple[str, List[Dict[str, str]]]:
    """Render `content` and collect the tags found in it.

    Pipeline: linkify, then hashtags, then mentions, then Markdown.
    Returns `(rendered content, hashtag tags followed by mention tags)`.
    """
    linked = LINKER.linkify(content)
    with_hashtags, hashtag_tags = hashtagify(linked)
    with_mentions, mention_tags = mentionify(with_hashtags)
    rendered = markdown(with_mentions)
    return rendered, [*hashtag_tags, *mention_tags]

View File

@@ -1,37 +0,0 @@
class Error(Exception):
    """Base error carrying a message, an HTTP-style status code and a payload."""

    # Default status code; subclasses or the constructor may override it.
    status_code = 400

    def __init__(self, message, status_code=None, payload=None):
        # Pass the message up so str(err) and tracebacks are not empty.
        super().__init__(message)
        self.message = message
        if status_code is not None:
            self.status_code = status_code
        self.payload = payload

    def to_dict(self):
        """Return the payload (if any) as a dict, plus a `message` key."""
        rv = dict(self.payload or ())
        rv['message'] = self.message
        return rv

    def __repr__(self):
        return f'{self.__class__.__qualname__}({self.message!r}, payload={self.payload!r}, status_code={self.status_code})'
class NotFromOutboxError(Error):
    """`Error` subclass; keeps the default status code."""


class ActivityNotFoundError(Error):
    """`Error` subclass reported with a 404 status code."""

    status_code = 404


class BadActivityError(Error):
    """`Error` subclass; parent of the more specific activity errors below."""


class RecursionLimitExceededError(BadActivityError):
    """`BadActivityError` subclass with no extra behaviour."""


class UnexpectedActivityTypeError(BadActivityError):
    """`BadActivityError` subclass with no extra behaviour."""

View File

@@ -1,94 +0,0 @@
"""Implements HTTP signature for Flask requests.
Mastodon instances won't accept requests that are not signed using this scheme.
"""
from datetime import datetime
from urllib.parse import urlparse
from typing import Any, Dict, Optional
import base64
import hashlib
import logging
from flask import request
from requests.auth import AuthBase
from Crypto.Signature import PKCS1_v1_5
from Crypto.Hash import SHA256
logger = logging.getLogger(__name__)
def _build_signed_string(signed_headers: str, method: str, path: str, headers: Any, body_digest: str) -> str:
out = []
for signed_header in signed_headers.split(' '):
if signed_header == '(request-target)':
out.append('(request-target): '+method.lower()+' '+path)
elif signed_header == 'digest':
out.append('digest: '+body_digest)
else:
out.append(signed_header+': '+headers[signed_header])
return '\n'.join(out)
def _parse_sig_header(val: Optional[str]) -> Optional[Dict[str, str]]:
if not val:
return None
out = {}
for data in val.split(','):
k, v = data.split('=', 1)
out[k] = v[1:len(v)-1]
return out
def _verify_h(signed_string, signature, pubkey):
    """Check `signature` against the SHA-256 digest of `signed_string` with `pubkey`.

    Returns whatever the PKCS#1 v1.5 verifier returns.
    """
    verifier = PKCS1_v1_5.new(pubkey)
    hashed = SHA256.new()
    hashed.update(signed_string.encode('utf-8'))
    outcome = verifier.verify(hashed, signature)
    return outcome
def _body_digest() -> str:
    """Return the `SHA-256=<base64>` digest of the current request body."""
    raw = hashlib.sha256(request.data).digest()
    encoded = base64.b64encode(raw).decode('utf-8')
    return 'SHA-256=' + encoded
def verify_request(actor_service) -> bool:
    """Verify the HTTP signature of the current Flask request.

    Args:
        actor_service: object whose `get_public_key(key_id)` returns
            `(key id, public key)`.

    Returns:
        True when the signature verifies. False when the header is missing
        or malformed (no `keyId`/`signature`, undecodable signature, or a
        signed header that the request does not carry).
    """
    hsig = _parse_sig_header(request.headers.get('Signature'))
    if not hsig:
        logger.debug('no signature in header')
        return False
    logger.debug(f'hsig={hsig}')

    key_id = hsig.get('keyId')
    raw_signature = hsig.get('signature')
    if not key_id or not raw_signature:
        logger.debug('signature header lacks keyId or signature')
        return False

    try:
        # binascii.Error is a ValueError subclass.
        signature = base64.b64decode(raw_signature)
    except ValueError:
        logger.debug('signature is not valid base64')
        return False

    try:
        # Per the HTTP Signatures draft, `headers` defaults to "date".
        signed_string = _build_signed_string(
            hsig.get('headers', 'date'), request.method, request.path, request.headers, _body_digest(),
        )
    except KeyError:
        logger.debug('a signed header is missing from the request')
        return False

    _, rk = actor_service.get_public_key(key_id)
    return _verify_h(signed_string, signature, rk)
class HTTPSigAuth(AuthBase):
    """`requests` auth hook that signs outgoing requests (rsa-sha256).

    Adds `Digest`, `Date`, `Host` and `Signature` headers to the request.
    """

    def __init__(self, keyid, privkey):
        self.keyid = keyid
        self.privkey = privkey

    def __call__(self, r):
        logger.info(f'keyid={self.keyid}')
        host = urlparse(r.url).netloc

        # The prepared body may be None (no body), bytes or str; only str
        # needs encoding. Previously bytes/None raised AttributeError.
        body = r.body
        if body is None:
            body = b''
        elif isinstance(body, str):
            body = body.encode('utf-8')
        bh = hashlib.new('sha256')
        bh.update(body)
        bodydigest = 'SHA-256='+base64.b64encode(bh.digest()).decode('utf-8')

        date = datetime.utcnow().strftime('%a, %d %b %Y %H:%M:%S GMT')

        r.headers.update({'Digest': bodydigest, 'Date': date})
        r.headers.update({'Host': host})

        # The request must already carry User-Agent and Content-Type headers,
        # since both are part of the signed header list.
        sigheaders = '(request-target) user-agent host date digest content-type'
        to_be_signed = _build_signed_string(sigheaders, r.method, r.path_url, r.headers, bodydigest)
        signer = PKCS1_v1_5.new(self.privkey)
        digest = SHA256.new()
        digest.update(to_be_signed.encode('utf-8'))
        sig = base64.b64encode(signer.sign(digest))
        sig = sig.decode('utf-8')
        headers = {
            'Signature': f'keyId="{self.keyid}",algorithm="rsa-sha256",headers="{sigheaders}",signature="{sig}"'
        }
        logger.info(f'signed request headers={headers}')
        r.headers.update(headers)

        return r

View File

@@ -1,22 +1,22 @@
import os
import binascii
from Crypto.PublicKey import RSA
import os
from typing import Callable
KEY_DIR = os.path.join(
os.path.dirname(os.path.abspath(__file__)), '..', 'config'
)
from little_boxes.key import Key
KEY_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "config")
def _new_key() -> str:
return binascii.hexlify(os.urandom(32)).decode('utf-8')
return binascii.hexlify(os.urandom(32)).decode("utf-8")
def get_secret_key(name: str, new_key: Callable[[], str] = _new_key) -> str:
key_path = os.path.join(KEY_DIR, f'{name}.key')
"""Loads or generates a cryptographic key."""
key_path = os.path.join(KEY_DIR, f"{name}.key")
if not os.path.exists(key_path):
k = new_key()
with open(key_path, 'w+') as f:
with open(key_path, "w+") as f:
f.write(k)
return k
@@ -24,23 +24,19 @@ def get_secret_key(name: str, new_key: Callable[[], str] = _new_key) -> str:
return f.read()
class Key(object):
DEFAULT_KEY_SIZE = 2048
def __init__(self, user: str, domain: str, create: bool = True) -> None:
user = user.replace('.', '_')
domain = domain.replace('.', '_')
key_path = os.path.join(KEY_DIR, f'key_{user}_{domain}.pem')
if os.path.isfile(key_path):
with open(key_path) as f:
self.privkey_pem = f.read()
self.privkey = RSA.importKey(self.privkey_pem)
self.pubkey_pem = self.privkey.publickey().exportKey('PEM').decode('utf-8')
else:
if not create:
raise Exception('must init private key first')
k = RSA.generate(self.DEFAULT_KEY_SIZE)
self.privkey_pem = k.exportKey('PEM').decode('utf-8')
self.pubkey_pem = k.publickey().exportKey('PEM').decode('utf-8')
with open(key_path, 'w') as f:
f.write(self.privkey_pem)
self.privkey = k
def get_key(owner: str, user: str, domain: str) -> Key:
""""Loads or generates an RSA key."""
k = Key(owner)
user = user.replace(".", "_")
domain = domain.replace(".", "_")
key_path = os.path.join(KEY_DIR, f"key_{user}_{domain}.pem")
if os.path.isfile(key_path):
with open(key_path) as f:
privkey_pem = f.read()
k.load(privkey_pem)
else:
k.new()
with open(key_path, "w") as f:
f.write(k.privkey_pem)
return k

View File

@@ -1,70 +0,0 @@
from pyld import jsonld
import hashlib
from datetime import datetime
from Crypto.Signature import PKCS1_v1_5
from Crypto.Hash import SHA256
import base64
from typing import Any, Dict
# cache the downloaded "schemas", otherwise the library is super slow
# (https://github.com/digitalbazaar/pyld/issues/70)
_CACHE: Dict[str, Any] = {}
LOADER = jsonld.requests_document_loader()
def _caching_document_loader(url: str) -> Any:
if url in _CACHE:
return _CACHE[url]
resp = LOADER(url)
_CACHE[url] = resp
return resp
jsonld.set_document_loader(_caching_document_loader)
def options_hash(doc):
    """Return the SHA-256 hex digest of the normalized signature options.

    Works on a copy of `doc['signature']` without `type`, `id` and
    `signatureValue`, under the identity/v1 context.
    """
    options = dict(doc['signature'])
    for key in ('type', 'id', 'signatureValue'):
        options.pop(key, None)
    options['@context'] = 'https://w3id.org/identity/v1'
    nquads = jsonld.normalize(options, {'algorithm': 'URDNA2015', 'format': 'application/nquads'})
    return hashlib.sha256(nquads.encode('utf-8')).hexdigest()
def doc_hash(doc):
    """Return the SHA-256 hex digest of the normalized `doc` without its `signature`."""
    unsigned = dict(doc)
    unsigned.pop('signature', None)
    nquads = jsonld.normalize(unsigned, {'algorithm': 'URDNA2015', 'format': 'application/nquads'})
    return hashlib.sha256(nquads.encode('utf-8')).hexdigest()
def verify_signature(doc, pubkey):
    """Verify `doc['signature']['signatureValue']` against `doc` with `pubkey`.

    The signed material is the options hash followed by the document hash.
    """
    material = options_hash(doc) + doc_hash(doc)
    encoded_signature = doc['signature']['signatureValue']
    verifier = PKCS1_v1_5.new(pubkey)
    hashed = SHA256.new()
    hashed.update(material.encode('utf-8'))
    return verifier.verify(hashed, base64.b64decode(encoded_signature))
def generate_signature(doc, privkey):
    """Sign `doc` in place with `privkey`.

    Sets `doc['signature']` to an `RsaSignature2017` options dict whose
    creator is `<actor>#main-key`, then fills in its `signatureValue`.
    Returns None.
    """
    creator = doc['actor'] + '#main-key'
    created = datetime.utcnow().replace(microsecond=0).isoformat() + 'Z'
    options = {'type': 'RsaSignature2017', 'creator': creator, 'created': created}
    # The options must be attached before hashing: options_hash reads them from doc.
    doc['signature'] = options
    material = options_hash(doc) + doc_hash(doc)
    signer = PKCS1_v1_5.new(privkey)
    hashed = SHA256.new()
    hashed.update(material.encode('utf-8'))
    encoded = base64.b64encode(signer.sign(hashed))
    options['signatureValue'] = encoded.decode('utf-8')

View File

@@ -1,67 +1,21 @@
import requests
from urllib.parse import urlparse
import logging
from .urlutils import check_url
from .errors import ActivityNotFoundError
from little_boxes.activitypub import get_backend
logger = logging.getLogger(__name__)
class ObjectService(object):
def __init__(self, user_agent, col, inbox, outbox, instances):
self._user_agent = user_agent
self._col = col
self._inbox = inbox
self._outbox = outbox
self._instances = instances
self._known_instances = set()
def __init__(self):
logger.debug("Initializing ObjectService")
self._cache = {}
def _fetch_remote(self, object_id):
print(f'fetch remote {object_id}')
check_url(object_id)
resp = requests.get(object_id, headers={
'Accept': 'application/activity+json',
'User-Agent': self._user_agent,
})
if resp.status_code == 404:
raise ActivityNotFoundError(f'{object_id} cannot be fetched, 404 error not found')
def get(self, iri, reload_cache=False):
logger.info(f"get actor {iri} (reload_cache={reload_cache})")
resp.raise_for_status()
return resp.json()
def _fetch(self, object_id):
instance = urlparse(object_id)._replace(path='', query='', fragment='').geturl()
if instance not in self._known_instances:
self._known_instances.add(instance)
if not self._instances.find_one({'instance': instance}):
self._instances.insert({'instance': instance, 'first_object': object_id})
obj = self._inbox.find_one({'$or': [{'remote_id': object_id}, {'type': 'Create', 'activity.object.id': object_id}]})
if obj:
if obj['remote_id'] == object_id:
return obj['activity']
return obj['activity']['object']
obj = self._outbox.find_one({'$or': [{'remote_id': object_id}, {'type': 'Create', 'activity.object.id': object_id}]})
if obj:
if obj['remote_id'] == object_id:
return obj['activity']
return obj['activity']['object']
return self._fetch_remote(object_id)
def get(self, object_id, reload_cache=False, part_of_stream=False, announce_published=None):
if reload_cache:
obj = self._fetch(object_id)
self._col.update({'object_id': object_id}, {'$set': {'cached_object': obj, 'meta.part_of_stream': part_of_stream, 'meta.announce_published': announce_published}}, upsert=True)
return obj
cached_object = self._col.find_one({'object_id': object_id})
if cached_object:
print(f'ObjectService: {cached_object}')
return cached_object['cached_object']
obj = self._fetch(object_id)
self._col.update({'object_id': object_id}, {'$set': {'cached_object': obj, 'meta.part_of_stream': part_of_stream, 'meta.announce_published': announce_published}}, upsert=True)
# print(f'ObjectService: {obj}')
if not reload_cache and iri in self._cache:
return self._cache[iri]
obj = get_backend().fetch_iri(iri)
self._cache[iri] = obj
return obj

View File

@@ -1,36 +1,34 @@
from urllib.parse import urlparse
import ipaddress
import opengraph
import requests
from bs4 import BeautifulSoup
from .urlutils import is_url_valid, check_url
from little_boxes.urlutils import check_url
from little_boxes.urlutils import is_url_valid
def links_from_note(note):
tags_href= set()
for t in note.get('tag', []):
h = t.get('href')
tags_href = set()
for t in note.get("tag", []):
h = t.get("href")
if h:
# TODO(tsileo): fetch the URL for Actor profile, type=mention
tags_href.add(h)
links = set()
soup = BeautifulSoup(note['content'])
for link in soup.find_all('a'):
h = link.get('href')
if h.startswith('http') and h not in tags_href and is_url_valid(h):
soup = BeautifulSoup(note["content"])
for link in soup.find_all("a"):
h = link.get("href")
if h.startswith("http") and h not in tags_href and is_url_valid(h):
links.add(h)
return links
def fetch_og_metadata(user_agent, col, remote_id):
doc = col.find_one({'remote_id': remote_id})
doc = col.find_one({"remote_id": remote_id})
if not doc:
raise ValueError
note = doc['activity']['object']
note = doc["activity"]["object"]
print(note)
links = links_from_note(note)
if not links:
@@ -39,9 +37,11 @@ def fetch_og_metadata(user_agent, col, remote_id):
htmls = []
for l in links:
check_url(l)
r = requests.get(l, headers={'User-Agent': user_agent})
r = requests.get(l, headers={"User-Agent": user_agent})
r.raise_for_status()
htmls.append(r.text)
links_og_metadata = [dict(opengraph.OpenGraph(html=html)) for html in htmls]
col.update_one({'remote_id': remote_id}, {'$set': {'meta.og_metadata': links_og_metadata}})
col.update_one(
{"remote_id": remote_id}, {"$set": {"meta.og_metadata": links_og_metadata}}
)
return len(links)

View File

@@ -1,47 +0,0 @@
import logging
import os
import socket
import ipaddress
from urllib.parse import urlparse
from . import strtobool
from .errors import Error
logger = logging.getLogger(__name__)
class InvalidURLError(Error):
    """`Error` subclass raised by `check_url` for a URL that fails validation."""
def is_url_valid(url: str) -> bool:
    """Return whether `url` is an http(s) URL that is safe to request.

    Rejects non-http(s) schemes, unparsable URLs, empty hosts, `localhost`,
    hosts that cannot be resolved, and hosts for which any resolved address
    is private. With `MICROBLOGPUB_DEBUG` enabled, every http(s) URL passes.
    """
    try:
        parsed = urlparse(url)
    except ValueError:
        # e.g. an unbalanced "[" in the host part
        return False
    if parsed.scheme not in ['http', 'https']:
        return False

    # XXX in debug mode, we want to allow requests to localhost to test the federation with local instances
    debug_mode = strtobool(os.getenv('MICROBLOGPUB_DEBUG', 'false'))
    if debug_mode:
        return True

    hostname = parsed.hostname
    if not hostname or hostname in ['localhost']:
        return False

    try:
        # `parsed.port` raises ValueError for a non-numeric/out-of-range port.
        addrinfos = socket.getaddrinfo(hostname, parsed.port or 80)
    except socket.gaierror:
        logger.exception(f'failed to lookup url {url}')
        return False
    except ValueError:
        logger.info(f'rejecting malformed URL {url}')
        return False

    # Check every resolved address, not just the first one.
    for addrinfo in addrinfos:
        # Drop an IPv6 zone id ("fe80::1%eth0") before parsing.
        ip_address = addrinfo[4][0].split('%', 1)[0]
        if ipaddress.ip_address(ip_address).is_private:
            logger.info(f'rejecting private URL {url}')
            return False

    return True
def check_url(url: str) -> None:
    """Raise `InvalidURLError` unless `is_url_valid(url)` holds."""
    if is_url_valid(url):
        return None
    raise InvalidURLError(f'"{url}" is invalid')

View File

@@ -1,75 +0,0 @@
from urllib.parse import urlparse
from typing import Dict, Any
from typing import Optional
import logging
import requests
from .urlutils import check_url
logger = logging.getLogger(__name__)
def webfinger(resource: str) -> Optional[Dict[str, Any]]:
    """Mastodon-like WebFinger resolution to retrieve the activity stream Actor URL.

    `resource` may be an http(s) URL, or an account written as
    `acct:user@host`, `@user@host` or `user@host`.

    Returns:
        The decoded WebFinger document, or None when the server answers 404
        or cannot be reached over either protocol.
    """
    logger.info(f'performing webfinger resolution for {resource}')
    protos = ['https', 'http']
    if resource.startswith('http://'):
        # Respect the scheme the caller gave: try plain http first.
        protos.reverse()
        host = urlparse(resource).netloc
    elif resource.startswith('https://'):
        host = urlparse(resource).netloc
    else:
        if resource.startswith('acct:'):
            resource = resource[5:]
        if resource.startswith('@'):
            resource = resource[1:]
        _, host = resource.split('@', 1)
        resource = 'acct:' + resource

    # Security check on the url (like not calling localhost)
    check_url(f'https://{host}')

    resp = None
    for proto in protos:
        url = f'{proto}://{host}/.well-known/webfinger'
        try:
            resp = requests.get(url, {'resource': resource})
        except requests.ConnectionError:
            # The domain may only speak the other protocol: try the next one.
            continue
        # Stop at the first protocol that answers; without this the second
        # protocol was always requested too and its response won.
        break

    if resp is None:
        logger.info(f'webfinger resolution failed for {resource}: host unreachable')
        return None
    if resp.status_code == 404:
        return None
    resp.raise_for_status()
    return resp.json()
def get_remote_follow_template(resource: str) -> Optional[str]:
    """Return the OStatus "subscribe" template advertised for `resource`.

    Returns:
        The `template` of the first link whose `rel` is the OStatus subscribe
        relation, or None when resolution fails or no such link exists.
    """
    data = webfinger(resource)
    if data is None:
        return None
    # `links` is optional in a WebFinger document; treat its absence as empty.
    for link in data.get('links') or []:
        if link.get('rel') == 'http://ostatus.org/schema/1.0/subscribe':
            return link.get('template')
    return None
def get_actor_url(resource: str) -> Optional[str]:
    """Mastodon-like WebFinger resolution to retrieve the activity stream Actor URL.

    Returns:
        the Actor URL or None if the resolution failed.
    """
    data = webfinger(resource)
    if data is None:
        return None
    # `links` is optional in a WebFinger document; treat its absence as empty.
    for link in data.get('links') or []:
        if link.get('rel') == 'self' and link.get('type') == 'application/activity+json':
            return link.get('href')
    return None