Initial import

This commit is contained in:
Thomas Sileo
2018-05-18 20:41:41 +02:00
commit 43e113e420
50 changed files with 4378 additions and 0 deletions

0
utils/__init__.py Normal file
View File

72
utils/actor_service.py Normal file
View File

@@ -0,0 +1,72 @@
import logging
import requests
from urllib.parse import urlparse
from Crypto.PublicKey import RSA
logger = logging.getLogger(__name__)
class NotAnActorError(Exception):
def __init__(self, activity):
self.activity = activity
class ActorService(object):
def __init__(self, user_agent, col, actor_id, actor_data, instances):
logger.debug(f'Initializing ActorService user_agent={user_agent}')
self._user_agent = user_agent
self._col = col
self._in_mem = {actor_id: actor_data}
self._instances = instances
self._known_instances = set()
def _fetch(self, actor_url):
logger.debug(f'fetching remote object {actor_url}')
resp = requests.get(actor_url, headers={
'Accept': 'application/activity+json',
'User-Agent': self._user_agent,
})
resp.raise_for_status()
return resp.json()
def get(self, actor_url, reload_cache=False):
logger.info(f'get actor {actor_url} (reload_cache={reload_cache})')
if actor_url in self._in_mem:
return self._in_mem[actor_url]
instance = urlparse(actor_url)._replace(path='', query='', fragment='').geturl()
if instance not in self._known_instances:
self._known_instances.add(instance)
if not self._instances.find_one({'instance': instance}):
self._instances.insert({'instance': instance, 'first_object': actor_url})
if reload_cache:
actor = self._fetch(actor_url)
self._in_mem[actor_url] = actor
self._col.update({'actor_id': actor_url}, {'$set': {'cached_response': actor}}, upsert=True)
return actor
cached_actor = self._col.find_one({'actor_id': actor_url})
if cached_actor:
return cached_actor['cached_response']
actor = self._fetch(actor_url)
if not 'type' in actor:
raise NotAnActorError(None)
if actor['type'] != 'Person':
raise NotAnActorError(actor)
self._col.update({'actor_id': actor_url}, {'$set': {'cached_response': actor}}, upsert=True)
self._in_mem[actor_url] = actor
return actor
def get_public_key(self, actor_url, reload_cache=False):
profile = self.get(actor_url, reload_cache=reload_cache)
pub = profile['publicKey']
return pub['id'], RSA.importKey(pub['publicKeyPem'])
def get_inbox_url(self, actor_url, reload_cache=False):
profile = self.get(actor_url, reload_cache=reload_cache)
return profile.get('inbox')

57
utils/content_helper.py Normal file
View File

@@ -0,0 +1,57 @@
import typing
import re
from bleach.linkifier import Linker
from markdown import markdown
from utils.webfinger import get_actor_url
from config import USERNAME, BASE_URL, ID
from config import ACTOR_SERVICE
from typing import List, Optional, Tuple, Dict, Any, Union, Type
def set_attrs(attrs, new=False):
attrs[(None, u'target')] = u'_blank'
attrs[(None, u'class')] = u'external'
attrs[(None, u'rel')] = u'noopener'
attrs[(None, u'title')] = attrs[(None, u'href')]
return attrs
LINKER = Linker(callbacks=[set_attrs])
HASHTAG_REGEX = re.compile(r"(#[\d\w\.]+)")
MENTION_REGEX = re.compile(r"@[\d\w_.+-]+@[\d\w-]+\.[\d\w\-.]+")
def hashtagify(content: str) -> Tuple[str, List[Dict[str, str]]]:
tags = []
for hashtag in re.findall(HASHTAG_REGEX, content):
tag = hashtag[1:]
link = f'<a href="{BASE_URL}/tags/{tag}" class="mention hashtag" rel="tag">#<span>{tag}</span></a>'
tags.append(dict(href=f'{BASE_URL}/tags/{tag}', name=hashtag, type='Hashtag'))
content = content.replace(hashtag, link)
return content, tags
def mentionify(content: str) -> Tuple[str, List[Dict[str, str]]]:
tags = []
for mention in re.findall(MENTION_REGEX, content):
_, username, domain = mention.split('@')
actor_url = get_actor_url(mention)
p = ACTOR_SERVICE.get(actor_url)
tags.append(dict(type='Mention', href=p['id'], name=mention))
link = f'<span class="h-card"><a href="{p.url}" class="u-url mention">@<span>{username}</span></a></span>'
content = content.replace(mention, link)
return content, tags
def parse_markdown(content: str) -> Tuple[str, List[Dict[str, str]]]:
tags = []
content = LINKER.linkify(content)
content, hashtag_tags = hashtagify(content)
tags.extend(hashtag_tags)
content, mention_tags = mentionify(content)
tags.extend(mention_tags)
content = markdown(content)
return content, tags

87
utils/httpsig.py Normal file
View File

@@ -0,0 +1,87 @@
"""Implements HTTP signature for Flask requests.
Mastodon instances won't accept requests that are not signed using this scheme.
"""
from datetime import datetime
from urllib.parse import urlparse
from typing import Any, Dict
import base64
import hashlib
from flask import request
from requests.auth import AuthBase
from Crypto.Signature import PKCS1_v1_5
from Crypto.Hash import SHA256
def _build_signed_string(signed_headers: str, method: str, path: str, headers: Any, body_digest: str) -> str:
out = []
for signed_header in signed_headers.split(' '):
if signed_header == '(request-target)':
out.append('(request-target): '+method.lower()+' '+path)
elif signed_header == 'digest':
out.append('digest: '+body_digest)
else:
out.append(signed_header+': '+headers[signed_header])
return '\n'.join(out)
def _parse_sig_header(val: str) -> Dict[str, str]:
out = {}
for data in val.split(','):
k, v = data.split('=', 1)
out[k] = v[1:len(v)-1]
return out
def _verify_h(signed_string, signature, pubkey):
signer = PKCS1_v1_5.new(pubkey)
digest = SHA256.new()
digest.update(signed_string.encode('utf-8'))
return signer.verify(digest, signature)
def _body_digest() -> str:
h = hashlib.new('sha256')
h.update(request.data)
return 'SHA-256='+base64.b64encode(h.digest()).decode('utf-8')
def verify_request(actor_service) -> bool:
hsig = _parse_sig_header(request.headers.get('Signature'))
signed_string = _build_signed_string(hsig['headers'], request.method, request.path, request.headers, _body_digest())
_, rk = actor_service.get_public_key(hsig['keyId'])
return _verify_h(signed_string, base64.b64decode(hsig['signature']), rk)
class HTTPSigAuth(AuthBase):
def __init__(self, keyid, privkey):
self.keyid = keyid
self.privkey = privkey
def __call__(self, r):
host = urlparse(r.url).netloc
bh = hashlib.new('sha256')
bh.update(r.body.encode('utf-8'))
bodydigest = 'SHA-256='+base64.b64encode(bh.digest()).decode('utf-8')
date = datetime.utcnow().strftime('%a, %d %b %Y %H:%M:%S GMT')
r.headers.update({'Digest': bodydigest, 'Date': date})
r.headers.update({'Host': host})
sigheaders = '(request-target) user-agent host date digest content-type'
to_be_signed = _build_signed_string(sigheaders, r.method, r.path_url, r.headers, bodydigest)
signer = PKCS1_v1_5.new(self.privkey)
digest = SHA256.new()
digest.update(to_be_signed.encode('utf-8'))
sig = base64.b64encode(signer.sign(digest))
sig = sig.decode('utf-8')
headers = {
'Signature': 'keyId="{keyid}",algorithm="rsa-sha256",headers="{headers}",signature="{signature}"'.format(
keyid=self.keyid,
signature=sig,
headers=sigheaders,
),
}
r.headers.update(headers)
return r

39
utils/key.py Normal file
View File

@@ -0,0 +1,39 @@
import os
import binascii
from Crypto.PublicKey import RSA
KEY_DIR = 'config/'
def get_secret_key(name:str) -> str:
key_path = f'{KEY_DIR}{name}.key'
if not os.path.exists(key_path):
k = binascii.hexlify(os.urandom(32)).decode('utf-8')
with open(key_path, 'w+') as f:
f.write(k)
return k
with open(key_path) as f:
return f.read()
class Key(object):
def __init__(self, user: str, domain: str, create: bool = True) -> None:
user = user.replace('.', '_')
domain = domain.replace('.', '_')
key_path = f'{KEY_DIR}/key_{user}_{domain}.pem'
if os.path.isfile(key_path):
with open(key_path) as f:
self.privkey_pem = f.read()
self.privkey = RSA.importKey(self.privkey_pem)
self.pubkey_pem = self.privkey.publickey().exportKey('PEM').decode('utf-8')
else:
if not create:
raise Exception('must init private key first')
k = RSA.generate(4096)
self.privkey_pem = k.exportKey('PEM').decode('utf-8')
self.pubkey_pem = k.publickey().exportKey('PEM').decode('utf-8')
with open(key_path, 'w') as f:
f.write(self.privkey_pem)
self.privkey = k

53
utils/linked_data_sig.py Normal file
View File

@@ -0,0 +1,53 @@
from pyld import jsonld
import hashlib
from datetime import datetime
from Crypto.Signature import PKCS1_v1_5
from Crypto.Hash import SHA256
import base64
def options_hash(doc):
doc = dict(doc['signature'])
for k in ['type', 'id', 'signatureValue']:
if k in doc:
del doc[k]
doc['@context'] = 'https://w3id.org/identity/v1'
normalized = jsonld.normalize(doc, {'algorithm': 'URDNA2015', 'format': 'application/nquads'})
h = hashlib.new('sha256')
h.update(normalized.encode('utf-8'))
return h.hexdigest()
def doc_hash(doc):
doc = dict(doc)
if 'signature' in doc:
del doc['signature']
normalized = jsonld.normalize(doc, {'algorithm': 'URDNA2015', 'format': 'application/nquads'})
h = hashlib.new('sha256')
h.update(normalized.encode('utf-8'))
return h.hexdigest()
def verify_signature(doc, pubkey):
to_be_signed = options_hash(doc) + doc_hash(doc)
signature = doc['signature']['signatureValue']
signer = PKCS1_v1_5.new(pubkey)
digest = SHA256.new()
digest.update(to_be_signed.encode('utf-8'))
return signer.verify(digest, base64.b64decode(signature))
def generate_signature(doc, privkey):
options = {
'type': 'RsaSignature2017',
'creator': doc['actor'] + '#main-key',
'created': datetime.utcnow().replace(microsecond=0).isoformat() + 'Z',
}
doc['signature'] = options
to_be_signed = options_hash(doc) + doc_hash(doc)
signer = PKCS1_v1_5.new(privkey)
digest = SHA256.new()
digest.update(to_be_signed.encode('utf-8'))
sig = base64.b64encode(signer.sign(digest))
options['signatureValue'] = sig.decode('utf-8')

60
utils/object_service.py Normal file
View File

@@ -0,0 +1,60 @@
import requests
from urllib.parse import urlparse
class ObjectService(object):
def __init__(self, user_agent, col, inbox, outbox, instances):
self._user_agent = user_agent
self._col = col
self._inbox = inbox
self._outbox = outbox
self._instances = instances
self._known_instances = set()
def _fetch_remote(self, object_id):
print(f'fetch remote {object_id}')
resp = requests.get(object_id, headers={
'Accept': 'application/activity+json',
'User-Agent': self._user_agent,
})
resp.raise_for_status()
return resp.json()
def _fetch(self, object_id):
instance = urlparse(object_id)._replace(path='', query='', fragment='').geturl()
if instance not in self._known_instances:
self._known_instances.add(instance)
if not self._instances.find_one({'instance': instance}):
self._instances.insert({'instance': instance, 'first_object': object_id})
obj = self._inbox.find_one({'$or': [{'remote_id': object_id}, {'type': 'Create', 'activity.object.id': object_id}]})
if obj:
if obj['remote_id'] == object_id:
return obj['activity']
return obj['activity']['object']
obj = self._outbox.find_one({'$or': [{'remote_id': object_id}, {'type': 'Create', 'activity.object.id': object_id}]})
if obj:
if obj['remote_id'] == object_id:
return obj['activity']
return obj['activity']['object']
return self._fetch_remote(object_id)
def get(self, object_id, reload_cache=False, part_of_stream=False, announce_published=None):
if reload_cache:
obj = self._fetch(object_id)
self._col.update({'object_id': object_id}, {'$set': {'cached_object': obj, 'meta.part_of_stream': part_of_stream, 'meta.announce_published': announce_published}}, upsert=True)
return obj
cached_object = self._col.find_one({'object_id': object_id})
if cached_object:
print(f'ObjectService: {cached_object}')
return cached_object['cached_object']
obj = self._fetch(object_id)
self._col.update({'object_id': object_id}, {'$set': {'cached_object': obj, 'meta.part_of_stream': part_of_stream, 'meta.announce_published': announce_published}}, upsert=True)
# print(f'ObjectService: {obj}')
return obj

46
utils/opengraph.py Normal file
View File

@@ -0,0 +1,46 @@
from urllib.parse import urlparse
import ipaddress
import opengraph
import requests
from bs4 import BeautifulSoup
from .urlutils import is_url_valid
def links_from_note(note):
tags_href= set()
for t in note.get('tag', []):
h = t.get('href')
if h:
# TODO(tsileo): fetch the URL for Actor profile, type=mention
tags_href.add(h)
links = set()
soup = BeautifulSoup(note['content'])
for link in soup.find_all('a'):
h = link.get('href')
if h.startswith('http') and h not in tags_href and is_url_valid(h):
links.add(h)
return links
def fetch_og_metadata(user_agent, col, remote_id):
doc = col.find_one({'remote_id': remote_id})
if not doc:
raise ValueError
note = doc['activity']['object']
print(note)
links = links_from_note(note)
if not links:
return 0
# FIXME(tsileo): set the user agent by giving HTML directly to OpenGraph
htmls = []
for l in links:
r = requests.get(l, headers={'User-Agent': user_agent})
r.raise_for_status()
htmls.append(r.text)
links_og_metadata = [dict(opengraph.OpenGraph(html=html)) for html in htmls]
col.update_one({'remote_id': remote_id}, {'$set': {'meta.og_metadata': links_og_metadata}})
return len(links)

27
utils/urlutils.py Normal file
View File

@@ -0,0 +1,27 @@
import logging
import socket
import ipaddress
from urllib.parse import urlparse
logger = logging.getLogger(__name__)
def is_url_valid(url):
parsed = urlparse(url)
if parsed.scheme not in ['http', 'https']:
return False
if parsed.hostname in ['localhost']:
return False
try:
ip_address = socket.getaddrinfo(parsed.hostname, parsed.port or 80)[0][4][0]
except socket.gaierror:
logger.exception(f'failed to lookup url {url}')
return False
if ipaddress.ip_address(ip_address).is_private:
logger.info(f'rejecting private URL {url}')
return False
return True

63
utils/webfinger.py Normal file
View File

@@ -0,0 +1,63 @@
from typing import Optional
from urllib.parse import urlparse
import requests
def get_remote_follow_template(resource: str) -> Optional[str]:
"""Mastodon-like WebFinger resolution to retrieve the activity stream Actor URL.
Returns:
the Actor URL or None if the resolution failed.
"""
if resource.startswith('http'):
host = urlparse(resource).netloc
else:
if resource.startswith('acct:'):
resource = resource[5:]
if resource.startswith('@'):
resource = resource[1:]
_, host = resource.split('@', 1)
resource='acct:'+resource
resp = requests.get(
f'https://{host}/.well-known/webfinger',
{'resource': resource}
)
print(resp, resp.request.url)
if resp.status_code == 404:
return None
resp.raise_for_status()
data = resp.json()
for link in data['links']:
if link.get('rel') == 'http://ostatus.org/schema/1.0/subscribe':
return link.get('template')
return None
def get_actor_url(resource: str) -> Optional[str]:
"""Mastodon-like WebFinger resolution to retrieve the activity stream Actor URL.
Returns:
the Actor URL or None if the resolution failed.
"""
if resource.startswith('http'):
host = urlparse(resource).netloc
else:
if resource.startswith('acct:'):
resource = resource[5:]
if resource.startswith('@'):
resource = resource[1:]
_, host = resource.split('@', 1)
resource='acct:'+resource
resp = requests.get(
f'https://{host}/.well-known/webfinger',
{'resource': resource}
)
print(resp, resp.request.url)
if resp.status_code == 404:
return None
resp.raise_for_status()
data = resp.json()
for link in data['links']:
if link.get('rel') == 'self' and link.get('type') == 'application/activity+json':
return link.get('href')
return None