Initial import
This commit is contained in:
0
utils/__init__.py
Normal file
0
utils/__init__.py
Normal file
72
utils/actor_service.py
Normal file
72
utils/actor_service.py
Normal file
@@ -0,0 +1,72 @@
|
||||
import logging
|
||||
|
||||
import requests
|
||||
from urllib.parse import urlparse
|
||||
from Crypto.PublicKey import RSA
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class NotAnActorError(Exception):
|
||||
def __init__(self, activity):
|
||||
self.activity = activity
|
||||
|
||||
|
||||
class ActorService(object):
|
||||
def __init__(self, user_agent, col, actor_id, actor_data, instances):
|
||||
logger.debug(f'Initializing ActorService user_agent={user_agent}')
|
||||
self._user_agent = user_agent
|
||||
self._col = col
|
||||
self._in_mem = {actor_id: actor_data}
|
||||
self._instances = instances
|
||||
self._known_instances = set()
|
||||
|
||||
def _fetch(self, actor_url):
|
||||
logger.debug(f'fetching remote object {actor_url}')
|
||||
resp = requests.get(actor_url, headers={
|
||||
'Accept': 'application/activity+json',
|
||||
'User-Agent': self._user_agent,
|
||||
})
|
||||
resp.raise_for_status()
|
||||
return resp.json()
|
||||
|
||||
def get(self, actor_url, reload_cache=False):
|
||||
logger.info(f'get actor {actor_url} (reload_cache={reload_cache})')
|
||||
|
||||
if actor_url in self._in_mem:
|
||||
return self._in_mem[actor_url]
|
||||
|
||||
instance = urlparse(actor_url)._replace(path='', query='', fragment='').geturl()
|
||||
if instance not in self._known_instances:
|
||||
self._known_instances.add(instance)
|
||||
if not self._instances.find_one({'instance': instance}):
|
||||
self._instances.insert({'instance': instance, 'first_object': actor_url})
|
||||
|
||||
if reload_cache:
|
||||
actor = self._fetch(actor_url)
|
||||
self._in_mem[actor_url] = actor
|
||||
self._col.update({'actor_id': actor_url}, {'$set': {'cached_response': actor}}, upsert=True)
|
||||
return actor
|
||||
|
||||
cached_actor = self._col.find_one({'actor_id': actor_url})
|
||||
if cached_actor:
|
||||
return cached_actor['cached_response']
|
||||
|
||||
actor = self._fetch(actor_url)
|
||||
if not 'type' in actor:
|
||||
raise NotAnActorError(None)
|
||||
if actor['type'] != 'Person':
|
||||
raise NotAnActorError(actor)
|
||||
|
||||
self._col.update({'actor_id': actor_url}, {'$set': {'cached_response': actor}}, upsert=True)
|
||||
self._in_mem[actor_url] = actor
|
||||
return actor
|
||||
|
||||
def get_public_key(self, actor_url, reload_cache=False):
|
||||
profile = self.get(actor_url, reload_cache=reload_cache)
|
||||
pub = profile['publicKey']
|
||||
return pub['id'], RSA.importKey(pub['publicKeyPem'])
|
||||
|
||||
def get_inbox_url(self, actor_url, reload_cache=False):
|
||||
profile = self.get(actor_url, reload_cache=reload_cache)
|
||||
return profile.get('inbox')
|
57
utils/content_helper.py
Normal file
57
utils/content_helper.py
Normal file
@@ -0,0 +1,57 @@
|
||||
import typing
|
||||
import re
|
||||
|
||||
from bleach.linkifier import Linker
|
||||
from markdown import markdown
|
||||
|
||||
from utils.webfinger import get_actor_url
|
||||
from config import USERNAME, BASE_URL, ID
|
||||
from config import ACTOR_SERVICE
|
||||
|
||||
from typing import List, Optional, Tuple, Dict, Any, Union, Type
|
||||
|
||||
|
||||
def set_attrs(attrs, new=False):
|
||||
attrs[(None, u'target')] = u'_blank'
|
||||
attrs[(None, u'class')] = u'external'
|
||||
attrs[(None, u'rel')] = u'noopener'
|
||||
attrs[(None, u'title')] = attrs[(None, u'href')]
|
||||
return attrs
|
||||
|
||||
|
||||
LINKER = Linker(callbacks=[set_attrs])
|
||||
HASHTAG_REGEX = re.compile(r"(#[\d\w\.]+)")
|
||||
MENTION_REGEX = re.compile(r"@[\d\w_.+-]+@[\d\w-]+\.[\d\w\-.]+")
|
||||
|
||||
|
||||
def hashtagify(content: str) -> Tuple[str, List[Dict[str, str]]]:
|
||||
tags = []
|
||||
for hashtag in re.findall(HASHTAG_REGEX, content):
|
||||
tag = hashtag[1:]
|
||||
link = f'<a href="{BASE_URL}/tags/{tag}" class="mention hashtag" rel="tag">#<span>{tag}</span></a>'
|
||||
tags.append(dict(href=f'{BASE_URL}/tags/{tag}', name=hashtag, type='Hashtag'))
|
||||
content = content.replace(hashtag, link)
|
||||
return content, tags
|
||||
|
||||
|
||||
def mentionify(content: str) -> Tuple[str, List[Dict[str, str]]]:
|
||||
tags = []
|
||||
for mention in re.findall(MENTION_REGEX, content):
|
||||
_, username, domain = mention.split('@')
|
||||
actor_url = get_actor_url(mention)
|
||||
p = ACTOR_SERVICE.get(actor_url)
|
||||
tags.append(dict(type='Mention', href=p['id'], name=mention))
|
||||
link = f'<span class="h-card"><a href="{p.url}" class="u-url mention">@<span>{username}</span></a></span>'
|
||||
content = content.replace(mention, link)
|
||||
return content, tags
|
||||
|
||||
|
||||
def parse_markdown(content: str) -> Tuple[str, List[Dict[str, str]]]:
|
||||
tags = []
|
||||
content = LINKER.linkify(content)
|
||||
content, hashtag_tags = hashtagify(content)
|
||||
tags.extend(hashtag_tags)
|
||||
content, mention_tags = mentionify(content)
|
||||
tags.extend(mention_tags)
|
||||
content = markdown(content)
|
||||
return content, tags
|
87
utils/httpsig.py
Normal file
87
utils/httpsig.py
Normal file
@@ -0,0 +1,87 @@
|
||||
"""Implements HTTP signature for Flask requests.
|
||||
|
||||
Mastodon instances won't accept requests that are not signed using this scheme.
|
||||
|
||||
"""
|
||||
from datetime import datetime
|
||||
from urllib.parse import urlparse
|
||||
from typing import Any, Dict
|
||||
import base64
|
||||
import hashlib
|
||||
|
||||
from flask import request
|
||||
from requests.auth import AuthBase
|
||||
|
||||
from Crypto.Signature import PKCS1_v1_5
|
||||
from Crypto.Hash import SHA256
|
||||
|
||||
|
||||
def _build_signed_string(signed_headers: str, method: str, path: str, headers: Any, body_digest: str) -> str:
|
||||
out = []
|
||||
for signed_header in signed_headers.split(' '):
|
||||
if signed_header == '(request-target)':
|
||||
out.append('(request-target): '+method.lower()+' '+path)
|
||||
elif signed_header == 'digest':
|
||||
out.append('digest: '+body_digest)
|
||||
else:
|
||||
out.append(signed_header+': '+headers[signed_header])
|
||||
return '\n'.join(out)
|
||||
|
||||
|
||||
def _parse_sig_header(val: str) -> Dict[str, str]:
|
||||
out = {}
|
||||
for data in val.split(','):
|
||||
k, v = data.split('=', 1)
|
||||
out[k] = v[1:len(v)-1]
|
||||
return out
|
||||
|
||||
|
||||
def _verify_h(signed_string, signature, pubkey):
|
||||
signer = PKCS1_v1_5.new(pubkey)
|
||||
digest = SHA256.new()
|
||||
digest.update(signed_string.encode('utf-8'))
|
||||
return signer.verify(digest, signature)
|
||||
|
||||
|
||||
def _body_digest() -> str:
|
||||
h = hashlib.new('sha256')
|
||||
h.update(request.data)
|
||||
return 'SHA-256='+base64.b64encode(h.digest()).decode('utf-8')
|
||||
|
||||
|
||||
def verify_request(actor_service) -> bool:
|
||||
hsig = _parse_sig_header(request.headers.get('Signature'))
|
||||
signed_string = _build_signed_string(hsig['headers'], request.method, request.path, request.headers, _body_digest())
|
||||
_, rk = actor_service.get_public_key(hsig['keyId'])
|
||||
return _verify_h(signed_string, base64.b64decode(hsig['signature']), rk)
|
||||
|
||||
|
||||
class HTTPSigAuth(AuthBase):
|
||||
def __init__(self, keyid, privkey):
|
||||
self.keyid = keyid
|
||||
self.privkey = privkey
|
||||
|
||||
def __call__(self, r):
|
||||
host = urlparse(r.url).netloc
|
||||
bh = hashlib.new('sha256')
|
||||
bh.update(r.body.encode('utf-8'))
|
||||
bodydigest = 'SHA-256='+base64.b64encode(bh.digest()).decode('utf-8')
|
||||
date = datetime.utcnow().strftime('%a, %d %b %Y %H:%M:%S GMT')
|
||||
r.headers.update({'Digest': bodydigest, 'Date': date})
|
||||
r.headers.update({'Host': host})
|
||||
sigheaders = '(request-target) user-agent host date digest content-type'
|
||||
to_be_signed = _build_signed_string(sigheaders, r.method, r.path_url, r.headers, bodydigest)
|
||||
signer = PKCS1_v1_5.new(self.privkey)
|
||||
digest = SHA256.new()
|
||||
digest.update(to_be_signed.encode('utf-8'))
|
||||
sig = base64.b64encode(signer.sign(digest))
|
||||
sig = sig.decode('utf-8')
|
||||
headers = {
|
||||
'Signature': 'keyId="{keyid}",algorithm="rsa-sha256",headers="{headers}",signature="{signature}"'.format(
|
||||
keyid=self.keyid,
|
||||
signature=sig,
|
||||
headers=sigheaders,
|
||||
),
|
||||
}
|
||||
r.headers.update(headers)
|
||||
return r
|
39
utils/key.py
Normal file
39
utils/key.py
Normal file
@@ -0,0 +1,39 @@
|
||||
import os
|
||||
import binascii
|
||||
|
||||
from Crypto.PublicKey import RSA
|
||||
|
||||
KEY_DIR = 'config/'
|
||||
|
||||
|
||||
def get_secret_key(name:str) -> str:
|
||||
key_path = f'{KEY_DIR}{name}.key'
|
||||
if not os.path.exists(key_path):
|
||||
k = binascii.hexlify(os.urandom(32)).decode('utf-8')
|
||||
with open(key_path, 'w+') as f:
|
||||
f.write(k)
|
||||
return k
|
||||
|
||||
with open(key_path) as f:
|
||||
return f.read()
|
||||
|
||||
|
||||
class Key(object):
|
||||
def __init__(self, user: str, domain: str, create: bool = True) -> None:
|
||||
user = user.replace('.', '_')
|
||||
domain = domain.replace('.', '_')
|
||||
key_path = f'{KEY_DIR}/key_{user}_{domain}.pem'
|
||||
if os.path.isfile(key_path):
|
||||
with open(key_path) as f:
|
||||
self.privkey_pem = f.read()
|
||||
self.privkey = RSA.importKey(self.privkey_pem)
|
||||
self.pubkey_pem = self.privkey.publickey().exportKey('PEM').decode('utf-8')
|
||||
else:
|
||||
if not create:
|
||||
raise Exception('must init private key first')
|
||||
k = RSA.generate(4096)
|
||||
self.privkey_pem = k.exportKey('PEM').decode('utf-8')
|
||||
self.pubkey_pem = k.publickey().exportKey('PEM').decode('utf-8')
|
||||
with open(key_path, 'w') as f:
|
||||
f.write(self.privkey_pem)
|
||||
self.privkey = k
|
53
utils/linked_data_sig.py
Normal file
53
utils/linked_data_sig.py
Normal file
@@ -0,0 +1,53 @@
|
||||
from pyld import jsonld
|
||||
import hashlib
|
||||
from datetime import datetime
|
||||
|
||||
from Crypto.Signature import PKCS1_v1_5
|
||||
from Crypto.Hash import SHA256
|
||||
import base64
|
||||
|
||||
|
||||
def options_hash(doc):
|
||||
doc = dict(doc['signature'])
|
||||
for k in ['type', 'id', 'signatureValue']:
|
||||
if k in doc:
|
||||
del doc[k]
|
||||
doc['@context'] = 'https://w3id.org/identity/v1'
|
||||
normalized = jsonld.normalize(doc, {'algorithm': 'URDNA2015', 'format': 'application/nquads'})
|
||||
h = hashlib.new('sha256')
|
||||
h.update(normalized.encode('utf-8'))
|
||||
return h.hexdigest()
|
||||
|
||||
|
||||
def doc_hash(doc):
|
||||
doc = dict(doc)
|
||||
if 'signature' in doc:
|
||||
del doc['signature']
|
||||
normalized = jsonld.normalize(doc, {'algorithm': 'URDNA2015', 'format': 'application/nquads'})
|
||||
h = hashlib.new('sha256')
|
||||
h.update(normalized.encode('utf-8'))
|
||||
return h.hexdigest()
|
||||
|
||||
|
||||
def verify_signature(doc, pubkey):
|
||||
to_be_signed = options_hash(doc) + doc_hash(doc)
|
||||
signature = doc['signature']['signatureValue']
|
||||
signer = PKCS1_v1_5.new(pubkey)
|
||||
digest = SHA256.new()
|
||||
digest.update(to_be_signed.encode('utf-8'))
|
||||
return signer.verify(digest, base64.b64decode(signature))
|
||||
|
||||
|
||||
def generate_signature(doc, privkey):
|
||||
options = {
|
||||
'type': 'RsaSignature2017',
|
||||
'creator': doc['actor'] + '#main-key',
|
||||
'created': datetime.utcnow().replace(microsecond=0).isoformat() + 'Z',
|
||||
}
|
||||
doc['signature'] = options
|
||||
to_be_signed = options_hash(doc) + doc_hash(doc)
|
||||
signer = PKCS1_v1_5.new(privkey)
|
||||
digest = SHA256.new()
|
||||
digest.update(to_be_signed.encode('utf-8'))
|
||||
sig = base64.b64encode(signer.sign(digest))
|
||||
options['signatureValue'] = sig.decode('utf-8')
|
60
utils/object_service.py
Normal file
60
utils/object_service.py
Normal file
@@ -0,0 +1,60 @@
|
||||
import requests
|
||||
from urllib.parse import urlparse
|
||||
|
||||
|
||||
class ObjectService(object):
|
||||
def __init__(self, user_agent, col, inbox, outbox, instances):
|
||||
self._user_agent = user_agent
|
||||
self._col = col
|
||||
self._inbox = inbox
|
||||
self._outbox = outbox
|
||||
self._instances = instances
|
||||
self._known_instances = set()
|
||||
|
||||
def _fetch_remote(self, object_id):
|
||||
print(f'fetch remote {object_id}')
|
||||
resp = requests.get(object_id, headers={
|
||||
'Accept': 'application/activity+json',
|
||||
'User-Agent': self._user_agent,
|
||||
})
|
||||
resp.raise_for_status()
|
||||
return resp.json()
|
||||
|
||||
def _fetch(self, object_id):
|
||||
instance = urlparse(object_id)._replace(path='', query='', fragment='').geturl()
|
||||
if instance not in self._known_instances:
|
||||
self._known_instances.add(instance)
|
||||
if not self._instances.find_one({'instance': instance}):
|
||||
self._instances.insert({'instance': instance, 'first_object': object_id})
|
||||
|
||||
obj = self._inbox.find_one({'$or': [{'remote_id': object_id}, {'type': 'Create', 'activity.object.id': object_id}]})
|
||||
if obj:
|
||||
if obj['remote_id'] == object_id:
|
||||
return obj['activity']
|
||||
return obj['activity']['object']
|
||||
|
||||
obj = self._outbox.find_one({'$or': [{'remote_id': object_id}, {'type': 'Create', 'activity.object.id': object_id}]})
|
||||
if obj:
|
||||
if obj['remote_id'] == object_id:
|
||||
return obj['activity']
|
||||
return obj['activity']['object']
|
||||
|
||||
return self._fetch_remote(object_id)
|
||||
|
||||
def get(self, object_id, reload_cache=False, part_of_stream=False, announce_published=None):
|
||||
if reload_cache:
|
||||
obj = self._fetch(object_id)
|
||||
self._col.update({'object_id': object_id}, {'$set': {'cached_object': obj, 'meta.part_of_stream': part_of_stream, 'meta.announce_published': announce_published}}, upsert=True)
|
||||
return obj
|
||||
|
||||
cached_object = self._col.find_one({'object_id': object_id})
|
||||
if cached_object:
|
||||
print(f'ObjectService: {cached_object}')
|
||||
return cached_object['cached_object']
|
||||
|
||||
obj = self._fetch(object_id)
|
||||
|
||||
self._col.update({'object_id': object_id}, {'$set': {'cached_object': obj, 'meta.part_of_stream': part_of_stream, 'meta.announce_published': announce_published}}, upsert=True)
|
||||
# print(f'ObjectService: {obj}')
|
||||
|
||||
return obj
|
46
utils/opengraph.py
Normal file
46
utils/opengraph.py
Normal file
@@ -0,0 +1,46 @@
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import ipaddress
|
||||
import opengraph
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from .urlutils import is_url_valid
|
||||
|
||||
|
||||
def links_from_note(note):
|
||||
tags_href= set()
|
||||
for t in note.get('tag', []):
|
||||
h = t.get('href')
|
||||
if h:
|
||||
# TODO(tsileo): fetch the URL for Actor profile, type=mention
|
||||
tags_href.add(h)
|
||||
|
||||
links = set()
|
||||
soup = BeautifulSoup(note['content'])
|
||||
for link in soup.find_all('a'):
|
||||
h = link.get('href')
|
||||
if h.startswith('http') and h not in tags_href and is_url_valid(h):
|
||||
links.add(h)
|
||||
|
||||
return links
|
||||
|
||||
|
||||
def fetch_og_metadata(user_agent, col, remote_id):
|
||||
doc = col.find_one({'remote_id': remote_id})
|
||||
if not doc:
|
||||
raise ValueError
|
||||
note = doc['activity']['object']
|
||||
print(note)
|
||||
links = links_from_note(note)
|
||||
if not links:
|
||||
return 0
|
||||
# FIXME(tsileo): set the user agent by giving HTML directly to OpenGraph
|
||||
htmls = []
|
||||
for l in links:
|
||||
r = requests.get(l, headers={'User-Agent': user_agent})
|
||||
r.raise_for_status()
|
||||
htmls.append(r.text)
|
||||
links_og_metadata = [dict(opengraph.OpenGraph(html=html)) for html in htmls]
|
||||
col.update_one({'remote_id': remote_id}, {'$set': {'meta.og_metadata': links_og_metadata}})
|
||||
return len(links)
|
27
utils/urlutils.py
Normal file
27
utils/urlutils.py
Normal file
@@ -0,0 +1,27 @@
|
||||
import logging
|
||||
import socket
|
||||
import ipaddress
|
||||
from urllib.parse import urlparse
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def is_url_valid(url):
|
||||
parsed = urlparse(url)
|
||||
if parsed.scheme not in ['http', 'https']:
|
||||
return False
|
||||
|
||||
if parsed.hostname in ['localhost']:
|
||||
return False
|
||||
|
||||
try:
|
||||
ip_address = socket.getaddrinfo(parsed.hostname, parsed.port or 80)[0][4][0]
|
||||
except socket.gaierror:
|
||||
logger.exception(f'failed to lookup url {url}')
|
||||
return False
|
||||
|
||||
if ipaddress.ip_address(ip_address).is_private:
|
||||
logger.info(f'rejecting private URL {url}')
|
||||
return False
|
||||
|
||||
return True
|
63
utils/webfinger.py
Normal file
63
utils/webfinger.py
Normal file
@@ -0,0 +1,63 @@
|
||||
from typing import Optional
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import requests
|
||||
|
||||
def get_remote_follow_template(resource: str) -> Optional[str]:
|
||||
"""Mastodon-like WebFinger resolution to retrieve the activity stream Actor URL.
|
||||
|
||||
Returns:
|
||||
the Actor URL or None if the resolution failed.
|
||||
"""
|
||||
if resource.startswith('http'):
|
||||
host = urlparse(resource).netloc
|
||||
else:
|
||||
if resource.startswith('acct:'):
|
||||
resource = resource[5:]
|
||||
if resource.startswith('@'):
|
||||
resource = resource[1:]
|
||||
_, host = resource.split('@', 1)
|
||||
resource='acct:'+resource
|
||||
resp = requests.get(
|
||||
f'https://{host}/.well-known/webfinger',
|
||||
{'resource': resource}
|
||||
)
|
||||
print(resp, resp.request.url)
|
||||
if resp.status_code == 404:
|
||||
return None
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
for link in data['links']:
|
||||
if link.get('rel') == 'http://ostatus.org/schema/1.0/subscribe':
|
||||
return link.get('template')
|
||||
return None
|
||||
|
||||
|
||||
def get_actor_url(resource: str) -> Optional[str]:
|
||||
"""Mastodon-like WebFinger resolution to retrieve the activity stream Actor URL.
|
||||
|
||||
Returns:
|
||||
the Actor URL or None if the resolution failed.
|
||||
"""
|
||||
if resource.startswith('http'):
|
||||
host = urlparse(resource).netloc
|
||||
else:
|
||||
if resource.startswith('acct:'):
|
||||
resource = resource[5:]
|
||||
if resource.startswith('@'):
|
||||
resource = resource[1:]
|
||||
_, host = resource.split('@', 1)
|
||||
resource='acct:'+resource
|
||||
resp = requests.get(
|
||||
f'https://{host}/.well-known/webfinger',
|
||||
{'resource': resource}
|
||||
)
|
||||
print(resp, resp.request.url)
|
||||
if resp.status_code == 404:
|
||||
return None
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
for link in data['links']:
|
||||
if link.get('rel') == 'self' and link.get('type') == 'application/activity+json':
|
||||
return link.get('href')
|
||||
return None
|
Reference in New Issue
Block a user