Fix OG parsing
This commit is contained in:
@@ -74,7 +74,9 @@ def fetch_og_metadata(user_agent, links):
|
||||
logger.debug(f"failed to HEAD {l}: {err!r}")
|
||||
continue
|
||||
|
||||
if not h.headers.get("content-type").startswith("text/html"):
|
||||
if h.headers.get("content-type") and not h.headers.get(
|
||||
"content-type"
|
||||
).startswith("text/html"):
|
||||
logger.debug(f"skipping {l} for bad content type")
|
||||
continue
|
||||
|
||||
@@ -92,6 +94,12 @@ def fetch_og_metadata(user_agent, links):
|
||||
logger.debug(f"failed to GET {l}: {err!r}")
|
||||
continue
|
||||
|
||||
# FIXME(tsileo): check mimetype via the URL too (like we do for images)
|
||||
if not r.headers.get("content-type") or not r.headers.get(
|
||||
"content-type"
|
||||
).startswith("text/html"):
|
||||
continue
|
||||
|
||||
r.encoding = "UTF-8"
|
||||
html = r.text
|
||||
try:
|
||||
|
Reference in New Issue
Block a user