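Fix OG (Open Graph) metadata parsing: handle a missing Content-Type header on the HEAD response and skip non-HTML GET responses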
This commit is contained in:
@@ -74,7 +74,9 @@ def fetch_og_metadata(user_agent, links):
|
|||||||
logger.debug(f"failed to HEAD {l}: {err!r}")
|
logger.debug(f"failed to HEAD {l}: {err!r}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if not h.headers.get("content-type").startswith("text/html"):
|
if h.headers.get("content-type") and not h.headers.get(
|
||||||
|
"content-type"
|
||||||
|
).startswith("text/html"):
|
||||||
logger.debug(f"skipping {l} for bad content type")
|
logger.debug(f"skipping {l} for bad content type")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
@@ -92,6 +94,12 @@ def fetch_og_metadata(user_agent, links):
|
|||||||
logger.debug(f"failed to GET {l}: {err!r}")
|
logger.debug(f"failed to GET {l}: {err!r}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
# FIXME(tsileo): check mimetype via the URL too (like we do for images)
|
||||||
|
if not r.headers.get("content-type") or not r.headers.get(
|
||||||
|
"content-type"
|
||||||
|
).startswith("text/html"):
|
||||||
|
continue
|
||||||
|
|
||||||
r.encoding = "UTF-8"
|
r.encoding = "UTF-8"
|
||||||
html = r.text
|
html = r.text
|
||||||
try:
|
try:
|
||||||
|
Reference in New Issue
Block a user