www-from-model/code/get_person_depiction_urls.py
Stephan Heunis e3e8a96285
All checks were successful
Deploy on webserver / Build site and deploy on success (push) Successful in 37s
- switch to dtc get-records
- Update depictions workflow to use inject-links-pid and updated jq logic
- rather use a new python script to filter person-depiction-distribution-urls
- ignore addurl failures
2026-03-03 23:44:02 +01:00

45 lines
No EOL
1.3 KiB
Python
Executable file

#!/usr/bin/env python3
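"""Generate person-depiction-distribution-urls.

Reads one JSON person record per line from stdin. For every download URL
found on a depiction of the selected kind, writes one tab-separated row to
stdout: the pid with its prefix (up to the first colon) removed, the file
extension of the URL, and the URL itself.
"""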
import sys
import json
from urllib.parse import urlparse, unquote
from pathlib import Path
def get_extension(url: str) -> str:
    """Return the file extension of the path component of *url*.

    The query string and fragment are dropped by ``urlparse`` and the path is
    percent-decoded before the suffix is taken, so
    ``https://host/a%2Ejpg?x=1#y`` gives ``"jpg"``. Only the last suffix is
    returned (``"gz"`` for ``archive.tar.gz``), without its leading dot.

    Args:
        url: The URL to inspect.

    Returns:
        The extension without the leading dot, or ``""`` when the path has
        no suffix or the URL cannot be parsed.
    """
    try:
        path = urlparse(url).path
    except ValueError:
        # urlparse rejects some malformed URLs (e.g. an unbalanced "[" in the
        # host part). Report "no extension" instead of aborting the caller.
        return ""
    return Path(unquote(path)).suffix.lstrip(".")
# Only depictions whose "kind" equals this identifier are exported.
_DEPICTION_KIND = "xyzrins:depiction-types/e9a34f7d-d05e-4591-bb45-f8a0c499e07b"
# Predicate of the "characterized_by" entries that carry a download URL.
_DOWNLOAD_URL_PREDICATE = "dcat:downloadUrl"


def _dicts_in(value):
    """Yield the dict elements of *value* if it is a list, otherwise nothing.

    JSON ``null`` (``None``), scalars, and non-dict list elements are skipped
    so that callers can chain ``.get()`` lookups without type checks.
    """
    if not isinstance(value, list):
        return
    for item in value:
        if isinstance(item, dict):
            yield item


def iter_download_urls(lines):
    """Yield ``(curie_ref, url)`` for each download URL in the records.

    Args:
        lines: Iterable of strings, each expected to hold one JSON object
            describing a person. Blank lines, lines that are not valid JSON,
            and JSON values that are not objects are skipped.

    Yields:
        Tuples of the record's ``pid`` with everything up to the first colon
        removed (the whole ``pid`` if it has no colon) and a non-empty URL
        string. URLs are taken from ``depictions[].distributions[]
        .characterized_by[]`` entries whose ``predicate`` is
        ``dcat:downloadUrl``, and only from depictions of the selected kind.
    """
    for raw in lines:
        text = raw.strip()
        if not text:
            continue
        try:
            person = json.loads(text)
        except json.JSONDecodeError:
            # Deliberate best effort: one malformed line must not abort the
            # whole run.
            continue
        if not isinstance(person, dict):
            continue
        pid = person.get("pid")
        if not pid or not isinstance(pid, str):
            continue
        curie_ref = pid.split(":", 1)[-1]
        # _dicts_in() tolerates fields that are present but null; a plain
        # .get(key, []) returns None in that case and would break iteration.
        for dep in _dicts_in(person.get("depictions")):
            if dep.get("kind") != _DEPICTION_KIND:
                continue
            for dist in _dicts_in(dep.get("distributions")):
                for char in _dicts_in(dist.get("characterized_by")):
                    if char.get("predicate") != _DOWNLOAD_URL_PREDICATE:
                        continue
                    url = char.get("object")
                    if url and isinstance(url, str):
                        yield curie_ref, url


def main():
    """Read person records from stdin and print one TSV row per URL.

    Each row is ``curie_ref``, file extension, and URL, separated by tabs.
    """
    for curie_ref, url in iter_download_urls(sys.stdin):
        ext = get_extension(url)
        print(f"{curie_ref}\t{ext}\t{url}")


if __name__ == "__main__":
    main()