264 lines
No EOL
10 KiB
Python
264 lines
No EOL
10 KiB
Python
# /// script
# requires-python = ">=3.12"
# dependencies = [
#   "dump-things-pyclient @ https://hub.psychoinformatics.de/datalink/dump-things-pyclient.git",
#   "icalendar",
#   "rich",
#   "rich-click",
# ]
# ///
|
|
from icalendar import Calendar
|
|
from os import environ
|
|
import urllib.request
|
|
import rich_click as click
|
|
from dump_things_pyclient.communicate import (
|
|
collection_write_record,
|
|
collection_read_record_with_pid,
|
|
)
|
|
|
|
# example target format:
|
|
# https://hedgedoc.psychoinformatics.de/3cSouq0YSJ6m64_ArWpJEg?edit
|
|
# TODO:
|
|
# - create merit-based award in psyinf pool, add pid here
|
|
# - create record for this script in pool, add pid here
|
|
|
|
|
|
# iCalendar (.ics) feed for each calendar this script reads. The keys are
# looked up in ``competition_types`` to pick the competition type of the
# events found in the respective calendar; iteration follows this order.
urls = dict(
    juniorgroup='https://webmail.fz-juelich.de/owa/calendar/d61ec0ce8d704cb293df97fbb3c8fe23@fz-juelich.de/2e3e5a7baff44a3780cb2f872bef77b07381212461382934376/calendar.ics',
    funding='https://webmail.fz-juelich.de/owa/calendar/d61ec0ce8d704cb293df97fbb3c8fe23@fz-juelich.de/a7ad1dee32cf49749872136f3a9223191125633822045414026/calendar.ics',
    award='https://webmail.fz-juelich.de/owa/calendar/d61ec0ce8d704cb293df97fbb3c8fe23@fz-juelich.de/2f6c8a7e45e5416f89c22258bba6ae0114099503221118509202/calendar.ics',
)
|
|
|
|
# PID of the competition type assigned to the events of each calendar.
# 'funding' and 'juniorgroup' deliberately carry the same PID here.
competition_types = dict(
    funding="xyzrins:competition-types/4e49ac7d-d6da-4131-806b-6425491e26fd",
    award="xyzrins:competition-types/16e38ddd-6323-4fc7-abad-398609bf8541",  # pid does not yet exist
    juniorgroup="xyzrins:competition-types/4e49ac7d-d6da-4131-806b-6425491e26fd",
)
|
|
|
|
|
|
def get_calender(url: str) -> dict:
    """Download the iCalendar feed at *url* and return it parsed.

    The response body is decoded as UTF-8 and handed to
    ``Calendar.from_ical``; whatever that call produces is returned
    unchanged (the ``dict`` annotation is kept as-is for callers).
    """
    with urllib.request.urlopen(url) as response:
        payload = response.read()
    return Calendar.from_ical(payload.decode('utf-8'))
|
|
|
|
|
|
def create_competitions(pool: str = 'https://pool.psychoinformatics.de/api',
                        collection: str = 'public',
                        ) -> None:
    """Mirror every event of the configured calendars into the pool.

    For each calendar in ``urls`` the feed is fetched and each of its
    events is turned into a competition record whose PID is derived from
    the event UID. If a record with that PID already exists in
    *collection* it is used as the starting point and only the slots
    reported as editable are refreshed; otherwise a new record is
    started. Records for which ``_assemble_record`` returns ``None``
    are not submitted.

    Parameters:
        pool: Base URL of the service that is read from and written to.
        collection: Collection used for BOTH the lookup of existing
            records and the submission of new or changed ones.
    """
    for calendar_key, url in urls.items():
        cal = get_calender(url)
        competition_type = competition_types[calendar_key]
        for event in cal.events:
            pid = 'xyzrins:competition/' + event.get("UID").ical_value
            old_rec = _check_existing_record(pool, collection, pid)
            if old_rec:
                do_not_edit = _check_for_immutable_infos(old_rec)
                tmpl = old_rec
            else:
                # a falsy lookup result means there is nothing to build on
                print(f"New record: {pid}")
                tmpl = {'schema_type': 'xyzri:XYZCompetition'}
                do_not_edit = []
            record = _assemble_record(pid=pid,
                                      info=event,
                                      competition_type=competition_type,
                                      do_not_edit=do_not_edit,
                                      tmpl=tmpl,
                                      calurl=url,
                                      old_rec=old_rec)
            if record is None:
                continue
            # Submit to the collection that was asked for (and that the
            # lookup above used) rather than to a fixed one, so that
            # reading and writing cannot target different collections.
            print(f"submitting record with pid {record['pid']}...")
            collection_write_record(
                service_url=pool,
                collection=collection,
                class_name='XYZCompetition',
                record=record,
                format='json',
                token=environ['DTC_TOKEN'],
            )
    return
|
|
|
|
|
|
def _check_existing_record(pool: str,
                           collection: str,
                           pid: str) -> dict | None:
    """Ask the service at *pool* for the record stored under *pid*.

    The lookup is made in *collection* and authenticated with the token
    from the ``DTC_TOKEN`` environment variable (``KeyError`` if unset).
    The result of ``collection_read_record_with_pid`` is passed through
    unchanged.
    """
    lookup = {
        'service_url': pool,
        'collection': collection,
        'pid': pid,
        'token': environ['DTC_TOKEN'],
    }
    return collection_read_record_with_pid(**lookup)
|
|
|
|
|
|
def _check_for_immutable_infos(rec: dict) -> list:
|
|
# get a list of all slots in the record. Treat those without an
|
|
# attribute about machine-generation as immutable
|
|
generated_infos = \
|
|
[dict['value'] for dict in rec['attributes'][0]['attributes'] \
|
|
if 'importedFrom' in rec['attributes'][0]['predicate']]
|
|
# don't touch keys if they don't have a machine-generated annotation
|
|
do_not_edit = [key for key in rec.keys() if key not in generated_infos]
|
|
return do_not_edit
|
|
|
|
|
|
def _extend_or_add(tmpl: dict,
|
|
key: str,
|
|
value: str | dict,
|
|
do_not_edit: list,
|
|
generated_infos: list,
|
|
generated: str | None = None,
|
|
is_list: bool = False
|
|
) -> (dict, list):
|
|
# If a key already exists in the template, update it. Otherwise,
|
|
# create it. However, do not touch the template if the key is not
|
|
# to be updated. Keep a record of machine-annotated items (generated) in
|
|
# generated_infos. Generated is a list of strings, which serve as
|
|
# flexible descriptions. E.g. "associated-with-funder"
|
|
if generated in do_not_edit:
|
|
return tmpl, generated_infos
|
|
if tmpl.get(key, None) is not None:
|
|
if type(tmpl[key]) == str:
|
|
# single value, just update
|
|
tmpl[key] = value
|
|
# there already is content, add to it
|
|
elif is_list:
|
|
tmpl[key].extend(value)
|
|
else:
|
|
tmpl[key].update(value)
|
|
else:
|
|
# key does not yet exist
|
|
if is_list:
|
|
tmpl[key] = [value]
|
|
else:
|
|
tmpl[key] = value
|
|
if generated is not None:
|
|
generated_infos.append(generated)
|
|
return tmpl, generated_infos
|
|
|
|
|
|
def _add_machine_prov(tmpl: dict,
|
|
generated_infos: list,
|
|
calurl: str,
|
|
scriptpid: str = "xyzrins:instruments/54be0232-d05e-4941-bfba-70716cfd6b05", # pid does not yet exist
|
|
) -> dict:
|
|
prov = {'attributes': [
|
|
{'predicate': 'http://purl.org/pav/importedFrom',
|
|
'value': calurl,
|
|
'attributes': []
|
|
}]}
|
|
for value in generated_infos:
|
|
new = {'predicate': 'prov:generated',
|
|
'value': value,
|
|
'characterized_by': [{
|
|
'predicate': 'prov:generated_by',
|
|
'object': scriptpid
|
|
}]}
|
|
prov['attributes'][0]['attributes'].append(new)
|
|
tmpl.update(prov)
|
|
return tmpl
|
|
|
|
|
|
def _assemble_record(pid: str,
                     info: dict,
                     competition_type: str,
                     do_not_edit: list,
                     tmpl: dict,
                     calurl: str,
                     old_rec: dict | None = None,
                     ) -> dict | None:
    """Fill a record template with the data of one calendar event.

    Parameters:
        pid: PID to store in the record.
        info: The calendar event; read via ``info.get(<PROPERTY>)``.
        competition_type: Value stored under ``kind``.
        do_not_edit: Labels of slots that must be left as they are.
        tmpl: Starting point for the record. It is NOT modified; the
            work is done on a deep copy.
        calurl: Calendar URL recorded as the import source.
        old_rec: The record as currently stored, if there is one.

    Returns:
        The assembled record including its provenance annotation, or
        ``None`` when *old_rec* is given and nothing changed.
    """
    from copy import deepcopy

    # The caller may hand in the very same dict as ``tmpl`` and
    # ``old_rec``. Filling that dict in place would change ``old_rec``
    # along with it, so the comparison further down could never detect a
    # difference and existing records would never be updated.
    tmpl = deepcopy(tmpl)

    # keep a list of machine-written infos
    generated_infos = []
    tmpl["pid"] = pid
    tmpl["kind"] = competition_type
    #funder, homepage = _find_funder(info)
    #if funder:
    #    assoc = {'object': f"{funder}", #funder would need to be a valid object
    #             'roles': ['marcrel:fnd']}
    #    tmpl, generated_infos = _extend_or_add(tmpl,
    #                                           "associated_with",
    #                                           assoc,
    #                                           do_not_edit,
    #                                           generated_infos,
    #                                           'associated-with-funder',
    #                                           is_list=True)
    #if homepage:
    #    homepage = {'predicate': 'foaf:homepage',
    #                'value': f'{homepage}'}
    #    tmpl, generated_infos = _extend_or_add(tmpl,
    #                                           "attributes",
    #                                           homepage,
    #                                           do_not_edit,
    #                                           generated_infos,
    #                                           'attribute-homepage',
    #                                           is_list=True)

    # NOTE(review): `.td` is the attribute read from the DTSTART property
    # here - confirm it exists in the installed icalendar version (date
    # properties are usually read via `.dt`).
    tmpl, generated_infos = _extend_or_add(tmpl,
                                           "application_deadline",
                                           info.get("DTSTART").td.isoformat(),
                                           do_not_edit,
                                           generated_infos,
                                           'deadline')
    tmpl, generated_infos = _extend_or_add(tmpl,
                                           'title',
                                           info.get("SUMMARY").ical_value,
                                           do_not_edit,
                                           generated_infos,
                                           'title')
    # description needs to be stripped from newlines to be valid
    desc = info.get("DESCRIPTION").ical_value.replace('\n', ' ')
    # everything from the first 'Please note' onwards is dropped
    if 'Please note' in desc:
        desc = desc.split('Please note')[0]
    tmpl, generated_infos = _extend_or_add(tmpl,
                                           'description',
                                           desc,
                                           do_not_edit,
                                           generated_infos,
                                           'description')
    # if the record already existed, check if anything changed; this is
    # done before the provenance annotation is (re)written
    if old_rec is not None:
        if old_rec == tmpl:
            print(f"No change for existing cecord with PID {pid}.")
            return None
    tmpl = _add_machine_prov(tmpl, generated_infos, calurl)
    return tmpl
|
|
|
|
|
|
def _find_funder(info: dict) -> (str, str):
|
|
funder = None
|
|
homepage = None
|
|
for line in info.get("DESCRIPTION").splitlines():
|
|
if line.startswith("Information Funding Organization: "):
|
|
# strip "Information Funding Organization: ", split url from name
|
|
funder, homepage = line[34:].split('<')
|
|
homepage = homepage.strip('>')
|
|
return funder, homepage
|
|
|
|
|
|
def _upload_records(record: dict,
                    pool: str = 'https://pool.psychoinformatics.de/api',
                    collection: str = 'public',
                    ) -> None:
    """Submit *record* as an ``XYZCompetition`` to the service at *pool*.

    Parameters:
        record: The record to write; its ``pid`` is printed beforehand.
        pool: Base URL of the service.
        collection: Collection to write into. Defaults to ``'public'``,
            which is what this function always used before the
            parameter existed.

    The request is authenticated with the token from the ``DTC_TOKEN``
    environment variable (``KeyError`` if it is not set).
    """
    print(f"submitting record with pid {record['pid']}...")
    collection_write_record(
        service_url=pool,
        collection=collection,
        class_name='XYZCompetition',
        record=record,
        format='json',
        token=environ['DTC_TOKEN'],
    )
    return
|
|
|
|
|
|
@click.command()
@click.option('--dtc-api-url', '-a',
              default='https://pool.psychoinformatics.de/api',
              help='Base URL of the knowledge pool API.')
@click.option('--dtc-collection', '-c',
              default='public',
              help='Collection to read existing records from and write to.')
def main(
    dtc_api_url: str = 'https://pool.psychoinformatics.de/api',
    dtc_collection: str = 'public',
) -> None:
    """
    Scrape the outlook funding calendars of the FZJ and write events as
    Competition records into the knowledge pool at --dtc-api-url, into the
    collection determined by --dtc-collection.
    """
    # The docstring above doubles as the command's help text, so
    # implementation notes live in comments instead.
    if environ.get('DTC_TOKEN', None) is None:
        print("DTC_TOKEN required in environment! Aborting.")
        # signal the failure to the calling shell / scheduler instead of
        # finishing with exit status 0
        raise SystemExit(1)
    create_competitions(pool=dtc_api_url,
                        collection=dtc_collection)
|
|
|
|
|
|
# Start the command-line interface only when this file is run as a script.
if __name__ == "__main__":
    main()