improve auto-curate #1

Merged
cmo merged 1 commit from improve_auto_curate into main 2025-12-05 10:01:39 +00:00
3 changed files with 65 additions and 16 deletions

View file

@ -48,20 +48,31 @@ The tool reads a token from the environment variable `DUMPTHINGS_TOKEN` if set.
Move records from inboxes into the curated part of a collection. Move records from inboxes into the curated part of a collection.
``` ```
usage: auto-curate [-h] [--exclude [EXCLUDE ...]] [--list-labels] [--list-only] base_url collection usage: auto_curate [-h] [--destination-base-url DEST_SERVICE_URL] [--destination-collection DEST_COLLECTION] [--destination-token DEST_TOKEN] [--exclude [EXCLUDE ...]] [--list-labels] [--list-only] [-p PID]
SOURCE_SERVICE_URL SOURCE_COLLECTION
Automatically move records from the incoming areas of a collection to the curated area of the same collection, or to the incoming area of another collection.
positional arguments: positional arguments:
base_url SOURCE_SERVICE_URL
collection SOURCE_COLLECTION
options: options:
-h, --help show this help message and exit -h, --help show this help message and exit
--exclude, -e [EXCLUDE ...] don't move records from inbox with label EXCLUDE --destination-base-url DEST_SERVICE_URL
--list-labels, -l list all inbox labels, don't move any records select a different dump-thing-service, i.e. not SOURCE_SERVICE_URL, as destination for auto-curated records
--list-only, -o list all records in all inboxes, don't move any records --destination-collection DEST_COLLECTION
select a different collection, i.e. not the SOURCE_COLLECTION of SOURCE_SERVICE_URL, as destination for auto-curated records
--destination-token DEST_TOKEN
if provided, this token will be used for the destination service, otherwise ${CURATOR_TOKEN} will be used
--exclude, -e [EXCLUDE ...]
exclude an inbox on the source collection
--list-labels, -l
--list-only, -o
-p, --pid PID if provided, process only records that match the given PIDs. NOTE: matching does not involve CURIE-resolution!
``` ```
`auto-curate` requires that the environment variable `CURATOR_TOKEN` is set, and contains a valid curator-token `auto-curate` requires that the environment variable `CURATOR_TOKEN` is set, and contains a valid curator-token.
#### build-local-triple-store #### build-local-triple-store

View file

@ -1 +1 @@
__version__ = '0.1.0' __version__ = '0.2.0'

View file

@ -16,20 +16,54 @@ from triple_tools.communicate import (
def main(): def main():
argument_parser = argparse.ArgumentParser() argument_parser = argparse.ArgumentParser(
argument_parser.add_argument('base_url') prog='auto_curate',
argument_parser.add_argument('collection') description="""
argument_parser.add_argument('--exclude', '-e', nargs='*', default=[]) Automatically move records from the incoming areas of a
collection to the curated area of the same collection, or to
the incoming area of another collection.
"""
)
argument_parser.add_argument('base_url', metavar='SOURCE_SERVICE_URL')
argument_parser.add_argument('collection', metavar='SOURCE_COLLECTION')
argument_parser.add_argument(
'--destination-base-url',
default=None,
metavar='DEST_SERVICE_URL',
help='select a different dump-thing-service, i.e. not SOURCE_SERVICE_URL, as destination for auto-curated records',
)
argument_parser.add_argument(
'--destination-collection',
default=None,
metavar='DEST_COLLECTION',
help='select a different collection, i.e. not the SOURCE_COLLECTION of SOURCE_SERVICE_URL, as destination for auto-curated records',
),
argument_parser.add_argument(
'--destination-token',
default=None,
metavar='DEST_TOKEN',
help='if provided, this token will be used for the destination service, otherwise ${CURATOR_TOKEN} will be used',
)
argument_parser.add_argument('--exclude', '-e', nargs='*', default=[], help='exclude an inbox on the source collection')
argument_parser.add_argument('--list-labels', '-l', action='store_true') argument_parser.add_argument('--list-labels', '-l', action='store_true')
argument_parser.add_argument('--list-only', '-o', action='store_true') argument_parser.add_argument('--list-only', '-o', action='store_true')
argument_parser.add_argument(
'-p', '--pid', action='append',
help='if provided, process only records that match the given PIDs. NOTE: matching does not involve CURIE-resolution!',
)
arguments = argument_parser.parse_args() arguments = argument_parser.parse_args()
print(arguments)
curator_token = os.environ.get('CURATOR_TOKEN') curator_token = os.environ.get('CURATOR_TOKEN')
if curator_token is None: if curator_token is None:
print('ERROR: CURATOR_TOKEN not set', file=sys.stderr, flush=True) print('ERROR: CURATOR_TOKEN not set', file=sys.stderr, flush=True)
return 1 return 1
destination_url = arguments.destination_base_url or arguments.base_url
destination_collection = arguments.destination_collection or arguments.collection
destination_token = arguments.destination_token or curator_token
for label in get_labels( for label in get_labels(
url_base=arguments.base_url, url_base=arguments.base_url,
collection=arguments.collection, collection=arguments.collection,
@ -48,6 +82,10 @@ def main():
label=label, label=label,
token=curator_token token=curator_token
): ):
if arguments.pid:
if record['pid'] not in arguments.pid:
continue
if arguments.list_only: if arguments.list_only:
print(f'{label}:\t{record}') print(f'{label}:\t{record}')
continue continue
@ -55,8 +93,8 @@ def main():
class_name = re.search('([_A-Za-z0-9]*$)', record['schema_type']).group(0) class_name = re.search('([_A-Za-z0-9]*$)', record['schema_type']).group(0)
# Store record in collection # Store record in collection
post_to_url( post_to_url(
f'{arguments.base_url}/{arguments.collection}/curated/record/{class_name}', f'{destination_url}/{destination_collection}/curated/record/{class_name}',
token=curator_token, token=destination_token,
content=record, content=record,
) )