improve auto-curate #1

Merged
cmo merged 1 commit from improve_auto_curate into main 2025-12-05 10:01:39 +00:00
3 changed files with 65 additions and 16 deletions

View file

@ -48,20 +48,31 @@ The tool reads a token from the environment variable `DUMPTHINGS_TOKEN` if set.
Move records from inboxes into the curated part of a collection.
```
usage: auto-curate [-h] [--exclude [EXCLUDE ...]] [--list-labels] [--list-only] base_url collection
usage: auto_curate [-h] [--destination-base-url DEST_SERVICE_URL] [--destination-collection DEST_COLLECTION] [--destination-token DEST_TOKEN] [--exclude [EXCLUDE ...]] [--list-labels] [--list-only] [-p PID]
SOURCE_SERVICE_URL SOURCE_COLLECTION
Automatically move records from the incoming areas of a collection to the curated area of the same collection, or to the incoming area of another collection.
positional arguments:
base_url
collection
SOURCE_SERVICE_URL
SOURCE_COLLECTION
options:
-h, --help show this help message and exit
--exclude, -e [EXCLUDE ...] don't move records from inbox with label EXCLUDE
--list-labels, -l list all inbox labels, don't move any records
--list-only, -o list all records in all inboxes, don't move any record
--destination-base-url DEST_SERVICE_URL
select a different dump-thing-service, i.e. not SOURCE_SERVICE_URL, as destination for auto-curated records
--destination-collection DEST_COLLECTION
select a different collection, i.e. not the SOURCE_COLLECTION of SOURCE_SERVICE_URL, as destination for auto-curated records
--destination-token DEST_TOKEN
if provided, this token will be used for the destination service, otherwise ${CURATOR_TOKEN} will be used
--exclude, -e [EXCLUDE ...]
exclude an inbox on the source collection
--list-labels, -l
--list-only, -o
-p, --pid PID if provided, process only records that match the given PIDs. NOTE: matching does not involve CURIE-resolution!
```
`auto-curate` requires that the environment variable `CURATOR_TOKEN` is set, and contains a valid curator-token
`auto-curate` requires that the environment variable `CURATOR_TOKEN` is set, and contains a valid curator-token.
#### build-local-triple-store

View file

@ -1 +1 @@
__version__ = '0.1.0'
__version__ = '0.2.0'

View file

@ -16,20 +16,54 @@ from triple_tools.communicate import (
def main():
argument_parser = argparse.ArgumentParser()
argument_parser.add_argument('base_url')
argument_parser.add_argument('collection')
argument_parser.add_argument('--exclude', '-e', nargs='*', default=[])
argument_parser = argparse.ArgumentParser(
prog='auto_curate',
description="""
Automatically move records from the incoming areas of a
collection to the curated area of the same collection, or to
the incoming area of another collection.
"""
)
argument_parser.add_argument('base_url', metavar='SOURCE_SERVICE_URL')
argument_parser.add_argument('collection', metavar='SOURCE_COLLECTION')
argument_parser.add_argument(
'--destination-base-url',
default=None,
metavar='DEST_SERVICE_URL',
help='select a different dump-thing-service, i.e. not SOURCE_SERVICE_URL, as destination for auto-curated records',
)
argument_parser.add_argument(
'--destination-collection',
default=None,
metavar='DEST_COLLECTION',
help='select a different collection, i.e. not the SOURCE_COLLECTION of SOURCE_SERVICE_URL, as destination for auto-curated records',
),
argument_parser.add_argument(
'--destination-token',
default=None,
metavar='DEST_TOKEN',
help='if provided, this token will be used for the destination service, otherwise ${CURATOR_TOKEN} will be used',
)
argument_parser.add_argument('--exclude', '-e', nargs='*', default=[], help='exclude an inbox on the source collection')
argument_parser.add_argument('--list-labels', '-l', action='store_true')
argument_parser.add_argument('--list-only', '-o', action='store_true')
argument_parser.add_argument(
'-p', '--pid', action='append',
help='if provided, process only records that match the given PIDs. NOTE: matching does not involve CURIE-resolution!',
)
arguments = argument_parser.parse_args()
print(arguments)
curator_token = os.environ.get('CURATOR_TOKEN')
if curator_token is None:
print('ERROR: CURATOR_TOKEN not set', file=sys.stderr, flush=True)
return 1
destination_url = arguments.destination_base_url or arguments.base_url
destination_collection = arguments.destination_collection or arguments.collection
destination_token = arguments.destination_token or curator_token
for label in get_labels(
url_base=arguments.base_url,
collection=arguments.collection,
@ -48,6 +82,10 @@ def main():
label=label,
token=curator_token
):
if arguments.pid:
if record['pid'] not in arguments.pid:
continue
if arguments.list_only:
print(f'{label}:\t{record}')
continue
@ -55,8 +93,8 @@ def main():
class_name = re.search('([_A-Za-z0-9]*$)', record['schema_type']).group(0)
# Store record in collection
post_to_url(
f'{arguments.base_url}/{arguments.collection}/curated/record/{class_name}',
token=curator_token,
f'{destination_url}/{destination_collection}/curated/record/{class_name}',
token=destination_token,
content=record,
)