improve auto-curate #1
3 changed files with 65 additions and 16 deletions
25
README.md
25
README.md
|
|
@ -48,20 +48,31 @@ The tool reads a token from the environment variable `DUMPTHINGS_TOKEN` if set.
|
||||||
Move records from inboxes into the curated part of a collection.
|
Move records from inboxes into the curated part of a collection.
|
||||||
|
|
||||||
```
|
```
|
||||||
usage: auto-curate [-h] [--exclude [EXCLUDE ...]] [--list-labels] [--list-only] base_url collection
|
usage: auto_curate [-h] [--destination-base-url DEST_SERVICE_URL] [--destination-collection DEST_COLLECTION] [--destination-token DEST_TOKEN] [--exclude [EXCLUDE ...]] [--list-labels] [--list-only] [-p PID]
|
||||||
|
SOURCE_SERVICE_URL SOURCE_COLLECTION
|
||||||
|
|
||||||
|
Automatically move records from the incoming areas of a collection to the curated area of the same collection, or to the incoming area of another collection.
|
||||||
|
|
||||||
positional arguments:
|
positional arguments:
|
||||||
base_url
|
SOURCE_SERVICE_URL
|
||||||
collection
|
SOURCE_COLLECTION
|
||||||
|
|
||||||
options:
|
options:
|
||||||
-h, --help show this help message and exit
|
-h, --help show this help message and exit
|
||||||
--exclude, -e [EXCLUDE ...] don't move records from inbox with label EXCLUDE
|
--destination-base-url DEST_SERVICE_URL
|
||||||
--list-labels, -l list all inbox labels, don't move any records
|
select a different dump-thing-service, i.e. not SOURCE_SERVICE_URL, as destination for auto-curated records
|
||||||
--list-only, -o list all records in all inboxes, don't move any record
|
--destination-collection DEST_COLLECTION
|
||||||
|
select a different collection, i.e. not the SOURCE_COLLECTION of SOURCE_SERVICE_URL, as destination for auto-curated records
|
||||||
|
--destination-token DEST_TOKEN
|
||||||
|
if provided, this token will be used for the destination service, otherwise ${CURATOR_TOKEN} will be used
|
||||||
|
--exclude, -e [EXCLUDE ...]
|
||||||
|
exclude an inbox on the source collection
|
||||||
|
--list-labels, -l
|
||||||
|
--list-only, -o
|
||||||
|
-p, --pid PID if provided, process only records that match the given PIDs. NOTE: matching does not involve CURIE-resolution!
|
||||||
```
|
```
|
||||||
|
|
||||||
`auto-curate` requires that the environment variable `CURATOR_TOKEN` is set, and contains a valid curator-token
|
`auto-curate` requires that the environment variable `CURATOR_TOKEN` is set, and contains a valid curator-token.
|
||||||
|
|
||||||
|
|
||||||
#### build-local-triple-store
|
#### build-local-triple-store
|
||||||
|
|
|
||||||
|
|
@ -1 +1 @@
|
||||||
__version__ = '0.1.0'
|
__version__ = '0.2.0'
|
||||||
|
|
|
||||||
|
|
@ -16,20 +16,54 @@ from triple_tools.communicate import (
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
argument_parser = argparse.ArgumentParser()
|
argument_parser = argparse.ArgumentParser(
|
||||||
argument_parser.add_argument('base_url')
|
prog='auto_curate',
|
||||||
argument_parser.add_argument('collection')
|
description="""
|
||||||
argument_parser.add_argument('--exclude', '-e', nargs='*', default=[])
|
Automatically move records from the incoming areas of a
|
||||||
|
collection to the curated area of the same collection, or to
|
||||||
|
the incoming area of another collection.
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
argument_parser.add_argument('base_url', metavar='SOURCE_SERVICE_URL')
|
||||||
|
argument_parser.add_argument('collection', metavar='SOURCE_COLLECTION')
|
||||||
|
argument_parser.add_argument(
|
||||||
|
'--destination-base-url',
|
||||||
|
default=None,
|
||||||
|
metavar='DEST_SERVICE_URL',
|
||||||
|
help='select a different dump-thing-service, i.e. not SOURCE_SERVICE_URL, as destination for auto-curated records',
|
||||||
|
)
|
||||||
|
argument_parser.add_argument(
|
||||||
|
'--destination-collection',
|
||||||
|
default=None,
|
||||||
|
metavar='DEST_COLLECTION',
|
||||||
|
help='select a different collection, i.e. not the SOURCE_COLLECTION of SOURCE_SERVICE_URL, as destination for auto-curated records',
|
||||||
|
),
|
||||||
|
argument_parser.add_argument(
|
||||||
|
'--destination-token',
|
||||||
|
default=None,
|
||||||
|
metavar='DEST_TOKEN',
|
||||||
|
help='if provided, this token will be used for the destination service, otherwise ${CURATOR_TOKEN} will be used',
|
||||||
|
)
|
||||||
|
argument_parser.add_argument('--exclude', '-e', nargs='*', default=[], help='exclude an inbox on the source collection')
|
||||||
argument_parser.add_argument('--list-labels', '-l', action='store_true')
|
argument_parser.add_argument('--list-labels', '-l', action='store_true')
|
||||||
argument_parser.add_argument('--list-only', '-o', action='store_true')
|
argument_parser.add_argument('--list-only', '-o', action='store_true')
|
||||||
|
argument_parser.add_argument(
|
||||||
|
'-p', '--pid', action='append',
|
||||||
|
help='if provided, process only records that match the given PIDs. NOTE: matching does not involve CURIE-resolution!',
|
||||||
|
)
|
||||||
|
|
||||||
arguments = argument_parser.parse_args()
|
arguments = argument_parser.parse_args()
|
||||||
|
print(arguments)
|
||||||
|
|
||||||
curator_token = os.environ.get('CURATOR_TOKEN')
|
curator_token = os.environ.get('CURATOR_TOKEN')
|
||||||
if curator_token is None:
|
if curator_token is None:
|
||||||
print('ERROR: CURATOR_TOKEN not set', file=sys.stderr, flush=True)
|
print('ERROR: CURATOR_TOKEN not set', file=sys.stderr, flush=True)
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
|
destination_url = arguments.destination_base_url or arguments.base_url
|
||||||
|
destination_collection = arguments.destination_collection or arguments.collection
|
||||||
|
destination_token = arguments.destination_token or curator_token
|
||||||
|
|
||||||
for label in get_labels(
|
for label in get_labels(
|
||||||
url_base=arguments.base_url,
|
url_base=arguments.base_url,
|
||||||
collection=arguments.collection,
|
collection=arguments.collection,
|
||||||
|
|
@ -48,6 +82,10 @@ def main():
|
||||||
label=label,
|
label=label,
|
||||||
token=curator_token
|
token=curator_token
|
||||||
):
|
):
|
||||||
|
if arguments.pid:
|
||||||
|
if record['pid'] not in arguments.pid:
|
||||||
|
continue
|
||||||
|
|
||||||
if arguments.list_only:
|
if arguments.list_only:
|
||||||
print(f'{label}:\t{record}')
|
print(f'{label}:\t{record}')
|
||||||
continue
|
continue
|
||||||
|
|
@ -55,8 +93,8 @@ def main():
|
||||||
class_name = re.search('([_A-Za-z0-9]*$)', record['schema_type']).group(0)
|
class_name = re.search('([_A-Za-z0-9]*$)', record['schema_type']).group(0)
|
||||||
# Store record in collection
|
# Store record in collection
|
||||||
post_to_url(
|
post_to_url(
|
||||||
f'{arguments.base_url}/{arguments.collection}/curated/record/{class_name}',
|
f'{destination_url}/{destination_collection}/curated/record/{class_name}',
|
||||||
token=curator_token,
|
token=destination_token,
|
||||||
content=record,
|
content=record,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue