Improve auto-curate #9

Merged
cmo merged 5 commits from auto-correct-opt-in into master 2026-01-19 11:19:49 +00:00
3 changed files with 31 additions and 2 deletions

9
CHANGELOG.md Normal file
View file

@ -0,0 +1,9 @@
# 0.1.4 (2026-01-19)
## New features
- Add -d/--dry-run option to auto-curate. If given, auto-curate will not modify
any data, but only print what it would do.
- Add -i/--include option to include only the given inboxes in the auto-curate
process

View file

@ -64,6 +64,12 @@ def _main():
default=[], default=[],
help='exclude an inbox on the source collection (repeatable)', help='exclude an inbox on the source collection (repeatable)',
) )
argument_parser.add_argument(
'-i', '--include',
action='append',
default=[],
help='process only the given inbox, all other inboxes are ignored (repeatable, -e/--exclude is applied after inclusion)',
)
argument_parser.add_argument( argument_parser.add_argument(
'-l', '--list-labels', '-l', '--list-labels',
action='store_true', action='store_true',
@ -79,6 +85,11 @@ def _main():
action='append', action='append',
help='if provided, process only records that match the given PIDs. NOTE: matching does not involve CURIE-resolution!', help='if provided, process only records that match the given PIDs. NOTE: matching does not involve CURIE-resolution!',
) )
argument_parser.add_argument(
'-d', '--dry-run',
action='store_true',
help='if provided, do not alter any data, instead print what would be done',
)
arguments = argument_parser.parse_args() arguments = argument_parser.parse_args()
curator_token = os.environ.get(token_name) curator_token = os.environ.get(token_name)
@ -107,6 +118,10 @@ def _main():
collection=arguments.collection, collection=arguments.collection,
token=curator_token): token=curator_token):
if arguments.include and label not in arguments.include:
logger.debug('ignoring non-included incoming label: %s', label)
continue
if label in arguments.exclude: if label in arguments.exclude:
logger.debug('ignoring excluded incoming label: %s', label) logger.debug('ignoring excluded incoming label: %s', label)
continue continue
@ -141,7 +156,7 @@ def _main():
# `record_dir+stl`, or `sqlite+stl`. # `record_dir+stl`, or `sqlite+stl`.
try: try:
class_name = re.search('([_A-Za-z0-9]*$)', record['schema_type']).group(0) class_name = re.search('([_A-Za-z0-9]*$)', record['schema_type']).group(0)
except IndexError: except (IndexError, KeyError):
global stl_info global stl_info
if not stl_info: if not stl_info:
print( print(
@ -159,6 +174,11 @@ def _main():
flush=True) flush=True)
continue continue
if arguments.dry_run:
print(f'WRITE record "{record["pid"]}" of class "{class_name}" to "{destination_collection}@{destination_url}"')
print(f'DELETE record "{record["pid"]}" from inbox "{label}" of "{arguments.collection}@{arguments.service_url}"')
continue
# Store record in destination collection # Store record in destination collection
curated_write_record( curated_write_record(
service_url=destination_url, service_url=destination_url,

View file

@ -1,6 +1,6 @@
[project] [project]
name = "dump-things-pyclient" name = "dump-things-pyclient"
version = "0.1.3" version = "0.1.4"
description = "A client library and some CLI command for dump-things-services" description = "A client library and some CLI command for dump-things-services"
readme = "README.md" readme = "README.md"
requires-python = ">=3.11" requires-python = ">=3.11"