Improve auto-curate #9

Merged
cmo merged 5 commits from auto-correct-opt-in into master 2026-01-19 11:19:49 +00:00
3 changed files with 31 additions and 2 deletions

9
CHANGELOG.md Normal file
View file

@ -0,0 +1,9 @@
# 0.1.4 (2026-01-19)
## New features
- Add -d/--dry-run option to auto-curate. If given, auto-curate will not modify
any data, but only print what it would do.
- Add -i/--include option to include only the given inboxes in the auto-curate
process.

View file

@ -64,6 +64,12 @@ def _main():
default=[],
help='exclude an inbox on the source collection (repeatable)',
)
argument_parser.add_argument(
'-i', '--include',
action='append',
default=[],
help='process only the given inbox, all other inboxes are ignored (repeatable, -e/--exclude is applied after inclusion)',
)
argument_parser.add_argument(
'-l', '--list-labels',
action='store_true',
@ -79,6 +85,11 @@ def _main():
action='append',
help='if provided, process only records that match the given PIDs. NOTE: matching does not involve CURIE-resolution!',
)
argument_parser.add_argument(
'-d', '--dry-run',
action='store_true',
help='if provided, do not alter any data, instead print what would be done',
)
arguments = argument_parser.parse_args()
curator_token = os.environ.get(token_name)
@ -107,6 +118,10 @@ def _main():
collection=arguments.collection,
token=curator_token):
if arguments.include and label not in arguments.include:
logger.debug('ignoring non-included incoming label: %s', label)
continue
if label in arguments.exclude:
logger.debug('ignoring excluded incoming label: %s', label)
continue
@ -141,7 +156,7 @@ def _main():
# `record_dir+stl`, or `sqlite+stl`.
try:
class_name = re.search('([_A-Za-z0-9]*$)', record['schema_type']).group(0)
except IndexError:
except (IndexError, KeyError):
global stl_info
if not stl_info:
print(
@ -159,6 +174,11 @@ def _main():
flush=True)
continue
if arguments.dry_run:
print(f'WRITE record "{record["pid"]}" of class "{class_name}" to "{destination_collection}@{destination_url}"')
print(f'DELETE record "{record["pid"]}" from inbox "{label}" of "{arguments.collection}@{arguments.service_url}"')
continue
# Store record in destination collection
curated_write_record(
service_url=destination_url,

View file

@ -1,6 +1,6 @@
[project]
name = "dump-things-pyclient"
version = "0.1.3"
version = "0.1.4"
description = "A client library and some CLI command for dump-things-services"
readme = "README.md"
requires-python = ">=3.11"