Improve change-set handling #35

Merged
cmo merged 5 commits from improve-change-set into master 2026-03-10 13:24:42 +00:00
3 changed files with 73 additions and 18 deletions

View file

@ -1,3 +1,19 @@
# 0.2.14 (2026-03-10)
## Improvements
- JSON files in change sets (created by `auto-curate --create-change-set`) are
now pretty-printed with an indentation of 2.
- The directory in which the change set is created is written to stdout when
`dtc auto-curate --create-change-set ...` exits.
- The output of the git subprocesses is captured and no longer written to stdout
or stderr when running `dtc auto-curate --create-change-set ...`.
- All records that are added to a change set are removed from their inbox.
# 0.2.13 (2026-03-10) # 0.2.13 (2026-03-10)
## New features ## New features

View file

@ -234,7 +234,11 @@ def auto_curate(
if dry_run: if dry_run:
console.print(f'[DRY_RUN]:INITIALIZING GIT REPO: {create_change_set}') console.print(f'[DRY_RUN]:INITIALIZING GIT REPO: {create_change_set}')
else: else:
subprocess.run(['git', 'init', str(create_change_set)], check=1) subprocess.run(
['git', 'init', str(create_change_set)],
capture_output=True,
check=True,
)
if post_change_set: if post_change_set:
if list_labels or list_records: if list_labels or list_records:
@ -381,7 +385,9 @@ def auto_curate(
if result != 0: if result != 0:
return result return result
if output is not None: if create_change_set:
click.echo(str(create_change_set))
elif output is not None:
click.echo(json.dumps(output, ensure_ascii=False)) click.echo(json.dumps(output, ensure_ascii=False))
return 0 return 0
@ -534,7 +540,7 @@ def _update_change_set(
console.print(f'[DRY_RUN]:STORE existing record [green]"{record["pid"]}"[/green] of class "{class_name}" from curated area at "{file_path.resolve()}"') console.print(f'[DRY_RUN]:STORE existing record [green]"{record["pid"]}"[/green] of class "{class_name}" from curated area at "{file_path.resolve()}"')
else: else:
file_path.write_text( file_path.write_text(
json.dumps(existing_record, ensure_ascii=False) + '\n', json.dumps(existing_record, ensure_ascii=False, indent=2) + '\n',
encoding='utf8' encoding='utf8'
) )
else: else:
@ -547,7 +553,12 @@ def _update_change_set(
if dry_run: if dry_run:
console.print(f'[DRY_RUN]:GIT ADD {intra_repo_file_path} (cdw={change_set})') console.print(f'[DRY_RUN]:GIT ADD {intra_repo_file_path} (cdw={change_set})')
else: else:
subprocess.run(['git', 'add', str(intra_repo_file_path)], cwd=change_set, check=True) subprocess.run(
['git', 'add', str(intra_repo_file_path)],
capture_output=True,
cwd=change_set,
check=True,
)
# Write the new record without annotations to disk # Write the new record without annotations to disk
annotations = record.pop('annotations', None) annotations = record.pop('annotations', None)
@ -555,7 +566,7 @@ def _update_change_set(
console.print(f'[DRY_RUN]:STORE new record [green]"{record["pid"]}"[/green] of class "{class_name}" from inbox {label} at "{file_path.resolve()}"') console.print(f'[DRY_RUN]:STORE new record [green]"{record["pid"]}"[/green] of class "{class_name}" from inbox {label} at "{file_path.resolve()}"')
else: else:
file_path.write_text( file_path.write_text(
json.dumps(record, ensure_ascii=False) + '\n', json.dumps(record, ensure_ascii=False, indent=2) + '\n',
encoding='utf8' encoding='utf8'
) )
if dry_run: if dry_run:
@ -566,10 +577,35 @@ def _update_change_set(
encoding='utf8' encoding='utf8'
) )
if dry_run:
console.print(f'[DRY_RUN]:DELETING record with pid [green]{record["pid"]}[/green]')
else:
# Delete record from incoming area
try:
incoming_delete_record(
service_url=destination_service_url,
collection=destination_collection,
label=label,
pid=record['pid'],
token=destination_token,
session=session,
)
except HTTPError as e:
console.print(
f'[red]Error[/red]: deleting record with pid [green]{record["pid"]}[/green] failed: {e}: {e.response.text}',
)
return 1
if dry_run: if dry_run:
console.print(f'[DRY_RUN]:GIT COMMIT in {change_set}') console.print(f'[DRY_RUN]:GIT COMMIT in {change_set}')
else: else:
subprocess.run(['git', 'commit', '-m', 'commit curated state'], cwd=change_set, check=True) subprocess.run(
['git', 'commit', '-m', 'commit curated state'],
capture_output=True,
cwd=change_set,
check=True,
)
return 0 return 0

View file

@ -138,6 +138,10 @@ def test_auto_curate_create_change_set_end_to_end(dump_things_service, tmp_path_
) )
assert result.exit_code == 0, 'dtc post-records failed' assert result.exit_code == 0, 'dtc post-records failed'
# Check that there are records in the incoming area of 'tester'
incoming_records = tuple(read_records_from_store(store=store, incoming='tester'))
assert incoming_records != tuple()
# Create a change set # Create a change set
result = runner.invoke( result = runner.invoke(
cli, cli,
@ -174,18 +178,13 @@ def test_auto_curate_create_change_set_end_to_end(dump_things_service, tmp_path_
capture_output=True, capture_output=True,
) )
lines = [line.strip() for line in result.stdout.decode().splitlines()] lines = [line.strip() for line in result.stdout.decode().splitlines()]
# Every diff should be seven lines long, the line at index 5 contains the diffing_pids = [
# previous content, the line at index 6 contains the new content. int(line[13:].split()[0][-5:])
for patch in range(int(len(lines) / 7)): for line in lines if line.startswith('diff --git')
old = json.loads(lines[(7 * patch) + 5][1:])
new = json.loads(lines[(7 * patch) + 6][1:]) ]
pid = int(lines[7 * patch][-5:]) assert all(map(lambda pid: pid in diffing_pids, new_records))
if pid in new_records: assert all(map(lambda pid: pid in diffing_pids, modified_curated_records))
assert old == None
assert new == new_records[pid]
else:
assert old == new_curated_records[pid]
assert new == modified_curated_records[pid]
# Check that annotations are stored in the change set # Check that annotations are stored in the change set
annotations = { annotations = {
@ -194,6 +193,10 @@ def test_auto_curate_create_change_set_end_to_end(dump_things_service, tmp_path_
} }
assert len(annotations) == len(modified_curated_records) + len(new_curated_records) assert len(annotations) == len(modified_curated_records) + len(new_curated_records)
# Check that all inboxes are empty
incoming_records = tuple(read_records_from_store(store=store, incoming='tester'))
assert incoming_records == tuple()
def test_auto_curate_post_change_set_end_to_end(dump_things_service, tmp_path_factory): def test_auto_curate_post_change_set_end_to_end(dump_things_service, tmp_path_factory):
port, store = dump_things_service port, store = dump_things_service