502 lines
17 KiB
Python
502 lines
17 KiB
Python
import json
|
|
import random
|
|
import subprocess
|
|
from itertools import chain
|
|
|
|
import pytest
|
|
from click.testing import CliRunner
|
|
|
|
from dump_things_pyclient.commands.dtc import cli
|
|
from dump_things_pyclient.tests.common import (
|
|
add_unique_records,
|
|
read_records_from_store,
|
|
)
|
|
|
|
|
|
# Common prefix of the `pid` values of the person records that
# `_create_unique_records` builds.
prefix = 'https://www.example.com/ac_e2e_test/'
|
|
|
|
|
|
def _create_unique_records(offset: int, count: int) -> dict[int, dict]:
    """Create `count` person records with distinct random numeric ids.

    The ids are drawn without replacement from
    ``range(offset, offset + 10000)``.

    :param offset: lowest id that may be drawn
    :param count: number of records to create
    :return: mapping from the numeric id to the record built for that id
    :raises ValueError: if `count` is negative or larger than 10000
    """
    # `random.sample` draws without replacement, so the ids are unique by
    # construction and no retry loop is required.
    pids = random.sample(range(offset, offset + 10000), count)
    return {
        # NOTE: the order of keys is relevant because there are JSON-string
        # comparisons in the change-set tests below.
        pid: {
            'schema_type': 'test:Person',
            'pid': prefix + f'person_{pid}',
            'family_name': f'grieg_{pid}',
            'given_name': f'erwin_{pid}',
        }
        for pid in pids
    }
|
|
|
|
|
|
def test_auto_curate_basic_end_to_end(dump_things_service):
    """Post records to an inbox, auto-curate them, and check where they end up.

    After `dtc auto-curate` the inbox of `test_user_1` must be empty and all
    posted records must be readable from the curated area.
    """
    port, store = dump_things_service
    service_url = f'http://127.0.0.1:{port}'

    new_records = tuple(_create_unique_records(10000, 5).values())

    def curated_persons() -> tuple:
        # The record is the third element of every entry that
        # `read_records_from_store` yields; annotations are removed so that
        # the records can be compared with the posted ones.
        return tuple(
            entry[2]
            for entry in read_records_from_store(
                store,
                class_name='Person',
                remove_keys=['annotations'],
            )
        )

    # Submit the records, one JSON document per line, to the inbox
    runner = CliRunner()
    json_lines = '\n'.join(
        json.dumps(record, ensure_ascii=False) for record in new_records
    )
    result = runner.invoke(
        cli,
        ['--token=user_1', 'post-records', service_url, 'collection_1', 'Person'],
        input=json_lines + '\n',
    )
    assert result.exit_code == 0, 'dtc post-records failed'

    # None of the records may be in the curated area yet
    curated_before = curated_persons()
    for record in new_records:
        assert record not in curated_before, 'record already exists, possibly a random number collision'

    # Run the auto-curation for the inbox of `test_user_1`
    result = runner.invoke(
        cli,
        ['--token=token-curator', 'auto-curate', '-i', 'test_user_1', service_url, 'collection_1'],
    )
    assert result.exit_code == 0, 'dtc auto-curate failed'

    # The inbox has to be empty now
    inbox_records = tuple(
        entry[2]
        for entry in read_records_from_store(store, incoming='test_user_1')
    )
    assert inbox_records == (), 'Inbox not clean after auto-curation'

    # All records have to be present in the curated area
    curated_after = curated_persons()
    for record in new_records:
        assert record in curated_after
|
|
|
|
|
|
def test_auto_curate_create_change_set_end_to_end(dump_things_service, tmp_path_factory):
    """Check `dtc auto-curate --create-change-set` end to end.

    Five records are first auto-curated. Modified versions of them and five
    additional new records are then posted to the inbox of `test_user_1` and
    written to a change set. The test inspects `git status`, `git diff`, and
    the annotation files of the change set, and checks that the inbox is empty
    afterwards.
    """
    port, store = dump_things_service
    change_set_dir = tmp_path_factory.mktemp('create_change_set')

    new_records = _create_unique_records(20000, 5)
    new_curated_records = _create_unique_records(30000, 5)

    # Add new curated records to inbox and auto-curate them to move them to
    # the curated area.
    runner = CliRunner()
    result = runner.invoke(
        cli,
        ['--token=user_1', 'post-records', f'http://127.0.0.1:{port}', 'collection_1', 'Person'],
        input='\n'.join(
            json.dumps(record, ensure_ascii=False) for record in new_curated_records.values()
        ) + '\n'
    )
    assert result.exit_code == 0, 'dtc post-records failed'

    # Perform auto-curation
    result = runner.invoke(
        cli,
        ['--token=token-curator', 'auto-curate', '-i', 'test_user_1', f'http://127.0.0.1:{port}', 'collection_1'],
    )
    assert result.exit_code == 0, 'dtc auto-curate failed'

    # Modify the records that were auto-curated and upload those together with
    # newly created records to the inbox.
    modified_curated_records = {
        # NOTE: the order of keys is relevant because there are JSON-string
        # comparisons in the change-set tests below.
        pid: {
            'schema_type': 'test:Person',
            'pid': record['pid'],
            'family_name': record['family_name'],
            'given_name': record['given_name'].replace('erwin', 'edvard'),
        }
        for pid, record in new_curated_records.items()
    }

    # Upload the modified (already curated) records and the new records to the
    # inbox.
    result = runner.invoke(
        cli,
        ['--token=user_1', 'post-records', f'http://127.0.0.1:{port}', 'collection_1', 'Person'],
        input='\n'.join(
            json.dumps(record, ensure_ascii=False)
            for record in chain(modified_curated_records.values(), new_records.values())
        ) + '\n'
    )
    assert result.exit_code == 0, 'dtc post-records failed'

    # Check that there are records in the incoming area of 'test_user_1'
    incoming_records = tuple(read_records_from_store(store=store, incoming='test_user_1'))
    assert incoming_records != tuple(), 'no records in the inbox of test_user_1'

    # Create a change set
    result = runner.invoke(
        cli,
        [
            '--token=token-curator',
            'auto-curate',
            '--create-change-set', str(change_set_dir),
            '-i', 'test_user_1',
            f'http://127.0.0.1:{port}', 'collection_1',
        ],
    )
    assert result.exit_code == 0, 'dtc auto-curate --create-change-set failed'

    # Check the number of modified records in the change set
    result = subprocess.run(
        ['git', 'status', '-s'],
        cwd=str(change_set_dir),
        check=True,
        capture_output=True,
    )
    status_lines = [
        line.strip()
        for line in result.stdout.decode().splitlines()
        if 'annotations' not in line
    ]
    assert len(status_lines) == len(new_records) + len(modified_curated_records), \
        f'unexpected number of modified records: {len(status_lines)}'
    assert all(
        status_line.startswith('M records/test_user_1')
        for status_line in status_lines
    ), f'unexpected status for modified records: {status_lines}'

    # Check for expected diff content
    result = subprocess.run(
        ['git', 'diff', '-p'],
        cwd=str(change_set_dir),
        check=True,
        capture_output=True,
    )
    diff_lines = [line.strip() for line in result.stdout.decode().splitlines()]
    diffing_pids = [
        # Skip the 13 characters of 'diff --git a/', take the first path, and
        # read its last five characters as the numeric record id.
        int(line[13:].split()[0][-5:])
        for line in diff_lines if line.startswith('diff --git')
    ]
    assert all(pid in diffing_pids for pid in new_records), \
        'not all new records appear in the diff'
    assert all(pid in diffing_pids for pid in modified_curated_records), \
        'not all modified records appear in the diff'

    # Check that annotations are stored in the change set
    annotations = {
        int(p.name[-5:]): json.loads(p.read_text())
        for p in (change_set_dir / 'annotations').glob('test_user_1/*')
    }
    # The inbox contained the modified records and the new records, so these
    # are the records that annotations are expected for.
    assert len(annotations) == len(modified_curated_records) + len(new_records)

    # Check that all inboxes are empty
    incoming_records = tuple(read_records_from_store(store=store, incoming='test_user_1'))
    assert incoming_records == tuple(), 'inbox not empty after creating the change set'
|
|
|
|
|
|
def test_auto_curate_post_change_set_end_to_end(dump_things_service, tmp_path_factory):
    """Check `dtc auto-curate --post-change-set` with and without annotations.

    A change set is created from the inbox of `test_user_1` and posted twice:
    first plain, then with `--add-annotations`. After each post the curated
    `Person` records are read back and compared with the posted records.
    """
    port, store = dump_things_service
    change_set_dir = tmp_path_factory.mktemp('post_change_set')

    new_records = _create_unique_records(40000, 5)

    # Add new records to inbox of 'test_user_1'.
    runner = CliRunner()
    result = runner.invoke(
        cli,
        ['--token=user_1', 'post-records', f'http://127.0.0.1:{port}', 'collection_1', 'Person'],
        input='\n'.join(
            json.dumps(record, ensure_ascii=False) for record in new_records.values()
        ) + '\n'
    )
    assert result.exit_code == 0, 'dtc post-records failed'

    # Create a change set with the records from 'test_user_1's' inbox
    result = runner.invoke(
        cli,
        [
            '--token=token-curator',
            'auto-curate',
            '--create-change-set', str(change_set_dir),
            '-i', 'test_user_1',
            f'http://127.0.0.1:{port}', 'collection_1',
        ],
    )
    assert result.exit_code == 0, 'dtc auto-curate --create-change-set failed'

    # Post the changeset without annotations
    result = runner.invoke(
        cli,
        [
            '--token=token-curator',
            'auto-curate',
            '--post-change-set', str(change_set_dir),
            f'http://127.0.0.1:{port}', 'collection_1',
        ],
    )
    assert result.exit_code == 0, 'dtc auto-curate --post-change-set failed'

    # Check that new records have been posted without annotations
    curated_records = tuple(
        entry[2]
        for entry in read_records_from_store(store, class_name='Person')
    )

    for record in new_records.values():
        assert record in curated_records
    for record in curated_records:
        try:
            record_pid = int(record['pid'][-5:])
        except ValueError:
            # The pid does not end in a number, so the record was not created
            # by `_create_unique_records`.
            continue
        if record_pid in new_records:
            assert 'annotations' not in record, f'unexpected annotations in {record}'

    # Post the changeset with annotations
    result = runner.invoke(
        cli,
        [
            '--token=token-curator',
            'auto-curate',
            '--post-change-set', str(change_set_dir),
            '--add-annotations',
            f'http://127.0.0.1:{port}', 'collection_1',
        ],
    )
    assert result.exit_code == 0, 'dtc auto-curate --post-change-set --add-annotations failed'

    # Check that new records have been posted with annotations
    curated_records = tuple(
        entry[2]
        for entry in read_records_from_store(store, class_name='Person')
    )

    # Check that all record content has been posted. Only `annotations` is
    # dropped; no other key is accessed by name, so curated `Person` records
    # that lack e.g. `family_name` do not break this check.
    cleaned_curated_records = tuple(
        {k: v for k, v in r.items() if k != 'annotations'}
        for r in curated_records
    )

    for record in new_records.values():
        assert record in cleaned_curated_records

    # Check that annotations were posted. The file names are mapped back to
    # the pids by replacing '-_' with '/' and '--' with '-'.
    annotations = {
        p.name.replace('-_', '/').replace('--', '-'): json.loads(p.read_text())
        for p in change_set_dir.glob('annotations/test_user_1/*')
    }

    for record in curated_records:
        try:
            record_pid = int(record['pid'][-5:])
        except ValueError:
            continue
        if record_pid in new_records:
            assert 'annotations' in record, f'missing annotations in {record}'
            assert record['annotations'] == annotations[record['pid']]
|
|
|
|
|
|
def test_auto_curate_create_changeset_opt_in_end_to_end(dump_things_service, tmp_path_factory):
    """Check which inboxes end up in a change set with and without `-i`.

    Without `-i` the change set is expected to contain a `records`
    subdirectory for `test_user_1` and `test_user_2`; with `-i test_user_1`
    only the one for `test_user_1`.
    """
    port, store = dump_things_service
    change_set_dir_both = tmp_path_factory.mktemp('create_change_set_both')
    change_set_dir_single = tmp_path_factory.mktemp('create_change_set_single')

    new_curated_records = _create_unique_records(30000, 5)

    runner = CliRunner()

    for path, opt_in, expected_paths in (
        (change_set_dir_both, None, {'test_user_1', 'test_user_2'}),
        (change_set_dir_single, 'test_user_1', {'test_user_1'}),
    ):
        # Post the same records with the tokens of both users.
        for token in ('user_1', 'user_2'):
            result = runner.invoke(
                cli,
                [f'--token={token}', 'post-records', f'http://127.0.0.1:{port}', 'collection_1', 'Person'],
                input='\n'.join(
                    json.dumps(record, ensure_ascii=False) for record in new_curated_records.values()
                ) + '\n'
            )
            assert result.exit_code == 0, f'dtc post-records with token {token} failed'

        # Create a change set
        result = runner.invoke(
            cli,
            [
                '--token=token-curator', 'auto-curate',
            ] + (
                ['-i', opt_in] if opt_in else []
            ) + [
                '--create-change-set', str(path),
                f'http://127.0.0.1:{port}', 'collection_1',
            ],
        )
        assert result.exit_code == 0, 'dtc auto-curate --create-change-set failed'

        # Check that the change set contains the expected directories.
        change_set_paths = {entry.name for entry in path.glob('records/*')}
        assert change_set_paths == expected_paths
|
|
|
|
|
|
@pytest.mark.parametrize('create_changeset', [True, False])
def test_keep_inboxes(dump_things_service, tmp_path_factory, create_changeset):
    """Check that `auto-curate --keep-inboxes` leaves the inbox records in place.

    The test runs once with and once without `--create-change-set`.
    """
    port, store = dump_things_service

    # Assemble the command line step by step; the change-set option is only
    # present in the parametrization that asks for it.
    cli_args = ['--token=token-curator', 'auto-curate']
    if create_changeset:
        change_set_dir = tmp_path_factory.mktemp('create_changeset_keep_inboxes')
        cli_args.extend(['--create-change-set', str(change_set_dir)])
    cli_args.extend(['--keep-inboxes', f'http://127.0.0.1:{port}', 'collection_1'])

    unique_records = add_unique_records(
        port,
        'collection_1',
        5,
        f'test_keep_inboxes_{create_changeset}',
        token='user_1',
    )

    result = CliRunner().invoke(cli, cli_args)
    assert result.exit_code == 0, 'auto-curate --keep-inboxes failed'

    # Read the inbox back and drop the annotations before comparing with the
    # records that were added above.
    cleaned_incoming_records = []
    for record_info in read_records_from_store(
        store,
        collection='collection_1',
        incoming='test_user_1',
        class_name='Person',
    ):
        stored_record = record_info[2]
        cleaned_incoming_records.append({
            key: value
            for key, value in stored_record.items()
            if key != 'annotations'
        })

    for record in unique_records.values():
        assert record in cleaned_incoming_records
|
|
|
|
|
|
def test_auto_curate_if_changes(dump_things_service, monkeypatch):
    """Check the "skipping" message of `dtc auto-curate --only-if-modifying`.

    A record is posted to the curated area, and a copy that differs only in
    its `annotations` is posted to the inbox. With `--jsonpath-spec
    annotations` the auto-curation is expected to report that it skips the
    record; without it, no such report is expected.
    """
    from dump_things_pyclient.commands.dtc_plugins.auto_curate import console

    port, _ = dump_things_service
    service_url = f'http://127.0.0.1:{port}'
    skip_message = (
        'skipping writing of record [green]test:auto_curate_if_changes[/green] '
        'because a matching record already exists'
    )

    # Collect every positional argument that is passed to `console.print`.
    print_calls = []

    def record_print(*args, **kwargs):
        print_calls.extend(args)

    monkeypatch.setattr(console, 'print', record_print)

    existing_record = {
        "pid": "test:auto_curate_if_changes",
        "given_name": "markus",
        'schema_type': 'test:Person',
        'annotations': {
            'https://submitter.example.com': 'submitter_1',
            'https://counter.example.com': '1',
        },
    }

    new_record = {
        k: v for k, v in existing_record.items() if k not in ('annotations',)
    }
    new_record['annotations'] = {
        'https://submitter.example.com': 'submitter_2',
        'https://counter.example.com': '2',
    }

    runner = CliRunner()

    # Post the existing record directly into the curated area
    result = runner.invoke(
        cli,
        [
            '--token=token-curator',
            'post-records',
            '--curated',
            service_url, 'collection_1', '*',
        ],
        input=json.dumps(existing_record, ensure_ascii=False),
    )
    assert result.exit_code == 0, 'dtc post-records --curated failed'
    print_calls.clear()

    # Post the new record to the users inbox
    result = runner.invoke(
        cli,
        [
            '--token=user_1',
            'post-records',
            service_url, 'collection_1', '*',
        ],
        input=json.dumps(new_record, ensure_ascii=False),
    )
    assert result.exit_code == 0, 'dtc post-records failed'
    print_calls.clear()

    # Try to auto-curate the new record with `--only-if-modifying`, ignoring
    # `annotations`. This should not post the record to the curated area, but
    # emit a message that the record was not posted
    result = runner.invoke(
        cli,
        [
            '--token=token-curator',
            'auto-curate',
            '--include', 'test_user_1',
            '--only-if-modifying',
            '--jsonpath-spec', 'annotations',
            '--keep-inboxes',
            service_url, 'collection_1',
        ],
    )
    assert result.exit_code == 0, 'dtc auto-curate --only-if-modifying --jsonpath-spec failed'
    assert skip_message in print_calls
    print_calls.clear()

    # Try to post the new record with `--only-if-modifying`, that should post
    # the record to curated, because the annotations are different from the
    # existing record.
    result = runner.invoke(
        cli,
        [
            '--token=token-curator',
            'auto-curate',
            '--include', 'test_user_1',
            '--only-if-modifying',
            '--keep-inboxes',
            service_url, 'collection_1',
        ],
    )
    assert result.exit_code == 0, 'dtc auto-curate --only-if-modifying failed'
    assert skip_message not in print_calls
|