dump-things-server/dump_things_service/tests/test_basic.py
2026-01-28 21:35:59 +01:00

448 lines
14 KiB
Python

from pathlib import Path
import pytest # F401
from .. import (
HTTP_200_OK,
HTTP_401_UNAUTHORIZED,
HTTP_403_FORBIDDEN,
HTTP_404_NOT_FOUND,
)
from ..__about__ import __version__
from ..utils import cleaned_json
from .create_store import (
given_name,
pid,
)
from .test_utils import basic_write_locations
# Location of the simple test schema that lives next to this test module.
schema_file = Path(__file__).parent / 'testschema.yaml'

# Record posted by the store/read-back tests; the name contains non-ASCII
# characters on purpose.
extra_record = {
    'schema_type': 'abc:Person',
    'pid': 'abc:aaaa',
    'given_name': 'DavidÖÄÜ',
}

# Record that is created and then removed again by the deletion test.
delete_record = {
    'schema_type': 'abc:Person',
    'pid': 'abc:delete-me',
    'given_name': 'Detlef',
}

# Name with umlauts, sharp s and an emoji, plus its UTF-8 byte form, used to
# inspect what ends up on disk.
unicode_name = 'AlienÖÄÜ-ß👽'
unicode_bytes = unicode_name.encode('utf-8')

unicode_record = {
    'schema_type': 'abc:Person',
    'pid': 'abc:unicode-test',
    'given_name': unicode_name,
}
def test_search_by_pid(fastapi_client_simple):
    """Fetch the known test record by pid from collection_1 .. collection_8.

    Each request is sent with the 'basic_access' token and must answer with
    HTTP 200 and exactly the record built from the imported `pid` and
    `given_name` values.

    `fastapi_client_simple` unpacks into a test client and a second value
    that is not used here.
    """
    client, _ = fastapi_client_simple
    expected_record = {
        'schema_type': 'abc:Person',
        'pid': pid,
        'given_name': given_name,
    }
    auth_headers = {'x-dumpthings-token': 'basic_access'}

    for collection_no in range(1, 9):
        url = f'/collection_{collection_no}/record?pid={pid}'
        reply = client.get(url, headers=auth_headers)
        assert reply.status_code == HTTP_200_OK
        assert reply.json() == expected_record
def test_get_all(fastapi_client_simple):
    """List all records of collection_1 .. collection_8, plain and paginated.

    Both the unpaginated endpoint (`/records/`) and the paginated endpoint
    (`/records/p/`) must answer with HTTP 200. The number of returned records
    (respectively the reported `total`) has to be either 1 or 3.
    """
    client, _ = fastapi_client_simple
    auth_headers = {'x-dumpthings-token': 'basic_access'}
    accepted_counts = (1, 3)

    # Unpaginated listing: the body is the list of records itself
    for collection_no in range(1, 9):
        reply = client.get(
            f'/collection_{collection_no}/records/',
            headers=auth_headers,
        )
        assert reply.status_code == HTTP_200_OK
        assert len(reply.json()) in accepted_counts

    # Paginated listing: the body carries the record count under 'total'
    for collection_no in range(1, 9):
        reply = client.get(
            f'/collection_{collection_no}/records/p/',
            headers=auth_headers,
        )
        assert reply.status_code == HTTP_200_OK
        assert reply.json()['total'] in accepted_counts
def test_delete(fastapi_client_simple):
    """Create a record, delete it, and verify it is gone afterwards.

    Sequence checked against collection_1 with token 'token-1':
    1. POST `delete_record`            -> 200
    2. GET it by pid                   -> 200, pid matches
    3. DELETE it                       -> 200, body is `true`
    4. GET it again                    -> 200, body is `null`
    5. DELETE it a second time         -> 404
    """
    client, _ = fastapi_client_simple
    token_headers = {'x-dumpthings-token': 'token-1'}
    record_url = '/collection_1/record?pid=abc:delete-me'

    created = client.post(
        '/collection_1/record/Person',
        headers=token_headers,
        json=delete_record,
    )
    assert created.status_code == HTTP_200_OK

    # Check that the record exists
    fetched = client.get(record_url, headers=token_headers)
    assert fetched.status_code == HTTP_200_OK
    assert fetched.json()['pid'] == 'abc:delete-me'

    # First deletion succeeds and reports `True`
    deleted = client.delete(record_url, headers=token_headers)
    assert deleted.status_code == HTTP_200_OK
    assert deleted.json() is True

    # The record can no longer be fetched; the endpoint answers with `null`
    fetched_again = client.get(record_url, headers=token_headers)
    assert fetched_again.status_code == HTTP_200_OK
    assert fetched_again.json() is None

    # Deleting a record that does not exist anymore is reported as not found
    deleted_again = client.delete(record_url, headers=token_headers)
    assert deleted_again.status_code == HTTP_404_NOT_FOUND
def test_hashed_token(fastapi_client_simple):
    """Check which of two related token strings grants read access.

    The token 'cmo-cmo' must be accepted and return the known test record.
    The long 'cmo-<hex digest>' string must be rejected with HTTP 401.
    NOTE(review): the second value presumably is the hashed representation
    of the first token as kept in the store configuration -- confirm against
    the test store setup.
    """
    client, _ = fastapi_client_simple
    url = f'/collection_1/record?pid={pid}'

    accepted = client.get(url, headers={'x-dumpthings-token': 'cmo-cmo'})
    assert accepted.status_code == HTTP_200_OK
    assert accepted.json() == {
        'schema_type': 'abc:Person',
        'pid': pid,
        'given_name': given_name,
    }

    rejected = client.get(
        url,
        headers={
            'x-dumpthings-token': 'cmo-33b726a7e2b9eaf1f8f124049822ade31cb6516a4d8221634b01d13d793bfe16',
        },
    )
    assert rejected.status_code == HTTP_401_UNAUTHORIZED
def test_search_by_class(fastapi_client_simple):
    """Query all `Thing` records of collection_1 .. collection_8.

    A collection returns either three records (curated, mode test and the
    global test record, in that order) or only the global test record.
    """
    client, _ = fastapi_client_simple
    auth_headers = {'x-dumpthings-token': 'basic_access'}

    global_record = {
        'given_name': 'WolfgangÖÄß',
        'pid': 'abc:some_timee@x.com',
        'schema_type': 'abc:Person',
    }
    three_records = [
        {
            'given_name': 'curated',
            'pid': 'abc:curated',
            'schema_type': 'abc:Person',
        },
        {
            'given_name': 'mode_curated',
            'pid': 'abc:mode_test',
            'schema_type': 'abc:Person',
        },
        global_record,
    ]

    for collection_no in range(1, 9):
        reply = client.get(
            f'/collection_{collection_no}/records/Thing',
            headers=auth_headers,
        )
        assert reply.status_code == HTTP_200_OK

        json_result = reply.json()
        if len(json_result) == 3:  # noqa: PLR2004
            expected = three_records
        else:
            # If only one record is present, it is the global test record
            expected = [global_record]
        assert json_result == expected
def test_search_by_pid_no_token(fastapi_client_simple):
    """Fetch the known test record by pid without sending any token header.

    All of collection_1 .. collection_8 must answer with HTTP 200 and the
    record built from the imported `pid` and `given_name` values.
    """
    client, _ = fastapi_client_simple
    expected_record = {
        'schema_type': 'abc:Person',
        'pid': pid,
        'given_name': given_name,
    }

    for collection_no in range(1, 9):
        # Deliberately no `headers=` argument: the request carries no token
        reply = client.get(f'/collection_{collection_no}/record?pid={pid}')
        assert reply.status_code == HTTP_200_OK
        assert reply.json() == expected_record
def test_store_record(fastapi_client_simple):
    """Store `extra_record` in the writable locations and read it back.

    Steps, each performed for every `(collection index, token)` pair in
    `basic_write_locations` unless noted otherwise:

    1. POST `extra_record` as a `Person`.
    2. GET it back by pid and compare it (ignoring 'annotations').
    3. For collection_3 .. collection_5 (read with 'basic_access'), verify
       that only the pre-existing test record is listed.
    4. Query class `Thing` and verify that both the new and the pre-existing
       `Person` records are returned, i.e. subclasses are included.
    5. Repeat step 4 on the paginated endpoint and verify the page envelope.

    Every request's status code is asserted, so that a failing request is
    reported as such instead of as a confusing payload mismatch.
    """
    test_client, _ = fastapi_client_simple

    # The record that the fixture store already contains
    existing_record = {
        'schema_type': 'abc:Person',
        'pid': pid,
        'given_name': given_name,
    }

    # Store a record in every writable collection
    for i, token in basic_write_locations:
        response = test_client.post(
            f'/collection_{i}/record/Person',
            headers={'x-dumpthings-token': token},
            json=extra_record,
        )
        assert response.status_code == HTTP_200_OK

    # Check that the new record can be retrieved from those collections
    for i, token in basic_write_locations:
        response = test_client.get(
            f'/collection_{i}/record?pid={extra_record["pid"]}',
            headers={'x-dumpthings-token': token},
        )
        assert response.status_code == HTTP_200_OK
        assert (
            cleaned_json(response.json(), remove_keys=('annotations',))
            == extra_record
        )

    # Check that other collections do not report the new record
    for i in range(3, 6):
        response = test_client.get(
            f'/collection_{i}/records/Person',
            headers={'x-dumpthings-token': 'basic_access'},
        )
        assert response.status_code == HTTP_200_OK
        assert response.json() == [existing_record]

    # Check that subclasses are retrieved
    for i, token in basic_write_locations:
        response = test_client.get(
            f'/collection_{i}/records/Thing',
            headers={'x-dumpthings-token': token},
        )
        assert response.status_code == HTTP_200_OK
        cleaned_response = cleaned_json(
            response.json(), remove_keys=('annotations',)
        )
        assert extra_record in cleaned_response
        assert existing_record in cleaned_response

    # Check pagination
    for i, token in basic_write_locations:
        response = test_client.get(
            f'/collection_{i}/records/p/Thing',
            headers={'x-dumpthings-token': token},
        )
        assert response.status_code == HTTP_200_OK

        # Parse the body once and reuse it for all envelope checks
        page = response.json()
        for key in ('items', 'total', 'page', 'size', 'pages'):
            assert key in page

        cleaned_response = cleaned_json(
            page['items'], remove_keys=('annotations',)
        )
        assert extra_record in cleaned_response
        assert existing_record in cleaned_response
def test_encoding(fastapi_client_simple):
    """Verify that non-ASCII text is written to YAML files as plain UTF-8.

    A record with non-ASCII characters is stored through the API, which
    triggers the YAML dumping that is under test. Afterwards every `*.yaml`
    file below the store path is read as bytes: none may contain a literal
    backslash-x escape, and any file mentioning 'Alien' must contain the
    UTF-8 encoded name.

    NOTE(review): if no YAML file contains b'Alien', the UTF-8 check is never
    exercised and the test still passes -- confirm that the record is indeed
    persisted as YAML for this collection.
    """
    client, store_root = fastapi_client_simple

    reply = client.post(
        '/collection_1/record/Person',
        headers={'x-dumpthings-token': 'token-1'},
        json=unicode_record,
    )
    assert reply.status_code == HTTP_200_OK

    # Check that no '\\x'-encoding is present in files
    escape_marker = b'\\x'
    for yaml_path in store_root.glob('**/*.yaml'):
        raw_content = yaml_path.read_bytes()
        assert escape_marker not in raw_content
        if b'Alien' not in raw_content:
            # Not the file holding the unicode record; nothing more to check
            continue
        assert unicode_bytes in raw_content
def test_global_store_write_fails(fastapi_client_simple):
    """Posting without a token must be refused for collection_1 .. collection_8.

    Each POST is expected to answer with HTTP 403.
    """
    client, _ = fastapi_client_simple
    payload = {'pid': extra_record['pid']}

    for collection_no in range(1, 9):
        # Since we provide no token, the default token will be used. This will
        # only allow reading from curated, not posting.
        reply = client.post(
            f'/collection_{collection_no}/record/Person',
            json=payload,
        )
        assert reply.status_code == HTTP_403_FORBIDDEN
@pytest.mark.skip(reason='No runtime store adding yet')
def test_token_store_adding(fastapi_client_simple):
    """Add a token store directory at runtime and expect the token to work.

    Currently skipped. The intended flow: a POST with the token 'david_bowie'
    is rejected with HTTP 401, then the matching token directory is created
    below the store directory, and the same POST must succeed with HTTP 200.
    """
    client, store_root = fastapi_client_simple
    token_headers = {'x-dumpthings-token': 'david_bowie'}
    payload = {'pid': extra_record['pid']}

    first_attempt = client.post(
        '/collection_1/record/Person',
        headers=token_headers,
        json=payload,
    )
    assert first_attempt.status_code == HTTP_401_UNAUTHORIZED

    # Create collection-directory and token-directory and retry.
    # `mkdir()` is called without `parents=True`, so the parent directories
    # have to exist already.
    token_dir = store_root / 'token_stores' / 'collection_1' / 'david_bowie'
    token_dir.mkdir()

    second_attempt = client.post(
        '/collection_1/record/Person',
        headers=token_headers,
        json=payload,
    )
    assert second_attempt.status_code == HTTP_200_OK
def test_funky_pid(fastapi_client_simple):
    """Store and retrieve a record whose pid contains a slash.

    The pid 'dlflatsocial:contributors/someone' is posted to, and then read
    from, every `(collection index, token)` pair in `basic_write_locations`.
    All requests must answer with HTTP 200.
    """
    client, _ = fastapi_client_simple
    record_pid = 'dlflatsocial:contributors/someone'

    for collection_no, token in basic_write_locations:
        stored = client.post(
            f'/collection_{collection_no}/record/Person',
            headers={'x-dumpthings-token': token},
            json={'pid': record_pid},
        )
        assert stored.status_code == HTTP_200_OK

    # Try to find it
    for collection_no, token in basic_write_locations:
        found = client.get(
            f'/collection_{collection_no}/record?pid={record_pid}',
            headers={'x-dumpthings-token': token},
        )
        assert found.status_code == HTTP_200_OK
def test_token_store_priority(fastapi_client_simple):
    """Check that a token's own record is preferred over the global one.

    A record with the same pid as the global test record, but a different
    `given_name`, is posted with 'token-1'. Reading that pid with 'token-1'
    must return the new content, while reading it with 'basic_access' must
    still return the global test record.
    """
    # The second fixture element is not needed in this test
    test_client, _ = fastapi_client_simple
    record_url = f'/collection_1/record?pid={pid}'

    # Post a record with the same pid as the global store's test record, but
    # with different content.
    response = test_client.post(
        '/collection_1/record/Person',
        headers={'x-dumpthings-token': 'token-1'},
        json={'pid': pid, 'given_name': 'DavidÖÄß'},
    )
    assert response.status_code == HTTP_200_OK

    # Check that the new record is returned with the token
    response = test_client.get(
        record_url,
        headers={'x-dumpthings-token': 'token-1'},
    )
    assert response.status_code == HTTP_200_OK
    assert response.json()['given_name'] == 'DavidÖÄß'

    # Check that the global test record is returned with basic access
    response = test_client.get(
        record_url,
        headers={'x-dumpthings-token': 'basic_access'},
    )
    assert response.status_code == HTTP_200_OK
    assert response.json()['given_name'] == given_name
def test_unknown_token(fastapi_client_simple):
    """Requests carrying an unknown token must be rejected with HTTP 401.

    Both reading (GET) and writing (POST) on collection_1 are checked.
    """
    client, _ = fastapi_client_simple
    bogus_headers = {'x-dumpthings-token': 'unknown_token'}

    # Check that fetching with an unknown token is handled gracefully
    read_reply = client.get(
        '/collection_1/record?pid=abc:unknown-token',
        headers=bogus_headers,
    )
    assert read_reply.status_code == HTTP_401_UNAUTHORIZED

    # Check that posting with an unknown token is handled gracefully
    write_reply = client.post(
        '/collection_1/record/Person',
        json={'pid': 'abc:unknown-token'},
        headers=bogus_headers,
    )
    assert write_reply.status_code == HTTP_401_UNAUTHORIZED
def test_curie_expansion(fastapi_client_simple):
    """Look up a record by its full IRI instead of its CURIE.

    The query passes the percent-encoded form of
    'http://example.org/person-schema/abc/mode_test' as pid, without a token.
    The service must answer with HTTP 200 and the record whose pid is
    'abc:mode_test'.
    """
    client, _ = fastapi_client_simple
    encoded_iri = 'http%3A%2F%2Fexample.org%2Fperson-schema%2Fabc%2Fmode_test'

    # Check that the pid is expanded correctly
    reply = client.get(f'/collection_1/record?pid={encoded_iri}')
    assert reply.status_code == HTTP_200_OK
    assert reply.json() == {
        'schema_type': 'abc:Person',
        'pid': 'abc:mode_test',
        'given_name': 'mode_curated',
    }
def test_server(fastapi_client_simple):
    """Verify the payload of the `/server` endpoint.

    The response must report the package version and, in this order, the
    eight `collection_<n>` entries backed by the local test schema followed
    by the two `collection_dlflatsocial-<n>` entries backed by the
    flat-social schema, each with its list of classes.
    """
    client, _ = fastapi_client_simple

    test_schema_classes = [
        'Thing',
        'Agent',
        'InstantaneousEvent',
        'Person',
    ]
    flat_social_classes = [
        'Thing',
        'Property',
        'ValueSpecification',
        'FlatThing',
        'FlatProperty',
        'AnnotationTag',
        'Organization',
        'Person',
        'Project',
    ]

    # Collections that use the local test schema
    simple_collections = [
        {
            'name': f'collection_{index}',
            'schema': str(schema_file),
            'classes': test_schema_classes,
        }
        for index in range(1, 9)
    ]
    # Collections that use the remote flat-social schema
    flat_social_collections = [
        {
            'name': f'collection_dlflatsocial-{index}',
            'schema': 'https://concepts.datalad.org/s/flat-social/unreleased.yaml',
            'classes': flat_social_classes,
        }
        for index in range(1, 3)
    ]
    expected_payload = {
        'version': __version__,
        'collections': simple_collections + flat_social_collections,
    }

    reply = client.get('/server')
    assert reply.status_code == HTTP_200_OK
    assert reply.json() == expected_payload
def test_ignore_classes(fastapi_client_simple):
    """Check per-collection availability of the `Organization`/`Project` classes.

    For each of the two classes, posting a record with 'token-1' must succeed
    (HTTP 200) on 'collection_dlflatsocial-1' and must be answered with
    HTTP 404 on 'collection_dlflatsocial-2'.
    """
    client, _ = fastapi_client_simple
    token_headers = {'x-dumpthings-token': 'token-1'}

    # Expected status per collection, checked in this order for every class
    expectations = (
        ('collection_dlflatsocial-1', HTTP_200_OK),
        ('collection_dlflatsocial-2', HTTP_404_NOT_FOUND),
    )

    for class_name in ('Organization', 'Project'):
        for collection_name, expected_status in expectations:
            reply = client.post(
                f'/{collection_name}/record/{class_name}',
                headers=token_headers,
                json={'pid': f'dlflatsocial:c_{class_name}'},
            )
            assert reply.status_code == expected_status