Use request sessions to reuse existing connections #25

Merged
cmo merged 13 commits from issue-24 into master 2026-02-04 21:01:58 +00:00
9 changed files with 225 additions and 71 deletions

View file

@ -19,6 +19,7 @@ from rich.progress import track
from ...communicate import ( from ...communicate import (
HTTPError, HTTPError,
curated_write_record, curated_write_record,
get_session,
incoming_delete_record, incoming_delete_record,
incoming_read_labels, incoming_read_labels,
incoming_read_records, incoming_read_records,
@ -126,10 +127,8 @@ def cli(
dry_run, dry_run,
) )
except HTTPError as e: except HTTPError as e:
rprint( console.print(
f'[red]Error[/red]: {e}: {e.response.text}', f'[red]Error[/red]: {e}: {e.response.text}',
file=sys.stderr,
flush=True,
) )
return 1 return 1
@ -154,6 +153,9 @@ def auto_curate(
console.print(f'[red]Error[/red]: no token was provided (use --token or DTC_TOKEN environment variable)') console.print(f'[red]Error[/red]: no token was provided (use --token or DTC_TOKEN environment variable)')
return 1 return 1
if destination_collection is None:
destination_collection = collection
if destination_service_url is None: if destination_service_url is None:
destination_service_url = service_url destination_service_url = service_url
@ -173,10 +175,12 @@ def auto_curate(
if list_labels: if list_labels:
output = [] output = []
session = get_session()
all_labels = incoming_read_labels( all_labels = incoming_read_labels(
service_url=service_url, service_url=service_url,
collection=collection, collection=collection,
token=obj, token=obj,
session=session,
) )
for label in all_labels: for label in all_labels:
if include and label not in include: if include and label not in include:
@ -200,6 +204,7 @@ def auto_curate(
collection=collection, collection=collection,
label=label, label=label,
token=obj, token=obj,
session=session,
) )
# Get the first entry to find the total number of records # Get the first entry to find the total number of records
@ -241,19 +246,25 @@ def auto_curate(
class_name = re.search('([_A-Za-z0-9]*$)', record['schema_type']).group(0) class_name = re.search('([_A-Za-z0-9]*$)', record['schema_type']).group(0)
except (IndexError, KeyError): except (IndexError, KeyError):
global stl_info global stl_info
console.print(f'[yellow]Warning[/yellow]: ignoring record with pid {record["pid"]} because `schema_type` attribute is missing.') console.print(f'[yellow]Warning[/yellow]: ignoring record with pid [yellow]{record["pid"]}[/yellow] because `schema_type` attribute is missing.')
if not stl_info: if not stl_info:
console.print( console.print(
' Please ensure that `schema_type` is stored in the records ' ' [yellow]Please ensure that `schema_type` is stored in the records. Note: '
'or that the associated incoming area store has a backend with a ' 'if the incoming area store has a backend with a "Schema Type Layer", i.e., '
'"Schema Type Layer", i.e., "record_dir+stl" or "sqlite+stl"."', '"record_dir+stl" or "sqlite+stl", `schema_type` will not be stored on persistent '
'storage and will not be returned when retrieving records from the incoming area. '
'dump-things-service <= 5.4.0 circumvented the "Schema Type Layer", therefore they '
'will return records without `schema_type` attributes on curator access to '
'incoming areas or curated areas. Therefore it might be a good idea to NOT use a '
'"Schema Type Layer" in collections that shall be auto-curated, when using '
'dump-things-service <= 5.4.0.[/yellow]',
) )
stl_info = True stl_info = True
continue continue
if dry_run: if dry_run:
console.print(f'WRITE record [green]"{record["pid"]}"[/green] of class "{class_name}" to "{destination_collection}@{destination_service_url}"') console.print(f'WRITE record [green]"{record["pid"]}"[/green] of class "{class_name}" to collection "{destination_collection}" on "{destination_service_url}"')
console.print(f'DELETE record [green]"{record["pid"]}"[/green] from inbox "{label}" of "{collection}@{service_url}"') console.print(f'DELETE record [green]"{record["pid"]}"[/green] from inbox "{label}" of collection "{collection}" on "{service_url}"')
continue continue
# Store record in destination collection # Store record in destination collection
@ -264,12 +275,13 @@ def auto_curate(
class_name=class_name, class_name=class_name,
record=record, record=record,
token=destination_token, token=destination_token,
session=session,
) )
except HTTPError as e: except HTTPError as e:
console.print( console.print(
f'[red]Error[/red]: writing record with pid {record["pid"]} failed: {e}: {e.response.text}', f'[red]Error[/red]: writing record with pid {record["pid"]} failed: {e}: {e.response.text}',
) )
raise return 1
# Delete record from incoming area # Delete record from incoming area
try: try:
@ -279,12 +291,13 @@ def auto_curate(
label=label, label=label,
pid=record['pid'], pid=record['pid'],
token=curator_token, token=curator_token,
session=session,
) )
except HTTPError as e: except HTTPError as e:
console.print( console.print(
f'[red]ERROR[/red]: deleting record with pid {record["pid"]} failed: {e}: {e.response.text}', f'[red]ERROR[/red]: deleting record with pid {record["pid"]} failed: {e}: {e.response.text}',
) )
raise return 1
if output is not None: if output is not None:
rprint(json.dumps(output, ensure_ascii=False)) rprint(json.dumps(output, ensure_ascii=False))

View file

@ -4,6 +4,7 @@ import rich_click as click
from ...communicate import ( from ...communicate import (
HTTPError, HTTPError,
get_session,
incoming_delete_record, incoming_delete_record,
incoming_read_records, incoming_read_records,
) )
@ -71,11 +72,13 @@ def clean_incoming(
click.echo('ERROR: token not provided', err=True) click.echo('ERROR: token not provided', err=True)
return 1 return 1
session = get_session()
for record, _, _, _, _ in incoming_read_records( for record, _, _, _, _ in incoming_read_records(
service_url=service_url, service_url=service_url,
collection=collection, collection=collection,
label=inbox_label, label=inbox_label,
token=token, token=token,
session=session,
): ):
if list_only: if list_only:
click.echo(json.dumps(record, ensure_ascii=False)) click.echo(json.dumps(record, ensure_ascii=False))
@ -88,6 +91,6 @@ def clean_incoming(
label=inbox_label, label=inbox_label,
pid=record['pid'], pid=record['pid'],
token=token, token=token,
session=session,
) )
return 0 return 0

View file

@ -4,9 +4,12 @@ import sys
from functools import partial from functools import partial
import rich_click as click import rich_click as click
from rich.progress import track
from rich.console import Console
from ...communicate import ( from ...communicate import (
HTTPError, HTTPError,
get_session,
collection_delete_record, collection_delete_record,
curated_delete_record, curated_delete_record,
incoming_delete_record, incoming_delete_record,
@ -18,6 +21,8 @@ subcommand_name = 'delete-records'
logger = logging.getLogger('delete-records') logger = logging.getLogger('delete-records')
console = Console(file=sys.stderr)
@click.command(short_help='Delete records from a dump-things collection') @click.command(short_help='Delete records from a dump-things collection')
@click.pass_obj @click.pass_obj
@ -83,7 +88,7 @@ def cli(
ignore_errors, ignore_errors,
) )
except HTTPError as e: except HTTPError as e:
click.echo(f'ERROR: {e}: {e.response.text}', err=True) console.print(f'[red]Error[/red]: {e}: {e.response.text}')
return 1 return 1
@ -102,16 +107,15 @@ def delete_records(
click.echo(f'WARNING: no token provided', err=True) click.echo(f'WARNING: no token provided', err=True)
if incoming and curated: if incoming and curated:
click.echo( console.print('[red]Error[/red]: -i/--incoming and -c/--curated are mutually exclusive')
'ERROR: -i/--incoming and -c/--curated are mutually exclusive',
err=True,
)
return 1 return 1
session = get_session()
kwargs = dict( kwargs = dict(
service_url=service_url, service_url=service_url,
collection=collection, collection=collection,
token=token, token=token,
session=session,
) )
if incoming == '-': if incoming == '-':
@ -134,17 +138,18 @@ def delete_records(
if not pids: if not pids:
pids = sys.stdin pids = sys.stdin
for pid in pids: for pid in track(pids, console=console):
try: try:
operation( operation(
service_url=service_url, service_url=service_url,
collection=collection, collection=collection,
pid=pid.strip(), pid=pid.strip(),
token=token, token=token,
session=session,
) )
except HTTPError as e: except HTTPError as e:
console.print(f'[red]Error[/red]: while deleting pid {pid}: {e}, {e.response.text}')
if ignore_errors: if ignore_errors:
click.echo(f'ERROR: while deleting pid {pid}: {e}', err=True)
continue continue
raise return 1
return 0 return 0

View file

@ -1,4 +1,5 @@
import json import json
import sys
from collections import defaultdict from collections import defaultdict
from itertools import count from itertools import count
from pathlib import Path from pathlib import Path
@ -8,10 +9,13 @@ from typing import (
) )
import rich_click as click import rich_click as click
from rich.console import Console
from rich.progress import track
from ...communicate import ( from ...communicate import (
HTTPError, HTTPError,
curated_read_records, curated_read_records,
get_session,
incoming_read_labels, incoming_read_labels,
incoming_read_records, incoming_read_records,
server, server,
@ -20,6 +24,8 @@ from ...communicate import (
subcommand_name = 'export' subcommand_name = 'export'
console = Console(file=sys.stderr)
@click.command(short_help='Export a collection to the file system') @click.command(short_help='Export a collection to the file system')
@click.pass_obj @click.pass_obj
@ -76,9 +82,9 @@ def cli(
ignore_errors, ignore_errors,
) )
except HTTPError as e: except HTTPError as e:
click.echo(f'ERROR: {e}: {e.response.text}', err=True) console.print(f'[red]Error[/red]: {e}: {e.response.text}')
except ValueError as e: except ValueError as e:
click.echo(f'ERROR: {e}', err=True) console.print(f'[red]Error[/red]: {e}')
return 1 return 1
@ -92,14 +98,15 @@ def export(
token = obj token = obj
if token is None: if token is None:
click.echo(f'ERROR: no token provided', err=True) console.print(f'[red]Error[/red]: no token provided')
return 1 return 1
server_info = server(service_url) session = get_session()
server_info = server(service_url, session=session)
collection_info = ([c for c in server_info['collections'] if c['name'] == collection] or None)[0] collection_info = ([c for c in server_info['collections'] if c['name'] == collection] or None)[0]
if not collection_info: if not collection_info:
click.echo(f'ERROR: no collection {collection} on service', err=True) console.print(f'[red]Error[/red]: no collection {collection} on service')
return 1 return 1
description = { description = {
@ -116,6 +123,7 @@ def export(
curated_destination = destination / 'curated' curated_destination = destination / 'curated'
curated_destination.mkdir() curated_destination.mkdir()
console.print('Exporting records from curated area')
_store_records( _store_records(
map( map(
lambda x: x[0], lambda x: x[0],
@ -123,6 +131,7 @@ def export(
service_url=service_url, service_url=service_url,
collection=collection, collection=collection,
token=token, token=token,
session=session,
) )
), ),
curated_destination, curated_destination,
@ -134,7 +143,9 @@ def export(
service_url=service_url, service_url=service_url,
collection=collection, collection=collection,
token=token, token=token,
session=session,
): ):
console.print(f'Exporting records from incoming area: {label}')
incoming_destination = destination / 'incoming' / label incoming_destination = destination / 'incoming' / label
incoming_destination.mkdir(parents=True, exist_ok=False) incoming_destination.mkdir(parents=True, exist_ok=False)
_store_records( _store_records(
@ -145,6 +156,7 @@ def export(
collection=collection, collection=collection,
label=label, label=label,
token=token, token=token,
session=session,
) )
), ),
incoming_destination, incoming_destination,
@ -162,16 +174,13 @@ def _store_records(
created_dirs = set() created_dirs = set()
class_counters = defaultdict(count) class_counters = defaultdict(count)
for record in source: for record in track(source, console=console):
class_name = _de_prefix(record.get('schema_type', None)) class_name = _de_prefix(record.get('schema_type', None))
if class_name is None: if class_name is None:
if ignore_errors: if ignore_errors:
click.echo( console.print(f'[red]Error[/red]: no `schema type` in record {record["pid"]}')
f'WARNING: no `schema_type` in record `{record["pid"]}`',
err=True
)
continue continue
msg = f'no `schema_type` in record `{record["pid"]}`' msg = f'no `schema_type` in record {record["pid"]}'
raise ValueError(msg) raise ValueError(msg)
next_name_for_class = f'{next(class_counters[class_name]):09d}.json' next_name_for_class = f'{next(class_counters[class_name]):09d}.json'

View file

@ -1,7 +1,9 @@
import json import json
import sys
from functools import partial from functools import partial
import rich_click as click import rich_click as click
from rich.console import Console
from ...communicate import ( from ...communicate import (
HTTPError, HTTPError,
@ -11,6 +13,7 @@ from ...communicate import (
curated_read_records, curated_read_records,
curated_read_records_of_class, curated_read_records_of_class,
curated_read_record_with_pid, curated_read_record_with_pid,
get_session,
incoming_read_labels, incoming_read_labels,
incoming_read_records, incoming_read_records,
incoming_read_records_of_class, incoming_read_records_of_class,
@ -20,6 +23,8 @@ from ...communicate import (
subcommand_name = 'get-records' subcommand_name = 'get-records'
console = Console(file=sys.stderr)
@click.command(short_help='Get records from a dump-things collection') @click.command(short_help='Get records from a dump-things collection')
@click.pass_obj @click.pass_obj
@ -48,7 +53,7 @@ subcommand_name = 'get-records'
@click.option( @click.option(
'--incoming', '-i', '--incoming', '-i',
metavar='LABEL', metavar='LABEL',
help='read from the collection\'s inbox with label LABEL, if LABEL is "-", return labels of all collection inboxes and exit', help='read from the collection\'s inbox with label LABEL, if LABEL is "-", print labels of all collection inboxes and exit',
) )
@click.option( @click.option(
'--curated', '-c', '--curated', '-c',
@ -144,7 +149,7 @@ def cli(
pagination, pagination,
) )
except HTTPError as e: except HTTPError as e:
click.echo(f'ERROR: {e}: {e.response.text}', err=True) console.print(f'[red]Error[/red]: {e}: {e.response.text}')
return 1 return 1
@ -167,19 +172,18 @@ def get_records(
token = obj token = obj
if token is None: if token is None:
click.echo(f'WARNING: no token provided', err=True) console.print(f'[yellow]Warning[/yellow]: no token provided')
if incoming and curated: if incoming and curated:
click.echo( console.print('[red]Error[/red]: -i/--incoming and -c/--curated are mutually exclusive')
'ERROR: -i/--incoming and -c/--curated are mutually exclusive',
err=True,
)
return 1 return 1
session = get_session()
kwargs = dict( kwargs = dict(
service_url=service_url, service_url=service_url,
collection=collection, collection=collection,
token=token, token=token,
session=session,
) )
if incoming == '-': if incoming == '-':
@ -187,7 +191,9 @@ def get_records(
click.echo('\n'.join( click.echo('\n'.join(
map( map(
partial(json.dumps, ensure_ascii=False), partial(json.dumps, ensure_ascii=False),
result))) result
)
))
return 0 return 0
elif pid: elif pid:
@ -251,8 +257,8 @@ def get_records(
if pagination: if pagination:
for record in result: for record in result:
print(json.dumps(record, ensure_ascii=False)) click.echo(json.dumps(record, ensure_ascii=False))
else: else:
for record in result: for record in result:
print(json.dumps(record[0], ensure_ascii=False)) click.echo(json.dumps(record[0], ensure_ascii=False))
return 0 return 0

View file

@ -1,15 +1,20 @@
import logging import logging
import sys
import rich_click as click import rich_click as click
from rich.console import Console
from ...communicate import ( from ...communicate import (
HTTPError, HTTPError,
get_session,
maintenance as communicate_maintenance, maintenance as communicate_maintenance,
) )
logger = logging.getLogger('maintenance') logger = logging.getLogger('maintenance')
console = Console(file=sys.stderr)
subcommand_name = 'maintenance' subcommand_name = 'maintenance'
@ -61,13 +66,15 @@ def maintenance(
): ):
token = obj token = obj
if token is None: if token is None:
click.echo('ERROR: no token provided', err=True) console.print('[red]Error[/red]: no token provided')
return 1 return 1
session = get_session()
communicate_maintenance( communicate_maintenance(
service_url=service_url, service_url=service_url,
collection=collection, collection=collection,
active=active, active=active,
token=token, token=token,
session=session,
) )
return 0 return 0

View file

@ -7,11 +7,11 @@ import rich_click as click
from rich.console import Console from rich.console import Console
from rich.progress import track from rich.progress import track
from .auto_curate import console
from ...communicate import ( from ...communicate import (
HTTPError, HTTPError,
curated_write_record, curated_write_record,
collection_write_record, collection_write_record,
get_session,
) )
@ -95,6 +95,7 @@ def post_records(
else: else:
write_record = collection_write_record write_record = collection_write_record
session = get_session()
for index, line in zip(count(), track(sys.stdin, console=console)): for index, line in zip(count(), track(sys.stdin, console=console)):
try: try:
@ -112,6 +113,7 @@ def post_records(
class_name=cls, class_name=cls,
record=record, record=record,
token=token, token=token,
session=session,
) )
except HTTPError as e: except HTTPError as e:
console.print( console.print(

View file

@ -1,16 +1,21 @@
import json import json
import logging import logging
import sys
import rich_click as click import rich_click as click
from rich.console import Console
from ...communicate import ( from ...communicate import (
HTTPError, HTTPError,
get_paginated, get_paginated,
get_session,
) )
logger = logging.getLogger('read-pages') logger = logging.getLogger('read-pages')
console = Console(file=sys.stderr)
@click.command(short_help='Read records from paginated dump-things endpoints') @click.command(short_help='Read records from paginated dump-things endpoints')
@click.pass_obj @click.pass_obj
@ -106,8 +111,9 @@ def read_pages(
token = obj token = obj
if token is None: if token is None:
click.echo(f'WARNING: no token provided', err=True) console.print(f'[yellow]Warning[/yellow]: no token provided')
session = get_session()
result = get_paginated( result = get_paginated(
url=url, url=url,
token=token, token=token,
@ -121,7 +127,8 @@ def read_pages(
if matching is not None if matching is not None
else {} else {}
), ),
} },
session=session,
) )
if stats: if stats:

View file

@ -9,6 +9,7 @@ from typing import (
) )
import requests import requests
from requests import Session
from requests.exceptions import HTTPError from requests.exceptions import HTTPError
from . import JSON from . import JSON
@ -17,6 +18,7 @@ from . import JSON
__all__ = [ __all__ = [
'HTTPError', 'HTTPError',
'JSON', 'JSON',
'get_session',
'get_paginated', 'get_paginated',
'get', 'get',
'collection_get_classes', 'collection_get_classes',
@ -45,12 +47,21 @@ __all__ = [
logger = logging.getLogger('dump_things_pyclient') logger = logging.getLogger('dump_things_pyclient')
def get_session() -> Session:
"""Return a session that can be used to reuse connections
:return: a Session-object that can be passed to most functions in this module
"""
return requests.Session()
def get_paginated(url: str, def get_paginated(url: str,
token: str | None = None, token: str | None = None,
first_page: int = 1, first_page: int = 1,
page_size: int = 100, page_size: int = 100,
last_page: int | None = None, last_page: int | None = None,
parameters: dict[str, str] | None = None, parameters: dict[str, str] | None = None,
session: Session | None = None,
) -> Generator[tuple[JSON, int, int, int, int], None, None]: ) -> Generator[tuple[JSON, int, int, int, int], None, None]:
"""Read all records from a paginated endpoint """Read all records from a paginated endpoint
@ -62,6 +73,7 @@ def get_paginated(url: str,
:param last_page: [optional] last page to return (default: None (return all pages)) :param last_page: [optional] last page to return (default: None (return all pages))
:param parameters: [optional] parameters to pass to the endpoint, the :param parameters: [optional] parameters to pass to the endpoint, the
parameter `page` is set automatically in this function parameter `page` is set automatically in this function
:param session: [optional] if set it will be used for making requests
:return: a Generator yielding tuples containing the current record, the :return: a Generator yielding tuples containing the current record, the
current page number, the total number of pages, the size of the pages, current page number, the total number of pages, the size of the pages,
@ -72,7 +84,7 @@ def get_paginated(url: str,
return return
for page in count(start=first_page): for page in count(start=first_page):
result = _get_page(url, token, first_page=page, page_size=page_size, parameters=parameters) result = _get_page(url, token, first_page=page, page_size=page_size, parameters=parameters, session=session)
total_pages, page_size, total_items = result['pages'], result['size'], result['total'] total_pages, page_size, total_items = result['pages'], result['size'], result['total']
if total_pages == 0: if total_pages == 0:
return return
@ -90,6 +102,7 @@ def get_paginated(url: str,
def get(url: str, def get(url: str,
token: str | None = None, token: str | None = None,
parameters: dict[str, str] | None = None, parameters: dict[str, str] | None = None,
session: Session | None = None,
) -> JSON: ) -> JSON:
"""Read JSON object from a non-paginated endpoint """Read JSON object from a non-paginated endpoint
@ -97,14 +110,16 @@ def get(url: str,
:param token: [optional] if str: token to authenticate against the endpoint, :param token: [optional] if str: token to authenticate against the endpoint,
if None: no token will be sent to the endpoint if None: no token will be sent to the endpoint
:param parameters: [optional] parameters to pass to the endpoint :param parameters: [optional] parameters to pass to the endpoint
:param session: [optional] if set it will be used for making requests
:return: JSON object :return: JSON object
""" """
return _get_from_url(url, token, parameters) return _get_from_url(url, token, parameters, session)
def collection_get_classes(service_url: str, def collection_get_classes(service_url: str,
collection: str, collection: str,
session: Session | None = None,
) -> Generator[str, None, None]: ) -> Generator[str, None, None]:
"""Read classes that are supported by the collection """Read classes that are supported by the collection
@ -114,12 +129,17 @@ def collection_get_classes(service_url: str,
:param service_url: the base URL of the service, i.e., the URL up to :param service_url: the base URL of the service, i.e., the URL up to
`/<collection>/...` or `/server` `/<collection>/...` or `/server`
:param collection: the name of the collection :param collection: the name of the collection
:param session: [optional] if set it will be used for making requests
:return: a generator yielding names of the supported classes :return: a generator yielding names of the supported classes
""" """
service_url = f'{service_url[:-1]}' if service_url.endswith('/') else service_url service_url = f'{service_url[:-1]}' if service_url.endswith('/') else service_url
matcher = re.compile(f'/{collection}/record/([A-Z][_a-zA-Z0-9]*)$') matcher = re.compile(f'/{collection}/record/([A-Z][_a-zA-Z0-9]*)$')
open_api_spec = _get_from_url(service_url + '/openapi.json', None) open_api_spec = _get_from_url(
service_url + '/openapi.json',
token=None,
session=session,
)
for path in open_api_spec['paths']: for path in open_api_spec['paths']:
match = matcher.match(path) match = matcher.match(path)
if match: if match:
@ -131,6 +151,7 @@ def collection_read_record_with_pid(service_url: str,
pid: str, pid: str,
format: str = 'json', format: str = 'json',
token: str | None = None, token: str | None = None,
session: Session | None = None,
) -> dict | None: ) -> dict | None:
"""Read record with the given pid from the collection on the service """Read record with the given pid from the collection on the service
@ -146,13 +167,16 @@ def collection_read_record_with_pid(service_url: str,
either `json` or `ttl` either `json` or `ttl`
:param token: [optional] if set, a token to authenticate against :param token: [optional] if set, a token to authenticate against
the endpoint, if None: no token will be sent to the endpoint the endpoint, if None: no token will be sent to the endpoint
:param session: [optional] if set it will be used for making requests
:return: The record, if it exists, None otherwise. :return: The record, if it exists, None otherwise.
""" """
return get( return get(
url=_build_url(service_url, collection, 'record'), url=_build_url(service_url, collection, 'record'),
token=token, token=token,
parameters={'pid': pid, 'format': format}) parameters={'pid': pid, 'format': format},
session=session,
)
def collection_read_records(service_url: str, def collection_read_records(service_url: str,
@ -163,6 +187,7 @@ def collection_read_records(service_url: str,
page: int = 1, page: int = 1,
size: int = 100, size: int = 100,
last_page: int | None = None, last_page: int | None = None,
session: Session | None = None,
) -> Generator[tuple[dict, int, int, int, int], None, None]: ) -> Generator[tuple[dict, int, int, int, int], None, None]:
"""Read records from the collection on the service """Read records from the collection on the service
@ -179,6 +204,7 @@ def collection_read_records(service_url: str,
:param size: int: the number of records in an individual pages (default: 100) :param size: int: the number of records in an individual pages (default: 100)
:param last_page: int | None: if int, the last page that should be returned :param last_page: int | None: if int, the last page that should be returned
if None, all pages following `page` will be returned if None, all pages following `page` will be returned
:param session: [optional] if set it will be used for making requests
:return: A generator yielding tuples containing: the current record, the :return: A generator yielding tuples containing: the current record, the
current page number, the total number of pages, the size of the current page number, the total number of pages, the size of the
@ -192,7 +218,9 @@ def collection_read_records(service_url: str,
last_page=last_page, last_page=last_page,
parameters= { parameters= {
'format': format, 'format': format,
**({'matching': matching} if matching else {})}) **({'matching': matching} if matching else {})},
session=session,
)
def collection_read_records_of_class( def collection_read_records_of_class(
@ -205,6 +233,7 @@ def collection_read_records_of_class(
page: int = 1, page: int = 1,
size: int = 100, size: int = 100,
last_page: int | None = None, last_page: int | None = None,
session: Session | None = None,
) -> Generator[tuple[dict, int, int, int, int], None, None]: ) -> Generator[tuple[dict, int, int, int, int], None, None]:
"""Read records of the specified class from the collection on the service """Read records of the specified class from the collection on the service
@ -222,6 +251,7 @@ def collection_read_records_of_class(
:param size: int: the number of records in an individual pages (default: 100) :param size: int: the number of records in an individual pages (default: 100)
:param last_page: int | None: if int, the last page that should be returned :param last_page: int | None: if int, the last page that should be returned
if None, all pages following `page` will be returned if None, all pages following `page` will be returned
:param session: [optional] if set it will be used for making requests
:return: A generator yielding tuples containing: the current record, the :return: A generator yielding tuples containing: the current record, the
current page number, the total number of pages, the size of the current page number, the total number of pages, the size of the
@ -235,7 +265,9 @@ def collection_read_records_of_class(
last_page=last_page, last_page=last_page,
parameters= { parameters= {
'format': format, 'format': format,
**({'matching': matching} if matching else {})}) **({'matching': matching} if matching else {})},
session=session,
)
def collection_write_record( def collection_write_record(
@ -245,6 +277,7 @@ def collection_write_record(
record: dict | str, record: dict | str,
format: str = 'json', format: str = 'json',
token: str | None = None, token: str | None = None,
session: Session | None = None,
) -> list[JSON]: ) -> list[JSON]:
"""Write a record of the specified class to an inbox in the collection on the service """Write a record of the specified class to an inbox in the collection on the service
@ -258,6 +291,7 @@ def collection_write_record(
:param token: [optional] if set, a token to authenticate against :param token: [optional] if set, a token to authenticate against
the endpoint, if None: no token will be sent to the endpoint the endpoint, if None: no token will be sent to the endpoint
The token must have write access to incoming area in the collection The token must have write access to incoming area in the collection
:param session: [optional] if set it will be used for making requests
:return list[JSON]: a list of records that was written. There might be more :return list[JSON]: a list of records that was written. There might be more
than one record due to inlined-relations extraction. The individual than one record due to inlined-relations extraction. The individual
@ -268,7 +302,9 @@ def collection_write_record(
url=_build_url(service_url, collection, f'record/{class_name}'), url=_build_url(service_url, collection, f'record/{class_name}'),
token=token, token=token,
params={'format': format}, params={'format': format},
**(dict(json=record) if format == 'json' else dict(data=record))) session=session,
**(dict(json=record) if format == 'json' else dict(data=record)),
)
def collection_validate_record( def collection_validate_record(
@ -278,6 +314,7 @@ def collection_validate_record(
record: dict | str, record: dict | str,
format: str = 'json', format: str = 'json',
token: str | None = None, token: str | None = None,
session: Session | None = None,
) -> list[JSON]: ) -> list[JSON]:
"""Validate a record of the specified class in the collection on the service """Validate a record of the specified class in the collection on the service
@ -294,6 +331,7 @@ def collection_validate_record(
:param token: [optional] if set, a token to authenticate against :param token: [optional] if set, a token to authenticate against
the endpoint, if None: no token will be sent to the endpoint the endpoint, if None: no token will be sent to the endpoint
The token must have write access to incoming area in the collection The token must have write access to incoming area in the collection
:param session: [optional] if set it will be used for making requests
:return: True :return: True
""" """
@ -302,7 +340,9 @@ def collection_validate_record(
url=_build_url(service_url, collection, f'validate/{class_name}'), url=_build_url(service_url, collection, f'validate/{class_name}'),
token=token, token=token,
params={'format': format}, params={'format': format},
**(dict(json=record) if format == 'json' else dict(data=record))) session=session,
**(dict(json=record) if format == 'json' else dict(data=record)),
)
def collection_delete_record( def collection_delete_record(
@ -310,6 +350,7 @@ def collection_delete_record(
collection: str, collection: str,
pid: str, pid: str,
token: str | None = None, token: str | None = None,
session: Session | None = None,
) -> bool: ) -> bool:
"""Delete the record with the given pid from the collection on the service """Delete the record with the given pid from the collection on the service
@ -319,19 +360,23 @@ def collection_delete_record(
:param pid: the PID of the record that should be deleted :param pid: the PID of the record that should be deleted
:param token: [optional] if set, a token to authenticate against :param token: [optional] if set, a token to authenticate against
the endpoint, if None: no token will be sent to the endpoint the endpoint, if None: no token will be sent to the endpoint
:param session: [optional] if set it will be used for making requests
:return: True if the record was deleted, False otherwise :return: True if the record was deleted, False otherwise
""" """
return _delete_url( return _delete_url(
url=_build_url(service_url, collection, 'record'), url=_build_url(service_url, collection, 'record'),
token=token, token=token,
params={'pid': pid}) params={'pid': pid},
session=session,
)
def curated_read_record_with_pid(service_url: str, def curated_read_record_with_pid(service_url: str,
collection: str, collection: str,
pid: str, pid: str,
token: str | None = None, token: str | None = None,
session: Session | None = None,
) -> dict | None: ) -> dict | None:
"""Read record with the given pid from curated area of the collection on the service """Read record with the given pid from curated area of the collection on the service
@ -345,13 +390,16 @@ def curated_read_record_with_pid(service_url: str,
:param token: [optional] if set, a token to authenticate against :param token: [optional] if set, a token to authenticate against
the endpoint, if None: no token will be sent to the endpoint. A the endpoint, if None: no token will be sent to the endpoint. A
token must have curator-rights token must have curator-rights
:param session: [optional] if set it will be used for making requests
:return: The record, if it exists, None otherwise :return: The record, if it exists, None otherwise
""" """
return get( return get(
url=_build_url(service_url, collection, 'curated/record'), url=_build_url(service_url, collection, 'curated/record'),
token=token, token=token,
parameters={'pid': pid}) parameters={'pid': pid},
session=session,
)
def curated_read_records(service_url: str, def curated_read_records(service_url: str,
@ -361,6 +409,7 @@ def curated_read_records(service_url: str,
page: int = 1, page: int = 1,
size: int = 100, size: int = 100,
last_page: int | None = None, last_page: int | None = None,
session: Session | None = None,
) -> Generator[tuple[dict, int, int, int, int], None, None]: ) -> Generator[tuple[dict, int, int, int, int], None, None]:
"""Read records from the curated area the collection on the service """Read records from the curated area the collection on the service
@ -379,6 +428,7 @@ def curated_read_records(service_url: str,
:param size: int: the number of records in an individual pages (default: 100) :param size: int: the number of records in an individual pages (default: 100)
:param last_page: int | None: if int, the last page that should be returned :param last_page: int | None: if int, the last page that should be returned
if None, all pages following `page` will be returned if None, all pages following `page` will be returned
:param session: [optional] if set it will be used for making requests
:return: A generator yielding tuples containing: the current record, the :return: A generator yielding tuples containing: the current record, the
current page number, the total number of pages, the size of the current page number, the total number of pages, the size of the
@ -390,7 +440,9 @@ def curated_read_records(service_url: str,
first_page=page, first_page=page,
page_size=size, page_size=size,
last_page=last_page, last_page=last_page,
parameters={'matching': matching} if matching else {}) parameters={'matching': matching} if matching else {},
session=session,
)
def curated_read_records_of_class( def curated_read_records_of_class(
@ -402,6 +454,7 @@ def curated_read_records_of_class(
page: int = 1, page: int = 1,
size: int = 100, size: int = 100,
last_page: int | None = None, last_page: int | None = None,
session: Session | None = None,
) -> Generator[tuple[dict, int, int, int, int], None, None]: ) -> Generator[tuple[dict, int, int, int, int], None, None]:
"""Read records of class `class_name` from the curated area the collection on the service """Read records of class `class_name` from the curated area the collection on the service
@ -421,6 +474,7 @@ def curated_read_records_of_class(
:param size: int: the number of records in an individual pages (default: 100) :param size: int: the number of records in an individual pages (default: 100)
:param last_page: int | None: if int, the last page that should be returned :param last_page: int | None: if int, the last page that should be returned
if None, all pages following `page` will be returned if None, all pages following `page` will be returned
:param session: [optional] if set it will be used for making requests
:return: A generator yielding tuples containing: the current record, the :return: A generator yielding tuples containing: the current record, the
current page number, the total number of pages, the size of the current page number, the total number of pages, the size of the
@ -432,7 +486,9 @@ def curated_read_records_of_class(
first_page=page, first_page=page,
page_size=size, page_size=size,
last_page=last_page, last_page=last_page,
parameters={'matching': matching} if matching else {}) parameters={'matching': matching} if matching else {},
session=session,
)
def curated_write_record( def curated_write_record(
@ -441,6 +497,7 @@ def curated_write_record(
class_name: str, class_name: str,
record: dict, record: dict,
token: str | None = None, token: str | None = None,
session: Session | None = None,
) -> list[JSON]: ) -> list[JSON]:
"""Write a record of the specified class to the curated area of the collection on the service """Write a record of the specified class to the curated area of the collection on the service
@ -456,13 +513,16 @@ def curated_write_record(
:param token: [optional] if set, a token to authenticate against :param token: [optional] if set, a token to authenticate against
the endpoint, if None: no token will be sent to the endpoint the endpoint, if None: no token will be sent to the endpoint
A given token must have curator-rights for the collection A given token must have curator-rights for the collection
:param session: [optional] if set it will be used for making requests
:return list[JSON]: a list containing the record that was written :return list[JSON]: a list containing the record that was written
""" """
return _post_to_url( return _post_to_url(
url=_build_url(service_url, collection, f'curated/record/{class_name}'), url=_build_url(service_url, collection, f'curated/record/{class_name}'),
token=token, token=token,
json=record) session=session,
json=record,
)
def curated_delete_record( def curated_delete_record(
@ -470,6 +530,7 @@ def curated_delete_record(
collection: str, collection: str,
pid: str, pid: str,
token: str | None = None, token: str | None = None,
session: Session | None = None,
) -> bool: ) -> bool:
"""Delete the record with the given pid from the curated area of the collection on the service """Delete the record with the given pid from the curated area of the collection on the service
@ -480,17 +541,22 @@ def curated_delete_record(
:param token: [optional] if set, a token to authenticate against :param token: [optional] if set, a token to authenticate against
the endpoint, if None: no token will be sent to the endpoint the endpoint, if None: no token will be sent to the endpoint
A given token must have curator-rights for the collection A given token must have curator-rights for the collection
:param session: [optional] if set it will be used for making requests
:return: True if the record was deleted, False otherwise :return: True if the record was deleted, False otherwise
""" """
return _delete_url( return _delete_url(
url=_build_url(service_url, collection, 'curated/record'), url=_build_url(service_url, collection, 'curated/record'),
token=token, token=token,
params={'pid': pid}) params={'pid': pid},
session=session,
)
def incoming_read_labels(service_url: str, def incoming_read_labels(service_url: str,
collection: str, collection: str,
token: str | None = None, token: str | None = None,
session: Session | None = None,
) -> Generator[str, None, None]: ) -> Generator[str, None, None]:
"""Read all incoming labels for the collection on the service. """Read all incoming labels for the collection on the service.
@ -500,12 +566,15 @@ def incoming_read_labels(service_url: str,
:param token: [optional] if set, a token to authenticate against :param token: [optional] if set, a token to authenticate against
the endpoint, if None: no token will be sent to the endpoint the endpoint, if None: no token will be sent to the endpoint
A given token must have curator-rights for the collection A given token must have curator-rights for the collection
:param session: [optional] if set it will be used for making requests
:return: list[str]: a list of incoming area labels :return: list[str]: a list of incoming area labels
""" """
yield from _get_from_url( yield from _get_from_url(
url=_build_url(service_url, collection,'incoming/'), url=_build_url(service_url, collection,'incoming/'),
token=token) token=token,
session=session,
)
def incoming_read_record_with_pid(service_url: str, def incoming_read_record_with_pid(service_url: str,
@ -513,6 +582,7 @@ def incoming_read_record_with_pid(service_url: str,
label: str, label: str,
pid: str, pid: str,
token: str | None = None, token: str | None = None,
session: Session | None = None,
) -> dict | None: ) -> dict | None:
"""Read record with the given pid from the specified incoming area of the collection on the service """Read record with the given pid from the specified incoming area of the collection on the service
@ -527,13 +597,16 @@ def incoming_read_record_with_pid(service_url: str,
:param token: [optional] if set, a token to authenticate against :param token: [optional] if set, a token to authenticate against
the endpoint, if None: no token will be sent to the endpoint. A the endpoint, if None: no token will be sent to the endpoint. A
token must have curator-rights token must have curator-rights
:param session: [optional] if set it will be used for making requests
:return: The record, if it exists, None otherwise :return: The record, if it exists, None otherwise
""" """
return get( return get(
url=_build_incoming_url(service_url, collection, label, 'record'), url=_build_incoming_url(service_url, collection, label, 'record'),
token=token, token=token,
parameters={'pid': pid}) parameters={'pid': pid},
session=session,
)
def incoming_read_records(service_url: str, def incoming_read_records(service_url: str,
@ -544,6 +617,7 @@ def incoming_read_records(service_url: str,
page: int = 1, page: int = 1,
size: int = 100, size: int = 100,
last_page: int | None = None, last_page: int | None = None,
session: Session | None = None,
) -> Generator[tuple[dict, int, int, int, int], None, None]: ) -> Generator[tuple[dict, int, int, int, int], None, None]:
"""Read records from the specified incoming area the collection on the service """Read records from the specified incoming area the collection on the service
@ -563,6 +637,7 @@ def incoming_read_records(service_url: str,
:param size: int: the number of records in an individual pages (default: 100) :param size: int: the number of records in an individual pages (default: 100)
:param last_page: int | None: if int, the last page that should be returned :param last_page: int | None: if int, the last page that should be returned
if None, all pages following `page` will be returned if None, all pages following `page` will be returned
:param session: [optional] if set it will be used for making requests
:return: A generator yielding tuples containing: the current record, the :return: A generator yielding tuples containing: the current record, the
current page number, the total number of pages, the size of the current page number, the total number of pages, the size of the
@ -574,7 +649,9 @@ def incoming_read_records(service_url: str,
first_page=page, first_page=page,
page_size=size, page_size=size,
last_page=last_page, last_page=last_page,
parameters={'matching': matching} if matching else {}) parameters={'matching': matching} if matching else {},
session=session,
)
def incoming_read_records_of_class( def incoming_read_records_of_class(
@ -587,6 +664,7 @@ def incoming_read_records_of_class(
page: int = 1, page: int = 1,
size: int = 100, size: int = 100,
last_page: int | None = None, last_page: int | None = None,
session: Session | None = None,
) -> Generator[tuple[dict, int, int, int, int], None, None]: ) -> Generator[tuple[dict, int, int, int, int], None, None]:
"""Read records of the specified class from the specified incoming area the collection on the service """Read records of the specified class from the specified incoming area the collection on the service
@ -607,6 +685,7 @@ def incoming_read_records_of_class(
:param size: int: the number of records in an individual pages (default: 100) :param size: int: the number of records in an individual pages (default: 100)
:param last_page: int | None: if int, the last page that should be returned :param last_page: int | None: if int, the last page that should be returned
if None, all pages following `page` will be returned if None, all pages following `page` will be returned
:param session: [optional] if set it will be used for making requests
:return: A generator yielding tuples containing: the current record, the :return: A generator yielding tuples containing: the current record, the
current page number, the total number of pages, the size of the current page number, the total number of pages, the size of the
@ -618,7 +697,9 @@ def incoming_read_records_of_class(
first_page=page, first_page=page,
page_size=size, page_size=size,
last_page=last_page, last_page=last_page,
parameters={'matching': matching} if matching else {}) parameters={'matching': matching} if matching else {},
session=session,
)
def incoming_write_record( def incoming_write_record(
@ -628,6 +709,7 @@ def incoming_write_record(
class_name: str, class_name: str,
record: dict, record: dict,
token: str | None = None, token: str | None = None,
session: Session | None = None,
) -> list[JSON]: ) -> list[JSON]:
"""Write a record of the specified class to the specified incoming area of the collection on the service """Write a record of the specified class to the specified incoming area of the collection on the service
@ -644,13 +726,16 @@ def incoming_write_record(
:param token: [optional] if set, a token to authenticate against :param token: [optional] if set, a token to authenticate against
the endpoint, if None: no token will be sent to the endpoint the endpoint, if None: no token will be sent to the endpoint
A given token must have curator-rights for the collection A given token must have curator-rights for the collection
:param session: [optional] if set it will be used for making requests
:return list[JSON]: a list containing the record that was written :return list[JSON]: a list containing the record that was written
""" """
return _post_to_url( return _post_to_url(
url=_build_incoming_url(service_url, collection, label, f'record/{class_name}'), url=_build_incoming_url(service_url, collection, label, f'record/{class_name}'),
token=token, token=token,
json=record) json=record,
session=session,
)
def incoming_delete_record( def incoming_delete_record(
@ -659,6 +744,7 @@ def incoming_delete_record(
label: str, label: str,
pid: str, pid: str,
token: str | None = None, token: str | None = None,
session: Session | None = None,
) -> bool: ) -> bool:
"""Delete the record with the given pid from the specified incoming area of the collection on the service """Delete the record with the given pid from the specified incoming area of the collection on the service
@ -670,22 +756,27 @@ def incoming_delete_record(
:param token: [optional] if set, a token to authenticate against :param token: [optional] if set, a token to authenticate against
the endpoint, if None: no token will be sent to the endpoint the endpoint, if None: no token will be sent to the endpoint
A given token must have curator-rights for the collection A given token must have curator-rights for the collection
:param session: [optional] if set, it will be used for requests
:return: True if the record was deleted, False otherwise :return: True if the record was deleted, False otherwise
""" """
return _delete_url( return _delete_url(
url=_build_incoming_url(service_url, collection, label,'record'), url=_build_incoming_url(service_url, collection, label,'record'),
token=token, token=token,
params={'pid': pid}) params={'pid': pid},
session=session,
)
def server( def server(
service_url: str, service_url: str,
session: Session | None = None,
) -> JSON: ) -> JSON:
"""Get server-information from the service """Get server-information from the service
:param service_url: the base URL of the service, i.e., the URL up to :param service_url: the base URL of the service, i.e., the URL up to
`/<collection>/...` or `/server` `/<collection>/...` or `/server`
:param session: an optional requests.Session object to use for making requests
:return: information returned by the `<service_url>/server` endpoint :return: information returned by the `<service_url>/server` endpoint
""" """
@ -693,7 +784,8 @@ def server(
(f'{service_url[:-1]}' if service_url.endswith('/') else service_url) (f'{service_url[:-1]}' if service_url.endswith('/') else service_url)
+ '/server' + '/server'
) )
return _do_request(requests.get, url=url, token=None, params=None) method = session.get if session else requests.get
return _do_request(method, url=url, token=None, params=None)
def maintenance( def maintenance(
@ -701,6 +793,7 @@ def maintenance(
collection: str, collection: str,
active: bool, active: bool,
token: str, token: str,
session: Session | None = None,
) -> None: ) -> None:
"""Activate or deactivate maintenance mode of a collection """Activate or deactivate maintenance mode of a collection
@ -711,6 +804,7 @@ def maintenance(
non-active (`False`). non-active (`False`).
:param token: a token to authenticate against the endpoint, the token :param token: a token to authenticate against the endpoint, the token
must have curator-rights for the collection must have curator-rights for the collection
:param session: an optional requests.Session object to use for making requests
""" """
url = ( url = (
(f'{service_url[:-1]}' if service_url.endswith('/') else service_url) (f'{service_url[:-1]}' if service_url.endswith('/') else service_url)
@ -719,30 +813,37 @@ def maintenance(
_post_to_url( _post_to_url(
url=url, url=url,
token=token, token=token,
json={'collection': collection, 'active': active} session=session,
json={'collection': collection, 'active': active},
) )
def _get_from_url(url: str, def _get_from_url(url: str,
token: str | None, token: str | None,
params: dict[str, str] | None = None, params: dict[str, str] | None = None,
session: Session | None = None,
) -> JSON: ) -> JSON:
return _do_request(requests.get, url, token, params=params) method = session.get if session else requests.get
return _do_request(method, url, token, params=params)
def _post_to_url(url: str, def _post_to_url(url: str,
token: str | None, token: str | None,
params: dict[str, str] | None = None, params: dict[str, str] | None = None,
session: Session | None = None,
**kwargs, **kwargs,
) -> JSON: ) -> JSON:
return _do_request(requests.post, url, token, params, **kwargs) method = session.post if session else requests.post
return _do_request(method, url, token, params=params, **kwargs)
def _delete_url(url: str, def _delete_url(url: str,
token: str | None, token: str | None,
params: dict[str, str] | None = None, params: dict[str, str] | None = None,
session: Session | None = None,
) -> JSON: ) -> JSON:
return _do_request(requests.delete, url, token, params=params) method = session.delete if session else requests.delete
return _do_request(method, url, token, params=params)
def _do_request(method: Callable, def _do_request(method: Callable,
@ -784,11 +885,12 @@ def _get_page(url_base: str,
first_page: int = 1, first_page: int = 1,
page_size: int = 100, page_size: int = 100,
parameters: dict | None = None, parameters: dict | None = None,
session: Session | None = None,
) -> JSON: ) -> JSON:
parameters = parameters or {} parameters = parameters or {}
parameters['page'] = first_page parameters['page'] = first_page
parameters['size'] = page_size parameters['size'] = page_size
return _get_from_url(url_base, token, parameters) return _get_from_url(url_base, token, parameters, session)
def _check_format_value(format: str) -> None: def _check_format_value(format: str) -> None: