add --json-error-messages option #32

Merged
cmo merged 15 commits from json-error-output into master 2026-02-11 10:16:06 +00:00
12 changed files with 365 additions and 191 deletions

View file

@ -5,6 +5,12 @@
- Add `dtc` subcommand `version`. `version ` prints the version of `dtc` and
exits.
- Add the option `--json-error-messages` to `dtc` subcommands: `export`,
`import`, and `delete-records`.
- Ensure that all `dtc` subcommands return an non-zero exit status on error.
## Improvements
- Improve help messages of `dtc` subcommands `delete-records` and `import`.

View file

@ -111,7 +111,8 @@ def cli(
A token is required and will be used to authenticate the requests.
The token must have curator-rights."""
try:
return auto_curate(
sys.exit(
auto_curate(
obj,
service_url,
collection,
@ -125,11 +126,12 @@ def cli(
list_records,
dry_run,
)
)
except HTTPError as e:
console.print(
f'[red]Error[/red]: {e}: {e.response.text}',
)
return 1
sys.exit(1)
def auto_curate(
@ -278,7 +280,7 @@ def auto_curate(
)
except HTTPError as e:
console.print(
f'[red]Error[/red]: writing record with pid {record["pid"]} failed: {e}: {e.response.text}',
f'[red]Error[/red]: writing record with pid [green]{record["pid"]}[/green] failed: {e}: {e.response.text}',
)
return 1
@ -294,7 +296,7 @@ def auto_curate(
)
except HTTPError as e:
console.print(
f'[red]ERROR[/red]: deleting record with pid {record["pid"]} failed: {e}: {e.response.text}',
f'[red]Error[/red]: deleting record with pid [green]{record["pid"]}[/green] failed: {e}: {e.response.text}',
)
return 1

View file

@ -1,5 +1,7 @@
import json
import sys
from rich.console import Console
import rich_click as click
from ...communicate import (
@ -12,6 +14,8 @@ from ...communicate import (
subcommand_name = 'clean-incoming'
console = Console(file=sys.stderr)
@click.command(short_help='Remove records from an inbox of a dump-things collection')
@click.pass_obj
@ -48,16 +52,18 @@ def cli(
A token with curator rights has to be provided.
"""
try:
return clean_incoming(
sys.exit(
clean_incoming(
obj,
service_url,
collection,
inbox_label,
list_only,
)
)
except HTTPError as e:
click.echo(f'ERROR: {e}: {e.response.text}', err=True)
return 1
console.print(f'[red]Error[/red]: {e}: {e.response.text}')
sys.exit(1)
def clean_incoming(
@ -69,7 +75,7 @@ def clean_incoming(
):
token = obj
if token is None:
click.echo('ERROR: token not provided', err=True)
console.print('[red]Error[/red]: no token provided')
return 1
session = get_session()

View file

@ -75,6 +75,15 @@ console = Console(file=sys.stderr)
'lead to warnings about records that cannot be deleted. The command '
'will print a list of all PIDs that could not be deleted.',
)
@click.option(
'--json-error-messages',
default=False,
is_flag=True,
help='if this flag is given, output information about failed delete '
'operations to stdout. The format is JSONL (JSON lines), each JSON '
'record contains the detailed error message, the PID of the record '
'that could not be deleted.'
)
def cli(
obj,
service_url,
@ -84,6 +93,7 @@ def cli(
incoming,
ignore_errors,
class_name,
json_error_messages,
):
"""Delete records from a collection on a dump-things-service
@ -112,7 +122,7 @@ def cli(
`get-records` are deleted (this second pass will require a CURATOR token).
"""
try:
return delete_records(
sys.exit(delete_records(
obj,
service_url,
collection,
@ -121,10 +131,11 @@ def cli(
incoming,
ignore_errors,
class_name,
)
json_error_messages,
))
except HTTPError as e:
console.print(f'[red]Error[/red]: {e}: {e.response.text}')
return 1
sys.exit(1)
def delete_records(
@ -136,6 +147,7 @@ def delete_records(
incoming,
ignore_errors,
class_name,
json_error_messages,
):
if incoming and curated:
console.print('[red]Error[/red]: -i/--incoming and -c/--curated are mutually exclusive')
@ -189,7 +201,8 @@ def delete_records(
if not pids:
pids = sys.stdin
result = []
return_code = 0
errors = []
explanation = False
for pid in track(pids, console=console):
try:
@ -207,17 +220,23 @@ def delete_records(
f'not in the incoming area associated with the provided token. To delete such records, '
f'use the option -c/--curated.[/yellow]'
)
result.append(pid)
console.print(f'[yellow]Warning[/yellow]: could not delete record with pid [green]{pid}[/green]: {e}: {e.response.text}')
continue
console.print(f'[red]Error[/red]: could not delete record with pid [green]{pid}[/green]: {e}, {e.response.text}')
_append_error(errors, e, pid, collection)
return_code = 1
if ignore_errors:
continue
return 1
break
if result:
click.echo('\n'.join(result))
return 0
console.print(f'[red]Error[/red]: could not delete record with pid [green]{pid}[/green]: {e}, {e.response.text}')
_append_error(errors, e, pid, collection)
return_code = 1
if ignore_errors:
continue
break
if errors and json_error_messages:
click.echo('\n'.join(errors))
return return_code
def _get_pids_for_class(
@ -241,3 +260,21 @@ def _get_pids_for_class(
**kwargs,
)
)
def _append_error(
container: list,
e: HTTPError,
pid: str,
collection: str,
):
container.append(
json.dumps(
{
'status': 'error',
'pid': pid,
'collection': collection,
'details': e.response.text,
}
)
)

View file

@ -71,6 +71,16 @@ console = Console(file=sys.stderr)
'schema_type-attribute is removed because the class is encoded in the '
'storage path of the records.'
)
@click.option(
'--json-error-messages',
default=False,
is_flag=True,
help='if this flag is given, output information about failed read or write '
'operations to stdout. The format is JSONL (JSON lines), each JSON '
'record contains the operation type (read, write), a detailed error '
'message, and additional context dependent information, e.g., the PID '
'of the record that could not be written to the file system.'
)
def cli(
obj: Any,
service_url: str,
@ -79,6 +89,7 @@ def cli(
output_format: str,
ignore_errors: bool,
keep_schema_type: bool,
json_error_messages: bool,
):
"""Export a collection to disk
@ -92,7 +103,8 @@ def cli(
A token with curator rights has to be provided.
"""
try:
return export(
sys.exit(
export(
obj,
service_url,
collection,
@ -100,12 +112,14 @@ def cli(
output_format,
ignore_errors,
keep_schema_type,
json_error_messages,
)
)
except HTTPError as e:
console.print(f'[red]Error[/red]: {e}: {e.response.text}')
except Exception as e:
except ValueError as e:
console.print(f'[red]Error[/red]: {e}')
return 1
sys.exit(1)
def export(
@ -116,7 +130,8 @@ def export(
output_format: str,
ignore_errors: bool,
keep_schema_type: bool,
):
json_error_messages: bool,
) -> int:
token = obj
if token is None:
@ -125,10 +140,10 @@ def export(
session = get_session()
server_info = server(service_url, session=session)
collection_info = ([c for c in server_info['collections'] if c['name'] == collection] or None)[0]
collection_info = ([c for c in server_info['collections'] if c['name'] == collection] or [None])[0]
if not collection_info:
console.print(f'[red]Error[/red]: no collection {collection} on service')
console.print(f'[red]Error[/red]: no such collection: {collection}')
return 1
description = {
@ -148,7 +163,7 @@ def export(
curated_destination.mkdir()
console.print('Exporting records from curated area')
_store_records(
failed = _store_records(
curated_read_records(
service_url=service_url,
collection=collection,
@ -172,6 +187,7 @@ def export(
console.print(f'Exporting records from incoming area: {label}')
incoming_destination = destination / 'incoming' / label
incoming_destination.mkdir(parents=True, exist_ok=False)
failed.extend(
_store_records(
incoming_read_records(
service_url=service_url,
@ -186,6 +202,12 @@ def export(
keep_schema_type,
source_name=f'incoming area: {label}'
)
)
if failed:
if json_error_messages:
click.echo('\n'.join(failed))
return 1
return 0
@ -197,25 +219,29 @@ def _store_records(
ignore_errors: bool,
keep_schema_type: bool,
source_name: str,
):
created_dirs = set()
) -> list:
# Get the first result from the source to determine the total number
# of records.
if output_format not in writer:
msg = f'unsupported output format: {output_format}'
raise ValueError(msg)
created_dirs = set()
failed = []
# Get the first result from the source to determine the total number of records.
try:
first_tuple = next(source)
except StopIteration:
return
return failed
total = first_tuple[4]
for record, _, _, _, _ in track(chain([first_tuple], source), total=total, console=console):
schema_type = record.get('schema_type', None)
if schema_type is None:
_handle_schema_type_error(failed, record, source_name)
if ignore_errors:
console.print(f'[red]Error[/red]: no `schema type` in record [red]{record["pid"]}[/red] in {source_name}')
continue
msg = f'no `schema_type` in record {record["pid"]}'
raise ValueError(msg)
break
class_name = _de_prefix(schema_type)
if not keep_schema_type:
@ -230,15 +256,13 @@ def _store_records(
file_dir.mkdir(parents=True, exist_ok=False)
created_dirs.add(file_dir)
try:
writer[output_format](
file_dir=file_dir,
file_name=file_name,
record=record,
)
except KeyError as e:
msg = f'unsupported output format: {output_format}'
raise ValueError(msg)
return failed
def _de_prefix(
@ -261,6 +285,24 @@ def _hash_p3(
return hex_digest[:3], hex_digest[3:]
def _handle_schema_type_error(
container: list,
record: dict,
source_name: str,
):
console.print(f'[red]Error[/red]: no `schema type` in record [red]{record["pid"]}[/red] in {source_name}')
container.append(
json.dumps(
{
'status': 'error',
'pid': record['pid'],
'source': source_name,
'message': f'no `schema type` in record {record["pid"]}',
}
)
)
def write_json(
file_dir: Path,
file_name: str,

View file

@ -132,7 +132,8 @@ def cli(
to be provided.
"""
try:
return get_records(
sys.exit(
get_records(
obj,
service_url,
collection,
@ -148,9 +149,10 @@ def cli(
stats,
pagination,
)
)
except HTTPError as e:
console.print(f'[red]Error[/red]: {e}: {e.response.text}')
return 1
sys.exit(1)
def get_records(
@ -168,7 +170,7 @@ def get_records(
last_page,
stats,
pagination,
):
) -> int:
token = obj
if token is None:
@ -219,7 +221,7 @@ def get_records(
else:
kwargs['format'] = format_
result = collection_read_record_with_pid(**kwargs)
print(json.dumps(result, ensure_ascii=False))
click.echo(json.dumps(result, ensure_ascii=False))
return 0
elif cls:

View file

@ -62,12 +62,23 @@ console = Console(file=sys.stderr)
is_flag=True,
help='log errors an continue import instead of raising an exception',
)
@click.option(
'--json-error-messages',
default=False,
is_flag=True,
help='if this flag is given, output information about failed read or write '
'operations to stdout. The format is JSONL (JSON lines), each JSON '
'record contains the operation type (read, write), a detailed error '
'message, and additional context dependent information, e.g., the PID '
'of the record that could not be posted to the collection.'
)
def cli(
obj: Any,
source: Path,
service_url: str,
collection: str,
ignore_errors,
json_error_messages,
):
"""Import a collection or part of a collection from disk
@ -85,18 +96,21 @@ def cli(
A token with curator rights has to be provided.
"""
try:
return import_collection(
sys.exit(
import_collection(
obj,
service_url,
collection,
source,
ignore_errors,
json_error_messages,
)
)
except HTTPError as e:
click.echo(f'ERROR: {e}: {e.response.text}', err=True)
console.print(f'[red]Error[/red]: {e}: {e.response.text}')
except ValueError as e:
click.echo(f'ERROR: {e}', err=True)
return 1
console.print(f'[red]Error[/red]: {e}')
sys.exit(1)
def import_collection(
@ -105,9 +119,11 @@ def import_collection(
collection: str,
source: Path,
ignore_errors: bool,
):
json_error_messages: bool,
) -> int:
if token is None:
click.echo(f'ERROR: no token provided', err=True)
console.print(f'[red]Error[/red]: no token provided')
return 1
with (source / 'description.json').open() as description_file:
@ -117,7 +133,7 @@ def import_collection(
# Import the curated records
curated_source = source / 'curated'
result = _load_records(
failed = _load_records(
service_url=service_url or description['service_url'],
collection=collection or description['name'],
token=token,
@ -127,33 +143,38 @@ def import_collection(
ignore_errors=ignore_errors,
session=session,
)
if result != 0 and not ignore_errors:
return result
if failed and not ignore_errors:
if json_error_messages:
click.echo('\n'.join(failed))
sys.exit(1)
# Import incoming areas
errors = 0
for incoming_source in (source / 'incoming').glob('*'):
label = incoming_source.name
console.print(f'processing incoming area {label}')
result = _load_records(
failed.extend(
_load_records(
service_url,
collection,
token,
incoming_source,
partial(incoming_write_record, label=label),
location_info=f'incoming area {label}',
session=session,
ignore_errors=ignore_errors,
session=session,
)
)
if result != 0:
if ignore_errors:
errors += 1
else:
return result
if failed and not ignore_errors:
break
return int(errors > 0)
if failed:
if json_error_messages:
click.echo('\n'.join(failed))
return 1
return 0
def _load_records(
@ -163,13 +184,15 @@ def _load_records(
source: Path,
writer: Callable,
location_info: str,
ignore_errors: bool,
session: Session,
ignore_errors: bool = False,
) -> int:
errors = 0
) -> list:
failed = []
for path in source.glob('*'):
class_name = path.parts[-1]
result = _load_class_records(
failed.extend(
_load_class_records(
service_url=service_url,
collection=collection,
token=token,
@ -177,16 +200,14 @@ def _load_records(
writer=writer,
class_name=class_name,
location_info=location_info,
session=session,
ignore_errors=ignore_errors,
session=session,
)
if result != 0:
if ignore_errors:
errors += 1
else:
return result
)
if failed and not ignore_errors:
break
return int(errors > 0)
return failed
def _load_class_records(
@ -197,15 +218,17 @@ def _load_class_records(
writer: Callable,
class_name: str,
location_info: str,
ignore_errors: bool,
session: Session,
ignore_errors: bool = False,
) -> int:
) -> list:
total = reduce(
lambda s, path: s + (0 if path.is_dir() else 1),
source.glob('**/*'),
0,
)
failed = []
for path in track(
source.glob('**/*'),
description = f'processing {location_info}, class {class_name} ... ',
@ -220,9 +243,9 @@ def _load_class_records(
with path.open('rt') as file:
record = _file_reader[file_format.lower()](file)
except Exception as e:
console.print(f'[red]Error[/red]: could not read {file_format}-file: {path}`: {e}')
_process_read_error(failed, file_format, path, str(e))
if not ignore_errors:
return 1
return failed
continue
try:
@ -235,14 +258,15 @@ def _load_class_records(
session=session,
)
except HTTPError as e:
console.print(f'[red]Error[/red]: could not write record with pid [green]{record["pid"]}[/green] of class `{class_name}` (file: [yellow]{path}[/yellow]) to: "{location_info}": {e} {e.response.text}')
_process_write_error(failed, record, class_name, collection, location_info, path, e.response.text)
if not ignore_errors:
return 1
break
except Exception as e:
console.print(f'[red]Error[/red]: could not write record with pid [green]{record["pid"]}[/green] of class `{class_name}` (file: [yellow]{path}[/yellow]) to: "{location_info}": {e}')
_process_write_error(failed, record, class_name, collection, location_info, path, str(e))
if not ignore_errors:
return 1
return 0
break
return failed
def _json_reader(
@ -257,6 +281,53 @@ def _yaml_reader(
return yaml.load(file, Loader=yaml.SafeLoader)
def _process_read_error(
container: list,
file_format: str,
path: Path,
message: str,
):
console.print(f'[red]Error[/red]: could not read {file_format}-file: {path}`: {message}')
container.append(
json.dumps(
{
'status': 'error',
'operation': 'read',
'format': file_format,
'path': str(path),
'details': message,
}
)
)
def _process_write_error(
container: list,
record: dict,
class_name: str,
collection: str,
location_info: str,
path: Path,
message: str,
):
pid = record['pid']
console.print(f'[red]Error[/red]: could not write record with pid [green]{pid}[/green] of class `{class_name}` (file: [yellow]{path}[/yellow]) to: "{location_info} of collection {collection}": {message}')
container.append(
json.dumps(
{
'status': 'error',
'operation': 'write',
'pid': pid,
'collection': collection,
'path': str(path),
'details': message,
}
)
)
_file_reader = {
'json': _json_reader,
'yaml': _yaml_reader,

View file

@ -1,6 +1,9 @@
import json
import sys
import rich_click as click
from rich.console import Console
from ...communicate import (
HTTPError,
@ -11,6 +14,8 @@ from ...communicate import (
subcommand_name = 'list-incoming'
console = Console(file=sys.stderr)
@click.command(short_help='List inboxes of a dump-things collection')
@click.pass_obj
@ -42,15 +47,17 @@ def cli(
A token with curator rights has to be provided.
"""
try:
return list_incoming(
sys.exit(
list_incoming(
obj,
service_url,
collection,
show_records,
)
)
except HTTPError as e:
click.echo(f'ERROR: {e}: {e.response.text}', err=True)
return 1
console.print(f'[red]Error[/red]: {e}: {e.response.text}')
sys.exit(1)
def list_incoming(
@ -58,7 +65,7 @@ def list_incoming(
service_url,
collection,
show_records,
):
) -> int:
token = obj
if token is None:
click.echo('ERROR: token not provided', err=True)

View file

@ -47,15 +47,17 @@ def cli(
This command expects a server version >= 5.4.0"""
try:
return maintenance(
sys.exit(
maintenance(
obj,
service_url,
collection,
active,
)
)
except HTTPError as e:
click.echo(f'ERROR: {e}: {e.response.text}', err=True)
return 1
click.echo(f'[red]Error[/red]: {e}: {e.response.text}')
sys.exit(1)
def maintenance(
@ -63,7 +65,7 @@ def maintenance(
service_url: str,
collection: str,
active: bool,
):
) -> int:
token = obj
if token is None:
console.print('[red]Error[/red]: no token provided')

View file

@ -65,17 +65,15 @@ def cli(
A token is required and will be used to authenticate the requests.
If the `--curated`-option is provided, the token must have
curator-rights."""
try:
return post_records(
sys.exit(
post_records(
obj,
service_url,
collection,
cls,
curated,
)
except HTTPError as e:
click.echo(f'ERROR: {e}: {e.response.text}', err=True)
return 1
)
def post_records(
@ -84,7 +82,7 @@ def post_records(
collection,
cls,
curated,
):
) -> int:
token = obj
if token is None:
console.print('[red]Error[/red]: No token provided')

View file

@ -81,7 +81,8 @@ def cli(
"""
try:
return read_pages(
sys.exit(
read_pages(
obj,
url,
page_size,
@ -92,9 +93,10 @@ def cli(
matching,
pagination,
)
)
except HTTPError as e:
click.echo(f'ERROR: {e}: {e.response.text}', err=True)
return 1
console.print(f'[red]Error[/red]: {e}: {e.response.text}')
sys.exit(1)
def read_pages(
@ -107,7 +109,7 @@ def read_pages(
format_,
matching,
pagination,
):
) -> int:
token = obj
if token is None:

View file

@ -1,7 +1,6 @@
import importlib.metadata
import rich_click as click
from rich import print as rprint
subcommand_name = 'version'
@ -11,5 +10,5 @@ subcommand_name = 'version'
def cli():
"""Show the version of `dtc` and exit
"""
rprint(importlib.metadata.version('dump-things-pyclient'))
click.echo(importlib.metadata.version('dump-things-pyclient'))
return 0