add delete-class-instances option to delete-records #29

Merged
cmo merged 1 commit from issue-18 into master 2026-02-06 12:33:09 +00:00

View file

@ -2,6 +2,7 @@ import json
import logging
import sys
from functools import partial
from typing import Iterable
import rich_click as click
from rich.progress import track
@ -11,9 +12,12 @@ from ...communicate import (
HTTPError,
get_session,
collection_delete_record,
collection_read_records_of_class,
curated_delete_record,
curated_read_records_of_class,
incoming_delete_record,
incoming_read_labels,
incoming_read_records_of_class,
)
@ -56,6 +60,21 @@ console = Console(file=sys.stderr)
is_flag=True,
help='ignore errors when deleting a pid and continue with remaining pids',
)
@click.option(
'--class', '-C', 'class_name',
metavar='CLASS',
default=None,
help='delete ALL records of class CLASS from the collection\'s incoming '
'area that is associated with the token. Can be '
'combined with `-i/--incoming LABEL` or `-c/--curated` to delete all '
'records of class CLASS from the incoming area `LABEL` or from the '
'curated area. Note: if neither `-c/--curated` nor `-i/--incoming` is '
'specified, the command cannot reliably determine which records are '
'stored in the incoming area associated with a token and which '
'records are stored in the curated area of the collection. This can '
'lead to warnings about records that cannot be deleted. The command '
'will print a list of all PIDs that could not be deleted.',
)
def cli(
obj,
service_url,
@ -64,17 +83,20 @@ def cli(
curated,
incoming,
ignore_errors,
class_name,
):
"""Delete records from a collection on a dump-things-service
This command deletes the records given by PIDS from the collection COLLECTION
of the dump-things service SERVICE_URL. If no PIDS are provided on the
command line, the pids that should be deleted are read from stdin (one pid
of the dump-things service SERVICE_URL. If no pids are provided on the
command line, the pids that should be deleted are read from stdin (one pid
per line, lines are stripped).
By default, the records will be deleted from the inbox associated with the
token. If the option -c/--curated is given, the records are deleted from
token. If the option `-c/--curated` is given, the records are deleted from
the curated area of the collection (this requires a token with
curator rights). If the option `-i/--incoming LABEL` is given, the records
are deleted from the inbox specified by `LABEL` (this requires a token with
curator rights).
"""
try:
@ -86,6 +108,7 @@ def cli(
curated,
incoming,
ignore_errors,
class_name,
)
except HTTPError as e:
console.print(f'[red]Error[/red]: {e}: {e.response.text}')
@ -93,23 +116,30 @@ def cli(
def delete_records(
obj,
token,
service_url,
collection,
pids,
curated,
incoming,
ignore_errors,
class_name,
):
token = obj
if token is None:
click.echo(f'WARNING: no token provided', err=True)
if incoming and curated:
console.print('[red]Error[/red]: -i/--incoming and -c/--curated are mutually exclusive')
return 1
if pids and class_name:
console.print('[red]Error[/red]: -C/--class cannot be combined with PID-specification')
return 1
if token is None:
console.print(f'[yellow]Warning[/yellow]: no token provided')
return 1
if class_name and pids:
console.print(f'[yellow]Warning[/yellow]: PIDs will be ignored because -C/--class was provided')
session = get_session()
kwargs = dict(
service_url=service_url,
@ -135,21 +165,67 @@ def delete_records(
else:
operation = collection_delete_record
if not pids:
pids = sys.stdin
# If a class name is specified, read the PIDs from the server
if class_name:
pids = _get_pids_for_class(
class_name=class_name,
incoming=incoming,
curated=curated,
kwargs=kwargs,
)
else:
if not pids:
pids = sys.stdin
result = []
explanation = False
for pid in track(pids, console=console):
try:
operation(
service_url=service_url,
collection=collection,
pid=pid.strip(),
token=token,
session=session,
**kwargs,
)
except HTTPError as e:
console.print(f'[red]Error[/red]: while deleting pid {pid}: {e}, {e.response.text}')
if class_name and not (incoming or curated):
if not explanation:
explanation = True
console.print(
f'[yellow]The record with pid {pid} could not be deleted, this is not an error, iff: '
f'the record is only contained in the curated area of collection {collection}, and '
f'not in the incoming area associated with the provided token. To delete such records, '
f'use the option -c/--curated.[/yellow]'
)
result.append(pid)
console.print(f'[yellow]Warning[/yellow]: could not delete record with pid [green]{pid}[/green]: {e}: {e.response.text}')
continue
console.print(f'[red]Error[/red]: could not delete record with pid [green]{pid}[/green]: {e}, {e.response.text}')
if ignore_errors:
continue
return 1
if result:
click.echo('\n'.join(result))
return 0
def _get_pids_for_class(
    class_name: str,
    incoming: str | None,
    curated: bool,
    kwargs,
) -> Iterable[str]:
    """Return a lazy iterable over the PIDs of all records of a class.

    The reader used to list the records is chosen from the target area:

    - ``incoming`` is truthy: ``incoming_read_records_of_class`` bound to
      ``label=incoming``;
    - otherwise, ``curated`` is truthy: ``curated_read_records_of_class``;
    - otherwise: ``collection_read_records_of_class``.

    ``kwargs`` is forwarded unchanged as keyword arguments to the selected
    reader, next to ``class_name``.

    The PID is taken from ``item[0]['pid']`` of every item the reader
    produces.
    NOTE(review): this assumes each item is a sequence whose first element
    is the record mapping -- confirm against the reader implementation.
    """
    if incoming:
        # An explicit inbox label takes precedence over the curated flag.
        read_records = partial(incoming_read_records_of_class, label=incoming)
    else:
        read_records = (
            curated_read_records_of_class
            if curated
            else collection_read_records_of_class
        )

    entries = read_records(class_name=class_name, **kwargs)
    # Extraction stays lazy: PIDs are pulled out only as the caller iterates.
    return (entry[0]['pid'] for entry in entries)