add delete-class-instances option to delete-records #29

Merged
cmo merged 1 commit from issue-18 into master 2026-02-06 12:33:09 +00:00

View file

@ -2,6 +2,7 @@ import json
import logging import logging
import sys import sys
from functools import partial from functools import partial
from typing import Iterable
import rich_click as click import rich_click as click
from rich.progress import track from rich.progress import track
@ -11,9 +12,12 @@ from ...communicate import (
HTTPError, HTTPError,
get_session, get_session,
collection_delete_record, collection_delete_record,
collection_read_records_of_class,
curated_delete_record, curated_delete_record,
curated_read_records_of_class,
incoming_delete_record, incoming_delete_record,
incoming_read_labels, incoming_read_labels,
incoming_read_records_of_class,
) )
@ -56,6 +60,21 @@ console = Console(file=sys.stderr)
is_flag=True, is_flag=True,
help='ignore errors when deleting a pid and continue with remaining pids', help='ignore errors when deleting a pid and continue with remaining pids',
) )
@click.option(
'--class', '-C', 'class_name',
metavar='CLASS',
default=None,
help='delete ALL records of class CLASS from the collection\'s incoming '
'area that is associated with the token. Can be '
'combined with `-i/--incoming LABEL` or `-c/--curated` to delete all '
'records of class CLASS from the incoming area `LABEL` or from the '
'curated area. Note: if neither `-c/--curated` nor `-i/--incoming` is '
'specified, the command cannot reliably determine which records are '
'stored in the incoming area associated with a token and which '
'records are stored in the curated area of the collection. This can '
'lead to warnings about records that cannot be deleted. The command '
'will print a list of all PIDs that could not be deleted.',
)
def cli( def cli(
obj, obj,
service_url, service_url,
@ -64,17 +83,20 @@ def cli(
curated, curated,
incoming, incoming,
ignore_errors, ignore_errors,
class_name,
): ):
"""Delete records from a collection on a dump-things-service """Delete records from a collection on a dump-things-service
This command delete the records given by PIDS from the collection COLLECTION This command delete the records given by PIDS from the collection COLLECTION
of the dump-things service SERVICE_URL. If no PIDS are provided on the of the dump-things service SERVICE_URL. If no pids are provided on the
command line, the pids that should be deleted are read from stdin (one pid command line, the pid that should be deleted are read from stdin (one pid
per line, lines are stripped). per line, lines are stripped).
By default, the records will be deleted from the inbox associated with the By default, the records will be deleted from the inbox associated with the
token. If the option -c/--curated is given, the records are deleted from token. If the option `-c/--curated` is given, the records are deleted from
the curated area of the collection (this requires a token with the curated area of the collection (this requires a token with
curator rights). If the option `-i/--incoming LABEL` is given, the records
are deleted from the inbox specified by `LABEL` (this requires a token with
curator rights). curator rights).
""" """
try: try:
@ -86,6 +108,7 @@ def cli(
curated, curated,
incoming, incoming,
ignore_errors, ignore_errors,
class_name,
) )
except HTTPError as e: except HTTPError as e:
console.print(f'[red]Error[/red]: {e}: {e.response.text}') console.print(f'[red]Error[/red]: {e}: {e.response.text}')
@ -93,23 +116,30 @@ def cli(
def delete_records( def delete_records(
obj, token,
service_url, service_url,
collection, collection,
pids, pids,
curated, curated,
incoming, incoming,
ignore_errors, ignore_errors,
class_name,
): ):
token = obj
if token is None:
click.echo(f'WARNING: no token provided', err=True)
if incoming and curated: if incoming and curated:
console.print('[red]Error[/red]: -i/--incoming and -c/--curated are mutually exclusive') console.print('[red]Error[/red]: -i/--incoming and -c/--curated are mutually exclusive')
return 1 return 1
if pids and class_name:
console.print('[red]Error[/red]: -C/--class cannot be combined with PID-specification')
return 1
if token is None:
console.print(f'[yellow]Warning[/yellow]: no token provided')
return 1
if class_name and pids:
console.print(f'[yellow]Warning[/yellow]: PIDs will be ignored because -C/--class was provided')
session = get_session() session = get_session()
kwargs = dict( kwargs = dict(
service_url=service_url, service_url=service_url,
@ -135,21 +165,67 @@ def delete_records(
else: else:
operation = collection_delete_record operation = collection_delete_record
if not pids: # If a class name is specified, read the PIDs from the server
pids = sys.stdin if class_name:
pids = _get_pids_for_class(
class_name=class_name,
incoming=incoming,
curated=curated,
kwargs=kwargs,
)
else:
if not pids:
pids = sys.stdin
result = []
explanation = False
for pid in track(pids, console=console): for pid in track(pids, console=console):
try: try:
operation( operation(
service_url=service_url,
collection=collection,
pid=pid.strip(), pid=pid.strip(),
token=token, **kwargs,
session=session,
) )
except HTTPError as e: except HTTPError as e:
console.print(f'[red]Error[/red]: while deleting pid {pid}: {e}, {e.response.text}') if class_name and not (incoming or curated):
if not explanation:
explanation = True
console.print(
f'[yellow]The record with pid {pid} could not be deleted, this is not an error, iff: '
f'the record is only contained in the curated area of collection {collection}, and '
f'not in the incoming area associated with the provided token. To delete such records, '
f'use the option -c/--curated.[/yellow]'
)
result.append(pid)
console.print(f'[yellow]Warning[/yellow]: could not delete record with pid [green]{pid}[/green]: {e}: {e.response.text}')
continue
console.print(f'[red]Error[/red]: could not delete record with pid [green]{pid}[/green]: {e}, {e.response.text}')
if ignore_errors: if ignore_errors:
continue continue
return 1 return 1
if result:
click.echo('\n'.join(result))
return 0 return 0
def _get_pids_for_class(
    class_name: str,
    incoming: str | None,
    curated: bool,
    kwargs,
) -> Iterable[str]:
    """Lazily yield the PIDs of all records of class ``class_name``.

    The reader is chosen from the area flags: a truthy ``incoming`` label
    selects the incoming-area reader bound to that label, otherwise
    ``curated`` selects the curated-area reader, otherwise the
    collection-level reader is used.

    :param class_name: name of the class whose records should be listed
    :param incoming: label of an incoming area, or ``None``
    :param curated: whether to read from the curated area
    :param kwargs: keyword arguments forwarded unchanged to the reader
    :return: an iterator over the ``'pid'`` entries of the read results
    """
    if incoming:
        reader = partial(incoming_read_records_of_class, label=incoming)
    else:
        reader = (
            curated_read_records_of_class
            if curated
            else collection_read_records_of_class
        )

    # The read call is issued right away; only the PID extraction is lazy.
    entries = reader(class_name=class_name, **kwargs)

    # NOTE(review): each entry is indexed with [0] before the 'pid' lookup,
    # so entries are presumably sequences whose first element is the record
    # mapping -- confirm against the reader's return type.
    return (entry[0]['pid'] for entry in entries)