diff --git a/dump_things_service/audit/gitaudit.py b/dump_things_service/audit/gitaudit.py index 9818874..94e2ea5 100644 --- a/dump_things_service/audit/gitaudit.py +++ b/dump_things_service/audit/gitaudit.py @@ -7,8 +7,8 @@ committed. Changes are annotated with a time stamp and a user-id """ import hashlib +import json import re -import string from datetime import datetime from pathlib import Path @@ -23,6 +23,9 @@ from datalad_core.runners import ( from . import AuditBackend +index_file_name = 'gitaudit_index.log' + + class GitAuditBackend(AuditBackend): def __init__( @@ -42,8 +45,6 @@ class GitAuditBackend(AuditBackend): author_id: str | None = None, ) -> None: author_id = committer_id if author_id is None else author_id - committer_id = self._escape_person_id(committer_id) - author_id = self._escape_person_id(author_id) record_id = record['pid'] location = self._get_location_for(record_id) if self._has_pending_changes(location): @@ -80,13 +81,13 @@ class GitAuditBackend(AuditBackend): capture_output=True, ).decode().splitlines() # Get the log entry - log_entry = tuple( + log_line = tuple( filter( lambda l: not l.startswith('+++') and l.startswith('+'), log_diff_lines, ) )[0][1:] - time_stamp, committer_id, author_id = log_entry.split(' ') + log_entry = json.loads(log_line) # Get the YAML diff yaml_diff_lines = call_git( @@ -103,7 +104,13 @@ class GitAuditBackend(AuditBackend): capture_output=True, ).decode() changes.append( - (time_stamp, committer_id, author_id, yaml_diff, yaml_content) + ( + log_entry['time_stamp'], + log_entry['committer_id'], + log_entry['author_id'], + yaml_diff, + yaml_content, + ) ) changes.sort() @@ -154,8 +161,13 @@ class GitAuditBackend(AuditBackend): author_id: str, ) -> None: time_stamp = datetime.now().isoformat() + entry = { + 'time_stamp': time_stamp, + 'committer_id': committer_id, + 'author_id': author_id, + } log_content = self._read_from_repo_path(log_location).decode() - log_content += f'{time_stamp} {committer_id} {author_id}\n' + log_content += json.dumps(entry, ensure_ascii=False) + '\n' self.current_change_set[log_location] = log_content def _add_index_entry( @@ -229,7 +241,7 @@ class GitAuditBackend(AuditBackend): else: self.path.mkdir(parents=True) is_empty = True - self.index_path = self.path / 'index.log' + self.index_path = self.path / index_file_name if is_empty: call_git(['init', '--bare', str(self.path)], capture_output=True) @@ -258,7 +270,6 @@ class GitAuditBackend(AuditBackend): self.index.add(record_id) def _rebuild_index(self): - print('rebuilding index') tree_entries = call_git( ['ls-tree', '-r', 'master:'], cwd=self.path, @@ -267,10 +278,8 @@ class GitAuditBackend(AuditBackend): with open(self.index_path, 'wt') as f: for line in tree_entries: if not line.endswith('.yaml'): - print('ignoring line:', repr(line)) continue - print('got line:', repr(line)) - flag, object_type, object_hash, file_name = line.split() + flag, object_type, object_hash, file_name = line.split(maxsplit=3) record = yaml.safe_load( call_git( ['show', object_hash], @@ -278,22 +287,4 @@ class GitAuditBackend(AuditBackend): capture_output=True, ).decode() ) - print('got record:', repr(record)) f.write(record['pid'] + '\n') - - def _escape_person_id( - self, - person_id: str, - ): - if not person_id: - msg = f'empty ID string not allowed: {person_id}' - raise ValueError(msg) - if any( - map( - lambda character: character in person_id, - string.whitespace - ) - ): - msg = f'ID string must not contain whitespace: {person_id}' - raise ValueError(msg) - return person_id