Use JSONL in gitaudit log-entries #201

Merged
cmo merged 3 commits from jsonl_gitaudit into master 2026-03-20 11:49:17 +00:00

View file

@ -7,8 +7,8 @@ committed.
Changes are annotated with a time stamp and a user-id
"""
import hashlib
import json
import re
import string
from datetime import datetime
from pathlib import Path
@ -23,6 +23,9 @@ from datalad_core.runners import (
from . import AuditBackend
index_file_name = 'gitaudit_index.log'
class GitAuditBackend(AuditBackend):
def __init__(
@ -42,8 +45,6 @@ class GitAuditBackend(AuditBackend):
author_id: str | None = None,
) -> None:
author_id = committer_id if author_id is None else author_id
committer_id = self._escape_person_id(committer_id)
author_id = self._escape_person_id(author_id)
record_id = record['pid']
location = self._get_location_for(record_id)
if self._has_pending_changes(location):
@ -80,13 +81,13 @@ class GitAuditBackend(AuditBackend):
capture_output=True,
).decode().splitlines()
# Get the log entry
log_entry = tuple(
log_line = tuple(
filter(
lambda l: not l.startswith('+++') and l.startswith('+'),
log_diff_lines,
)
)[0][1:]
time_stamp, committer_id, author_id = log_entry.split(' ')
log_entry = json.loads(log_line)
# Get the YAML diff
yaml_diff_lines = call_git(
@ -103,7 +104,13 @@ class GitAuditBackend(AuditBackend):
capture_output=True,
).decode()
changes.append(
(time_stamp, committer_id, author_id, yaml_diff, yaml_content)
(
log_entry['time_stamp'],
log_entry['committer_id'],
log_entry['author_id'],
yaml_diff,
yaml_content,
)
)
changes.sort()
@ -154,8 +161,13 @@ class GitAuditBackend(AuditBackend):
author_id: str,
) -> None:
time_stamp = datetime.now().isoformat()
entry = {
'time_stamp': time_stamp,
'committer_id': committer_id,
'author_id': author_id,
}
log_content = self._read_from_repo_path(log_location).decode()
log_content += f'{time_stamp} {committer_id} {author_id}\n'
log_content += json.dumps(entry, ensure_ascii=False) + '\n'
self.current_change_set[log_location] = log_content
def _add_index_entry(
@ -229,7 +241,7 @@ class GitAuditBackend(AuditBackend):
else:
self.path.mkdir(parents=True)
is_empty = True
self.index_path = self.path / 'index.log'
self.index_path = self.path / index_file_name
if is_empty:
call_git(['init', '--bare', str(self.path)], capture_output=True)
@ -258,7 +270,6 @@ class GitAuditBackend(AuditBackend):
self.index.add(record_id)
def _rebuild_index(self):
print('rebuilding index')
tree_entries = call_git(
['ls-tree', '-r', 'master:'],
cwd=self.path,
@ -267,10 +278,8 @@ class GitAuditBackend(AuditBackend):
with open(self.index_path, 'wt') as f:
for line in tree_entries:
if not line.endswith('.yaml'):
print('ignoring line:', repr(line))
continue
print('got line:', repr(line))
flag, object_type, object_hash, file_name = line.split()
flag, object_type, object_hash, file_name = line.split(maxsplit=3)
record = yaml.safe_load(
call_git(
['show', object_hash],
@ -278,22 +287,4 @@ class GitAuditBackend(AuditBackend):
capture_output=True,
).decode()
)
print('got record:', repr(record))
f.write(record['pid'] + '\n')
def _escape_person_id(
self,
person_id: str,
):
if not person_id:
msg = f'empty ID string not allowed: {person_id}'
raise ValueError(msg)
if any(
map(
lambda character: character in person_id,
string.whitespace
)
):
msg = f'ID string must not contain whitespace: {person_id}'
raise ValueError(msg)
return person_id