Use JSONL in gitaudit log-entries #201

Merged
cmo merged 3 commits from jsonl_gitaudit into master 2026-03-20 11:49:17 +00:00

View file

@ -7,8 +7,8 @@ committed.
Changes are annotated with a time stamp and a user-id Changes are annotated with a time stamp and a user-id
""" """
import hashlib import hashlib
import json
import re import re
import string
from datetime import datetime from datetime import datetime
from pathlib import Path from pathlib import Path
@ -23,6 +23,9 @@ from datalad_core.runners import (
from . import AuditBackend from . import AuditBackend
index_file_name = 'gitaudit_index.log'
class GitAuditBackend(AuditBackend): class GitAuditBackend(AuditBackend):
def __init__( def __init__(
@ -42,8 +45,6 @@ class GitAuditBackend(AuditBackend):
author_id: str | None = None, author_id: str | None = None,
) -> None: ) -> None:
author_id = committer_id if author_id is None else author_id author_id = committer_id if author_id is None else author_id
committer_id = self._escape_person_id(committer_id)
author_id = self._escape_person_id(author_id)
record_id = record['pid'] record_id = record['pid']
location = self._get_location_for(record_id) location = self._get_location_for(record_id)
if self._has_pending_changes(location): if self._has_pending_changes(location):
@ -80,13 +81,13 @@ class GitAuditBackend(AuditBackend):
capture_output=True, capture_output=True,
).decode().splitlines() ).decode().splitlines()
# Get the log entry # Get the log entry
log_entry = tuple( log_line = tuple(
filter( filter(
lambda l: not l.startswith('+++') and l.startswith('+'), lambda l: not l.startswith('+++') and l.startswith('+'),
log_diff_lines, log_diff_lines,
) )
)[0][1:] )[0][1:]
time_stamp, committer_id, author_id = log_entry.split(' ') log_entry = json.loads(log_line)
# Get the YAML diff # Get the YAML diff
yaml_diff_lines = call_git( yaml_diff_lines = call_git(
@ -103,7 +104,13 @@ class GitAuditBackend(AuditBackend):
capture_output=True, capture_output=True,
).decode() ).decode()
changes.append( changes.append(
(time_stamp, committer_id, author_id, yaml_diff, yaml_content) (
log_entry['time_stamp'],
log_entry['committer_id'],
log_entry['author_id'],
yaml_diff,
yaml_content,
)
) )
changes.sort() changes.sort()
@ -154,8 +161,13 @@ class GitAuditBackend(AuditBackend):
author_id: str, author_id: str,
) -> None: ) -> None:
time_stamp = datetime.now().isoformat() time_stamp = datetime.now().isoformat()
entry = {
'time_stamp': time_stamp,
'committer_id': committer_id,
'author_id': author_id,
}
log_content = self._read_from_repo_path(log_location).decode() log_content = self._read_from_repo_path(log_location).decode()
log_content += f'{time_stamp} {committer_id} {author_id}\n' log_content += json.dumps(entry, ensure_ascii=False) + '\n'
self.current_change_set[log_location] = log_content self.current_change_set[log_location] = log_content
def _add_index_entry( def _add_index_entry(
@ -229,7 +241,7 @@ class GitAuditBackend(AuditBackend):
else: else:
self.path.mkdir(parents=True) self.path.mkdir(parents=True)
is_empty = True is_empty = True
self.index_path = self.path / 'index.log' self.index_path = self.path / index_file_name
if is_empty: if is_empty:
call_git(['init', '--bare', str(self.path)], capture_output=True) call_git(['init', '--bare', str(self.path)], capture_output=True)
@ -258,7 +270,6 @@ class GitAuditBackend(AuditBackend):
self.index.add(record_id) self.index.add(record_id)
def _rebuild_index(self): def _rebuild_index(self):
print('rebuilding index')
tree_entries = call_git( tree_entries = call_git(
['ls-tree', '-r', 'master:'], ['ls-tree', '-r', 'master:'],
cwd=self.path, cwd=self.path,
@ -267,10 +278,8 @@ class GitAuditBackend(AuditBackend):
with open(self.index_path, 'wt') as f: with open(self.index_path, 'wt') as f:
for line in tree_entries: for line in tree_entries:
if not line.endswith('.yaml'): if not line.endswith('.yaml'):
print('ignoring line:', repr(line))
continue continue
print('got line:', repr(line)) flag, object_type, object_hash, file_name = line.split(maxsplit=3)
flag, object_type, object_hash, file_name = line.split()
record = yaml.safe_load( record = yaml.safe_load(
call_git( call_git(
['show', object_hash], ['show', object_hash],
@ -278,22 +287,4 @@ class GitAuditBackend(AuditBackend):
capture_output=True, capture_output=True,
).decode() ).decode()
) )
print('got record:', repr(record))
f.write(record['pid'] + '\n') f.write(record['pid'] + '\n')
def _escape_person_id(
self,
person_id: str,
):
if not person_id:
msg = f'empty ID string not allowed: {person_id}'
raise ValueError(msg)
if any(
map(
lambda character: character in person_id,
string.whitespace
)
):
msg = f'ID string must not contain whitespace: {person_id}'
raise ValueError(msg)
return person_id