Use JSONL in gitaudit log-entries #201
1 changed file with 21 additions and 30 deletions
|
|
@ -7,8 +7,8 @@ committed.
|
||||||
Changes are annotated with a time stamp and a user-id
|
Changes are annotated with a time stamp and a user-id
|
||||||
"""
|
"""
|
||||||
import hashlib
|
import hashlib
|
||||||
|
import json
|
||||||
import re
|
import re
|
||||||
import string
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
@ -23,6 +23,9 @@ from datalad_core.runners import (
|
||||||
from . import AuditBackend
|
from . import AuditBackend
|
||||||
|
|
||||||
|
|
||||||
|
index_file_name = 'gitaudit_index.log'
|
||||||
|
|
||||||
|
|
||||||
class GitAuditBackend(AuditBackend):
|
class GitAuditBackend(AuditBackend):
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
|
|
@ -42,8 +45,6 @@ class GitAuditBackend(AuditBackend):
|
||||||
author_id: str | None = None,
|
author_id: str | None = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
author_id = committer_id if author_id is None else author_id
|
author_id = committer_id if author_id is None else author_id
|
||||||
committer_id = self._escape_person_id(committer_id)
|
|
||||||
author_id = self._escape_person_id(author_id)
|
|
||||||
record_id = record['pid']
|
record_id = record['pid']
|
||||||
location = self._get_location_for(record_id)
|
location = self._get_location_for(record_id)
|
||||||
if self._has_pending_changes(location):
|
if self._has_pending_changes(location):
|
||||||
|
|
@ -80,13 +81,13 @@ class GitAuditBackend(AuditBackend):
|
||||||
capture_output=True,
|
capture_output=True,
|
||||||
).decode().splitlines()
|
).decode().splitlines()
|
||||||
# Get the log entry
|
# Get the log entry
|
||||||
log_entry = tuple(
|
log_line = tuple(
|
||||||
filter(
|
filter(
|
||||||
lambda l: not l.startswith('+++') and l.startswith('+'),
|
lambda l: not l.startswith('+++') and l.startswith('+'),
|
||||||
log_diff_lines,
|
log_diff_lines,
|
||||||
)
|
)
|
||||||
)[0][1:]
|
)[0][1:]
|
||||||
time_stamp, committer_id, author_id = log_entry.split(' ')
|
log_entry = json.loads(log_line)
|
||||||
|
|
||||||
# Get the YAML diff
|
# Get the YAML diff
|
||||||
yaml_diff_lines = call_git(
|
yaml_diff_lines = call_git(
|
||||||
|
|
@ -103,7 +104,13 @@ class GitAuditBackend(AuditBackend):
|
||||||
capture_output=True,
|
capture_output=True,
|
||||||
).decode()
|
).decode()
|
||||||
changes.append(
|
changes.append(
|
||||||
(time_stamp, committer_id, author_id, yaml_diff, yaml_content)
|
(
|
||||||
|
log_entry['time_stamp'],
|
||||||
|
log_entry['committer_id'],
|
||||||
|
log_entry['author_id'],
|
||||||
|
yaml_diff,
|
||||||
|
yaml_content,
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
changes.sort()
|
changes.sort()
|
||||||
|
|
@ -154,8 +161,13 @@ class GitAuditBackend(AuditBackend):
|
||||||
author_id: str,
|
author_id: str,
|
||||||
) -> None:
|
) -> None:
|
||||||
time_stamp = datetime.now().isoformat()
|
time_stamp = datetime.now().isoformat()
|
||||||
|
entry = {
|
||||||
|
'time_stamp': time_stamp,
|
||||||
|
'committer_id': committer_id,
|
||||||
|
'author_id': author_id,
|
||||||
|
}
|
||||||
log_content = self._read_from_repo_path(log_location).decode()
|
log_content = self._read_from_repo_path(log_location).decode()
|
||||||
log_content += f'{time_stamp} {committer_id} {author_id}\n'
|
log_content += json.dumps(entry, ensure_ascii=False) + '\n'
|
||||||
self.current_change_set[log_location] = log_content
|
self.current_change_set[log_location] = log_content
|
||||||
|
|
||||||
def _add_index_entry(
|
def _add_index_entry(
|
||||||
|
|
@ -229,7 +241,7 @@ class GitAuditBackend(AuditBackend):
|
||||||
else:
|
else:
|
||||||
self.path.mkdir(parents=True)
|
self.path.mkdir(parents=True)
|
||||||
is_empty = True
|
is_empty = True
|
||||||
self.index_path = self.path / 'index.log'
|
self.index_path = self.path / index_file_name
|
||||||
|
|
||||||
if is_empty:
|
if is_empty:
|
||||||
call_git(['init', '--bare', str(self.path)], capture_output=True)
|
call_git(['init', '--bare', str(self.path)], capture_output=True)
|
||||||
|
|
@ -258,7 +270,6 @@ class GitAuditBackend(AuditBackend):
|
||||||
self.index.add(record_id)
|
self.index.add(record_id)
|
||||||
|
|
||||||
def _rebuild_index(self):
|
def _rebuild_index(self):
|
||||||
print('rebuilding index')
|
|
||||||
tree_entries = call_git(
|
tree_entries = call_git(
|
||||||
['ls-tree', '-r', 'master:'],
|
['ls-tree', '-r', 'master:'],
|
||||||
cwd=self.path,
|
cwd=self.path,
|
||||||
|
|
@ -267,10 +278,8 @@ class GitAuditBackend(AuditBackend):
|
||||||
with open(self.index_path, 'wt') as f:
|
with open(self.index_path, 'wt') as f:
|
||||||
for line in tree_entries:
|
for line in tree_entries:
|
||||||
if not line.endswith('.yaml'):
|
if not line.endswith('.yaml'):
|
||||||
print('ignoring line:', repr(line))
|
|
||||||
continue
|
continue
|
||||||
print('got line:', repr(line))
|
flag, object_type, object_hash, file_name = line.split(maxsplit=3)
|
||||||
flag, object_type, object_hash, file_name = line.split()
|
|
||||||
record = yaml.safe_load(
|
record = yaml.safe_load(
|
||||||
call_git(
|
call_git(
|
||||||
['show', object_hash],
|
['show', object_hash],
|
||||||
|
|
@ -278,22 +287,4 @@ class GitAuditBackend(AuditBackend):
|
||||||
capture_output=True,
|
capture_output=True,
|
||||||
).decode()
|
).decode()
|
||||||
)
|
)
|
||||||
print('got record:', repr(record))
|
|
||||||
f.write(record['pid'] + '\n')
|
f.write(record['pid'] + '\n')
|
||||||
|
|
||||||
def _escape_person_id(
|
|
||||||
self,
|
|
||||||
person_id: str,
|
|
||||||
):
|
|
||||||
if not person_id:
|
|
||||||
msg = f'empty ID string not allowed: {person_id}'
|
|
||||||
raise ValueError(msg)
|
|
||||||
if any(
|
|
||||||
map(
|
|
||||||
lambda character: character in person_id,
|
|
||||||
string.whitespace
|
|
||||||
)
|
|
||||||
):
|
|
||||||
msg = f'ID string must not contain whitespace: {person_id}'
|
|
||||||
raise ValueError(msg)
|
|
||||||
return person_id
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue