copy yet unreleased code into project #200
2 changed files with 301 additions and 1 deletions
|
|
@ -13,7 +13,6 @@ from datetime import datetime
|
|||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
from datalad_core.git_utils import apply_changeset
|
||||
from datalad_core.repo import Repo
|
||||
from datalad_core.runners import (
|
||||
call_git,
|
||||
|
|
@ -21,6 +20,7 @@ from datalad_core.runners import (
|
|||
)
|
||||
|
||||
from . import AuditBackend
|
||||
from .gitutils import apply_changeset
|
||||
|
||||
|
||||
class GitAuditBackend(AuditBackend):
|
||||
|
|
|
|||
300
dump_things_service/audit/gitutils.py
Normal file
300
dump_things_service/audit/gitutils.py
Normal file
|
|
@ -0,0 +1,300 @@
|
|||
# This is taken from the minilad-branch of `datalad-core`, i.e.:
|
||||
#
|
||||
# https://hub.datalad.org/datalad/datalad-core/src/branch/minilad/datalad_core/git_utils/apply_changeset.py#
|
||||
#
|
||||
# which is not yet released. Once the `minilad`-branch is merged, this
|
||||
# should be removed and `apply_changeset` should be imported from
|
||||
# `datalad_core.git_utils`.
|
||||
|
||||
import os
|
||||
import tempfile
|
||||
from collections.abc import Mapping
|
||||
from pathlib import (
|
||||
Path,
|
||||
PurePosixPath,
|
||||
)
|
||||
|
||||
from datalad_core.git_utils import types as gt
|
||||
from datalad_core.git_utils.interrogators import get_object_name
|
||||
from datalad_core.git_utils.status import iter_repo_status
|
||||
from datalad_core.repo import (
|
||||
Repo,
|
||||
Worktree,
|
||||
)
|
||||
from datalad_core.runners import (
|
||||
call_git,
|
||||
call_git_oneline,
|
||||
)
|
||||
|
||||
|
||||
def apply_changeset(
    target: Repo | Worktree,
    changes: Mapping[
        PurePosixPath | str, None | str | Path | tuple[gt.GitObjectMode, str]
    ],
    *,
    message: str,
    branch: str | None = None,
    force: bool = False,
    restage: bool = False,
) -> str | None:
    """Apply a changeset to a (bare) repository or a worktree

    The ``target`` parameter identifies the repository or worktree
    to apply the changeset to. When an effective change was made,
    it is committed with ``message`` as the commit message.
    When the changeset is not to be applied on top of ``HEAD``,
    a different ref can be given via ``branch``. ``HEAD`` need not
    exist for using this function (i.e., can be used for an initial
    commit too). A ``branch``, in contrast, must already exist.

    A changeset (``changes``) is a mapping from a path (relative to the
    repository root), to change specification. The following specification
    values are supported:

    - ``None``: remove content
    - any ``str``-type value: point to (new) blob created from this string
    - any ``Path`` instance: point to (new) blob created from the content
      of this file
    - ``tuple[GitObjectMode, str]``: point to Git object of a mode given
      by the first tuple item. The nature of the second value is determined
      by the object mode:

      - file|executable: <object name>
      - symlink: <target path>
      - tree: <object name>
      - submodule: <subproject commit>

    The remaining keyword arguments only matter when ``target`` is a
    worktree:

    - ``force``: when ``False`` (default), the changeset is refused if the
      worktree has untracked content at a path of the changeset, or has
      unstaged modifications.
    - ``restage``: when ``True``, additions and modifications that were
      staged before the call are written to the index again after the new
      commit was merged into it.

    Returns the ID of a created commit, or ``None`` if no commit was made
    (empty changeset, or the resulting tree is identical to the parent's).

    Raises ``ValueError`` when ``branch`` is given but cannot be resolved,
    or when the conflict check refuses the changeset (see ``force``).

    Removed files are not deleted from a worktree, but are left as untracked
    content.
    """
    if not changes:
        # nothing to do, early exit
        return None

    repo = target.repo if isinstance(target, Worktree) else target

    # look for any parent commit. will fail with an unknown branch
    # (should have created it before)
    try:
        parent = get_object_name(repo.path, branch or 'HEAD')
    except ValueError:
        if branch:
            # only tolerate an absent HEAD
            raise
        parent = None

    # 1. If not bare, store the state of the index to be able to
    #    restage content.
    #    The items have the same format as `index_info`, directly prepared
    #    for git-update-index
    restage_items = _check_for_conflicts(
        target, changes, branch=branch, force=force, restage=restage
    )

    # 2. Create a temporary index to build the commit; the regular index
    #    (if any) is left untouched until the commit exists
    with tempfile.TemporaryDirectory(
        prefix='index',
        dir=target.git_dir,
    ) as tmpdir:
        index_file = Path(tmpdir) / 'index'
        env = dict(os.environ, GIT_INDEX_FILE=str(index_file))

        # 3. Read any parent state into the TMP index
        if parent:
            call_git(['read-tree', '-q', parent], env=env, cwd=repo.path)

        # 4. Apply the changes
        index_info: list[str] = []
        for path, spec in changes.items():
            _prep_update_item(
                cwd=repo.path,
                env=env,
                index_info=index_info,
                path=path,
                spec=spec,
            )
        call_git(
            ['update-index', '-q', '-z', '--index-info'],
            inputs='\0'.join(index_info),
            env=env,
            text=True,
            cwd=repo.path,
        )

        # 5. Write the tree to be committed from the TMP index
        tree_id = call_git_oneline(['write-tree'], env=env, cwd=repo.path)

    # avoid empty commit by comparing the tree we ended up with, with the tree
    # linked to the parent state
    if (
        parent
        and call_git_oneline(['rev-parse', f'{parent}^{{tree}}'], cwd=repo.path)
        == tree_id
    ):
        return None

    commit_cmd = ['commit-tree', tree_id, '-m', message]
    if parent:
        commit_cmd.extend(('-p', parent))
    commit_id = call_git_oneline(commit_cmd, cwd=repo.path)

    # 6. Update the ref to point to the new commit
    call_git(
        [
            'update-ref',
            # using HEAD will run with whatever is the default branch name,
            # also works in bare repos
            f'refs/heads/{branch}' if branch else 'HEAD',
            commit_id,
        ],
        cwd=repo.path,
    )

    # NOTE(review): this also returns early when ``branch`` names the branch
    # that is currently checked out in the worktree, in which case index and
    # worktree are not synchronized with the new commit -- confirm that this
    # is intended.
    if isinstance(target, Repo) or branch is not None:
        return commit_id

    # From here on: ``target`` is a worktree and the commit went to HEAD.

    # 7. Merge commit into index.
    #    read-tree will unavoidably cause staged content to be unstaged
    call_git(['read-tree', '-m', 'HEAD'], cwd=target.path)

    # 8. Restage content that was staged before
    if restage:
        call_git(
            ['update-index', '-q', '-z', '--index-info'],
            inputs='\0'.join(restage_items),
            text=True,
            cwd=repo.path,
        )

    # 9. Update checkout
    call_git(['checkout-index', '-f', '-u', '-a'], cwd=target.path)
    return commit_id
|
||||
|
||||
|
||||
def _prep_update_item(
|
||||
cwd: Path,
|
||||
env: Mapping[str, str],
|
||||
index_info: list[str],
|
||||
path: PurePosixPath | str,
|
||||
spec: None | str | Path | tuple[gt.GitObjectMode, str],
|
||||
):
|
||||
if not isinstance(path, (str, PurePosixPath)):
|
||||
msg = f'Unsupported path type in change specification {path!r}'
|
||||
raise TypeError(msg)
|
||||
path_str = str(path)
|
||||
match spec:
|
||||
case None:
|
||||
# use magic mode '0' to remove from index
|
||||
index_info.append(f'0 {40 * "0"}\t{path_str}')
|
||||
case str():
|
||||
oid = call_git_oneline(
|
||||
['hash-object', '-t', 'blob', '-w', '--stdin'],
|
||||
inputs=spec,
|
||||
cwd=cwd,
|
||||
env=env,
|
||||
)
|
||||
index_info.append(f'{gt.GitObjectMode.FILE} {oid} 0\t{path_str}')
|
||||
case Path():
|
||||
oid = call_git_oneline(
|
||||
[
|
||||
'hash-object',
|
||||
'-t',
|
||||
'blob',
|
||||
'-w',
|
||||
# use --path to enable filters (think EOL conversion)
|
||||
f'--path={spec}',
|
||||
str(spec),
|
||||
],
|
||||
cwd=cwd,
|
||||
env=env,
|
||||
)
|
||||
index_info.append(f'{gt.GitObjectMode.FILE} {oid} 0\t{path_str}')
|
||||
case tuple() if spec[0] is gt.GitObjectMode.SYMLINK:
|
||||
oid = call_git_oneline(
|
||||
['hash-object', '-t', 'blob', '-w', '--stdin'],
|
||||
inputs=spec[1],
|
||||
cwd=cwd,
|
||||
env=env,
|
||||
)
|
||||
index_info.append(f'{gt.GitObjectMode.SYMLINK} {oid} 0\t{path_str}')
|
||||
case tuple():
|
||||
index_info.append(f'{spec[0]} {spec[1]} 0\t{path_str}')
|
||||
case _:
|
||||
msg = f'Unsupported change specification {spec!r}'
|
||||
raise ValueError(msg)
|
||||
|
||||
|
||||
def _check_for_conflicts(
    target: Repo | Worktree,
    changes: Mapping[
        PurePosixPath | str, None | str | Path | tuple[gt.GitObjectMode, str]
    ],
    *,
    branch: str | None = None,
    force: bool = False,
    restage: bool = False,
) -> list[str]:
    """Verify that a changeset can be applied, and collect staged content

    Returns records of staged additions/modifications, formatted for
    ``git update-index --index-info``. An empty list is returned whenever
    the state of worktree and index is irrelevant for the operation.

    Unless ``force`` is set, a ``ValueError`` is raised when the worktree
    has untracked content at a path of the changeset, or has any unstaged
    modification.
    """
    # Conflicts require a worktree to conflict with
    if not isinstance(target, Worktree):
        return []

    # Forced, and nothing to put back into the index afterwards: neither
    # the worktree nor the index state matters
    if force and not restage:
        return []

    if branch:
        checked_out = call_git_oneline(
            ['branch', '--show-current'], cwd=target.path
        )
        if checked_out != branch:
            # the changeset goes to a branch that is not checked out here,
            # hence cannot conflict with this worktree
            return []

    records = {
        rec.path: rec
        for rec in iter_repo_status(
            target.path,
            untracked_files=gt.UntrackedFilesMode.ALL,
            # submodules are of no interest as such, but a modified
            # subproject commit must be reported to detect conflicts
            ignore_submodules=gt.IgnoreSubmodulesMode.DIRTY,
        )
    }
    tracked = [
        rec for rec in records.values() if isinstance(rec, gt.RepoModificationRecord)
    ]

    if not force:
        # untracked content at a path touched by the changeset is a conflict
        untracked_paths: set[PurePosixPath] = {
            PurePosixPath(rec.path)
            for rec in records.values()
            if isinstance(rec, gt.RepoUntrackedRecord)
        }
        changeset_paths = {PurePosixPath(p) for p in changes}
        if not untracked_paths.isdisjoint(changeset_paths):
            msg = (
                'Refuse to apply changeset with conflicting untracked worktree content'
            )
            raise ValueError(msg)

        # unstaged modifications would be lost by the final sync of the
        # worktree with the index
        for rec in tracked:
            if rec.modification.unstaged != gt.ModificationStateType.UNMODIFIED:
                msg = (
                    'Refuse to apply changeset to worktree with '
                    'unstaged/uncommitted modifications'
                )
                raise ValueError(msg)

    # same record format as used for building the commit, ready to be fed
    # to git-update-index
    restageable = (
        gt.ModificationStateType.ADDED,
        gt.ModificationStateType.MODIFIED,
    )
    return [
        f'{rec.mode_index} {rec.name_index} 0\t{rec.path}'
        for rec in tracked
        if rec.modification.staged in restageable
    ]
|
||||
Loading…
Add table
Add a link
Reference in a new issue