82 lines
2.5 KiB
Python
82 lines
2.5 KiB
Python
import enum
|
|
import hashlib
|
|
from functools import partial
|
|
from pathlib import Path
|
|
from typing import Callable, Literal
|
|
|
|
from pydantic import BaseModel, ConfigDict
|
|
|
|
|
|
class MappingMethod(enum.Enum):
    """Identifiers of the available PID-to-path mapping strategies.

    Each member's value is the string under which the strategy is looked up
    (see ``get_mapping_function_by_name``); the members themselves are the
    keys of the module-level ``mapping_functions`` table.
    """

    # MD5 digest of the PID: flat, one 3-character directory level, or two.
    digest_md5 = 'digest-md5'
    digest_md5_p3 = 'digest-md5-p3'
    digest_md5_p3_p3 = 'digest-md5-p3-p3'

    # SHA-1 digest of the PID, with the same three layouts.
    digest_sha1 = 'digest-sha1'
    digest_sha1_p3 = 'digest-sha1-p3'
    digest_sha1_p3_p3 = 'digest-sha1-p3-p3'

    # Escaped text following the last ':' of the PID.
    after_last_colon = 'after-last-colon'
|
|
|
|
|
|
class CollectionDirConfigContent(BaseModel):
    """Pydantic model for the content of a collection directory configuration.

    Unknown keys are rejected (``extra='forbid'``). Enum-typed fields are
    stored as their plain values rather than as enum members
    (``use_enum_values=True``).
    """

    model_config = ConfigDict(extra='forbid', use_enum_values=True)

    # Only the literal 'records' is accepted.
    type: Literal['records']
    # Only version 1 is accepted.
    version: Literal[1]
    # Arbitrary string.
    # NOTE(review): presumably identifies the schema of the stored records;
    # confirm against the code that reads this field.
    schema: str
    # Only the literal 'yaml' is accepted.
    format: Literal['yaml']
    # Must be one of the MappingMethod values. Because of use_enum_values,
    # a validated instance holds the value string (e.g. 'digest-md5') here,
    # not the MappingMethod member.
    idfx: MappingMethod
|
|
|
|
|
|
def get_hex_digest(hasher: Callable, data: str) -> str:
    """Return the hexadecimal digest of ``data`` as computed by ``hasher``.

    ``hasher`` is called with ``data.encode()`` (UTF-8, the ``str.encode``
    default) and must return an object that offers a ``hexdigest()`` method,
    as the :mod:`hashlib` constructors do.
    """
    return hasher(data.encode()).hexdigest()
|
|
|
|
|
|
def mapping_digest_p3(
    hasher: Callable,
    pid: str,
    suffix: str,
) -> Path:
    """Map ``pid`` to ``<first 3 hex chars>/<remaining hex chars>.<suffix>``.

    The hex characters are the digest of ``pid`` obtained through
    ``get_hex_digest(hasher, pid)``. The result is a relative path with one
    directory level.
    """
    digest = get_hex_digest(hasher, pid)
    directory, stem = digest[:3], digest[3:]
    return Path(directory) / '.'.join((stem, suffix))
|
|
|
|
|
|
def mapping_digest_p3_p3(
    hasher: Callable,
    pid: str,
    suffix: str,
) -> Path:
    """Map ``pid`` to ``<hex[0:3]>/<hex[3:6]>/<hex[6:]>.<suffix>``.

    ``hex`` is the digest of ``pid`` obtained through
    ``get_hex_digest(hasher, pid)``. The result is a relative path with two
    directory levels of three hex characters each.
    """
    digest = get_hex_digest(hasher, pid)
    first_level = digest[:3]
    second_level = digest[3:6]
    file_name = '.'.join((digest[6:], suffix))
    return Path(first_level, second_level, file_name)
|
|
|
|
|
|
def mapping_digest(hasher: Callable, pid: str, suffix: str) -> Path:
    """Map ``pid`` to the flat relative path ``<hex digest>.<suffix>``.

    The hex digest of ``pid`` is obtained through
    ``get_hex_digest(hasher, pid)``.
    """
    digest = get_hex_digest(hasher, pid)
    return Path('.'.join((digest, suffix)))
|
|
|
|
|
|
def mapping_after_last_colon(pid: str, suffix: str) -> Path:
    """Map ``pid`` to ``<escaped text after the last ':'>.<suffix>``.

    If ``pid`` contains no colon, the whole ``pid`` is used. In the selected
    text ``_`` becomes ``__``, ``/`` becomes ``_s`` and ``.`` becomes ``_d``,
    so the resulting file name contains no slash and no dot other than the
    one separating it from ``suffix``.
    """
    # Doubling '_' first keeps the '_s' and '_d' escape sequences unambiguous.
    escape_table = str.maketrans({'_': '__', '/': '_s', '.': '_d'})
    tail = pid.rsplit(':', 1)[-1]
    return Path(tail.translate(escape_table) + '.' + suffix)
|
|
|
|
|
|
# Dispatch table from mapping method to the function implementing it.
# The digest-based entries have their hash constructor pre-bound with
# ``partial``, so every value can be called with ``(pid, suffix)``.
mapping_functions = {
    # MD5-based layouts
    MappingMethod.digest_md5: partial(mapping_digest, hashlib.md5),
    MappingMethod.digest_md5_p3: partial(mapping_digest_p3, hashlib.md5),
    MappingMethod.digest_md5_p3_p3: partial(mapping_digest_p3_p3, hashlib.md5),
    # SHA-1-based layouts
    MappingMethod.digest_sha1: partial(mapping_digest, hashlib.sha1),
    MappingMethod.digest_sha1_p3: partial(mapping_digest_p3, hashlib.sha1),
    MappingMethod.digest_sha1_p3_p3: partial(mapping_digest_p3_p3, hashlib.sha1),
    # Name derived from the PID text itself
    MappingMethod.after_last_colon: mapping_after_last_colon,
}
|
|
|
|
|
|
def get_mapping_function_by_name(mapping_function_name: str) -> Callable:
    """Return the mapping function registered under ``mapping_function_name``.

    ``mapping_function_name`` must be the value of a ``MappingMethod`` member
    (for example ``'digest-md5'``); any other string makes the enum lookup
    raise ``ValueError``.
    """
    method = MappingMethod(mapping_function_name)
    return mapping_functions[method]
|
|
|
|
|
|
def get_mapping_function(collection_config: CollectionDirConfigContent) -> Callable:
    """Return the mapping function selected by ``collection_config.idfx``.

    ``CollectionDirConfigContent`` is configured with
    ``use_enum_values=True``, so a validated instance holds the plain value
    string (e.g. ``'digest-md5'``) in ``idfx``. ``mapping_functions`` is
    keyed by ``MappingMethod`` members, which do not compare equal to their
    value strings, so the value is converted back to a member before the
    lookup.

    Raises:
        ValueError: if ``idfx`` is not a valid ``MappingMethod`` value.
    """
    # MappingMethod(...) accepts both a value string and an existing member,
    # so this works whether idfx holds a string or a MappingMethod.
    method = MappingMethod(collection_config.idfx)
    return mapping_functions[method]
|