Add an encoding parameter to io.load_tabby #116

Open
mslw wants to merge 2 commits from mslw/encoding2 into main
2 changed files with 11 additions and 3 deletions

View file

@@ -28,6 +28,7 @@ def load_tabby(
jsonld: bool = True, jsonld: bool = True,
recursive: bool = True, recursive: bool = True,
cpaths: List | None = None, cpaths: List | None = None,
encoding: str | None = None,
) -> Dict | List: ) -> Dict | List:
"""Load a tabby (TSV) record as structured (JSON(-LD)) data """Load a tabby (TSV) record as structured (JSON(-LD)) data
@@ -48,11 +49,16 @@ def load_tabby(
With the ``jsonld`` flag, a declared or default JSON-LD context is With the ``jsonld`` flag, a declared or default JSON-LD context is
loaded and inserted into the record. loaded and inserted into the record.
The TSV file encoding used when reading can be specified with the
``encoding`` parameter.
""" """
ldr = _TabbyLoader( ldr = _TabbyLoader(
jsonld=jsonld, jsonld=jsonld,
recursive=recursive, recursive=recursive,
cpaths=cpaths, cpaths=cpaths,
encoding=encoding,
) )
return ldr(src=src, single=single) return ldr(src=src, single=single)
@@ -63,6 +69,7 @@ class _TabbyLoader:
jsonld: bool = True, jsonld: bool = True,
recursive: bool = True, recursive: bool = True,
cpaths: List[Path] | None = None, cpaths: List[Path] | None = None,
encoding: str | None = None,
): ):
std_convention_path = Path(__file__).parent / 'conventions' std_convention_path = Path(__file__).parent / 'conventions'
if cpaths is None: if cpaths is None:
@@ -70,6 +77,7 @@ class _TabbyLoader:
else: else:
cpaths.append(std_convention_path) cpaths.append(std_convention_path)
self._cpaths = cpaths self._cpaths = cpaths
self._encoding = encoding
self._jsonld = jsonld self._jsonld = jsonld
self._recursive = recursive self._recursive = recursive
@@ -95,7 +103,7 @@ class _TabbyLoader:
trace=trace, trace=trace,
) )
with src.open(newline='') as tsvfile: with src.open(newline='', encoding=self._encoding) as tsvfile:
reader = csv.reader(tsvfile, delimiter='\t') reader = csv.reader(tsvfile, delimiter='\t')
# row_id is useful for error reporting # row_id is useful for error reporting
for row_id, row in enumerate(reader): for row_id, row in enumerate(reader):
@@ -146,7 +154,7 @@ class _TabbyLoader:
# to do with any possibly loaded JSON data # to do with any possibly loaded JSON data
fieldnames = None fieldnames = None
with src.open(newline='') as tsvfile: with src.open(newline='', encoding=self._encoding) as tsvfile:
# we cannot use DictReader -- we need to support identically named # we cannot use DictReader -- we need to support identically named
# columns # columns
reader = csv.reader(tsvfile, delimiter='\t') reader = csv.reader(tsvfile, delimiter='\t')

View file

@@ -28,7 +28,7 @@ covers the entire document, including content inserted from other tables.
When individual tables require a different context specification, it can be When individual tables require a different context specification, it can be
declared in the respective ``<prefix>_<table-name>.ctx.jsonld`` side-car files. declared in the respective ``<prefix>_<table-name>.ctx.jsonld`` side-car files.
Such a context is inserted in each metadata object read from the respective Such a context is inserted in each metadata object read from the respective
table. Standard JSON-LD rules for context scoping and propgation apply to the table. Standard JSON-LD rules for context scoping and propagation apply to the
semantics of such a declaration. semantics of such a declaration.
A third approach to context specification is a record-global A third approach to context specification is a record-global