WIP: Guess encoding if default does not work #114

Closed
mslw wants to merge 5 commits from encoding into main
2 changed files with 64 additions and 10 deletions

View file

@@ -10,6 +10,8 @@ from typing import (
List, List,
) )
from charset_normalizer import from_path as cs_from_path
from .load_utils import ( from .load_utils import (
_assign_context, _assign_context,
_compact_obj, _compact_obj,
@@ -28,6 +30,7 @@ def load_tabby(
jsonld: bool = True, jsonld: bool = True,
recursive: bool = True, recursive: bool = True,
cpaths: List | None = None, cpaths: List | None = None,
encoding: str | None = None,
) -> Dict | List: ) -> Dict | List:
"""Load a tabby (TSV) record as structured (JSON(-LD)) data """Load a tabby (TSV) record as structured (JSON(-LD)) data
@@ -48,11 +51,14 @@ def load_tabby(
With the ``jsonld`` flag, a declared or default JSON-LD context is With the ``jsonld`` flag, a declared or default JSON-LD context is
loaded and inserted into the record. loaded and inserted into the record.
Encoding used when reading tsv files can be specified as ``encoding``.
""" """
ldr = _TabbyLoader( ldr = _TabbyLoader(
jsonld=jsonld, jsonld=jsonld,
recursive=recursive, recursive=recursive,
cpaths=cpaths, cpaths=cpaths,
encoding=encoding,
) )
return ldr(src=src, single=single) return ldr(src=src, single=single)
@@ -63,6 +69,7 @@ class _TabbyLoader:
jsonld: bool = True, jsonld: bool = True,
recursive: bool = True, recursive: bool = True,
cpaths: List[Path] | None = None, cpaths: List[Path] | None = None,
encoding: str | None = None,
): ):
std_convention_path = Path(__file__).parent / 'conventions' std_convention_path = Path(__file__).parent / 'conventions'
if cpaths is None: if cpaths is None:
@@ -70,6 +77,7 @@ class _TabbyLoader:
else: else:
cpaths.append(std_convention_path) cpaths.append(std_convention_path)
self._cpaths = cpaths self._cpaths = cpaths
self._encoding = encoding
self._jsonld = jsonld self._jsonld = jsonld
self._recursive = recursive self._recursive = recursive
@@ -94,8 +102,24 @@ class _TabbyLoader:
src=src, src=src,
trace=trace, trace=trace,
) )
if self._encoding is not None:
tsv_obj = self._parse_tsv_single(src, encoding=self._encoding)
else:
try:
tsv_obj = self._parse_tsv_single(src)
except UnicodeDecodeError:
# by default Path.open() uses locale.getencoding()
# that didn't work, try guessing
encoding = cs_from_path(src).best().encoding
tsv_obj = self._parse_tsv_single(src, encoding=encoding)
with src.open(newline='') as tsvfile: obj.update(tsv_obj)
return self._postproc_obj(obj, src=src, trace=trace)
def _parse_tsv_single(self, src: Path, encoding: str | None = None) -> Dict:
obj = {}
with src.open(newline='', encoding=encoding) as tsvfile:
reader = csv.reader(tsvfile, delimiter='\t') reader = csv.reader(tsvfile, delimiter='\t')
# row_id is useful for error reporting # row_id is useful for error reporting
for row_id, row in enumerate(reader): for row_id, row in enumerate(reader):
@@ -117,8 +141,7 @@ class _TabbyLoader:
# we support "sequence" values via multi-column values # we support "sequence" values via multi-column values
# supporting two ways just adds unnecessary complexity # supporting two ways just adds unnecessary complexity
obj[key] = val obj[key] = val
return obj
return self._postproc_obj(obj, src=src, trace=trace)
def _load_many( def _load_many(
self, self,
@@ -144,19 +167,49 @@ class _TabbyLoader:
# the table field/column names have purposefully _nothing_ # the table field/column names have purposefully _nothing_
# to do with any possibly loaded JSON data # to do with any possibly loaded JSON data
fieldnames = None if self._encoding is not None:
tsv_array = self._parse_tsv_many(
src, obj_tmpl, trace=trace, fieldnames=None, encoding=self._encoding
)
else:
try:
tsv_array = self._parse_tsv_many(
src, obj_tmpl, trace=trace, fieldnames=None
)
except UnicodeDecodeError:
# by default Path.open() uses locale.getencoding()
# that didn't work, try guessing
encoding = cs_from_path(src).best().encoding
tsv_array = self._parse_tsv_many(
src, obj_tmpl, trace=trace, fieldnames=None, encoding=encoding
)
with src.open(newline='') as tsvfile: array.extend(tsv_array)
return array
def _parse_tsv_many(
self,
src: Path,
obj_tmpl: Dict,
trace: List,
fieldnames: List | None = None,
encoding: str | None = None,
) -> List[Dict]:
array = []
with src.open(newline="", encoding=encoding) as tsvfile:
# we cannot use DictReader -- we need to support identically named # we cannot use DictReader -- we need to support identically named
# columns # columns
reader = csv.reader(tsvfile, delimiter='\t') reader = csv.reader(tsvfile, delimiter="\t")
# row_id is useful for error reporting # row_id is useful for error reporting
for row_id, row in enumerate(reader): for row_id, row in enumerate(reader):
# row is a list of field, with only as many items # row is a list of field, with only as many items
# as this particular row has columns # as this particular row has columns
if not len(row) \ if (
or row[0].startswith('#') \ not len(row)
or all(v is None for v in row): or row[0].startswith("#")
or all(v is None for v in row)
):
# skip empty rows, rows with no key, or rows with # skip empty rows, rows with no key, or rows with
# a comment key # a comment key
continue continue

View file

@@ -17,6 +17,7 @@ install_requires =
datalad >= 0.18.0 datalad >= 0.18.0
datalad-next @ git+https://github.com/datalad/datalad-next.git@main datalad-next @ git+https://github.com/datalad/datalad-next.git@main
datalad-metalad datalad-metalad
charset-normalizer
openpyxl openpyxl
pyld pyld
packages = find_namespace: packages = find_namespace: