Add json2ttl: a streaming JSON->TTL converter #2

Merged
cmo merged 1 commit from add-json2ttl into main 2025-12-09 21:46:32 +00:00
3 changed files with 76 additions and 0 deletions

View file

@ -138,6 +138,30 @@ options:
`list-incoming` requires that the environment variable `CURATOR_TOKEN` is set, and contains a valid curator-token `list-incoming` requires that the environment variable `CURATOR_TOKEN` is set, and contains a valid curator-token
#### json2ttl
Convert a stream of JSON lines into a stream of TTL lines, i.e., JSON-encoded
strings that each contain one TTL document, with one string per line.
```
usage: json2ttl [-h] schema
positional arguments:
schema
options:
-h, --help show this help message and exit
```
This can be used, for example, together with `read-pages` to convert all
records in a collection to TTL:
```
> read-pages 'https://pool.v0.edu.datalad.org/api' 'public'|json2ttl 'https://concepts.datalad.org/s/demo-research-assets/unreleased.yaml'
"@prefix ISSN: <http://identifiers.org/issn/> .\n@prefix bibo: <http://purl.org/ontology/bibo/> .\n@prefix dlcommonmx: <https://concepts.datalad.org/s/common-mixin/unreleased/> .\n@prefix dlrelationsmx: <https://concepts.datalad.org/s/relations-mixin/unreleased/> .\n@prefix xyzra: <https://concepts.datalad.org/s/demo-research-assets/unreleased/> .\n\nISSN:2475-9066 a xyzra:XYZPublicationVenue ;\n dlcommonmx:title \"Journal of Open Source Software\" ;\n dlrelationsmx:kind bibo:Journal .\n\n"
...
```
## SPARQL search over a collection with qlever ## SPARQL search over a collection with qlever
To provide SPARQL search for a collection, the following steps are necessary: To provide SPARQL search for a collection, the following steps are necessary:

View file

@ -42,6 +42,7 @@ build-local-triple-store = "triple_tools.build_local_triple_store:main"
clean-incoming = "triple_tools.clean_incoming:main" clean-incoming = "triple_tools.clean_incoming:main"
list-incoming = "triple_tools.list_incoming:main" list-incoming = "triple_tools.list_incoming:main"
read-pages = "triple_tools.read_pages:main" read-pages = "triple_tools.read_pages:main"
json2ttl = "triple_tools.json2ttl:main"
[tool.hatch.build.targets.wheel] [tool.hatch.build.targets.wheel]
exclude = [ exclude = [

51
triple_tools/json2ttl.py Normal file
View file

@ -0,0 +1,51 @@
from __future__ import annotations
import argparse
import json
import re
import sys
from dump_things_service.converter import (
Format,
FormatConverter,
)
def main():
    """Convert JSON lines read from stdin into TTL documents on stdout.

    The single positional command line argument ``schema`` is handed to
    ``FormatConverter`` together with the JSON input and TTL output formats.
    Every non-blank input line is parsed as one JSON object, its
    ``schema_type`` value is reduced to the trailing run of
    ``[_A-Za-z0-9]`` characters, and that name is passed to the converter
    together with the object. Each conversion result is JSON-encoded and
    printed as one line on stdout.

    Records that cannot be processed (invalid JSON, not a JSON object,
    missing or unusable ``schema_type``, conversion raising ``ValueError``)
    are reported on stderr and skipped; processing continues with the next
    line.

    Returns:
        0 if every record was converted, 1 if at least one record failed.
    """
    argument_parser = argparse.ArgumentParser()
    argument_parser.add_argument('schema')
    arguments = argument_parser.parse_args()

    # Progress goes to stderr so that stdout carries only converted records.
    print(f'Creating converter for schema {arguments.schema} ...', file=sys.stderr, end='', flush=True)
    converter = FormatConverter(
        arguments.schema,
        input_format=Format.json,
        output_format=Format.ttl,
    )
    print(' done', file=sys.stderr, flush=True)

    # Compiled once outside the loop; `+` (instead of `*`) makes an empty
    # trailing name a non-match so it can be reported instead of being
    # passed on as an empty class name.
    class_name_pattern = re.compile(r'[_A-Za-z0-9]+$')

    error = False
    for line in sys.stdin:
        # Blank lines (e.g. a trailing newline in the stream) carry no record.
        if not line.strip():
            continue

        try:
            json_object = json.loads(line)
        except json.JSONDecodeError as jde:
            print(f'ERROR: invalid JSON in {line!r}: {jde}', file=sys.stderr, flush=True)
            error = True
            continue

        if not isinstance(json_object, dict):
            print(f'ERROR: not a JSON object: {json_object}', file=sys.stderr, flush=True)
            error = True
            continue

        object_class = json_object.get('schema_type')
        if object_class is None:
            print(f'ERROR: No schema_type in {json_object}', file=sys.stderr, flush=True)
            error = True
            continue

        class_match = (
            class_name_pattern.search(object_class)
            if isinstance(object_class, str)
            else None
        )
        if class_match is None:
            print(f'ERROR: invalid schema_type in {json_object}', file=sys.stderr, flush=True)
            error = True
            continue
        class_name = class_match.group(0)

        try:
            ttl = converter.convert(json_object, class_name)
        except ValueError as ve:
            print(f'ERROR: conversion failed for {json_object}: {ve}', file=sys.stderr, flush=True)
            error = True
            continue

        print(json.dumps(ttl))

    return 1 if error else 0
# Script entry point: run the converter and use its return value as the
# process exit status.
if __name__ == '__main__':
    raise SystemExit(main())