Add json2ttl: a streaming JSON->TTL converter #2
3 changed files with 76 additions and 0 deletions
24
README.md
24
README.md
|
|
@ -138,6 +138,30 @@ options:
|
||||||
`list-incoming` requires that the environment variable `CURATOR_TOKEN` is set, and contains a valid curator-token
|
`list-incoming` requires that the environment variable `CURATOR_TOKEN` is set, and contains a valid curator-token
|
||||||
|
|
||||||
|
|
||||||
|
#### json2ttl
|
||||||
|
|
||||||
|
Convert a stream of JSON lines into a stream of TTL lines, i.e., strings that
|
||||||
|
each contain a TTL document, with one JSON-encoded string per line.
|
||||||
|
|
||||||
|
```
|
||||||
|
usage: json2ttl [-h] schema
|
||||||
|
|
||||||
|
positional arguments:
|
||||||
|
schema
|
||||||
|
|
||||||
|
options:
|
||||||
|
-h, --help show this help message and exit
|
||||||
|
```
|
||||||
|
|
||||||
|
This can be used, for example, together with `read-pages` to convert all
|
||||||
|
records in a collection to TTL:
|
||||||
|
|
||||||
|
```
|
||||||
|
> read-pages 'https://pool.v0.edu.datalad.org/api' 'public'|json2ttl 'https://concepts.datalad.org/s/demo-research-assets/unreleased.yaml'
|
||||||
|
"@prefix ISSN: <http://identifiers.org/issn/> .\n@prefix bibo: <http://purl.org/ontology/bibo/> .\n@prefix dlcommonmx: <https://concepts.datalad.org/s/common-mixin/unreleased/> .\n@prefix dlrelationsmx: <https://concepts.datalad.org/s/relations-mixin/unreleased/> .\n@prefix xyzra: <https://concepts.datalad.org/s/demo-research-assets/unreleased/> .\n\nISSN:2475-9066 a xyzra:XYZPublicationVenue ;\n dlcommonmx:title \"Journal of Open Source Software\" ;\n dlrelationsmx:kind bibo:Journal .\n\n"
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
## SPARQL search over a collection with qlever
|
## SPARQL search over a collection with qlever
|
||||||
|
|
||||||
To provide SPARQL search for a collection, the following steps are necessary:
|
To provide SPARQL search for a collection, the following steps are necessary:
|
||||||
|
|
|
||||||
|
|
@ -42,6 +42,7 @@ build-local-triple-store = "triple_tools.build_local_triple_store:main"
|
||||||
clean-incoming = "triple_tools.clean_incoming:main"
|
clean-incoming = "triple_tools.clean_incoming:main"
|
||||||
list-incoming = "triple_tools.list_incoming:main"
|
list-incoming = "triple_tools.list_incoming:main"
|
||||||
read-pages = "triple_tools.read_pages:main"
|
read-pages = "triple_tools.read_pages:main"
|
||||||
|
json2ttl = "triple_tools.json2ttl:main"
|
||||||
|
|
||||||
[tool.hatch.build.targets.wheel]
|
[tool.hatch.build.targets.wheel]
|
||||||
exclude = [
|
exclude = [
|
||||||
|
|
|
||||||
51
triple_tools/json2ttl.py
Normal file
51
triple_tools/json2ttl.py
Normal file
|
|
@ -0,0 +1,51 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from dump_things_service.converter import (
|
||||||
|
Format,
|
||||||
|
FormatConverter,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> int:
    """Convert JSON records read from stdin into TTL documents on stdout.

    The single positional command line argument ``schema`` is handed to
    ``FormatConverter`` to build a JSON-to-TTL converter. Every non-blank
    line of stdin is parsed as one JSON object. The trailing run of
    identifier characters of its ``schema_type`` value is used as the class
    name for the conversion, and the conversion result is written to stdout
    as one JSON-encoded value per line.

    A record that cannot be processed (invalid JSON, not a JSON object,
    missing or unusable ``schema_type``, or a ``ValueError`` raised by the
    converter) is reported on stderr and skipped; the remaining records are
    still converted.

    Returns:
        0 if every record was converted, 1 if at least one record was
        skipped because of an error.
    """
    argument_parser = argparse.ArgumentParser()
    argument_parser.add_argument('schema')

    arguments = argument_parser.parse_args()

    # Progress goes to stderr so that stdout carries converted records only.
    print(f'Creating converter for schema {arguments.schema} ...', file=sys.stderr, end='', flush=True)
    converter = FormatConverter(
        arguments.schema,
        input_format=Format.json,
        output_format=Format.ttl,
    )
    print(' done', file=sys.stderr, flush=True)

    # Matches the trailing run of identifier characters of `schema_type`,
    # i.e. whatever follows the last character that is not `[_A-Za-z0-9]`.
    # Compiled once because it is applied to every input line.
    class_name_pattern = re.compile(r'([_A-Za-z0-9]*$)')

    error = False

    for line in sys.stdin:
        # Tolerate blank lines (e.g. a trailing newline at the end of input).
        if not line.strip():
            continue

        try:
            json_object = json.loads(line)
        except json.JSONDecodeError as jde:
            print(f'ERROR: invalid JSON in {line.rstrip()!r}: {jde}', file=sys.stderr, flush=True)
            error = True
            continue

        if not isinstance(json_object, dict):
            print(f'ERROR: not a JSON object: {json_object}', file=sys.stderr, flush=True)
            error = True
            continue

        object_class = json_object.get('schema_type')
        if object_class is None:
            print(f'ERROR: No schema_type in {json_object}', file=sys.stderr, flush=True)
            error = True
            continue

        if not isinstance(object_class, str):
            print(f'ERROR: schema_type is not a string in {json_object}', file=sys.stderr, flush=True)
            error = True
            continue

        # The pattern can match the empty string, so `search` never returns
        # `None` here; an empty match means there is no usable class name.
        class_name = class_name_pattern.search(object_class).group(0)
        if not class_name:
            print(f'ERROR: No class name in schema_type of {json_object}', file=sys.stderr, flush=True)
            error = True
            continue

        try:
            ttl = converter.convert(json_object, class_name)
        except ValueError as ve:
            print(f'ERROR: conversion failed for {json_object}: {ve}', file=sys.stderr, flush=True)
            error = True
            continue

        print(json.dumps(ttl))

    return 1 if error else 0
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: the integer returned by `main()` becomes the process
# exit status.
if __name__ == '__main__':
    raise SystemExit(main())
|
||||||
Loading…
Add table
Add a link
Reference in a new issue