Add json2ttl: a streaming JSON->TTL converter #2
3 changed files with 76 additions and 0 deletions
24
README.md
24
README.md
|
|
@ -138,6 +138,30 @@ options:
|
|||
`list-incoming` requires that the environment variable `CURATOR_TOKEN` is set, and contains a valid curator-token
|
||||
|
||||
|
||||
#### json2ttl
|
||||
|
||||
Convert a stream of JSON lines into a stream of TTL lines, i.e., strings that
|
||||
contain TTL documents, with one JSON-encoded string per line.
|
||||
|
||||
```
|
||||
usage: json2ttl [-h] schema
|
||||
|
||||
positional arguments:
|
||||
schema
|
||||
|
||||
options:
|
||||
-h, --help show this help message and exit
|
||||
```
|
||||
|
||||
This can be used, for example, together with `read-pages` to convert all
|
||||
records in a collection to TTL:
|
||||
|
||||
```
|
||||
> read-pages 'https://pool.v0.edu.datalad.org/api' 'public'|json2ttl 'https://concepts.datalad.org/s/demo-research-assets/unreleased.yaml'
|
||||
"@prefix ISSN: <http://identifiers.org/issn/> .\n@prefix bibo: <http://purl.org/ontology/bibo/> .\n@prefix dlcommonmx: <https://concepts.datalad.org/s/common-mixin/unreleased/> .\n@prefix dlrelationsmx: <https://concepts.datalad.org/s/relations-mixin/unreleased/> .\n@prefix xyzra: <https://concepts.datalad.org/s/demo-research-assets/unreleased/> .\n\nISSN:2475-9066 a xyzra:XYZPublicationVenue ;\n dlcommonmx:title \"Journal of Open Source Software\" ;\n dlrelationsmx:kind bibo:Journal .\n\n"
|
||||
...
|
||||
```
|
||||
|
||||
## SPARQL search over a collection with qlever
|
||||
|
||||
To provide SPARQL search for a collection, the following steps are necessary:
|
||||
|
|
|
|||
|
|
@ -42,6 +42,7 @@ build-local-triple-store = "triple_tools.build_local_triple_store:main"
|
|||
clean-incoming = "triple_tools.clean_incoming:main"
|
||||
list-incoming = "triple_tools.list_incoming:main"
|
||||
read-pages = "triple_tools.read_pages:main"
|
||||
json2ttl = "triple_tools.json2ttl:main"
|
||||
|
||||
[tool.hatch.build.targets.wheel]
|
||||
exclude = [
|
||||
|
|
|
|||
51
triple_tools/json2ttl.py
Normal file
51
triple_tools/json2ttl.py
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
|
||||
from dump_things_service.converter import (
|
||||
Format,
|
||||
FormatConverter,
|
||||
)
|
||||
|
||||
|
||||
def main() -> int:
    """Convert JSON records read from stdin into TTL documents on stdout.

    The single positional command line argument ``schema`` is passed to
    ``FormatConverter`` to build a JSON-to-TTL converter. Every non-blank
    line of stdin is parsed as one JSON record; the record's ``schema_type``
    value determines the class name handed to the converter. Each resulting
    TTL document is written to stdout as a JSON-encoded string on a line of
    its own.

    Records that cannot be processed (invalid JSON, missing or non-string
    ``schema_type``, conversion failure) are reported on stderr and skipped,
    so a single bad record does not abort the stream.

    Returns:
        0 if every record was converted, 1 if at least one record was
        skipped because of an error.
    """
    argument_parser = argparse.ArgumentParser()
    argument_parser.add_argument('schema')

    arguments = argument_parser.parse_args()

    # Progress goes to stderr so that stdout carries nothing but TTL lines.
    print(f'Creating converter for schema {arguments.schema} ...', file=sys.stderr, end='', flush=True)
    converter = FormatConverter(
        arguments.schema,
        input_format=Format.json,
        output_format=Format.ttl,
    )
    print(' done', file=sys.stderr, flush=True)

    # The class name is the trailing run of identifier characters of
    # `schema_type` (presumably the part after a CURIE prefix such as
    # `prefix:ClassName` -- confirm against the schema in use). The pattern
    # can match the empty string, so `search` never returns None.
    class_name_pattern = re.compile(r'[_A-Za-z0-9]*$')

    error = False

    for line in sys.stdin:
        # Tolerate blank lines, e.g. a trailing newline at the end of input.
        if not line.strip():
            continue

        try:
            json_object = json.loads(line)
        except json.JSONDecodeError as jde:
            print(f'ERROR: invalid JSON in {line!r}: {jde}', file=sys.stderr, flush=True)
            error = True
            continue

        # Only JSON objects can carry a `schema_type`; anything else is
        # reported the same way as a record without one.
        object_class = json_object.get('schema_type') if isinstance(json_object, dict) else None
        if not isinstance(object_class, str):
            print(f'ERROR: No schema_type in {json_object}', file=sys.stderr, flush=True)
            error = True
            continue

        class_name = class_name_pattern.search(object_class).group(0)
        try:
            ttl = converter.convert(json_object, class_name)
        except ValueError as ve:
            print(f'ERROR: conversion failed for {json_object}: {ve}', file=sys.stderr, flush=True)
            error = True
            continue

        print(json.dumps(ttl))

    # Report skipped records through the exit status.
    return 1 if error else 0
|
||||
|
||||
|
||||
# Script entry point: the value returned by main() becomes the process exit
# status.
if __name__ == '__main__':
    raise SystemExit(main())
|
||||
Loading…
Add table
Add a link
Reference in a new issue