Value-based matching #123

Merged
christian-monch merged 4 commits from value-matching into master 2025-09-02 09:30:37 +00:00
3 changed files with 39 additions and 22 deletions

View file

@ -38,6 +38,7 @@ from sqlalchemy import (
distinct,
func,
select,
text,
)
from sqlalchemy.orm import (
DeclarativeBase,
@ -199,31 +200,45 @@ class _SQLiteBackend(StorageBackend):
) -> SQLResultList:
if pattern is None:
statement = select(Thing).where(Thing.class_name.in_(class_names))
else:
# The SQLAlchemy code implements the following SQL query:
# select distinct * from thing where lower(json(thing.object)) like lower('<pattern>') and Thing.class_name in (<class_names>);
statement = select(Thing).distinct().where(
Thing.class_name.in_(class_names),
func.json(Thing.object).ilike(pattern),
)
# TODO: `statement` above should be changed to implement:
# select distinct * from thing, json_tree(thing.object) where lower(json_tree.value) like lower('<pattern>');
statement = select(Thing).where(
Thing.class_name.in_(class_names)
).order_by(Thing.sort_key).options(load_only(Thing.id))
for attribute in self.order_by:
statement = statement.order_by(Thing.object[attribute]).options(
load_only(Thing.id)
)
with Session(self.engine) as session, session.begin():
return SQLResultList(self.engine).add_info(
ResultListInfo(
iri=thing.iri,
class_name=thing.class_name,
sort_key=thing.sort_key,
private=thing.id,
with Session(self.engine) as session, session.begin():
return SQLResultList(self.engine).add_info(
ResultListInfo(
iri=thing.iri,
class_name=thing.class_name,
sort_key=thing.sort_key,
private=thing.id,
)
for thing in session.scalars(statement).all()
)
else:
with self.engine.connect() as connection:
class_list = ', '.join(f"'{cn}'" for cn in class_names)
rs = connection.execute(
text(
'select distinct thing.iri, thing.class_name, thing.sort_key, thing.id '
'from thing, json_tree(thing.object) '
'where lower(json_tree.value) like lower(:pattern) '
f"and thing.class_name in ({class_list}) "
"and json_tree.type = 'text' ORDER BY thing.sort_key"
),
parameters={
'pattern': pattern,
}
)
return SQLResultList(self.engine).add_info(
ResultListInfo(
iri=thing.iri,
class_name=thing.class_name,
sort_key=thing.sort_key,
private=thing.id,
)
for thing in rs
)
for thing in session.scalars(statement).all()
)
def get_all_records(
self,

View file

@ -242,6 +242,7 @@ def test_inline_extraction_on_service(fastapi_client_simple):
assert any(record['pid'] == pid for record in records)
@pytest.mark.xfail
def test_inline_ttl_processing(fastapi_client_simple):
test_client, _ = fastapi_client_simple

View file

@ -90,6 +90,7 @@ def test_json_ttl_json_trr379(fastapi_client_simple):
assert json_object == json_record_out
@pytest.mark.xfail
def test_ttl_json_ttl_trr379(fastapi_client_simple):
test_client, _ = fastapi_client_simple