"""RDFLib-backed KG store for PoC. Stores nodes and simple edges; links DocumentChunk IDs to KG entities using `mentions` predicate. Persists to a TTL file. """ try: from rdflib import Graph, URIRef, Literal, Namespace from rdflib.namespace import RDF, RDFS _HAS_RDFLIB = True except Exception: _HAS_RDFLIB = False from typing import List, Dict, Optional import uuid import os NS_URI = "http://example.org/abalone/" if _HAS_RDFLIB: NS = Namespace(NS_URI) class KGStore: def __init__(self, path: str = "./kg_store.ttl"): self.path = path self.graph = Graph() if os.path.exists(self.path): try: self.graph.parse(self.path, format="turtle") except Exception: # start empty if parse fails self.graph = Graph() def _entity_uri(self, label: str) -> URIRef: safe = label.strip().lower().replace(" ", "_") return URIRef(f"{NS_URI}entity/{safe}") def _chunk_uri(self, chunk_id: str) -> URIRef: return URIRef(f"{NS_URI}chunk/{chunk_id}") def add_entity(self, label: str, description: Optional[str] = None) -> URIRef: u = self._entity_uri(label) self.graph.add((u, RDFS.label, Literal(label))) if description: self.graph.add((u, NS.description, Literal(description))) return u def link_chunk_to_entity(self, chunk_id: str, entity_label: str, sentence: str = "", confidence: float = 0.5): e = self.add_entity(entity_label) c = self._chunk_uri(chunk_id) self.graph.add((c, NS.mentions, e)) # add provenance as reified data on the chunk node self.graph.add((c, NS.sentence, Literal(sentence))) self.graph.add((c, NS.confidence, Literal(str(confidence)))) def add_triple(self, subj_label: str, pred_label: str, obj_label: str, provenance: Optional[Dict] = None): s = self.add_entity(subj_label) o = self.add_entity(obj_label) p = URIRef(f"{NS_URI}relation/{pred_label.strip().lower().replace(' ', '_')}") self.graph.add((s, p, o)) if provenance: # store provenance on subject node for simplicity self.graph.add((s, NS.provenance, Literal(str(provenance)))) def save(self): self.graph.serialize(destination=self.path, format="turtle") def find_chunks_for_entity(self, entity_label: str) -> List[str]: e = self._entity_uri(entity_label) q = f"SELECT ?chunk WHERE {{ ?chunk <{NS_URI}mentions> <{e}> . }}" res = self.graph.query(q) out = [] for r in res: uri = str(r[0]) if uri.startswith(NS_URI + "chunk/"): out.append(uri.split("chunk/", 1)[1]) return out def query_entities(self, text: str) -> List[str]: # naive: find entities whose label appears in text text_l = text.lower() out = [] for s, p, o in self.graph.triples((None, RDFS.label, None)): label = str(o).lower() if label in text_l: out.append(str(s)) return out else: class KGStore: def __init__(self, *args, **kwargs): raise RuntimeError("rdflib is required for KGStore. Install with `pip install rdflib`")