Spaces:
Sleeping
Sleeping
File size: 4,149 Bytes
"""Standalone validation: Graph-RAG engine (no LLM token required)."""
import sys, json
sys.path.insert(0, ".")
from backend.rag import RAGEngine
# Fixture corpus: three single-sentence paragraphs separated by blank lines.
# Read in order they chain four organisations together:
# Alpha Dynamics -> Beta Labs -> Orion Health -> Nova BioSystems.
paragraphs = [
    "Alpha Dynamics acquired Beta Labs in 2023 to expand its diagnostics portfolio.",
    "Beta Labs later formed a strategic alliance with Orion Health for hospital analytics.",
    "Orion Health announced a joint research program with Nova BioSystems focused on predictive care.",
]
doc = "\n\n".join(paragraphs)

# Chunking parameters are pinned; presumably this keeps the chunk indices that
# the retrieval checks below refer to stable - confirm against RAGEngine.
engine = RAGEngine(chunk_size=95, chunk_overlap=0)
engine.ingest(doc)
# ── 1. Graph index ────────────────────────────────────────────────────────────
# Report the size of the entity graph built during ingest and require that it
# is non-empty.
print("=== GRAPH INDEX ===")
entity_chunks = engine._entity_to_chunks
adjacency = engine._entity_graph

nodes = len(entity_chunks)
# The neighbour counts are summed and halved - presumably because every edge is
# recorded under both of its endpoints; confirm against RAGEngine.
edges = sum(map(len, adjacency.values())) // 2

print(f" nodes : {nodes}")
print(f" edges : {edges}")
print(f" chunks : {len(engine.chunks)}")

# Five entities that map to the most chunks, largest first.
by_chunk_count = sorted(
    entity_chunks.items(),
    key=lambda item: len(item[1]),
    reverse=True,
)
top = by_chunk_count[:5]
print(" top entities:", [(entity, sorted(chunk_ids)) for entity, chunk_ids in top])

assert nodes > 0, "Graph must have nodes"
assert edges > 0, "Graph must have edges"
# ── 2. Retrieval — 2-hop ──────────────────────────────────────────────────────
# The question names two entities that never share a sentence in the corpus;
# the results must contain both chunk 0 and chunk 1.
print("\n=== RETRIEVAL: 2-hop ===")
two_hop_hits = engine.retrieve("How is Orion Health connected to Alpha Dynamics?")
for hit in two_hop_hits:
    print(f" chunk {hit.index} score={hit.score:.4f} {hit.text[:70]}")

# Collect the returned chunk indices once, then test membership.
hit_indices = [hit.index for hit in two_hop_hits]
assert 0 in hit_indices, "Must include Alpha Dynamics chunk"
assert 1 in hit_indices, "Must include bridge chunk (Beta Labs)"
# ── 3. Retrieval — 3-hop ──────────────────────────────────────────────────────
# Same shape as the 2-hop check, but only the number of returned chunks is
# verified, not which ones they are.
print("\n=== RETRIEVAL: 3-hop ===")
three_hop_hits = engine.retrieve("What links Nova BioSystems to Alpha Dynamics?")
for hit in three_hop_hits:
    print(f" chunk {hit.index} score={hit.score:.4f} {hit.text[:70]}")

hit_count = len(three_hop_hits)
assert hit_count >= 2, "Needs at least 2 bridge chunks for 3-hop"
# ── 4. JSON parser — clean JSON ───────────────────────────────────────────────
# Feed the parser a well-formed JSON document and confirm the fields survive.
print("\n=== JSON PARSER ===")
hops = ["Alpha Dynamics -> Beta Labs", "Beta Labs -> Orion Health"]
# `sample` is reused by the fenced-input check further down, so it keeps its name.
sample = {
    "answer": "Alpha Dynamics -> Beta Labs -> Orion Health",
    "reasoning_type": "multi-hop",
    "path": hops,
    "used_chunks": ["0", "1"],
    "justification": "Alpha acquired Beta, Beta allied with Orion.",
}

clean_payload = json.dumps(sample)
parsed = engine._parse_graph_response(clean_payload)

assert parsed["reasoning_type"] == "multi-hop"
assert len(parsed["path"]) == 2

print(f" reasoning_type : {parsed['reasoning_type']}")
print(f" path : {parsed['path']}")
print(f" used_chunks : {parsed['used_chunks']}")
# ── 5. JSON parser — markdown fenced ──────────────────────────────────────────
# Same payload as the clean-JSON check, wrapped in a ```json code fence; the
# parser is expected to strip the fence before decoding.
fenced = "\n".join(["```json", json.dumps(sample), "```"])
fenced_result = engine._parse_graph_response(fenced)
assert fenced_result["reasoning_type"] == "multi-hop", "Must strip markdown fences"
print(f" fenced input parsed OK : {fenced_result['reasoning_type']}")
# ── 6. JSON parser — fallback on non-JSON ─────────────────────────────────────
# Plain prose instead of JSON: the parser must still return a mapping, with
# reasoning_type set to "direct".
non_json_reply = "I cannot answer that question."
fallback = engine._parse_graph_response(non_json_reply)
assert fallback["reasoning_type"] == "direct"

# Only the first 50 characters of the fallback answer are shown.
answer_preview = fallback["answer"][:50]
print(f" fallback answer: {answer_preview}")
# ── 7. Negative: completely unknown entity (not in graph at all) ──────────────
# Ask about an organisation that never appears in the ingested corpus and
# require the engine to label its own confidence as "Low".
print("\n=== RETRIEVAL: negative (unknown entity) ===")
r3 = engine.retrieve("Who is the founder of ZetaCorp Robotics?")
# The helper's result is unpacked as a pair; only the second item (the label)
# is checked here.
_, conf = engine._confidence_from_retrieved(r3)
# The separator in this message was a mis-encoded character; a plain hyphen is
# used so the line prints cleanly on any console encoding.
print(f" confidence_label: {conf} (expected Low - ZetaCorp is not in graph)")
assert conf == "Low", "Unknown-entity queries should produce Low confidence"

# Every check in this script is an `assert`, and Python drops assert statements
# when run with -O / PYTHONOPTIMIZE (where __debug__ is False). Refuse to print
# the success banner in that mode, because nothing was actually verified.
if not __debug__:
    raise SystemExit(
        "Assertions are disabled (python -O / PYTHONOPTIMIZE); "
        "re-run without optimisation to validate."
    )
print("\nALL ASSERTIONS PASSED - Graph-RAG engine fully validated.")
|