Spaces:
Sleeping
Sleeping
| """Standalone validation: Graph-RAG engine (no LLM token required).""" | |
| import sys, json | |
| sys.path.insert(0, ".") | |
| from backend.rag import RAGEngine | |
| engine = RAGEngine(chunk_size=95, chunk_overlap=0) | |
| doc = ( | |
| "Alpha Dynamics acquired Beta Labs in 2023 to expand its diagnostics portfolio.\n\n" | |
| "Beta Labs later formed a strategic alliance with Orion Health for hospital analytics.\n\n" | |
| "Orion Health announced a joint research program with Nova BioSystems focused on predictive care." | |
| ) | |
| engine.ingest(doc) | |
| # ββ 1. Graph index ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| print("=== GRAPH INDEX ===") | |
| nodes = len(engine._entity_to_chunks) | |
| edges = sum(len(v) for v in engine._entity_graph.values()) // 2 | |
| print(f" nodes : {nodes}") | |
| print(f" edges : {edges}") | |
| print(f" chunks : {len(engine.chunks)}") | |
| top = sorted(engine._entity_to_chunks.items(), key=lambda x: len(x[1]), reverse=True)[:5] | |
| print(" top entities:", [(e, sorted(c)) for e, c in top]) | |
| assert nodes > 0, "Graph must have nodes" | |
| assert edges > 0, "Graph must have edges" | |
| # ββ 2. Retrieval β 2-hop ββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| print("\n=== RETRIEVAL: 2-hop ===") | |
| r = engine.retrieve("How is Orion Health connected to Alpha Dynamics?") | |
| for c in r: | |
| print(f" chunk {c.index} score={c.score:.4f} {c.text[:70]}") | |
| assert any(c.index == 0 for c in r), "Must include Alpha Dynamics chunk" | |
| assert any(c.index == 1 for c in r), "Must include bridge chunk (Beta Labs)" | |
| # ββ 3. Retrieval β 3-hop ββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| print("\n=== RETRIEVAL: 3-hop ===") | |
| r2 = engine.retrieve("What links Nova BioSystems to Alpha Dynamics?") | |
| for c in r2: | |
| print(f" chunk {c.index} score={c.score:.4f} {c.text[:70]}") | |
| assert len(r2) >= 2, "Needs at least 2 bridge chunks for 3-hop" | |
| # ββ 4. JSON parser β clean JSON βββββββββββββββββββββββββββββββββββββββββββββββ | |
| print("\n=== JSON PARSER ===") | |
| sample = { | |
| "answer": "Alpha Dynamics -> Beta Labs -> Orion Health", | |
| "reasoning_type": "multi-hop", | |
| "path": ["Alpha Dynamics -> Beta Labs", "Beta Labs -> Orion Health"], | |
| "used_chunks": ["0", "1"], | |
| "justification": "Alpha acquired Beta, Beta allied with Orion.", | |
| } | |
| p = engine._parse_graph_response(json.dumps(sample)) | |
| assert p["reasoning_type"] == "multi-hop" | |
| assert len(p["path"]) == 2 | |
| print(f" reasoning_type : {p['reasoning_type']}") | |
| print(f" path : {p['path']}") | |
| print(f" used_chunks : {p['used_chunks']}") | |
| # ββ 5. JSON parser β markdown fenced βββββββββββββββββββββββββββββββββββββββββ | |
| fenced = "```json\n" + json.dumps(sample) + "\n```" | |
| p2 = engine._parse_graph_response(fenced) | |
| assert p2["reasoning_type"] == "multi-hop", "Must strip markdown fences" | |
| print(f" fenced input parsed OK : {p2['reasoning_type']}") | |
| # ββ 6. JSON parser β fallback on non-JSON ββββββββββββββββββββββββββββββββββββ | |
| p3 = engine._parse_graph_response("I cannot answer that question.") | |
| assert p3["reasoning_type"] == "direct" | |
| print(f" fallback answer: {p3['answer'][:50]}") | |
| # ββ 7. Negative: completely unknown entity (not in graph at all) ββββββββββββββ | |
| print("\n=== RETRIEVAL: negative (unknown entity) ===") | |
| r3 = engine.retrieve("Who is the founder of ZetaCorp Robotics?") | |
| _, conf = engine._confidence_from_retrieved(r3) | |
| print(f" confidence_label: {conf} (expected Low β ZetaCorp is not in graph)") | |
| assert conf == "Low", "Unknown-entity queries should produce Low confidence" | |
| print("\nALL ASSERTIONS PASSED - Graph-RAG engine fully validated.") | |