PDF-Assit_RAG / backend /tests /test_graph_builder.py
Param20h's picture
deploy: pure backend API with keywords fix
7c46845 unverified
Raw
History Blame Contribute Delete
2.75 kB
import json
from app.rag import graph_builder
class FakeEntity:
    """Minimal stand-in for a recognised entity returned by the fake pipeline.

    Carries only the two attributes set here: ``text`` (the matched surface
    string) and ``label_`` (the entity label).
    NOTE(review): the attribute names presumably mirror spaCy's span API --
    confirm against what ``graph_builder`` actually reads.
    """

    def __init__(self, text: str, label: str) -> None:
        self.text = text
        # Trailing underscore is deliberate: the label is exposed as ``label_``.
        self.label_ = label

    def __repr__(self) -> str:
        # Readable repr so a failing assertion shows which entity was involved.
        return f"{type(self).__name__}(text={self.text!r}, label_={self.label_!r})"
class FakeDoc:
    """Minimal stand-in for a processed document.

    Exposes a single attribute, ``ents``, holding exactly the object passed to
    the constructor (no copy is made).
    """

    def __init__(self, entities: list) -> None:
        self.ents = entities

    def __repr__(self) -> str:
        # Readable repr so a failing assertion shows the contained entities.
        return f"{type(self).__name__}(ents={self.ents!r})"
class FakeNlp:
    """Deterministic callable that replaces ``graph_builder._nlp`` in tests.

    Calling an instance with a string returns a ``FakeDoc`` whose ``ents``
    contains one ``FakeEntity`` for every known surface string that occurs in
    the text as a substring. Entities come back in table order, not in the
    order they appear in the text.
    """

    # (surface text, label) pairs this fake recognises. The DATE entry lets a
    # test feed in an entity whose label is expected to be filtered out.
    _KNOWN_ENTITIES = (
        ("OpenAI", "ORG"),
        ("Microsoft", "ORG"),
        ("Azure", "PRODUCT"),
        ("Ignored Date", "DATE"),
    )

    def __call__(self, text):
        """Return a ``FakeDoc`` with an entity for each known string found in *text*."""
        return FakeDoc(
            [
                FakeEntity(value, label)
                for value, label in self._KNOWN_ENTITIES
                if value in text
            ]
        )
def test_extract_entities_filters_configured_labels(monkeypatch):
    """Extraction keeps the two ORG entities and drops the DATE one.

    The fake pipeline reports "OpenAI", "Microsoft" and "Ignored Date" for the
    sentence below; only the ORG-labelled pair is expected back.
    """
    monkeypatch.setattr(graph_builder, "_nlp", FakeNlp())

    sentence = "OpenAI works with Microsoft on Ignored Date"
    found = graph_builder.extract_entities(sentence)

    surface_forms = {item.text for item in found}
    assert surface_forms == {"OpenAI", "Microsoft"}

    labels_seen = {item.label for item in found}
    assert labels_seen == {"ORG"}
def test_build_graph_tracks_entity_edges_and_weights(monkeypatch):
    """Two chunks sharing OpenAI/Microsoft yield one weighted edge spanning both pages.

    Also checks that the OpenAI node records its display name and both pages,
    and that Microsoft and Azure (seen together in the second chunk) are linked.
    """
    monkeypatch.setattr(graph_builder, "_nlp", FakeNlp())

    page_one = {"text": "OpenAI works with Microsoft.", "page": 1, "chunk_index": 0}
    page_two = {"text": "OpenAI and Microsoft use Azure.", "page": 2, "chunk_index": 1}
    graph = graph_builder.build_graph([page_one, page_two])

    # Node ids as asserted by this test: "<LABEL>:<lower-cased name>".
    openai, microsoft, azure = "ORG:openai", "ORG:microsoft", "PRODUCT:azure"

    openai_node = graph.nodes[openai]
    assert openai_node["name"] == "OpenAI"
    assert openai_node["pages"] == [1, 2]

    shared_edge = graph[openai][microsoft]
    assert shared_edge["weight"] == 2
    assert shared_edge["pages"] == [1, 2]

    assert graph.has_edge(microsoft, azure)
def test_save_load_and_delete_graph_roundtrip(tmp_path, monkeypatch):
    """A saved graph can be read back as JSON, reloaded, and then deleted.

    Persistence is redirected to pytest's ``tmp_path`` so nothing is written
    outside the test's temporary directory.
    """
    monkeypatch.setattr(graph_builder.settings, "GRAPH_PERSIST_DIR", str(tmp_path))
    # Patch the pipeline as the other build_graph tests do, so this test never
    # depends on a real NLP model being available.
    monkeypatch.setattr(graph_builder, "_nlp", FakeNlp())

    graph = graph_builder.build_graph([])
    graph.add_node(
        "ORG:openai",
        name="OpenAI",
        label="ORG",
        mentions=1,
        pages=[1],
        chunks=[0],
    )

    path = graph_builder.save_graph(graph, user_id="user-1", document_id="doc-1")
    payload = json.loads(path.read_text(encoding="utf-8"))
    loaded = graph_builder.load_graph(user_id="user-1", document_id="doc-1")

    assert payload["metadata"]["document_id"] == "doc-1"
    assert loaded.nodes["ORG:openai"]["name"] == "OpenAI"

    graph_builder.delete_graph(user_id="user-1", document_id="doc-1")
    assert not path.exists()
def test_empty_chunks_produce_empty_graph(monkeypatch):
    """Building from an empty chunk list gives a graph with no nodes and no edges."""
    monkeypatch.setattr(graph_builder, "_nlp", FakeNlp())

    empty_graph = graph_builder.build_graph([])

    node_count = empty_graph.number_of_nodes()
    assert node_count == 0
    edge_count = empty_graph.number_of_edges()
    assert edge_count == 0