Spaces:
Running
Running
File size: 2,888 Bytes
acfcc03 d1df4ad acfcc03 d1df4ad 767c4e6 d1df4ad 767c4e6 d1df4ad acfcc03 fd63c95 acfcc03 fd63c95 acfcc03 fd63c95 acfcc03 767c4e6 acfcc03 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 | import asyncio
import json
import sqlite3
import pytest
from app.pipeline.nodes.log_eval import _PENDING_TASKS, make_log_eval_node
@pytest.mark.asyncio
async def test_log_eval_stores_chunk_metadata_without_text(tmp_path) -> None:
    """Check that the stored interaction row keeps chunk metadata but not chunk text.

    Builds a log-eval node backed by a fresh SQLite file under ``tmp_path``,
    runs it once with a single reranked chunk whose ``text`` contains
    phone/email-like content, then reads ``reranked_chunks_json`` back from
    the ``interactions`` table and verifies that only metadata was persisted.
    """
    db_path = str(tmp_path / "interactions.db")
    node = make_log_eval_node(db_path)
    state = {
        "session_id": "s1",
        "query": "What work experience does Darshan have?",
        "answer": "He worked at VK Live.",
        "reranked_chunks": [
            {
                "text": "Phone +44 7818 975908 and email someone@example.com",
                "metadata": {
                    "doc_id": "resume-rag",
                    "source_title": "Resume",
                    "source_type": "resume",
                    "section": "Work Experience",
                    "rerank_score": 0.9,
                },
            }
        ],
        "latency_ms": 123,
        "cached": False,
        "path": "rag",
        "is_enumeration_query": False,
    }

    await node(state)

    # Using a sqlite3 connection as a context manager only commits/rolls back
    # the transaction; it does not close the connection. Close it explicitly
    # so the test does not leak an open handle on the temporary database file.
    conn = sqlite3.connect(db_path)
    try:
        row = conn.execute(
            "SELECT reranked_chunks_json FROM interactions LIMIT 1"
        ).fetchone()
    finally:
        conn.close()

    assert row is not None
    payload = json.loads(row[0])
    assert payload and payload[0]["doc_id"] == "resume-rag"
    assert payload[0]["source_title"] == "Resume"
    # The raw chunk text must never reach the persisted JSON.
    assert "text" not in payload[0]
@pytest.mark.asyncio
async def test_log_eval_sends_sanitized_axiom_payload(monkeypatch, tmp_path) -> None:
    """Check the record handed to ``ship_to_axiom`` is a sanitized summary.

    ``ship_to_axiom`` in the log-eval module is replaced with a stub that
    remembers the record it receives. After the node runs and every task in
    ``_PENDING_TASKS`` has been awaited, the captured record must not carry
    ``reranked_chunks`` and must expose the chunk count and the top chunk's
    document id instead.
    """
    db_path = str(tmp_path / "interactions.db")
    node = make_log_eval_node(db_path)
    shipped: dict = {}

    async def _capture_shipment(record: dict) -> None:
        # Yield to the event loop once before storing the record.
        await asyncio.sleep(0)
        shipped["record"] = record

    monkeypatch.setattr(
        "app.pipeline.nodes.log_eval.ship_to_axiom",
        _capture_shipment,
    )

    chunk = {
        "text": "Phone +44 7818 975908 and email someone@example.com",
        "metadata": {
            "doc_id": "resume-rag",
            "source_title": "Resume",
            "source_type": "resume",
            "section": "Work Experience",
            "rerank_score": 0.9,
        },
    }
    state = {
        "session_id": "s1",
        "query": "What work experience does Darshan have?",
        "answer": "He worked at VK Live.",
        "reranked_chunks": [chunk],
        "latency_ms": 123,
        "cached": False,
        "path": "rag",
        "is_enumeration_query": False,
        "top_rerank_score": 0.9,
    }
    await node(state)

    # Snapshot the pending tasks first, then wait for all of them to finish.
    pending = tuple(_PENDING_TASKS)
    await asyncio.gather(*pending)

    record = shipped.get("record")
    assert record is not None
    assert "reranked_chunks" not in record
    assert record["chunk_count"] == 1
    assert record["top_chunk_doc_id"] == "resume-rag"
|