# personabot-api/tests/test_log_eval_privacy.py
# GitHub Actions
# Deploy f4d188a
# 767c4e6
import asyncio
import json
import sqlite3
from contextlib import closing

import pytest

from app.pipeline.nodes.log_eval import _PENDING_TASKS, make_log_eval_node
@pytest.mark.asyncio
async def test_log_eval_stores_chunk_metadata_without_text(tmp_path) -> None:
    """Check that the stored chunk JSON keeps metadata but not the chunk text.

    Runs the log-eval node against a throwaway SQLite file with a single
    reranked chunk whose ``text`` holds a phone number and an email address,
    then inspects the ``reranked_chunks_json`` column of ``interactions``.
    """
    # Fragments of the chunk text that must never reach the database.
    pii_fragments = ("+44 7818 975908", "someone@example.com")

    db_path = str(tmp_path / "interactions.db")
    node = make_log_eval_node(db_path)
    state = {
        "session_id": "s1",
        "query": "What work experience does Darshan have?",
        "answer": "He worked at VK Live.",
        "reranked_chunks": [
            {
                "text": "Phone +44 7818 975908 and email someone@example.com",
                "metadata": {
                    "doc_id": "resume-rag",
                    "source_title": "Resume",
                    "source_type": "resume",
                    "section": "Work Experience",
                    "rerank_score": 0.9,
                },
            }
        ],
        "latency_ms": 123,
        "cached": False,
        "path": "rag",
        "is_enumeration_query": False,
    }

    # NOTE(review): the row is read right after the node returns, so this
    # assumes the SQLite write is finished by then — confirm against log_eval.
    await node(state)

    # A sqlite3 connection used as a context manager only commits/rolls back;
    # it does not close. closing() releases the handle before tmp_path cleanup.
    with closing(sqlite3.connect(db_path)) as conn:
        row = conn.execute("SELECT reranked_chunks_json FROM interactions LIMIT 1").fetchone()

    assert row is not None
    payload = json.loads(row[0])
    assert payload and payload[0]["doc_id"] == "resume-rag"
    assert payload[0]["source_title"] == "Resume"
    assert "text" not in payload[0]

    # The key check alone would miss the text being stored under another key,
    # so also search the whole stored payload for the raw PII fragments.
    serialized = json.dumps(payload)
    for fragment in pii_fragments:
        assert fragment not in serialized
@pytest.mark.asyncio
async def test_log_eval_sends_sanitized_axiom_payload(monkeypatch, tmp_path) -> None:
    """Check that the record handed to ``ship_to_axiom`` carries no chunk text.

    ``ship_to_axiom`` in ``app.pipeline.nodes.log_eval`` is replaced with a
    fake that captures its argument. The node is run with one reranked chunk
    whose ``text`` holds a phone number and an email address, and the captured
    record is then inspected.
    """
    # Fragments of the chunk text that must never be shipped.
    pii_fragments = ("+44 7818 975908", "someone@example.com")

    db_path = str(tmp_path / "interactions.db")
    node = make_log_eval_node(db_path)
    captured: dict = {}

    async def _fake_ship_to_axiom(record: dict) -> None:
        """Record the payload instead of shipping it anywhere."""
        # Yield to the event loop once so the fake does not complete synchronously.
        await asyncio.sleep(0)
        captured["record"] = record

    monkeypatch.setattr("app.pipeline.nodes.log_eval.ship_to_axiom", _fake_ship_to_axiom)

    await node(
        {
            "session_id": "s1",
            "query": "What work experience does Darshan have?",
            "answer": "He worked at VK Live.",
            "reranked_chunks": [
                {
                    "text": "Phone +44 7818 975908 and email someone@example.com",
                    "metadata": {
                        "doc_id": "resume-rag",
                        "source_title": "Resume",
                        "source_type": "resume",
                        "section": "Work Experience",
                        "rerank_score": 0.9,
                    },
                }
            ],
            "latency_ms": 123,
            "cached": False,
            "path": "rag",
            "is_enumeration_query": False,
            "top_rerank_score": 0.9,
        }
    )

    # Wait for every task currently registered in _PENDING_TASKS; presumably
    # the node schedules the Axiom shipment there — confirm against log_eval.
    await asyncio.gather(*list(_PENDING_TASKS))

    record = captured.get("record")
    assert record is not None
    assert "reranked_chunks" not in record
    assert record["chunk_count"] == 1
    assert record["top_chunk_doc_id"] == "resume-rag"

    # Dropping the "reranked_chunks" key is not enough if the text is shipped
    # under another field, so search the whole record for the PII fragments.
    rendered = repr(record)
    for fragment in pii_fragments:
        assert fragment not in rendered