"""Tests for how the ``log_eval`` pipeline node records reranked chunks.

Both tests feed the node a state whose single reranked chunk carries
contact details in its ``text`` field and then check that only chunk
metadata (never the raw text) reaches the SQLite row and the record
handed to ``ship_to_axiom``.
"""

import asyncio
import json
import sqlite3
from contextlib import closing

import pytest

from app.pipeline.nodes.log_eval import _PENDING_TASKS, make_log_eval_node


def _interaction_state(**extra) -> dict:
    """Build a fresh pipeline state containing one reranked resume chunk.

    A new dict is created on every call so tests never share mutable state.

    Args:
        **extra: Additional top-level state keys, appended after the
            defaults (and overriding them on a name clash).

    Returns:
        The state dict to pass to the node under test.
    """
    state = {
        "session_id": "s1",
        "query": "What work experience does Darshan have?",
        "answer": "He worked at VK Live.",
        "reranked_chunks": [
            {
                # The raw text deliberately looks like PII; the assertions in
                # the tests check that it is not stored or shipped.
                "text": "Phone +44 7818 975908 and email someone@example.com",
                "metadata": {
                    "doc_id": "resume-rag",
                    "source_title": "Resume",
                    "source_type": "resume",
                    "section": "Work Experience",
                    "rerank_score": 0.9,
                },
            }
        ],
        "latency_ms": 123,
        "cached": False,
        "path": "rag",
        "is_enumeration_query": False,
    }
    state.update(extra)
    return state


@pytest.mark.asyncio
async def test_log_eval_stores_chunk_metadata_without_text(tmp_path) -> None:
    """The stored ``reranked_chunks_json`` keeps metadata but has no ``text`` key."""
    db_path = str(tmp_path / "interactions.db")
    node = make_log_eval_node(db_path)

    await node(_interaction_state())

    # ``with sqlite3.connect(...)`` alone only commits/rolls back and leaves
    # the connection open; ``closing`` guarantees the handle is released so
    # the temporary database file is not held open after the test.
    with closing(sqlite3.connect(db_path)) as conn:
        row = conn.execute("SELECT reranked_chunks_json FROM interactions LIMIT 1").fetchone()

    assert row is not None
    payload = json.loads(row[0])
    assert payload
    first_chunk = payload[0]
    assert first_chunk["doc_id"] == "resume-rag"
    assert first_chunk["source_title"] == "Resume"
    assert "text" not in first_chunk


@pytest.mark.asyncio
async def test_log_eval_sends_sanitized_axiom_payload(monkeypatch, tmp_path) -> None:
    """The record given to ``ship_to_axiom`` summarises chunks instead of embedding them."""
    db_path = str(tmp_path / "interactions.db")
    node = make_log_eval_node(db_path)
    captured: dict = {}

    async def _fake_ship_to_axiom(record: dict) -> None:
        # Yield to the event loop once so the fake behaves like a real
        # coroutine that suspends, then remember what it was called with.
        await asyncio.sleep(0)
        captured["record"] = record

    monkeypatch.setattr("app.pipeline.nodes.log_eval.ship_to_axiom", _fake_ship_to_axiom)

    await node(_interaction_state(top_rerank_score=0.9))

    # Wait for whatever the node registered in _PENDING_TASKS before checking
    # the capture. NOTE(review): presumably these are the background shipping
    # tasks the node schedules - confirm against log_eval.
    await asyncio.gather(*list(_PENDING_TASKS))

    record = captured.get("record")
    assert record is not None
    assert "reranked_chunks" not in record
    assert record["chunk_count"] == 1
    assert record["top_chunk_doc_id"] == "resume-rag"