File size: 2,888 Bytes
acfcc03
d1df4ad
 
 
 
 
acfcc03
d1df4ad
 
767c4e6
 
d1df4ad
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
767c4e6
d1df4ad
 
 
 
 
 
 
 
 
acfcc03
 
 
fd63c95
acfcc03
 
 
 
fd63c95
acfcc03
 
 
fd63c95
acfcc03
767c4e6
acfcc03
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import asyncio
import json
import sqlite3

import pytest

from app.pipeline.nodes.log_eval import _PENDING_TASKS, make_log_eval_node


@pytest.mark.asyncio
async def test_log_eval_stores_chunk_metadata_without_text(tmp_path) -> None:
    """Check that the persisted chunk JSON keeps metadata but drops chunk text.

    Runs the log-eval node against a throwaway SQLite file under ``tmp_path``
    and inspects the ``reranked_chunks_json`` column of the ``interactions``
    table. The chunk text deliberately contains a phone number and an email
    address, so a stored ``text`` key would be an obvious leak.
    """
    db_path = str(tmp_path / "interactions.db")
    node = make_log_eval_node(db_path)

    state = {
        "session_id": "s1",
        "query": "What work experience does Darshan have?",
        "answer": "He worked at VK Live.",
        "reranked_chunks": [
            {
                "text": "Phone +44 7818 975908 and email someone@example.com",
                "metadata": {
                    "doc_id": "resume-rag",
                    "source_title": "Resume",
                    "source_type": "resume",
                    "section": "Work Experience",
                    "rerank_score": 0.9,
                },
            }
        ],
        "latency_ms": 123,
        "cached": False,
        "path": "rag",
        "is_enumeration_query": False,
    }

    await node(state)

    # ``with sqlite3.connect(...)`` only scopes the transaction; it does not
    # close the connection. Close it explicitly so the handle on the temp
    # database is released and no ResourceWarning is raised.
    conn = sqlite3.connect(db_path)
    try:
        row = conn.execute("SELECT reranked_chunks_json FROM interactions LIMIT 1").fetchone()
    finally:
        conn.close()

    assert row is not None
    payload = json.loads(row[0])
    # Metadata fields must survive serialization...
    assert payload and payload[0]["doc_id"] == "resume-rag"
    assert payload[0]["source_title"] == "Resume"
    # ...while the raw chunk text must not be stored.
    assert "text" not in payload[0]


@pytest.mark.asyncio
async def test_log_eval_sends_sanitized_axiom_payload(monkeypatch, tmp_path) -> None:
    """Check the record handed to ``ship_to_axiom`` carries no raw chunks.

    ``ship_to_axiom`` in the log-eval module is replaced with a fake that
    remembers the record it receives. After the node has run, the record is
    expected to lack a ``reranked_chunks`` key and to expose the summary
    fields ``chunk_count`` and ``top_chunk_doc_id`` instead.
    """
    shipped: dict = {}

    async def _record_shipment(record: dict) -> None:
        # Yield to the event loop once before storing, as a real async
        # shipper would not complete synchronously.
        await asyncio.sleep(0)
        shipped["record"] = record

    monkeypatch.setattr("app.pipeline.nodes.log_eval.ship_to_axiom", _record_shipment)

    node = make_log_eval_node(str(tmp_path / "interactions.db"))

    chunk = {
        "text": "Phone +44 7818 975908 and email someone@example.com",
        "metadata": {
            "doc_id": "resume-rag",
            "source_title": "Resume",
            "source_type": "resume",
            "section": "Work Experience",
            "rerank_score": 0.9,
        },
    }
    state = {
        "session_id": "s1",
        "query": "What work experience does Darshan have?",
        "answer": "He worked at VK Live.",
        "reranked_chunks": [chunk],
        "latency_ms": 123,
        "cached": False,
        "path": "rag",
        "is_enumeration_query": False,
        "top_rerank_score": 0.9,
    }

    await node(state)

    # NOTE(review): _PENDING_TASKS presumably holds the background tasks the
    # node schedules — confirm in log_eval. Snapshot it before awaiting so the
    # collection can change while the tasks finish.
    outstanding = list(_PENDING_TASKS)
    await asyncio.gather(*outstanding)

    record = shipped.get("record")
    assert record is not None
    assert "reranked_chunks" not in record
    assert record["chunk_count"] == 1
    assert record["top_chunk_doc_id"] == "resume-rag"