File size: 6,137 Bytes
ad3d8b0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
import json
import os
import sys
from types import SimpleNamespace
from pathlib import Path

import pytest

# Put the parent of this file's directory on sys.path so the `backend.*`
# imports below can resolve (presumably the repository root -- verify layout).
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))

from backend.app.services import metrics_logger as ml  # noqa: E402
from backend.app.services import scrape_pipeline as sp  # noqa: E402


class DummyProgress:
    """Callable placeholder passed as ``progress``; every call is a no-op."""

    def __call__(self, *_ignored_args, **_ignored_kwargs):
        """Accept any positional/keyword arguments and return ``None``."""


@pytest.mark.asyncio
async def test_cache_hit_metrics(monkeypatch):
    """With ``is_cached`` stubbed to True, the returned stats report a cache hit.

    The last element of the 7-tuple returned by ``run_full_research_new`` is
    expected to carry ``cache_hit is True`` and a non-negative ``tcr_seconds``.
    """

    def fake_cached_knowledge(url):
        return {"metadata": {"name": "CachedSite", "url": url, "pages_scraped": 2}}

    # Canned replacements for the attributes patched on the pipeline module.
    pipeline_stubs = {
        "ENABLE_METRICS": True,
        "is_cached": lambda url: True,
        "get_cached_knowledge": fake_cached_knowledge,
        "knowledge_to_chatbot_context": lambda knowledge: "ctx",
        "build_status_new": lambda *args, **kwargs: "status",
    }
    for attr_name, replacement in pipeline_stubs.items():
        monkeypatch.setattr(sp, attr_name, replacement)
    monkeypatch.setattr(sp.gr, "update", lambda **kwargs: {"update": kwargs})

    # Only the seventh (last) element, the stats mapping, is inspected.
    _, _, _, _, _, _, stats = await sp.run_full_research_new(
        "https://example.com",
        progress=DummyProgress(),
    )

    assert stats["cache_hit"] is True
    assert "tcr_seconds" in stats
    assert stats["tcr_seconds"] >= 0


@pytest.mark.asyncio
async def test_tcr_metrics_non_cache(monkeypatch, tmp_path):
    """With ``is_cached`` stubbed to False, stats report no cache hit.

    Scraping, gap analysis, knowledge building and saving are all replaced by
    canned stubs; the stats element of the result must carry
    ``cache_hit is False`` and a non-negative ``tcr_seconds``.
    """

    def fake_scrape(url):
        home_page = {
            "title": "Home",
            "description": "",
            "sections": [],
            "content": "",
            "url": url,
            "page_type": "homepage",
        }
        return {"success": True, "total_pages": 1, "pages": [home_page], "errors": []}

    def fake_gap_analysis(scraped_content, url):
        return SimpleNamespace(
            has_gaps=False,
            gaps_found=[],
            confidence_score=10,
            recommended_searches=[],
        )

    # Canned replacements for the attributes patched on the pipeline module.
    pipeline_stubs = {
        "ENABLE_METRICS": True,
        "is_cached": lambda url: False,
        "scrape_website": fake_scrape,
        "format_scraped_content_for_context": lambda scraped_data: "content",
        "analyze_content_gaps": fake_gap_analysis,
        "knowledge_to_chatbot_context": lambda knowledge: "ctx",
        "extract_name_from_text": lambda text, url: "Site",
        "create_knowledge_json": lambda url, scraped_data, web_search_results, raw_name: {},
        "save_knowledge_json": lambda knowledge, url: tmp_path / "stub.json",
        "build_status_new": lambda *args, **kwargs: "status",
    }
    for attr_name, replacement in pipeline_stubs.items():
        monkeypatch.setattr(sp, attr_name, replacement)
    monkeypatch.setattr(sp.gr, "update", lambda **kwargs: {"update": kwargs})

    # Only the seventh (last) element, the stats mapping, is inspected.
    _, _, _, _, _, _, stats = await sp.run_full_research_new(
        "https://example.com",
        progress=DummyProgress(),
    )

    assert stats["cache_hit"] is False
    assert "tcr_seconds" in stats
    assert stats["tcr_seconds"] >= 0


def test_log_chat_answer(tmp_path):
    """One ``log_chat_answer`` call leaves a single JSON line with the given fields."""
    logged_fields = {
        "question": "Q?",
        "answer": "A!",
        "provenance": "primary_only",
        "user": "user@example.com",
    }
    destination = tmp_path / "chat.jsonl"

    ml.log_chat_answer(**logged_fields, log_path=destination)

    lines = destination.read_text(encoding="utf-8").strip().splitlines()
    assert len(lines) == 1

    # Each field passed in must come back unchanged from the JSON record.
    entry = json.loads(lines[0])
    for key, expected in logged_fields.items():
        assert entry[key] == expected


def test_save_job_metrics_no_supabase(monkeypatch):
    """``save_job_metrics_to_supabase`` must not raise when the client getter returns None."""

    def no_client():
        return None

    monkeypatch.setattr(ml, "get_supabase_client", no_client)

    # Passing means the call below completed without an exception.
    job_stats = {"cache_hit": True}
    ml.save_job_metrics_to_supabase("https://example.com", job_stats)


def test_save_chat_answer_no_supabase(monkeypatch):
    """``save_chat_answer_to_supabase`` must not raise when the client getter returns None."""

    def no_client():
        return None

    monkeypatch.setattr(ml, "get_supabase_client", no_client)

    # Passing means the call below completed without an exception.
    ml.save_chat_answer_to_supabase("q", "a", system_prompt="ctx")


def test_save_job_metrics_payload(monkeypatch):
    """Check the row ``save_job_metrics_to_supabase`` hands to the Supabase client.

    A recording fake client stores the table name, the inserted payload and
    whether ``execute()`` ran, so they can be inspected after the call.
    """
    seen = {}

    class RecordingTable:
        def __init__(self, name):
            self.name = name

        def insert(self, payload):
            # Record the target table and row; return self so .execute() chains.
            seen["table"], seen["payload"] = self.name, payload
            return self

        def execute(self):
            seen["executed"] = True
            return True

    class RecordingClient:
        def table(self, name):
            return RecordingTable(name)

    # The class itself serves as the zero-argument factory for the client.
    monkeypatch.setattr(ml, "get_supabase_client", RecordingClient)

    job_stats = {
        "cache_hit": True,
        "tcr_seconds": 1.5,
        "searches_run": 2,
        "pages_scraped": 3,
        "gaps_found": 1,
    }
    ml.save_job_metrics_to_supabase("https://example.com", job_stats, user_id="user-1")

    assert seen["table"] == "metrics_job_runs"
    row = seen["payload"]
    assert row["cache_hit"] is True
    expected_fields = {
        "url": "https://example.com",
        "tcr_seconds": 1.5,
        "searches_run": 2,
        "pages_scraped": 3,
        "gaps_found": 1,
        "user_id": "user-1",
    }
    for field, expected in expected_fields.items():
        assert row[field] == expected
    assert seen["executed"] is True


def test_save_chat_answer_payload(monkeypatch):
    """Check the row ``save_chat_answer_to_supabase`` hands to the Supabase client.

    A recording fake client stores the table name, the inserted payload and
    whether ``execute()`` ran. With the system prompt used here the payload's
    ``provenance`` is expected to be ``primary_plus_secondary``.
    """
    seen = {}

    class RecordingTable:
        def __init__(self, name):
            self.name = name

        def insert(self, payload):
            # Record the target table and row; return self so .execute() chains.
            seen["table"], seen["payload"] = self.name, payload
            return self

        def execute(self):
            seen["executed"] = True
            return True

    class RecordingClient:
        def table(self, name):
            return RecordingTable(name)

    # The class itself serves as the zero-argument factory for the client.
    monkeypatch.setattr(ml, "get_supabase_client", RecordingClient)

    ml.save_chat_answer_to_supabase(
        question="How?",
        answer="Here",
        system_prompt="Contains SECONDARY SOURCE",
        user_id="user-2",
        url="https://example.com",
    )

    assert seen["table"] == "metrics_chat_answers"
    row = seen["payload"]
    expected_fields = {
        "question": "How?",
        "answer": "Here",
        "provenance": "primary_plus_secondary",
        "url": "https://example.com",
        "user_id": "user-2",
    }
    for field, expected in expected_fields.items():
        assert row[field] == expected
    assert seen["executed"] is True