File size: 3,021 Bytes
63c75d5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import re
import sqlite3

from config import settings


# Lower-case phrases that score_session looks for, as substrings of the
# lower-cased assistant text, to raise the "meta_process_narration" flag.
META_MARKERS = (
    "using the",
    "i'll use",
    "i will use",
    "tool call",
    "internal process",
    "routing this",
)


# Matches "0 results" only when the zero is not the tail of a larger number,
# so that "10 results" is not mistaken for an empty search.
_ZERO_RESULTS_RE = re.compile(r"(?<!\d)0 results")


def _clean_text(row: dict) -> str:
    """Return the row's ``clean_text``, mapping a missing key or ``None`` to ``""``."""
    return row.get("clean_text") or ""


def _is_empty_search(row: dict) -> bool:
    """Tell whether a tool result is a ``session_search`` call that found nothing."""
    if row.get("tool_name", "") != "session_search":
        return False
    lowered = _clean_text(row).lower()
    return bool(_ZERO_RESULTS_RE.search(lowered)) or "no results found" in lowered


def score_session(rows: list[dict]) -> dict:
    """
    Compute a heuristic quality score in [0.0, 1.0] for one session.

    The score starts at 1.0 and is reduced by a fixed penalty for each
    problem detected; every detected problem also adds a string tag to
    ``flags``:

    * ``excessively_long_tool_output`` - some tool results have an
      ``original_length`` above twice ``settings.max_toolresult_chars``
      (penalty: 0.1 scaled by the share of such results).
    * ``high_tool_call_ratio`` - more than five tool results per assistant
      message (penalty: 0.05).
    * ``empty_tool_results`` - a tool result has fewer than five characters
      of text (penalty: 0.05).
    * ``minimal_session`` - fewer than three rows and no tool results
      (flag only, no penalty).
    * ``tool_output_dominates_context`` - tool text is more than three times
      the assistant text (penalty: 0.08).
    * ``meta_process_narration`` - an assistant message contains one of
      ``META_MARKERS`` (penalty: 0.05).
    * ``fts_query_noise`` - more than two ``session_search`` results report
      no hits (penalty: 0.15).

    Values of ``None`` for ``clean_text`` or ``original_length`` (as produced
    when a row is built from a database record with NULL columns) are treated
    like missing keys instead of raising ``TypeError``.

    Args:
        rows: Spooled entries of a single session, one dict per entry. The
            keys read are ``role``, ``clean_text``, ``original_length`` and
            ``tool_name``.

    Returns:
        ``{"score": 0.0, "flags": ["no_data"]}`` when ``rows`` is empty.
        Otherwise a dict with the rounded ``score``, the sorted unique
        ``flags``, and the counters ``tool_result_count``,
        ``assistant_msg_count``, ``user_msg_count``, ``assistant_text_total``
        and ``tool_text_total``.
    """
    if not rows:
        return {"score": 0.0, "flags": ["no_data"]}

    flags = []
    tool_results = [r for r in rows if r.get("role") == "toolResult"]
    assistant_msgs = [r for r in rows if r.get("role") == "assistant"]
    user_msgs = [r for r in rows if r.get("role") == "user"]

    assistant_texts = [_clean_text(r) for r in assistant_msgs]
    tool_texts = [_clean_text(r) for r in tool_results]

    score = 1.0

    if tool_results:
        # The threshold is only needed (and only read from settings) when
        # there is at least one tool result to compare against it.
        long_threshold = settings.max_toolresult_chars * 2
        long_count = sum(
            1 for r in tool_results if (r.get("original_length") or 0) > long_threshold
        )
        if long_count:
            flags.append("excessively_long_tool_output")
            score -= 0.1 * min(1.0, long_count / max(1, len(tool_results)))

    if assistant_msgs and len(tool_results) > len(assistant_msgs) * 5:
        flags.append("high_tool_call_ratio")
        score -= 0.05

    if any(len(text) < 5 for text in tool_texts):
        flags.append("empty_tool_results")
        score -= 0.05

    if len(rows) < 3 and not tool_results:
        flags.append("minimal_session")

    assistant_text_total = sum(len(text) for text in assistant_texts)
    tool_text_total = sum(len(text) for text in tool_texts)
    if assistant_text_total and tool_text_total > assistant_text_total * 3:
        flags.append("tool_output_dominates_context")
        score -= 0.08

    # Empty texts cannot contain a marker, so they are skipped up front.
    narrates_process = any(
        marker in lowered
        for lowered in (text.lower() for text in assistant_texts if text)
        for marker in META_MARKERS
    )
    if narrates_process:
        flags.append("meta_process_narration")
        score -= 0.05

    fts_empty_count = sum(1 for r in tool_results if _is_empty_search(r))
    if fts_empty_count > 2:
        flags.append("fts_query_noise")
        score -= 0.15

    score = max(0.0, min(1.0, score))

    return {
        "score": round(score, 3),
        "flags": sorted(set(flags)),
        "tool_result_count": len(tool_results),
        "assistant_msg_count": len(assistant_msgs),
        "user_msg_count": len(user_msgs),
        "assistant_text_total": assistant_text_total,
        "tool_text_total": tool_text_total,
    }


def get_all_spooled_entries(conn: sqlite3.Connection, since: str | None = None) -> list[dict]:
    if since:
        rows = conn.execute(
            "SELECT * FROM spooled_entries WHERE indexed_at > ? ORDER BY indexed_at",
            (since,),
        ).fetchall()
    else:
        rows = conn.execute("SELECT * FROM spooled_entries ORDER BY indexed_at").fetchall()
    return [dict(r) for r in rows]