import sqlite3

from config import settings

# Lower-case substrings; an assistant message whose text contains any of them
# is flagged as "meta_process_narration" by score_session().
META_MARKERS = (
    "using the",
    "i'll use",
    "i will use",
    "tool call",
    "internal process",
    "routing this",
)


def _entry_text(row: dict) -> str:
    """Return the row's ``clean_text``, treating a missing or ``None`` value as ""."""
    # ``dict.get(key, "")`` is not enough: a row converted from a SQL result
    # has the key present with value ``None`` when the column is NULL.
    return row.get("clean_text") or ""


def _row_as_dict(row, columns: list[str]) -> dict:
    """Convert one fetched row to a plain dict keyed by column name."""
    if isinstance(row, (tuple, list)):
        # Default sqlite3 rows are bare tuples; pair them with column names.
        return dict(zip(columns, row))
    # Mapping-like rows (e.g. sqlite3.Row, dict row factories).
    return dict(row)


def score_session(rows: list[dict]) -> dict:
    """Compute a heuristic quality score (0.0-1.0) for a session.

    The score starts at 1.0 and a penalty is subtracted for each problem
    detected in the spooled entries; the result is clamped to [0.0, 1.0] and
    rounded to three decimals.  Rows are classified by their ``"role"`` value
    (``"toolResult"``, ``"assistant"``, ``"user"``).  ``None`` values for
    ``clean_text`` / ``original_length`` are treated as empty text / zero.

    Flags and penalties:
        excessively_long_tool_output: tool results whose ``original_length``
            exceeds ``settings.max_toolresult_chars * 2``; penalty is 0.1
            scaled by the fraction of tool results affected.
        high_tool_call_ratio: more than 5 tool results per assistant
            message (0.05).
        empty_tool_results: any tool result with fewer than 5 characters of
            text (0.05).
        minimal_session: fewer than 3 rows and no tool results (no penalty).
        tool_output_dominates_context: tool text is more than 3x the
            assistant text (0.08).
        meta_process_narration: any assistant message containing one of
            ``META_MARKERS`` (0.05).
        fts_query_noise: more than 2 ``session_search`` tool results that
            report no hits (0.15).

    Args:
        rows: Spooled entries of one session, as dicts.

    Returns:
        A dict with ``"score"`` and ``"flags"`` (sorted, de-duplicated list of
        tag strings).  For non-empty input it also carries
        ``"tool_result_count"``, ``"assistant_msg_count"``,
        ``"user_msg_count"``, ``"assistant_text_total"`` and
        ``"tool_text_total"``.  Empty input yields only
        ``{"score": 0.0, "flags": ["no_data"]}``.
    """
    if not rows:
        return {"score": 0.0, "flags": ["no_data"]}

    flags = []
    tool_results = [r for r in rows if r.get("role") == "toolResult"]
    assistant_msgs = [r for r in rows if r.get("role") == "assistant"]
    user_msgs = [r for r in rows if r.get("role") == "user"]
    score = 1.0

    # Hoisted: the threshold does not change from row to row.
    long_threshold = settings.max_toolresult_chars * 2
    long_results = [
        r for r in tool_results
        if (r.get("original_length") or 0) > long_threshold
    ]
    if long_results:
        flags.append("excessively_long_tool_output")
        # long_results is a non-empty subset of tool_results, so the ratio is
        # always in (0, 1] and the divisor is never zero.
        score -= 0.1 * (len(long_results) / len(tool_results))

    if assistant_msgs and len(tool_results) > len(assistant_msgs) * 5:
        flags.append("high_tool_call_ratio")
        score -= 0.05

    if any(len(_entry_text(r)) < 5 for r in tool_results):
        flags.append("empty_tool_results")
        score -= 0.05

    # Informational only: a tiny session is flagged but not penalised.
    if len(rows) < 3 and not tool_results:
        flags.append("minimal_session")

    assistant_text_total = sum(len(_entry_text(r)) for r in assistant_msgs)
    tool_text_total = sum(len(_entry_text(r)) for r in tool_results)
    # Skipped when there is no assistant text at all (ratio is undefined).
    if assistant_text_total and tool_text_total > assistant_text_total * 3:
        flags.append("tool_output_dominates_context")
        score -= 0.08

    for msg in assistant_msgs:
        lowered = _entry_text(msg).lower()
        if any(marker in lowered for marker in META_MARKERS):
            flags.append("meta_process_narration")
            score -= 0.05
            break  # a single narrating message is enough; penalise once

    fts_empty_queries = 0
    for result in tool_results:
        if result.get("tool_name", "") != "session_search":
            continue
        lowered = _entry_text(result).lower()
        if "0 results" in lowered or "no results found" in lowered:
            fts_empty_queries += 1
    if fts_empty_queries > 2:
        flags.append("fts_query_noise")
        score -= 0.15

    score = max(0.0, min(1.0, score))
    return {
        "score": round(score, 3),
        "flags": sorted(set(flags)),
        "tool_result_count": len(tool_results),
        "assistant_msg_count": len(assistant_msgs),
        "user_msg_count": len(user_msgs),
        "assistant_text_total": assistant_text_total,
        "tool_text_total": tool_text_total,
    }


def get_all_spooled_entries(conn: sqlite3.Connection, since: str | None = None) -> list[dict]:
    """Fetch rows of ``spooled_entries`` ordered by ``indexed_at``.

    Args:
        conn: Open SQLite connection.  Works with the default tuple rows as
            well as mapping-style row factories such as ``sqlite3.Row``.
        since: When truthy, only rows with ``indexed_at`` strictly greater
            than this value are returned; otherwise all rows are returned.

    Returns:
        One plain dict per row, keyed by column name.
    """
    if since:
        cursor = conn.execute(
            "SELECT * FROM spooled_entries WHERE indexed_at > ? ORDER BY indexed_at",
            (since,),
        )
    else:
        cursor = conn.execute("SELECT * FROM spooled_entries ORDER BY indexed_at")

    # Column names come from the cursor so tuple rows can be keyed too.
    columns = [col[0] for col in cursor.description]
    return [_row_as_dict(row, columns) for row in cursor.fetchall()]