File size: 3,199 Bytes
754d8d3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import os
import json
from datetime import datetime
from pathlib import Path


def ensure_directories():
    """Make sure the working directories used by the app are present.

    Creates ``data/policies``, ``logs`` and ``chroma_db`` relative to the
    current working directory. Missing parents are created as needed and
    directories that already exist are left untouched.
    """
    for folder in ("data/policies", "logs", "chroma_db"):
        Path(folder).mkdir(parents=True, exist_ok=True)


def log_query(question, retrieved_chunks, response, prompt_type="improved",
              log_file="logs/queries.jsonl", max_chunk_chars=200):
    """Append one query record to a JSONL log file.

    Each call writes a single JSON object on its own line containing the
    timestamp, the question, the prompt type, a preview of every retrieved
    chunk and the response.

    Args:
        question: The user question that was asked.
        retrieved_chunks: Iterable of dict-like chunks. Each chunk's
            ``"text"`` is logged (truncated, see ``max_chunk_chars``) along
            with its ``"metadata"`` (``{}`` when absent). ``None`` or an
            empty list logs zero chunks.
        response: The response to record; must be JSON-serializable.
        prompt_type: Label of the prompt variant that produced the response.
        log_file: Path of the JSONL file to append to. Its parent directory
            is created if it does not exist yet.
        max_chunk_chars: Chunk texts longer than this are cut to this many
            characters and suffixed with ``"..."``.
    """
    chunks = retrieved_chunks or []

    def _preview(chunk):
        # Use .get so a chunk without text is logged as empty rather than
        # aborting the whole log write (mirrors the metadata lookup).
        text = chunk.get("text") or ""
        if len(text) > max_chunk_chars:
            return text[:max_chunk_chars] + "..."
        return text

    log_entry = {
        "timestamp": datetime.now().isoformat(),
        "question": question,
        "prompt_type": prompt_type,
        "num_chunks_retrieved": len(chunks),
        "chunks": [
            {
                "text": _preview(chunk),
                "metadata": chunk.get("metadata", {}),
            }
            for chunk in chunks
        ],
        "response": response,
    }

    log_path = Path(log_file)
    # Logging must not depend on ensure_directories() having run first.
    log_path.parent.mkdir(parents=True, exist_ok=True)
    with log_path.open("a", encoding="utf-8") as f:
        f.write(json.dumps(log_entry, ensure_ascii=False) + "\n")


def get_groq_api_key():
    """Return the Groq API key stored in the ``GROQ_API_KEY`` variable.

    Raises:
        ValueError: If the variable is unset or set to an empty string.
    """
    key = os.environ.get("GROQ_API_KEY")
    if key:
        return key
    raise ValueError("GROQ_API_KEY environment variable not set")


def safe_json_parse(text):
    """Extract and parse the first JSON object embedded in an LLM response.

    Parsing starts at the first ``{`` in ``text`` and stops at the end of
    the JSON object that begins there, so anything before or after it is
    ignored -- including trailing text that happens to contain braces.

    Args:
        text: Raw response text. Non-string input is treated as unparseable.

    Returns:
        The decoded JSON object (a ``dict``), or ``None`` when ``text`` is
        not a string, contains no ``{``, or the text starting at the first
        ``{`` is not a valid JSON object.
    """
    if not isinstance(text, str):
        return None

    start = text.find("{")
    if start == -1:
        return None

    try:
        # raw_decode stops at the end of the first complete JSON value, so
        # trailing text (even text containing "}") cannot break the parse.
        parsed, _end = json.JSONDecoder().raw_decode(text, start)
    except (ValueError, RecursionError):
        # JSONDecodeError is a ValueError; RecursionError covers
        # pathologically deep nesting.
        return None
    return parsed


# ============================================================
# ⭐ NEW: Simple RAG Evaluation Metrics
# ============================================================

def evaluate_response(question: str, response: dict, prompt_type: str) -> dict:
    """Generate simple heuristic evaluation metrics for a RAG output.

    Args:
        question: The question that was asked. Currently not used by any
            of the heuristics.
        response: Parsed model output; its ``"answer"`` and ``"evidence"``
            entries are inspected. Anything that is not a dict (for example
            ``None`` from a failed JSON parse) is treated as an empty
            response instead of raising.
        prompt_type: Label of the prompt variant, echoed back in the result.

    Returns:
        A dict with four keys:

        - ``"Accuracy"``: "⚠️" when the answer is missing/empty or starts
          with "I don't know" (ignoring case and leading whitespace),
          otherwise "✅".
        - ``"Groundedness"``: "✅" when evidence is present, else "⚠️".
        - ``"Hallucination Risk"``: ``"LOW"`` when the model abstained or
          supplied evidence, else ``"MEDIUM"``.
        - ``"Prompt Version"``: the ``prompt_type`` argument.
    """
    if not isinstance(response, dict):
        # A failed parse upstream yields None; score it as an empty response.
        response = {}

    answer = response.get("answer", "")
    evidence = response.get("evidence", [])

    # Computed once and shared by the accuracy and hallucination heuristics.
    abstained = (
        isinstance(answer, str)
        and answer.strip().lower().startswith("i don't know")
    )

    # Accuracy: an abstention or a missing answer cannot be counted correct.
    accuracy = "⚠️" if abstained or not answer else "✅"

    # Groundedness: any supplied evidence counts as grounded.
    groundedness = "✅" if evidence else "⚠️"

    # Hallucination risk: abstaining or citing evidence keeps the risk low.
    hallucination = "LOW" if abstained or evidence else "MEDIUM"

    return {
        "Accuracy": accuracy,
        "Groundedness": groundedness,
        "Hallucination Risk": hallucination,
        "Prompt Version": prompt_type,
    }