Spaces:
Sleeping
Sleeping
| import os | |
| import json | |
| from datetime import datetime | |
| from pathlib import Path | |
def ensure_directories():
    """Make sure the working directories used by the app are present.

    Creates ``data/policies``, ``logs`` and ``chroma_db`` relative to the
    current working directory. Missing parents are created and directories
    that already exist are left untouched.
    """
    for folder in ("data/policies", "logs", "chroma_db"):
        Path(folder).mkdir(parents=True, exist_ok=True)
def log_query(question, retrieved_chunks, response, prompt_type="improved"):
    """Append one query record to ``logs/queries.jsonl`` (JSON Lines).

    Args:
        question: The user question that was asked.
        retrieved_chunks: Iterable-with-length of dict-like chunks. Each
            chunk may carry a ``"text"`` string and a ``"metadata"`` mapping;
            either may be absent.
        response: The model response; must be JSON-serializable.
        prompt_type: Label of the prompt variant that produced the response.

    The record holds an ISO timestamp, the question, the prompt type, the
    number of chunks, a preview of each chunk and the response. Chunk text
    longer than 200 characters is cut to 200 characters plus ``"..."``.
    """
    chunk_previews = []
    for chunk in retrieved_chunks:
        # Tolerate chunks without text: logging must not break the query path.
        text = chunk.get("text") or ""
        preview = text[:200] + "..." if len(text) > 200 else text
        chunk_previews.append(
            {
                "text": preview,
                "metadata": chunk.get("metadata", {}),
            }
        )

    log_entry = {
        "timestamp": datetime.now().isoformat(),
        "question": question,
        "prompt_type": prompt_type,
        "num_chunks_retrieved": len(retrieved_chunks),
        "chunks": chunk_previews,
        "response": response,
    }

    log_path = Path("logs/queries.jsonl")
    # Do not rely on ensure_directories() having been called beforehand.
    log_path.parent.mkdir(parents=True, exist_ok=True)
    with log_path.open("a", encoding="utf-8") as f:
        f.write(json.dumps(log_entry, ensure_ascii=False) + "\n")
def get_groq_api_key():
    """Return the Groq API key read from the ``GROQ_API_KEY`` variable.

    Raises:
        ValueError: If the variable is unset or empty.
    """
    key = os.environ.get("GROQ_API_KEY")
    if key:
        return key
    raise ValueError("GROQ_API_KEY environment variable not set")
def safe_json_parse(text):
    """Extract and parse the first JSON object embedded in an LLM response.

    Decoding starts at the first ``{`` in *text* and stops at the end of
    that object. Surrounding prose is ignored, including trailing text that
    itself contains braces.

    Args:
        text: Raw response text. Non-string values are treated as
            unparseable.

    Returns:
        The decoded object (a ``dict``), or ``None`` when *text* is not a
        string, contains no ``{``, or the text from the first ``{`` is not
        valid JSON.
    """
    if not isinstance(text, str):
        return None

    start = text.find("{")
    if start == -1:
        return None

    try:
        # raw_decode stops at the end of the first complete object, so a
        # later stray "}" in trailing prose cannot break parsing.
        parsed, _end = json.JSONDecoder().raw_decode(text, start)
    except (ValueError, RecursionError):
        # JSONDecodeError is a ValueError; RecursionError covers
        # pathologically deep nesting.
        return None
    return parsed
| # ============================================================ | |
| # ⭐ NEW: Simple RAG Evaluation Metrics | |
| # ============================================================ | |
def evaluate_response(question: str, response: dict, prompt_type: str) -> dict:
    """Build a small heuristic scorecard for one RAG response.

    Args:
        question: The user question (not used by the current heuristics).
        response: Mapping read for the ``"answer"`` and ``"evidence"`` keys.
        prompt_type: Prompt variant label, echoed back in the result.

    Returns:
        A dict with four entries:
        - ``"Accuracy"``: warning mark when the answer is a string starting
          with "I don't know", check mark otherwise.
        - ``"Groundedness"``: check mark when evidence is non-empty,
          warning mark otherwise.
        - ``"Hallucination Risk"``: ``"LOW"`` when the answer declines or
          evidence is present, ``"MEDIUM"`` otherwise.
        - ``"Prompt Version"``: the given ``prompt_type``.
    """
    reply = response.get("answer", "")
    has_evidence = bool(response.get("evidence", []))

    # A refusal is detected once and drives both accuracy and risk.
    declined = isinstance(reply, str) and reply.startswith("I don't know")

    if declined or has_evidence:
        risk = "LOW"
    else:
        risk = "MEDIUM"

    return {
        "Accuracy": "⚠️" if declined else "✅",
        "Groundedness": "✅" if has_evidence else "⚠️",
        "Hallucination Risk": risk,
        "Prompt Version": prompt_type,
    }