File size: 3,905 Bytes
f9e2c6d | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 | """
helpers.py — Shared utility functions used across the project.
"""
import logging
import sys
from pathlib import Path
from typing import List
from langchain_core.documents import Document
def setup_logging(level: int = logging.INFO) -> None:
    """Set up the root logger with a uniform, timestamped format.

    Args:
        level: Logging threshold; defaults to ``logging.INFO``.
    """
    # Emit to stdout (not stderr) so records interleave cleanly with app output.
    stdout_handler = logging.StreamHandler(sys.stdout)
    logging.basicConfig(
        handlers=[stdout_handler],
        datefmt="%H:%M:%S",
        format="%(asctime)s | %(levelname)-8s | %(name)s β %(message)s",
        level=level,
    )
def format_chunks_for_display(docs: List[Document], scores: List[float]) -> str:
    """
    Render retrieved chunks as a human-readable block for the Gradio UI.
    Args:
        docs: Retrieved LangChain Documents.
        scores: Similarity scores aligned one-to-one with ``docs``.
    Returns:
        Multi-line string with one framed section per chunk.
    """
    sections = []
    for idx, (document, similarity) in enumerate(zip(docs, scores), start=1):
        metadata = document.metadata
        path = metadata.get("file_path", "unknown")
        name = metadata.get("symbol_name", "")
        kind = metadata.get("symbol_type", "chunk")
        # Non-positive scores come from MMR retrieval, which has no usable score.
        shown_score = "N/A (MMR)" if similarity <= 0 else f"{similarity:.3f}"
        title = f"βββ [{idx}] {path}"
        if name:
            title += f" βΊ {kind}:{name}"
        title += f" (score: {shown_score}) βββ"
        sections.append(title + "\n" + document.page_content.strip())
    return "\n\n".join(sections)
def format_metrics_for_display(retrieval_metrics, answer_scores) -> str:
"""
Format all evaluation metrics into a readable dashboard string.
Args:
retrieval_metrics: RetrievalMetrics Pydantic model.
answer_scores: AnswerQualityScores Pydantic model.
Returns:
Formatted metrics string.
"""
lines = [
"ββββββββββββββββββββββββββββββββββββββββ",
"β EVALUATION METRICS PANEL β",
"β βββββββββββββββββββββββββββββββββββββββ£",
"β RETRIEVAL METRICS β",
f"β Recall@{retrieval_metrics.top_k:<2} : {retrieval_metrics.recall_at_k:.4f} β",
f"β MRR : {retrieval_metrics.mrr:.4f} β",
f"β nDCG@{retrieval_metrics.top_k:<2} : {retrieval_metrics.ndcg:.4f} β",
f"β Relevant chunks : {retrieval_metrics.num_relevant}/{retrieval_metrics.top_k} β",
"β βββββββββββββββββββββββββββββββββββββββ£",
"β ANSWER QUALITY (LLM Judge) β",
f"β Accuracy : {answer_scores.accuracy}/5 β",
f"β Completeness : {answer_scores.completeness}/5 β",
f"β Relevance : {answer_scores.relevance}/5 β",
f"β Groundedness : {answer_scores.groundedness}/5 β",
f"β Overall Score : {answer_scores.overall:.2f}/5.00 β",
"β βββββββββββββββββββββββββββββββββββββββ£",
f"β Reasoning: {answer_scores.reasoning[:38]:<38}",
"ββββββββββββββββββββββββββββββββββββββββ",
]
return "\n".join(lines)
def save_temp_file(file_bytes: bytes, filename: str) -> Path:
    """
    Save raw bytes to the uploads directory.
    Args:
        file_bytes: Raw file content.
        filename: Target filename (joined onto ``UPLOAD_DIR``).
    Returns:
        Path to the saved file.
    """
    # Imported lazily — presumably to avoid a circular import at module load; keep local.
    from config import UPLOAD_DIR

    # Ensure the uploads directory exists before writing; write_bytes would
    # otherwise raise FileNotFoundError on first use with a missing directory.
    UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
    dest = UPLOAD_DIR / filename
    dest.write_bytes(file_bytes)
    return dest