| """ |
| helpers.py β Shared utility functions used across the project. |
| """ |
|
|
| import logging |
| import sys |
| from pathlib import Path |
| from typing import List |
|
|
| from langchain_core.documents import Document |
|
|
|
|
def setup_logging(level: int = logging.INFO) -> None:
    """Configure the root logger to write uniformly formatted records to stdout."""
    stdout_handler = logging.StreamHandler(sys.stdout)
    logging.basicConfig(
        format="%(asctime)s | %(levelname)-8s | %(name)s β %(message)s",
        datefmt="%H:%M:%S",
        handlers=[stdout_handler],
        level=level,
    )
|
|
|
|
def format_chunks_for_display(docs: List[Document], scores: List[float]) -> str:
    """
    Render retrieved chunks as a human-readable string for the Gradio UI.

    Args:
        docs: Retrieved LangChain Documents.
        scores: Similarity scores aligned positionally with ``docs``.

    Returns:
        One formatted section per chunk, separated by blank lines.
    """
    sections = []
    for rank, (document, sim) in enumerate(zip(docs, scores), start=1):
        metadata = document.metadata
        source = metadata.get("file_path", "unknown")
        name = metadata.get("symbol_name", "")
        kind = metadata.get("symbol_type", "chunk")
        # Non-positive scores come from MMR retrieval, which reports no similarity.
        shown_score = "N/A (MMR)" if sim <= 0 else f"{sim:.3f}"

        title = f"βββ [{rank}] {source}"
        if name:
            title = f"{title} βΊ {kind}:{name}"
        title = f"{title} (score: {shown_score}) βββ"

        sections.append(f"{title}\n{document.page_content.strip()}")

    return "\n\n".join(sections)
|
|
|
|
def format_metrics_for_display(retrieval_metrics, answer_scores) -> str:
    """
    Format all evaluation metrics into a readable dashboard string.

    Args:
        retrieval_metrics: RetrievalMetrics Pydantic model. Reads
            ``top_k``, ``recall_at_k``, ``mrr``, ``ndcg`` and ``num_relevant``.
        answer_scores: AnswerQualityScores Pydantic model. Reads
            ``accuracy``, ``completeness``, ``relevance``, ``groundedness``,
            ``overall`` and ``reasoning``.

    Returns:
        Formatted metrics string (one line per list entry, newline-joined).
    """
    # NOTE(review): the border characters below look mis-encoded ("β" runs) —
    # presumably box-drawing glyphs mangled in an encoding round-trip. They are
    # reproduced verbatim here; confirm the intended glyphs before changing them.
    lines = [
        "ββββββββββββββββββββββββββββββββββββββββ",
        "β EVALUATION METRICS PANEL β",
        "β βββββββββββββββββββββββββββββββββββββββ£",
        # Retrieval quality section.
        "β RETRIEVAL METRICS β",
        f"β Recall@{retrieval_metrics.top_k:<2} : {retrieval_metrics.recall_at_k:.4f} β",
        f"β MRR : {retrieval_metrics.mrr:.4f} β",
        f"β nDCG@{retrieval_metrics.top_k:<2} : {retrieval_metrics.ndcg:.4f} β",
        f"β Relevant chunks : {retrieval_metrics.num_relevant}/{retrieval_metrics.top_k} β",
        "β βββββββββββββββββββββββββββββββββββββββ£",
        # LLM-as-judge answer-quality section (each sub-score is out of 5).
        "β ANSWER QUALITY (LLM Judge) β",
        f"β Accuracy : {answer_scores.accuracy}/5 β",
        f"β Completeness : {answer_scores.completeness}/5 β",
        f"β Relevance : {answer_scores.relevance}/5 β",
        f"β Groundedness : {answer_scores.groundedness}/5 β",
        f"β Overall Score : {answer_scores.overall:.2f}/5.00 β",
        "β βββββββββββββββββββββββββββββββββββββββ£",
        # Reasoning is truncated to 38 chars and left-padded to keep the panel width.
        f"β Reasoning: {answer_scores.reasoning[:38]:<38}",
        "ββββββββββββββββββββββββββββββββββββββββ",
    ]
    return "\n".join(lines)
|
|
|
|
def save_temp_file(file_bytes: bytes, filename: str) -> Path:
    """
    Save raw bytes to the uploads directory.

    The uploads directory is created on first use, and ``filename`` is
    reduced to its basename so a crafted name (e.g. ``../../etc/x``)
    from an untrusted upload cannot escape the uploads directory.

    Args:
        file_bytes: Raw file content.
        filename: Target filename; any directory components are stripped.

    Returns:
        Path to the saved file.

    Raises:
        ValueError: If ``filename`` has no usable basename.
    """
    # Imported lazily — presumably to avoid a circular import at module load.
    from config import UPLOAD_DIR

    # Keep only the final path component to prevent path traversal.
    safe_name = Path(filename).name
    if not safe_name:
        raise ValueError(f"invalid filename: {filename!r}")

    # Previously this crashed with FileNotFoundError when the directory
    # had not been created yet.
    UPLOAD_DIR.mkdir(parents=True, exist_ok=True)

    dest = UPLOAD_DIR / safe_name
    dest.write_bytes(file_bytes)
    return dest