# Quoota_RAG / logger.py
# Malaji71's picture
# Create logger.py
# 8c643f8 verified
import csv
import os
from datetime import datetime
from typing import Dict, List, Optional
# Configuration
CSV_FILE = "quoota_interactions_log.csv"
CSV_ENCODING = "utf-8-sig"  # UTF-8 with BOM for Excel

# CSV columns (in exact order)
CSV_COLUMNS = [
    "timestamp",
    "user_input",
    "stars",
    "coach_notes",
    "conflict_type",
    "confidence",
    "num_sources",
    "unique_tags",
    "sources_raw",
    "logical_works_used",
    "categories_used",
    "faiss_weights_applied",
    "temperature_used",
    "top_p_used",
    "academic_response",
    "practical_response",
]
def _ensure_csv_exists():
    """Create the CSV log with its header row if it is missing or empty.

    The file is opened in append mode, so existing content is never
    truncated (a separate existence check followed by a ``'w'`` open could
    wipe a file created in between). The header is written only when the
    file has no content yet, which also covers a pre-existing zero-byte
    file that would otherwise be left without a header.
    """
    with open(CSV_FILE, 'a', newline='', encoding=CSV_ENCODING) as f:
        # In append mode the stream is positioned at end-of-file, so a
        # position of 0 means the file is brand new or empty.
        if f.tell() == 0:
            writer = csv.DictWriter(f, fieldnames=CSV_COLUMNS)
            writer.writeheader()
def _format_list(items: List[str]) -> str:
    """Join the items of a list into a single ``|``-separated CSV cell.

    Args:
        items: Values to join. Items that are not strings are converted
            with ``str()`` so that, for example, numeric values do not
            raise ``TypeError``. ``None`` or an empty list is accepted.

    Returns:
        The items joined by ``|``, or ``""`` when there is nothing to join.
    """
    if not items:
        return ""
    return "|".join(str(item) for item in items)
def _format_dict(d: Dict) -> str:
    """Serialize a dict as ``key:value`` pairs joined by ``|`` for a CSV cell.

    An empty (or otherwise falsy) argument yields an empty string.
    """
    pairs = []
    for key, value in (d or {}).items():
        pairs.append(f"{key}:{value}")
    return "|".join(pairs)
def save_interaction(
    user_input: str,
    academic_response: str,
    practical_response: str,
    metadata: Dict,
    temperature_acad: float,
    top_p_acad: float,
) -> str:
    """Append one complete interaction to the CSV log.

    Args:
        user_input: Conflict case entered by the user.
        academic_response: Full academic analysis (RAG24).
        practical_response: Full practical guide (RAG7).
        metadata: Dict with ``conflict_type``, ``confidence``,
            ``num_sources``, ``unique_tags``, ``sources_raw`` and
            ``faiss_weights_applied``. Missing keys fall back to defaults;
            ``None`` is treated as an empty dict.
        temperature_acad: Temperature used for the academic analysis.
        top_p_acad: Top-p used for the academic analysis.

    Returns:
        The ISO-format timestamp of the interaction, which serves as its
        ID when calling ``update_rating()`` later.
    """
    _ensure_csv_exists()
    # Naive local time; the resulting string doubles as the row ID.
    timestamp = datetime.now().isoformat()

    # Tolerate a missing metadata dict, and a "faiss_weights_applied" key
    # that is present but set to None (``.get(key, {})`` would return None
    # in that case and the later key iteration would fail).
    metadata = metadata or {}
    faiss_weights = metadata.get("faiss_weights_applied") or {}

    row = {
        "timestamp": timestamp,
        "user_input": user_input,
        "stars": "",  # Empty initially; filled in by update_rating()
        "coach_notes": "",  # Empty initially
        "conflict_type": metadata.get("conflict_type", "unknown"),
        "confidence": metadata.get("confidence", 0),
        "num_sources": metadata.get("num_sources", 0),
        "unique_tags": _format_list(metadata.get("unique_tags", [])),
        "sources_raw": _format_list(metadata.get("sources_raw", [])),
        "logical_works_used": "",  # TODO: implement in agent.py
        # Category names are the keys of the weights dict; stringify them
        # so non-string keys cannot break the join.
        "categories_used": _format_list([str(key) for key in faiss_weights]),
        "faiss_weights_applied": _format_dict(faiss_weights),
        "temperature_used": temperature_acad,
        "top_p_used": top_p_acad,
        "academic_response": academic_response,
        "practical_response": practical_response,
    }

    # Append the row to the CSV
    with open(CSV_FILE, 'a', newline='', encoding=CSV_ENCODING) as f:
        writer = csv.DictWriter(f, fieldnames=CSV_COLUMNS)
        writer.writerow(row)

    return timestamp
def update_rating(
    timestamp: str,
    stars: int,
    coach_notes: str
):
    """Set ``stars`` and ``coach_notes`` on an already-logged interaction.

    The whole log is read, the first row whose ``timestamp`` matches is
    updated, and the log is rewritten. The rewrite goes to a temporary
    file that then replaces the log, so a failure while writing leaves
    the existing log untouched instead of truncated.

    Args:
        timestamp: ID of the interaction (ISO format), as returned by
            ``save_interaction()``.
        stars: Rating from 1 to 5 (stored as its string form; the range
            is not enforced here).
        coach_notes: Comments from the coach.

    Raises:
        FileNotFoundError: If the CSV log does not exist.
        ValueError: If no row has the given timestamp.
    """
    if not os.path.exists(CSV_FILE):
        raise FileNotFoundError(f"CSV no existe: {CSV_FILE}")

    # Read every row
    with open(CSV_FILE, 'r', newline='', encoding=CSV_ENCODING) as f:
        rows = list(csv.DictReader(f))

    # Find and update the matching row (first match only)
    for row in rows:
        if row.get("timestamp") == timestamp:
            row["stars"] = str(stars)
            row["coach_notes"] = coach_notes
            break
    else:
        raise ValueError(f"No se encontró interacción con timestamp: {timestamp}")

    # Rewrite the full CSV via a sibling temp file plus os.replace, so the
    # original is only swapped out once the new content is fully written.
    tmp_path = f"{CSV_FILE}.tmp"
    try:
        with open(tmp_path, 'w', newline='', encoding=CSV_ENCODING) as f:
            writer = csv.DictWriter(f, fieldnames=CSV_COLUMNS)
            writer.writeheader()
            writer.writerows(rows)
        os.replace(tmp_path, CSV_FILE)
    except Exception:
        # Do not leave a half-written temp file behind.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        raise
def get_stats() -> Dict:
    """Return summary statistics of the CSV log, for debugging.

    Returns:
        Dict with:
            ``total_interactions``: number of data rows.
            ``avg_stars``: mean of the rated rows, rounded to 2 decimals
                (0 when nothing is rated).
            ``conflict_types``: mapping of conflict type to row count;
                rows lacking the column value are counted under ``None``.
            ``rated_interactions``: number of rows with a numeric rating.
        The same keys are returned when the log does not exist yet.
    """
    if not os.path.exists(CSV_FILE):
        return {
            "total_interactions": 0,
            "avg_stars": 0,
            "conflict_types": {},
            "rated_interactions": 0,
        }

    with open(CSV_FILE, 'r', newline='', encoding=CSV_ENCODING) as f:
        rows = list(csv.DictReader(f))

    # A short row yields None for missing fields, and isdecimal() (unlike
    # isdigit()) only accepts characters that int() can actually parse.
    stars_list = [
        int(raw)
        for raw in (row.get("stars") for row in rows)
        if raw and raw.isdecimal()
    ]
    avg_stars = sum(stars_list) / len(stars_list) if stars_list else 0

    conflict_types = {}
    for row in rows:
        ct = row.get("conflict_type")
        conflict_types[ct] = conflict_types.get(ct, 0) + 1

    return {
        "total_interactions": len(rows),
        "avg_stars": round(avg_stars, 2),
        "conflict_types": conflict_types,
        "rated_interactions": len(stars_list),
    }