Spaces:
Sleeping
Sleeping
| """ | |
| Comparación: Pipeline vs Agente Básico vs Agente ReAct. | |
| Ejecutar: python -m agents.comparison | |
| """ | |
| import logging | |
| import os | |
| import re | |
| import time | |
| from dotenv import load_dotenv | |
| from openai import OpenAI | |
| from agents.basic_agent import BasicAgent | |
| from agents.react_agent import ReactAgent | |
| from agents.tools import search_docs | |
# Load variables from a .env file (if any) so os.getenv() calls below can see them.
load_dotenv()
# Only WARNING and above are logged, keeping the printed comparison readable.
logging.basicConfig(level=logging.WARNING)
# ── ANSI colors ───────────────────────────────────────────────
# Terminal escape sequences used to style the printed comparison.
BOLD = "\033[1m"
DIM = "\033[2m"
CYAN = "\033[36m"
BLUE = "\033[34m"  # Thought / reasoning
GREEN = "\033[32m"  # Answer
YELLOW = "\033[33m"  # Action
MAGENTA = "\033[35m"  # Observation
RED = "\033[31m"
RESET = "\033[0m"
# ── Static RAG pipeline ───────────────────────────────────────
# OpenAI-compatible client pointed at the Groq endpoint; used by run_pipeline().
# The API key is read from the GROQ_API_KEY environment variable.
_pipeline_client = OpenAI(
    base_url="https://api.groq.com/openai/v1",
    api_key=os.getenv("GROQ_API_KEY"),
    timeout=30.0,
)
def run_pipeline(query: str) -> dict:
    """Run the static RAG pipeline: retrieve once, then generate once.

    There are no loops and no decisions: the query is sent to ``search_docs``
    and the retrieved context is handed to the model together with the question.

    Args:
        query: The user question.

    Returns:
        A dict with keys ``"answer"`` (the model text, or ``"Error: ..."`` when
        the completion call fails), ``"steps"`` (always 1) and ``"approach"``
        (always ``"pipeline"``).
    """
    context = search_docs(query)
    try:
        response = _pipeline_client.chat.completions.create(
            model="openai/gpt-oss-120b",
            messages=[
                {
                    "role": "system",
                    "content": (
                        "Responde la pregunta basándote SOLO en el contexto proporcionado. "
                        "Si el contexto no contiene la respuesta, indica que no tienes "
                        "información suficiente. Responde en español."
                    ),
                },
                {
                    "role": "user",
                    "content": f"Contexto:\n{context}\n\nPregunta: {query}",
                },
            ],
            temperature=0,
            max_tokens=1024,
        )
        # The API may return a message whose content is None. Treat it as an
        # empty answer instead of letting .strip() raise: the resulting
        # "Error: ... has no attribute 'strip'" text would otherwise be scored
        # as a valid "no information" reply by the keyword heuristic.
        answer = (response.choices[0].message.content or "").strip()
    except Exception as e:
        # Best-effort boundary: a failed call must not abort the comparison run.
        answer = f"Error: {e}"
    return {"answer": answer, "steps": 1, "approach": "pipeline"}
# ── Test questions ────────────────────────────────────────────
# Each entry has the question text, a difficulty category label and the
# keywords the heuristic checker looks for. An empty keyword list marks a
# question that is expected to be unanswerable.
QUESTIONS = [
    # EASY — direct answer
    {
        "question": "¿Cuántos días de vacaciones tienen los empleados en su primer año?",
        "category": "FÁCIL",
        "keywords": ["12", "días", "primer"],
    },
    {
        "question": "¿Cuál es el horario de soporte técnico?",
        "category": "FÁCIL",
        "keywords": ["horario", "soporte", "lunes"],
    },
    # MEDIUM — requires searching the right document
    {
        "question": "¿Qué documentos necesito entregar en mi primer día de trabajo?",
        "category": "MEDIA",
        "keywords": ["primer día", "onboarding", "recepción"],
    },
    {
        "question": "¿Cómo reporto un problema técnico urgente?",
        "category": "MEDIA",
        "keywords": ["P1", "crítico", "soporte", "NovaHub", "ticket"],
    },
    # HARD — combine information or reformulate
    {
        "question": "Si soy nuevo empleado y mi laptop no funciona el primer día, ¿qué debo hacer?",
        "category": "DIFÍCIL",
        "keywords": ["soporte", "equipo", "TI", "ticket"],
    },
    {
        "question": "¿Puedo tomar vacaciones durante mi periodo de prueba?",
        "category": "DIFÍCIL",
        "keywords": ["prueba", "vacaciones", "90", "días"],
    },
    # UNANSWERABLE — not present in the documents
    {
        "question": "¿Cuál es el salario promedio de los ingenieros?",
        "category": "SIN RESPUESTA",
        "keywords": [],
    },
    {
        "question": "¿La empresa tiene oficina en Barcelona?",
        "category": "SIN RESPUESTA",
        "keywords": [],
    },
]
| # ── Evaluación heurística ───────────────────────────────────── | |
| def _check_answer(answer: str | None, keywords: list[str]) -> bool: | |
| """Evalúa si la respuesta contiene las keywords esperadas.""" | |
| if not answer: | |
| return False | |
| if not keywords: | |
| # Para preguntas sin respuesta, aceptar si dice que no tiene info | |
| lower = answer.lower() | |
| return any( | |
| phrase in lower | |
| for phrase in [ | |
| "no ", | |
| "no se encontr", | |
| "no tengo", | |
| "no cuento", | |
| "no hay información", | |
| "no dispongo", | |
| ] | |
| ) | |
| lower = answer.lower() | |
| return any(kw.lower() in lower for kw in keywords) | |
| def _has_source(answer: str | None) -> bool: | |
| """Verifica si la respuesta menciona un documento fuente.""" | |
| if not answer: | |
| return False | |
| sources = [ | |
| "manual_onboarding", | |
| "politica_vacaciones", | |
| "proceso_soporte", | |
| "onboarding", | |
| "política", | |
| "soporte técnico", | |
| ] | |
| lower = answer.lower() | |
| return any(s.lower() in lower for s in sources) | |
| def _count_reformulations(steps: list[dict]) -> int: | |
| """Cuenta cuántas veces se reformuló la búsqueda.""" | |
| search_actions = [] | |
| for s in steps: | |
| action = s.get("action", "") | |
| if "search_docs" in action: | |
| search_actions.append(action) | |
| return max(0, len(search_actions) - 1) | |
| # ── Formato de salida ───────────────────────────────────────── | |
def _print_header(question: str, category: str):
    """Print the boxed banner that introduces a question.

    The question is truncated so that, together with the category tag, it
    stays within the 64-column banner.
    """
    width = 64
    border = "═" * width
    shown = question[: width - len(category) - 6]
    style = f"{BOLD}{CYAN}"
    print()
    print(f"{style}╔{border}╗{RESET}")
    print(f"{style}║ [{category}] {shown}{RESET}")
    print(f"{style}╚{border}╝{RESET}")
def _print_pipeline(result: dict, elapsed: float):
    """Print the pipeline panel: wrapped answer, step count and elapsed time."""
    bar = f"{CYAN}│{RESET}"
    text = result["answer"]
    print(f"\n{CYAN}┌─ PIPELINE ────────────────────────────────────────────────────┐{RESET}")
    print(f"{bar} {GREEN}{BOLD}Respuesta:{RESET}")
    for chunk in _wrap(text, 58):
        print(f"{bar} {GREEN}{chunk}{RESET}")
    footer = f"Pasos: {result['steps']} | Tiempo: {elapsed:.1f}s"
    print(f"{bar} {DIM}{footer}{RESET}")
    print(f"{CYAN}└──────────────────────────────────────────────────────────────┘{RESET}")
def _print_basic(result: dict, elapsed: float):
    """Print the basic (act-only) agent panel.

    Shows every action (truncated to 65 chars) and its observation (truncated
    to 62 chars, omitted when empty or identical to the action), followed by
    the wrapped final answer, the total step count and the elapsed time.

    Args:
        result: Agent result with ``"steps"``, ``"total_steps"`` and an
            optional ``"answer"``.
        elapsed: Wall time of the run in seconds.
    """
    bar = f"{CYAN}│{RESET}"
    print(f"\n{CYAN}┌─ AGENTE BÁSICO (Act-Only) ────────────────────────────────────┐{RESET}")
    for s in result["steps"]:
        # Tolerate a missing/None action, matching _count_reformulations,
        # so one malformed step cannot abort the whole report.
        action = s.get("action") or ""
        observation = s.get("observation")
        print(f"{bar} {YELLOW}Action {s['step']}:{RESET} {YELLOW}{action[:65]}{RESET}")
        if observation and action != observation:
            obs = observation[:62]
            print(f"{bar} {MAGENTA}Obs {s['step']}:{RESET} {MAGENTA}{obs}{RESET}")
    answer = result.get("answer") or "(sin respuesta)"
    print(f"{bar}")
    print(f"{bar} {GREEN}{BOLD}Respuesta:{RESET}")
    for line in _wrap(answer, 56):
        print(f"{bar} {GREEN}{line}{RESET}")
    print(
        f"{bar} {DIM}Pasos: {result['total_steps']} | Tiempo: {elapsed:.1f}s{RESET}"
    )
    print(f"{CYAN}└──────────────────────────────────────────────────────────────┘{RESET}")
def _print_react(result: dict, elapsed: float):
    """Print the ReAct agent panel.

    Shows, per step, the thought (truncated to 62 chars), the action (65 chars)
    and the observation (62 chars, omitted when empty or identical to the
    action), followed by the wrapped final answer, the total step count and
    the elapsed time.

    Args:
        result: Agent result with ``"steps"``, ``"total_steps"`` and an
            optional ``"answer"``.
        elapsed: Wall time of the run in seconds.
    """
    bar = f"{CYAN}│{RESET}"
    print(f"\n{CYAN}┌─ AGENTE REACT ─────────────────────────────────────────────────┐{RESET}")
    for s in result["steps"]:
        # Tolerate missing/None thought or action so one malformed step
        # cannot abort the whole report.
        thought = (s.get("thought") or "")[:62]
        action = s.get("action") or ""
        observation = s.get("observation")
        print(f"{bar} {BLUE}Thought {s['step']}:{RESET} {BLUE}{thought}{RESET}")
        print(f"{bar} {YELLOW}Action {s['step']}:{RESET} {YELLOW}{action[:65]}{RESET}")
        if observation and action != observation:
            obs = observation[:62]
            print(f"{bar} {MAGENTA}Obs {s['step']}:{RESET} {MAGENTA}{obs}{RESET}")
    answer = result.get("answer") or "(sin respuesta)"
    print(f"{bar}")
    print(f"{bar} {GREEN}{BOLD}Respuesta:{RESET}")
    for line in _wrap(answer, 56):
        print(f"{bar} {GREEN}{line}{RESET}")
    print(
        f"{bar} {DIM}Pasos: {result['total_steps']} | Tiempo: {elapsed:.1f}s{RESET}"
    )
    print(f"{CYAN}└──────────────────────────────────────────────────────────────┘{RESET}")
| def _wrap(text: str, width: int) -> list[str]: | |
| """Simple line wrapper.""" | |
| words = text.split() | |
| lines: list[str] = [] | |
| current = "" | |
| for word in words: | |
| if len(current) + len(word) + 1 > width: | |
| lines.append(current) | |
| current = word | |
| else: | |
| current = f"{current} {word}" if current else word | |
| if current: | |
| lines.append(current) | |
| return lines or [""] | |
| # ── Tabla resumen ───────────────────────────────────────────── | |
def _print_summary(metrics: dict):
    """Print the summary table comparing the three approaches.

    Args:
        metrics: Dict with ``"pipeline"``, ``"basic"`` and ``"react"`` entries
            (this function reads ``"total_steps"``, ``"total_time"``,
            ``"correct"`` and ``"with_source"`` from each, and
            ``"reformulations"`` from the two agents) plus ``"total"``, the
            number of questions evaluated.
    """
    p = metrics["pipeline"]
    b = metrics["basic"]
    r = metrics["react"]
    total = metrics["total"]
    # Averages divide by the question count; fall back to 1 so an empty run
    # prints zeros instead of raising ZeroDivisionError.
    denom = total or 1

    def _winner(p_val, b_val, r_val, lower_is_better=True):
        """Name the best approach(es); ties are reported, never silently broken."""
        vals = {"Pipeline": p_val, "Básico": b_val, "ReAct": r_val}
        best = min(vals.values()) if lower_is_better else max(vals.values())
        leaders = [name for name, val in vals.items() if val == best]
        # min()/max() over the dict would always pick the first key on a tie,
        # crediting "Pipeline" with wins it only shares.
        if len(leaders) == len(vals):
            return "Empate"
        return "/".join(leaders)

    rows = [
        (
            "Promedio pasos",
            f"{p['total_steps'] / denom:.1f}",
            f"{b['total_steps'] / denom:.1f}",
            f"{r['total_steps'] / denom:.1f}",
            _winner(
                p["total_steps"] / denom,
                b["total_steps"] / denom,
                r["total_steps"] / denom,
            ),
        ),
        (
            "Promedio tiempo",
            f"{p['total_time'] / denom:.1f}s",
            f"{b['total_time'] / denom:.1f}s",
            f"{r['total_time'] / denom:.1f}s",
            _winner(p["total_time"], b["total_time"], r["total_time"]),
        ),
        (
            "Resp. correctas",
            f"{p['correct']}/{total}",
            f"{b['correct']}/{total}",
            f"{r['correct']}/{total}",
            _winner(p["correct"], b["correct"], r["correct"], lower_is_better=False),
        ),
        (
            "Resp. con fuente",
            f"{p['with_source']}/{total}",
            f"{b['with_source']}/{total}",
            f"{r['with_source']}/{total}",
            _winner(
                p["with_source"],
                b["with_source"],
                r["with_source"],
                lower_is_better=False,
            ),
        ),
        (
            # Traceability is a fixed qualitative judgement, not a measurement.
            "Trazabilidad",
            "Ninguna",
            "Parcial",
            "Completa",
            "ReAct",
        ),
        (
            # The pipeline never reformulates, so its column is a constant 0.
            "Reformulaciones",
            "0",
            str(b["reformulations"]),
            str(r["reformulations"]),
            _winner(
                0,
                b["reformulations"],
                r["reformulations"],
                lower_is_better=False,
            ),
        ),
    ]
    print(f"\n\n{BOLD}{'═' * 80}{RESET}")
    print(f"{BOLD} TABLA RESUMEN{RESET}")
    print(f"{BOLD}{'═' * 80}{RESET}")
    hdr = f"{'Métrica':<20} {'Pipeline':>10} {'Ag. Básico':>12} {'Ag. ReAct':>12} {'Ganador':>14}"
    print(f"\n{BOLD}{hdr}{RESET}")
    print("─" * 72)
    for metric, pv, bv, rv, winner in rows:
        print(f"{metric:<20} {pv:>10} {bv:>12} {rv:>12} {BOLD}{winner:>14}{RESET}")
    print("─" * 72)
| # ── Análisis ────────────────────────────────────────────────── | |
def _print_analysis():
    """Print the fixed, hand-written analysis of the three approaches.

    The text is static: it does not depend on the metrics collected during
    the run. It ends with a legend of the colors used in the agent panels.
    """
    print(f"\n{BOLD}ANÁLISIS DE RESULTADOS{RESET}")
    print("=" * 60)
    print(
        f"""
{GREEN}1. PIPELINE:{RESET} Rápido y predecible, pero sin capacidad de
adaptación. Si el retrieval falla, la respuesta falla.
No hay forma de reformular o buscar alternativas.
{YELLOW}2. AGENTE BÁSICO ({YELLOW}Action{RESET} only):{RESET} Puede usar herramientas
iterativamente, pero sin razonamiento explícito tiende
a repetir búsquedas similares sin reformular. No puede
explicar POR QUÉ tomó cada decisión.
{BLUE}3. AGENTE REACT{RESET} ({BLUE}Thought{RESET} → {YELLOW}Action{RESET} → {MAGENTA}Obs{RESET}):
Más lento pero significativamente más preciso. Puede
reformular búsquedas fallidas, combinar información de
múltiples fuentes, y cada decisión es trazable a un
{BLUE}pensamiento explícito{RESET}. Ideal para tareas complejas
donde la precisión importa más que la velocidad.
{DIM}Leyenda de colores:{RESET}
{BLUE}Azul{RESET} = Razonamiento (Thought)
{YELLOW}Amarillo{RESET} = Acción (Action)
{MAGENTA}Rosa{RESET} = Observación (Observation)
{GREEN}Verde{RESET} = Respuesta final
"""
    )
| # ── Main ────────────────────────────────────────────────────── | |
def main():
    """Run every test question through the three approaches and report.

    For each entry in ``QUESTIONS`` the static pipeline, the basic agent and
    the ReAct agent are executed in that order. Each result is printed and
    scored with the keyword/source heuristics. The summary table and the
    written analysis are printed at the end.
    """
    print(f"\n{BOLD}{CYAN}{'═' * 70}{RESET}")
    print(f"{BOLD}{CYAN} COMPARACIÓN: Pipeline vs Agente Básico vs Agente ReAct{RESET}")
    print(f"{BOLD}{CYAN}{'═' * 70}{RESET}")
    print(f"{DIM} Modelo: gpt-oss-120b | Docs: data/*.txt | VectorDB: ChromaDB{RESET}")
    basic_agent = BasicAgent()
    react_agent = ReactAgent()

    # One independent counter dict per approach, plus the question count.
    metrics = {
        name: {
            "total_steps": 0,
            "total_time": 0.0,
            "correct": 0,
            "with_source": 0,
            "reformulations": 0,
        }
        for name in ("pipeline", "basic", "react")
    }
    metrics["total"] = len(QUESTIONS)

    def _record(name, steps, elapsed, answer, keywords):
        """Accumulate step count, time and heuristic scores for one run."""
        bucket = metrics[name]
        bucket["total_steps"] += steps
        bucket["total_time"] += elapsed
        if _check_answer(answer, keywords):
            bucket["correct"] += 1
        if _has_source(answer):
            bucket["with_source"] += 1

    for q in QUESTIONS:
        question = q["question"]
        keywords = q["keywords"]
        category = q["category"]
        _print_header(question, category)

        # perf_counter() is monotonic, so the measured durations are not
        # distorted by system clock adjustments the way time.time() can be.

        # ── Pipeline ──
        t0 = time.perf_counter()
        p_result = run_pipeline(question)
        p_time = time.perf_counter() - t0
        _print_pipeline(p_result, p_time)
        _record("pipeline", p_result["steps"], p_time, p_result["answer"], keywords)

        # ── Basic agent ──
        t0 = time.perf_counter()
        b_result = basic_agent.run(question, verbose=False)
        b_time = time.perf_counter() - t0
        _print_basic(b_result, b_time)
        _record("basic", b_result["total_steps"], b_time, b_result.get("answer"), keywords)
        metrics["basic"]["reformulations"] += _count_reformulations(b_result["steps"])

        # ── ReAct agent ──
        t0 = time.perf_counter()
        r_result = react_agent.run(question, verbose=False)
        r_time = time.perf_counter() - t0
        _print_react(r_result, r_time)
        _record("react", r_result["total_steps"], r_time, r_result.get("answer"), keywords)
        metrics["react"]["reformulations"] += _count_reformulations(r_result["steps"])

    _print_summary(metrics)
    _print_analysis()
# Run the comparison only when executed as a script (python -m agents.comparison).
if __name__ == "__main__":
    main()