Spaces:
Sleeping
Sleeping
| """Pipeline de compilación COMPLETA con MIPROv2. | |
| Correr OFFLINE antes de clase. Toma ~10-15 minutos y puede quemar varios | |
| miles de tokens de Groq. Cachea el resultado en JSON — si ya existe, aborta | |
| (pasar --force para recompilar). | |
| Output: | |
| - dspy_lab/qa_optimized_mipro.json (programa compilado) | |
| - evals/reports/mipro_report.md (comparativa baseline vs optimized) | |
| """ | |
| from __future__ import annotations | |
| import argparse | |
| import os | |
| import time | |
| from pathlib import Path | |
| import dspy | |
| import litellm | |
| from dspy.teleprompt import MIPROv2 | |
| from rich.console import Console | |
| from data.loader import load_eval_set | |
| from dspy_lab.metric import docops_metric | |
| from dspy_lab.signatures import AnswerWithContext | |
# Cache location of the compiled MIPROv2 program (JSON).
OUT_PROGRAM = Path("dspy_lab") / "qa_optimized_mipro.json"
# Destination of the Markdown report comparing baseline vs optimized scores.
OUT_REPORT = Path("evals") / "reports" / "mipro_report.md"
def _avg_score(program: dspy.Module, examples: list[dspy.Example]) -> float:
    """Return the mean ``docops_metric`` score of ``program`` over ``examples``.

    Each example is run through ``program`` using its ``context`` and
    ``question`` attributes, and the prediction is scored with
    ``docops_metric``. If either step raises, that example counts as ``0.0``
    so one failure does not abort the whole evaluation. The failure is logged
    as a warning so that a low average caused by errors (rather than by bad
    answers) is visible to whoever reads the output.

    Args:
        program: Callable accepting ``context`` and ``question`` keyword
            arguments and returning a prediction.
        examples: Items exposing ``context`` and ``question`` attributes.

    Returns:
        The arithmetic mean of the per-example scores, or ``0.0`` when
        ``examples`` is empty.
    """
    # Local import: keeps this block self-contained without touching the
    # module-level import section.
    import logging

    logger = logging.getLogger(__name__)
    scores: list[float] = []
    for index, ex in enumerate(examples):
        try:
            pred = program(context=ex.context, question=ex.question)
            scores.append(docops_metric(ex, pred))
        except Exception as exc:
            # Deliberate best-effort: keep going, but leave a trace of what
            # went wrong instead of silently recording a zero.
            logger.warning(
                "example %d failed (%s: %s); scoring it as 0.0",
                index,
                type(exc).__name__,
                exc,
            )
            scores.append(0.0)
    # Guard against division by zero on an empty evaluation set.
    return sum(scores) / len(scores) if scores else 0.0
def main() -> None:
    """Compile the QA program with MIPROv2 and write the program and a report.

    Steps, in order:

    1. Parse ``--force``; if ``OUT_PROGRAM`` already exists and ``--force``
       was not given, report a cache hit and return without doing anything.
    2. Read ``GROQ_API_KEY`` from the environment and configure the DSPy LM.
    3. Load the train/val/test splits and score the un-optimized
       ``ChainOfThought`` baseline on the validation split.
    4. Compile with ``MIPROv2(auto="light")`` and save the compiled program
       immediately, before any further evaluation.
    5. Score the optimized program on the validation split and write a
       Markdown comparison to ``OUT_REPORT``.

    Raises:
        SystemExit: If ``GROQ_API_KEY`` is unset or empty.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--force", action="store_true", help="Recompilar aunque el cache exista."
    )
    args = parser.parse_args()

    console = Console()
    console.print("[bold cyan]dspy_lab/optimize.py — MIPROv2 full compile[/bold cyan]")

    # The compiled program doubles as the cache marker: skip the expensive
    # compile unless the caller explicitly asks for a rebuild.
    if OUT_PROGRAM.exists() and not args.force:
        console.print(f"[yellow]cache hit[/yellow] → {OUT_PROGRAM}")
        console.print("Usa --force para recompilar.")
        return

    api_key = os.environ.get("GROQ_API_KEY")
    if not api_key:
        raise SystemExit(
            "[error] GROQ_API_KEY no está definido. Expórtalo o añádelo a .env."
        )

    # Groq free tier: 8k TPM for openai/gpt-oss-120b.
    # Configure aggressive retries in litellm so it waits automatically.
    litellm.num_retries = 5
    litellm.request_timeout = 120

    # Single source for the model id so the LM and the report cannot drift.
    model_name = "groq/openai/gpt-oss-120b"
    lm = dspy.LM(
        model_name,
        api_key=api_key,
        temperature=0.0,
        max_tokens=2048,
    )
    dspy.configure(lm=lm)

    trainset, valset, testset = load_eval_set(splits=(0.6, 0.2, 0.2))
    console.print(
        f"train={len(trainset)} val={len(valset)} test={len(testset)}"
    )

    qa_baseline = dspy.ChainOfThought(AnswerWithContext)

    console.print("Evaluando baseline...")
    t0 = time.perf_counter()
    baseline_score = _avg_score(qa_baseline, valset)
    baseline_time = time.perf_counter() - t0
    console.print(f" baseline: {baseline_score:.3f} ({baseline_time:.1f}s)")

    console.print("Compilando con MIPROv2 (auto=light)...")
    optimizer = MIPROv2(metric=docops_metric, auto="light")
    t0 = time.perf_counter()
    qa_optimized = optimizer.compile(
        qa_baseline, trainset=trainset, valset=valset, requires_permission_to_run=False
    )
    compile_time = time.perf_counter() - t0
    console.print(f" compile: {compile_time:.1f}s")

    # Persist the compiled program right away: the compile is the expensive
    # step, and an interruption during the evaluation below must not lose it.
    OUT_PROGRAM.parent.mkdir(parents=True, exist_ok=True)
    qa_optimized.save(str(OUT_PROGRAM))
    console.print(f"Programa guardado en {OUT_PROGRAM}")

    t0 = time.perf_counter()
    optimized_score = _avg_score(qa_optimized, valset)
    eval_time = time.perf_counter() - t0
    console.print(f" optimized: {optimized_score:.3f} ({eval_time:.1f}s)")

    OUT_REPORT.parent.mkdir(parents=True, exist_ok=True)
    delta = optimized_score - baseline_score
    OUT_REPORT.write_text(
        f"""# MIPROv2 compile report
## Config
- LM: `{model_name}`
- Metric: `docops_metric` (0.6 * faithfulness + 0.4 * answer_relevancy)
- Optimizer: `MIPROv2(auto="light")`
- Splits: train={len(trainset)} / val={len(valset)} / test={len(testset)}
## Results
| Program | docops_metric (avg val) |
|---------|------------------------:|
| Baseline (ChainOfThought) | {baseline_score:.3f} |
| Optimized (MIPROv2) | {optimized_score:.3f} |
| **Delta** | **{delta:+.3f}** |
## Times
- Baseline eval: {baseline_time:.1f}s
- Compile: {compile_time:.1f}s
- Optimized eval: {eval_time:.1f}s
""",
        encoding="utf-8",
    )
    console.print(f"Reporte guardado en {OUT_REPORT}")
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()