"""Evaluation runner helper (Quick Start)."""

from typing import Any, Dict, Optional

import torch
import torch.nn as nn
from torch.utils.data import DataLoader

from llm_lab.config import EvalConfig

from .full_evaluator import FullEvaluator
from .checklist import InsightChecklist


def run_evaluation(
    model: nn.Module,
    tokenizer: Any,
    val_dataloader: DataLoader,
    device: Optional[torch.device] = None,  # was `torch.device = None`: default None requires Optional (PEP 484)
    dtype: torch.dtype = torch.bfloat16,
    metrics_history: Optional[Dict[str, list]] = None,
    config: Optional[EvalConfig] = None,
) -> Dict[str, Any]:
    """Run all evaluations in one call and return the evaluation report.

    Builds a :class:`FullEvaluator`, runs its full evaluation pass, then
    prints/runs the :class:`InsightChecklist` against the resulting report.

    Args:
        model: Trained model to evaluate.
        tokenizer: Tokenizer matching the model (opaque here; forwarded to
            the evaluator).
        val_dataloader: Validation data loader used for evaluation.
        device: Device to evaluate on. Defaults to CUDA when available,
            otherwise CPU.
        dtype: Computation dtype forwarded to the evaluator.
        metrics_history: Optional training-metrics history (e.g.
            ``trainer.metrics.history``) used by both the evaluator and the
            checklist.
        config: Optional evaluation configuration; evaluator defaults apply
            when ``None``.

    Returns:
        The report dictionary produced by ``FullEvaluator.run_full_evaluation``.

    Usage (Colab):
        ```python
        from llm_lab.evaluation import run_evaluation

        # After training is complete
        report = run_evaluation(
            model=trainer.model,
            tokenizer=tokenizer,
            val_dataloader=val_dl,
            metrics_history=trainer.metrics.history,
        )
        ```
    """
    # Pick a sensible default device only when the caller did not specify one.
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    evaluator = FullEvaluator(
        model=model,
        tokenizer=tokenizer,
        val_dataloader=val_dataloader,
        device=device,
        config=config,
        dtype=dtype,
        metrics_history=metrics_history,
    )
    report = evaluator.run_full_evaluation()

    # Insight checklist: summarizes the report alongside the training history.
    InsightChecklist.run_checklist(report, metrics_history)

    return report