| """Evaluation runner helper (Quick Start).""" |
|
|
| from typing import Any, Dict, Optional |
|
|
| import torch |
| import torch.nn as nn |
| from torch.utils.data import DataLoader |
|
|
| from llm_lab.config import EvalConfig |
| from .full_evaluator import FullEvaluator |
| from .checklist import InsightChecklist |
|
|
|
|
def run_evaluation(
    model: nn.Module,
    tokenizer: Any,
    val_dataloader: DataLoader,
    device: Optional[torch.device] = None,
    dtype: torch.dtype = torch.bfloat16,
    metrics_history: Optional[Dict[str, list]] = None,
    config: Optional[EvalConfig] = None,
) -> Dict[str, Any]:
| """Runs all evaluations in one call. |
| |
| Usage (Colab): |
| ```python |
| from llm_lab.evaluation import run_evaluation |
| |
| # After training is complete |
| report = run_evaluation( |
| model=trainer.model, |
| tokenizer=tokenizer, |
| val_dataloader=val_dl, |
| metrics_history=trainer.metrics.history, |
| ) |
| ``` |
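
    A sketch of the same call against a restored checkpoint rather than a live
    trainer (``MyModel``, the checkpoint path, and the ``state["model"]`` layout
    below are placeholders for your own setup):
    ```python
    import torch
    from llm_lab.evaluation import run_evaluation

    model = MyModel()  # placeholder: your model class
    state = torch.load("checkpoints/last.pt", map_location="cpu")  # placeholder path
    model.load_state_dict(state["model"])  # assumes this checkpoint layout

    report = run_evaluation(
        model=model,
        tokenizer=tokenizer,
        val_dataloader=val_dl,
        dtype=torch.float32,  # e.g. when bfloat16 is not supported on the device
    )
    ```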
| """ |
    # Default to GPU when one is available.
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    evaluator = FullEvaluator(
        model=model,
        tokenizer=tokenizer,
        val_dataloader=val_dataloader,
        device=device,
        config=config,
        dtype=dtype,
        metrics_history=metrics_history,
    )

    # Run every evaluation stage and collect the results into a single report.
    report = evaluator.run_full_evaluation()

    # Run the insight checklist against the report and the training history.
    InsightChecklist.run_checklist(report, metrics_history)

    return report