File size: 1,404 Bytes
"""Evaluation runner helper (Quick Start)."""
from typing import Any, Dict, Optional
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from llm_lab.config import EvalConfig
from .full_evaluator import FullEvaluator
from .checklist import InsightChecklist
def run_evaluation(
    model: nn.Module,
    tokenizer: Any,
    val_dataloader: DataLoader,
    device: Optional[torch.device] = None,
    dtype: torch.dtype = torch.bfloat16,
    metrics_history: Optional[Dict[str, list]] = None,
    config: Optional[EvalConfig] = None,
) -> Dict[str, Any]:
    """Run all evaluations in one call.

    Builds a ``FullEvaluator`` from the given arguments, calls its
    ``run_full_evaluation()`` method, hands the resulting report (together
    with ``metrics_history``) to ``InsightChecklist.run_checklist`` and
    returns the report.

    Args:
        model: Model forwarded to ``FullEvaluator``.
        tokenizer: Tokenizer forwarded to ``FullEvaluator``.
        val_dataloader: Validation data loader forwarded to
            ``FullEvaluator``.
        device: Device forwarded to ``FullEvaluator``. When ``None``,
            ``cuda`` is selected if ``torch.cuda.is_available()`` is true,
            otherwise ``cpu``.
        dtype: Forwarded unchanged to ``FullEvaluator``; defaults to
            ``torch.bfloat16``.
        metrics_history: Forwarded to ``FullEvaluator`` and to
            ``InsightChecklist.run_checklist``; may be ``None``.
        config: Forwarded unchanged to ``FullEvaluator``; may be ``None``.

    Returns:
        The report object returned by
        ``FullEvaluator.run_full_evaluation()``.

    Usage (Colab):
    ```python
    from llm_lab.evaluation import run_evaluation
    # After training is complete
    report = run_evaluation(
        model=trainer.model,
        tokenizer=tokenizer,
        val_dataloader=val_dl,
        metrics_history=trainer.metrics.history,
    )
    ```
    """
    # Fall back to the GPU when one is visible, otherwise run on the CPU.
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    evaluator = FullEvaluator(
        model=model,
        tokenizer=tokenizer,
        val_dataloader=val_dataloader,
        device=device,
        config=config,
        dtype=dtype,
        metrics_history=metrics_history,
    )
    report = evaluator.run_full_evaluation()

    # Insight checklist: called for its side effects; its return value is
    # not used here.
    InsightChecklist.run_checklist(report, metrics_history)

    return report
|