# Vjeong's picture
# docs: translate all Korean comments and docstrings to English
# 858e8b2
"""Evaluation runner helper (Quick Start)."""
from typing import Any, Dict, Optional
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from llm_lab.config import EvalConfig
from .full_evaluator import FullEvaluator
from .checklist import InsightChecklist
def run_evaluation(
    model: nn.Module,
    tokenizer: Any,
    val_dataloader: DataLoader,
    device: Optional[torch.device] = None,
    dtype: torch.dtype = torch.bfloat16,
    metrics_history: Optional[Dict[str, list]] = None,
    config: Optional[EvalConfig] = None,
) -> Dict[str, Any]:
    """Runs all evaluations in one call.

    Builds a ``FullEvaluator`` from the arguments, runs its full evaluation,
    then passes the resulting report (together with ``metrics_history``) to
    ``InsightChecklist.run_checklist`` before returning the report.

    Usage (Colab):
    ```python
    from llm_lab.evaluation import run_evaluation

    # After training is complete
    report = run_evaluation(
        model=trainer.model,
        tokenizer=tokenizer,
        val_dataloader=val_dl,
        metrics_history=trainer.metrics.history,
    )
    ```

    Args:
        model: Model to evaluate; forwarded to ``FullEvaluator``.
        tokenizer: Tokenizer forwarded to ``FullEvaluator``.
        val_dataloader: Validation data loader forwarded to ``FullEvaluator``.
        device: Device to evaluate on. When ``None``, CUDA is selected if
            ``torch.cuda.is_available()`` reports it, otherwise CPU.
        dtype: Torch dtype forwarded to ``FullEvaluator`` (defaults to
            ``torch.bfloat16``).
        metrics_history: Optional training metrics history; forwarded as-is
            (including ``None``) to both ``FullEvaluator`` and
            ``InsightChecklist.run_checklist``.
        config: Optional evaluation config forwarded to ``FullEvaluator``.

    Returns:
        The report produced by ``FullEvaluator.run_full_evaluation()``.
    """
    if device is None:
        # Prefer the GPU when one is visible to this process.
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    evaluator = FullEvaluator(
        model=model,
        tokenizer=tokenizer,
        val_dataloader=val_dataloader,
        device=device,
        config=config,
        dtype=dtype,
        metrics_history=metrics_history,
    )
    report = evaluator.run_full_evaluation()

    # Insight checklist: its return value (if any) is intentionally unused;
    # the caller only receives the evaluation report.
    InsightChecklist.run_checklist(report, metrics_history)

    return report