# Zenith-7B-V1 — tests/evaluation/__init__.py
"""Comprehensive Evaluation and Benchmarking Suite for Zenith Models"""
from .benchmark import BenchmarkSuite, BenchmarkConfig
from .metrics import (
compute_perplexity,
compute_accuracy,
compute_em_score,
compute_f1_score,
compute_eq_metrics,
compute_code_metrics,
compute_reasoning_metrics,
)
from .eval_datasets import (
EvaluationDataset,
load_human_eval,
load_mbpp,
load_gsm8k,
load_math,
load_truthfulqa,
load_emotional_bench,
)
from .comparative_eval import ComparativeEvaluator, ModelComparison
# Public API of the evaluation package, grouped by the submodule each
# name is re-exported from. Order matches the import statements above.
__all__ = [
    # .benchmark
    "BenchmarkSuite", "BenchmarkConfig",
    # .metrics
    "compute_perplexity", "compute_accuracy", "compute_em_score",
    "compute_f1_score", "compute_eq_metrics", "compute_code_metrics",
    "compute_reasoning_metrics",
    # .eval_datasets
    "EvaluationDataset", "load_human_eval", "load_mbpp", "load_gsm8k",
    "load_math", "load_truthfulqa", "load_emotional_bench",
    # .comparative_eval
    "ComparativeEvaluator", "ModelComparison",
]