import sys
from pathlib import Path
import logging
# Add src to path
sys.path.append(str(Path(__file__).parent / "src"))
from src.logger import setup_logging
from src.config import Config
from src.dataset_loader import DatasetLoader
from src.evaluator import KoreanQAEvaluator
def run_legacy_evaluation(threshold: float = 0.8, verbose: bool = True) -> None:
    """Run evaluation using the legacy approach but with new logging.

    Args:
        threshold: Pass/fail score threshold forwarded to the evaluator.
            Defaults to 0.8, matching the previously hard-coded value.
        verbose: Whether the evaluator runs in verbose mode. Defaults to
            True, matching the previously hard-coded value.

    Raises:
        Exception: Re-raises whatever the configuration/loading/evaluation
            steps raise, after logging it with a full traceback.
    """
    # Set up logging first so any subsequent failure is captured.
    logger_setup = setup_logging(log_level="INFO")
    logger = logger_setup.get_logger(__name__)
    logger.warning("Using legacy evaluation script. Consider migrating to main.py")
    try:
        # Load configuration from the YAML bundled next to this script.
        script_dir = Path(__file__).parent
        config_path = script_dir / "src" / "config.yaml"
        config = Config(str(config_path))

        # Announce evaluation start (dataset path + model name).
        dataset_path = script_dir / "assets" / "bench_korean.csv"
        logger_setup.log_evaluation_start(str(dataset_path), config.gemini_model)

        # Load the benchmark dataset from CSV.
        dataset_loader = DatasetLoader()
        dataset = dataset_loader.load_from_csv(str(dataset_path))

        # Initialize the evaluator with credentials/model from config.
        evaluator = KoreanQAEvaluator(
            model_name=config.gemini_model,
            api_key=config.google_api_key,
            threshold=threshold,
            verbose_mode=verbose,
        )

        # Run the evaluation and persist the results to disk.
        results = evaluator.evaluate_dataset(dataset)
        output_path = evaluator.save_results(results)

        # Announce evaluation end.
        logger_setup.log_evaluation_end(results)
        # Lazy %-style args: the message is only formatted if the record
        # is actually emitted.
        logger.info("Legacy evaluation completed. Results saved to: %s", output_path)
    except Exception as e:
        # logger.exception (vs. logger.error) records the full traceback,
        # which the original f-string call silently dropped. Re-raise so
        # the process still exits non-zero.
        logger.exception("Legacy evaluation failed: %s", e)
        raise
# Script entry point: run the legacy evaluation when executed directly.
if __name__ == "__main__":
    run_legacy_evaluation()