File size: 1,850 Bytes
7dfe46c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import sys
from pathlib import Path
import logging

# Append the "src" directory that sits next to this script to the module search path.
# NOTE(review): the project imports below use the "src." package prefix, so this
# entry presumably exists for bare-name imports made inside src's own modules — confirm.
sys.path.append(str(Path(__file__).parent / "src"))

from src.logger import setup_logging
from src.config import Config
from src.dataset_loader import DatasetLoader
from src.evaluator import KoreanQAEvaluator

def run_legacy_evaluation():
    """Execute the legacy evaluation flow, reporting through the new logging setup.

    Reads ``src/config.yaml`` and ``assets/bench_korean.csv`` relative to this
    script's directory, evaluates the loaded dataset with ``KoreanQAEvaluator``
    (threshold 0.8, verbose mode enabled), and saves the results via the
    evaluator.

    Raises:
        Exception: any error raised during the run is logged and re-raised
            unchanged.
    """
    log_setup = setup_logging(log_level="INFO")
    log = log_setup.get_logger(__name__)

    log.warning("Using legacy evaluation script. Consider migrating to main.py")

    try:
        # Both input files are located relative to this script's directory.
        base_dir = Path(__file__).parent
        cfg = Config(str(base_dir.joinpath("src", "config.yaml")))

        # Announce the run before touching the dataset.
        csv_file = str(base_dir.joinpath("assets", "bench_korean.csv"))
        log_setup.log_evaluation_start(csv_file, cfg.gemini_model)

        qa_dataset = DatasetLoader().load_from_csv(csv_file)

        # Evaluator settings are fixed for the legacy flow.
        evaluator_options = {
            "model_name": cfg.gemini_model,
            "api_key": cfg.google_api_key,
            "threshold": 0.8,
            "verbose_mode": True,
        }
        qa_evaluator = KoreanQAEvaluator(**evaluator_options)

        # Evaluate, persist, then report the end of the run.
        outcome = qa_evaluator.evaluate_dataset(qa_dataset)
        saved_to = qa_evaluator.save_results(outcome)
        log_setup.log_evaluation_end(outcome)

        log.info(f"Legacy evaluation completed. Results saved to: {saved_to}")
    except Exception as err:
        # Record the failure, then let the caller see the original exception.
        log.error(f"Legacy evaluation failed: {err}")
        raise

# Run the legacy evaluation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    run_legacy_evaluation()