#!/usr/bin/env python3
"""
Simple script to run model evaluation on CNN/DailyMail dataset
"""
import logging
import os
import sys
from pathlib import Path

# Add project root to path so the local `evaluation` package resolves when
# this script is executed directly (not via `python -m`).
project_root = Path(__file__).parent
sys.path.insert(0, str(project_root))

from evaluation.dataset_loader import CNNDailyMailLoader
from evaluation.model_evaluator import ModelEvaluator
from evaluation.results_analyzer import ResultsAnalyzer

# Setup logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Configuration
SAMPLE_SIZE = 50        # Number of samples to evaluate overall
MIN_TOPIC_SIZE = 5      # Only evaluate topics with at least this many articles
TOPIC_MAX_SAMPLES = 20  # Per-topic evaluation cap
OUTPUT_DIR = "evaluation_results"


def _prepare_data(loader):
    """Load the dataset, build the evaluation subset, and split it by topic.

    Returns:
        tuple: (eval_data, categorized_data) where categorized_data maps
        topic name -> list of articles. Both are also saved as JSON files
        under OUTPUT_DIR as a side effect.
    """
    logger.info("Step 1: Loading CNN/DailyMail dataset...")
    loader.load_dataset()

    logger.info("Step 2: Creating evaluation subset...")
    eval_data = loader.create_evaluation_subset(size=SAMPLE_SIZE)
    loader.save_evaluation_data(eval_data, f"{OUTPUT_DIR}/eval_data.json")

    logger.info("Step 3: Categorizing by topics...")
    categorized_data = loader.categorize_by_topic(eval_data)
    # Persist each non-empty topic bucket for later inspection.
    for topic, data in categorized_data.items():
        if data:
            loader.save_evaluation_data(data, f"{OUTPUT_DIR}/data_{topic}.json")
            logger.info(" %s: %d articles", topic, len(data))
    return eval_data, categorized_data


def _evaluate_overall(evaluator, eval_data):
    """Run every model over the full evaluation subset and print a summary.

    Saves detailed JSON results plus a CSV comparison table under OUTPUT_DIR.

    Returns:
        The detailed results object produced by evaluate_all_models.
    """
    logger.info("Step 5: Running overall evaluation...")
    overall_results = evaluator.evaluate_all_models(eval_data, max_samples=SAMPLE_SIZE)
    evaluator.save_results(overall_results, f"{OUTPUT_DIR}/results_overall.json")

    comparison_df = evaluator.compare_models(overall_results)
    comparison_df.to_csv(f"{OUTPUT_DIR}/comparison_overall.csv", index=False)
    print("\n" + "=" * 60)
    print("OVERALL EVALUATION RESULTS")
    print("=" * 60)
    print(comparison_df.to_string(index=False))
    return overall_results


def _evaluate_topics(evaluator, categorized_data):
    """Evaluate each topic bucket that has enough articles.

    Topics smaller than MIN_TOPIC_SIZE are skipped; each evaluated topic is
    capped at TOPIC_MAX_SAMPLES. Per-topic JSON and CSV files are written to
    OUTPUT_DIR.

    Returns:
        dict: topic name -> detailed results (empty if no topic qualified).
    """
    logger.info("Step 6: Running topic-based evaluation...")
    topic_results = {}
    for topic, data in categorized_data.items():
        if len(data) < MIN_TOPIC_SIZE:
            # Too few samples for a meaningful per-topic score.
            continue
        logger.info(" Evaluating topic: %s", topic)
        results = evaluator.evaluate_all_models(data, max_samples=TOPIC_MAX_SAMPLES)
        topic_results[topic] = results
        evaluator.save_results(results, f"{OUTPUT_DIR}/results_{topic}.json")

        topic_comparison = evaluator.compare_models(results)
        topic_comparison.to_csv(f"{OUTPUT_DIR}/comparison_{topic}.csv", index=False)
        print(f"\n{topic.upper()} TOPIC RESULTS:")
        print("-" * 40)
        print(topic_comparison.to_string(index=False))
    return topic_results


def _report(overall_results, topic_results):
    """Create charts and the detailed report, then print the output manifest."""
    logger.info("Step 7: Creating analysis and visualizations...")
    analyzer = ResultsAnalyzer()
    analyzer.create_performance_charts(overall_results, OUTPUT_DIR)
    # Topic analysis only makes sense when at least one topic was evaluated.
    if topic_results:
        analyzer.analyze_topic_performance(topic_results, OUTPUT_DIR)
    analyzer.create_detailed_report(overall_results, OUTPUT_DIR)

    print("\n" + "=" * 60)
    print("EVALUATION COMPLETE")
    print("=" * 60)
    print(f"Results saved to: {OUTPUT_DIR}/")
    print("Files created:")
    print(" - results_overall.json (detailed results)")
    print(" - comparison_overall.csv (summary table)")
    print(" - performance_comparison.png (charts)")
    print(" - evaluation_report.md (detailed report)")
    if topic_results:
        print(" - topic_performance_heatmap.png (topic analysis)")
        print(" - topic_summary.csv (topic breakdown)")


def main():
    """Run comprehensive evaluation.

    Orchestrates the full pipeline: data preparation, model initialization,
    overall and per-topic evaluation, and report generation. Any failure is
    logged with its traceback and re-raised so the process exits non-zero.
    """
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    logger.info("Starting Smart Summarizer Evaluation")
    logger.info("Sample size: %d", SAMPLE_SIZE)

    try:
        loader = CNNDailyMailLoader()
        eval_data, categorized_data = _prepare_data(loader)

        logger.info("Step 4: Initializing models...")
        evaluator = ModelEvaluator()
        evaluator.initialize_models()

        overall_results = _evaluate_overall(evaluator, eval_data)
        topic_results = _evaluate_topics(evaluator, categorized_data)
        _report(overall_results, topic_results)
    except Exception as e:
        # logger.exception records the traceback, not just the message.
        logger.exception("Evaluation failed: %s", e)
        raise


if __name__ == "__main__":
    main()