import pandas as pd
import os
from typing import Optional, List

class DataLoader:
    """Load a benchmark results CSV and expose simple filtering helpers.

    The table is read from ``<results_dir>/results.csv``. If that file does
    not exist, a built-in sample table is written there first. The loaded
    frame is kept in ``df_all`` (``None`` when loading failed).
    """

    def __init__(self, results_dir: str = "./data"):
        """Remember *results_dir* and immediately attempt to load the CSV."""
        self.results_dir = results_dir
        # Full, unfiltered table; None until a load succeeds.
        self.df_all: Optional[pd.DataFrame] = None
        self.reload_data()

    def reload_data(self) -> str:
        """(Re)load ``results.csv`` into ``df_all``.

        Returns:
            A human-readable status string: a success message with the row
            count, or an error message when creating or reading the file
            failed (in which case ``df_all`` is reset to ``None``).
        """
        csv_path = os.path.join(self.results_dir, "results.csv")

        try:
            # Sample creation lives inside the try so that filesystem errors
            # are reported through the status string instead of escaping
            # from the constructor.
            if not os.path.exists(csv_path):
                self._create_sample_data(csv_path)
            # read_csv already infers int64/float64 for numeric columns, so
            # no additional numeric coercion is needed here.
            self.df_all = pd.read_csv(csv_path)
        except Exception as e:
            # Deliberately broad: this is the boundary that turns any load
            # failure into a status message for the caller.
            self.df_all = None
            return f"❌ Error loading data: {str(e)}"
        return f"✅ Loaded {len(self.df_all)} models from {csv_path}"

    def _create_sample_data(self, path: str):
        """Write the built-in sample results table to *path* as CSV."""
        parent = os.path.dirname(path)
        # os.makedirs("") raises, so only create a parent when there is one
        # (a bare filename means "current directory").
        if parent:
            os.makedirs(parent, exist_ok=True)
        data = {
            "Model": [
                "NVIDIA Cosmos", "HunyuanVideo-1.5", "WAN 2.2", "CogVideoX-I2V", "YUME 1.5",
                "Matrix-game 2.0", "HY-World 1.5",
                "CameraCtrl", "MotionCtrl", "CamI2V", "RealCam-I2V", "videox-fun-Wan", "AC3D", "ASTRA"
            ],
            "Category": [
                "Text-Conditioned", "Text-Conditioned", "Text-Conditioned", "Text-Conditioned", "Text-Conditioned",
                "One-hot", "One-hot",
                "Intrinsics/Extrinsics", "Intrinsics/Extrinsics", "Intrinsics/Extrinsics",
                "Intrinsics/Extrinsics", "Intrinsics/Extrinsics", "Intrinsics/Extrinsics", "Intrinsics/Extrinsics"
            ],
            "Average": [0.6275, 0.7188, 0.5731, 0.6963, 0.6209, 0.5663, 0.7873,
                        0.5762, 0.5486, 0.5765, 0.6865, 0.7474, 0.7149, 0.5980],
            "Image Quality": [0.6778, 0.7128, 0.5545, 0.6521, 0.6232, 0.4851, 0.6675,
                              0.4473, 0.4562, 0.5284, 0.6227, 0.6410, 0.4573, 0.5335],
            "Brightness Consistency": [0.6952, 0.7027, 0.3886, 0.8988, 0.3810, 0.2963, 0.8051,
                                        0.3717, 0.3980, 0.4343, 0.4130, 0.5972, 0.7307, 0.5091],
            "Color Temperature": [0.7170, 0.7477, 0.3411, 0.8129, 0.4165, 0.2937, 0.7819,
                                  0.2511, 0.2012, 0.3568, 0.5547, 0.5473, 0.6524, 0.4338],
            "Sharpness Retention": [0.4363, 0.5545, 0.3428, 0.7951, 0.4023, 0.4149, 0.6634,
                                    0.4545, 0.4294, 0.4297, 0.6269, 0.5998, 0.5332, 0.5488],
            "Motion Smoothness": [0.9907, 0.9908, 0.9557, 0.9938, 0.9765, 0.9848, 0.9921,
                                  0.9796, 0.9735, 0.9861, 0.9860, 0.9858, 0.9919, 0.9799],
            "Trajectory Accuracy": [0.4955, 0.6844, 0.6514, 0.5950, 0.7113, 0.7008, 0.7472,
                                    0.6778, 0.6730, 0.6314, 0.5630, 0.7172, 0.5785, 0.6115],
            "Memory Symmetry": [0.3738, 0.6336, 0.4480, 0.6010, 0.5276, 0.3311, 0.8481,
                                0.4279, 0.3098, 0.3631, 0.7948, 0.9009, 0.9068, 0.4323],
            "Trajectory Alignment": [0.6419, 0.6449, 0.5703, 0.4084, 0.5988, 0.6362, 0.6776,
                                     0.6097, 0.5932, 0.6038, 0.6668, 0.6876, 0.6250, 0.5518],
            "Year": [2024] * 14
        }
        df = pd.DataFrame(data)
        df.to_csv(path, index=False)
        print(f"Created sample data at {path}")

    def _column_choices(self, column: str, reverse: bool = False) -> list:
        """Return ``["All"]`` plus the sorted distinct non-null values of *column*.

        Only ``["All"]`` is returned when no data is loaded or the column is
        absent from the table.
        """
        if self.df_all is None or column not in self.df_all.columns:
            return ["All"]
        distinct = self.df_all[column].dropna().unique().tolist()
        return ["All"] + sorted(distinct, reverse=reverse)

    @staticmethod
    def _parse_year(value) -> Optional[float]:
        """Convert a year filter value to a number, or ``None`` if it is not one."""
        try:
            return float(value)
        except (TypeError, ValueError):
            return None

    def get_open_source_choices(self) -> List[str]:
        """Return ``"All"`` followed by the sorted values of the "Open Source" column."""
        return self._column_choices("Open Source")

    def get_year_choices(self) -> list:
        """Return ``"All"`` followed by the distinct years, newest first.

        Years keep the column's numeric type, except that whole-number floats
        (which pandas produces when the column contains blanks) are returned
        as ints so the choices read ``2024`` rather than ``2024.0``.
        """
        return [
            int(choice) if isinstance(choice, float) and choice.is_integer() else choice
            for choice in self._column_choices("Year", reverse=True)
        ]

    def get_category_choices(self) -> List[str]:
        """Return ``"All"`` followed by the sorted values of the "Category" column."""
        return self._column_choices("Category")

    def filter_data(self, model_filter: str = "", open_source_filter: str = "All",
                   year_filter: str = "All", category_filter: str = "All") -> pd.DataFrame:
        """Return the rows of the loaded table that match every given filter.

        Args:
            model_filter: Case-insensitive literal substring to look for in
                the "Model" column; an empty string disables the filter.
            open_source_filter: Exact "Open Source" value, or ``"All"``.
            year_filter: Year to keep, given as a number or numeric string
                (``"2024"`` and ``"2024.0"`` both work), or ``"All"``. A value
                that cannot be parsed as a number matches no rows.
            category_filter: Exact "Category" value, or ``"All"``.

        Returns:
            A filtered copy of the table; an empty ``DataFrame`` when no data
            is loaded. A filter whose column is missing from the table is
            skipped.
        """
        if self.df_all is None:
            return pd.DataFrame()
        df = self.df_all.copy()
        if model_filter and "Model" in df.columns:
            # regex=False: the text is a user search term, so characters such
            # as "(" or "+" must match literally instead of raising re.error.
            matches = df["Model"].str.contains(
                model_filter, case=False, na=False, regex=False
            )
            df = df[matches]
        if open_source_filter != "All" and "Open Source" in df.columns:
            df = df[df["Open Source"] == open_source_filter]
        if year_filter != "All" and "Year" in df.columns:
            wanted_year = self._parse_year(year_filter)
            if wanted_year is None:
                # Not a number: nothing can match, mirroring how an unknown
                # category simply yields no rows.
                df = df.iloc[0:0]
            else:
                # Compare numerically so int and float Year columns behave
                # the same way.
                df = df[pd.to_numeric(df["Year"], errors="coerce") == wanted_year]
        if category_filter != "All" and "Category" in df.columns:
            df = df[df["Category"] == category_filter]
        return df