import pandas as pd
import evaluate
import csv
import io
import tempfile
import os
import shutil
import sys
import traceback

# Dynamically add the mdd_eval directory to the path so its modules can be imported.
current_dir = os.path.dirname(os.path.abspath(__file__))
mdd_eval_path = os.path.join(current_dir, "mdd_eval")
if mdd_eval_path not in sys.path:
    sys.path.append(mdd_eval_path)

try:
    from mdd_eval.align_data import evaluate_from_dfs
    from mdd_eval.ins_del_cor_sub_analysis import analyze_alignment
except ImportError:
    # If the package-style import fails, fall back to the sys.path entry added above.
    import align_data
    import ins_del_cor_sub_analysis
    evaluate_from_dfs = align_data.evaluate_from_dfs
    analyze_alignment = ins_del_cor_sub_analysis.analyze_alignment

wer = evaluate.load("wer")


def load_leaderboard(db_path):
    try:
        if os.path.exists(db_path):
            return pd.read_csv(db_path)
        return pd.DataFrame()
    except Exception:
        # Return an empty dataframe on error (e.g. EmptyDataError).
        return pd.DataFrame()


try:
    from datasets import load_dataset
except ImportError:
    load_dataset = None

IDS_CACHE_FILE = "IDs.txt"


def get_allowed_ids():
    print("DEBUG: Entering get_allowed_ids...", flush=True)
    # Return the set of allowed IDs from the local cache or from Hugging Face.
    if os.path.exists(IDS_CACHE_FILE):
        print(f"DEBUG: Found {IDS_CACHE_FILE}, reading...", flush=True)
        with open(IDS_CACHE_FILE, 'r') as f:
            ids = set(line.strip() for line in f if line.strip())
        if ids:
            print(f"Loaded {len(ids)} allowed IDs from {IDS_CACHE_FILE}", flush=True)
            return ids
    # Not cached: load from Hugging Face.
    hf_token = os.environ.get("SPACE_HF_TOKEN") or os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")
    print(f"DEBUG: Checking HF_TOKEN: {'Present' if hf_token else 'Missing'}", flush=True)
    if hf_token and load_dataset:
        try:
            print("Fetching allowed IDs from IqraEval/QuranMB.v2 (split='test')...", flush=True)
            # Use streaming to avoid downloading the audio files.
            dataset = load_dataset("IqraEval/QuranMB.v2", split="test", token=hf_token, streaming=True)
            allowed_ids = set()
            print("DEBUG: Iterating dataset...", flush=True)
            count = 0
            for item in dataset:
                if "ID" in item:
                    allowed_ids.add(str(item["ID"]).strip())
                elif "id" in item:
                    allowed_ids.add(str(item["id"]).strip())
                count += 1
                if count % 1000 == 0:
                    print(f"DEBUG: Processed {count} items...", flush=True)
            if allowed_ids:
                with open(IDS_CACHE_FILE, 'w') as f:
                    for i in sorted(allowed_ids):
                        f.write(f"{i}\n")
                print(f"Cached {len(allowed_ids)} IDs to {IDS_CACHE_FILE}", flush=True)
                return allowed_ids
            else:
                print("DEBUG: No IDs found in dataset.", flush=True)
        except Exception as e:
            print(f"Error fetching allowed IDs: {e}", flush=True)
    print("DEBUG: Returning None from get_allowed_ids", flush=True)
    return None
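
# Usage sketch (illustrative, not executed at import time): the first call
# streams the test split and writes IDs.txt; subsequent calls read the cache.
#
#   allowed = get_allowed_ids()
#   if allowed is not None:
#       print(f"{len(allowed)} IDs in the official test set")
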
def load_ground_truth_references(ground_truth_path):
    # Resolve the set of allowed IDs first so both sources can be filtered.
    allowed_ids = get_allowed_ids()

    # Attempt to load from Hugging Face if a token is present.
    hf_token = os.environ.get("SPACE_HF_TOKEN") or os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")
    if hf_token and load_dataset:
        try:
            print("Attempting to load ground truth from Hugging Face (IqraEval/QuranMB.v2.labels)...")
            # The labels dataset exposes its data under the 'train' split.
            dataset = load_dataset("IqraEval/QuranMB.v2.labels", split="train", token=hf_token)
            references = []
            for item in dataset:
                rid = str(item.get("ID", "")).strip()
                # Filter against the allowed-ID list when one exists.
                if allowed_ids and rid not in allowed_ids:
                    continue
                ref_data = {
                    "ID": rid,
                    "Reference_phn": item.get("reference"),
                    "Annotation_phn": item.get("canonical"),
                }
                # Fallback mapping in case the dataset uses slightly different keys.
                if not ref_data["Reference_phn"] and "Reference_phn" in item:
                    ref_data["Reference_phn"] = item["Reference_phn"]
                if not ref_data["Annotation_phn"] and "Annotation_phn" in item:
                    ref_data["Annotation_phn"] = item["Annotation_phn"]
                references.append(ref_data)
            print(f"Successfully loaded {len(references)} filtered references from Hugging Face.")
            return references
        except Exception as e:
            print(f"Failed to load from Hugging Face: {e}")
            print("Falling back to local CSV file.")

    # Fallback to the local file.
    if not os.path.exists(ground_truth_path):
        print(f"Warning: Ground truth file not found at {ground_truth_path}")
        return []
    with open(ground_truth_path, newline='') as f:
        reader = csv.DictReader(f)
        references = []
        for row in reader:
            rid = str(row.get("id", "")).strip()
            # Filter against the allowed-ID list when one exists.
            if allowed_ids and rid not in allowed_ids:
                continue
            ref_data = {"ID": rid}
            if "Reference_phn" in row:
                ref_data["Reference_phn"] = row["Reference_phn"]
            elif "reference_phn" in row:
                ref_data["Reference_phn"] = row["reference_phn"]
            elif "correct_phoneme" in row:
                ref_data["Reference_phn"] = row["correct_phoneme"]
            if "Annotation_phn" in row:
                ref_data["Annotation_phn"] = row["Annotation_phn"]
            elif "annotation_phn" in row:
                ref_data["Annotation_phn"] = row["annotation_phn"]
            elif "Reference_phn" in ref_data:
                ref_data["Annotation_phn"] = ref_data["Reference_phn"]
            references.append(ref_data)
    print(f"Loaded {len(references)} filtered references from local file.")
    return references
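
# Illustrative local fallback layout (the ID and phoneme strings below are
# hypothetical placeholders; any of the column aliases handled above also work):
#
#   id,Reference_phn,Annotation_phn
#   utt_0001,p1 p2 p3,p1 p2 p3
#   utt_0002,p4 p5,p4 p5
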
def parse_submission_csv(file_obj):
    # Handle the different input types we may receive (bytes, file-like object,
    # file path, or raw CSV string).
    content = ""
    if isinstance(file_obj, bytes):
        content = file_obj.decode("utf-8")
    elif hasattr(file_obj, 'read'):
        # File-like object.
        try:
            content = file_obj.read()
            if isinstance(content, bytes):
                content = content.decode("utf-8")
        except Exception:
            # The object was not readable in the way we expected.
            pass
    if not content and isinstance(file_obj, str):
        if os.path.exists(file_obj):
            # File path.
            with open(file_obj, 'r', encoding='utf-8') as f:
                content = f.read()
        else:
            # Treat the string itself as the CSV content.
            content = file_obj
    if not content:
        raise ValueError("Could not read content from submission file.")

    text_stream = io.StringIO(content)
    reader = csv.DictReader(text_stream)
    if not reader.fieldnames:
        raise ValueError("CSV file is empty or missing headers.")

    # Case-insensitive column check with whitespace stripped.
    lower_fieldnames = [f.lower().strip() for f in reader.fieldnames]

    # Validate columns.
    id_col = None
    pred_col = None
    if "id" in lower_fieldnames:
        id_col = "id"
    if "predicted_phoneme" in lower_fieldnames:
        pred_col = "predicted_phoneme"
    elif "labels" in lower_fieldnames:
        pred_col = "labels"
    elif "prediction" in lower_fieldnames:
        pred_col = "prediction"  # Accepted for resilience.
    if not id_col or not pred_col:
        raise ValueError("Submission CSV must contain columns: 'id' and 'predicted_phoneme' (or 'labels'/'prediction').")

    results = []
    for row in reader:
        # Map lower-cased, whitespace-stripped keys to values.
        row_lower = {k.lower().strip(): v for k, v in row.items()}
        results.append({"ID": row_lower.get(id_col), "Prediction": row_lower.get(pred_col)})
    return results
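
# A minimal submission the parser accepts (hypothetical IDs and phonemes):
#
#   id,predicted_phoneme
#   utt_0001,p1 p2 p3
#   utt_0002,p4 p5
#
# Header matching is case-insensitive, and 'labels' or 'prediction' may stand
# in for 'predicted_phoneme'.
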
def calculate_comprehensive_metrics(submission_file_obj, references):
    metrics = {}
    error_message = None
    if not references:
        return None, ("Error: Ground truth references could not be loaded. Please ensure "
                      "'ground_truth.csv' exists locally or HF_TOKEN is set to access "
                      "'IqraEval/QuranMB.v2.labels'.")

    temp_dir = tempfile.mkdtemp()
    try:
        temp_aligned_dir = os.path.join(temp_dir, "aligned")
        os.makedirs(temp_aligned_dir, exist_ok=True)

        # Prepare the ground truth DataFrame with canonical column names.
        truth_df = pd.DataFrame(references)
        truth_df = truth_df.rename(columns={"id": "ID", "reference_phn": "Reference_phn", "annotation_phn": "Annotation_phn"})

        # Prepare the prediction DataFrame.
        predictions = parse_submission_csv(submission_file_obj)
        pred_df = pd.DataFrame(predictions)

        if not all(col in truth_df.columns for col in ["ID", "Reference_phn", "Annotation_phn"]):
            return None, "Error: Ground truth references missing required columns."
        if not all(col in pred_df.columns for col in ["ID", "Prediction"]):
            return None, "Error: Submission missing 'ID' or 'Prediction' columns."

        # Validate that the submitted IDs match the ground truth IDs.
        truth_ids = set(truth_df["ID"].astype(str).str.strip())
        pred_ids = set(pred_df["ID"].astype(str).str.strip())
        if len(truth_ids) != len(pred_ids):
            return None, (f"Error: Mismatch in number of predictions. Expected {len(truth_ids)} IDs, "
                          f"but got {len(pred_ids)}. Please ensure your submission covers the entire test set.")
        missing_ids = truth_ids - pred_ids
        if missing_ids:
            # Show a few missing IDs as examples.
            example_missing = list(missing_ids)[:3]
            return None, f"Error: Submission IDs do not match ground truth. Missing IDs example: {example_missing}"

        # --- Step 1: Run alignment ---
        corr_rate, acc = evaluate_from_dfs(
            truth_df=truth_df,
            pred_df=pred_df,
            output_dir=temp_aligned_dir,
            wov=False,
            print_output=False
        )

        # --- Step 2: Run analysis ---
        metrics = analyze_alignment(temp_aligned_dir)
        if "Error" in metrics:
            return None, f"Analysis Error: {metrics['Error']}"

        # Report the number of evaluated samples.
        metrics["Samples"] = len(pred_ids.intersection(truth_ids))

        # Add basic metrics if the analysis did not produce them.
        if "Accuracy" not in metrics:
            metrics["Accuracy"] = acc
        if "PER" not in metrics:
            metrics["PER"] = 1.0 - acc
    except Exception as e:
        error_message = f"Error during metric calculation: {str(e)}"
        print(traceback.format_exc())
    finally:
        # Clean up temporary files.
        if temp_dir and os.path.exists(temp_dir):
            shutil.rmtree(temp_dir)
    return (metrics if not error_message else None), error_message


def calculate_per_score(submission_file_obj, references):
    """Legacy wrapper kept for backward compatibility."""
    metrics, error_message = calculate_comprehensive_metrics(submission_file_obj, references)
    if metrics is None:
        return 0.0, 0
    return metrics.get("PER", 0.0), metrics.get("Samples", 0)


custom_css = """
#leaderboard-table td, #leaderboard-table th {
    white-space: nowrap;
    min-width: 100px;
    text-align: center !important;
}
#leaderboard-table th > div, #leaderboard-table th > span {
    justify-content: center !important;
    text-align: center !important;
    width: 100%;
    display: flex;
}
"""
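

if __name__ == "__main__":
    # Minimal local smoke test (a sketch: 'submission.csv' and 'ground_truth.csv'
    # are assumed example paths, not files shipped with this module).
    refs = load_ground_truth_references("ground_truth.csv")
    per, n_samples = calculate_per_score("submission.csv", refs)
    print(f"PER={per:.4f} over {n_samples} samples")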