# NEW-ASR-VOX

# ==============================================================================
# Cell 1: Complete Setup - Based on Your Working VoxLingua Code
# ==============================================================================

import os, re, glob, csv
import torch
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, confusion_matrix
from speechbrain.inference.classifiers import EncoderClassifier
try:
    # SpeechBrain >= 1.0 exposes the pretrained interfaces under speechbrain.inference
    from speechbrain.inference.interfaces import foreign_class
except ImportError:
    # Older releases keep them under the deprecated speechbrain.pretrained namespace
    from speechbrain.pretrained.interfaces import foreign_class
import torchaudio
import warnings
warnings.filterwarnings('ignore')

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")


# ==============================================================================
# Cell 2: Load Multiple Language Detection Models for Ensemble
# ==============================================================================
print("πŸ”„ Loading Multiple Language Detection Models...")

# Model 1: VoxLingua107 ECAPA-TDNN (Your working baseline - 60% weight, see Cell 5)
voxlingua_model = None
try:
    print("Loading VoxLingua107 ECAPA-TDNN...")
    voxlingua_model = EncoderClassifier.from_hparams(
        source="speechbrain/lang-id-voxlingua107-ecapa",
        savedir="pretrained_models/langid_voxlingua107_ecapa",
        run_opts={"device": device}
    )
    print("βœ… VoxLingua107 loaded successfully")
except Exception as e:
    print(f"❌ VoxLingua107 failed: {e}")

# Model 2: XLS-R Language ID (40% weight, see Cell 5)
xlsr_lid_model = None
try:
    print("Loading TalTechNLP XLS-R Language ID...")
    xlsr_lid_model = foreign_class(
        source="TalTechNLP/voxlingua107-xls-r-300m-wav2vec",
        pymodule_file="encoder_wav2vec_classifier.py",
        classname="EncoderWav2vecClassifier",
        hparams_file="inference_wav2vec.yaml",
        savedir="pretrained_models/xlsr_voxlingua",
        run_opts={"device": device}
    )
    print("βœ… XLS-R Language ID loaded successfully")
except Exception as e:
    print(f"❌ XLS-R failed: {e}")

models_loaded = sum(p is not None for p in [voxlingua_model, xlsr_lid_model])
print(f"\nπŸ“Š Models loaded: {models_loaded}/2")


# ==============================================================================
# Cell 3: Complete Language Mappings from Your Dataset
# ==============================================================================

# All languages from your dataset (based on the accuracy table you showed)
DATASET_LANGUAGES = {
    # Indo-Aryan Languages
    'ur', 'pa', 'hi', 'bn', 'ne', 'as', 'ks', 'mr', 'gu', 'or',
    # Dravidian Languages
    'ta', 'te', 'kn', 'ml',
    # Low-Resource Languages
    'sd', 'kok', 'br', 'doi', 'sat', 'mni',
    # Others in your dataset
    'sa'  # Sanskrit
}

# Language Family Classifications
INDO_ARYAN_LANGS = {'ur', 'pa', 'hi', 'bn', 'ne', 'as', 'ks', 'mr', 'gu', 'or', 'sd'}
DRAVIDIAN_LANGS = {'ta', 'te', 'kn', 'ml'}
LOW_RESOURCE_LANGS = {'kok', 'br', 'doi', 'sat', 'mni'}
OTHER_LANGS = {'sa'}  # Sanskrit

ALL_SUPPORTED_LANGS = INDO_ARYAN_LANGS | DRAVIDIAN_LANGS | LOW_RESOURCE_LANGS | OTHER_LANGS

# Cross-Lingual Transfer Mappings (Research-Based)
TRANSFER_MAPPINGS = {
    # Low-resource to high-resource language mappings
    'br': 'hi',     # Bodo β†’ Hindi (brx mapped to br in your dataset)
    'sat': 'hi',    # Santali β†’ Hindi
    'doi': 'pa',    # Dogri β†’ Punjabi
    'mni': 'bn',    # Manipuri β†’ Bengali
    'kok': 'mr',    # Konkani β†’ Marathi (geographic proximity)
    'sd': 'hi',     # Sindhi β†’ Hindi
}

# Language Code Mappings (VoxLingua output to your dataset codes)
VOXLINGUA_TO_DATASET = {
    'urd': 'ur', 'urdu': 'ur',
    'pan': 'pa', 'punjabi': 'pa', 'pnb': 'pa',
    'hin': 'hi', 'hindi': 'hi',
    'ben': 'bn', 'bengali': 'bn',
    'nep': 'ne', 'nepali': 'ne',
    'asm': 'as', 'assamese': 'as',
    'kas': 'ks', 'kashmiri': 'ks',
    'mar': 'mr', 'marathi': 'mr',
    'guj': 'gu', 'gujarati': 'gu',
    'ori': 'or', 'odia': 'or', 'ory': 'or',
    'tam': 'ta', 'tamil': 'ta',
    'tel': 'te', 'telugu': 'te',
    'kan': 'kn', 'kannada': 'kn',
    'mal': 'ml', 'malayalam': 'ml',
    'sin': 'sd', 'sindhi': 'sd', 'snd': 'sd',  # NB: 'sin' is ISO 639-3 Sinhala; folding it into Sindhi is a heuristic
    'kok': 'kok', 'konkani': 'kok',
    'san': 'sa', 'sanskrit': 'sa',
    # Common variations
    'bho': 'hi',  # Bhojpuri β†’ Hindi
    'mai': 'hi',  # Maithili β†’ Hindi
    'mag': 'hi',  # Magahi β†’ Hindi
}

print("βœ… Complete language mappings loaded")
print(f"πŸ“Š Total dataset languages: {len(ALL_SUPPORTED_LANGS)}")
print(f"πŸ“Š Mapping variations: {len(VOXLINGUA_TO_DATASET)}")


# ==============================================================================
# Cell 4: Enhanced Parsing Functions (Your Working Code + Improvements)
# ==============================================================================

def parse_top1(out):
    """Parse VoxLingua107 output - your exact working function"""
    logits, log_conf, pred_idx, labels = out
    label_str = labels[0] if (isinstance(labels, (list, tuple)) and len(labels) > 0) else "unknown"
    if not isinstance(label_str, str):
        label_str = str(label_str)
    colon_pos = label_str.find(":")
    if colon_pos != -1:
        iso = label_str[:colon_pos].strip()
    else:
        iso = label_str.strip()
    conf = float(log_conf.exp().item())
    return iso, label_str, conf
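
# Minimal illustration (assumes VoxLingua107's "xx: Language" label format,
# which the colon handling above targets):
#   out = voxlingua_model.classify_file("clip.wav")   # hypothetical file
#   iso, label_str, conf = parse_top1(out)            # e.g. ("hi", "hi: Hindi", 0.97)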

def parse_xlsr_output(out):
    """Parse XLS-R model output"""
    try:
        out_prob, score, index, text_lab = out
        lang_code = str(text_lab[0]).strip().lower()
        confidence = float(out_prob.exp().max().item())
        return lang_code, confidence
    except Exception as e:
        print(f"      XLS-R parsing error: {e}")
        return "unknown", 0.0

def map_to_dataset_language(detected_lang):
    """Map VoxLingua/XLS-R output to your dataset language codes"""

    # Direct match first
    if detected_lang in ALL_SUPPORTED_LANGS:
        return detected_lang

    # Check mapping dictionary
    mapped = VOXLINGUA_TO_DATASET.get(detected_lang.lower(), detected_lang)

    # If still not in dataset, try transfer mapping.
    # NB: as written this branch never fires, because every TRANSFER_MAPPINGS key
    # is itself a dataset language; it only matters if non-dataset keys are added.
    if mapped not in ALL_SUPPORTED_LANGS and mapped in TRANSFER_MAPPINGS:
        transfer_target = TRANSFER_MAPPINGS[mapped]
        print(f"      Transfer mapping: {mapped} β†’ {transfer_target}")
        return transfer_target

    return mapped

print("βœ… Enhanced parsing functions ready")


# ==============================================================================
# Cell 5: Hybrid Multi-Model Language Detection
# ==============================================================================

def hybrid_language_detection(audio_path):
    """
    Multi-model ensemble language detection optimized for your dataset
    """

    print(f"   🎧 Analyzing: {os.path.basename(audio_path)}")

    predictions = {}
    confidences = {}

    # Model 1: VoxLingua107 (Primary - 60% weight since it's your working baseline)
    if voxlingua_model is not None:
        try:
            out = voxlingua_model.classify_file(audio_path)
            pred_iso, pred_label, conf = parse_top1(out)

            # Map to dataset language codes
            mapped_lang = map_to_dataset_language(pred_iso)

            predictions['voxlingua'] = mapped_lang
            confidences['voxlingua'] = conf * 0.60  # 60% weight
            print(f"      VoxLingua107: {pred_iso} β†’ {mapped_lang} ({conf:.3f})")

        except Exception as e:
            print(f"      VoxLingua107 error: {e}")

    # Model 2: XLS-R (Secondary - 40% weight)
    if xlsr_lid_model is not None:
        try:
            out = xlsr_lid_model.classify_file(audio_path)
            lang_code, conf = parse_xlsr_output(out)

            # Map to dataset language codes
            mapped_lang = map_to_dataset_language(lang_code)

            predictions['xlsr'] = mapped_lang
            confidences['xlsr'] = conf * 0.40  # 40% weight
            print(f"      XLS-R: {lang_code} β†’ {mapped_lang} ({conf:.3f})")

        except Exception as e:
            print(f"      XLS-R error: {e}")

    # Ensemble Decision Making
    if not predictions:
        return "unknown", 0.0

    # Strategy 1: Check for agreement between models
    if len(predictions) >= 2:
        pred_values = list(predictions.values())
        if pred_values[0] == pred_values[1]:  # Models agree
            consensus_lang = pred_values[0]
            avg_confidence = sum(confidences.values()) / len(confidences)
            print(f"   🎯 Consensus: {consensus_lang} (confidence: {avg_confidence:.3f})")
            return consensus_lang, avg_confidence

    # Strategy 2: Use highest weighted confidence
    if confidences:
        best_model = max(confidences.keys(), key=lambda k: confidences[k])
        best_lang = predictions[best_model]
        best_conf = confidences[best_model] / (0.60 if best_model == 'voxlingua' else 0.40)  # Normalize

        print(f"   🎯 Best model ({best_model}): {best_lang} (confidence: {best_conf:.3f})")
        return best_lang, best_conf

    return "unknown", 0.0

print("βœ… Hybrid ensemble language detection ready")


# ==============================================================================
# Cell 6: Complete Ground Truth Extraction for Your Dataset
# ==============================================================================

def gt_from_filename(path):
    """Extract ground truth from filename - complete version for your dataset"""

    name = os.path.basename(path).lower()

    # Pattern 1: Your working regex pattern
    GT_TOKEN = re.compile(r'(?:^|[_-])([a-z]{2,4})(?:[_-]|$)', re.IGNORECASE)
    m = GT_TOKEN.search(name)

    if m:
        code = m.group(1).lower()

        # Complete mapping based on your dataset structure
        filename_mappings = {
            # Your working mappings
            "guf": "gu", "mrt": "mr", "ml": "ml",

            # Additional mappings for your complete dataset
            "urd": "ur", "urdu": "ur",
            "pan": "pa", "punjabi": "pa", "pnb": "pa",
            "hin": "hi", "hindi": "hi",
            "ben": "bn", "bengali": "bn", "bng": "bn",
            "nep": "ne", "nepali": "ne",
            "asm": "as", "assamese": "as",
            "kas": "ks", "kashmiri": "ks",
            "mar": "mr", "marathi": "mr",
            "guj": "gu", "gujarati": "gu",
            "ori": "or", "odia": "or", "ory": "or",
            "tam": "ta", "tamil": "ta",
            "tel": "te", "telugu": "te",
            "kan": "kn", "kannada": "kn",
            "mal": "ml", "malayalam": "ml",
            "sin": "sd", "sindhi": "sd", "snd": "sd",
            "kok": "kok", "konkani": "kok",
            "bod": "br", "bodo": "br",  # Bodo variations
            "dog": "doi", "dogri": "doi",
            "sat": "sat", "santali": "sat",
            "mni": "mni", "manipuri": "mni",
            "san": "sa", "sanskrit": "sa",
        }

        mapped_code = filename_mappings.get(code, code)

        # Validate against your dataset languages
        if mapped_code in ALL_SUPPORTED_LANGS:
            return mapped_code

    # Pattern 2: Check folder structure
    path_parts = path.split('/')
    for part in path_parts:
        part_lower = part.lower()
        if part_lower in ALL_SUPPORTED_LANGS:
            return part_lower
        # Check if it's a language name folder
        for full_name, code in [('gujarati', 'gu'), ('marathi', 'mr'), ('hindi', 'hi'),
                               ('bengali', 'bn'), ('tamil', 'ta'), ('telugu', 'te'),
                               ('kannada', 'kn'), ('malayalam', 'ml'), ('punjabi', 'pa'),
                               ('urdu', 'ur'), ('assamese', 'as'), ('odia', 'or'),
                               ('nepali', 'ne'), ('kashmiri', 'ks'), ('sindhi', 'sd'),
                               ('konkani', 'kok'), ('bodo', 'br'), ('dogri', 'doi'),
                               ('santali', 'sat'), ('manipuri', 'mni'), ('sanskrit', 'sa')]:
            if full_name in part_lower:
                return code

    return None

print("βœ… Complete ground truth extraction ready")


# ==============================================================================
# Cell 7: Google Drive Processing with Error Handling
# ==============================================================================

def download_and_process_drive_dataset():
    """Download and process with robust error handling"""

    print("πŸ“ Processing Google Drive dataset...")

    # Get sharing link
    share_link = input("πŸ”— Enter Google Drive sharing link: ").strip()

    if not share_link:
        print("❌ No link provided")
        return []

    # Extract file ID
    def extract_file_id(link):
        patterns = [r'/folders/([a-zA-Z0-9-_]+)', r'id=([a-zA-Z0-9-_]+)', r'/file/d/([a-zA-Z0-9-_]+)']
        for pattern in patterns:
            match = re.search(pattern, link)
            if match:
                return match.group(1)
        return None

    file_id = extract_file_id(share_link)
    if not file_id:
        print("❌ Could not extract file ID from sharing link")
        return []

    # Setup download directory
    download_dir = "/content/drive_dataset"
    if os.path.exists(download_dir):
        import shutil
        shutil.rmtree(download_dir)
    os.makedirs(download_dir, exist_ok=True)

    # Download with error handling
    try:
        import gdown
        print(f"πŸ“₯ Downloading from Google Drive (ID: {file_id})...")
        gdown.download_folder(f"https://drive.google.com/drive/folders/{file_id}",
                            output=download_dir, quiet=False, use_cookies=False)
        print("βœ… Download completed successfully")

    except Exception as e:
        print(f"❌ Download failed: {e}")
        print("πŸ’‘ Make sure the folder is shared with 'Anyone with the link can view'")
        return []

    # Scan for audio files
    VALID_EXTS = {".wav", ".mp3", ".flac", ".m4a", ".ogg"}

    def is_audio(filepath):
        return os.path.splitext(filepath)[1].lower() in VALID_EXTS

    print("πŸ” Scanning for audio files...")
    all_files = []

    for root, dirs, files in os.walk(download_dir):
        for file in files:
            if is_audio(file):
                full_path = os.path.join(root, file)
                all_files.append(full_path)

    print(f"πŸ“Š Found {len(all_files)} total audio files")

    # Filter and limit files
    filtered_files = []
    lang_counts = {}
    english_skipped = 0

    for file_path in all_files:
        # Skip English files
        if any(eng_indicator in file_path.lower() for eng_indicator in
               ['english', '_en_', '/en/', 'eng_', '_eng']):
            english_skipped += 1
            continue

        # Extract language for limiting
        gt_lang = gt_from_filename(file_path)
        if gt_lang:
            lang_counts[gt_lang] = lang_counts.get(gt_lang, 0)
            if lang_counts[gt_lang] < 5:  # Max 5 per language
                filtered_files.append(file_path)
                lang_counts[gt_lang] += 1
        else:
            # Include files without clear language markers (up to overall limit)
            if len(filtered_files) < 50:
                filtered_files.append(file_path)

    print(f"πŸ“Š Filtered results:")
    print(f"   English files skipped: {english_skipped}")
    print(f"   Selected for processing: {len(filtered_files)}")

    for lang, count in sorted(lang_counts.items()):
        print(f"   {lang}: {count} files")

    return filtered_files

# Execute download and processing
test_files = download_and_process_drive_dataset()
print(f"\n🎯 Total files ready for language detection: {len(test_files)}")


# ==============================================================================
# Cell 8: Execute Language Detection Analysis
# ==============================================================================

def run_language_detection_analysis(audio_files):
    """Run complete language detection analysis"""

    if not audio_files:
        print("❌ No audio files to process")
        return

    print(f"πŸš€ Starting language detection on {len(audio_files)} files...")
    print("=" * 60)

    results = []

    for i, audio_path in enumerate(audio_files, 1):
        print(f"\n[{i}/{len(audio_files)}] Processing: {os.path.basename(audio_path)}")

        try:
            # Extract ground truth
            gt_iso = gt_from_filename(audio_path)

            # Run hybrid detection
            pred_iso, confidence = hybrid_language_detection(audio_path)

            # Determine correctness
            is_correct = (gt_iso == pred_iso) if gt_iso else None

            result = {
                "file": os.path.basename(audio_path),
                "full_path": audio_path,
                "gt_iso": gt_iso if gt_iso else "",
                "pred_iso": pred_iso,
                "confidence": confidence,
                "correct": is_correct
            }

            results.append(result)

            # Status display
            status = "βœ…" if is_correct else "❌" if is_correct is False else "❓"
            print(f"   {status} GT: {gt_iso or 'Unknown'} | Pred: {pred_iso} | Conf: {confidence:.3f}")

        except Exception as e:
            print(f"   πŸ’₯ Error processing file: {e}")
            results.append({
                "file": os.path.basename(audio_path),
                "full_path": audio_path,
                "gt_iso": "",
                "pred_iso": "error",
                "confidence": 0.0,
                "correct": False
            })

    return results

# Run the analysis
analysis_results = run_language_detection_analysis(test_files)
print(f"\nπŸŽ‰ Language detection analysis complete!")
print(f"πŸ“Š Total results: {len(analysis_results)}")


# ==============================================================================
# Cell 9: Complete Results Analysis and Accuracy Report
# ==============================================================================

def generate_comprehensive_analysis(results):
    """Generate complete analysis matching your dataset format"""

    df = pd.DataFrame(results)

    # Filter to files with ground truth from your dataset
    valid_df = df[(df["gt_iso"] != "") & (df["gt_iso"].isin(ALL_SUPPORTED_LANGS))].copy()

    if len(valid_df) == 0:
        print("❌ No valid ground truth files found")
        return None, None  # keep the caller's tuple unpacking safe

    print("πŸ“Š COMPREHENSIVE LANGUAGE DETECTION ANALYSIS")
    print("=" * 60)

    # Overall accuracy
    overall_acc = accuracy_score(valid_df["gt_iso"], valid_df["pred_iso"])
    print(f"🎯 OVERALL ACCURACY: {overall_acc:.4f} ({overall_acc*100:.1f}%)")

    # Create accuracy table matching your format
    print(f"\nπŸ“Š LANGUAGE-WISE ACCURACY:")
    print("-" * 60)
    print("Code | Language Name    | Files | Top-1 | Top-5 |  Conf")
    print("-" * 60)

    # Language name mapping
    LANG_NAMES = {
        'ur': 'Urdu', 'pa': 'Punjabi', 'ta': 'Tamil', 'sd': 'Sindhi',
        'or': 'Odia', 'ml': 'Malayalam', 'ne': 'Nepali', 'as': 'Assamese',
        'hi': 'Hindi', 'bn': 'Bengali', 'kok': 'Konkani', 'kn': 'Kannada',
        'ks': 'Kashmiri', 'mr': 'Marathi', 'te': 'Telugu', 'br': 'Bodo',
        'doi': 'Dogri', 'sat': 'Santali', 'gu': 'Gujarati', 'mai': 'Maithili',
        'mni': 'Manipuri', 'sa': 'Sanskrit'
    }

    # Calculate per-language statistics
    lang_stats = []

    for lang_code in sorted(valid_df["gt_iso"].unique()):
        lang_data = valid_df[valid_df["gt_iso"] == lang_code]

        total_files = len(lang_data)
        correct_pred = (lang_data["gt_iso"] == lang_data["pred_iso"]).sum()
        accuracy = correct_pred / total_files
        avg_conf = lang_data["confidence"].mean()

        lang_name = LANG_NAMES.get(lang_code, lang_code.title())

        # Format output to match your table (Top-5 omitted: the hybrid pipeline
        # only returns a single top-1 prediction per file)
        print(f"{lang_code:>3s} | {lang_name:<15s} | {total_files:>5d} | {accuracy*100:>5.1f}% | {avg_conf:>5.3f}")

        lang_stats.append({
            'code': lang_code,
            'name': lang_name,
            'files': total_files,
            'accuracy': accuracy,
            'confidence': avg_conf
        })

    print("-" * 60)

    # Language family analysis
    print(f"\nπŸ“Š LANGUAGE FAMILY PERFORMANCE:")
    print("-" * 40)

    family_stats = {}
    for _, row in valid_df.iterrows():
        lang = row['gt_iso']
        correct = row['correct']

        if lang in INDO_ARYAN_LANGS:
            family = 'Indo-Aryan'
        elif lang in DRAVIDIAN_LANGS:
            family = 'Dravidian'
        elif lang in LOW_RESOURCE_LANGS:
            family = 'Low-Resource'
        else:
            family = 'Other'

        if family not in family_stats:
            family_stats[family] = {'correct': 0, 'total': 0}
        family_stats[family]['total'] += 1
        if correct:
            family_stats[family]['correct'] += 1

    for family, stats in family_stats.items():
        acc_pct = (stats['correct'] / stats['total']) * 100
        print(f"{family:<15s}: {acc_pct:>5.1f}% ({stats['correct']:>2d}/{stats['total']:>2d})")

    # Model performance analysis
    print(f"\nπŸ“Š MODEL PERFORMANCE:")
    print("-" * 30)
    print(f"Models loaded: {models_loaded}/2")
    print(f"VoxLingua107: {'βœ… Active' if voxlingua_model else '❌ Failed'}")
    print(f"XLS-R:        {'βœ… Active' if xlsr_lid_model else '❌ Failed'}")

    # Error analysis
    errors = valid_df[valid_df["gt_iso"] != valid_df["pred_iso"]]
    if len(errors) > 0:
        print(f"\n❌ MISCLASSIFICATION ANALYSIS ({len(errors)} errors):")
        print("-" * 50)

        # Group errors by actual language
        for actual_lang in sorted(errors["gt_iso"].unique()):
            lang_errors = errors[errors["gt_iso"] == actual_lang]
            predicted_langs = lang_errors["pred_iso"].value_counts()

            print(f"{actual_lang} ({LANG_NAMES.get(actual_lang, actual_lang)}):")
            for pred_lang, count in predicted_langs.head(3).items():
                print(f"  β†’ {pred_lang} ({count} files)")

    # Summary statistics
    print(f"\nπŸ“ˆ SUMMARY STATISTICS:")
    print("-" * 25)
    print(f"Total files processed: {len(df)}")
    print(f"Files with valid GT:   {len(valid_df)}")
    print(f"Languages detected:    {len(valid_df['pred_iso'].unique())}")
    print(f"Languages in dataset:  {len(valid_df['gt_iso'].unique())}")
    print(f"Perfect accuracy:      {len([l for l in lang_stats if l['accuracy'] == 1.0])}")
    print(f"Above 90% accuracy:    {len([l for l in lang_stats if l['accuracy'] >= 0.9])}")
    print(f"Below 50% accuracy:    {len([l for l in lang_stats if l['accuracy'] < 0.5])}")

    return valid_df, lang_stats

# Run comprehensive analysis
if 'analysis_results' in globals() and analysis_results:
    final_df, language_statistics = generate_comprehensive_analysis(analysis_results)

    # Save results to CSV
    if final_df is not None:
        timestamp = pd.Timestamp.now().strftime("%Y%m%d_%H%M%S")
        csv_filename = f"language_detection_results_{timestamp}.csv"
        final_df.to_csv(csv_filename, index=False)
        print(f"\nπŸ’Ύ Results saved to: {csv_filename}")

        # Download file (Colab only)
        try:
            from google.colab import files
            files.download(csv_filename)
            print("πŸ“₯ File downloaded successfully")
        except Exception as e:
            print(f"πŸ“ File saved locally (download failed: {e})")
else:
    print("❌ No analysis results available. Please run the previous cells first.")

print(f"\nβœ… COMPLETE LANGUAGE DETECTION ANALYSIS FINISHED!")


# ==============================================================================
# Independent Model Analysis with Top-5 and Real Confidence Scores
# ==============================================================================

def analyze_models_independently(audio_files):
    """Analyze each model independently with Top-5 predictions and real confidence scores"""

    print("πŸ” INDEPENDENT MODEL ANALYSIS")
    print("=" * 60)

    results = {
        'voxlingua': [],
        'xlsr': [],
        'combined_analysis': []
    }

    for i, audio_path in enumerate(audio_files, 1):
        print(f"\n[{i}/{len(audio_files)}] Analyzing: {os.path.basename(audio_path)}")

        # Extract ground truth
        gt_iso = gt_from_filename(audio_path)
        print(f"   Ground Truth: {gt_iso or 'Unknown'}")

        file_result = {
            'file': os.path.basename(audio_path),
            'gt_iso': gt_iso or '',
            'voxlingua_results': {},
            'xlsr_results': {}
        }

        # ========================================
        # VoxLingua107 Independent Analysis
        # ========================================
        if voxlingua_model is not None:
            try:
                print(f"   πŸ”¬ VoxLingua107 Analysis:")
                out = voxlingua_model.classify_file(audio_path)

                # Extract Top-5 predictions with real confidence scores
                logits, log_conf, pred_idx, labels = out

                # Get top 5 predictions (softmax is monotonic, so top-k indices over
                # logits and over probabilities coincide; min() guards small heads)
                top5_probs = torch.softmax(logits.squeeze(), dim=0)
                top5_indices = torch.topk(top5_probs, min(5, top5_probs.numel())).indices

                vox_top5 = []
                for idx in top5_indices:
                    # NB: `labels` from classify_file typically holds only the top-1
                    # text label, so ranks beyond 1 fall back to "idx_N" placeholders;
                    # decoding all classes would need the model's label encoder
                    lang_label = labels[idx.item()] if idx.item() < len(labels) else f"idx_{idx.item()}"
                    prob = top5_probs[idx.item()].item()

                    # Extract language code
                    if isinstance(lang_label, str):
                        colon_pos = lang_label.find(":")
                        lang_code = lang_label[:colon_pos].strip() if colon_pos != -1 else lang_label.strip()
                    else:
                        lang_code = str(lang_label)

                    # Map to dataset codes
                    mapped_lang = map_to_dataset_language(lang_code)

                    vox_top5.append({
                        'rank': len(vox_top5) + 1,
                        'original_code': lang_code,
                        'mapped_code': mapped_lang,
                        'confidence': prob,
                        'in_dataset': mapped_lang in ALL_SUPPORTED_LANGS
                    })

                    print(f"      Rank {len(vox_top5)}: {lang_code} β†’ {mapped_lang} ({prob:.4f}) {'βœ…' if mapped_lang in ALL_SUPPORTED_LANGS else '❌'}")

                # Store VoxLingua results
                file_result['voxlingua_results'] = {
                    'top5': vox_top5,
                    'top1_original': vox_top5[0]['original_code'],
                    'top1_mapped': vox_top5[0]['mapped_code'],
                    'top1_confidence': vox_top5[0]['confidence'],
                    'correct_in_top1': gt_iso == vox_top5[0]['mapped_code'] if gt_iso else None,
                    'correct_in_top5': any(pred['mapped_code'] == gt_iso for pred in vox_top5) if gt_iso else None
                }

                results['voxlingua'].append({
                    'file': os.path.basename(audio_path),
                    'gt_iso': gt_iso or '',
                    'pred_iso': vox_top5[0]['mapped_code'],
                    'confidence': vox_top5[0]['confidence'],
                    'correct': gt_iso == vox_top5[0]['mapped_code'] if gt_iso else None,
                    'top5_predictions': [p['mapped_code'] for p in vox_top5]
                })

            except Exception as e:
                print(f"      ❌ VoxLingua107 error: {e}")
                file_result['voxlingua_results'] = {'error': str(e)}

        # ========================================
        # XLS-R Independent Analysis
        # ========================================
        if xlsr_lid_model is not None:
            try:
                print(f"   πŸ”¬ XLS-R Analysis:")
                out = xlsr_lid_model.classify_file(audio_path)

                # Parse XLS-R output for Top-5
                out_prob, score, index, text_lab = out

                # Get top 5 predictions (guard against heads with fewer than 5 classes)
                top5_probs = torch.softmax(out_prob.squeeze(), dim=0)
                top5_indices = torch.topk(top5_probs, min(5, top5_probs.numel())).indices

                xlsr_top5 = []
                for idx in top5_indices:
                    lang_label = text_lab[idx.item()] if idx.item() < len(text_lab) else f"idx_{idx.item()}"
                    prob = top5_probs[idx.item()].item()

                    lang_code = str(lang_label).strip().lower()
                    mapped_lang = map_to_dataset_language(lang_code)

                    xlsr_top5.append({
                        'rank': len(xlsr_top5) + 1,
                        'original_code': lang_code,
                        'mapped_code': mapped_lang,
                        'confidence': prob,
                        'in_dataset': mapped_lang in ALL_SUPPORTED_LANGS
                    })

                    print(f"      Rank {len(xlsr_top5)}: {lang_code} β†’ {mapped_lang} ({prob:.4f}) {'βœ…' if mapped_lang in ALL_SUPPORTED_LANGS else '❌'}")

                # Store XLS-R results
                file_result['xlsr_results'] = {
                    'top5': xlsr_top5,
                    'top1_original': xlsr_top5[0]['original_code'],
                    'top1_mapped': xlsr_top5[0]['mapped_code'],
                    'top1_confidence': xlsr_top5[0]['confidence'],
                    'correct_in_top1': gt_iso == xlsr_top5[0]['mapped_code'] if gt_iso else None,
                    'correct_in_top5': any(pred['mapped_code'] == gt_iso for pred in xlsr_top5) if gt_iso else None
                }

                results['xlsr'].append({
                    'file': os.path.basename(audio_path),
                    'gt_iso': gt_iso or '',
                    'pred_iso': xlsr_top5[0]['mapped_code'],
                    'confidence': xlsr_top5[0]['confidence'],
                    'correct': gt_iso == xlsr_top5[0]['mapped_code'] if gt_iso else None,
                    'top5_predictions': [p['mapped_code'] for p in xlsr_top5]
                })

            except Exception as e:
                print(f"      ❌ XLS-R error: {e}")
                file_result['xlsr_results'] = {'error': str(e)}

        results['combined_analysis'].append(file_result)

        print(f"   βœ… Analysis complete for {os.path.basename(audio_path)}")

    return results

def generate_independent_model_report(results):
    """Generate comprehensive independent model analysis report"""

    print(f"\nπŸ“Š INDEPENDENT MODEL PERFORMANCE ANALYSIS")
    print("=" * 70)

    # VoxLingua107 Analysis
    if results['voxlingua']:
        vox_df = pd.DataFrame(results['voxlingua'])
        valid_vox = vox_df[vox_df['gt_iso'] != ''].copy()

        if len(valid_vox) > 0:
            vox_acc = accuracy_score(valid_vox['gt_iso'], valid_vox['pred_iso'])
            vox_conf_avg = valid_vox['confidence'].mean()
            vox_conf_std = valid_vox['confidence'].std()

            print(f"\nπŸ”¬ VoxLingua107 INDEPENDENT ANALYSIS:")
            print(f"   Files analyzed: {len(valid_vox)}")
            print(f"   Top-1 Accuracy: {vox_acc:.4f} ({vox_acc*100:.1f}%)")
            print(f"   Avg Confidence: {vox_conf_avg:.4f} Β± {vox_conf_std:.4f}")

            # Per-language accuracy for VoxLingua
            print(f"   Per-language performance:")
            vox_per_lang = valid_vox.groupby('gt_iso').agg({
                'correct': 'mean',
                'confidence': ['mean', 'count']
            }).round(4)
            vox_per_lang.columns = ['accuracy', 'avg_conf', 'count']

            for lang, row in vox_per_lang.iterrows():
                print(f"      {lang}: {row['accuracy']:.3f} ({row['accuracy']*100:.1f}%) - {row['avg_conf']:.3f} conf - {int(row['count'])} files")

    # XLS-R Analysis
    if results['xlsr']:
        xlsr_df = pd.DataFrame(results['xlsr'])
        valid_xlsr = xlsr_df[xlsr_df['gt_iso'] != ''].copy()

        if len(valid_xlsr) > 0:
            xlsr_acc = accuracy_score(valid_xlsr['gt_iso'], valid_xlsr['pred_iso'])
            xlsr_conf_avg = valid_xlsr['confidence'].mean()
            xlsr_conf_std = valid_xlsr['confidence'].std()

            print(f"\nπŸ”¬ XLS-R INDEPENDENT ANALYSIS:")
            print(f"   Files analyzed: {len(valid_xlsr)}")
            print(f"   Top-1 Accuracy: {xlsr_acc:.4f} ({xlsr_acc*100:.1f}%)")
            print(f"   Avg Confidence: {xlsr_conf_avg:.4f} Β± {xlsr_conf_std:.4f}")

            # Per-language accuracy for XLS-R
            print(f"   Per-language performance:")
            xlsr_per_lang = valid_xlsr.groupby('gt_iso').agg({
                'correct': 'mean',
                'confidence': ['mean', 'count']
            }).round(4)
            xlsr_per_lang.columns = ['accuracy', 'avg_conf', 'count']

            for lang, row in xlsr_per_lang.iterrows():
                print(f"      {lang}: {row['accuracy']:.3f} ({row['accuracy']*100:.1f}%) - {row['avg_conf']:.3f} conf - {int(row['count'])} files")

    # Model Comparison (only when both models produced valid-GT stats above;
    # otherwise vox_acc / xlsr_acc would be undefined)
    if results['voxlingua'] and results['xlsr'] and len(valid_vox) > 0 and len(valid_xlsr) > 0:
        print(f"\nβš–οΈ MODEL COMPARISON:")
        print(f"   VoxLingua107 vs XLS-R:")
        print(f"      Accuracy: {vox_acc:.4f} vs {xlsr_acc:.4f} ({'VoxLingua wins' if vox_acc > xlsr_acc else 'XLS-R wins' if xlsr_acc > vox_acc else 'Tie'})")
        print(f"      Avg Confidence: {vox_conf_avg:.4f} vs {xlsr_conf_avg:.4f}")

        # Suggest optimal weights
        total_perf = vox_acc + xlsr_acc
        vox_weight = vox_acc / total_perf if total_perf > 0 else 0.5
        xlsr_weight = xlsr_acc / total_perf if total_perf > 0 else 0.5

        print(f"\nπŸ’‘ SUGGESTED OPTIMAL WEIGHTS:")
        print(f"   VoxLingua107: {vox_weight:.2f} ({vox_weight*100:.0f}%)")
        print(f"   XLS-R:        {xlsr_weight:.2f} ({xlsr_weight*100:.0f}%)")

    return results
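
# A sketch of adopting the suggested weights (WEIGHTS is a new, hypothetical
# global; hybrid_language_detection currently hard-codes 0.60 / 0.40):
# WEIGHTS = {'voxlingua': vox_weight, 'xlsr': xlsr_weight}
# ...then inside hybrid_language_detection:
#     confidences['voxlingua'] = conf * WEIGHTS['voxlingua']
#     confidences['xlsr']      = conf * WEIGHTS['xlsr']
#     best_conf = confidences[best_model] / WEIGHTS[best_model]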

# Run independent analysis
if 'test_files' in globals() and test_files:
    independent_results = analyze_models_independently(test_files[:10])  # Limit to first 10 for testing
    final_report = generate_independent_model_report(independent_results)
else:
    print("❌ No test files available. Run the previous cells first.")


# ==============================================================================
# Analyze Already Downloaded Files in /content/drive_dataset/
# ==============================================================================

def scan_downloaded_files():
    """Scan and collect already downloaded audio files"""

    download_dir = "/content/drive_dataset"

    if not os.path.exists(download_dir):
        print("❌ Download directory not found")
        return []

    print(f"πŸ” Scanning {download_dir} for audio files...")

    # Valid audio extensions
    VALID_EXTS = {".wav", ".mp3", ".flac", ".m4a", ".ogg"}

    def is_audio(filepath):
        return os.path.splitext(filepath)[1].lower() in VALID_EXTS

    # Collect all audio files
    audio_files = []
    lang_counts = {}

    for root, dirs, files in os.walk(download_dir):
        for file in files:
            if is_audio(file):
                full_path = os.path.join(root, file)
                audio_files.append(full_path)

                # Extract language from folder structure
                path_parts = root.split('/')
                for part in path_parts:
                    if len(part) in [2, 3] and part.isalpha():
                        lang_counts[part] = lang_counts.get(part, 0) + 1
                        break

    print(f"πŸ“Š Found {len(audio_files)} audio files:")
    for lang, count in sorted(lang_counts.items()):
        print(f"   {lang}: {count} files")

    # Show sample files
    print(f"\nπŸ“ Sample files:")
    for file_path in audio_files[:5]:
        print(f"   {file_path}")

    return audio_files

# Scan for downloaded files
downloaded_files = scan_downloaded_files()

if not downloaded_files:
    print("❌ No audio files found. Let me help you collect them manually.")

    # Manual file collection if scan fails
    print("\nπŸ” Manual file search...")

    # Search patterns for common locations
    search_patterns = [
        "/content/drive_dataset/**/*.flac",
        "/content/drive_dataset/**/*.wav",
        "/content/drive_dataset/**/*.mp3",
        "/content/**/*.flac",
        "/content/**/*.wav",
        "/content/**/*.mp3"
    ]

    manual_files = []
    for pattern in search_patterns:
        found = glob.glob(pattern, recursive=True)
        manual_files.extend(found)

    # Remove duplicates
    manual_files = list(set(manual_files))

    print(f"πŸ“Š Manual search found: {len(manual_files)} files")
    for file_path in manual_files[:10]:  # Show first 10
        print(f"   {file_path}")

    downloaded_files = manual_files

print(f"\n🎯 Total files ready for analysis: {len(downloaded_files)}")


# ==============================================================================
# Run Independent Analysis on Downloaded Files
# ==============================================================================

def analyze_downloaded_files_independently(audio_files):
    """Run independent model analysis on downloaded files with detailed output"""

    if not audio_files:
        print("❌ No audio files to analyze")
        return None

    print(f"πŸš€ Starting independent analysis on {len(audio_files)} files...")
    print("=" * 70)

    results = {
        'voxlingua_detailed': [],
        'xlsr_detailed': [],
        'comparison_data': []
    }

    for i, audio_path in enumerate(audio_files, 1):
        print(f"\n[{i}/{len(audio_files)}] 🎡 {os.path.basename(audio_path)}")

        # Extract ground truth from path/filename
        gt_iso = gt_from_filename(audio_path)
        print(f"   πŸ“ Ground Truth: {gt_iso or 'Unknown'}")

        file_analysis = {
            'file': os.path.basename(audio_path),
            'full_path': audio_path,
            'gt_iso': gt_iso or '',
            'voxlingua': {'available': False},
            'xlsr': {'available': False}
        }

        # ==========================================
        # VoxLingua107 Independent Analysis
        # ==========================================
        if voxlingua_model is not None:
            try:
                print(f"   πŸ”¬ VoxLingua107 Analysis:")
                out = voxlingua_model.classify_file(audio_path)
                logits, log_conf, pred_idx, labels = out

                # Get real confidence scores (not weighted)
                probs = torch.softmax(logits.squeeze(), dim=0)
                top5_indices = torch.topk(probs, min(5, len(probs))).indices

                vox_predictions = []
                for rank, idx in enumerate(top5_indices, 1):
                    # Guard: `labels` may hold only the top-1 text label (see note above)
                    lang_label = labels[idx.item()] if idx.item() < len(labels) else f"idx_{idx.item()}"
                    confidence = probs[idx.item()].item()

                    # Parse language code
                    if isinstance(lang_label, str):
                        colon_pos = lang_label.find(":")
                        lang_code = lang_label[:colon_pos].strip() if colon_pos != -1 else lang_label.strip()
                    else:
                        lang_code = str(lang_label)

                    # Map to dataset language
                    mapped_lang = map_to_dataset_language(lang_code)

                    vox_predictions.append({
                        'rank': rank,
                        'original': lang_code,
                        'mapped': mapped_lang,
                        'confidence': confidence,
                        'in_dataset': mapped_lang in ALL_SUPPORTED_LANGS
                    })

                    status = "βœ…" if mapped_lang in ALL_SUPPORTED_LANGS else "❌"
                    print(f"      #{rank}: {lang_code} β†’ {mapped_lang} ({confidence:.4f}) {status}")

                # Store VoxLingua results
                top1 = vox_predictions[0]
                file_analysis['voxlingua'] = {
                    'available': True,
                    'top5_predictions': vox_predictions,
                    'top1_prediction': top1['mapped'],
                    'top1_confidence': top1['confidence'],
                    'correct_top1': gt_iso == top1['mapped'] if gt_iso else None,
                    'correct_in_top5': any(p['mapped'] == gt_iso for p in vox_predictions) if gt_iso else None
                }

                results['voxlingua_detailed'].append({
                    'file': os.path.basename(audio_path),
                    'gt_iso': gt_iso or '',
                    'pred_iso': top1['mapped'],
                    'confidence': top1['confidence'],
                    'correct': gt_iso == top1['mapped'] if gt_iso else None
                })

            except Exception as e:
                print(f"      ❌ VoxLingua107 error: {e}")
                file_analysis['voxlingua'] = {'available': False, 'error': str(e)}

        # ==========================================
        # XLS-R Independent Analysis
        # ==========================================
        if xlsr_lid_model is not None:
            try:
                print(f"   πŸ”¬ XLS-R Analysis:")
                out = xlsr_lid_model.classify_file(audio_path)
                out_prob, score, index, text_lab = out

                # Get real confidence scores
                probs = torch.softmax(out_prob.squeeze(), dim=0)
                top5_indices = torch.topk(probs, min(5, len(probs))).indices

                xlsr_predictions = []
                for rank, idx in enumerate(top5_indices, 1):
                    lang_label = text_lab[idx.item()] if idx.item() < len(text_lab) else f"idx_{idx.item()}"
                    confidence = probs[idx.item()].item()

                    lang_code = str(lang_label).strip().lower()
                    mapped_lang = map_to_dataset_language(lang_code)

                    xlsr_predictions.append({
                        'rank': rank,
                        'original': lang_code,
                        'mapped': mapped_lang,
                        'confidence': confidence,
                        'in_dataset': mapped_lang in ALL_SUPPORTED_LANGS
                    })

                    status = "βœ…" if mapped_lang in ALL_SUPPORTED_LANGS else "❌"
                    print(f"      #{rank}: {lang_code} β†’ {mapped_lang} ({confidence:.4f}) {status}")

                # Store XLS-R results
                top1 = xlsr_predictions[0]
                file_analysis['xlsr'] = {
                    'available': True,
                    'top5_predictions': xlsr_predictions,
                    'top1_prediction': top1['mapped'],
                    'top1_confidence': top1['confidence'],
                    'correct_top1': gt_iso == top1['mapped'] if gt_iso else None,
                    'correct_in_top5': any(p['mapped'] == gt_iso for p in xlsr_predictions) if gt_iso else None
                }

                results['xlsr_detailed'].append({
                    'file': os.path.basename(audio_path),
                    'gt_iso': gt_iso or '',
                    'pred_iso': top1['mapped'],
                    'confidence': top1['confidence'],
                    'correct': gt_iso == top1['mapped'] if gt_iso else None
                })

            except Exception as e:
                print(f"      ❌ XLS-R error: {e}")
                file_analysis['xlsr'] = {'available': False, 'error': str(e)}

        results['comparison_data'].append(file_analysis)
        print(f"   βœ… Analysis complete\n")

    return results

# Run the independent analysis
if downloaded_files:
    print("πŸ”¬ Running independent model analysis...")
    analysis_results = analyze_downloaded_files_independently(downloaded_files)
else:
    print("❌ No files found for analysis")
    analysis_results = None


# ==============================================================================
# FIXED: Robust VoxLingua107 Analysis with Better Error Handling
# ==============================================================================

def parse_voxlingua_output_robust(out):
    """Robust parsing of VoxLingua107 output with multiple fallback methods"""

    try:
        # Method 1: Standard SpeechBrain output format
        if isinstance(out, (tuple, list)) and len(out) >= 4:
            logits, log_conf, pred_idx, labels = out[:4]

            # Validate components
            if hasattr(logits, 'squeeze') and hasattr(labels, '__getitem__'):
                return logits, log_conf, pred_idx, labels, "standard"

        # Method 2: Alternative format (sometimes returns dict)
        if isinstance(out, dict):
            logits = out.get('predictions', out.get('logits'))
            labels = out.get('labels', out.get('text_lab'))
            log_conf = out.get('log_probabilities', out.get('log_conf'))
            pred_idx = out.get('predicted_ids', out.get('pred_idx'))

            if all(v is not None for v in [logits, labels]):
                return logits, log_conf, pred_idx, labels, "dict"

        # Method 3: Direct tensor output
        if hasattr(out, 'squeeze'):  # Direct logits tensor
            logits = out
            # Create dummy labels based on logits size
            labels = [f"lang_{i}" for i in range(logits.shape[-1])]
            log_conf = torch.log_softmax(logits, dim=-1).max()
            pred_idx = torch.argmax(logits, dim=-1)

            return logits, log_conf, pred_idx, labels, "tensor"

    except Exception as e:
        print(f"        Parse error: {e}")

    return None, None, None, None, "failed"
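
# A quick, optional sanity check for the parser above. The synthetic outputs
# here are illustrative assumptions (not captured SpeechBrain returns); they
# only verify that each fallback branch is reachable.
def _smoke_test_voxlingua_parser():
    fake_logits = torch.randn(1, 4)
    fake_labels = ["en: English", "hi: Hindi", "ta: Tamil", "bn: Bengali"]

    # Method 1: standard (logits, log_conf, pred_idx, labels) tuple
    out = (fake_logits, fake_logits.max(), torch.argmax(fake_logits), fake_labels)
    assert parse_voxlingua_output_robust(out)[4] == "standard"

    # Method 2: dict-style output
    out = {'logits': fake_logits, 'labels': fake_labels}
    assert parse_voxlingua_output_robust(out)[4] == "dict"

    # Method 3: bare logits tensor (dummy labels get generated)
    assert parse_voxlingua_output_robust(fake_logits)[4] == "tensor"
    print("✅ parse_voxlingua_output_robust smoke test passed")

# _smoke_test_voxlingua_parser()  # uncomment to run the check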

def analyze_voxlingua_robust(audio_path):
    """Robust VoxLingua107 analysis with multiple parsing methods"""

    if voxlingua_model is None:
        return None

    try:
        # Get raw output from model
        raw_out = voxlingua_model.classify_file(audio_path)

        # Parse with robust method
        logits, log_conf, pred_idx, labels, parse_method = parse_voxlingua_output_robust(raw_out)

        if logits is None:
            print(f"        ❌ Could not parse VoxLingua output format")
            return None

        print(f"        πŸ“Š Parse method: {parse_method}")

        # Get predictions based on available data
        if hasattr(logits, 'squeeze'):
            probs = torch.softmax(logits.squeeze(), dim=-1 if len(logits.squeeze().shape) > 0 else 0)

            # Handle different tensor shapes
            if len(probs.shape) == 0:  # Scalar
                top_indices = torch.tensor([0])
                top_probs = probs.unsqueeze(0)
            else:  # Vector
                k = min(5, len(probs))
                top_probs, top_indices = torch.topk(probs, k)
        else:
            print(f"        ❌ Logits not in expected tensor format")
            return None

        predictions = []
        for rank, (idx, prob) in enumerate(zip(top_indices, top_probs), 1):
            idx_val = idx.item() if hasattr(idx, 'item') else int(idx)
            prob_val = prob.item() if hasattr(prob, 'item') else float(prob)

            # Get language label safely
            if idx_val < len(labels):
                lang_label = labels[idx_val]
            else:
                lang_label = f"unknown_{idx_val}"

            # Parse language code
            if isinstance(lang_label, str):
                colon_pos = lang_label.find(":")
                lang_code = lang_label[:colon_pos].strip() if colon_pos != -1 else lang_label.strip()
            else:
                lang_code = str(lang_label)

            # Map to dataset language
            mapped_lang = map_to_dataset_language(lang_code)

            predictions.append({
                'rank': rank,
                'original': lang_code,
                'mapped': mapped_lang,
                'confidence': prob_val,
                'in_dataset': mapped_lang in ALL_SUPPORTED_LANGS
            })

            status = "βœ…" if mapped_lang in ALL_SUPPORTED_LANGS else "❌"
            print(f"        #{rank}: {lang_code} β†’ {mapped_lang} ({prob_val:.4f}) {status}")

        return predictions

    except Exception as e:
        print(f"        ❌ VoxLingua analysis error: {e}")
        print(f"        ❌ Error type: {type(e).__name__}")
        return None

def analyze_xlsr_robust(audio_path):
    """Robust XLS-R analysis"""

    if xlsr_lid_model is None:
        return None

    try:
        raw_out = xlsr_lid_model.classify_file(audio_path)

        # Handle different XLS-R output formats
        if isinstance(raw_out, (tuple, list)) and len(raw_out) >= 4:
            out_prob, score, index, text_lab = raw_out[:4]
        else:
            print(f"        ❌ XLS-R output format not recognized")
            return None

        # Get top predictions
        if hasattr(out_prob, 'squeeze'):
            probs = torch.softmax(out_prob.squeeze(), dim=-1 if len(out_prob.squeeze().shape) > 0 else 0)

            if len(probs.shape) == 0:  # Scalar
                top_indices = torch.tensor([0])
                top_probs = probs.unsqueeze(0)
            else:  # Vector
                k = min(5, len(probs))
                top_probs, top_indices = torch.topk(probs, k)
        else:
            print(f"        ❌ XLS-R probabilities not in expected format")
            return None

        predictions = []
        for rank, (idx, prob) in enumerate(zip(top_indices, top_probs), 1):
            idx_val = idx.item() if hasattr(idx, 'item') else int(idx)
            prob_val = prob.item() if hasattr(prob, 'item') else float(prob)

            # Get language label
            if idx_val < len(text_lab):
                lang_label = text_lab[idx_val]
            else:
                lang_label = f"unknown_{idx_val}"

            lang_code = str(lang_label).strip().lower()
            mapped_lang = map_to_dataset_language(lang_code)

            predictions.append({
                'rank': rank,
                'original': lang_code,
                'mapped': mapped_lang,
                'confidence': prob_val,
                'in_dataset': mapped_lang in ALL_SUPPORTED_LANGS
            })

            status = "βœ…" if mapped_lang in ALL_SUPPORTED_LANGS else "❌"
            print(f"        #{rank}: {lang_code} β†’ {mapped_lang} ({prob_val:.4f}) {status}")

        return predictions

    except Exception as e:
        print(f"        ❌ XLS-R analysis error: {e}")
        return None

# ==============================================================================
# UPDATED: Robust Analysis Function
# ==============================================================================

def analyze_downloaded_files_robust(audio_files):
    """Robust analysis with better error handling"""

    if not audio_files:
        print("❌ No audio files to analyze")
        return None

    print(f"πŸš€ Starting ROBUST analysis on {len(audio_files)} files...")
    print("=" * 70)

    results = {
        'voxlingua_detailed': [],
        'xlsr_detailed': [],
        'comparison_data': []
    }

    for i, audio_path in enumerate(audio_files, 1):
        print(f"\n[{i}/{len(audio_files)}] 🎡 {os.path.basename(audio_path)}")

        # Extract ground truth
        gt_iso = gt_from_filename(audio_path)
        print(f"   πŸ“ Ground Truth: {gt_iso or 'Unknown'}")

        file_analysis = {
            'file': os.path.basename(audio_path),
            'full_path': audio_path,
            'gt_iso': gt_iso or '',
            'voxlingua': {'available': False},
            'xlsr': {'available': False}
        }

        # VoxLingua107 Analysis
        print(f"   πŸ”¬ VoxLingua107 Analysis:")
        vox_predictions = analyze_voxlingua_robust(audio_path)

        if vox_predictions:
            top1 = vox_predictions[0]
            file_analysis['voxlingua'] = {
                'available': True,
                'top5_predictions': vox_predictions,
                'top1_prediction': top1['mapped'],
                'top1_confidence': top1['confidence'],
                'correct_top1': gt_iso == top1['mapped'] if gt_iso else None,
                'correct_in_top5': any(p['mapped'] == gt_iso for p in vox_predictions) if gt_iso else None
            }

            results['voxlingua_detailed'].append({
                'file': os.path.basename(audio_path),
                'gt_iso': gt_iso or '',
                'pred_iso': top1['mapped'],
                'confidence': top1['confidence'],
                'correct': gt_iso == top1['mapped'] if gt_iso else None
            })
        else:
            file_analysis['voxlingua'] = {'available': False, 'error': 'Analysis failed'}

        # XLS-R Analysis
        print(f"   πŸ”¬ XLS-R Analysis:")
        xlsr_predictions = analyze_xlsr_robust(audio_path)

        if xlsr_predictions:
            top1 = xlsr_predictions[0]
            file_analysis['xlsr'] = {
                'available': True,
                'top5_predictions': xlsr_predictions,
                'top1_prediction': top1['mapped'],
                'top1_confidence': top1['confidence'],
                'correct_top1': gt_iso == top1['mapped'] if gt_iso else None,
                'correct_in_top5': any(p['mapped'] == gt_iso for p in xlsr_predictions) if gt_iso else None
            }

            results['xlsr_detailed'].append({
                'file': os.path.basename(audio_path),
                'gt_iso': gt_iso or '',
                'pred_iso': top1['mapped'],
                'confidence': top1['confidence'],
                'correct': gt_iso == top1['mapped'] if gt_iso else None
            })
        else:
            file_analysis['xlsr'] = {'available': False, 'error': 'Analysis failed'}

        results['comparison_data'].append(file_analysis)
        print(f"   βœ… Analysis complete")

    return results

# Run the robust analysis
if 'downloaded_files' in globals() and downloaded_files:
    print("πŸ”¬ Running ROBUST independent model analysis...")
    robust_analysis_results = analyze_downloaded_files_robust(downloaded_files)

    # Generate report. NOTE: generate_detailed_performance_report is defined in
    # the "COMPLETE FIX" cell below, so run that cell before this one.
    if robust_analysis_results:
        generate_detailed_performance_report(robust_analysis_results)
        print(f"\nβœ… ROBUST ANALYSIS COMPLETE!")
    else:
        print("❌ Robust analysis failed")
else:
    print("❌ No downloaded files found. Please run the file scanning code first.")


# ==============================================================================
# COMPLETE FIX: VoxLingua Label Mapping + Missing Function
# ==============================================================================

# First, a best-effort index -> ISO code mapping for VoxLingua107. This is a
# fallback only: the authoritative mapping lives in the model's label encoder
# (see the sketch below), and these entries should be verified against it.
VOXLINGUA_LANGUAGE_MAP = {
    0: 'ab', 1: 'af', 2: 'ak', 3: 'am', 4: 'ar', 5: 'as', 6: 'az', 7: 'be', 8: 'bg', 9: 'bn',
    10: 'bo', 11: 'br', 12: 'bs', 13: 'ca', 14: 'ce', 15: 'co', 16: 'cs', 17: 'cv', 18: 'cy', 19: 'da',
    20: 'de', 21: 'dv', 22: 'dz', 23: 'ee', 24: 'el', 25: 'en', 26: 'eo', 27: 'es', 28: 'et', 29: 'eu',
    30: 'fa', 31: 'ff', 32: 'fi', 33: 'fo', 34: 'fr', 35: 'fy', 36: 'ga', 37: 'gd', 38: 'gl', 39: 'gn',
    40: 'gu', 41: 'gv', 42: 'ha', 43: 'haw', 44: 'he', 45: 'hi', 46: 'hr', 47: 'ht', 48: 'hu', 49: 'hy',
    50: 'ia', 51: 'id', 52: 'ie', 53: 'ig', 54: 'ii', 55: 'ik', 56: 'io', 57: 'is', 58: 'it', 59: 'iu',
    60: 'ja', 61: 'jv', 62: 'ka', 63: 'kk', 64: 'kl', 65: 'km', 66: 'kn', 67: 'ko', 68: 'ks', 69: 'ku',
    70: 'kw', 71: 'ky', 72: 'la', 73: 'lb', 74: 'lg', 75: 'li', 76: 'ln', 77: 'lo', 78: 'lt', 79: 'lv',
    80: 'mg', 81: 'mi', 82: 'mk', 83: 'ml', 84: 'mn', 85: 'mr', 86: 'ms', 87: 'mt', 88: 'my', 89: 'na',
    90: 'nb', 91: 'nd', 92: 'ne', 93: 'ng', 94: 'nl', 95: 'nn', 96: 'no', 97: 'nv', 98: 'ny', 99: 'oc',
    100: 'of', 101: 'om', 102: 'or', 103: 'os', 104: 'pa', 105: 'pi', 106: 'pl', 107: 'ps'
}

def get_voxlingua_language_by_index(idx):
    """Map VoxLingua index to language code"""
    return VOXLINGUA_LANGUAGE_MAP.get(idx, f'unknown_{idx}')
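
# A minimal sketch for deriving the authoritative index -> label mapping from
# the loaded model itself, instead of trusting the hardcoded table above. It
# assumes the SpeechBrain classifier exposes its label encoder as
# `hparams.label_encoder` with an `ind2lab` dict; if your SpeechBrain version
# differs, inspect the loaded model to find the equivalent attribute.
def build_voxlingua_map_from_model(model):
    """Derive an index -> language-code map from the model's label encoder"""
    try:
        ind2lab = model.hparams.label_encoder.ind2lab  # assumed attribute path
        # Labels may look like "en: English"; keep only the code before ':'
        return {idx: str(lab).split(":")[0].strip() for idx, lab in ind2lab.items()}
    except AttributeError as e:
        print(f"⚠️ Could not read label encoder ({e}); keeping hardcoded map")
        return dict(VOXLINGUA_LANGUAGE_MAP)

# Example (uncomment once voxlingua_model is loaded):
# VOXLINGUA_LANGUAGE_MAP = build_voxlingua_map_from_model(voxlingua_model)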

def analyze_voxlingua_fixed(audio_path):
    """Fixed VoxLingua107 analysis with proper language mapping"""

    if voxlingua_model is None:
        return None

    try:
        raw_out = voxlingua_model.classify_file(audio_path)

        if not isinstance(raw_out, (tuple, list)) or len(raw_out) < 4:
            print(f"        ❌ Unexpected VoxLingua output format")
            return None

        logits, log_conf, pred_idx, labels = raw_out[:4]

        # Get probabilities and top 5
        probs = torch.softmax(logits.squeeze(), dim=-1)
        k = min(5, len(probs))
        top_probs, top_indices = torch.topk(probs, k)

        predictions = []
        for rank, (idx, prob) in enumerate(zip(top_indices, top_probs), 1):
            idx_val = idx.item() if hasattr(idx, 'item') else int(idx)
            prob_val = prob.item() if hasattr(prob, 'item') else float(prob)

            # Method 1: Try to use provided labels
            if idx_val < len(labels) and not str(labels[idx_val]).startswith('unknown'):
                lang_label = labels[idx_val]
                if isinstance(lang_label, str):
                    colon_pos = lang_label.find(":")
                    lang_code = lang_label[:colon_pos].strip() if colon_pos != -1 else lang_label.strip()
                else:
                    lang_code = str(lang_label)
            else:
                # Method 2: Use our language mapping
                lang_code = get_voxlingua_language_by_index(idx_val)

            # Map to dataset language
            mapped_lang = map_to_dataset_language(lang_code)

            predictions.append({
                'rank': rank,
                'original': lang_code,
                'mapped': mapped_lang,
                'confidence': prob_val,
                'in_dataset': mapped_lang in ALL_SUPPORTED_LANGS,
                'index': idx_val
            })

            status = "βœ…" if mapped_lang in ALL_SUPPORTED_LANGS else "❌"
            print(f"        #{rank}: {lang_code} β†’ {mapped_lang} ({prob_val:.4f}) {status} [idx:{idx_val}]")

        return predictions

    except Exception as e:
        print(f"        ❌ VoxLingua analysis error: {e}")
        return None

def analyze_xlsr_fixed(audio_path):
    """Fixed XLS-R analysis"""

    if xlsr_lid_model is None:
        print(f"        ❌ XLS-R model not loaded")
        return None

    try:
        raw_out = xlsr_lid_model.classify_file(audio_path)

        if not isinstance(raw_out, (tuple, list)) or len(raw_out) < 4:
            print(f"        ❌ Unexpected XLS-R output format")
            return None

        out_prob, score, index, text_lab = raw_out[:4]

        # Get probabilities and top 5
        probs = torch.softmax(out_prob.squeeze(), dim=-1)
        k = min(5, len(probs))
        top_probs, top_indices = torch.topk(probs, k)

        predictions = []
        for rank, (idx, prob) in enumerate(zip(top_indices, top_probs), 1):
            idx_val = idx.item() if hasattr(idx, 'item') else int(idx)
            prob_val = prob.item() if hasattr(prob, 'item') else float(prob)

            # Get language label
            if idx_val < len(text_lab):
                lang_label = text_lab[idx_val]
                lang_code = str(lang_label).strip().lower()
            else:
                lang_code = f"xlsr_unknown_{idx_val}"

            mapped_lang = map_to_dataset_language(lang_code)

            predictions.append({
                'rank': rank,
                'original': lang_code,
                'mapped': mapped_lang,
                'confidence': prob_val,
                'in_dataset': mapped_lang in ALL_SUPPORTED_LANGS
            })

            status = "βœ…" if mapped_lang in ALL_SUPPORTED_LANGS else "❌"
            print(f"        #{rank}: {lang_code} β†’ {mapped_lang} ({prob_val:.4f}) {status}")

        return predictions

    except Exception as e:
        print(f"        ❌ XLS-R analysis error: {e}")
        return None
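
# classify_file decodes each clip from disk on every call. When the same set
# of files is re-analyzed repeatedly, batching can help. A minimal sketch,
# assuming SpeechBrain's classify_batch(wavs, wav_lens) API and audio already
# at the model's expected sample rate (resampling is omitted for brevity):
import torchaudio

def classify_batch_sketch(model, audio_paths):
    """One batched forward pass over several clips (zero-padded to max length)"""
    waves = [torchaudio.load(p)[0].mean(dim=0) for p in audio_paths]  # force mono
    max_len = max(w.shape[0] for w in waves)
    batch = torch.zeros(len(waves), max_len)
    for i, w in enumerate(waves):
        batch[i, :w.shape[0]] = w
    # SpeechBrain expects relative lengths in [0, 1]
    lens = torch.tensor([w.shape[0] / max_len for w in waves])
    return model.classify_batch(batch, lens)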

def generate_detailed_performance_report(results):
    """Complete performance analysis report function"""

    if not results:
        print("❌ No results to analyze")
        return

    print("\nπŸ“Š DETAILED INDEPENDENT MODEL PERFORMANCE REPORT")
    print("=" * 70)

    # VoxLingua107 Performance Analysis
    if results['voxlingua_detailed']:
        vox_df = pd.DataFrame(results['voxlingua_detailed'])
        valid_vox = vox_df[vox_df['gt_iso'] != ''].copy()

        print(f"\nπŸ”¬ VOXLINGUA107 PERFORMANCE:")
        print("-" * 40)

        if len(valid_vox) > 0:
            vox_acc = (valid_vox['correct'] == True).mean()
            vox_conf_mean = valid_vox['confidence'].mean()
            vox_conf_std = valid_vox['confidence'].std()

            print(f"Files Analyzed: {len(valid_vox)}")
            print(f"Top-1 Accuracy: {vox_acc:.4f} ({vox_acc*100:.1f}%)")
            print(f"Confidence: {vox_conf_mean:.4f} Β± {vox_conf_std:.4f}")

            # Per-language breakdown
            print(f"\nPer-Language Performance:")
            for lang in sorted(valid_vox['gt_iso'].unique()):
                lang_data = valid_vox[valid_vox['gt_iso'] == lang]
                acc = (lang_data['correct'] == True).mean()
                conf_mean = lang_data['confidence'].mean()
                count = len(lang_data)
                print(f"  {lang:>3}: {acc:.3f} ({acc*100:5.1f}%) | Conf: {conf_mean:.3f} | n={count}")
        else:
            print("No valid VoxLingua results")

    # XLS-R Performance Analysis
    if results['xlsr_detailed']:
        xlsr_df = pd.DataFrame(results['xlsr_detailed'])
        valid_xlsr = xlsr_df[xlsr_df['gt_iso'] != ''].copy()

        print(f"\nπŸ”¬ XLS-R PERFORMANCE:")
        print("-" * 40)

        if len(valid_xlsr) > 0:
            xlsr_acc = (valid_xlsr['correct'] == True).mean()
            xlsr_conf_mean = valid_xlsr['confidence'].mean()
            xlsr_conf_std = valid_xlsr['confidence'].std()

            print(f"Files Analyzed: {len(valid_xlsr)}")
            print(f"Top-1 Accuracy: {xlsr_acc:.4f} ({xlsr_acc*100:.1f}%)")
            print(f"Confidence: {xlsr_conf_mean:.4f} Β± {xlsr_conf_std:.4f}")

            # Per-language breakdown
            print(f"\nPer-Language Performance:")
            for lang in sorted(valid_xlsr['gt_iso'].unique()):
                lang_data = valid_xlsr[valid_xlsr['gt_iso'] == lang]
                acc = (lang_data['correct'] == True).mean()
                conf_mean = lang_data['confidence'].mean()
                count = len(lang_data)
                print(f"  {lang:>3}: {acc:.3f} ({acc*100:5.1f}%) | Conf: {conf_mean:.3f} | n={count}")
        else:
            print("No valid XLS-R results")

    # Model Comparison (guarded so it only runs when both accuracies were
    # actually computed above)
    if vox_acc is not None and xlsr_acc is not None:
        print(f"\nβš–οΈ MODEL COMPARISON:")
        print("-" * 30)

        print(f"VoxLingua107: {vox_acc:.4f} accuracy")
        print(f"XLS-R:        {xlsr_acc:.4f} accuracy")

        # Calculate optimal weights
        total_acc = vox_acc + xlsr_acc
        if total_acc > 0:
            vox_weight = vox_acc / total_acc
            xlsr_weight = xlsr_acc / total_acc

            print(f"\nπŸ’‘ RECOMMENDED WEIGHTS:")
            print(f"VoxLingua107: {vox_weight:.3f} ({vox_weight*100:.1f}%)")
            print(f"XLS-R:        {xlsr_weight:.3f} ({xlsr_weight*100:.1f}%)")

        # Compare the sets of languages each model predicted. Note this is
        # set overlap across all files, not per-file agreement (see the
        # per-file sketch after the analysis function below).
        vox_preds = set(vox_df['pred_iso'].tolist())
        xlsr_preds = set(xlsr_df['pred_iso'].tolist())
        common_preds = vox_preds.intersection(xlsr_preds)

        print(f"\nPredicted-Language Set Overlap:")
        print(f"Languages predicted by both: {len(common_preds)}")
        print(f"VoxLingua-only: {len(vox_preds - xlsr_preds)}")
        print(f"XLS-R-only: {len(xlsr_preds - vox_preds)}")

    # Save results
    timestamp = pd.Timestamp.now().strftime("%Y%m%d_%H%M%S")

    if results['voxlingua_detailed']:
        vox_csv = f"voxlingua_fixed_results_{timestamp}.csv"
        pd.DataFrame(results['voxlingua_detailed']).to_csv(vox_csv, index=False)
        print(f"\nπŸ’Ύ VoxLingua results: {vox_csv}")

    if results['xlsr_detailed']:
        xlsr_csv = f"xlsr_fixed_results_{timestamp}.csv"
        pd.DataFrame(results['xlsr_detailed']).to_csv(xlsr_csv, index=False)
        print(f"πŸ’Ύ XLS-R results: {xlsr_csv}")

def run_complete_fixed_analysis(audio_files):
    """Run complete analysis with all fixes"""

    if not audio_files:
        print("❌ No audio files to analyze")
        return None

    print(f"πŸš€ Starting COMPLETE FIXED analysis on {len(audio_files)} files...")
    print("=" * 70)

    results = {
        'voxlingua_detailed': [],
        'xlsr_detailed': [],
        'comparison_data': []
    }

    for i, audio_path in enumerate(audio_files, 1):
        print(f"\n[{i}/{len(audio_files)}] 🎡 {os.path.basename(audio_path)}")

        # Extract ground truth
        gt_iso = gt_from_filename(audio_path)
        print(f"   πŸ“ Ground Truth: {gt_iso or 'Unknown'}")

        file_analysis = {
            'file': os.path.basename(audio_path),
            'full_path': audio_path,
            'gt_iso': gt_iso or '',
            'voxlingua': {'available': False},
            'xlsr': {'available': False}
        }

        # VoxLingua107 Analysis
        print(f"   πŸ”¬ VoxLingua107 Analysis:")
        vox_predictions = analyze_voxlingua_fixed(audio_path)

        if vox_predictions and len(vox_predictions) > 0:
            top1 = vox_predictions[0]
            file_analysis['voxlingua'] = {
                'available': True,
                'top5_predictions': vox_predictions,
                'top1_prediction': top1['mapped'],
                'top1_confidence': top1['confidence'],
                'correct_top1': gt_iso == top1['mapped'] if gt_iso else None,
            }

            results['voxlingua_detailed'].append({
                'file': os.path.basename(audio_path),
                'gt_iso': gt_iso or '',
                'pred_iso': top1['mapped'],
                'confidence': top1['confidence'],
                'correct': gt_iso == top1['mapped'] if gt_iso else None
            })

        # XLS-R Analysis
        print(f"   πŸ”¬ XLS-R Analysis:")
        xlsr_predictions = analyze_xlsr_fixed(audio_path)

        if xlsr_predictions and len(xlsr_predictions) > 0:
            top1 = xlsr_predictions[0]
            file_analysis['xlsr'] = {
                'available': True,
                'top5_predictions': xlsr_predictions,
                'top1_prediction': top1['mapped'],
                'top1_confidence': top1['confidence'],
                'correct_top1': gt_iso == top1['mapped'] if gt_iso else None,
            }

            results['xlsr_detailed'].append({
                'file': os.path.basename(audio_path),
                'gt_iso': gt_iso or '',
                'pred_iso': top1['mapped'],
                'confidence': top1['confidence'],
                'correct': gt_iso == top1['mapped'] if gt_iso else None
            })

        results['comparison_data'].append(file_analysis)
        print(f"   βœ… Analysis complete")

    return results
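
# The report's overlap numbers compare *sets* of predicted languages across
# all files; a per-file agreement rate is often more informative. A small
# sketch over the comparison_data structure built above (the helper name is
# an assumption):
def per_file_agreement(comparison_data):
    """Fraction of files where both models made the same top-1 prediction"""
    both = [f for f in comparison_data
            if f['voxlingua'].get('available') and f['xlsr'].get('available')]
    if not both:
        return None
    agree = sum(1 for f in both
                if f['voxlingua']['top1_prediction'] == f['xlsr']['top1_prediction'])
    return agree / len(both)

# Example:
# rate = per_file_agreement(final_analysis_results['comparison_data'])
# print(f"Per-file top-1 agreement: {rate:.3f}" if rate is not None else "No overlap")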

# Run the complete fixed analysis
if 'downloaded_files' in globals() and downloaded_files:
    print("πŸ”¬ Running COMPLETE FIXED analysis...")
    final_analysis_results = run_complete_fixed_analysis(downloaded_files)

    if final_analysis_results:
        generate_detailed_performance_report(final_analysis_results)
        print(f"\nβœ… COMPLETE FIXED ANALYSIS DONE!")
    else:
        print("❌ Analysis failed")
else:
    print("❌ No downloaded files found")


# ==============================================================================
# COMPREHENSIVE EXCEL ANALYSIS WITH ALL DETAILS
# ==============================================================================

import pandas as pd
import numpy as np
from datetime import datetime
import os

def create_comprehensive_excel_analysis(results, output_filename=None):
    """Create comprehensive Excel analysis with multiple sheets and detailed metrics"""

    if not results:
        print("❌ No results to analyze")
        return None

    # Generate filename if not provided
    if not output_filename:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        output_filename = f"Language_Detection_Comprehensive_Analysis_{timestamp}.xlsx"

    print(f"πŸ“Š Creating comprehensive Excel analysis: {output_filename}")

    # Create Excel writer
    with pd.ExcelWriter(output_filename, engine='openpyxl') as writer:

        # ========================================
        # SHEET 1: EXECUTIVE SUMMARY
        # ========================================
        print("   πŸ“‹ Creating Executive Summary...")

        summary_data = []

        # Overall statistics
        total_files = len(results['comparison_data'])
        vox_available = sum(1 for item in results['comparison_data'] if item['voxlingua']['available'])
        xlsr_available = sum(1 for item in results['comparison_data'] if item['xlsr']['available'])

        summary_data.extend([
            ['EXECUTIVE SUMMARY', ''],
            ['Analysis Date', datetime.now().strftime("%Y-%m-%d %H:%M:%S")],
            ['Total Files Analyzed', total_files],
            ['VoxLingua107 Available', f"{vox_available} ({vox_available/total_files*100:.1f}%)"],
            ['XLS-R Available', f"{xlsr_available} ({xlsr_available/total_files*100:.1f}%)"],
            ['', ''],
        ])

        # Model performance summary
        if results['voxlingua_detailed']:
            vox_df = pd.DataFrame(results['voxlingua_detailed'])
            valid_vox = vox_df[vox_df['gt_iso'] != ''].copy()
            if len(valid_vox) > 0:
                vox_acc = (valid_vox['correct'] == True).mean()
                vox_conf = valid_vox['confidence'].mean()
                summary_data.extend([
                    ['VOXLINGUA107 PERFORMANCE', ''],
                    ['Accuracy', f"{vox_acc:.4f} ({vox_acc*100:.1f}%)"],
                    ['Average Confidence', f"{vox_conf:.4f}"],
                    ['Files with Valid GT', len(valid_vox)],
                    ['', ''],
                ])

        if results['xlsr_detailed']:
            xlsr_df = pd.DataFrame(results['xlsr_detailed'])
            valid_xlsr = xlsr_df[xlsr_df['gt_iso'] != ''].copy()
            if len(valid_xlsr) > 0:
                xlsr_acc = (valid_xlsr['correct'] == True).mean()
                xlsr_conf = valid_xlsr['confidence'].mean()
                summary_data.extend([
                    ['XLS-R PERFORMANCE', ''],
                    ['Accuracy', f"{xlsr_acc:.4f} ({xlsr_acc*100:.1f}%)"],
                    ['Average Confidence', f"{xlsr_conf:.4f}"],
                    ['Files with Valid GT', len(valid_xlsr)],
                    ['', ''],
                ])

                # Optimal weights calculation (requires the VoxLingua accuracy
                # computed above)
                if results['voxlingua_detailed'] and len(valid_vox) > 0:
                    total_acc = vox_acc + xlsr_acc
                    if total_acc > 0:
                        vox_weight = vox_acc / total_acc
                        xlsr_weight = xlsr_acc / total_acc
                        summary_data.extend([
                            ['RECOMMENDED ENSEMBLE WEIGHTS', ''],
                            ['VoxLingua107 Weight', f"{vox_weight:.3f} ({vox_weight*100:.1f}%)"],
                            ['XLS-R Weight', f"{xlsr_weight:.3f} ({xlsr_weight*100:.1f}%)"],
                        ])

        # Create summary dataframe
        summary_df = pd.DataFrame(summary_data, columns=['Metric', 'Value'])
        summary_df.to_excel(writer, sheet_name='Executive_Summary', index=False)

        # ========================================
        # SHEET 2: VOXLINGUA107 DETAILED RESULTS
        # ========================================
        if results['voxlingua_detailed']:
            print("   πŸ“‹ Creating VoxLingua107 detailed results...")

            vox_detailed_df = pd.DataFrame(results['voxlingua_detailed'])

            # Add additional analysis columns
            # 'correct' may be None when ground truth is missing, so compare
            # against True before casting to int
            vox_detailed_df['accuracy_score'] = (vox_detailed_df['correct'] == True).astype(int)
            vox_detailed_df['confidence_category'] = pd.cut(
                vox_detailed_df['confidence'],
                bins=[0, 0.3, 0.6, 0.8, 1.0],
                labels=['Low', 'Medium', 'High', 'Very High']
            )

            # Add language family information (helper defined at the top of
            # this function)

            vox_detailed_df['gt_language_family'] = vox_detailed_df['gt_iso'].apply(get_language_family)
            vox_detailed_df['pred_language_family'] = vox_detailed_df['pred_iso'].apply(get_language_family)

            vox_detailed_df.to_excel(writer, sheet_name='VoxLingua107_Results', index=False)

        # ========================================
        # SHEET 3: XLS-R DETAILED RESULTS
        # ========================================
        if results['xlsr_detailed']:
            print("   πŸ“‹ Creating XLS-R detailed results...")

            xlsr_detailed_df = pd.DataFrame(results['xlsr_detailed'])

            # Add analysis columns
            # As above, guard against None in 'correct'
            xlsr_detailed_df['accuracy_score'] = (xlsr_detailed_df['correct'] == True).astype(int)
            xlsr_detailed_df['confidence_category'] = pd.cut(
                xlsr_detailed_df['confidence'],
                bins=[0, 0.3, 0.6, 0.8, 1.0],
                labels=['Low', 'Medium', 'High', 'Very High']
            )
            xlsr_detailed_df['gt_language_family'] = xlsr_detailed_df['gt_iso'].apply(get_language_family)
            xlsr_detailed_df['pred_language_family'] = xlsr_detailed_df['pred_iso'].apply(get_language_family)

            xlsr_detailed_df.to_excel(writer, sheet_name='XLSR_Results', index=False)

        # ========================================
        # SHEET 4: PER-LANGUAGE ACCURACY ANALYSIS
        # ========================================
        print("   πŸ“‹ Creating per-language accuracy analysis...")

        lang_analysis_data = []

        # Get all unique languages from ground truth
        all_gt_langs = set()
        if results['voxlingua_detailed']:
            all_gt_langs.update([r['gt_iso'] for r in results['voxlingua_detailed'] if r['gt_iso']])
        if results['xlsr_detailed']:
            all_gt_langs.update([r['gt_iso'] for r in results['xlsr_detailed'] if r['gt_iso']])

        # Language name mapping
        LANG_NAMES = {
            'ur': 'Urdu', 'pa': 'Punjabi', 'ta': 'Tamil', 'sd': 'Sindhi', 'or': 'Odia',
            'ml': 'Malayalam', 'ne': 'Nepali', 'as': 'Assamese', 'hi': 'Hindi', 'bn': 'Bengali',
            'kok': 'Konkani', 'kn': 'Kannada', 'ks': 'Kashmiri', 'mr': 'Marathi', 'te': 'Telugu',
            'br': 'Bodo', 'doi': 'Dogri', 'sat': 'Santali', 'gu': 'Gujarati', 'mni': 'Manipuri',
            'sa': 'Sanskrit'
        }

        for lang in sorted(all_gt_langs):
            lang_name = LANG_NAMES.get(lang, lang.title())
            lang_family = get_language_family(lang)

            # VoxLingua stats for this language
            vox_stats = {'files': 0, 'correct': 0, 'accuracy': 0, 'avg_confidence': 0}
            if results['voxlingua_detailed']:
                vox_lang_data = [r for r in results['voxlingua_detailed'] if r['gt_iso'] == lang]
                if vox_lang_data:
                    vox_stats['files'] = len(vox_lang_data)
                    vox_stats['correct'] = sum(1 for r in vox_lang_data if r['correct'])
                    vox_stats['accuracy'] = vox_stats['correct'] / vox_stats['files']
                    vox_stats['avg_confidence'] = np.mean([r['confidence'] for r in vox_lang_data])

            # XLS-R stats for this language
            xlsr_stats = {'files': 0, 'correct': 0, 'accuracy': 0, 'avg_confidence': 0}
            if results['xlsr_detailed']:
                xlsr_lang_data = [r for r in results['xlsr_detailed'] if r['gt_iso'] == lang]
                if xlsr_lang_data:
                    xlsr_stats['files'] = len(xlsr_lang_data)
                    xlsr_stats['correct'] = sum(1 for r in xlsr_lang_data if r['correct'])
                    xlsr_stats['accuracy'] = xlsr_stats['correct'] / xlsr_stats['files']
                    xlsr_stats['avg_confidence'] = np.mean([r['confidence'] for r in xlsr_lang_data])

            lang_analysis_data.append({
                'Language_Code': lang,
                'Language_Name': lang_name,
                'Language_Family': lang_family,
                'VoxLingua_Files': vox_stats['files'],
                'VoxLingua_Correct': vox_stats['correct'],
                'VoxLingua_Accuracy': f"{vox_stats['accuracy']:.4f}",
                'VoxLingua_Accuracy_Pct': f"{vox_stats['accuracy']*100:.1f}%",
                'VoxLingua_Avg_Confidence': f"{vox_stats['avg_confidence']:.4f}",
                'XLSR_Files': xlsr_stats['files'],
                'XLSR_Correct': xlsr_stats['correct'],
                'XLSR_Accuracy': f"{xlsr_stats['accuracy']:.4f}",
                'XLSR_Accuracy_Pct': f"{xlsr_stats['accuracy']*100:.1f}%",
                'XLSR_Avg_Confidence': f"{xlsr_stats['avg_confidence']:.4f}",
                'Better_Model': 'VoxLingua' if vox_stats['accuracy'] > xlsr_stats['accuracy'] else 'XLS-R' if xlsr_stats['accuracy'] > vox_stats['accuracy'] else 'Tie'
            })

        lang_analysis_df = pd.DataFrame(lang_analysis_data)
        lang_analysis_df.to_excel(writer, sheet_name='Per_Language_Analysis', index=False)

        # ========================================
        # SHEET 5: CONFUSION MATRIX - VOXLINGUA
        # ========================================
        if results['voxlingua_detailed']:
            print("   πŸ“‹ Creating VoxLingua confusion matrix...")

            vox_df = pd.DataFrame(results['voxlingua_detailed'])
            valid_vox = vox_df[vox_df['gt_iso'] != ''].copy()

            if len(valid_vox) > 0:
                # Create confusion matrix
                confusion_data = []
                for gt_lang in sorted(valid_vox['gt_iso'].unique()):
                    gt_data = valid_vox[valid_vox['gt_iso'] == gt_lang]
                    row_data = {'Ground_Truth': gt_lang}

                    for pred_lang in sorted(valid_vox['pred_iso'].unique()):
                        count = len(gt_data[gt_data['pred_iso'] == pred_lang])
                        row_data[f'Predicted_{pred_lang}'] = count

                    confusion_data.append(row_data)

                confusion_df = pd.DataFrame(confusion_data).fillna(0)
                confusion_df.to_excel(writer, sheet_name='VoxLingua_Confusion_Matrix', index=False)

        # ========================================
        # SHEET 6: CONFUSION MATRIX - XLS-R
        # ========================================
        if results['xlsr_detailed']:
            print("   πŸ“‹ Creating XLS-R confusion matrix...")

            xlsr_df = pd.DataFrame(results['xlsr_detailed'])
            valid_xlsr = xlsr_df[xlsr_df['gt_iso'] != ''].copy()

            if len(valid_xlsr) > 0:
                confusion_data = []
                for gt_lang in sorted(valid_xlsr['gt_iso'].unique()):
                    gt_data = valid_xlsr[valid_xlsr['gt_iso'] == gt_lang]
                    row_data = {'Ground_Truth': gt_lang}

                    for pred_lang in sorted(valid_xlsr['pred_iso'].unique()):
                        count = len(gt_data[gt_data['pred_iso'] == pred_lang])
                        row_data[f'Predicted_{pred_lang}'] = count

                    confusion_data.append(row_data)

                confusion_df = pd.DataFrame(confusion_data).fillna(0)
                confusion_df.to_excel(writer, sheet_name='XLSR_Confusion_Matrix', index=False)

        # ========================================
        # SHEET 7: CONFIDENCE ANALYSIS
        # ========================================
        print("   πŸ“‹ Creating confidence analysis...")

        confidence_analysis = []

        # VoxLingua confidence analysis
        if results['voxlingua_detailed']:
            vox_df = pd.DataFrame(results['voxlingua_detailed'])
            valid_vox = vox_df[vox_df['gt_iso'] != ''].copy()

            if len(valid_vox) > 0:
                for conf_range in [(0, 0.3), (0.3, 0.6), (0.6, 0.8), (0.8, 1.0)]:
                    # Top bin is inclusive so a confidence of exactly 1.0 is
                    # not dropped
                    if conf_range[1] == 1.0:
                        in_upper = valid_vox['confidence'] <= conf_range[1]
                    else:
                        in_upper = valid_vox['confidence'] < conf_range[1]
                    range_data = valid_vox[(valid_vox['confidence'] >= conf_range[0]) & in_upper]

                    if len(range_data) > 0:
                        accuracy = (range_data['correct'] == True).mean()
                        confidence_analysis.append({
                            'Model': 'VoxLingua107',
                            'Confidence_Range': f"{conf_range[0]:.1f}-{conf_range[1]:.1f}",
                            'Files': len(range_data),
                            'Accuracy': f"{accuracy:.4f}",
                            'Accuracy_Pct': f"{accuracy*100:.1f}%",
                            'Avg_Confidence': f"{range_data['confidence'].mean():.4f}"
                        })

        # XLS-R confidence analysis
        if results['xlsr_detailed']:
            xlsr_df = pd.DataFrame(results['xlsr_detailed'])
            valid_xlsr = xlsr_df[xlsr_df['gt_iso'] != ''].copy()

            if len(valid_xlsr) > 0:
                for conf_range in [(0, 0.3), (0.3, 0.6), (0.6, 0.8), (0.8, 1.0)]:
                    # Top bin is inclusive so a confidence of exactly 1.0 is
                    # not dropped
                    if conf_range[1] == 1.0:
                        in_upper = valid_xlsr['confidence'] <= conf_range[1]
                    else:
                        in_upper = valid_xlsr['confidence'] < conf_range[1]
                    range_data = valid_xlsr[(valid_xlsr['confidence'] >= conf_range[0]) & in_upper]

                    if len(range_data) > 0:
                        accuracy = (range_data['correct'] == True).mean()
                        confidence_analysis.append({
                            'Model': 'XLS-R',
                            'Confidence_Range': f"{conf_range[0]:.1f}-{conf_range[1]:.1f}",
                            'Files': len(range_data),
                            'Accuracy': f"{accuracy:.4f}",
                            'Accuracy_Pct': f"{accuracy*100:.1f}%",
                            'Avg_Confidence': f"{range_data['confidence'].mean():.4f}"
                        })

        confidence_df = pd.DataFrame(confidence_analysis)
        confidence_df.to_excel(writer, sheet_name='Confidence_Analysis', index=False)

        # ========================================
        # SHEET 8: ERROR ANALYSIS
        # ========================================
        print("   πŸ“‹ Creating error analysis...")

        error_analysis = []

        # VoxLingua errors
        if results['voxlingua_detailed']:
            vox_df = pd.DataFrame(results['voxlingua_detailed'])
            vox_errors = vox_df[vox_df['correct'] == False].copy()

            for _, error in vox_errors.iterrows():
                error_analysis.append({
                    'Model': 'VoxLingua107',
                    'File': error['file'],
                    'Ground_Truth': error['gt_iso'],
                    'Predicted': error['pred_iso'],
                    'Confidence': f"{error['confidence']:.4f}",
                    'GT_Language_Family': get_language_family(error['gt_iso']),
                    'Pred_Language_Family': get_language_family(error['pred_iso']),
                    'Cross_Family_Error': get_language_family(error['gt_iso']) != get_language_family(error['pred_iso'])
                })

        # XLS-R errors
        if results['xlsr_detailed']:
            xlsr_df = pd.DataFrame(results['xlsr_detailed'])
            xlsr_errors = xlsr_df[xlsr_df['correct'] == False].copy()

            for _, error in xlsr_errors.iterrows():
                error_analysis.append({
                    'Model': 'XLS-R',
                    'File': error['file'],
                    'Ground_Truth': error['gt_iso'],
                    'Predicted': error['pred_iso'],
                    'Confidence': f"{error['confidence']:.4f}",
                    'GT_Language_Family': get_language_family(error['gt_iso']),
                    'Pred_Language_Family': get_language_family(error['pred_iso']),
                    'Cross_Family_Error': get_language_family(error['gt_iso']) != get_language_family(error['pred_iso'])
                })

        error_df = pd.DataFrame(error_analysis)
        error_df.to_excel(writer, sheet_name='Error_Analysis', index=False)

        # ========================================
        # SHEET 9: LANGUAGE FAMILY PERFORMANCE
        # ========================================
        print("   πŸ“‹ Creating language family performance...")

        family_performance = []

        families = ['Indo-Aryan', 'Dravidian', 'Low-Resource', 'Other']

        for family in families:
            # VoxLingua performance for this family
            if results['voxlingua_detailed']:
                vox_df = pd.DataFrame(results['voxlingua_detailed'])
                family_data = vox_df[vox_df['gt_iso'].apply(lambda x: get_language_family(x) == family)]

                if len(family_data) > 0:
                    vox_acc = (family_data['correct'] == True).mean()
                    vox_conf = family_data['confidence'].mean()
                    vox_files = len(family_data)
                else:
                    vox_acc = vox_conf = vox_files = 0
            else:
                vox_acc = vox_conf = vox_files = 0

            # XLS-R performance for this family
            if results['xlsr_detailed']:
                xlsr_df = pd.DataFrame(results['xlsr_detailed'])
                family_data = xlsr_df[xlsr_df['gt_iso'].apply(lambda x: get_language_family(x) == family)]

                if len(family_data) > 0:
                    xlsr_acc = (family_data['correct'] == True).mean()
                    xlsr_conf = family_data['confidence'].mean()
                    xlsr_files = len(family_data)
                else:
                    xlsr_acc = xlsr_conf = xlsr_files = 0
            else:
                xlsr_acc = xlsr_conf = xlsr_files = 0

            family_performance.append({
                'Language_Family': family,
                'VoxLingua_Files': vox_files,
                'VoxLingua_Accuracy': f"{vox_acc:.4f}",
                'VoxLingua_Accuracy_Pct': f"{vox_acc*100:.1f}%",
                'VoxLingua_Avg_Confidence': f"{vox_conf:.4f}",
                'XLSR_Files': xlsr_files,
                'XLSR_Accuracy': f"{xlsr_acc:.4f}",
                'XLSR_Accuracy_Pct': f"{xlsr_acc*100:.1f}%",
                'XLSR_Avg_Confidence': f"{xlsr_conf:.4f}",
                'Better_Model': 'VoxLingua' if vox_acc > xlsr_acc else 'XLS-R' if xlsr_acc > vox_acc else 'Tie'
            })

        family_df = pd.DataFrame(family_performance)
        family_df.to_excel(writer, sheet_name='Language_Family_Performance', index=False)

        # ========================================
        # SHEET 10: TOP-5 PREDICTIONS (SAMPLE)
        # ========================================
        print("   πŸ“‹ Creating Top-5 predictions sample...")

        top5_sample = []

        # Sample top-5 predictions from comparison data
        sample_files = results['comparison_data'][:20]  # First 20 files as sample

        for file_data in sample_files:
            file_name = file_data['file']
            gt_lang = file_data['gt_iso']

            # VoxLingua Top-5
            if file_data['voxlingua']['available'] and 'top5_predictions' in file_data['voxlingua']:
                for pred in file_data['voxlingua']['top5_predictions']:
                    top5_sample.append({
                        'Model': 'VoxLingua107',
                        'File': file_name,
                        'Ground_Truth': gt_lang,
                        'Rank': pred['rank'],
                        'Predicted_Language': pred['mapped'],
                        'Original_Output': pred['original'],
                        'Confidence': f"{pred['confidence']:.4f}",
                        'In_Dataset': pred['in_dataset'],
                        'Correct': gt_lang == pred['mapped']
                    })

            # XLS-R Top-5
            if file_data['xlsr']['available'] and 'top5_predictions' in file_data['xlsr']:
                for pred in file_data['xlsr']['top5_predictions']:
                    top5_sample.append({
                        'Model': 'XLS-R',
                        'File': file_name,
                        'Ground_Truth': gt_lang,
                        'Rank': pred['rank'],
                        'Predicted_Language': pred['mapped'],
                        'Original_Output': pred['original'],
                        'Confidence': f"{pred['confidence']:.4f}",
                        'In_Dataset': pred['in_dataset'],
                        'Correct': gt_lang == pred['mapped']
                    })

        top5_df = pd.DataFrame(top5_sample)
        top5_df.to_excel(writer, sheet_name='Top5_Predictions_Sample', index=False)

    print(f"βœ… Comprehensive Excel analysis created: {output_filename}")

    # Trigger a browser download when running in Google Colab; elsewhere the
    # file simply stays on disk
    try:
        from google.colab import files
        files.download(output_filename)
        print(f"📥 Download triggered: {output_filename}")
    except ImportError:
        print(f"📁 File saved locally: {output_filename}")

    return output_filename
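
# Optional sanity check on the generated workbook: list the sheets that were
# actually written. pd.ExcelFile is standard pandas; pass whatever filename
# create_comprehensive_excel_analysis returned.
def list_excel_sheets(xlsx_path):
    """Print and return the sheet names in the generated workbook"""
    xl = pd.ExcelFile(xlsx_path)
    for name in xl.sheet_names:
        print(f"   📄 {name}")
    return xl.sheet_names

# Example: list_excel_sheets(excel_filename)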

# Run the comprehensive Excel analysis
if 'final_analysis_results' in globals() and final_analysis_results:
    excel_filename = create_comprehensive_excel_analysis(
        final_analysis_results,
        "Language_Detection_Comprehensive_Analysis.xlsx"
    )
    print(f"\nπŸŽ‰ COMPREHENSIVE EXCEL ANALYSIS COMPLETE!")
    print(f"πŸ“Š File: {excel_filename}")

    # Print summary of what was created
    print(f"\nπŸ“‹ Excel Contains 10 Sheets:")
    print(f"   1. Executive_Summary - Key metrics and recommendations")
    print(f"   2. VoxLingua107_Results - Detailed VoxLingua results")
    print(f"   3. XLSR_Results - Detailed XLS-R results")
    print(f"   4. Per_Language_Analysis - Accuracy by language")
    print(f"   5. VoxLingua_Confusion_Matrix - VoxLingua confusion matrix")
    print(f"   6. XLSR_Confusion_Matrix - XLS-R confusion matrix")
    print(f"   7. Confidence_Analysis - Performance by confidence ranges")
    print(f"   8. Error_Analysis - Detailed error breakdown")
    print(f"   9. Language_Family_Performance - Performance by language family")
    print(f"   10. Top5_Predictions_Sample - Sample of top-5 predictions")

else:
    print("❌ No analysis results found. Please run the analysis first.")