#!/usr/bin/env python3
"""
=============================================================
Sinhala TTS - YouTube Channel Audio Quality Evaluator v3
=============================================================
Run this on your LOCAL MACHINE.

Requirements:
    pip install -U yt-dlp librosa soundfile numpy scipy certifi
    ffmpeg must also be installed and on your PATH (used to convert audio to WAV)

Usage:
    python evaluate_channels.py
=============================================================
"""

import os
import sys
import json
import ssl
import numpy as np
import warnings
warnings.filterwarnings("ignore")

# Fix macOS SSL certificate issue: point OpenSSL-based clients at certifi's
# CA bundle so HTTPS verification keeps working when the system store is
# missing or incomplete.
try:
    import certifi
except ImportError:
    # True last-resort fallback: only when no CA bundle can be supplied do we
    # disable HTTPS verification for the default urllib context. This exposes
    # the process to man-in-the-middle attacks, so say so loudly instead of
    # doing it silently (and never do it when certifi is available).
    try:
        ssl._create_default_https_context = ssl._create_unverified_context
    except AttributeError:
        # Private ssl hooks are absent on this interpreter; nothing to patch.
        pass
    else:
        print(
            "WARNING: certifi is not installed; HTTPS certificate verification "
            "is DISABLED. Fix with: pip install -U certifi",
            file=sys.stderr,
        )
else:
    _ca_bundle = certifi.where()
    os.environ['SSL_CERT_FILE'] = _ca_bundle
    os.environ['REQUESTS_CA_BUNDLE'] = _ca_bundle

# ============================================================
# CONFIGURATION
# ============================================================
# Channels to evaluate, keyed by a short identifier that is also used as the
# per-channel download sub-directory. Insertion order is the processing and
# reporting order.
CHANNELS = {
    "sunchare": dict(
        url="https://www.youtube.com/@sunchare/videos",
        label="NU1's VLOG (Unlimited History)",
    ),
    "Raamuwa": dict(
        url="https://www.youtube.com/@Raamuwa/videos",
        label="Raamuwa",
    ),
}

# How many videos to sample from each channel.
N_VIDEOS_PER_CHANNEL = 4
# Root directory for downloaded audio and the JSON report.
OUTPUT_DIR = "tts_channel_eval"


# ============================================================
# STEP 1: Download samples using yt-dlp Python API
# ============================================================
def _select_videos(entries, n_videos):
    """Pick up to n_videos (video_id, title, duration_sec) tuples from flat entries.

    Entries with an unknown duration (missing or 0) or a duration between
    3 and 40 minutes are preferred. If nothing qualifies but at least one
    entry was rejected because of its length, the first n_videos entries that
    carry an id are taken regardless of duration.
    """
    selected = []
    any_skipped = False
    for entry in entries:
        vid_id = entry.get('id') or ''
        # Guard against a 'title' key that is present but None; the slice in
        # the print below would otherwise raise TypeError.
        title = entry.get('title') or '?'
        dur = entry.get('duration') or 0

        if not vid_id:
            continue

        if dur == 0 or 180 <= dur <= 2400:
            selected.append((vid_id, title, dur))
            print(f"  + {title[:55]:55s} ({dur / 60:.0f}min)")
            if len(selected) >= n_videos:
                break
        else:
            any_skipped = True

    if not selected and any_skipped:
        print(f"  No videos in 3-40min range. Taking first {n_videos} anyway...")
        for entry in entries[:n_videos]:
            vid_id = entry.get('id') or ''
            if vid_id:
                selected.append(
                    (vid_id, entry.get('title') or '?', entry.get('duration') or 0)
                )

    return selected


def download_samples(channel_key, channel_info, n_videos=N_VIDEOS_PER_CHANNEL):
    """Download n_videos from a channel as WAV audio using the yt-dlp Python API.

    Args:
        channel_key: Short channel identifier; used as the sub-directory name
            under OUTPUT_DIR.
        channel_info: Mapping with at least the keys 'url' and 'label'.
        n_videos: Maximum number of videos to download.

    Returns:
        Sorted list of paths to the WAV files belonging to the videos selected
        in this call (empty if the listing failed or nothing could be
        downloaded). WAV files left in the directory by earlier runs for other
        videos are not included.
    """
    import yt_dlp

    out_dir = os.path.join(OUTPUT_DIR, channel_key)
    os.makedirs(out_dir, exist_ok=True)

    print(f"\n{'='*60}")
    print(f"Downloading from: {channel_info['label']}")
    print(f"URL: {channel_info['url']}")
    print(f"{'='*60}")

    # Step 1: Extract video list from channel.
    # Three times as many entries as needed are listed so that the duration
    # filter still has candidates to choose from.
    print("\n  [1/2] Fetching video list...")
    list_opts = {
        'quiet': True,
        'no_warnings': True,
        'extract_flat': 'in_playlist',
        'playlist_items': f'1-{n_videos * 3}',
        # NOTE(review): disables TLS certificate verification for yt-dlp;
        # consider dropping once certifi-based verification is confirmed.
        'nocheckcertificate': True,
    }

    entries = []
    try:
        with yt_dlp.YoutubeDL(list_opts) as ydl:
            info = ydl.extract_info(channel_info["url"], download=False)
            if info:
                channel_title = info.get('channel', info.get('uploader', channel_key))
                entries = [e for e in info.get('entries', []) if e is not None]
                print(f"  Channel: {channel_title}")
                print(f"  Found {len(entries)} videos")
    except Exception as e:
        # Best effort: a listing failure for one channel must not abort the run.
        print(f"  Error fetching video list: {e}")

    if not entries:
        print("  No entries found.")
        print("  Try: pip install -U yt-dlp certifi")
        return []

    # Select videos (prefer 3-40 min)
    selected = _select_videos(entries, n_videos)
    if not selected:
        print("  No downloadable videos found!")
        return []

    # Step 2: Download each video as WAV
    print(f"\n  [2/2] Downloading {len(selected)} videos as WAV...")

    for i, (vid_id, title, _dur) in enumerate(selected):
        url = f"https://www.youtube.com/watch?v={vid_id}"
        dl_opts = {
            'format': 'bestaudio/best',
            'outtmpl': os.path.join(out_dir, f"{vid_id}.%(ext)s"),
            'postprocessors': [{
                'key': 'FFmpegExtractAudio',
                'preferredcodec': 'wav',
            }],
            # Mono, 22.05 kHz: the same rate analyze_audio loads the file at.
            'postprocessor_args': {
                'ffmpeg': ['-ac', '1', '-ar', '22050'],
            },
            'quiet': True,
            'no_warnings': True,
            'nocheckcertificate': True,
        }

        print(f"\n  [{i+1}/{len(selected)}] {title[:50]}...")
        try:
            with yt_dlp.YoutubeDL(dl_opts) as ydl:
                ydl.download([url])
            print("       Done")
        except Exception as e:
            # Keep going: one failed video should not lose the others.
            print(f"       Failed: {str(e)[:100]}")

    # Report only the WAVs of the videos selected above, so leftovers from
    # previous runs do not get counted as "downloaded" or analysed.
    candidate_paths = {os.path.join(out_dir, f"{vid_id}.wav") for vid_id, _, _ in selected}
    wav_paths = sorted(p for p in candidate_paths if os.path.isfile(p))
    print(f"\n  Downloaded {len(wav_paths)} WAV files to {out_dir}/")
    return wav_paths


# ============================================================
# STEP 2: Audio Quality Analysis
# ============================================================
def analyze_audio(wav_path):
    """Analyze a single WAV file for TTS training suitability.

    Computes a set of heuristic indicators (SNR estimate, spectral flatness,
    pitch statistics on the first 5 minutes, active-audio ratio) and folds
    them into a 0-8 score. All thresholds are rules of thumb, not calibrated
    measurements.

    Args:
        wav_path: Path to the audio file to analyze.

    Returns:
        Dict of JSON-serializable metrics (always contains "file",
        "duration_min", "snr_db", "spectral_flatness", "music_risk",
        "speech_pct", "speech_min", "tts_score", "grade"; pitch-related keys
        only when voiced frames were found), or None if the file could not be
        loaded or contains no samples.
    """
    import librosa

    # A frame counts as active when its RMS is within this many dB of the
    # loudest frame (heuristic).
    speech_range_db = 30.0

    fname = os.path.basename(wav_path)
    print(f"\nAnalyzing: {fname}")

    try:
        y, sr = librosa.load(wav_path, sr=22050, mono=True)
    except Exception as e:
        print(f"  Failed to load: {e}")
        return None

    if len(y) == 0:
        # Nothing to measure; the statistics below would divide by zero.
        print("  Failed to load: file contains no audio samples")
        return None

    duration_min = len(y) / sr / 60
    print(f"  Duration: {duration_min:.1f} minutes")

    results = {
        "file": fname,
        "duration_min": round(duration_min, 1),
    }

    # --- RMS Energy & SNR ---
    # Noise floor is estimated from the quietest 20% of frames, signal level
    # from the remaining frames.
    rms = librosa.feature.rms(y=y, frame_length=2048, hop_length=512)[0]
    rms_threshold = np.percentile(rms, 20)
    noise_floor = float(np.mean(rms[rms <= rms_threshold]))
    loud_frames = rms[rms > rms_threshold]

    if loud_frames.size == 0:
        # Every frame has the same energy (e.g. pure silence or a constant
        # tone): there is no contrast to measure, so do not report NaN or a
        # flattering default.
        snr = 0.0
    elif noise_floor > 0:
        snr = 20 * np.log10(np.mean(loud_frames) / noise_floor)
    else:
        # Digital silence in the quiet frames: ratio is unbounded, cap it.
        snr = 40.0
    results["snr_db"] = round(float(snr), 1)

    snr_label = "excellent" if snr >= 25 else "acceptable" if snr >= 15 else "poor"
    print(f"  SNR: {snr:.1f} dB ({snr_label})")

    # --- Spectral Flatness (music vs speech) ---
    mean_flat = float(np.mean(librosa.feature.spectral_flatness(y=y)[0]))
    results["spectral_flatness"] = round(mean_flat, 4)

    music_risk = "low" if mean_flat > 0.02 else "medium" if mean_flat > 0.005 else "high"
    results["music_risk"] = music_risk
    print(f"  Music risk: {music_risk} (flatness={mean_flat:.4f})")

    # --- Pitch Analysis (first 5 min for speed) ---
    y_short = y[:sr * 300]
    print(f"  Running pitch analysis (first {min(duration_min, 5):.0f} min)...")
    f0, _, _ = librosa.pyin(y_short, fmin=50, fmax=500, sr=sr)
    voiced_mask = ~np.isnan(f0)
    f0_voiced = f0[voiced_mask]

    if len(f0_voiced) > 0:
        pitch_mean = float(np.mean(f0_voiced))
        pitch_std = float(np.std(f0_voiced))

        results["pitch_mean_hz"] = round(pitch_mean, 1)
        results["pitch_std_hz"] = round(pitch_std, 1)
        results["voiced_ratio"] = round(float(np.mean(voiced_mask)), 3)

        # A wide pitch spread is used as a proxy for several voices.
        if pitch_std > 80:
            results["speaker_assessment"] = "likely_multi_speaker"
            print(f"  Speaker: LIKELY MULTI-SPEAKER (pitch std={pitch_std:.1f}Hz)")
        elif pitch_std > 60:
            results["speaker_assessment"] = "possibly_multi_speaker"
            print(f"  Speaker: possibly multi-speaker (pitch std={pitch_std:.1f}Hz)")
        else:
            results["speaker_assessment"] = "single_speaker"
            print(f"  Speaker: consistent single speaker (pitch std={pitch_std:.1f}Hz)")

        gender = "female" if pitch_mean > 180 else "male"
        results["gender_estimate"] = gender
        print(f"  Voice: {gender} (mean pitch={pitch_mean:.0f}Hz)")
    else:
        print("  Pitch: could not extract (no voiced frames detected)")

    # --- Speech vs Silence Ratio ---
    # Must not reuse the 20th-percentile threshold from the SNR step: by
    # definition ~80% of frames exceed it, which would make this ratio a
    # constant. Use a level relative to the loudest frame instead.
    peak_rms = float(np.max(rms))
    if peak_rms > 0:
        activity_threshold = peak_rms * 10 ** (-speech_range_db / 20)
        speech_ratio = float(np.mean(rms > activity_threshold))
    else:
        speech_ratio = 0.0
    results["speech_pct"] = round(speech_ratio * 100, 1)
    results["speech_min"] = round(duration_min * speech_ratio, 1)
    print(f"  Speech content: {speech_ratio:.0%} ({results['speech_min']:.1f} min of speech)")

    # --- Overall TTS Quality Score (max 3 + 2 + 2 + 1 = 8) ---
    score = 0
    if snr >= 25:
        score += 3
    elif snr >= 15:
        score += 2
    elif snr >= 10:
        score += 1

    # Missing pitch data (no voiced frames) earns no pitch-consistency points.
    pitch_std_hz = results.get("pitch_std_hz", 999)
    if pitch_std_hz < 50:
        score += 2
    elif pitch_std_hz < 80:
        score += 1

    if speech_ratio > 0.6:
        score += 2
    elif speech_ratio > 0.4:
        score += 1

    if mean_flat > 0.01:
        score += 1

    results["tts_score"] = score
    grade = "Excellent" if score >= 7 else "Good" if score >= 5 else "Fair" if score >= 3 else "Poor"
    results["grade"] = grade
    print(f"  TTS Quality Score: {score}/8 ({grade})")

    return results


# ============================================================
# MAIN
# ============================================================
def main():
    """Download, analyze and compare every channel listed in CHANNELS.

    Exits with status 1 if a required package is missing. Otherwise prints a
    per-channel summary, writes all per-file metrics to
    OUTPUT_DIR/evaluation_results.json and prints a final comparison table.
    """
    # Check dependencies
    missing = []
    for pkg in ['yt_dlp', 'librosa', 'soundfile', 'numpy', 'scipy']:
        try:
            __import__(pkg)
        except ImportError:
            # Import names use '_', pip distribution names use '-'.
            missing.append(pkg.replace('_', '-'))
    if missing:
        print(f"Missing packages: {', '.join(missing)}")
        print(f"Install with: pip install -U {' '.join(missing)}")
        sys.exit(1)

    import yt_dlp
    print(f"yt-dlp version: {yt_dlp.version.__version__}")
    print("Sinhala TTS - YouTube Channel Quality Evaluator v3")
    print("=" * 60)

    all_results = {}

    for channel_key, channel_info in CHANNELS.items():
        wav_files = download_samples(channel_key, channel_info)

        if not wav_files:
            print(f"\nNo files downloaded for {channel_info['label']}")
            all_results[channel_key] = []
            continue

        # analyze_audio returns None for files it cannot load; drop those.
        channel_results = []
        for wav_path in wav_files:
            res = analyze_audio(wav_path)
            if res:
                channel_results.append(res)

        all_results[channel_key] = channel_results

        if channel_results:
            total_dur = sum(r["duration_min"] for r in channel_results)
            total_speech = sum(r.get("speech_min", 0) for r in channel_results)
            avg_snr = np.mean([r["snr_db"] for r in channel_results])
            avg_score = np.mean([r["tts_score"] for r in channel_results])
            multi_spk = sum(1 for r in channel_results
                            if "multi" in r.get("speaker_assessment", ""))
            music_high = sum(1 for r in channel_results if r.get("music_risk") == "high")

            print(f"\n{'='*60}")
            print(f"CHANNEL SUMMARY: {channel_info['label']}")
            print(f"{'='*60}")
            print(f"  Videos analyzed:     {len(channel_results)}")
            print(f"  Total duration:      {total_dur:.1f} min")
            print(f"  Usable speech:       {total_speech:.1f} min")
            print(f"  Avg SNR:             {avg_snr:.1f} dB")
            print(f"  Avg TTS Score:       {avg_score:.1f}/8")
            print(f"  Multi-speaker risk:  {multi_spk}/{len(channel_results)} videos")
            print(f"  High music risk:     {music_high}/{len(channel_results)} videos")

    # Save detailed results.
    # ensure_ascii=False writes non-ASCII text verbatim, so the file encoding
    # must be pinned to UTF-8 rather than left to the platform default.
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    results_path = os.path.join(OUTPUT_DIR, "evaluation_results.json")
    with open(results_path, "w", encoding="utf-8") as f:
        json.dump(all_results, f, indent=2, ensure_ascii=False)

    # ============================================================
    # FINAL COMPARISON
    # ============================================================
    print(f"\n\n{'='*60}")
    print("FINAL COMPARISON")
    print(f"{'='*60}")
    print(f"{'Channel':<35} {'Score':>8} {'SNR':>8} {'Speech':>10} {'Speaker':>15} {'Music':>10}")
    print(f"{'-'*35} {'-'*8} {'-'*8} {'-'*10} {'-'*15} {'-'*10}")

    for channel_key, results in all_results.items():
        label = CHANNELS[channel_key]['label']
        if results:
            avg_score = np.mean([r["tts_score"] for r in results])
            avg_snr = np.mean([r["snr_db"] for r in results])
            total_speech = sum(r.get("speech_min", 0) for r in results)

            # "single" when at least half of the files look single-speaker.
            single = sum(1 for r in results if r.get("speaker_assessment") == "single_speaker")
            spk_label = "single" if single >= len(results)/2 else "mixed"

            high_music = sum(1 for r in results if r.get("music_risk") == "high")
            music_label = "low" if high_music == 0 else "some" if high_music < len(results)/2 else "heavy"

            print(f"{label:<35} {avg_score:>5.1f}/8 {avg_snr:>6.1f}dB {total_speech:>7.1f}min {spk_label:>15} {music_label:>10}")
        else:
            print(f"{label:<35} {'No data':>8}")

    print(f"\nResults saved to: {results_path}")
    print(f"\nDone! Paste the output above (or {results_path}) back to the assistant.")


if __name__ == "__main__":
    main()