import os
import sys
import time
import base64
import torch
import tempfile
import traceback
import uvicorn
import gc
from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import HTMLResponse

# --- [v164] 🚀 PRO GPU ENGINE (ULTIMATE STABILITY) ---
# This version enforces float32 for STT to avoid CUBLAS errors and uses batch_size=1.
# Startup banner: printed once at import time so the engine version is visible in the logs.
print("--- [v164] 📡 BOOTING ENGINE ---")

# 🛠️ CRITICAL: TORCHAUDIO MONKEYPATCH 🛠️
import torchaudio
import soundfile as sf
def HeroLoad(filepath, **kwargs):
    """Replacement for ``torchaudio.load`` that decodes audio with ``soundfile``.

    Args:
        filepath: Path (or whatever ``soundfile.read`` accepts) of the audio to load.
        **kwargs: ``torchaudio.load`` style keyword arguments. ``frame_offset``,
            ``num_frames`` and ``channels_first`` are honoured on the soundfile
            path; any other keyword is only used if the fallback loader runs.

    Returns:
        A ``(waveform, sample_rate)`` tuple. ``waveform`` is a float32 tensor laid
        out as (channels, frames) unless ``channels_first=False`` was requested.

    If the soundfile path raises for any reason, the failure is logged and the
    call is delegated to the original loader saved as ``torchaudio.load_orig``.
    """
    try:
        # Map torchaudio's windowing arguments onto soundfile's equivalents so a
        # caller asking for a slice does not silently receive the whole file.
        data, samplerate = sf.read(
            filepath,
            start=kwargs.get("frame_offset", 0),
            frames=kwargs.get("num_frames", -1),
        )
        if data.ndim == 1:
            # Mono comes back 1-D; add the channel axis.
            data = data.reshape(1, -1)
        else:
            # soundfile yields (frames, channels); flip to (channels, frames).
            data = data.T
        waveform = torch.from_numpy(data).float()
        if not kwargs.get("channels_first", True):
            waveform = waveform.t()
        return waveform, samplerate
    except Exception as e:
        print(f"--- [v164] ❌ PATCHED LOAD FAILED: {e} ---")
        return torchaudio.load_orig(filepath, **kwargs)

# Install the soundfile-backed loader once. The `load_orig` attribute doubles as the
# "already patched" marker, so running this again never saves HeroLoad as the original.
if not hasattr(torchaudio, 'load_orig'):
    torchaudio.load_orig, torchaudio.load = torchaudio.load, HeroLoad
    print("--- [v164] 🩹 TORCHAUDIO PATCH APPLIED ---")

from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
from TTS.api import TTS
from deep_translator import GoogleTranslator

# Optional dependency: handle_process uses chatterbox_utils as the TTS backend for
# languages that have no XTTS mapping. HAS_CHATTERBOX records whether it imported.
try:
    import chatterbox_utils
    HAS_CHATTERBOX = True
except ImportError:
    HAS_CHATTERBOX = False

# Optional dependency: when the `spaces` package is missing, a local stand-in keeps
# the `@spaces.GPU(...)` decorators in this file importable.
try:
    import spaces
    HAS_SPACES = True
except ImportError:
    HAS_SPACES = False

    class spaces:
        """No-op stand-in exposing only the ``GPU`` decorator used by this file."""

        @staticmethod
        def GPU(duration=60):
            """Return the decorated function unchanged.

            Supports both call forms:
                ``@spaces.GPU(duration=120)`` -- ``duration`` is accepted and ignored.
                ``@spaces.GPU``               -- the function arrives as the first
                                                 positional argument and is returned as is.
            """
            if callable(duration):
                # Bare-decorator form: the "duration" slot actually holds the function.
                return duration

            def decorator(func):
                return func
            return decorator

# Process-wide environment flags.
# NOTE(review): COQUI_TOS_AGREED presumably pre-accepts the Coqui model licence prompt,
# and PYTHONWARNINGS set at runtime likely only affects child interpreters -- confirm.
os.environ.update({
    "COQUI_TOS_AGREED": "1",
    "PYTHONWARNINGS": "ignore",
})

# HTTP application with CORS open to every origin, method and header.
app = FastAPI()
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# Lazily populated model cache: "stt" and "tts" are filled on first use by
# gpu_stt_full / gpu_tts_full; "gpu_id" is the CUDA index read by get_best_gpu.
MODELS = {
    "stt": None,
    "tts": None,
    "gpu_id": 0,
}

def get_best_gpu():
    """Return the torch device string to run inference on.

    Yields "cpu" when CUDA is unavailable, otherwise "cuda:<index>" with the index
    taken from MODELS['gpu_id']. No free-memory comparison between devices is
    performed; the configured index is used as is.
    """
    if torch.cuda.is_available():
        return "cuda:{}".format(MODELS['gpu_id'])
    return "cpu"

@spaces.GPU(duration=120)
def gpu_stt_full(temp_path, lang):
    """Transcribe an audio file with Whisper and return the stripped text.

    The ASR pipeline is built on the first call and cached in MODELS["stt"];
    later calls reuse it.

    Args:
        temp_path: Path of the audio file handed to the pipeline.
        lang: Optional language hint. It is forwarded to generation only when it is
            truthy and at most three characters long; otherwise None is passed.

    Returns:
        The pipeline result's "text" field with surrounding whitespace removed.

    Raises:
        Whatever model loading or inference raises; cache cleanup still runs.
    """
    device = get_best_gpu()

    try:
        if MODELS.get("stt") is None:
            print(f"--- [v164] 📥 LOADING WHISPER LARGE (FP32) ON {device} ---")
            model_id = "openai/whisper-large-v3-turbo"
            # Using float32 to resolve CUBLAS_STATUS_EXECUTION_FAILED on H200/A10G MIG
            model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id, torch_dtype=torch.float32).to(device)
            processor = AutoProcessor.from_pretrained(model_id)
            MODELS["stt"] = pipeline(
                "automatic-speech-recognition",
                model=model,
                tokenizer=processor.tokenizer,
                feature_extractor=processor.feature_extractor,
                chunk_length_s=30,
                device=device
            )

        print(f"--- [v164] 🎙️ WHISPER INFERENCE (TEMP 0, BS 1) ---")
        res = MODELS["stt"](
            temp_path,
            batch_size=1,  # Ultimate stability
            generate_kwargs={
                "language": lang if lang and len(lang) <= 3 else None,
                "temperature": 0.0,
                "return_timestamps": True
            }
        )
        return res["text"].strip()
    finally:
        # Runs on success and on failure. Collect garbage first so memory held by
        # dead objects is back in the allocator cache before that cache is emptied.
        gc.collect()
        torch.cuda.empty_cache()

@spaces.GPU(duration=180)
def gpu_tts_full(text, mapped_lang, speaker_path):
    """Synthesise speech with XTTS v2 and return the WAV file as base64 text.

    The TTS model is loaded on the first call and cached in MODELS["tts"]; later
    calls re-issue the move to the selected device on a best-effort basis.

    Args:
        text: Text to speak.
        mapped_lang: Language code passed to XTTS as ``language``.
        speaker_path: Reference voice file passed as ``speaker_wav`` (may be None).

    Returns:
        Base64-encoded contents of the generated WAV file, as ``str``.

    Raises:
        Whatever model loading or synthesis raises; the temporary output file and
        the CUDA cache are cleaned up regardless.
    """
    # Same device selection as the STT path, so a CUDA-less host falls back to CPU.
    device = get_best_gpu()
    out_p = None

    try:
        if MODELS.get("tts") is None:
            print(f"--- [v164] 📥 LOADING XTTS V2 ON {device} ---")
            MODELS["tts"] = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
        else:
            try:
                MODELS["tts"].to(device)
            except Exception as e:
                # Best effort: keep using the model where it is, but say so.
                print(f"--- [v164] ⚠️ XTTS DEVICE MOVE FAILED: {e} ---")

        print(f"--- [v164] 🔊 XTTS GPU INFERENCE ---")
        # Reserve a path only; the handle is closed so XTTS can write the file itself.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as out_f:
            out_p = out_f.name

        MODELS["tts"].tts_to_file(text=text, language=mapped_lang, file_path=out_p, speaker_wav=speaker_path)

        with open(out_p, "rb") as f:
            return base64.b64encode(f.read()).decode()
    finally:
        # Remove the output file even when synthesis or reading failed.
        if out_p and os.path.exists(out_p):
            os.unlink(out_p)
        # Cleanup to prevent ZeroGPU worker errors
        gc.collect()
        torch.cuda.empty_cache()

# Language prefix -> XTTS language code. Prefixes missing here go to Chatterbox.
_XTTS_LANG_MAP = {
    "en": "en", "de": "de", "fr": "fr", "es": "es", "it": "it", "pl": "pl",
    "pt": "pt", "tr": "tr", "ru": "ru", "nl": "nl", "cs": "cs", "ar": "ar",
    "hu": "hu", "ko": "ko", "hi": "hi", "zh": "zh-cn",
}

# Actions that do real work; "health" is answered before this list is consulted.
_PROCESS_ACTIONS = ("stt", "tts", "s2st")


def _write_temp_wav(payload):
    """Write *payload* bytes to a new ``.wav`` temp file and return its path."""
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
        f.write(payload)
        return f.name


def _remove_file(path):
    """Delete *path* if it is set and still exists."""
    if path and os.path.exists(path):
        os.unlink(path)


async def handle_process(request: Request):
    """Dispatch one JSON API request to STT, TTS or speech-to-speech translation.

    Fields read from the JSON body:
        action: "health", "stt", "tts" or "s2st".
        file: base64 audio, required for "stt" and "s2st".
        text: text to synthesise, required for "tts".
        lang: language hint for STT; also the fallback target language.
        target_lang: preferred target language (falls back to ``lang``, then "en").
        speaker_wav: optional base64 reference voice for XTTS.

    Returns:
        "health" -> {"status": "awake", "v": "164"}
        "stt"    -> {"text": ...}
        "tts"    -> {"audio": <base64 wav>}
        "s2st"   -> {"text": ..., "translated": ..., "audio": <base64 wav>}
        Any validation problem or caught exception -> {"error": <message>}.
    """
    t1 = time.time()
    try:
        data = await request.json()
        action = data.get("action")
        if action == "health":
            return {"status": "awake", "v": "164"}

        # Reject bad actions explicitly instead of failing on .upper() or
        # falling through every branch and returning None.
        if not isinstance(action, str) or not action:
            return {"error": "Missing action"}
        if action not in _PROCESS_ACTIONS:
            return {"error": f"Unknown action: {action}"}

        print(f"--- [v164] 🛠️ API REQUEST: {action.upper()} ---")

        stt_text = ""
        # 🟢 SPEECH-TO-TEXT
        if action in ("stt", "s2st"):
            audio_b64 = data.get("file")
            if not audio_b64:
                return {"error": "Missing audio data"}

            # Decode before creating the file so bad base64 leaves nothing behind.
            temp_path = _write_temp_wav(base64.b64decode(audio_b64))
            try:
                stt_text = gpu_stt_full(temp_path, data.get("lang"))
                print(f"--- [v164] 🎙️ TEXT: {stt_text[:100]}... ---")
            finally:
                _remove_file(temp_path)

            if action == "stt":
                return {"text": stt_text}

        # 🔵 TEXT-TO-SPEECH (only "tts" and "s2st" reach this point)
        raw_text = data.get("text") if action == "tts" else stt_text
        # A missing or non-string "text" is reported as empty input.
        text = raw_text.strip() if isinstance(raw_text, str) else ""
        if not text:
            return {"error": "Input text is empty"}

        target = data.get("target_lang") or data.get("lang") or "en"
        trans_text = text

        if action == "s2st":
            print(f"--- [v164] 🌏 TRANSLATING TO {target} ---")
            trans_text = GoogleTranslator(source='auto', target=target).translate(stt_text)
            if not trans_text:
                # Guard the slice and the synthesis below against an empty result.
                return {"error": "Translation returned no text"}
            text = trans_text
            print(f"--- [v164] 📝 TRANS: {text[:100]}... ---")

        clean_lang = target.split('-')[0].lower()
        mapped_lang = _XTTS_LANG_MAP.get(clean_lang)

        if not mapped_lang:
            if not HAS_CHATTERBOX:
                return {"error": f"Language {clean_lang} not supported by XTTS/Chatterbox"}
            audio_bytes = chatterbox_utils.run_chatterbox_inference(text, clean_lang)
            audio_b64 = base64.b64encode(audio_bytes).decode()
        else:
            speaker_wav_b64 = data.get("speaker_wav")
            uploaded_speaker = None
            if speaker_wav_b64:
                # Decode first: invalid base64 must not leave an empty temp file.
                uploaded_speaker = _write_temp_wav(base64.b64decode(speaker_wav_b64))
                speaker_path = uploaded_speaker
            else:
                speaker_path = "default_speaker.wav"
                if not os.path.exists(speaker_path):
                    speaker_path = None

            try:
                # EXECUTE GPU TTS
                audio_b64 = gpu_tts_full(text, mapped_lang, speaker_path)
            finally:
                # Only the uploaded reference voice is temporary; never delete the default.
                _remove_file(uploaded_speaker)

        if action == "tts":
            return {"audio": audio_b64}
        return {"text": stt_text, "translated": trans_text, "audio": audio_b64}

    except Exception as e:
        print(f"❌ [v164] ENGINE ERROR: {traceback.format_exc()}")
        return {"error": str(e)}
    finally:
        print(f"--- [v164] ✨ MISSION COMPLETED ({time.time()-t1:.1f}s) ---")

@app.post("/process")
@app.post("/api/v1/process")
async def api_process(request: Request):
    """HTTP entry point: both POST routes hand the request to handle_process as is."""
    response = await handle_process(request)
    return response

@app.get("/health")
def health():
    """Return fixed engine metadata plus the current CUDA availability and device count."""
    cuda_ready = torch.cuda.is_available()
    device_total = torch.cuda.device_count()
    return dict(
        status="ready",
        v="164",
        gpu=cuda_ready,
        devices=device_total,
        engine="Full GPU PRO (Stable)",
        stt="Whisper-v3-Turbo (FP32-GPU)",
        tts="XTTS-v2 (GPU)",
    )

@app.get("/", response_class=HTMLResponse)
def root():
    """Serve the one-line HTML landing page."""
    banner = "<h1>🚀 PRO AI Engine v164 (GPU MODE)</h1>"
    return banner

if __name__ == "__main__":
    # Direct execution only: serve the app on every interface, port 7860.
    uvicorn.run(app, port=7860, host="0.0.0.0")