# app.py
"""Sofia — PersonaPlex voice companion.

Mounts a Gradio chat-style UI at "/" and exposes a JSON API at
"/generate", both delegating speech-to-speech inference to the
`moshi.offline` CLI run as a subprocess.
"""
import base64
import os
import subprocess
import tempfile

import gradio as gr
from fastapi import FastAPI, File, UploadFile, Form, HTTPException

app = FastAPI()

# Voice-prompt checkpoint filenames accepted by moshi.offline
# (presumably NAT*/VAR* = natural/varied, F/M = female/male — TODO confirm).
VOICE_CHOICES = [
    "NATF0.pt", "NATF1.pt", "NATF2.pt", "NATF3.pt",
    "NATM0.pt", "NATM1.pt", "NATM2.pt", "NATM3.pt",
    "VARF0.pt", "VARF1.pt", "VARF2.pt", "VARF3.pt", "VARF4.pt",
    "VARM0.pt", "VARM1.pt", "VARM2.pt", "VARM3.pt", "VARM4.pt"
]

DEFAULT_PERSONA = """You are Sofia, a warm, helpful, witty virtual assistant from Lagos. You love tech, music, Nollywood, and great conversations. Speak naturally, be empathetic, use a touch of Nigerian flair when it fits, and keep responses concise but engaging."""

# Upper bound on a single inference run — CPU-only hosts are very slow.
INFERENCE_TIMEOUT_S = 900  # 15 min


def run_offline_inference(input_path, text_prompt, voice_prompt, seed,
                          output_wav, output_json,
                          timeout=INFERENCE_TIMEOUT_S):
    """Run one `python -m moshi.offline` inference pass.

    Args:
        input_path: Path to the user's input WAV file.
        text_prompt: Persona/system prompt text; if truthy it is written to a
            temp file and passed via --text-prompt (the CLI takes a path).
        voice_prompt: Voice checkpoint filename (see VOICE_CHOICES).
        seed: Integer random seed forwarded to the CLI.
        output_wav: Destination path for the synthesized audio.
        output_json: Destination path for the response text.
        timeout: Max seconds to wait for the subprocess (default 15 min).

    Raises:
        RuntimeError: if the subprocess exceeds `timeout`.
        subprocess.CalledProcessError: if the CLI exits non-zero.
    """
    cmd = [
        "python", "-m", "moshi.offline",
        "--voice-prompt", voice_prompt,
        "--input-wav", input_path,
        "--seed", str(seed),
        "--output-wav", output_wav,
        "--output-text", output_json
    ]
    prompt_path = None
    try:
        if text_prompt:
            # delete=False so the path outlives the `with` and the subprocess
            # can read it; cleaned up in `finally` below.
            with tempfile.NamedTemporaryFile(
                mode="w", delete=False, suffix=".txt"
            ) as prompt_file:
                prompt_file.write(text_prompt)
                prompt_path = prompt_file.name
            cmd += ["--text-prompt", prompt_path]
        # BUG FIX: the original only bounded the prompt-file path with a
        # timeout; the no-prompt path called check_call(cmd) with no timeout
        # and could hang forever. Both paths are now bounded.
        subprocess.check_call(cmd, timeout=timeout)
    except subprocess.TimeoutExpired:
        raise RuntimeError("Inference timed out — CPU is slow, try shorter input audio.")
    finally:
        # Best-effort cleanup of the temp prompt file (if one was created).
        if prompt_path and os.path.exists(prompt_path):
            os.unlink(prompt_path)


def gradio_generate(input_audio, text_prompt, voice_prompt, seed):
    """Gradio generator handler: yields (audio_path, text) pairs.

    First yields a (None, status) placeholder so the UI shows progress, then
    the final (wav_path, response_text) once inference finishes.

    Raises:
        gr.Error: on missing input audio or any inference failure (wrapped).
    """
    if input_audio is None:
        raise gr.Error("Please record or upload audio for Sofia to hear you!")
    # Empty/whitespace custom persona falls back to the default one.
    full_prompt = text_prompt.strip() or DEFAULT_PERSONA
    try:
        with tempfile.TemporaryDirectory() as tmpdir:
            output_wav = os.path.join(tmpdir, "sofia_response.wav")
            output_json = os.path.join(tmpdir, "sofia_response.json")
            # Interim yield: lets the UI display a status message while the
            # (potentially multi-minute) subprocess runs.
            yield None, "Processing... Sofia is thinking (expect 1–5+ minutes on free CPU)..."
            run_offline_inference(input_audio, full_prompt, voice_prompt,
                                  seed, output_wav, output_json)
            with open(output_json, "r") as f:
                text = f.read().strip()
            yield output_wav, text
    except Exception as e:
        raise gr.Error(f"Error: {str(e)}\n(Try shorter audio clips or check Space logs)")


# ---------------------------------------------------------------------------
# Gradio UI
# ---------------------------------------------------------------------------
with gr.Blocks(theme=gr.themes.Soft(primary_hue="pink", secondary_hue="purple")) as demo:
    gr.Markdown("# Sofia — Your PersonaPlex AI Companion")
    gr.Markdown(
        "Record or upload short audio → Sofia responds! \n"
        "**CPU note:** First load takes time (model download + init). Responses: 1–5+ min. Use short inputs (5–15 sec)."
    )
    with gr.Row():
        with gr.Column():
            input_audio = gr.Audio(
                sources=["microphone", "upload"],
                type="filepath",
                label="Speak to Sofia (mic or upload WAV/MP3)",
                waveform_options=gr.WaveformOptions(
                    show_recording_waveform=True,  # show waveform while recording
                    show_controls=False  # hide extra player buttons
                )
            )
            text_prompt = gr.Textbox(
                label="Custom Persona / Role for Sofia (optional)",
                placeholder=DEFAULT_PERSONA,
                lines=4,
                value=""
            )
            voice_prompt = gr.Dropdown(
                choices=VOICE_CHOICES,
                label="Sofia's Voice Style",
                value="NATF2.pt"
            )
            seed = gr.Number(label="Random Seed", value=42424242, precision=0)
            submit_btn = gr.Button("Send to Sofia →", variant="primary")
        with gr.Column():
            output_audio = gr.Audio(label="Sofia's Response (Audio)", autoplay=True)
            output_text = gr.Textbox(label="Sofia's Response (Text)", lines=6)
            status = gr.Textbox(label="Status", interactive=False, value="Ready...")
    submit_btn.click(
        fn=gradio_generate,
        inputs=[input_audio, text_prompt, voice_prompt, seed],
        outputs=[output_audio, output_text],
    ).then(
        # BUG FIX: this string literal was broken across a raw newline in the
        # original source (a syntax error); it is now a single-line literal.
        lambda: "Done! Play Sofia's response above ↑",
        outputs=status
    )

# Serve the Gradio UI from the FastAPI app at the root path.
gr.mount_gradio_app(app, demo, path="/")


@app.post("/generate")
async def api_generate(
    input_audio: UploadFile = File(...),
    text_prompt: str = Form(None),
    voice_prompt: str = Form("NATF2.pt"),
    seed: int = Form(42424242)
):
    """JSON API: run inference on an uploaded audio file.

    Returns a JSON object with:
        audio: base64-encoded WAV bytes of Sofia's spoken response.
        text:  Sofia's response text.

    Raises:
        HTTPException(400): when no audio file is provided.
    """
    if not input_audio:
        raise HTTPException(400, "No audio file provided")
    text_prompt = text_prompt or DEFAULT_PERSONA
    with tempfile.TemporaryDirectory() as tmpdir:
        input_path = os.path.join(tmpdir, "input.wav")
        with open(input_path, "wb") as f:
            f.write(await input_audio.read())
        output_wav = os.path.join(tmpdir, "sofia_output.wav")
        output_json = os.path.join(tmpdir, "sofia_output.json")
        run_offline_inference(input_path, text_prompt, voice_prompt,
                              seed, output_wav, output_json)
        with open(output_wav, "rb") as f:
            audio_data = f.read()
        with open(output_json, "r") as f:
            text = f.read().strip()
    # BUG FIX: the original returned raw bytes in the JSON dict; FastAPI's
    # JSON encoder tries bytes.decode() and fails (UnicodeDecodeError) on
    # binary WAV data, so the endpoint could never respond successfully.
    # Base64-encode the audio so the response is valid JSON.
    return {"audio": base64.b64encode(audio_data).decode("ascii"), "text": text}


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)