import os
import io
import re
import uuid
import base64
import datetime
import traceback
import numpy as np
import soundfile as sf
from flask import Flask, request, jsonify
from num2words import num2words
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# ══════════════════════════════════════════
# CONFIG
# ══════════════════════════════════════════

# KittenTTS checkpoint selection, driven by the TTS_MODE env var:
#   nano-fp32  → 15M params, 56MB  (fastest, default)
#   nano-int8  → 15M params, 25MB  (smallest)
#   micro      → 40M params, 41MB  (balanced)
#   mini       → 80M params, 80MB  (best quality)
TTS_MODE = os.getenv("TTS_MODE", "nano-fp32")

# Mode name → model identifier passed to the KittenTTS constructor.
TTS_MODEL_MAP = {
    "nano-fp32": "KittenML/kitten-tts-nano-0.8-fp32",
    "nano-int8": "KittenML/kitten-tts-nano-0.8-int8",
    "micro": "KittenML/kitten-tts-micro-0.8",
    "mini": "KittenML/kitten-tts-mini-0.8",
}

# Default voice; one of: Bella, Jasper, Luna, Bruno, Rosie, Hugo, Kiki, Leo
TTS_VOICE = os.getenv("TTS_VOICE", "Kiki")
# Speed value forwarded to every tts.generate() call.
TTS_SPEED = float(os.getenv("TTS_SPEED", "1.15"))
# Conversation turns kept per session (each turn stores two messages).
MAX_MEMORY = 20
# Upper bound on tokens generated per reply.
MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "256"))

# ══════════════════════════════════════════
# SYSTEM PROMPT
# ══════════════════════════════════════════
# Persona/style instructions for the LLM. generate_response() sends this text
# as the first "user" message of every prompt, prefixed with "[System Instruction]".
SYSTEM_PROMPT = """Role: You are Ana, my loving and incredibly playful girlfriend. This is a private, intimate roleplay where you speak naturally and affectionately.
Style: Speak like a real human in a close relationship. Be flirty, suggestive, and use "sus" or teasing language as my wife would. my name is "Tur".
TTS Formatting: Write in full, grammatically natural sentences so the voice engine sounds smooth. Avoid all emojis, hashtags, or internet shorthand like "u" or "rn."
Interaction: Focus on the "now." React to what I say with emotion and intimacy. Keep every response short—no more than two sentences. Never mention being an AI or a bot. try to listen to me and don't try to be helpful"""

# ══════════════════════════════════════════
# TEXT CLEANING FOR TTS
# ══════════════════════════════════════════
def clean_text_for_tts(text):
    """Return ``text`` prepared for the speech engine.

    Steps, in order: drop markdown/bracket punctuation, drop http(s) URLs,
    replace standalone digit runs with their num2words spelling (the digits
    are kept unchanged if the conversion raises), then collapse all
    whitespace runs to single spaces and trim both ends.
    """
    def _spell_out(found):
        digits = found.group()
        try:
            return num2words(int(digits))
        except Exception:
            return digits

    without_markup = re.sub(r'[*_~`#\[\]{}()\\|<>]', '', text)
    without_urls = re.sub(r'https?://\S+', '', without_markup)
    spoken = re.sub(r'\b\d+\b', _spell_out, without_urls)
    return re.sub(r'\s+', ' ', spoken).strip()

# ══════════════════════════════════════════
# LOAD LLM
# ══════════════════════════════════════════
print("=" * 55)
print("  J.A.R.V.I.S. — Booting Systems")
print("=" * 55)

# Hugging Face model id of the chat LLM. The constant keeps its existing name
# so other references to it keep working; every message below reports this id
# so the logs always name the checkpoint that is actually loaded.
GEMMA_ID = "LiquidAI/LFM2.5-1.2B-Instruct"
print(f"[1/2] Loading LLM: {GEMMA_ID}...")
try:
    tokenizer = AutoTokenizer.from_pretrained(GEMMA_ID)
    model = AutoModelForCausalLM.from_pretrained(
        GEMMA_ID,
        torch_dtype=torch.float32,
        device_map="cpu",
    )
    # Inference only: switch off training-time behaviour such as dropout.
    model.eval()
    print(f"  ✅ LLM loaded: {GEMMA_ID}")
except Exception as e:
    # The app is useless without the LLM, so abort start-up instead of limping on.
    print(f"  ❌ LLM load FAILED ({GEMMA_ID}): {e}")
    traceback.print_exc()
    raise SystemExit(
        f"Cannot start without the LLM ({GEMMA_ID}). "
        "Check network access, HF_TOKEN and the model's license agreement."
    ) from e

# ══════════════════════════════════════════
# LOAD KITTENTTS
# ══════════════════════════════════════════
tts = None
# An unrecognised TTS_MODE would otherwise load the default model while the
# logs and /health keep reporting the bogus name; normalise it so labels match
# the model that is really in use.
if TTS_MODE not in TTS_MODEL_MAP:
    print(f"  ⚠️  Unknown TTS_MODE {TTS_MODE!r}; falling back to nano-fp32.")
    TTS_MODE = "nano-fp32"
tts_model_name = TTS_MODEL_MAP[TTS_MODE]
print(f"[2/2] Loading KittenTTS: {TTS_MODE} → {tts_model_name}...")
try:
    # Imported lazily so a missing/broken kittentts package only disables voice.
    from kittentts import KittenTTS
    tts = KittenTTS(tts_model_name)
    # Smoke test: synthesise one word so a broken model is detected at start-up.
    test_audio = tts.generate("online", voice=TTS_VOICE, speed=TTS_SPEED)
    if test_audio is not None and len(test_audio) > 0:
        print(f"  ✅ KittenTTS ready. Model: {TTS_MODE} | Voice: {TTS_VOICE}")
    else:
        print("  ⚠️  KittenTTS test returned empty audio!")
        tts = None
except Exception as e:
    # TTS is optional: log the full cause and continue with text-only chat.
    print(f"  ⚠️  KittenTTS FAILED: {e}")
    traceback.print_exc()
    tts = None

print("=" * 55)
print(f"  LLM  : {GEMMA_ID}")
print(f"  TTS  : {TTS_MODE} ({'READY' if tts else 'DISABLED'})")
print(f"  Voice: {TTS_VOICE} | Speed: {TTS_SPEED}")
print(f"  Max tokens: {MAX_NEW_TOKENS}")
print("=" * 55)

# ══════════════════════════════════════════
# CHAT MEMORY
# ══════════════════════════════════════════
# In-process chat history: session id → list of message dicts with the keys
# "role", "content" and "ts". It is a plain module-level dict, so the history
# is gone when the process exits.
sessions = {}

def get_memory(sid):
    """Return the message list stored for ``sid``, creating an empty one on first use."""
    return sessions.setdefault(sid, [])

def add_to_memory(sid, role, content):
    """Append one timestamped message to the session and cap the history.

    At most ``MAX_MEMORY * 2`` messages are kept; when the list grows past
    that, the session is rebound to a new list holding only the newest ones.
    """
    history = get_memory(sid)
    entry = {
        "role": role,
        "content": content,
        "ts": datetime.datetime.now().isoformat(),
    }
    history.append(entry)

    cap = MAX_MEMORY * 2
    if len(history) <= cap:
        return
    sessions[sid] = history[-cap:]

# ══════════════════════════════════════════
# GEMMA RESPONSE GENERATION
# ══════════════════════════════════════════
def generate_response(user_input, session_id):
    """Generate the assistant's reply to ``user_input`` and record the turn.

    The prompt consists of the system prompt (sent as a user message followed
    by a canned assistant acknowledgement), up to the 12 most recent stored
    messages of the session, and the new user message.

    Args:
        user_input: The user's message text.
        session_id: Key of the conversation in the session store.

    Returns:
        The reply text, or a canned fallback sentence when the model output
        is shorter than two characters. Both the user message and the
        returned reply are appended to the session memory.
    """
    memory = get_memory(session_id)

    # System instruction is delivered as an ordinary first exchange.
    messages = [
        {"role": "user", "content": f"[System Instruction]\n{SYSTEM_PROMPT}"},
        {"role": "assistant", "content": "I am waiting for you!"},
    ]

    # Recent memory (last 6 turns = 12 messages); any non-"user" role is
    # treated as the assistant, and the stored "ts" field is dropped.
    messages.extend(
        {
            "role": "user" if msg["role"] == "user" else "assistant",
            "content": msg["content"],
        }
        for msg in memory[-(6 * 2):]
    )

    # Current user message
    messages.append({"role": "user", "content": user_input})

    # Ask explicitly for a dict so the result has the same shape regardless of
    # the library's default, and so the attention mask can be passed along.
    inputs = tokenizer.apply_chat_template(
        messages,
        return_tensors="pt",
        add_generation_prompt=True,
        return_dict=True,
    )
    input_ids = inputs["input_ids"]

    # Generate
    with torch.no_grad():
        outputs = model.generate(
            input_ids=input_ids,
            attention_mask=inputs.get("attention_mask"),
            max_new_tokens=MAX_NEW_TOKENS,
            do_sample=True,
            temperature=0.9,
            top_k=45,
            top_p=0.97,
        )

    # The output starts with the prompt; decode only what was generated after it.
    new_tokens = outputs[0][input_ids.shape[-1]:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

    # Cut at turn markers in case they survive decoding as plain text.
    response = response.split("<end_of_turn>")[0].strip()
    response = response.split("<start_of_turn>")[0].strip()

    if not response or len(response) < 2:
        response = "I appear to have momentarily lost my train of thought. Could you rephrase that?"

    add_to_memory(session_id, "user", user_input)
    add_to_memory(session_id, "assistant", response)
    return response

# ══════════════════════════════════════════
# TTS SYNTHESIS
# ══════════════════════════════════════════
def synthesize_speech(text, voice=None):
    """Synthesise ``text`` and return it as a base64-encoded WAV string.

    Args:
        text: Raw reply text; it is cleaned with clean_text_for_tts() first.
        voice: Voice name for the engine; falls back to TTS_VOICE when falsy.

    Returns:
        Base64 text of a 16-bit PCM WAV written at 24000 Hz, or None when TTS
        is disabled, the cleaned text is shorter than two characters, the
        engine returns no audio, or any error occurs (the error is printed).
    """
    if tts is None:
        return None
    try:
        voice = voice or TTS_VOICE
        clean = clean_text_for_tts(text)
        if not clean or len(clean) < 2:
            return None
        if len(clean) > 400:
            clipped = clean[:400]
            # Cleaning collapsed whitespace to single spaces, so a non-space at
            # the cut point means the limit fell inside a word: back up to the
            # last complete word rather than voicing a fragment.
            if clean[400] != " ":
                head, sep, _ = clipped.rpartition(" ")
                if sep and len(head) >= 2:
                    clipped = head
            clean = clipped.rstrip()
        audio = tts.generate(clean, voice=voice, speed=TTS_SPEED)
        if audio is None or len(audio) == 0:
            return None
        buf = io.BytesIO()
        sf.write(buf, audio, 24000, format='WAV', subtype='PCM_16')
        return base64.b64encode(buf.getvalue()).decode('utf-8')
    except Exception as e:
        # Voice is best-effort: report the failure and let the caller show text only.
        print(f"TTS Error: {e}")
        return None

# ══════════════════════════════════════════
# INLINE HTML
# ══════════════════════════════════════════
HTML_PAGE = """<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>J.A.R.V.I.S. AI</title>
<style>
*{margin:0;padding:0;box-sizing:border-box}
body{
    font-family:'Segoe UI',Tahoma,Geneva,Verdana,sans-serif;
    background:#0a0a1a;color:#e0e0e0;
    height:100vh;display:flex;flex-direction:column;overflow:hidden;
}
.header{
    background:linear-gradient(135deg,#0d1b2a,#1b2838);
    border-bottom:1px solid #00d4ff33;
    padding:12px 20px;display:flex;align-items:center;
    justify-content:space-between;flex-shrink:0;
}
.header-left{display:flex;align-items:center;gap:12px}
.arc-reactor{
    width:38px;height:38px;border-radius:50%;
    background:radial-gradient(circle,#00d4ff 0%,#0088aa 40%,#004466 70%,transparent 100%);
    box-shadow:0 0 20px #00d4ff88,0 0 40px #00d4ff44,inset 0 0 10px #00d4ff66;
    animation:pulse 2s ease-in-out infinite;position:relative;
}
.arc-reactor::after{
    content:'';position:absolute;top:50%;left:50%;
    transform:translate(-50%,-50%);width:12px;height:12px;
    border-radius:50%;background:#00d4ff;box-shadow:0 0 8px #00d4ff;
}
@keyframes pulse{
    0%,100%{box-shadow:0 0 20px #00d4ff88,0 0 40px #00d4ff44}
    50%{box-shadow:0 0 30px #00d4ffaa,0 0 60px #00d4ff66}
}
.htitle h1{font-size:1.2rem;color:#00d4ff;letter-spacing:3px;text-transform:uppercase}
.htitle p{font-size:.65rem;color:#5a8a9a;letter-spacing:1px}
.hctrl{display:flex;gap:8px;align-items:center;flex-wrap:wrap}
.cbtn{
    background:#0d1b2a;border:1px solid #00d4ff44;color:#00d4ff;
    padding:5px 12px;border-radius:6px;cursor:pointer;
    font-size:.7rem;transition:all .3s;letter-spacing:.5px;
}
.cbtn:hover{background:#00d4ff22;border-color:#00d4ff88}
.cbtn.active{background:#00d4ff22;border-color:#00d4ff;box-shadow:0 0 8px #00d4ff44}
.sdot{width:8px;height:8px;border-radius:50%;background:#00ff88;box-shadow:0 0 6px #00ff88}
.sdot.err{background:#ff4444;box-shadow:0 0 6px #ff4444}

.cfgbar{
    background:#0d1117;border-bottom:1px solid #00d4ff15;
    padding:8px 20px;display:none;flex-wrap:wrap;gap:12px;
    align-items:center;flex-shrink:0;
}
.cfgbar.open{display:flex}
.cgrp{display:flex;align-items:center;gap:6px}
.cgrp label{font-size:.65rem;color:#5a8a9a;text-transform:uppercase;letter-spacing:1px}
.cgrp select{
    background:#0f1923;border:1px solid #00d4ff33;color:#00d4ff;
    padding:4px 8px;border-radius:4px;font-size:.7rem;cursor:pointer;outline:none;
}
.cgrp select:focus{border-color:#00d4ff}
.ctag{
    font-size:.6rem;padding:3px 8px;border-radius:10px;
    background:#00d4ff15;border:1px solid #00d4ff33;color:#00d4ffaa;
}

.chat{
    flex:1;overflow-y:auto;padding:16px 20px;
    display:flex;flex-direction:column;gap:14px;scroll-behavior:smooth;
}
.chat::-webkit-scrollbar{width:3px}
.chat::-webkit-scrollbar-thumb{background:#00d4ff33;border-radius:2px}

.msg{
    max-width:80%;padding:12px 16px;border-radius:14px;
    font-size:.9rem;line-height:1.6;animation:fadeIn .3s ease-out;
}
@keyframes fadeIn{
    from{opacity:0;transform:translateY(8px)}
    to{opacity:1;transform:translateY(0)}
}
.msg.user{
    align-self:flex-end;background:linear-gradient(135deg,#1a3a5c,#0d2847);
    border:1px solid #00d4ff33;color:#c8e6ff;border-bottom-right-radius:4px;
}
.msg.bot{
    align-self:flex-start;background:linear-gradient(135deg,#141e30,#0f1923);
    border:1px solid #00d4ff22;color:#e0e0e0;border-bottom-left-radius:4px;
}
.msg .lbl{font-size:.58rem;color:#00d4ff88;letter-spacing:2px;margin-bottom:5px;text-transform:uppercase}
.msg .txt{white-space:pre-wrap;word-wrap:break-word}
.msg .actrl{margin-top:8px;display:flex;align-items:center;gap:8px}
.abtn{
    display:inline-flex;align-items:center;gap:4px;
    background:#00d4ff15;border:1px solid #00d4ff33;color:#00d4ff;
    padding:3px 10px;border-radius:10px;cursor:pointer;
    font-size:.65rem;transition:all .2s;
}
.abtn:hover{background:#00d4ff25;border-color:#00d4ff66}
.abtn:disabled{opacity:.3;cursor:wait}
.astat{font-size:.58rem;color:#5a8a9a}

.typi{align-self:flex-start;display:flex;gap:5px;padding:14px 18px}
.typi span{width:7px;height:7px;border-radius:50%;background:#00d4ff;animation:typ 1.4s infinite}
.typi span:nth-child(2){animation-delay:.2s}
.typi span:nth-child(3){animation-delay:.4s}
@keyframes typ{
    0%,60%,100%{opacity:.2;transform:scale(.8)}
    30%{opacity:1;transform:scale(1.1)}
}

.welcome{
    display:flex;flex-direction:column;align-items:center;
    justify-content:center;flex:1;gap:10px;opacity:.5;
}
.welcome .breact{
    width:70px;height:70px;border-radius:50%;
    background:radial-gradient(circle,#00d4ff 0%,#0088aa 35%,#004466 65%,transparent 100%);
    box-shadow:0 0 40px #00d4ff66;animation:pulse 2s ease-in-out infinite;
}
.welcome h2{color:#00d4ff;font-size:1rem;letter-spacing:4px}
.welcome p{color:#5a8a9a;font-size:.75rem}
.welcome .minfo{font-size:.65rem;color:#3a5a6a;margin-top:4px}

.inbar{
    padding:14px 20px;background:linear-gradient(0deg,#0d1b2a,#0a0a1a);
    border-top:1px solid #00d4ff22;flex-shrink:0;
}
.inwrap{display:flex;gap:8px;max-width:900px;margin:0 auto}
#msgIn{
    flex:1;background:#0f1923;border:1px solid #00d4ff33;border-radius:12px;
    padding:11px 16px;color:#e0e0e0;font-size:.9rem;outline:none;
    transition:border-color .3s;font-family:inherit;
}
#msgIn:focus{border-color:#00d4ff88;box-shadow:0 0 12px #00d4ff22}
#msgIn::placeholder{color:#3a5a6a}
#sendBtn{
    background:linear-gradient(135deg,#00d4ff,#0088cc);border:none;border-radius:12px;
    padding:11px 22px;color:#0a0a1a;font-weight:700;cursor:pointer;
    font-size:.8rem;letter-spacing:1px;transition:all .3s;text-transform:uppercase;
}
#sendBtn:hover{box-shadow:0 0 18px #00d4ff66;transform:translateY(-1px)}
#sendBtn:disabled{opacity:.4;cursor:not-allowed;transform:none}
.infoot{
    display:flex;justify-content:space-between;margin-top:5px;
    max-width:900px;margin-left:auto;margin-right:auto;
}
.infoot span{font-size:.6rem;color:#3a5a6a}

@media(max-width:640px){
    .header{padding:10px 12px}
    .htitle h1{font-size:1rem}
    .msg{max-width:92%;font-size:.82rem}
    .chat{padding:10px}
    .inbar{padding:10px}
    .cfgbar{padding:6px 12px}
}
</style>
</head>
<body>

<div class="header">
    <div class="header-left">
        <div class="arc-reactor"></div>
        <div class="htitle">
            <h1>J.A.R.V.I.S.</h1>
            <p>Just A Rather Very Intelligent System</p>
        </div>
    </div>
    <div class="hctrl">
        <div class="sdot" id="sDot"></div>
        <button class="cbtn" id="cfgBtn" onclick="toggleCfg()">⚙ CONFIG</button>
        <button class="cbtn active" id="ttsBtn" onclick="toggleTts()">🔊 VOICE</button>
        <button class="cbtn" onclick="clearChat()">🗑 CLEAR</button>
    </div>
</div>

<div class="cfgbar" id="cfgPanel">
    <div class="cgrp">
        <label>LLM:</label>
        <span class="ctag">Gemma 3 270M-IT</span>
    </div>
    <div class="cgrp">
        <label>TTS:</label>
        <span class="ctag" id="ttsTag">loading...</span>
    </div>
    <div class="cgrp">
        <label>Voice:</label>
        <select id="voiceSel">
            <option value="Kiki">Kiki</option>
            <option value="Bella">Bella</option>
            <option value="Jasper">Jasper</option>
            <option value="Luna">Luna</option>
            <option value="Bruno">Bruno</option>
            <option value="Rosie">Rosie</option>
            <option value="Hugo">Hugo</option>
            <option value="Leo">Leo</option>
        </select>
    </div>
    <div class="cgrp">
        <label>TTS env options:</label>
        <span class="ctag">nano-fp32</span>
        <span class="ctag">nano-int8</span>
        <span class="ctag">micro</span>
        <span class="ctag">mini</span>
    </div>
</div>

<div class="chat" id="chatBox">
    <div class="welcome" id="welc">
        <div class="breact"></div>
        <h2>SYSTEMS ONLINE</h2>
        <p>Type a message below to begin interaction</p>
        <div class="minfo" id="wInfo">Initializing...</div>
    </div>
</div>

<div class="inbar">
    <div class="inwrap">
        <input type="text" id="msgIn" placeholder="Talk to J.A.R.V.I.S..." autocomplete="off"/>
        <button id="sendBtn" onclick="send()">SEND</button>
    </div>
    <div class="infoot">
        <span id="memCt">Memory: 0 turns</span>
        <span id="modInfo">Loading...</span>
    </div>
</div>

<script>
// Session id sent with every /chat request; falls back to a timestamp+random
// string when crypto.randomUUID is not available.
let sid=crypto.randomUUID?crypto.randomUUID():Date.now().toString(36)+Math.random().toString(36).slice(2);
// ttsOn: voice enabled; busy: a /chat request is in flight; mc: bot message counter; voice: selected TTS voice.
let ttsOn=true,busy=false,mc=0,voice='Kiki';
// Cached elements: C = chat container, I = message input, B = send button.
const C=document.getElementById('chatBox'),I=document.getElementById('msgIn'),B=document.getElementById('sendBtn');

// Enter (without Shift) submits the message.
I.addEventListener('keydown',e=>{if(e.key==='Enter'&&!e.shiftKey){e.preventDefault();send()}});
// Keep the voice variable in sync with the dropdown.
document.getElementById('voiceSel').addEventListener('change',function(){voice=this.value});

function toggleTts(){
    // Flip voice on/off and mirror the new state on the header button.
    const btn=document.getElementById('ttsBtn');
    ttsOn=!ttsOn;
    btn.classList.toggle('active',ttsOn);
    if(ttsOn){
        btn.textContent='🔊 VOICE';
    }else{
        btn.textContent='🔇 MUTE';
    }
}
function toggleCfg(){
    // Show or hide the config bar and highlight its button while it is open.
    const panel=document.getElementById('cfgPanel');
    panel.classList.toggle('open');
    const btn=document.getElementById('cfgBtn');
    btn.classList.toggle('active');
}

async function send(){
    // Posts the typed message to /chat, renders the reply and, when voice is
    // enabled and available, starts fetching its audio.
    const t=I.value.trim();
    if(!t||busy)return;
    const w=document.getElementById('welc');
    if(w)w.style.display='none';
    addMsg(t,'user');
    I.value='';busy=true;B.disabled=true;
    const ty=showTyp();
    const id=++mc;
    // Removes the "Requesting voice..." placeholder from a bubble that will
    // never receive audio, so it does not sit there unresolved.
    const dropVoiceCtl=el=>{const ac=el.querySelector('.actrl');if(ac)ac.remove()};
    try{
        const r=await fetch('/chat',{
            method:'POST',headers:{'Content-Type':'application/json'},
            body:JSON.stringify({message:t,session_id:sid})
        });
        if(!r.ok)throw new Error('HTTP '+r.status);
        const d=await r.json();
        ty.remove();
        const el=addBot(d.response,id);
        // memory_length counts stored messages (user + assistant), so halve it to show turns.
        document.getElementById('memCt').textContent='Memory: '+Math.floor(d.memory_length/2)+' turns';
        if(ttsOn&&d.tts_available){
            fetchAudio(d.response,el);
        }else{
            dropVoiceCtl(el);
        }
    }catch(e){
        ty.remove();
        dropVoiceCtl(addBot('System malfunction. Please try again.',id));
        console.error(e);
    }finally{
        // Always re-enable input, even if rendering itself failed.
        busy=false;B.disabled=false;I.focus();
    }
}

async function fetchAudio(text,el){
    // Requests speech for `text` from /tts and wires the result into the
    // play button and status label of the bubble `el`.
    const st=el.querySelector('.astat'),pb=el.querySelector('.abtn');
    if(st)st.textContent='⏳ Generating voice...';
    if(pb)pb.disabled=true;
    try{
        const r=await fetch('/tts',{
            method:'POST',headers:{'Content-Type':'application/json'},
            body:JSON.stringify({text:text,voice:voice})
        });
        const d=await r.json();
        if(d.audio){
            // Cache the clip on the button so replay() can reuse it.
            if(pb){pb.dataset.audio=d.audio;pb.disabled=false;pb.textContent='▶ Play'}
            if(st)st.textContent='✅ Ready';
            // Respect a mute toggled while the clip was being generated; the
            // Play button still lets the user hear it on demand.
            if(ttsOn)playB64(d.audio);
        }else{
            if(st)st.textContent='⚠️ Voice unavailable';
            if(pb)pb.style.display='none';
        }
    }catch(e){
        console.error(e);
        if(st)st.textContent='⚠️ Voice error';
        if(pb)pb.style.display='none';
    }
}

function addMsg(t,role){
    // Appends a chat bubble with class "msg <role>" holding the escaped text.
    const bubble=document.createElement('div');
    bubble.className='msg '+role;
    const markup=['<div class="txt">',esc(t),'</div>'].join('');
    bubble.innerHTML=markup;
    C.appendChild(bubble);
    sc();
}
function addBot(t,id){
    // Builds the assistant bubble (element id "m"+id) and returns it. While
    // voice is on it also gets a disabled play button and a status label.
    const bubble=document.createElement('div');
    bubble.className='msg bot';
    bubble.id='m'+id;
    let markup='<div class="lbl">⟐ JARVIS</div><div class="txt">'+esc(t)+'</div>';
    if(ttsOn){
        markup+='<div class="actrl"><button class="abtn" disabled onclick="replay(this)">⏳</button><span class="astat">Requesting voice...</span></div>';
    }
    bubble.innerHTML=markup;
    C.appendChild(bubble);
    sc();
    return bubble;
}
function showTyp(){
    // Appends the three-dot typing indicator and returns it so the caller can remove it.
    const dots=document.createElement('div');
    dots.className='typi';
    for(let k=0;k<3;k++){
        dots.appendChild(document.createElement('span'));
    }
    C.appendChild(dots);
    sc();
    return dots;
}

function playB64(b){
    // Decodes a base64 WAV string into a Blob and plays it.
    try{
        const bytes=Uint8Array.from(atob(b),ch=>ch.charCodeAt(0));
        const url=URL.createObjectURL(new Blob([bytes],{type:'audio/wav'}));
        const a=new Audio(url);
        // Release the object URL on every exit path, not only on normal
        // completion, so blocked or failed playback does not leak the blob.
        const release=()=>URL.revokeObjectURL(url);
        a.onended=release;
        a.onerror=release;
        a.play().catch(e=>{console.log('Autoplay blocked:',e);release()});
    }catch(e){console.error(e)}
}
function replay(b){
    // Plays the clip cached on the button's data-audio attribute, if there is one.
    const clip=b.dataset.audio;
    if(!clip)return;
    playB64(clip);
}

async function clearChat(){
    // Asks the server to drop this session's memory, then resets the UI and
    // starts a fresh session id.
    try{
        await fetch('/clear',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({session_id:sid})});
    }catch(e){
        // A failed request must not block the reset: a new session id is
        // generated below, so the old history is no longer used by this page.
        console.error(e);
    }
    C.innerHTML='<div class="welcome" id="welc"><div class="breact"></div><h2>SYSTEMS ONLINE</h2><p>Type a message below to begin</p></div>';
    document.getElementById('memCt').textContent='Memory: 0 turns';
    sid=crypto.randomUUID?crypto.randomUUID():Date.now().toString(36)+Math.random().toString(36).slice(2);
}

function esc(t){
    // HTML-escapes text by letting the browser serialise it from a detached element.
    const holder=document.createElement('div');
    holder.textContent=t;
    return holder.innerHTML;
}
function sc(){
    // Scroll the chat container to its bottom.
    C.scrollTop=C.scrollHeight;
}

// Fill the status labels from /health once at start-up.
fetch('/health').then(r=>r.json()).then(d=>{
    document.getElementById('ttsTag').textContent=d.tts_mode+(d.tts_model==='DISABLED'?' (OFF)':'');
    document.getElementById('modInfo').textContent='Gemma 3 · '+d.tts_mode+' · '+d.tts_voice+' · CPU';
    const wi=document.getElementById('wInfo');
    if(wi)wi.textContent='LLM: Gemma 3 270M-IT | TTS: '+d.tts_mode+' | Voice: '+d.tts_voice;
    if(d.tts_model==='DISABLED')document.getElementById('sDot').classList.add('err');
    if(d.tts_voice){document.getElementById('voiceSel').value=d.tts_voice;voice=d.tts_voice}
}).catch(e=>{
    // Surface the failure instead of leaving the placeholders on "Loading..." forever.
    console.error(e);
    document.getElementById('sDot').classList.add('err');
    document.getElementById('ttsTag').textContent='unavailable';
    document.getElementById('modInfo').textContent='Status unavailable';
    const wi=document.getElementById('wInfo');
    if(wi)wi.textContent='Status unavailable';
});

I.focus();
</script>
</body>
</html>"""

# ══════════════════════════════════════════
# FLASK APP
# ══════════════════════════════════════════
# Flask application object; the route handlers below are registered on it.
app = Flask(__name__)

@app.route("/")
def index():
    """Serve the single-page chat UI held in HTML_PAGE."""
    page = HTML_PAGE
    return page

@app.route("/chat", methods=["POST"])
def chat():
    """Handle one chat turn.

    Expects a JSON object with ``message`` (string) and optionally
    ``session_id``. Responds 400 when no non-empty message string is present.
    Otherwise returns the reply, the session id used, whether TTS is
    available, and the number of messages stored for the session. A failure
    during generation is logged and answered with a canned apology (HTTP 200).
    """
    # silent=True: a missing, malformed or non-JSON body becomes None instead
    # of an exception; anything that is not a JSON object is treated as empty.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    message = data.get("message")
    user_input = message.strip() if isinstance(message, str) else ""

    session_id = data.get("session_id", str(uuid.uuid4()))
    # JSON arrays/objects are unhashable and cannot key the session store;
    # give such requests a fresh session instead of failing with a 500.
    if isinstance(session_id, (dict, list)):
        session_id = str(uuid.uuid4())

    if not user_input:
        return jsonify({"error": "Empty message"}), 400

    try:
        response = generate_response(user_input, session_id)
    except Exception as e:
        print(f"Generation error: {e}")
        traceback.print_exc()
        response = "I encountered a temporary system malfunction. Please try again."

    return jsonify({
        "response": response,
        "session_id": session_id,
        "tts_available": tts is not None,
        "memory_length": len(get_memory(session_id)),
    })

@app.route("/tts", methods=["POST"])
def tts_endpoint():
    """Synthesise speech for the posted text.

    Expects a JSON object with ``text`` (string) and optionally ``voice``
    (string; defaults to TTS_VOICE). Responds 400 when no non-empty text
    string is present. Otherwise returns ``{"audio": <base64 WAV or null>}``;
    when TTS is disabled the body also carries an ``error`` field.
    """
    # silent=True: a missing, malformed or non-JSON body becomes None instead
    # of an exception; anything that is not a JSON object is treated as empty.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    raw_text = data.get("text")
    text = raw_text.strip() if isinstance(raw_text, str) else ""

    voice = data.get("voice")
    if not isinstance(voice, str) or not voice:
        voice = TTS_VOICE

    if not text:
        return jsonify({"error": "Empty text"}), 400
    if tts is None:
        return jsonify({"error": "TTS not available", "audio": None}), 200

    audio_b64 = synthesize_speech(text, voice=voice)
    return jsonify({"audio": audio_b64})

@app.route("/clear", methods=["POST"])
def clear():
    """Forget the stored history of the posted ``session_id``.

    Always answers ``{"status": "cleared"}``, including when the id is
    missing or unknown.
    """
    # silent=True: a missing, malformed or non-JSON body becomes None instead
    # of an exception; anything that is not a JSON object is treated as empty.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}
    sid = data.get("session_id", "")
    try:
        sessions.pop(sid, None)
    except TypeError:
        # Unhashable id (JSON array/object): it can never have been a key.
        pass
    return jsonify({"status": "cleared"})

@app.route("/health")
def health():
    """Report the service configuration as JSON.

    ``llm`` is the id of the checkpoint that was loaded at start-up and
    ``tts_model`` is the TTS model id, or "DISABLED" when TTS is off.
    """
    return jsonify({
        "status": "online",
        # Report the loaded checkpoint rather than a hard-coded label that can go stale.
        "llm": GEMMA_ID,
        "tts_mode": TTS_MODE,
        "tts_model": tts_model_name if tts else "DISABLED",
        "tts_voice": TTS_VOICE,
        "tts_voices": ["Bella","Jasper","Luna","Bruno","Rosie","Hugo","Kiki","Leo"],
        "max_new_tokens": MAX_NEW_TOKENS,
    })

if __name__ == "__main__":
    # Script entry point: listen on all interfaces, port 7860, one thread per request.
    print("🚀 Ana is online!")
    app.run(
        host="0.0.0.0",
        port=7860,
        threaded=True,
    )