"""Gradio app for the Nilla voice/text assistant.

Typed text is used directly; recorded audio is first transcribed with Groq
Whisper. Each turn is then sent to a Gemini Live session, which streams back
audio (for voice turns) plus an output transcription.
"""

import asyncio
import logging
import os
import tempfile
import wave

import gradio as gr
from google import genai
from google.genai import types
from groq import Groq

logger = logging.getLogger(__name__)

BOT_NAME = "Nilla"
MOTOR_NAME = "Nilla-2026 GPT motor"
PROVIDER = "HumanV lab"

# Runtime configuration pulled from the environment.
POR = os.environ.get("POR")                      # system instruction text
MODEL_ID = os.environ.get("MODEL_VERSION")       # Gemini live model id
UK_SERVER_API = os.environ.get("UK_SERVER_API")  # Gemini API key
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")    # Groq (Whisper) API key

client_gemini = genai.Client(
    api_key=UK_SERVER_API,
    http_options={"api_version": "v1alpha"},
)
client_groq = Groq(api_key=GROQ_API_KEY)


async def nilla_engine(audio_path, text_input, chat_history):
    """Run one conversation turn against the Gemini live session.

    Args:
        audio_path: filesystem path of a recorded clip, or None.
        text_input: typed user text; takes priority over audio when non-blank.
        chat_history: list of prior Gemini turn dicts, or None for a new chat.

    Returns:
        Tuple ``(audio_out_path, user_text, model_text, new_history)`` where
        ``audio_out_path`` is a temp .wav path for voice turns, else None.
    """
    if chat_history is None:
        chat_history = []

    user_text = ""
    is_voice = False

    if text_input and text_input.strip():
        user_text = text_input
        is_voice = False
    elif audio_path:
        # Transcribe the recording with Groq Whisper.
        with open(audio_path, "rb") as file:
            transcription = client_groq.audio.transcriptions.create(
                file=(audio_path, file.read()),
                model="whisper-large-v3",
                temperature=0,
                response_format="verbose_json",
            )
        user_text = transcription.text
        is_voice = True
    else:
        # Nothing to process: no text and no audio.
        return None, "", "", chat_history

    output_path = None
    model_response_text = ""
    current_turns = chat_history + [{"role": "user", "parts": [{"text": user_text}]}]

    config = {
        "response_modalities": ["AUDIO"],
        "system_instruction": POR,
        "enable_affective_dialog": True,
        "output_audio_transcription": {},
    }

    try:
        async with client_gemini.aio.live.connect(
            model=MODEL_ID, config=config
        ) as session:
            await session.send_client_content(turns=current_turns, turn_complete=True)

            if is_voice:
                # Reserve a .wav path, then close the handle immediately so
                # wave.open can reopen the path (the original leaked the fd
                # and would fail on Windows, where the open handle locks it).
                temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
                output_path = temp_file.name
                temp_file.close()

                with wave.open(output_path, "wb") as wav:
                    wav.setnchannels(1)
                    wav.setsampwidth(2)      # 16-bit PCM
                    wav.setframerate(24000)  # assumes 24 kHz mono stream — TODO confirm against model docs
                    async for response in session.receive():
                        if response.data:
                            wav.writeframes(response.data)
                        if (
                            response.server_content
                            and response.server_content.output_transcription
                        ):
                            model_response_text += (
                                response.server_content.output_transcription.text
                            )
            else:
                # Text turn: collect only the transcription, drop audio chunks.
                async for response in session.receive():
                    if (
                        response.server_content
                        and response.server_content.output_transcription
                    ):
                        model_response_text += (
                            response.server_content.output_transcription.text
                        )

        new_history = current_turns + [
            {"role": "model", "parts": [{"text": model_response_text}]}
        ]
        return output_path, user_text, model_response_text, new_history
    except Exception:
        # Best-effort boundary: keep the UI alive, but log the failure
        # instead of swallowing it silently as the original did.
        logger.exception("Gemini live session failed")
        return None, user_text, "Error", chat_history


def run_interface(audio_file, text_input, chat_history):
    """Synchronous Gradio entry point wrapping the async engine.

    Uses asyncio.run, which creates AND closes a fresh event loop per call;
    the previous new_event_loop/run_until_complete pair leaked one loop per
    button click.
    """
    return asyncio.run(nilla_engine(audio_file, text_input, chat_history))


with gr.Blocks(title=BOT_NAME) as demo:
    # Hidden state carrying the Gemini turn history between clicks.
    history_component = gr.JSON(value=[], visible=False)
    with gr.Row():
        in_audio = gr.Audio(label="Audio", type="filepath")
        in_text = gr.Textbox(label="Text")
    with gr.Row():
        out_audio = gr.Audio(label="Voice Resp")
        out_user = gr.Textbox(label="User Text")
        out_nilla = gr.Textbox(label="Nilla Text")
    btn = gr.Button("Process")
    btn.click(
        fn=run_interface,
        inputs=[in_audio, in_text, history_component],
        outputs=[out_audio, out_user, out_nilla, history_component],
        api_name="run_interface",
    )

if __name__ == "__main__":
    demo.launch()