# app.py
import gradio as gr
from chat import generate_response_stream, reset_conversation, switch_model
from config import MODEL_LIST, MAX_TOKENS, TEMPERATURE


def chat_interface(user_input, max_tokens, temperature, model_name):
    """Route one chat turn: activate *model_name*, then stream its reply.

    Called by Gradio with the four input widgets' values in order.
    Returns the (streaming) generator produced by the chat backend.
    """
    switch_model(model_name)
    return generate_response_stream(
        user_input,
        max_length=max_tokens,
        temperature=temperature,
    )


# --- Chat tab: four inputs feeding chat_interface, one text output ---
message_input = gr.Textbox(
    lines=3,
    placeholder="Type your message here...",
    label="User Input",
)
max_tokens_slider = gr.Slider(
    minimum=50,
    maximum=2000,
    step=50,
    label="Max Tokens",
    value=MAX_TOKENS,
)
temperature_slider = gr.Slider(
    minimum=0.1,
    maximum=1.0,
    step=0.1,
    label="Temperature",
    value=TEMPERATURE,
)
model_dropdown = gr.Dropdown(
    list(MODEL_LIST.keys()),
    label="Choose Model",
    value="ChatGpt Tune Base",
)

iface = gr.Interface(
    fn=chat_interface,
    inputs=[message_input, max_tokens_slider, temperature_slider, model_dropdown],
    outputs=gr.Textbox(label="AI Response"),
    title="ChatGpt Tune Multi-Model Streaming Optimized",
    description="Chat with multiple Hugging Face/safetensor models with streaming responses and low memory usage!",
)

# --- Reset tab: no inputs; clears the active model's conversation memory ---
reset_iface = gr.Interface(
    fn=reset_conversation,
    inputs=[],
    outputs="text",
    title="Reset Conversation",
    description="Clear memory for the current model",
)

# Combine both interfaces into a tabbed app.
demo = gr.TabbedInterface([iface, reset_iface], ["Chat", "Reset Conversation"])

if __name__ == "__main__":
    demo.launch(share=True)