# app.py
import gradio as gr
from chat import generate_response_stream, reset_conversation, switch_model
from config import MODEL_LIST, MAX_TOKENS, TEMPERATURE


def chat_interface(user_input, max_tokens, temperature, model_name):
    """Route one chat turn: activate *model_name*, then stream its reply.

    Called by Gradio with the four input widgets' values in order.
    Returns the (streaming) generator produced by the chat backend.
    """
    switch_model(model_name)
    return generate_response_stream(
        user_input,
        max_length=max_tokens,
        temperature=temperature,
    )


# --- Chat tab: four inputs feeding chat_interface, one text output ---
message_input = gr.Textbox(
    lines=3,
    placeholder="Type your message here...",
    label="User Input",
)
max_tokens_slider = gr.Slider(
    minimum=50,
    maximum=2000,
    step=50,
    label="Max Tokens",
    value=MAX_TOKENS,
)
temperature_slider = gr.Slider(
    minimum=0.1,
    maximum=1.0,
    step=0.1,
    label="Temperature",
    value=TEMPERATURE,
)
model_dropdown = gr.Dropdown(
    list(MODEL_LIST.keys()),
    label="Choose Model",
    value="ChatGpt Tune Base",
)

iface = gr.Interface(
    fn=chat_interface,
    inputs=[message_input, max_tokens_slider, temperature_slider, model_dropdown],
    outputs=gr.Textbox(label="AI Response"),
    title="ChatGpt Tune Multi-Model Streaming Optimized",
    description="Chat with multiple Hugging Face/safetensor models with streaming responses and low memory usage!",
)

# --- Reset tab: no inputs; clears the active model's conversation memory ---
reset_iface = gr.Interface(
    fn=reset_conversation,
    inputs=[],
    outputs="text",
    title="Reset Conversation",
    description="Clear memory for the current model",
)

# Combine both interfaces into a tabbed app.
demo = gr.TabbedInterface([iface, reset_iface], ["Chat", "Reset Conversation"])

if __name__ == "__main__":
    demo.launch(share=True)