|
|
|
|
|
import gradio as gr |
|
|
from chat import generate_response_stream, reset_conversation, switch_model |
|
|
from config import MODEL_LIST, MAX_TOKENS, TEMPERATURE |
|
|
|
|
|
def chat_interface(user_input, max_tokens, temperature, model_name):
    """Gradio handler: route the user's message to the selected model and
    stream the response back to the UI.

    Args:
        user_input: Raw message text from the "User Input" Textbox.
        max_tokens: Generation cap from the "Max Tokens" slider.
        temperature: Sampling temperature from the "Temperature" slider.
        model_name: Key into MODEL_LIST chosen in the model Dropdown.

    Yields:
        Incremental response text chunks for Gradio to stream into the
        output Textbox.
    """
    # Make sure the requested model is active before generating.
    switch_model(model_name)
    # BUG FIX: Gradio only streams output when the handler itself is a
    # generator function (it checks inspect.isgeneratorfunction(fn)).
    # The original `return generate_response_stream(...)` handed back a
    # generator *object*, which Gradio would display as a plain value
    # instead of streaming its chunks. Delegating with `yield from` makes
    # this function a true generator, enabling streaming.
    yield from generate_response_stream(user_input, max_length=max_tokens, temperature=temperature)
|
|
|
|
|
# Main chat tab: wires the streaming handler to the input widgets.
iface = gr.Interface(
    fn=chat_interface,
    inputs=[
        gr.Textbox(lines=3, placeholder="Type your message here...", label="User Input"),
        gr.Slider(minimum=50, maximum=2000, step=50, label="Max Tokens", value=MAX_TOKENS),
        gr.Slider(minimum=0.1, maximum=1.0, step=0.1, label="Temperature", value=TEMPERATURE),
        # Robustness: keep the original default when it exists, but fall
        # back to the first configured model so the Dropdown never starts
        # on a key that was renamed/removed from MODEL_LIST.
        gr.Dropdown(
            list(MODEL_LIST.keys()),
            label="Choose Model",
            value="ChatGpt Tune Base" if "ChatGpt Tune Base" in MODEL_LIST else next(iter(MODEL_LIST)),
        ),
    ],
    outputs=gr.Textbox(label="AI Response"),
    title="ChatGpt Tune Multi-Model Streaming Optimized",
    description="Chat with multiple Hugging Face/safetensor models with streaming responses and low memory usage!",
)
|
|
|
|
|
# Secondary tab: a single action that clears the active model's
# conversation memory via reset_conversation.
reset_iface = gr.Interface(
    fn=reset_conversation,
    inputs=[],  # no user input needed; pressing the tab's button suffices
    outputs="text",  # status text returned by reset_conversation
    title="Reset Conversation",
    description="Clear memory for the current model",
)
|
|
|
|
|
demo = gr.TabbedInterface([iface, reset_iface], ["Chat", "Reset Conversation"]) |
|
|
|
|
|
if __name__ == "__main__":
    # share=True asks Gradio to create a temporary public URL in addition
    # to the local server — NOTE(review): confirm a public link is intended
    # before deploying.
    demo.launch(share=True)
|
|
|