hypr1 / app.py
Hyprlyf's picture
Update app.py
55455c6 verified
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
# Device: use the GPU when torch can see one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the tokenizer and the causal language model from the same checkpoint.
model_name = "Qwen/Qwen3-0.6B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    # float16 weights on GPU, float32 weights on CPU.
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    device_map="auto",
)
# Chat function
def chat_with_model(user_input, history=None):
    """Generate one assistant reply and return the extended conversation.

    The conversation is rendered as a plain-text transcript of
    ``User: ...`` / ``Assistant: ...`` lines, ending with an open
    ``Assistant:`` line that the model completes.

    Args:
        user_input: The new user message.
        history: Earlier turns as ``(user_message, assistant_reply)`` pairs.
            ``None`` (the default) starts a new conversation. The list that
            is passed in is not modified.

    Returns:
        A 2-tuple containing the updated history list twice (the same list
        object in both positions).
    """
    # Work on a fresh list: a mutable default would be shared between calls,
    # and appending to the caller's list would change it behind their back.
    history = [] if history is None else list(history)

    # Combine history into context
    turns = [f"User: {turn[0]}\nAssistant: {turn[1]}\n" for turn in history]
    turns.append(f"User: {user_input}\nAssistant:")
    context = "".join(turns)

    # The model is loaded with device_map="auto", so send the inputs to
    # wherever the model actually lives instead of assuming a device name.
    inputs = tokenizer(context, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=256,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens. Splitting the full decoded text
    # on "Assistant:" picks the wrong span whenever the reply itself contains
    # that marker or the model keeps writing further turns.
    prompt_length = inputs["input_ids"].shape[-1]
    generated = tokenizer.decode(
        outputs[0][prompt_length:], skip_special_tokens=True
    )

    # If the model continues the transcript with an invented user turn,
    # keep only the text that precedes it.
    reply = generated.split("\nUser:", 1)[0].strip()

    history.append((user_input, reply))
    return history, history
# Gradio Chatbot UI
with gr.Blocks() as demo:
    gr.Markdown("# 🤖 Hyprlyf/hypr1-instruct Chatbot")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(placeholder="Type your message here...")
    clear = gr.Button("Clear")
    state = gr.State([])

    def respond(message, state):
        """Handle a submitted message.

        Args:
            message: Text currently in the input box.
            state: Conversation history held in ``gr.State``.

        Returns:
            A 3-tuple ``(chatbot_value, new_state, textbox_value)``. The
            textbox value is always the empty string so the input box is
            cleared after each submit.
        """
        # Ignore blank submissions instead of running a generation on them.
        if not message or not message.strip():
            return state, state, ""

        chat_history, new_state = chat_with_model(message, state)
        return chat_history, new_state, ""

    # msg is also an output so the input box is emptied once a reply arrives.
    msg.submit(respond, [msg, state], [chatbot, state, msg])
    # Reset both the visible chat and the stored history.
    clear.click(lambda: ([], []), None, [chatbot, state])

demo.launch(share=True)