# File size: 1,826 Bytes
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
# Device: use the GPU when PyTorch can see one, otherwise fall back to the CPU.
_cuda_available = torch.cuda.is_available()
device = "cuda" if _cuda_available else "cpu"

# Load the tokenizer and the causal language model for the chosen checkpoint.
model_name = "Qwen/Qwen3-0.6B"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Half precision is requested only when CUDA is available; CPU runs use float32.
_weights_dtype = torch.float16 if _cuda_available else torch.float32
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=_weights_dtype,
    device_map="auto",
)
# Chat function
def chat_with_model(user_input, history=None):
    """Generate the assistant's reply to ``user_input`` and record the turn.

    The prompt is a plain-text transcript: every earlier turn is rendered as
    ``User: ...`` / ``Assistant: ...`` lines, followed by the new user message
    and an open ``Assistant:`` cue for the model to complete.

    Args:
        user_input: Text of the newest user message.
        history: Earlier turns as ``(user_text, assistant_text)`` pairs
            (any indexable pair works). When omitted a fresh list is
            created; a list that is passed in is extended in place.

    Returns:
        A ``(history, history)`` tuple: the same updated list twice, so a
        caller can feed one copy to the chat display and one to its state.
    """
    # A literal ``[]`` default is created once and shared by every call that
    # omits ``history``, leaking turns between unrelated conversations.
    if history is None:
        history = []

    # Combine history into context
    segments = [f"User: {h[0]}\nAssistant: {h[1]}\n" for h in history]
    segments.append(f"User: {user_input}\nAssistant:")
    context = "".join(segments)

    # The model is placed by ``device_map="auto"``, so send the inputs to
    # wherever the model actually lives rather than to a separate device string.
    inputs = tokenizer(context, return_tensors="pt").to(model.device)
    prompt_length = inputs["input_ids"].shape[-1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=256,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    # The generated sequence starts with the prompt tokens. Decode only what
    # comes after them instead of searching the full text for "Assistant:",
    # which picks the wrong span whenever that marker appears in a message or
    # the model writes extra turns.
    completion = tokenizer.decode(
        outputs[0][prompt_length:], skip_special_tokens=True
    )
    # Drop any follow-up "User:" turn the model invented after its answer.
    reply = completion.split("\nUser:", 1)[0].strip()

    history.append((user_input, reply))
    return history, history
# Gradio Chatbot UI
with gr.Blocks() as demo:
    gr.Markdown("# 🤖 Hyprlyf/hypr1-instruct Chatbot")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(placeholder="Type your message here...")
    clear = gr.Button("Clear")
    # Conversation history, stored as a list of (user, assistant) pairs.
    state = gr.State([])

    def respond(message, state):
        """Handle a submitted message.

        Args:
            message: Text currently in the input box.
            state: Conversation history held in ``gr.State``.

        Returns:
            A ``(textbox_value, chatbot_history, state)`` tuple. The textbox
            value is always ``""`` so the input box is emptied after submit.
        """
        # Do not spend a generation on an empty or whitespace-only message.
        if not message or not message.strip():
            return "", state, state
        # chat_with_model returns the same updated history twice; one is enough.
        updated_history, _ = chat_with_model(message, state)
        return "", updated_history, updated_history

    # Submitting clears the textbox and refreshes both the display and state.
    msg.submit(respond, [msg, state], [msg, chatbot, state])
    # "Clear" resets the visible chat and the stored history together.
    clear.click(lambda: ([], []), None, [chatbot, state])

demo.launch(share=True)