# Source: STiFLeR7's Hugging Face Space — "Update app.py" (commit f14cf8c, verified)
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# GPTQ-quantized Qwen2.5-3B hosted on the Hugging Face Hub.
model_id = "STiFLeR7/Qwen2.5-3B-GPTQ"  # ✅ Your HF model repo

tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

# Half precision when a GPU is present; fall back to fp32 on CPU-only hosts.
dtype = torch.float16 if torch.cuda.is_available() else torch.float32
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    device_map="auto",
    torch_dtype=dtype,
).eval()
def chat_fn(message, history):
    """Generate one assistant reply for *message* given the chat *history*.

    Parameters
    ----------
    message : str
        The latest user message.
    history : list[tuple[str, str]] | None
        Prior (user, assistant) turns as supplied by ``gr.ChatInterface``.

    Returns
    -------
    str
        The assistant's reply. ``gr.ChatInterface`` appends the returned
        string to the chat itself, so we must return only the reply — the
        original code returned ``(history, history)``, which made the UI
        render the raw history tuple instead of the answer, and its manual
        ``history.append`` duplicated state ChatInterface already manages.
    """
    # Re-serialize the conversation into the plain-text prompt format the
    # rest of this script assumes ("User: ...\nAssistant: ...").
    turns = [f"User: {user}\nAssistant: {bot}\n" for user, bot in (history or [])]
    prompt = "".join(turns) + f"User: {message}\nAssistant:"

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=256,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode the full sequence, then keep only the text after the final
    # "Assistant:" marker — that is the newly generated turn.
    decoded = tokenizer.decode(output[0], skip_special_tokens=True)
    return decoded.split("Assistant:")[-1].strip()
# Gradio chat UI wired to chat_fn; ChatInterface manages the chat history
# state and re-renders it on every turn.
demo = gr.ChatInterface(
    fn=chat_fn,
    title="🧠 Qwen2.5-3B GPTQ Chatbot",
    description="Running Qwen2.5-3B (GPTQ) from Hugging Face model repository",
    theme="soft",
)
# Standard Spaces entry point: start the web server only when run as a script.
if __name__ == "__main__":
    demo.launch()