# MezayaAI / app.py
# Mezaya's picture
# Upload app.py with huggingface_hub
# 948874b verified
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import os
# Load the model and tokenizer from the Hugging Face Hub.
# The repo to load is taken from the MODEL_ID environment variable (on a
# Hugging Face Space, define it as a Space variable); when MODEL_ID is not
# set, the default given in the os.getenv call below is used.
# To serve your own uploaded model, set MODEL_ID to its repo id, or hardcode it:
# MODEL_ID = "your-username/my-qwen-2.5-3b-instruct"
# Repo id of the checkpoint to serve; the environment variable takes
# precedence over the built-in default.
MODEL_ID = os.environ.get("MODEL_ID", "Qwen/Qwen2.5-3B-Instruct")

# NOTE: trust_remote_code=True allows the tokenizer repo to run its own
# Python code at load time, so MODEL_ID should only point at trusted repos.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)

# Weights are loaded in float16 when CUDA is available and float32 otherwise;
# device_map="auto" leaves device placement to the library.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",
    torch_dtype=torch.float32 if not torch.cuda.is_available() else torch.float16,
)
def hf_chat(system_prompt: str, user_text: str, max_tokens: int = 220) -> str:
    """Generate a single assistant reply for one system prompt and one user turn.

    The prompt is rendered with the tokenizer's own chat template so that it
    matches the conversation layout the checkpoint expects. The hand-written
    ``<|system|>`` / ``<|user|>`` / ``<|assistant|>`` layout is kept only as a
    fallback for tokenizers that ship without a chat template.

    Args:
        system_prompt: Instruction text for the system role. Surrounding
            whitespace is stripped; a blank or ``None`` value means no system
            message is sent on the chat-template path (the template's own
            default, if it has one, then applies).
        user_text: The user's message. Surrounding whitespace is stripped;
            ``None`` is treated as an empty string.
        max_tokens: Upper bound on the number of newly generated tokens.

    Returns:
        The decoded reply with special tokens removed, cut at the first
        role/turn marker that leaked into the output, and stripped of
        surrounding whitespace.
    """
    system_text = (system_prompt or "").strip()
    user_message = (user_text or "").strip()

    if getattr(tokenizer, "chat_template", None):
        messages = []
        if system_text:
            messages.append({"role": "system", "content": system_text})
        messages.append({"role": "user", "content": user_message})
        prompt = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,  # end the prompt where the assistant turn begins
        )
        # The rendered template already contains every special token it needs,
        # so the tokenizer must not add another set on top.
        inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=False)
    else:
        # Legacy plain-text layout, byte-for-byte what this function used to send.
        prompt = (
            f"<|system|>\n{system_text}\n"
            f"<|user|>\n{user_message}\n"
            "<|assistant|>\n"
        )
        inputs = tokenizer(prompt, return_tensors="pt")
    inputs = inputs.to(model.device)

    with torch.inference_mode():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=max_tokens,
            # Greedy decoding. `temperature` is a sampling-only setting and is
            # not consulted when do_sample=False, so it is no longer passed.
            do_sample=False,
            use_cache=True,
        )

    # generate() returns prompt + continuation; keep only the continuation.
    prompt_length = inputs["input_ids"].shape[-1]
    generated_ids = output_ids[0][prompt_length:]
    text = tokenizer.decode(generated_ids, skip_special_tokens=True)

    # If the model started writing another turn, keep only what precedes the
    # earliest marker.
    for marker in ("<|assistant|>", "<|user|>", "<|system|>", "<|im_end|>", "<|im_start|>"):
        if marker in text:
            text = text.partition(marker)[0].strip()
    return text.strip()
def predict(message, history, system_prompt_input="You are a helpful AI assistant."):
    """Chat callback: answer the current message with the model.

    Args:
        message: The text the user just submitted.
        history: Previous turns supplied by the chat UI. It is accepted to
            satisfy the callback signature but not used; only the current
            message is sent to the model.
        system_prompt_input: System prompt text. It has a default so the
            callback also works when the caller passes only
            ``(message, history)``; without the default such a call raises
            ``TypeError`` for the missing argument.

    Returns:
        The model's reply as a string.
    """
    return hf_chat(system_prompt_input, message)
# UI layout: a heading, an editable system prompt, and the chat panel.
with gr.Blocks() as demo:
    gr.Markdown("# MezayaAI Qwen2.5-3B-Instruct Demo")
    system_prompt_input = gr.Textbox(
        label="System Prompt",
        value="You are a helpful AI assistant.",
        lines=2,
    )
    chatbot = gr.ChatInterface(
        predict,
        chatbot=gr.Chatbot(height=300),
        textbox=gr.Textbox(placeholder="Ask me a question", container=False, scale=7),
        # Pass the system-prompt box's current value to predict() as its third
        # argument. Without this the callback only receives (message, history).
        additional_inputs=[system_prompt_input],
        # examples=["What is the capital of France?", "Explain quantum physics."],
        title="Qwen2.5-3B-Instruct Chat",
        description="Ask Qwen2.5-3B-Instruct anything!",
        theme="soft",
    )

# Start the server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(debug=True)