"""Gradio chat demo for a Qwen2.5 instruct model served with Transformers."""

import os

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load model and tokenizer from the Hugging Face Hub.
# MODEL_ID can be overridden through the MODEL_ID environment variable, e.g.
# to point at your own upload ("your-username/my-qwen-2.5-3b-instruct").
MODEL_ID = os.getenv("MODEL_ID", "Qwen/Qwen2.5-3B-Instruct")

# NOTE(review): trust_remote_code=True lets the tokenizer repo run its own
# Python code. MODEL_ID is configurable via the environment, so only point it
# at repositories you trust -- confirm whether this flag is actually needed.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    # Half precision on GPU, full precision on CPU.
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto",
)

# Role markers that must never leak into the reply; the text is cut at the
# first occurrence of any of them.
_STOP_MARKERS = (
    "<|assistant|>",
    "<|user|>",
    "<|system|>",
    "<|im_end|>",
    "<|im_start|>",
)


def _encode_prompt(system_prompt: str, user_text: str):
    """Tokenize a single-turn conversation for ``model.generate``.

    Uses the tokenizer's own chat template when it has one, so the prompt
    matches the format the model was trained on. Falls back to the legacy
    ``<|system|>/<|user|>/<|assistant|>`` layout for tokenizers that ship
    without a chat template.
    """
    system_prompt = system_prompt.strip()
    user_text = user_text.strip()

    if getattr(tokenizer, "chat_template", None):
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_text},
        ]
        # add_generation_prompt=True appends the assistant header so the
        # model starts answering instead of continuing the user turn.
        return tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=True,
            return_dict=True,
            return_tensors="pt",
        )

    prompt = (
        f"<|system|>\n{system_prompt}\n"
        f"<|user|>\n{user_text}\n"
        "<|assistant|>\n"
    )
    return tokenizer(prompt, return_tensors="pt")


def hf_chat(system_prompt: str, user_text: str, max_tokens: int = 220) -> str:
    """Generate one assistant reply with greedy decoding.

    Args:
        system_prompt: Instruction text placed in the system turn.
        user_text: The user's message.
        max_tokens: Upper bound on the number of newly generated tokens.

    Returns:
        The decoded reply with surrounding whitespace removed and anything
        from the first leaked role marker onwards discarded.
    """
    inputs = _encode_prompt(system_prompt, user_text).to(model.device)

    with torch.inference_mode():
        # Greedy decoding: no temperature is passed because it only applies
        # when sampling is enabled.
        output_ids = model.generate(
            **inputs,
            max_new_tokens=max_tokens,
            do_sample=False,
            use_cache=True,
        )

    # generate() returns prompt + completion; keep only the completion.
    prompt_length = inputs["input_ids"].shape[-1]
    generated_ids = output_ids[0][prompt_length:]
    text = tokenizer.decode(generated_ids, skip_special_tokens=True)

    for marker in _STOP_MARKERS:
        if marker in text:
            text = text.split(marker)[0].strip()
    return text.strip()


def predict(message, history, system_prompt_input):
    """Gradio chat callback: answer ``message`` under the given system prompt.

    ``history`` is accepted because ChatInterface always supplies it, but it
    is intentionally unused: this simple example answers each message
    independently of earlier turns.
    """
    return hf_chat(system_prompt_input, message)


with gr.Blocks() as demo:
    gr.Markdown("# MezayaAI Qwen2.5-3B-Instruct Demo")
    system_prompt_input = gr.Textbox(
        label="System Prompt",
        value="You are a helpful AI assistant.",
        lines=2,
    )
    chatbot = gr.ChatInterface(
        predict,
        chatbot=gr.Chatbot(height=300),
        textbox=gr.Textbox(
            placeholder="Ask me a question",
            container=False,
            scale=7,
        ),
        # Without this, ChatInterface calls predict(message, history) only and
        # the third parameter is never supplied.
        additional_inputs=[system_prompt_input],
        # NOTE: with additional_inputs, each example must be a list of
        # [message, system_prompt] rather than a bare string.
        # examples=["What is the capital of France?", "Explain quantum physics."],
        title="Qwen2.5-3B-Instruct Chat",
        description="Ask Qwen2.5-3B-Instruct anything!",
        theme="soft",
    )

if __name__ == "__main__":
    demo.launch(debug=True)