"""Gradio chat UI around a small instruction-tuned language model on CPU."""

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# --- 1. Model Initialization (loads once when the app starts) ---
print("🔄 Loading the AI model... This will take a moment on the first run.")

# Model name from Hugging Face Hub
model_name = "Qwen/Qwen2.5-1.5B-Instruct"

# Load the tokenizer and model.
# `device_map="cpu"` pins the weights to the CPU and float32 is used for CPU
# stability. `trust_remote_code` is intentionally not passed: the Qwen2
# architecture is implemented in transformers itself, so there is no need to
# allow code from the model repository to run.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float32,
    device_map="cpu",
)
print("✅ AI Model loaded and ready!")


# --- 2. The Core AI Function ---
def _history_to_messages(history):
    """Normalise Gradio chat history into a list of role/content dicts.

    Two history layouts are accepted:

    * pairs: ``[(user_msg, bot_msg), ...]``
    * messages: ``[{"role": ..., "content": ...}, ...]``

    Empty or missing turns are skipped so that a ``None`` reply is never
    rendered into the prompt as the literal text "None".
    """
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            role = turn.get("role")
            content = turn.get("content")
            # Only plain-text user/assistant turns are forwarded; anything
            # else (other roles, non-string content) is skipped because the
            # prompt built below is text-only.
            if role in ("user", "assistant") and isinstance(content, str) and content:
                messages.append({"role": role, "content": content})
            continue

        user_msg, bot_msg = turn
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if bot_msg:
            messages.append({"role": "assistant", "content": bot_msg})
    return messages


def chat_with_ai(message: str, history: list) -> str:
    """Generate the assistant's reply to ``message`` given the prior chat.

    Args:
        message: The user's newest message.
        history: Earlier turns as supplied by ``gr.ChatInterface``, either as
            (user, assistant) pairs or as role/content dicts.

    Returns:
        The decoded reply with special tokens removed and surrounding
        whitespace stripped.
    """
    messages = _history_to_messages(history)
    messages.append({"role": "user", "content": message})

    # Let the tokenizer render the conversation with the chat template that
    # ships with the checkpoint instead of hand-writing the control tokens;
    # `add_generation_prompt=True` appends the opening of the assistant turn.
    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    # Tokenize the input and remember its length so the echoed prompt can be
    # cut off from the generated sequence afterwards.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_length = inputs["input_ids"].shape[1]

    with torch.no_grad():  # Disable gradient calculation for faster inference
        outputs = model.generate(
            **inputs,
            max_new_tokens=512,  # Maximum length of the new response
            temperature=0.7,  # Controls randomness (lower = more deterministic)
            do_sample=True,  # Enable sampling for more creative responses
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens (skip the input prompt).
    generated_tokens = outputs[0][prompt_length:]
    response = tokenizer.decode(generated_tokens, skip_special_tokens=True)
    return response.strip()


# --- 3. Gradio Interface Setup ---
# gr.ChatInterface provides a ready-made UI for chatbots.
demo = gr.ChatInterface(
    fn=chat_with_ai,
    title="🤖 Free AI Assistant on Hugging Face Spaces",
    description="Ask me anything! I'm running entirely on a free CPU instance. Be patient, I'm thinking as fast as I can.",
    theme="soft",
    examples=[
        "What is the capital of France?",
        "Explain quantum computing in simple terms.",
        "Write a short poem about coding.",
    ],
)

# --- 4. Launch the App ---
if __name__ == "__main__":
    demo.launch()