Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
| import torch | |
# --- 1. Model Initialization (executed once, when the app process starts) ---
print("🔄 Loading the AI model... This will take a moment on the first run.")

# Hugging Face Hub identifier of the checkpoint served by this app.
model_name = "Qwen/Qwen2.5-1.5B-Instruct"

# Tokenizer and weights are module-level so every chat request reuses them.
# NOTE(review): trust_remote_code=True permits code shipped in the Hub repo to
# run during loading — confirm this checkpoint actually needs it.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    device_map="cpu",  # pin the model to CPU (the app targets CPU-only hardware)
    torch_dtype=torch.float32,  # full precision, chosen for stability on CPU
)

print("✅ AI Model loaded and ready!")
| # --- 2. The Core AI Function --- | |
def _content_to_text(content):
    """Flatten one chat message's content into plain text.

    Strings are returned unchanged. A dict contributes its ``"text"`` value
    when that is a string. A list/tuple is treated as a sequence of parts and
    the text of each part is concatenated. Anything else (including ``None``)
    contributes an empty string.
    """
    if isinstance(content, str):
        return content
    if isinstance(content, dict):
        text = content.get("text")
        return text if isinstance(text, str) else ""
    if isinstance(content, (list, tuple)):
        return "".join(_content_to_text(part) for part in content)
    return ""


def _iter_history_turns(history):
    """Yield ``(role, text)`` for every past turn in ``history``.

    Two layouts are understood:

    * role/content dicts, e.g. ``{"role": "user", "content": "hi"}``;
    * two-item ``(user_msg, bot_msg)`` pairs, where either side may be ``None``.
    """
    for entry in history or []:
        if isinstance(entry, dict):
            role = entry.get("role")
            text = _content_to_text(entry.get("content"))
            # Skip entries that carry no usable role or no text.
            if isinstance(role, str) and text:
                yield role, text
        else:
            user_msg, bot_msg = entry
            # A missing side must not end up in the prompt as the text "None".
            if user_msg is not None:
                yield "user", _content_to_text(user_msg)
            if bot_msg is not None:
                yield "assistant", _content_to_text(bot_msg)


def chat_with_ai(message, history):
    """Generate the assistant's reply to ``message`` given the prior chat.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        message: The user's newest message.
        history: Earlier turns of the conversation, either as
            ``(user_msg, bot_msg)`` pairs or as role/content dicts. May be
            empty or ``None``.

    Returns:
        The newly generated text only (the prompt is not echoed), decoded
        without special tokens and stripped of surrounding whitespace.
    """
    # Build the prompt in the <|im_start|>/<|im_end|> chat markup, one
    # segment per past turn, then the new user message and an open assistant
    # turn for the model to complete.
    segments = [
        f"<|im_start|>{role}\n{text}<|im_end|>\n"
        for role, text in _iter_history_turns(history)
    ]
    segments.append(
        f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
    )
    prompt = "".join(segments)

    # Tokenize the prompt and move the tensors to wherever the model lives.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():  # inference only; no gradients needed
        outputs = model.generate(
            **inputs,
            max_new_tokens=512,  # upper bound on the reply length
            temperature=0.7,  # sampling temperature (lower = more deterministic)
            do_sample=True,  # sample instead of greedy decoding
            pad_token_id=tokenizer.eos_token_id,
        )

    # The output sequence starts with the prompt tokens; keep only what
    # follows them.
    prompt_length = inputs["input_ids"].shape[1]
    generated_tokens = outputs[0][prompt_length:]
    response = tokenizer.decode(generated_tokens, skip_special_tokens=True)
    return response.strip()
# --- 3. Gradio Interface Setup ---
# gr.ChatInterface wraps chat_with_ai in a ready-made chat UI.
demo = gr.ChatInterface(
    fn=chat_with_ai,
    title="🤖 Free AI Assistant on Hugging Face Spaces",
    description="Ask me anything! I'm running entirely on a free CPU instance. Be patient, I'm thinking as fast as I can.",
    theme="soft",
    examples=[
        "What is the capital of France?",
        "Explain quantum computing in simple terms.",
        "Write a short poem about coding.",
    ],
    # Run examples on demand instead of pre-computing them: caching would push
    # every example through the CPU-bound model before any user request and
    # would freeze one sampled answer per example.
    # NOTE(review): Gradio documents example caching as enabled by default on
    # Spaces — confirm for the Gradio version this app is pinned to.
    cache_examples=False,
)

# --- 4. Launch the App ---
# Start the server only when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()