"""Local CPU code-generation demo: a small Qwen coder model behind a Gradio UI."""

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

MODEL_NAME = "Qwen/Qwen2.5-Coder-0.5B-Instruct"

print("Loading model...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="cpu",
    torch_dtype=torch.float32,
    low_cpu_mem_usage=True,
)
model.eval()
print("Model loaded!")


def generate_code(prompt: str) -> str:
    """Generate code for *prompt* with the local model.

    Args:
        prompt: Free-form user request (e.g. "Create a Python calculator").

    Returns:
        The model's generated code as a string, or a short message when the
        prompt is empty/whitespace.
    """
    if not prompt.strip():
        return "Please enter a prompt."

    # NOTE(review): the system prompt asks for HTML, but the output widget
    # below is configured with language="python" — confirm which is intended.
    messages = [
        {"role": "system", "content": "You are a helpful coding assistant. Output only clean code without explanations nor anything else. Code in HTML."},
        {"role": "user", "content": prompt},
    ]

    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    inputs = tokenizer(text, return_tensors="pt")

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=1000,   # keeps it fast
            do_sample=False,       # greedy: faster + more stable
        )

    # Decode ONLY the newly generated tokens. The previous approach of
    # splitting the full decoded text on the word "assistant" broke whenever
    # the prompt or the generated code itself contained that word.
    prompt_length = inputs["input_ids"].shape[-1]
    generated_tokens = outputs[0][prompt_length:]
    result = tokenizer.decode(generated_tokens, skip_special_tokens=True)

    return result.strip()


# The Blocks object stays at module level so hosting platforms (e.g. HF
# Spaces) can discover `demo`; only launch() is guarded below.
with gr.Blocks() as demo:
    gr.Markdown("# 💻 AI Code Generator (Local CPU)")
    gr.Markdown("Fast, simple, and runs fully locally 🚀")

    prompt = gr.Textbox(
        label="Your Prompt",
        placeholder="e.g. Create a Python calculator",
        lines=4,
    )
    generate_btn = gr.Button("Generate Code")
    output = gr.Code(
        label="Generated Code",
        language="python",
    )

    generate_btn.click(
        fn=generate_code,
        inputs=prompt,
        outputs=output,
    )

    gr.Markdown("📋 Use the built-in copy button in the code box!")


if __name__ == "__main__":
    # Guarded so importing this module (tests, Spaces runtime) does not
    # immediately start a web server.
    demo.launch()