# AI Code Generator — Hugging Face Space (CPU-only demo)
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Small instruct-tuned coding model, chosen so the demo is usable on CPU.
MODEL_NAME = "Qwen/Qwen2.5-Coder-0.5B-Instruct"

print("Loading model...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="cpu",            # force CPU — no GPU is assumed in this Space
    torch_dtype=torch.float32,   # full precision; half precision is unreliable on CPU
    low_cpu_mem_usage=True,      # stream weights in to reduce peak RAM during load
)
model.eval()  # inference only — disables dropout / training-mode behavior
print("Model loaded!")
def generate_code(prompt):
    """Generate code for *prompt* with the locally loaded chat model.

    Parameters:
        prompt: the user's request, as free text.

    Returns:
        The model's reply as a stripped string, or a short instruction
        message when the prompt is empty/whitespace.
    """
    if not prompt.strip():
        return "Please enter a prompt."

    messages = [
        {"role": "system", "content": "You are a helpful coding assistant. Output only clean code without explanations nor anything else. Code in HTML."},
        {"role": "user", "content": prompt},
    ]
    # Render the chat turns into the model's prompt format, ending with the
    # assistant header so generation continues as the assistant's reply.
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    inputs = tokenizer(text, return_tensors="pt")

    with torch.no_grad():  # inference only — skip autograd bookkeeping
        outputs = model.generate(
            **inputs,
            max_new_tokens=1000,  # keeps it fast
            do_sample=False,      # greedy decoding: faster + more stable
        )

    # Decode ONLY the newly generated tokens. The previous approach —
    # splitting the full decoded text on the literal "assistant" — truncated
    # the answer whenever that word appeared in the user's prompt or in the
    # generated code itself (it is a common identifier/word).
    prompt_len = inputs["input_ids"].shape[1]
    result = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
    return result.strip()
# NOTE(review): the emoji in the strings below look mojibake'd ("π»", "π");
# the originals were likely 💻 / 🚀 — left byte-identical pending confirmation.
with gr.Blocks() as demo:
    gr.Markdown("# π» AI Code Generator (Local CPU)")
    gr.Markdown("Fast, simple, and runs fully locally π")

    prompt = gr.Textbox(
        label="Your Prompt",
        placeholder="e.g. Create a Python calculator",
        lines=4,
    )
    generate_btn = gr.Button("Generate Code")
    output = gr.Code(
        label="Generated Code",
        # NOTE(review): highlighting is set to "python" but the system prompt
        # asks the model to produce HTML — confirm which is intended.
        language="python",
    )

    # Wire the button: prompt text in, generated code out.
    generate_btn.click(
        fn=generate_code,
        inputs=prompt,
        outputs=output,
    )
    gr.Markdown("π Use the built-in copy button in the code box!")

demo.launch()