# Hugging Face Space source listing (Space status when captured: Sleeping)
import os
import re
import tempfile
import time

import gradio as gr
import spaces
import torch
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
# ─────────────────────────────────────────────
# CONFIGURATION: Small models that fit in 2 min
# All models are free-to-use, no API key needed
# ─────────────────────────────────────────────
# Maps a human-readable display label to the Hugging Face Hub repository id.
# Insertion order matters: the UI picks its default entry by position/label.
MODELS = {
    "Qwen2.5-Coder-1.5B (Fast)": "Qwen/Qwen2.5-Coder-1.5B-Instruct",
    "Qwen2.5-Coder-3B (Better)": "Qwen/Qwen2.5-Coder-3B-Instruct",
    "Phi-3-Mini (Lightweight)": "microsoft/phi-3-mini-4k-instruct",
    "Gemma 4 4B (Medium) - Budget-Friendly": "google/gemma-4-E4B-it",
    "Gemma 4 2B (Light) - Budget-Friendly": "google/gemma-4-E2B-it",
}

# Cache models at module level — load once, reuse.
# Both dicts are keyed by repository id and are filled by load_model_cached().
_model_cache: dict = {}
_tokenizer_cache: dict = {}
def load_model_cached(model_id: str):
    """Return the ``(model, tokenizer)`` pair for *model_id*, loading it on first use.

    The first call for a given repository id loads the causal-LM with 8-bit
    bitsandbytes quantization and its tokenizer, then stores both in the
    module-level caches. Later calls return the cached objects.

    Args:
        model_id: Hugging Face Hub repository id of the model to load.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    if model_id not in _model_cache:
        print(f"Loading {model_id}...")
        # Use 8-bit quantization to fit smaller VRAM.
        # BitsAndBytesConfig has no `bnb_8bit_compute_dtype` option (a compute
        # dtype exists only for 4-bit: `bnb_4bit_compute_dtype`); the unknown
        # keyword was ignored, so it is intentionally not passed here.
        quant_config = BitsAndBytesConfig(load_in_8bit=True)
        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            device_map="auto",
            quantization_config=quant_config,
        )
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        # Publish both entries only after both loads succeeded, so a failed
        # load never leaves a model cached without its tokenizer.
        _model_cache[model_id] = model
        _tokenizer_cache[model_id] = tokenizer
        print(f"✓ {model_id} loaded successfully")
    return _model_cache[model_id], _tokenizer_cache[model_id]
def call_llm(model, tokenizer, system: str, user: str, max_tokens: int = 256) -> str:
    """Run a single chat-style generation and return only the model's reply.

    Args:
        model: Causal language model exposing ``generate`` and ``device``.
        tokenizer: Matching tokenizer exposing ``apply_chat_template``,
            ``encode``, ``decode`` and ``eos_token_id``.
        system: System prompt describing the role the model should play.
        user: User message to answer.
        max_tokens: Upper bound on the number of newly generated tokens.

    Returns:
        The generated completion, stripped of surrounding whitespace. The
        prompt (system + user turns) is never part of the returned text.
    """
    # Build messages
    messages = [
        {"role": "system", "content": system},
        {"role": "user", "content": user},
    ]
    # Format for model
    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    # Tokenize
    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(model.device)
    # Generate with strict token limit
    with torch.no_grad():
        outputs = model.generate(
            input_ids,
            max_new_tokens=max_tokens,
            temperature=0.2,
            top_p=0.9,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )
    # The returned sequence is the prompt followed by the completion. Slice the
    # prompt off by token count instead of splitting the decoded text on the
    # word "assistant": that heuristic truncated any reply containing the word
    # and leaked the prompt whenever the role marker was not in the decoded text.
    prompt_length = input_ids.shape[-1]
    completion_ids = outputs[0][prompt_length:]
    return tokenizer.decode(completion_ids, skip_special_tokens=True).strip()
# A line consisting solely of a fence, optionally followed by a language tag
# (e.g. "```", "```cpp", "```C++", "```c").
_FENCE_LINE_RE = re.compile(r"^[ \t]*```[\w+#.-]*[ \t\r]*$", re.MULTILINE)
# Fences that share a line with other text; only the known tags are consumed
# so that real code following the fence is never swallowed.
_INLINE_FENCE_RE = re.compile(r"```(cpp|ino|arduino|c\+\+)?")


def clean_code(raw: str) -> str:
    """Strip markdown code fences from generated code.

    Whole fence lines are removed together with whatever language tag they
    carry, so unrecognized or differently-cased tags ("```c", "```C++") no
    longer leave stray text at the top of the sketch. Fences that appear
    inline with other text are removed using the original tag list.

    Args:
        raw: Model output that may contain markdown fences.

    Returns:
        The text with fences removed and surrounding whitespace stripped.
    """
    without_fence_lines = _FENCE_LINE_RE.sub("", raw)
    return _INLINE_FENCE_RE.sub("", without_fence_lines).strip()
# ─────────────────────────────────────────────
# CORE AGENTIC WORKFLOW
# All inference is LOCAL on Spaces GPU
# Model is cached so it loads only once
# ─────────────────────────────────────────────
def _run_phase(label: str, model, tokenizer, system: str, user: str, max_tokens: int) -> str:
    """Run one LLM phase and convert any failure into a user-visible ``gr.Error``."""
    try:
        return call_llm(model, tokenizer, system=system, user=user, max_tokens=max_tokens)
    except Exception as e:
        # Chain the cause so the original traceback stays in the server log.
        raise gr.Error(f"{label} failed: {str(e)}") from e


# NOTE(review): `spaces` is imported at the top of the file but no
# `@spaces.GPU` decorator appears in the visible code — confirm whether this
# Space targets ZeroGPU hardware, where GPU work is expected to run inside a
# decorated function.
def agentic_workflow(idea: str, model_id: str, progress=gr.Progress()):
    """Run the 3-phase agentic workflow with local inference.

    Phase 1 turns the idea into a hardware spec, phase 2 turns the spec into
    Arduino code, and phase 3 turns the code into a wiring table. The code is
    also written to a ``project_sketch.ino`` file for download.

    Args:
        idea: Free-text project description entered by the user.
        model_id: Hugging Face Hub repository id of the model to use.
        progress: Gradio progress tracker used to report phase progress.

    Returns:
        A ``(spec, code, wiring, path)`` tuple, where ``path`` is the location
        of the written ``.ino`` file.

    Raises:
        gr.Error: If the idea is empty, the model cannot be loaded, or any
            phase fails.
    """
    if not idea or not idea.strip():
        raise gr.Error("Please describe your project idea first.")

    # Load model once (cached)
    try:
        model, tokenizer = load_model_cached(model_id)
    except Exception as e:
        raise gr.Error(f"Model load failed: {str(e)}") from e

    # ── PHASE 1: ARCHITECT ──────────────────────────────────────
    progress(0.05, desc="Phase 1/3 — Architecting hardware spec...")
    spec = _run_phase(
        "Phase 1",
        model,
        tokenizer,
        system=(
            "You are a Senior Hardware Architect. Write a concise technical spec: "
            "list microcontroller, sensors, actuators, power requirements. Use bullet points."
        ),
        user=idea,
        max_tokens=300,  # Short spec
    )

    # ── PHASE 2: ENGINEER ───────────────────────────────────────
    progress(0.40, desc="Phase 2/3 — Writing firmware code...")
    code_raw = _run_phase(
        "Phase 2",
        model,
        tokenizer,
        system=(
            "You are a Lead Firmware Engineer. Write complete Arduino .ino code "
            "based on this spec. Include #include, pins, setup(), loop(). "
            "Output ONLY code — no explanation."
        ),
        user=f"Spec:\n{spec}",
        max_tokens=600,  # Larger for code
    )
    code = clean_code(code_raw)

    # ── PHASE 3: DESIGNER ───────────────────────────────────────
    progress(0.75, desc="Phase 3/3 — Generating wiring guide...")
    wiring = _run_phase(
        "Phase 3",
        model,
        tokenizer,
        system=(
            "You are a Hardware Designer. Given Arduino code, produce a wiring table "
            "with columns: Component | Pin | Arduino Pin | Notes. Be precise."
        ),
        user=f"Code:\n{code[:500]}",  # Use first 500 chars of code to save tokens
        max_tokens=300,
    )

    # Save .ino file
    progress(0.95, desc="Packaging files...")
    # A fresh directory per request keeps the download name stable while
    # preventing concurrent requests from overwriting each other's sketch.
    out_dir = tempfile.mkdtemp(prefix="hardware_pkg_")
    path = os.path.join(out_dir, "project_sketch.ino")
    # Explicit UTF-8: generated code may contain non-ASCII comments and the
    # platform default encoding is not guaranteed to handle them.
    with open(path, "w", encoding="utf-8") as f:
        f.write(code)

    progress(1.0, desc="✅ Done!")
    return spec, code, wiring, path
# ─────────────────────────────────────────────
# GRADIO UI
# ─────────────────────────────────────────────
theme = gr.themes.Soft(primary_hue="blue", secondary_hue="slate").set(
    button_primary_background_fill="*primary_600",
    button_primary_background_fill_hover="*primary_500",
)

with gr.Blocks(title="AI Hardware Lab", theme=theme) as demo:
    gr.Markdown("""
    # 🤖 AI Hardware Agent
    ### Local inference on free HF Spaces GPU — no credit card needed!
    ⚡ **3-phase pipeline:** Architect → Engineer → Wiring Designer
    💡 **Models:** Small, fast, local — runs entirely on Spaces free GPU
    """)

    with gr.Row():
        with gr.Column(scale=2):
            idea_input = gr.Textbox(
                label="📝 Describe your project",
                placeholder="e.g. A PID-controlled balancing robot using MPU6050 and DC motors...",
                lines=3,
            )
            model_sel = gr.Dropdown(
                # (label, value) pairs: show the friendly name from MODELS
                # while the handler still receives the repository id.
                choices=list(MODELS.items()),
                value=MODELS["Qwen2.5-Coder-3B (Better)"],  # Default to 3B
                label="🧠 Model (larger = better but slower)",
            )
            build_btn = gr.Button("🚀 Build Hardware Package", variant="primary", size="lg")
            gr.Examples(
                examples=[
                    ["A soil moisture sensor with ESP32 that triggers a water pump when soil is dry"],
                    ["A distance-controlled synthesizer using ultrasonic sensors and tone library"],
                    ["A temperature-logged data logger with SD card using Arduino and DHT22"],
                    ["A servo-based robotic arm with 4 joints controlled via joystick"],
                ],
                inputs=idea_input,
            )
        with gr.Column(scale=1):
            gr.Markdown("""
            ### 📥 Export
            **💚 Free models** — no API key needed
            **⏱️ Fits in 2 min:** Optimized for Spaces free GPU
            **🔗 Tip:** Start with Qwen-3B for best quality/speed balance
            """)
            # Hidden until a build succeeds; revealed by on_success().
            download_btn = gr.DownloadButton("📥 Download .ino", visible=False)

    with gr.Tabs():
        with gr.TabItem("🛠️ Firmware Code"):
            code_out = gr.Code(language="cpp", show_label=False)
        with gr.TabItem("🔌 Wiring Guide"):
            wiring_out = gr.Markdown()
        with gr.TabItem("📋 Technical Spec"):
            spec_out = gr.Markdown()

    # Event handlers
    def on_success(spec, code, wiring, file_path):
        """Pass the results through and reveal the download button for the sketch."""
        return spec, code, wiring, gr.update(value=file_path, visible=True)

    def build_package(idea, model_id, progress=gr.Progress()):
        """Run the workflow and route its outputs through ``on_success``.

        Returning the bare file path would set the button's value but leave it
        invisible, because the button is created with ``visible=False``.
        """
        return on_success(*agentic_workflow(idea, model_id, progress))

    build_btn.click(
        fn=build_package,
        inputs=[idea_input, model_sel],
        outputs=[spec_out, code_out, wiring_out, download_btn],
        api_name="generate",
    )

demo.launch()