Tamal321's picture
Library mismatch fixed
538ff5b
import gradio as gr
import torch
import spaces
import tempfile
import os
import re
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import time
# ─────────────────────────────────────────────
# CONFIGURATION: Small models that fit in 2 min
# All models are free-to-use, no API key needed
# ─────────────────────────────────────────────
MODELS = {
"Qwen2.5-Coder-1.5B (Fast)": "Qwen/Qwen2.5-Coder-1.5B-Instruct",
"Qwen2.5-Coder-3B (Better)": "Qwen/Qwen2.5-Coder-3B-Instruct",
"Phi-3-Mini (Lightweight)": "microsoft/phi-3-mini-4k-instruct",
"Gemma 4 4B (Medium) - Budget-Friendly": "google/gemma-4-E4B-it",
"Gemma 4 2B (Light) - Budget-Friendly": "google/gemma-4-E2B-it"
}
# Cache models at module level — load once, reuse
_model_cache = {}
_tokenizer_cache = {}
@spaces.GPU(duration=120)
def load_model_cached(model_id: str):
"""Load model once and cache it."""
if model_id not in _model_cache:
print(f"Loading {model_id}...")
# Use 8-bit quantization to fit smaller VRAM
quant_config = BitsAndBytesConfig(
load_in_8bit=True,
bnb_8bit_compute_dtype=torch.bfloat16
)
model = AutoModelForCausalLM.from_pretrained(
model_id,
device_map="auto",
quantization_config=quant_config,
)
tokenizer = AutoTokenizer.from_pretrained(model_id)
_model_cache[model_id] = model
_tokenizer_cache[model_id] = tokenizer
print(f"✓ {model_id} loaded successfully")
return _model_cache[model_id], _tokenizer_cache[model_id]
def call_llm(model, tokenizer, system: str, user: str, max_tokens: int = 256) -> str:
"""Single LLM call with strict token limit."""
# Build messages
messages = [
{"role": "system", "content": system},
{"role": "user", "content": user},
]
# Format for model
text = tokenizer.apply_chat_template(
messages,
tokenize=False,
add_generation_prompt=True
)
# Tokenize
inputs = tokenizer.encode(text, return_tensors="pt").to(model.device)
# Generate with strict token limit
with torch.no_grad():
outputs = model.generate(
inputs,
max_new_tokens=max_tokens,
temperature=0.2,
top_p=0.9,
do_sample=True,
pad_token_id=tokenizer.eos_token_id,
)
# Decode
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
# Extract only the assistant's response (remove system + user)
if "assistant" in response:
response = response.split("assistant")[-1].strip()
return response
def clean_code(raw: str) -> str:
"""Strip markdown fences from generated code."""
return re.sub(r"```(cpp|ino|arduino|c\+\+)?", "", raw).replace("```", "").strip()
# ─────────────────────────────────────────────
# CORE AGENTIC WORKFLOW
# All inference is LOCAL on Spaces GPU
# Model is cached so it loads only once
# ─────────────────────────────────────────────
@spaces.GPU(duration=120)
def agentic_workflow(idea: str, model_id: str, progress=gr.Progress()):
"""3-phase agentic workflow with local inference."""
if not idea.strip():
raise gr.Error("Please describe your project idea first.")
# Load model once (cached)
model, tokenizer = load_model_cached(model_id)
# ── PHASE 1: ARCHITECT ──────────────────────────────────────
progress(0.05, desc="Phase 1/3 — Architecting hardware spec...")
try:
spec = call_llm(
model, tokenizer,
system=(
"You are a Senior Hardware Architect. Write a concise technical spec: "
"list microcontroller, sensors, actuators, power requirements. Use bullet points."
),
user=idea,
max_tokens=300, # Short spec
)
except Exception as e:
raise gr.Error(f"Phase 1 failed: {str(e)}")
# ── PHASE 2: ENGINEER ───────────────────────────────────────
progress(0.40, desc="Phase 2/3 — Writing firmware code...")
try:
code_raw = call_llm(
model, tokenizer,
system=(
"You are a Lead Firmware Engineer. Write complete Arduino .ino code "
"based on this spec. Include #include, pins, setup(), loop(). "
"Output ONLY code — no explanation."
),
user=f"Spec:\n{spec}",
max_tokens=600, # Larger for code
)
code = clean_code(code_raw)
except Exception as e:
raise gr.Error(f"Phase 2 failed: {str(e)}")
# ── PHASE 3: DESIGNER ───────────────────────────────────────
progress(0.75, desc="Phase 3/3 — Generating wiring guide...")
try:
wiring = call_llm(
model, tokenizer,
system=(
"You are a Hardware Designer. Given Arduino code, produce a wiring table "
"with columns: Component | Pin | Arduino Pin | Notes. Be precise."
),
user=f"Code:\n{code[:500]}", # Use first 500 chars of code to save tokens
max_tokens=300,
)
except Exception as e:
raise gr.Error(f"Phase 3 failed: {str(e)}")
# Save .ino file
progress(0.95, desc="Packaging files...")
path = os.path.join(tempfile.gettempdir(), "project_sketch.ino")
with open(path, "w") as f:
f.write(code)
progress(1.0, desc="✅ Done!")
return spec, code, wiring, path
# ─────────────────────────────────────────────
# GRADIO UI
# ─────────────────────────────────────────────
theme = gr.themes.Soft(primary_hue="blue", secondary_hue="slate").set(
button_primary_background_fill="*primary_600",
button_primary_background_fill_hover="*primary_500",
)
with gr.Blocks(title="AI Hardware Lab", theme=theme) as demo:
gr.Markdown("""
# 🤖 AI Hardware Agent
### Local inference on free HF Spaces GPU — no credit card needed!
⚡ **3-phase pipeline:** Architect → Engineer → Wiring Designer
💡 **Models:** Small, fast, local — runs entirely on Spaces free GPU
""")
with gr.Row():
with gr.Column(scale=2):
idea_input = gr.Textbox(
label="📝 Describe your project",
placeholder="e.g. A PID-controlled balancing robot using MPU6050 and DC motors...",
lines=3,
)
model_sel = gr.Dropdown(
choices=list(MODELS.values()),
value=list(MODELS.values())[1], # Default to 3B
label="🧠 Model (larger = better but slower)",
)
build_btn = gr.Button("🚀 Build Hardware Package", variant="primary", size="lg")
gr.Examples(
examples=[
["A soil moisture sensor with ESP32 that triggers a water pump when soil is dry"],
["A distance-controlled synthesizer using ultrasonic sensors and tone library"],
["A temperature-logged data logger with SD card using Arduino and DHT22"],
["A servo-based robotic arm with 4 joints controlled via joystick"],
],
inputs=idea_input,
)
with gr.Column(scale=1):
gr.Markdown("""
### 📥 Export
**💚 Free models** — no API key needed
**⏱️ Fits in 2 min:** Optimized for Spaces free GPU
**🔗 Tip:** Start with Qwen-3B for best quality/speed balance
""")
download_btn = gr.DownloadButton("📥 Download .ino", visible=False)
with gr.Tabs():
with gr.TabItem("🛠️ Firmware Code"):
code_out = gr.Code(language="cpp", show_label=False)
with gr.TabItem("🔌 Wiring Guide"):
wiring_out = gr.Markdown()
with gr.TabItem("📋 Technical Spec"):
spec_out = gr.Markdown()
# Event handler
def on_success(spec, code, wiring, file_path):
return spec, code, wiring, gr.update(value=file_path, visible=True)
build_btn.click(
fn=agentic_workflow,
inputs=[idea_input, model_sel],
outputs=[spec_out, code_out, wiring_out, download_btn],
api_name="generate",
)
demo.launch()