Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import requests | |
| import os | |
# Hugging Face access token, read from the environment (on Spaces this is
# populated from the Space's secrets). Empty string when unset.
HF_TOKEN = os.environ.get("HF_TOKEN", "")

# Serverless Inference API endpoints are addressed as
#   https://api-inference.huggingface.co/models/<owner>/<repo>
# The "/models/" path segment is required; without it the request does not
# resolve to the model.
MODEL_ID = "asolomonqa/asmgenius-v1"
API_URL = f"https://api-inference.huggingface.co/models/{MODEL_ID}"

# Request headers shared by every inference call. The Authorization header is
# only attached when a token is configured: an empty "Bearer " value is never
# a valid credential, so sending it can only cause an auth failure.
HEADERS = {"Content-Type": "application/json"}
if HF_TOKEN:
    HEADERS["Authorization"] = f"Bearer {HF_TOKEN}"

# System message placed at the start of every prompt sent to the model.
SYSTEM_PROMPT = (
    "You are AsmGenius, an expert assembly language programming assistant "
    "specializing in x86-64 and ARM64 assembly code for Linux. "
    "You write correct, complete, and heavily commented assembly code. "
    "You understand English prompts even when they contain typos or informal "
    "phrasing. You ONLY help with assembly language programming."
)
# Replies for HTTP statuses that have a specific, user-actionable cause.
# 404 is handled separately because its message embeds the endpoint URL.
_STATUS_REPLIES = {
    503: "Model is loading. Wait 30 seconds and try again.",
    401: "Auth failed. Check HF_TOKEN in Space Settings → Secrets.",
    403: "Access denied. Make model public or check token permissions.",
}


def _build_prompt(user_message, history):
    """Render the system prompt, up to three prior turns, and the new message."""
    segments = [f"<|system|>\n{SYSTEM_PROMPT}\n"]
    for turn in (history or [])[-3:]:
        # Each turn is indexed as (user_text, assistant_text). Skip turns that
        # are incomplete so the literal text "None" never reaches the model.
        if len(turn) < 2 or turn[0] is None or turn[1] is None:
            continue
        segments.append(f"<|user|>\n{turn[0]}\n<|assistant|>\n{turn[1]}\n")
    # The trailing assistant tag leaves the prompt open for the model's reply.
    segments.append(f"<|user|>\n{user_message}\n<|assistant|>\n")
    return "".join(segments)


def _extract_generated_text(result):
    """Pull the generated text (or an error description) out of a decoded JSON body."""
    if isinstance(result, list) and result:
        first = result[0]
        if not isinstance(first, dict):
            return str(first).strip()
        text = first.get("generated_text", "No response")
        # Guard against a null / non-string field instead of crashing on .strip().
        return text.strip() if isinstance(text, str) else "No response"
    if isinstance(result, dict):
        if "error" in result:
            return f"Model error: {result['error']}"
        text = result.get("generated_text")
        return text.strip() if isinstance(text, str) else str(result).strip()
    return str(result).strip()


def generate_asm(user_message, history, max_new_tokens=500, temperature=0.1):
    """Ask the hosted model for a reply and return it as display-ready text.

    Args:
        user_message: The new user prompt.
        history: Prior conversation as a sequence of (user, assistant) pairs;
            only the last three turns are included in the prompt.
        max_new_tokens: Generation length limit; coerced with ``int``.
        temperature: Sampling temperature; coerced with ``float``.

    Returns:
        The generated text with surrounding whitespace stripped, or a
        human-readable message describing the HTTP, model, or transport
        failure. Failures of the request itself are reported as text rather
        than raised, so the result can be shown directly in the chat window.
    """
    payload = {
        "inputs": _build_prompt(user_message, history),
        "parameters": {
            "max_new_tokens": int(max_new_tokens),
            "temperature": float(temperature),
            "top_p": 0.95,
            "repetition_penalty": 1.15,
            "do_sample": True,
            "return_full_text": False,
        },
        "options": {
            "wait_for_model": True,
            "use_cache": False,
        },
    }
    try:
        r = requests.post(API_URL, headers=HEADERS, json=payload, timeout=180)
        status = r.status_code
        if status in _STATUS_REPLIES:
            return _STATUS_REPLIES[status]
        if status == 404:
            return f"404 — model not found. Check: {API_URL}"
        if status != 200:
            # Truncate the body so an HTML error page does not flood the chat.
            return f"Error {status}: {r.text[:300]}"
        return _extract_generated_text(r.json())
    except requests.Timeout:
        return "Timed out. Model may be overloaded. Try again."
    except Exception as e:
        # UI boundary: any other failure (connection error, undecodable body)
        # is reported to the user instead of propagating into the event handler.
        return f"Request failed: {str(e)}"
def chat(message, history, max_tokens, temperature):
    """Handle one chat submission from the UI.

    A message that is empty or whitespace-only is ignored. Otherwise the
    model's reply is requested and the ``(message, reply)`` pair is appended
    to ``history`` in place.

    Returns:
        A ``("", history)`` tuple: the empty string clears the prompt box and
        ``history`` refreshes the chat window.
    """
    if message.strip():
        reply = generate_asm(message, history, max_tokens, temperature)
        history.append((message, reply))
    return "", history
# Sample prompts offered as one-click examples in the UI.
# NOTE(review): the misspelled "writ" entry is presumably deliberate (the UI
# advertises typo tolerance) — do not "correct" it without confirming.
EXAMPLES = [
    # x86-64
    "write a function that adds two numbers in x86-64 assembly",
    # ARM64
    "show me ARM64 assembly for fibonacci",
    # informal phrasing
    "i need x86-64 asm bubble sort on an array",
    "writ a hello world in x86-64 nasm",
    "how to make a loop from 1 to 10 in arm64",
]
# Markdown shown in the UI. Defined at module level so the text sits at
# column 0 and is passed to gr.Markdown exactly as written.
_HEADER_MD = """
# ⚙️ AsmGenius AI
### x86-64 & ARM64 Assembly Code Expert
Understands typos • Complete code • Every instruction explained
> ⏳ First response takes 30-60s while model loads.
---
"""

_TIPS_MD = """
---
### 📋 Tips
- Typos are **fine**
- Specify **x86-64** or **ARM64**
- Ask to **debug** broken code
- First request: wait ~30-60s
"""

_FOOTER_MD = """
---
**AsmGenius v1** | CodeLlama-7B + QLoRA | 7,167 training examples
"""

# NOTE(review): the nesting below (chat column on the left, settings column on
# the right, footer under both) is reconstructed from the component order and
# `scale` hints — confirm against the deployed layout.
with gr.Blocks(
    theme=gr.themes.Base(primary_hue="orange", neutral_hue="slate"),
    title="AsmGenius AI",
) as demo:
    gr.Markdown(_HEADER_MD)

    with gr.Row():
        # Conversation view and prompt entry.
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(height=480, label="AsmGenius")
            with gr.Row():
                msg_box = gr.Textbox(
                    placeholder="Ask anything... typos are fine!",
                    label="Prompt",
                    lines=3,
                    scale=5,
                )
                with gr.Column(scale=1, min_width=80):
                    send_btn = gr.Button("Send ▶", variant="primary")
                    clear_btn = gr.Button("Clear 🗑")

        # Generation settings, examples and usage tips.
        with gr.Column(scale=1):
            gr.Markdown("### ⚙️ Settings")
            max_tokens = gr.Slider(
                128, 800, value=500, step=64, label="Max tokens"
            )
            temperature = gr.Slider(
                0.01, 1.0, value=0.1, step=0.05, label="Temperature"
            )
            gr.Markdown("---")
            gr.Markdown("### 💡 Examples")
            gr.Examples(examples=[[e] for e in EXAMPLES], inputs=msg_box)
            gr.Markdown(_TIPS_MD)

    gr.Markdown(_FOOTER_MD)

    # The Send button and pressing Enter in the prompt box trigger the same
    # handler with the same wiring, so the component lists are shared.
    chat_inputs = [msg_box, chatbot, max_tokens, temperature]
    chat_outputs = [msg_box, chatbot]
    send_btn.click(chat, chat_inputs, chat_outputs)
    msg_box.submit(chat, chat_inputs, chat_outputs)

    # Reset the conversation. The chatbot is the only component to clear, so
    # it is listed once and the handler returns a single empty history.
    clear_btn.click(lambda: [], outputs=chatbot)

# Bind to all interfaces on port 7860.
demo.launch(server_name="0.0.0.0", server_port=7860)