Spaces:

asolomonqa
/

asmgenius

Sleeping

App Files Files Community

asmgenius / app.py

asolomonqa

Fix: correct API URL → api-inference.huggingface.co

fb5f364 verified 20 days ago

raw

history blame contribute delete

5.14 kB


	import gradio as gr
	import requests
	import os

	HF_TOKEN = os.environ.get("HF_TOKEN", "")


	# ✅ CORRECT inference API URL
	API_URL = "https://api-inference.huggingface.co/asolomonqa/asmgenius-v1"

	HEADERS = {
	"Authorization": f"Bearer {HF_TOKEN}",
	"Content-Type": "application/json",
	}

	SYSTEM_PROMPT = """You are AsmGenius, an expert assembly language programming assistant specializing in x86-64 and ARM64 assembly code for Linux. You write correct, complete, and heavily commented assembly code. You understand English prompts even when they contain typos or informal phrasing. You ONLY help with assembly language programming."""

	def generate_asm(user_message, history, max_new_tokens=500, temperature=0.1):
	prompt = f"<\|system\|>\n{SYSTEM_PROMPT}\n"
	for turn in history[-3:]:
	prompt += f"<\|user\|>\n{turn[0]}\n<\|assistant\|>\n{turn[1]}\n"
	prompt += f"<\|user\|>\n{user_message}\n<\|assistant\|>\n"

	payload = {
	"inputs": prompt,
	"parameters": {
	"max_new_tokens": int(max_new_tokens),
	"temperature": float(temperature),
	"top_p": 0.95,
	"repetition_penalty": 1.15,
	"do_sample": True,
	"return_full_text": False,
	},
	"options": {
	"wait_for_model": True,
	"use_cache": False,
	}
	}

	try:
	r = requests.post(API_URL, headers=HEADERS, json=payload, timeout=180)

	if r.status_code == 503:
	return "Model is loading. Wait 30 seconds and try again."
	if r.status_code == 401:
	return "Auth failed. Check HF_TOKEN in Space Settings → Secrets."
	if r.status_code == 403:
	return "Access denied. Make model public or check token permissions."
	if r.status_code == 404:
	return f"404 — model not found. Check: {API_URL}"
	if r.status_code != 200:
	return f"Error {r.status_code}: {r.text[:300]}"

	result = r.json()
	if isinstance(result, list) and len(result) > 0:
	return result[0].get("generated_text", "No response").strip()
	if isinstance(result, dict):
	if "error" in result:
	return f"Model error: {result['error']}"
	return result.get("generated_text", str(result)).strip()
	return str(result).strip()

	except requests.Timeout:
	return "Timed out. Model may be overloaded. Try again."
	except Exception as e:
	return f"Request failed: {str(e)}"

	def chat(message, history, max_tokens, temperature):
	if not message.strip():
	return "", history
	response = generate_asm(message, history, max_tokens, temperature)
	history.append((message, response))
	return "", history

	EXAMPLES = [
	"write a function that adds two numbers in x86-64 assembly",
	"show me ARM64 assembly for fibonacci",
	"i need x86-64 asm bubble sort on an array",
	"writ a hello world in x86-64 nasm",
	"how to make a loop from 1 to 10 in arm64",
	]

	with gr.Blocks(
	theme=gr.themes.Base(primary_hue="orange", neutral_hue="slate"),
	title="AsmGenius AI"
	) as demo:

	gr.Markdown("""
	# ⚙️ AsmGenius AI
	### x86-64 & ARM64 Assembly Code Expert
	Understands typos • Complete code • Every instruction explained
	> ⏳ First response takes 30-60s while model loads.
	---
	""")

	with gr.Row():
	with gr.Column(scale=3):
	chatbot = gr.Chatbot(height=480, label="AsmGenius")
	with gr.Row():
	msg_box = gr.Textbox(
	placeholder="Ask anything... typos are fine!",
	label="Prompt",
	lines=3,
	scale=5,
	)
	with gr.Column(scale=1, min_width=80):
	send_btn = gr.Button("Send ▶", variant="primary")
	clear_btn = gr.Button("Clear 🗑")

	with gr.Column(scale=1):
	gr.Markdown("### ⚙️ Settings")
	max_tokens = gr.Slider(128, 800, value=500, step=64,
	label="Max tokens")
	temperature = gr.Slider(0.01, 1.0, value=0.1, step=0.05,
	label="Temperature")
	gr.Markdown("---")
	gr.Markdown("### 💡 Examples")
	gr.Examples(examples=[[e] for e in EXAMPLES], inputs=msg_box)
	gr.Markdown("""
	---
	### 📋 Tips
	- Typos are fine
	- Specify x86-64 or ARM64
	- Ask to debug broken code
	- First request: wait ~30-60s
	""")

	gr.Markdown("""
	---
	AsmGenius v1 \| CodeLlama-7B + QLoRA \| 7,167 training examples
	""")

	send_btn.click(chat,
	[msg_box, chatbot, max_tokens, temperature],
	[msg_box, chatbot])
	msg_box.submit(chat,
	[msg_box, chatbot, max_tokens, temperature],
	[msg_box, chatbot])
	clear_btn.click(lambda: ([], []), outputs=[chatbot, chatbot])

	demo.launch(server_name="0.0.0.0", server_port=7860)