# Source page header (not part of the program):
#   munyew's picture
#   fix: audioop stub + lazy llama-cpp, clean UTF-8, no BOM
#   6316b10 verified
import sys
import types
# Python 3.13 compat: audioop removed; stub it so pydub/gradio can load.
# If the real module cannot be imported, an empty placeholder module is
# registered under the name "audioop" so a later `import audioop` succeeds.
# The placeholder defines no functions, so it only satisfies the import itself.
try:
    import audioop  # noqa: F401
except ModuleNotFoundError:
    sys.modules["audioop"] = types.ModuleType("audioop")
# Lazy-install llama-cpp-python (avoids build-time OOM/timeout on HF Spaces)
def _ensure_llama_cpp():
    """Make sure ``llama_cpp`` can be imported, installing it with pip if not.

    When the import fails, ``pip install llama-cpp-python`` is run with the
    current interpreter (``sys.executable``) and an extra wheel index for CPU
    builds. Nothing is done when the package is already importable.

    Raises:
        subprocess.CalledProcessError: if the pip command exits non-zero.
    """
    try:
        import llama_cpp  # noqa: F401
    except ImportError:
        import importlib
        import subprocess

        print("Installing llama-cpp-python (first run — may take ~2 min)...", flush=True)
        subprocess.check_call([
            sys.executable, "-m", "pip", "install", "-q",
            "--extra-index-url",
            "https://abetlen.github.io/llama-cpp-python/whl/cpu",
            "llama-cpp-python",
        ])
        # The package was installed while this interpreter was already
        # running; refresh the import system's finder caches so the later
        # `from llama_cpp import Llama` can see it.
        importlib.invalidate_caches()
_ensure_llama_cpp()
import gradio as gr
import requests
import os
import time
import psutil
# Budget in megabytes (2048 MB = 2 GB). Used both as the maximum allowed
# model download size and as the memory-growth threshold for the PASS badge.
MAX_RAM_MB = 2048
# Directory that downloaded model files are written to (created on demand).
DOWNLOAD_DIR = "/tmp/models"
# Fixed prompt sent to every model under test; also displayed in the UI.
TEST_PROMPT = "Hi Mina, aiyo today so hot sia"
def check_model_size(url):
    """Ask the server how large the file at *url* is, without downloading it.

    Issues a HEAD request (following redirects, 10 s timeout) and reads the
    ``content-length`` response header.

    Returns:
        A ``(size_mb, error)`` pair:
        * ``(float, None)`` - size in megabytes when the header is present;
        * ``(None, None)``  - the request worked but no size was reported;
        * ``(None, str)``   - the request or header parsing raised; the
          string is the exception message.
    """
    bytes_per_mb = 1024 * 1024
    try:
        response = requests.head(url, allow_redirects=True, timeout=10)
        raw_length = response.headers.get("content-length")
        # A missing or empty header means "size unknown", not an error.
        size_mb = int(raw_length) / bytes_per_mb if raw_length else None
    except Exception as exc:
        return None, str(exc)
    return size_mb, None
def download_model(url):
    """Download the model at *url* into ``DOWNLOAD_DIR``, enforcing the size cap.

    The size reported by ``check_model_size`` is checked first; the byte
    count is then also enforced while streaming, because the reported size
    may be missing.

    Args:
        url: Direct download URL of the model file.

    Returns:
        A ``(filepath, error)`` pair: ``(str, None)`` on success, or
        ``(None, str)`` with a human-readable message on failure. On any
        failure after the local file was created, the partial file is
        removed so aborted downloads do not accumulate on disk.
    """
    os.makedirs(DOWNLOAD_DIR, exist_ok=True)
    # Last path segment without the query string; fall back to a fixed name
    # when the URL ends in "/" and the segment would otherwise be empty.
    filename = url.split("/")[-1].split("?")[0] or "model.gguf"
    filepath = os.path.join(DOWNLOAD_DIR, filename)

    size_mb, err = check_model_size(url)
    if err:
        return None, f"Cannot reach URL: {err}"
    if size_mb and size_mb > MAX_RAM_MB:
        return None, (
            f"Model too large for Raspberry Pi 4: {size_mb:.1f}MB > 2GB limit\n"
            "Use Q2_K quantization to reduce model size."
        )

    limit_bytes = MAX_RAM_MB * 1024 * 1024
    created = False    # True once this call has opened the local file
    completed = False  # True only when the whole body was written
    try:
        with requests.get(url, stream=True, timeout=30) as r:
            r.raise_for_status()
            downloaded = 0
            with open(filepath, "wb") as f:
                created = True
                for chunk in r.iter_content(chunk_size=65536):
                    f.write(chunk)
                    downloaded += len(chunk)
                    if downloaded > limit_bytes:
                        # The partial file is deleted in `finally`, after
                        # the file handle has been closed.
                        return None, "Download exceeded 2GB Raspberry Pi 4 limit"
        completed = True
        return filepath, None
    except Exception as e:
        # Boundary handler: callers expect an error string, not an exception.
        return None, str(e)
    finally:
        if created and not completed:
            try:
                os.remove(filepath)
            except OSError:
                pass  # best-effort cleanup; the original error is reported
def run_inference(model_url):
    """Download a GGUF model, run the test prompt once, and stream UI updates.

    This is a generator. Every yielded value is a 4-tuple of strings in the
    order the click handler lists its outputs: (inference time, memory used,
    model output, result badge). Progress and error messages are placed in
    the first slot, except inference errors, whose detail goes in the third.

    Args:
        model_url: URL of a ``.gguf`` model file; may be ``None`` or blank.

    Yields:
        Progress tuples (badge ``"IN PROGRESS"``) followed by one final
        tuple whose badge starts with ``"PASS"`` or ``"FAIL"``.
    """
    # Validation results must be yielded: inside a generator, `return value`
    # ends the iteration without emitting anything to the consumer.
    if not model_url or not model_url.strip():
        yield "No URL provided", "", "", "FAIL"
        return
    model_url = model_url.strip()
    if ".gguf" not in model_url.lower():
        yield "Only GGUF format supported on Raspberry Pi 4", "", "", "FAIL"
        return

    yield "Checking model size...", "", "", "IN PROGRESS"
    filepath, error = download_model(model_url)
    if error:
        yield f"{error}", "", "", "FAIL"
        return

    try:
        from llama_cpp import Llama

        yield "Loading model on simulated ARM CPU...", "", "", "IN PROGRESS"
        process = psutil.Process()
        mem_before = process.memory_info().rss / (1024 * 1024)
        # perf_counter is monotonic, so the measured duration cannot be
        # skewed by wall-clock adjustments. The timed span covers both
        # model loading and generation.
        t_start = time.perf_counter()
        llm = Llama(model_path=filepath, n_ctx=256, n_threads=2, verbose=False)
        output = llm(TEST_PROMPT, max_tokens=64, echo=False)
        t_end = time.perf_counter()
        mem_after = process.memory_info().rss / (1024 * 1024)

        inference_ms = (t_end - t_start) * 1000
        # Growth of this process's resident set size across load + generation.
        memory_used_mb = mem_after - mem_before
        output_text = output["choices"][0]["text"].strip()
        badge = (
            "PASS - Fits on Raspberry Pi 4 (2GB)"
            if memory_used_mb <= MAX_RAM_MB
            else f"FAIL - Memory {memory_used_mb:.0f}MB exceeded 2GB Pi 4 limit"
        )
        yield (
            f"{inference_ms:.0f} ms",
            f"{memory_used_mb:.0f} MB",
            output_text,
            badge,
        )
    except Exception as e:
        # Boundary handler: report any load/inference failure in the UI.
        yield "Inference error", "", str(e), "FAIL"
    finally:
        # Always delete the downloaded model so files do not pile up on disk.
        if filepath and os.path.exists(filepath):
            os.remove(filepath)
# Gradio UI definition. Components are created in display order inside the
# Blocks context.
# NOTE(review): the nesting below (which components sit inside each Row) was
# reconstructed from source whose indentation had been stripped - confirm
# against the deployed layout.
with gr.Blocks(title="Virtual Raspberry Pi 4", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        "# Virtual Raspberry Pi 4\n"
        "**Edge AI Test Environment - 2GB RAM Limit**\n\n"
        "*IoT / Embedded Linux deployment testing for Project Mina*\n\n"
        "> Simulates ARM Cortex-A72 with 2GB RAM. Use Q2_K models."
    )
    # Input row: model URL field and the button that starts a test run.
    with gr.Row():
        model_url_input = gr.Textbox(
            label="GGUF Model URL",
            placeholder="https://huggingface.co/user/repo/resolve/main/model-q2_k.gguf",
            scale=4,
        )
        run_btn = gr.Button("Run Test", variant="primary", scale=1)
    gr.Markdown(f"**Test prompt:** `{TEST_PROMPT}`")
    # Result fields; all are read-only.
    with gr.Row():
        inference_time_out = gr.Textbox(label="Inference Time", interactive=False)
        memory_used_out = gr.Textbox(label="Memory Used", interactive=False)
    output_text_out = gr.Textbox(label="Model Output", interactive=False, lines=4)
    status_out = gr.Textbox(label="Result Badge", interactive=False)
    # The outputs are listed in the same order as the 4-tuples that
    # run_inference yields: time, memory, model output, badge.
    run_btn.click(
        run_inference,
        inputs=[model_url_input],
        outputs=[inference_time_out, memory_used_out, output_text_out, status_out],
    )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()