Spaces:

munyew
/

mina-test-raspberry-pi

Sleeping

App Files Files Community

mina-test-raspberry-pi / app.py

munyew

fix: audioop stub + lazy llama-cpp, clean UTF-8, no BOM

6316b10 verified 26 days ago

raw

history blame contribute delete

5.21 kB

	import sys
	import types

	# Python 3.13 no longer ships the stdlib ``audioop`` module. When the real
	# module is missing, register an empty placeholder under that name so that
	# pydub/gradio, which import it, can still be loaded.
	try:
	    import audioop  # noqa: F401
	except ModuleNotFoundError:
	    sys.modules.update(audioop=types.ModuleType("audioop"))

	# Lazy-install llama-cpp-python (avoids build-time OOM/timeout on HF Spaces)
	def _ensure_llama_cpp():
	    """Ensure ``llama_cpp`` can be imported, installing it on first run.

	    When the import fails, ``llama-cpp-python`` is installed with pip into
	    the running interpreter (``sys.executable``), with the CPU wheel index
	    at abetlen.github.io added as an extra index.

	    Raises:
	        subprocess.CalledProcessError: If pip exits with a non-zero status.
	    """
	    try:
	        import llama_cpp  # noqa: F401
	    except ImportError:
	        import importlib
	        import subprocess

	        print("Installing llama-cpp-python (first run — may take ~2 min)...", flush=True)
	        subprocess.check_call([
	            sys.executable, "-m", "pip", "install", "-q",
	            "--extra-index-url",
	            "https://abetlen.github.io/llama-cpp-python/whl/cpu",
	            "llama-cpp-python",
	        ])
	        # The package was written to disk after this interpreter started;
	        # drop the import system's cached directory listings so the later
	        # ``from llama_cpp import Llama`` is guaranteed to see it.
	        importlib.invalidate_caches()

	# Run the check at import time so llama_cpp is installed before the request
	# handler below tries to import it.
	_ensure_llama_cpp()

	import gradio as gr
	import requests
	import os
	import time
	import psutil

	# Ceiling in MB, applied both to the download size and to the measured
	# memory growth during the test run.
	MAX_RAM_MB = 2 * 1024
	# Directory that fetched model files are written to.
	DOWNLOAD_DIR = "/tmp/models"
	# Fixed prompt sent to every model under test.
	TEST_PROMPT = "Hi Mina, aiyo today so hot sia"


	def check_model_size(url):
	    """Probe *url* with a HEAD request and report the advertised size.

	    Returns:
	        ``(size_mb, None)`` when a Content-Length header is present,
	        ``(None, None)`` when the header is absent or empty, and
	        ``(None, message)`` when the request or the header parsing raised.
	    """
	    try:
	        response = requests.head(url, allow_redirects=True, timeout=10)
	        raw_length = response.headers.get("content-length")
	        size_mb = int(raw_length) / (1024 * 1024) if raw_length else None
	    except Exception as exc:
	        return None, str(exc)
	    return size_mb, None


	def _discard_partial_file(path):
	    """Best-effort removal of an incomplete download at *path*."""
	    try:
	        os.remove(path)
	    except OSError:
	        # Either the file was never created or it cannot be removed; the
	        # caller is already reporting the primary failure.
	        pass


	def download_model(url):
	    """Fetch the model at *url* into DOWNLOAD_DIR, enforcing the size ceiling.

	    Args:
	        url: Direct download URL. Its last path segment, with any query
	            string stripped, is used as the local file name.

	    Returns:
	        ``(filepath, None)`` on success, or ``(None, message)`` when the URL
	        has no file name, the size check fails, or the transfer raises.
	    """
	    filename = url.split("/")[-1].split("?")[0]
	    if not filename:
	        # Without a name the target path would be the directory itself.
	        return None, "URL does not end in a file name"

	    os.makedirs(DOWNLOAD_DIR, exist_ok=True)
	    filepath = os.path.join(DOWNLOAD_DIR, filename)

	    size_mb, err = check_model_size(url)
	    if err:
	        return None, f"Cannot reach URL: {err}"
	    if size_mb and size_mb > MAX_RAM_MB:
	        return None, (
	            f"Model too large for Raspberry Pi 4: {size_mb:.1f}MB > 2GB limit\n"
	            "Use Q2_K quantization to reduce model size."
	        )

	    limit_bytes = MAX_RAM_MB * 1024 * 1024
	    over_limit = False
	    try:
	        with requests.get(url, stream=True, timeout=30) as r:
	            r.raise_for_status()
	            downloaded = 0
	            with open(filepath, "wb") as f:
	                for chunk in r.iter_content(chunk_size=65536):
	                    f.write(chunk)
	                    downloaded += len(chunk)
	                    # The HEAD probe may have reported no size, so the
	                    # ceiling is enforced again on the bytes received.
	                    if downloaded > limit_bytes:
	                        over_limit = True
	                        break
	    except Exception as e:
	        # Do not leave a truncated file behind in DOWNLOAD_DIR.
	        _discard_partial_file(filepath)
	        return None, str(e)

	    if over_limit:
	        # Removed only after the file handle has been closed.
	        _discard_partial_file(filepath)
	        return None, "Download exceeded 2GB Raspberry Pi 4 limit"
	    return filepath, None


	def run_inference(model_url):
	    """Download a GGUF model, run the fixed test prompt, and stream results.

	    This is a generator. Every item it yields is a 4-tuple
	    ``(inference_time, memory_used, model_output, badge)``; progress and
	    error messages are carried in the first slot.

	    Args:
	        model_url: URL of the model file; must contain ``.gguf``.

	    Yields:
	        Progress tuples while working, then one final tuple holding either
	        the measurements and model output or the failure details.
	    """
	    # A generator's ``return <value>`` is never delivered to the consumer,
	    # so validation failures have to be yielded before returning.
	    if not model_url or not model_url.strip():
	        yield "No URL provided", "", "", "FAIL"
	        return

	    model_url = model_url.strip()
	    if ".gguf" not in model_url.lower():
	        yield "Only GGUF format supported on Raspberry Pi 4", "", "", "FAIL"
	        return

	    yield "Checking model size...", "", "", "IN PROGRESS"

	    filepath, error = download_model(model_url)
	    if error:
	        yield f"{error}", "", "", "FAIL"
	        return

	    try:
	        from llama_cpp import Llama

	        yield "Loading model on simulated ARM CPU...", "", "", "IN PROGRESS"

	        mem_before = psutil.Process().memory_info().rss / (1024 * 1024)
	        t_start = time.time()

	        llm = Llama(model_path=filepath, n_ctx=256, n_threads=2, verbose=False)
	        output = llm(TEST_PROMPT, max_tokens=64, echo=False)

	        t_end = time.time()
	        mem_after = psutil.Process().memory_info().rss / (1024 * 1024)

	        # The timed window covers both model loading and generation.
	        inference_ms = (t_end - t_start) * 1000
	        # Memory use is the growth of this process's RSS across the run.
	        memory_used_mb = mem_after - mem_before
	        output_text = output["choices"][0]["text"].strip()

	        badge = (
	            "PASS - Fits on Raspberry Pi 4 (2GB)"
	            if memory_used_mb <= MAX_RAM_MB
	            else f"FAIL - Memory {memory_used_mb:.0f}MB exceeded 2GB Pi 4 limit"
	        )

	        yield (
	            f"{inference_ms:.0f} ms",
	            f"{memory_used_mb:.0f} MB",
	            output_text,
	            badge,
	        )
	    except Exception as e:
	        yield "Inference error", "", str(e), "FAIL"
	    finally:
	        # Always delete the downloaded model, whether the run succeeded or not.
	        if filepath and os.path.exists(filepath):
	            os.remove(filepath)


	# Gradio UI: header, URL input with run button, then the four result boxes.
	with gr.Blocks(title="Virtual Raspberry Pi 4", theme=gr.themes.Soft()) as demo:
	    intro_md = (
	        "# Virtual Raspberry Pi 4\n"
	        "Edge AI Test Environment - 2GB RAM Limit\n\n"
	        "IoT / Embedded Linux deployment testing for Project Mina\n\n"
	        "> Simulates ARM Cortex-A72 with 2GB RAM. Use Q2_K models."
	    )
	    gr.Markdown(intro_md)

	    with gr.Row():
	        model_url_input = gr.Textbox(
	            label="GGUF Model URL",
	            placeholder="https://huggingface.co/user/repo/resolve/main/model-q2_k.gguf",
	            scale=4,
	        )
	        run_btn = gr.Button("Run Test", variant="primary", scale=1)

	    gr.Markdown(f"Test prompt: `{TEST_PROMPT}`")

	    with gr.Row():
	        inference_time_out = gr.Textbox(label="Inference Time", interactive=False)
	        memory_used_out = gr.Textbox(label="Memory Used", interactive=False)

	    # NOTE(review): the source's indentation was lost; these two boxes are
	    # assumed to sit below the metrics row rather than inside it — confirm.
	    output_text_out = gr.Textbox(label="Model Output", interactive=False, lines=4)
	    status_out = gr.Textbox(label="Result Badge", interactive=False)

	    # Order matters: each tuple yielded by run_inference fills these boxes
	    # position by position.
	    result_boxes = [inference_time_out, memory_used_out, output_text_out, status_out]
	    run_btn.click(
	        fn=run_inference,
	        inputs=[model_url_input],
	        outputs=result_boxes,
	    )

	# Start the Gradio server only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()