import sys
import types
# Python 3.13 compat: audioop removed; stub it so pydub/gradio can load
try:
    import audioop  # noqa: F401
except ModuleNotFoundError:
    sys.modules["audioop"] = types.ModuleType("audioop")
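# The stub only satisfies the import; anything that actually calls an
# audioop function will still raise AttributeError.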

import time

import gradio as gr
import psutil

MAX_RAM_MB = 4096  # simulated "cloud minimum" RAM budget: 4 GB
TEST_PROMPT = "Hi Mina, aiyo today so hot sia"  # Singlish smoke-test prompt


def get_available_memory_mb():
    """Return system-wide available memory in MB."""
    return psutil.virtual_memory().available / (1024 * 1024)


def run_transformer_inference(model_id):
    """Load a Hub model on CPU, run one prompt, report timing and memory.

    A generator: early exits must yield before returning, since a plain
    return value is discarded inside a generator (Gradio would show nothing).
    """
    if not model_id or not model_id.strip():
        yield "No model ID provided", "", "", "FAIL"
        return
    model_id = model_id.strip()
    if model_id.lower().endswith(".gguf"):
        yield (
            "GGUF not supported here",
            "",
            "Use munyew/mina-test-honor-magic8 for GGUF models",
            "FAIL - Use the GGUF spaces for GGUF models",
        )
        return
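    # Pre-flight: refuse to download/load anything if free RAM is already low.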
yield "Loading model from HuggingFace Hub...", "", "", "IN PROGRESS"
available_mb = get_available_memory_mb()
if available_mb < 512:
yield (
"Insufficient memory",
f"Only {available_mb:.0f}MB available",
"",
"FAIL - Not enough RAM to load any model",
)
return
    try:
        from transformers import pipeline
        import torch

        yield "Initialising transformers pipeline (CPU)...", "", "", "IN PROGRESS"
        mem_before = psutil.Process().memory_info().rss / (1024 * 1024)
        t_start = time.time()
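        # CPU-only, float32 pipeline; trust_remote_code lets models that ship
        # custom modeling code (as some SEA-LION releases do) load.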
        pipe = pipeline(
            "text-generation",
            model=model_id,
            device="cpu",
            torch_dtype=torch.float32,
            trust_remote_code=True,
        )
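        # Measure load time and RSS delta; the delta approximates the model's
        # in-memory footprint within this process.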
        t_loaded = time.time()
        mem_loaded = psutil.Process().memory_info().rss / (1024 * 1024)
        load_mem_mb = mem_loaded - mem_before
        if load_mem_mb > MAX_RAM_MB:
            yield (
                f"Model too large: {load_mem_mb:.0f}MB",
                "",
                "",
                f"FAIL - {load_mem_mb:.0f}MB exceeds 4GB cloud minimum limit",
            )
            return
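        # Greedy decoding (do_sample=False) keeps the smoke test deterministic;
        # passing eos as pad_token_id silences the missing-pad-token warning.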
        output = pipe(
            TEST_PROMPT,
            max_new_tokens=128,
            do_sample=False,
            pad_token_id=pipe.tokenizer.eos_token_id,
        )
        t_end = time.time()
        mem_after = psutil.Process().memory_info().rss / (1024 * 1024)
        load_time_s = t_loaded - t_start
        infer_ms = (t_end - t_loaded) * 1000
        total_mem_mb = mem_after - mem_before
        generated = output[0]["generated_text"]
        # text-generation pipelines echo the prompt; show only the completion
        if generated.startswith(TEST_PROMPT):
            generated = generated[len(TEST_PROMPT):].strip()
        badge = (
            f"PASS - {total_mem_mb:.0f}MB RAM (within 4GB cloud limit)"
            if total_mem_mb <= MAX_RAM_MB
            else f"FAIL - {total_mem_mb:.0f}MB exceeded 4GB cloud minimum limit"
        )
        yield (
            f"Load: {load_time_s:.1f}s | Inference: {infer_ms:.0f}ms",
            f"{total_mem_mb:.0f} MB",
            generated,
            badge,
        )
    except Exception as e:
        err = str(e)
        if "out of memory" in err.lower() or "oom" in err.lower():
            yield "Out of Memory", "", "", "FAIL - OOM on 4GB cloud minimum"
        else:
            yield "Error loading model", "", err, "FAIL"
with gr.Blocks(title="Virtual Cloud Minimum", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        "# Virtual Cloud Minimum\n"
        "**Transformer Model Test - 4GB RAM, CPU Only**\n\n"
        "*Tests HuggingFace transformer models (not GGUF) - for SEA-LION and similar*\n\n"
        "> Provide a HuggingFace model ID (e.g. `aisingapore/llm-sealion-1b`).\n"
        "> GGUF models are not supported here."
    )
    with gr.Row():
        model_id_input = gr.Textbox(
            label="HuggingFace Model ID",
            placeholder="aisingapore/llm-sealion-1b",
            scale=4,
        )
        run_btn = gr.Button("Run Test", variant="primary", scale=1)
gr.Markdown(f"**Test prompt:** `{TEST_PROMPT}`")
with gr.Row():
timing_out = gr.Textbox(label="Timing", interactive=False)
memory_used_out = gr.Textbox(label="Memory Used", interactive=False)
output_text_out = gr.Textbox(label="Model Output", interactive=False, lines=4)
status_out = gr.Textbox(label="Result Badge", interactive=False, lines=2)
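    # The handler is a generator, so Gradio streams each yield as an interim
    # update to the four output fields.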
    run_btn.click(
        run_transformer_inference,
        inputs=[model_id_input],
        outputs=[timing_out, memory_used_out, output_text_out, status_out],
    )


if __name__ == "__main__":
    demo.launch()