import sys
import types
# Python 3.13 compat: audioop removed; stub it so pydub/gradio can load
try:
    import audioop  # noqa: F401
except ModuleNotFoundError:
    sys.modules["audioop"] = types.ModuleType("audioop")
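# (audioop was dropped from the stdlib in Python 3.13 per PEP 594; an empty
# module object is enough for `import audioop` to succeed at load time, though
# code that actually calls audioop functions would still fail.)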
import gradio as gr
import time
import psutil

MAX_RAM_MB = 4096  # budget for the simulated "4GB cloud minimum" tier
TEST_PROMPT = "Hi Mina, aiyo today so hot sia"  # fixed test prompt keeps runs comparable


def get_available_memory_mb():
    # Free system RAM in MB, as reported by psutil
    return psutil.virtual_memory().available / (1024 * 1024)


def run_transformer_inference(model_id):
    # Generator callback: Gradio streams each yielded 4-tuple into the four
    # output boxes. Early exits must yield before returning; a bare
    # `return value` inside a generator never reaches the UI.
    if not model_id or not model_id.strip():
        yield "No model ID provided", "", "", "FAIL"
        return
    model_id = model_id.strip()
    if model_id.lower().endswith(".gguf"):
        yield (
            "GGUF not supported here",
            "",
            "Use munyew/mina-test-honor-magic8 for GGUF models",
            "FAIL - Use the GGUF spaces for GGUF models",
        )
        return
yield "Loading model from HuggingFace Hub...", "", "", "IN PROGRESS"
available_mb = get_available_memory_mb()
if available_mb < 512:
yield (
"Insufficient memory",
f"Only {available_mb:.0f}MB available",
"",
"FAIL - Not enough RAM to load any model",
)
return
    try:
        # Import lazily so the UI starts even when transformers/torch are slow to import
        from transformers import pipeline
        import torch

        yield "Initialising transformers pipeline (CPU)...", "", "", "IN PROGRESS"
        mem_before = psutil.Process().memory_info().rss / (1024 * 1024)
        t_start = time.time()
        pipe = pipeline(
            "text-generation",
            model=model_id,
            device="cpu",
            torch_dtype=torch.float32,  # full precision is the safe default on CPU
            trust_remote_code=True,
        )
        t_loaded = time.time()
        mem_loaded = psutil.Process().memory_info().rss / (1024 * 1024)
        load_mem_mb = mem_loaded - mem_before
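        # Caveat: the RSS delta is only a rough proxy for model footprint;
        # allocator fragmentation and shared pages can skew it either way.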
        if load_mem_mb > MAX_RAM_MB:
            yield (
                f"Model too large: {load_mem_mb:.0f}MB",
                "",
                "",
                f"FAIL - {load_mem_mb:.0f}MB exceeds the 4GB cloud minimum limit",
            )
            return
        # Greedy decoding (do_sample=False) keeps output deterministic across runs
        output = pipe(
            TEST_PROMPT,
            max_new_tokens=128,
            do_sample=False,
            pad_token_id=pipe.tokenizer.eos_token_id,
        )
        t_end = time.time()
        mem_after = psutil.Process().memory_info().rss / (1024 * 1024)
        load_time_s = t_loaded - t_start
        infer_ms = (t_end - t_loaded) * 1000
        total_mem_mb = mem_after - mem_before
        generated = output[0]["generated_text"]
        # text-generation pipelines echo the prompt; strip it from the reply
        if generated.startswith(TEST_PROMPT):
            generated = generated[len(TEST_PROMPT):].strip()
        badge = (
            f"PASS - {total_mem_mb:.0f}MB RAM (within 4GB cloud limit)"
            if total_mem_mb <= MAX_RAM_MB
            else f"FAIL - {total_mem_mb:.0f}MB exceeded the 4GB cloud minimum limit"
        )
        yield (
            f"Load: {load_time_s:.1f}s | Inference: {infer_ms:.0f}ms",
            f"{total_mem_mb:.0f} MB",
            generated,
            badge,
        )
    except Exception as e:
        err = str(e)
        if "out of memory" in err.lower() or "oom" in err.lower():
            yield "Out of Memory", "", "", "FAIL - OOM on 4GB cloud minimum"
        else:
            yield "Error loading model", "", err, "FAIL"
with gr.Blocks(title="Virtual Cloud Minimum", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        "# Virtual Cloud Minimum\n"
        "**Transformer Model Test - 4GB RAM, CPU Only**\n\n"
        "*Tests HuggingFace transformer models (not GGUF) - for SEA-LION and similar*\n\n"
        "> Provide a HuggingFace model ID (e.g. `aisingapore/llm-sealion-1b`).\n"
        "> GGUF models are not supported here."
    )
    with gr.Row():
        model_id_input = gr.Textbox(
            label="HuggingFace Model ID",
            placeholder="aisingapore/llm-sealion-1b",
            scale=4,
        )
        run_btn = gr.Button("Run Test", variant="primary", scale=1)
    gr.Markdown(f"**Test prompt:** `{TEST_PROMPT}`")
    with gr.Row():
        timing_out = gr.Textbox(label="Timing", interactive=False)
        memory_used_out = gr.Textbox(label="Memory Used", interactive=False)
    output_text_out = gr.Textbox(label="Model Output", interactive=False, lines=4)
    status_out = gr.Textbox(label="Result Badge", interactive=False, lines=2)
    run_btn.click(
        run_transformer_inference,
        inputs=[model_id_input],
        outputs=[timing_out, memory_used_out, output_text_out, status_out],
    )
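
# Local sanity check (assumption: running this outside Spaces):
#   pip install gradio psutil transformers torch
#   python app.py  # then open the printed localhost URL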
if __name__ == "__main__":
    demo.launch()