import sys
import types

# Python 3.13 compat: the stdlib audioop module was removed; stub it so
# pydub (pulled in by gradio) can still import.
try:
    import audioop  # noqa: F401
except ModuleNotFoundError:
    sys.modules["audioop"] = types.ModuleType("audioop")

import time

import gradio as gr
import psutil

MAX_RAM_MB = 4096
TEST_PROMPT = "Hi Mina, aiyo today so hot sia"


def get_available_memory_mb():
    return psutil.virtual_memory().available / (1024 * 1024)
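

# Generator: yields (timing, memory used, model output, status badge) tuples
# so Gradio can stream progress into the four output textboxes.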
def run_transformer_inference(model_id):
    if not model_id or not model_id.strip():
        # In a generator, a bare `return value` is discarded by Gradio;
        # yield the status tuple so it is actually displayed, then stop.
        yield "No model ID provided", "", "", "FAIL"
        return
    model_id = model_id.strip()
    if model_id.lower().endswith(".gguf"):
        yield (
            "GGUF not supported here",
            "",
            "Use munyew/mina-test-honor-magic8 for GGUF models",
            "FAIL - Use the GGUF spaces for GGUF models",
        )
        return
| yield "Loading model from HuggingFace Hub...", "", "", "IN PROGRESS" | |
| available_mb = get_available_memory_mb() | |
| if available_mb < 512: | |
| yield ( | |
| "Insufficient memory", | |
| f"Only {available_mb:.0f}MB available", | |
| "", | |
| "FAIL - Not enough RAM to load any model", | |
| ) | |
| return | |
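    # The RSS deltas below approximate the model's footprint; other
    # allocations in the process can shift the numbers by a few MB.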
    try:
        from transformers import pipeline
        import torch

        yield "Initialising transformers pipeline (CPU)...", "", "", "IN PROGRESS"
        mem_before = psutil.Process().memory_info().rss / (1024 * 1024)
        t_start = time.time()
        # trust_remote_code=True executes code from the model repo; only
        # point this at model IDs you trust.
        pipe = pipeline(
            "text-generation",
            model=model_id,
            device="cpu",
            torch_dtype=torch.float32,
            trust_remote_code=True,
        )
        t_loaded = time.time()
        mem_loaded = psutil.Process().memory_info().rss / (1024 * 1024)
        load_mem_mb = mem_loaded - mem_before
        if load_mem_mb > MAX_RAM_MB:
            yield (
                f"Model too large: {load_mem_mb:.0f}MB",
                "",
                "",
                f"FAIL - {load_mem_mb:.0f}MB exceeds the 4GB cloud-minimum limit",
            )
            return
        output = pipe(
            TEST_PROMPT,
            max_new_tokens=128,
            do_sample=False,
            pad_token_id=pipe.tokenizer.eos_token_id,
        )
        t_end = time.time()
        mem_after = psutil.Process().memory_info().rss / (1024 * 1024)
        load_time_s = t_loaded - t_start
        infer_ms = (t_end - t_loaded) * 1000
        total_mem_mb = mem_after - mem_before
        generated = output[0]["generated_text"]
        # text-generation pipelines echo the prompt; strip it from the output.
        if generated.startswith(TEST_PROMPT):
            generated = generated[len(TEST_PROMPT):].strip()
        badge = (
            f"PASS - {total_mem_mb:.0f}MB RAM (within the 4GB cloud-minimum limit)"
            if total_mem_mb <= MAX_RAM_MB
            else f"FAIL - {total_mem_mb:.0f}MB exceeded the 4GB cloud-minimum limit"
        )
        yield (
            f"Load: {load_time_s:.1f}s | Inference: {infer_ms:.0f}ms",
            f"{total_mem_mb:.0f} MB",
            generated,
            badge,
        )
    except Exception as e:
        err = str(e)
        if "out of memory" in err.lower() or "oom" in err.lower():
            yield "Out of Memory", "", "", "FAIL - OOM on 4GB cloud minimum"
        else:
            yield "Error loading model", "", err, "FAIL"


with gr.Blocks(title="Virtual Cloud Minimum", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        "# Virtual Cloud Minimum\n"
        "**Transformer Model Test - 4GB RAM, CPU Only**\n\n"
        "*Tests HuggingFace transformer models (not GGUF) - for SEA-LION and similar*\n\n"
        "> Provide a HuggingFace model ID (e.g. `aisingapore/llm-sealion-1b`).\n"
        "> GGUF models are not supported here."
    )
    with gr.Row():
        model_id_input = gr.Textbox(
            label="HuggingFace Model ID",
            placeholder="aisingapore/llm-sealion-1b",
            scale=4,
        )
        run_btn = gr.Button("Run Test", variant="primary", scale=1)
    gr.Markdown(f"**Test prompt:** `{TEST_PROMPT}`")
    with gr.Row():
        timing_out = gr.Textbox(label="Timing", interactive=False)
        memory_used_out = gr.Textbox(label="Memory Used", interactive=False)
    output_text_out = gr.Textbox(label="Model Output", interactive=False, lines=4)
    status_out = gr.Textbox(label="Result Badge", interactive=False, lines=2)
    run_btn.click(
        run_transformer_inference,
        inputs=[model_id_input],
        outputs=[timing_out, memory_used_out, output_text_out, status_out],
    )
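
# Local smoke test without the UI (the model ID below is just an example of a
# small text-generation checkpoint; substitute your own):
#   for timing, mem, text, status in run_transformer_inference("sshleifer/tiny-gpt2"):
#       print(status, "|", timing)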

if __name__ == "__main__":
    demo.launch()