Spaces:

munyew
/

mina-test-cloud-minimum

Sleeping

App Files Files Community

munyew committed 30 days ago

Commit

01308c5

verified ·

1 Parent(s): 79e1d99

Upload app.py with huggingface_hub

Browse files

Files changed (1) hide show

app.py +152 -0

app.py ADDED Viewed

	@@ -0,0 +1,152 @@

+import gradio as gr
+import requests
+import os
+import time
+import psutil
+# Memory ceiling in MB; the process RSS growth measured while loading and
+# running a model is compared against this value to decide PASS/FAIL.
+MAX_RAM_MB = 4096
+# Fixed prompt sent to every model under test and shown in the UI.
+TEST_PROMPT = "Hi Mina, aiyo today so hot sia"
+def get_available_memory_mb():
+    return psutil.virtual_memory().available / (1024 * 1024)
+def run_transformer_inference(model_id):
+    if not model_id or not model_id.strip():
+        return "❌ No model ID provided", "", "", "⛔ FAIL"
+    model_id = model_id.strip()
+    # Reject GGUF paths
+    if model_id.lower().endswith(".gguf") or "/" not in model_id and model_id.lower().endswith(".gguf"):
+        return (
+            "❌ GGUF not supported here",
+            "",
+            "Use munyew/mina-test-honor-magic8 for GGUF models",
+            "⛔ FAIL — Use the GGUF spaces for GGUF models",
+        )
+    yield "⏳ Loading model from HuggingFace Hub...", "", "", "🔄 IN PROGRESS"
+    available_mb = get_available_memory_mb()
+    if available_mb < 512:
+        yield (
+            "❌ Insufficient memory",
+            f"Only {available_mb:.0f}MB available",
+            "",
+            "⛔ FAIL — Not enough RAM to load any model",
+        )
+        return
+    try:
+        from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
+        import torch
+        yield "⏳ Initialising transformers pipeline (CPU)...", "", "", "🔄 IN PROGRESS"
+        mem_before = psutil.Process().memory_info().rss / (1024 * 1024)
+        t_start = time.time()
+        pipe = pipeline(
+            "text-generation",
+            model=model_id,
+            device="cpu",
+            torch_dtype=torch.float32,
+            trust_remote_code=True,
+        )
+        t_loaded = time.time()
+        mem_loaded = psutil.Process().memory_info().rss / (1024 * 1024)
+        load_mem_mb = mem_loaded - mem_before
+        if load_mem_mb > MAX_RAM_MB:
+            yield (
+                f"❌ Model too large: {load_mem_mb:.0f}MB",
+                "",
+                "",
+                f"⛔ FAIL — {load_mem_mb:.0f}MB exceeds 4GB cloud minimum limit",
+            )
+            return
+        output = pipe(
+            TEST_PROMPT,
+            max_new_tokens=128,
+            do_sample=False,
+            pad_token_id=pipe.tokenizer.eos_token_id,
+        )
+        t_end = time.time()
+        mem_after = psutil.Process().memory_info().rss / (1024 * 1024)
+        load_time_s = t_loaded - t_start
+        infer_time_ms = (t_end - t_loaded) * 1000
+        total_mem_mb = mem_after - mem_before
+        generated_text = output[0]["generated_text"]
+        if generated_text.startswith(TEST_PROMPT):
+            generated_text = generated_text[len(TEST_PROMPT):].strip()
+        if total_mem_mb <= MAX_RAM_MB:
+            badge = f"✅ PASS — {total_mem_mb:.0f}MB RAM used (within 4GB cloud limit)"
+        else:
+            badge = f"⛔ FAIL — {total_mem_mb:.0f}MB exceeded 4GB cloud minimum limit"
+        yield (
+            f"⏱️ Load: {load_time_s:.1f}s | Inference: {infer_time_ms:.0f}ms",
+            f"💾 {total_mem_mb:.0f} MB",
+            generated_text,
+            badge,
+        )
+    except Exception as e:
+        err_str = str(e)
+        if "out of memory" in err_str.lower() or "oom" in err_str.lower():
+            yield (
+                "❌ Out of Memory",
+                "",
+                "",
+                "⛔ FAIL — Model caused OOM on 4GB cloud minimum",
+            )
+        else:
+            yield "❌ Error loading model", "", err_str, "⛔ FAIL"
+# Build the Gradio UI. Components are created inside the Blocks/Row context
+# managers, so the statement order below is significant and is left unchanged.
+with gr.Blocks(title="Virtual Cloud Minimum", theme=gr.themes.Soft()) as demo:
+    # Static header describing what this app tests.
+    gr.Markdown(
+        """
+# ☁️ Virtual Cloud Minimum
+**Transformer Model Test — 4GB RAM, CPU Only**
+*Tests HuggingFace transformer models (not GGUF) — for SEA-LION and similar*
+> Provide a HuggingFace model ID (e.g. `aisingapore/llm-sealion-1b`).
+> GGUF models are not supported here — use the dedicated GGUF spaces.
+"""
+    )
+    # Input row: model ID textbox next to the run button.
+    with gr.Row():
+        model_id_input = gr.Textbox(
+            label="HuggingFace Model ID",
+            placeholder="aisingapore/llm-sealion-1b",
+            scale=4,
+        )
+        run_btn = gr.Button("▶ Run Test", variant="primary", scale=1)
+    # Show the fixed prompt that every model is tested with.
+    gr.Markdown(f"**Test prompt:** `{TEST_PROMPT}`")
+    # Read-only result fields, filled from the tuples yielded by
+    # run_transformer_inference.
+    with gr.Row():
+        timing_out = gr.Textbox(label="Timing", interactive=False)
+        memory_used_out = gr.Textbox(label="Memory Used", interactive=False)
+    output_text_out = gr.Textbox(label="Model Output", interactive=False, lines=4)
+    status_out = gr.Textbox(label="Result Badge", interactive=False, lines=2)
+    # The order of `outputs` must match the order of the 4-tuple yielded by
+    # run_transformer_inference: (timing, memory, model output, badge).
+    run_btn.click(
+        run_transformer_inference,
+        inputs=[model_id_input],
+        outputs=[timing_out, memory_used_out, output_text_out, status_out],
+    )
+# Start the app only when this file is executed as a script.
+if __name__ == "__main__":
+    demo.launch()