munyew committed on
Commit
01308c5
·
verified ·
1 Parent(s): 79e1d99

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +152 -0
app.py ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import requests
3
+ import os
4
+ import time
5
+ import psutil
6
+
7
# RAM ceiling (in MiB) that a model may use before the test is marked FAIL.
MAX_RAM_MB = 4 * 1024
# Fixed prompt sent to every model under test.
TEST_PROMPT = "Hi Mina, aiyo today so hot sia"
9
+
10
+
11
def get_available_memory_mb():
    """Return the system memory psutil reports as available, in MiB."""
    bytes_per_mib = 1024 * 1024
    available_bytes = psutil.virtual_memory().available
    return available_bytes / bytes_per_mib
13
+
14
+
15
def _is_oom_error(exc):
    """Return True when *exc* looks like an out-of-memory failure."""
    import re

    if isinstance(exc, MemoryError):
        return True
    message = str(exc).lower()
    if "out of memory" in message:
        return True
    # Match "oom" only as a standalone token; a plain substring test would
    # also fire on model names such as "bloom" quoted in unrelated errors.
    return re.search(r"(?<![a-z])oom(?![a-z])", message) is not None


def run_transformer_inference(model_id):
    """Load a HuggingFace text-generation model on CPU and run the test prompt.

    This is a generator: every status update is yielded as a 4-tuple of
    strings ``(timing, memory_used, model_output, badge)``.  Validation
    failures are yielded as well, because a value passed to ``return`` inside
    a generator is never delivered to the consumer iterating over it.

    Args:
        model_id: HuggingFace Hub model identifier typed by the user.  Empty
            or whitespace-only values and ``.gguf`` paths are rejected.

    Yields:
        Progress tuples while loading, then one final tuple describing either
        the measured result (PASS/FAIL against ``MAX_RAM_MB``) or the error.
    """
    if not model_id or not model_id.strip():
        yield "❌ No model ID provided", "", "", "⛔ FAIL"
        return

    model_id = model_id.strip()

    # Reject GGUF paths: this app only exercises transformers checkpoints.
    if model_id.lower().endswith(".gguf"):
        yield (
            "❌ GGUF not supported here",
            "",
            "Use munyew/mina-test-honor-magic8 for GGUF models",
            "⛔ FAIL — Use the GGUF spaces for GGUF models",
        )
        return

    yield "⏳ Loading model from HuggingFace Hub...", "", "", "🔄 IN PROGRESS"

    available_mb = get_available_memory_mb()
    if available_mb < 512:
        yield (
            "❌ Insufficient memory",
            f"Only {available_mb:.0f}MB available",
            "",
            "⛔ FAIL — Not enough RAM to load any model",
        )
        return

    try:
        # Imported lazily so the UI can start before these heavy packages load.
        from transformers import pipeline
        import torch

        yield "⏳ Initialising transformers pipeline (CPU)...", "", "", "🔄 IN PROGRESS"

        bytes_per_mb = 1024 * 1024
        process = psutil.Process()

        mem_before = process.memory_info().rss / bytes_per_mb
        # perf_counter is monotonic, so durations are immune to clock changes.
        t_start = time.perf_counter()

        # NOTE(security): trust_remote_code=True lets the model repository run
        # its own Python code in this process, and model_id is user-supplied.
        # Kept because some target models ship custom modelling code; only
        # deploy this where running arbitrary Hub code is acceptable.
        pipe = pipeline(
            "text-generation",
            model=model_id,
            device="cpu",
            torch_dtype=torch.float32,
            trust_remote_code=True,
        )

        t_loaded = time.perf_counter()
        mem_loaded = process.memory_info().rss / bytes_per_mb
        load_mem_mb = mem_loaded - mem_before

        # Bail out before inference if loading alone already blew the budget.
        if load_mem_mb > MAX_RAM_MB:
            yield (
                f"❌ Model too large: {load_mem_mb:.0f}MB",
                "",
                "",
                f"⛔ FAIL — {load_mem_mb:.0f}MB exceeds 4GB cloud minimum limit",
            )
            return

        output = pipe(
            TEST_PROMPT,
            max_new_tokens=128,
            do_sample=False,
            pad_token_id=pipe.tokenizer.eos_token_id,
        )

        t_end = time.perf_counter()
        mem_after = process.memory_info().rss / bytes_per_mb

        load_time_s = t_loaded - t_start
        infer_time_ms = (t_end - t_loaded) * 1000
        total_mem_mb = mem_after - mem_before

        # Show only the continuation when the output echoes the prompt.
        generated_text = output[0]["generated_text"]
        if generated_text.startswith(TEST_PROMPT):
            generated_text = generated_text[len(TEST_PROMPT):].strip()

        if total_mem_mb <= MAX_RAM_MB:
            badge = f"✅ PASS — {total_mem_mb:.0f}MB RAM used (within 4GB cloud limit)"
        else:
            badge = f"⛔ FAIL — {total_mem_mb:.0f}MB exceeded 4GB cloud minimum limit"

        yield (
            f"⏱️ Load: {load_time_s:.1f}s | Inference: {infer_time_ms:.0f}ms",
            f"💾 {total_mem_mb:.0f} MB",
            generated_text,
            badge,
        )

    except Exception as e:
        # Top-level boundary for the UI: report the failure instead of raising.
        if _is_oom_error(e):
            yield (
                "❌ Out of Memory",
                "",
                "",
                "⛔ FAIL — Model caused OOM on 4GB cloud minimum",
            )
        else:
            yield "❌ Error loading model", "", str(e), "⛔ FAIL"
113
+
114
+
115
# Assemble the single-page UI: intro text, model ID input, and result fields.
with gr.Blocks(title="Virtual Cloud Minimum", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        # ☁️ Virtual Cloud Minimum
        **Transformer Model Test — 4GB RAM, CPU Only**

        *Tests HuggingFace transformer models (not GGUF) — for SEA-LION and similar*

        > Provide a HuggingFace model ID (e.g. `aisingapore/llm-sealion-1b`).
        > GGUF models are not supported here — use the dedicated GGUF spaces.
        """
    )

    # Input row: model ID textbox next to the run button.
    with gr.Row():
        model_id_input = gr.Textbox(
            label="HuggingFace Model ID",
            placeholder="aisingapore/llm-sealion-1b",
            scale=4,
        )
        run_btn = gr.Button("▶ Run Test", variant="primary", scale=1)

    gr.Markdown(f"**Test prompt:** `{TEST_PROMPT}`")

    # Read-only result fields, filled from the tuples the test function yields.
    with gr.Row():
        timing_out = gr.Textbox(label="Timing", interactive=False)
        memory_used_out = gr.Textbox(label="Memory Used", interactive=False)

    output_text_out = gr.Textbox(label="Model Output", interactive=False, lines=4)
    status_out = gr.Textbox(label="Result Badge", interactive=False, lines=2)

    # Output order must match the tuple order produced by the test function.
    run_btn.click(
        fn=run_transformer_inference,
        inputs=[model_id_input],
        outputs=[timing_out, memory_used_out, output_text_out, status_out],
    )

# Start the app only when executed as a script, not when imported.
if __name__ == "__main__":
    demo.launch()