turtle170 committed on
Commit
438c44e
·
verified ·
1 Parent(s): 7a06293

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +235 -52
app.py CHANGED
@@ -26,8 +26,8 @@ except ImportError:
26
  HF_TOKEN = os.environ.get("HF_TOKEN")
27
  SPACE_ID = os.environ.get("SPACE_ID")
28
  LOG_FILE = "engine_telemetry.json"
29
- RAM_LIMIT_PCT = 0.50
30
- SYSTEM_RESERVE_MB = 250
31
  DEFAULT_MODEL = "unsloth/Llama-3.2-1B-Instruct-GGUF"
32
  DEFAULT_QUANT = "Llama-3.2-1B-Instruct-Q4_K_M.gguf"
33
 
@@ -93,15 +93,22 @@ class ResourceMonitor:
93
 
94
  @staticmethod
95
  def validate_deployment(file_path: str) -> (bool, str):
96
- vm = psutil.virtual_memory()
97
- file_size_mb = os.path.getsize(file_path) / (1024**2)
98
- total_ram_mb = vm.total / (1024**2)
99
- avail_ram_mb = vm.available / (1024**2)
100
- if file_size_mb > (total_ram_mb * RAM_LIMIT_PCT):
101
- return False, f"Model size ({file_size_mb:.1f}MB) exceeds safety limit."
102
- if (file_size_mb + SYSTEM_RESERVE_MB) > avail_ram_mb:
103
- return False, f"Insufficient headroom for context (Need ~{file_size_mb+SYSTEM_RESERVE_MB:.1f}MB)."
104
- return True, "Passed."
 
 
 
 
 
 
 
105
 
106
  # --- ENGINE CORE ---
107
  class ZeroEngine:
@@ -116,36 +123,76 @@ class ZeroEngine:
116
  def list_ggufs(self, repo_id: str) -> List[str]:
117
  try:
118
  files = self.api.list_repo_files(repo_id=repo_id)
119
- return [f for f in files if f.endswith(".gguf")]
 
 
120
  except Exception as e:
121
  logger.error(f"Scan error: {e}")
122
  return []
123
 
124
  def boot_kernel(self, repo: str, filename: str) -> str:
 
125
  try:
126
- logger.info(f"Downloading {filename} from {repo}...")
127
- path = hf_hub_download(repo_id=repo, filename=filename, token=HF_TOKEN)
 
 
128
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
  valid, msg = ResourceMonitor.validate_deployment(path)
130
  if not valid:
131
- return msg
 
 
 
132
 
 
133
  with self.kernel_lock:
 
134
  if self.llm:
135
- del self.llm
136
- self.llm = Llama(
137
- model_path=path,
138
- n_ctx=2048,
139
- n_threads=2,
140
- use_mmap=True,
141
- n_batch=512,
142
- verbose=False
143
- )
144
- self.active_model_info = {"repo": repo, "file": filename}
145
- self.telemetry.track_load(repo, filename)
146
-
147
- return f"🟢 KERNEL ONLINE: {filename}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
  except Exception as e:
 
149
  return f"🔴 BOOT FAILURE: {str(e)}"
150
 
151
  def stitch_cache(self, ghost_text: str) -> str:
@@ -157,6 +204,7 @@ class ZeroEngine:
157
  try:
158
  tokens = self.llm.tokenize(ghost_text.encode("utf-8"))
159
  self.llm.eval(tokens)
 
160
  except Exception as e:
161
  logger.error(f"KV Cache priming failed: {e}")
162
  finally:
@@ -200,30 +248,137 @@ class ZeroEngine:
200
  elapsed = time.time() - start_time
201
  tps = round(tokens_count / elapsed, 1) if elapsed > 0 else 0
202
 
203
- # Gradio 6.5.0: Update history dict structure
204
  history[-1]["content"] = f"{response_text}\n\n`[{tps} t/s]`"
205
  yield history
206
 
207
  self.telemetry.track_generation(tokens_count)
208
  except Exception as e:
 
209
  history[-1]["content"] = f"🔴 Runtime Error: {str(e)}"
210
  yield history
211
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
212
  # --- UI INTERFACE ---
213
  kernel = ZeroEngine()
214
 
215
- # Removed 'theme' from gr.Blocks constructor (Moved to .launch())
216
- with gr.Blocks(title="ZeroEngine Kernel 6.5") as demo:
217
- gr.HTML("<div style='text-align: center; border-bottom: 2px solid #333; margin-bottom: 20px;'><h1>🛰️ ZEROENGINE V0.1</h1><p>Gradio 6.5.0 Production Build</p></div>")
 
 
 
 
 
 
 
 
 
 
 
 
218
 
219
  with gr.Row():
220
  with gr.Column(scale=8):
221
- # FIXED: Removed 'type="messages"' (deprecated/auto-detected in 6.5.0)
222
  chat_box = gr.Chatbot(
223
  label="Main Engine Feedback",
224
  height=650,
225
  show_label=False,
226
- autoscroll=True
 
227
  )
228
 
229
  with gr.Row():
@@ -235,7 +390,7 @@ with gr.Blocks(title="ZeroEngine Kernel 6.5") as demo:
235
  )
236
  send_btn = gr.Button("EXE", variant="primary", scale=1)
237
 
238
- with gr.Sidebar(label="Engine Room", open=True, width=350):
239
  gr.Markdown("### 🛠️ Hardware Status")
240
  ram_metric = gr.Label(label="RAM Usage", value="0/0 GB")
241
  cpu_metric = gr.Label(label="CPU Load", value="0%")
@@ -243,7 +398,7 @@ with gr.Blocks(title="ZeroEngine Kernel 6.5") as demo:
243
  gr.Markdown("---")
244
  gr.Markdown("### 📡 Model Control")
245
  repo_input = gr.Textbox(label="HuggingFace Repo", value=DEFAULT_MODEL)
246
- quant_dropdown = gr.Dropdown(label="Available Quants", choices=[])
247
 
248
  with gr.Row():
249
  scan_btn = gr.Button("SCAN", size="sm")
@@ -261,30 +416,59 @@ with gr.Blocks(title="ZeroEngine Kernel 6.5") as demo:
261
  stitch_status = gr.Markdown("Cache: `EMPTY`")
262
  stitch_btn = gr.Button("STITCH", size="sm")
263
 
264
- log_output = gr.Code(label="Kernel Logs", language="shell", value="[INIT] System Ready.")
 
 
 
 
 
265
 
266
  # --- UI LOGIC ---
267
  def update_stats():
268
- m = ResourceMonitor.get_metrics()
269
- return f"{m['ram_used_gb']}/{m['ram_total_gb']} GB", f"{m['cpu_usage_pct']}%"
 
 
 
 
270
 
271
  def on_scan(repo):
272
- files = kernel.list_ggufs(repo)
273
- if not files:
274
- return gr.update(choices=[], value=None), "No GGUFs found in repo."
275
- return gr.update(choices=files, value=files[0]), f"Found {len(files)} quants."
 
 
 
 
 
 
 
 
 
 
276
 
277
  def on_boot(repo, file):
278
- if not repo or not file:
279
- return "Selection Missing", gr.update()
280
- yield "System: Booting Kernel...", gr.update()
281
- res = kernel.boot_kernel(repo, file)
282
- yield res, gr.update()
 
 
 
 
 
 
 
 
 
283
 
284
- # FIXED: Use gr.Timer for periodic updates (Gradio 6.5.0 compatible)
285
  timer = gr.Timer(value=2)
286
  timer.tick(update_stats, None, [ram_metric, cpu_metric])
287
 
 
288
  scan_btn.click(on_scan, [repo_input], [quant_dropdown, log_output])
289
  boot_btn.click(on_boot, [repo_input, quant_dropdown], [boot_status, log_output])
290
 
@@ -301,9 +485,8 @@ with gr.Blocks(title="ZeroEngine Kernel 6.5") as demo:
301
 
302
  # --- LAUNCH ---
303
  if __name__ == "__main__":
304
- # FIXED: Theme and CSS parameters moved here for Gradio 6 compatibility
305
  demo.queue(max_size=20).launch(
306
  server_name="0.0.0.0",
307
- share=False,
308
- theme=gr.themes.Monochrome(primary_hue="blue", radius_size="none")
309
  )
 
26
  HF_TOKEN = os.environ.get("HF_TOKEN")
27
  SPACE_ID = os.environ.get("SPACE_ID")
28
  LOG_FILE = "engine_telemetry.json"
29
+ RAM_LIMIT_PCT = 0.85 # Increased from 0.50 to prevent false rejections
30
+ SYSTEM_RESERVE_MB = 500 # Increased reserve
31
  DEFAULT_MODEL = "unsloth/Llama-3.2-1B-Instruct-GGUF"
32
  DEFAULT_QUANT = "Llama-3.2-1B-Instruct-Q4_K_M.gguf"
33
 
 
93
 
94
  @staticmethod
95
  def validate_deployment(file_path: str) -> (bool, str):
96
+ try:
97
+ vm = psutil.virtual_memory()
98
+ file_size_mb = os.path.getsize(file_path) / (1024**2)
99
+ total_ram_mb = vm.total / (1024**2)
100
+ avail_ram_mb = vm.available / (1024**2)
101
+
102
+ logger.info(f"Validation - Model: {file_size_mb:.1f}MB | Available RAM: {avail_ram_mb:.1f}MB | Total: {total_ram_mb:.1f}MB")
103
+
104
+ if file_size_mb > (total_ram_mb * RAM_LIMIT_PCT):
105
+ return False, f"Model size ({file_size_mb:.1f}MB) exceeds safety limit ({total_ram_mb * RAM_LIMIT_PCT:.1f}MB)."
106
+ if (file_size_mb + SYSTEM_RESERVE_MB) > avail_ram_mb:
107
+ return False, f"Insufficient RAM. Need {file_size_mb+SYSTEM_RESERVE_MB:.1f}MB, have {avail_ram_mb:.1f}MB available."
108
+ return True, "Validation Passed."
109
+ except Exception as e:
110
+ logger.error(f"Validation error: {e}")
111
+ return False, f"Validation error: {str(e)}"
112
 
113
  # --- ENGINE CORE ---
114
  class ZeroEngine:
 
123
  def list_ggufs(self, repo_id: str) -> List[str]:
124
  try:
125
  files = self.api.list_repo_files(repo_id=repo_id)
126
+ ggufs = [f for f in files if f.endswith(".gguf")]
127
+ logger.info(f"Found {len(ggufs)} GGUF files in {repo_id}")
128
+ return ggufs
129
  except Exception as e:
130
  logger.error(f"Scan error: {e}")
131
  return []
132
 
133
  def boot_kernel(self, repo: str, filename: str) -> str:
134
+ """Boot kernel with proper error handling to prevent space crashes"""
135
  try:
136
+ if not repo or not filename:
137
+ return "🔴 ERROR: Repository or filename missing"
138
+
139
+ logger.info(f"[BOOT] Starting download: {filename} from {repo}")
140
 
141
+ # Download the model file; failures are caught and reported below (no explicit timeout is passed)
142
+ try:
143
+ path = hf_hub_download(
144
+ repo_id=repo,
145
+ filename=filename,
146
+ token=HF_TOKEN,
147
+ local_files_only=False
148
+ )
149
+ logger.info(f"[BOOT] Download complete: {path}")
150
+ except Exception as e:
151
+ logger.error(f"[BOOT] Download failed: {e}")
152
+ return f"🔴 DOWNLOAD FAILED: {str(e)}"
153
+
154
+ # Validate before loading
155
  valid, msg = ResourceMonitor.validate_deployment(path)
156
  if not valid:
157
+ logger.warning(f"[BOOT] Validation failed: {msg}")
158
+ return f"🔴 VALIDATION FAILED: {msg}"
159
+
160
+ logger.info("[BOOT] Validation passed, initializing model...")
161
 
162
+ # Load model with proper cleanup
163
  with self.kernel_lock:
164
+ # Clear previous model
165
  if self.llm:
166
+ logger.info("[BOOT] Clearing previous model...")
167
+ try:
168
+ del self.llm
169
+ self.llm = None
170
+ except Exception as e:
171
+ logger.warning(f"[BOOT] Cleanup warning: {e}")
172
+
173
+ # Initialize new model with conservative settings
174
+ try:
175
+ logger.info("[BOOT] Loading model into memory...")
176
+ self.llm = Llama(
177
+ model_path=path,
178
+ n_ctx=2048,
179
+ n_threads=2,
180
+ use_mmap=True, # Critical: memory map to reduce RAM usage
181
+ n_batch=256, # Reduced from 512 to be safer
182
+ n_gpu_layers=0, # Force CPU only
183
+ verbose=False
184
+ )
185
+ self.active_model_info = {"repo": repo, "file": filename}
186
+ self.telemetry.track_load(repo, filename)
187
+ logger.info("[BOOT] Model loaded successfully!")
188
+ return f"🟢 KERNEL ONLINE: {filename}"
189
+ except Exception as e:
190
+ logger.error(f"[BOOT] Model loading failed: {e}")
191
+ self.llm = None
192
+ return f"🔴 LOAD FAILED: {str(e)}"
193
+
194
  except Exception as e:
195
+ logger.error(f"[BOOT] Unexpected error: {e}")
196
  return f"🔴 BOOT FAILURE: {str(e)}"
197
 
198
  def stitch_cache(self, ghost_text: str) -> str:
 
204
  try:
205
  tokens = self.llm.tokenize(ghost_text.encode("utf-8"))
206
  self.llm.eval(tokens)
207
+ logger.info(f"Ghost cache primed: {len(tokens)} tokens")
208
  except Exception as e:
209
  logger.error(f"KV Cache priming failed: {e}")
210
  finally:
 
248
  elapsed = time.time() - start_time
249
  tps = round(tokens_count / elapsed, 1) if elapsed > 0 else 0
250
 
251
+ # Update history with streaming content
252
  history[-1]["content"] = f"{response_text}\n\n`[{tps} t/s]`"
253
  yield history
254
 
255
  self.telemetry.track_generation(tokens_count)
256
  except Exception as e:
257
+ logger.error(f"Inference error: {e}")
258
  history[-1]["content"] = f"🔴 Runtime Error: {str(e)}"
259
  yield history
260
 
261
+ # --- CUSTOM CSS ---
262
+ CUSTOM_CSS = """
263
+ @import url('https://fonts.cdnfonts.com/css/consolas');
264
+
265
+ * {
266
+ font-family: 'Consolas', 'Courier New', monospace !important;
267
+ }
268
+
269
+ /* Global smooth rounded corners */
270
+ .gradio-container {
271
+ border-radius: 24px !important;
272
+ }
273
+
274
+ /* All buttons */
275
+ button {
276
+ border-radius: 16px !important;
277
+ transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1) !important;
278
+ font-family: 'Consolas', monospace !important;
279
+ }
280
+
281
+ button:hover {
282
+ transform: translateY(-2px);
283
+ box-shadow: 0 8px 16px rgba(0,0,0,0.2) !important;
284
+ }
285
+
286
+ /* Input fields */
287
+ input, textarea, .gr-textbox, .gr-dropdown {
288
+ border-radius: 12px !important;
289
+ font-family: 'Consolas', monospace !important;
290
+ }
291
+
292
+ /* Chat messages */
293
+ .message {
294
+ border-radius: 16px !important;
295
+ font-family: 'Consolas', monospace !important;
296
+ }
297
+
298
+ /* Code blocks */
299
+ .gr-code {
300
+ border-radius: 12px !important;
301
+ font-family: 'Consolas', monospace !important;
302
+ }
303
+
304
+ /* Labels */
305
+ .gr-label {
306
+ border-radius: 12px !important;
307
+ font-family: 'Consolas', monospace !important;
308
+ }
309
+
310
+ /* Sidebar */
311
+ .gr-sidebar {
312
+ border-radius: 20px !important;
313
+ background: linear-gradient(135deg, rgba(20,20,40,0.95), rgba(10,10,20,0.98)) !important;
314
+ backdrop-filter: blur(10px) !important;
315
+ }
316
+
317
+ /* Markdown sections */
318
+ .gr-markdown {
319
+ font-family: 'Consolas', monospace !important;
320
+ }
321
+
322
+ /* Chatbot container */
323
+ .chatbot {
324
+ border-radius: 20px !important;
325
+ font-family: 'Consolas', monospace !important;
326
+ }
327
+
328
+ /* Dropdown menus */
329
+ .gr-dropdown-menu {
330
+ border-radius: 12px !important;
331
+ font-family: 'Consolas', monospace !important;
332
+ }
333
+
334
+ /* Column containers */
335
+ .gr-column {
336
+ border-radius: 16px !important;
337
+ }
338
+
339
+ /* Row containers */
340
+ .gr-row {
341
+ border-radius: 12px !important;
342
+ }
343
+
344
+ /* Smooth animations for all interactive elements */
345
+ * {
346
+ transition: all 0.2s ease !important;
347
+ }
348
+
349
+ /* Header styling */
350
+ h1, h2, h3, h4, h5, h6 {
351
+ font-family: 'Consolas', monospace !important;
352
+ }
353
+ """
354
+
355
  # --- UI INTERFACE ---
356
  kernel = ZeroEngine()
357
 
358
+ with gr.Blocks(title="ZeroEngine Kernel 6.5", css=CUSTOM_CSS) as demo:
359
+ gr.HTML("""
360
+ <div style='text-align: center; padding: 30px; border-radius: 24px;
361
+ background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%);
362
+ margin-bottom: 30px; box-shadow: 0 10px 30px rgba(0,0,0,0.3);'>
363
+ <h1 style='margin: 0; font-size: 3em; background: linear-gradient(90deg, #00d4ff, #7b2ff7);
364
+ -webkit-background-clip: text; -webkit-text-fill-color: transparent;
365
+ font-family: Consolas, monospace;'>
366
+ 🛰️ ZEROENGINE V0.1
367
+ </h1>
368
+ <p style='margin: 10px 0 0 0; color: #888; font-family: Consolas, monospace;'>
369
+ Gradio 6.5.0 Production Build | Smooth Rounded UI
370
+ </p>
371
+ </div>
372
+ """)
373
 
374
  with gr.Row():
375
  with gr.Column(scale=8):
 
376
  chat_box = gr.Chatbot(
377
  label="Main Engine Feedback",
378
  height=650,
379
  show_label=False,
380
+ autoscroll=True,
381
+ container=True
382
  )
383
 
384
  with gr.Row():
 
390
  )
391
  send_btn = gr.Button("EXE", variant="primary", scale=1)
392
 
393
+ with gr.Column(scale=3):
394
  gr.Markdown("### 🛠️ Hardware Status")
395
  ram_metric = gr.Label(label="RAM Usage", value="0/0 GB")
396
  cpu_metric = gr.Label(label="CPU Load", value="0%")
 
398
  gr.Markdown("---")
399
  gr.Markdown("### 📡 Model Control")
400
  repo_input = gr.Textbox(label="HuggingFace Repo", value=DEFAULT_MODEL)
401
+ quant_dropdown = gr.Dropdown(label="Available Quants", choices=[], interactive=True)
402
 
403
  with gr.Row():
404
  scan_btn = gr.Button("SCAN", size="sm")
 
416
  stitch_status = gr.Markdown("Cache: `EMPTY`")
417
  stitch_btn = gr.Button("STITCH", size="sm")
418
 
419
+ log_output = gr.Code(
420
+ label="Kernel Logs",
421
+ language="shell",
422
+ value="[INIT] System Ready.",
423
+ lines=5
424
+ )
425
 
426
  # --- UI LOGIC ---
427
  def update_stats():
428
+ try:
429
+ m = ResourceMonitor.get_metrics()
430
+ return f"{m['ram_used_gb']}/{m['ram_total_gb']} GB", f"{m['cpu_usage_pct']}%"
431
+ except Exception as e:
432
+ logger.error(f"Stats update error: {e}")
433
+ return "Error", "Error"
434
 
435
  def on_scan(repo):
436
+ try:
437
+ if not repo:
438
+ return gr.update(choices=[], value=None), "⚠️ Please enter a repository ID"
439
+
440
+ logger.info(f"Scanning repository: {repo}")
441
+ files = kernel.list_ggufs(repo)
442
+
443
+ if not files:
444
+ return gr.update(choices=[], value=None), f"❌ No GGUFs found in {repo}"
445
+
446
+ return gr.update(choices=files, value=files[0]), f"✅ Found {len(files)} GGUF file(s)"
447
+ except Exception as e:
448
+ logger.error(f"Scan error: {e}")
449
+ return gr.update(choices=[], value=None), f"🔴 Scan failed: {str(e)}"
450
 
451
  def on_boot(repo, file):
452
+ try:
453
+ if not repo or not file:
454
+ yield "🔴 ERROR: Repository and filename required", gr.update()
455
+ return
456
+
457
+ yield "⚙️ System: Initiating boot sequence...", gr.update()
458
+ time.sleep(0.5) # Small delay for UI feedback
459
+
460
+ result = kernel.boot_kernel(repo, file)
461
+ yield result, gr.update()
462
+
463
+ except Exception as e:
464
+ logger.error(f"Boot UI error: {e}")
465
+ yield f"🔴 BOOT ERROR: {str(e)}", gr.update()
466
 
467
+ # Timer for periodic stats updates
468
  timer = gr.Timer(value=2)
469
  timer.tick(update_stats, None, [ram_metric, cpu_metric])
470
 
471
+ # Event handlers
472
  scan_btn.click(on_scan, [repo_input], [quant_dropdown, log_output])
473
  boot_btn.click(on_boot, [repo_input, quant_dropdown], [boot_status, log_output])
474
 
 
485
 
486
  # --- LAUNCH ---
487
  if __name__ == "__main__":
 
488
  demo.queue(max_size=20).launch(
489
  server_name="0.0.0.0",
490
+ server_port=7860,
491
+ share=False
492
  )