Spaces:

turtle170
/

ZeroEngine

Running

App Files Files Community

turtle170 committed 5 days ago

Commit

438c44e

verified ·

1 Parent(s): 7a06293

Update app.py

Browse files

Files changed (1) hide show

app.py +235 -52

app.py CHANGED Viewed

@@ -26,8 +26,8 @@ except ImportError:
 HF_TOKEN = os.environ.get("HF_TOKEN")
 SPACE_ID = os.environ.get("SPACE_ID")
 LOG_FILE = "engine_telemetry.json"
-RAM_LIMIT_PCT = 0.50
-SYSTEM_RESERVE_MB = 250
 DEFAULT_MODEL = "unsloth/Llama-3.2-1B-Instruct-GGUF"
 DEFAULT_QUANT = "Llama-3.2-1B-Instruct-Q4_K_M.gguf"
@@ -93,15 +93,22 @@ class ResourceMonitor:
     @staticmethod
     def validate_deployment(file_path: str) -> (bool, str):
-        vm = psutil.virtual_memory()
-        file_size_mb = os.path.getsize(file_path) / (1024**2)
-        total_ram_mb = vm.total / (1024**2)
-        avail_ram_mb = vm.available / (1024**2)
-        if file_size_mb > (total_ram_mb * RAM_LIMIT_PCT):
-            return False, f"Model size ({file_size_mb:.1f}MB) exceeds safety limit."
-        if (file_size_mb + SYSTEM_RESERVE_MB) > avail_ram_mb:
-            return False, f"Insufficient headroom for context (Need ~{file_size_mb+SYSTEM_RESERVE_MB:.1f}MB)."
-        return True, "Passed."
 # --- ENGINE CORE ---
 class ZeroEngine:
@@ -116,36 +123,76 @@ class ZeroEngine:
     def list_ggufs(self, repo_id: str) -> List[str]:
         try:
             files = self.api.list_repo_files(repo_id=repo_id)
-            return [f for f in files if f.endswith(".gguf")]
         except Exception as e:
             logger.error(f"Scan error: {e}")
             return []
     def boot_kernel(self, repo: str, filename: str) -> str:
         try:
-            logger.info(f"Downloading {filename} from {repo}...")
-            path = hf_hub_download(repo_id=repo, filename=filename, token=HF_TOKEN)
             valid, msg = ResourceMonitor.validate_deployment(path)
             if not valid:
-                return msg
             with self.kernel_lock:
                 if self.llm:
-                    del self.llm
-                self.llm = Llama(
-                    model_path=path,
-                    n_ctx=2048,
-                    n_threads=2,
-                    use_mmap=True,
-                    n_batch=512,
-                    verbose=False
-                )
-                self.active_model_info = {"repo": repo, "file": filename}
-                self.telemetry.track_load(repo, filename)
-            return f"🟢 KERNEL ONLINE: {filename}"
         except Exception as e:
             return f"🔴 BOOT FAILURE: {str(e)}"
     def stitch_cache(self, ghost_text: str) -> str:
@@ -157,6 +204,7 @@ class ZeroEngine:
             try:
                 tokens = self.llm.tokenize(ghost_text.encode("utf-8"))
                 self.llm.eval(tokens)
             except Exception as e:
                 logger.error(f"KV Cache priming failed: {e}")
             finally:
@@ -200,30 +248,137 @@ class ZeroEngine:
                 elapsed = time.time() - start_time
                 tps = round(tokens_count / elapsed, 1) if elapsed > 0 else 0
-                # Gradio 6.5.0: Update history dict structure
                 history[-1]["content"] = f"{response_text}\n\n`[{tps} t/s]`"
                 yield history
             self.telemetry.track_generation(tokens_count)
         except Exception as e:
             history[-1]["content"] = f"🔴 Runtime Error: {str(e)}"
             yield history
 # --- UI INTERFACE ---
 kernel = ZeroEngine()
-# Removed 'theme' from gr.Blocks constructor (Moved to .launch())
-with gr.Blocks(title="ZeroEngine Kernel 6.5") as demo:
-    gr.HTML("<div style='text-align: center; border-bottom: 2px solid #333; margin-bottom: 20px;'><h1>🛰️ ZEROENGINE V0.1</h1><p>Gradio 6.5.0 Production Build</p></div>")
     with gr.Row():
         with gr.Column(scale=8):
-            # FIXED: Removed 'type="messages"' (deprecated/auto-detected in 6.5.0)
             chat_box = gr.Chatbot(
                 label="Main Engine Feedback",
                 height=650,
                 show_label=False,
-                autoscroll=True
             )
             with gr.Row():
@@ -235,7 +390,7 @@ with gr.Blocks(title="ZeroEngine Kernel 6.5") as demo:
                 )
                 send_btn = gr.Button("EXE", variant="primary", scale=1)
-        with gr.Sidebar(label="Engine Room", open=True, width=350):
             gr.Markdown("### 🛠️ Hardware Status")
             ram_metric = gr.Label(label="RAM Usage", value="0/0 GB")
             cpu_metric = gr.Label(label="CPU Load", value="0%")
@@ -243,7 +398,7 @@ with gr.Blocks(title="ZeroEngine Kernel 6.5") as demo:
             gr.Markdown("---")
             gr.Markdown("### 📡 Model Control")
             repo_input = gr.Textbox(label="HuggingFace Repo", value=DEFAULT_MODEL)
-            quant_dropdown = gr.Dropdown(label="Available Quants", choices=[])
             with gr.Row():
                 scan_btn = gr.Button("SCAN", size="sm")
@@ -261,30 +416,59 @@ with gr.Blocks(title="ZeroEngine Kernel 6.5") as demo:
             stitch_status = gr.Markdown("Cache: `EMPTY`")
             stitch_btn = gr.Button("STITCH", size="sm")
-            log_output = gr.Code(label="Kernel Logs", language="shell", value="[INIT] System Ready.")
     # --- UI LOGIC ---
     def update_stats():
-        m = ResourceMonitor.get_metrics()
-        return f"{m['ram_used_gb']}/{m['ram_total_gb']} GB", f"{m['cpu_usage_pct']}%"
     def on_scan(repo):
-        files = kernel.list_ggufs(repo)
-        if not files:
-            return gr.update(choices=[], value=None), "No GGUFs found in repo."
-        return gr.update(choices=files, value=files[0]), f"Found {len(files)} quants."
     def on_boot(repo, file):
-        if not repo or not file:
-            return "Selection Missing", gr.update()
-        yield "System: Booting Kernel...", gr.update()
-        res = kernel.boot_kernel(repo, file)
-        yield res, gr.update()
-    # FIXED: Use gr.Timer for periodic updates (Gradio 6.5.0 compatible)
     timer = gr.Timer(value=2)
     timer.tick(update_stats, None, [ram_metric, cpu_metric])
     scan_btn.click(on_scan, [repo_input], [quant_dropdown, log_output])
     boot_btn.click(on_boot, [repo_input, quant_dropdown], [boot_status, log_output])
@@ -301,9 +485,8 @@ with gr.Blocks(title="ZeroEngine Kernel 6.5") as demo:
 # --- LAUNCH ---
 if __name__ == "__main__":
-    # FIXED: Theme and CSS parameters moved here for Gradio 6 compatibility
     demo.queue(max_size=20).launch(
         server_name="0.0.0.0",
-        share=False,
-        theme=gr.themes.Monochrome(primary_hue="blue", radius_size="none")
     )

 HF_TOKEN = os.environ.get("HF_TOKEN")
 SPACE_ID = os.environ.get("SPACE_ID")
 LOG_FILE = "engine_telemetry.json"
+RAM_LIMIT_PCT = 0.85  # Increased from 0.50 to prevent false rejections
+SYSTEM_RESERVE_MB = 500  # Increased reserve
 DEFAULT_MODEL = "unsloth/Llama-3.2-1B-Instruct-GGUF"
 DEFAULT_QUANT = "Llama-3.2-1B-Instruct-Q4_K_M.gguf"
     @staticmethod
     def validate_deployment(file_path: str) -> (bool, str):
+        try:
+            vm = psutil.virtual_memory()
+            file_size_mb = os.path.getsize(file_path) / (1024**2)
+            total_ram_mb = vm.total / (1024**2)
+            avail_ram_mb = vm.available / (1024**2)
+            logger.info(f"Validation - Model: {file_size_mb:.1f}MB | Available RAM: {avail_ram_mb:.1f}MB | Total: {total_ram_mb:.1f}MB")
+            if file_size_mb > (total_ram_mb * RAM_LIMIT_PCT):
+                return False, f"Model size ({file_size_mb:.1f}MB) exceeds safety limit ({total_ram_mb * RAM_LIMIT_PCT:.1f}MB)."
+            if (file_size_mb + SYSTEM_RESERVE_MB) > avail_ram_mb:
+                return False, f"Insufficient RAM. Need {file_size_mb+SYSTEM_RESERVE_MB:.1f}MB, have {avail_ram_mb:.1f}MB available."
+            return True, "Validation Passed."
+        except Exception as e:
+            logger.error(f"Validation error: {e}")
+            return False, f"Validation error: {str(e)}"
 # --- ENGINE CORE ---
 class ZeroEngine:
     def list_ggufs(self, repo_id: str) -> List[str]:
         try:
             files = self.api.list_repo_files(repo_id=repo_id)
+            ggufs = [f for f in files if f.endswith(".gguf")]
+            logger.info(f"Found {len(ggufs)} GGUF files in {repo_id}")
+            return ggufs
         except Exception as e:
             logger.error(f"Scan error: {e}")
             return []
     def boot_kernel(self, repo: str, filename: str) -> str:
+        """Boot kernel with proper error handling to prevent space crashes"""
         try:
+            if not repo or not filename:
+                return "🔴 ERROR: Repository or filename missing"
+            logger.info(f"[BOOT] Starting download: {filename} from {repo}")
+            # Download the model file; no timeout is set, failures are caught below
+            try:
+                path = hf_hub_download(
+                    repo_id=repo,
+                    filename=filename,
+                    token=HF_TOKEN,
+                    local_files_only=False
+                )
+                logger.info(f"[BOOT] Download complete: {path}")
+            except Exception as e:
+                logger.error(f"[BOOT] Download failed: {e}")
+                return f"🔴 DOWNLOAD FAILED: {str(e)}"
+            # Validate before loading
             valid, msg = ResourceMonitor.validate_deployment(path)
             if not valid:
+                logger.warning(f"[BOOT] Validation failed: {msg}")
+                return f"🔴 VALIDATION FAILED: {msg}"
+            logger.info("[BOOT] Validation passed, initializing model...")
+            # Load model with proper cleanup
             with self.kernel_lock:
+                # Clear previous model
                 if self.llm:
+                    logger.info("[BOOT] Clearing previous model...")
+                    try:
+                        del self.llm
+                        self.llm = None
+                    except Exception as e:
+                        logger.warning(f"[BOOT] Cleanup warning: {e}")
+                # Initialize new model with conservative settings
+                try:
+                    logger.info("[BOOT] Loading model into memory...")
+                    self.llm = Llama(
+                        model_path=path,
+                        n_ctx=2048,
+                        n_threads=2,
+                        use_mmap=True,  # Critical: memory map to reduce RAM usage
+                        n_batch=256,    # Reduced from 512 to be safer
+                        n_gpu_layers=0, # Force CPU only
+                        verbose=False
+                    )
+                    self.active_model_info = {"repo": repo, "file": filename}
+                    self.telemetry.track_load(repo, filename)
+                    logger.info("[BOOT] Model loaded successfully!")
+                    return f"🟢 KERNEL ONLINE: {filename}"
+                except Exception as e:
+                    logger.error(f"[BOOT] Model loading failed: {e}")
+                    self.llm = None
+                    return f"🔴 LOAD FAILED: {str(e)}"
         except Exception as e:
+            logger.error(f"[BOOT] Unexpected error: {e}")
             return f"🔴 BOOT FAILURE: {str(e)}"
     def stitch_cache(self, ghost_text: str) -> str:
             try:
                 tokens = self.llm.tokenize(ghost_text.encode("utf-8"))
                 self.llm.eval(tokens)
+                logger.info(f"Ghost cache primed: {len(tokens)} tokens")
             except Exception as e:
                 logger.error(f"KV Cache priming failed: {e}")
             finally:
                 elapsed = time.time() - start_time
                 tps = round(tokens_count / elapsed, 1) if elapsed > 0 else 0
+                # Update history with streaming content
                 history[-1]["content"] = f"{response_text}\n\n`[{tps} t/s]`"
                 yield history
             self.telemetry.track_generation(tokens_count)
         except Exception as e:
+            logger.error(f"Inference error: {e}")
             history[-1]["content"] = f"🔴 Runtime Error: {str(e)}"
             yield history
+# --- CUSTOM CSS ---
+CUSTOM_CSS = """
+@import url('https://fonts.cdnfonts.com/css/consolas');
+* {
+    font-family: 'Consolas', 'Courier New', monospace !important;
+}
+/* Global smooth rounded corners */
+.gradio-container {
+    border-radius: 24px !important;
+}
+/* All buttons */
+button {
+    border-radius: 16px !important;
+    transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1) !important;
+    font-family: 'Consolas', monospace !important;
+}
+button:hover {
+    transform: translateY(-2px);
+    box-shadow: 0 8px 16px rgba(0,0,0,0.2) !important;
+}
+/* Input fields */
+input, textarea, .gr-textbox, .gr-dropdown {
+    border-radius: 12px !important;
+    font-family: 'Consolas', monospace !important;
+}
+/* Chat messages */
+.message {
+    border-radius: 16px !important;
+    font-family: 'Consolas', monospace !important;
+}
+/* Code blocks */
+.gr-code {
+    border-radius: 12px !important;
+    font-family: 'Consolas', monospace !important;
+}
+/* Labels */
+.gr-label {
+    border-radius: 12px !important;
+    font-family: 'Consolas', monospace !important;
+}
+/* Sidebar */
+.gr-sidebar {
+    border-radius: 20px !important;
+    background: linear-gradient(135deg, rgba(20,20,40,0.95), rgba(10,10,20,0.98)) !important;
+    backdrop-filter: blur(10px) !important;
+}
+/* Markdown sections */
+.gr-markdown {
+    font-family: 'Consolas', monospace !important;
+}
+/* Chatbot container */
+.chatbot {
+    border-radius: 20px !important;
+    font-family: 'Consolas', monospace !important;
+}
+/* Dropdown menus */
+.gr-dropdown-menu {
+    border-radius: 12px !important;
+    font-family: 'Consolas', monospace !important;
+}
+/* Column containers */
+.gr-column {
+    border-radius: 16px !important;
+}
+/* Row containers */
+.gr-row {
+    border-radius: 12px !important;
+}
+/* Smooth animations for all interactive elements */
+* {
+    transition: all 0.2s ease !important;
+}
+/* Header styling */
+h1, h2, h3, h4, h5, h6 {
+    font-family: 'Consolas', monospace !important;
+}
+"""
 # --- UI INTERFACE ---
 kernel = ZeroEngine()
+with gr.Blocks(title="ZeroEngine Kernel 6.5", css=CUSTOM_CSS) as demo:
+    gr.HTML("""
+        <div style='text-align: center; padding: 30px; border-radius: 24px;
+                    background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%);
+                    margin-bottom: 30px; box-shadow: 0 10px 30px rgba(0,0,0,0.3);'>
+            <h1 style='margin: 0; font-size: 3em; background: linear-gradient(90deg, #00d4ff, #7b2ff7);
+                       -webkit-background-clip: text; -webkit-text-fill-color: transparent;
+                       font-family: Consolas, monospace;'>
+                🛰️ ZEROENGINE V0.1
+            </h1>
+            <p style='margin: 10px 0 0 0; color: #888; font-family: Consolas, monospace;'>
+                Gradio 6.5.0 Production Build | Smooth Rounded UI
+            </p>
+        </div>
+    """)
     with gr.Row():
         with gr.Column(scale=8):
             chat_box = gr.Chatbot(
                 label="Main Engine Feedback",
                 height=650,
                 show_label=False,
+                autoscroll=True,
+                container=True
             )
             with gr.Row():
                 )
                 send_btn = gr.Button("EXE", variant="primary", scale=1)
+        with gr.Column(scale=3):
             gr.Markdown("### 🛠️ Hardware Status")
             ram_metric = gr.Label(label="RAM Usage", value="0/0 GB")
             cpu_metric = gr.Label(label="CPU Load", value="0%")
             gr.Markdown("---")
             gr.Markdown("### 📡 Model Control")
             repo_input = gr.Textbox(label="HuggingFace Repo", value=DEFAULT_MODEL)
+            quant_dropdown = gr.Dropdown(label="Available Quants", choices=[], interactive=True)
             with gr.Row():
                 scan_btn = gr.Button("SCAN", size="sm")
             stitch_status = gr.Markdown("Cache: `EMPTY`")
             stitch_btn = gr.Button("STITCH", size="sm")
+            log_output = gr.Code(
+                label="Kernel Logs",
+                language="shell",
+                value="[INIT] System Ready.",
+                lines=5
+            )
     # --- UI LOGIC ---
     def update_stats():
+        try:
+            m = ResourceMonitor.get_metrics()
+            return f"{m['ram_used_gb']}/{m['ram_total_gb']} GB", f"{m['cpu_usage_pct']}%"
+        except Exception as e:
+            logger.error(f"Stats update error: {e}")
+            return "Error", "Error"
     def on_scan(repo):
+        try:
+            if not repo:
+                return gr.update(choices=[], value=None), "⚠️ Please enter a repository ID"
+            logger.info(f"Scanning repository: {repo}")
+            files = kernel.list_ggufs(repo)
+            if not files:
+                return gr.update(choices=[], value=None), f"❌ No GGUFs found in {repo}"
+            return gr.update(choices=files, value=files[0]), f"✅ Found {len(files)} GGUF file(s)"
+        except Exception as e:
+            logger.error(f"Scan error: {e}")
+            return gr.update(choices=[], value=None), f"🔴 Scan failed: {str(e)}"
     def on_boot(repo, file):
+        try:
+            if not repo or not file:
+                yield "🔴 ERROR: Repository and filename required", gr.update()
+                return
+            yield "⚙️ System: Initiating boot sequence...", gr.update()
+            time.sleep(0.5)  # Small delay for UI feedback
+            result = kernel.boot_kernel(repo, file)
+            yield result, gr.update()
+        except Exception as e:
+            logger.error(f"Boot UI error: {e}")
+            yield f"🔴 BOOT ERROR: {str(e)}", gr.update()
+    # Timer for periodic stats updates
     timer = gr.Timer(value=2)
     timer.tick(update_stats, None, [ram_metric, cpu_metric])
+    # Event handlers
     scan_btn.click(on_scan, [repo_input], [quant_dropdown, log_output])
     boot_btn.click(on_boot, [repo_input, quant_dropdown], [boot_status, log_output])
 # --- LAUNCH ---
 if __name__ == "__main__":
     demo.queue(max_size=20).launch(
         server_name="0.0.0.0",
+        server_port=7860,
+        share=False
     )