turtle170 committed on
Commit
6db8c1d
·
verified ·
1 Parent(s): 145bfe5

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +155 -0
app.py ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import psutil
3
+ import os
4
+ import json
5
+ import time
6
+ from datetime import datetime
7
+ from huggingface_hub import HfApi, hf_hub_download
8
+ from llama_cpp import Llama
9
+
10
# --- ENGINE CONFIGURATION ---
# Token is optional: without it, telemetry sync is skipped and only public
# repositories can be downloaded.
HF_TOKEN = os.environ.get("HF_TOKEN")
api = HfApi(token=HF_TOKEN)

# Local JSON file where per-model load counts persist between restarts.
LOG_FILE = "engine_popularity.json"

# RAM (MB) kept free for the OS / Gradio runtime when sizing a model.
SYSTEM_BUFFER_MB = 200
# A model file may occupy at most this fraction of total system RAM.
MODEL_MAX_RAM_PCT = 0.50
16
+
17
class ZeroEngine:
    """Manages a single llama.cpp model slot sized against available host RAM.

    Responsibilities:
      * download and RAM-safety-check GGUF files before loading them
        (``validate_and_load``)
      * expose host RAM/CPU metrics for the UI (``get_metrics``)
      * persist per-file load counts and best-effort sync them to the
        Space repository (``load_logs`` / ``sync_logs``)
    """

    def __init__(self):
        self.llm = None            # active llama_cpp.Llama instance, or None
        self.current_repo = ""     # repo id of the currently loaded model
        self.current_file = ""     # GGUF filename of the currently loaded model
        self.popularity_data = self.load_logs()

    def load_logs(self):
        """Return persisted telemetry, falling back to a fresh structure.

        A corrupt or partially-written JSON file (e.g. an interrupted sync)
        must not crash startup, so decode/read errors are treated the same
        as a missing file.
        """
        if os.path.exists(LOG_FILE):
            try:
                with open(LOG_FILE, "r") as f:
                    return json.load(f)
            except (json.JSONDecodeError, OSError) as e:
                print(f"Log load failed, starting fresh: {e}")
        return {"loads": {}, "last_sync": str(datetime.now())}

    def sync_logs(self):
        """Write telemetry locally and best-effort push it to the Space repo.

        No-op without a token (nothing could be uploaded anyway).
        """
        if not HF_TOKEN:
            return
        with open(LOG_FILE, "w") as f:
            json.dump(self.popularity_data, f)
        try:
            # Pushes the JSON to the current Space repository
            repo_id = os.environ.get("SPACE_ID")
            api.upload_file(
                path_or_fileobj=LOG_FILE,
                path_in_repo=LOG_FILE,
                repo_id=repo_id,
                repo_type="space",
            )
        except Exception as e:
            # Telemetry is best-effort; never take the engine down over it.
            print(f"Sync failed: {e}")

    def get_metrics(self):
        """Return current host RAM/CPU figures as a plain dict (GB / percent)."""
        ram = psutil.virtual_memory()
        return {
            "available_gb": round(ram.available / (1024**3), 2),
            "total_gb": round(ram.total / (1024**3), 2),
            # interval=None -> non-blocking reading since the previous call
            "cpu_pct": psutil.cpu_percent(interval=None),
        }

    def validate_and_load(self, repo, filename):
        """Download *filename* from *repo*, RAM-check it, and activate it.

        Returns a human-readable status string for the UI: either the
        success banner or the reason the load was declined.
        """
        metrics = self.get_metrics()
        available_ram_mb = metrics["available_gb"] * 1024

        # 1. Fetch File Info
        # NOTE(review): this downloads the full file *before* the size check,
        # so disk is spent even on models that get declined below.
        path = hf_hub_download(repo_id=repo, filename=filename, token=HF_TOKEN)
        file_size_mb = os.path.getsize(path) / (1024**2)

        # 2. RAM Safety Check
        if file_size_mb > (metrics["total_gb"] * 1024 * MODEL_MAX_RAM_PCT):
            return f"❌ DECLINED: Model ({file_size_mb:.1f}MB) exceeds 50% threshold."

        if (file_size_mb + SYSTEM_BUFFER_MB) > available_ram_mb:
            return f"❌ DECLINED: Insufficient RAM for safety buffer."

        # 3. Load Model
        # Rebind to None (not `del`) so the attribute still exists and reads
        # as "no model" if Llama() raises partway through the load.
        if self.llm:
            self.llm = None
        self.llm = Llama(
            model_path=path,
            n_ctx=2048,
            n_threads=1,  # Fixed to 1 core for partitioning
            n_batch=512,
            use_mmap=True,
            verbose=False,
        )
        self.current_repo = repo
        self.current_file = filename

        # 4. Telemetry
        self.popularity_data["loads"][filename] = self.popularity_data["loads"].get(filename, 0) + 1
        self.sync_logs()

        return f"✅ ZeroEngine Active: {filename}"
87
+
88
engine = ZeroEngine()

# --- UI INTERFACE ---
with gr.Blocks(theme=gr.themes.Monochrome(), fill_height=True) as demo:
    gr.Markdown("# 🛰️ ZeroEngine V0.1 Kernel")

    with gr.Row():
        # MAIN CHAT (Center)
        with gr.Column(scale=8):
            chatbot = gr.Chatbot(type="messages", label="Engine Output")
            msg_input = gr.Textbox(placeholder="Input command for Active Slot...", label="Active Command")

    # ENGINE SIDEBAR (Right)
    with gr.Sidebar(label="Engine Room", open=False) as sidebar:
        gr.Markdown("### 📊 Metrics")
        ram_gauge = gr.Markdown("RAM: Calculating...")
        cpu_gauge = gr.Markdown("CPU: Calculating...")

        gr.Markdown("---")
        gr.Markdown("### 📥 Model Loader")
        repo_id = gr.Textbox(label="HF Repository", value="unsloth/Llama-3.2-1B-Instruct-GGUF")
        file_select = gr.Dropdown(label="Quantization File", choices=[])
        scan_btn = gr.Button("Scan Repository")
        load_btn = gr.Button("ACTIVATE ENGINE", variant="primary")
        status = gr.Markdown("Status: Standby")

        gr.Markdown("---")
        gr.Markdown("### 👻 Ghost Terminal (Queue)")
        ghost_input = gr.Textbox(placeholder="Pre-type prompt here...", label="Queue Buffer")
        gr.Markdown("_Queue inputs are tokenized and cached immediately upon slot availability._")

    # --- LOGIC HANDLERS ---
    def update_stats():
        """Refresh the RAM/CPU gauges (polled every 2s via demo.load)."""
        m = engine.get_metrics()
        return f"**RAM:** {m['available_gb']}GB / {m['total_gb']}GB", f"**CPU (Shared):** {m['cpu_pct']}%"

    def scan_repo(repo):
        """List .gguf files in *repo*; a bad/unreachable repo yields an
        empty dropdown instead of an unhandled handler exception."""
        try:
            files = api.list_repo_files(repo_id=repo)
        except Exception as e:
            print(f"Repo scan failed: {e}")
            return gr.update(choices=[], value=None)
        gguf_files = [f for f in files if f.endswith(".gguf")]
        return gr.update(choices=gguf_files, value=gguf_files[0] if gguf_files else None)

    def trigger_load(repo, file):
        # Automatically open sidebar to show metrics during load; surface
        # download/load failures as a status line, not a stack trace.
        try:
            return engine.validate_and_load(repo, file), gr.update(open=True)
        except Exception as e:
            return f"❌ LOAD FAILED: {e}", gr.update(open=True)

    def chat_fn(message, history, ghost_msg):
        """Stream a completion; the ghost buffer (if set) is prepended to the prompt."""
        if not engine.llm:
            # Keep the user's message in the transcript alongside the error.
            yield history + [
                {"role": "user", "content": message},
                {"role": "assistant", "content": "Error: Engine not initialized."},
            ]
            return

        # Stitch Ghost Prompt if exists
        full_prompt = f"{ghost_msg}\n{message}" if ghost_msg else message
        response = ""

        for chunk in engine.llm(full_prompt, max_tokens=1024, stream=True):
            token = chunk["choices"][0].get("text", "")
            response += token
            yield history + [{"role": "user", "content": message}, {"role": "assistant", "content": response}]

    # Events
    demo.load(update_stats, None, [ram_gauge, cpu_gauge], every=2)
    scan_btn.click(scan_repo, [repo_id], [file_select])
    load_btn.click(trigger_load, [repo_id, file_select], [status, sidebar])
    msg_input.submit(chat_fn, [msg_input, chatbot, ghost_input], [chatbot], concurrency_limit=2)
    msg_input.submit(lambda: "", None, [msg_input])  # Clear active
    msg_input.submit(lambda: "", None, [ghost_input])  # Clear ghost buffer after use

demo.queue().launch()