turtle170 committed on
Commit
ec969c4
·
verified ·
1 Parent(s): 6db8c1d

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -155
app.py DELETED
@@ -1,155 +0,0 @@
1
- import gradio as gr
2
- import psutil
3
- import os
4
- import json
5
- import time
6
- from datetime import datetime
7
- from huggingface_hub import HfApi, hf_hub_download
8
- from llama_cpp import Llama
9
-
10
# --- ENGINE CONFIGURATION ---
HF_TOKEN = os.environ.get("HF_TOKEN")  # optional HF token; when unset, log sync is disabled
api = HfApi(token=HF_TOKEN)  # shared Hub client used for uploads and repo listing
LOG_FILE = "engine_popularity.json"  # local telemetry file, pushed back to the Space repo
SYSTEM_BUFFER_MB = 200  # RAM headroom (MB) kept free for the OS/runtime when loading a model
MODEL_MAX_RAM_PCT = 0.50  # a model file may occupy at most this fraction of total RAM
17
class ZeroEngine:
    """Load GGUF models through llama.cpp with RAM-safety checks and telemetry.

    Holds at most one model at a time. Relies on the module-level
    configuration: HF_TOKEN, api, LOG_FILE, SYSTEM_BUFFER_MB and
    MODEL_MAX_RAM_PCT.
    """

    def __init__(self):
        self.llm = None          # active llama_cpp.Llama instance, or None
        self.current_repo = ""   # HF repo id of the currently loaded model
        self.current_file = ""   # GGUF filename of the currently loaded model
        self.popularity_data = self.load_logs()

    def load_logs(self):
        """Return the persisted popularity log, or a fresh one.

        Falls back to an empty log when the file is missing, unreadable or
        corrupt, so a bad JSON file cannot prevent startup (the original
        raised on corrupt JSON).
        """
        if os.path.exists(LOG_FILE):
            try:
                with open(LOG_FILE, "r") as f:
                    return json.load(f)
            except (json.JSONDecodeError, OSError) as e:
                print(f"Log load failed, starting fresh: {e}")
        return {"loads": {}, "last_sync": str(datetime.now())}

    def sync_logs(self):
        """Persist the popularity log locally and push it to the Space repo.

        Best-effort: upload failures are printed, never raised. A no-op
        when HF_TOKEN is unset.
        """
        if not HF_TOKEN:
            return
        with open(LOG_FILE, "w") as f:
            json.dump(self.popularity_data, f)
        try:
            # Pushes the JSON to the current Space repository.
            repo_id = os.environ.get("SPACE_ID")
            api.upload_file(
                path_or_fileobj=LOG_FILE,
                path_in_repo=LOG_FILE,
                repo_id=repo_id,
                repo_type="space"
            )
        except Exception as e:
            print(f"Sync failed: {e}")

    def get_metrics(self):
        """Return current host metrics: available/total RAM (GB) and CPU %."""
        ram = psutil.virtual_memory()
        return {
            "available_gb": round(ram.available / (1024**3), 2),
            "total_gb": round(ram.total / (1024**3), 2),
            # interval=None: non-blocking, reports % since the previous call
            "cpu_pct": psutil.cpu_percent(interval=None)
        }

    def validate_and_load(self, repo, filename):
        """Download *filename* from *repo* and load it if RAM allows.

        Returns a human-readable status string. Declines (without loading)
        when the model would exceed MODEL_MAX_RAM_PCT of total RAM, or would
        not leave SYSTEM_BUFFER_MB of headroom in available RAM.
        """
        metrics = self.get_metrics()
        available_ram_mb = metrics["available_gb"] * 1024

        # 1. Fetch File Info (hf_hub_download caches, so repeat loads are cheap)
        path = hf_hub_download(repo_id=repo, filename=filename, token=HF_TOKEN)
        file_size_mb = os.path.getsize(path) / (1024**2)

        # 2. RAM Safety Check
        if file_size_mb > (metrics["total_gb"] * 1024 * MODEL_MAX_RAM_PCT):
            return f"❌ DECLINED: Model ({file_size_mb:.1f}MB) exceeds 50% threshold."

        if (file_size_mb + SYSTEM_BUFFER_MB) > available_ram_mb:
            return f"❌ DECLINED: Insufficient RAM for safety buffer."

        # 3. Load Model. Drop the old reference by rebinding rather than
        # `del self.llm`: `del` removed the attribute entirely, so a failing
        # Llama() constructor left later `if self.llm` checks raising
        # AttributeError.
        self.llm = None
        self.llm = Llama(
            model_path=path,
            n_ctx=2048,
            n_threads=1,  # Fixed to 1 core for partitioning
            n_batch=512,
            use_mmap=True,
            verbose=False
        )
        self.current_repo = repo
        self.current_file = filename

        # 4. Telemetry: count this load and push the log.
        loads = self.popularity_data["loads"]
        loads[filename] = loads.get(filename, 0) + 1
        self.sync_logs()

        # The original returned a placeholder f-string with no fields
        # ("(unknown)"); report what was actually activated.
        return f"✅ ZeroEngine Active: {repo}/{filename}"
87
# Single module-level engine instance shared by all UI handlers.
engine = ZeroEngine()

# --- UI INTERFACE ---
# Gradio app: a chat pane plus an "Engine Room" sidebar holding live host
# metrics, a GGUF model loader and a pre-typed "ghost" prompt queue.
with gr.Blocks(theme=gr.themes.Monochrome(), fill_height=True) as demo:
    gr.Markdown("# 🛰️ ZeroEngine V0.1 Kernel")

    with gr.Row():
        # MAIN CHAT (Center)
        with gr.Column(scale=8):
            chatbot = gr.Chatbot(type="messages", label="Engine Output")
            msg_input = gr.Textbox(placeholder="Input command for Active Slot...", label="Active Command")

        # ENGINE SIDEBAR (Right) — starts collapsed; opened automatically on model load.
        with gr.Sidebar(label="Engine Room", open=False) as sidebar:
            gr.Markdown("### 📊 Metrics")
            ram_gauge = gr.Markdown("RAM: Calculating...")
            cpu_gauge = gr.Markdown("CPU: Calculating...")

            gr.Markdown("---")
            gr.Markdown("### 📥 Model Loader")
            repo_id = gr.Textbox(label="HF Repository", value="unsloth/Llama-3.2-1B-Instruct-GGUF")
            file_select = gr.Dropdown(label="Quantization File", choices=[])
            scan_btn = gr.Button("Scan Repository")
            load_btn = gr.Button("ACTIVATE ENGINE", variant="primary")
            status = gr.Markdown("Status: Standby")

            gr.Markdown("---")
            gr.Markdown("### 👻 Ghost Terminal (Queue)")
            ghost_input = gr.Textbox(placeholder="Pre-type prompt here...", label="Queue Buffer")
            gr.Markdown("_Queue inputs are tokenized and cached immediately upon slot availability._")

    # --- LOGIC HANDLERS ---
    def update_stats():
        # Refresh the sidebar gauges from live psutil readings.
        m = engine.get_metrics()
        return f"**RAM:** {m['available_gb']}GB / {m['total_gb']}GB", f"**CPU (Shared):** {m['cpu_pct']}%"

    def scan_repo(repo):
        # List the repo and offer only .gguf files; preselect the first one
        # (value is None when the repo contains no GGUF files).
        files = api.list_repo_files(repo_id=repo)
        gguf_files = [f for f in files if f.endswith(".gguf")]
        return gr.update(choices=gguf_files, value=gguf_files[0] if gguf_files else None)

    def trigger_load(repo, file):
        # Automatically open sidebar to show metrics during load
        return engine.validate_and_load(repo, file), gr.update(open=True)

    def chat_fn(message, history, ghost_msg):
        # Streaming chat handler: yields the growing transcript as tokens
        # arrive from the llama.cpp completion stream.
        if not engine.llm:
            yield history + [{"role": "assistant", "content": "Error: Engine not initialized."}]
            return

        # Stitch Ghost Prompt if exists
        full_prompt = f"{ghost_msg}\n{message}" if ghost_msg else message
        response = ""

        for chunk in engine.llm(full_prompt, max_tokens=1024, stream=True):
            token = chunk["choices"][0].get("text", "")
            response += token
            yield history + [{"role": "user", "content": message}, {"role": "assistant", "content": response}]

    # Events
    # NOTE(review): `every=2` polls update_stats every 2 seconds — confirm the
    # installed Gradio version supports this keyword (newer releases moved
    # periodic updates to gr.Timer).
    demo.load(update_stats, None, [ram_gauge, cpu_gauge], every=2)
    scan_btn.click(scan_repo, [repo_id], [file_select])
    load_btn.click(trigger_load, [repo_id, file_select], [status, sidebar])
    # Three submit listeners fire on the same event: run the chat, then clear
    # both input buffers.
    msg_input.submit(chat_fn, [msg_input, chatbot, ghost_input], [chatbot], concurrency_limit=2)
    msg_input.submit(lambda: "", None, [msg_input])  # Clear active
    msg_input.submit(lambda: "", None, [ghost_input])  # Clear ghost buffer after use

demo.queue().launch()