turtle170 committed on
Commit
6db8c1d
·
verified ·
1 Parent(s): 145bfe5

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +155 -0
app.py ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import psutil
3
+ import os
4
+ import json
5
+ import time
6
+ from datetime import datetime
7
+ from huggingface_hub import HfApi, hf_hub_download
8
+ from llama_cpp import Llama
9
+
10
# --- ENGINE CONFIGURATION ---
# Token is optional: without it, telemetry sync is skipped and only public
# repositories can be downloaded.
HF_TOKEN = os.environ.get("HF_TOKEN")
api = HfApi(token=HF_TOKEN)

# Local JSON file where per-model load counts persist between restarts.
LOG_FILE = "engine_popularity.json"

# RAM (MB) kept free for the OS / Gradio runtime when sizing a model.
SYSTEM_BUFFER_MB = 200
# A model file may occupy at most this fraction of total system RAM.
MODEL_MAX_RAM_PCT = 0.50
16
+
17
class ZeroEngine:
    """Manages a single llama.cpp model slot sized against available host RAM.

    Responsibilities:
      * download and RAM-safety-check GGUF files before loading them
        (``validate_and_load``)
      * expose host RAM/CPU metrics for the UI (``get_metrics``)
      * persist per-file load counts and best-effort sync them to the
        Space repository (``load_logs`` / ``sync_logs``)
    """

    def __init__(self):
        self.llm = None            # active llama_cpp.Llama instance, or None
        self.current_repo = ""     # repo id of the currently loaded model
        self.current_file = ""     # GGUF filename of the currently loaded model
        self.popularity_data = self.load_logs()

    def load_logs(self):
        """Return persisted telemetry, falling back to a fresh structure.

        A corrupt or partially-written JSON file (e.g. an interrupted sync)
        must not crash startup, so decode/read errors are treated the same
        as a missing file.
        """
        if os.path.exists(LOG_FILE):
            try:
                with open(LOG_FILE, "r") as f:
                    return json.load(f)
            except (json.JSONDecodeError, OSError) as e:
                print(f"Log load failed, starting fresh: {e}")
        return {"loads": {}, "last_sync": str(datetime.now())}

    def sync_logs(self):
        """Write telemetry locally and best-effort push it to the Space repo.

        No-op without a token (nothing could be uploaded anyway).
        """
        if not HF_TOKEN:
            return
        with open(LOG_FILE, "w") as f:
            json.dump(self.popularity_data, f)
        try:
            # Pushes the JSON to the current Space repository
            repo_id = os.environ.get("SPACE_ID")
            api.upload_file(
                path_or_fileobj=LOG_FILE,
                path_in_repo=LOG_FILE,
                repo_id=repo_id,
                repo_type="space",
            )
        except Exception as e:
            # Telemetry is best-effort; never take the engine down over it.
            print(f"Sync failed: {e}")

    def get_metrics(self):
        """Return current host RAM/CPU figures as a plain dict (GB / percent)."""
        ram = psutil.virtual_memory()
        return {
            "available_gb": round(ram.available / (1024**3), 2),
            "total_gb": round(ram.total / (1024**3), 2),
            # interval=None -> non-blocking reading since the previous call
            "cpu_pct": psutil.cpu_percent(interval=None),
        }

    def validate_and_load(self, repo, filename):
        """Download *filename* from *repo*, RAM-check it, and activate it.

        Returns a human-readable status string for the UI: either the
        success banner or the reason the load was declined.
        """
        metrics = self.get_metrics()
        available_ram_mb = metrics["available_gb"] * 1024

        # 1. Fetch File Info
        # NOTE(review): this downloads the full file *before* the size check,
        # so disk is spent even on models that get declined below.
        path = hf_hub_download(repo_id=repo, filename=filename, token=HF_TOKEN)
        file_size_mb = os.path.getsize(path) / (1024**2)

        # 2. RAM Safety Check
        if file_size_mb > (metrics["total_gb"] * 1024 * MODEL_MAX_RAM_PCT):
            return f"❌ DECLINED: Model ({file_size_mb:.1f}MB) exceeds 50% threshold."

        if (file_size_mb + SYSTEM_BUFFER_MB) > available_ram_mb:
            return f"❌ DECLINED: Insufficient RAM for safety buffer."

        # 3. Load Model
        # Rebind to None (not `del`) so the attribute still exists and reads
        # as "no model" if Llama() raises partway through the load.
        if self.llm:
            self.llm = None
        self.llm = Llama(
            model_path=path,
            n_ctx=2048,
            n_threads=1,  # Fixed to 1 core for partitioning
            n_batch=512,
            use_mmap=True,
            verbose=False,
        )
        self.current_repo = repo
        self.current_file = filename

        # 4. Telemetry
        self.popularity_data["loads"][filename] = self.popularity_data["loads"].get(filename, 0) + 1
        self.sync_logs()

        return f"✅ ZeroEngine Active: {filename}"
87
+
88
engine = ZeroEngine()

# --- UI INTERFACE ---
with gr.Blocks(theme=gr.themes.Monochrome(), fill_height=True) as demo:
    gr.Markdown("# 🛰️ ZeroEngine V0.1 Kernel")

    with gr.Row():
        # MAIN CHAT (Center)
        with gr.Column(scale=8):
            chatbot = gr.Chatbot(type="messages", label="Engine Output")
            msg_input = gr.Textbox(placeholder="Input command for Active Slot...", label="Active Command")

    # ENGINE SIDEBAR (Right)
    with gr.Sidebar(label="Engine Room", open=False) as sidebar:
        gr.Markdown("### 📊 Metrics")
        ram_gauge = gr.Markdown("RAM: Calculating...")
        cpu_gauge = gr.Markdown("CPU: Calculating...")

        gr.Markdown("---")
        gr.Markdown("### 📥 Model Loader")
        repo_id = gr.Textbox(label="HF Repository", value="unsloth/Llama-3.2-1B-Instruct-GGUF")
        file_select = gr.Dropdown(label="Quantization File", choices=[])
        scan_btn = gr.Button("Scan Repository")
        load_btn = gr.Button("ACTIVATE ENGINE", variant="primary")
        status = gr.Markdown("Status: Standby")

        gr.Markdown("---")
        gr.Markdown("### 👻 Ghost Terminal (Queue)")
        ghost_input = gr.Textbox(placeholder="Pre-type prompt here...", label="Queue Buffer")
        gr.Markdown("_Queue inputs are tokenized and cached immediately upon slot availability._")

    # --- LOGIC HANDLERS ---
    def update_stats():
        """Refresh the RAM/CPU gauges (polled every 2s via demo.load)."""
        m = engine.get_metrics()
        return f"**RAM:** {m['available_gb']}GB / {m['total_gb']}GB", f"**CPU (Shared):** {m['cpu_pct']}%"

    def scan_repo(repo):
        """List .gguf files in *repo*; a bad/unreachable repo yields an
        empty dropdown instead of an unhandled handler exception."""
        try:
            files = api.list_repo_files(repo_id=repo)
        except Exception as e:
            print(f"Repo scan failed: {e}")
            return gr.update(choices=[], value=None)
        gguf_files = [f for f in files if f.endswith(".gguf")]
        return gr.update(choices=gguf_files, value=gguf_files[0] if gguf_files else None)

    def trigger_load(repo, file):
        # Automatically open sidebar to show metrics during load; surface
        # download/load failures as a status line, not a stack trace.
        try:
            return engine.validate_and_load(repo, file), gr.update(open=True)
        except Exception as e:
            return f"❌ LOAD FAILED: {e}", gr.update(open=True)

    def chat_fn(message, history, ghost_msg):
        """Stream a completion; the ghost buffer (if set) is prepended to the prompt."""
        if not engine.llm:
            # Keep the user's message in the transcript alongside the error.
            yield history + [
                {"role": "user", "content": message},
                {"role": "assistant", "content": "Error: Engine not initialized."},
            ]
            return

        # Stitch Ghost Prompt if exists
        full_prompt = f"{ghost_msg}\n{message}" if ghost_msg else message
        response = ""

        for chunk in engine.llm(full_prompt, max_tokens=1024, stream=True):
            token = chunk["choices"][0].get("text", "")
            response += token
            yield history + [{"role": "user", "content": message}, {"role": "assistant", "content": response}]

    # Events
    demo.load(update_stats, None, [ram_gauge, cpu_gauge], every=2)
    scan_btn.click(scan_repo, [repo_id], [file_select])
    load_btn.click(trigger_load, [repo_id, file_select], [status, sidebar])
    msg_input.submit(chat_fn, [msg_input, chatbot, ghost_input], [chatbot], concurrency_limit=2)
    msg_input.submit(lambda: "", None, [msg_input])  # Clear active
    msg_input.submit(lambda: "", None, [ghost_input])  # Clear ghost buffer after use

demo.queue().launch()