turtle170 committed on
Commit
ec969c4
·
verified ·
1 Parent(s): 6db8c1d

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -155
app.py DELETED
@@ -1,155 +0,0 @@
1
- import gradio as gr
2
- import psutil
3
- import os
4
- import json
5
- import time
6
- from datetime import datetime
7
- from huggingface_hub import HfApi, hf_hub_download
8
- from llama_cpp import Llama
9
-
10
# --- ENGINE CONFIGURATION ---
HF_TOKEN = os.environ.get("HF_TOKEN")  # optional HF token; when unset, log sync is disabled
api = HfApi(token=HF_TOKEN)  # shared Hub client used for uploads and repo listing
LOG_FILE = "engine_popularity.json"  # local telemetry file, pushed back to the Space repo
SYSTEM_BUFFER_MB = 200  # RAM headroom (MB) kept free for the OS/runtime when loading a model
MODEL_MAX_RAM_PCT = 0.50  # a model file may occupy at most this fraction of total RAM
17
class ZeroEngine:
    """Load GGUF models through llama.cpp with RAM-safety checks and telemetry.

    Holds at most one model at a time. Relies on the module-level
    configuration: HF_TOKEN, api, LOG_FILE, SYSTEM_BUFFER_MB and
    MODEL_MAX_RAM_PCT.
    """

    def __init__(self):
        self.llm = None          # active llama_cpp.Llama instance, or None
        self.current_repo = ""   # HF repo id of the currently loaded model
        self.current_file = ""   # GGUF filename of the currently loaded model
        self.popularity_data = self.load_logs()

    def load_logs(self):
        """Return the persisted popularity log, or a fresh one.

        Falls back to an empty log when the file is missing, unreadable or
        corrupt, so a bad JSON file cannot prevent startup (the original
        raised on corrupt JSON).
        """
        if os.path.exists(LOG_FILE):
            try:
                with open(LOG_FILE, "r") as f:
                    return json.load(f)
            except (json.JSONDecodeError, OSError) as e:
                print(f"Log load failed, starting fresh: {e}")
        return {"loads": {}, "last_sync": str(datetime.now())}

    def sync_logs(self):
        """Persist the popularity log locally and push it to the Space repo.

        Best-effort: upload failures are printed, never raised. A no-op
        when HF_TOKEN is unset.
        """
        if not HF_TOKEN:
            return
        with open(LOG_FILE, "w") as f:
            json.dump(self.popularity_data, f)
        try:
            # Pushes the JSON to the current Space repository.
            repo_id = os.environ.get("SPACE_ID")
            api.upload_file(
                path_or_fileobj=LOG_FILE,
                path_in_repo=LOG_FILE,
                repo_id=repo_id,
                repo_type="space"
            )
        except Exception as e:
            print(f"Sync failed: {e}")

    def get_metrics(self):
        """Return current host metrics: available/total RAM (GB) and CPU %."""
        ram = psutil.virtual_memory()
        return {
            "available_gb": round(ram.available / (1024**3), 2),
            "total_gb": round(ram.total / (1024**3), 2),
            # interval=None: non-blocking, reports % since the previous call
            "cpu_pct": psutil.cpu_percent(interval=None)
        }

    def validate_and_load(self, repo, filename):
        """Download *filename* from *repo* and load it if RAM allows.

        Returns a human-readable status string. Declines (without loading)
        when the model would exceed MODEL_MAX_RAM_PCT of total RAM, or would
        not leave SYSTEM_BUFFER_MB of headroom in available RAM.
        """
        metrics = self.get_metrics()
        available_ram_mb = metrics["available_gb"] * 1024

        # 1. Fetch File Info (hf_hub_download caches, so repeat loads are cheap)
        path = hf_hub_download(repo_id=repo, filename=filename, token=HF_TOKEN)
        file_size_mb = os.path.getsize(path) / (1024**2)

        # 2. RAM Safety Check
        if file_size_mb > (metrics["total_gb"] * 1024 * MODEL_MAX_RAM_PCT):
            return f"❌ DECLINED: Model ({file_size_mb:.1f}MB) exceeds 50% threshold."

        if (file_size_mb + SYSTEM_BUFFER_MB) > available_ram_mb:
            return f"❌ DECLINED: Insufficient RAM for safety buffer."

        # 3. Load Model. Drop the old reference by rebinding rather than
        # `del self.llm`: `del` removed the attribute entirely, so a failing
        # Llama() constructor left later `if self.llm` checks raising
        # AttributeError.
        self.llm = None
        self.llm = Llama(
            model_path=path,
            n_ctx=2048,
            n_threads=1,  # Fixed to 1 core for partitioning
            n_batch=512,
            use_mmap=True,
            verbose=False
        )
        self.current_repo = repo
        self.current_file = filename

        # 4. Telemetry: count this load and push the log.
        loads = self.popularity_data["loads"]
        loads[filename] = loads.get(filename, 0) + 1
        self.sync_logs()

        # The original returned a placeholder f-string with no fields
        # ("(unknown)"); report what was actually activated.
        return f"✅ ZeroEngine Active: {repo}/{filename}"
87
# Single module-level engine instance shared by all UI handlers.
engine = ZeroEngine()

# --- UI INTERFACE ---
# Gradio app: a chat pane plus an "Engine Room" sidebar holding live host
# metrics, a GGUF model loader and a pre-typed "ghost" prompt queue.
with gr.Blocks(theme=gr.themes.Monochrome(), fill_height=True) as demo:
    gr.Markdown("# 🛰️ ZeroEngine V0.1 Kernel")

    with gr.Row():
        # MAIN CHAT (Center)
        with gr.Column(scale=8):
            chatbot = gr.Chatbot(type="messages", label="Engine Output")
            msg_input = gr.Textbox(placeholder="Input command for Active Slot...", label="Active Command")

        # ENGINE SIDEBAR (Right) — starts collapsed; opened automatically on model load.
        with gr.Sidebar(label="Engine Room", open=False) as sidebar:
            gr.Markdown("### 📊 Metrics")
            ram_gauge = gr.Markdown("RAM: Calculating...")
            cpu_gauge = gr.Markdown("CPU: Calculating...")

            gr.Markdown("---")
            gr.Markdown("### 📥 Model Loader")
            repo_id = gr.Textbox(label="HF Repository", value="unsloth/Llama-3.2-1B-Instruct-GGUF")
            file_select = gr.Dropdown(label="Quantization File", choices=[])
            scan_btn = gr.Button("Scan Repository")
            load_btn = gr.Button("ACTIVATE ENGINE", variant="primary")
            status = gr.Markdown("Status: Standby")

            gr.Markdown("---")
            gr.Markdown("### 👻 Ghost Terminal (Queue)")
            ghost_input = gr.Textbox(placeholder="Pre-type prompt here...", label="Queue Buffer")
            gr.Markdown("_Queue inputs are tokenized and cached immediately upon slot availability._")

    # --- LOGIC HANDLERS ---
    def update_stats():
        # Refresh the sidebar gauges from live psutil readings.
        m = engine.get_metrics()
        return f"**RAM:** {m['available_gb']}GB / {m['total_gb']}GB", f"**CPU (Shared):** {m['cpu_pct']}%"

    def scan_repo(repo):
        # List the repo and offer only .gguf files; preselect the first one
        # (value is None when the repo contains no GGUF files).
        files = api.list_repo_files(repo_id=repo)
        gguf_files = [f for f in files if f.endswith(".gguf")]
        return gr.update(choices=gguf_files, value=gguf_files[0] if gguf_files else None)

    def trigger_load(repo, file):
        # Automatically open sidebar to show metrics during load
        return engine.validate_and_load(repo, file), gr.update(open=True)

    def chat_fn(message, history, ghost_msg):
        # Streaming chat handler: yields the growing transcript as tokens
        # arrive from the llama.cpp completion stream.
        if not engine.llm:
            yield history + [{"role": "assistant", "content": "Error: Engine not initialized."}]
            return

        # Stitch Ghost Prompt if exists
        full_prompt = f"{ghost_msg}\n{message}" if ghost_msg else message
        response = ""

        for chunk in engine.llm(full_prompt, max_tokens=1024, stream=True):
            token = chunk["choices"][0].get("text", "")
            response += token
            yield history + [{"role": "user", "content": message}, {"role": "assistant", "content": response}]

    # Events
    # NOTE(review): `every=2` polls update_stats every 2 seconds — confirm the
    # installed Gradio version supports this keyword (newer releases moved
    # periodic updates to gr.Timer).
    demo.load(update_stats, None, [ram_gauge, cpu_gauge], every=2)
    scan_btn.click(scan_repo, [repo_id], [file_select])
    load_btn.click(trigger_load, [repo_id, file_select], [status, sidebar])
    # Three submit listeners fire on the same event: run the chat, then clear
    # both input buffers.
    msg_input.submit(chat_fn, [msg_input, chatbot, ghost_input], [chatbot], concurrency_limit=2)
    msg_input.submit(lambda: "", None, [msg_input])  # Clear active
    msg_input.submit(lambda: "", None, [ghost_input])  # Clear ghost buffer after use

demo.queue().launch()