Yermek68 commited on
Commit
241c892
·
verified ·
1 Parent(s): 4508d0d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -143
app.py CHANGED
@@ -1,79 +1,50 @@
1
  """
2
- 🤖 Eroha AgentAPI v5.9 — Enterprise Edition
3
  Enterprise-grade architecture for Hugging Face Spaces
4
  Auto-Token Recovery | Smart Fallback 2.0 | Self-Heal | Metrics | Stable Dashboard
5
  """
6
 
7
- import os
8
- import asyncio
9
- import aiohttp
10
- import time
11
  from datetime import datetime
12
  import gradio as gr
13
  from fastapi import FastAPI
14
- from fastapi.responses import JSONResponse, HTMLResponse, PlainTextResponse
15
  from transformers import pipeline
16
- import psutil
17
 
18
- # ==========================
19
- # CONFIGURATION
20
- # ==========================
21
  HF_TOKEN = os.getenv("HF_TOKEN", "")
22
  ROUTER_URL = "https://api-inference.huggingface.co/models"
23
  FALLBACK_MODEL = "sshleifer/tiny-gpt2"
24
- CHECK_INTERVAL = 180 # Router health check every 3 minutes
25
- MAX_MEMORY_THRESHOLD = 85 # Fallback loads only if memory < 85%
26
 
27
- # ==========================
28
- # CORE CLASSES
29
- # ==========================
30
  class CircuitBreaker:
31
  def __init__(self, threshold=3, timeout=60):
32
- self.failures = 0
33
- self.threshold = threshold
34
- self.timeout = timeout
35
- self.state = "CLOSED"
36
- self.last_failure = 0
37
-
38
  def allow(self):
39
  if self.state == "OPEN" and time.time() - self.last_failure < self.timeout:
40
  return False
41
  if self.state == "OPEN" and time.time() - self.last_failure >= self.timeout:
42
  self.state = "HALF_OPEN"
43
  return True
44
-
45
- def record_success(self):
46
- self.failures = 0
47
- if self.state in ["HALF_OPEN", "OPEN"]:
48
- self.state = "CLOSED"
49
-
50
  def record_failure(self):
51
  self.failures += 1
52
  if self.failures >= self.threshold:
53
- self.state = "OPEN"
54
- self.last_failure = time.time()
55
-
56
  circuit = CircuitBreaker()
57
 
58
- # ==========================
59
- # HF CLIENT
60
- # ==========================
61
  class HFClient:
62
  def __init__(self):
63
- self.token = HF_TOKEN
64
- self.headers = {"Authorization": f"Bearer {self.token}"} if self.token else {}
65
- self.valid = False
66
- self.session = None
67
- self.retries = 0
68
- self.latency = 0
69
-
70
  async def get_session(self):
71
  if not self.session or self.session.closed:
72
  self.session = aiohttp.ClientSession()
73
  return self.session
74
-
75
  async def validate(self):
76
- """Validate HF token"""
77
  try:
78
  async with aiohttp.ClientSession() as s:
79
  start = time.time()
@@ -84,23 +55,17 @@ class HFClient:
84
  except:
85
  self.valid = False
86
  return False
87
-
88
  async def infer(self, model, text):
89
- """Inference through Hugging Face Router"""
90
  if not circuit.allow():
91
  return {"error": "Circuit breaker open — fallback engaged"}
92
-
93
  try:
94
- session = await self.get_session()
95
  payload = {"inputs": text, "parameters": {"max_new_tokens": 250}}
96
  start = time.time()
97
- async with session.post(
98
- f"{ROUTER_URL}/{model}", headers=self.headers, json=payload, timeout=30
99
- ) as r:
100
  self.latency = round((time.time() - start) * 1000, 2)
101
  if r.status == 200:
102
  circuit.record_success()
103
- self.retries = 0
104
  return await r.json()
105
  else:
106
  circuit.record_failure()
@@ -111,147 +76,84 @@ class HFClient:
111
  except Exception as e:
112
  circuit.record_failure()
113
  return {"error": f"Router exception: {e}"}
114
-
115
  async def recover_token(self):
116
- """Try to reload token from backup or /tmp file"""
117
  print("⚠️ Token invalid — trying recovery...")
118
- token_paths = [
119
- "/tmp/hf_token.txt",
120
- os.getenv("HF_TOKEN_BACKUP", "")
121
- ]
122
- for path in token_paths:
123
  if path and os.path.exists(path):
124
- try:
125
- with open(path, "r") as f:
126
- token = f.read().strip()
127
- if token:
128
- self.headers = {"Authorization": f"Bearer {token}"}
129
  print("✅ Token recovered successfully.")
130
- self.valid = await self.validate()
131
- if self.valid:
132
- return True
133
- except:
134
- continue
135
  print("❌ Token recovery failed.")
136
  return False
137
-
138
  client = HFClient()
139
 
140
- # ==========================
141
- # SMART FALLBACK
142
- # ==========================
143
  class Fallback:
144
- def __init__(self):
145
- self.pipe = None
146
- self.loaded = False
147
-
148
  async def load(self):
149
- """Load fallback model if memory is OK"""
150
  if not self.loaded and psutil.virtual_memory().percent < MAX_MEMORY_THRESHOLD:
151
  print("🧠 Loading fallback model...")
152
  self.pipe = pipeline("text-generation", model=FALLBACK_MODEL)
153
  self.loaded = True
154
- print("✅ Fallback model ready.")
155
-
156
  async def generate(self, text):
157
- """Generate fallback response"""
158
- if not self.loaded:
159
- await self.load()
160
- if not self.pipe:
161
- return "⚠️ Fallback model unavailable."
162
- result = self.pipe(text, max_new_tokens=100)[0]["generated_text"]
163
- return result
164
-
165
  fallback = Fallback()
166
 
167
- # ==========================
168
- # WATCHDOG & SELF-HEAL
169
- # ==========================
170
  async def watchdog():
171
- uptime_start = time.time()
172
  while True:
173
  await asyncio.sleep(CHECK_INTERVAL)
174
  print(f"[{datetime.now().isoformat()}] 🩺 Watchdog check...")
175
- valid = await client.validate()
176
- if not valid:
177
  await client.recover_token()
178
- mem = psutil.virtual_memory().percent
179
- if mem > 90:
180
- print(f"⚠️ High memory usage: {mem}% — consider restart.")
181
  if not circuit.allow():
182
- print("⚠️ Circuit breaker is open — self-healing...")
183
  circuit.state = "CLOSED"
 
184
 
185
- # ==========================
186
- # FASTAPI CORE
187
- # ==========================
188
  app = FastAPI(title="Eroha AgentAPI v5.9 — Enterprise Edition")
189
 
190
  @app.on_event("startup")
191
  async def startup():
192
- print("🚀 Starting Eroha AgentAPI v5.9 — Enterprise Edition")
193
  asyncio.create_task(watchdog())
194
  await client.validate()
195
 
196
  @app.get("/health")
197
  async def health():
198
- uptime = round(time.time() - psutil.boot_time(), 1)
199
- mem = psutil.virtual_memory().percent
200
  return JSONResponse({
201
  "status": "ok" if client.valid else "degraded",
202
  "circuit": circuit.state,
203
- "memory": mem,
204
  "latency_ms": client.latency,
205
- "uptime_s": uptime,
206
  "token_valid": client.valid
207
  })
208
 
209
  @app.get("/metrics", response_class=PlainTextResponse)
210
  async def metrics():
211
- """Prometheus-style metrics"""
212
  mem = psutil.virtual_memory().percent
213
- return (
214
- f"hf_token_valid {1 if client.valid else 0}\n"
215
- f"router_latency_ms {client.latency}\n"
216
- f"memory_usage_percent {mem}\n"
217
- f"circuit_state {'0' if circuit.state == 'CLOSED' else 1}\n"
218
- )
219
 
220
  @app.post("/inference")
221
  async def inference(data: dict):
222
- text = data.get("prompt", "")
223
- model = data.get("model", "microsoft/phi-3-mini-4k-instruct")
224
  res = await client.infer(model, text)
225
  if "error" in res:
226
  fb = await fallback.generate(text)
227
  return {"source": "fallback", "response": fb, "note": res["error"]}
228
  return {"source": "router", "response": res}
229
 
230
- @app.get("/", response_class=HTMLResponse)
231
- async def root_page():
232
- """Root status page"""
233
- return """
234
- <html><head><title>Eroha AgentAPI v5.9</title></head>
235
- <body style='font-family:Arial;text-align:center;padding:2em;'>
236
- <h2>🤖 Eroha AgentAPI v5.9 — Enterprise Edition</h2>
237
- <p>Status: <b style='color:green;'>Running ✅</b></p>
238
- <p><a href='/ui' style='font-size:18px;color:#4a68ff;'>Открыть интерфейс →</a></p>
239
- <p><a href='/health'>Health</a> • <a href='/metrics'>Metrics</a></p>
240
- </body></html>
241
- """
242
-
243
- @app.get("/favicon.ico")
244
- async def favicon():
245
- return PlainTextResponse("", status_code=204)
246
-
247
- # ==========================
248
- # GRADIO DASHBOARD
249
- # ==========================
250
  def gradio_infer(prompt, model_choice):
251
- loop = asyncio.new_event_loop()
252
- asyncio.set_event_loop(loop)
253
- result = loop.run_until_complete(client.infer(model_choice, prompt))
254
- loop.close()
255
  if "error" in result:
256
  fb = asyncio.run(fallback.generate(prompt))
257
  return f"⚠️ Router failed ({result['error']})\n\n🧠 Fallback:\n{fb}"
@@ -274,9 +176,9 @@ def show_dashboard():
274
  | Time | {datetime.now().strftime("%H:%M:%S")} |
275
  """
276
 
277
- demo = gr.Blocks(title="Eroha AgentAPI v5.9 — Enterprise Edition")
278
  with demo:
279
- gr.Markdown("# 🤖 Eroha AgentAPI v5.9 — AutoRecovery + Smart Dashboard")
280
  with gr.Tab("💬 Chat"):
281
  inp = gr.Textbox(label="Введите запрос")
282
  model = gr.Dropdown(
@@ -292,8 +194,6 @@ with demo:
292
  refresh.click(fn=show_dashboard, outputs=dash)
293
  dash.value = show_dashboard()
294
 
295
- app = gr.mount_gradio_app(app, demo, path="/ui")
296
-
297
  if __name__ == "__main__":
298
- import uvicorn
299
- uvicorn.run(app, host="0.0.0.0", port=7860)
 
1
  """
2
+ 🤖 Eroha AgentAPI v5.9.2 — Enterprise Edition (Docker UI Fix)
3
  Enterprise-grade architecture for Hugging Face Spaces
4
  Auto-Token Recovery | Smart Fallback 2.0 | Self-Heal | Metrics | Stable Dashboard
5
  """
6
 
7
+ import os, asyncio, aiohttp, time, psutil
 
 
 
8
  from datetime import datetime
9
  import gradio as gr
10
  from fastapi import FastAPI
11
+ from fastapi.responses import JSONResponse, PlainTextResponse
12
  from transformers import pipeline
 
13
 
 
 
 
14
  HF_TOKEN = os.getenv("HF_TOKEN", "")
15
  ROUTER_URL = "https://api-inference.huggingface.co/models"
16
  FALLBACK_MODEL = "sshleifer/tiny-gpt2"
17
+ CHECK_INTERVAL = 180
18
+ MAX_MEMORY_THRESHOLD = 85
19
 
20
# ================= CIRCUIT BREAKER =================

class CircuitBreaker:
    """Trip after `threshold` consecutive failures; cool down for `timeout` seconds.

    States: CLOSED (normal), OPEN (rejecting), HALF_OPEN (probing after cooldown).
    """

    def __init__(self, threshold=3, timeout=60):
        self.failures = 0          # consecutive failure count
        self.threshold = threshold # failures needed to trip OPEN
        self.timeout = timeout     # seconds to stay OPEN before probing
        self.state = "CLOSED"
        self.last_failure = 0      # time.time() of the trip that opened us

    def allow(self):
        """Return False only while OPEN and still within the cooldown window."""
        if self.state == "OPEN":
            elapsed = time.time() - self.last_failure
            if elapsed < self.timeout:
                return False
            # Cooldown expired — let one request through to probe the router.
            self.state = "HALF_OPEN"
        return True

    def record_success(self):
        # Any success fully resets the breaker.
        self.failures = 0
        self.state = "CLOSED"

    def record_failure(self):
        self.failures += 1
        if self.failures >= self.threshold:
            self.state = "OPEN"
            self.last_failure = time.time()
 
 
36
  circuit = CircuitBreaker()
37
 
38
+ # ================= HF CLIENT =================
 
 
39
  class HFClient:
40
  def __init__(self):
41
+ self.token, self.valid, self.session, self.latency = HF_TOKEN, False, None, 0
42
+ self.headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
 
 
 
 
 
43
  async def get_session(self):
44
  if not self.session or self.session.closed:
45
  self.session = aiohttp.ClientSession()
46
  return self.session
 
47
  async def validate(self):
 
48
  try:
49
  async with aiohttp.ClientSession() as s:
50
  start = time.time()
 
55
  except:
56
  self.valid = False
57
  return False
 
58
  async def infer(self, model, text):
 
59
  if not circuit.allow():
60
  return {"error": "Circuit breaker open — fallback engaged"}
 
61
  try:
62
+ s = await self.get_session()
63
  payload = {"inputs": text, "parameters": {"max_new_tokens": 250}}
64
  start = time.time()
65
+ async with s.post(f"{ROUTER_URL}/{model}", headers=self.headers, json=payload, timeout=30) as r:
 
 
66
  self.latency = round((time.time() - start) * 1000, 2)
67
  if r.status == 200:
68
  circuit.record_success()
 
69
  return await r.json()
70
  else:
71
  circuit.record_failure()
 
76
  except Exception as e:
77
  circuit.record_failure()
78
  return {"error": f"Router exception: {e}"}
 
79
  async def recover_token(self):
 
80
  print("⚠️ Token invalid — trying recovery...")
81
+ for path in ["/tmp/hf_token.txt", os.getenv("HF_TOKEN_BACKUP", "")]:
 
 
 
 
82
  if path and os.path.exists(path):
83
+ with open(path) as f:
84
+ token = f.read().strip()
85
+ if token:
86
+ self.headers = {"Authorization": f"Bearer {token}"}
87
+ if await self.validate():
88
  print("✅ Token recovered successfully.")
89
+ return True
 
 
 
 
90
  print("❌ Token recovery failed.")
91
  return False
 
92
  client = HFClient()
93
 
94
# ================= FALLBACK =================

class Fallback:
    """Local tiny-model fallback used when the HF router is unavailable."""

    def __init__(self):
        self.pipe = None     # transformers pipeline, created lazily
        self.loaded = False

    async def load(self):
        """Load the fallback model once, but only while memory headroom exists."""
        if self.loaded:
            return
        if psutil.virtual_memory().percent >= MAX_MEMORY_THRESHOLD:
            # Under memory pressure: stay unloaded rather than risk an OOM.
            return
        print("🧠 Loading fallback model...")
        self.pipe = pipeline("text-generation", model=FALLBACK_MODEL)
        self.loaded = True

    async def generate(self, text):
        """Generate locally; return a warning string when no model is available."""
        await self.load()
        if not self.pipe:
            return "⚠️ Fallback unavailable."
        return self.pipe(text, max_new_tokens=100)[0]["generated_text"]
 
 
 
 
 
106
  fallback = Fallback()
107
 
108
# ================= WATCHDOG =================

async def watchdog():
    """Background self-heal loop: re-validate the token, watch memory, reset the breaker.

    Runs forever; one pass every CHECK_INTERVAL seconds.
    """
    while True:
        await asyncio.sleep(CHECK_INTERVAL)
        print(f"[{datetime.now().isoformat()}] 🩺 Watchdog check...")
        token_ok = await client.validate()
        if not token_ok:
            await client.recover_token()
        if psutil.virtual_memory().percent > 90:
            print("⚠️ High memory usage.")
        if not circuit.allow():
            # Force-close the breaker so traffic can resume next cycle.
            circuit.state = "CLOSED"
            print("🛠️ Circuit auto-healed.")
120
 
121
# ================= FASTAPI =================

# Title aligned with the module docstring and UI ("v5.9.2"); it previously
# still said "v5.9" while the rest of the app had been bumped.
app = FastAPI(title="Eroha AgentAPI v5.9.2 — Enterprise Edition")
123
 
124
  @app.on_event("startup")
125
  async def startup():
126
+ print("🚀 Starting Eroha AgentAPI v5.9.2 — Enterprise Edition")
127
  asyncio.create_task(watchdog())
128
  await client.validate()
129
 
130
  @app.get("/health")
131
  async def health():
 
 
132
  return JSONResponse({
133
  "status": "ok" if client.valid else "degraded",
134
  "circuit": circuit.state,
135
+ "memory": psutil.virtual_memory().percent,
136
  "latency_ms": client.latency,
 
137
  "token_valid": client.valid
138
  })
139
 
140
  @app.get("/metrics", response_class=PlainTextResponse)
141
  async def metrics():
 
142
  mem = psutil.virtual_memory().percent
143
+ return f"hf_token_valid {1 if client.valid else 0}\nrouter_latency_ms {client.latency}\nmemory_usage_percent {mem}\ncircuit_state {'0' if circuit.state == 'CLOSED' else 1}\n"
 
 
 
 
 
144
 
145
  @app.post("/inference")
146
  async def inference(data: dict):
147
+ text, model = data.get("prompt", ""), data.get("model", "microsoft/phi-3-mini-4k-instruct")
 
148
  res = await client.infer(model, text)
149
  if "error" in res:
150
  fb = await fallback.generate(text)
151
  return {"source": "fallback", "response": fb, "note": res["error"]}
152
  return {"source": "router", "response": res}
153
 
154
+ # ================= GRADIO UI =================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
155
  def gradio_infer(prompt, model_choice):
156
+ result = asyncio.run(client.infer(model_choice, prompt))
 
 
 
157
  if "error" in result:
158
  fb = asyncio.run(fallback.generate(prompt))
159
  return f"⚠️ Router failed ({result['error']})\n\n🧠 Fallback:\n{fb}"
 
176
  | Time | {datetime.now().strftime("%H:%M:%S")} |
177
  """
178
 
179
+ demo = gr.Blocks(title="Eroha AgentAPI v5.9.2 — Enterprise Edition")
180
  with demo:
181
+ gr.Markdown("# 🤖 Eroha AgentAPI v5.9.2Enterprise Edition")
182
  with gr.Tab("💬 Chat"):
183
  inp = gr.Textbox(label="Введите запрос")
184
  model = gr.Dropdown(
 
194
  refresh.click(fn=show_dashboard, outputs=dash)
195
  dash.value = show_dashboard()
196
 
197
# ---- Launch for Docker ----
if __name__ == "__main__":
    # NOTE(review): only the Gradio UI is served here — the FastAPI `app`
    # (/health, /metrics, /inference) is never mounted or run in this mode;
    # confirm that is intentional for the Docker deployment.
    demo.queue().launch(server_name="0.0.0.0", server_port=7860, share=False, inline=False)