Yermek68 commited on
Commit
85d0cd1
·
verified ·
1 Parent(s): 53a4fff

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -349
app.py CHANGED
@@ -1,426 +1,149 @@
1
- """
2
- 🤖 Eroha AgentAPI v5.9.2 — Enterprise Edition (Docker UI Fix)
3
- Enterprise-grade architecture for Hugging Face Spaces
4
- Auto-Token Recovery | Smart Fallback 2.0 | Self-Heal | Metrics | Stable Dashboard
5
- """
6
-
7
  import os, asyncio, aiohttp, time, psutil
8
  from datetime import datetime
9
  import gradio as gr
10
  from fastapi import FastAPI
11
- from fastapi.responses import JSONResponse, PlainTextResponse
12
- from transformers import pipeline
13
-
14
- # Добавляем папку core в путь для импорта
15
- import sys
16
- sys.path.append(os.path.join(os.path.dirname(__file__), "core"))
17
 
18
- # === Импорт логирования + метрик + FailSafe ===
19
- # from alert_core import log_alert
20
- from metrics_core import save_metrics
21
- from alerters import ConsoleAlerter, FileAlerter
22
- from alert_manager import AlertManager
23
- from failsafe_core import failsafe
24
 
25
- # Safe import for alert_core
26
- try:
27
- from alert_core import log_alert
28
- except ModuleNotFoundError:
29
- def log_alert(msg: str):
30
- print(f"[⚠️ ALERT] {msg} (alert_core not found — using fallback)")
31
 
32
- # === Настройка моделей для логики ===
33
- PRIMARY_MODEL = "microsoft/phi-3-mini-instruct"
34
- FALLBACK_MODEL = "sshleifer/tiny-gpt2"
35
 
36
- # Настройка AlertManager
37
- alert_manager = AlertManager([
38
- ConsoleAlerter(),
39
- FileAlerter("alerts_log.json")
40
- ])
41
 
 
 
42
  HF_TOKEN = os.getenv("HF_TOKEN", "")
43
  ROUTER_URL = "https://api-inference.huggingface.co/models"
44
- FALLBACK_MODEL = "sshleifer/tiny-gpt2"
45
  CHECK_INTERVAL = 180
46
- MAX_MEMORY_THRESHOLD = 85
47
 
48
- # ================= CIRCUIT BREAKER =================
49
  class CircuitBreaker:
50
  def __init__(self, threshold=3, timeout=60):
51
  self.failures, self.threshold, self.timeout = 0, threshold, timeout
52
  self.state, self.last_failure = "CLOSED", 0
 
53
  def allow(self):
54
  if self.state == "OPEN" and time.time() - self.last_failure < self.timeout:
55
  return False
56
  if self.state == "OPEN" and time.time() - self.last_failure >= self.timeout:
57
  self.state = "HALF_OPEN"
58
  return True
59
- def record_success(self): self.failures, self.state = 0, "CLOSED"
 
 
 
60
  def record_failure(self):
61
  self.failures += 1
62
  if self.failures >= self.threshold:
63
  self.state, self.last_failure = "OPEN", time.time()
 
64
  circuit = CircuitBreaker()
65
 
66
- # ================= HF CLIENT =================
67
  class HFClient:
68
  def __init__(self):
69
- self.token, self.valid, self.session, self.latency = HF_TOKEN, False, None, 0
 
 
70
  self.headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
 
71
  async def get_session(self):
72
  if not self.session or self.session.closed:
73
  self.session = aiohttp.ClientSession()
74
  return self.session
75
- async def validate(self):
76
- try:
77
- async with aiohttp.ClientSession() as s:
78
- start = time.time()
79
- async with s.get("https://huggingface.co/api/whoami-v2", headers=self.headers) as r:
80
- self.latency = round((time.time() - start) * 1000, 2)
81
- self.valid = r.status == 200
82
- return self.valid
83
- except:
84
- self.valid = False
85
- return False
86
- async def infer(self, model, text):
87
  if not circuit.allow():
88
- return {"error": "Circuit breaker open — fallback engaged"}
89
  try:
90
  s = await self.get_session()
91
- payload = {"inputs": text, "parameters": {"max_new_tokens": 250}}
92
  start = time.time()
93
- async with s.post(f"{ROUTER_URL}/{model}", headers=self.headers, json=payload, timeout=30) as r:
94
  self.latency = round((time.time() - start) * 1000, 2)
95
  if r.status == 200:
96
  circuit.record_success()
97
  return await r.json()
98
  else:
99
  circuit.record_failure()
100
- if r.status in (401, 410):
101
- self.valid = False
102
- await self.recover_token()
103
- return {"error": f"Router error {r.status}"}
104
  except Exception as e:
105
  circuit.record_failure()
106
- return {"error": f"Router exception: {e}"}
107
- async def recover_token(self):
108
- print("⚠️ Token invalid — trying recovery...")
109
- for path in ["/tmp/hf_token.txt", os.getenv("HF_TOKEN_BACKUP", "")]:
110
- if path and os.path.exists(path):
111
- with open(path) as f:
112
- token = f.read().strip()
113
- if token:
114
- self.headers = {"Authorization": f"Bearer {token}"}
115
- if await self.validate():
116
- print("✅ Token recovered successfully.")
117
- return True
118
- print("❌ Token recovery failed.")
119
- return False
120
- client = HFClient()
121
 
122
- # ================= FALLBACK =================
123
- class Fallback:
124
- def __init__(self): self.pipe, self.loaded = None, False
125
- async def load(self):
126
- if not self.loaded and psutil.virtual_memory().percent < MAX_MEMORY_THRESHOLD:
127
- print("🧠 Loading fallback model...")
128
- self.pipe = pipeline("text-generation", model=FALLBACK_MODEL)
129
- self.loaded = True
130
- async def generate(self, text):
131
- await self.load()
132
- if not self.pipe: return "⚠️ Fallback unavailable."
133
- return self.pipe(text, max_new_tokens=100)[0]["generated_text"]
134
- fallback = Fallback()
135
-
136
- # ================= WATCHDOG =================
137
- async def watchdog():
138
- while True:
139
- await asyncio.sleep(CHECK_INTERVAL)
140
- print(f"[{datetime.now().isoformat()}] 🩺 Watchdog check...")
141
- if not await client.validate():
142
- await client.recover_token()
143
- if psutil.virtual_memory().percent > 90:
144
- print("⚠️ High memory usage.")
145
- if not circuit.allow():
146
- circuit.state = "CLOSED"
147
- print("🛠️ Circuit auto-healed.")
148
 
149
- # ================= FASTAPI =================
150
- app = FastAPI(title="Eroha AgentAPI v5.9 — Enterprise Edition")
151
 
152
  @app.on_event("startup")
153
- async def startup():
154
- print("🚀 Starting Eroha AgentAPI v5.9.2 — Enterprise Edition")
155
  asyncio.create_task(watchdog())
156
- await client.validate()
157
 
158
  @app.get("/health")
159
  async def health():
160
- return JSONResponse({
161
- "status": "ok" if client.valid else "degraded",
162
- "circuit": circuit.state,
163
- "memory": psutil.virtual_memory().percent,
164
- "latency_ms": client.latency,
165
- "token_valid": client.valid
166
- })
167
 
168
  @app.get("/metrics", response_class=PlainTextResponse)
169
  async def metrics():
170
- mem = psutil.virtual_memory().percent
171
- return f"hf_token_valid {1 if client.valid else 0}\nrouter_latency_ms {client.latency}\nmemory_usage_percent {mem}\ncircuit_state {'0' if circuit.state == 'CLOSED' else 1}\n"
172
-
173
- @app.post("/inference")
174
- async def inference(data: dict):
175
-
176
- prompt = data.get("prompt", "")
177
- model = data.get("model", PRIMARY_MODEL)
178
-
179
- start_time = time.time()
180
-
181
- # FailSafe wrapper for primary inference
182
- @failsafe(alert_manager)
183
- async def run_primary(p, m):
184
- return await client.infer(m, p)
185
-
186
- try:
187
- res = await run_primary(prompt, model)
188
-
189
- duration = int((time.time() - start_time) * 1000)
190
-
191
- # Метрики
192
- save_metrics({
193
- "endpoint": "/inference",
194
- "model": model,
195
- "latency_ms": duration
196
- })
197
-
198
- # Лог — успешный ответ
199
- log_alert(
200
- source="agent",
201
- level="INFO",
202
- message=f"Inference OK (model={model})",
203
- extra={"prompt_len": len(prompt), "latency": duration}
204
- )
205
-
206
- # Если ошибка в ответе
207
- if isinstance(res, dict) and "error" in res:
208
- raise Exception(res["error"])
209
-
210
- return {"source": "router", "response": res}
211
-
212
- except Exception as primary_err:
213
-
214
- log_alert(
215
- source="agent",
216
- level="ERROR",
217
- message=f"Primary inference failed: {primary_err}",
218
- extra={"error": str(primary_err)}
219
- )
220
-
221
- # Fallback через FailSafe
222
- @failsafe(alert_manager)
223
- async def run_fallback(p):
224
- return await fallback.generate(p)
225
-
226
- try:
227
- fb = await run_fallback(prompt)
228
- duration = int((time.time() - start_time) * 1000)
229
-
230
- # Fallback метрики
231
- save_metrics({
232
- "endpoint": "/inference",
233
- "model": FALLBACK_MODEL,
234
- "latency_ms": duration,
235
- "fallback_used": True
236
- })
237
-
238
- log_alert(
239
- source="fallback",
240
- level="WARNING",
241
- message=f"Fallback inference OK (model={FALLBACK_MODEL})",
242
- extra={"latency": duration}
243
- )
244
-
245
- return {"source": "fallback", "response": fb}
246
-
247
- except Exception as fb_err:
248
- log_alert(
249
- source="fallback",
250
- level="ERROR",
251
- message=f"Fallback failed: {fb_err}",
252
- extra={"error": str(fb_err)}
253
- )
254
- return {"error": "Inference failure on both primary and fallback"}
255
-
256
 
257
- # ================= GRADIO UI =================
258
- def gradio_infer(prompt, model_choice):
 
259
 
260
- start_time = time.time()
261
- model = model_choice or PRIMARY_MODEL
262
-
263
- @failsafe(alert_manager)
264
- def run_model(p, m):
265
- return asyncio.run(client.infer(m, p))
 
 
 
 
 
 
 
266
 
 
 
267
  try:
268
- result = run_model(prompt, model)
269
-
270
- duration = int((time.time() - start_time) * 1000)
271
-
272
- # Metрики Gradio
273
- save_metrics({
274
- "interface": "gradio",
275
- "prompt_len": len(prompt),
276
- "model": model,
277
- "latency_ms": duration
278
- })
279
-
280
- log_alert(
281
- source="gradio",
282
- level="INFO",
283
- message=f"Gradio inference success (model={model})",
284
- extra={"latency": duration}
285
- )
286
-
287
- if isinstance(result, dict) and "error" in result:
288
- raise Exception(result["error"])
289
-
290
- if isinstance(result, list):
291
- return result[0].get("generated_text", str(result))
292
-
293
  return str(result)
294
-
295
- except Exception as ui_err:
296
-
297
- log_alert(
298
- source="gradio",
299
- level="ERROR",
300
- message=f"Gradio inference error: {ui_err}",
301
- extra={"error": str(ui_err)}
302
- )
303
-
304
- # fallback
305
- fb = asyncio.run(fallback.generate(prompt))
306
- return f"⚠️ Error: {ui_err}\n\n🧠 Fallback: {fb}"
307
-
308
 
309
  def show_dashboard():
310
  mem = psutil.virtual_memory().percent
311
- status = " Valid" if client.valid else "❌ Invalid"
312
- color = "green" if client.valid else "red"
313
- return f"""
314
- ### 🧠 Eroha Enterprise Dashboard
315
- | Metric | Value |
316
- |--------|--------|
317
- | Token | <span style='color:{color}'>{status}</span> |
318
- | Circuit | {circuit.state} |
319
- | Memory | {mem}% |
320
- | Latency | {client.latency} ms |
321
- | Time | {datetime.now().strftime("%H:%M:%S")} |
322
- """
323
-
324
- demo = gr.Blocks(title="Eroha AgentAPI v5.9.2 — Enterprise Edition")
325
- with demo:
326
- gr.Markdown("# 🤖 Eroha AgentAPI v5.9.2 — Enterprise Edition")
327
- with gr.Tab("💬 Chat"):
328
- inp = gr.Textbox(label="Введите запрос")
329
- model = gr.Dropdown(
330
- ["microsoft/phi-3-mini-4k-instruct", "google/gemma-2-2b-it", "meta-llama/Meta-Llama-3-8B-Instruct"],
331
- value="microsoft/phi-3-mini-4k-instruct", label="Модель"
332
- )
333
- out = gr.Textbox(label="Ответ")
334
- btn = gr.Button("🚀 Отправить")
335
- btn.click(fn=gradio_infer, inputs=[inp, model], outputs=out)
336
- with gr.Tab("📊 Dashboard"):
337
- dash = gr.Markdown()
338
- refresh = gr.Button("🔄 Обновить")
339
- refresh.click(fn=show_dashboard, outputs=dash)
340
- dash.value = show_dashboard()
341
-
342
- import uvicorn
343
- from gradio.routes import mount_gradio_app
344
-
345
- # Определяем, работает ли код внутри Hugging Face Spaces
346
- # HF Spaces detection
347
- IS_HF_SPACES = os.getenv("SPACE_ID") is not None
348
-
349
- import os
350
- import gradio as gr
351
- import uvicorn
352
- import logging
353
- from fastapi import FastAPI
354
- from gradio.routes import mount_gradio_app
355
-
356
- # =====================================================
357
- # 🔒 Safe import: alert_core (если нет — fallback)
358
- # =====================================================
359
- try:
360
- from alert_core import log_alert
361
- except ModuleNotFoundError:
362
- def log_alert(msg: str):
363
- print(f"[⚠️ ALERT] {msg} (alert_core not found — using fallback)")
364
-
365
- # =====================================================
366
- # 🧭 Настройка окружения и логирования
367
- # =====================================================
368
- IS_HF_SPACES = os.getenv("SPACE_ID") is not None
369
- RUN_ENV = "Hugging Face Spaces" if IS_HF_SPACES else "Localhost"
370
-
371
- logging.basicConfig(
372
- level=logging.INFO,
373
- format="%(asctime)s [%(levelname)s] %(message)s",
374
- handlers=[logging.StreamHandler()]
375
- )
376
-
377
- logging.info(f"🚀 Starting Eroha Agent environment: {RUN_ENV}")
378
- log_alert(f"System boot: {RUN_ENV}")
379
-
380
- # =====================================================
381
- # 🌐 Создаём FastAPI и интерфейс Gradio
382
- # =====================================================
383
- app = FastAPI()
384
 
385
  demo = gr.Blocks(title="Eroha AgentAPI v5.9.2")
386
  with demo:
387
  gr.Markdown("# 🤖 Eroha AgentAPI v5.9.2 — Enterprise Edition")
388
-
389
  with gr.Tab("💬 Chat"):
390
  inp = gr.Textbox(label="Введите запрос")
391
- model = gr.Dropdown(
392
- ["microsoft/phi-3-mini-4k-instruct",
393
- "google/gemma-2-2b-it",
394
- "meta-llama/Meta-Llama-3-8B-Instruct"],
395
- value="microsoft/phi-3-mini-4k-instruct",
396
- label="Модель"
397
- )
398
  out = gr.Textbox(label="Ответ")
399
- btn = gr.Button("🚀 Отправить")
400
- btn.click(fn=lambda x, m: f"Обработка запроса для {m}: {x}",
401
- inputs=[inp, model],
402
- outputs=out)
403
-
404
  with gr.Tab("📊 Dashboard"):
405
- dash = gr.Markdown("📈 Метрики ещё не загружены")
406
- refresh = gr.Button("🔄 Обновить")
407
- refresh.click(fn=lambda: "✅ Метрики обновлены", outputs=dash)
408
 
409
- # =====================================================
410
- # ⚙️ Запуск приложения
411
- # =====================================================
412
  if __name__ == "__main__":
413
- if IS_HF_SPACES:
414
- logging.info(" Running on Hugging Face Spaces (port 7860)")
415
- app = mount_gradio_app(app, demo, path="/")
416
- uvicorn.run(app, host="0.0.0.0", port=7860)
417
- else:
418
- import threading
419
- logging.info("✅ Running locally (FastAPI → 7860 | Gradio → 7861)")
420
-
421
- def run_gradio():
422
- demo.queue().launch(server_port=7861, share=False)
423
-
424
- threading.Thread(target=run_gradio, daemon=True).start()
425
- uvicorn.run(app, host="0.0.0.0", port=7860)
426
-
 
 
 
 
 
 
 
1
  import os, asyncio, aiohttp, time, psutil
2
  from datetime import datetime
3
  import gradio as gr
4
  from fastapi import FastAPI
5
+ from fastapi.responses import PlainTextResponse
6
+ from pydantic import BaseModel
7
+ import uvicorn
8
+ from gradio.routes import mount_gradio_app
 
 
9
 
10
+ # === Detect HF Spaces ===
11
+ IS_HF_SPACES = os.getenv("SPACE_ID") is not None
 
 
 
 
12
 
13
+ # === Заглушки для core-модулей (чтобы не было ModuleNotFoundError) ===
14
+ def log_alert(source: str, level: str, message: str):
15
+ print(f"[{level}] {source}: {message}")
 
 
 
16
 
17
+ def save_metrics(data):
18
+ print(f"📊 METRICS (dummy): {data}")
 
19
 
20
+ def failsafe(func): # decorator stub
21
+ return func
 
 
 
22
 
23
+ # === Константы ===
24
+ PRIMARY_MODEL = "microsoft/phi-3-mini-4k-instruct"
25
  HF_TOKEN = os.getenv("HF_TOKEN", "")
26
  ROUTER_URL = "https://api-inference.huggingface.co/models"
 
27
  CHECK_INTERVAL = 180
 
28
 
29
+ # === CircuitBreaker ===
30
  class CircuitBreaker:
31
  def __init__(self, threshold=3, timeout=60):
32
  self.failures, self.threshold, self.timeout = 0, threshold, timeout
33
  self.state, self.last_failure = "CLOSED", 0
34
+
35
  def allow(self):
36
  if self.state == "OPEN" and time.time() - self.last_failure < self.timeout:
37
  return False
38
  if self.state == "OPEN" and time.time() - self.last_failure >= self.timeout:
39
  self.state = "HALF_OPEN"
40
  return True
41
+
42
+ def record_success(self):
43
+ self.failures, self.state = 0, "CLOSED"
44
+
45
  def record_failure(self):
46
  self.failures += 1
47
  if self.failures >= self.threshold:
48
  self.state, self.last_failure = "OPEN", time.time()
49
+
50
  circuit = CircuitBreaker()
51
 
52
+ # === Hugging Face API клиент ===
53
  class HFClient:
54
  def __init__(self):
55
+ self.token = HF_TOKEN
56
+ self.session = None
57
+ self.latency = 0
58
  self.headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
59
+
60
  async def get_session(self):
61
  if not self.session or self.session.closed:
62
  self.session = aiohttp.ClientSession()
63
  return self.session
64
+
65
+ async def infer(self, model: str, text: str):
 
 
 
 
 
 
 
 
 
 
66
  if not circuit.allow():
67
+ return {"error": "Circuit breaker open"}
68
  try:
69
  s = await self.get_session()
70
+ payload = {"inputs": text[:1000], "parameters": {"max_new_tokens": 100}}
71
  start = time.time()
72
+ async with s.post(f"{ROUTER_URL}/{model}", headers=self.headers, json=payload) as r:
73
  self.latency = round((time.time() - start) * 1000, 2)
74
  if r.status == 200:
75
  circuit.record_success()
76
  return await r.json()
77
  else:
78
  circuit.record_failure()
79
+ return {"error": f"HTTP {r.status}"}
 
 
 
80
  except Exception as e:
81
  circuit.record_failure()
82
+ return {"error": str(e)}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
 
84
+ client = HFClient()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
 
86
+ # === FastAPI App ===
87
+ app = FastAPI(title="Eroha AgentAPI v5.9.2 — Enterprise Edition")
88
 
89
  @app.on_event("startup")
90
+ async def startup_event():
91
+ print("🚀 Eroha AgentAPI started.")
92
  asyncio.create_task(watchdog())
 
93
 
94
  @app.get("/health")
95
  async def health():
96
+ return {"status": "ok", "circuit": circuit.state, "memory": psutil.virtual_memory().percent, "latency": client.latency}
 
 
 
 
 
 
97
 
98
  @app.get("/metrics", response_class=PlainTextResponse)
99
  async def metrics():
100
+ return f"circuit_state {circuit.state}\nmemory {psutil.virtual_memory().percent}\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
 
102
+ class InferenceRequest(BaseModel):
103
+ prompt: str
104
+ model: str = PRIMARY_MODEL
105
 
106
+ @app.post("/inference")
107
+ async def inference(req: InferenceRequest):
108
+ start = time.time()
109
+ result = await client.infer(req.model, req.prompt)
110
+ duration = int((time.time() - start) * 1000)
111
+ save_metrics({"latency_ms": duration})
112
+ return {"response": result, "duration_ms": duration}
113
+
114
+ # === Watchdog ===
115
+ async def watchdog():
116
+ while True:
117
+ await asyncio.sleep(CHECK_INTERVAL)
118
+ print(f"🩺 Watchdog OK at {datetime.now().strftime('%H:%M:%S')}")
119
 
120
+ # === Gradio UI ===
121
+ def gradio_infer(prompt: str, model_choice: str):
122
  try:
123
+ result = asyncio.run(client.infer(model_choice, prompt))
124
+ if isinstance(result, list) and "generated_text" in result[0]:
125
+ return result[0]["generated_text"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
  return str(result)
127
+ except Exception as e:
128
+ return f"❌ Error: {e}"
 
 
 
 
 
 
 
 
 
 
 
 
129
 
130
  def show_dashboard():
131
  mem = psutil.virtual_memory().percent
132
+ return f"| Metric | Value |\n|--------|--------|\n| Circuit | {circuit.state} |\n| Memory | {mem}% |\n| Latency | {client.latency} ms |"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
 
134
  demo = gr.Blocks(title="Eroha AgentAPI v5.9.2")
135
  with demo:
136
  gr.Markdown("# 🤖 Eroha AgentAPI v5.9.2 — Enterprise Edition")
 
137
  with gr.Tab("💬 Chat"):
138
  inp = gr.Textbox(label="Введите запрос")
139
+ model = gr.Dropdown(["microsoft/phi-3-mini-4k-instruct", "google/gemma-2-2b-it"], value="microsoft/phi-3-mini-4k-instruct", label="Модель")
 
 
 
 
 
 
140
  out = gr.Textbox(label="Ответ")
141
+ gr.Button("🚀 Отправить").click(fn=gradio_infer, inputs=[inp, model], outputs=out)
 
 
 
 
142
  with gr.Tab("📊 Dashboard"):
143
+ dash = gr.Markdown(show_dashboard())
144
+ gr.Button("🔄 Обновить").click(fn=show_dashboard, outputs=dash)
 
145
 
146
+ # === Финальный единый запуск ===
 
 
147
  if __name__ == "__main__":
148
+ app = mount_gradio_app(app, demo, path="/")
149
+ uvicorn.run(app, host="0.0.0.0", port=7860)