Yermek68 committed on
Commit
b3f2fcb
·
verified ·
1 Parent(s): cc62d89

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +145 -135
app.py CHANGED
@@ -1,7 +1,7 @@
1
  """
2
- Eroha AgentAPI v5.8.3Stable Memory Build
3
- Production-grade архитектура для Hugging Face Spaces
4
- Auto-Recovery + CircuitBreaker + Smart Dashboard + Low Memory Optimization
5
  """
6
 
7
  import os
@@ -15,20 +15,19 @@ from fastapi.responses import JSONResponse, HTMLResponse, PlainTextResponse
15
  from transformers import pipeline
16
  import psutil
17
 
18
- # ==============================
19
  # CONFIGURATION
20
- # ==============================
21
  HF_TOKEN = os.getenv("HF_TOKEN", "")
22
  ROUTER_URL = "https://api-inference.huggingface.co/models"
23
  FALLBACK_MODEL = "sshleifer/tiny-gpt2"
24
- CHECK_INTERVAL = 300 # 5 минут
 
25
 
26
-
27
- # ==============================
28
- # CORE COMPONENTS
29
- # ==============================
30
  class CircuitBreaker:
31
- """Простая FSM-защита от каскадных ошибок"""
32
  def __init__(self, threshold=3, timeout=60):
33
  self.failures = 0
34
  self.threshold = threshold
@@ -54,162 +53,200 @@ class CircuitBreaker:
54
  self.state = "OPEN"
55
  self.last_failure = time.time()
56
 
57
-
58
  circuit = CircuitBreaker()
59
 
60
-
 
 
61
  class HFClient:
62
- """Клиент Hugging Face API с проверкой токена"""
63
  def __init__(self):
64
  self.token = HF_TOKEN
65
  self.headers = {"Authorization": f"Bearer {self.token}"} if self.token else {}
66
  self.valid = False
 
 
 
 
 
 
 
 
67
 
68
  async def validate(self):
69
- """Проверка токена HF"""
70
  try:
71
  async with aiohttp.ClientSession() as s:
 
72
  async with s.get("https://huggingface.co/api/whoami-v2", headers=self.headers) as r:
 
73
  self.valid = r.status == 200
74
- print(f"🔐 HF token valid: {self.valid}")
75
  return self.valid
76
- except Exception as e:
77
- print(f"⚠️ Token validation error: {e}")
78
  self.valid = False
79
  return False
80
 
81
  async def infer(self, model, text):
82
- """Отправка запроса на Router API"""
83
  if not circuit.allow():
84
  return {"error": "Circuit breaker open — fallback engaged"}
85
 
86
  try:
87
- async with aiohttp.ClientSession() as s:
88
- payload = {"inputs": text, "parameters": {"max_new_tokens": 250}}
89
- async with s.post(f"{ROUTER_URL}/{model}", headers=self.headers, json=payload, timeout=30) as r:
90
- if r.status == 200:
91
- circuit.record_success()
92
- data = await r.json()
93
- return data
94
- else:
95
- circuit.record_failure()
96
- if r.status == 401:
97
- self.valid = False
98
- return {"error": f"Router error {r.status}"}
 
 
 
 
 
99
  except Exception as e:
100
  circuit.record_failure()
101
- return {"error": str(e)}
102
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
 
104
  client = HFClient()
105
 
106
-
107
- # ==============================
108
- # Fallback (safe, memory-aware)
109
- # ==============================
110
  class Fallback:
111
- """
112
- Локальная резервная tiny GPT-2 модель, безопасная для HF Spaces.
113
- Загружается только при низкой загрузке памяти (<85%).
114
- """
115
  def __init__(self):
116
  self.pipe = None
117
- self.ready = False
118
 
119
  async def load(self):
120
- """Безопасная загрузка модели"""
121
- mem = psutil.virtual_memory().percent
122
- if mem > 85:
123
- print(f"⚠️ Недостаточно памяти для загрузки fallback модели ({mem:.1f}%)")
124
- return False
125
-
126
- try:
127
- from transformers import pipeline
128
- print("🧠 Загрузка fallback модели (sshleifer/tiny-gpt2)...")
129
- self.pipe = pipeline("text-generation", model=FALLBACK_MODEL, device=-1)
130
- self.ready = True
131
- print("✅ Fallback модель успешно загружена")
132
- return True
133
- except Exception as e:
134
- print(f"❌ Ошибка при загрузке fallback модели: {e}")
135
- self.ready = False
136
- return False
137
 
138
  async def generate(self, text):
139
- """Формирование ответа с проверкой памяти"""
140
- mem = psutil.virtual_memory().percent
141
- if not self.ready and mem < 85:
142
  await self.load()
143
- elif not self.ready:
144
- print(f"⚠️ Пропуск загрузки fallback — память: {mem:.1f}%")
145
- return "⚠️ Недостаточно памяти для генерации fallback-ответа."
146
-
147
  if not self.pipe:
148
- return "⚠️ Fallback модель недоступна. Попробуйте позже."
149
-
150
- try:
151
- result = self.pipe(text, max_new_tokens=100, temperature=0.7)[0]["generated_text"]
152
- return result
153
- except Exception as e:
154
- print(f"❌ Ошибка генерации fallback: {e}")
155
- return "🧠 Ошибка fallback. Попробуйте позже."
156
-
157
 
158
  fallback = Fallback()
159
 
160
-
161
- # ==============================
162
- # WATCHDOG (background health)
163
- # ==============================
164
  async def watchdog():
165
- """Периодическая проверка состояния"""
166
  while True:
167
- print(f"[{datetime.now().isoformat()}] 🔍 Watchdog check...")
168
- await client.validate()
169
- mem = psutil.virtual_memory().percent
170
- if mem > 85:
171
- print(f"⚠️ High memory usage: {mem}%")
172
  await asyncio.sleep(CHECK_INTERVAL)
 
 
 
 
 
 
 
 
 
 
173
 
174
-
175
- # ==============================
176
  # FASTAPI CORE
177
- # ==============================
178
- app = FastAPI(title="Eroha AgentAPI v5.8.3")
179
 
180
  @app.on_event("startup")
181
  async def startup():
 
182
  asyncio.create_task(watchdog())
183
  await client.validate()
184
 
185
-
186
  @app.get("/health")
187
  async def health():
188
- """Healthcheck endpoint"""
 
189
  return JSONResponse({
190
  "status": "ok" if client.valid else "degraded",
191
  "circuit": circuit.state,
192
- "memory": psutil.virtual_memory().percent,
193
- "timestamp": datetime.now().isoformat()
 
 
194
  })
195
 
 
 
 
 
 
 
 
 
 
 
196
 
197
  @app.post("/inference")
198
  async def inference(data: dict):
199
- """Основной API-инференс"""
200
  text = data.get("prompt", "")
201
  model = data.get("model", "microsoft/phi-3-mini-4k-instruct")
202
-
203
  res = await client.infer(model, text)
204
  if "error" in res:
205
  fb = await fallback.generate(text)
206
  return {"source": "fallback", "response": fb, "note": res["error"]}
207
  return {"source": "router", "response": res}
208
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209
 
210
- # ==============================
211
- # GRADIO UI
212
- # ==============================
213
  def gradio_infer(prompt, model_choice):
214
  loop = asyncio.new_event_loop()
215
  asyncio.set_event_loop(loop)
@@ -222,24 +259,24 @@ def gradio_infer(prompt, model_choice):
222
  return result[0].get("generated_text", str(result))
223
  return str(result)
224
 
225
-
226
  def show_dashboard():
227
  mem = psutil.virtual_memory().percent
228
- status = "✅ OK" if client.valid else "⚠️ Token Invalid"
 
229
  return f"""
230
- ### 🧠 Eroha AgentAPI Dashboard
231
- | Metric | Status |
232
  |--------|--------|
233
- | Token Valid | {status} |
234
  | Circuit | {circuit.state} |
235
- | Memory Usage | {mem}% |
 
236
  | Time | {datetime.now().strftime("%H:%M:%S")} |
237
  """
238
 
239
-
240
- demo = gr.Blocks(title="Eroha AgentAPI v5.8.3 — Stable Memory Build")
241
  with demo:
242
- gr.Markdown("# 🤖 Eroha AgentAPI v5.8.3 — AutoRecovery + Smart Dashboard")
243
  with gr.Tab("💬 Chat"):
244
  inp = gr.Textbox(label="Введите запрос")
245
  model = gr.Dropdown(
@@ -251,39 +288,12 @@ with demo:
251
  btn.click(fn=gradio_infer, inputs=[inp, model], outputs=out)
252
  with gr.Tab("📊 Dashboard"):
253
  dash = gr.Markdown()
254
- refresh = gr.Button("🔄 Обновить состояние")
255
  refresh.click(fn=show_dashboard, outputs=dash)
256
  dash.value = show_dashboard()
257
 
258
  app = gr.mount_gradio_app(app, demo, path="/ui")
259
 
260
-
261
- # ==============================
262
- # STABLE ROOT ROUTES
263
- # ==============================
264
- @app.get("/", response_class=HTMLResponse)
265
- async def root_page():
266
- """Главная страница"""
267
- return """
268
- <html>
269
- <head><title>Eroha AgentAPI v5.8.3</title></head>
270
- <body style='font-family:Arial;text-align:center;padding:2em;'>
271
- <h2>🤖 Eroha AgentAPI v5.8.3 — Stable Memory Build</h2>
272
- <p>Status: <b style='color:green;'>Running ✅</b></p>
273
- <p><a href='./ui' target='_blank' style='color:#4a68ff;font-size:18px;'>Открыть интерфейс →</a></p>
274
- <p><a href='/health'>Health check</a></p>
275
- </body>
276
- </html>
277
- """
278
-
279
-
280
- @app.get("/favicon.ico")
281
- async def favicon():
282
- """Пустой favicon для предотвращения 404"""
283
- return PlainTextResponse("", status_code=204)
284
-
285
-
286
  if __name__ == "__main__":
287
  import uvicorn
288
- print("🚀 Starting Eroha AgentAPI v5.8.3 — Stable Memory Build")
289
  uvicorn.run(app, host="0.0.0.0", port=7860)
 
1
  """
2
+ 🤖 Eroha AgentAPI v5.9Enterprise Edition
3
+ Enterprise-grade architecture for Hugging Face Spaces
4
+ Auto-Token Recovery | Smart Fallback 2.0 | Self-Heal | Metrics | Stable Dashboard
5
  """
6
 
7
  import os
 
15
  from transformers import pipeline
16
  import psutil
17
 
18
# ==========================
# CONFIGURATION
# ==========================
HF_TOKEN = os.getenv("HF_TOKEN", "")  # Hugging Face API token (empty string → anonymous requests)
ROUTER_URL = "https://api-inference.huggingface.co/models"  # HF inference Router base URL
FALLBACK_MODEL = "sshleifer/tiny-gpt2"  # tiny local model used when the Router is unavailable
CHECK_INTERVAL = 180  # Router health check every 3 minutes
MAX_MEMORY_THRESHOLD = 85  # Fallback loads only if memory < 85%

# ==========================
# CORE CLASSES
# ==========================
 
30
  class CircuitBreaker:
 
31
  def __init__(self, threshold=3, timeout=60):
32
  self.failures = 0
33
  self.threshold = threshold
 
53
  self.state = "OPEN"
54
  self.last_failure = time.time()
55
 
 
56
  circuit = CircuitBreaker()
57
 
58
# ==========================
# HF CLIENT
# ==========================
class HFClient:
    """Async client for the Hugging Face inference Router.

    Keeps one shared aiohttp session for inference calls, records the last
    observed request latency (ms), and attempts token recovery from a backup
    file when the Router rejects the current credentials.
    """

    def __init__(self):
        self.token = HF_TOKEN
        self.headers = {"Authorization": f"Bearer {self.token}"} if self.token else {}
        self.valid = False   # last known token-validity status
        self.session = None  # shared ClientSession, created lazily
        self.retries = 0
        self.latency = 0     # last request round-trip in milliseconds

    async def get_session(self):
        """Return the shared session, recreating it if missing or closed."""
        if not self.session or self.session.closed:
            self.session = aiohttp.ClientSession()
        return self.session

    async def validate(self):
        """Validate HF token"""
        try:
            async with aiohttp.ClientSession() as s:
                start = time.time()
                async with s.get("https://huggingface.co/api/whoami-v2", headers=self.headers) as r:
                    self.latency = round((time.time() - start) * 1000, 2)
                    self.valid = r.status == 200
                    return self.valid
        except Exception:  # was bare `except:` — don't swallow SystemExit/KeyboardInterrupt
            self.valid = False
            return False

    async def infer(self, model, text):
        """Inference through Hugging Face Router"""
        if not circuit.allow():
            return {"error": "Circuit breaker open — fallback engaged"}

        try:
            session = await self.get_session()
            payload = {"inputs": text, "parameters": {"max_new_tokens": 250}}
            start = time.time()
            async with session.post(
                f"{ROUTER_URL}/{model}", headers=self.headers, json=payload, timeout=30
            ) as r:
                self.latency = round((time.time() - start) * 1000, 2)
                if r.status == 200:
                    circuit.record_success()
                    self.retries = 0
                    return await r.json()
                else:
                    circuit.record_failure()
                    if r.status in (401, 410):
                        # Credentials rejected: mark invalid and try the backup token.
                        self.valid = False
                        await self.recover_token()
                    return {"error": f"Router error {r.status}"}
        except Exception as e:
            circuit.record_failure()
            return {"error": f"Router exception: {e}"}

    async def recover_token(self):
        """Try to reload token from backup or /tmp file"""
        print("⚠️ Token invalid — trying recovery...")
        token_paths = [
            "/tmp/hf_token.txt",
            os.getenv("HF_TOKEN_BACKUP", "")  # NOTE(review): assumed to hold a *path*, not the token itself — confirm
        ]
        for path in token_paths:
            if path and os.path.exists(path):
                try:
                    with open(path, "r") as f:
                        token = f.read().strip()
                    if token:
                        # Keep `self.token` in sync with the new headers (mirrors __init__);
                        # previously only the headers were replaced.
                        self.token = token
                        self.headers = {"Authorization": f"Bearer {token}"}
                        print("✅ Token recovered successfully.")
                        self.valid = await self.validate()
                        if self.valid:
                            return True
                except Exception:  # unreadable candidate file — try the next one
                    continue
        print("❌ Token recovery failed.")
        return False


client = HFClient()
139
 
140
# ==========================
# SMART FALLBACK
# ==========================
class Fallback:
    """Local tiny-GPT-2 fallback used when the Router cannot serve a request.

    The pipeline is built lazily, and only when current memory usage is below
    MAX_MEMORY_THRESHOLD.
    """

    def __init__(self):
        self.pipe = None     # transformers pipeline, created on first use
        self.loaded = False  # True once the pipeline was built successfully

    async def load(self):
        """Load fallback model if memory is OK; never propagate load errors."""
        if self.loaded:
            return
        if psutil.virtual_memory().percent >= MAX_MEMORY_THRESHOLD:
            return
        try:
            print("🧠 Loading fallback model...")
            self.pipe = pipeline("text-generation", model=FALLBACK_MODEL)
            self.loaded = True
            print("✅ Fallback model ready.")
        except Exception as e:
            # A failed download/OOM must not crash the /inference endpoint;
            # generate() will report the model as unavailable instead.
            print(f"❌ Fallback model load failed: {e}")
            self.pipe = None
            self.loaded = False

    async def generate(self, text):
        """Generate fallback response"""
        if not self.loaded:
            await self.load()
        if not self.pipe:
            return "⚠️ Fallback model unavailable."
        result = self.pipe(text, max_new_tokens=100)[0]["generated_text"]
        return result


fallback = Fallback()
166
 
167
# ==========================
# WATCHDOG & SELF-HEAL
# ==========================
async def watchdog():
    """Periodic background health loop.

    Every CHECK_INTERVAL seconds: re-validates the HF token (attempting
    recovery when invalid), reports memory pressure, and force-closes the
    circuit breaker so Router calls can be retried.
    """
    while True:
        await asyncio.sleep(CHECK_INTERVAL)
        print(f"[{datetime.now().isoformat()}] 🩺 Watchdog check...")
        valid = await client.validate()
        if not valid:
            await client.recover_token()
        mem = psutil.virtual_memory().percent
        if mem > 90:
            print(f"⚠️ High memory usage: {mem}% — consider restart.")
        if not circuit.allow():
            print("⚠️ Circuit breaker is open — self-healing...")
            # Reset the failure counter as well as the state; leaving the
            # counter saturated could trip the breaker again on the very
            # next failure.
            circuit.failures = 0
            circuit.state = "CLOSED"
184
 
185
# ==========================
# FASTAPI CORE
# ==========================
app = FastAPI(title="Eroha AgentAPI v5.9 — Enterprise Edition")


@app.on_event("startup")
async def startup():
    """Launch the background watchdog and run an initial token check."""
    print("🚀 Starting Eroha AgentAPI v5.9 — Enterprise Edition")
    # Keep a reference to the task: the event loop holds only weak references,
    # so an un-referenced background task may be garbage-collected mid-flight.
    app.state.watchdog_task = asyncio.create_task(watchdog())
    await client.validate()
195
 
 
196
# Captured at import time so /health reports *service* uptime;
# psutil.boot_time() is the host machine's boot timestamp, not ours.
SERVICE_START = time.time()


@app.get("/health")
async def health():
    """Health endpoint: token validity, breaker state, memory, latency, uptime."""
    uptime = round(time.time() - SERVICE_START, 1)
    mem = psutil.virtual_memory().percent
    return JSONResponse({
        "status": "ok" if client.valid else "degraded",
        "circuit": circuit.state,
        "memory": mem,
        "latency_ms": client.latency,
        "uptime_s": uptime,
        "token_valid": client.valid
    })
208
 
209
@app.get("/metrics", response_class=PlainTextResponse)
async def metrics():
    """Prometheus-style metrics"""
    # Emit one `name value` sample per line, newline-terminated.
    memory_pct = psutil.virtual_memory().percent
    samples = [
        f"hf_token_valid {1 if client.valid else 0}",
        f"router_latency_ms {client.latency}",
        f"memory_usage_percent {memory_pct}",
        f"circuit_state {0 if circuit.state == 'CLOSED' else 1}",
    ]
    return "\n".join(samples) + "\n"
219
 
220
@app.post("/inference")
async def inference(data: dict):
    """Main inference endpoint: Router first, local fallback on any error."""
    prompt = data.get("prompt", "")
    model_id = data.get("model", "microsoft/phi-3-mini-4k-instruct")
    router_result = await client.infer(model_id, prompt)
    if "error" not in router_result:
        return {"source": "router", "response": router_result}
    # Router failed (or breaker open) — degrade to the local model.
    fallback_text = await fallback.generate(prompt)
    return {"source": "fallback", "response": fallback_text, "note": router_result["error"]}
229
 
230
@app.get("/", response_class=HTMLResponse)
async def root_page():
    """Landing page: static status banner with links to /ui, /health and /metrics."""
    return """
    <html><head><title>Eroha AgentAPI v5.9</title></head>
    <body style='font-family:Arial;text-align:center;padding:2em;'>
    <h2>🤖 Eroha AgentAPI v5.9 — Enterprise Edition</h2>
    <p>Status: <b style='color:green;'>Running ✅</b></p>
    <p><a href='/ui' style='font-size:18px;color:#4a68ff;'>Открыть интерфейс →</a></p>
    <p><a href='/health'>Health</a> • <a href='/metrics'>Metrics</a></p>
    </body></html>
    """
242
+
243
@app.get("/favicon.ico")
async def favicon():
    """Answer favicon requests with an empty 204 so browsers don't log 404s."""
    return PlainTextResponse("", status_code=204)
246
 
247
+ # ==========================
248
+ # GRADIO DASHBOARD
249
+ # ==========================
250
  def gradio_infer(prompt, model_choice):
251
  loop = asyncio.new_event_loop()
252
  asyncio.set_event_loop(loop)
 
259
  return result[0].get("generated_text", str(result))
260
  return str(result)
261
 
 
262
def show_dashboard():
    """Render the Dashboard tab as a Markdown table of live service stats."""
    mem = psutil.virtual_memory().percent
    # Restore the missing status glyph: the string was " Invalid" (icon lost),
    # while the valid branch shows "✅ Valid".
    status = "✅ Valid" if client.valid else "❌ Invalid"
    color = "green" if client.valid else "red"
    return f"""
    ### 🧠 Eroha Enterprise Dashboard
    | Metric | Value |
    |--------|--------|
    | Token | <span style='color:{color}'>{status}</span> |
    | Circuit | {circuit.state} |
    | Memory | {mem}% |
    | Latency | {client.latency} ms |
    | Time | {datetime.now().strftime("%H:%M:%S")} |
    """
276
 
277
+ demo = gr.Blocks(title="Eroha AgentAPI v5.9 — Enterprise Edition")
 
278
  with demo:
279
+ gr.Markdown("# 🤖 Eroha AgentAPI v5.9 — AutoRecovery + Smart Dashboard")
280
  with gr.Tab("💬 Chat"):
281
  inp = gr.Textbox(label="Введите запрос")
282
  model = gr.Dropdown(
 
288
  btn.click(fn=gradio_infer, inputs=[inp, model], outputs=out)
289
  with gr.Tab("📊 Dashboard"):
290
  dash = gr.Markdown()
291
+ refresh = gr.Button("🔄 Обновить")
292
  refresh.click(fn=show_dashboard, outputs=dash)
293
  dash.value = show_dashboard()
294
 
295
  app = gr.mount_gradio_app(app, demo, path="/ui")
296
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
297
if __name__ == "__main__":
    # Direct-run entry point: serve the combined FastAPI + Gradio app on port 7860.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)