Yermek68 committed on
Commit
f195b0f
·
verified ·
1 Parent(s): 45bd322

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +85 -30
app.py CHANGED
@@ -1,6 +1,7 @@
1
  """
2
- Eroha AgentAPI v5.8 — AutoRecovery + Smart Dashboard Edition
3
  Production-grade архитектура для Hugging Face Spaces
 
4
  """
5
 
6
  import os
@@ -10,7 +11,7 @@ import time
10
  from datetime import datetime
11
  import gradio as gr
12
  from fastapi import FastAPI
13
- from fastapi.responses import JSONResponse
14
  from transformers import pipeline
15
  import psutil
16
 
@@ -20,12 +21,14 @@ import psutil
20
  HF_TOKEN = os.getenv("HF_TOKEN", "")
21
  ROUTER_URL = "https://api-inference.huggingface.co/models"
22
  FALLBACK_MODEL = "sshleifer/tiny-gpt2"
23
- CHECK_INTERVAL = 300 # 5 min
 
24
 
25
  # ==============================
26
- # CORE CLASSES
27
  # ==============================
28
  class CircuitBreaker:
 
29
  def __init__(self, threshold=3, timeout=60):
30
  self.failures = 0
31
  self.threshold = threshold
@@ -51,25 +54,32 @@ class CircuitBreaker:
51
  self.state = "OPEN"
52
  self.last_failure = time.time()
53
 
 
54
  circuit = CircuitBreaker()
55
 
 
56
  class HFClient:
 
57
  def __init__(self):
58
  self.token = HF_TOKEN
59
  self.headers = {"Authorization": f"Bearer {self.token}"} if self.token else {}
60
  self.valid = False
61
 
62
  async def validate(self):
 
63
  try:
64
  async with aiohttp.ClientSession() as s:
65
  async with s.get("https://huggingface.co/api/whoami-v2", headers=self.headers) as r:
66
  self.valid = r.status == 200
 
67
  return self.valid
68
- except:
 
69
  self.valid = False
70
  return False
71
 
72
  async def infer(self, model, text):
 
73
  if not circuit.allow():
74
  return {"error": "Circuit breaker open — fallback engaged"}
75
 
@@ -90,36 +100,69 @@ class HFClient:
90
  circuit.record_failure()
91
  return {"error": str(e)}
92
 
 
93
  client = HFClient()
94
 
 
 
 
 
95
  class Fallback:
 
 
 
 
96
  def __init__(self):
97
  self.pipe = None
98
  self.ready = False
99
 
100
  async def load(self):
101
- if not self.ready:
102
- self.pipe = pipeline("text-generation", model=FALLBACK_MODEL)
 
 
 
 
 
 
 
 
103
  self.ready = True
 
 
 
 
 
 
104
 
105
  async def generate(self, text):
106
- # 💡 Здесь мы вставляем проверку памяти перед загрузкой модели
107
- import psutil
108
- if not self.ready and psutil.virtual_memory().percent < 85:
109
  await self.load()
110
  elif not self.ready:
111
- print("⚠️ Недостаточно памяти для загрузки fallback модели.")
112
- return "⚠️ Fallback model не загружена из-за нехватки памяти."
 
 
 
 
 
 
 
 
 
 
113
 
114
- out = self.pipe(text, max_new_tokens=100)[0]["generated_text"]
115
- return out
116
 
117
  fallback = Fallback()
118
 
 
119
  # ==============================
120
- # WATCHDOG
121
  # ==============================
122
  async def watchdog():
 
123
  while True:
124
  print(f"[{datetime.now().isoformat()}] 🔍 Watchdog check...")
125
  await client.validate()
@@ -128,18 +171,21 @@ async def watchdog():
128
  print(f"⚠️ High memory usage: {mem}%")
129
  await asyncio.sleep(CHECK_INTERVAL)
130
 
 
131
  # ==============================
132
  # FASTAPI CORE
133
  # ==============================
134
- app = FastAPI(title="Eroha AgentAPI v5.8")
135
 
136
  @app.on_event("startup")
137
  async def startup():
138
  asyncio.create_task(watchdog())
139
  await client.validate()
140
 
 
141
  @app.get("/health")
142
  async def health():
 
143
  return JSONResponse({
144
  "status": "ok" if client.valid else "degraded",
145
  "circuit": circuit.state,
@@ -147,8 +193,10 @@ async def health():
147
  "timestamp": datetime.now().isoformat()
148
  })
149
 
 
150
  @app.post("/inference")
151
  async def inference(data: dict):
 
152
  text = data.get("prompt", "")
153
  model = data.get("model", "microsoft/phi-3-mini-4k-instruct")
154
 
@@ -158,8 +206,9 @@ async def inference(data: dict):
158
  return {"source": "fallback", "response": fb, "note": res["error"]}
159
  return {"source": "router", "response": res}
160
 
 
161
  # ==============================
162
- # GRADIO INTERFACE
163
  # ==============================
164
  def gradio_infer(prompt, model_choice):
165
  loop = asyncio.new_event_loop()
@@ -173,6 +222,7 @@ def gradio_infer(prompt, model_choice):
173
  return result[0].get("generated_text", str(result))
174
  return str(result)
175
 
 
176
  def show_dashboard():
177
  mem = psutil.virtual_memory().percent
178
  status = "✅ OK" if client.valid else "⚠️ Token Invalid"
@@ -186,9 +236,10 @@ def show_dashboard():
186
  | Time | {datetime.now().strftime("%H:%M:%S")} |
187
  """
188
 
189
- demo = gr.Blocks(title="Eroha AgentAPI v5.8 — AutoRecovery Edition")
 
190
  with demo:
191
- gr.Markdown("# 🤖 Eroha AgentAPI v5.8 — AutoRecovery + Smart Dashboard")
192
  with gr.Tab("💬 Chat"):
193
  inp = gr.Textbox(label="Введите запрос")
194
  model = gr.Dropdown(
@@ -206,29 +257,33 @@ with demo:
206
 
207
  app = gr.mount_gradio_app(app, demo, path="/ui")
208
 
209
- if __name__ == "__main__":
210
- import uvicorn
211
- uvicorn.run(app, host="0.0.0.0", port=7860)
212
-
213
- # ========== Дополнительные системные маршруты для стабильности ==========
214
- from fastapi.responses import HTMLResponse
215
 
 
 
 
216
  @app.get("/", response_class=HTMLResponse)
217
  async def root_page():
218
- """Главная страница — отображает статус и ссылку на UI"""
219
  return """
220
  <html>
221
- <head><title>Eroha AgentAPI v5.8</title></head>
222
  <body style='font-family:Arial;text-align:center;padding:2em;'>
223
- <h2>🤖 Eroha AgentAPI v5.8 — AutoRecovery Edition</h2>
224
  <p>Status: <b style='color:green;'>Running ✅</b></p>
225
  <p><a href='/ui' style='color:#4a68ff;font-size:18px;'>Открыть интерфейс →</a></p>
226
- <p><a href='/health'>Health check</a> • <a href='/metrics'>Metrics</a></p>
227
  </body>
228
  </html>
229
  """
230
 
 
231
  @app.get("/favicon.ico")
232
  async def favicon():
233
- """Возврат пустого favicon для устранения 404"""
234
  return PlainTextResponse("", status_code=204)
 
 
 
 
 
 
 
1
  """
2
+ Eroha AgentAPI v5.8.3 — Stable Memory Build
3
  Production-grade архитектура для Hugging Face Spaces
4
+ Auto-Recovery + CircuitBreaker + Smart Dashboard + Low Memory Optimization
5
  """
6
 
7
  import os
 
11
  from datetime import datetime
12
  import gradio as gr
13
  from fastapi import FastAPI
14
+ from fastapi.responses import JSONResponse, HTMLResponse, PlainTextResponse
15
  from transformers import pipeline
16
  import psutil
17
 
 
21
  HF_TOKEN = os.getenv("HF_TOKEN", "")
22
  ROUTER_URL = "https://api-inference.huggingface.co/models"
23
  FALLBACK_MODEL = "sshleifer/tiny-gpt2"
24
+ CHECK_INTERVAL = 300 # 5 минут
25
+
26
 
27
  # ==============================
28
+ # CORE COMPONENTS
29
  # ==============================
30
  class CircuitBreaker:
31
+ """Простая FSM-защита от каскадных ошибок"""
32
  def __init__(self, threshold=3, timeout=60):
33
  self.failures = 0
34
  self.threshold = threshold
 
54
  self.state = "OPEN"
55
  self.last_failure = time.time()
56
 
57
+
58
  circuit = CircuitBreaker()
59
 
60
+
61
  class HFClient:
62
+ """Клиент Hugging Face API с проверкой токена"""
63
  def __init__(self):
64
  self.token = HF_TOKEN
65
  self.headers = {"Authorization": f"Bearer {self.token}"} if self.token else {}
66
  self.valid = False
67
 
68
  async def validate(self):
69
+ """Проверка токена HF"""
70
  try:
71
  async with aiohttp.ClientSession() as s:
72
  async with s.get("https://huggingface.co/api/whoami-v2", headers=self.headers) as r:
73
  self.valid = r.status == 200
74
+ print(f"🔐 HF token valid: {self.valid}")
75
  return self.valid
76
+ except Exception as e:
77
+ print(f"⚠️ Token validation error: {e}")
78
  self.valid = False
79
  return False
80
 
81
  async def infer(self, model, text):
82
+ """Отправка запроса на Router API"""
83
  if not circuit.allow():
84
  return {"error": "Circuit breaker open — fallback engaged"}
85
 
 
100
  circuit.record_failure()
101
  return {"error": str(e)}
102
 
103
+
104
  client = HFClient()
105
 
106
+
107
+ # ==============================
108
+ # Fallback (safe, memory-aware)
109
+ # ==============================
110
  class Fallback:
111
+ """
112
+ Локальная резервная tiny GPT-2 модель, безопасная для HF Spaces.
113
+ Загружается только при низкой загрузке памяти (<85%).
114
+ """
115
  def __init__(self):
116
  self.pipe = None
117
  self.ready = False
118
 
119
  async def load(self):
120
+ """Безопасная загрузка модели"""
121
+ mem = psutil.virtual_memory().percent
122
+ if mem > 85:
123
+ print(f"⚠️ Недостаточно памяти для загрузки fallback модели ({mem:.1f}%)")
124
+ return False
125
+
126
+ try:
127
+ from transformers import pipeline
128
+ print("🧠 Загрузка fallback модели (sshleifer/tiny-gpt2)...")
129
+ self.pipe = pipeline("text-generation", model=FALLBACK_MODEL, device=-1)
130
  self.ready = True
131
+ print("✅ Fallback модель успешно загружена")
132
+ return True
133
+ except Exception as e:
134
+ print(f"❌ Ошибка при загрузке fallback модели: {e}")
135
+ self.ready = False
136
+ return False
137
 
138
  async def generate(self, text):
139
+ """Формирование ответа с проверкой памяти"""
140
+ mem = psutil.virtual_memory().percent
141
+ if not self.ready and mem < 85:
142
  await self.load()
143
  elif not self.ready:
144
+ print(f"⚠️ Пропуск загрузки fallback — память: {mem:.1f}%")
145
+ return "⚠️ Недостаточно памяти для генерации fallback-ответа."
146
+
147
+ if not self.pipe:
148
+ return "⚠️ Fallback модель недоступна. Попробуйте позже."
149
+
150
+ try:
151
+ result = self.pipe(text, max_new_tokens=100, temperature=0.7)[0]["generated_text"]
152
+ return result
153
+ except Exception as e:
154
+ print(f"❌ Ошибка генерации fallback: {e}")
155
+ return "🧠 Ошибка fallback. Попробуйте позже."
156
 
 
 
157
 
158
  fallback = Fallback()
159
 
160
+
161
  # ==============================
162
+ # WATCHDOG (background health)
163
  # ==============================
164
  async def watchdog():
165
+ """Периодическая проверка состояния"""
166
  while True:
167
  print(f"[{datetime.now().isoformat()}] 🔍 Watchdog check...")
168
  await client.validate()
 
171
  print(f"⚠️ High memory usage: {mem}%")
172
  await asyncio.sleep(CHECK_INTERVAL)
173
 
174
+
175
  # ==============================
176
  # FASTAPI CORE
177
  # ==============================
178
+ app = FastAPI(title="Eroha AgentAPI v5.8.3")
179
 
180
  @app.on_event("startup")
181
  async def startup():
182
  asyncio.create_task(watchdog())
183
  await client.validate()
184
 
185
+
186
  @app.get("/health")
187
  async def health():
188
+ """Healthcheck endpoint"""
189
  return JSONResponse({
190
  "status": "ok" if client.valid else "degraded",
191
  "circuit": circuit.state,
 
193
  "timestamp": datetime.now().isoformat()
194
  })
195
 
196
+
197
  @app.post("/inference")
198
  async def inference(data: dict):
199
+ """Основной API-инференс"""
200
  text = data.get("prompt", "")
201
  model = data.get("model", "microsoft/phi-3-mini-4k-instruct")
202
 
 
206
  return {"source": "fallback", "response": fb, "note": res["error"]}
207
  return {"source": "router", "response": res}
208
 
209
+
210
  # ==============================
211
+ # GRADIO UI
212
  # ==============================
213
  def gradio_infer(prompt, model_choice):
214
  loop = asyncio.new_event_loop()
 
222
  return result[0].get("generated_text", str(result))
223
  return str(result)
224
 
225
+
226
  def show_dashboard():
227
  mem = psutil.virtual_memory().percent
228
  status = "✅ OK" if client.valid else "⚠️ Token Invalid"
 
236
  | Time | {datetime.now().strftime("%H:%M:%S")} |
237
  """
238
 
239
+
240
+ demo = gr.Blocks(title="Eroha AgentAPI v5.8.3 — Stable Memory Build")
241
  with demo:
242
+ gr.Markdown("# 🤖 Eroha AgentAPI v5.8.3 — AutoRecovery + Smart Dashboard")
243
  with gr.Tab("💬 Chat"):
244
  inp = gr.Textbox(label="Введите запрос")
245
  model = gr.Dropdown(
 
257
 
258
  app = gr.mount_gradio_app(app, demo, path="/ui")
259
 
 
 
 
 
 
 
260
 
261
+ # ==============================
262
+ # STABLE ROOT ROUTES
263
+ # ==============================
264
  @app.get("/", response_class=HTMLResponse)
265
  async def root_page():
266
+ """Главная страница"""
267
  return """
268
  <html>
269
+ <head><title>Eroha AgentAPI v5.8.3</title></head>
270
  <body style='font-family:Arial;text-align:center;padding:2em;'>
271
+ <h2>🤖 Eroha AgentAPI v5.8.3Stable Memory Build</h2>
272
  <p>Status: <b style='color:green;'>Running ✅</b></p>
273
  <p><a href='/ui' style='color:#4a68ff;font-size:18px;'>Открыть интерфейс →</a></p>
274
+ <p><a href='/health'>Health check</a></p>
275
  </body>
276
  </html>
277
  """
278
 
279
+
280
  @app.get("/favicon.ico")
281
  async def favicon():
282
+ """Пустой favicon для предотвращения 404"""
283
  return PlainTextResponse("", status_code=204)
284
+
285
+
286
+ if __name__ == "__main__":
287
+ import uvicorn
288
+ print("🚀 Starting Eroha AgentAPI v5.8.3 — Stable Memory Build")
289
+ uvicorn.run(app, host="0.0.0.0", port=7860)