Yermek68 committed on
Commit
c4a353a
·
verified ·
1 Parent(s): 20c1f96

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +141 -15
app.py CHANGED
@@ -11,14 +11,23 @@ from fastapi import FastAPI
11
  from fastapi.responses import JSONResponse, PlainTextResponse
12
  from transformers import pipeline
13
 
14
- # === Импорт логировщика ===
15
  from core.alert_core import log_alert
16
  from core.metrics_core import save_metrics
 
 
 
17
  import time
18
 
19
- # === Настройка имен моделей ===
20
  PRIMARY_MODEL = "microsoft/phi-3-mini-instruct"
21
- FALLBACK_MODEL = "tiny-gpt2"
 
 
 
 
 
 
22
 
23
  HF_TOKEN = os.getenv("HF_TOKEN", "")
24
  ROUTER_URL = "https://api-inference.huggingface.co/models"
@@ -153,22 +162,139 @@ async def metrics():
153
 
154
  @app.post("/inference")
155
  async def inference(data: dict):
156
- text, model = data.get("prompt", ""), data.get("model", "microsoft/phi-3-mini-4k-instruct")
157
- res = await client.infer(model, text)
158
- if "error" in res:
159
- fb = await fallback.generate(text)
160
- return {"source": "fallback", "response": fb, "note": res["error"]}
161
- return {"source": "router", "response": res}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
162
 
163
  # ================= GRADIO UI =================
164
  def gradio_infer(prompt, model_choice):
165
- result = asyncio.run(client.infer(model_choice, prompt))
166
- if "error" in result:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
  fb = asyncio.run(fallback.generate(prompt))
168
- return f"⚠️ Router failed ({result['error']})\n\n🧠 Fallback:\n{fb}"
169
- if isinstance(result, list):
170
- return result[0].get("generated_text", str(result))
171
- return str(result)
172
 
173
  def show_dashboard():
174
  mem = psutil.virtual_memory().percent
 
11
  from fastapi.responses import JSONResponse, PlainTextResponse
12
  from transformers import pipeline
13
 
14
+ # === Импорт логирования + метрик + FailSafe ===
15
  from core.alert_core import log_alert
16
  from core.metrics_core import save_metrics
17
+ from core.alerters import ConsoleAlerter, FileAlerter
18
+ from core.alert_manager import AlertManager
19
+ from core.failsafe_core import failsafe
20
  import time
21
 
22
+ # === Настройка моделей для логики ===
23
  PRIMARY_MODEL = "microsoft/phi-3-mini-instruct"
24
+ FALLBACK_MODEL = "sshleifer/tiny-gpt2"
25
+
26
+ # Настройка AlertManager
27
+ alert_manager = AlertManager([
28
+ ConsoleAlerter(),
29
+ FileAlerter("alerts_log.json")
30
+ ])
31
 
32
  HF_TOKEN = os.getenv("HF_TOKEN", "")
33
  ROUTER_URL = "https://api-inference.huggingface.co/models"
 
162
 
163
  @app.post("/inference")
164
  async def inference(data: dict):
165
+
166
+ prompt = data.get("prompt", "")
167
+ model = data.get("model", PRIMARY_MODEL)
168
+
169
+ start_time = time.time()
170
+
171
+ # FailSafe wrapper for primary inference
172
+ @failsafe(alert_manager)
173
+ async def run_primary(p, m):
174
+ return await client.infer(m, p)
175
+
176
+ try:
177
+ res = await run_primary(prompt, model)
178
+
179
+ duration = int((time.time() - start_time) * 1000)
180
+
181
+ # Метрики
182
+ save_metrics({
183
+ "endpoint": "/inference",
184
+ "model": model,
185
+ "latency_ms": duration
186
+ })
187
+
188
+ # Лог — успешный ответ
189
+ log_alert(
190
+ source="agent",
191
+ level="INFO",
192
+ message=f"Inference OK (model={model})",
193
+ extra={"prompt_len": len(prompt), "latency": duration}
194
+ )
195
+
196
+ # Если ошибка в ответе
197
+ if isinstance(res, dict) and "error" in res:
198
+ raise Exception(res["error"])
199
+
200
+ return {"source": "router", "response": res}
201
+
202
+ except Exception as primary_err:
203
+
204
+ log_alert(
205
+ source="agent",
206
+ level="ERROR",
207
+ message=f"Primary inference failed: {primary_err}",
208
+ extra={"error": str(primary_err)}
209
+ )
210
+
211
+ # Fallback через FailSafe
212
+ @failsafe(alert_manager)
213
+ async def run_fallback(p):
214
+ return await fallback.generate(p)
215
+
216
+ try:
217
+ fb = await run_fallback(prompt)
218
+ duration = int((time.time() - start_time) * 1000)
219
+
220
+ # Fallback метрики
221
+ save_metrics({
222
+ "endpoint": "/inference",
223
+ "model": FALLBACK_MODEL,
224
+ "latency_ms": duration,
225
+ "fallback_used": True
226
+ })
227
+
228
+ log_alert(
229
+ source="fallback",
230
+ level="WARNING",
231
+ message=f"Fallback inference OK (model={FALLBACK_MODEL})",
232
+ extra={"latency": duration}
233
+ )
234
+
235
+ return {"source": "fallback", "response": fb}
236
+
237
+ except Exception as fb_err:
238
+ log_alert(
239
+ source="fallback",
240
+ level="ERROR",
241
+ message=f"Fallback failed: {fb_err}",
242
+ extra={"error": str(fb_err)}
243
+ )
244
+ return {"error": "Inference failure on both primary and fallback"}
245
+
246
 
247
  # ================= GRADIO UI =================
248
  def gradio_infer(prompt, model_choice):
249
+
250
+ start_time = time.time()
251
+ model = model_choice or PRIMARY_MODEL
252
+
253
+ @failsafe(alert_manager)
254
+ def run_model(p, m):
255
+ return asyncio.run(client.infer(m, p))
256
+
257
+ try:
258
+ result = run_model(prompt, model)
259
+
260
+ duration = int((time.time() - start_time) * 1000)
261
+
262
+ # Метрики Gradio
263
+ save_metrics({
264
+ "interface": "gradio",
265
+ "prompt_len": len(prompt),
266
+ "model": model,
267
+ "latency_ms": duration
268
+ })
269
+
270
+ log_alert(
271
+ source="gradio",
272
+ level="INFO",
273
+ message=f"Gradio inference success (model={model})",
274
+ extra={"latency": duration}
275
+ )
276
+
277
+ if isinstance(result, dict) and "error" in result:
278
+ raise Exception(result["error"])
279
+
280
+ if isinstance(result, list):
281
+ return result[0].get("generated_text", str(result))
282
+
283
+ return str(result)
284
+
285
+ except Exception as ui_err:
286
+
287
+ log_alert(
288
+ source="gradio",
289
+ level="ERROR",
290
+ message=f"Gradio inference error: {ui_err}",
291
+ extra={"error": str(ui_err)}
292
+ )
293
+
294
+ # fallback
295
  fb = asyncio.run(fallback.generate(prompt))
296
+ return f"⚠️ Error: {ui_err}\n\n🧠 Fallback: {fb}"
297
+
 
 
298
 
299
  def show_dashboard():
300
  mem = psutil.virtual_memory().percent