Yermek68 committed on
Commit
b3f2fcb
·
verified ·
1 Parent(s): cc62d89

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +145 -135
app.py CHANGED
@@ -1,7 +1,7 @@
1
  """
2
- Eroha AgentAPI v5.8.3Stable Memory Build
3
- Production-grade архитектура для Hugging Face Spaces
4
- Auto-Recovery + CircuitBreaker + Smart Dashboard + Low Memory Optimization
5
  """
6
 
7
  import os
@@ -15,20 +15,19 @@ from fastapi.responses import JSONResponse, HTMLResponse, PlainTextResponse
15
  from transformers import pipeline
16
  import psutil
17
 
18
- # ==============================
19
  # CONFIGURATION
20
- # ==============================
21
  HF_TOKEN = os.getenv("HF_TOKEN", "")
22
  ROUTER_URL = "https://api-inference.huggingface.co/models"
23
  FALLBACK_MODEL = "sshleifer/tiny-gpt2"
24
- CHECK_INTERVAL = 300 # 5 минут
 
25
 
26
-
27
- # ==============================
28
- # CORE COMPONENTS
29
- # ==============================
30
  class CircuitBreaker:
31
- """Простая FSM-защита от каскадных ошибок"""
32
  def __init__(self, threshold=3, timeout=60):
33
  self.failures = 0
34
  self.threshold = threshold
@@ -54,162 +53,200 @@ class CircuitBreaker:
54
  self.state = "OPEN"
55
  self.last_failure = time.time()
56
 
57
-
58
  circuit = CircuitBreaker()
59
 
60
-
 
 
61
  class HFClient:
62
- """Клиент Hugging Face API с проверкой токена"""
63
  def __init__(self):
64
  self.token = HF_TOKEN
65
  self.headers = {"Authorization": f"Bearer {self.token}"} if self.token else {}
66
  self.valid = False
 
 
 
 
 
 
 
 
67
 
68
  async def validate(self):
69
- """Проверка токена HF"""
70
  try:
71
  async with aiohttp.ClientSession() as s:
 
72
  async with s.get("https://huggingface.co/api/whoami-v2", headers=self.headers) as r:
 
73
  self.valid = r.status == 200
74
- print(f"🔐 HF token valid: {self.valid}")
75
  return self.valid
76
- except Exception as e:
77
- print(f"⚠️ Token validation error: {e}")
78
  self.valid = False
79
  return False
80
 
81
  async def infer(self, model, text):
82
- """Отправка запроса на Router API"""
83
  if not circuit.allow():
84
  return {"error": "Circuit breaker open — fallback engaged"}
85
 
86
  try:
87
- async with aiohttp.ClientSession() as s:
88
- payload = {"inputs": text, "parameters": {"max_new_tokens": 250}}
89
- async with s.post(f"{ROUTER_URL}/{model}", headers=self.headers, json=payload, timeout=30) as r:
90
- if r.status == 200:
91
- circuit.record_success()
92
- data = await r.json()
93
- return data
94
- else:
95
- circuit.record_failure()
96
- if r.status == 401:
97
- self.valid = False
98
- return {"error": f"Router error {r.status}"}
 
 
 
 
 
99
  except Exception as e:
100
  circuit.record_failure()
101
- return {"error": str(e)}
102
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
 
104
  client = HFClient()
105
 
106
-
107
- # ==============================
108
- # Fallback (safe, memory-aware)
109
- # ==============================
110
  class Fallback:
111
- """
112
- Локальная резервная tiny GPT-2 модель, безопасная для HF Spaces.
113
- Загружается только при низкой загрузке памяти (<85%).
114
- """
115
  def __init__(self):
116
  self.pipe = None
117
- self.ready = False
118
 
119
  async def load(self):
120
- """Безопасная загрузка модели"""
121
- mem = psutil.virtual_memory().percent
122
- if mem > 85:
123
- print(f"⚠️ Недостаточно памяти для загрузки fallback модели ({mem:.1f}%)")
124
- return False
125
-
126
- try:
127
- from transformers import pipeline
128
- print("🧠 Загрузка fallback модели (sshleifer/tiny-gpt2)...")
129
- self.pipe = pipeline("text-generation", model=FALLBACK_MODEL, device=-1)
130
- self.ready = True
131
- print("✅ Fallback модель успешно загружена")
132
- return True
133
- except Exception as e:
134
- print(f"❌ Ошибка при загрузке fallback модели: {e}")
135
- self.ready = False
136
- return False
137
 
138
  async def generate(self, text):
139
- """Формирование ответа с проверкой памяти"""
140
- mem = psutil.virtual_memory().percent
141
- if not self.ready and mem < 85:
142
  await self.load()
143
- elif not self.ready:
144
- print(f"⚠️ Пропуск загрузки fallback — память: {mem:.1f}%")
145
- return "⚠️ Недостаточно памяти для генерации fallback-ответа."
146
-
147
  if not self.pipe:
148
- return "⚠️ Fallback модель недоступна. Попробуйте позже."
149
-
150
- try:
151
- result = self.pipe(text, max_new_tokens=100, temperature=0.7)[0]["generated_text"]
152
- return result
153
- except Exception as e:
154
- print(f"❌ Ошибка генерации fallback: {e}")
155
- return "🧠 Ошибка fallback. Попробуйте позже."
156
-
157
 
158
  fallback = Fallback()
159
 
160
-
161
- # ==============================
162
- # WATCHDOG (background health)
163
- # ==============================
164
  async def watchdog():
165
- """Периодическая проверка состояния"""
166
  while True:
167
- print(f"[{datetime.now().isoformat()}] 🔍 Watchdog check...")
168
- await client.validate()
169
- mem = psutil.virtual_memory().percent
170
- if mem > 85:
171
- print(f"⚠️ High memory usage: {mem}%")
172
  await asyncio.sleep(CHECK_INTERVAL)
 
 
 
 
 
 
 
 
 
 
173
 
174
-
175
- # ==============================
176
  # FASTAPI CORE
177
- # ==============================
178
- app = FastAPI(title="Eroha AgentAPI v5.8.3")
179
 
180
  @app.on_event("startup")
181
  async def startup():
 
182
  asyncio.create_task(watchdog())
183
  await client.validate()
184
 
185
-
186
  @app.get("/health")
187
  async def health():
188
- """Healthcheck endpoint"""
 
189
  return JSONResponse({
190
  "status": "ok" if client.valid else "degraded",
191
  "circuit": circuit.state,
192
- "memory": psutil.virtual_memory().percent,
193
- "timestamp": datetime.now().isoformat()
 
 
194
  })
195
 
 
 
 
 
 
 
 
 
 
 
196
 
197
  @app.post("/inference")
198
  async def inference(data: dict):
199
- """Основной API-инференс"""
200
  text = data.get("prompt", "")
201
  model = data.get("model", "microsoft/phi-3-mini-4k-instruct")
202
-
203
  res = await client.infer(model, text)
204
  if "error" in res:
205
  fb = await fallback.generate(text)
206
  return {"source": "fallback", "response": fb, "note": res["error"]}
207
  return {"source": "router", "response": res}
208
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209
 
210
- # ==============================
211
- # GRADIO UI
212
- # ==============================
213
  def gradio_infer(prompt, model_choice):
214
  loop = asyncio.new_event_loop()
215
  asyncio.set_event_loop(loop)
@@ -222,24 +259,24 @@ def gradio_infer(prompt, model_choice):
222
  return result[0].get("generated_text", str(result))
223
  return str(result)
224
 
225
-
226
  def show_dashboard():
227
  mem = psutil.virtual_memory().percent
228
- status = "✅ OK" if client.valid else "⚠️ Token Invalid"
 
229
  return f"""
230
- ### 🧠 Eroha AgentAPI Dashboard
231
- | Metric | Status |
232
  |--------|--------|
233
- | Token Valid | {status} |
234
  | Circuit | {circuit.state} |
235
- | Memory Usage | {mem}% |
 
236
  | Time | {datetime.now().strftime("%H:%M:%S")} |
237
  """
238
 
239
-
240
- demo = gr.Blocks(title="Eroha AgentAPI v5.8.3 — Stable Memory Build")
241
  with demo:
242
- gr.Markdown("# 🤖 Eroha AgentAPI v5.8.3 — AutoRecovery + Smart Dashboard")
243
  with gr.Tab("💬 Chat"):
244
  inp = gr.Textbox(label="Введите запрос")
245
  model = gr.Dropdown(
@@ -251,39 +288,12 @@ with demo:
251
  btn.click(fn=gradio_infer, inputs=[inp, model], outputs=out)
252
  with gr.Tab("📊 Dashboard"):
253
  dash = gr.Markdown()
254
- refresh = gr.Button("🔄 Обновить состояние")
255
  refresh.click(fn=show_dashboard, outputs=dash)
256
  dash.value = show_dashboard()
257
 
258
  app = gr.mount_gradio_app(app, demo, path="/ui")
259
 
260
-
261
- # ==============================
262
- # STABLE ROOT ROUTES
263
- # ==============================
264
- @app.get("/", response_class=HTMLResponse)
265
- async def root_page():
266
- """Главная страница"""
267
- return """
268
- <html>
269
- <head><title>Eroha AgentAPI v5.8.3</title></head>
270
- <body style='font-family:Arial;text-align:center;padding:2em;'>
271
- <h2>🤖 Eroha AgentAPI v5.8.3 — Stable Memory Build</h2>
272
- <p>Status: <b style='color:green;'>Running ✅</b></p>
273
- <p><a href='./ui' target='_blank' style='color:#4a68ff;font-size:18px;'>Открыть интерфейс →</a></p>
274
- <p><a href='/health'>Health check</a></p>
275
- </body>
276
- </html>
277
- """
278
-
279
-
280
- @app.get("/favicon.ico")
281
- async def favicon():
282
- """Пустой favicon для предотвращения 404"""
283
- return PlainTextResponse("", status_code=204)
284
-
285
-
286
  if __name__ == "__main__":
287
  import uvicorn
288
- print("🚀 Starting Eroha AgentAPI v5.8.3 — Stable Memory Build")
289
  uvicorn.run(app, host="0.0.0.0", port=7860)
 
1
  """
2
+ 🤖 Eroha AgentAPI v5.9Enterprise Edition
3
+ Enterprise-grade architecture for Hugging Face Spaces
4
+ Auto-Token Recovery | Smart Fallback 2.0 | Self-Heal | Metrics | Stable Dashboard
5
  """
6
 
7
  import os
 
15
  from transformers import pipeline
16
  import psutil
17
 
18
# ==========================
# CONFIGURATION
# ==========================
HF_TOKEN = os.getenv("HF_TOKEN", "")  # Hugging Face API token (empty string → anonymous requests)
ROUTER_URL = "https://api-inference.huggingface.co/models"  # HF inference Router base URL
FALLBACK_MODEL = "sshleifer/tiny-gpt2"  # tiny local model used when the Router is unavailable
CHECK_INTERVAL = 180  # Router health check every 3 minutes
MAX_MEMORY_THRESHOLD = 85  # Fallback loads only if memory < 85%

# ==========================
# CORE CLASSES
# ==========================
 
30
  class CircuitBreaker:
 
31
  def __init__(self, threshold=3, timeout=60):
32
  self.failures = 0
33
  self.threshold = threshold
 
53
  self.state = "OPEN"
54
  self.last_failure = time.time()
55
 
 
56
  circuit = CircuitBreaker()
57
 
58
# ==========================
# HF CLIENT
# ==========================
class HFClient:
    """Async client for the Hugging Face inference Router.

    Keeps one shared aiohttp session for inference calls, records the last
    observed request latency (ms), and attempts token recovery from a backup
    file when the Router rejects the current credentials.
    """

    def __init__(self):
        self.token = HF_TOKEN
        self.headers = {"Authorization": f"Bearer {self.token}"} if self.token else {}
        self.valid = False   # last known token-validity status
        self.session = None  # shared ClientSession, created lazily
        self.retries = 0
        self.latency = 0     # last request round-trip in milliseconds

    async def get_session(self):
        """Return the shared session, recreating it if missing or closed."""
        if not self.session or self.session.closed:
            self.session = aiohttp.ClientSession()
        return self.session

    async def validate(self):
        """Validate HF token"""
        try:
            async with aiohttp.ClientSession() as s:
                start = time.time()
                async with s.get("https://huggingface.co/api/whoami-v2", headers=self.headers) as r:
                    self.latency = round((time.time() - start) * 1000, 2)
                    self.valid = r.status == 200
                    return self.valid
        except Exception:  # was bare `except:` — don't swallow SystemExit/KeyboardInterrupt
            self.valid = False
            return False

    async def infer(self, model, text):
        """Inference through Hugging Face Router"""
        if not circuit.allow():
            return {"error": "Circuit breaker open — fallback engaged"}

        try:
            session = await self.get_session()
            payload = {"inputs": text, "parameters": {"max_new_tokens": 250}}
            start = time.time()
            async with session.post(
                f"{ROUTER_URL}/{model}", headers=self.headers, json=payload, timeout=30
            ) as r:
                self.latency = round((time.time() - start) * 1000, 2)
                if r.status == 200:
                    circuit.record_success()
                    self.retries = 0
                    return await r.json()
                else:
                    circuit.record_failure()
                    if r.status in (401, 410):
                        # Credentials rejected: mark invalid and try the backup token.
                        self.valid = False
                        await self.recover_token()
                    return {"error": f"Router error {r.status}"}
        except Exception as e:
            circuit.record_failure()
            return {"error": f"Router exception: {e}"}

    async def recover_token(self):
        """Try to reload token from backup or /tmp file"""
        print("⚠️ Token invalid — trying recovery...")
        token_paths = [
            "/tmp/hf_token.txt",
            os.getenv("HF_TOKEN_BACKUP", "")  # NOTE(review): assumed to hold a *path*, not the token itself — confirm
        ]
        for path in token_paths:
            if path and os.path.exists(path):
                try:
                    with open(path, "r") as f:
                        token = f.read().strip()
                    if token:
                        # Keep `self.token` in sync with the new headers (mirrors __init__);
                        # previously only the headers were replaced.
                        self.token = token
                        self.headers = {"Authorization": f"Bearer {token}"}
                        print("✅ Token recovered successfully.")
                        self.valid = await self.validate()
                        if self.valid:
                            return True
                except Exception:  # unreadable candidate file — try the next one
                    continue
        print("❌ Token recovery failed.")
        return False


client = HFClient()
139
 
140
# ==========================
# SMART FALLBACK
# ==========================
class Fallback:
    """Local tiny-GPT-2 fallback used when the Router cannot serve a request.

    The pipeline is built lazily, and only when current memory usage is below
    MAX_MEMORY_THRESHOLD.
    """

    def __init__(self):
        self.pipe = None     # transformers pipeline, created on first use
        self.loaded = False  # True once the pipeline was built successfully

    async def load(self):
        """Load fallback model if memory is OK; never propagate load errors."""
        if self.loaded:
            return
        if psutil.virtual_memory().percent >= MAX_MEMORY_THRESHOLD:
            return
        try:
            print("🧠 Loading fallback model...")
            self.pipe = pipeline("text-generation", model=FALLBACK_MODEL)
            self.loaded = True
            print("✅ Fallback model ready.")
        except Exception as e:
            # A failed download/OOM must not crash the /inference endpoint;
            # generate() will report the model as unavailable instead.
            print(f"❌ Fallback model load failed: {e}")
            self.pipe = None
            self.loaded = False

    async def generate(self, text):
        """Generate fallback response"""
        if not self.loaded:
            await self.load()
        if not self.pipe:
            return "⚠️ Fallback model unavailable."
        result = self.pipe(text, max_new_tokens=100)[0]["generated_text"]
        return result


fallback = Fallback()
166
 
167
# ==========================
# WATCHDOG & SELF-HEAL
# ==========================
async def watchdog():
    """Periodic background health loop.

    Every CHECK_INTERVAL seconds: re-validates the HF token (attempting
    recovery when invalid), reports memory pressure, and force-closes the
    circuit breaker so Router calls can be retried.
    """
    while True:
        await asyncio.sleep(CHECK_INTERVAL)
        print(f"[{datetime.now().isoformat()}] 🩺 Watchdog check...")
        valid = await client.validate()
        if not valid:
            await client.recover_token()
        mem = psutil.virtual_memory().percent
        if mem > 90:
            print(f"⚠️ High memory usage: {mem}% — consider restart.")
        if not circuit.allow():
            print("⚠️ Circuit breaker is open — self-healing...")
            # Reset the failure counter as well as the state; leaving the
            # counter saturated could trip the breaker again on the very
            # next failure.
            circuit.failures = 0
            circuit.state = "CLOSED"
184
 
185
# ==========================
# FASTAPI CORE
# ==========================
app = FastAPI(title="Eroha AgentAPI v5.9 — Enterprise Edition")


@app.on_event("startup")
async def startup():
    """Launch the background watchdog and run an initial token check."""
    print("🚀 Starting Eroha AgentAPI v5.9 — Enterprise Edition")
    # Keep a reference to the task: the event loop holds only weak references,
    # so an un-referenced background task may be garbage-collected mid-flight.
    app.state.watchdog_task = asyncio.create_task(watchdog())
    await client.validate()
195
 
 
196
# Captured at import time so /health reports *service* uptime;
# psutil.boot_time() is the host machine's boot timestamp, not ours.
SERVICE_START = time.time()


@app.get("/health")
async def health():
    """Health endpoint: token validity, breaker state, memory, latency, uptime."""
    uptime = round(time.time() - SERVICE_START, 1)
    mem = psutil.virtual_memory().percent
    return JSONResponse({
        "status": "ok" if client.valid else "degraded",
        "circuit": circuit.state,
        "memory": mem,
        "latency_ms": client.latency,
        "uptime_s": uptime,
        "token_valid": client.valid
    })
208
 
209
@app.get("/metrics", response_class=PlainTextResponse)
async def metrics():
    """Prometheus-style metrics"""
    # Emit one `name value` sample per line, newline-terminated.
    memory_pct = psutil.virtual_memory().percent
    samples = [
        f"hf_token_valid {1 if client.valid else 0}",
        f"router_latency_ms {client.latency}",
        f"memory_usage_percent {memory_pct}",
        f"circuit_state {0 if circuit.state == 'CLOSED' else 1}",
    ]
    return "\n".join(samples) + "\n"
219
 
220
@app.post("/inference")
async def inference(data: dict):
    """Main inference endpoint: Router first, local fallback on any error."""
    prompt = data.get("prompt", "")
    model_id = data.get("model", "microsoft/phi-3-mini-4k-instruct")
    router_result = await client.infer(model_id, prompt)
    if "error" not in router_result:
        return {"source": "router", "response": router_result}
    # Router failed (or breaker open) — degrade to the local model.
    fallback_text = await fallback.generate(prompt)
    return {"source": "fallback", "response": fallback_text, "note": router_result["error"]}
229
 
230
@app.get("/", response_class=HTMLResponse)
async def root_page():
    """Landing page: static status banner with links to /ui, /health and /metrics."""
    return """
    <html><head><title>Eroha AgentAPI v5.9</title></head>
    <body style='font-family:Arial;text-align:center;padding:2em;'>
    <h2>🤖 Eroha AgentAPI v5.9 — Enterprise Edition</h2>
    <p>Status: <b style='color:green;'>Running ✅</b></p>
    <p><a href='/ui' style='font-size:18px;color:#4a68ff;'>Открыть интерфейс →</a></p>
    <p><a href='/health'>Health</a> • <a href='/metrics'>Metrics</a></p>
    </body></html>
    """
242
+
243
@app.get("/favicon.ico")
async def favicon():
    """Answer favicon requests with an empty 204 so browsers don't log 404s."""
    return PlainTextResponse("", status_code=204)
246
 
247
+ # ==========================
248
+ # GRADIO DASHBOARD
249
+ # ==========================
250
  def gradio_infer(prompt, model_choice):
251
  loop = asyncio.new_event_loop()
252
  asyncio.set_event_loop(loop)
 
259
  return result[0].get("generated_text", str(result))
260
  return str(result)
261
 
 
262
def show_dashboard():
    """Render the Dashboard tab as a Markdown table of live service stats."""
    mem = psutil.virtual_memory().percent
    # Restore the missing status glyph: the string was " Invalid" (icon lost),
    # while the valid branch shows "✅ Valid".
    status = "✅ Valid" if client.valid else "❌ Invalid"
    color = "green" if client.valid else "red"
    return f"""
    ### 🧠 Eroha Enterprise Dashboard
    | Metric | Value |
    |--------|--------|
    | Token | <span style='color:{color}'>{status}</span> |
    | Circuit | {circuit.state} |
    | Memory | {mem}% |
    | Latency | {client.latency} ms |
    | Time | {datetime.now().strftime("%H:%M:%S")} |
    """
276
 
277
+ demo = gr.Blocks(title="Eroha AgentAPI v5.9 — Enterprise Edition")
 
278
  with demo:
279
+ gr.Markdown("# 🤖 Eroha AgentAPI v5.9 — AutoRecovery + Smart Dashboard")
280
  with gr.Tab("💬 Chat"):
281
  inp = gr.Textbox(label="Введите запрос")
282
  model = gr.Dropdown(
 
288
  btn.click(fn=gradio_infer, inputs=[inp, model], outputs=out)
289
  with gr.Tab("📊 Dashboard"):
290
  dash = gr.Markdown()
291
+ refresh = gr.Button("🔄 Обновить")
292
  refresh.click(fn=show_dashboard, outputs=dash)
293
  dash.value = show_dashboard()
294
 
295
  app = gr.mount_gradio_app(app, demo, path="/ui")
296
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
297
if __name__ == "__main__":
    # Direct-run entry point: serve the combined FastAPI + Gradio app on port 7860.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)