Spaces:

Yermek68
/

eroha-agentapi

Sleeping

App Files Files Community

Yermek68 committed on Dec 15, 2025

Commit

241c892

verified ·

1 Parent(s): 4508d0d

Update app.py

Browse files

Files changed (1) hide show

app.py +43 -143

app.py CHANGED Viewed

@@ -1,79 +1,50 @@
 """
-🤖 Eroha AgentAPI v5.9 — Enterprise Edition
 Enterprise-grade architecture for Hugging Face Spaces
 Auto-Token Recovery | Smart Fallback 2.0 | Self-Heal | Metrics | Stable Dashboard
 """
-import os
-import asyncio
-import aiohttp
-import time
 from datetime import datetime
 import gradio as gr
 from fastapi import FastAPI
-from fastapi.responses import JSONResponse, HTMLResponse, PlainTextResponse
 from transformers import pipeline
-import psutil
-# ==========================
-# CONFIGURATION
-# ==========================
 HF_TOKEN = os.getenv("HF_TOKEN", "")
 ROUTER_URL = "https://api-inference.huggingface.co/models"
 FALLBACK_MODEL = "sshleifer/tiny-gpt2"
-CHECK_INTERVAL = 180  # Router health check every 3 minutes
-MAX_MEMORY_THRESHOLD = 85  # Fallback loads only if memory < 85%
-# ==========================
-# CORE CLASSES
-# ==========================
 class CircuitBreaker:
     def __init__(self, threshold=3, timeout=60):
-        self.failures = 0
-        self.threshold = threshold
-        self.timeout = timeout
-        self.state = "CLOSED"
-        self.last_failure = 0
     def allow(self):
         if self.state == "OPEN" and time.time() - self.last_failure < self.timeout:
             return False
         if self.state == "OPEN" and time.time() - self.last_failure >= self.timeout:
             self.state = "HALF_OPEN"
         return True
-    def record_success(self):
-        self.failures = 0
-        if self.state in ["HALF_OPEN", "OPEN"]:
-            self.state = "CLOSED"
     def record_failure(self):
         self.failures += 1
         if self.failures >= self.threshold:
-            self.state = "OPEN"
-            self.last_failure = time.time()
 circuit = CircuitBreaker()
-# ==========================
-# HF CLIENT
-# ==========================
 class HFClient:
     def __init__(self):
-        self.token = HF_TOKEN
-        self.headers = {"Authorization": f"Bearer {self.token}"} if self.token else {}
-        self.valid = False
-        self.session = None
-        self.retries = 0
-        self.latency = 0
     async def get_session(self):
         if not self.session or self.session.closed:
             self.session = aiohttp.ClientSession()
         return self.session
     async def validate(self):
-        """Validate HF token"""
         try:
             async with aiohttp.ClientSession() as s:
                 start = time.time()
@@ -84,23 +55,17 @@ class HFClient:
         except:
             self.valid = False
             return False
     async def infer(self, model, text):
-        """Inference through Hugging Face Router"""
         if not circuit.allow():
             return {"error": "Circuit breaker open — fallback engaged"}
         try:
-            session = await self.get_session()
             payload = {"inputs": text, "parameters": {"max_new_tokens": 250}}
             start = time.time()
-            async with session.post(
-                f"{ROUTER_URL}/{model}", headers=self.headers, json=payload, timeout=30
-            ) as r:
                 self.latency = round((time.time() - start) * 1000, 2)
                 if r.status == 200:
                     circuit.record_success()
-                    self.retries = 0
                     return await r.json()
                 else:
                     circuit.record_failure()
@@ -111,147 +76,84 @@ class HFClient:
         except Exception as e:
             circuit.record_failure()
             return {"error": f"Router exception: {e}"}
     async def recover_token(self):
-        """Try to reload token from backup or /tmp file"""
         print("⚠️ Token invalid — trying recovery...")
-        token_paths = [
-            "/tmp/hf_token.txt",
-            os.getenv("HF_TOKEN_BACKUP", "")
-        ]
-        for path in token_paths:
             if path and os.path.exists(path):
-                try:
-                    with open(path, "r") as f:
-                        token = f.read().strip()
-                        if token:
-                            self.headers = {"Authorization": f"Bearer {token}"}
                             print("✅ Token recovered successfully.")
-                            self.valid = await self.validate()
-                            if self.valid:
-                                return True
-                except:
-                    continue
         print("❌ Token recovery failed.")
         return False
 client = HFClient()
-# ==========================
-# SMART FALLBACK
-# ==========================
 class Fallback:
-    def __init__(self):
-        self.pipe = None
-        self.loaded = False
     async def load(self):
-        """Load fallback model if memory is OK"""
         if not self.loaded and psutil.virtual_memory().percent < MAX_MEMORY_THRESHOLD:
             print("🧠 Loading fallback model...")
             self.pipe = pipeline("text-generation", model=FALLBACK_MODEL)
             self.loaded = True
-            print("✅ Fallback model ready.")
     async def generate(self, text):
-        """Generate fallback response"""
-        if not self.loaded:
-            await self.load()
-        if not self.pipe:
-            return "⚠️ Fallback model unavailable."
-        result = self.pipe(text, max_new_tokens=100)[0]["generated_text"]
-        return result
 fallback = Fallback()
-# ==========================
-# WATCHDOG & SELF-HEAL
-# ==========================
 async def watchdog():
-    uptime_start = time.time()
     while True:
         await asyncio.sleep(CHECK_INTERVAL)
         print(f"[{datetime.now().isoformat()}] 🩺 Watchdog check...")
-        valid = await client.validate()
-        if not valid:
             await client.recover_token()
-        mem = psutil.virtual_memory().percent
-        if mem > 90:
-            print(f"⚠️ High memory usage: {mem}% — consider restart.")
         if not circuit.allow():
-            print("⚠️ Circuit breaker is open — self-healing...")
             circuit.state = "CLOSED"
-# ==========================
-# FASTAPI CORE
-# ==========================
 app = FastAPI(title="Eroha AgentAPI v5.9 — Enterprise Edition")
 @app.on_event("startup")
 async def startup():
-    print("🚀 Starting Eroha AgentAPI v5.9 — Enterprise Edition")
     asyncio.create_task(watchdog())
     await client.validate()
 @app.get("/health")
 async def health():
-    uptime = round(time.time() - psutil.boot_time(), 1)
-    mem = psutil.virtual_memory().percent
     return JSONResponse({
         "status": "ok" if client.valid else "degraded",
         "circuit": circuit.state,
-        "memory": mem,
         "latency_ms": client.latency,
-        "uptime_s": uptime,
         "token_valid": client.valid
     })
 @app.get("/metrics", response_class=PlainTextResponse)
 async def metrics():
-    """Prometheus-style metrics"""
     mem = psutil.virtual_memory().percent
-    return (
-        f"hf_token_valid {1 if client.valid else 0}\n"
-        f"router_latency_ms {client.latency}\n"
-        f"memory_usage_percent {mem}\n"
-        f"circuit_state {'0' if circuit.state == 'CLOSED' else 1}\n"
-    )
 @app.post("/inference")
 async def inference(data: dict):
-    text = data.get("prompt", "")
-    model = data.get("model", "microsoft/phi-3-mini-4k-instruct")
     res = await client.infer(model, text)
     if "error" in res:
         fb = await fallback.generate(text)
         return {"source": "fallback", "response": fb, "note": res["error"]}
     return {"source": "router", "response": res}
-@app.get("/", response_class=HTMLResponse)
-async def root_page():
-    """Root status page"""
-    return """
-    <html><head><title>Eroha AgentAPI v5.9</title></head>
-    <body style='font-family:Arial;text-align:center;padding:2em;'>
-      <h2>🤖 Eroha AgentAPI v5.9 — Enterprise Edition</h2>
-      <p>Status: <b style='color:green;'>Running ✅</b></p>
-      <p><a href='/ui' style='font-size:18px;color:#4a68ff;'>Открыть интерфейс →</a></p>
-      <p><a href='/health'>Health</a> • <a href='/metrics'>Metrics</a></p>
-    </body></html>
-    """
-@app.get("/favicon.ico")
-async def favicon():
-    return PlainTextResponse("", status_code=204)
-# ==========================
-# GRADIO DASHBOARD
-# ==========================
 def gradio_infer(prompt, model_choice):
-    loop = asyncio.new_event_loop()
-    asyncio.set_event_loop(loop)
-    result = loop.run_until_complete(client.infer(model_choice, prompt))
-    loop.close()
     if "error" in result:
         fb = asyncio.run(fallback.generate(prompt))
         return f"⚠️ Router failed ({result['error']})\n\n🧠 Fallback:\n{fb}"
@@ -274,9 +176,9 @@ def show_dashboard():
 | Time | {datetime.now().strftime("%H:%M:%S")} |
 """
-demo = gr.Blocks(title="Eroha AgentAPI v5.9 — Enterprise Edition")
 with demo:
-    gr.Markdown("# 🤖 Eroha AgentAPI v5.9 — AutoRecovery + Smart Dashboard")
     with gr.Tab("💬 Chat"):
         inp = gr.Textbox(label="Введите запрос")
         model = gr.Dropdown(
@@ -292,8 +194,6 @@ with demo:
         refresh.click(fn=show_dashboard, outputs=dash)
         dash.value = show_dashboard()
-app = gr.mount_gradio_app(app, demo, path="/ui")
 if __name__ == "__main__":
-    import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=7860)

 """
+🤖 Eroha AgentAPI v5.9.2 — Enterprise Edition (Docker UI Fix)
 Enterprise-grade architecture for Hugging Face Spaces
 Auto-Token Recovery | Smart Fallback 2.0 | Self-Heal | Metrics | Stable Dashboard
 """
+import os, asyncio, aiohttp, time, psutil
 from datetime import datetime
 import gradio as gr
 from fastapi import FastAPI
+from fastapi.responses import JSONResponse, PlainTextResponse
 from transformers import pipeline
 # Hugging Face access token read from the environment; empty when unset.
 HF_TOKEN = os.environ.get("HF_TOKEN", "")
 # Base URL; a model id is appended to it for each remote inference request.
 ROUTER_URL = "https://api-inference.huggingface.co/models"
 # Model id passed to the local text-generation pipeline.
 FALLBACK_MODEL = "sshleifer/tiny-gpt2"
 # Seconds the watchdog sleeps between two checks.
 CHECK_INTERVAL = 180
 # The fallback model is only loaded while memory usage (percent) is below this.
 MAX_MEMORY_THRESHOLD = 85
+# ================= CIRCUIT BREAKER =================
 class CircuitBreaker:
     """Three-state breaker ("CLOSED", "OPEN", "HALF_OPEN") driven by failure counts.

     The breaker opens once ``failures`` reaches ``threshold`` and rejects
     calls until ``timeout`` seconds have passed since ``last_failure``.
     """

     def __init__(self, threshold=3, timeout=60):
         """Start closed with a zero failure count.

         Args:
             threshold: Number of recorded failures at which the breaker opens.
             timeout: Seconds an open breaker keeps rejecting calls.
         """
         self.failures = 0
         self.threshold = threshold
         self.timeout = timeout
         self.state = "CLOSED"
         self.last_failure = 0

     def allow(self):
         """Return True when a call may proceed.

         An open breaker whose timeout has elapsed is moved to "HALF_OPEN"
         as a side effect and the call is allowed.
         """
         if self.state != "OPEN":
             return True
         elapsed = time.time() - self.last_failure
         if elapsed < self.timeout:
             return False
         self.state = "HALF_OPEN"
         return True

     def record_success(self):
         """Clear the failure count and close the breaker."""
         self.failures = 0
         self.state = "CLOSED"

     def record_failure(self):
         """Count one failure; open the breaker when the threshold is reached."""
         self.failures += 1
         if self.failures < self.threshold:
             return
         opened_at = time.time()
         self.state = "OPEN"
         self.last_failure = opened_at


 # Module-wide breaker shared by the client and the watchdog.
 circuit = CircuitBreaker()
+# ================= HF CLIENT =================
 class HFClient:
     def __init__(self):
+        self.token, self.valid, self.session, self.latency = HF_TOKEN, False, None, 0
+        self.headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
     async def get_session(self):
         if not self.session or self.session.closed:
             self.session = aiohttp.ClientSession()
         return self.session
     async def validate(self):
         try:
             async with aiohttp.ClientSession() as s:
                 start = time.time()
         except:
             self.valid = False
             return False
     async def infer(self, model, text):
         if not circuit.allow():
             return {"error": "Circuit breaker open — fallback engaged"}
         try:
+            s = await self.get_session()
             payload = {"inputs": text, "parameters": {"max_new_tokens": 250}}
             start = time.time()
+            async with s.post(f"{ROUTER_URL}/{model}", headers=self.headers, json=payload, timeout=30) as r:
                 self.latency = round((time.time() - start) * 1000, 2)
                 if r.status == 200:
                     circuit.record_success()
                     return await r.json()
                 else:
                     circuit.record_failure()
         except Exception as e:
             circuit.record_failure()
             return {"error": f"Router exception: {e}"}
     async def recover_token(self):
         print("⚠️ Token invalid — trying recovery...")
+        for path in ["/tmp/hf_token.txt", os.getenv("HF_TOKEN_BACKUP", "")]:
             if path and os.path.exists(path):
+                with open(path) as f:
+                    token = f.read().strip()
+                    if token:
+                        self.headers = {"Authorization": f"Bearer {token}"}
+                        if await self.validate():
                             print("✅ Token recovered successfully.")
+                            return True
         print("❌ Token recovery failed.")
         return False
 client = HFClient()
+# ================= FALLBACK =================
 class Fallback:
     """Local text-generation fallback built on a lazily loaded pipeline.

     Model loading and generation are synchronous calls, so both are run in
     the event loop's default executor instead of directly inside the
     coroutines; otherwise every other request would stall while they run.
     """

     def __init__(self):
         """Start with no pipeline loaded."""
         # Local import: the file's import block does not provide threading.
         import threading

         self.pipe, self.loaded = None, False
         # Serialises loads that may be requested from several threads/loops.
         self._load_lock = threading.Lock()

     def _load_sync(self):
         """Blocking load; runs in a worker thread. No-op when already loaded
         or when memory usage is at or above MAX_MEMORY_THRESHOLD."""
         with self._load_lock:
             # Re-check under the lock: another caller may have finished first.
             if self.loaded:
                 return
             if psutil.virtual_memory().percent >= MAX_MEMORY_THRESHOLD:
                 return
             print("🧠 Loading fallback model...")
             self.pipe = pipeline("text-generation", model=FALLBACK_MODEL)
             self.loaded = True

     async def load(self):
         """Load the fallback pipeline once, without blocking the event loop."""
         if self.loaded:
             return
         loop = asyncio.get_running_loop()
         await loop.run_in_executor(None, self._load_sync)

     async def generate(self, text):
         """Generate up to 100 new tokens for ``text`` with the fallback model.

         Args:
             text: Prompt passed to the pipeline.

         Returns:
             The ``generated_text`` of the first pipeline result, or a fixed
             notice string when no pipeline is available.
         """
         await self.load()
         pipe = self.pipe
         if not pipe:
             return "⚠️ Fallback unavailable."
         loop = asyncio.get_running_loop()
         outputs = await loop.run_in_executor(
             None, lambda: pipe(text, max_new_tokens=100)
         )
         return outputs[0]["generated_text"]


 fallback = Fallback()
+# ================= WATCHDOG =================
 async def watchdog():
     """Periodic self-check loop; runs until the task is cancelled.

     Every CHECK_INTERVAL seconds it re-validates the token (attempting
     recovery on failure), warns on memory usage above 90 percent, and
     force-closes the circuit breaker if it is currently rejecting calls.
     A failing check is logged and the loop continues, so one bad iteration
     cannot end monitoring for the lifetime of the process.
     """
     while True:
         await asyncio.sleep(CHECK_INTERVAL)
         print(f"[{datetime.now().isoformat()}] 🩺 Watchdog check...")
         try:
             if not await client.validate():
                 await client.recover_token()
             if psutil.virtual_memory().percent > 90:
                 print("⚠️ High memory usage.")
             if not circuit.allow():
                 # record_success() also clears the failure counter; setting
                 # only the state would leave the counter at the threshold
                 # and a single further failure would reopen the breaker.
                 circuit.record_success()
                 print("🛠️ Circuit auto-healed.")
         except asyncio.CancelledError:
             # Cancellation must propagate so shutdown can stop the task.
             raise
         except Exception as exc:
             print(f"⚠️ Watchdog check failed: {exc!r}")
+# ================= FASTAPI =================
 app = FastAPI(title="Eroha AgentAPI v5.9 — Enterprise Edition")


 @app.on_event("startup")
 async def startup():
     """Startup hook: launch the watchdog task and validate the token once."""
     print("🚀 Starting Eroha AgentAPI v5.9.2 — Enterprise Edition")
     # The event loop keeps only a weak reference to tasks; store a strong one
     # so the watchdog cannot be garbage-collected while it is running.
     app.state.watchdog_task = asyncio.create_task(watchdog())
     await client.validate()
 @app.get("/health")
 async def health():
     """Return a JSON snapshot of token validity, breaker state, memory and latency."""
     token_ok = client.valid
     snapshot = {
         "status": "ok" if token_ok else "degraded",
         "circuit": circuit.state,
         "memory": psutil.virtual_memory().percent,
         "latency_ms": client.latency,
         "token_valid": token_ok,
     }
     return JSONResponse(snapshot)
 @app.get("/metrics", response_class=PlainTextResponse)
 async def metrics():
     """Return plain-text metrics, one ``name value`` pair per line."""
     mem = psutil.virtual_memory().percent
     token_flag = 1 if client.valid else 0
     # 0 while the breaker is closed, 1 for any other state.
     circuit_flag = 0 if circuit.state == 'CLOSED' else 1
     rows = (
         f"hf_token_valid {token_flag}",
         f"router_latency_ms {client.latency}",
         f"memory_usage_percent {mem}",
         f"circuit_state {circuit_flag}",
     )
     return "\n".join(rows) + "\n"
 @app.post("/inference")
 async def inference(data: dict):
     """Run a prompt through the remote client, using the local fallback on error.

     Args:
         data: Request body; reads the optional "prompt" and "model" keys.

     Returns:
         A dict whose "source" is "router" or "fallback"; the fallback form
         also carries the router's error text under "note".
     """
     prompt = data.get("prompt", "")
     model_id = data.get("model", "microsoft/phi-3-mini-4k-instruct")
     router_result = await client.infer(model_id, prompt)
     if "error" not in router_result:
         return {"source": "router", "response": router_result}
     fallback_text = await fallback.generate(prompt)
     return {
         "source": "fallback",
         "response": fallback_text,
         "note": router_result["error"],
     }
+# ================= GRADIO UI =================
 def gradio_infer(prompt, model_choice):
+    result = asyncio.run(client.infer(model_choice, prompt))
     if "error" in result:
         fb = asyncio.run(fallback.generate(prompt))
         return f"⚠️ Router failed ({result['error']})\n\n🧠 Fallback:\n{fb}"
 | Time | {datetime.now().strftime("%H:%M:%S")} |
 """
+demo = gr.Blocks(title="Eroha AgentAPI v5.9.2 — Enterprise Edition")
 with demo:
+    gr.Markdown("# 🤖 Eroha AgentAPI v5.9.2 — Enterprise Edition")
     with gr.Tab("💬 Chat"):
         inp = gr.Textbox(label="Введите запрос")
         model = gr.Dropdown(
         refresh.click(fn=show_dashboard, outputs=dash)
         dash.value = show_dashboard()
+# ---- Launch for Docker ----
 if __name__ == "__main__":
     # NOTE(review): this guard launches only the Gradio UI; nothing here
     # starts the FastAPI `app` — confirm that is intended.
     ui = demo.queue()
     ui.launch(server_name="0.0.0.0", server_port=7860, share=False, inline=False)