Spaces:

Yermek68
/

eroha-agentapi

Sleeping

App Files Community

Yermek68 committed on Dec 19, 2025

Commit

85d0cd1

verified ·

1 Parent(s): 53a4fff

Update app.py

Browse files

Files changed (1) hide show

app.py +72 -349

app.py CHANGED Viewed

@@ -1,426 +1,149 @@
-"""
-🤖 Eroha AgentAPI v5.9.2 — Enterprise Edition (Docker UI Fix)
-Enterprise-grade architecture for Hugging Face Spaces
-Auto-Token Recovery | Smart Fallback 2.0 | Self-Heal | Metrics | Stable Dashboard
-"""
 import os, asyncio, aiohttp, time, psutil
 from datetime import datetime
 import gradio as gr
 from fastapi import FastAPI
-from fastapi.responses import JSONResponse, PlainTextResponse
-from transformers import pipeline
-# Добавляем папку core в путь для импорта
-import sys
-sys.path.append(os.path.join(os.path.dirname(__file__), "core"))
-# === Импорт логирования + метрик + FailSafe ===
-# from alert_core import log_alert
-from metrics_core import save_metrics
-from alerters import ConsoleAlerter, FileAlerter
-from alert_manager import AlertManager
-from failsafe_core import failsafe
-# Safe import for alert_core
-try:
-    from alert_core import log_alert
-except ModuleNotFoundError:
-    def log_alert(msg: str):
-        print(f"[⚠️ ALERT] {msg} (alert_core not found — using fallback)")
-# === Настройка моделей для логики ===
-PRIMARY_MODEL = "microsoft/phi-3-mini-instruct"
-FALLBACK_MODEL = "sshleifer/tiny-gpt2"
-# Настройка AlertManager
-alert_manager = AlertManager([
-    ConsoleAlerter(),
-    FileAlerter("alerts_log.json")
-])
 HF_TOKEN = os.getenv("HF_TOKEN", "")
 ROUTER_URL = "https://api-inference.huggingface.co/models"
-FALLBACK_MODEL = "sshleifer/tiny-gpt2"
 CHECK_INTERVAL = 180
-MAX_MEMORY_THRESHOLD = 85
-# ================= CIRCUIT BREAKER =================
 class CircuitBreaker:
     def __init__(self, threshold=3, timeout=60):
         self.failures, self.threshold, self.timeout = 0, threshold, timeout
         self.state, self.last_failure = "CLOSED", 0
     def allow(self):
         if self.state == "OPEN" and time.time() - self.last_failure < self.timeout:
             return False
         if self.state == "OPEN" and time.time() - self.last_failure >= self.timeout:
             self.state = "HALF_OPEN"
         return True
-    def record_success(self): self.failures, self.state = 0, "CLOSED"
     def record_failure(self):
         self.failures += 1
         if self.failures >= self.threshold:
             self.state, self.last_failure = "OPEN", time.time()
 circuit = CircuitBreaker()
-# ================= HF CLIENT =================
 class HFClient:
     def __init__(self):
-        self.token, self.valid, self.session, self.latency = HF_TOKEN, False, None, 0
         self.headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
     async def get_session(self):
         if not self.session or self.session.closed:
             self.session = aiohttp.ClientSession()
         return self.session
-    async def validate(self):
-        try:
-            async with aiohttp.ClientSession() as s:
-                start = time.time()
-                async with s.get("https://huggingface.co/api/whoami-v2", headers=self.headers) as r:
-                    self.latency = round((time.time() - start) * 1000, 2)
-                    self.valid = r.status == 200
-                    return self.valid
-        except:
-            self.valid = False
-            return False
-    async def infer(self, model, text):
         if not circuit.allow():
-            return {"error": "Circuit breaker open — fallback engaged"}
         try:
             s = await self.get_session()
-            payload = {"inputs": text, "parameters": {"max_new_tokens": 250}}
             start = time.time()
-            async with s.post(f"{ROUTER_URL}/{model}", headers=self.headers, json=payload, timeout=30) as r:
                 self.latency = round((time.time() - start) * 1000, 2)
                 if r.status == 200:
                     circuit.record_success()
                     return await r.json()
                 else:
                     circuit.record_failure()
-                    if r.status in (401, 410):
-                        self.valid = False
-                        await self.recover_token()
-                    return {"error": f"Router error {r.status}"}
         except Exception as e:
             circuit.record_failure()
-            return {"error": f"Router exception: {e}"}
-    async def recover_token(self):
-        print("⚠️ Token invalid — trying recovery...")
-        for path in ["/tmp/hf_token.txt", os.getenv("HF_TOKEN_BACKUP", "")]:
-            if path and os.path.exists(path):
-                with open(path) as f:
-                    token = f.read().strip()
-                    if token:
-                        self.headers = {"Authorization": f"Bearer {token}"}
-                        if await self.validate():
-                            print("✅ Token recovered successfully.")
-                            return True
-        print("❌ Token recovery failed.")
-        return False
-client = HFClient()
-# ================= FALLBACK =================
-class Fallback:
-    def __init__(self): self.pipe, self.loaded = None, False
-    async def load(self):
-        if not self.loaded and psutil.virtual_memory().percent < MAX_MEMORY_THRESHOLD:
-            print("🧠 Loading fallback model...")
-            self.pipe = pipeline("text-generation", model=FALLBACK_MODEL)
-            self.loaded = True
-    async def generate(self, text):
-        await self.load()
-        if not self.pipe: return "⚠️ Fallback unavailable."
-        return self.pipe(text, max_new_tokens=100)[0]["generated_text"]
-fallback = Fallback()
-# ================= WATCHDOG =================
-async def watchdog():
-    while True:
-        await asyncio.sleep(CHECK_INTERVAL)
-        print(f"[{datetime.now().isoformat()}] 🩺 Watchdog check...")
-        if not await client.validate():
-            await client.recover_token()
-        if psutil.virtual_memory().percent > 90:
-            print("⚠️ High memory usage.")
-        if not circuit.allow():
-            circuit.state = "CLOSED"
-            print("🛠️ Circuit auto-healed.")
-# ================= FASTAPI =================
-app = FastAPI(title="Eroha AgentAPI v5.9 — Enterprise Edition")
 @app.on_event("startup")
-async def startup():
-    print("🚀 Starting Eroha AgentAPI v5.9.2 — Enterprise Edition")
     asyncio.create_task(watchdog())
-    await client.validate()
 @app.get("/health")
 async def health():
-    return JSONResponse({
-        "status": "ok" if client.valid else "degraded",
-        "circuit": circuit.state,
-        "memory": psutil.virtual_memory().percent,
-        "latency_ms": client.latency,
-        "token_valid": client.valid
-    })
 @app.get("/metrics", response_class=PlainTextResponse)
 async def metrics():
-    mem = psutil.virtual_memory().percent
-    return f"hf_token_valid {1 if client.valid else 0}\nrouter_latency_ms {client.latency}\nmemory_usage_percent {mem}\ncircuit_state {'0' if circuit.state == 'CLOSED' else 1}\n"
-@app.post("/inference")
-async def inference(data: dict):
-    prompt = data.get("prompt", "")
-    model = data.get("model", PRIMARY_MODEL)
-    start_time = time.time()
-    # FailSafe wrapper for primary inference
-    @failsafe(alert_manager)
-    async def run_primary(p, m):
-        return await client.infer(m, p)
-    try:
-        res = await run_primary(prompt, model)
-        duration = int((time.time() - start_time) * 1000)
-        # Метрики
-        save_metrics({
-            "endpoint": "/inference",
-            "model": model,
-            "latency_ms": duration
-        })
-        # Лог — успешный ответ
-        log_alert(
-            source="agent",
-            level="INFO",
-            message=f"Inference OK (model={model})",
-            extra={"prompt_len": len(prompt), "latency": duration}
-        )
-        # Если ошибка в ответе
-        if isinstance(res, dict) and "error" in res:
-            raise Exception(res["error"])
-        return {"source": "router", "response": res}
-    except Exception as primary_err:
-        log_alert(
-            source="agent",
-            level="ERROR",
-            message=f"Primary inference failed: {primary_err}",
-            extra={"error": str(primary_err)}
-        )
-        # Fallback через FailSafe
-        @failsafe(alert_manager)
-        async def run_fallback(p):
-            return await fallback.generate(p)
-        try:
-            fb = await run_fallback(prompt)
-            duration = int((time.time() - start_time) * 1000)
-            # Fallback метрики
-            save_metrics({
-                "endpoint": "/inference",
-                "model": FALLBACK_MODEL,
-                "latency_ms": duration,
-                "fallback_used": True
-            })
-            log_alert(
-                source="fallback",
-                level="WARNING",
-                message=f"Fallback inference OK (model={FALLBACK_MODEL})",
-                extra={"latency": duration}
-            )
-            return {"source": "fallback", "response": fb}
-        except Exception as fb_err:
-            log_alert(
-                source="fallback",
-                level="ERROR",
-                message=f"Fallback failed: {fb_err}",
-                extra={"error": str(fb_err)}
-            )
-            return {"error": "Inference failure on both primary and fallback"}
-# ================= GRADIO UI =================
-def gradio_infer(prompt, model_choice):
-    start_time = time.time()
-    model = model_choice or PRIMARY_MODEL
-    @failsafe(alert_manager)
-    def run_model(p, m):
-        return asyncio.run(client.infer(m, p))
     try:
-        result = run_model(prompt, model)
-        duration = int((time.time() - start_time) * 1000)
-        # Метрики Gradio
-        save_metrics({
-            "interface": "gradio",
-            "prompt_len": len(prompt),
-            "model": model,
-            "latency_ms": duration
-        })
-        log_alert(
-            source="gradio",
-            level="INFO",
-            message=f"Gradio inference success (model={model})",
-            extra={"latency": duration}
-        )
-        if isinstance(result, dict) and "error" in result:
-            raise Exception(result["error"])
-        if isinstance(result, list):
-            return result[0].get("generated_text", str(result))
         return str(result)
-    except Exception as ui_err:
-        log_alert(
-            source="gradio",
-            level="ERROR",
-            message=f"Gradio inference error: {ui_err}",
-            extra={"error": str(ui_err)}
-        )
-        # fallback
-        fb = asyncio.run(fallback.generate(prompt))
-        return f"⚠️ Error: {ui_err}\n\n🧠 Fallback: {fb}"
 def show_dashboard():
     mem = psutil.virtual_memory().percent
-    status = "✅ Valid" if client.valid else "❌ Invalid"
-    color = "green" if client.valid else "red"
-    return f"""
-### 🧠 Eroha Enterprise Dashboard
-| Metric | Value |
-|--------|--------|
-| Token | <span style='color:{color}'>{status}</span> |
-| Circuit | {circuit.state} |
-| Memory | {mem}% |
-| Latency | {client.latency} ms |
-| Time | {datetime.now().strftime("%H:%M:%S")} |
-"""
-demo = gr.Blocks(title="Eroha AgentAPI v5.9.2 — Enterprise Edition")
-with demo:
-    gr.Markdown("# 🤖 Eroha AgentAPI v5.9.2 — Enterprise Edition")
-    with gr.Tab("💬 Chat"):
-        inp = gr.Textbox(label="Введите запрос")
-        model = gr.Dropdown(
-            ["microsoft/phi-3-mini-4k-instruct", "google/gemma-2-2b-it", "meta-llama/Meta-Llama-3-8B-Instruct"],
-            value="microsoft/phi-3-mini-4k-instruct", label="Модель"
-        )
-        out = gr.Textbox(label="Ответ")
-        btn = gr.Button("🚀 Отправить")
-        btn.click(fn=gradio_infer, inputs=[inp, model], outputs=out)
-    with gr.Tab("📊 Dashboard"):
-        dash = gr.Markdown()
-        refresh = gr.Button("🔄 Обновить")
-        refresh.click(fn=show_dashboard, outputs=dash)
-        dash.value = show_dashboard()
-import uvicorn
-from gradio.routes import mount_gradio_app
-# Определяем, работает ли код внутри Hugging Face Spaces
-# HF Spaces detection
-IS_HF_SPACES = os.getenv("SPACE_ID") is not None
-import os
-import gradio as gr
-import uvicorn
-import logging
-from fastapi import FastAPI
-from gradio.routes import mount_gradio_app
-# =====================================================
-# 🔒 Safe import: alert_core (если нет — fallback)
-# =====================================================
-try:
-    from alert_core import log_alert
-except ModuleNotFoundError:
-    def log_alert(msg: str):
-        print(f"[⚠️ ALERT] {msg} (alert_core not found — using fallback)")
-# =====================================================
-# 🧭 Настройка окружения и логирования
-# =====================================================
-IS_HF_SPACES = os.getenv("SPACE_ID") is not None
-RUN_ENV = "Hugging Face Spaces" if IS_HF_SPACES else "Localhost"
-logging.basicConfig(
-    level=logging.INFO,
-    format="%(asctime)s [%(levelname)s] %(message)s",
-    handlers=[logging.StreamHandler()]
-)
-logging.info(f"🚀 Starting Eroha Agent environment: {RUN_ENV}")
-log_alert(f"System boot: {RUN_ENV}")
-# =====================================================
-# 🌐 Создаём FastAPI и интерфейс Gradio
-# =====================================================
-app = FastAPI()
 demo = gr.Blocks(title="Eroha AgentAPI v5.9.2")
 with demo:
     gr.Markdown("# 🤖 Eroha AgentAPI v5.9.2 — Enterprise Edition")
     with gr.Tab("💬 Chat"):
         inp = gr.Textbox(label="Введите запрос")
-        model = gr.Dropdown(
-            ["microsoft/phi-3-mini-4k-instruct",
-             "google/gemma-2-2b-it",
-             "meta-llama/Meta-Llama-3-8B-Instruct"],
-            value="microsoft/phi-3-mini-4k-instruct",
-            label="Модель"
-        )
         out = gr.Textbox(label="Ответ")
-        btn = gr.Button("🚀 Отправить")
-        btn.click(fn=lambda x, m: f"Обработка запроса для {m}: {x}",
-                  inputs=[inp, model],
-                  outputs=out)
     with gr.Tab("📊 Dashboard"):
-        dash = gr.Markdown("📈 Метрики ещё не загружены")
-        refresh = gr.Button("🔄 Обновить")
-        refresh.click(fn=lambda: "✅ Метрики обновлены", outputs=dash)
-# =====================================================
-# ⚙️ Запуск приложения
-# =====================================================
 if __name__ == "__main__":
-    if IS_HF_SPACES:
-        logging.info("✅ Running on Hugging Face Spaces (port 7860)")
-        app = mount_gradio_app(app, demo, path="/")
-        uvicorn.run(app, host="0.0.0.0", port=7860)
-    else:
-        import threading
-        logging.info("✅ Running locally (FastAPI → 7860 | Gradio → 7861)")
-        def run_gradio():
-            demo.queue().launch(server_port=7861, share=False)
-        threading.Thread(target=run_gradio, daemon=True).start()
-        uvicorn.run(app, host="0.0.0.0", port=7860)

 import os, asyncio, aiohttp, time, psutil
 from datetime import datetime
 import gradio as gr
 from fastapi import FastAPI
+from fastapi.responses import PlainTextResponse
+from pydantic import BaseModel
+import uvicorn
+from gradio.routes import mount_gradio_app
+# === Detect HF Spaces ===
+IS_HF_SPACES = os.getenv("SPACE_ID") is not None
+# === Заглушки для core-модулей (чтобы не было ModuleNotFoundError) ===
+def log_alert(source: str, level: str, message: str):
+    print(f"[{level}] {source}: {message}")
+def save_metrics(data):
+    print(f"📊 METRICS (dummy): {data}")
+def failsafe(func):  # decorator stub
+    return func
+# === Константы ===
+PRIMARY_MODEL = "microsoft/phi-3-mini-4k-instruct"
 HF_TOKEN = os.getenv("HF_TOKEN", "")
 ROUTER_URL = "https://api-inference.huggingface.co/models"
 CHECK_INTERVAL = 180
+# === CircuitBreaker ===
 class CircuitBreaker:
     def __init__(self, threshold=3, timeout=60):
         self.failures, self.threshold, self.timeout = 0, threshold, timeout
         self.state, self.last_failure = "CLOSED", 0
     def allow(self):
         if self.state == "OPEN" and time.time() - self.last_failure < self.timeout:
             return False
         if self.state == "OPEN" and time.time() - self.last_failure >= self.timeout:
             self.state = "HALF_OPEN"
         return True
+    def record_success(self):
+        self.failures, self.state = 0, "CLOSED"
     def record_failure(self):
         self.failures += 1
         if self.failures >= self.threshold:
             self.state, self.last_failure = "OPEN", time.time()
 circuit = CircuitBreaker()
+# === Hugging Face API клиент ===
 class HFClient:
     def __init__(self):
+        self.token = HF_TOKEN
+        self.session = None
+        self.latency = 0
         self.headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
     async def get_session(self):
         if not self.session or self.session.closed:
             self.session = aiohttp.ClientSession()
         return self.session
+    async def infer(self, model: str, text: str):
         if not circuit.allow():
+            return {"error": "Circuit breaker open"}
         try:
             s = await self.get_session()
+            payload = {"inputs": text[:1000], "parameters": {"max_new_tokens": 100}}
             start = time.time()
+            async with s.post(f"{ROUTER_URL}/{model}", headers=self.headers, json=payload) as r:
                 self.latency = round((time.time() - start) * 1000, 2)
                 if r.status == 200:
                     circuit.record_success()
                     return await r.json()
                 else:
                     circuit.record_failure()
+                    return {"error": f"HTTP {r.status}"}
         except Exception as e:
             circuit.record_failure()
+            return {"error": str(e)}
+client = HFClient()
+# === FastAPI App ===
+app = FastAPI(title="Eroha AgentAPI v5.9.2 — Enterprise Edition")
 @app.on_event("startup")
+async def startup_event():
+    print("🚀 Eroha AgentAPI started.")
     asyncio.create_task(watchdog())
 @app.get("/health")
 async def health():
+    return {"status": "ok", "circuit": circuit.state, "memory": psutil.virtual_memory().percent, "latency": client.latency}
 @app.get("/metrics", response_class=PlainTextResponse)
 async def metrics():
+    return f"circuit_state {circuit.state}\nmemory {psutil.virtual_memory().percent}\n"
+class InferenceRequest(BaseModel):
+    prompt: str
+    model: str = PRIMARY_MODEL
+@app.post("/inference")
+async def inference(req: InferenceRequest):
+    start = time.time()
+    result = await client.infer(req.model, req.prompt)
+    duration = int((time.time() - start) * 1000)
+    save_metrics({"latency_ms": duration})
+    return {"response": result, "duration_ms": duration}
+# === Watchdog ===
+async def watchdog():
+    while True:
+        await asyncio.sleep(CHECK_INTERVAL)
+        print(f"🩺 Watchdog OK at {datetime.now().strftime('%H:%M:%S')}")
+# === Gradio UI ===
+def gradio_infer(prompt: str, model_choice: str):
     try:
+        result = asyncio.run(client.infer(model_choice, prompt))
+        if isinstance(result, list) and "generated_text" in result[0]:
+            return result[0]["generated_text"]
         return str(result)
+    except Exception as e:
+        return f"❌ Error: {e}"
 def show_dashboard():
     mem = psutil.virtual_memory().percent
+    return f"| Metric | Value |\n|--------|--------|\n| Circuit | {circuit.state} |\n| Memory | {mem}% |\n| Latency | {client.latency} ms |"
 demo = gr.Blocks(title="Eroha AgentAPI v5.9.2")
 with demo:
     gr.Markdown("# 🤖 Eroha AgentAPI v5.9.2 — Enterprise Edition")
     with gr.Tab("💬 Chat"):
         inp = gr.Textbox(label="Введите запрос")
+        model = gr.Dropdown(["microsoft/phi-3-mini-4k-instruct", "google/gemma-2-2b-it"], value="microsoft/phi-3-mini-4k-instruct", label="Модель")
         out = gr.Textbox(label="Ответ")
+        gr.Button("🚀 Отправить").click(fn=gradio_infer, inputs=[inp, model], outputs=out)
     with gr.Tab("📊 Dashboard"):
+        dash = gr.Markdown(show_dashboard())
+        gr.Button("🔄 Обновить").click(fn=show_dashboard, outputs=dash)
+# === Финальный единый запуск ===
 if __name__ == "__main__":
+    app = mount_gradio_app(app, demo, path="/")
+    uvicorn.run(app, host="0.0.0.0", port=7860)