Yermek68 committed on
Commit
7276fa0
·
verified ·
1 Parent(s): b95b531

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +200 -105
app.py CHANGED
@@ -1,109 +1,204 @@
 
 
 
 
 
1
  import os
 
 
2
  import time
3
- import json
4
- import gradio as gr
5
- import requests
6
  from datetime import datetime
7
-
8
- # =========================
9
- # ⚙️ НАСТРОЙКИ
10
- # =========================
11
- HF_TOKEN = os.getenv("HF_TOKEN") or "hf_your_token_here"
12
-
13
- PRIMARY_MODEL = "google/gemma-2-2b-it"
14
- FALLBACK_MODEL = "mistralai/Mixtral-8x7B-Instruct-v0.1"
15
- ROUTER_URL = "https://router.huggingface.co"
16
-
17
- HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"}
18
-
19
-
20
- # =========================
21
- # 🧩 ПОЛЕЗНЫЕ ФУНКЦИИ
22
- # =========================
23
-
24
- def check_token():
25
- """Проверка валидности токена Hugging Face."""
26
- try:
27
- res = requests.get("https://router.huggingface.co/status", headers=HEADERS, timeout=8)
28
- if res.status_code == 200:
29
- return True
30
- else:
31
- print(f"⚠️ Токен Hugging Face невалиден ({res.status_code})")
 
 
32
  return False
33
- except Exception as e:
34
- print(f"Ошибка при проверке токена: {e}")
35
- return False
36
-
37
-
38
- def send_request(model: str, prompt: str):
39
- """Отправка запроса к модели через Router API."""
40
- payload = {"model": model, "inputs": prompt, "options": {"use_cache": True}}
41
- try:
42
- start = time.time()
43
- response = requests.post(ROUTER_URL, headers=HEADERS, json=payload, timeout=60)
44
- latency = time.time() - start
45
-
46
- if response.status_code == 200:
47
- data = response.json()
48
- # Универсальный парсинг ответа
49
- if isinstance(data, list) and len(data) > 0 and "generated_text" in data[0]:
50
- text = data[0]["generated_text"]
51
- elif isinstance(data, dict) and "generated_text" in data:
52
- text = data["generated_text"]
53
- else:
54
- text = str(data)
55
- return text.strip(), latency, model, None
56
- else:
57
- return None, latency, model, f"Ошибка API {response.status_code}: {response.text}"
58
- except Exception as e:
59
- return None, 0, model, str(e)
60
-
61
-
62
- def generate_text(prompt: str):
63
- """Основная функция: попытка через основную модель → fallback при ошибке."""
64
- if not check_token():
65
- return "❌ Токен Hugging Face недействителен. Проверьте переменную HF_TOKEN."
66
-
67
- # 1️⃣ Основная модель
68
- output, latency, used_model, error = send_request(PRIMARY_MODEL, prompt)
69
- if output:
70
- return render_output(output, used_model, latency, success=True)
71
-
72
- # 2️⃣ Fallback при ошибке
73
- output_fb, latency_fb, model_fb, error_fb = send_request(FALLBACK_MODEL, prompt)
74
- if output_fb:
75
- return render_output(output_fb, model_fb, latency_fb, success=True, fallback=True)
76
- else:
77
- return f"❌ Ошибка при выполнении запроса:\n- {error}\n- Fallback: {error_fb}"
78
-
79
-
80
- def render_output(text, model, latency, success=False, fallback=False):
81
- """Форматированный вывод результата."""
82
- emoji = "" if success else "⚠️"
83
- fb_text = " (через fallback)" if fallback else ""
84
- return (
85
- f"{emoji} **Модель:** `{model}`{fb_text}\n"
86
- f"⏱ **Время отклика:** {latency:.2f} сек\n\n"
87
- f"🧠 **Ответ:**\n{text.strip()}"
88
- )
89
-
90
-
91
- # =========================
92
- # 🧭 GRADIO UI
93
- # =========================
94
-
95
- with gr.Blocks(title="🤖 Eroha AgentAPI v5.7 — Stable Router Edition") as demo:
96
- gr.Markdown("## 🧠 Eroha AgentAPI v5.7 — Stable Router Edition\n"
97
- "Поддержка Router API + AutoFallback + Token Validation 🌐")
98
-
99
- with gr.Row():
100
- prompt = gr.Textbox(label="Введите запрос", placeholder="Например: 'Расскажи историю про ИИ, который научился понимать чувства.'", lines=3)
101
- output = gr.Markdown(label="Ответ")
102
-
103
- btn = gr.Button("🚀 Отправить", variant="primary")
104
- btn.click(generate_text, inputs=prompt, outputs=output)
105
-
106
- gr.Markdown("---")
107
- gr.Markdown("🧩 **Eroha Router Core v5.7** | Автоопределение моделей + безопасный fallback")
108
-
109
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Eroha AgentAPI v5.8 — AutoRecovery + Smart Dashboard Edition
3
+ Production-grade архитектура для Hugging Face Spaces
4
+ """
5
+
6
  import os
7
+ import asyncio
8
+ import aiohttp
9
  import time
 
 
 
10
  from datetime import datetime
11
+ import gradio as gr
12
+ from fastapi import FastAPI
13
+ from fastapi.responses import JSONResponse
14
+ from transformers import pipeline
15
+ import psutil
16
+
17
# ==============================
# CONFIGURATION
# ==============================
HF_TOKEN = os.getenv("HF_TOKEN", "")  # Space secret; empty string means unauthenticated requests
ROUTER_URL = "https://api-inference.huggingface.co/models"  # HF serverless inference base URL
FALLBACK_MODEL = "sshleifer/tiny-gpt2"  # tiny local model loaded when the router fails
CHECK_INTERVAL = 300 # 5 min
24
+
25
+ # ==============================
26
+ # CORE CLASSES
27
+ # ==============================
28
class CircuitBreaker:
    """Circuit breaker guarding calls to the HF router.

    States: CLOSED (normal operation), OPEN (rejecting calls after
    repeated failures), HALF_OPEN (probing again once the cooldown
    has elapsed).
    """

    def __init__(self, threshold=3, timeout=60):
        self.failures = 0          # consecutive failures so far
        self.threshold = threshold  # failures needed to trip OPEN
        self.timeout = timeout      # seconds to stay OPEN before probing
        self.state = "CLOSED"
        self.last_failure = 0       # time.time() of the trip that opened us

    def allow(self):
        """Return True when a call may proceed.

        While OPEN and inside the cooldown window, calls are rejected.
        Once the cooldown expires the breaker moves to HALF_OPEN and
        lets a probe through.
        """
        if self.state == "OPEN":
            elapsed = time.time() - self.last_failure
            if elapsed < self.timeout:
                return False
            self.state = "HALF_OPEN"
        return True

    def record_success(self):
        """Reset the failure count and close the breaker again."""
        self.failures = 0
        if self.state in ("HALF_OPEN", "OPEN"):
            self.state = "CLOSED"

    def record_failure(self):
        """Count one failure; trip to OPEN once the threshold is hit."""
        self.failures += 1
        if self.failures >= self.threshold:
            self.state = "OPEN"
            self.last_failure = time.time()

# Shared breaker used by every router call in this module.
circuit = CircuitBreaker()
55
+
56
class HFClient:
    """Async client for the Hugging Face Inference API.

    Wraps token validation and text generation; every router call
    feeds the module-level ``circuit`` breaker so repeated errors
    trip it and engage the local fallback.
    """

    def __init__(self):
        self.token = HF_TOKEN
        # Only send an Authorization header when a token is configured.
        self.headers = {"Authorization": f"Bearer {self.token}"} if self.token else {}
        self.valid = False  # last known result of validate()

    async def validate(self):
        """Check the token against whoami-v2; update and return ``self.valid``."""
        try:
            # Bounded timeout so a hung endpoint can't stall the watchdog.
            timeout = aiohttp.ClientTimeout(total=15)
            async with aiohttp.ClientSession(timeout=timeout) as s:
                async with s.get("https://huggingface.co/api/whoami-v2", headers=self.headers) as r:
                    self.valid = r.status == 200
                    return self.valid
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit propagate.
            self.valid = False
            return False

    async def infer(self, model, text):
        """POST a generation request for ``model``.

        Returns the parsed JSON on success, or ``{"error": ...}`` on
        any failure (including an open circuit breaker).
        """
        if not circuit.allow():
            return {"error": "Circuit breaker open fallback engaged"}

        try:
            # ClientTimeout replaces the deprecated numeric `timeout=30` on post().
            timeout = aiohttp.ClientTimeout(total=30)
            async with aiohttp.ClientSession(timeout=timeout) as s:
                payload = {"inputs": text, "parameters": {"max_new_tokens": 250}}
                async with s.post(f"{ROUTER_URL}/{model}", headers=self.headers, json=payload) as r:
                    if r.status == 200:
                        circuit.record_success()
                        data = await r.json()
                        return data
                    circuit.record_failure()
                    if r.status == 401:
                        # Token rejected: flag it so /health reports "degraded".
                        self.valid = False
                    return {"error": f"Router error {r.status}"}
        except Exception as e:
            circuit.record_failure()
            return {"error": str(e)}

# Shared client instance used by the API routes and the Gradio UI.
client = HFClient()
94
+
95
class Fallback:
    """Local text-generation fallback used when the router is unavailable."""

    def __init__(self):
        self.pipe = None    # lazily created transformers pipeline
        self.ready = False
        self._lock = asyncio.Lock()  # prevents duplicate concurrent loads

    async def load(self):
        """Load the fallback pipeline once; safe under concurrent callers."""
        if self.ready:
            return
        async with self._lock:
            if not self.ready:
                # pipeline() blocks (downloads/loads weights); run it in a
                # worker thread so the event loop stays responsive.
                self.pipe = await asyncio.to_thread(
                    pipeline, "text-generation", model=FALLBACK_MODEL
                )
                self.ready = True

    async def generate(self, text):
        """Generate up to 100 new tokens locally and return the full text."""
        await self.load()
        # Inference is CPU-bound; keep it off the event loop as well.
        result = await asyncio.to_thread(self.pipe, text, max_new_tokens=100)
        return result[0]["generated_text"]

# Single shared fallback; the tiny model keeps memory usage low.
fallback = Fallback()
111
+
112
+ # ==============================
113
+ # WATCHDOG
114
+ # ==============================
115
async def watchdog():
    """Background task: periodically re-validate the HF token and warn
    when memory pressure gets high. Runs forever; sleeps CHECK_INTERVAL
    seconds between checks.
    """
    while True:
        print(f"[{datetime.now().isoformat()}] 🔍 Watchdog check...")
        await client.validate()
        usage = psutil.virtual_memory().percent
        if usage > 85:
            print(f"⚠️ High memory usage: {usage}%")
        await asyncio.sleep(CHECK_INTERVAL)
123
+
124
# ==============================
# FASTAPI CORE
# ==============================
app = FastAPI(title="Eroha AgentAPI v5.8")

# NOTE(review): @app.on_event is deprecated in recent FastAPI versions;
# consider migrating to a lifespan handler.
@app.on_event("startup")
async def startup():
    """Launch the background watchdog and perform an initial token check."""
    asyncio.create_task(watchdog())
    await client.validate()
133
+
134
@app.get("/health")
async def health():
    """Status probe: token validity, circuit-breaker state, memory usage."""
    return JSONResponse({
        "status": "ok" if client.valid else "degraded",
        "circuit": circuit.state,
        "memory": psutil.virtual_memory().percent,
        "timestamp": datetime.now().isoformat()
    })
142
+
143
@app.post("/inference")
async def inference(data: dict):
    """Run router inference; on any router error serve the local fallback.

    Expects a JSON body with optional "prompt" and "model" keys.
    """
    text = data.get("prompt", "")
    model = data.get("model", "microsoft/phi-3-mini-4k-instruct")

    res = await client.infer(model, text)
    # infer() returns a dict with "error" on failure; a successful list
    # response will not match this membership test.
    if "error" in res:
        fb = await fallback.generate(text)
        return {"source": "fallback", "response": fb, "note": res["error"]}
    return {"source": "router", "response": res}
153
+
154
+ # ==============================
155
+ # GRADIO INTERFACE
156
+ # ==============================
157
def gradio_infer(prompt, model_choice):
    """Synchronous Gradio handler bridging into the async client.

    Gradio invokes this from a worker thread, so asyncio.run() is safe
    and replaces the previous manual new_event_loop/set_event_loop/close
    dance, which left a closed loop installed as the thread's event loop.
    """
    result = asyncio.run(client.infer(model_choice, prompt))
    if "error" in result:
        # Router failed (or circuit open): generate locally and show both.
        fb = asyncio.run(fallback.generate(prompt))
        return f"⚠️ Router failed ({result['error']})\n\n🧠 Fallback:\n{fb}"
    if isinstance(result, list):
        return result[0].get("generated_text", str(result))
    return str(result)
168
+
169
def show_dashboard():
    """Render the current service status as a small markdown table."""
    mem = psutil.virtual_memory().percent
    status = "✅ OK" if client.valid else "⚠️ Token Invalid"
    return f"""
### 🧠 Eroha AgentAPI Dashboard
| Metric | Status |
|--------|--------|
| Token Valid | {status} |
| Circuit | {circuit.state} |
| Memory Usage | {mem}% |
| Time | {datetime.now().strftime("%H:%M:%S")} |
"""
181
+
182
# Gradio UI: a chat tab wired to gradio_infer and a status dashboard tab.
demo = gr.Blocks(title="Eroha AgentAPI v5.8 — AutoRecovery Edition")
with demo:
    gr.Markdown("# 🤖 Eroha AgentAPI v5.8 — AutoRecovery + Smart Dashboard")
    with gr.Tab("💬 Chat"):
        inp = gr.Textbox(label="Введите запрос")
        model = gr.Dropdown(
            ["microsoft/phi-3-mini-4k-instruct", "google/gemma-2-2b-it", "meta-llama/Meta-Llama-3-8B-Instruct"],
            value="microsoft/phi-3-mini-4k-instruct", label="Модель"
        )
        out = gr.Textbox(label="Ответ")
        btn = gr.Button("🚀 Отправить")
        btn.click(fn=gradio_infer, inputs=[inp, model], outputs=out)
    with gr.Tab("📊 Dashboard"):
        dash = gr.Markdown()
        refresh = gr.Button("🔄 Обновить состояние")
        refresh.click(fn=show_dashboard, outputs=dash)
        # Populate the dashboard once at build time so the tab isn't empty
        # before the first refresh click.
        dash.value = show_dashboard()
199
+
200
# Serve the Gradio UI under /ui while keeping the JSON API at the root.
app = gr.mount_gradio_app(app, demo, path="/ui")

if __name__ == "__main__":
    import uvicorn
    # Bind all interfaces; 7860 is the conventional HF Spaces port.
    uvicorn.run(app, host="0.0.0.0", port=7860)