Yermek68 committed on
Commit
db886be
Β·
verified Β·
1 Parent(s): fcd4016

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +89 -138
app.py CHANGED
@@ -1,149 +1,100 @@
1
- import os, asyncio, aiohttp, time, psutil
2
- from datetime import datetime
 
 
 
 
 
 
 
3
  import gradio as gr
4
- from fastapi import FastAPI
5
- from fastapi.responses import PlainTextResponse
6
- from pydantic import BaseModel
7
- import uvicorn
8
  from gradio.routes import mount_gradio_app
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
- # === Detect HF Spaces ===
11
- IS_HF_SPACES = os.getenv("SPACE_ID") is not None
12
-
13
- # === Π—Π°Π³Π»ΡƒΡˆΠΊΠΈ для core-ΠΌΠΎΠ΄ΡƒΠ»Π΅ΠΉ (Ρ‡Ρ‚ΠΎΠ±Ρ‹ Π½Π΅ Π±Ρ‹Π»ΠΎ ModuleNotFoundError) ===
14
- def log_alert(source: str, level: str, message: str):
15
- print(f"[{level}] {source}: {message}")
16
-
17
- def save_metrics(data):
18
- print(f"πŸ“Š METRICS (dummy): {data}")
19
-
20
- def failsafe(func): # decorator stub
21
- return func
22
-
23
- # === ΠšΠΎΠ½ΡΡ‚Π°Π½Ρ‚Ρ‹ ===
24
- PRIMARY_MODEL = "microsoft/phi-3-mini-4k-instruct"
25
- HF_TOKEN = os.getenv("HF_TOKEN", "")
26
- ROUTER_URL = "https://api-inference.huggingface.co/models"
27
- CHECK_INTERVAL = 180
28
-
29
- # === CircuitBreaker ===
30
- class CircuitBreaker:
31
- def __init__(self, threshold=3, timeout=60):
32
- self.failures, self.threshold, self.timeout = 0, threshold, timeout
33
- self.state, self.last_failure = "CLOSED", 0
34
-
35
- def allow(self):
36
- if self.state == "OPEN" and time.time() - self.last_failure < self.timeout:
37
- return False
38
- if self.state == "OPEN" and time.time() - self.last_failure >= self.timeout:
39
- self.state = "HALF_OPEN"
40
- return True
41
-
42
- def record_success(self):
43
- self.failures, self.state = 0, "CLOSED"
44
-
45
- def record_failure(self):
46
- self.failures += 1
47
- if self.failures >= self.threshold:
48
- self.state, self.last_failure = "OPEN", time.time()
49
-
50
- circuit = CircuitBreaker()
51
-
52
- # === Hugging Face API ΠΊΠ»ΠΈΠ΅Π½Ρ‚ ===
53
- class HFClient:
54
- def __init__(self):
55
- self.token = HF_TOKEN
56
- self.session = None
57
- self.latency = 0
58
- self.headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
59
-
60
- async def get_session(self):
61
- if not self.session or self.session.closed:
62
- self.session = aiohttp.ClientSession()
63
- return self.session
64
-
65
- async def infer(self, model: str, text: str):
66
- if not circuit.allow():
67
- return {"error": "Circuit breaker open"}
68
- try:
69
- s = await self.get_session()
70
- payload = {"inputs": text[:1000], "parameters": {"max_new_tokens": 100}}
71
- start = time.time()
72
- async with s.post(f"{ROUTER_URL}/{model}", headers=self.headers, json=payload) as r:
73
- self.latency = round((time.time() - start) * 1000, 2)
74
- if r.status == 200:
75
- circuit.record_success()
76
- return await r.json()
77
- else:
78
- circuit.record_failure()
79
- return {"error": f"HTTP {r.status}"}
80
- except Exception as e:
81
- circuit.record_failure()
82
- return {"error": str(e)}
83
-
84
- client = HFClient()
85
-
86
- # === FastAPI App ===
87
- app = FastAPI(title="Eroha AgentAPI v5.9.2 β€” Enterprise Edition")
88
-
89
- @app.on_event("startup")
90
- async def startup_event():
91
- print("πŸš€ Eroha AgentAPI started.")
92
- asyncio.create_task(watchdog())
93
 
94
  @app.get("/health")
95
  async def health():
96
- return {"status": "ok", "circuit": circuit.state, "memory": psutil.virtual_memory().percent, "latency": client.latency}
 
97
 
98
- @app.get("/metrics", response_class=PlainTextResponse)
99
  async def metrics():
100
- return f"circuit_state {circuit.state}\nmemory {psutil.virtual_memory().percent}\n"
101
 
102
- class InferenceRequest(BaseModel):
103
- prompt: str
104
- model: str = PRIMARY_MODEL
105
 
106
  @app.post("/inference")
107
- async def inference(req: InferenceRequest):
108
- start = time.time()
109
- result = await client.infer(req.model, req.prompt)
110
- duration = int((time.time() - start) * 1000)
111
- save_metrics({"latency_ms": duration})
112
- return {"response": result, "duration_ms": duration}
113
-
114
- # === Watchdog ===
115
- async def watchdog():
116
- while True:
117
- await asyncio.sleep(CHECK_INTERVAL)
118
- print(f"🩺 Watchdog OK at {datetime.now().strftime('%H:%M:%S')}")
119
-
120
- # === Gradio UI ===
121
- def gradio_infer(prompt: str, model_choice: str):
122
- try:
123
- result = asyncio.run(client.infer(model_choice, prompt))
124
- if isinstance(result, list) and "generated_text" in result[0]:
125
- return result[0]["generated_text"]
126
- return str(result)
127
- except Exception as e:
128
- return f"❌ Error: {e}"
129
-
130
- def show_dashboard():
131
- mem = psutil.virtual_memory().percent
132
- return f"| Metric | Value |\n|--------|--------|\n| Circuit | {circuit.state} |\n| Memory | {mem}% |\n| Latency | {client.latency} ms |"
133
-
134
- demo = gr.Blocks(title="Eroha AgentAPI v5.9.2")
135
- with demo:
136
- gr.Markdown("# πŸ€– Eroha AgentAPI v5.9.2 β€” Enterprise Edition")
137
- with gr.Tab("πŸ’¬ Chat"):
138
- inp = gr.Textbox(label="Π’Π²Π΅Π΄ΠΈΡ‚Π΅ запрос")
139
- model = gr.Dropdown(["microsoft/phi-3-mini-4k-instruct", "google/gemma-2-2b-it"], value="microsoft/phi-3-mini-4k-instruct", label="МодСль")
140
- out = gr.Textbox(label="ΠžΡ‚Π²Π΅Ρ‚")
141
- gr.Button("πŸš€ ΠžΡ‚ΠΏΡ€Π°Π²ΠΈΡ‚ΡŒ").click(fn=gradio_infer, inputs=[inp, model], outputs=out)
142
- with gr.Tab("πŸ“Š Dashboard"):
143
- dash = gr.Markdown(show_dashboard())
144
- gr.Button("πŸ”„ ΠžΠ±Π½ΠΎΠ²ΠΈΡ‚ΡŒ").click(fn=show_dashboard, outputs=dash)
145
-
146
- # === Π€ΠΈΠ½Π°Π»ΡŒΠ½Ρ‹ΠΉ Π΅Π΄ΠΈΠ½Ρ‹ΠΉ запуск ===
147
- if __name__ == "__main__":
148
- app = mount_gradio_app(app, demo, path="/")
149
- uvicorn.run(app, host="0.0.0.0", port=7860)
 
1
+ """
2
+ Eroha v6.4.4 β€” Zero-Daemon Edition
3
+ ----------------------------------
4
+ Production-grade FastAPI + Gradio fusion
5
+ No threads, no leaks, 100% graceful lifecycle.
6
+ """
7
+
8
+ import asyncio
9
+ import psutil
10
  import gradio as gr
11
+ from fastapi import FastAPI, Request
12
+ from slowapi import Limiter, _rate_limit_exceeded_handler
13
+ from slowapi.util import get_remote_address
14
+ from slowapi.errors import RateLimitExceeded
15
  from gradio.routes import mount_gradio_app
16
+ from contextlib import asynccontextmanager
17
+
18
+
19
# ───────────────────────────────
# 1️⃣ Global metrics state (event-loop safe)
# ───────────────────────────────
# Shared snapshot written only by the single background task running on
# the one event loop, so no locking is required. "timestamp" holds the
# loop clock at the last sample (monotonic, not wall-clock time).
state = {"cpu": 0.0, "ram": 0.0, "timestamp": 0.0}
23
+
24
+
25
# ───────────────────────────────
# 2️⃣ Lifespan manager (async background task)
# ───────────────────────────────
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Run a metrics sampler for the app's whole lifetime.

    On startup, spawn a background task that refreshes the module-level
    ``state`` dict every 5 seconds; on shutdown, signal it to stop and
    wait for it to finish so no task outlives the server.
    """
    stop_event = asyncio.Event()

    async def background_metrics():
        # asyncio.get_event_loop() inside a coroutine is deprecated;
        # get_running_loop() is the supported way to reach the loop.
        loop = asyncio.get_running_loop()
        while not stop_event.is_set():
            try:
                state["cpu"] = psutil.cpu_percent()
                state["ram"] = psutil.virtual_memory().percent
                state["timestamp"] = loop.time()
            except Exception as e:
                # Best-effort sampling: never let a psutil hiccup kill
                # the task; log and keep going.
                print(f"[Metrics] error: {e}")
            # Wait on the stop event with a timeout instead of a plain
            # sleep(5): shutdown no longer stalls up to 5 s waiting for
            # the sleep to elapse — the task wakes the moment the event
            # is set.
            try:
                await asyncio.wait_for(stop_event.wait(), timeout=5)
            except asyncio.TimeoutError:
                pass  # normal sampling interval elapsed

    task = asyncio.create_task(background_metrics())
    yield  # Server runs here
    stop_event.set()
    # Swallow any terminal exception from the task so shutdown is clean.
    await asyncio.gather(task, return_exceptions=True)
46
+
47
+
48
# ───────────────────────────────
# 3️⃣ FastAPI app with rate limiter
# ───────────────────────────────
# Single FastAPI instance; `lifespan` manages the background metrics task.
# slowapi requires the Limiter to live on app.state AND the exception
# handler to be registered, otherwise RateLimitExceeded surfaces as a 500
# instead of an HTTP 429 response.
app = FastAPI(title="Eroha v6.4.4 API", lifespan=lifespan)
limiter = Limiter(key_func=get_remote_address)
app.state.limiter = limiter
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
@app.get("/health")
async def health():
    """Liveness probe: report service status and the last sample time."""
    # NOTE(review): "uptime" is the loop-clock timestamp of the most
    # recent metrics sample, not a duration since start — confirm that
    # consumers of this endpoint expect that.
    payload = {"status": "ok", "uptime": state["timestamp"]}
    return payload
60
+
61
 
62
@app.get("/metrics")
async def metrics():
    """Expose the live system-metrics snapshot (cpu, ram, timestamp)."""
    # Return the shared dict directly; FastAPI serializes it to JSON.
    snapshot = state
    return snapshot
65
 
 
 
 
66
 
67
@app.post("/inference")
@limiter.limit("10/minute")
async def inference(request: Request):
    """Echo-style inference stub, rate-limited to 10 requests/minute per IP.

    Reads an optional "prompt" field from the JSON body and returns an
    echo of its first 120 characters plus the latest metrics snapshot.
    """
    # Original crashed with a 500 on a malformed/absent JSON body, and
    # broke on a JSON array/scalar body (no .get) or a non-string prompt
    # (no slicing). Degrade to an empty prompt instead.
    try:
        data = await request.json()
    except Exception:
        data = {}
    if not isinstance(data, dict):
        data = {}
    prompt = data.get("prompt", "")
    if not isinstance(prompt, str):
        prompt = str(prompt)
    # simulate model call without blocking the event loop
    await asyncio.sleep(0.1)
    return {"reply": f"Echo: {prompt[:120]}", "stats": state}
75
+
76
+
77
# ───────────────────────────────
# 4️⃣ Gradio dashboard
# ───────────────────────────────
with gr.Blocks(title="Eroha v6.4.4 Dashboard") as demo:
    gr.Markdown("## ⚙️ Eroha v6.4.4 – Zero-Daemon Edition")

    with gr.Row():
        inp = gr.Textbox(label="Prompt")
        out = gr.Textbox(label="Response")
    # NOTE(review): the UI echoes the full prompt, while POST /inference
    # truncates to 120 characters — confirm the asymmetry is intended.
    gr.Button("Send").click(lambda x: f"Echo: {x}", inputs=inp, outputs=out)

    gr.Markdown("### 📊 Live Metrics (5s refresh)")
    cpu_box = gr.Number(label="CPU %")
    ram_box = gr.Number(label="RAM %")

    # Poll the shared `state` dict every 5 s; the values are produced by
    # the background task started in the FastAPI lifespan.
    demo.load(lambda: (state["cpu"], state["ram"]), outputs=[cpu_box, ram_box], every=5)


# ───────────────────────────────
# 5️⃣ Mount Gradio to FastAPI (single port)
# ───────────────────────────────
# Serve the Gradio UI and the JSON API from one ASGI app on one port;
# the dashboard takes over "/" while /health, /metrics and /inference
# remain FastAPI routes.
app = mount_gradio_app(app, demo, path="/")

# No explicit uvicorn.run — HF Spaces handles launch automatically.