Spaces:

Yermek68
/

eroha-agentapi

Running

App Files Files Community

Yermek68 committed 26 days ago

Commit

5c680d6

verified ·

1 Parent(s): b2e3720

Update app.py

Browse files

Files changed (1) hide show

app.py +93 -127

app.py CHANGED Viewed

@@ -1,148 +1,114 @@
 """
-Eroha v6.5 — Hybrid Enterprise Edition
---------------------------------------
-Features: Async Lifespan, Prometheus Metrics, Proxy-Aware Limiter,
-Stable Health Monitor, and Gradio Dashboard.
 """
 import asyncio
 import psutil
-import httpx
-import gradio as gr
 from fastapi import FastAPI, Request
-from slowapi import Limiter, _rate_limit_exceeded_handler
 from slowapi.util import get_remote_address
-from slowapi.errors import RateLimitExceeded
 from gradio.routes import mount_gradio_app
-from contextlib import asynccontextmanager
-from prometheus_client import make_asgi_app, Counter, Gauge
-# ───────────────────────────────
-# 1️⃣ Enterprise Metrics & State
-# ───────────────────────────────
-# Metrics for external systems (Prometheus/Grafana)
-REQ_COUNT = Counter("api_requests_total", "Total requests", ["method", "endpoint"])
-SYS_USAGE = Gauge("system_usage_percent", "System metrics", ["resource"])
-# Local state for the Gradio UI; it is written by the background metrics loop
-# and read by the /health, /metrics and /inference handlers.
-state = {"cpu": 0.0, "ram": 0.0, "timestamp": 0.0}
-# ───────────────────────────────
-# 2️⃣ Proxy-Aware Real IP Limiter
-# ───────────────────────────────
 def get_real_ip(request: Request):
-    """Extract the user's real IP when running behind a proxy (Hugging Face/Nginx).
-
-    Returns the first comma-separated entry of the ``x-forwarded-for`` header
-    when that header is present, otherwise ``request.client.host``.
-    """
     forwarded = request.headers.get("x-forwarded-for")
-    if forwarded:
-        return forwarded.split(",")[0]
-    return request.client.host
-# get_real_ip is passed to the limiter as its key function.
 limiter = Limiter(key_func=get_real_ip)
-# ───────────────────────────────
-# 3️⃣ Lifespan Manager (Resource Control)
-# ───────────────────────────────
-@asynccontextmanager
-async def lifespan(app: FastAPI):
-    """Start the metrics task before the app serves and stop it afterwards."""
-    stop_event = asyncio.Event()
-    async def background_metrics():
-        """Background data-collection loop (no threads)."""
-        while not stop_event.is_set():
-            try:
-                cpu = psutil.cpu_percent()
-                ram = psutil.virtual_memory().percent
-                # Update for the UI
-                state["cpu"], state["ram"] = cpu, ram
-                state["timestamp"] = asyncio.get_event_loop().time()
-                # Update for Prometheus
-                SYS_USAGE.labels(resource="cpu").set(cpu)
-                SYS_USAGE.labels(resource="ram").set(ram)
-            except Exception as e:
-                print(f"[Metrics Error] {e}")
-            await asyncio.sleep(15)  # Production interval
-    task = asyncio.create_task(background_metrics())
-    yield  # The application runs here
-    # stop_event is only checked at the top of the loop, so the gather below
-    # waits until the task's current 15-second sleep has finished.
-    stop_event.set()
-    await asyncio.gather(task, return_exceptions=True)
-# ───────────────────────────────
-# 4️⃣ FastAPI Core Setup
-# ───────────────────────────────
-# Create the application with the lifespan manager and register the limiter
-# together with the handler for RateLimitExceeded.
-app = FastAPI(title="Eroha v6.5 Enterprise", lifespan=lifespan)
-app.state.limiter = limiter
-app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
-# Mount the Prometheus endpoint
-metrics_app = make_asgi_app()
-app.mount("/metrics/prom", metrics_app)
-@app.get("/health")
-async def health():
-    """Return a fixed ``ok`` status plus the last timestamp stored in ``state``."""
-    # NOTE(review): "uptime" carries state["timestamp"], the event-loop clock
-    # reading saved by the metrics loop; confirm that this is the intended value.
-    return {"status": "ok", "uptime": state["timestamp"]}
-@app.get("/metrics")
-async def get_json_metrics():
-    """Return ``state`` as JSON; kept for backward compatibility with simple checkers."""
-    return state
 @app.post("/inference")
-@limiter.limit("10/minute")
 async def inference(request: Request):
-    """Echo the first 120 characters of the posted ``prompt`` together with ``state``."""
-    REQ_COUNT.labels(method="POST", endpoint="/inference").inc()
     data = await request.json()
     prompt = data.get("prompt", "")
-    await asyncio.sleep(0.1)  # Simulates model work
-    return {"reply": f"Echo: {prompt[:120]}", "stats": state}
-# ───────────────────────────────
-# 5️⃣ Gradio Interface (UI)
-# ───────────────────────────────
-async def check_health_ui():
-    """Check API availability via a loopback request (self health check).
-
-    Returns an HTML snippet: the ONLINE banner when GET /health on
-    localhost:7860 answers with status 200, otherwise the OFFLINE banner.
-    """
-    try:
-        async with httpx.AsyncClient(timeout=1) as client:
-            resp = await client.get("http://localhost:7860/health")
-            if resp.status_code == 200:
-                return "<div style='color:lime;font-size:18px;'>🟢 API ONLINE</div>"
-    # NOTE(review): the bare except swallows every error, so any failure is
-    # reported as OFFLINE.
-    except:
-        pass
-    return "<div style='color:red;font-size:18px;'>🔴 API OFFLINE</div>"
-# Dashboard layout: a health banner on top, then an inference tab and a
-# system-monitor tab.
-with gr.Blocks(title="Eroha v6.5 Dashboard", theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# ⚙️ Eroha v6.5 — Hybrid Enterprise")
-    with gr.Row():
-        health_status = gr.HTML("<div style='font-size:18px;'>🟡 Checking...</div>")
-    with gr.Tabs():
-        with gr.TabItem("Inference"):
-            with gr.Row():
-                inp = gr.Textbox(label="Input Prompt", placeholder="Type here...")
-                out = gr.Textbox(label="Model Response")
-            btn = gr.Button("Run Inference", variant="primary")
-            # The button formats the input locally; it does not call /inference.
-            btn.click(lambda x: f"Processed: {x}", inputs=inp, outputs=out)
-        with gr.TabItem("System Monitor"):
-            gr.Markdown("### 📊 Real-time Resource Usage")
-            with gr.Row():
-                cpu_box = gr.Number(label="CPU %")
-                ram_box = gr.Number(label="RAM %")
-            gr.Markdown("> Metrics are also exported to `/metrics/prom` for Prometheus.")
-    # Auto-refresh the UI every 5 seconds
-    demo.load(check_health_ui, outputs=[health_status], every=5)
-    demo.load(lambda: (state["cpu"], state["ram"]), outputs=[cpu_box, ram_box], every=5)
-# ───────────────────────────────
-# 6️⃣ Mounting & Launch
-# ───────────────────────────────
-# Mount the Gradio UI at the root path of the FastAPI application.
 app = mount_gradio_app(app, demo, path="/")
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=7860)

 """
+Eroha v6.5-ML Stable + Latency Patch (RC1)
+Production-grade build: FastAPI + Gradio + Prometheus + Lazy Model
 """
 import asyncio
+import threading
+import time
 import psutil
 from fastapi import FastAPI, Request
+from slowapi import Limiter
 from slowapi.util import get_remote_address
+from prometheus_client import make_asgi_app, Counter, Gauge, Histogram
+import gradio as gr
 from gradio.routes import mount_gradio_app
+from transformers import pipeline
+# ─────────────────────────────
+# 1️⃣ Метрики и лимитер
+# ─────────────────────────────
+# Incremented once per call of the /inference endpoint.
+REQ_COUNT = Counter("api_requests_total", "Total API requests", ["endpoint"])
+# Set by the background monitor for the "cpu" and "ram" resources.
+SYS_USAGE = Gauge("system_usage_percent", "System resource usage", ["resource"])
+# Times the executor call that runs the model inside /inference.
+INF_LATENCY = Histogram(
+    "inference_latency_seconds",
+    "Time spent in model inference",
+    buckets=[0.1, 0.5, 1, 2, 5, 10, float("inf")]
+)
+# Shared in-process state: "cpu"/"ram" are written by monitor(), and
+# "model_ready" is set once the pipeline has been created.
+state = {"cpu": 0, "ram": 0, "model_ready": False}
 def get_real_ip(request: Request):
+    """Return the client IP used as the rate-limit key.
+
+    Prefers the first entry of the ``x-forwarded-for`` header and falls back
+    to the peer address of the connection.
+
+    Args:
+        request: The incoming request.
+
+    Returns:
+        The client IP as a string; ``"127.0.0.1"`` when neither the header
+        nor the peer address is available.
+    """
     forwarded = request.headers.get("x-forwarded-for")
+    if forwarded:
+        # The header is a comma-separated chain; surrounding whitespace is
+        # stripped so equal addresses always map to the same key, and an
+        # empty first entry falls through to the peer address.
+        first_hop = forwarded.split(",")[0].strip()
+        if first_hop:
+            return first_hop
+    # request.client can be None when the server does not expose the peer
+    # address; the original dereferenced it unconditionally.
+    client = request.client
+    return client.host if client is not None else "127.0.0.1"
 limiter = Limiter(key_func=get_real_ip)
+# ─────────────────────────────
+# 2️⃣ Модель (Lazy Singleton)
+# ─────────────────────────────
+class ErohaModel:
+    """Lazily created, process-wide holder of the text-generation pipeline."""
+    pipe = None
+    # Serialises first-time construction: get_pipe() is called from executor
+    # threads by /inference and from the Gradio click handler, so two first
+    # requests could otherwise each build their own pipeline.
+    _lock = threading.Lock()
+    @classmethod
+    def get_pipe(cls):
+        """Return the shared pipeline, creating it on first use.
+
+        Sets ``state["model_ready"]`` to True once the pipeline exists.
+        """
+        if cls.pipe is None:
+            with cls._lock:
+                # Re-check under the lock: another thread may have finished
+                # the construction while this one was waiting.
+                if cls.pipe is None:
+                    cls.pipe = pipeline("text-generation", model="gpt2")
+                    state["model_ready"] = True
+        return cls.pipe
+# ─────────────────────────────
+# 3️⃣ Мониторинг (фон)
+# ─────────────────────────────
+async def monitor():
+    """Sample CPU and RAM usage every 15 seconds, forever.
+
+    Each sample is stored in ``state`` and mirrored to the ``SYS_USAGE``
+    gauge. A failing sample is reported and skipped so one error does not
+    end the loop; cancellation still propagates.
+    """
+    # The first non-blocking cpu_percent() call has no earlier measurement to
+    # compare against and returns a meaningless 0.0, so take and discard it,
+    # then wait briefly before the first real sample.
+    psutil.cpu_percent()
+    await asyncio.sleep(1)
+    while True:
+        try:
+            state["cpu"] = psutil.cpu_percent()
+            state["ram"] = psutil.virtual_memory().percent
+            SYS_USAGE.labels(resource="cpu").set(state["cpu"])
+            SYS_USAGE.labels(resource="ram").set(state["ram"])
+        except Exception as exc:
+            print(f"[Metrics Error] {exc}")
+        await asyncio.sleep(15)
+# Strong references to background tasks. The event loop keeps only weak
+# references, so a task whose handle is discarded can be garbage-collected
+# before it finishes.
+_background_tasks = set()
+def _start_monitor():
+    """Startup hook: launch monitor() and keep a reference to its task."""
+    task = asyncio.create_task(monitor())
+    _background_tasks.add(task)
+    task.add_done_callback(_background_tasks.discard)
+app = FastAPI(on_startup=[_start_monitor])
+app.mount("/metrics/prom", make_asgi_app())
+# ─────────────────────────────
+# 4️⃣ Эндпоинты API
+# ─────────────────────────────
 @app.post("/inference")
+@limiter.limit("5/minute")
 async def inference(request: Request):
+    """Generate text for the posted prompt.
+
+    Expects a JSON object with an optional string field ``prompt``. The model
+    call runs in the default executor so the event loop stays responsive.
+
+    Returns:
+        A dict with the generated text, the measured latency in seconds and
+        the latest CPU/RAM readings from ``state``.
+
+    Raises:
+        HTTPException: 400 when the body is not a JSON object or ``prompt``
+            is not a string.
+    """
+    # Imported locally so this handler does not rely on a change to the
+    # file's import block.
+    from fastapi import HTTPException
+    REQ_COUNT.labels(endpoint="/inference").inc()
     data = await request.json()
+    # A JSON array or scalar has no .get(); reject it instead of failing
+    # with an unhandled AttributeError.
+    if not isinstance(data, dict):
+        raise HTTPException(status_code=400, detail="JSON body must be an object")
     prompt = data.get("prompt", "")
+    # The response below reads res[0]['generated_text'], which only matches
+    # the pipeline output for a single string prompt.
+    if not isinstance(prompt, str):
+        raise HTTPException(status_code=400, detail="'prompt' must be a string")
+    loop = asyncio.get_running_loop()
+    start_time = time.perf_counter()
+    with INF_LATENCY.time():
+        res = await loop.run_in_executor(
+            None, lambda: ErohaModel.get_pipe()(prompt, max_length=50)
+        )
+    latency = time.perf_counter() - start_time
+    return {
+        "result": res[0]['generated_text'],
+        "latency_sec": round(latency, 3),
+        "cpu": state["cpu"],
+        "ram": state["ram"]
+    }
+@app.get("/health")
+async def health():
+    """Report a fixed ``ok`` status and whether the model pipeline has been created."""
+    model_loaded = state["model_ready"]
+    return {"status": "ok", "model_loaded": model_loaded}
+# ─────────────────────────────
+# 5️⃣ Интерфейс (Gradio)
+# ─────────────────────────────
+# Gradio UI: a prompt box, an output box, a latency read-out and a button
+# that runs the model.
+with gr.Blocks(title="Eroha v6.5-ML Stable") as demo:
+    gr.Markdown("# ⚙️ Eroha v6.5-ML Stable")
+    prompt = gr.Textbox(label="Input Prompt", placeholder="Type something...")
+    output = gr.Textbox(label="Model Output")
+    latency_box = gr.Number(label="Latency (sec)")
+    def run_inference(text):
+        """Generate text for ``text`` and return ``(generated_text, latency_seconds)``.
+
+        Calls the shared pipeline directly rather than the /inference
+        endpoint, so REQ_COUNT and INF_LATENCY are not updated here.
+        """
+        start = time.perf_counter()
+        res = ErohaModel.get_pipe()(text, max_length=50)[0]['generated_text']
+        latency = time.perf_counter() - start
+        return res, round(latency, 3)
+    btn = gr.Button("Generate")
+    btn.click(run_inference, inputs=prompt, outputs=[output, latency_box])
+# Mount the Gradio UI at the root path of the FastAPI application.
 app = mount_gradio_app(app, demo, path="/")
 if __name__ == "__main__":
     import uvicorn
+    # Listen on all interfaces, port 7860, when the file is run as a script.
+    uvicorn.run(app, host="0.0.0.0", port=7860)