Yermek68 committed on
Commit
5c680d6
·
verified ·
1 Parent(s): b2e3720

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +93 -127
app.py CHANGED
@@ -1,148 +1,114 @@
1
  """
2
- Eroha v6.5 β€” Hybrid Enterprise Edition
3
- --------------------------------------
4
- Features: Async Lifespan, Prometheus Metrics, Proxy-Aware Limiter,
5
- Stable Health Monitor, and Gradio Dashboard.
6
  """
7
 
8
  import asyncio
 
9
  import psutil
10
- import httpx
11
- import gradio as gr
12
  from fastapi import FastAPI, Request
13
- from slowapi import Limiter, _rate_limit_exceeded_handler
14
  from slowapi.util import get_remote_address
15
- from slowapi.errors import RateLimitExceeded
 
16
  from gradio.routes import mount_gradio_app
17
- from contextlib import asynccontextmanager
18
- from prometheus_client import make_asgi_app, Counter, Gauge
19
-
20
# ───────────────────────────────
# 1️⃣ Enterprise Metrics & State
# ───────────────────────────────
# Metrics for external systems (Prometheus/Grafana)
REQ_COUNT = Counter("api_requests_total", "Total requests", ["method", "endpoint"])
SYS_USAGE = Gauge("system_usage_percent", "System metrics", ["resource"])

# Local state for the Gradio UI; written by the lifespan sampler,
# read by /health, /metrics and /inference
state = {"cpu": 0.0, "ram": 0.0, "timestamp": 0.0}

# ───────────────────────────────
# 2️⃣ Proxy-Aware Real IP Limiter
# ───────────────────────────────
33
def get_real_ip(request: Request):
    """Resolve the caller's real IP when running behind a proxy (Hugging Face/Nginx).

    Uses the first hop of the X-Forwarded-For chain when present,
    otherwise falls back to the direct socket peer.
    """
    forwarded = request.headers.get("x-forwarded-for")
    return forwarded.split(",")[0] if forwarded else request.client.host
39
 
40
limiter = Limiter(key_func=get_real_ip)  # rate-limit keyed on real client IP, not proxy IP
41
 
42
- # ───────────────────────────────
43
- # 3️⃣ Lifespan Manager (Resource Control)
44
- # ───────────────────────────────
45
@asynccontextmanager
async def lifespan(app: FastAPI):
    """App lifespan: run the metrics sampler for the app's lifetime.

    Starts a single background task on startup; on shutdown it signals
    the task via `stop_event` and waits for it to exit.
    """
    stop_event = asyncio.Event()

    async def background_metrics():
        """Background sampling loop (no threads)."""
        while not stop_event.is_set():
            try:
                cpu = psutil.cpu_percent()
                ram = psutil.virtual_memory().percent

                # Update local state for the UI
                state["cpu"], state["ram"] = cpu, ram
                state["timestamp"] = asyncio.get_event_loop().time()

                # Update Prometheus gauges
                SYS_USAGE.labels(resource="cpu").set(cpu)
                SYS_USAGE.labels(resource="ram").set(ram)

            except Exception as e:
                print(f"[Metrics Error] {e}")
            await asyncio.sleep(15)  # production-friendly interval

    task = asyncio.create_task(background_metrics())
    yield  # application serves requests here
    stop_event.set()
    await asyncio.gather(task, return_exceptions=True)
72
-
73
# ───────────────────────────────
# 4️⃣ FastAPI Core Setup
# ───────────────────────────────
app = FastAPI(title="Eroha v6.5 Enterprise", lifespan=lifespan)
# slowapi requires the limiter on app.state plus a RateLimitExceeded handler
app.state.limiter = limiter
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)

# Mount the Prometheus scrape endpoint
metrics_app = make_asgi_app()
app.mount("/metrics/prom", metrics_app)

@app.get("/health")
async def health():
    """Liveness probe; `uptime` is the last sampler tick (event-loop time)."""
    return {"status": "ok", "uptime": state["timestamp"]}

@app.get("/metrics")
async def get_json_metrics():
    """JSON metrics for backwards compatibility with simple checkers."""
    return state
92
-
93
@app.post("/inference")
@limiter.limit("10/minute")
async def inference(request: Request):
    """Rate-limited echo endpoint; the model call is simulated."""
    REQ_COUNT.labels(method="POST", endpoint="/inference").inc()
    data = await request.json()
    prompt = data.get("prompt", "")
    await asyncio.sleep(0.1)  # simulate model latency
    return {"reply": f"Echo: {prompt[:120]}", "stats": state}
101
-
102
- # ───────────────────────────────
103
- # 5️⃣ Gradio Interface (UI)
104
- # ───────────────────────────────
105
async def check_health_ui():
    """Self-health check: ping our own /health endpoint over loopback."""
    try:
        async with httpx.AsyncClient(timeout=1) as client:
            resp = await client.get("http://localhost:7860/health")
            if resp.status_code == 200:
                return "<div style='color:lime;font-size:18px;'>🟒 API ONLINE</div>"
    except Exception:
        # Bare `except:` also swallowed asyncio.CancelledError (BaseException
        # since 3.8), which can break task cancellation in the UI refresh loop.
        pass
    return "<div style='color:red;font-size:18px;'>πŸ”΄ API OFFLINE</div>"
115
-
116
# Dashboard UI: health banner, echo-inference tab, and a system monitor tab.
with gr.Blocks(title="Eroha v6.5 Dashboard", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# βš™οΈ Eroha v6.5 β€” Hybrid Enterprise")

    with gr.Row():
        health_status = gr.HTML("<div style='font-size:18px;'>🟑 Checking...</div>")

    with gr.Tabs():
        with gr.TabItem("Inference"):
            with gr.Row():
                inp = gr.Textbox(label="Input Prompt", placeholder="Type here...")
                out = gr.Textbox(label="Model Response")
            btn = gr.Button("Run Inference", variant="primary")
            # NOTE(review): echoes locally; does not call the /inference endpoint
            btn.click(lambda x: f"Processed: {x}", inputs=inp, outputs=out)

        with gr.TabItem("System Monitor"):
            gr.Markdown("### πŸ“Š Real-time Resource Usage")
            with gr.Row():
                cpu_box = gr.Number(label="CPU %")
                ram_box = gr.Number(label="RAM %")
            gr.Markdown("> Metrics are also exported to `/metrics/prom` for Prometheus.")

    # UI auto-refresh every 5 seconds
    demo.load(check_health_ui, outputs=[health_status], every=5)
    demo.load(lambda: (state["cpu"], state["ram"]), outputs=[cpu_box, ram_box], every=5)
140
-
141
- # ───────────────────────────────
142
- # 6️⃣ Mounting & Launch
143
- # ───────────────────────────────
144
# Mount the Gradio dashboard at the web root, on top of the FastAPI app.
app = mount_gradio_app(app, demo, path="/")

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)
 
1
  """
2
+ Eroha v6.5-ML Stable + Latency Patch (RC1)
3
+ Production-grade build: FastAPI + Gradio + Prometheus + Lazy Model
 
 
4
  """
5
 
6
import asyncio
import time
from contextlib import asynccontextmanager

import gradio as gr
import psutil
from fastapi import FastAPI, Request
from gradio.routes import mount_gradio_app
from prometheus_client import make_asgi_app, Counter, Gauge, Histogram
from slowapi import Limiter, _rate_limit_exceeded_handler
from slowapi.errors import RateLimitExceeded
from slowapi.util import get_remote_address
from transformers import pipeline
16
+
17
# ─────────────────────────────
# 1️⃣ Metrics & rate limiter
# ─────────────────────────────
# Prometheus series exported at /metrics/prom for scraping.
REQ_COUNT = Counter("api_requests_total", "Total API requests", ["endpoint"])
SYS_USAGE = Gauge("system_usage_percent", "System resource usage", ["resource"])
# Latency histogram; bucket edges span sub-second to 10s+ model calls.
INF_LATENCY = Histogram(
    "inference_latency_seconds",
    "Time spent in model inference",
    buckets=[0.1, 0.5, 1, 2, 5, 10, float("inf")]
)
# Shared in-process state: written by the monitor, read by /health and /inference.
state = {"cpu": 0, "ram": 0, "model_ready": False}
28
+
 
 
29
def get_real_ip(request: Request):
    """Resolve the caller's real IP behind a reverse proxy (HF Spaces/Nginx).

    X-Forwarded-For is a comma-separated hop chain whose first entry is the
    original client.  Entries are commonly space-padded ("ip1, ip2"), so the
    value must be stripped before it is used as a rate-limit key — otherwise
    " 1.2.3.4" and "1.2.3.4" count as different clients.
    """
    forwarded = request.headers.get("x-forwarded-for")
    return forwarded.split(",")[0].strip() if forwarded else request.client.host
 
 
32
 
33
limiter = Limiter(key_func=get_real_ip)  # rate-limit keyed on real client IP, not proxy IP
34
 
35
# ─────────────────────────────
# 2️⃣ Model (lazy singleton)
# ─────────────────────────────
class ErohaModel:
    """Holds one shared text-generation pipeline, built on first use."""

    pipe = None  # class-level cache, populated by get_pipe()

    @classmethod
    def get_pipe(cls):
        """Return the cached GPT-2 pipeline, loading it on the first call."""
        cached = cls.pipe
        if cached is not None:
            return cached
        cls.pipe = pipeline("text-generation", model="gpt2")
        state["model_ready"] = True  # flips the /health readiness flag
        return cls.pipe
47
+
48
# ─────────────────────────────
# 3️⃣ Monitoring (background) & app setup
# ─────────────────────────────
async def monitor():
    """Sample CPU/RAM every 15 s into `state` and the Prometheus gauges."""
    while True:
        try:
            state["cpu"] = psutil.cpu_percent()
            state["ram"] = psutil.virtual_memory().percent
            SYS_USAGE.labels(resource="cpu").set(state["cpu"])
            SYS_USAGE.labels(resource="ram").set(state["ram"])
        except Exception as e:
            # keep the monitor alive even if a psutil read fails
            print(f"[Monitor Error] {e}")
        await asyncio.sleep(15)


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Start the monitor with the app and cancel it cleanly on shutdown.

    Replaces the deprecated `on_startup=` hook, which also leaked the
    monitor task (it was never cancelled on shutdown).
    """
    task = asyncio.create_task(monitor())
    yield
    task.cancel()
    await asyncio.gather(task, return_exceptions=True)


app = FastAPI(lifespan=lifespan)
# slowapi requires the limiter on app.state and a RateLimitExceeded handler;
# without these, @limiter.limit(...) endpoints fail at request time.
app.state.limiter = limiter
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
app.mount("/metrics/prom", make_asgi_app())
61
+
62
# ─────────────────────────────
# 4️⃣ API endpoints
# ─────────────────────────────
@app.post("/inference")
@limiter.limit("5/minute")
async def inference(request: Request):
    """Rate-limited text-generation endpoint.

    Body: ``{"prompt": str}``.  Returns the generated text plus the
    measured wall-clock latency and the current CPU/RAM readings.
    """
    REQ_COUNT.labels(endpoint="/inference").inc()
    data = await request.json()
    prompt = data.get("prompt", "")

    # get_running_loop() is the supported call inside a coroutine;
    # get_event_loop() here has been deprecated since Python 3.10.
    loop = asyncio.get_running_loop()

    start_time = time.perf_counter()
    with INF_LATENCY.time():  # also feeds the Prometheus histogram
        # run_in_executor keeps the blocking model call off the event loop
        res = await loop.run_in_executor(
            None, lambda: ErohaModel.get_pipe()(prompt, max_length=50)
        )
    latency = time.perf_counter() - start_time

    return {
        "result": res[0]['generated_text'],
        "latency_sec": round(latency, 3),
        "cpu": state["cpu"],
        "ram": state["ram"]
    }
87
+
88
@app.get("/health")
async def health():
    """Liveness probe; `model_loaded` becomes true after the first inference."""
    return {"status": "ok", "model_loaded": state["model_ready"]}
91
+
92
# ─────────────────────────────
# 5️⃣ Interface (Gradio)
# ─────────────────────────────
with gr.Blocks(title="Eroha v6.5-ML Stable") as demo:
    gr.Markdown("# βš™οΈ Eroha v6.5-ML Stable")
    prompt = gr.Textbox(label="Input Prompt", placeholder="Type something...")
    output = gr.Textbox(label="Model Output")
    latency_box = gr.Number(label="Latency (sec)")

    def run_inference(text):
        # NOTE(review): runs the model synchronously in Gradio's worker thread,
        # bypassing the /inference rate limit and the Prometheus metrics.
        start = time.perf_counter()
        res = ErohaModel.get_pipe()(text, max_length=50)[0]['generated_text']
        latency = time.perf_counter() - start
        return res, round(latency, 3)

    btn = gr.Button("Generate")
    btn.click(run_inference, inputs=prompt, outputs=[output, latency_box])
109
+
 
 
 
 
 
 
110
# Mount the Gradio dashboard at the web root, on top of the FastAPI app.
app = mount_gradio_app(app, demo, path="/")

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)