Yermek68 committed on
Commit
6860acf
·
verified ·
1 Parent(s): ab81e0d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -92
app.py CHANGED
@@ -1,12 +1,13 @@
1
  """
2
- Eroha v6.4.4 β€” Zero-Daemon Edition
3
- ----------------------------------
4
- Production-grade FastAPI + Gradio fusion
5
- No threads, no leaks, 100% graceful lifecycle.
6
  """
7
 
8
  import asyncio
9
  import psutil
 
10
  import gradio as gr
11
  from fastapi import FastAPI, Request
12
  from slowapi import Limiter, _rate_limit_exceeded_handler
@@ -14,149 +15,134 @@ from slowapi.util import get_remote_address
14
  from slowapi.errors import RateLimitExceeded
15
  from gradio.routes import mount_gradio_app
16
  from contextlib import asynccontextmanager
17
-
18
 
19
  # ───────────────────────────────
20
- # 1️⃣ Global metrics state (event-loop safe)
21
  # ───────────────────────────────
 
 
 
 
 
22
  state = {"cpu": 0.0, "ram": 0.0, "timestamp": 0.0}
23
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
  # ───────────────────────────────
26
- # 2️⃣ Lifespan manager (async background task)
27
  # ───────────────────────────────
28
  @asynccontextmanager
29
  async def lifespan(app: FastAPI):
30
  stop_event = asyncio.Event()
31
 
32
  async def background_metrics():
 
33
  while not stop_event.is_set():
34
  try:
35
- state["cpu"] = psutil.cpu_percent()
36
- state["ram"] = psutil.virtual_memory().percent
 
 
 
37
  state["timestamp"] = asyncio.get_event_loop().time()
 
 
 
 
 
38
  except Exception as e:
39
- print(f"[Metrics] error: {e}")
40
- await asyncio.sleep(5)
41
 
42
  task = asyncio.create_task(background_metrics())
43
- yield # Server runs here
44
  stop_event.set()
45
  await asyncio.gather(task, return_exceptions=True)
46
 
47
-
48
  # ───────────────────────────────
49
- # 3️⃣ FastAPI app with rate limiter
50
  # ───────────────────────────────
51
- app = FastAPI(title="Eroha v6.4.4 API", lifespan=lifespan)
52
- limiter = Limiter(key_func=get_remote_address)
53
  app.state.limiter = limiter
54
  app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
55
 
 
 
 
56
 
57
  @app.get("/health")
58
  async def health():
59
  return {"status": "ok", "uptime": state["timestamp"]}
60
 
61
-
62
  @app.get("/metrics")
63
- async def metrics():
 
64
  return state
65
 
66
-
67
  @app.post("/inference")
68
  @limiter.limit("10/minute")
69
  async def inference(request: Request):
 
70
  data = await request.json()
71
  prompt = data.get("prompt", "")
72
- # simulate model call
73
- await asyncio.sleep(0.1)
74
  return {"reply": f"Echo: {prompt[:120]}", "stats": state}
75
 
76
-
77
  # ─────────���─────────────────────
78
- # 4️⃣ Gradio dashboard
79
  # ───────────────────────────────
80
- with gr.Blocks(title="Eroha v6.4.4 Dashboard") as demo:
81
- gr.Markdown("## βš™οΈ Eroha v6.4.4 – Zero-Daemon Edition")
82
-
83
- with gr.Row():
84
- inp = gr.Textbox(label="Prompt")
85
- out = gr.Textbox(label="Response")
86
- gr.Button("Send").click(lambda x: f"Echo: {x}", inputs=inp, outputs=out)
87
-
88
- gr.Markdown("### πŸ“Š Live Metrics (5s refresh)")
89
- cpu_box = gr.Number(label="CPU %")
90
- ram_box = gr.Number(label="RAM %")
91
-
92
- demo.load(lambda: (state["cpu"], state["ram"]), outputs=[cpu_box, ram_box], every=5)
93
-
94
- import httpx
95
-
96
- # 4️⃣b Health Status Checker
97
- async def check_health():
98
- """ΠŸΡ€ΠΎΠ²Π΅Ρ€ΠΊΠ° состояния API."""
99
  try:
100
- async with httpx.AsyncClient(timeout=2) as client:
101
  resp = await client.get("http://localhost:7860/health")
102
- if resp.status_code == 200 and resp.json().get("status") == "ok":
103
- return "🟒 ONLINE"
104
- except Exception:
105
  pass
106
- return "πŸ”΄ OFFLINE"
107
-
108
- # ДобавляСм Π² интСрфСйс Gradio
109
- with gr.Blocks(title="Eroha v6.4.5 Dashboard") as demo:
110
- gr.Markdown("## βš™οΈ Eroha v6.4.5 – Stable Health Edition")
111
 
 
 
 
112
  with gr.Row():
113
- with gr.Column():
114
- inp = gr.Textbox(label="Prompt")
115
- btn = gr.Button("Send")
116
- out = gr.Textbox(label="Response")
117
 
118
- btn.click(fn=lambda x: f"Echo: {x[:200]}", inputs=inp, outputs=out)
119
-
120
- # 🩺 System Health Monitor
121
- gr.Markdown("### 🩺 System Health Monitor")
122
-
123
- with gr.Row():
124
- with gr.Column(scale=1):
125
- health_indicator = gr.HTML("<div style='font-size:20px;'>🟑 Checking...</div>")
126
-
127
- # Асинхронная функция ΠΏΡ€ΠΎΠ²Π΅Ρ€ΠΊΠΈ состояния API
128
- async def check_health_ui():
129
- try:
130
- async with httpx.AsyncClient(timeout=2) as client:
131
- resp = await client.get("http://localhost:7860/health")
132
- if resp.status_code == 200 and resp.json().get("status") == "ok":
133
- return "<div style='color:lime;font-size:20px;'>🟒 ONLINE</div>"
134
- except Exception:
135
- pass
136
- return "<div style='color:red;font-size:20px;'>πŸ”΄ OFFLINE</div>"
137
-
138
- # АвтообновлСниС статуса API ΠΊΠ°ΠΆΠ΄Ρ‹Π΅ 5 сСкунд
139
- demo.load(check_health_ui, outputs=[health_indicator], every=5)
140
-
141
- # πŸ“Š Live Metrics (5s refresh)
142
- gr.Markdown("### πŸ“Š Live Metrics (5s refresh)")
143
- cpu_box = gr.Number(label="CPU %")
144
- ram_box = gr.Number(label="RAM %")
145
-
146
- # АвтообновлСниС ΠΌΠ΅Ρ‚Ρ€ΠΈΠΊ
147
- demo.load(lambda: (state["cpu"], state["ram"]), outputs=[cpu_box, ram_box], every=5)
148
 
149
  # ───────────────────────────────
150
- # 5️⃣ Mount Gradio to FastAPI (single port)
151
  # ───────────────────────────────
152
  app = mount_gradio_app(app, demo, path="/")
153
 
154
- # No explicit uvicorn.run β€” HF Spaces handles launch automatically.
155
-
156
- # ───────────────────────────────
157
- # 6️⃣ Entrypoint для Hugging Face & Local
158
- # ───────────────────────────────
159
  if __name__ == "__main__":
160
  import uvicorn
161
- # Π’Π°ΠΆΠ½ΠΎ: οΏ½οΏ½ΡΠΏΠΎΠ»ΡŒΠ·ΡƒΠ΅ΠΌ строку "app:app" для ΠΏΠΎΠ΄Π΄Π΅Ρ€ΠΆΠΊΠΈ reload ΠΈ ΠΊΠΎΡ€Ρ€Π΅ΠΊΡ‚Π½ΠΎΠΉ Ρ€Π°Π±ΠΎΡ‚Ρ‹ lifespan
162
  uvicorn.run(app, host="0.0.0.0", port=7860)
 
1
  """
2
+ Eroha v6.5 — Hybrid Enterprise Edition
3
+ --------------------------------------
4
+ Features: Async Lifespan, Prometheus Metrics, Proxy-Aware Limiter,
5
+ Stable Health Monitor, and Gradio Dashboard.
6
  """
7
 
8
  import asyncio
9
  import psutil
10
+ import httpx
11
  import gradio as gr
12
  from fastapi import FastAPI, Request
13
  from slowapi import Limiter, _rate_limit_exceeded_handler
 
15
  from slowapi.errors import RateLimitExceeded
16
  from gradio.routes import mount_gradio_app
17
  from contextlib import asynccontextmanager
18
+ from prometheus_client import make_asgi_app, Counter, Gauge
19
 
20
  # ───────────────────────────────
21
+ # 1️⃣ Enterprise Metrics & State
22
  # ───────────────────────────────
23
# Metrics for external systems (Prometheus/Grafana).
# REQ_COUNT is labelled by HTTP method and endpoint; SYS_USAGE is labelled by
# resource name (the background sampler sets the "cpu" and "ram" series).
REQ_COUNT = Counter("api_requests_total", "Total requests", ["method", "endpoint"])
SYS_USAGE = Gauge("system_usage_percent", "System metrics", ["resource"])

# Local state for the Gradio UI; the /health, /metrics and /inference handlers
# also return it. "timestamp" stores the event-loop clock reading taken at the
# last sample, not a wall-clock time.
state = {"cpu": 0.0, "ram": 0.0, "timestamp": 0.0}
29
 
30
+ # ───────────────────────────────
31
+ # 2️⃣ Proxy-Aware Real IP Limiter
32
+ # ───────────────────────────────
33
def get_real_ip(request: "Request"):
    """Return the client IP used as the rate-limit key, honouring proxies.

    Behind a reverse proxy (Hugging Face, Nginx) the socket peer is the proxy,
    so the left-most ``X-Forwarded-For`` entry is preferred when present.

    Args:
        request: Incoming request; only ``headers`` and ``client`` are read.

    Returns:
        The first non-empty ``X-Forwarded-For`` entry with surrounding
        whitespace removed, otherwise the socket peer host, otherwise
        ``"127.0.0.1"`` when no peer information is available.
    """
    # NOTE(review): X-Forwarded-For is client-controlled unless a trusted proxy
    # overwrites it; confirm the deployment always sits behind such a proxy.
    forwarded = request.headers.get("x-forwarded-for")
    if forwarded:
        # Entries are comma-separated and usually followed by a space, so strip
        # to keep "1.2.3.4" and " 1.2.3.4" from becoming different limiter keys.
        client_ip = forwarded.split(",")[0].strip()
        if client_ip:
            return client_ip

    # request.client can be None (e.g. some test clients or unusual transports).
    client = request.client
    if client is None or not client.host:
        return "127.0.0.1"
    return client.host
39
+
40
# Rate limiter keyed on the client IP resolved by get_real_ip (proxy-aware).
limiter = Limiter(key_func=get_real_ip)
41
 
42
  # ───────────────────────────────
43
+ # 3️⃣ Lifespan Manager (Resource Control)
44
  # ───────────────────────────────
45
  @asynccontextmanager
46
  async def lifespan(app: FastAPI):
47
  stop_event = asyncio.Event()
48
 
49
  async def background_metrics():
50
+ """Π€ΠΎΠ½ΠΎΠ²Ρ‹ΠΉ Ρ†ΠΈΠΊΠ» сбора Π΄Π°Π½Π½Ρ‹Ρ… (Π±Π΅Π· ΠΏΠΎΡ‚ΠΎΠΊΠΎΠ²)."""
51
  while not stop_event.is_set():
52
  try:
53
+ cpu = psutil.cpu_percent()
54
+ ram = psutil.virtual_memory().percent
55
+
56
+ # ОбновлСниС для UI
57
+ state["cpu"], state["ram"] = cpu, ram
58
  state["timestamp"] = asyncio.get_event_loop().time()
59
+
60
+ # ОбновлСниС для Prometheus
61
+ SYS_USAGE.labels(resource="cpu").set(cpu)
62
+ SYS_USAGE.labels(resource="ram").set(ram)
63
+
64
  except Exception as e:
65
+ print(f"[Metrics Error] {e}")
66
+ await asyncio.sleep(15) # Π˜Π½Ρ‚Π΅Ρ€Π²Π°Π» для Production
67
 
68
  task = asyncio.create_task(background_metrics())
69
+ yield # Π—Π΄Π΅ΡΡŒ Ρ€Π°Π±ΠΎΡ‚Π°Π΅Ρ‚ ΠΏΡ€ΠΈΠ»ΠΎΠΆΠ΅Π½ΠΈΠ΅
70
  stop_event.set()
71
  await asyncio.gather(task, return_exceptions=True)
72
 
 
73
  # ───────────────────────────────
74
+ # 4️⃣ FastAPI Core Setup
75
  # ───────────────────────────────
76
# Application instance; `lifespan` starts and stops the background metrics task.
app = FastAPI(title="Eroha v6.5 Enterprise", lifespan=lifespan)
# Expose the limiter on app.state and register the handler that converts
# RateLimitExceeded into an HTTP response (standard slowapi wiring).
app.state.limiter = limiter
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)

# Mount the Prometheus exposition endpoint; the JSON view stays at /metrics.
metrics_app = make_asgi_app()
app.mount("/metrics/prom", metrics_app)
83
 
84
  @app.get("/health")
85
  async def health():
86
  return {"status": "ok", "uptime": state["timestamp"]}
87
 
 
88
  @app.get("/metrics")
  async def get_json_metrics():
      """Return the raw metrics state as JSON, kept for backward compatibility with simple checkers."""
      return state
92
 
 
93
  @app.post("/inference")
94
  @limiter.limit("10/minute")
95
  async def inference(request: Request):
96
+ REQ_COUNT.labels(method="POST", endpoint="/inference").inc()
97
  data = await request.json()
98
  prompt = data.get("prompt", "")
99
+ await asyncio.sleep(0.1) # Π˜ΠΌΠΈΡ‚Π°Ρ†ΠΈΡ Ρ€Π°Π±ΠΎΡ‚Ρ‹ ΠΌΠΎΠ΄Π΅Π»ΠΈ
 
100
  return {"reply": f"Echo: {prompt[:120]}", "stats": state}
101
 
 
102
  # ─────────���─────────────────────
103
+ # 5️⃣ Gradio Interface (UI)
104
  # ───────────────────────────────
105
async def check_health_ui():
    """Check API availability over loopback (self health check).

    Issues a GET to this app's own ``/health`` endpoint on localhost:7860 with
    a one-second timeout.

    Returns:
        An HTML snippet: a green "API ONLINE" badge when the endpoint answers
        with HTTP 200, otherwise a red "API OFFLINE" badge.
    """
    try:
        async with httpx.AsyncClient(timeout=1) as client:
            resp = await client.get("http://localhost:7860/health")
            if resp.status_code == 200:
                return "<div style='color:lime;font-size:18px;'>🟢 API ONLINE</div>"
    except httpx.HTTPError:
        # Connection refused, timeout, protocol error: report OFFLINE below.
        # Deliberately not a bare except, so task cancellation and
        # KeyboardInterrupt still propagate.
        pass
    return "<div style='color:red;font-size:18px;'>🔴 API OFFLINE</div>"
 
 
 
 
115
 
116
# Dashboard layout: a health badge on top, then an "Inference" tab (local echo)
# and a "System Monitor" tab showing the sampled CPU/RAM values.
with gr.Blocks(title="Eroha v6.5 Dashboard", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# ⚙️ Eroha v6.5 — Hybrid Enterprise")

    with gr.Row():
        health_status = gr.HTML("<div style='font-size:18px;'>🟡 Checking...</div>")

    with gr.Tabs():
        with gr.TabItem("Inference"):
            with gr.Row():
                inp = gr.Textbox(label="Input Prompt", placeholder="Type here...")
                out = gr.Textbox(label="Model Response")
            btn = gr.Button("Run Inference", variant="primary")
            # Handled inside Gradio; this does not call the /inference route.
            btn.click(lambda x: f"Processed: {x}", inputs=inp, outputs=out)

        with gr.TabItem("System Monitor"):
            gr.Markdown("### 📊 Real-time Resource Usage")
            with gr.Row():
                cpu_box = gr.Number(label="CPU %")
                ram_box = gr.Number(label="RAM %")
            gr.Markdown("> Metrics are also exported to `/metrics/prom` for Prometheus.")

    # Auto-refresh the UI every 5 seconds. The background sampler only updates
    # `state` every 15 seconds, so the displayed numbers change at that rate.
    demo.load(check_health_ui, outputs=[health_status], every=5)
    demo.load(lambda: (state["cpu"], state["ram"]), outputs=[cpu_box, ram_box], every=5)
 
 
 
 
 
 
 
 
 
 
 
 
140
 
141
  # ───────────────────────────────
142
+ # 6️⃣ Mounting & Launch
143
  # ───────────────────────────────
144
  # Serve the Gradio dashboard from the same FastAPI app at the root path.
  app = mount_gradio_app(app, demo, path="/")

  if __name__ == "__main__":
      import uvicorn

      # Port 7860 must match the URL hard-coded in check_health_ui.
      uvicorn.run(app, host="0.0.0.0", port=7860)