Yermek68 committed on
Commit
db886be
Β·
verified Β·
1 Parent(s): fcd4016

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +89 -138
app.py CHANGED
@@ -1,149 +1,100 @@
1
- import os, asyncio, aiohttp, time, psutil
2
- from datetime import datetime
 
 
 
 
 
 
 
3
  import gradio as gr
4
- from fastapi import FastAPI
5
- from fastapi.responses import PlainTextResponse
6
- from pydantic import BaseModel
7
- import uvicorn
8
  from gradio.routes import mount_gradio_app
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
- # === Detect HF Spaces ===
11
- IS_HF_SPACES = os.getenv("SPACE_ID") is not None
12
-
13
- # === Π—Π°Π³Π»ΡƒΡˆΠΊΠΈ для core-ΠΌΠΎΠ΄ΡƒΠ»Π΅ΠΉ (Ρ‡Ρ‚ΠΎΠ±Ρ‹ Π½Π΅ Π±Ρ‹Π»ΠΎ ModuleNotFoundError) ===
14
- def log_alert(source: str, level: str, message: str):
15
- print(f"[{level}] {source}: {message}")
16
-
17
- def save_metrics(data):
18
- print(f"πŸ“Š METRICS (dummy): {data}")
19
-
20
- def failsafe(func): # decorator stub
21
- return func
22
-
23
- # === ΠšΠΎΠ½ΡΡ‚Π°Π½Ρ‚Ρ‹ ===
24
- PRIMARY_MODEL = "microsoft/phi-3-mini-4k-instruct"
25
- HF_TOKEN = os.getenv("HF_TOKEN", "")
26
- ROUTER_URL = "https://api-inference.huggingface.co/models"
27
- CHECK_INTERVAL = 180
28
-
29
- # === CircuitBreaker ===
30
- class CircuitBreaker:
31
- def __init__(self, threshold=3, timeout=60):
32
- self.failures, self.threshold, self.timeout = 0, threshold, timeout
33
- self.state, self.last_failure = "CLOSED", 0
34
-
35
- def allow(self):
36
- if self.state == "OPEN" and time.time() - self.last_failure < self.timeout:
37
- return False
38
- if self.state == "OPEN" and time.time() - self.last_failure >= self.timeout:
39
- self.state = "HALF_OPEN"
40
- return True
41
-
42
- def record_success(self):
43
- self.failures, self.state = 0, "CLOSED"
44
-
45
- def record_failure(self):
46
- self.failures += 1
47
- if self.failures >= self.threshold:
48
- self.state, self.last_failure = "OPEN", time.time()
49
-
50
- circuit = CircuitBreaker()
51
-
52
- # === Hugging Face API ΠΊΠ»ΠΈΠ΅Π½Ρ‚ ===
53
- class HFClient:
54
- def __init__(self):
55
- self.token = HF_TOKEN
56
- self.session = None
57
- self.latency = 0
58
- self.headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
59
-
60
- async def get_session(self):
61
- if not self.session or self.session.closed:
62
- self.session = aiohttp.ClientSession()
63
- return self.session
64
-
65
- async def infer(self, model: str, text: str):
66
- if not circuit.allow():
67
- return {"error": "Circuit breaker open"}
68
- try:
69
- s = await self.get_session()
70
- payload = {"inputs": text[:1000], "parameters": {"max_new_tokens": 100}}
71
- start = time.time()
72
- async with s.post(f"{ROUTER_URL}/{model}", headers=self.headers, json=payload) as r:
73
- self.latency = round((time.time() - start) * 1000, 2)
74
- if r.status == 200:
75
- circuit.record_success()
76
- return await r.json()
77
- else:
78
- circuit.record_failure()
79
- return {"error": f"HTTP {r.status}"}
80
- except Exception as e:
81
- circuit.record_failure()
82
- return {"error": str(e)}
83
-
84
- client = HFClient()
85
-
86
- # === FastAPI App ===
87
- app = FastAPI(title="Eroha AgentAPI v5.9.2 β€” Enterprise Edition")
88
-
89
- @app.on_event("startup")
90
- async def startup_event():
91
- print("πŸš€ Eroha AgentAPI started.")
92
- asyncio.create_task(watchdog())
93
 
94
  @app.get("/health")
95
  async def health():
96
- return {"status": "ok", "circuit": circuit.state, "memory": psutil.virtual_memory().percent, "latency": client.latency}
 
97
 
98
- @app.get("/metrics", response_class=PlainTextResponse)
99
  async def metrics():
100
- return f"circuit_state {circuit.state}\nmemory {psutil.virtual_memory().percent}\n"
101
 
102
- class InferenceRequest(BaseModel):
103
- prompt: str
104
- model: str = PRIMARY_MODEL
105
 
106
  @app.post("/inference")
107
- async def inference(req: InferenceRequest):
108
- start = time.time()
109
- result = await client.infer(req.model, req.prompt)
110
- duration = int((time.time() - start) * 1000)
111
- save_metrics({"latency_ms": duration})
112
- return {"response": result, "duration_ms": duration}
113
-
114
- # === Watchdog ===
115
- async def watchdog():
116
- while True:
117
- await asyncio.sleep(CHECK_INTERVAL)
118
- print(f"🩺 Watchdog OK at {datetime.now().strftime('%H:%M:%S')}")
119
-
120
- # === Gradio UI ===
121
- def gradio_infer(prompt: str, model_choice: str):
122
- try:
123
- result = asyncio.run(client.infer(model_choice, prompt))
124
- if isinstance(result, list) and "generated_text" in result[0]:
125
- return result[0]["generated_text"]
126
- return str(result)
127
- except Exception as e:
128
- return f"❌ Error: {e}"
129
-
130
- def show_dashboard():
131
- mem = psutil.virtual_memory().percent
132
- return f"| Metric | Value |\n|--------|--------|\n| Circuit | {circuit.state} |\n| Memory | {mem}% |\n| Latency | {client.latency} ms |"
133
-
134
- demo = gr.Blocks(title="Eroha AgentAPI v5.9.2")
135
- with demo:
136
- gr.Markdown("# πŸ€– Eroha AgentAPI v5.9.2 β€” Enterprise Edition")
137
- with gr.Tab("πŸ’¬ Chat"):
138
- inp = gr.Textbox(label="Π’Π²Π΅Π΄ΠΈΡ‚Π΅ запрос")
139
- model = gr.Dropdown(["microsoft/phi-3-mini-4k-instruct", "google/gemma-2-2b-it"], value="microsoft/phi-3-mini-4k-instruct", label="МодСль")
140
- out = gr.Textbox(label="ΠžΡ‚Π²Π΅Ρ‚")
141
- gr.Button("πŸš€ ΠžΡ‚ΠΏΡ€Π°Π²ΠΈΡ‚ΡŒ").click(fn=gradio_infer, inputs=[inp, model], outputs=out)
142
- with gr.Tab("πŸ“Š Dashboard"):
143
- dash = gr.Markdown(show_dashboard())
144
- gr.Button("πŸ”„ ΠžΠ±Π½ΠΎΠ²ΠΈΡ‚ΡŒ").click(fn=show_dashboard, outputs=dash)
145
-
146
- # === Π€ΠΈΠ½Π°Π»ΡŒΠ½Ρ‹ΠΉ Π΅Π΄ΠΈΠ½Ρ‹ΠΉ запуск ===
147
- if __name__ == "__main__":
148
- app = mount_gradio_app(app, demo, path="/")
149
- uvicorn.run(app, host="0.0.0.0", port=7860)
 
1
+ """
2
+ Eroha v6.4.4 β€” Zero-Daemon Edition
3
+ ----------------------------------
4
+ Production-grade FastAPI + Gradio fusion
5
+ No threads, no leaks, 100% graceful lifecycle.
6
+ """
7
+
8
+ import asyncio
9
+ import psutil
10
  import gradio as gr
11
+ from fastapi import FastAPI, Request
12
+ from slowapi import Limiter, _rate_limit_exceeded_handler
13
+ from slowapi.util import get_remote_address
14
+ from slowapi.errors import RateLimitExceeded
15
  from gradio.routes import mount_gradio_app
16
+ from contextlib import asynccontextmanager
17
+
18
+
19
# ───────────────────────────────
# 1️⃣ Global metrics state (event-loop safe)
# ───────────────────────────────
# Shared snapshot written only by the single background task running on
# the one event loop, so no locking is required. "timestamp" holds the
# loop clock at the last sample (monotonic, not wall-clock time).
state = {"cpu": 0.0, "ram": 0.0, "timestamp": 0.0}
23
+
24
+
25
# ───────────────────────────────
# 2️⃣ Lifespan manager (async background task)
# ───────────────────────────────
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Run a metrics sampler for the app's whole lifetime.

    On startup, spawn a background task that refreshes the module-level
    ``state`` dict every 5 seconds; on shutdown, signal it to stop and
    wait for it to finish so no task outlives the server.
    """
    stop_event = asyncio.Event()

    async def background_metrics():
        # asyncio.get_event_loop() inside a coroutine is deprecated;
        # get_running_loop() is the supported way to reach the loop.
        loop = asyncio.get_running_loop()
        while not stop_event.is_set():
            try:
                state["cpu"] = psutil.cpu_percent()
                state["ram"] = psutil.virtual_memory().percent
                state["timestamp"] = loop.time()
            except Exception as e:
                # Best-effort sampling: never let a psutil hiccup kill
                # the task; log and keep going.
                print(f"[Metrics] error: {e}")
            # Wait on the stop event with a timeout instead of a plain
            # sleep(5): shutdown no longer stalls up to 5 s waiting for
            # the sleep to elapse — the task wakes the moment the event
            # is set.
            try:
                await asyncio.wait_for(stop_event.wait(), timeout=5)
            except asyncio.TimeoutError:
                pass  # normal sampling interval elapsed

    task = asyncio.create_task(background_metrics())
    yield  # Server runs here
    stop_event.set()
    # Swallow any terminal exception from the task so shutdown is clean.
    await asyncio.gather(task, return_exceptions=True)
46
+
47
+
48
# ───────────────────────────────
# 3️⃣ FastAPI app with rate limiter
# ───────────────────────────────
# Single FastAPI instance; `lifespan` manages the background metrics task.
# slowapi requires the Limiter to live on app.state AND the exception
# handler to be registered, otherwise RateLimitExceeded surfaces as a 500
# instead of an HTTP 429 response.
app = FastAPI(title="Eroha v6.4.4 API", lifespan=lifespan)
limiter = Limiter(key_func=get_remote_address)
app.state.limiter = limiter
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
@app.get("/health")
async def health():
    """Liveness probe: report service status and the last sample time."""
    # NOTE(review): "uptime" is the loop-clock timestamp of the most
    # recent metrics sample, not a duration since start — confirm that
    # consumers of this endpoint expect that.
    payload = {"status": "ok", "uptime": state["timestamp"]}
    return payload
60
+
61
 
62
@app.get("/metrics")
async def metrics():
    """Expose the live system-metrics snapshot (cpu, ram, timestamp)."""
    # Return the shared dict directly; FastAPI serializes it to JSON.
    snapshot = state
    return snapshot
65
 
 
 
 
66
 
67
@app.post("/inference")
@limiter.limit("10/minute")
async def inference(request: Request):
    """Echo-style inference stub, rate-limited to 10 requests/minute per IP.

    Reads an optional "prompt" field from the JSON body and returns an
    echo of its first 120 characters plus the latest metrics snapshot.
    """
    # Original crashed with a 500 on a malformed/absent JSON body, and
    # broke on a JSON array/scalar body (no .get) or a non-string prompt
    # (no slicing). Degrade to an empty prompt instead.
    try:
        data = await request.json()
    except Exception:
        data = {}
    if not isinstance(data, dict):
        data = {}
    prompt = data.get("prompt", "")
    if not isinstance(prompt, str):
        prompt = str(prompt)
    # simulate model call without blocking the event loop
    await asyncio.sleep(0.1)
    return {"reply": f"Echo: {prompt[:120]}", "stats": state}
75
+
76
+
77
# ───────────────────────────────
# 4️⃣ Gradio dashboard
# ───────────────────────────────
with gr.Blocks(title="Eroha v6.4.4 Dashboard") as demo:
    gr.Markdown("## ⚙️ Eroha v6.4.4 – Zero-Daemon Edition")

    with gr.Row():
        inp = gr.Textbox(label="Prompt")
        out = gr.Textbox(label="Response")
    # NOTE(review): the UI echoes the full prompt, while POST /inference
    # truncates to 120 characters — confirm the asymmetry is intended.
    gr.Button("Send").click(lambda x: f"Echo: {x}", inputs=inp, outputs=out)

    gr.Markdown("### 📊 Live Metrics (5s refresh)")
    cpu_box = gr.Number(label="CPU %")
    ram_box = gr.Number(label="RAM %")

    # Poll the shared `state` dict every 5 s; the values are produced by
    # the background task started in the FastAPI lifespan.
    demo.load(lambda: (state["cpu"], state["ram"]), outputs=[cpu_box, ram_box], every=5)


# ───────────────────────────────
# 5️⃣ Mount Gradio to FastAPI (single port)
# ───────────────────────────────
# Serve the Gradio UI and the JSON API from one ASGI app on one port;
# the dashboard takes over "/" while /health, /metrics and /inference
# remain FastAPI routes.
app = mount_gradio_app(app, demo, path="/")

# No explicit uvicorn.run — HF Spaces handles launch automatically.