Yermek68 committed on
Commit
7276fa0
·
verified ·
1 Parent(s): b95b531

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +200 -105
app.py CHANGED
@@ -1,109 +1,204 @@
 
 
 
 
 
1
  import os
 
 
2
  import time
3
- import json
4
- import gradio as gr
5
- import requests
6
  from datetime import datetime
7
-
8
- # =========================
9
- # ⚙️ НАСТРОЙКИ
10
- # =========================
11
- HF_TOKEN = os.getenv("HF_TOKEN") or "hf_your_token_here"
12
-
13
- PRIMARY_MODEL = "google/gemma-2-2b-it"
14
- FALLBACK_MODEL = "mistralai/Mixtral-8x7B-Instruct-v0.1"
15
- ROUTER_URL = "https://router.huggingface.co"
16
-
17
- HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"}
18
-
19
-
20
- # =========================
21
- # 🧩 ПОЛЕЗНЫЕ ФУНКЦИИ
22
- # =========================
23
-
24
- def check_token():
25
- """Проверка валидности токена Hugging Face."""
26
- try:
27
- res = requests.get("https://router.huggingface.co/status", headers=HEADERS, timeout=8)
28
- if res.status_code == 200:
29
- return True
30
- else:
31
- print(f"⚠️ Токен Hugging Face невалиден ({res.status_code})")
 
 
32
  return False
33
- except Exception as e:
34
- print(f"Ошибка при проверке токена: {e}")
35
- return False
36
-
37
-
38
- def send_request(model: str, prompt: str):
39
- """Отправка запроса к модели через Router API."""
40
- payload = {"model": model, "inputs": prompt, "options": {"use_cache": True}}
41
- try:
42
- start = time.time()
43
- response = requests.post(ROUTER_URL, headers=HEADERS, json=payload, timeout=60)
44
- latency = time.time() - start
45
-
46
- if response.status_code == 200:
47
- data = response.json()
48
- # Универсальный парсинг ответа
49
- if isinstance(data, list) and len(data) > 0 and "generated_text" in data[0]:
50
- text = data[0]["generated_text"]
51
- elif isinstance(data, dict) and "generated_text" in data:
52
- text = data["generated_text"]
53
- else:
54
- text = str(data)
55
- return text.strip(), latency, model, None
56
- else:
57
- return None, latency, model, f"Ошибка API {response.status_code}: {response.text}"
58
- except Exception as e:
59
- return None, 0, model, str(e)
60
-
61
-
62
- def generate_text(prompt: str):
63
- """Основная функция: попытка через основную модель → fallback при ошибке."""
64
- if not check_token():
65
- return "❌ Токен Hugging Face недействителен. Проверьте переменную HF_TOKEN."
66
-
67
- # 1️⃣ Основная модель
68
- output, latency, used_model, error = send_request(PRIMARY_MODEL, prompt)
69
- if output:
70
- return render_output(output, used_model, latency, success=True)
71
-
72
- # 2️⃣ Fallback при ошибке
73
- output_fb, latency_fb, model_fb, error_fb = send_request(FALLBACK_MODEL, prompt)
74
- if output_fb:
75
- return render_output(output_fb, model_fb, latency_fb, success=True, fallback=True)
76
- else:
77
- return f"❌ Ошибка при выполнении запроса:\n- {error}\n- Fallback: {error_fb}"
78
-
79
-
80
- def render_output(text, model, latency, success=False, fallback=False):
81
- """Форматированный вывод результата."""
82
- emoji = "" if success else "⚠️"
83
- fb_text = " (через fallback)" if fallback else ""
84
- return (
85
- f"{emoji} **Модель:** `{model}`{fb_text}\n"
86
- f"⏱ **Время отклика:** {latency:.2f} сек\n\n"
87
- f"🧠 **Ответ:**\n{text.strip()}"
88
- )
89
-
90
-
91
- # =========================
92
- # 🧭 GRADIO UI
93
- # =========================
94
-
95
- with gr.Blocks(title="🤖 Eroha AgentAPI v5.7 — Stable Router Edition") as demo:
96
- gr.Markdown("## 🧠 Eroha AgentAPI v5.7 — Stable Router Edition\n"
97
- "Поддержка Router API + AutoFallback + Token Validation 🌐")
98
-
99
- with gr.Row():
100
- prompt = gr.Textbox(label="Введите запрос", placeholder="Например: 'Расскажи историю про ИИ, который научился понимать чувства.'", lines=3)
101
- output = gr.Markdown(label="Ответ")
102
-
103
- btn = gr.Button("🚀 Отправить", variant="primary")
104
- btn.click(generate_text, inputs=prompt, outputs=output)
105
-
106
- gr.Markdown("---")
107
- gr.Markdown("🧩 **Eroha Router Core v5.7** | Автоопределение моделей + безопасный fallback")
108
-
109
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Eroha AgentAPI v5.8 — AutoRecovery + Smart Dashboard Edition
3
+ Production-grade архитектура для Hugging Face Spaces
4
+ """
5
+
6
  import os
7
+ import asyncio
8
+ import aiohttp
9
  import time
 
 
 
10
  from datetime import datetime
11
+ import gradio as gr
12
+ from fastapi import FastAPI
13
+ from fastapi.responses import JSONResponse
14
+ from transformers import pipeline
15
+ import psutil
16
+
17
# ==============================
# CONFIGURATION
# ==============================
HF_TOKEN = os.getenv("HF_TOKEN", "")  # Space secret; empty string means unauthenticated requests
ROUTER_URL = "https://api-inference.huggingface.co/models"  # HF serverless inference base URL
FALLBACK_MODEL = "sshleifer/tiny-gpt2"  # tiny local model loaded when the router fails
CHECK_INTERVAL = 300 # 5 min
24
+
25
+ # ==============================
26
+ # CORE CLASSES
27
+ # ==============================
28
class CircuitBreaker:
    """Circuit breaker guarding calls to the HF router.

    States: CLOSED (normal operation), OPEN (rejecting calls after
    repeated failures), HALF_OPEN (probing again once the cooldown
    has elapsed).
    """

    def __init__(self, threshold=3, timeout=60):
        self.failures = 0          # consecutive failures so far
        self.threshold = threshold  # failures needed to trip OPEN
        self.timeout = timeout      # seconds to stay OPEN before probing
        self.state = "CLOSED"
        self.last_failure = 0       # time.time() of the trip that opened us

    def allow(self):
        """Return True when a call may proceed.

        While OPEN and inside the cooldown window, calls are rejected.
        Once the cooldown expires the breaker moves to HALF_OPEN and
        lets a probe through.
        """
        if self.state == "OPEN":
            elapsed = time.time() - self.last_failure
            if elapsed < self.timeout:
                return False
            self.state = "HALF_OPEN"
        return True

    def record_success(self):
        """Reset the failure count and close the breaker again."""
        self.failures = 0
        if self.state in ("HALF_OPEN", "OPEN"):
            self.state = "CLOSED"

    def record_failure(self):
        """Count one failure; trip to OPEN once the threshold is hit."""
        self.failures += 1
        if self.failures >= self.threshold:
            self.state = "OPEN"
            self.last_failure = time.time()

# Shared breaker used by every router call in this module.
circuit = CircuitBreaker()
55
+
56
class HFClient:
    """Async client for the Hugging Face Inference API.

    Wraps token validation and text generation; every router call
    feeds the module-level ``circuit`` breaker so repeated errors
    trip it and engage the local fallback.
    """

    def __init__(self):
        self.token = HF_TOKEN
        # Only send an Authorization header when a token is configured.
        self.headers = {"Authorization": f"Bearer {self.token}"} if self.token else {}
        self.valid = False  # last known result of validate()

    async def validate(self):
        """Check the token against whoami-v2; update and return ``self.valid``."""
        try:
            # Bounded timeout so a hung endpoint can't stall the watchdog.
            timeout = aiohttp.ClientTimeout(total=15)
            async with aiohttp.ClientSession(timeout=timeout) as s:
                async with s.get("https://huggingface.co/api/whoami-v2", headers=self.headers) as r:
                    self.valid = r.status == 200
                    return self.valid
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit propagate.
            self.valid = False
            return False

    async def infer(self, model, text):
        """POST a generation request for ``model``.

        Returns the parsed JSON on success, or ``{"error": ...}`` on
        any failure (including an open circuit breaker).
        """
        if not circuit.allow():
            return {"error": "Circuit breaker open fallback engaged"}

        try:
            # ClientTimeout replaces the deprecated numeric `timeout=30` on post().
            timeout = aiohttp.ClientTimeout(total=30)
            async with aiohttp.ClientSession(timeout=timeout) as s:
                payload = {"inputs": text, "parameters": {"max_new_tokens": 250}}
                async with s.post(f"{ROUTER_URL}/{model}", headers=self.headers, json=payload) as r:
                    if r.status == 200:
                        circuit.record_success()
                        data = await r.json()
                        return data
                    circuit.record_failure()
                    if r.status == 401:
                        # Token rejected: flag it so /health reports "degraded".
                        self.valid = False
                    return {"error": f"Router error {r.status}"}
        except Exception as e:
            circuit.record_failure()
            return {"error": str(e)}

# Shared client instance used by the API routes and the Gradio UI.
client = HFClient()
94
+
95
class Fallback:
    """Local text-generation fallback used when the router is unavailable."""

    def __init__(self):
        self.pipe = None    # lazily created transformers pipeline
        self.ready = False
        self._lock = asyncio.Lock()  # prevents duplicate concurrent loads

    async def load(self):
        """Load the fallback pipeline once; safe under concurrent callers."""
        if self.ready:
            return
        async with self._lock:
            if not self.ready:
                # pipeline() blocks (downloads/loads weights); run it in a
                # worker thread so the event loop stays responsive.
                self.pipe = await asyncio.to_thread(
                    pipeline, "text-generation", model=FALLBACK_MODEL
                )
                self.ready = True

    async def generate(self, text):
        """Generate up to 100 new tokens locally and return the full text."""
        await self.load()
        # Inference is CPU-bound; keep it off the event loop as well.
        result = await asyncio.to_thread(self.pipe, text, max_new_tokens=100)
        return result[0]["generated_text"]

# Single shared fallback; the tiny model keeps memory usage low.
fallback = Fallback()
111
+
112
+ # ==============================
113
+ # WATCHDOG
114
+ # ==============================
115
async def watchdog():
    """Background task: periodically re-validate the HF token and warn
    when memory pressure gets high. Runs forever; sleeps CHECK_INTERVAL
    seconds between checks.
    """
    while True:
        print(f"[{datetime.now().isoformat()}] 🔍 Watchdog check...")
        await client.validate()
        usage = psutil.virtual_memory().percent
        if usage > 85:
            print(f"⚠️ High memory usage: {usage}%")
        await asyncio.sleep(CHECK_INTERVAL)
123
+
124
# ==============================
# FASTAPI CORE
# ==============================
app = FastAPI(title="Eroha AgentAPI v5.8")

# NOTE(review): @app.on_event is deprecated in recent FastAPI versions;
# consider migrating to a lifespan handler.
@app.on_event("startup")
async def startup():
    """Launch the background watchdog and perform an initial token check."""
    asyncio.create_task(watchdog())
    await client.validate()
133
+
134
@app.get("/health")
async def health():
    """Status probe: token validity, circuit-breaker state, memory usage."""
    return JSONResponse({
        "status": "ok" if client.valid else "degraded",
        "circuit": circuit.state,
        "memory": psutil.virtual_memory().percent,
        "timestamp": datetime.now().isoformat()
    })
142
+
143
@app.post("/inference")
async def inference(data: dict):
    """Run router inference; on any router error serve the local fallback.

    Expects a JSON body with optional "prompt" and "model" keys.
    """
    text = data.get("prompt", "")
    model = data.get("model", "microsoft/phi-3-mini-4k-instruct")

    res = await client.infer(model, text)
    # infer() returns a dict with "error" on failure; a successful list
    # response will not match this membership test.
    if "error" in res:
        fb = await fallback.generate(text)
        return {"source": "fallback", "response": fb, "note": res["error"]}
    return {"source": "router", "response": res}
153
+
154
+ # ==============================
155
+ # GRADIO INTERFACE
156
+ # ==============================
157
def gradio_infer(prompt, model_choice):
    """Synchronous Gradio handler bridging into the async client.

    Gradio invokes this from a worker thread, so asyncio.run() is safe
    and replaces the previous manual new_event_loop/set_event_loop/close
    dance, which left a closed loop installed as the thread's event loop.
    """
    result = asyncio.run(client.infer(model_choice, prompt))
    if "error" in result:
        # Router failed (or circuit open): generate locally and show both.
        fb = asyncio.run(fallback.generate(prompt))
        return f"⚠️ Router failed ({result['error']})\n\n🧠 Fallback:\n{fb}"
    if isinstance(result, list):
        return result[0].get("generated_text", str(result))
    return str(result)
168
+
169
def show_dashboard():
    """Render the current service status as a small markdown table."""
    mem = psutil.virtual_memory().percent
    status = "✅ OK" if client.valid else "⚠️ Token Invalid"
    return f"""
### 🧠 Eroha AgentAPI Dashboard
| Metric | Status |
|--------|--------|
| Token Valid | {status} |
| Circuit | {circuit.state} |
| Memory Usage | {mem}% |
| Time | {datetime.now().strftime("%H:%M:%S")} |
"""
181
+
182
# Gradio UI: a chat tab wired to gradio_infer and a status dashboard tab.
demo = gr.Blocks(title="Eroha AgentAPI v5.8 — AutoRecovery Edition")
with demo:
    gr.Markdown("# 🤖 Eroha AgentAPI v5.8 — AutoRecovery + Smart Dashboard")
    with gr.Tab("💬 Chat"):
        inp = gr.Textbox(label="Введите запрос")
        model = gr.Dropdown(
            ["microsoft/phi-3-mini-4k-instruct", "google/gemma-2-2b-it", "meta-llama/Meta-Llama-3-8B-Instruct"],
            value="microsoft/phi-3-mini-4k-instruct", label="Модель"
        )
        out = gr.Textbox(label="Ответ")
        btn = gr.Button("🚀 Отправить")
        btn.click(fn=gradio_infer, inputs=[inp, model], outputs=out)
    with gr.Tab("📊 Dashboard"):
        dash = gr.Markdown()
        refresh = gr.Button("🔄 Обновить состояние")
        refresh.click(fn=show_dashboard, outputs=dash)
        # Populate the dashboard once at build time so the tab isn't empty
        # before the first refresh click.
        dash.value = show_dashboard()
199
+
200
# Serve the Gradio UI under /ui while keeping the JSON API at the root.
app = gr.mount_gradio_app(app, demo, path="/ui")

if __name__ == "__main__":
    import uvicorn
    # Bind all interfaces; 7860 is the conventional HF Spaces port.
    uvicorn.run(app, host="0.0.0.0", port=7860)