Yermek68 committed on
Commit
2ab9e4f
·
verified ·
1 Parent(s): c4c9606

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +147 -110
app.py CHANGED
@@ -1,139 +1,176 @@
1
  import os
 
2
  import json
3
  import time
4
- import hashlib
5
- import gradio as gr
6
- from fastapi import FastAPI
 
 
7
  from fastapi.middleware.cors import CORSMiddleware
8
- from transformers import pipeline
9
- from langdetect import detect
10
- from datetime import datetime
11
-
12
-
13
- # ============ Проверка наличия модулей ============
14
- MODULES = [
15
- "Eroha_PromptCore.md",
16
- "Eroha_Dashboard.md",
17
- "Eroha_AgentAPI_v1.1.md",
18
- "Eroha_MetricsCore_v1.0.md"
19
- ]
20
- AVAILABLE_MODULES = [m for m in MODULES if os.path.exists(m)]
21
- MISSING_MODULES = [m for m in MODULES if m not in AVAILABLE_MODULES]
22
-
23
-
24
- # ============ Проверка загрузчика ============
25
- def loader_health():
26
- table = []
27
- for mod in MODULES:
28
- status = "✅ OK" if mod in AVAILABLE_MODULES else "❌ Missing"
29
- table.append(f"| {mod} | {status} |")
30
-
31
- trace_id = hashlib.md5(str(time.time()).encode()).hexdigest()[:8]
32
- summary = "\n".join(table)
33
-
34
- return f"""
35
- ## 🔍 Eroha Loader — Health Check
36
- | Модуль | Статус |
37
- |---|---|
38
- {summary}
39
-
40
- **TL;DR:** {len(AVAILABLE_MODULES)} из {len(MODULES)} активны.
41
- **Вывод:** {'Все работает корректно ✅' if not MISSING_MODULES else 'Есть отсутствующие модули ⚠️'}
42
- **Риски:** {', '.join(MISSING_MODULES) if MISSING_MODULES else '—'}
43
- **Рекомендации:** Проверь структуру проекта в Hugging Face (Files → Upload)
44
- trace_id: {trace_id}
45
- """
46
-
47
 
48
- # ============ Модель суммаризации ============
49
- summarizers = {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
- def get_summarizer(lang: str):
52
- if lang == "ru":
53
- model_name = "IlyaGusev/mbart_ru_sum_gazeta"
54
- elif lang == "kk":
55
- model_name = "facebook/mbart-large-50-many-to-many-mmt"
56
- else:
57
- model_name = "facebook/bart-large-cnn"
58
 
59
- if model_name not in summarizers:
60
- summarizers[model_name] = pipeline("summarization", model=model_name)
 
 
 
61
 
62
- return summarizers[model_name]
63
 
 
 
64
 
65
- def summarize_text(text: str):
66
- if not text.strip():
67
- return "❌ Введите текст для анализа."
68
 
 
 
69
  try:
70
- lang = detect(text)
71
  except Exception:
72
- lang = "en"
73
 
74
- model = get_summarizer(lang)
75
- result = model(text, max_length=160, min_length=40, do_sample=False)
76
- summary = result[0]["summary_text"].replace("▁", " ").strip()
77
 
78
- now = datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")
 
 
79
 
80
- json_ld = {
81
- "@context": "https://schema.org",
82
- "@type": "NewsArticle",
83
- "headline": summary[:80],
84
- "datePublished": now,
85
- "inLanguage": lang,
86
- "publisher": {"@type": "Organization", "name": "Eroha AI Publisher"},
87
  }
 
88
 
89
- formatted = f"""# 🧠 Eroha Summarizer PRO v2.5 (AutoLang: {lang.upper()})
90
- **Дата:** {now}
91
-
92
- ## 📘 Резюме
93
- {summary}
94
 
95
- ---
 
 
 
96
 
97
- ## 🧩 Schema.org JSON-LD
98
- ```json
99
- {json.dumps(json_ld, ensure_ascii=False, indent=2)}
100
-
101
- TL;DR: {summary[:140]}{'...' if len(summary) > 140 else ''}
102
- """
103
- return formatted
104
-
105
- # ============ FastAPI API ============
106
-
107
- app = FastAPI(title="Eroha Summarizer PRO v2.5")
108
-
109
- app.add_middleware(
110
- CORSMiddleware,
111
- allow_origins=["*"],
112
- allow_methods=["*"],
113
- allow_headers=["*"]
114
- )
115
 
 
116
  @app.get("/")
117
  async def home():
118
- return {"status": "ok", "message": "Eroha Core PRO v2.5 active"}
 
 
 
 
 
119
 
120
- @app.post("/api/summarize")
121
- async def summarize_api(data: dict):
122
- return {"summary": summarize_text(data.get("text", ""))}
123
 
124
- @app.get("/api/core-checkup")
125
- async def core_checkup():
126
- return {"report": loader_health()}
127
 
128
- # ============ Gradio интерфейс ============
129
 
130
- iface = gr.Interface(
131
- fn=summarize_text,
132
- inputs=gr.Textbox(lines=10, label="Введите текст для анализа"),
133
- outputs=gr.Markdown(label="Результат"),
134
- title="Eroha Summarizer PRO v2.5 — Publisher Edition",
135
- description="AI-инструмент для суммаризации, автоопределения языка (RU, EN, KZ, DE, ES, FR) и SEO-разметки (JSON-LD)"
136
- )
137
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  if __name__ == "__main__":
139
- iface.launch(server_name="0.0.0.0", server_port=7860)
 
1
import asyncio
import json
import logging
import os
import sys
import threading
import time
import traceback
from contextlib import asynccontextmanager
from typing import Any, Dict

import uvicorn
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field, validator
14
+
15
# ==================== Configuration ====================
# HF_HOME is exported BEFORE transformers is imported: the Hugging Face
# libraries resolve their cache location when they are first imported, so
# setting the variable afterwards does not change where models are stored.
HF_HOME = "/tmp/huggingface"
os.environ["HF_HOME"] = HF_HOME
os.makedirs(HF_HOME, exist_ok=True)

# ==================== Logging ====================
# Configured before the optional imports so that an import failure is
# reported through the handler and format defined here.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    handlers=[logging.StreamHandler(sys.stderr)],
)

# ==================== Safe imports ====================
# Each optional dependency has its own fallback, so a missing langdetect
# does not also disable summarization (and vice versa).
try:
    from transformers import pipeline
except Exception as e:  # broad on purpose: a broken backend may raise more than ImportError
    # NOTE: logging calls take no `file=` argument; the destination is the
    # stderr handler configured above.
    logging.error("[ImportError] transformers not available: %s", e)
    pipeline = None

try:
    from langdetect import detect
except Exception as e:
    logging.error("[ImportError] langdetect not available: %s", e)

    def detect(x):
        """Fallback detector used when langdetect is unavailable: always "en"."""
        return "en"

# Loaded summarization pipelines, keyed by model name.
_model_cache: Dict[str, Any] = {}
37
+
38
+ # ==================== Lifespan Context ====================
39
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Warm the model cache before the application starts serving.

    When the transformers ``pipeline`` factory is available, two
    summarization models are loaded on CPU (``device=-1``) and stored in
    ``_model_cache``. A model that fails to load is logged and skipped, so
    startup always proceeds. Nothing is done on shutdown.
    """
    started_at = time.time()
    warmup_names = ("facebook/bart-large-cnn", "IlyaGusev/mbart_ru_sum_gazeta")

    # Without transformers there is nothing to preload.
    for name in (warmup_names if pipeline else ()):
        try:
            _model_cache[name] = pipeline("summarization", model=name, device=-1)
        except Exception as exc:
            logging.error(f"[Warmup] Failed preload {name}: {exc}")
        else:
            logging.info(f"[Warmup] Preloaded model: {name}")

    logging.info(f"[Startup] Models initialized in {time.time() - started_at:.2f}s")
    yield
52
+
53
+
54
# Application object; model warm-up is delegated to the lifespan handler.
app = FastAPI(lifespan=lifespan, version="v3.4", title="Eroha AI Summarizer PRO")

# CORS is fully open: any origin, any method, any request header.
app.add_middleware(CORSMiddleware, allow_headers=["*"], allow_methods=["*"], allow_origins=["*"])
61
 
62
+ # ==================== Pydantic models ====================
63
class SummarizeRequest(BaseModel):
    # Request body of POST /summarize.
    # `text` must be 3 to 1,000,000 characters long and must contain at least
    # one non-whitespace character.
    # (Comments instead of a docstring: a model docstring would be published
    # as the schema description.)
    text: str = Field(
        ...,
        min_length=3,
        max_length=1_000_000,
    )

    @validator("text")
    def not_empty(cls, v):
        # Length limits alone would accept a whitespace-only string; reject it here.
        if v.strip():
            return v
        raise ValueError("Text cannot be empty or whitespace only")
71
 
 
72
 
73
class CheckRequest(BaseModel):
    # Request body of POST /check: a required string of 1 to 500,000 characters.
    data: str = Field(
        ...,
        min_length=1,
        max_length=500_000,
    )
75
 
 
 
 
76
 
77
+ # ==================== Utilities ====================
78
def safe_detect_lang(text: str) -> str:
    """Return the language code reported by ``detect`` for *text*.

    Any exception raised by the detector is swallowed and ``"en"`` is
    returned instead, so callers never have to handle detection errors.
    """
    try:
        language = detect(text)
    except Exception:
        language = "en"
    return language
83
 
 
 
 
84
 
85
def get_model(lang: str):
    """Return the summarization pipeline used for language code *lang*.

    Pipelines are created on first use (on CPU, ``device=-1``) and kept in
    ``_model_cache`` under their model name, so each model is loaded once.

    Raises:
        RuntimeError: if the transformers ``pipeline`` factory is unavailable.
    """
    if not pipeline:
        raise RuntimeError("Transformers pipeline unavailable")

    # Language -> model routing; every language not listed here (including
    # "en" and "de") uses the BART CNN model.
    if lang == "ru":
        model_name = "IlyaGusev/mbart_ru_sum_gazeta"
    elif lang in ("kk", "es", "fr"):
        model_name = "facebook/mbart-large-50-many-to-many-mmt"
    else:
        model_name = "facebook/bart-large-cnn"

    if model_name not in _model_cache:
        logging.info(f"[ModelLoad] Loading model dynamically: {model_name}")
        _model_cache[model_name] = pipeline("summarization", model=model_name, device=-1)

    return _model_cache[model_name]
106
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
 
108
+ # ==================== Endpoints ====================
109
@app.get("/")
async def home():
    # Service descriptor: status, application version, names of the models
    # currently cached, and the list of public routes.
    # (A comment rather than a docstring, so the OpenAPI output is unchanged.)
    loaded_models = list(_model_cache)
    routes = ["/ping", "/check", "/summarize", "/warmup"]
    return {
        "status": "ok",
        "version": app.version,
        "cached_models": loaded_models,
        "endpoints": routes,
    }
117
 
 
 
 
118
 
119
@app.get("/ping")
async def ping():
    # Health probe: always reports "healthy" plus the names of cached models.
    cached = list(_model_cache)
    return {"status": "healthy", "cached_models": cached}
122
 
 
123
 
124
@app.get("/warmup")
async def warmup():
    # Reports whether at least one model is already cached; it does not load
    # anything itself.
    return {"status": "warm", "models_ready": bool(_model_cache)}
127
+
 
 
 
128
 
129
@app.post("/check")
async def check_text(req: CheckRequest):
    # Echo basic facts about the submitted text: a 150-character preview,
    # its total length and the detected language. Any unexpected failure is
    # logged with its traceback and surfaced as HTTP 500.
    try:
        payload = req.data
        detected = safe_detect_lang(payload)
        response = {
            "status": "success",
            "preview": payload[:150],
            "length": len(payload),
            "language": detected,
        }
    except Exception as e:
        logging.error(f"/check error: {traceback.format_exc()}")
        raise HTTPException(status_code=500, detail=str(e))
    return response
142
+
143
+
144
# Serializes model loading and inference across worker threads, so the shared
# pipelines in _model_cache are only ever used by one thread at a time.
_inference_lock = threading.Lock()


def _summarize_blocking(text: str) -> dict:
    """Detect the language of *text*, summarize it and build the response body.

    Model loading and inference are synchronous and CPU-bound, so this helper
    is meant to run in a worker thread, never directly on the event loop.
    """
    lang = safe_detect_lang(text)

    with _inference_lock:
        summarizer = get_model(lang)
        # Only the first 3000 characters are passed to the model.
        result = summarizer(text[:3000], max_length=180, min_length=40, do_sample=False)

    # Drop "▁" characters from the output (presumably SentencePiece
    # word-boundary markers left by the tokenizer; confirm per model).
    summary = result[0]["summary_text"].replace("▁", " ").strip()

    json_ld = {
        "@context": "https://schema.org",
        "@type": "NewsArticle",
        "headline": summary[:80],
        "inLanguage": lang,
        "publisher": {"@type": "Organization", "name": "Eroha AI Publisher"},
    }

    return {
        "status": "success",
        "language": lang,
        "summary": summary,
        "summary_length": len(summary),
        "original_length": len(text),
        "seo_json_ld": json_ld,
    }


@app.post("/summarize")
async def summarize(req: SummarizeRequest):
    """Summarize the submitted text and return it with schema.org JSON-LD metadata.

    The blocking work runs in the default executor so the event loop stays
    free to answer other requests (for example ``/ping``) while a summary is
    being generated.

    Raises:
        HTTPException: status 500 with the error text if anything fails; the
            full traceback is written to the log.
    """
    try:
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, _summarize_blocking, req.text)
    except Exception as e:
        logging.error(f"/summarize error: {traceback.format_exc()}")
        raise HTTPException(status_code=500, detail=str(e)) from e
172
+
173
+
174
+ # ==================== Entry point ====================
175
if __name__ == "__main__":
    # Run the ASGI app from this module on all interfaces, port 7860, with
    # auto-reload turned off.
    server_options = {"host": "0.0.0.0", "port": 7860, "reload": False}
    uvicorn.run("app:app", **server_options)