Yermek68 committed on
Commit
b3e6415
·
verified ·
1 Parent(s): ee49bfe

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +153 -161
app.py CHANGED
@@ -3,198 +3,190 @@ from transformers import pipeline
3
  from fastapi import FastAPI
4
  from fastapi.middleware.cors import CORSMiddleware
5
  from langdetect import detect
6
- from functools import lru_cache
7
  import re
 
 
8
 
9
- # ======================================================
10
- # 🚀 Eroha Summarizer PRO++++ v2.1.1 Stable (by Yermek68)
11
- # ======================================================
12
 
13
- # --- Кэш моделей ---
14
@lru_cache(maxsize=10)
def get_summarizer(lang: str, long: bool = False):
    """Return a summarization pipeline for the given language code.

    Results are memoized per ``(lang, long)`` pair by ``lru_cache``.
    ``long`` only influences the fallback branch (any language other than
    ru/de/es/fr), where it switches to the long-document model.
    """
    language_models = {
        "ru": "IlyaGusev/mbart_ru_sum_gazeta",
        "de": "ml6team/mbart-large-cc25-cnn-distilled-german",
        "es": "mrm8488/bert2bert_shared-spanish-finetuned-summarization",
        "fr": "mrm8488/mbart-large-finetuned-opus-fr-en",
    }
    if lang in language_models:
        chosen = language_models[lang]
    elif long:
        chosen = "pszemraj/led-large-book-summary"
    else:
        chosen = "facebook/bart-large-cnn"
    return pipeline("summarization", model=chosen)
27
-
28
@lru_cache(maxsize=10)
def get_sentiment_analyzer(lang: str):
    """Return an emotion-classification pipeline, memoized per language code.

    Russian gets a dedicated model; every other language falls back to the
    English emotion model. ``top_k=None`` is passed through to the pipeline.
    """
    chosen = (
        "cointegrated/rubert-tiny2-emo"
        if lang == "ru"
        else "j-hartmann/emotion-english-distilroberta-base"
    )
    return pipeline("text-classification", model=chosen, top_k=None)
35
 
36
- # --- Вспомогательные функции ---
37
def clean_text(text: str) -> str:
    """Normalize model output / user input into plain readable text.

    Steps, in order:
      1. Rewrite tokenizer artefacts (``▁``/``Ġ`` -> space, ``<n>`` -> newline,
         ``<s>``/``</s>``/``Â``/replacement character -> removed).
      2. Drop every character that is not ASCII, Russian Cyrillic or one of
         the whitelisted punctuation marks.
      3. Trim the ends and collapse runs of spaces into a single space.

    Returns the cleaned string (possibly empty).
    """
    # The artefacts must be rewritten BEFORE the whitelist filter: they are
    # non-ASCII, so filtering first deletes them and "▁"/"Ġ" never become the
    # spaces they stand for (words would be glued together).
    artefacts = (
        ("▁", " "),
        ("Ġ", " "),
        ("<n>", "\n"),
        ("<s>", ""),
        ("</s>", ""),
        ("Â", ""),
        ("�", ""),
    )
    for marker, replacement in artefacts:
        text = text.replace(marker, replacement)

    text = re.sub(r"[^\x00-\x7Fа-яА-ЯёЁ.,!?;:\-–—«»\"'()\[\] ]", "", text)
    return re.sub(" +", " ", text.strip())
42
-
43
def detect_topic(text: str):
    """Return the first topic whose keyword list matches ``text``.

    Matching is case-insensitive. A keyword must begin at a word start, and
    keywords of three characters or fewer (e.g. "AI") must also end at a word
    end so they do not fire inside unrelated words such as "said".
    Topics are tried in declaration order; a default label is returned when
    nothing matches.
    """
    topics = {
        "Политика": ["правительство", "закон", "президент", "выборы"],
        "Экономика": ["компания", "рынок", "инвестиции", "бизнес"],
        "Технологии": ["AI", "робот", "интернет", "технологии"],
        "Спорт": ["команда", "матч", "игра"],
        "Наука": ["исследование", "данные", "учёные"],
    }

    def keyword_pattern(keyword: str) -> str:
        # Keys are lowercased here because the text is lowercased too; the
        # original compared the literal "AI" against lowercased text, so that
        # keyword could never match.
        right_edge = r"(?!\w)" if len(keyword) <= 3 else ""
        return r"(?<!\w)" + re.escape(keyword.lower()) + right_edge

    lowered = text.lower()
    for topic, keys in topics.items():
        if any(re.search(keyword_pattern(k), lowered) for k in keys):
            return topic
    return "Общее / неопределённое направление"
56
-
57
def detect_genre(text: str):
    """Classify ``text`` into a genre label by case-insensitive substring cues.

    Genres are checked in a fixed order (news, analytics, review, ad); the
    first genre with at least one cue present in the text wins. A generic
    label is returned when no cue is found.
    """
    genre_cues = (
        ("📰 Новость", ("заявил", "сообщил", "вчера", "компания", "год")),
        ("📊 Аналитика", ("исследование", "данные", "анализ", "эксперимент")),
        ("🗣️ Отзыв", ("купил", "доволен", "рекомендую", "не советую")),
        ("📢 Реклама", ("коммерческий", "продукт", "цена", "скидка")),
    )
    lowered = text.lower()
    for label, cues in genre_cues:
        for cue in cues:
            if cue in lowered:
                return label
    return "📄 Текст общего типа"
68
-
69
- # =====================================================
70
- # 🧩 Основная функция
71
- # =====================================================
72
def summarize_text(text: str):
    """Summarize ``text`` and return a Markdown report.

    The report contains the detected topic, genre, dominant emotion (with
    score), the summary itself and a 200-character TL;DR. Blank input yields a
    short prompt message instead of a report. Texts under 50 words are not
    summarized; the cleaned text is used as the summary directly.
    """
    if not text.strip():
        return "❌ Введите текст для анализа."

    try:
        lang = detect(text)
    except Exception:
        # Language detection is best-effort: fall back to English rather than
        # fail the whole request (narrowed from a bare ``except:`` so that
        # KeyboardInterrupt/SystemExit still propagate).
        lang = "en"

    text = clean_text(text)
    words = len(text.split())

    if words < 50:
        summary = text
    else:
        # Load the summarizer only on the path that uses it; texts above 800
        # words request the long-document variant.
        summarizer = get_summarizer(lang, words > 800)
        max_len, min_len = (250, 60) if words > 300 else (120, 40)
        summary_raw = summarizer(
            text, max_length=max_len, min_length=min_len, do_sample=False
        )[0]["summary_text"]
        summary = clean_text(summary_raw)

    # Emotion analysis
    sentiment_model = get_sentiment_analyzer(lang)
    emotions = sentiment_model(summary)
    top_emotion = emotions[0]
    if isinstance(top_emotion, list):
        # NOTE(review): with top_k=None some pipeline versions appear to nest
        # the per-label results one level deeper — confirm against the
        # installed transformers version.
        top_emotion = top_emotion[0]
    emo_label = top_emotion["label"]
    emo_score = top_emotion.get("score", 0)

    emo_map = {
        "joy": "😊 Радость",
        "sadness": "😢 Грусть",
        "anger": "😠 Гнев",
        "fear": "😨 Тревога",
        "neutral": "😐 Нейтральное",
        "surprise": "😲 Удивление",
        "disgust": "🤢 Отвращение",
    }
    emotion = emo_map.get(emo_label.lower(), "😐 Нейтральное")

    topic = detect_topic(text)
    genre = detect_genre(text)

    color = "green" if "Радость" in emotion else "red" if "Грусть" in emotion or "Гнев" in emotion else "orange"

    # The original labelled every non-Russian language as English even though
    # German, Spanish and French have dedicated summarization models.
    language_names = {
        "ru": "Русский",
        "de": "Немецкий",
        "es": "Испанский",
        "fr": "Французский",
    }
    language_label = language_names.get(lang, "Английский")

    report_lines = [
        "# 🧠 <span style='color:#0073e6'>Eroha Summarizer PRO++++ v2.1.1 Stable</span>",
        f"_(Автоязык: {language_label})_",
        "",
        "---",
        "",
        f"### 📌 Тема: <b>{topic}</b>",
        f"### 🗂️ Жанр: {genre}",
        f"### 💬 Настроение: <span style='color:{color}'>{emotion}</span> ({emo_score:.2f})",
        "",
        "---",
        "",
        "## 📘 Резюме:",
        f"{summary}",
        "",
        "---",
        "",
        "### TL;DR:",
        f"{summary[:200]}{'...' if len(summary) > 200 else ''}",
        "",
        "---",
        "",
        "*Eroha Intelligence Suite — Multilingual AI summarizer powered by Hugging Face*",
    ]
    return "\n".join(report_lines).strip()
141
 
142
- # =====================================================
143
- # 🌐 FastAPI backend
144
- # =====================================================
 
 
 
 
 
145
 
146
# FastAPI application object; CORS accepts any origin, method and header.
app = FastAPI(
    title="Eroha Summarizer PRO++++ v2.1.1",
    version="2.1.1",
)
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
148
 
149
@app.post("/api/full")
def api_full(data: dict):
    """POST /api/full — return the full Markdown report for ``data["text"]``.

    A missing or non-string ``text`` (e.g. JSON ``null`` or a number) is
    treated as empty input, so the caller receives the regular "enter text"
    message instead of an internal error from ``str`` methods being called on
    a non-string value.
    """
    raw = data.get("text", "")
    text = raw if isinstance(raw, str) else ""
    summary = summarize_text(text)
    return {"summary": summary}
154
-
155
@app.post("/api/lite")
def api_lite(data: dict):
    """POST /api/lite — return a short plain-text TL;DR for ``data["text"]``.

    The full report is generated, HTML tags are stripped, and the text under
    the ``### TL;DR:`` heading is returned (capped at 300 characters). When
    the report has no such heading — e.g. the blank-input message — the
    tag-stripped report itself is truncated, as before.
    """
    raw = data.get("text", "")
    # Non-string payloads (JSON null, numbers) are treated as empty input.
    text = raw if isinstance(raw, str) else ""
    result = summarize_text(text)
    clean_result = re.sub(r"<[^>]+>", "", result)

    # Previously the first 300 characters of the whole report were returned,
    # which is the title banner rather than the TL;DR section.
    section = re.search(
        r"### TL;DR:\s*(.+?)\s*(?:\n---|\Z)", clean_result, flags=re.DOTALL
    )
    tldr = section.group(1) if section else clean_result
    return {"tldr": tldr[:300]}
161
-
162
- # =====================================================
163
- # 🎨 Gradio интерфейс
164
- # =====================================================
165
 
166
def gradio_summary(text):
    """Gradio callback: forward the textbox content to ``summarize_text``."""
    return summarize_text(text)
168
-
169
# Gradio UI: one input textbox, a Markdown result pane and two action buttons.
with gr.Blocks(title="Eroha Summarizer PRO++++ v2.1.1") as iface:
    gr.Markdown("## 🧠 Eroha Summarizer PRO++++ v2.1.1 Stable\nAI-инструмент нового поколения для анализа, темы, эмоций и автоопределения языка (рус/англ/нем/исп/фр).")

    text_input = gr.Textbox(lines=10, label="Введите текст для анализа и суммаризации")
    result_output = gr.Markdown(label="Результат")

    with gr.Row():
        copy_btn = gr.Button("📋 Копировать")
        download_btn = gr.Button("💾 Скачать результат")

    # Summarization is triggered only by the textbox "submit" event; there is
    # no dedicated button for it.
    text_input.submit(gradio_summary, inputs=text_input, outputs=result_output)
    # NOTE(review): outputs=None means the value returned by this handler is
    # not routed to any component — confirm whether copying actually works.
    copy_btn.click(lambda x: x, inputs=result_output, outputs=None)
    # NOTE(review): same outputs=None concern; also verify that
    # gr.File.update exists in the Gradio version this app is deployed with.
    download_btn.click(lambda x: gr.File.update(value=x.encode("utf-8"), visible=True), inputs=result_output, outputs=None)

    gr.Markdown("---\n✨ _Eroha Intelligence Suite © 2025 — by Yermek68_")
 
184
 
185
# Script entry point: start the Gradio server when the file is run directly.
if __name__ == "__main__":
    import os
    try:
        # Enable the request queue with default settings (no parameters).
        iface.queue()  # author's note: supported by all Gradio versions >= 5.0

        # Main launch; the port comes from the PORT environment variable and
        # defaults to 7860.
        iface.launch(
            server_name="0.0.0.0",
            server_port=int(os.getenv("PORT", 7860)),
            share=False,  # author's note: safe for Hugging Face
            ssr_mode=False,  # author's note: prevents repeated restarts
            debug=False,  # author's note: clean log without noise
        )
    except Exception as e:
        # NOTE(review): every launch failure is caught and only printed, under
        # a message that attributes it to a runtime restart — a genuine
        # startup error would be reported the same way.
        print(f"⚠️ Runtime restart or environment reload detected: {e}")
 
3
  from fastapi import FastAPI
4
  from fastapi.middleware.cors import CORSMiddleware
5
  from langdetect import detect
 
6
  import re
7
+ import datetime
8
+ import hashlib
9
 
10
# Model caches: loaded pipelines keyed by model name, filled lazily by
# get_summarizer() and get_sentiment_analyzer().
summarizers = {}
analyzers = {}
13
 
14
+ # =============== УТИЛИТЫ ===============
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
def clean_text(text: str) -> str:
    """Strip unsupported characters and normalize whitespace.

    Only word characters, whitespace and a fixed set of punctuation marks are
    kept. All whitespace runs (including newlines) are then collapsed to one
    space and the ends are trimmed.

    Returns the cleaned string (possibly empty).
    """
    # Remove disallowed characters FIRST: doing it after whitespace collapsing
    # leaves a double space wherever a removed symbol stood between two
    # spaces ("a @ b" -> "a  b").
    text = re.sub(r"[^\w\s.,!?%\-–:;()\"'’«»]", "", text)
    # \s already covers "\n" and "\r", so no separate newline replacement.
    text = re.sub(r"\s+", " ", text)
    return text.strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
def detect_language(text: str) -> str:
    """Detect the language code of ``text``, with a Kazakh override.

    The result of ``detect`` is used as the baseline ("en" if detection
    fails). If the text contains any Kazakh-specific Cyrillic letter, the
    result is forced to "kk".
    """
    try:
        lang = detect(text)
    except Exception:
        # Best-effort: fall back to English rather than fail the request
        # (narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
        # still propagate).
        lang = "en"

    # Kazakh-specific letters; "ғ" was missing from the original set.
    # NOTE(review): "і" also occurs in Ukrainian/Belarusian text, so such
    # input is labelled "kk" as well — confirm this is acceptable.
    kazakh_letters = "әғқңөұүһі"
    lowered = text.lower()  # lowercase once instead of once per letter
    if any(ch in lowered for ch in kazakh_letters):
        lang = "kk"

    return lang
 
 
35
 
36
def generate_slug(title: str) -> str:
    """Build an SEO-friendly ``/news/...`` path from ``title``.

    The title is lowercased, every run of non-alphanumeric characters becomes
    a single hyphen, and the first six hex digits of the title's MD5 digest
    are appended to keep slugs distinct. When the title has no usable
    characters the path consists of the hash alone.
    """
    # [\W_]+ keeps letters and digits of any script. The previous class
    # [^a-zA-Zа-яА-Я0-9] treated "ё" and the Kazakh letters (қ, ә, ң, ...) as
    # separators, shredding Kazakh titles into fragments.
    slug = re.sub(r"[\W_]+", "-", title.lower()).strip("-")
    # MD5 is used only as a short, stable suffix — not for security.
    slug_hash = hashlib.md5(title.encode()).hexdigest()[:6]
    if not slug:
        return f"/news/{slug_hash}"
    return f"/news/{slug}-{slug_hash}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
+ # =============== МОДЕЛИ ===============
 
43
 
44
def get_summarizer(lang: str):
    """Return the summarization pipeline for ``lang``, loading it on first use.

    Russian and Kazakh have dedicated model names; any other language code
    uses the default English model. Loaded pipelines are kept in the
    module-level ``summarizers`` dict, keyed by model name.
    """
    model_by_language = {
        "ru": "IlyaGusev/mbart_ru_sum_gazeta",
        "kk": "facebook/mbart-large-50-many-to-many-mmt",
    }
    model_name = model_by_language.get(lang, "facebook/bart-large-cnn")

    try:
        return summarizers[model_name]
    except KeyError:
        loaded = pipeline("summarization", model=model_name)
        summarizers[model_name] = loaded
        return loaded
56
 
57
def get_sentiment_analyzer(lang: str):
    """Return the sentiment pipeline for ``lang``, loading it on first use.

    Russian and Kazakh share a multilingual model; every other language code
    uses the English Twitter model. Loaded pipelines are kept in the
    module-level ``analyzers`` dict, keyed by model name.
    """
    model_name = (
        "nlptown/bert-base-multilingual-uncased-sentiment"
        if lang in ("ru", "kk")
        else "cardiffnlp/twitter-roberta-base-sentiment"
    )

    try:
        return analyzers[model_name]
    except KeyError:
        loaded = pipeline("sentiment-analysis", model=model_name)
        analyzers[model_name] = loaded
        return loaded
66
+
67
+ # =============== КОНТЕНТ ===============
68
+
69
def extract_keywords(text: str, top_n: int = 7):
    """Return the ``top_n`` most frequent long words as a comma-separated string.

    A "word" is a run of at least five word characters, compared
    case-insensitively. Words are ordered by descending frequency; words with
    equal frequency keep the order of their first appearance. An input with
    no qualifying word yields an empty string.
    """
    from collections import Counter  # local import keeps the block self-contained

    counts = Counter(re.findall(r"\b\w{5,}\b", text.lower()))
    # sorted() is stable (also with reverse=True), and Counter preserves
    # insertion order, so ties stay in first-seen order.
    keywords = sorted(counts, key=counts.get, reverse=True)[:top_n]
    return ", ".join(keywords)
77
 
78
def detect_topic(text: str):
    """Return the first topic whose keyword list matches ``text``.

    Matching is case-insensitive. A keyword must begin at a word start, and
    keywords of three characters or fewer (e.g. "ai") must also end at a word
    end. Topics are tried in declaration order; a default label is returned
    when nothing matches.
    """
    topics = {
        "Экономика": ["рынок", "компания", "акция", "инвестиция", "сату", "қаржы"],
        "Технологии": ["ai", "робот", "интернет", "жасанды интеллект"],
        "Саясат": ["үкімет", "закон", "президент", "выборы"],
        "Ғылым": ["зерттеу", "ғалым", "эксперимент"],
        "Спорт": ["матч", "команда", "спорт"],
    }

    def keyword_pattern(keyword: str) -> str:
        # A plain substring test made "ai" fire inside "said"/"train" and
        # "спорт" inside "транспорт". Anchoring to the word start (and, for
        # very short keys, the word end) removes those false positives while
        # still allowing inflected endings ("закон" matches "законопроект").
        right_edge = r"(?!\w)" if len(keyword) <= 3 else ""
        return r"(?<!\w)" + re.escape(keyword.lower()) + right_edge

    text_lower = text.lower()
    for topic, words in topics.items():
        if any(re.search(keyword_pattern(w), text_lower) for w in words):
            return topic
    return "Жалпы тақырып / Общая тема"
92
 
93
+ # =============== ОСНОВНАЯ ЛОГИКА ===============
94
 
95
def _sentiment_polarity(label: str) -> str:
    """Map a raw classifier label to "positive", "negative" or "neutral"."""
    label = label.lower()
    if label.startswith("label_"):
        # Index-style labels: per the cardiffnlp/twitter-roberta-base-sentiment
        # model card, LABEL_0/1/2 mean negative/neutral/positive. A
        # digit-substring test misreads "label_1" (neutral) as negative and
        # never recognizes "label_2" as positive.
        return {"label_0": "negative", "label_2": "positive"}.get(label, "neutral")
    # Star-style ("5 stars", "1 star") or textual ("positive"/"negative") labels.
    if "5" in label or "pos" in label:
        return "positive"
    if "1" in label or "neg" in label:
        return "negative"
    return "neutral"


def summarize_text(text: str):
    """Summarize ``text`` and return a Markdown report with SEO metadata.

    The report contains the detected language, timestamp, topic, sentiment,
    the summary, and derived SEO fields (title, keywords, meta description,
    slug, readiness verdict). Input that is blank — or becomes blank after
    cleaning — yields a short prompt message instead of a report.
    """
    if not text.strip():
        return "⚠️ Введите текст для анализа."

    text = clean_text(text)
    if not text:
        # Cleaning can remove everything (e.g. emoji-only input); do not send
        # an empty string to the models.
        return "⚠️ Введите текст для анализа."
    lang = detect_language(text)

    summarizer = get_summarizer(lang)
    sentiment_model = get_sentiment_analyzer(lang)

    # Summary length limits scale with the input size (in words).
    words = len(text.split())
    if words < 80:
        max_len, min_len = 70, 20
    elif words < 300:
        max_len, min_len = 140, 40
    else:
        max_len, min_len = 220, 60

    # Summarization
    summary = summarizer(
        text, max_length=max_len, min_length=min_len, do_sample=False
    )[0]["summary_text"]

    # Sentiment analysis (run on the summary, not the full text)
    sentiment_labels = {
        "positive": "😊 Позитивті / Позитивное",
        "negative": "😞 Теріс / Негативное",
        "neutral": "😐 Бейтарап / Нейтральное",
    }
    sentiment = sentiment_labels[_sentiment_polarity(sentiment_model(summary)[0]["label"])]

    # SEO metadata
    topic = detect_topic(text)
    keywords = extract_keywords(text)
    title = summary.split(".")[0][:80].strip()
    meta_description = summary[:160].strip()
    slug = generate_slug(title)

    # SEO score: one point each for enough keywords, a long enough
    # description and a long enough title; two points pass.
    score = 0
    score += 1 if len(keywords.split(",")) >= 5 else 0
    score += 1 if len(meta_description) >= 100 else 0
    score += 1 if len(title) > 20 else 0
    seo_status = "✅ Оптимально для публикации" if score >= 2 else "⚠️ Недостаточно данных для SEO"

    date_now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M")
    language_label = 'Қазақ (Kazakh)' if lang == 'kk' else 'Русский' if lang == 'ru' else 'English'

    # Formatted Markdown, assembled in one join instead of repeated "+=".
    return "".join([
        "# 🧠 Eroha Summarizer PRO++++ v2.3 SEO Edition\n",
        f"## 🌍 Language: {language_label}\n",
        f"### 📅 Date: {date_now}\n",
        f"### 📌 Topic: {topic}\n",
        f"### 💬 Sentiment: {sentiment}\n\n",
        "---\n\n",
        f"📄 **Summary:**\n{summary}\n\n",
        "---\n\n",
        "## 🧭 SEO Optimization\n",
        f"**📰 Title:** {title}\n\n",
        f"**🔑 Keywords:** {keywords}\n\n",
        f"**📄 Meta Description:** {meta_description}\n\n",
        f"**🔗 Slug:** `{slug}`\n\n",
        f"**📊 SEO Score:** {seo_status}\n\n",
        "---\n\n",
        "🔖 **Tags:** #Eroha #AI #SEO #Press #Kazakhstan #News\n",
    ])
162
+
163
+ # =============== API & UI ===============
164
+
165
# FastAPI application object; CORS accepts any origin, method and header.
app = FastAPI(
    title="Eroha Summarizer PRO++++ v2.3 SEO Edition",
)
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
167
 
168
@app.post("/api/summarize")
async def summarize_api(data: dict):
    """POST /api/summarize — return the Markdown report for ``data["text"]``.

    A missing or non-string ``text`` (e.g. JSON ``null``) is treated as empty
    input, so the caller receives the regular "enter text" message instead of
    an internal error.
    """
    # Local import: fastapi is already a dependency of this module.
    from fastapi.concurrency import run_in_threadpool

    raw = data.get("text", "")
    text = raw if isinstance(raw, str) else ""
    # summarize_text runs blocking model inference; calling it directly inside
    # an ``async def`` endpoint would stall the event loop for every other
    # request, so it is pushed to the worker thread pool.
    summary = await run_in_threadpool(summarize_text, text)
    return {"summary": summary}
 
 
 
 
 
 
 
 
 
 
 
 
172
 
173
+ # Gradio UI
174
# Gradio UI: input textbox, "summarize" and "clear" buttons, Markdown result.
with gr.Blocks(title="Eroha Summarizer PRO++++ v2.3 SEO Edition") as iface:
    gr.Markdown("# 🧠 Eroha Summarizer PRO++++ v2.3 SEO Edition (Kazakh Supported)")
    gr.Markdown("AI-инструмент для суммаризации, анализа, SEO и автогенерации метаданных (с поддержкой казахского 🇰🇿)")

    with gr.Row():
        input_box = gr.Textbox(lines=8, label="Введите текст / Мәтінді енгізіңіз")
    with gr.Row():
        summarize_btn = gr.Button("🚀 Анализ и SEO-суммаризация")
        clear_btn = gr.Button("🧹 Очистить")

    output_box = gr.Markdown(label="Результат / Result")

    def process_input(text):
        """Gradio callback: forward the textbox content to ``summarize_text``."""
        return summarize_text(text)

    summarize_btn.click(process_input, inputs=input_box, outputs=output_box)
    # "Clear" resets only the input box; the result pane keeps its content.
    clear_btn.click(lambda: "", None, input_box)

# NOTE(review): launch() runs at import time (no ``__main__`` guard) and the
# port is fixed at 7860. Only the Gradio interface is launched here; nothing
# in the visible code serves the FastAPI ``app`` — confirm how the
# /api/summarize route is meant to be exposed.
iface.launch(server_name="0.0.0.0", server_port=7860)