Yermek68 committed on
Commit
865def6
·
verified ·
1 Parent(s): b3e6415

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -45
app.py CHANGED
@@ -6,22 +6,17 @@ from langdetect import detect
6
  import re
7
  import datetime
8
  import hashlib
 
9
 
10
- # Кэш моделей
11
- summarizers = {}
12
- analyzers = {}
13
-
14
- # =============== УТИЛИТЫ ===============
15
 
16
  def clean_text(text: str):
17
- """Очистка текста от мусора и нечитабельных символов"""
18
  text = text.replace("\n", " ").replace("\r", " ")
19
  text = re.sub(r"\s+", " ", text)
20
  text = re.sub(r"[^\w\s.,!?%\-–:;()\"'’«»]", "", text)
21
  return text.strip()
22
 
23
  def detect_language(text: str):
24
- """Определение языка (включая казахский 🇰🇿)"""
25
  try:
26
  lang = detect(text)
27
  except:
@@ -30,32 +25,30 @@ def detect_language(text: str):
30
  kazakh_letters = "қңәөүһіұ"
31
  if any(ch in text.lower() for ch in kazakh_letters):
32
  lang = "kk"
33
-
34
  return lang
35
 
36
  def generate_slug(title: str):
37
- """Генерация SEO-дружественной ссылки"""
38
  slug = re.sub(r"[^a-zA-Zа-яА-Я0-9]+", "-", title.lower()).strip("-")
39
  slug_hash = hashlib.md5(title.encode()).hexdigest()[:6]
40
  return f"/news/{slug}-{slug_hash}"
41
 
42
- # =============== МОДЕЛИ ===============
 
 
 
43
 
44
  def get_summarizer(lang: str):
45
- """Выбор модели суммаризации по языку"""
46
  if lang == "ru":
47
  model_name = "IlyaGusev/mbart_ru_sum_gazeta"
48
  elif lang == "kk":
49
  model_name = "facebook/mbart-large-50-many-to-many-mmt"
50
  else:
51
  model_name = "facebook/bart-large-cnn"
52
-
53
  if model_name not in summarizers:
54
  summarizers[model_name] = pipeline("summarization", model=model_name)
55
  return summarizers[model_name]
56
 
57
  def get_sentiment_analyzer(lang: str):
58
- """Выбор модели анализа настроения"""
59
  if lang in ["ru", "kk"]:
60
  model_name = "nlptown/bert-base-multilingual-uncased-sentiment"
61
  else:
@@ -64,10 +57,9 @@ def get_sentiment_analyzer(lang: str):
64
  analyzers[model_name] = pipeline("sentiment-analysis", model=model_name)
65
  return analyzers[model_name]
66
 
67
- # =============== КОНТЕНТ ===============
68
 
69
  def extract_keywords(text: str, top_n: int = 7):
70
- """Грубое извлечение ключевых слов (простая эвристика)"""
71
  words = re.findall(r"\b\w{5,}\b", text.lower())
72
  freq = {}
73
  for w in words:
@@ -76,9 +68,8 @@ def extract_keywords(text: str, top_n: int = 7):
76
  return ", ".join(keywords)
77
 
78
  def detect_topic(text: str):
79
- """Эвристика для определения темы"""
80
  topics = {
81
- "Экономика": ["рынок", "компания", "акция", "инвестиция", "сату", "қаржы"],
82
  "Технологии": ["ai", "робот", "интернет", "жасанды интеллект"],
83
  "Саясат": ["үкімет", "закон", "президент", "выборы"],
84
  "Ғылым": ["зерттеу", "ғалым", "эксперимент"],
@@ -90,12 +81,11 @@ def detect_topic(text: str):
90
  return topic
91
  return "Жалпы тақырып / Общая тема"
92
 
93
- # =============== ОСНОВНАЯ ЛОГИКА ===============
94
 
95
  def summarize_text(text: str):
96
- """Основная функция суммаризации + SEO"""
97
  if not text.strip():
98
- return "⚠️ Введите текст для анализа."
99
 
100
  text = clean_text(text)
101
  lang = detect_language(text)
@@ -103,7 +93,6 @@ def summarize_text(text: str):
103
  summarizer = get_summarizer(lang)
104
  sentiment_model = get_sentiment_analyzer(lang)
105
 
106
- # Оптимизация по длине
107
  words = len(text.split())
108
  if words < 80:
109
  max_len, min_len = 70, 20
@@ -112,10 +101,8 @@ def summarize_text(text: str):
112
  else:
113
  max_len, min_len = 220, 60
114
 
115
- # Суммаризация
116
  summary = summarizer(text, max_length=max_len, min_length=min_len, do_sample=False)[0]["summary_text"]
117
 
118
- # Анализ настроения
119
  sentiment = sentiment_model(summary)[0]["label"].lower()
120
  if "5" in sentiment or "pos" in sentiment:
121
  sentiment = "😊 Позитивті / Позитивное"
@@ -124,25 +111,27 @@ def summarize_text(text: str):
124
  else:
125
  sentiment = "😐 Бейтарап / Нейтральное"
126
 
127
- # SEO генерация
128
  topic = detect_topic(text)
129
  keywords = extract_keywords(text)
130
  title = summary.split(".")[0][:80].strip()
131
  meta_description = summary[:160].strip()
132
  slug = generate_slug(title)
 
133
 
134
- # SEO оценка
135
  score = 0
136
  score += 1 if len(keywords.split(",")) >= 5 else 0
137
  score += 1 if len(meta_description) >= 100 else 0
138
  score += 1 if len(title) > 20 else 0
139
  seo_status = "✅ Оптимально для публикации" if score >= 2 else "⚠️ Недостаточно данных для SEO"
140
 
141
- date_now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M")
 
 
 
 
142
 
143
- # Форматированный Markdown
144
- output = f"# 🧠 Eroha Summarizer PRO++++ v2.3 SEO Edition\n"
145
- output += f"## 🌍 Language: {'Қазақ (Kazakh)' if lang == 'kk' else 'Русский' if lang == 'ru' else 'English'}\n"
146
  output += f"### 📅 Date: {date_now}\n"
147
  output += f"### 📌 Topic: {topic}\n"
148
  output += f"### 💬 Sentiment: {sentiment}\n\n"
@@ -156,37 +145,45 @@ def summarize_text(text: str):
156
  output += f"**🔗 Slug:** `{slug}`\n\n"
157
  output += f"**📊 SEO Score:** {seo_status}\n\n"
158
  output += "---\n\n"
159
- output += f"🔖 **Tags:** #Eroha #AI #SEO #Press #Kazakhstan #News\n"
 
 
 
 
 
160
 
161
- return output
162
 
163
- # =============== API & UI ===============
164
 
165
- app = FastAPI(title="Eroha Summarizer PRO++++ v2.3 SEO Edition")
166
  app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
167
 
168
  @app.post("/api/summarize")
169
  async def summarize_api(data: dict):
170
  text = data.get("text", "")
171
- return {"summary": summarize_text(text)}
 
172
 
173
- # Gradio UI
174
- with gr.Blocks(title="Eroha Summarizer PRO++++ v2.3 SEO Edition") as iface:
175
- gr.Markdown("# 🧠 Eroha Summarizer PRO++++ v2.3 SEO Edition (Kazakh Supported)")
176
- gr.Markdown("AI-инструмент для суммаризации, анализа, SEO и автогенерации метаданных (с поддержкой казахского 🇰🇿)")
177
 
178
- with gr.Row():
179
- input_box = gr.Textbox(lines=8, label="Введите текст / Мәтінді енгізіңіз")
180
- with gr.Row():
181
- summarize_btn = gr.Button("🚀 Анализ и SEO-суммаризация")
182
- clear_btn = gr.Button("🧹 Очистить")
183
 
184
  output_box = gr.Markdown(label="Результат / Result")
185
 
186
  def process_input(text):
187
- return summarize_text(text)
 
188
 
189
- summarize_btn.click(process_input, inputs=input_box, outputs=output_box)
190
  clear_btn.click(lambda: "", None, input_box)
 
191
 
192
  iface.launch(server_name="0.0.0.0", server_port=7860)
 
6
  import re
7
  import datetime
8
  import hashlib
9
+ import io
10
 
11
+ # ================== Утилиты ==================
 
 
 
 
12
 
13
  def clean_text(text: str):
 
14
  text = text.replace("\n", " ").replace("\r", " ")
15
  text = re.sub(r"\s+", " ", text)
16
  text = re.sub(r"[^\w\s.,!?%\-–:;()\"'’«»]", "", text)
17
  return text.strip()
18
 
19
  def detect_language(text: str):
 
20
  try:
21
  lang = detect(text)
22
  except:
 
25
  kazakh_letters = "қңәөүһіұ"
26
  if any(ch in text.lower() for ch in kazakh_letters):
27
  lang = "kk"
 
28
  return lang
29
 
30
  def generate_slug(title: str):
 
31
  slug = re.sub(r"[^a-zA-Zа-яА-Я0-9]+", "-", title.lower()).strip("-")
32
  slug_hash = hashlib.md5(title.encode()).hexdigest()[:6]
33
  return f"/news/{slug}-{slug_hash}"
34
 
35
+ # ================== Модели ==================
36
+
37
+ summarizers = {}
38
+ analyzers = {}
39
 
40
  def get_summarizer(lang: str):
 
41
  if lang == "ru":
42
  model_name = "IlyaGusev/mbart_ru_sum_gazeta"
43
  elif lang == "kk":
44
  model_name = "facebook/mbart-large-50-many-to-many-mmt"
45
  else:
46
  model_name = "facebook/bart-large-cnn"
 
47
  if model_name not in summarizers:
48
  summarizers[model_name] = pipeline("summarization", model=model_name)
49
  return summarizers[model_name]
50
 
51
  def get_sentiment_analyzer(lang: str):
 
52
  if lang in ["ru", "kk"]:
53
  model_name = "nlptown/bert-base-multilingual-uncased-sentiment"
54
  else:
 
57
  analyzers[model_name] = pipeline("sentiment-analysis", model=model_name)
58
  return analyzers[model_name]
59
 
60
+ # ================== Контент ==================
61
 
62
  def extract_keywords(text: str, top_n: int = 7):
 
63
  words = re.findall(r"\b\w{5,}\b", text.lower())
64
  freq = {}
65
  for w in words:
 
68
  return ", ".join(keywords)
69
 
70
  def detect_topic(text: str):
 
71
  topics = {
72
+ "Экономика": ["рынок", "компания", "инвестиция", "қаржы", "сату"],
73
  "Технологии": ["ai", "робот", "интернет", "жасанды интеллект"],
74
  "Саясат": ["үкімет", "закон", "президент", "выборы"],
75
  "Ғылым": ["зерттеу", "ғалым", "эксперимент"],
 
81
  return topic
82
  return "Жалпы тақырып / Общая тема"
83
 
84
+ # ================== Основная логика ==================
85
 
86
  def summarize_text(text: str):
 
87
  if not text.strip():
88
+ return "⚠️ Введите текст для анализа.", None
89
 
90
  text = clean_text(text)
91
  lang = detect_language(text)
 
93
  summarizer = get_summarizer(lang)
94
  sentiment_model = get_sentiment_analyzer(lang)
95
 
 
96
  words = len(text.split())
97
  if words < 80:
98
  max_len, min_len = 70, 20
 
101
  else:
102
  max_len, min_len = 220, 60
103
 
 
104
  summary = summarizer(text, max_length=max_len, min_length=min_len, do_sample=False)[0]["summary_text"]
105
 
 
106
  sentiment = sentiment_model(summary)[0]["label"].lower()
107
  if "5" in sentiment or "pos" in sentiment:
108
  sentiment = "😊 Позитивті / Позитивное"
 
111
  else:
112
  sentiment = "😐 Бейтарап / Нейтральное"
113
 
 
114
  topic = detect_topic(text)
115
  keywords = extract_keywords(text)
116
  title = summary.split(".")[0][:80].strip()
117
  meta_description = summary[:160].strip()
118
  slug = generate_slug(title)
119
+ date_now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M")
120
 
 
121
  score = 0
122
  score += 1 if len(keywords.split(",")) >= 5 else 0
123
  score += 1 if len(meta_description) >= 100 else 0
124
  score += 1 if len(title) > 20 else 0
125
  seo_status = "✅ Оптимально для публикации" if score >= 2 else "⚠️ Недостаточно данных для SEO"
126
 
127
+ lang_name = {
128
+ "kk": "Қазақ (Kazakh)",
129
+ "ru": "Русский (Russian)",
130
+ "en": "Ағылшын (English)"
131
+ }.get(lang, "Multilingual")
132
 
133
+ output = f"# 🧠 Eroha Summarizer PRO++++ v2.4 Publisher Edition\n"
134
+ output += f"## 🌍 Language: {lang_name}\n"
 
135
  output += f"### 📅 Date: {date_now}\n"
136
  output += f"### 📌 Topic: {topic}\n"
137
  output += f"### 💬 Sentiment: {sentiment}\n\n"
 
145
  output += f"**🔗 Slug:** `{slug}`\n\n"
146
  output += f"**📊 SEO Score:** {seo_status}\n\n"
147
  output += "---\n\n"
148
+ output += f"🔖 **Tags:** #Eroha #AI #SEO #Publisher #Kazakhstan #Press #News\n"
149
+
150
+ # Создание Markdown-файла
151
+ filename = f"Eroha_Summary_{datetime.datetime.now().strftime('%Y-%m-%d_%H-%M')}.md"
152
+ md_bytes = io.BytesIO(output.encode('utf-8'))
153
+ md_bytes.name = filename
154
 
155
+ return output, md_bytes
156
 
157
+ # ================== API и UI ==================
158
 
159
+ app = FastAPI(title="Eroha Summarizer PRO++++ v2.4 Publisher Edition")
160
  app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
161
 
162
  @app.post("/api/summarize")
163
  async def summarize_api(data: dict):
164
  text = data.get("text", "")
165
+ summary, _ = summarize_text(text)
166
+ return {"summary": summary}
167
 
168
+ # Gradio интерфейс
169
# Gradio interface: text input, action buttons, Markdown result and a
# downloadable copy of the result.
with gr.Blocks(title="Eroha Summarizer PRO++++ v2.4 Publisher Edition") as iface:
    gr.Markdown("# 🧠 Eroha Summarizer PRO++++ v2.4 Publisher Edition")
    gr.Markdown("AI-инструмент для суммаризации, анализа, SEO и экспорта Markdown (с поддержкой казахского 🇰🇿)")

    input_box = gr.Textbox(lines=8, label="Введите текст / Мәтінді енгізіңіз")
    summarize_btn = gr.Button("🚀 Анализ и SEO-суммаризация")
    clear_btn = gr.Button("🧹 Очистить")
    copy_btn = gr.Button("📋 Копировать результат")
    download_btn = gr.File(label="💾 Скачать результат в Markdown")

    output_box = gr.Markdown(label="Результат / Result")

    def process_input(text):
        """Run the summarizer and return (markdown, path to the .md file).

        summarize_text returns an in-memory buffer, while a ``gr.File``
        output is fed a file path, so the buffer is written to a temporary
        file first. Returns ``None`` for the file when there is no buffer
        (empty input).
        """
        import os
        import tempfile

        summary, md_file = summarize_text(text)
        if md_file is None:
            return summary, None

        # One fresh directory per result keeps the friendly file name
        # without collisions between requests in the same minute.
        # NOTE(review): these temp directories are never cleaned up here.
        out_dir = tempfile.mkdtemp(prefix="eroha_")
        file_name = os.path.basename(getattr(md_file, "name", "") or "summary.md")
        md_path = os.path.join(out_dir, file_name)
        with open(md_path, "wb") as fh:
            fh.write(md_file.getvalue())
        return summary, md_path

    summarize_btn.click(process_input, inputs=input_box, outputs=[output_box, download_btn])
    clear_btn.click(lambda: "", None, input_box)
    # NOTE(review): this handler only writes the input box value back into
    # itself; it does not copy the result anywhere. A real clipboard copy
    # needs client-side support from the installed Gradio version - confirm.
    copy_btn.click(lambda t: t, input_box, input_box)

# NOTE(review): only the Gradio UI is launched here; nothing visible in this
# view serves the FastAPI ``app`` - confirm how the API is meant to be exposed.
iface.launch(server_name="0.0.0.0", server_port=7860)