Spaces:

Yermek68
/

eroha-agentapi

Running

App Files Files Community

Yermek68 committed on Dec 9, 2025

Commit

b3e6415

verified ·

1 Parent(s): ee49bfe

Update app.py

Browse files

Files changed (1) hide show

app.py +153 -161

app.py CHANGED Viewed

@@ -3,198 +3,190 @@ from transformers import pipeline
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 from langdetect import detect
-from functools import lru_cache
 import re
-# ======================================================
-# 🚀 Eroha Summarizer PRO++++ v2.1.1 Stable (by Yermek68)
-# ======================================================
-# --- Кэш моделей ---
-@lru_cache(maxsize=10)
-def get_summarizer(lang: str, long: bool = False):
-    if lang == "ru":
-        model = "IlyaGusev/mbart_ru_sum_gazeta"
-    elif lang == "de":
-        model = "ml6team/mbart-large-cc25-cnn-distilled-german"
-    elif lang == "es":
-        model = "mrm8488/bert2bert_shared-spanish-finetuned-summarization"
-    elif lang == "fr":
-        model = "mrm8488/mbart-large-finetuned-opus-fr-en"
-    else:
-        model = "facebook/bart-large-cnn" if not long else "pszemraj/led-large-book-summary"
-    return pipeline("summarization", model=model)
-@lru_cache(maxsize=10)
-def get_sentiment_analyzer(lang: str):
-    if lang == "ru":
-        model = "cointegrated/rubert-tiny2-emo"
-    else:
-        model = "j-hartmann/emotion-english-distilroberta-base"
-    return pipeline("text-classification", model=model, top_k=None)
-# --- Вспомогательные функции ---
-def clean_text(text: str) -> str:
-    text = re.sub(r"[^\x00-\x7Fа-яА-ЯёЁ.,!?;:\-–—«»\"'()\[\] ]", "", text)
-    text = text.replace("▁", " ").replace("<n>", "\n").replace("<s>", "").replace("</s>", "")
-    text = text.replace("Ġ", " ").replace("Â", "").replace("", "").replace("�", "").strip()
-    return re.sub(" +", " ", text)
-def detect_topic(text: str):
-    topics = {
-        "Политика": ["правительство", "закон", "президент", "выборы"],
-        "Экономика": ["компания", "рынок", "инвестиции", "бизнес"],
-        "Технологии": ["AI", "робот", "интернет", "технологии"],
-        "Спорт": ["команда", "матч", "игра"],
-        "Наука": ["исследование", "данные", "учёные"],
-    }
-    t = text.lower()
-    for topic, keys in topics.items():
-        if any(k in t for k in keys):
-            return topic
-    return "Общее / неопределённое направление"
-def detect_genre(text: str):
-    t = text.lower()
-    if any(w in t for w in ["заявил", "сообщил", "вчера", "компания", "год"]):
-        return "📰 Новость"
-    if any(w in t for w in ["исследование", "данные", "анализ", "эксперимент"]):
-        return "📊 Аналитика"
-    if any(w in t for w in ["купил", "доволен", "рекомендую", "не советую"]):
-        return "🗣️ Отзыв"
-    if any(w in t for w in ["коммерческий", "продукт", "цена", "скидка"]):
-        return "📢 Реклама"
-    return "📄 Текст общего типа"
-# =====================================================
-# 🧩 Основная функция
-# =====================================================
-def summarize_text(text: str):
-    if not text.strip():
-        return "❌ Введите текст для анализа."
     try:
         lang = detect(text)
     except:
         lang = "en"
-    text = clean_text(text)
-    words = len(text.split())
-    long_doc = words > 800
-    summarizer = get_summarizer(lang, long_doc)
-    sentiment_model = get_sentiment_analyzer(lang)
-    if words < 50:
-        summary = text
-    else:
-        max_len, min_len = (250, 60) if words > 300 else (120, 40)
-        summary_raw = summarizer(text, max_length=max_len, min_length=min_len, do_sample=False)[0]["summary_text"]
-        summary = clean_text(summary_raw)
-    # Анализ эмоций
-    emotions = sentiment_model(summary)
-    emo_label = emotions[0]["label"]
-    emo_score = emotions[0].get("score", 0)
-    emo_map = {
-        "joy": "😊 Радость",
-        "sadness": "😢 Грусть",
-        "anger": "😠 Гнев",
-        "fear": "😨 Тревога",
-        "neutral": "😐 Нейтральное",
-        "surprise": "😲 Удивление",
-        "disgust": "🤢 Отвращение"
-    }
-    emotion = emo_map.get(emo_label.lower(), "😐 Нейтральное")
-    topic = detect_topic(text)
-    genre = detect_genre(text)
-    color = "green" if "Радость" in emotion else "red" if "Грусть" in emotion or "Гнев" in emotion else "orange"
-    formatted_output = f"""
-# 🧠 <span style='color:#0073e6'>Eroha Summarizer PRO++++ v2.1.1 Stable</span>
-_(Автоязык: {'Русский' if lang == 'ru' else 'Английский'})_
----
-### 📌 Тема: <b>{topic}</b>
-### 🗂️ Жанр: {genre}
-### 💬 Настроение: <span style='color:{color}'>{emotion}</span> ({emo_score:.2f})
----
-## 📘 Резюме:
-{summary}
----
-### ✨ TL;DR:
-{summary[:200]}{'...' if len(summary) > 200 else ''}
----
-*Eroha Intelligence Suite — Multilingual AI summarizer powered by Hugging Face*
-"""
-    return formatted_output.strip()
-# =====================================================
-# 🌐 FastAPI backend
-# =====================================================
-app = FastAPI(title="Eroha Summarizer PRO++++ v2.1.1", version="2.1.1")
 app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
-@app.post("/api/full")
-def api_full(data: dict):
     text = data.get("text", "")
-    summary = summarize_text(text)
-    return {"summary": summary}
-@app.post("/api/lite")
-def api_lite(data: dict):
-    text = data.get("text", "")
-    result = summarize_text(text)
-    clean_result = re.sub(r"<[^>]+>", "", result)
-    return {"tldr": clean_result[:300]}
-# =====================================================
-# 🎨 Gradio интерфейс
-# =====================================================
-def gradio_summary(text):
-    return summarize_text(text)
-with gr.Blocks(title="Eroha Summarizer PRO++++ v2.1.1") as iface:
-    gr.Markdown("## 🧠 Eroha Summarizer PRO++++ v2.1.1 Stable\nAI-инструмент нового поколения для анализа, темы, эмоций и автоопределения языка (рус/англ/нем/исп/фр).")
-    text_input = gr.Textbox(lines=10, label="Введите текст для анализа и суммаризации")
-    result_output = gr.Markdown(label="Результат")
     with gr.Row():
-        copy_btn = gr.Button("📋 Копировать")
-        download_btn = gr.Button("💾 Скачать результат")
-    text_input.submit(gradio_summary, inputs=text_input, outputs=result_output)
-    copy_btn.click(lambda x: x, inputs=result_output, outputs=None)
-    download_btn.click(lambda x: gr.File.update(value=x.encode("utf-8"), visible=True), inputs=result_output, outputs=None)
-    gr.Markdown("---\n✨ _Eroha Intelligence Suite © 2025 — by Yermek68_")
-if __name__ == "__main__":
-    import os
-    try:
-        # Очередь безопасно активируем без параметров
-        iface.queue()  # поддерживается всеми версиями Gradio >=5.0
-        # Основной запуск
-        iface.launch(
-            server_name="0.0.0.0",
-            server_port=int(os.getenv("PORT", 7860)),
-            share=False,          # безопасно для Hugging Face
-            ssr_mode=False,       # предотвращает повторные рестарты
-            debug=False,          # чистый лог без шума
-        )
-    except Exception as e:
-        print(f"⚠️ Runtime restart or environment reload detected: {e}")

 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 from langdetect import detect
 import re
+import datetime
+import hashlib
+# Model caches: loaded pipelines keyed by model name, filled lazily by
+# get_summarizer() and get_sentiment_analyzer().
+summarizers = {}
+analyzers = {}
+# =============== UTILITIES ===============
+def clean_text(text: str):
+    """Strip unsupported symbols from *text* and normalise its whitespace.
+
+    Only word characters, whitespace and a small set of punctuation marks
+    are kept. Whitespace is collapsed after the symbols are removed, so a
+    dropped symbol that sat between two spaces does not leave a double
+    space behind.
+
+    Args:
+        text: Raw input text.
+
+    Returns:
+        The cleaned text, single-spaced and without leading or trailing
+        whitespace.
+    """
+    # Drop everything that is not a word character, whitespace or one of
+    # the explicitly allowed punctuation marks.
+    text = re.sub(r"[^\w\s.,!?%\-–:;()\"'’«»]", "", text)
+    # Collapse newlines, tabs and repeated spaces last: deleting a symbol
+    # can bring two spaces together.
+    return re.sub(r"\s+", " ", text).strip()
+def detect_language(text: str):
+    """Return a language code for *text*, with a Kazakh override.
+
+    The code comes from ``detect``; if detection fails, ``"en"`` is used.
+    Whatever was detected, the result is forced to ``"kk"`` when the text
+    contains at least one letter from the Kazakh-specific set below.
+
+    Args:
+        text: Text to classify.
+
+    Returns:
+        A language code string such as ``"ru"``, ``"en"`` or ``"kk"``.
+    """
     try:
         lang = detect(text)
+    except Exception:
+        # Any detection failure falls back to English. Catching Exception
+        # rather than using a bare except lets KeyboardInterrupt and
+        # SystemExit propagate.
         lang = "en"
+    # Letters used to recognise Kazakh Cyrillic text.
+    # NOTE(review): some of these (e.g. "і") also occur in other Cyrillic
+    # alphabets, so this heuristic can over-match - confirm it is acceptable.
+    kazakh_letters = "қңәөүһіұғ"
+    lowered = text.lower()  # lower-case once instead of once per letter
+    if any(ch in lowered for ch in kazakh_letters):
+        lang = "kk"
+    return lang
+def generate_slug(title: str):
+    """Build an SEO-friendly URL path for *title*.
+
+    The title is lower-cased, every run of non-alphanumeric characters
+    becomes a single hyphen, and the first six hex digits of the title's
+    MD5 digest are appended to keep slugs distinct.
+
+    Args:
+        title: Human-readable headline.
+
+    Returns:
+        A path of the form ``/news/<slug>-<hash>``, or ``/news/<hash>``
+        when the title contains no letters or digits.
+    """
+    # [\W_] is Unicode-aware, so Kazakh letters and "ё" stay in the slug;
+    # explicit a-z/а-я ranges would turn them into hyphens.
+    slug = re.sub(r"[\W_]+", "-", title.lower()).strip("-")
+    # MD5 is used only as a short, stable fingerprint, not for security.
+    slug_hash = hashlib.md5(title.encode()).hexdigest()[:6]
+    if not slug:
+        return f"/news/{slug_hash}"
+    return f"/news/{slug}-{slug_hash}"
+# =============== MODELS ===============
+def get_summarizer(lang: str):
+    """Return the summarization pipeline used for *lang*.
+
+    Russian and Kazakh have dedicated model names; every other language
+    code uses the default model. Pipelines are created on first use and
+    then served from the module-level ``summarizers`` cache.
+    """
+    model_by_lang = {
+        "ru": "IlyaGusev/mbart_ru_sum_gazeta",
+        "kk": "facebook/mbart-large-50-many-to-many-mmt",
+    }
+    model_name = model_by_lang.get(lang, "facebook/bart-large-cnn")
+    cached = summarizers.get(model_name)
+    if cached is None:
+        # First request for this model: load it once and remember it.
+        cached = pipeline("summarization", model=model_name)
+        summarizers[model_name] = cached
+    return cached
+def get_sentiment_analyzer(lang: str):
+    """Return the sentiment-analysis pipeline used for *lang*.
+
+    Russian and Kazakh share one model name; all other language codes use
+    another. Pipelines are created on first use and then served from the
+    module-level ``analyzers`` cache.
+    """
+    model_name = (
+        "nlptown/bert-base-multilingual-uncased-sentiment"
+        if lang in ("ru", "kk")
+        else "cardiffnlp/twitter-roberta-base-sentiment"
+    )
+    cached = analyzers.get(model_name)
+    if cached is None:
+        # First request for this model: load it once and remember it.
+        cached = pipeline("sentiment-analysis", model=model_name)
+        analyzers[model_name] = cached
+    return cached
+# =============== CONTENT ===============
+def extract_keywords(text: str, top_n: int = 7):
+    """Return the most frequent long words of *text* as a comma-separated string.
+
+    A crude heuristic: words of five or more characters are counted
+    case-insensitively and the ``top_n`` most frequent are joined with
+    ``", "``. Ties keep the order of first appearance.
+    """
+    counts = {}
+    for token in re.findall(r"\b\w{5,}\b", text.lower()):
+        counts.setdefault(token, 0)
+        counts[token] += 1
+    # A stable sort on the count keeps first-seen order among equal counts.
+    ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)
+    return ", ".join(word for word, _ in ranked[:top_n])
+def detect_topic(text: str):
+    """Guess the topic of *text* from marker substrings.
+
+    Topics are tried in declaration order and the first one with any
+    marker occurring in the lower-cased text wins. Matching is by plain
+    substring, so a marker can also match inside a longer word. A generic
+    label is returned when nothing matches.
+    """
+    markers_by_topic = {
+        "Экономика": ["рынок", "компания", "акция", "инвестиция", "сату", "қаржы"],
+        "Технологии": ["ai", "робот", "интернет", "жасанды интеллект"],
+        "Саясат": ["үкімет", "закон", "президент", "выборы"],
+        "Ғылым": ["зерттеу", "ғалым", "эксперимент"],
+        "Спорт": ["матч", "команда", "спорт"],
+    }
+    haystack = text.lower()
+    return next(
+        (
+            name
+            for name, markers in markers_by_topic.items()
+            if any(marker in haystack for marker in markers)
+        ),
+        "Жалпы тақырып / Общая тема",
+    )
+# =============== MAIN LOGIC ===============
+def _sentiment_label_to_text(label: str):
+    """Map a raw classifier label to the bilingual sentiment caption."""
+    label = label.lower()
+    positive = "😊 Позитивті / Позитивное"
+    negative = "😞 Теріс / Негативное"
+    neutral = "😐 Бейтарап / Нейтральное"
+    # cardiffnlp/twitter-roberta-base-sentiment reports generic ids
+    # (LABEL_0 negative, LABEL_1 neutral, LABEL_2 positive). They must be
+    # matched exactly: a substring test on "1" would read "label_1" as
+    # negative and leave the other two as neutral.
+    generic_ids = {"label_0": negative, "label_1": neutral, "label_2": positive}
+    if label in generic_ids:
+        return generic_ids[label]
+    # Star ratings ("1 star" ... "5 stars") and named labels ("positive", ...).
+    if "5" in label or "pos" in label:
+        return positive
+    if "1" in label or "neg" in label:
+        return negative
+    return neutral
+
+
+def summarize_text(text: str):
+    """Summarize *text* and return a Markdown report with SEO metadata.
+
+    The text is cleaned, its language detected, then summarized and scored
+    for sentiment with the language-specific pipelines. A topic, keywords,
+    title, meta description, slug and a coarse SEO verdict are derived and
+    rendered as Markdown.
+
+    Args:
+        text: Source text to analyse.
+
+    Returns:
+        The Markdown report, or a short prompt message when *text* is not
+        a string or contains nothing usable after cleaning.
+    """
+    empty_message = "⚠️ Введите текст для анализа."
+    # Guard against None / non-string payloads as well as blank input.
+    if not isinstance(text, str) or not text.strip():
+        return empty_message
+    text = clean_text(text)
+    if not text:
+        # Cleaning removed everything (input was symbols only).
+        return empty_message
+    lang = detect_language(text)
+    summarizer = get_summarizer(lang)
+    sentiment_model = get_sentiment_analyzer(lang)
+
+    # Scale the summary length bounds with the input size.
+    words = len(text.split())
+    if words < 80:
+        max_len, min_len = 70, 20
+    elif words < 300:
+        max_len, min_len = 140, 40
+    else:
+        max_len, min_len = 220, 60
+
+    # truncation=True clips inputs that exceed the model's maximum length
+    # instead of letting the model fail on them.
+    summary = summarizer(
+        text,
+        max_length=max_len,
+        min_length=min_len,
+        do_sample=False,
+        truncation=True,
+    )[0]["summary_text"]
+
+    sentiment = _sentiment_label_to_text(sentiment_model(summary)[0]["label"])
+
+    # SEO metadata derived from the summary and the cleaned source text.
+    topic = detect_topic(text)
+    keywords = extract_keywords(text)
+    title = summary.split(".")[0][:80].strip()
+    meta_description = summary[:160].strip()
+    slug = generate_slug(title)
+
+    # One point per satisfied criterion; two of three counts as publishable.
+    score = sum(
+        (
+            len(keywords.split(",")) >= 5,
+            len(meta_description) >= 100,
+            len(title) > 20,
+        )
+    )
+    seo_status = "✅ Оптимально для публикации" if score >= 2 else "⚠️ Недостаточно данных для SEO"
+    date_now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M")
+    language_name = "Қазақ (Kazakh)" if lang == "kk" else "Русский" if lang == "ru" else "English"
+
+    # Assemble the Markdown report in one pass.
+    parts = [
+        "# 🧠 Eroha Summarizer PRO++++ v2.3 SEO Edition\n",
+        f"## 🌍 Language: {language_name}\n",
+        f"### 📅 Date: {date_now}\n",
+        f"### 📌 Topic: {topic}\n",
+        f"### 💬 Sentiment: {sentiment}\n\n",
+        "---\n\n",
+        f"📄 **Summary:**\n{summary}\n\n",
+        "---\n\n",
+        "## 🧭 SEO Optimization\n",
+        f"**📰 Title:** {title}\n\n",
+        f"**🔑 Keywords:** {keywords}\n\n",
+        f"**📄 Meta Description:** {meta_description}\n\n",
+        f"**🔗 Slug:** `{slug}`\n\n",
+        f"**📊 SEO Score:** {seo_status}\n\n",
+        "---\n\n",
+        "🔖 **Tags:** #Eroha #AI #SEO #Press #Kazakhstan #News\n",
+    ]
+    return "".join(parts)
+# =============== API & UI ===============
+# FastAPI application object; the HTTP route below is registered on it.
+app = FastAPI(title="Eroha Summarizer PRO++++ v2.3 SEO Edition")
+# CORS is fully open: every origin, method and header is allowed.
 app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
+@app.post("/api/summarize")
+async def summarize_api(data: dict):
+    """Summarize the text supplied in the request body.
+
+    Reads the ``"text"`` key of *data* (an empty string when the key is
+    absent) and returns ``{"summary": ...}`` holding whatever
+    ``summarize_text`` produced for it.
+    """
+    # NOTE(review): summarize_text is called synchronously inside an async
+    # handler - presumably this blocks the event loop while it runs; confirm.
     text = data.get("text", "")
+    return {"summary": summarize_text(text)}
+# Gradio UI: a text box, two buttons and a Markdown result pane.
+with gr.Blocks(title="Eroha Summarizer PRO++++ v2.3 SEO Edition") as iface:
+    gr.Markdown("# 🧠 Eroha Summarizer PRO++++ v2.3 SEO Edition (Kazakh Supported)")
+    gr.Markdown("AI-инструмент для суммаризации, анализа, SEO и автогенерации метаданных (с поддержкой казахского 🇰🇿)")
     with gr.Row():
+        input_box = gr.Textbox(lines=8, label="Введите текст / Мәтінді енгізіңіз")
+    with gr.Row():
+        summarize_btn = gr.Button("🚀 Анализ и SEO-суммаризация")
+        clear_btn = gr.Button("🧹 Очистить")
+    output_box = gr.Markdown(label="Результат / Result")
+    def process_input(text):
+        # Thin wrapper handed to Gradio as the click handler.
+        return summarize_text(text)
+    # Run the summarizer on the text box content and render the report.
+    summarize_btn.click(process_input, inputs=input_box, outputs=output_box)
+    # Reset the input box to an empty string.
+    clear_btn.click(lambda: "", None, input_box)
+# Runs at import time (no __main__ guard) and serves the Gradio interface.
+# NOTE(review): the FastAPI `app` defined above is not passed to launch() -
+# TODO confirm how /api/summarize is actually served.
+iface.launch(server_name="0.0.0.0", server_port=7860)