Cascade-Edge

Sleeping

App Files Files Community

VSPAN committed on Nov 21, 2025

Commit

b2496fc

verified ·

1 Parent(s): 2eb8293

Update app.py

Browse files

Files changed (1) hide show

app.py +50 -45

app.py CHANGED Viewed

@@ -9,54 +9,48 @@ import re
 from pydub import AudioSegment
 from transformers import pipeline
-# --- НАСТРОЙКИ ФЭНТЕЗИ ---
 VOICE_CONFIG = {
-    "narrator": {"voice": "ru-RU-DmitryNeural", "pitch": "-7Hz", "rate": "-5%"}, # Эпичный бас
-    "male":     {"voice": "ru-RU-DenisNeural",  "pitch": "-2Hz", "rate": "+0%"}, # Обычный
-    "female":   {"voice": "ru-RU-SvetlanaNeural","pitch": "+5Hz", "rate": "+5%"}  # Нежный
 }
 TEMP_DIR = tempfile.gettempdir()
 # --- ЗАГРУЗКА МАЛЕНЬКОЙ НЕЙРОСЕТИ ---
-# Используем Qwen 2.5 0.5B Instruct. Она весит копейки и работает мгновенно.
 MODEL_ID = "Qwen/Qwen2.5-0.5B-Instruct"
-print(f"🚀 Загрузка легкой модели {MODEL_ID}...")
 try:
-    # Создаем пайплайн для генерации текста
     pipe = pipeline(
         "text-generation",
         model=MODEL_ID,
-        device_map="auto", # Автоматически использует CPU или GPU
         max_new_tokens=2048,
         trust_remote_code=True
     )
-    print("✅ Модель готова к работе!")
 except Exception as e:
     print(f"❌ Ошибка загрузки модели: {e}")
     pipe = None
 def analyze_text_with_tiny_ai(text):
-    """
-    Использует маленькую модель для разбора текста.
-    """
     if not pipe:
         return [{"text": text, "role": "narrator"}]
-    # Простой промпт для маленькой модели.
-    # Маленькие модели любят конкретику.
     system_prompt = (
-        "Ты редактор. Твоя задача - определить, кто говорит фразу.\n"
-        "Варианты ролей: narrator (автор), male (мужчина), female (женщина).\n"
-        "Ответь СТРОГО в формате JSON списка."
     )
-    user_prompt = f"""Разбей этот текст на роли:
     "{text}"
-    Пример ответа:
     [{{"text": "- Привет", "role": "male"}}, {{"text": "- сказала она", "role": "narrator"}}]
     """
@@ -69,15 +63,13 @@ def analyze_text_with_tiny_ai(text):
         outputs = pipe(messages)
         result_text = outputs[0]["generated_text"][-1]["content"]
-        # Очистка ответа (маленькие модели могут добавить лишний текст)
         json_match = re.search(r'\[.*\]', result_text, re.DOTALL)
         if json_match:
             json_str = json_match.group(0)
-            data = json.loads(json_str)
-            return data
         else:
-            # Если JSON не найден, пробуем распарсить грубо или возвращаем ошибку
-            print(f"⚠️ Модель ответила не JSON: {result_text}")
             return [{"text": text, "role": "narrator"}]
     except Exception as e:
@@ -90,44 +82,46 @@ async def generate_segment(text, role):
     if not text.strip(): return None
     conf = VOICE_CONFIG.get(role, VOICE_CONFIG["narrator"])
     path = os.path.join(TEMP_DIR, f"{uuid.uuid4().hex}.mp3")
     try:
         comm = edge_tts.Communicate(text, conf["voice"], rate=conf["rate"], pitch=conf["pitch"])
         await comm.save(path)
         return path
-    except: return None
 async def process_book(text):
-    if not text.strip(): raise gr.Warning("Пустой текст!")
-    print("⚡ AI анализ (Lite)...")
     segments = analyze_text_with_tiny_ai(text)
-    print(f"Результат анализа: {len(segments)} кусков.")
     full_audio = AudioSegment.empty()
     temp_files = []
     progress = gr.Progress()
     for item in progress.tqdm(segments, desc="Озвучка"):
-        # Если модель вернула просто строку вместо словаря (бывает у маленьких моделей)
-        if isinstance(item, str):
-            txt, role = item, "narrator"
-        else:
             txt = item.get("text", "")
             role = item.get("role", "narrator")
         path = await generate_segment(txt, role)
         if path:
             temp_files.append(path)
             seg = AudioSegment.from_mp3(path)
-            # Мягкая склейка (Crossfade 50ms)
             if len(full_audio) > 0:
                 full_audio = full_audio.append(seg, crossfade=50)
             else:
                 full_audio = seg
             await asyncio.sleep(0.1)
-    out_path = os.path.join(TEMP_DIR, f"lite_fantasy_{uuid.uuid4().hex}.mp3")
     full_audio.export(out_path, format="mp3")
     for f in temp_files:
@@ -137,16 +131,27 @@ async def process_book(text):
     return out_path, segments
 # --- ИНТЕРФЕЙС ---
-css = "body {background-color: #1e1e2e; color: #cdd6f4;} .gradio-container {font-family: 'Verdana', sans-serif;}"
-theme = gr.themes.Soft(primary_hue="indigo")
-with gr.Blocks(theme=theme, css=css, title="Fantasy TTS Lite") as demo:
-    gr.Markdown("# ⚡ Fantasy TTS: Lite Edition (Qwen 0.5B)")
-    gr.Markdown("Использует сверхлегкую нейросеть для скорости. Работает на слабом железе.")
-    with gr.Row():
-        inp = gr.Textbox(label="Текст", lines=8, value='— Стой! — крикнул он.\nОна обернулась: — Зачем?')
-        btn = gr.Button("🚀 Озвучить", variant="primary")
     with gr.Row():
-        out_audio = gr.

 from pydub import AudioSegment
 from transformers import pipeline
+# --- НАСТРОЙКИ ГОЛОСОВ (ФЭНТЕЗИ) ---
 VOICE_CONFIG = {
+    "narrator": {"voice": "ru-RU-DmitryNeural", "pitch": "-7Hz", "rate": "-5%"},
+    "male":     {"voice": "ru-RU-DenisNeural",  "pitch": "-2Hz", "rate": "+0%"},
+    "female":   {"voice": "ru-RU-SvetlanaNeural","pitch": "+5Hz", "rate": "+5%"}
 }
 TEMP_DIR = tempfile.gettempdir()
 # --- ЗАГРУЗКА МАЛЕНЬКОЙ НЕЙРОСЕТИ ---
+# Qwen 2.5 0.5B Instruct - очень легкая, но умная
 MODEL_ID = "Qwen/Qwen2.5-0.5B-Instruct"
+print(f"🚀 Загрузка модели {MODEL_ID}...")
 try:
     pipe = pipeline(
         "text-generation",
         model=MODEL_ID,
+        device_map="auto",
         max_new_tokens=2048,
         trust_remote_code=True
     )
+    print("✅ Модель готова!")
 except Exception as e:
     print(f"❌ Ошибка загрузки модели: {e}")
     pipe = None
 def analyze_text_with_tiny_ai(text):
+    """Анализ текста легкой нейросетью."""
     if not pipe:
         return [{"text": text, "role": "narrator"}]
     system_prompt = (
+        "Ты редактор. Твоя задача - определить роль для озвучки.\n"
+        "Роли: narrator (автор), male (мужчина), female (женщина).\n"
+        "Верни ТОЛЬКО JSON список."
     )
+    user_prompt = f"""Разбей текст на роли:
     "{text}"
+    Пример JSON ответа:
     [{{"text": "- Привет", "role": "male"}}, {{"text": "- сказала она", "role": "narrator"}}]
     """
         outputs = pipe(messages)
         result_text = outputs[0]["generated_text"][-1]["content"]
+        # Поиск JSON в ответе
         json_match = re.search(r'\[.*\]', result_text, re.DOTALL)
         if json_match:
             json_str = json_match.group(0)
+            return json.loads(json_str)
         else:
+            print(f"⚠️ Не JSON: {result_text}")
             return [{"text": text, "role": "narrator"}]
     except Exception as e:
     if not text.strip(): return None
     conf = VOICE_CONFIG.get(role, VOICE_CONFIG["narrator"])
     path = os.path.join(TEMP_DIR, f"{uuid.uuid4().hex}.mp3")
     try:
         comm = edge_tts.Communicate(text, conf["voice"], rate=conf["rate"], pitch=conf["pitch"])
         await comm.save(path)
         return path
+    except:
+        return None
 async def process_book(text):
+    if not text.strip(): raise gr.Warning("Введите текст!")
+    print("⚡ Анализ текста...")
     segments = analyze_text_with_tiny_ai(text)
     full_audio = AudioSegment.empty()
     temp_files = []
     progress = gr.Progress()
     for item in progress.tqdm(segments, desc="Озвучка"):
+        # Защита от некорректного формата
+        if isinstance(item, dict):
             txt = item.get("text", "")
             role = item.get("role", "narrator")
+        else:
+            txt = str(item)
+            role = "narrator"
         path = await generate_segment(txt, role)
         if path:
             temp_files.append(path)
             seg = AudioSegment.from_mp3(path)
+            # Плавная склейка (50ms)
             if len(full_audio) > 0:
                 full_audio = full_audio.append(seg, crossfade=50)
             else:
                 full_audio = seg
             await asyncio.sleep(0.1)
+    out_path = os.path.join(TEMP_DIR, f"fantasy_{uuid.uuid4().hex}.mp3")
     full_audio.export(out_path, format="mp3")
     for f in temp_files:
     return out_path, segments
 # --- ИНТЕРФЕЙС ---
+css = """
+body {background-color: #111827; color: #e5e7eb;}
+.container {max-width: 900px; margin: auto;}
+"""
+theme = gr.themes.Soft(primary_hue="indigo", secondary_hue="slate")
+with gr.Blocks(theme=theme, css=css, title="Fantasy Lite TTS") as demo:
+    gr.Markdown("# ⚡ Fantasy Lite TTS (Qwen 0.5B)")
     with gr.Row():
+        with gr.Column(scale=2):
+            inp = gr.Textbox(label="Текст", lines=10, placeholder="Вставьте текст...", value='— Кто здесь? — спросил рыцарь.\nВедьма усмехнулась: — Твоя судьба.')
+            btn = gr.Button("🚀 Создать", variant="primary")
+        with gr.Column(scale=1):
+            out_audio = gr.Audio(label="Результат", type="filepath")
+            out_debug = gr.JSON(label="Лог нейросети")
+    btn.click(process_book, inputs=inp, outputs=[out_audio, out_debug])
+if __name__ == "__main__":
+    demo.queue().launch()