Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -7,15 +7,13 @@ import uuid
|
|
| 7 |
import re
|
| 8 |
import shutil
|
| 9 |
from pydub import AudioSegment
|
| 10 |
-
from transformers import pipeline
|
| 11 |
|
| 12 |
-
# --- ПРОВЕРКА
|
| 13 |
-
# Проверяем, установлен ли ffmpeg (нужен для работы pydub)
|
| 14 |
if not shutil.which("ffmpeg"):
|
| 15 |
-
print("⚠️
|
| 16 |
|
| 17 |
# --- НАСТРОЙКИ ГОЛОСОВ ---
|
| 18 |
-
|
| 19 |
"narrator": {"voice": "ru-RU-DmitryNeural", "pitch": "-7Hz", "rate": "-5%"},
|
| 20 |
"male": {"voice": "ru-RU-DenisNeural", "pitch": "-2Hz", "rate": "+0%"},
|
| 21 |
"female": {"voice": "ru-RU-SvetlanaNeural","pitch": "+5Hz", "rate": "+5%"}
|
|
@@ -23,220 +21,133 @@ VOICE_PRESETS = {
|
|
| 23 |
|
| 24 |
TEMP_DIR = tempfile.gettempdir()
|
| 25 |
|
| 26 |
-
# ---
|
| 27 |
-
MODEL_ID = "Qwen/Qwen2.5-0.5B-Instruct"
|
| 28 |
-
try:
|
| 29 |
-
print(f"🚀 Загрузка модели {MODEL_ID}...")
|
| 30 |
-
pipe = pipeline(
|
| 31 |
-
"text-generation",
|
| 32 |
-
model=MODEL_ID,
|
| 33 |
-
device_map="auto",
|
| 34 |
-
max_new_tokens=20, # Ограничиваем токены для скорости
|
| 35 |
-
trust_remote_code=True
|
| 36 |
-
)
|
| 37 |
-
print("✅ AI готов!")
|
| 38 |
-
except Exception as e:
|
| 39 |
-
print(f"❌ Ошибка загрузки AI: {e}")
|
| 40 |
-
pipe = None
|
| 41 |
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
|
|
|
| 47 |
|
| 48 |
-
#
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
# Жесткий промпт для маленькой модели
|
| 53 |
-
prompt = [
|
| 54 |
-
{"role": "system", "content": "You are a classifier. Determine gender of the speaker in the Russian text. Options: male, female. Answer with single word."},
|
| 55 |
-
{"role": "user", "content": f"Text: '{text}'\nGender:"}
|
| 56 |
]
|
| 57 |
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
return "male" # Если диалог, но не понятно - пусть будет мужчина (чаще всего)
|
| 64 |
-
except:
|
| 65 |
-
return "narrator"
|
| 66 |
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
segments = []
|
|
|
|
| 71 |
|
| 72 |
for p in paragraphs:
|
| 73 |
p = p.strip()
|
| 74 |
if not p: continue
|
| 75 |
|
| 76 |
-
#
|
| 77 |
-
|
| 78 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
|
| 80 |
return segments
|
| 81 |
|
| 82 |
-
# --- ГЕНЕРАЦИЯ
|
| 83 |
|
| 84 |
-
async def
|
| 85 |
-
"""Генерирует один кусок и проверяет, не пустой ли он."""
|
| 86 |
if not text.strip(): return None
|
| 87 |
|
| 88 |
-
|
| 89 |
-
path = os.path.join(TEMP_DIR,
|
| 90 |
-
|
| 91 |
-
rate_str = f"{rate:+d}%" if isinstance(rate, int) else rate
|
| 92 |
-
pitch_str = f"{pitch:+d}Hz" if isinstance(pitch, int) else pitch
|
| 93 |
|
| 94 |
try:
|
| 95 |
-
|
| 96 |
-
comm = edge_tts.Communicate(text, voice, rate=rate_str, pitch=pitch_str)
|
| 97 |
await comm.save(path)
|
| 98 |
-
|
| 99 |
-
# ПРОВЕРКА: Создался ли файл?
|
| 100 |
-
if os.path.exists(path) and os.path.getsize(path) > 100: # Больше 100 байт (заголовок mp3)
|
| 101 |
return path
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
except Exception as e:
|
| 107 |
-
print(f"❌ Ошибка генерации куска: {e}")
|
| 108 |
-
return None
|
| 109 |
|
| 110 |
-
async def
|
| 111 |
-
|
| 112 |
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
segments = robust_text_split(text)
|
| 116 |
|
| 117 |
full_audio = AudioSegment.empty()
|
| 118 |
-
|
| 119 |
-
generated_count = 0
|
| 120 |
-
|
| 121 |
-
# Прогресс бар для Gradio
|
| 122 |
progress = gr.Progress()
|
| 123 |
|
| 124 |
-
for item in progress.tqdm(segments, desc="Озвучка
|
| 125 |
-
|
| 126 |
-
# Получаем настройки пресета
|
| 127 |
-
conf = VOICE_PRESETS.get(role, VOICE_PRESETS["narrator"])
|
| 128 |
-
|
| 129 |
-
print(f"🎙️ Генерирую: [{role}] {item['text'][:30]}...")
|
| 130 |
-
|
| 131 |
-
path = await generate_one_segment(
|
| 132 |
-
item["text"],
|
| 133 |
-
conf["voice"],
|
| 134 |
-
conf["rate"],
|
| 135 |
-
conf["pitch"]
|
| 136 |
-
)
|
| 137 |
|
| 138 |
if path:
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
generated_count += 1
|
| 149 |
-
except Exception as e:
|
| 150 |
-
print(f"❌ Ошибка склейки Pydub: {e}")
|
| 151 |
-
|
| 152 |
-
# Пауза чтобы API не забанил
|
| 153 |
-
await asyncio.sleep(0.1)
|
| 154 |
-
|
| 155 |
-
if generated_count == 0:
|
| 156 |
-
raise Exception("Не удалось сгенерировать ни одного фрагмента аудио!")
|
| 157 |
-
|
| 158 |
-
# Экспорт
|
| 159 |
-
print("💾 Сохранение итогового файла...")
|
| 160 |
-
out_name = f"final_{uuid.uuid4().hex}.mp3"
|
| 161 |
-
out_path = os.path.join(TEMP_DIR, out_name)
|
| 162 |
-
|
| 163 |
full_audio.export(out_path, format="mp3")
|
| 164 |
-
print(f"✅ Готово! Файл: {out_path}, Размер: {os.path.getsize(out_path)} байт")
|
| 165 |
|
| 166 |
-
|
| 167 |
-
for f in files_to_cleanup:
|
| 168 |
try: os.remove(f)
|
| 169 |
except: pass
|
| 170 |
|
| 171 |
return out_path, segments
|
| 172 |
|
| 173 |
-
# --- ОБЕРТКА ДЛЯ GRADIO ---
|
| 174 |
-
def run_async_in_thread(text):
|
| 175 |
-
"""Запускает async функцию безопасно для Gradio"""
|
| 176 |
-
try:
|
| 177 |
-
return asyncio.run(main_process_loop(text))
|
| 178 |
-
except RuntimeError:
|
| 179 |
-
# Если цикл уже запущен (в некоторых средах)
|
| 180 |
-
loop = asyncio.get_event_loop()
|
| 181 |
-
return loop.run_until_complete(main_process_loop(text))
|
| 182 |
-
|
| 183 |
-
# --- РУЧНОЙ РЕЖИМ (Бэкап) ---
|
| 184 |
-
def manual_process(text, voice, rate, pitch):
|
| 185 |
-
async def _manual():
|
| 186 |
-
v = voice.split(" (")[0]
|
| 187 |
-
path = os.path.join(TEMP_DIR, f"man_{uuid.uuid4().hex}.mp3")
|
| 188 |
-
comm = edge_tts.Communicate(text, v, rate=f"{rate:+d}%", pitch=f"{pitch:+d}Hz")
|
| 189 |
-
await comm.save(path)
|
| 190 |
-
return path
|
| 191 |
-
return asyncio.run(_manual())
|
| 192 |
-
|
| 193 |
# --- ИНТЕРФЕЙС ---
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
return sorted([f"{i['ShortName']} ({i['Gender']})" for i in v if i['Locale'] == "ru-RU"])
|
| 197 |
-
|
| 198 |
-
RU_VOICES = asyncio.run(get_voices())
|
| 199 |
|
| 200 |
-
css = "
|
| 201 |
-
|
| 202 |
-
.
|
| 203 |
-
"""
|
| 204 |
-
|
| 205 |
-
with gr.Blocks(theme=gr.themes.Soft(primary_hue="amber"), css=css, title="Fantasy Voice V4") as demo:
|
| 206 |
-
|
| 207 |
-
gr.Markdown("# 🏰 Fantasy Voice V4 (Robust)")
|
| 208 |
|
| 209 |
-
with gr.
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
with gr.Column(scale=1):
|
| 222 |
-
gr.Markdown("### Статус")
|
| 223 |
-
audio_ai = gr.Audio(label="Результат")
|
| 224 |
-
json_debug = gr.JSON(label="Разбор ролей")
|
| 225 |
-
|
| 226 |
-
btn_ai.click(run_async_in_thread, inputs=t_input, outputs=[audio_ai, json_debug])
|
| 227 |
|
| 228 |
-
|
| 229 |
-
with gr.Tab("🛠️ Ручной режим"):
|
| 230 |
-
with gr.Row():
|
| 231 |
-
t_man = gr.Textbox(label="Текст", lines=8, value="Проверка связи.")
|
| 232 |
-
with gr.Column():
|
| 233 |
-
v_man = gr.Dropdown(choices=RU_VOICES, value=RU_VOICES[0], label="Голос")
|
| 234 |
-
r_man = gr.Slider(-50, 50, 0, label="Скорость")
|
| 235 |
-
p_man = gr.Slider(-20, 20, 0, label="Тон")
|
| 236 |
-
btn_man = gr.Button("🔊 Озвучить")
|
| 237 |
-
|
| 238 |
-
out_man = gr.Audio()
|
| 239 |
-
btn_man.click(manual_process, inputs=[t_man, v_man, r_man, p_man], outputs=out_man)
|
| 240 |
|
| 241 |
if __name__ == "__main__":
|
| 242 |
demo.queue().launch()
|
|
|
|
| 7 |
import re
|
| 8 |
import shutil
|
| 9 |
from pydub import AudioSegment
|
|
|
|
| 10 |
|
| 11 |
+
# --- FFmpeg check ---
# Warn at startup (without aborting) when the ffmpeg executable is not on PATH.
if shutil.which("ffmpeg") is None:
    print("⚠️ FFmpeg не найден! Убедитесь, что он установлен на сервере.")
|
| 14 |
|
| 15 |
# --- НАСТРОЙКИ ГОЛОСОВ ---
|
| 16 |
+
VOICE_CONFIG = {
|
| 17 |
"narrator": {"voice": "ru-RU-DmitryNeural", "pitch": "-7Hz", "rate": "-5%"},
|
| 18 |
"male": {"voice": "ru-RU-DenisNeural", "pitch": "-2Hz", "rate": "+0%"},
|
| 19 |
"female": {"voice": "ru-RU-SvetlanaNeural","pitch": "+5Hz", "rate": "+5%"}
|
|
|
|
| 21 |
|
| 22 |
TEMP_DIR = tempfile.gettempdir()
|
| 23 |
|
| 24 |
+
# --- УМНАЯ ЛОГИКА (БЕЗ НЕЙРОСЕТИ) ---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
+
def analyze_gender_by_grammar(text):
    """Guess the speaker's gender from Russian past-tense speech verbs.

    The whole paragraph is scanned case-insensitively for a fixed list of
    verbs ("said", "asked", "whispered", ...) in their feminine or masculine
    past-tense form.

    Args:
        text: A paragraph of Russian text; ``None`` or an empty string is
            accepted.

    Returns:
        ``"female"`` if any feminine form is present (feminine forms take
        priority), ``"male"`` if only a masculine form is present, otherwise
        ``"narrator"``.
    """
    if not text:
        return "narrator"
    text_lower = text.lower()

    # Feminine markers: past tense ending in "-а" / "-ась".
    female_verbs = (
        "сказала", "спросила", "ответила", "прошептала", "крикнула",
        "подумала", "заметила", "усмехнулась", "вздохнула", "обернулась",
    )
    # Masculine markers: the same verbs without the feminine ending.
    male_verbs = (
        "сказал", "спросил", "ответил", "прошептал", "крикнул",
        "подумал", "заметил", "усмехнулся", "вздохнул", "обернулся",
    )

    def _contains_any(forms):
        # A trailing \b on every form keeps plural ("вздохнули") and
        # reflexive ("сказалась") forms from being read as a gender marker.
        # There is deliberately no leading \b, so prefixed verbs such as
        # "рассказал" still count, as they did before.
        pattern = r"(?:" + "|".join(forms) + r")\b"
        return re.search(pattern, text_lower) is not None

    if _contains_any(female_verbs):
        return "female"
    if _contains_any(male_verbs):
        return "male"
    return "narrator"  # Undetermined: let the narrator voice read it.
|
| 55 |
+
|
| 56 |
+
def smart_split_text(text):
    """Split text into paragraphs and assign a voice role to each one.

    The text is split on newlines; blank paragraphs are dropped. A paragraph
    is treated as dialogue when it starts with a dash (em dash, en dash or
    hyphen) or contains a double quote / opening guillemet. Dialogue gets
    the role returned by ``analyze_gender_by_grammar``, falling back to
    ``"male"`` when the grammar gives no hint. Everything else is narration.

    Args:
        text: The full input text.

    Returns:
        A list of ``{"text": str, "role": str}`` dicts in reading order,
        where role is ``"narrator"``, ``"male"`` or ``"female"``.
    """
    # The en dash is included because typed text often uses it in place of
    # the typographic em dash to open a line of dialogue.
    dialogue_openers = ('—', '–', '-')

    segments = []
    for paragraph in text.split('\n'):
        p = paragraph.strip()
        if not p:
            continue

        is_dialogue = p.startswith(dialogue_openers) or '"' in p or '«' in p
        if is_dialogue:
            # Look for the speaker's gender in the same paragraph.
            role = analyze_gender_by_grammar(p)
            # Clearly dialogue but no grammatical hint: default to male.
            if role == "narrator":
                role = "male"
        else:
            # Plain description.
            role = "narrator"

        segments.append({"text": p, "role": role})

    return segments
|
| 80 |
|
| 81 |
+
# --- ГЕНЕРАЦИЯ ---
|
| 82 |
|
| 83 |
+
async def generate_segment(text, role):
    """Synthesize one text segment to a temporary mp3 file with edge-tts.

    Args:
        text: The text to speak. Blank text is skipped.
        role: Key into ``VOICE_CONFIG``; unknown roles use the narrator
            preset.

    Returns:
        The path of the generated mp3 inside ``TEMP_DIR``, or ``None`` when
        the text is blank, synthesis fails, or the output file is missing or
        not larger than 100 bytes.
    """
    if not text.strip():
        return None

    conf = VOICE_CONFIG.get(role, VOICE_CONFIG["narrator"])
    path = os.path.join(TEMP_DIR, f"seg_{uuid.uuid4().hex}.mp3")

    try:
        comm = edge_tts.Communicate(
            text, conf["voice"], rate=conf["rate"], pitch=conf["pitch"]
        )
        await comm.save(path)
        if os.path.exists(path) and os.path.getsize(path) > 100:
            return path
    except Exception as e:
        # Still best-effort (the caller simply skips this segment), but the
        # failure is logged rather than hidden. Catching Exception instead of
        # using a bare except lets task cancellation propagate.
        print(f"❌ Ошибка генерации куска: {e}")

    # The caller never learns this path, so a rejected or partial file would
    # otherwise stay in TEMP_DIR for good.
    try:
        os.remove(path)
    except OSError:
        pass
    return None
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
|
| 98 |
+
async def process_book(text):
    """Voice a whole text: split it into roles, synthesize and join the audio.

    Each segment from ``smart_split_text`` is synthesized with
    ``generate_segment``; segments that fail are skipped. The clips are
    joined with a short crossfade and exported as a single mp3.

    Args:
        text: The text entered by the user.

    Returns:
        A ``(out_path, segments)`` tuple: the path of the final mp3 in
        ``TEMP_DIR`` and the list of ``{"text", "role"}`` dicts for display.

    Raises:
        gr.Error: If the text is blank or no audio could be generated.
    """
    # gr.Warning is a plain function that returns None, so `raise gr.Warning(...)`
    # fails with a TypeError; gr.Error is the exception that Gradio shows.
    if not text or not text.strip():
        raise gr.Error("Текст пуст!")

    print("⚡ Мгновенный анализ текста...")
    segments = smart_split_text(text)

    full_audio = AudioSegment.empty()
    temp_files = []
    progress = gr.Progress()

    try:
        for item in progress.tqdm(segments, desc="Озвучка"):
            path = await generate_segment(item["text"], item["role"])

            if path:
                temp_files.append(path)
                seg = AudioSegment.from_mp3(path)
                if len(full_audio) > 0:
                    # pydub rejects a crossfade longer than either clip, so
                    # clamp it for very short segments.
                    fade = min(50, len(full_audio), len(seg))
                    full_audio = full_audio.append(seg, crossfade=fade)
                else:
                    full_audio = seg
            # Short pause between TTS requests.
            await asyncio.sleep(0.1)

        # Do not hand the user an empty mp3 when every segment failed.
        if len(full_audio) == 0:
            raise gr.Error("Не удалось сгенерировать ни одного фрагмента аудио!")

        out_path = os.path.join(TEMP_DIR, f"turbo_book_{uuid.uuid4().hex}.mp3")
        full_audio.export(out_path, format="mp3")
    finally:
        # Remove per-segment files even when decoding or export fails.
        for f in temp_files:
            try:
                os.remove(f)
            except OSError:
                pass

    return out_path, segments
|
| 128 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
# --- ИНТЕРФЕЙС ---
|
| 130 |
+
# Page styling: dark background and a centred column capped at 900px.
css = (
    "body {background-color: #111827;} "
    ".container {max-width: 900px; margin: auto;}"
)
theme = gr.themes.Soft(primary_hue="green")

with gr.Blocks(theme=theme, css=css, title="Turbo TTS") as demo:
    # Header.
    gr.Markdown("# 🚀 Turbo Fantasy TTS (No GPU needed)")
    gr.Markdown("Мгновенная загрузка. Умное определение пола по грамматике.")

    with gr.Row():
        # Left (wider) column: text input and the action button.
        with gr.Column(scale=2):
            inp = gr.Textbox(
                label="Текст",
                lines=12,
                value='— Я пришла за тобой, — прошептала ведьма.\nРыцарь ответил: — Я готов.',
                placeholder="Вставьте текст...",
            )
            btn = gr.Button("⚡ Создать моментально", variant="primary")

        # Right column: resulting audio and the per-paragraph role breakdown.
        with gr.Column(scale=1):
            out_audio = gr.Audio(label="Результат")
            out_debug = gr.JSON(label="Роли (Debug)")

    # process_book returns (audio path, segments), matching the two outputs.
    btn.click(process_book, inputs=inp, outputs=[out_audio, out_debug])

if __name__ == "__main__":
    demo.queue().launch()
|