Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -6,11 +6,11 @@ import soundfile as sf
|
|
| 6 |
from openvoice_cli.__main__ import tune_one
|
| 7 |
import os
|
| 8 |
import uuid
|
| 9 |
-
import zipfile
|
| 10 |
import logging
|
| 11 |
import numpy as np
|
| 12 |
from concurrent.futures import ThreadPoolExecutor
|
| 13 |
import threading
|
|
|
|
| 14 |
|
| 15 |
# Configure logging
|
| 16 |
logging.basicConfig(level=logging.INFO)
|
|
@@ -22,7 +22,6 @@ client = Client()
|
|
| 22 |
# Silero TTS configuration
|
| 23 |
language = 'ru'
|
| 24 |
model_id = 'ru_v3'
|
| 25 |
-
speaker = 'baya' # Russian voice
|
| 26 |
sample_rate = 48000 # Sample rate for Russian model
|
| 27 |
device = torch.device('cpu')
|
| 28 |
|
|
@@ -34,6 +33,12 @@ model, _ = torch.hub.load(repo_or_dir='snakers4/silero-models',
|
|
| 34 |
model.to(device)
|
| 35 |
tts_lock = threading.Lock() # Lock for TTS model thread safety
|
| 36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
def get_task(prompt):
|
| 38 |
return f'''Ты генерируешь скрипт продолжения прямой линии с Владимиром Владимировичем Путиным. Вопросы телезрителей передаёт Киселёв - он максимально подобострастен, поддерживает Путина и Россию. Пиши в духе пропаганды пророссийской. Каждая реплика - не менее 150-200 символов. Английские названия при необходимости всегда транскрибируй кириллицей. Числительные также записывай кириллицей. Пиши в стиле кремлёвской пропаганды. Пиши в стереотипном стиле. Киселёв задаёт вопросы, обрашается к Путину как "Владимир Владимирович" или "Господин Президент". Путин отвечает. Всё максимально аутентично. Фразы Киселёва: {{"Киселёв":"фраза"}} Фразы Путина: {{"Путин":"фраза"}} Ответ дай в формате JSON без дополнительных символов: [{{"Киселёв":"фраза"}}, {{"Путин":"фраза"}} . . . ].
|
| 39 |
Вопрос от пользователя поступил: "{prompt}"'''
|
|
@@ -82,6 +87,10 @@ def generate_audio(text, speaker_name):
|
|
| 82 |
"""Generate audio with thread-safe splitting and synthesis"""
|
| 83 |
logger.info(f"Generating audio for {speaker_name} ({len(text)} chars)")
|
| 84 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
chunks = split_text(text)
|
| 86 |
audio_arrays = []
|
| 87 |
|
|
@@ -89,7 +98,7 @@ def generate_audio(text, speaker_name):
|
|
| 89 |
with tts_lock: # Ensure thread-safe TTS operations
|
| 90 |
audio = model.apply_tts(
|
| 91 |
ssml_text=f"<speak>{chunk}</speak>",
|
| 92 |
-
speaker=
|
| 93 |
sample_rate=sample_rate,
|
| 94 |
put_accent=True,
|
| 95 |
put_yo=True
|
|
@@ -149,39 +158,83 @@ def process_line(args):
|
|
| 149 |
if f and os.path.exists(f):
|
| 150 |
os.remove(f)
|
| 151 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 152 |
def process_prompt(prompt):
|
| 153 |
"""Main processing pipeline with parallel execution"""
|
| 154 |
logger.info(f"Starting processing for prompt: {prompt}")
|
| 155 |
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 177 |
for file in audio_files:
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
# Cleanup working files
|
| 181 |
-
for file in audio_files:
|
| 182 |
-
os.remove(file)
|
| 183 |
-
|
| 184 |
-
return zip_filename
|
| 185 |
|
| 186 |
# Gradio interface
|
| 187 |
examples = [
|
|
@@ -201,7 +254,7 @@ with gr.Blocks() as demo:
|
|
| 201 |
)
|
| 202 |
|
| 203 |
generate_btn = gr.Button("Generate", variant="primary")
|
| 204 |
-
output = gr.
|
| 205 |
|
| 206 |
gr.Examples(
|
| 207 |
examples=examples,
|
|
|
|
| 6 |
from openvoice_cli.__main__ import tune_one
|
| 7 |
import os
|
| 8 |
import uuid
|
|
|
|
| 9 |
import logging
|
| 10 |
import numpy as np
|
| 11 |
from concurrent.futures import ThreadPoolExecutor
|
| 12 |
import threading
|
| 13 |
+
from moviepy.editor import AudioFileClip, VideoFileClip, concatenate_videoclips
|
| 14 |
|
| 15 |
# Configure logging
|
| 16 |
logging.basicConfig(level=logging.INFO)
|
|
|
|
| 22 |
# Silero TTS configuration
|
| 23 |
language = 'ru'
|
| 24 |
model_id = 'ru_v3'
|
|
|
|
| 25 |
sample_rate = 48000 # Sample rate for Russian model
|
| 26 |
device = torch.device('cpu')
|
| 27 |
|
|
|
|
| 33 |
model.to(device)
|
| 34 |
tts_lock = threading.Lock() # Lock for TTS model thread safety
|
| 35 |
|
| 36 |
+
# GIF mappings
|
| 37 |
+
GIF_MAPPING = {
|
| 38 |
+
"Киселёв": "kisel.gif",
|
| 39 |
+
"Путин": "putin.gif"
|
| 40 |
+
}
|
| 41 |
+
|
| 42 |
def get_task(prompt):
|
| 43 |
return f'''Ты генерируешь скрипт продолжения прямой линии с Владимиром Владимировичем Путиным. Вопросы телезрителей передаёт Киселёв - он максимально подобострастен, поддерживает Путина и Россию. Пиши в духе пропаганды пророссийской. Каждая реплика - не менее 150-200 символов. Английские названия при необходимости всегда транскрибируй кириллицей. Числительные также записывай кириллицей. Пиши в стиле кремлёвской пропаганды. Пиши в стереотипном стиле. Киселёв задаёт вопросы, обрашается к Путину как "Владимир Владимирович" или "Господин Президент". Путин отвечает. Всё максимально аутентично. Фразы Киселёва: {{"Киселёв":"фраза"}} Фразы Путина: {{"Путин":"фраза"}} Ответ дай в формате JSON без дополнительных символов: [{{"Киселёв":"фраза"}}, {{"Путин":"фраза"}} . . . ].
|
| 44 |
Вопрос от пользователя поступил: "{prompt}"'''
|
|
|
|
| 87 |
"""Generate audio with thread-safe splitting and synthesis"""
|
| 88 |
logger.info(f"Generating audio for {speaker_name} ({len(text)} chars)")
|
| 89 |
|
| 90 |
+
# Switch between speakers
|
| 91 |
+
silero_speaker = 'aidar' if speaker_name == 'Киселёв' else 'baya'
|
| 92 |
+
logger.debug(f"Using Silero speaker: {silero_speaker} for {speaker_name}")
|
| 93 |
+
|
| 94 |
chunks = split_text(text)
|
| 95 |
audio_arrays = []
|
| 96 |
|
|
|
|
| 98 |
with tts_lock: # Ensure thread-safe TTS operations
|
| 99 |
audio = model.apply_tts(
|
| 100 |
ssml_text=f"<speak>{chunk}</speak>",
|
| 101 |
+
speaker=silero_speaker,
|
| 102 |
sample_rate=sample_rate,
|
| 103 |
put_accent=True,
|
| 104 |
put_yo=True
|
|
|
|
| 158 |
if f and os.path.exists(f):
|
| 159 |
os.remove(f)
|
| 160 |
|
| 161 |
+
def create_video(audio_files):
    """Create the final MP4 by pairing each audio file with its speaker's GIF.

    Each audio file name is parsed positionally, exactly as the pipeline
    names it: the second ``'t'``-delimited piece, up to its first ``'-'``,
    is the integer used for ordering, and the second ``'-'``-delimited
    piece, up to its first ``'.'``, is the speaker key looked up in
    ``GIF_MAPPING``. Files whose speaker has no GIF on disk are skipped
    with an error log entry.

    Args:
        audio_files: Paths of the audio files to assemble. The list is not
            modified.

    Returns:
        The file name of the written video (``output_<8 hex chars>.mp4``).

    Raises:
        ValueError: If no audio file could be paired with an existing GIF.
        Exception: Any other failure is logged and re-raised unchanged.
    """
    logger.info("Starting video creation process")

    # Clips opened from disk keep reader resources alive until closed, so
    # track every one of them and release them whether or not rendering
    # succeeds (the caller deletes the audio files right afterwards).
    opened_clips = []
    final_video = None

    try:
        # Sort a copy by numeric index so the caller's list keeps its order.
        ordered_files = sorted(
            audio_files,
            key=lambda x: int(x.split('t')[1].split('-')[0]),
        )
        clips = []

        for audio_file in ordered_files:
            speaker = audio_file.split('-')[1].split('.')[0]
            gif_file = GIF_MAPPING.get(speaker)

            if not gif_file or not os.path.exists(gif_file):
                logger.error(f"Missing GIF file for {speaker}")
                continue

            logger.info(f"Processing {audio_file} with {gif_file}")

            audio_clip = AudioFileClip(audio_file)
            opened_clips.append(audio_clip)
            base_gif = VideoFileClip(gif_file)
            opened_clips.append(base_gif)

            # Repeat the GIF for as long as the spoken line lasts.
            gif_clip = base_gif.loop(duration=audio_clip.duration)
            clips.append(gif_clip.set_audio(audio_clip))

        if not clips:
            raise ValueError("No valid video clips created")

        final_video = concatenate_videoclips(clips)
        video_filename = f"output_{uuid.uuid4().hex[:8]}.mp4"
        final_video.write_videofile(
            video_filename,
            codec='libx264',
            audio_codec='aac',
            # isEnabledFor() honours a level inherited from a parent logger;
            # comparing logger.level directly only sees a level set on this
            # logger itself.
            logger='bar' if logger.isEnabledFor(logging.DEBUG) else None,
        )

        logger.info(f"Successfully created video: {video_filename}")
        return video_filename

    except Exception as e:
        logger.error(f"Video creation failed: {str(e)}", exc_info=True)
        raise

    finally:
        # Best-effort release: a failure to close must never replace the
        # real result or the real exception.
        for clip in [final_video, *opened_clips]:
            if clip is None:
                continue
            try:
                clip.close()
            except Exception:
                logger.debug("Failed to close clip", exc_info=True)
|
| 198 |
+
|
| 199 |
def process_prompt(prompt):
    """Main processing pipeline with parallel execution.

    Generates a script for ``prompt`` with ``generate_text``, parses it as
    JSON, runs ``process_line`` for every (index, speaker, text) entry on a
    pool of four worker threads, and hands the resulting audio files to
    ``create_video``. The intermediate audio files are deleted afterwards
    in every case.

    Args:
        prompt: The user's question; passed to ``generate_text``.

    Returns:
        The file name returned by ``create_video``, or ``None`` if any step
        failed (the failure is logged with its traceback).
    """
    logger.info(f"Starting processing for prompt: {prompt}")

    # Bound before the ``try`` so the ``finally`` clause can always iterate
    # it. If it were first assigned inside ``try``, a failure in script
    # generation or JSON parsing would make the cleanup loop raise an
    # unbound-local error that replaces the intended ``return None``.
    audio_files = []

    try:
        # Generate script
        script = generate_text(prompt)
        script_data = json.loads(script)

        # Prepare tasks for parallel processing: one per speaker/text pair,
        # tagged with the index of the script entry it came from.
        tasks = [(idx, speaker, text)
                 for idx, item in enumerate(script_data)
                 for speaker, text in item.items()]

        # Process lines in parallel; results are collected in submission
        # order and falsy results are dropped.
        with ThreadPoolExecutor(max_workers=4) as executor:
            futures = [executor.submit(process_line, task) for task in tasks]
            for future in futures:
                result = future.result()
                if result:
                    audio_files.append(result)

        # Create final video
        if not audio_files:
            raise ValueError("No audio files generated")

        return create_video(audio_files)

    except Exception as e:
        logger.error(f"Processing failed: {str(e)}", exc_info=True)
        return None

    finally:
        # Cleanup audio files after video creation. This is best-effort: an
        # exception escaping ``finally`` would override the return value.
        for file in audio_files:
            try:
                os.remove(file)
            except FileNotFoundError:
                pass
            except OSError as cleanup_error:
                logger.warning(f"Could not remove {file}: {cleanup_error}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 238 |
|
| 239 |
# Gradio interface
|
| 240 |
examples = [
|
|
|
|
| 254 |
)
|
| 255 |
|
| 256 |
generate_btn = gr.Button("Generate", variant="primary")
|
| 257 |
+
output = gr.Video(label="Generated Video", format="mp4")
|
| 258 |
|
| 259 |
gr.Examples(
|
| 260 |
examples=examples,
|