Spaces:
Sleeping
Sleeping
File size: 12,297 Bytes
5c6bd93 dd99c93 618d3a8 29232db 400f26d 5a92983 400f26d 01769ef 400f26d 5a92983 5c6bd93 29232db dd99c93 5c6bd93 29232db 400f26d 29232db 400f26d 29232db 01769ef e5894bd 8105b9f e5894bd 2c0b7b8 618d3a8 5a92983 618d3a8 dd99c93 5a92983 618d3a8 5a92983 400f26d 618d3a8 5c6bd93 400f26d 5a92983 400f26d 29232db 400f26d 5a92983 29232db 01769ef 5a92983 01769ef 400f26d 29232db 5a92983 400f26d 01769ef 400f26d 29232db 400f26d 5a92983 400f26d 92de730 400f26d 92de730 400f26d 92de730 400f26d 92de730 400f26d 92de730 400f26d 92de730 400f26d 92de730 400f26d 92de730 400f26d 92de730 29232db 01769ef 5a92983 01769ef 5a92983 01769ef 5a92983 01769ef 5a92983 01769ef 5a92983 01769ef 5a92983 01769ef 29232db 400f26d 5a92983 400f26d 01769ef 5a92983 01769ef 5a92983 01769ef 5a92983 01769ef 5a92983 01769ef 5a92983 01769ef 5a92983 01769ef 400f26d 01769ef 29232db 5c6bd93 8af62fa 5c6bd93 dd99c93 8af62fa dd99c93 01769ef dd99c93 29232db dd99c93 5c6bd93 dd99c93 29232db dd99c93 5c6bd93 dd99c93 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 |
import gradio as gr
from g4f.client import Client
import json
import torch
import soundfile as sf
from openvoice_cli.__main__ import tune_one
import os
import uuid
import logging
import numpy as np
from concurrent.futures import ThreadPoolExecutor, as_completed
import threading
from moviepy.editor import AudioFileClip, VideoFileClip, concatenate_videoclips
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# Initialize G4F client (free LLM access used by generate_text)
client = Client()

# Silero TTS configuration
language = 'ru'
model_id = 'ru_v3'
sample_rate = 48000 # Sample rate for Russian model
device = torch.device('cpu')

# Load Silero TTS model with thread safety.
# NOTE(review): torch.hub.load runs at import time and may download the model
# on first start — confirm this is acceptable for the deployment environment.
model, _ = torch.hub.load(repo_or_dir='snakers4/silero-models',
                          model='silero_tts',
                          language=language,
                          speaker=model_id)
model.to(device)
tts_lock = threading.Lock() # Lock for TTS model thread safety (shared across worker threads)

# GIF mappings: speaker name -> looping GIF shown while that speaker's audio plays
GIF_MAPPING = {
    "Киселёв": "kisel.gif",
    "Путин": "putin.gif"
}
def get_task(prompt):
    """Build the Russian-language LLM instruction for *prompt*.

    The instruction asks the model for a propaganda-style interview script and
    demands strict JSON output: a list of single-key dicts keyed by speaker
    ("Киселёв" or "Путин"), which validate_response() later enforces.
    """
    return f'''Ты генерируешь скрипт продолжения прямой линии с Владимиром Владимировичем Путиным. Вопросы телезрителей передаёт Киселёв - он максимально подобострастен, поддерживает Путина и Россию. Пиши в духе пропаганды пророссийской. Каждая реплика - не менее 150-200 символов. Английские названия при необходимости всегда транскрибируй кириллицей. Числительные также записывай кириллицей. Пиши в стиле кремлёвской пропаганды. Пиши в стереотипном стиле. Киселёв задаёт вопросы, обрашается к Путину как "Владимир Владимирович" или "Господин Президент". Путин отвечает. Всё максимально аутентично. Фразы Киселёва: {{"Киселёв":"фраза"}} Фразы Путина: {{"Путин":"фраза"}} Ответ дай в формате JSON без дополнительных символов: [{{"Киселёв":"фраза"}}, {{"Путин":"фраза"}} . . . ].
Вопрос от пользователя поступил: "{prompt}"'''
def validate_response(response):
    """Return True if *response* is a JSON list of single-key dicts keyed by a known speaker.

    Logs a warning describing the first violation found and returns False
    on malformed JSON or any structural mismatch.
    """
    try:
        parsed = json.loads(response)
    except json.JSONDecodeError as e:
        logger.warning(f"JSON decode error: {str(e)}")
        return False
    if not isinstance(parsed, list):
        logger.warning("Invalid response: Root element is not a list")
        return False
    for position, entry in enumerate(parsed, 1):
        if not isinstance(entry, dict):
            logger.warning(f"Invalid item #{position}: Not a dictionary")
            return False
        if len(entry) != 1:
            logger.warning(f"Invalid item #{position}: Contains {len(entry)} keys instead of 1")
            return False
        speaker = next(iter(entry))
        if speaker not in ("Киселёв", "Путин"):
            logger.warning(f"Invalid item #{position}: Unexpected speaker '{speaker}'")
            return False
    return True
def generate_text(prompt):
    """Generate a validated JSON dialogue script for *prompt* via the G4F chat API.

    Retries up to ``max_retries`` times, re-validating each response with
    validate_response(). On total failure returns a hard-coded fallback script
    so downstream processing always receives valid JSON.
    """
    logger.info(f"Generating text for prompt: '{prompt}'")
    max_retries = 4
    for attempt in range(max_retries):
        try:
            response = client.chat.completions.create(
                model="llama-3.3-70b",
                messages=[{"role": "user", "content": get_task(prompt)}],
                web_search=False
            )
            response_text = response.choices[0].message.content
            logger.debug(f"Raw API response: {response_text}")
            if validate_response(response_text):
                logger.info(f"Successfully validated response (attempt {attempt+1})")
                return response_text
            logger.warning(f"Validation failed (attempt {attempt+1})")
        except Exception as e:
            logger.error(f"API call failed: {str(e)}")
    # Fix: report the actual retry count instead of a hard-coded "4", so the
    # log stays correct if max_retries is ever changed.
    logger.error(f"Failed to generate valid response after {max_retries} attempts")
    return '[{"Киселёв":"К сожалению, не удалось расслышать вопрос. Пожалуйста, попробуйте еще раз."}, {"Путин":"Мы работаем над улучшением системы. Спасибо за понимание."}]'
def split_text(text, max_length=800):
    """Split *text* into chunks of at most *max_length* characters.

    Prefers to break on the last space before the limit so words stay intact;
    falls back to a hard cut when no usable space exists in the window.

    Fix: the original used ``rfind`` result 0 as a split point, which emitted
    an empty chunk whenever the only space in the window was the leading
    character; ``split_at <= 0`` now falls back to a hard cut instead.
    """
    chunks = []
    while len(text) > max_length:
        split_at = text.rfind(' ', 0, max_length)
        if split_at <= 0:
            # No space found (-1), or only a leading space (0) which would
            # produce an empty chunk: cut at the limit.
            split_at = max_length
        chunks.append(text[:split_at])
        text = text[split_at:].lstrip()
    chunks.append(text)
    # Use the module logger by name so the function has no hidden global dependency.
    logging.getLogger(__name__).debug(f"Split text into {len(chunks)} chunks")
    return chunks
def generate_audio(text, speaker_name):
    """Synthesize *text* with Silero TTS and write it to a temporary WAV file.

    Returns the temporary filename. Access to the shared TTS model is
    serialized with ``tts_lock`` so parallel workers do not collide.
    """
    logger.info(f"Generating audio for {speaker_name} ({len(text)} characters)")
    # Map the dialogue speaker to a Silero voice.
    silero_speaker = 'aidar' if speaker_name == 'Киселёв' else 'baya'
    logger.debug(f"Using Silero speaker: {silero_speaker}")
    pieces = split_text(text)
    rendered = []
    for number, piece in enumerate(pieces, 1):
        logger.debug(f"Processing chunk {number}/{len(pieces)}")
        with tts_lock:  # thread-safe access to the shared model
            rendered.append(model.apply_tts(
                ssml_text=f"<speak>{piece}</speak>",
                speaker=silero_speaker,
                sample_rate=sample_rate,
                put_accent=True,
                put_yo=True
            ))
    temp_filename = f"temp_{uuid.uuid4().hex}.wav"
    sf.write(temp_filename, np.concatenate(rendered), sample_rate)
    logger.debug(f"Temporary audio saved: {temp_filename}")
    return temp_filename
def process_line(args):
    """Render one dialogue line: base TTS audio plus voice-conversion pass.

    *args* is an ``(idx, speaker, text)`` tuple. Returns the final WAV
    filename on success or ``None`` on any failure; intermediate files are
    always cleaned up in the ``finally`` block.
    """
    idx, speaker, text = args
    final_filename = f"t{idx+1}-{speaker}.wav"
    base_audio = None
    output_filename = None
    try:
        logger.info(f"Processing line {idx+1} for {speaker}")
        # Step 1: synthesize the raw Silero audio.
        base_audio = generate_audio(text, speaker)
        if not os.path.exists(base_audio):
            logger.error(f"Base audio not generated for line {idx+1}")
            return None
        # Step 2: convert the voice toward the speaker's reference sample.
        ref_audio = "kisel.mp3" if speaker == "Киселёв" else "putin.mp3"
        output_filename = f"output_{uuid.uuid4().hex[:6]}.wav"
        logger.debug(f"Tuning audio with reference: {ref_audio}")
        tune_one(input_file=base_audio, ref_file=ref_audio,
                 output_file=output_filename, device='cpu')
        if not os.path.exists(output_filename):
            logger.error(f"Voice tuning failed for line {idx+1}")
            return None
        # Step 3: move the result to its deterministic final name.
        os.rename(output_filename, final_filename)
        logger.info(f"Created final file: {final_filename}")
        return final_filename
    except Exception as e:
        logger.error(f"Error processing line {idx+1}: {str(e)}", exc_info=True)
        return None
    finally:
        # After a successful rename output_filename no longer exists, so only
        # stale intermediates are actually deleted here.
        for leftover in (base_audio, output_filename):
            if leftover and os.path.exists(leftover):
                os.remove(leftover)
def create_video(audio_files):
    """Stitch processed audio files and their speaker GIFs into one MP4.

    Expects filenames of the form ``t<N>-<speaker>.wav``; segments are ordered
    by ``<N>``. Raises if no valid clip could be built or encoding fails.
    """
    logger.info(f"⏳ Starting video creation with {len(audio_files)} audio files")
    try:
        # Order segments by the numeric index embedded in the filename.
        audio_files.sort(key=lambda name: int(name.split('t')[1].split('-')[0]))
        clips = []
        logger.info("Processing audio-GIF pairs:")
        for audio_file in audio_files:
            speaker = audio_file.split('-')[1].split('.')[0]
            gif_file = GIF_MAPPING.get(speaker)
            if gif_file is None or not os.path.exists(gif_file):
                logger.error(f"Missing GIF file for {speaker}")
                continue
            audio_clip = AudioFileClip(audio_file)
            logger.info(f"🔊 {os.path.basename(audio_file)} ({audio_clip.duration:.1f}s)")
            # Loop the GIF for the duration of the line and attach the audio.
            segment = VideoFileClip(gif_file).loop(duration=audio_clip.duration).set_audio(audio_clip)
            clips.append(segment)
            logger.debug(f"Processed {speaker} segment")
        if not clips:
            raise ValueError("No valid video clips created")
        final_video = concatenate_videoclips(clips)
        video_filename = f"output_{uuid.uuid4().hex[:8]}.mp4"
        logger.info(f"🎬 Concatenating {len(clips)} clips (total: {final_video.duration:.1f}s)")
        final_video.write_videofile(video_filename, codec='libx264', audio_codec='aac')
        logger.info(f"✅ Successfully created video: {video_filename}")
        return video_filename
    except Exception as e:
        logger.error(f"Video creation failed: {str(e)}", exc_info=True)
        raise
def process_prompt(prompt):
    """Main processing pipeline with parallel execution.

    Generates the dialogue script, synthesizes every line in a thread pool,
    then assembles the final video. Returns the video filename, or ``None``
    on failure. All per-line audio files are removed in the ``finally`` block.
    """
    logger.info(f"🚀 Starting processing for prompt: '{prompt}'")
    # Fix: initialize BEFORE the try block. Previously audio_files was first
    # assigned inside try, so a failure in generate_text()/json.loads() made
    # the finally-cleanup raise UnboundLocalError instead of returning None.
    audio_files = []
    try:
        # Generate script
        script = generate_text(prompt)
        logger.debug(f"Raw script data: {script}")
        script_data = json.loads(script)
        logger.info(f"📝 Generated script with {len(script_data)} lines")
        # Prepare tasks for parallel processing: one (index, speaker, text)
        # tuple per dialogue line.
        tasks = [(idx, speaker, text)
                 for idx, item in enumerate(script_data)
                 for speaker, text in item.items()]
        # Process lines in parallel
        with ThreadPoolExecutor(max_workers=4) as executor:
            futures = [executor.submit(process_line, task) for task in tasks]
            total_tasks = len(futures)
            logger.info(f"📦 Processing {total_tasks} audio segments in parallel")
            for i, future in enumerate(as_completed(futures), 1):
                result = future.result()
                if result:
                    audio_files.append(result)
                    remaining = total_tasks - i
                    logger.info(f"🔧 Processed {os.path.basename(result)} ({i}/{total_tasks}, {remaining} remaining)")
                else:
                    logger.warning(f"⚠️ Failed to process task {i}/{total_tasks}")
        # Create final video
        if not audio_files:
            raise ValueError("No audio files generated")
        return create_video(audio_files)
    except Exception as e:
        logger.error(f"❌ Processing failed: {str(e)}", exc_info=True)
        return None
    finally:
        # Cleanup audio files after video creation
        for file in audio_files:
            if os.path.exists(file):
                os.remove(file)
# Gradio interface
# Clickable example prompts shown below the input box.
examples = [
    "Почему такие высокие налоги?",
    "Какие цели СВО?",
    "Когда развалится Америка?"
]

# NOTE(review): original indentation was lost in this view; the Row/Blocks
# nesting below is reconstructed from the widget order — confirm layout.
with gr.Blocks() as demo:
    gr.Markdown("# Kisel TV")
    with gr.Row():
        prompt_input = gr.Textbox(
            label="Input Prompt",
            placeholder="Enter your text here...",
            lines=3
        )
        generate_btn = gr.Button("Generate", variant="primary")
    output = gr.Video(label="Generated Video", format="mp4")
    # Examples are not cached: generation is nondeterministic and expensive.
    gr.Examples(
        examples=examples,
        inputs=prompt_input,
        outputs=output,
        fn=process_prompt,
        cache_examples=False
    )
    # Button wires the full pipeline to the video output.
    generate_btn.click(
        fn=process_prompt,
        inputs=prompt_input,
        outputs=output
    )
# Launch the Gradio app when run as a script.
# Fix: dropped the stray trailing "|" artifact after demo.launch(), which
# would be a syntax error if present in the real file.
if __name__ == "__main__":
    demo.launch()