Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -8,13 +8,14 @@ import os
|
|
| 8 |
import uuid
|
| 9 |
import logging
|
| 10 |
import numpy as np
|
| 11 |
-
from concurrent.futures import ThreadPoolExecutor
|
| 12 |
import threading
|
| 13 |
from moviepy.editor import AudioFileClip, VideoFileClip, concatenate_videoclips
|
| 14 |
|
| 15 |
# Configure logging
|
| 16 |
logging.basicConfig(level=logging.INFO)
|
| 17 |
logger = logging.getLogger(__name__)
|
|
|
|
| 18 |
|
| 19 |
# Initialize G4F client
|
| 20 |
client = Client()
|
|
@@ -46,28 +47,46 @@ def get_task(prompt):
|
|
| 46 |
def validate_response(response):
|
| 47 |
try:
|
| 48 |
data = json.loads(response)
|
| 49 |
-
if isinstance(data, list)
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
|
| 55 |
def generate_text(prompt):
|
|
|
|
| 56 |
max_retries = 4
|
| 57 |
for attempt in range(max_retries):
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
|
| 67 |
-
if validate_response(response_text):
|
| 68 |
-
return response_text
|
| 69 |
-
logger.warning("Invalid response format, retrying...")
|
| 70 |
-
|
| 71 |
logger.error("Failed to generate valid response after 4 attempts")
|
| 72 |
return '[{"Киселёв":"К сожалению, не удалось расслышать вопрос. Пожалуйста, попробуйте еще раз."}, {"Путин":"Мы работаем над улучшением системы. Спасибо за понимание."}]'
|
| 73 |
|
|
@@ -81,20 +100,22 @@ def split_text(text, max_length=800):
|
|
| 81 |
chunks.append(text[:split_at])
|
| 82 |
text = text[split_at:].lstrip()
|
| 83 |
chunks.append(text)
|
|
|
|
| 84 |
return chunks
|
| 85 |
|
| 86 |
def generate_audio(text, speaker_name):
|
| 87 |
"""Generate audio with thread-safe splitting and synthesis"""
|
| 88 |
-
logger.info(f"Generating audio for {speaker_name} ({len(text)}
|
| 89 |
|
| 90 |
# Switch between speakers
|
| 91 |
silero_speaker = 'aidar' if speaker_name == 'Киселёв' else 'baya'
|
| 92 |
-
logger.debug(f"Using Silero speaker: {silero_speaker}
|
| 93 |
|
| 94 |
chunks = split_text(text)
|
| 95 |
audio_arrays = []
|
| 96 |
|
| 97 |
-
for chunk in chunks:
|
|
|
|
| 98 |
with tts_lock: # Ensure thread-safe TTS operations
|
| 99 |
audio = model.apply_tts(
|
| 100 |
ssml_text=f"<speak>{chunk}</speak>",
|
|
@@ -108,6 +129,7 @@ def generate_audio(text, speaker_name):
|
|
| 108 |
full_audio = np.concatenate(audio_arrays)
|
| 109 |
temp_filename = f"temp_{uuid.uuid4().hex}.wav"
|
| 110 |
sf.write(temp_filename, full_audio, sample_rate)
|
|
|
|
| 111 |
return temp_filename
|
| 112 |
|
| 113 |
def process_line(args):
|
|
@@ -160,13 +182,14 @@ def process_line(args):
|
|
| 160 |
|
| 161 |
def create_video(audio_files):
|
| 162 |
"""Create final video from processed audio files"""
|
| 163 |
-
logger.info("Starting video creation
|
| 164 |
|
| 165 |
try:
|
| 166 |
# Sort audio files by their numerical index
|
| 167 |
audio_files.sort(key=lambda x: int(x.split('t')[1].split('-')[0]))
|
| 168 |
clips = []
|
| 169 |
|
|
|
|
| 170 |
for audio_file in audio_files:
|
| 171 |
speaker = audio_file.split('-')[1].split('.')[0]
|
| 172 |
gif_file = GIF_MAPPING.get(speaker)
|
|
@@ -175,21 +198,23 @@ def create_video(audio_files):
|
|
| 175 |
logger.error(f"Missing GIF file for {speaker}")
|
| 176 |
continue
|
| 177 |
|
| 178 |
-
logger.info(f"Processing {audio_file} with {gif_file}")
|
| 179 |
-
|
| 180 |
audio_clip = AudioFileClip(audio_file)
|
|
|
|
|
|
|
| 181 |
gif_clip = VideoFileClip(gif_file).loop(duration=audio_clip.duration)
|
| 182 |
gif_clip = gif_clip.set_audio(audio_clip)
|
| 183 |
clips.append(gif_clip)
|
|
|
|
| 184 |
|
| 185 |
if not clips:
|
| 186 |
raise ValueError("No valid video clips created")
|
| 187 |
|
| 188 |
final_video = concatenate_videoclips(clips)
|
| 189 |
video_filename = f"output_{uuid.uuid4().hex[:8]}.mp4"
|
| 190 |
-
|
| 191 |
|
| 192 |
-
|
|
|
|
| 193 |
return video_filename
|
| 194 |
|
| 195 |
except Exception as e:
|
|
@@ -198,13 +223,15 @@ def create_video(audio_files):
|
|
| 198 |
|
| 199 |
def process_prompt(prompt):
|
| 200 |
"""Main processing pipeline with parallel execution"""
|
| 201 |
-
logger.info(f"Starting processing for prompt: {prompt}")
|
| 202 |
|
| 203 |
try:
|
| 204 |
# Generate script
|
| 205 |
script = generate_text(prompt)
|
|
|
|
| 206 |
script_data = json.loads(script)
|
| 207 |
-
|
|
|
|
| 208 |
# Prepare tasks for parallel processing
|
| 209 |
tasks = [(idx, speaker, text)
|
| 210 |
for idx, item in enumerate(script_data)
|
|
@@ -214,21 +241,26 @@ def process_prompt(prompt):
|
|
| 214 |
audio_files = []
|
| 215 |
with ThreadPoolExecutor(max_workers=4) as executor:
|
| 216 |
futures = [executor.submit(process_line, task) for task in tasks]
|
| 217 |
-
|
|
|
|
|
|
|
|
|
|
| 218 |
result = future.result()
|
| 219 |
if result:
|
| 220 |
audio_files.append(result)
|
| 221 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 222 |
# Create final video
|
| 223 |
if not audio_files:
|
| 224 |
raise ValueError("No audio files generated")
|
| 225 |
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
return video_filename
|
| 229 |
|
| 230 |
except Exception as e:
|
| 231 |
-
logger.error(f"Processing failed: {str(e)}", exc_info=True)
|
| 232 |
return None
|
| 233 |
finally:
|
| 234 |
# Cleanup audio files after video creation
|
|
|
|
| 8 |
import uuid
|
| 9 |
import logging
|
| 10 |
import numpy as np
|
| 11 |
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
| 12 |
import threading
|
| 13 |
from moviepy.editor import AudioFileClip, VideoFileClip, concatenate_videoclips
|
| 14 |
|
| 15 |
# Configure logging
|
| 16 |
logging.basicConfig(level=logging.INFO)
|
| 17 |
logger = logging.getLogger(__name__)
|
| 18 |
+
logger.setLevel(logging.INFO)
|
| 19 |
|
| 20 |
# Initialize G4F client
|
| 21 |
client = Client()
|
|
|
|
| 47 |
def validate_response(response):
    """Return True if ``response`` is a well-formed dialogue script.

    A well-formed script is a non-empty JSON array in which every element
    is an object with exactly one key; the key must be one of the two known
    speakers ("Киселёв" or "Путин") and its value must be a string (the
    text attributed to that speaker).

    Args:
        response: Raw text returned by the chat model. Non-text values
            such as ``None`` are treated as invalid rather than raising.

    Returns:
        True when the text parses as JSON and matches the shape above,
        False otherwise. The reason for a rejection is logged as a warning.
    """
    allowed_speakers = ("Киселёв", "Путин")

    try:
        data = json.loads(response)
    except (TypeError, ValueError) as e:
        # ValueError covers json.JSONDecodeError (and undecodable bytes);
        # TypeError is what json.loads raises for None / non-text input.
        logger.warning(f"JSON decode error: {str(e)}")
        return False

    if not isinstance(data, list):
        logger.warning("Invalid response: Root element is not a list")
        return False

    # An empty script would pass the per-item checks vacuously but gives the
    # caller nothing to synthesize, so reject it here.
    if not data:
        logger.warning("Invalid response: Script contains no lines")
        return False

    for number, item in enumerate(data, 1):
        if not isinstance(item, dict):
            logger.warning(f"Invalid item #{number}: Not a dictionary")
            return False
        if len(item) != 1:
            logger.warning(
                f"Invalid item #{number}: Contains {len(item)} keys instead of 1"
            )
            return False

        speaker, line = next(iter(item.items()))
        if speaker not in allowed_speakers:
            logger.warning(f"Invalid item #{number}: Unexpected speaker '{speaker}'")
            return False
        # The text is later measured and sliced as a string, so numbers,
        # nulls, or nested structures must not get through.
        if not isinstance(line, str):
            logger.warning(
                f"Invalid item #{number}: Text for '{speaker}' is not a string"
            )
            return False

    return True
|
| 68 |
|
| 69 |
def generate_text(prompt):
    """Ask the chat model for a dialogue script, retrying on bad output.

    Up to four requests are made. Each reply is checked with
    ``validate_response``; the first reply that passes is returned as-is.
    Any exception raised while requesting or validating a reply is logged
    and counts as a failed attempt.

    Args:
        prompt: User prompt; it is passed through ``get_task`` to build the
            message sent to the model.

    Returns:
        The raw JSON text of the first valid reply, or a fixed fallback
        script (also JSON text) when every attempt fails.
    """
    logger.info(f"Generating text for prompt: '{prompt}'")
    max_retries = 4
    attempt_number = 0

    while attempt_number < max_retries:
        attempt_number += 1
        try:
            completion = client.chat.completions.create(
                model="llama-3.3-70b",
                messages=[{"role": "user", "content": get_task(prompt)}],
                web_search=False,
            )
            candidate = completion.choices[0].message.content
            logger.debug(f"Raw API response: {candidate}")

            # Validation stays inside the try so that a failure while
            # checking the reply is handled like a failed request.
            if not validate_response(candidate):
                logger.warning(f"Validation failed (attempt {attempt_number})")
                continue

            logger.info(f"Successfully validated response (attempt {attempt_number})")
            return candidate
        except Exception as error:
            logger.error(f"API call failed: {str(error)}")

    # Every attempt failed: hand back a canned two-line script so the caller
    # still receives parseable JSON.
    logger.error("Failed to generate valid response after 4 attempts")
    return '[{"Киселёв":"К сожалению, не удалось расслышать вопрос. Пожалуйста, попробуйте еще раз."}, {"Путин":"Мы работаем над улучшением системы. Спасибо за понимание."}]'
|
| 92 |
|
|
|
|
| 100 |
chunks.append(text[:split_at])
|
| 101 |
text = text[split_at:].lstrip()
|
| 102 |
chunks.append(text)
|
| 103 |
+
logger.debug(f"Split text into {len(chunks)} chunks")
|
| 104 |
return chunks
|
| 105 |
|
| 106 |
def generate_audio(text, speaker_name):
|
| 107 |
"""Generate audio with thread-safe splitting and synthesis"""
|
| 108 |
+
logger.info(f"Generating audio for {speaker_name} ({len(text)} characters)")
|
| 109 |
|
| 110 |
# Switch between speakers
|
| 111 |
silero_speaker = 'aidar' if speaker_name == 'Киселёв' else 'baya'
|
| 112 |
+
logger.debug(f"Using Silero speaker: {silero_speaker}")
|
| 113 |
|
| 114 |
chunks = split_text(text)
|
| 115 |
audio_arrays = []
|
| 116 |
|
| 117 |
+
for idx, chunk in enumerate(chunks, 1):
|
| 118 |
+
logger.debug(f"Processing chunk {idx}/{len(chunks)}")
|
| 119 |
with tts_lock: # Ensure thread-safe TTS operations
|
| 120 |
audio = model.apply_tts(
|
| 121 |
ssml_text=f"<speak>{chunk}</speak>",
|
|
|
|
| 129 |
full_audio = np.concatenate(audio_arrays)
|
| 130 |
temp_filename = f"temp_{uuid.uuid4().hex}.wav"
|
| 131 |
sf.write(temp_filename, full_audio, sample_rate)
|
| 132 |
+
logger.debug(f"Temporary audio saved: {temp_filename}")
|
| 133 |
return temp_filename
|
| 134 |
|
| 135 |
def process_line(args):
|
|
|
|
| 182 |
|
| 183 |
def create_video(audio_files):
|
| 184 |
"""Create final video from processed audio files"""
|
| 185 |
+
logger.info(f"⏳ Starting video creation with {len(audio_files)} audio files")
|
| 186 |
|
| 187 |
try:
|
| 188 |
# Sort audio files by their numerical index
|
| 189 |
audio_files.sort(key=lambda x: int(x.split('t')[1].split('-')[0]))
|
| 190 |
clips = []
|
| 191 |
|
| 192 |
+
logger.info("Processing audio-GIF pairs:")
|
| 193 |
for audio_file in audio_files:
|
| 194 |
speaker = audio_file.split('-')[1].split('.')[0]
|
| 195 |
gif_file = GIF_MAPPING.get(speaker)
|
|
|
|
| 198 |
logger.error(f"Missing GIF file for {speaker}")
|
| 199 |
continue
|
| 200 |
|
|
|
|
|
|
|
| 201 |
audio_clip = AudioFileClip(audio_file)
|
| 202 |
+
logger.info(f"🔊 {os.path.basename(audio_file)} ({audio_clip.duration:.1f}s)")
|
| 203 |
+
|
| 204 |
gif_clip = VideoFileClip(gif_file).loop(duration=audio_clip.duration)
|
| 205 |
gif_clip = gif_clip.set_audio(audio_clip)
|
| 206 |
clips.append(gif_clip)
|
| 207 |
+
logger.debug(f"Processed {speaker} segment")
|
| 208 |
|
| 209 |
if not clips:
|
| 210 |
raise ValueError("No valid video clips created")
|
| 211 |
|
| 212 |
final_video = concatenate_videoclips(clips)
|
| 213 |
video_filename = f"output_{uuid.uuid4().hex[:8]}.mp4"
|
| 214 |
+
logger.info(f"🎬 Concatenating {len(clips)} clips (total: {final_video.duration:.1f}s)")
|
| 215 |
|
| 216 |
+
final_video.write_videofile(video_filename, codec='libx264', audio_codec='aac')
|
| 217 |
+
logger.info(f"✅ Successfully created video: {video_filename}")
|
| 218 |
return video_filename
|
| 219 |
|
| 220 |
except Exception as e:
|
|
|
|
| 223 |
|
| 224 |
def process_prompt(prompt):
|
| 225 |
"""Main processing pipeline with parallel execution"""
|
| 226 |
+
logger.info(f"🚀 Starting processing for prompt: '{prompt}'")
|
| 227 |
|
| 228 |
try:
|
| 229 |
# Generate script
|
| 230 |
script = generate_text(prompt)
|
| 231 |
+
logger.debug(f"Raw script data: {script}")
|
| 232 |
script_data = json.loads(script)
|
| 233 |
+
logger.info(f"📝 Generated script with {len(script_data)} lines")
|
| 234 |
+
|
| 235 |
# Prepare tasks for parallel processing
|
| 236 |
tasks = [(idx, speaker, text)
|
| 237 |
for idx, item in enumerate(script_data)
|
|
|
|
| 241 |
audio_files = []
|
| 242 |
with ThreadPoolExecutor(max_workers=4) as executor:
|
| 243 |
futures = [executor.submit(process_line, task) for task in tasks]
|
| 244 |
+
total_tasks = len(futures)
|
| 245 |
+
logger.info(f"📦 Processing {total_tasks} audio segments in parallel")
|
| 246 |
+
|
| 247 |
+
for i, future in enumerate(as_completed(futures), 1):
|
| 248 |
result = future.result()
|
| 249 |
if result:
|
| 250 |
audio_files.append(result)
|
| 251 |
+
remaining = total_tasks - i
|
| 252 |
+
logger.info(f"🔧 Processed {os.path.basename(result)} ({i}/{total_tasks}, {remaining} remaining)")
|
| 253 |
+
else:
|
| 254 |
+
logger.warning(f"⚠️ Failed to process task {i}/{total_tasks}")
|
| 255 |
+
|
| 256 |
# Create final video
|
| 257 |
if not audio_files:
|
| 258 |
raise ValueError("No audio files generated")
|
| 259 |
|
| 260 |
+
return create_video(audio_files)
|
|
|
|
|
|
|
| 261 |
|
| 262 |
except Exception as e:
|
| 263 |
+
logger.error(f"❌ Processing failed: {str(e)}", exc_info=True)
|
| 264 |
return None
|
| 265 |
finally:
|
| 266 |
# Cleanup audio files after video creation
|