tester1hf commited on
Commit
5a92983
·
verified ·
1 Parent(s): 01769ef

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -34
app.py CHANGED
@@ -8,13 +8,14 @@ import os
8
  import uuid
9
  import logging
10
  import numpy as np
11
- from concurrent.futures import ThreadPoolExecutor
12
  import threading
13
  from moviepy.editor import AudioFileClip, VideoFileClip, concatenate_videoclips
14
 
15
  # Configure logging
16
  logging.basicConfig(level=logging.INFO)
17
  logger = logging.getLogger(__name__)
 
18
 
19
  # Initialize G4F client
20
  client = Client()
@@ -46,28 +47,46 @@ def get_task(prompt):
46
  def validate_response(response):
47
  try:
48
  data = json.loads(response)
49
- if isinstance(data, list) and all(isinstance(item, dict) and len(item) == 1 for item in data):
50
- return True
51
- except json.JSONDecodeError:
52
- pass
53
- return False
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
  def generate_text(prompt):
 
56
  max_retries = 4
57
  for attempt in range(max_retries):
58
- logger.info(f"Generating response for prompt: {prompt} (attempt {attempt+1})")
59
- response = client.chat.completions.create(
60
- model="llama-3.3-70b",
61
- messages=[{"role": "user", "content": get_task(prompt)}],
62
- web_search=False
63
- )
64
- response_text = response.choices[0].message.content
65
- logger.info(f"Generated response: {response_text}")
 
 
 
 
 
 
 
 
66
 
67
- if validate_response(response_text):
68
- return response_text
69
- logger.warning("Invalid response format, retrying...")
70
-
71
  logger.error("Failed to generate valid response after 4 attempts")
72
  return '[{"Киселёв":"К сожалению, не удалось расслышать вопрос. Пожалуйста, попробуйте еще раз."}, {"Путин":"Мы работаем над улучшением системы. Спасибо за понимание."}]'
73
 
@@ -81,20 +100,22 @@ def split_text(text, max_length=800):
81
  chunks.append(text[:split_at])
82
  text = text[split_at:].lstrip()
83
  chunks.append(text)
 
84
  return chunks
85
 
86
  def generate_audio(text, speaker_name):
87
  """Generate audio with thread-safe splitting and synthesis"""
88
- logger.info(f"Generating audio for {speaker_name} ({len(text)} chars)")
89
 
90
  # Switch between speakers
91
  silero_speaker = 'aidar' if speaker_name == 'Киселёв' else 'baya'
92
- logger.debug(f"Using Silero speaker: {silero_speaker} for {speaker_name}")
93
 
94
  chunks = split_text(text)
95
  audio_arrays = []
96
 
97
- for chunk in chunks:
 
98
  with tts_lock: # Ensure thread-safe TTS operations
99
  audio = model.apply_tts(
100
  ssml_text=f"<speak>{chunk}</speak>",
@@ -108,6 +129,7 @@ def generate_audio(text, speaker_name):
108
  full_audio = np.concatenate(audio_arrays)
109
  temp_filename = f"temp_{uuid.uuid4().hex}.wav"
110
  sf.write(temp_filename, full_audio, sample_rate)
 
111
  return temp_filename
112
 
113
  def process_line(args):
@@ -160,13 +182,14 @@ def process_line(args):
160
 
161
  def create_video(audio_files):
162
  """Create final video from processed audio files"""
163
- logger.info("Starting video creation process")
164
 
165
  try:
166
  # Sort audio files by their numerical index
167
  audio_files.sort(key=lambda x: int(x.split('t')[1].split('-')[0]))
168
  clips = []
169
 
 
170
  for audio_file in audio_files:
171
  speaker = audio_file.split('-')[1].split('.')[0]
172
  gif_file = GIF_MAPPING.get(speaker)
@@ -175,21 +198,23 @@ def create_video(audio_files):
175
  logger.error(f"Missing GIF file for {speaker}")
176
  continue
177
 
178
- logger.info(f"Processing {audio_file} with {gif_file}")
179
-
180
  audio_clip = AudioFileClip(audio_file)
 
 
181
  gif_clip = VideoFileClip(gif_file).loop(duration=audio_clip.duration)
182
  gif_clip = gif_clip.set_audio(audio_clip)
183
  clips.append(gif_clip)
 
184
 
185
  if not clips:
186
  raise ValueError("No valid video clips created")
187
 
188
  final_video = concatenate_videoclips(clips)
189
  video_filename = f"output_{uuid.uuid4().hex[:8]}.mp4"
190
- final_video.write_videofile(video_filename, codec='libx264', audio_codec='aac', logger='bar' if logger.level == logging.DEBUG else None)
191
 
192
- logger.info(f"Successfully created video: {video_filename}")
 
193
  return video_filename
194
 
195
  except Exception as e:
@@ -198,13 +223,15 @@ def create_video(audio_files):
198
 
199
  def process_prompt(prompt):
200
  """Main processing pipeline with parallel execution"""
201
- logger.info(f"Starting processing for prompt: {prompt}")
202
 
203
  try:
204
  # Generate script
205
  script = generate_text(prompt)
 
206
  script_data = json.loads(script)
207
-
 
208
  # Prepare tasks for parallel processing
209
  tasks = [(idx, speaker, text)
210
  for idx, item in enumerate(script_data)
@@ -214,21 +241,26 @@ def process_prompt(prompt):
214
  audio_files = []
215
  with ThreadPoolExecutor(max_workers=4) as executor:
216
  futures = [executor.submit(process_line, task) for task in tasks]
217
- for future in futures:
 
 
 
218
  result = future.result()
219
  if result:
220
  audio_files.append(result)
221
-
 
 
 
 
222
  # Create final video
223
  if not audio_files:
224
  raise ValueError("No audio files generated")
225
 
226
- video_filename = create_video(audio_files)
227
-
228
- return video_filename
229
 
230
  except Exception as e:
231
- logger.error(f"Processing failed: {str(e)}", exc_info=True)
232
  return None
233
  finally:
234
  # Cleanup audio files after video creation
 
8
  import uuid
9
  import logging
10
  import numpy as np
11
+ from concurrent.futures import ThreadPoolExecutor, as_completed
12
  import threading
13
  from moviepy.editor import AudioFileClip, VideoFileClip, concatenate_videoclips
14
 
15
  # Configure logging
16
  logging.basicConfig(level=logging.INFO)
17
  logger = logging.getLogger(__name__)
18
+ logger.setLevel(logging.INFO)
19
 
20
  # Initialize G4F client
21
  client = Client()
 
47
  def validate_response(response):
48
  try:
49
  data = json.loads(response)
50
+ if not isinstance(data, list):
51
+ logger.warning("Invalid response: Root element is not a list")
52
+ return False
53
+ for idx, item in enumerate(data):
54
+ if not isinstance(item, dict):
55
+ logger.warning(f"Invalid item #{idx+1}: Not a dictionary")
56
+ return False
57
+ if len(item) != 1:
58
+ logger.warning(f"Invalid item #{idx+1}: Contains {len(item)} keys instead of 1")
59
+ return False
60
+ key = next(iter(item.keys()))
61
+ if key not in ["Киселёв", "Путин"]:
62
+ logger.warning(f"Invalid item #{idx+1}: Unexpected speaker '{key}'")
63
+ return False
64
+ return True
65
+ except json.JSONDecodeError as e:
66
+ logger.warning(f"JSON decode error: {str(e)}")
67
+ return False
68
 
69
  def generate_text(prompt):
70
+ logger.info(f"Generating text for prompt: '{prompt}'")
71
  max_retries = 4
72
  for attempt in range(max_retries):
73
+ try:
74
+ response = client.chat.completions.create(
75
+ model="llama-3.3-70b",
76
+ messages=[{"role": "user", "content": get_task(prompt)}],
77
+ web_search=False
78
+ )
79
+ response_text = response.choices[0].message.content
80
+ logger.debug(f"Raw API response: {response_text}")
81
+
82
+ if validate_response(response_text):
83
+ logger.info(f"Successfully validated response (attempt {attempt+1})")
84
+ return response_text
85
+ logger.warning(f"Validation failed (attempt {attempt+1})")
86
+
87
+ except Exception as e:
88
+ logger.error(f"API call failed: {str(e)}")
89
 
 
 
 
 
90
  logger.error("Failed to generate valid response after 4 attempts")
91
  return '[{"Киселёв":"К сожалению, не удалось расслышать вопрос. Пожалуйста, попробуйте еще раз."}, {"Путин":"Мы работаем над улучшением системы. Спасибо за понимание."}]'
92
 
 
100
  chunks.append(text[:split_at])
101
  text = text[split_at:].lstrip()
102
  chunks.append(text)
103
+ logger.debug(f"Split text into {len(chunks)} chunks")
104
  return chunks
105
 
106
  def generate_audio(text, speaker_name):
107
  """Generate audio with thread-safe splitting and synthesis"""
108
+ logger.info(f"Generating audio for {speaker_name} ({len(text)} characters)")
109
 
110
  # Switch between speakers
111
  silero_speaker = 'aidar' if speaker_name == 'Киселёв' else 'baya'
112
+ logger.debug(f"Using Silero speaker: {silero_speaker}")
113
 
114
  chunks = split_text(text)
115
  audio_arrays = []
116
 
117
+ for idx, chunk in enumerate(chunks, 1):
118
+ logger.debug(f"Processing chunk {idx}/{len(chunks)}")
119
  with tts_lock: # Ensure thread-safe TTS operations
120
  audio = model.apply_tts(
121
  ssml_text=f"<speak>{chunk}</speak>",
 
129
  full_audio = np.concatenate(audio_arrays)
130
  temp_filename = f"temp_{uuid.uuid4().hex}.wav"
131
  sf.write(temp_filename, full_audio, sample_rate)
132
+ logger.debug(f"Temporary audio saved: {temp_filename}")
133
  return temp_filename
134
 
135
  def process_line(args):
 
182
 
183
  def create_video(audio_files):
184
  """Create final video from processed audio files"""
185
+ logger.info(f"Starting video creation with {len(audio_files)} audio files")
186
 
187
  try:
188
  # Sort audio files by their numerical index
189
  audio_files.sort(key=lambda x: int(x.split('t')[1].split('-')[0]))
190
  clips = []
191
 
192
+ logger.info("Processing audio-GIF pairs:")
193
  for audio_file in audio_files:
194
  speaker = audio_file.split('-')[1].split('.')[0]
195
  gif_file = GIF_MAPPING.get(speaker)
 
198
  logger.error(f"Missing GIF file for {speaker}")
199
  continue
200
 
 
 
201
  audio_clip = AudioFileClip(audio_file)
202
+ logger.info(f"🔊 {os.path.basename(audio_file)} ({audio_clip.duration:.1f}s)")
203
+
204
  gif_clip = VideoFileClip(gif_file).loop(duration=audio_clip.duration)
205
  gif_clip = gif_clip.set_audio(audio_clip)
206
  clips.append(gif_clip)
207
+ logger.debug(f"Processed {speaker} segment")
208
 
209
  if not clips:
210
  raise ValueError("No valid video clips created")
211
 
212
  final_video = concatenate_videoclips(clips)
213
  video_filename = f"output_{uuid.uuid4().hex[:8]}.mp4"
214
+ logger.info(f"🎬 Concatenating {len(clips)} clips (total: {final_video.duration:.1f}s)")
215
 
216
+ final_video.write_videofile(video_filename, codec='libx264', audio_codec='aac')
217
+ logger.info(f"✅ Successfully created video: {video_filename}")
218
  return video_filename
219
 
220
  except Exception as e:
 
223
 
224
  def process_prompt(prompt):
225
  """Main processing pipeline with parallel execution"""
226
+ logger.info(f"🚀 Starting processing for prompt: '{prompt}'")
227
 
228
  try:
229
  # Generate script
230
  script = generate_text(prompt)
231
+ logger.debug(f"Raw script data: {script}")
232
  script_data = json.loads(script)
233
+ logger.info(f"📝 Generated script with {len(script_data)} lines")
234
+
235
  # Prepare tasks for parallel processing
236
  tasks = [(idx, speaker, text)
237
  for idx, item in enumerate(script_data)
 
241
  audio_files = []
242
  with ThreadPoolExecutor(max_workers=4) as executor:
243
  futures = [executor.submit(process_line, task) for task in tasks]
244
+ total_tasks = len(futures)
245
+ logger.info(f"📦 Processing {total_tasks} audio segments in parallel")
246
+
247
+ for i, future in enumerate(as_completed(futures), 1):
248
  result = future.result()
249
  if result:
250
  audio_files.append(result)
251
+ remaining = total_tasks - i
252
+ logger.info(f"🔧 Processed {os.path.basename(result)} ({i}/{total_tasks}, {remaining} remaining)")
253
+ else:
254
+ logger.warning(f"⚠️ Failed to process task {i}/{total_tasks}")
255
+
256
  # Create final video
257
  if not audio_files:
258
  raise ValueError("No audio files generated")
259
 
260
+ return create_video(audio_files)
 
 
261
 
262
  except Exception as e:
263
+ logger.error(f"Processing failed: {str(e)}", exc_info=True)
264
  return None
265
  finally:
266
  # Cleanup audio files after video creation