Update app.py
Browse files
app.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
-
import re
|
| 2 |
import numpy as np
|
|
|
|
| 3 |
import concurrent.futures
|
| 4 |
import gradio as gr
|
| 5 |
from datetime import datetime
|
|
@@ -40,6 +40,15 @@ def silence(duration, fps=44100):
|
|
| 40 |
Returns a silent AudioClip of the specified duration.
|
| 41 |
"""
|
| 42 |
return AudioArrayClip(np.zeros((int(fps*duration), 2)), fps=fps)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
|
| 44 |
# Define the passcode
|
| 45 |
PASSCODE = "show_feedback_db"
|
|
@@ -78,7 +87,6 @@ css = """
|
|
| 78 |
}
|
| 79 |
"""
|
| 80 |
|
| 81 |
-
|
| 82 |
# Function to save feedback or provide access to the database file
|
| 83 |
def handle_feedback(feedback):
|
| 84 |
feedback = feedback.strip() # Clean up leading/trailing whitespace
|
|
@@ -123,15 +131,14 @@ def transcribe_video(video_path):
|
|
| 123 |
start = segment["start"]
|
| 124 |
end = segment["end"]
|
| 125 |
text = segment["text"]
|
| 126 |
-
|
| 127 |
-
word_count = len(re.findall(r'\w+', text))
|
| 128 |
transcript_with_timestamps.append({
|
| 129 |
"start": start,
|
| 130 |
"end": end,
|
| 131 |
-
"text": text
|
| 132 |
-
"word_count": word_count
|
| 133 |
})
|
| 134 |
-
|
|
|
|
| 135 |
total_words += word_count
|
| 136 |
total_duration += (end - start)
|
| 137 |
|
|
@@ -270,13 +277,13 @@ def process_entry(entry, i, video_width, video_height, add_voiceover, target_lan
|
|
| 270 |
audio_segment = None
|
| 271 |
if add_voiceover:
|
| 272 |
segment_audio_path = f"segment_{i}_voiceover.wav"
|
| 273 |
-
|
|
|
|
| 274 |
audio_clip = AudioFileClip(segment_audio_path)
|
| 275 |
# Get and log all methods in AudioFileClip
|
| 276 |
logger.info("Methods in AudioFileClip:")
|
| 277 |
for method in dir(audio_clip):
|
| 278 |
logger.info(method)
|
| 279 |
-
desired_duration = entry["end"] - entry["start"]
|
| 280 |
|
| 281 |
# Log duration of the audio clip and the desired duration for debugging.
|
| 282 |
logger.debug(f"Audio clip duration: {audio_clip.duration}, Desired duration: {desired_duration}")
|
|
@@ -355,7 +362,26 @@ def generate_voiceover(translated_json, language, output_audio_path):
|
|
| 355 |
except Exception as e:
|
| 356 |
raise ValueError(f"Error generating voiceover: {e}")
|
| 357 |
|
| 358 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 359 |
"""
|
| 360 |
Generate voiceover from translated text for a given language using OpenAI TTS API.
|
| 361 |
"""
|
|
@@ -373,11 +399,13 @@ def generate_voiceover_OpenAI(translated_json, language, output_audio_path):
|
|
| 373 |
|
| 374 |
while retry_count < max_retries:
|
| 375 |
try:
|
|
|
|
| 376 |
# Create the speech using OpenAI TTS API
|
| 377 |
response = client.audio.speech.create(
|
| 378 |
model=model,
|
| 379 |
voice=voice,
|
| 380 |
-
input=full_text
|
|
|
|
| 381 |
)
|
| 382 |
# Save the audio to the specified path
|
| 383 |
with open(output_audio_path, 'wb') as f:
|
|
|
|
|
|
|
| 1 |
import numpy as np
|
| 2 |
+
import re
|
| 3 |
import concurrent.futures
|
| 4 |
import gradio as gr
|
| 5 |
from datetime import datetime
|
|
|
|
| 40 |
Returns a silent AudioClip of the specified duration.
|
| 41 |
"""
|
| 42 |
return AudioArrayClip(np.zeros((int(fps*duration), 2)), fps=fps)
|
| 43 |
+
|
| 44 |
+
def count_words_or_characters(text):
    """Count speakable units in *text* for speech-rate estimation.

    Non-Chinese text is counted in words (runs of ASCII letters/digits);
    Chinese text is counted in individual characters. The two counts are
    summed so mixed-language transcripts get a sensible combined total.

    Args:
        text: Input string; may mix Latin and CJK script.

    Returns:
        int: number of non-Chinese words plus number of Chinese characters.
    """
    # Count non-Chinese words. NOTE: the previous pattern wrapped the run in
    # \b word boundaries; because CJK ideographs count as \w in Python's re,
    # a Latin word glued to a Chinese character (e.g. "我爱apple") had no
    # boundary on that side and the word was silently dropped. A bare
    # alphanumeric run counts it correctly.
    non_chinese_words = len(re.findall(r'[a-zA-Z0-9]+', text))

    # Count Chinese characters (CJK Unified Ideographs block U+4E00–U+9FFF).
    chinese_chars = len(re.findall(r'[\u4e00-\u9fff]', text))

    return non_chinese_words + chinese_chars
|
| 52 |
|
| 53 |
# Define the passcode
|
| 54 |
PASSCODE = "show_feedback_db"
|
|
|
|
| 87 |
}
|
| 88 |
"""
|
| 89 |
|
|
|
|
| 90 |
# Function to save feedback or provide access to the database file
|
| 91 |
def handle_feedback(feedback):
|
| 92 |
feedback = feedback.strip() # Clean up leading/trailing whitespace
|
|
|
|
| 131 |
start = segment["start"]
|
| 132 |
end = segment["end"]
|
| 133 |
text = segment["text"]
|
| 134 |
+
|
|
|
|
| 135 |
transcript_with_timestamps.append({
|
| 136 |
"start": start,
|
| 137 |
"end": end,
|
| 138 |
+
"text": text
|
|
|
|
| 139 |
})
|
| 140 |
+
|
| 141 |
+
word_count = count_words_or_characters(text)
|
| 142 |
total_words += word_count
|
| 143 |
total_duration += (end - start)
|
| 144 |
|
|
|
|
| 277 |
audio_segment = None
|
| 278 |
if add_voiceover:
|
| 279 |
segment_audio_path = f"segment_{i}_voiceover.wav"
|
| 280 |
+
desired_duration = entry["end"] - entry["start"]
|
| 281 |
+
generate_voiceover_OpenAI([entry], target_language, desired_duration, segment_audio_path)
|
| 282 |
audio_clip = AudioFileClip(segment_audio_path)
|
| 283 |
# Get and log all methods in AudioFileClip
|
| 284 |
logger.info("Methods in AudioFileClip:")
|
| 285 |
for method in dir(audio_clip):
|
| 286 |
logger.info(method)
|
|
|
|
| 287 |
|
| 288 |
# Log duration of the audio clip and the desired duration for debugging.
|
| 289 |
logger.debug(f"Audio clip duration: {audio_clip.duration}, Desired duration: {desired_duration}")
|
|
|
|
| 362 |
except Exception as e:
|
| 363 |
raise ValueError(f"Error generating voiceover: {e}")
|
| 364 |
|
| 365 |
+
def truncated_linear(x):
    """Map a characters-per-second rate to a TTS speed factor.

    Below 15 chars/sec the speed stays at 1.0; above 25 it saturates at
    1.2; in between it rises linearly from 1.0 to 1.2.

    Args:
        x: characters-per-second rate of the text to be spoken.

    Returns:
        Speed multiplier in [1, 1.2].
    """
    lower, upper = 15, 25
    # Guard clauses for the two flat regions of the piecewise function.
    if x < lower:
        return 1
    if x > upper:
        return 1.2
    # Linear ramp between the two plateaus (slope inlined, same arithmetic
    # as computing it in a temporary first).
    return 1 + (1.2 - 1) / (25 - 15) * (x - 15)
|
| 373 |
+
|
| 374 |
+
def calculate_speed(text, desired_duration):
    """Pick a TTS speed so *text* roughly fits into *desired_duration*.

    Args:
        text: the text that will be synthesized.
        desired_duration: target duration in seconds.

    Returns:
        Speed multiplier from truncated_linear, in [1, 1.2].
    """
    # Characters per second; the small epsilon guards against a
    # zero-length segment causing a division by zero.
    chars_per_second = len(text) / (desired_duration + 0.001)

    # Clamp the rate onto the allowed speed range via the truncated
    # linear ramp.
    return truncated_linear(chars_per_second)
|
| 383 |
+
|
| 384 |
+
def generate_voiceover_OpenAI(translated_json, language, desired_duration, output_audio_path):
|
| 385 |
"""
|
| 386 |
Generate voiceover from translated text for a given language using OpenAI TTS API.
|
| 387 |
"""
|
|
|
|
| 399 |
|
| 400 |
while retry_count < max_retries:
|
| 401 |
try:
|
| 402 |
+
speed_tts = calculate_speed(full_text, desired_duration)
|
| 403 |
# Create the speech using OpenAI TTS API
|
| 404 |
response = client.audio.speech.create(
|
| 405 |
model=model,
|
| 406 |
voice=voice,
|
| 407 |
+
input=full_text,
|
| 408 |
+
speed=speed_tts
|
| 409 |
)
|
| 410 |
# Save the audio to the specified path
|
| 411 |
with open(output_audio_path, 'wb') as f:
|