Update app.py
Browse files
app.py
CHANGED
|
@@ -125,22 +125,47 @@ def transcribe_video(video_path):
|
|
| 125 |
}
|
| 126 |
for segment in result["segments"]
|
| 127 |
]
|
| 128 |
-
|
|
|
|
|
|
|
|
|
|
| 129 |
|
| 130 |
# Function to get the appropriate translation model based on target language
|
| 131 |
-
def get_translation_model(target_language):
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 140 |
|
| 141 |
-
def translate_text(transcription_json, target_language):
|
| 142 |
# Load the translation model for the specified target language
|
| 143 |
-
translation_model_id = get_translation_model(target_language)
|
| 144 |
logger.debug(f"Translation model: {translation_model_id}")
|
| 145 |
translator = pipeline("translation", model=translation_model_id)
|
| 146 |
|
|
@@ -271,7 +296,7 @@ def replace_audio_in_video(video_path: str, new_audio_path: str, final_video_pat
|
|
| 271 |
except Exception as e:
|
| 272 |
logger.error(f"Error replacing audio in video: {e}")
|
| 273 |
|
| 274 |
-
def upload_and_manage(file,
|
| 275 |
if file is None:
|
| 276 |
return None, [], None, "No file uploaded. Please upload a video/audio file."
|
| 277 |
|
|
@@ -286,17 +311,17 @@ def upload_and_manage(file, language, mode="transcription"):
|
|
| 286 |
list_available_fonts()
|
| 287 |
|
| 288 |
# Step 1: Transcribe audio from uploaded media file and get timestamps
|
| 289 |
-
transcription_json = transcribe_video(file.name)
|
| 290 |
|
| 291 |
# Step 2: Translate the transcription
|
| 292 |
-
translated_json = translate_text(transcription_json,
|
| 293 |
|
| 294 |
# Step 3: Add transcript to video based on timestamps
|
| 295 |
add_transcript_to_video(file.name, translated_json, output_video_path)
|
| 296 |
|
| 297 |
# Step 4 (Optional): Generate voiceover if mode is "Transcription with Voiceover"
|
| 298 |
if mode == "Transcription with Voiceover":
|
| 299 |
-
generate_voiceover(translated_json,
|
| 300 |
replace_audio_in_video(output_video_path, voiceover_path, output_video_path)
|
| 301 |
|
| 302 |
# Convert translated JSON into a format for the editable table
|
|
|
|
| 125 |
}
|
| 126 |
for segment in result["segments"]
|
| 127 |
]
|
| 128 |
+
# Get the detected language
|
| 129 |
+
detected_language = result["language"]
|
| 130 |
+
logger.debug(f"Detected language:\n{detected_language}")
|
| 131 |
+
return transcript_with_timestamps, detected_language
|
| 132 |
|
| 133 |
# Function to get the appropriate translation model based on source and target language
|
| 134 |
+
def get_translation_model(source_language, target_language):
    """
    Build the Helsinki-NLP OPUS-MT model identifier for a language pair.

    String language codes are matched case-insensitively and with
    surrounding whitespace ignored, so " EN " is treated as "en".

    Parameters:
    - source_language (str): Code of the input content's language (e.g., 'en').
      Replaced by 'en' when it is not one of the supported codes.
    - target_language (str): Code of the language to translate into
      (e.g., 'es', 'fr'). Replaced by 'zh' when it is not one of the
      supported codes.

    Returns:
    - str: The translation model identifier, in the form
      "Helsinki-NLP/opus-mt-<source>-<target>". When source and target end
      up identical, the pair falls back to 'en' -> 'zh'.

    NOTE(review): this only formats the identifier; it does not check that a
    model with that name is actually published for the pair -- confirm
    against the model hub for the pairs you rely on.
    """
    # Supported language codes. Kept as a tuple (not a set) so that an
    # unhashable argument is simply "not supported" instead of raising.
    allowable_languages = ("en", "es", "fr", "zh", "de", "it", "pt", "ja", "ko", "ru")
    supported = ", ".join(allowable_languages)

    def _normalize(code):
        # Only strings can be normalized; anything else (e.g. None) is left
        # untouched and rejected by the membership checks below.
        return code.strip().lower() if isinstance(code, str) else code

    source_language = _normalize(source_language)
    target_language = _normalize(target_language)

    # Validate source and target languages, substituting the defaults.
    # Logged as warnings because the substitution changes which model is
    # used for the caller's content.
    if source_language not in allowable_languages:
        logger.warning(f"Invalid source language '{source_language}'. Supported languages are: {supported}")
        source_language = "en"  # Default to 'en'

    if target_language not in allowable_languages:
        logger.warning(f"Invalid target language '{target_language}'. Supported languages are: {supported}")
        target_language = "zh"  # Default to 'zh'

    if source_language == target_language:
        # There is no same-language pair to translate between, so use the
        # default pair instead.
        logger.warning(
            f"Source and target language are both '{source_language}'. Falling back to 'en' -> 'zh'."
        )
        source_language = "en"  # Default to 'en'
        target_language = "zh"  # Default to 'zh'

    return f"Helsinki-NLP/opus-mt-{source_language}-{target_language}"
|
| 165 |
|
| 166 |
+
def translate_text(transcription_json, source_language, target_language):
|
| 167 |
# Load the translation model for the specified source/target language pair
|
| 168 |
+
translation_model_id = get_translation_model(source_language, target_language)
|
| 169 |
logger.debug(f"Translation model: {translation_model_id}")
|
| 170 |
translator = pipeline("translation", model=translation_model_id)
|
| 171 |
|
|
|
|
| 296 |
except Exception as e:
|
| 297 |
logger.error(f"Error replacing audio in video: {e}")
|
| 298 |
|
| 299 |
+
def upload_and_manage(file, target_language, mode="transcription"):
|
| 300 |
if file is None:
|
| 301 |
return None, [], None, "No file uploaded. Please upload a video/audio file."
|
| 302 |
|
|
|
|
| 311 |
list_available_fonts()
|
| 312 |
|
| 313 |
# Step 1: Transcribe audio from uploaded media file and get timestamps
|
| 314 |
+
transcription_json, source_language = transcribe_video(file.name)
|
| 315 |
|
| 316 |
# Step 2: Translate the transcription
|
| 317 |
+
translated_json = translate_text(transcription_json, source_language, target_language)
|
| 318 |
|
| 319 |
# Step 3: Add transcript to video based on timestamps
|
| 320 |
add_transcript_to_video(file.name, translated_json, output_video_path)
|
| 321 |
|
| 322 |
# Step 4 (Optional): Generate voiceover if mode is "Transcription with Voiceover"
|
| 323 |
if mode == "Transcription with Voiceover":
|
| 324 |
+
generate_voiceover(translated_json, target_language, voiceover_path)
|
| 325 |
replace_audio_in_video(output_video_path, voiceover_path, output_video_path)
|
| 326 |
|
| 327 |
# Convert translated JSON into a format for the editable table
|