Upload 2 files
Browse files- audio_builder.py +185 -0
- main.py +304 -0
audio_builder.py
ADDED
|
@@ -0,0 +1,185 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import soundfile
|
| 2 |
+
import pyrubberband
|
| 3 |
+
import configparser
|
| 4 |
+
import pathlib
|
| 5 |
+
import os
|
| 6 |
+
import io
|
| 7 |
+
|
| 8 |
+
from Scripts.shared_imports import *
|
| 9 |
+
import Scripts.TTS as TTS
|
| 10 |
+
from Scripts.utils import parseBool
|
| 11 |
+
|
| 12 |
+
from pydub import AudioSegment
|
| 13 |
+
from pydub.silence import detect_leading_silence
|
| 14 |
+
import langcodes
|
| 15 |
+
|
| 16 |
+
# Set working folder
|
| 17 |
+
workingFolder = "workingFolder"
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def trim_clip(inputSound):
    """Strip leading and trailing silence from a pydub AudioSegment.

    Uses pydub's detect_leading_silence on the clip and again on its
    reverse, so silence is removed from both ends.
    """
    def _cut_leading(segment):
        # Drop everything up to the first non-silent audio.
        return segment[detect_leading_silence(segment):]

    withoutLeading = _cut_leading(inputSound)
    # Trailing silence is just the leading silence of the reversed clip.
    strippedSound = _cut_leading(withoutLeading.reverse()).reverse()
    return strippedSound
|
| 26 |
+
|
| 27 |
+
# Function to insert audio into canvas at specific point
def insert_audio(canvas, audioToOverlay, startTimeMs):
    """Return a new canvas with audioToOverlay mixed in at startTimeMs (milliseconds)."""
    # AudioSegment.overlay returns a fresh segment, so the caller's canvas
    # object is never mutated.
    return canvas.overlay(audioToOverlay, position=int(startTimeMs))
|
| 35 |
+
|
| 36 |
+
# Function to create a canvas of a specific duration in milliseconds
def create_canvas(canvasDuration, frame_rate=None):
    """Return a silent AudioSegment of canvasDuration milliseconds.

    frame_rate defaults to the configured 'synth_sample_rate'.
    FIX: the default was previously `int(config['synth_sample_rate'])` in the
    signature, which is evaluated once at module import time — it could go
    stale and required `config` to be populated before this module loaded.
    It is now resolved per call; passing frame_rate explicitly behaves as before.
    """
    if frame_rate is None:
        frame_rate = int(config['synth_sample_rate'])
    canvas = AudioSegment.silent(duration=canvasDuration, frame_rate=frame_rate)
    return canvas
|
| 40 |
+
|
| 41 |
+
def get_speed_factor(subsDict, trimmedAudio, desiredDuration, num):
    """Measure how much clip *num* must be sped up to fit its subtitle slot.

    Reads the trimmed wav from the virtual file, compares its real length to
    desiredDuration (milliseconds), and stores the ratio under
    subsDict[num]['speed_factor']. Returns the updated subsDict.
    """
    clip = AudioSegment.from_file(trimmedAudio, format="wav")
    actualDurationMs = clip.duration_seconds * 1000
    # This MUST be done to reset the file pointer to the start of the file,
    # otherwise errors occur the next time the virtual file is read.
    trimmedAudio.seek(0)
    subsDict[num]['speed_factor'] = actualDurationMs / float(desiredDuration)
    return subsDict
|
| 50 |
+
|
| 51 |
+
def stretch_audio(audioFileToStretch, speedFactor, num):
    """Time-stretch a wav clip by speedFactor (rubberband, pitch preserved).

    audioFileToStretch may be a path or a file-like object readable by
    soundfile. Returns the stretched clip as a pydub AudioSegment; in debug
    mode the stretched wav is also written to the working folder as '{num}_s.wav'.
    """
    samples, sampleRate = soundfile.read(audioFileToStretch)

    # rubberband's '--fine' flag must be passed as a dict entry because
    # pyrubberband demands rbargs be a mapping of option -> value.
    stretchedSamples = pyrubberband.time_stretch(samples, sampleRate, speedFactor, rbargs={'--fine': '--fine'})

    stretchedBuffer = io.BytesIO()
    soundfile.write(stretchedBuffer, stretchedSamples, sampleRate, format='wav')
    if config['debug_mode']:
        # Save the stretched clip to disk for inspection.
        soundfile.write(os.path.join(workingFolder, f'{num}_s.wav'), stretchedSamples, sampleRate)
    return AudioSegment.from_file(stretchedBuffer, format="wav")
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
from pydub import AudioSegment
|
| 66 |
+
|
| 67 |
+
def build_audio(subsDict, langDict, totalAudioLength, twoPassVoiceSynth=False):
    """Assemble the final dubbed audio track from the per-line TTS clips.

    Trims silence off each synthesized clip, measures per-clip speed factors,
    optionally re-synthesizes at the measured rate (two-pass), stretches clips
    to fit their subtitle slots, overlays everything onto a silent canvas the
    length of the video, and exports the result in the configured format.

    Parameters:
        subsDict: dict of subtitle entries keyed by subtitle number, holding
            TTS file paths and timings; speed factors are written back into it.
        langDict: language info dict; 'languageCode' is used in the output name.
        totalAudioLength: length of the output canvas in milliseconds.
        twoPassVoiceSynth: re-synthesize clips after measuring speed factors
            (force-disabled for Azure, which sets clip duration directly).

    Returns the updated subsDict.
    """
    if cloudConfig['tts_service'] == 'azure':
        twoPassVoiceSynth = False # Azure doesn't need two pass voice synth, so disable it

    virtualTrimmedFileDict = {}
    # First trim silence off the audio files
    for key, value in subsDict.items():
        filePathTrimmed = os.path.join(workingFolder, str(key)) + "_t.wav"
        subsDict[key]['TTS_FilePath_Trimmed'] = filePathTrimmed

        # Trim the clip and re-write file
        rawClip = AudioSegment.from_file(value['TTS_FilePath'], format="mp3", frame_rate=int(config['synth_sample_rate']))
        trimmedClip = trim_clip(rawClip)

        if config['debug_mode']:
            trimmedClip.export(filePathTrimmed, format="wav")

        # Create virtual file in dictionary with audio to be read later
        tempTrimmedFile = io.BytesIO()
        trimmedClip.export(tempTrimmedFile, format="wav")
        virtualTrimmedFileDict[key] = tempTrimmedFile
        keyIndex = list(subsDict.keys()).index(key)
        print(f" Trimmed Audio: {keyIndex + 1} of {len(subsDict)}", end="\r")
    print("\n")

    # Calculates speed factor if necessary. Azure doesn't need this, so skip it
    if not cloudConfig['tts_service'] == 'azure':
        # Calculate speed factors for each clip, aka how much to stretch the audio
        # FIX: was 'subsDict.items' (a bound method, not iterable) -> TypeError
        for key, value in subsDict.items():
            subsDict = get_speed_factor(subsDict, virtualTrimmedFileDict[key], value['duration_ms'], num=key)
            keyIndex = list(subsDict.keys()).index(key)
            print(f" Calculated Speed Factor: {keyIndex + 1} of {len(subsDict)}", end="\r")
        print("\n")

    # If two pass voice synth is enabled, have API re-synthesize the clips at the new speed
    # Azure allows direct specification of audio duration, so no need to re-synthesize
    if twoPassVoiceSynth and not cloudConfig['tts_service'] == 'azure':
        # NOTE(review): tts_service can never be 'azure' inside this branch, so
        # the batch condition below is effectively always False; preserved as-is.
        if cloudConfig['batch_tts_synthesize'] and cloudConfig['tts_service'] == 'azure':
            subsDict = TTS.synthesize_dictionary_batch(subsDict, langDict, skipSynthesize=config['skip_synthesize'], secondPass=True)
        else:
            subsDict = TTS.synthesize_dictionary(subsDict, langDict, skipSynthesize=config['skip_synthesize'], secondPass=True)

        # FIX: was 'subsDict.items' without parentheses
        for key, value in subsDict.items():
            # Trim the clip and re-write file
            rawClip = AudioSegment.from_file(value['TTS_FilePath'], format="mp3", frame_rate=int(config['synth_sample_rate']))
            trimmedClip = trim_clip(rawClip)
            if config['debug_mode']:
                # Remove '.wav' from the end of the file path
                secondPassTrimmedFile = value['TTS_FilePath_Trimmed'][:-4] + "_p2_t.wav"
                trimmedClip.export(secondPassTrimmedFile, format="wav")
            trimmedClip.export(virtualTrimmedFileDict[key], format="wav")
            keyIndex = list(subsDict.keys()).index(key)
            print(f" Trimmed Audio (2nd Pass): {keyIndex + 1} of {len(subsDict)}", end="\r")
        print("\n")

        if config['force_stretch_with_twopass']:
            # FIX: was 'subsDict.items' without parentheses
            for key, value in subsDict.items():
                subsDict = get_speed_factor(subsDict, virtualTrimmedFileDict[key], value['duration_ms'], num=key)
                keyIndex = list(subsDict.keys()).index(key)
                print(f" Calculated Speed Factor (2nd Pass): {keyIndex + 1} of {len(subsDict)}", end="\r")
            print("\n")

    # Create canvas to overlay audio onto
    canvas = create_canvas(totalAudioLength)

    # Stretch audio and insert into canvas
    for key, value in subsDict.items():
        if (not twoPassVoiceSynth or config['force_stretch_with_twopass']) and not cloudConfig['tts_service'] == 'azure':
            stretchedClip = stretch_audio(virtualTrimmedFileDict[key], speedFactor=subsDict[key]['speed_factor'], num=key)
        else:
            stretchedClip = AudioSegment.from_file(virtualTrimmedFileDict[key], format="wav")
            virtualTrimmedFileDict[key].seek(0) # Not 100% sure if this is necessary but it was in the other place it is used

        canvas = insert_audio(canvas, stretchedClip, value['start_ms'])
        keyIndex = list(subsDict.keys()).index(key)
        print(f" Final Audio Processed: {keyIndex + 1} of {len(subsDict)}", end="\r")
    print("\n")

    # Use video file name to use in the name of the output file. Add language name and language code
    lang = langcodes.get(langDict['languageCode'])
    langName = langcodes.get(langDict['languageCode']).get(lang.to_alpha3()).display_name()
    if config['debug_mode'] and not os.path.isfile(ORIGINAL_VIDEO_PATH):
        outputFileName = "debug" + f" - {langName} - {langDict['languageCode']}."
    else:
        outputFileName = pathlib.Path(ORIGINAL_VIDEO_PATH).stem + f" - {langName} - {langDict['languageCode']}."
    # Set output path
    outputFileName = os.path.join(OUTPUT_FOLDER, outputFileName)

    # Determine string to use for output format and file extension based on config setting
    outputFormat = config['output_format'].lower()
    if outputFormat == "mp3":
        outputFileName += "mp3"
        formatString = "mp3"
    elif outputFormat == "wav":
        outputFileName += "wav"
        formatString = "wav"
    elif outputFormat == "aac":
        outputFileName += "aac"
        formatString = "adts" # Pydub doesn't accept "aac" as a format, so "adts" is used with the ".aac" extension

    canvas = canvas.set_channels(2) # Change from mono to stereo
    try:
        print("\nExporting audio file...")
        canvas.export(outputFileName, format=formatString, bitrate="192k")
    # FIX: was a bare 'except:' which also swallowed KeyboardInterrupt/SystemExit
    except Exception:
        outputFileName = outputFileName + ".bak"
        canvas.export(outputFileName, format=formatString, bitrate="192k")
        print("\nThere was an issue exporting the audio, it might be a permission error. The file was saved as a backup with the extension .bak")
        print("Try removing the .bak extension then listen to the file to see if it worked.\n")
        input("Press Enter to exit...")

    return subsDict
|
main.py
ADDED
|
@@ -0,0 +1,304 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
# -*- coding: UTF-8 -*-
|
| 3 |
+
|
| 4 |
+
# Project Title: Auto Synced Translated Dubs (https://github.com/ThioJoe/Auto-Synced-Translated-Dubs)
|
| 5 |
+
# Author / Project Owner: "ThioJoe" (https://github.com/ThioJoe)
|
| 6 |
+
# License: GPLv3
|
| 7 |
+
# NOTE: By contributing to this project, you agree to the terms of the GPLv3 license, and agree to grant the project owner the right to also provide or sell this software, including your contribution, to anyone under any other license, with no compensation to you.
|
| 8 |
+
|
| 9 |
+
version = '0.15.0'
|
| 10 |
+
print(f"------- 'Auto Synced Translated Dubs' script by ThioJoe - Release version {version} -------")
|
| 11 |
+
# winsound.py
|
| 12 |
+
|
| 13 |
+
# Import other files
|
| 14 |
+
from Scripts.shared_imports import *
|
| 15 |
+
import Scripts.TTS as TTS
|
| 16 |
+
import Scripts.audio_builder as audio_builder
|
| 17 |
+
import Scripts.auth as auth
|
| 18 |
+
import Scripts.translate as translate
|
| 19 |
+
from Scripts.utils import parseBool
|
| 20 |
+
|
| 21 |
+
# Import built in modules
|
| 22 |
+
import re
|
| 23 |
+
import copy
|
| 24 |
+
import winsound
|
| 25 |
+
|
| 26 |
+
# Import other modules
|
| 27 |
+
import ffprobe
|
| 28 |
+
|
| 29 |
+
# EXTERNAL REQUIREMENTS:
|
| 30 |
+
# rubberband binaries: https://breakfastquay.com/rubberband/ - Put rubberband.exe and sndfile.dll in the same folder as this script
|
| 31 |
+
# ffmpeg installed: https://ffmpeg.org/download.html
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
#---------------------------------------- Batch File Processing ----------------------------------------
|
| 36 |
+
|
| 37 |
+
# Get list of languages to process
languageNums = batchConfig['SETTINGS']['enabled_languages'].replace(' ','').split(',')
srtFile = os.path.abspath(batchConfig['SETTINGS']['srt_file_path'].strip("\""))

# Get original video file path, also allow you to debug using a subtitle file without having the original video file
videoFilePath = batchConfig['SETTINGS']['original_video_file_path']

# Options every [LANGUAGE-n] section must provide (also used to build batchSettings,
# replacing four copy-pasted has_option checks and a hand-written dict literal)
REQUIRED_LANGUAGE_OPTIONS = ('synth_language_code', 'synth_voice_name', 'translation_target_language', 'synth_voice_gender')

# Validate the number of sections
for num in languageNums:
    # Check if section exists
    if not batchConfig.has_section(f'LANGUAGE-{num}'):
        raise ValueError(f'Invalid language number in batch.ini: {num} - Make sure the section [LANGUAGE-{num}] exists')

# Validate the settings in each section
for num in languageNums:
    for optionName in REQUIRED_LANGUAGE_OPTIONS:
        if not batchConfig.has_option(f'LANGUAGE-{num}', optionName):
            raise ValueError(f'Invalid configuration in batch.ini: {num} - Make sure the option "{optionName}" exists under [LANGUAGE-{num}]')

# Create a dictionary of the settings from each section
batchSettings = {}
for num in languageNums:
    batchSettings[num] = {optionName: batchConfig[f'LANGUAGE-{num}'][optionName] for optionName in REQUIRED_LANGUAGE_OPTIONS}
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
#======================================== Parse SRT File ================================================
|
| 73 |
+
|
| 74 |
+
def parse_srt_file(srtFileLines, preTranslated=False):
    """Parse the lines of an SRT file into a dictionary keyed by subtitle number (as a string).

    Each entry holds start/end/duration in milliseconds (as strings),
    the subtitle text (wrapped lines joined with spaces), the gap until the
    next subtitle ('break_until_next'), and the raw timestamp line.

    Parameters:
        srtFileLines: list of lines as read from the .srt file.
        preTranslated: when True, skip applying the configured line buffer
            (it was already applied before the file was translated).
    """
    # Matches the following example with regex: 00:00:20,130 --> 00:00:23,419
    subtitleTimeLineRegex = re.compile(r'\d\d:\d\d:\d\d,\d\d\d --> \d\d:\d\d:\d\d,\d\d\d')

    # Create a dictionary
    subsDict = {}

    # Will add this many milliseconds of extra silence before and after each audio clip / spoken subtitle line
    addBufferMilliseconds = int(config['add_line_buffer_milliseconds'])

    # An entry starts with a bare number followed by a timestamp line, then one
    # or more text lines. The timestamps give start/end; their difference is the duration.
    for lineNum, line in enumerate(srtFileLines):
        line = line.strip()
        # FIX: guard the lookahead so a stray trailing number cannot raise IndexError
        if (line.isdigit() and lineNum + 2 < len(srtFileLines)
                and subtitleTimeLineRegex.match(srtFileLines[lineNum + 1])):
            lineWithTimestamps = srtFileLines[lineNum + 1].strip()
            lineWithSubtitleText = srtFileLines[lineNum + 2].strip()

            # If there are more lines after the subtitle text, add them to the text
            count = 3
            while (lineNum + count) < len(srtFileLines) and srtFileLines[lineNum + count].strip():
                lineWithSubtitleText += ' ' + srtFileLines[lineNum + count].strip()
                count += 1

            # Create empty dictionary with keys for start and end times and subtitle text
            subsDict[line] = {'start_ms': '', 'end_ms': '', 'duration_ms': '', 'text': '', 'break_until_next': '', 'srt_timestamps_line': lineWithTimestamps}

            time = lineWithTimestamps.split(' --> ')
            time1 = time[0].split(':')
            time2 = time[1].split(':')

            # Converts the time to milliseconds
            processedTime1 = int(time1[0]) * 3600000 + int(time1[1]) * 60000 + int(time1[2].split(',')[0]) * 1000 + int(time1[2].split(',')[1])
            processedTime2 = int(time2[0]) * 3600000 + int(time2[1]) * 60000 + int(time2[2].split(',')[0]) * 1000 + int(time2[2].split(',')[1])
            timeDifferenceMs = str(processedTime2 - processedTime1)

            # Adjust times with buffer
            if addBufferMilliseconds > 0 and not preTranslated:
                subsDict[line]['start_ms_buffered'] = str(processedTime1 + addBufferMilliseconds)
                subsDict[line]['end_ms_buffered'] = str(processedTime2 - addBufferMilliseconds)
                subsDict[line]['duration_ms_buffered'] = str((processedTime2 - addBufferMilliseconds) - (processedTime1 + addBufferMilliseconds))
            else:
                subsDict[line]['start_ms_buffered'] = str(processedTime1)
                subsDict[line]['end_ms_buffered'] = str(processedTime2)
                subsDict[line]['duration_ms_buffered'] = str(processedTime2 - processedTime1)

            # Set the keys in the dictionary to the values
            subsDict[line]['start_ms'] = str(processedTime1)
            subsDict[line]['end_ms'] = str(processedTime2)
            subsDict[line]['duration_ms'] = timeDifferenceMs
            subsDict[line]['text'] = lineWithSubtitleText

            # Goes back to previous line's dictionary and writes difference in time to current line.
            # FIX: was 'if lineNum > 0', which raised KeyError for the first subtitle
            # whenever the file began with blank lines (first entry not at lineNum 0).
            previousKey = str(int(line) - 1)
            if previousKey in subsDict:
                subsDict[previousKey]['break_until_next'] = processedTime1 - int(subsDict[previousKey]['end_ms'])
            else:
                subsDict[line]['break_until_next'] = 0

    # Apply the buffer to the start and end times by copying over the buffer values to main values
    if addBufferMilliseconds > 0 and not preTranslated:
        for key, value in subsDict.items():
            subsDict[key]['start_ms'] = value['start_ms_buffered']
            subsDict[key]['end_ms'] = value['end_ms_buffered']
            subsDict[key]['duration_ms'] = value['duration_ms_buffered']

    return subsDict
|
| 146 |
+
|
| 147 |
+
# ----------------------------------------
|
| 148 |
+
|
| 149 |
+
# Read the source SRT file into a list of lines (utf-8-sig strips any BOM)
with open(srtFile, 'r', encoding='utf-8-sig') as srtFileHandle:
    originalSubLines = srtFileHandle.readlines()

# Parse it into the dictionary the rest of the pipeline works from
originalLanguageSubsDict = parse_srt_file(originalSubLines)
|
| 154 |
+
|
| 155 |
+
#======================================== Get Total Duration ================================================
|
| 156 |
+
# Final audio file Should equal the length of the video in milliseconds
|
| 157 |
+
def get_duration(filename):
    """Return the duration of the first video stream of *filename*, in milliseconds.

    Probes the file with ffprobe (must be on PATH) and parses its JSON output.
    FIX: the ffprobe command previously hard-coded a literal placeholder
    string instead of using the *filename* parameter, so the probe never
    targeted the requested file. The command is now built as an argument
    list with shell=False, which also prevents shell interpretation of the path.
    """
    import subprocess, json
    result = subprocess.check_output(
        ['ffprobe', '-v', 'quiet', '-show_streams', '-select_streams', 'v:0', '-of', 'json', filename]).decode()
    fields = json.loads(result)['streams'][0]
    # Some containers expose duration only as a stream tag rather than a top-level field
    try:
        duration = fields['tags']['DURATION']
    except KeyError:
        duration = fields['duration']
    durationMS = round(float(duration)*1000) # Convert to milliseconds
    return durationMS
|
| 168 |
+
|
| 169 |
+
# Determine how long the final audio must be (it should match the video's length)
if config['debug_mode'] and ORIGINAL_VIDEO_PATH.lower() == "debug.test":
    # No real video in debug mode - fall back to the end timestamp of the last subtitle
    lastSubtitleKey = str(len(originalLanguageSubsDict))
    totalAudioLength = int(originalLanguageSubsDict[lastSubtitleKey]['end_ms'])
else:
    totalAudioLength = get_duration(ORIGINAL_VIDEO_PATH)
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
#============================================= Directory Validation =====================================================
|
| 178 |
+
|
| 179 |
+
# Ensure the output and working directories exist.
# FIX: exists-then-create is race-prone; exist_ok=True makes each call atomic and idempotent.
os.makedirs(OUTPUT_DIRECTORY, exist_ok=True)
os.makedirs(OUTPUT_FOLDER, exist_ok=True)
os.makedirs('workingFolder', exist_ok=True)
|
| 188 |
+
|
| 189 |
+
#======================================== Translation and Text-To-Speech ================================================
|
| 190 |
+
|
| 191 |
+
def manually_prepare_dictionary(dictionaryToPrep):
    """Mirror the translation step's output format for an already-usable dictionary.

    Copies each entry's 'text' value into a new 'translated_text' key
    (mutating the entries in place), then returns the dictionary re-keyed
    with integer keys, matching what the translation function produces.
    """
    for entry in dictionaryToPrep.values():
        entry['translated_text'] = entry['text']

    # The translation pipeline uses integer keys, so convert them here too
    return {int(key): entry for key, entry in dictionaryToPrep.items()}
|
| 199 |
+
|
| 200 |
+
def get_pretranslated_subs_dict(langData):
    """Look for an existing translated SRT file for this language and parse it.

    Searches the main output folder and, if present, the YouTube-synced
    folder (asking the user which to prefer when both contain files).
    Returns the parsed subtitles dictionary (int keys, with 'translated_text'
    filled in), or None when no matching file is found.
    """
    # Get list of files in the output folder
    files = os.listdir(OUTPUT_FOLDER)
    # Track which folder the chosen file list came from.
    # FIX: matching files were previously always opened from OUTPUT_FOLDER,
    # even when the YouTube-synced list was selected below.
    folderToUse = OUTPUT_FOLDER
    # Check if youtube-translated directory/files exist
    if os.path.exists(OUTPUT_YTSYNCED_FOLDER):
        altFiles = os.listdir(OUTPUT_YTSYNCED_FOLDER)
    else:
        altFiles = None

    # If alternative translations found in addition to the main output folder, ask user which to use
    if altFiles and files:
        print("Found YouTube-synced translations in: " + OUTPUT_YTSYNCED_FOLDER)
        userResponse = input("Use YouTube-synced translations instead of those in main output folder? (y/n): ")
        if userResponse.lower() == 'y':
            files = altFiles
            folderToUse = OUTPUT_YTSYNCED_FOLDER
            print("Using YouTube-synced translations...\n")
    elif altFiles and not files:
        print("Found YouTube-synced translations to use in: " + OUTPUT_YTSYNCED_FOLDER)
        files = altFiles
        folderToUse = OUTPUT_YTSYNCED_FOLDER

    # Check if any files ends with the specific language code and srt file extension
    for file in files:
        if file.replace(' ', '').endswith(f"-{langData['translation_target_language']}.srt"):
            # If so, open the file (from whichever folder it was listed in) and read the lines
            with open(os.path.join(folderToUse, file), 'r', encoding='utf-8-sig') as f:
                pretranslatedSubLines = f.readlines()
            print(f"Pre-translated file found: {file}")

            # Parse the srt file using function
            preTranslatedDict = parse_srt_file(pretranslatedSubLines, preTranslated=True)

            # Convert the keys to integers
            preTranslatedDict = manually_prepare_dictionary(preTranslatedDict)

            # Return the dictionary
            return preTranslatedDict

    # If no file is found, return None
    return None
|
| 239 |
+
|
| 240 |
+
# Process a language: Translate, Synthesize, and Build Audio
def process_language(langData, processedCount, totalLanguages):
    """Run the full pipeline for one target language: translate, synthesize, build audio.

    Parameters:
        langData: per-language settings dict from batchSettings; must contain
            'translation_target_language', 'synth_voice_name',
            'synth_language_code', 'synth_voice_gender', plus the
            'translate_service' and 'formality' keys (added by
            translate.set_translation_info — TODO confirm).
        processedCount: 1-based index of this language, used for progress output.
        totalLanguages: total number of languages being processed.

    Returns None. May return early (no audio built) when configured to stop
    after translation, or when skip_translation is on but no pre-translated
    subtitle file is found.
    """
    # Repackage the settings under the key names the translate/TTS/audio modules expect
    langDict = {
        'targetLanguage': langData['translation_target_language'],
        'voiceName': langData['synth_voice_name'],
        'languageCode': langData['synth_language_code'],
        'voiceGender': langData['synth_voice_gender'],
        'translateService': langData['translate_service'],
        'formality': langData['formality']
    }

    # Deep copy so processing this language cannot mutate the shared original dictionary
    individualLanguageSubsDict = copy.deepcopy(originalLanguageSubsDict)

    # Print language being processed
    print(f"\n----- Beginning Processing of Language ({processedCount}/{totalLanguages}): {langDict['languageCode']} -----")

    # Check for special case where original language is the same as the target language
    if langDict['languageCode'].lower() == config['original_language'].lower():
        print("Original language is the same as the target language. Skipping translation.")
        individualLanguageSubsDict = manually_prepare_dictionary(individualLanguageSubsDict)

    elif config['skip_translation'] == False:
        # Translate
        individualLanguageSubsDict = translate.translate_dictionary(individualLanguageSubsDict, langDict, skipTranslation=config['skip_translation'])
        if config['stop_after_translation']:
            print("Stopping at translation is enabled. Skipping TTS and building audio.")
            return

    elif config['skip_translation'] == True:
        print("Skip translation enabled. Checking for pre-translated subtitles...")
        # Check if pre-translated subtitles exist
        pretranslatedSubsDict = get_pretranslated_subs_dict(langData)
        if pretranslatedSubsDict != None:
            individualLanguageSubsDict = pretranslatedSubsDict
        else:
            print(f"\nPre-translated subtitles not found for language '{langDict['languageCode']}' in folder '{OUTPUT_FOLDER}'. Skipping.")
            print(f"Note: Ensure the subtitle filename for this language ends with: ' - {langData['translation_target_language']}.srt'\n")
            return

    # Synthesize
    # Azure supports batching many lines per request; other services go line by line
    if cloudConfig['batch_tts_synthesize'] == True and cloudConfig['tts_service'] == 'azure':
        individualLanguageSubsDict = TTS.synthesize_dictionary_batch(individualLanguageSubsDict, langDict, skipSynthesize=config['skip_synthesize'])
    else:
        individualLanguageSubsDict = TTS.synthesize_dictionary(individualLanguageSubsDict, langDict, skipSynthesize=config['skip_synthesize'])

    # Build audio
    individualLanguageSubsDict = audio_builder.build_audio(individualLanguageSubsDict, langDict, totalAudioLength, config['two_pass_voice_synth'])
|
| 287 |
+
|
| 288 |
+
|
| 289 |
+
#======================================== Main Program ================================================
# Counter for number of languages processed
processedCount = 0
totalLanguages = len(batchSettings)

# Process all languages in the batch, one at a time
print(f"\n----- Beginning Processing of Languages -----")
batchSettings = translate.set_translation_info(batchSettings)
for processedCount, langData in enumerate(batchSettings.values(), start=1):
    # Process the current language
    process_language(langData, processedCount, totalLanguages)

# # Play a system sound to indicate completion
# sound_name = winsound.MB_ICONASTERISK # represents the 'Asterisk' system sound
# winsound.MessageBeep(sound_name) # Play the system sound
|