Upload 3 files
Browse files- batch.ini +124 -0
- cloud_service_settings.ini +36 -0
- config.ini +80 -0
batch.ini
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# You can add as many sections for languages as you need. Just add to the number in the [LANGUAGE-#] for each one
|
| 2 |
+
# Each section must contain the synth language code, synth voice name, and translation target language
|
| 3 |
+
# A value for synth_voice_gender only matters if the service requires it, like Google TTS, but the setting must be there even if not used
|
| 4 |
+
|
| 5 |
+
[SETTINGS]
|
| 6 |
+
|
| 7 |
+
# Enter the language numbers you wish to process when the batch file is read.
|
| 8 |
+
# This is also useful for multiple presets of a single language - You could just enable one of them at a time
|
| 9 |
+
# Just enter the numbers separated by commas: For example, to use LANGUAGE-1 and LANGUAGE-2, put "1,2"
|
| 10 |
+
# Please note that supported languages and their codes vary by service. See the supported languages for each service in the README
|
| 11 |
+
enabled_languages = 5
|
| 12 |
+
|
| 13 |
+
# You can use a full file path, or the name of the file if it's in the same directory
|
| 14 |
+
# The video file name will also be used to name the final output audio file
|
| 15 |
+
original_video_file_path = video.mp4
|
| 16 |
+
srt_file_path = subtitles.srt
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
[LANGUAGE-0]
|
| 21 |
+
# English / Testing
|
| 22 |
+
# Remember, the translation target code may be different from synth_language_code because it is the one used by the translation service, not the TTS service
|
| 23 |
+
# See the codes here: https://cloud.google.com/translate/docs/languages / https://www.deepl.com/docs-api/translating-text/request/
|
| 24 |
+
translation_target_language = en
|
| 25 |
+
# The language of the synthesized voice. Might not necessarily be exactly the same as the translation_target_language variable above
|
| 26 |
+
# Examples - English: en-US Spanish: es-us or es-mx
|
| 27 |
+
synth_language_code = en-US
|
| 28 |
+
# Choose one from here so it matches the target language: https://cloud.google.com/text-to-speech/docs/voices / https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support?tabs=stt-tts
|
| 29 |
+
# Examples - English: en-US-Neural2-I (Google) or en-US-JasonNeural (Azure) Spanish: es-US-Neural2-B (Google) or es-MX-CecilioNeural (Azure)
|
| 30 |
+
synth_voice_name = en-US-JasonNeural
|
| 31 |
+
# Google Only: Should match the gender of the chosen voice above
|
| 32 |
+
synth_voice_gender = MALE
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
[LANGUAGE-1]
|
| 36 |
+
# Spanish
|
| 37 |
+
translation_target_language = es
|
| 38 |
+
synth_language_code = es-MX
|
| 39 |
+
synth_voice_name = es-MX-CecilioNeural
|
| 40 |
+
synth_voice_gender = MALE
|
| 41 |
+
|
| 42 |
+
[LANGUAGE-2]
|
| 43 |
+
# Hindi
|
| 44 |
+
translation_target_language = hi
|
| 45 |
+
synth_language_code = hi-IN
|
| 46 |
+
synth_voice_name = hi-IN-MadhurNeural
|
| 47 |
+
synth_voice_gender = MALE
|
| 48 |
+
|
| 49 |
+
[LANGUAGE-3]
|
| 50 |
+
# Arabic
|
| 51 |
+
translation_target_language = ar
|
| 52 |
+
synth_language_code = ar-EG
|
| 53 |
+
synth_voice_name = ar-EG-ShakirNeural
|
| 54 |
+
synth_voice_gender = MALE
|
| 55 |
+
|
| 56 |
+
[LANGUAGE-4]
|
| 57 |
+
# Russian
|
| 58 |
+
translation_target_language = ru
|
| 59 |
+
synth_language_code = ru-RU
|
| 60 |
+
synth_voice_name = ru-RU-DmitryNeural
|
| 61 |
+
synth_voice_gender = MALE
|
| 62 |
+
|
| 63 |
+
[LANGUAGE-5]
|
| 64 |
+
# Portuguese
|
| 65 |
+
translation_target_language = pt
|
| 66 |
+
synth_language_code = pt-BR
|
| 67 |
+
synth_voice_name = pt-BR-FabioNeural
|
| 68 |
+
synth_voice_gender = MALE
|
| 69 |
+
|
| 70 |
+
[LANGUAGE-6]
|
| 71 |
+
# Italian
|
| 72 |
+
translation_target_language = it
|
| 73 |
+
synth_language_code = it-IT
|
| 74 |
+
synth_voice_name = it-IT-DiegoNeural
|
| 75 |
+
synth_voice_gender = MALE
|
| 76 |
+
|
| 77 |
+
[LANGUAGE-7]
|
| 78 |
+
# Indonesian
|
| 79 |
+
translation_target_language = id
|
| 80 |
+
synth_language_code = id-ID
|
| 81 |
+
synth_voice_name = id-ID-ArdiNeural
|
| 82 |
+
synth_voice_gender = MALE
|
| 83 |
+
|
| 84 |
+
[LANGUAGE-8]
|
| 85 |
+
# Japanese
|
| 86 |
+
translation_target_language = ja
|
| 87 |
+
synth_language_code = ja-JP
|
| 88 |
+
synth_voice_name = ja-JP-NaokiNeural
|
| 89 |
+
synth_voice_gender = MALE
|
| 90 |
+
|
| 91 |
+
[LANGUAGE-9]
|
| 92 |
+
# Korean
|
| 93 |
+
translation_target_language = ko
|
| 94 |
+
synth_language_code = ko-KR
|
| 95 |
+
synth_voice_name = ko-KR-BongJinNeural
|
| 96 |
+
synth_voice_gender = MALE
|
| 97 |
+
|
| 98 |
+
[LANGUAGE-10]
|
| 99 |
+
# German
|
| 100 |
+
translation_target_language = de
|
| 101 |
+
synth_language_code = de-DE
|
| 102 |
+
synth_voice_name = de-DE-KasperNeural
|
| 103 |
+
synth_voice_gender = MALE
|
| 104 |
+
|
| 105 |
+
[LANGUAGE-11]
|
| 106 |
+
# Chinese (Mandarin Simplified)
|
| 107 |
+
translation_target_language = zh
|
| 108 |
+
synth_language_code = zh-CN
|
| 109 |
+
synth_voice_name = zh-CN-YunyeNeural
|
| 110 |
+
synth_voice_gender = MALE
|
| 111 |
+
|
| 112 |
+
[LANGUAGE-12]
|
| 113 |
+
# Turkish
|
| 114 |
+
translation_target_language = tr
|
| 115 |
+
synth_language_code = tr-TR
|
| 116 |
+
synth_voice_name = tr-TR-AhmetNeural
|
| 117 |
+
synth_voice_gender = MALE
|
| 118 |
+
|
| 119 |
+
[LANGUAGE-13]
|
| 120 |
+
#
|
| 121 |
+
translation_target_language =
|
| 122 |
+
synth_language_code =
|
| 123 |
+
synth_voice_name =
|
| 124 |
+
synth_voice_gender =
|
cloud_service_settings.ini
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[CLOUD]
|
| 2 |
+
# Which TTS service will you use?
|
| 3 |
+
# Possible Values: azure / google
|
| 4 |
+
tts_service = azure
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
# Which translation service will you use? DeepL is slower but more accurate
|
| 8 |
+
# Possible Values: google / deepl
|
| 9 |
+
# Note: If you will be skipping translation, this doesn't matter
|
| 10 |
+
translate_service = google
|
| 11 |
+
|
| 12 |
+
# In case the translation language is not supported by DeepL, use Google Translate as a fallback
|
| 13 |
+
# Ignored if translate_service is set to google
|
| 14 |
+
use_fallback_google_translate = True
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
# The project name / project ID in the Google Cloud console. Required for translating
|
| 18 |
+
google_project_id = your-project-name
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
# API Key for your DeepL account. Required for translating if translate_service = deepl
|
| 22 |
+
deepl_api_key = yourkeyxxxxxx
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
# API Key for your Speech resource in Azure (cognitive speech)
|
| 26 |
+
azure_speech_key = yourkeyxxxxxx
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
# The Location/Region of the speech resource. This should be listed on the same page as the API keys.
|
| 30 |
+
# Example: eastus
|
| 31 |
+
azure_speech_region = brazilsouth
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
# Sends request to TTS service to create multiple audio clips simultaneously. MUCH faster.
|
| 35 |
+
# Currently only supported when using azure
|
| 36 |
+
batch_tts_synthesize = False
|
config.ini
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[SETTINGS]
|
| 2 |
+
|
| 3 |
+
# Set to True if you don't want to translate the subtitles. If so, ignore the language variables
|
| 4 |
+
skip_translation = True
|
| 5 |
+
|
| 6 |
+
# Set to True if you don't want to synthesize the audio. For example, if you already did that and are testing
|
| 7 |
+
skip_synthesize = True
|
| 8 |
+
|
| 9 |
+
# Set to True if you want to stop the program after translating the subtitles.
|
| 10 |
+
# For example, if you want to manually review the resulting subtitles before synthesizing the audio.
|
| 11 |
+
# Note that to resume the process, you must set this to False again and set skip_translation to True
|
| 12 |
+
stop_after_translation = False
|
| 13 |
+
|
| 14 |
+
# The BCP-47 language code for the original text language
|
| 15 |
+
original_language = pt-BR
|
| 16 |
+
|
| 17 |
+
# Applies to DeepL translations only - Whether to have it use more or less formal language
|
| 18 |
+
# Possible Values: default | more | less
|
| 19 |
+
formality_preference = default
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
# The format/codec of the final audio file
|
| 23 |
+
# Possible Values: mp3 | aac | wav
|
| 24 |
+
output_format = aac
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
# Must be a codec from 'Supported Audio Encodings' section here: https://cloud.google.com/speech-to-text/docs/encoding#audio-encodings
|
| 28 |
+
# This determines the codec returned by the API, not the one produced by the program! You probably shouldn't change this, it might not work otherwise
|
| 29 |
+
synth_audio_encoding = MP3
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
# Enter the native sample rate for the voice audio provided by the TTS service
|
| 33 |
+
# This is usually 24KHz (24000), but some services like Azure offer higher quality audio at 48KHz (48000)
|
| 34 |
+
# Enter only number digits, no commas or anything
|
| 35 |
+
synth_sample_rate = 24000
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
# This will drastically improve the quality of the final result, BUT see note below
|
| 39 |
+
# Note! Setting this to true will make it so instead of just stretching the audio clips, it will have the API generate new audio clips with adjusted speaking rates
|
| 40 |
+
# This can't be done on the first pass because we don't know how long the audio clips will be until we generate them
|
| 41 |
+
two_pass_voice_synth = True
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
# On the second pass, each audio clip will be extremely close to the desired length, but a bit off
|
| 45 |
+
# Set this to True if you want to stretch the second-pass clip anyway to be exact, down to the millisecond
|
| 46 |
+
# However, this will degrade the voice and make it sound similar to if it was just 1-Pass
|
| 47 |
+
force_stretch_with_twopass = False
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
# Azure Only: Sets the exact pause in milliseconds that the TTS voice will pause after a period between sentences
|
| 51 |
+
# Set it to "default" to keep it default which is quite slow. I find 80ms is pretty good
|
| 52 |
+
# Note: Changing this from default adds about 60 characters per line to the total Azure character usage count
|
| 53 |
+
# Possible values: default | Any integer
|
| 54 |
+
azure_sentence_pause = 80
|
| 55 |
+
|
| 56 |
+
# Azure Only: Sets the exact pause in milliseconds that the TTS voice will pause after a comma.
|
| 57 |
+
# Set it to "default" to keep it default which is quite slow.
|
| 58 |
+
# It doesn't seem to follow this number exactly, and seems to have a minimum around 50ms
|
| 59 |
+
# Note: Changing this from default adds about 60 characters per line to the total Azure character usage count
|
| 60 |
+
# Possible values: default | Any integer
|
| 61 |
+
azure_comma_pause = 50
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
# Adds a silence buffer between each spoken clip, but keeps the speech "centered" at the right spot so it's still synced
|
| 65 |
+
# > To be clear the total length of the audio file will remain the same, each spoken clip gets shrunk within it
|
| 66 |
+
# Useful if your subtitles file butts all the beginning and end timings right up against each other
|
| 67 |
+
# Note, this applies both before and after, so the total extra between clips will be 2x this
|
| 68 |
+
# Warning, setting this too high could result in the TTS speaking extremely fast to fit into remaining clip duration
|
| 69 |
+
# > Around 25 - 50 milliseconds is a good starting point
|
| 70 |
+
add_line_buffer_milliseconds = 0
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
# If the combination of two adjacent subtitle lines is below this amount, and one starts at the same time the other ends, it will combine the lines
|
| 74 |
+
# This should improve the speech synthesis by reducing unnatural splits in spoken sentences.
|
| 75 |
+
# Setting this to zero or a low number will effectively disable it
|
| 76 |
+
combine_subtitles_max_chars = 200
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
# Mostly prevents the program from deleting files in the working directory, and also generates files for each audio step
|
| 80 |
+
debug_mode = False
|