Upload 3 files
Browse files- batch.ini +124 -0
- cloud_service_settings.ini +36 -0
- config.ini +80 -0
batch.ini
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# You can add as many sections for languages as you need. Just add to the number in the [LANGUAGE-#] for each one
|
| 2 |
+
# Each section must contain the synth language code, synth voice name, and translation target language
|
| 3 |
+
# A value for synth_voice_gender only matters if the service requires it, like Google TTS, but the setting must be there even if not used
|
| 4 |
+
|
| 5 |
+
[SETTINGS]
|
| 6 |
+
|
| 7 |
+
# Enter the language numbers you wish to process when the batch file is read.
|
| 8 |
+
# This is also useful for multiple presets of a single language - You could just enable one of them at a time
|
| 9 |
+
# Just enter the numbers separated by commas: For example, to use LANGUAGE-1 and LANGUAGE-2, put "1,2"
|
| 10 |
+
# Please note that supported languages and their codes vary by service. See the supported languages for each service in the README
|
| 11 |
+
enabled_languages = 5
|
| 12 |
+
|
| 13 |
+
# You can use a full file path, or the name of the file if it's in the same directory
|
| 14 |
+
# The video file name will also be used to name the final output audio file
|
| 15 |
+
original_video_file_path = video.mp4
|
| 16 |
+
srt_file_path = subtitles.srt
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
[LANGUAGE-0]
|
| 21 |
+
# English / Testing
|
| 22 |
+
# Remember, the translation target code may be different from synth_language_code because it is the one used by the translation service, not the TTS service
|
| 23 |
+
# See the codes here: https://cloud.google.com/translate/docs/languages / https://www.deepl.com/docs-api/translating-text/request/
|
| 24 |
+
translation_target_language = en
|
| 25 |
+
# The language of the synthesized voice. Might not necessarily be exactly the same as the translation_target_language variable above
|
| 26 |
+
# Examples - English: en-US Spanish: es-us or es-mx
|
| 27 |
+
synth_language_code = en-US
|
| 28 |
+
# Choose one from here so it matches the target language: https://cloud.google.com/text-to-speech/docs/voices / https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support?tabs=stt-tts
|
| 29 |
+
# Examples - English: en-US-Neural2-I (Google) or en-US-JasonNeural (Azure) Spanish: es-US-Neural2-B (Google) or es-MX-CecilioNeural (Azure)
|
| 30 |
+
synth_voice_name = en-US-JasonNeural
|
| 31 |
+
# Google Only: Should match the gender of the chosen voice above
|
| 32 |
+
synth_voice_gender = MALE
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
[LANGUAGE-1]
|
| 36 |
+
# Spanish
|
| 37 |
+
translation_target_language = es
|
| 38 |
+
synth_language_code = es-MX
|
| 39 |
+
synth_voice_name = es-MX-CecilioNeural
|
| 40 |
+
synth_voice_gender = MALE
|
| 41 |
+
|
| 42 |
+
[LANGUAGE-2]
|
| 43 |
+
# Hindi
|
| 44 |
+
translation_target_language = hi
|
| 45 |
+
synth_language_code = hi-IN
|
| 46 |
+
synth_voice_name = hi-IN-MadhurNeural
|
| 47 |
+
synth_voice_gender = MALE
|
| 48 |
+
|
| 49 |
+
[LANGUAGE-3]
|
| 50 |
+
# Arabic
|
| 51 |
+
translation_target_language = ar
|
| 52 |
+
synth_language_code = ar-EG
|
| 53 |
+
synth_voice_name = ar-EG-ShakirNeural
|
| 54 |
+
synth_voice_gender = MALE
|
| 55 |
+
|
| 56 |
+
[LANGUAGE-4]
|
| 57 |
+
# Russian
|
| 58 |
+
translation_target_language = ru
|
| 59 |
+
synth_language_code = ru-RU
|
| 60 |
+
synth_voice_name = ru-RU-DmitryNeural
|
| 61 |
+
synth_voice_gender = MALE
|
| 62 |
+
|
| 63 |
+
[LANGUAGE-5]
|
| 64 |
+
# Portuguese
|
| 65 |
+
translation_target_language = pt
|
| 66 |
+
synth_language_code = pt-BR
|
| 67 |
+
synth_voice_name = pt-BR-FabioNeural
|
| 68 |
+
synth_voice_gender = MALE
|
| 69 |
+
|
| 70 |
+
[LANGUAGE-6]
|
| 71 |
+
# Italian
|
| 72 |
+
translation_target_language = it
|
| 73 |
+
synth_language_code = it-IT
|
| 74 |
+
synth_voice_name = it-IT-DiegoNeural
|
| 75 |
+
synth_voice_gender = MALE
|
| 76 |
+
|
| 77 |
+
[LANGUAGE-7]
|
| 78 |
+
# Indonesian
|
| 79 |
+
translation_target_language = id
|
| 80 |
+
synth_language_code = id-ID
|
| 81 |
+
synth_voice_name = id-ID-ArdiNeural
|
| 82 |
+
synth_voice_gender = MALE
|
| 83 |
+
|
| 84 |
+
[LANGUAGE-8]
|
| 85 |
+
# Japanese
|
| 86 |
+
translation_target_language = ja
|
| 87 |
+
synth_language_code = ja-JP
|
| 88 |
+
synth_voice_name = ja-JP-NaokiNeural
|
| 89 |
+
synth_voice_gender = MALE
|
| 90 |
+
|
| 91 |
+
[LANGUAGE-9]
|
| 92 |
+
# Korean
|
| 93 |
+
translation_target_language = ko
|
| 94 |
+
synth_language_code = ko-KR
|
| 95 |
+
synth_voice_name = ko-KR-BongJinNeural
|
| 96 |
+
synth_voice_gender = MALE
|
| 97 |
+
|
| 98 |
+
[LANGUAGE-10]
|
| 99 |
+
# German
|
| 100 |
+
translation_target_language = de
|
| 101 |
+
synth_language_code = de-DE
|
| 102 |
+
synth_voice_name = de-DE-KasperNeural
|
| 103 |
+
synth_voice_gender = MALE
|
| 104 |
+
|
| 105 |
+
[LANGUAGE-11]
|
| 106 |
+
# Chinese (Mandarin Simplified)
|
| 107 |
+
translation_target_language = zh
|
| 108 |
+
synth_language_code = zh-CN
|
| 109 |
+
synth_voice_name = zh-CN-YunyeNeural
|
| 110 |
+
synth_voice_gender = MALE
|
| 111 |
+
|
| 112 |
+
[LANGUAGE-12]
|
| 113 |
+
# Turkish
|
| 114 |
+
translation_target_language = tr
|
| 115 |
+
synth_language_code = tr-TR
|
| 116 |
+
synth_voice_name = tr-TR-AhmetNeural
|
| 117 |
+
synth_voice_gender = MALE
|
| 118 |
+
|
| 119 |
+
[LANGUAGE-13]
|
| 120 |
+
#
|
| 121 |
+
translation_target_language =
|
| 122 |
+
synth_language_code =
|
| 123 |
+
synth_voice_name =
|
| 124 |
+
synth_voice_gender =
|
cloud_service_settings.ini
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[CLOUD]
|
| 2 |
+
# Which TTS service will you use?
|
| 3 |
+
# Possible Values: azure / google
|
| 4 |
+
tts_service = azure
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
# Which translation service will you use? DeepL is slower but more accurate
|
| 8 |
+
# Possible Values: google / deepl
|
| 9 |
+
# Note: If you will be skipping translation, this doesn't matter
|
| 10 |
+
translate_service = google
|
| 11 |
+
|
| 12 |
+
# In case the translation language is not supported by DeepL, use Google Translate as a fallback
|
| 13 |
+
# Ignored if translate_service is set to google
|
| 14 |
+
use_fallback_google_translate = True
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
# The project name / project ID in the Google Cloud console. Required for translating
|
| 18 |
+
google_project_id = your-project-name
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
# API Key for your DeepL account. Required for translating if translate_service = deepl
|
| 22 |
+
deepl_api_key = yourkeyxxxxxx
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
# API Key for your Speech resource in Azure (cognitive speech)
|
| 26 |
+
azure_speech_key = yourkeyxxxxxx
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
# The Location/Region of the speech resource. This should be listed on the same page as the API keys.
|
| 30 |
+
# Example: eastus
|
| 31 |
+
azure_speech_region = brazilsouth
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
# Sends request to TTS service to create multiple audio clips simultaneously. MUCH faster.
|
| 35 |
+
# Currently only supported when using azure
|
| 36 |
+
batch_tts_synthesize = False
|
config.ini
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[SETTINGS]
|
| 2 |
+
|
| 3 |
+
# Set to True if you don't want to translate the subtitles. If so, ignore the language variables
|
| 4 |
+
skip_translation = True
|
| 5 |
+
|
| 6 |
+
# Set to True if you don't want to synthesize the audio. For example, if you already did that and are testing
|
| 7 |
+
skip_synthesize = True
|
| 8 |
+
|
| 9 |
+
# Set to True if you want to stop the program after translating the subtitles.
|
| 10 |
+
# For example, if you want to manually review the resulting subtitles before synthesizing the audio.
|
| 11 |
+
# Note that to resume the process, you must set this to False again and set skip_translation to True
|
| 12 |
+
stop_after_translation = False
|
| 13 |
+
|
| 14 |
+
# The BCP-47 language code for the original text language
|
| 15 |
+
original_language = pt-BR
|
| 16 |
+
|
| 17 |
+
# Applies to DeepL translations only - Whether to have it use more or less formal language
|
| 18 |
+
# Possible Values: default | more | less
|
| 19 |
+
formality_preference = default
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
# The format/codec of the final audio file
|
| 23 |
+
# Possible Values: mp3 | aac | wav
|
| 24 |
+
output_format = aac
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
# Must be a codec from 'Supported Audio Encodings' section here: https://cloud.google.com/speech-to-text/docs/encoding#audio-encodings
|
| 28 |
+
# This determines the codec returned by the API, not the one produced by the program! You probably shouldn't change this, it might not work otherwise
|
| 29 |
+
synth_audio_encoding = MP3
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
# Enter the native sample rate for the voice audio provided by the TTS service
|
| 33 |
+
# This is usually 24KHz (24000), but some services like Azure offer higher quality audio at 48KHz (48000)
|
| 34 |
+
# Enter only number digits, no commas or anything
|
| 35 |
+
synth_sample_rate = 24000
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
# This will drastically improve the quality of the final result, BUT see note below
|
| 39 |
+
# Note! Setting this to true will make it so instead of just stretching the audio clips, it will have the API generate new audio clips with adjusted speaking rates
|
| 40 |
+
# This can't be done on the first pass because we don't know how long the audio clips will be until we generate them
|
| 41 |
+
two_pass_voice_synth = True
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
# On the second pass, each audio clip will be extremely close to the desired length, but a bit off
|
| 45 |
+
# Set this to True if you want to stretch the second-pass clip anyway to be exact, down to the millisecond
|
| 46 |
+
# However, this will degrade the voice and make it sound similar to if it was just 1-Pass
|
| 47 |
+
force_stretch_with_twopass = False
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
# Azure Only: Sets the exact pause in milliseconds that the TTS voice will pause after a period between sentences
|
| 51 |
+
# Set it to "default" to keep it default which is quite slow. I find 80ms is pretty good
|
| 52 |
+
# Note: Changing this from default adds about 60 characters per line to the total Azure character usage count
|
| 53 |
+
# Possible values: default | Any integer
|
| 54 |
+
azure_sentence_pause = 80
|
| 55 |
+
|
| 56 |
+
# Azure Only: Sets the exact pause in milliseconds that the TTS voice will pause after a comma.
|
| 57 |
+
# Set it to "default" to keep it default which is quite slow.
|
| 58 |
+
# It doesn't seem to follow this number exactly, and seems to have a minimum around 50ms
|
| 59 |
+
# Note: Changing this from default adds about 60 characters per line to the total Azure character usage count
|
| 60 |
+
# Possible values: default | Any integer
|
| 61 |
+
azure_comma_pause = 50
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
# Adds a silence buffer between each spoken clip, but keeps the speech "centered" at the right spot so it's still synced
|
| 65 |
+
# > To be clear the total length of the audio file will remain the same, each spoken clip gets shrunk within it
|
| 66 |
+
# Useful if your subtitles file butts all the beginning and end timings right up against each other
|
| 67 |
+
# Note, this applies both before and after, so the total extra between clips will be 2x this
|
| 68 |
+
# Warning, setting this too high could result in the TTS speaking extremely fast to fit into remaining clip duration
|
| 69 |
+
# > Around 25 - 50 milliseconds is a good starting point
|
| 70 |
+
add_line_buffer_milliseconds = 0
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
# If the combination of two adjacent subtitle lines is below this amount, and one starts at the same time the other ends, it will combine the lines
|
| 74 |
+
# This should improve the speech synthesis by reducing unnatural splits in spoken sentences.
|
| 75 |
+
# Setting this to zero or a low number will effectively disable it
|
| 76 |
+
combine_subtitles_max_chars = 200
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
# Mostly prevents the program from deleting files in the working directory, and also generates files for each audio step
|
| 80 |
+
debug_mode = False
|