dkounadis
/

artificial-styletts2

audio-generation

Model card Files Files and versions

artificial-styletts2 / demo.py

Dionyssos's picture

demo fx

9146509 over 1 year ago

2.56 kB

	import numpy as np
	import soundfile
	import msinference


	def _style_wav_stem(voice):
	    """Return the reference-wav file stem for a mimic-3 style voice id.

	    Path separators and the speaker marker become underscores, the
	    'cmu-arctic' dataset name is spelled with an underscore and the
	    '_low' quality suffix is dropped, e.g.

	        'en_US/vctk_low#p276' -> 'en_US_vctk_p276'
	    """
	    return (voice.replace('/', '_')
	                 .replace('#', '_')
	                 .replace('cmu-arctic', 'cmu_arctic')
	                 .replace('_low', ''))


	def tts_entry(text='A quick brown fox jumps over the lazy dog. Sweet dreams are made of this, I traveled the world and the seven seas.',
	              voice='af_ZA_google-nwu_1919',  # 'serbian', # 'en_US/vctk_low#p276', 'isl', 'abi',
	              speed=1.4,  # only for non-english
	              affect=True  # False = high clarity for partially sight
	              ):
	    """Synthesise `text` with the requested `voice` and return peak-normalised audio.

	    The backend is selected from the format of `voice`:

	    * contains 'en_US/' or 'en_UK/' -> StyleTTS2 with an English voice,
	      e.g. 'en_US/vctk_low#p276'
	      (voice list: https://audeering.github.io/shift/)
	    * otherwise contains '_' -> StyleTTS2 with a non-native English accent,
	      e.g. 'af_ZA_google-nwu_1919'
	      (voice list: https://huggingface.co/dkounadis/artificial-styletts2/discussions/1#6783e3b00e7d90facec060c6)
	    * anything else -> MMS TTS for a foreign language, e.g. 'deu'
	      (language list: https://huggingface.co/dkounadis/artificial-styletts2/blob/main/Utils/all_langs.csv)

	    Args:
	        text: Sentence(s) to speak.
	        voice: Voice / language identifier, see above.
	        speed: Speaking rate; only forwarded to the foreign-language
	            (MMS TTS) backend.
	        affect: Only used for 'en_US/' and 'en_UK/' voices. When False the
	            style vector is taken from the 'v2/' sub-directory
	            (high clarity, intended for partially sighted listeners).

	    Returns:
	        np.array holding the waveform scaled to [-1, 1]; intended to be
	        played / saved at 24 kHz.
	    """
	    if ('en_US/' in voice) or ('en_UK/' in voice):
	        # StyleTTS2 - mimic-3 format of voice (English text - English accent)
	        variant = '' if affect else 'v2/'
	        style_wav = ('assets/wavs/style_vector/' + variant
	                     + _style_wav_stem(voice) + '.wav')
	        x = msinference.inference(text,
	                                  msinference.compute_style(style_wav))
	    elif '_' in voice:
	        # StyleTTS2 - mimic-3 format of voice (English text - Foreign accent)
	        style_wav = ('assets/wavs/mimic3_foreign_4x/'
	                     + _style_wav_stem(voice) + '.wav')
	        x = msinference.inference(text,
	                                  msinference.compute_style(style_wav))
	    else:
	        # Fallback - MMS TTS - Non-English.
	        # Foreign text is passed in one piece (not split into sentences):
	        # avoids re-load of VITS & random speaker change issue.
	        x = msinference.foreign(text=text,
	                                lang=voice,  # e.g. 'romanian', 'serbian', 'hungarian'
	                                speed=speed)

	    # Volume: amplify speech to full [-1, 1]; the epsilon guards against
	    # a division by zero on an all-silent signal.
	    x /= np.abs(x).max() + 1e-7
	    print(x.shape, 'TTS OK')
	    return x

	# Render the default sentence with the default voice and save it as a 24 kHz wav.
	soundfile.write(file='demo.wav', data=tts_entry(), samplerate=24000)