# talkaway / app.py
"""Talkaway: record speech, transcribe/translate it with Whisper, speak it back with Coqui TTS."""
import tempfile

import gradio as gr
import requests
import whisper
from neon_tts_plugin_coqui import CoquiTTS

# Language codes supported by the Coqui TTS plugin (populates the UI radio).
LANGUAGES = list(CoquiTTS.langs.keys())
default_lang = "en"
title = "Talk to (almost) anyone in the world"

# Instantiate the TTS engine and load the Whisper ASR model once at startup,
# so per-request handlers only run inference.
coquiTTS = CoquiTTS()
model_med = whisper.load_model("base")
def tts(audio, language):
    """Run speech-to-text on the recording, then synthesize the translated text.

    Args:
        audio: Path to the recorded input audio file (Gradio ``filepath``).
        language: Language code forwarded to Whisper decoding and to the
            Coqui speaker selection.

    Returns:
        Path to a temporary WAV file containing the synthesized speech.
    """
    # whisper_stt returns (transcript, translation, detected_lang); only the
    # translation is spoken back, so the other two are ignored.
    _transcript, translation, _detected_lang = whisper_stt(audio, language)
    # delete=False: Gradio must be able to read the file after the handle closes.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        coquiTTS.get_tts(translation, fp, speaker={"language": language})
    return fp.name
def whisper_stt(audio, language):
    """Transcribe an audio file and translate it with Whisper.

    Args:
        audio: Path to the input audio file.
        language: Language code passed to the 'translate' decoding options.

    Returns:
        Tuple of ``(transcript_text, translated_text, detected_lang)``.
    """
    print("Inside Whisper STT")
    # Load audio and pad/trim it to fit Whisper's 30-second input window.
    audio = whisper.load_audio(audio)
    audio = whisper.pad_or_trim(audio)
    # Make the log-Mel spectrogram and move it to the same device as the model.
    mel = whisper.log_mel_spectrogram(audio).to(model_med.device)
    # Detect the spoken language (highest-probability candidate).
    _, probs = model_med.detect_language(mel)
    lang = max(probs, key=probs.get)
    print(f"Detected language: {lang}")
    # Decode twice: transcribe in the detected language, and translate.
    # fp16=False keeps decoding CPU-compatible.
    options_transc = whisper.DecodingOptions(fp16=False, language=lang, task='transcribe')
    options_transl = whisper.DecodingOptions(fp16=False, language=language, task='translate')
    result_transc = whisper.decode(model_med, mel, options_transc)
    result_transl = whisper.decode(model_med, mel, options_transl)
    print(f"transcript is : {result_transc.text}")
    print(f"translation is : {result_transl.text}")
    return result_transc.text, result_transl.text, lang
# Build the web UI: a microphone input and language picker on the left,
# the synthesized audio output beside them, and a submit button wiring
# everything through tts().
with gr.Blocks() as blocks:
    gr.Markdown(f"<h1 style='text-align: center; margin-bottom: 1rem'>{title}</h1>")
    with gr.Row():
        with gr.Column():
            mic_input = gr.Audio(
                source="microphone",
                type="filepath",
                label='Record your voice here',
            )
            lang_choice = gr.Radio(
                label="Language",
                choices=LANGUAGES,
                value=default_lang,
            )
            with gr.Row():
                submit_btn = gr.Button("Submit", variant="primary")
        tts_output = gr.Audio(label="Output", interactive=False)
    # On click: tts(audio_path, language) -> path of synthesized WAV.
    submit_btn.click(tts, [mic_input, lang_choice], [tts_output])
blocks.launch()