Spaces:

fvyounesi
/

STT

Running

App Files Files Community

STT / app.py

fvyounesi

Update app.py

9321315 verified about 2 months ago

raw

history blame contribute delete

2.08 kB

	import gradio as gr
	import speech_recognition as sr
	from pydub import AudioSegment
	import os

	def audio_preproccessing():
	    """Denoise ``Audio.wav`` in place with the ``deepFilter`` command-line tool.

	    Runs ``deepFilter`` on ``Audio.wav`` in the current working directory and
	    then moves ``Audio_DeepFilterNet3.wav`` (the file the tool is expected to
	    write next to it -- NOTE(review): confirm for the installed DeepFilterNet
	    version) back over ``Audio.wav``.

	    Raises:
	        FileNotFoundError: If the ``deepFilter`` executable or its expected
	            output file cannot be found.
	        subprocess.CalledProcessError: If ``deepFilter`` exits with a
	            non-zero status.
	    """
	    # Imported locally so this function does not depend on a module-level
	    # import being present elsewhere in the file.
	    import subprocess

	    # An argument list avoids going through a shell, and check=True surfaces a
	    # failed denoising run here instead of as a confusing rename error below.
	    subprocess.run(["deepFilter", "Audio.wav"], check=True)

	    # os.replace overwrites the destination on every platform; os.rename
	    # refuses to overwrite an existing file on Windows.
	    os.replace("Audio_DeepFilterNet3.wav", "Audio.wav")
	    print("Audio Preprocessing Done!")

	def transcribe_audio(audio_file_path, input_language, denoising, music="No"):
	    """Transcribe an audio file to text using Google's speech recognition.

	    The input is padded with silence on both sides, written to ``Audio.wav``
	    in the current working directory, optionally denoised, and then passed to
	    ``recognize_google``.

	    Args:
	        audio_file_path: Path of the audio file to transcribe. A falsy value
	            (``None`` or ``""``) means no audio was supplied.
	        input_language: Language code handed to the recognizer, e.g.
	            ``"fa-IR"`` or ``"en-US"``.
	        denoising: ``"Yes"`` runs ``audio_preproccessing()`` on the padded
	            file before recognition; any other value skips it.
	        music: Currently unused. It defaults to ``"No"`` so the function can
	            be called with only the first three arguments.

	    Returns:
	        The recognized text, or a human-readable message when no audio was
	        supplied, the speech could not be understood, or the recognition
	        request failed.
	    """
	    # Without this guard a missing input crashes inside os.path.splitext.
	    if not audio_file_path:
	        return "No audio provided. Please upload or record audio first."

	    # The container format is taken from the file extension, e.g. "mp3", "wav".
	    file_ext = os.path.splitext(audio_file_path)[1][1:]
	    audio = AudioSegment.from_file(audio_file_path, format=file_ext)

	    # 2.5 s of silence on each side -- presumably so that the ambient-noise
	    # calibration below reads silence rather than the start of the speech.
	    padding = AudioSegment.silent(duration=2500)
	    padded_audio = padding + audio + padding

	    # NOTE: fixed file name shared with audio_preproccessing(); concurrent
	    # calls would overwrite each other's working file.
	    wav_path = "Audio.wav"
	    padded_audio.export(wav_path, format="wav")

	    if denoising == "Yes":
	        audio_preproccessing()

	    recognizer = sr.Recognizer()
	    try:
	        with sr.AudioFile(wav_path) as source:
	            recognizer.adjust_for_ambient_noise(source)
	            audio_data = recognizer.record(source)

	        # Google API call
	        return recognizer.recognize_google(audio_data, language=input_language)
	    except sr.UnknownValueError:
	        return "Could not understand the audio"
	    except sr.RequestError as e:
	        return f"Could not request results; {e}"

	# Gradio interface
	def _build_interface():
	    """Assemble the speech-to-text Gradio interface around ``transcribe_audio``.

	    Inputs, in the order they are passed to the callback: an audio file path
	    (uploaded or recorded), a language-code dropdown, and a denoising
	    dropdown. The single output is a multi-line text box.
	    """
	    input_components = [
	        gr.Audio(
	            sources=["upload", "microphone"],
	            type="filepath",
	            label="Audio Input",
	        ),
	        gr.Dropdown(
	            choices=["fa-IR", "en-US", "ar-SA"],
	            label="Choose the right language:",
	        ),
	        gr.Dropdown(choices=["No", "Yes"], label="Need Denoising?"),
	    ]
	    result_box = gr.Textbox(label="Transcription results", lines=10)

	    return gr.Interface(
	        fn=transcribe_audio,
	        inputs=input_components,
	        outputs=result_box,
	        title="Speech-to-Text Service",
	        description="Upload or record audio and get transcription using our STT service.",
	    )


	iface = _build_interface()

	iface.launch()