Spaces:

NeoBoy
/

quranASR_nemo

Sleeping

App Files Files Community

quranASR_nemo / app.py

NeoBoy

Update app.py

b207925 verified about 2 months ago

raw

history blame contribute delete

1.7 kB

	import gradio as gr
	from pathlib import Path
	import nemo.collections.asr as nemo_asr
	import librosa
	import soundfile as sf

	# Directory that contains this script, as a plain string; predict() writes
	# its converted WAV file into this directory.
	base_path = str(Path(__file__).parent)

	# Convert audio to 16kHz WAV
	def _as_float_mono(samples, channels_last):
	    """Return ``samples`` as a 1-D floating-point array.

	    ``channels_last`` selects which axis of a 2-D array holds the channels:
	    axis 1 when true (``(frames, channels)``), axis 0 otherwise.
	    """
	    if not hasattr(samples, "dtype"):
	        raise ValueError(
	            f"Audio samples must be a NumPy array, got {type(samples)}"
	        )

	    kind = samples.dtype.kind
	    if kind == "i":
	        # Signed integer PCM: scale by the type's full range into [-1.0, 1.0).
	        full_scale = float(1 << (8 * samples.dtype.itemsize - 1))
	        samples = samples.astype("float32") / full_scale
	    elif kind != "f":
	        samples = samples.astype("float32")

	    if samples.ndim == 2:
	        # Average the channels so a single-channel signal is written out.
	        samples = samples.mean(axis=1 if channels_last else 0)
	    return samples


	def convert_wav_to_16k(input_wav, output_file_path, sr=16000):
	    """Write ``input_wav`` to ``output_file_path`` resampled to ``sr`` Hz.

	    Args:
	        input_wav: Either a path to an audio file (``str`` or ``Path``) or a
	            two-item tuple holding a sample rate and a NumPy sample array.
	            Both ``(sample_rate, samples)`` -- the order Gradio uses for
	            ``type="numpy"`` audio -- and ``(samples, sample_rate)`` are
	            accepted; the array is recognised by having at least one
	            dimension.
	        output_file_path: Destination passed to ``soundfile.write``.
	        sr: Target sample rate in Hz.

	    Returns:
	        ``output_file_path``, unchanged.

	    Raises:
	        ValueError: If ``input_wav`` is ``None``, is a tuple that does not
	            hold exactly two items, holds samples that are not an array, or
	            is of an unsupported type.
	    """
	    if input_wav is None:
	        raise ValueError("No audio file provided")

	    if isinstance(input_wav, (str, Path)):  # filepath
	        y, s = librosa.load(input_wav, sr=sr)
	    elif isinstance(input_wav, tuple):  # sample rate + numpy array
	        if len(input_wav) != 2:
	            raise ValueError(
	                "Audio tuple must contain exactly a sample rate and samples, "
	                f"got {len(input_wav)} items"
	            )
	        first, second = input_wav
	        # A scalar in first position means the Gradio (rate, samples) order;
	        # otherwise fall back to the (samples, rate) order.
	        rate_first = getattr(first, "ndim", 0) == 0
	        if rate_first:
	            orig_sr, y = first, second
	        else:
	            y, orig_sr = first, second
	        # Gradio delivers multi-channel audio as (frames, channels); the
	        # (samples, rate) order is assumed to follow librosa's
	        # (channels, frames) layout -- TODO confirm against callers.
	        y = _as_float_mono(y, channels_last=rate_first)
	        y = librosa.resample(y, orig_sr=orig_sr, target_sr=sr)
	        s = sr
	    else:
	        raise ValueError(f"Unsupported audio input type: {type(input_wav)}")

	    sf.write(output_file_path, y, s)
	    print(f'"{output_file_path}" has been converted to {s}Hz')
	    return output_file_path

	# Load NeMo model and run transcription
	# Lazily created model instance, shared by every call so the checkpoint is
	# restored from disk only once per process.
	_asr_model = None


	def _get_asr_model():
	    """Return the ASR model, restoring it from the .nemo file on first use."""
	    global _asr_model
	    if _asr_model is None:
	        # The path is relative, so it resolves against the current working
	        # directory of the process.
	        _asr_model = nemo_asr.models.EncDecHybridRNNTCTCBPEModel.restore_from(
	            restore_path="FastConformer-Custom-Tokenizer.nemo"
	        )
	    return _asr_model


	def loading_nemo_and_prediction(processed_wav):
	    """Transcribe one audio file with the NeMo hybrid RNNT/CTC model.

	    Args:
	        processed_wav: Path of the audio file to transcribe.

	    Returns:
	        Whatever the model's ``transcribe`` returns for a one-item batch.
	    """
	    arabic_asr = _get_asr_model()
	    # Passed positionally: the first parameter of ``transcribe`` is the list
	    # of audio paths, but its keyword name differs between NeMo releases
	    # (NOTE(review): ``paths2audio_files`` vs ``audio`` -- confirm against
	    # the installed version).
	    prediction = arabic_asr.transcribe([processed_wav])
	    return prediction

	# Prediction wrapper
	def _transcript_text(result):
	    """Reduce a ``transcribe`` result to the text of its first utterance.

	    NOTE(review): the result shape is assumed to vary with NeMo version and
	    model type -- a list of strings, a ``(best, all)`` tuple of lists, or a
	    list of hypothesis objects exposing ``.text``. Confirm against the
	    installed version.
	    """
	    # Peel off list/tuple nesting, always following the first element.
	    while isinstance(result, (list, tuple)):
	        if not result:
	            return ""
	        result = result[0]
	    text = getattr(result, "text", result)
	    return "" if text is None else str(text)


	def predict(uploaded_wav):
	    """Convert the uploaded audio to 16 kHz and return its transcription.

	    Args:
	        uploaded_wav: Audio value supplied by the Gradio input, or ``None``
	            when nothing was uploaded.

	    Returns:
	        The transcription as a string, or a short notice when no audio was
	        provided.
	    """
	    if uploaded_wav is None:
	        return "No audio file uploaded."

	    out_path = str(Path(base_path) / "converted.wav")
	    audio_conversion = convert_wav_to_16k(uploaded_wav, out_path)
	    prediction_text = loading_nemo_and_prediction(audio_conversion)
	    # Return plain text so the Textbox shows the transcript rather than the
	    # repr of a list or hypothesis object.
	    return _transcript_text(prediction_text)

	# Gradio interface
	# Input widget: accepts an upload or a recording and hands predict() a path.
	_audio_input = gr.Audio(
	    type="filepath",
	    interactive=True,
	    label="Upload or record audio",
	)
	# Output widget: displays whatever predict() returns.
	_transcript_box = gr.Textbox(label="Transcription")

	demo = gr.Interface(fn=predict, inputs=_audio_input, outputs=_transcript_box)

	# Start the web UI as soon as the module is executed.
	demo.launch(share=True, debug=True)