# quranASR_nemo / app.py
# Author: NeoBoy — last change: "Update app.py" (commit b207925, verified)
import gradio as gr
from pathlib import Path
import nemo.collections.asr as nemo_asr
import librosa
import soundfile as sf
# Directory containing this script; used below to build the converted-audio output path.
base_path = str(Path(__file__).parent)
# Convert audio to 16kHz WAV
def convert_wav_to_16k(input_wav, output_file_path, sr=16000):
    """Resample *input_wav* to *sr* Hz and write it to *output_file_path*.

    Args:
        input_wav: Either a filesystem path (str) to an audio file, or a
            Gradio-style numpy tuple ``(sample_rate, samples)``.
        output_file_path: Destination WAV path.
        sr: Target sample rate in Hz (default 16000, what the ASR model expects).

    Returns:
        The ``output_file_path`` that was written.

    Raises:
        ValueError: If *input_wav* is None or an unsupported type.
    """
    if input_wav is None:
        raise ValueError("No audio file provided")
    if isinstance(input_wav, str):  # filepath: librosa loads + resamples in one step
        y, s = librosa.load(input_wav, sr=sr)
    elif isinstance(input_wav, tuple):
        # BUG FIX: Gradio's numpy audio tuples are (sample_rate, samples),
        # not (samples, sample_rate) as the original unpacking assumed.
        orig_sr, y = input_wav
        # Gradio delivers integer PCM (e.g. int16); librosa.resample expects
        # float samples, so normalize to [-1, 1] first.
        if y.dtype.kind in "iu":
            y = y.astype("float32") / float(1 << (8 * y.dtype.itemsize - 1))
        # Stereo arrives as (n_samples, n_channels); downmix to mono.
        # NOTE(review): assumes channel axis is last — confirm against Gradio version.
        if y.ndim == 2:
            y = y.mean(axis=1)
        y = librosa.resample(y, orig_sr=orig_sr, target_sr=sr)
        s = sr
    else:
        raise ValueError(f"Unsupported audio input type: {type(input_wav)}")
    sf.write(output_file_path, y, s)
    print(f'"{output_file_path}" has been converted to {s}Hz')
    return output_file_path
# Load NeMo model and run transcription
def loading_nemo_and_prediction(processed_wav):
    """Transcribe *processed_wav* (a 16 kHz WAV path) with the NeMo model.

    The checkpoint is restored from disk only once and cached on the function
    object; the original version re-loaded the full model on every request,
    which is very slow for a web app.

    Returns:
        The list returned by ``model.transcribe`` (one entry per input file).
    """
    model = getattr(loading_nemo_and_prediction, "_model", None)
    if model is None:
        model = nemo_asr.models.EncDecHybridRNNTCTCBPEModel.restore_from(
            restore_path="FastConformer-Custom-Tokenizer.nemo"
        )
        loading_nemo_and_prediction._model = model
    # NOTE(review): `paths2audio_files` was renamed to `audio` in newer NeMo
    # releases — confirm against the pinned nemo_toolkit version before changing.
    return model.transcribe(paths2audio_files=[processed_wav])
# Prediction wrapper
def predict(uploaded_wav):
    """Gradio callback: convert the uploaded audio to 16 kHz, transcribe it.

    Args:
        uploaded_wav: Filepath from the Gradio Audio component (or None).

    Returns:
        The transcription string, or a message when no audio was supplied.
    """
    if uploaded_wav is None:
        return "No audio file uploaded."
    # pathlib join instead of fragile string concatenation.
    out_path = str(Path(base_path) / "converted.wav")
    processed_wav = convert_wav_to_16k(uploaded_wav, out_path)
    predictions = loading_nemo_and_prediction(processed_wav)
    # transcribe() returns a list with one entry per input file.
    return predictions[0]
# Gradio interface
demo = gr.Interface(
    fn=predict,
    inputs=gr.Audio(label="Upload or record audio", interactive=True, type="filepath"),
    outputs=gr.Textbox(label="Transcription"),
)

if __name__ == "__main__":
    # Guard the launch so importing this module (e.g. by a Space runner or
    # tests) does not immediately start the server.
    demo.launch(debug=True, share=True)