|
|
import os
import tempfile

import librosa
import numpy as np
import streamlit as st
import torch
from transformers import WhisperForConditionalGeneration, WhisperProcessor
|
|
|
|
|
|
|
|
|
# Configure the browser-tab title and icon for this app.
st.set_page_config(
    page_title="Hausa Speech Transcription",
    page_icon="🎙️",
)
|
|
|
|
|
|
|
|
|
@st.cache_resource(show_spinner="Loading the transcription model, please wait...")
def load_model():
    """Load the Hausa Whisper model and its processor.

    The result is cached with ``st.cache_resource``, so the weights are
    loaded once and reused on later calls instead of on every script rerun.

    The loading message is supplied through ``show_spinner`` rather than an
    ``st.info`` call inside the function: a spinner disappears when loading
    finishes, whereas an element created inside a cached function stays on
    the page (and is replayed on cache hits), leaving a stale
    "please wait" notice.

    Returns:
        A ``(model, processor)`` tuple: the
        ``WhisperForConditionalGeneration`` model and the matching
        ``WhisperProcessor``.
    """
    # Single source of truth so model and processor cannot drift apart.
    model_id = "therealbee/whisper-small-ha-bible-tts"

    model = WhisperForConditionalGeneration.from_pretrained(
        model_id,
        ignore_mismatched_sizes=True,
    )
    processor = WhisperProcessor.from_pretrained(model_id)
    return model, processor
|
|
|
|
|
|
|
|
|
def transcribe_audio(audio_path, model, processor):
    """Transcribe a Hausa audio file to text.

    Args:
        audio_path: Path to an audio file that ``librosa.load`` can read.
        model: Whisper model whose ``generate`` method produces token ids.
        processor: Processor used to build the input features and to decode
            the generated ids back into text.

    Returns:
        The decoded transcription of the (single) input, as a string.

    Raises:
        ValueError: If the audio file contains no samples.
    """
    target_sr = 16000

    # Resample while loading instead of loading at the native rate and
    # resampling in a second step.
    audio, _ = librosa.load(audio_path, sr=target_sr)
    if audio.size == 0:
        raise ValueError("The audio file contains no samples.")

    # NOTE(review): the Whisper feature extractor is documented to work on
    # 30-second windows; longer recordings are presumably truncated here.
    # Confirm whether long uploads need chunking.
    inputs = processor(audio, sampling_rate=target_sr, return_tensors="pt")

    with torch.no_grad():
        # The language hint belongs on generate(); it was previously passed
        # to the processor call, where it does not influence decoding.
        outputs = model.generate(
            inputs.input_features,
            language="ha",
            task="transcribe",
        )

    return processor.batch_decode(outputs, skip_special_tokens=True)[0]
|
|
|
|
|
|
|
|
|
def main():
    """Render the upload-and-transcribe page.

    Flow: load the (cached) model, let the user upload an audio file, show
    an audio player for it, and on "Transcribe" run the model and display
    the resulting text or an error message.

    The upload is previewed straight from memory. A temporary file is only
    created while a transcription is running, under a unique name, and is
    always removed afterwards. This avoids clashes between concurrent
    sessions and avoids leaving files behind when the user never presses
    the button.
    """
    st.title("Hausa Speech Transcription")
    st.write("Upload a Hausa language audio file for transcription.")

    model, processor = load_model()

    uploaded_file = st.file_uploader(
        "Choose an audio file",
        type=['wav', 'mp3', 'ogg'],
        help="Upload a Hausa language audio file.",
    )
    if uploaded_file is None:
        return

    audio_bytes = uploaded_file.getvalue()
    st.audio(audio_bytes, format=uploaded_file.type or "audio/wav")

    if not st.button("Transcribe"):
        return

    # Keep the original extension (including the dot, possibly empty) so the
    # audio backend can recognise the container format.
    suffix = os.path.splitext(uploaded_file.name)[1]

    with st.spinner("Transcribing audio..."):
        temp_audio_path = None
        try:
            # delete=False: the file is reopened by path for decoding, which
            # is not possible on every platform while it is still open here.
            with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
                temp_audio_path = tmp.name
                tmp.write(audio_bytes)

            transcription = transcribe_audio(temp_audio_path, model, processor)
            st.success("Transcription complete!")
            st.write(transcription)
        except FileNotFoundError:
            st.error("Audio file not found. Please try uploading again.")
        except ValueError as ve:
            st.error(f"Value error: {ve}")
        except Exception as e:
            # Top-level UI boundary: report anything unexpected to the user.
            st.error(f"An unexpected error occurred: {e}")
        finally:
            if temp_audio_path is not None:
                try:
                    os.remove(temp_audio_path)
                except OSError:
                    # Best-effort cleanup; a failure here must not mask the
                    # transcription result or error already shown.
                    pass
|
|
|
|
|
|
|
|
|
# Script entry point: build the page only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()
|
|
|
|