import os
import tempfile

import librosa
import numpy as np
import streamlit as st
import torch
from transformers import WhisperForConditionalGeneration, WhisperProcessor
|
|
|
|
|
# Configure the browser tab title and icon for the app.
st.set_page_config(
    page_title="Hausa Speech Transcription",
    page_icon="🎙️",
)
|
|
|
|
|
@st.cache_resource(show_spinner="Loading the transcription model, please wait...")
def load_model():
    """Load the Hausa Whisper checkpoint and its processor.

    The result is cached by ``st.cache_resource``, so the checkpoint is
    loaded once and reused across reruns. The loading message is supplied
    through ``show_spinner`` rather than an ``st.info`` call inside the
    function, so it is displayed only while the load is actually running
    instead of being left on the page afterwards.

    Returns:
        tuple: ``(model, processor)`` where ``model`` is the
        ``WhisperForConditionalGeneration`` instance and ``processor`` is
        the ``WhisperProcessor`` loaded from the same checkpoint.
    """
    # Single source of truth so model and processor cannot drift apart.
    checkpoint = "therealbee/whisper-small-ha-bible-tts"

    model = WhisperForConditionalGeneration.from_pretrained(
        checkpoint,
        # Kept from the original: tolerate weight-shape mismatches on load.
        # NOTE(review): this can hide a genuinely incompatible checkpoint;
        # confirm it is still required.
        ignore_mismatched_sizes=True,
    )
    processor = WhisperProcessor.from_pretrained(checkpoint)
    return model, processor
|
|
|
|
|
def transcribe_audio(audio_path, model, processor):
    """Transcribe a Hausa audio file to text.

    Args:
        audio_path: Path of the audio file; it is decoded with
            ``librosa.load``.
        model: Whisper model whose ``generate`` method produces token ids.
        processor: Processor used both to turn the waveform into input
            features and to decode the generated token ids.

    Returns:
        str: The decoded text of the first generated sequence, with
        special tokens removed.
    """
    target_sr = 16000

    # Asking librosa for the target rate makes it resample during loading,
    # which replaces the separate load-then-resample step.
    audio, _ = librosa.load(audio_path, sr=target_sr)

    # NOTE(review): Whisper's feature extractor works on a fixed-length
    # window, so long recordings are presumably truncated -- confirm if
    # long uploads must be supported.
    inputs = processor(
        audio,
        sampling_rate=target_sr,
        return_tensors="pt",
    )

    with torch.no_grad():
        # The language has to be given to generate(): the processor call
        # above only extracts audio features and does not steer decoding.
        outputs = model.generate(
            inputs.input_features,
            language="ha",
            task="transcribe",
        )

    return processor.batch_decode(outputs, skip_special_tokens=True)[0]
|
|
|
|
|
def main():
    """Render the upload-and-transcribe page.

    Shows the title, loads the cached model, lets the user upload a
    ``wav``/``mp3``/``ogg`` file, plays it back, and -- when the
    "Transcribe" button is pressed -- writes the upload to a temporary
    file, transcribes it and displays the text. Errors raised during
    transcription are reported in the page instead of propagating.
    """
    st.title("Hausa Speech Transcription")
    st.write("Upload a Hausa language audio file for transcription.")

    model, processor = load_model()

    uploaded_file = st.file_uploader(
        "Choose an audio file",
        type=["wav", "mp3", "ogg"],
        help="Upload a Hausa language audio file.",
    )
    if uploaded_file is None:
        return

    # Play back from the in-memory upload; no file on disk is needed here.
    st.audio(uploaded_file.getvalue())

    if not st.button("Transcribe"):
        return

    # Keep the original extension so the audio decoder can identify the format.
    suffix = os.path.splitext(uploaded_file.name)[1]
    temp_audio_path = None

    with st.spinner("Transcribing audio..."):
        try:
            # A uniquely named temp file keeps concurrent sessions from
            # overwriting each other's audio. delete=False lets the file be
            # reopened by path after this handle is closed.
            with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
                temp_audio_path = tmp.name
                tmp.write(uploaded_file.getbuffer())

            transcription = transcribe_audio(temp_audio_path, model, processor)
            st.success("Transcription complete!")
            st.write(transcription)
        except FileNotFoundError:
            st.error("Audio file not found. Please try uploading again.")
        except ValueError as ve:
            st.error(f"Value error: {ve}")
        except Exception as e:
            # Top-level UI boundary: report the failure rather than crash the page.
            st.error(f"An unexpected error occurred: {e}")
        finally:
            # Only remove what was actually created, and never let cleanup
            # raise a second error over the one already reported.
            if temp_audio_path is not None and os.path.exists(temp_audio_path):
                os.remove(temp_audio_path)
|
|
|
|
|
# Run the app only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
|
|