tarjama / app.py
therealbee's picture
Rename Hausa_model.py to app.py
c98cc95 verified
import streamlit as st
import torch
from transformers import WhisperForConditionalGeneration, WhisperProcessor
import librosa
import numpy as np
import os
# Page configuration
st.set_page_config(page_title="Hausa Speech Transcription", page_icon="πŸŽ™οΈ")
# Load model and processor
@st.cache_resource
def load_model():
st.info("Loading the transcription model, please wait...")
model = WhisperForConditionalGeneration.from_pretrained(
"therealbee/whisper-small-ha-bible-tts",
ignore_mismatched_sizes=True
)
processor = WhisperProcessor.from_pretrained("therealbee/whisper-small-ha-bible-tts")
return model, processor
# Transcription function
def transcribe_audio(audio_path, model, processor):
# Load and resample audio
audio, sampling_rate = librosa.load(audio_path, sr=None)
if sampling_rate != 16000:
audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
# Prepare inputs
inputs = processor(
audio,
sampling_rate=16000,
return_tensors="pt",
language="ha"
)
# Generate transcription
with torch.no_grad():
outputs = model.generate(inputs.input_features, task="transcribe")
# Decode transcription
transcription = processor.batch_decode(outputs, skip_special_tokens=True)[0]
return transcription
# Streamlit app
def main():
st.title("Hausa Speech Transcription")
st.write("Upload a Hausa language audio file for transcription.")
# Load the model and processor
model, processor = load_model()
# File uploader
uploaded_file = st.file_uploader(
"Choose an audio file",
type=['wav', 'mp3', 'ogg'],
help="Upload a Hausa language audio file."
)
if uploaded_file is not None:
# Get the file extension
file_extension = uploaded_file.name.split('.')[-1]
temp_audio_path = f"temp_audio_file.{file_extension}"
# Save the uploaded file
with open(temp_audio_path, "wb") as f:
f.write(uploaded_file.getbuffer())
# Display the audio player
st.audio(temp_audio_path)
# Transcription button
if st.button("Transcribe"):
with st.spinner("Transcribing audio..."):
try:
transcription = transcribe_audio(temp_audio_path, model, processor)
st.success("Transcription complete!")
st.write(transcription)
except FileNotFoundError:
st.error("Audio file not found. Please try uploading again.")
except ValueError as ve:
st.error(f"Value error: {ve}")
except Exception as e:
st.error(f"An unexpected error occurred: {e}")
finally:
# Clean up temporary file
os.remove(temp_audio_path)
# Run the app
if __name__ == "__main__":
main()