File size: 2,964 Bytes
3771792
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import os
import tempfile

import librosa
import numpy as np
import streamlit as st
import torch
from transformers import WhisperForConditionalGeneration, WhisperProcessor

# Browser tab title and icon for the app
st.set_page_config(
    page_title="Hausa Speech Transcription",
    page_icon="🎙️",
)

# Load model and processor
@st.cache_resource(show_spinner="Loading the transcription model, please wait...")
def load_model():
    """Load the Hausa Whisper model and its processor once per server process.

    The result is cached by ``st.cache_resource``, so subsequent reruns reuse
    the same objects instead of reloading the weights.

    The loading message is shown through the decorator's ``show_spinner``
    text rather than an ``st.info`` call inside the function: Streamlit
    replays elements created inside a cached function on every cache hit,
    which would leave a stale "loading" banner on the page after the model
    is already available.

    Returns:
        A ``(model, processor)`` tuple: the
        ``WhisperForConditionalGeneration`` model and the matching
        ``WhisperProcessor``.
    """
    model_id = "therealbee/whisper-small-ha-bible-tts"
    model = WhisperForConditionalGeneration.from_pretrained(
        model_id,
        # Kept from the original: tolerate checkpoint/config shape mismatches.
        ignore_mismatched_sizes=True,
    )
    processor = WhisperProcessor.from_pretrained(model_id)
    return model, processor

# Transcription function
def transcribe_audio(audio_path, model, processor, language="ha"):
    """Transcribe a single audio file with a Whisper model.

    Args:
        audio_path: Path of the audio file to read with ``librosa``.
        model: Whisper generation model exposing ``generate`` and ``device``.
        processor: Matching processor used for feature extraction and
            for decoding the generated token ids.
        language: Language code forced during decoding. Defaults to
            ``"ha"`` (Hausa).

    Returns:
        The decoded transcription text with special tokens removed.

    Raises:
        ValueError: If the decoded audio contains no samples.
    """
    target_sr = 16000

    # Let librosa resample while loading instead of a separate resample step.
    audio, _ = librosa.load(audio_path, sr=target_sr)
    if audio.size == 0:
        raise ValueError("Audio file contains no samples.")

    # The processor only extracts features here; the decoding language is a
    # generation option, so it is passed to ``generate`` below.
    inputs = processor(
        audio,
        sampling_rate=target_sr,
        return_tensors="pt",
    )
    # Keep the features on the same device as the model weights.
    input_features = inputs.input_features.to(model.device)

    # NOTE(review): a single generate call presumably covers only the first
    # ~30 s window of audio - confirm how longer uploads should be handled.
    with torch.no_grad():
        outputs = model.generate(
            input_features,
            language=language,
            task="transcribe",
        )

    return processor.batch_decode(outputs, skip_special_tokens=True)[0]

# Streamlit app
def main():
    """Render the app: upload an audio file, play it back, transcribe it.

    The uploaded audio is played directly from memory. A temporary file is
    created only when the user clicks "Transcribe", under a unique name so
    that concurrent sessions cannot overwrite each other's audio, and it is
    always removed afterwards.
    """
    st.title("Hausa Speech Transcription")
    st.write("Upload a Hausa language audio file for transcription.")

    # Load the model and processor (cached across reruns)
    model, processor = load_model()

    uploaded_file = st.file_uploader(
        "Choose an audio file",
        type=['wav', 'mp3', 'ogg'],
        help="Upload a Hausa language audio file."
    )
    if uploaded_file is None:
        return

    # Play straight from the in-memory upload; no file on disk is needed.
    st.audio(uploaded_file, format=uploaded_file.type or "audio/wav")

    if not st.button("Transcribe"):
        return

    # Keep the original extension so the audio backend can detect the format.
    suffix = os.path.splitext(uploaded_file.name)[1]
    temp_audio_path = None

    with st.spinner("Transcribing audio..."):
        try:
            # delete=False: the file is closed before being reopened by path,
            # which is required on platforms that forbid a second open.
            with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
                temp_audio_path = tmp.name
                tmp.write(uploaded_file.getbuffer())

            transcription = transcribe_audio(temp_audio_path, model, processor)
            st.success("Transcription complete!")
            st.write(transcription)
        except FileNotFoundError:
            st.error("Audio file not found. Please try uploading again.")
        except ValueError as ve:
            st.error(f"Value error: {ve}")
        except Exception as e:
            # Top-level UI boundary: report instead of crashing the page.
            st.error(f"An unexpected error occurred: {e}")
        finally:
            # Best-effort cleanup; a missing file means there is nothing to do.
            if temp_audio_path is not None:
                try:
                    os.remove(temp_audio_path)
                except FileNotFoundError:
                    pass

# Entry point: build the UI only when this file is executed as the main
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()