Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import whisper | |
| import tempfile | |
| # Function to transcribe audio and detect language | |
def transcribe_and_detect_language(audio_file):
    """Detect the spoken language of an audio file and transcribe it with Whisper.

    Args:
        audio_file: Path of the audio file; handed to ``whisper.load_audio``.

    Returns:
        A ``(language, text)`` tuple: the key with the highest probability
        reported by ``model.detect_language`` and the decoded text.

    Note:
        ``whisper.pad_or_trim`` fits the audio to a single model input window
        (30 seconds with the library defaults), so longer recordings are
        truncated before decoding.
    """
    # NOTE(review): the model is reloaded on every call; consider caching it
    # at the call site if this function is invoked repeatedly.
    model = whisper.load_model("base").to("cpu").float()  # float32 weights on CPU

    # Load the waveform and fit it to the model's fixed-length input window.
    audio = whisper.pad_or_trim(whisper.load_audio(audio_file))

    # Log-Mel spectrogram on the model's device, in float32 to match the weights.
    mel = whisper.log_mel_spectrogram(audio).to(model.device).float()

    # Pick the most probable language.
    _, probs = model.detect_language(mel)
    detected_language = max(probs, key=probs.get)

    # DecodingOptions defaults to fp16=True, which makes decode() cast the
    # input to half precision; that conflicts with the float32 CPU model above.
    # Passing the detected language avoids a second detection pass in decode().
    options = whisper.DecodingOptions(language=detected_language, fp16=False)
    result = whisper.decode(model, mel, options)
    return detected_language, result.text
# Streamlit UI: upload an audio file, then show its detected language and
# transcription.
st.title("Speech to Text with Whisper")

# File uploader widget
uploaded_file = st.file_uploader("Upload an audio file", type=['wav', 'mp3'])

if uploaded_file is not None:
    # Whisper reads audio from a filesystem path, so the uploaded bytes are
    # spilled to disk first. The file lives inside a TemporaryDirectory so it
    # is removed automatically once transcription finishes (the original
    # delete=False file was never cleaned up).
    with tempfile.TemporaryDirectory() as tmp_dir:
        with tempfile.NamedTemporaryFile(dir=tmp_dir, delete=False) as tmp_file:
            tmp_file.write(uploaded_file.getvalue())
        # The file is closed (and therefore flushed) before it is read back.
        with st.spinner('Processing...'):
            language, transcribed_text = transcribe_and_detect_language(tmp_file.name)

    st.write(f"Detected language: {language}")
    st.text_area("Transcribed Text:", value=transcribed_text, height=300)