"""Streamlit app that transcribes an uploaded audio clip with OpenAI Whisper.

The user uploads a wav/mp3/m4a file; the app plays it back, reports the
detected spoken language, and shows the recognized text.
"""

import os
import tempfile
from pathlib import Path

import streamlit as st
import whisper

MODEL_NAME = "base"


@st.cache_resource
def _load_model(name: str = MODEL_NAME):
    """Return the Whisper model *name*, loaded once and reused across reruns.

    Streamlit re-executes the whole script on every interaction; without the
    cache the model weights would be reloaded each time.
    """
    return whisper.load_model(name)


def _load_uploaded_audio(data: bytes, suffix: str):
    """Decode raw uploaded bytes into the waveform returned by ``whisper.load_audio``.

    ``whisper.load_audio`` takes a file path, so the bytes are spooled to a
    uniquely named temporary file (instead of a shared, fixed filename that
    concurrent sessions would overwrite) and the file is removed afterwards.

    Args:
        data: Raw bytes of the uploaded audio file.
        suffix: File extension of the upload (e.g. ``".wav"``), kept so the
            temporary file carries the same extension as the original.

    Returns:
        Whatever ``whisper.load_audio`` returns for the written file.
    """
    # delete=False: the file must be closed before another process/library
    # opens it by name (required on Windows); we unlink it ourselves below.
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
        tmp.write(data)
        tmp_path = tmp.name
    try:
        return whisper.load_audio(tmp_path)
    finally:
        os.unlink(tmp_path)


st.title("Audio Transcription with Whisper")

# File uploader for audio
uploaded_file = st.file_uploader("Upload an audio file", type=["wav", "mp3", "m4a"])

if uploaded_file is not None:
    audio_bytes = uploaded_file.getvalue()

    # Play back straight from memory; no on-disk copy is needed for the player.
    st.audio(audio_bytes, format=uploaded_file.type)

    # Transcribe audio
    model = _load_model()
    audio = _load_uploaded_audio(audio_bytes, Path(uploaded_file.name).suffix)
    # NOTE(review): per the Whisper README, pad_or_trim fits the clip to the
    # model's 30-second window, so longer uploads are only partially
    # transcribed -- confirm this limit is acceptable.
    audio = whisper.pad_or_trim(audio)
    mel = whisper.log_mel_spectrogram(audio).to(model.device)

    # Detect the spoken language
    _, probs = model.detect_language(mel)
    detected_language = max(probs, key=probs.get)
    st.write(f"Detected language: {detected_language}")

    # Decode the audio
    # fp16 is the DecodingOptions default but is only usable on CUDA; on a
    # CPU-only host it warns or fails, so enable it only when on GPU.
    # Passing the detected language spares the decoder a second detection pass.
    options = whisper.DecodingOptions(
        language=detected_language,
        fp16=model.device.type == "cuda",
    )
    result = whisper.decode(model, mel, options)

    # Display the recognized text
    st.write(result.text)