Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| from stable_whisper import load_model | |
| from stable_whisper import load_hf_whisper | |
| from pydub import AudioSegment | |
| import webvtt | |
| import pysrt | |
| import requests | |
| import os | |
# ---------------------------------------------------------------------------
# Page layout: title, help text and the input widgets.
# Widgets are created in the order they should appear on the page.
# ---------------------------------------------------------------------------
# NOTE(review): a secrets-backed token (st.secrets["API_TOKEN"]) was sketched
# here originally but is disabled, so no reference token exists at this point.

README_TEXT = "This little tool accepts and audiofile. After choosing the model a WebVTT file will be generated. The content of the WebVTT will be shown and a user can choose to download it. This can be used as Subtitle file e.g. in Davinci Resolve Import Subtitles"
ACCEPTED_UPLOAD_TYPES = ["mp3", "wav", "mov"]
MODEL_SIZE_CHOICES = ("tiny", "base", "small", "medium")

st.title("Speech-to-Text")

with st.expander("README"):
    st.write(README_TEXT)

# File to transcribe.
uploaded_file = st.file_uploader("Upload Audio File", type=ACCEPTED_UPLOAD_TYPES)

# Access mode: either tick the free tier box or supply an API token.
use_free_tier = st.checkbox("Free Tier (Max 2 minutes)")
api_token = st.text_input("API Token (Unlimited)")

# When ticked, the transcription task is switched to English translation.
translate = st.checkbox("Would you like a translation to english?")

# Whisper model variant to load.
model_size = st.selectbox("Model Size", MODEL_SIZE_CHOICES)
def transcribe_to_subtitle(audio_bytes, model_name):
    """Transcribe audio with Whisper and offer the subtitles as a download.

    The audio is transcribed (or translated to English when the module-level
    ``translate`` checkbox is ticked), saved to a temporary ``.srt`` file,
    echoed on the page and exposed through a download button.

    Args:
        audio_bytes: Audio content, handed unchanged to ``model.transcribe``.
        model_name: Model identifier passed to ``load_model``.

    Returns:
        ``None`` on success, or ``{"error": <message>}`` when transcription
        or saving fails. The message is also shown on the page so the
        failure is visible even if the caller ignores the return value.
    """
    import tempfile

    # Load model based on selection.
    model = load_model(model_name)

    # NOTE: the "Free Tier (Max 2 minutes)" limit is not enforced anywhere in
    # this function; an earlier byte-size heuristic was disabled.

    # Only add the task override when a translation was requested, so the
    # default call is identical to a plain ``transcribe(..., verbose=True)``.
    transcribe_options = {"verbose": True}
    if translate:
        transcribe_options["task"] = "translate"

    # A unique temp file per call: a fixed "audio.srt" in the working
    # directory would be shared (and clobbered/deleted) by concurrent sessions.
    fd, srt_path = tempfile.mkstemp(suffix=".srt")
    os.close(fd)
    try:
        try:
            result = model.transcribe(audio_bytes, **transcribe_options)
            result.to_srt_vtt(srt_path)
        except Exception as e:
            message = f"Error during transcription: {str(e)}"
            # Surface the failure in the UI; previously it was only returned
            # and silently dropped by the caller.
            st.error(message)
            return {"error": message}

        captions = pysrt.open(srt_path)

        # Log every cue to stdout (server-side log).
        for caption in captions:
            print(caption.start)
            print(caption.text)
            print(caption.end)
            print()

        # NOTE(review): HTML is allowed presumably because the generated cues
        # may carry markup tags - confirm this is still required.
        output = captions.text
        st.markdown(output, unsafe_allow_html=True)

        # Download option.
        # NOTE(review): the label says WebVtt but the file written and served
        # has an .srt extension - confirm which format is intended.
        st.success("Transcription successful! Download subtitle file?")
        with open(srt_path, "rb") as f:
            st.download_button("Download Subtitle in WebVtt Format", f, "audio.srt")
    finally:
        # Always remove the temporary file, including on error paths.
        try:
            os.remove(srt_path)
        except FileNotFoundError:
            pass
# Entry point of the script run: act only once a file has been uploaded.
if uploaded_file is not None:
    audio_bytes = uploaded_file.read()

    # Access is granted when the free tier is ticked or any token text was
    # entered.
    # NOTE(review): the token is only checked for being non-empty here; no
    # validation against a stored secret is visible in this file.
    access_granted = use_free_tier or api_token
    if access_granted:
        transcribe_to_subtitle(audio_bytes, model_size)
    else:
        st.error("API token required for non-free tier usage")