"""Streamlit app: transcribe a voice-over file with faster-whisper and export subtitles.

The user uploads an MP3/WAV file in the sidebar, picks a model size, layout,
language and export format, and the app produces either SRT text or an
FCPXML document that can be previewed and downloaded.
"""

import tempfile
import textwrap
from datetime import timedelta
from pathlib import Path
from xml.sax.saxutils import escape

import streamlit as st
from faster_whisper import WhisperModel

st.set_page_config(page_title="VO to Subtitle Generator Multi-languages version", layout="wide")
st.title("🌍 VO to Subtitle Generator — Multi-languages Version")


def format_time(seconds):
    """Return *seconds* formatted as an SRT timestamp ``HH:MM:SS,mmm``.

    Args:
        seconds: Offset from the start of the media, in seconds (int or float).
            Negative values are clamped to zero.

    Returns:
        A string with zero-padded hours, minutes and seconds and a
        three-digit millisecond field separated by a comma.
    """
    # Work in integer milliseconds so the fields are always zero-padded and
    # never depend on how ``str(timedelta)`` happens to render the value.
    total_ms = max(0, int(round(seconds * 1000)))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"


def generate_srt(segments, max_chars, max_lines):
    """Build SRT text from transcription segments.

    Each segment's text is wrapped to ``max_chars`` columns; the wrapped lines
    are grouped into cues of at most ``max_lines`` lines, and the segment's
    time span is divided evenly between its cues.

    Args:
        segments: Iterable of objects exposing ``start``, ``end`` and ``text``.
        max_chars: Maximum characters per subtitle line.
        max_lines: Maximum lines per subtitle cue.

    Returns:
        The SRT document as a single string (empty if there is no text).
    """
    entries = []
    count = 1
    for seg in segments:
        wrapped = textwrap.wrap(seg.text.strip(), width=max_chars)
        grouped = [wrapped[i:i + max_lines] for i in range(0, len(wrapped), max_lines)]
        duration = seg.end - seg.start
        # A segment with no text yields no cues, so the divisor guard only
        # protects the arithmetic; the loop below simply does not run.
        chunk_duration = duration / len(grouped) if grouped else duration
        for j, chunk in enumerate(grouped):
            chunk_start = seg.start + j * chunk_duration
            chunk_end = chunk_start + chunk_duration
            timestamp = f"{format_time(chunk_start)} --> {format_time(chunk_end)}"
            content = "\n".join(chunk)
            entries.append(f"{count}\n{timestamp}\n{content}\n\n")
            count += 1
    return "".join(entries)


def generate_fcpxml(segments, version):
    """Build an FCPXML document containing one styled title per segment.

    Args:
        segments: Iterable of objects exposing ``start``, ``end`` and ``text``.
        version: FCPXML version string selected in the UI.

    Returns:
        The document as a newline-joined string.
    """
    # NOTE(review): several literals in this function are empty or a single
    # space, which looks like XML markup that was stripped from this copy of
    # the file (``version``, ``start`` and ``duration`` are never interpolated
    # as a result). They are kept as found -- restore the real header, title
    # and footer markup from the original source before shipping.
    xml = [f'', f'', f'', ' ', ' ', ' ', ' ', ' ', ' ', ' ', f' ', ' ', f' ']
    for i, seg in enumerate(segments):
        start = round(seg.start, 2)
        duration = round(seg.end - seg.start, 2)
        # Escape &, < and > so transcript text cannot break the XML.
        text = escape(seg.text.strip())
        # ``list.append`` accepts exactly one argument; adding several lines
        # at once requires ``extend``.
        xml.extend([
            f' ',
            f' <param name="Position" key="9999/10199/10201/1/100/101" value="0 -415"/>',
            f' <param name="Alignment" key="9999/10199/10201/2/354/1002961760/401" value="1 (Center)"/>',
            f' <text>',
            f' <text-style ref="ts{i+1}">{text}</text-style>',
            f' </text>',
            f' <text-style-def id="ts{i+1}">',
            f' <text-style font="Arial" fontSize="60" fontColor="1 1 1 1" alignment="center" bold="1"',
            f' strokeColor="0.3 0.3 0.3 1" strokeWidth="-1"',
            f' shadowColor="0 0 0 0.75" shadowOffset="3 315" kerning="1.2"/>',
            f' </text-style-def>',
            f' ',
        ])
    xml.extend([' ', ' ', ' ', ' ', ' ', ' ', ''])
    return '\n'.join(xml)


def _transcribe_upload(uploaded, model_size, language_code):
    """Transcribe an uploaded audio file and return the list of segments.

    The upload is written to a temporary file because the model is given a
    file path. The file is closed before it is read back (an open
    ``NamedTemporaryFile`` cannot be reopened on Windows) and is removed
    afterwards so repeated runs do not accumulate files.
    """
    with tempfile.NamedTemporaryFile(suffix=Path(uploaded.name).suffix, delete=False) as temp_audio:
        # getvalue() returns the whole buffer regardless of the current read
        # position, so it stays correct across Streamlit reruns.
        temp_audio.write(uploaded.getvalue())
        temp_path = Path(temp_audio.name)
    try:
        whisper = WhisperModel(model_size, device="cpu", compute_type="int8")
        segments_gen, _ = whisper.transcribe(str(temp_path), language=language_code)
        # Consume the result while the audio file still exists.
        return list(segments_gen)
    finally:
        temp_path.unlink(missing_ok=True)


# --- UI Sidebar ---
with st.sidebar:
    st.header("⚙️ Settings")
    uploaded_file = st.file_uploader("Upload MP3 or WAV", type=["mp3", "wav"])
    model_size = st.selectbox("Model Size", ["tiny", "base", "small", "medium"])
    layout = st.selectbox("Video Layout", ["Horizontal (37 chars)", "Vertical (25 chars)"])
    lines = st.selectbox("Lines per Subtitle", [1, 2], index=1)
    language_map = {
        "Auto": None,
        "Arabic": "ar",
        "English": "en",
        "French": "fr",
        "Farsi": "fa",
        "Spanish": "es",
    }
    language = st.selectbox("Language", list(language_map.keys()))
    export_format = st.selectbox("Export Format", ["srt", "fcpxml"])
    fcpxml_version = (
        st.selectbox("FCPXML Version", ["1.13", "1.12", "1.11", "1.8"], index=3)
        if export_format == "fcpxml"
        else None
    )

# First run of the session: seed the state read by the output section.
if 'subtitle_data' not in st.session_state:
    st.session_state.subtitle_data = ""
    st.session_state.text_dir = "rtl"
    st.session_state.generated = False

if uploaded_file and st.button("🔁 Generate Subtitle"):
    with st.spinner("Transcribing with Whisper..."):
        segments = _transcribe_upload(uploaded_file, model_size, language_map[language])
        st.session_state.segments = segments

        max_chars = 25 if "Vertical" in layout else 37
        max_lines = int(lines)

        if export_format == "srt":
            st.session_state.subtitle_data = generate_srt(segments, max_chars, max_lines)
        else:
            st.session_state.subtitle_data = generate_fcpxml(segments, fcpxml_version)
        # Remember which format the stored data is in; the sidebar selection
        # may change before the user downloads.
        st.session_state.subtitle_format = export_format
        st.session_state.generated = True

# --- Main UI Output ---
if st.session_state.generated and st.session_state.subtitle_data:
    col1, col2 = st.columns([1, 6])
    with col1:
        st.write("Text Direction:")
        if st.button("⬅️ RTL"):
            st.session_state.text_dir = "rtl"
        if st.button("➡️ LTR"):
            st.session_state.text_dir = "ltr"

    with col2:
        st.markdown("### ✏️ Edit Before Download")
        # NOTE(review): the preview markup appears to have been stripped from
        # this copy of the file; the literal is kept as found. When restoring
        # it, HTML-escape any subtitle text embedded in the markup.
        preview_html = f''' '''
        st.components.v1.html(preview_html, height=360)

        # Name the file after the format that was generated, not the current
        # sidebar selection.
        generated_format = st.session_state.get("subtitle_format", export_format)
        st.download_button(
            label="⬇️ Download Subtitle",
            data=st.session_state.subtitle_data,
            file_name="subtitles." + ("srt" if generated_format == "srt" else "fcpxml"),
            mime="text/plain",
        )