import os
import tempfile
import textwrap
from datetime import timedelta
from xml.sax.saxutils import escape

import streamlit as st
from faster_whisper import WhisperModel
# Page-level configuration and the main heading shown at the top of the app.
st.set_page_config(
    page_title="VO to Subtitle Generator Multi-languages version",
    layout="wide",
)
st.title("🌍 VO to Subtitle Generator — Multi-languages Version")
def format_time(seconds):
    """Format a time offset as an SRT timestamp (``HH:MM:SS,mmm``).

    Args:
        seconds: Offset from the start of the media, in seconds (int or float).

    Returns:
        A string with zero-padded hours, minutes and seconds, and exactly
        three millisecond digits separated by a comma, e.g. ``00:01:02,500``.
        Negative offsets are clamped to ``00:00:00,000``.
    """
    # Work in whole milliseconds so the output never depends on how
    # str(timedelta) happens to render (single-digit hours, "1 day, ...",
    # or a missing fractional part).
    total_ms = max(0, int(round(seconds * 1000)))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"
def generate_srt(segments, max_chars, max_lines):
    """Render transcription segments as SubRip (SRT) text.

    Each segment's stripped text is wrapped to ``max_chars`` columns and the
    wrapped lines are grouped into cues of at most ``max_lines`` lines. The
    segment's time span is divided equally between its cues.

    Args:
        segments: Iterable of objects exposing ``start``, ``end`` and ``text``.
        max_chars: Maximum characters per subtitle line.
        max_lines: Maximum lines per subtitle cue.

    Returns:
        The SRT document as a single string; cues are numbered from 1 and
        each is followed by a blank line.
    """
    cues = []
    for segment in segments:
        seg_start = segment.start
        seg_length = segment.end - seg_start
        wrapped = textwrap.wrap(segment.text.strip(), width=max_chars)
        groups = [
            wrapped[pos:pos + max_lines]
            for pos in range(0, len(wrapped), max_lines)
        ]
        if not groups:
            # Nothing to show for this segment (its text wrapped to no lines).
            continue
        # Every cue of a segment gets the same share of the segment's duration.
        share = seg_length / len(groups)
        for offset, group in enumerate(groups):
            cue_start = seg_start + offset * share
            cue_end = cue_start + share
            number = len(cues) + 1
            timing = f"{format_time(cue_start)} --> {format_time(cue_end)}"
            body = "\n".join(group)
            cues.append(f"{number}\n{timing}\n{body}\n\n")
    return "".join(cues)
def generate_fcpxml(segments, version):
    """Build the FCPXML export for the given transcription segments.

    The document is assembled as a list of lines (a fixed header, one group
    of lines per segment, a fixed footer) and joined with newlines.

    NOTE(review): the string literals below contain no XML markup as they
    stand, and ``version``, ``start``, ``duration`` and the loop index are
    not interpolated anywhere. The markup looks like it was lost from this
    file; restore it from the intended FCPXML template and confirm.

    Args:
        segments: Iterable of objects exposing ``start``, ``end`` and ``text``.
        version: Requested FCPXML version string (currently unused, see note).

    Returns:
        The assembled document as a single newline-joined string.
    """
    xml = [f'',
           f'',
           f'',
           ' ',
           ' ',
           ' ',
           ' ',
           ' ',
           ' ',
           ' ',
           f' ',
           ' ',
           f' ']
    for i, seg in enumerate(segments):
        # Timing values rounded to 1/100 s; kept for the (missing) markup.
        start = round(seg.start, 2)
        duration = round(seg.end - seg.start, 2)
        # Escape &, < and > so the subtitle text cannot break the XML.
        text = escape(seg.text.strip())
        # list.append() accepts exactly one argument; passing the ten lines
        # to it raised TypeError, so add them with extend() instead.
        xml.extend([
            f' ',
            f' ',
            f' ',
            f' ',
            f' {text}',
            f' ',
            f' ',
            f' ',
            f' ',
            f' ',
        ])
    xml.extend([' ',
                ' ',
                ' ',
                ' ',
                ' ',
                ' ',
                ''])
    return '\n'.join(xml)
# --- UI Sidebar ---
# NOTE(review): this file's indentation was lost, so the nesting below (which
# widgets live in the sidebar, which in the columns) was reconstructed from
# statement order — confirm against the intended layout.
with st.sidebar:
    st.header("⚙️ Settings")
    uploaded_file = st.file_uploader("Upload MP3 or WAV", type=["mp3", "wav"])
    model_size = st.selectbox("Model Size", ["tiny", "base", "small", "medium"])
    layout = st.selectbox("Video Layout", ["Horizontal (37 chars)", "Vertical (25 chars)"])
    lines = st.selectbox("Lines per Subtitle", [1, 2], index=1)
    # Display name -> language code passed to Whisper; None lets it auto-detect.
    language_map = {
        "Auto": None,
        "Arabic": "ar",
        "English": "en",
        "French": "fr",
        "Farsi": "fa",
        "Spanish": "es",
    }
    language = st.selectbox("Language", list(language_map.keys()))
    export_format = st.selectbox("Export Format", ["srt", "fcpxml"])
    # The version picker is only shown when FCPXML export is selected.
    fcpxml_version = (
        st.selectbox("FCPXML Version", ["1.13", "1.12", "1.11", "1.8"], index=3)
        if export_format == "fcpxml"
        else None
    )

# First run of the session: seed the state the output section reads.
if 'subtitle_data' not in st.session_state:
    st.session_state.subtitle_data = ""
    st.session_state.text_dir = "rtl"
    st.session_state.generated = False
# Format the stored subtitle_data was produced in; tracked separately from
# the sidebar selection, which the user can change after generating.
if 'generated_format' not in st.session_state:
    st.session_state.generated_format = export_format

if uploaded_file and st.button("🔁 Generate Subtitle"):
    with st.spinner("Transcribing with Whisper..."):
        # Whisper is handed a file path, so spill the upload to disk.
        # delete=False lets the file be closed (and fully flushed) before it
        # is reopened by name; it is removed explicitly below.
        with tempfile.NamedTemporaryFile(delete=False) as temp_audio:
            # getvalue() returns the whole upload regardless of the current
            # read position of the buffer.
            temp_audio.write(uploaded_file.getvalue())
        try:
            whisper = WhisperModel(model_size, device="cpu", compute_type="int8")
            segments_gen, _ = whisper.transcribe(temp_audio.name, language=language_map[language])
            # Materialise the segments while the audio file still exists.
            segments = list(segments_gen)
        finally:
            # Always clean up, otherwise every run leaks a temp file.
            os.unlink(temp_audio.name)
        st.session_state.segments = segments
        max_chars = 25 if "Vertical" in layout else 37
        max_lines = int(lines)
        if export_format == "srt":
            st.session_state.subtitle_data = generate_srt(segments, max_chars, max_lines)
        else:
            st.session_state.subtitle_data = generate_fcpxml(segments, fcpxml_version)
        st.session_state.generated_format = export_format
        st.session_state.generated = True

# --- Main UI Output ---
if st.session_state.generated and st.session_state.subtitle_data:
    col1, col2 = st.columns([1, 6])
    with col1:
        st.write("Text Direction:")
        if st.button("⬅️ RTL"):
            st.session_state.text_dir = "rtl"
        if st.button("➡️ LTR"):
            st.session_state.text_dir = "ltr"
    with col2:
        st.markdown("### ✏️ Edit Before Download")
        # NOTE(review): the preview template is empty here and never uses
        # text_dir or subtitle_data — its HTML looks like it was lost from
        # this file; restore and confirm.
        preview_html = f'''
'''
        st.components.v1.html(preview_html, height=360)
        # Name the file after the format the data was generated in, not the
        # current sidebar selection, so extension and content always match.
        generated_format = st.session_state.generated_format
        st.download_button(
            label="⬇️ Download Subtitle",
            data=st.session_state.subtitle_data,
            file_name="subtitles." + ("srt" if generated_format == "srt" else "fcpxml"),
            mime="text/plain",
        )