Spaces:
Sleeping
Sleeping
File size: 7,013 Bytes
f75e191 2625ccf f75e191 2625ccf f75e191 2625ccf f75e191 2625ccf f75e191 2625ccf f75e191 2625ccf f75e191 2625ccf f75e191 2625ccf f75e191 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 |
import os
import tempfile
import textwrap
from datetime import timedelta
from xml.sax.saxutils import escape

import streamlit as st
from faster_whisper import WhisperModel
# Page setup: set the browser-tab title and wide layout, then render the page heading.
st.set_page_config(page_title="VO to Subtitle Generator Multi-languages version", layout="wide")
st.title("🌍 VO to Subtitle Generator — Multi-languages Version")
def format_time(seconds):
    """Format a time offset as an SRT timestamp (``HH:MM:SS,mmm``).

    Args:
        seconds: Offset from the start of the audio, in seconds (int or float).
            Negative values are clamped to zero.

    Returns:
        A string with zero-padded two-digit hours, minutes and seconds and a
        three-digit millisecond field separated by a comma, e.g.
        ``"00:01:15,250"``. Hours are not wrapped at 24.
    """
    # Work in whole milliseconds so the fractional field is always exactly
    # three digits, regardless of how many digits the hour field needs.
    total_ms = max(0, int(round(seconds * 1000)))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"
def generate_srt(segments, max_chars, max_lines):
    """Build SRT subtitle text from transcription segments.

    Each segment's text is wrapped to ``max_chars`` columns and the wrapped
    lines are grouped into cues of at most ``max_lines`` lines. When a segment
    needs several cues, its time span is divided evenly between them.

    Args:
        segments: Iterable of objects exposing ``start`` and ``end`` (seconds)
            and ``text`` attributes.
        max_chars: Maximum characters per subtitle line.
        max_lines: Maximum lines per subtitle cue.

    Returns:
        The SRT document as a single string; empty if no segment has text.
    """
    entries = []
    for seg in segments:
        wrapped = textwrap.wrap(seg.text.strip(), width=max_chars)
        if not wrapped:
            # Nothing to show for a blank segment; do not consume a cue number.
            continue
        cues = [wrapped[i:i + max_lines] for i in range(0, len(wrapped), max_lines)]
        cue_duration = (seg.end - seg.start) / len(cues)
        for j, cue in enumerate(cues):
            cue_start = seg.start + j * cue_duration
            cue_end = cue_start + cue_duration
            timestamp = f"{format_time(cue_start)} --> {format_time(cue_end)}"
            content = "\n".join(cue)
            # Cue numbers are 1-based and continuous across all segments.
            entries.append(f"{len(entries) + 1}\n{timestamp}\n{content}\n\n")
    # Join once at the end instead of growing a string inside the loop.
    return "".join(entries)
def generate_fcpxml(segments, version):
    """Build an FCPXML document with one title element per segment.

    The titles are placed on lane 1 inside a single gap whose duration is the
    end time of the last segment. Segment text is XML-escaped before being
    embedded.

    Args:
        segments: Sequence of objects exposing ``start`` and ``end`` (seconds)
            and ``text`` attributes. May be empty.
        version: Value written to the ``version`` attribute of ``<fcpxml>``.

    Returns:
        The FCPXML document as a newline-joined string.
    """
    # An empty transcription has no last segment; emit a zero-length gap
    # instead of failing on segments[-1].
    total_duration = round(segments[-1].end, 2) if segments else 0
    xml = [
        '<?xml version="1.0" encoding="UTF-8"?>',
        '<!DOCTYPE fcpxml>',
        f'<fcpxml version="{version}">',
        ' <resources>',
        ' <format id="r1" name="FFVideoFormat1080p25" frameDuration="100/2500s" width="1920" height="1080" colorSpace="1-1-1 (Rec. 709)"/>',
        ' <effect id="r2" name="Custom" uid=".../Titles.localized/Build In:Out.localized/Custom.localized/Custom.moti"/>',
        ' </resources>',
        ' <library>',
        ' <event name="Subtitles">',
        ' <project name="Subtitles">',
        ' <sequence format="r1" tcStart="0s" tcFormat="NDF">',
        ' <spine>',
        f' <gap name="Gap" offset="0s" duration="{total_duration}s">',
    ]
    for i, seg in enumerate(segments):
        start = round(seg.start, 2)
        duration = round(seg.end - seg.start, 2)
        text = escape(seg.text.strip())
        # list.append() takes exactly one item; the multi-line title element
        # has to be added with extend().
        xml.extend([
            f' <title name="Subtitle {i+1}" lane="1" offset="{start}s" ref="r2" duration="{duration}s">',
            ' <param name="Position" key="9999/10199/10201/1/100/101" value="0 -415"/>',
            ' <param name="Alignment" key="9999/10199/10201/2/354/1002961760/401" value="1 (Center)"/>',
            ' <text>',
            f' <text-style ref="ts{i+1}">{text}</text-style>',
            ' </text>',
            f' <text-style-def id="ts{i+1}">',
            # The next three strings form one <text-style .../> element whose
            # attributes are spread over several lines.
            ' <text-style font="Arial" fontSize="60" fontColor="1 1 1 1" alignment="center" bold="1"',
            ' strokeColor="0.3 0.3 0.3 1" strokeWidth="-1"',
            ' shadowColor="0 0 0 0.75" shadowOffset="3 315" kerning="1.2"/>',
            ' </text-style-def>',
            ' </title>',
        ])
    xml.extend([
        ' </gap>',
        ' </spine>',
        ' </sequence>',
        ' </project>',
        ' </event>',
        ' </library>',
        '</fcpxml>',
    ])
    return '\n'.join(xml)
# --- UI Sidebar ---
# All transcription and export settings live in the sidebar.
with st.sidebar:
    st.header("⚙️ Settings")
    uploaded_file = st.file_uploader("Upload MP3 or WAV", type=["mp3", "wav"])
    model_size = st.selectbox("Model Size", ["tiny", "base", "small", "medium"])
    layout = st.selectbox("Video Layout", ["Horizontal (37 chars)", "Vertical (25 chars)"])
    lines = st.selectbox("Lines per Subtitle", [1, 2], index=1)
    # Display name -> language code passed to Whisper; None lets the model auto-detect.
    language_map = {
        "Auto": None,
        "Arabic": "ar",
        "English": "en",
        "French": "fr",
        "Farsi": "fa",
        "Spanish": "es"
    }
    language = st.selectbox("Language", list(language_map.keys()))
    export_format = st.selectbox("Export Format", ["srt", "fcpxml"])
    # The version picker is only shown when FCPXML export is selected.
    fcpxml_version = (
        st.selectbox("FCPXML Version", ["1.13", "1.12", "1.11", "1.8"], index=3)
        if export_format == "fcpxml"
        else None
    )

# First run of a session: seed the state read by the output section below.
if 'subtitle_data' not in st.session_state:
    st.session_state.subtitle_data = ""
    st.session_state.text_dir = "rtl"
    st.session_state.generated = False

if uploaded_file and st.button("🔁 Generate Subtitle"):
    with st.spinner("Transcribing with Whisper..."):
        # Keep the upload's extension on the temp file and close it before
        # transcription so the decoder can reopen it by path.
        suffix = os.path.splitext(uploaded_file.name)[1]
        with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as temp_audio:
            # getvalue() returns the whole upload regardless of the stream position.
            temp_audio.write(uploaded_file.getvalue())
        try:
            whisper = WhisperModel(model_size, device="cpu", compute_type="int8")
            segments_gen, _ = whisper.transcribe(temp_audio.name, language=language_map[language])
            # transcribe() yields lazily; materialize while the file still exists.
            segments = list(segments_gen)
        finally:
            # delete=False means nothing else cleans this file up.
            os.remove(temp_audio.name)

        st.session_state.segments = segments
        max_chars = 25 if "Vertical" in layout else 37
        max_lines = int(lines)

        if not segments:
            # Nothing to export; avoid building a document from an empty transcription.
            st.session_state.subtitle_data = ""
            st.session_state.generated = False
            st.warning("No speech was detected in the uploaded audio.")
        else:
            if export_format == "srt":
                st.session_state.subtitle_data = generate_srt(segments, max_chars, max_lines)
            else:
                st.session_state.subtitle_data = generate_fcpxml(segments, fcpxml_version)
            # Remember which format was generated so the download name matches
            # the content even if the selectbox is changed afterwards.
            st.session_state.generated_format = export_format
            st.session_state.generated = True

# --- Main UI Output ---
if st.session_state.generated and st.session_state.subtitle_data:
    col1, col2 = st.columns([1, 6])
    with col1:
        st.write("Text Direction:")
        if st.button("⬅️ RTL"):
            st.session_state.text_dir = "rtl"
        if st.button("➡️ LTR"):
            st.session_state.text_dir = "ltr"
    with col2:
        st.markdown("### ✏️ Edit Before Download")
        # Apply the chosen writing direction to the editor via CSS.
        st.markdown(
            f"<style>textarea {{ direction: {st.session_state.text_dir}; "
            "font-family: monospace; font-size: 14px; }</style>",
            unsafe_allow_html=True,
        )
        # A native text area returns the edited text to Python, so the download
        # below contains the user's edits; it also needs no manual HTML escaping.
        edited_subtitles = st.text_area(
            "Subtitle text",
            value=st.session_state.subtitle_data,
            height=300,
        )

    generated_format = st.session_state.get("generated_format", export_format)
    st.download_button(
        label="⬇️ Download Subtitle",
        data=edited_subtitles,
        file_name="subtitles." + ("srt" if generated_format == "srt" else "fcpxml"),
        mime="text/plain"
    )
|