File size: 7,013 Bytes
f75e191
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2625ccf
 
f75e191
 
2625ccf
f75e191
2625ccf
 
 
 
 
f75e191
 
 
 
2625ccf
 
 
 
 
 
 
 
 
 
 
 
f75e191
2625ccf
 
 
f75e191
 
 
 
 
 
 
2625ccf
f75e191
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2625ccf
f75e191
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2625ccf
f75e191
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
import streamlit as st
import tempfile
from faster_whisper import WhisperModel
import textwrap
from datetime import timedelta
from xml.sax.saxutils import escape

# Page setup: set the page title and wide layout, then render the heading.
page_tab_title = "VO to Subtitle Generator Multi-languages version"
page_heading = "🌍 VO to Subtitle Generator — Multi-languages Version"
st.set_page_config(layout="wide", page_title=page_tab_title)
st.title(page_heading)

def format_time(seconds):
    """Format a time offset in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    The value is rounded to the nearest millisecond and split with integer
    arithmetic, so hours are always zero-padded to two digits and the
    millisecond field always has three digits, including for offsets of ten
    hours or more.

    Args:
        seconds: Offset from the start of the media, in seconds (int or
            float). Negative values are clamped to zero.

    Returns:
        The timestamp string, e.g. ``"00:01:02,345"``.
    """
    total_ms = max(0, int(round(seconds * 1000)))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"

def generate_srt(segments, max_chars, max_lines):
    """Build SRT text from transcription segments.

    Each segment's stripped text is wrapped to ``max_chars`` columns and the
    wrapped lines are grouped ``max_lines`` at a time; every group becomes
    one numbered cue. A segment's time span is divided evenly among its
    cues. Segments whose text wraps to no lines produce no cues.

    Args:
        segments: Iterable of objects exposing ``start``, ``end`` and ``text``.
        max_chars: Wrap width passed to ``textwrap.wrap``.
        max_lines: Number of wrapped lines per cue.

    Returns:
        All cue blocks concatenated into one string ("" when there are none).
    """
    cues = []
    for segment in segments:
        seg_start, seg_end = segment.start, segment.end
        wrapped = textwrap.wrap(segment.text.strip(), width=max_chars)
        groups = [wrapped[pos:pos + max_lines] for pos in range(0, len(wrapped), max_lines)]
        span = seg_end - seg_start
        if not groups:
            # Nothing to show for this segment.
            continue

        step = span / len(groups)
        for index, group in enumerate(groups):
            cue_start = seg_start + index * step
            cue_end = cue_start + step
            time_range = f"{format_time(cue_start)} --> {format_time(cue_end)}"
            body = "\n".join(group)
            # Cue numbers are 1-based and run across all segments.
            cues.append(f"{len(cues) + 1}\n{time_range}\n{body}\n\n")
    return "".join(cues)

def generate_fcpxml(segments, version):
    """Render transcription segments as an FCPXML document string.

    The document contains a single gap whose duration is the end time of the
    last segment, with one ``<title>`` element per segment attached on lane 1.
    Segment text is XML-escaped before being embedded.

    Args:
        segments: Sequence of objects exposing ``start``, ``end`` and
            ``text``. May be empty, in which case the gap has duration ``0s``
            and no titles are emitted.
        version: Value written to the ``version`` attribute of ``<fcpxml>``.

    Returns:
        The FCPXML document as a newline-joined string.
    """
    # An empty transcription has no last segment; fall back to a zero-length gap.
    total_duration = round(segments[-1].end, 2) if segments else 0

    # NOTE(review): offsets/durations are written as decimal seconds (e.g.
    # "1.5s"); confirm the target editor accepts this rather than requiring
    # rational values such as "3/2s".
    xml = [
        '<?xml version="1.0" encoding="UTF-8"?>',
        '<!DOCTYPE fcpxml>',
        f'<fcpxml version="{version}">',
        '  <resources>',
        '    <format id="r1" name="FFVideoFormat1080p25" frameDuration="100/2500s" width="1920" height="1080" colorSpace="1-1-1 (Rec. 709)"/>',
        '    <effect id="r2" name="Custom" uid=".../Titles.localized/Build In:Out.localized/Custom.localized/Custom.moti"/>',
        '  </resources>',
        '  <library>',
        '    <event name="Subtitles">',
        '      <project name="Subtitles">',
        '        <sequence format="r1" tcStart="0s" tcFormat="NDF">',
        '          <spine>',
        f'            <gap name="Gap" offset="0s" duration="{total_duration}s">',
    ]

    for number, seg in enumerate(segments, start=1):
        start = round(seg.start, 2)
        duration = round(seg.end - seg.start, 2)
        text = escape(seg.text.strip())
        # list.append takes exactly one item; extend adds every line of the title.
        xml.extend([
            f'              <title name="Subtitle {number}" lane="1" offset="{start}s" ref="r2" duration="{duration}s">',
            '                <param name="Position" key="9999/10199/10201/1/100/101" value="0 -415"/>',
            '                <param name="Alignment" key="9999/10199/10201/2/354/1002961760/401" value="1 (Center)"/>',
            '                <text>',
            f'                  <text-style ref="ts{number}">{text}</text-style>',
            '                </text>',
            f'                <text-style-def id="ts{number}">',
            '                  <text-style font="Arial" fontSize="60" fontColor="1 1 1 1" alignment="center" bold="1"',
            '                             strokeColor="0.3 0.3 0.3 1" strokeWidth="-1"',
            '                             shadowColor="0 0 0 0.75" shadowOffset="3 315" kerning="1.2"/>',
            '                </text-style-def>',
            '              </title>',
        ])

    xml.extend([
        '            </gap>',
        '          </spine>',
        '        </sequence>',
        '      </project>',
        '    </event>',
        '  </library>',
        '</fcpxml>',
    ])
    return '\n'.join(xml)

# --- UI Sidebar ---
# Collects the audio file and generation settings, and runs the transcription
# when the user presses the generate button. Results are stored in
# st.session_state so they survive Streamlit reruns.
with st.sidebar:
    st.header("⚙️ Settings")
    uploaded_file = st.file_uploader("Upload MP3 or WAV", type=["mp3", "wav"])
    model_size = st.selectbox("Model Size", ["tiny", "base", "small", "medium"])
    layout = st.selectbox("Video Layout", ["Horizontal (37 chars)", "Vertical (25 chars)"])
    lines = st.selectbox("Lines per Subtitle", [1, 2], index=1)
    # Display name -> language code passed to the transcriber; None for "Auto".
    language_map = {
        "Auto": None,
        "Arabic": "ar",
        "English": "en",
        "French": "fr",
        "Farsi": "fa",
        "Spanish": "es"
    }
    language = st.selectbox("Language", list(language_map.keys()))
    export_format = st.selectbox("Export Format", ["srt", "fcpxml"])
    # The FCPXML version picker is only shown when that export format is selected.
    fcpxml_version = st.selectbox("FCPXML Version", ["1.13", "1.12", "1.11", "1.8"], index=3) if export_format == "fcpxml" else None

    # First run of the session: seed the state read by the main output area.
    if 'subtitle_data' not in st.session_state:
        st.session_state.subtitle_data = ""
        st.session_state.text_dir = "rtl"
        st.session_state.generated = False

    if uploaded_file and st.button("🔁 Generate Subtitle"):
        with st.spinner("Transcribing with Whisper..."):
            # Write the upload into a temporary directory that is removed when
            # the block exits, so no audio copy is left behind on disk.
            with tempfile.TemporaryDirectory() as work_dir:
                audio_path = f"{work_dir}/audio"
                with open(audio_path, "wb") as audio_file:
                    # getvalue() returns the full payload regardless of the
                    # current read position of the uploaded stream.
                    audio_file.write(uploaded_file.getvalue())
                whisper = WhisperModel(model_size, device="cpu", compute_type="int8")
                segments_gen, _ = whisper.transcribe(audio_path, language=language_map[language])
                # Materialize the segments before the directory is removed, in
                # case the transcriber reads the file lazily.
                segments = list(segments_gen)
            st.session_state.segments = segments

            if not segments:
                # Nothing was transcribed: report it instead of building an
                # empty subtitle document.
                st.warning("No speech was detected in the uploaded audio.")
                st.session_state.subtitle_data = ""
                st.session_state.generated = False
            else:
                max_chars = 25 if "Vertical" in layout else 37
                max_lines = int(lines)

                if export_format == "srt":
                    st.session_state.subtitle_data = generate_srt(segments, max_chars, max_lines)
                else:
                    st.session_state.subtitle_data = generate_fcpxml(segments, fcpxml_version)

                st.session_state.generated = True

# --- Main UI Output ---
# Shows the generated subtitle text in an HTML textarea with a selectable text
# direction, plus a download button for the generated data.
if st.session_state.generated and st.session_state.subtitle_data:
    subtitle_data = st.session_state.subtitle_data
    col1, col2 = st.columns([1, 6])
    with col1:
        st.write("Text Direction:")
        if st.button("⬅️ RTL"):
            st.session_state.text_dir = "rtl"
        if st.button("➡️ LTR"):
            st.session_state.text_dir = "ltr"

    with col2:
        st.markdown("### ✏️ Edit Before Download")
        # Escape &, < and > so the text is shown verbatim in the textarea:
        # without this, entities in FCPXML output are decoded by the browser
        # and a literal "</textarea>" in the text would break the markup.
        editor_text = escape(subtitle_data)
        # NOTE: edits typed into this textarea are only kept in the browser
        # (window.subtitleEdited); the download button below still sends
        # st.session_state.subtitle_data.
        preview_html = f'''
            <textarea id="subtitle_editor" name="subtitle_editor"
                style="width: 100%; height: 300px; padding: 1.5em 2em;
                border: 1px solid #ccc; border-radius: 8px;
                font-family: monospace; font-size: 14px;
                direction: {st.session_state.text_dir}; white-space: pre-wrap;">{editor_text}</textarea>
            <script>
                const editor = document.getElementById('subtitle_editor');
                editor.addEventListener('input', () => {{
                    window.subtitleEdited = editor.value;
                }});
                window.subtitleEdited = editor.value;
            </script>
        '''
        st.components.v1.html(preview_html, height=360)

        # Pick the extension from the stored data rather than the current
        # "Export Format" selection, which may have changed since generation.
        # Generated FCPXML starts with an XML declaration; SRT starts with a
        # cue number.
        is_fcpxml = subtitle_data.lstrip().startswith("<?xml")
        st.download_button(
            label="⬇️ Download Subtitle",
            data=subtitle_data,
            file_name="subtitles." + ("fcpxml" if is_fcpxml else "srt"),
            mime="text/plain"
        )