Spaces:

N4DerAX20
/

Arabic-Transcriber-app

Sleeping

App Files Files Community

Arabic-Transcriber-app / streamlit_app.py

N4DerAX20

Upload streamlit_app.py

ad610b0 verified 6 months ago

raw

history blame contribute delete

7.01 kB

	import os
	import tempfile
	import textwrap
	from datetime import timedelta
	from xml.sax.saxutils import escape

	import streamlit as st
	from faster_whisper import WhisperModel

	st.set_page_config(page_title="VO to Subtitle Generator Multi-languages version", layout="wide")
	st.title("🌍 VO to Subtitle Generator — Multi-languages Version")

	def format_time(seconds):
	    """Format an offset in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

	    The value is rounded to the nearest millisecond. Hours are always
	    zero-padded to at least two digits and milliseconds to exactly three,
	    which is the layout SRT cue timing lines use.

	    Args:
	        seconds: Offset from the start of the media, in seconds (int or float).
	            Negative values are clamped to zero.

	    Returns:
	        The timestamp string, e.g. ``"00:01:02,345"``.
	    """
	    # Work in whole milliseconds so the carry into seconds/minutes/hours is
	    # exact (e.g. 59.9996 s becomes 00:01:00,000 rather than 00:00:59,1000).
	    total_ms = max(0, int(round(seconds * 1000)))
	    hours, remainder = divmod(total_ms, 3600 * 1000)
	    minutes, remainder = divmod(remainder, 60 * 1000)
	    secs, millis = divmod(remainder, 1000)
	    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"

	def generate_srt(segments, max_chars, max_lines):
	    """Render transcript segments as SRT text.

	    Each segment's text is wrapped to ``max_chars`` columns and the wrapped
	    lines are grouped into cues of at most ``max_lines`` lines. A segment
	    that needs several cues has its time span divided evenly between them.

	    Args:
	        segments: Iterable of objects exposing ``start``, ``end`` and ``text``.
	        max_chars: Maximum characters per subtitle line.
	        max_lines: Maximum lines per subtitle cue.

	    Returns:
	        The SRT document as a single string; empty if no cue was produced.
	    """
	    entries = []
	    for seg in segments:
	        wrapped = textwrap.wrap(seg.text.strip(), width=max_chars)
	        cues = [wrapped[k:k + max_lines] for k in range(0, len(wrapped), max_lines)]
	        if not cues:
	            # Blank segment text wraps to nothing, so it yields no cue.
	            continue

	        per_cue = (seg.end - seg.start) / len(cues)
	        for position, cue_lines in enumerate(cues):
	            cue_start = seg.start + position * per_cue
	            cue_end = cue_start + per_cue
	            timing = f"{format_time(cue_start)} --> {format_time(cue_end)}"
	            body = "\n".join(cue_lines)
	            # Cue numbers are 1-based and run across all segments.
	            entries.append(f"{len(entries) + 1}\n{timing}\n{body}\n\n")
	    return "".join(entries)

	def generate_fcpxml(segments, version):
	    """Build an FCPXML document with one title clip per transcript segment.

	    The titles are attached (lane 1) to a single gap clip whose duration is
	    the end time of the last segment. Segment text is XML-escaped before it
	    is embedded.

	    Args:
	        segments: Sequence of objects exposing ``start``, ``end`` and ``text``.
	            May be empty, in which case the document contains no titles.
	        version: Value written to the ``version`` attribute of ``<fcpxml>``.

	    Returns:
	        The FCPXML document as a single newline-joined string.
	    """
	    # An empty transcript must not crash on segments[-1]; emit a zero-length gap.
	    total_duration = round(segments[-1].end, 2) if segments else 0

	    xml = [
	        '<?xml version="1.0" encoding="UTF-8"?>',
	        '<!DOCTYPE fcpxml>',
	        f'<fcpxml version="{version}">',
	        ' <resources>',
	        ' <format id="r1" name="FFVideoFormat1080p25" frameDuration="100/2500s" width="1920" height="1080" colorSpace="1-1-1 (Rec. 709)"/>',
	        ' <effect id="r2" name="Custom" uid=".../Titles.localized/Build In:Out.localized/Custom.localized/Custom.moti"/>',
	        ' </resources>',
	        ' <library>',
	        ' <event name="Subtitles">',
	        ' <project name="Subtitles">',
	        ' <sequence format="r1" tcStart="0s" tcFormat="NDF">',
	        ' <spine>',
	        f' <gap name="Gap" offset="0s" duration="{total_duration}s">',
	    ]

	    for index, seg in enumerate(segments, start=1):
	        start = round(seg.start, 2)
	        duration = round(seg.end - seg.start, 2)
	        text = escape(seg.text.strip())
	        # list.append accepts exactly one item; extend adds every line of the title.
	        xml.extend([
	            f' <title name="Subtitle {index}" lane="1" offset="{start}s" ref="r2" duration="{duration}s">',
	            ' <param name="Position" key="9999/10199/10201/1/100/101" value="0 -415"/>',
	            ' <param name="Alignment" key="9999/10199/10201/2/354/1002961760/401" value="1 (Center)"/>',
	            ' <text>',
	            f' <text-style ref="ts{index}">{text}</text-style>',
	            ' </text>',
	            f' <text-style-def id="ts{index}">',
	            ' <text-style font="Arial" fontSize="60" fontColor="1 1 1 1" alignment="center" bold="1"',
	            ' strokeColor="0.3 0.3 0.3 1" strokeWidth="-1"',
	            ' shadowColor="0 0 0 0.75" shadowOffset="3 315" kerning="1.2"/>',
	            ' </text-style-def>',
	            ' </title>',
	        ])

	    xml.extend([
	        ' </gap>',
	        ' </spine>',
	        ' </sequence>',
	        ' </project>',
	        ' </event>',
	        ' </library>',
	        '</fcpxml>',
	    ])
	    return '\n'.join(xml)

	# --- UI Sidebar ---
	# Display name -> language code passed to the transcriber (None = auto-detect).
	language_map = {
	    "Auto": None,
	    "Arabic": "ar",
	    "English": "en",
	    "French": "fr",
	    "Farsi": "fa",
	    "Spanish": "es",
	}

	# All input widgets live in the sidebar; the names bound here are read by
	# the generation and output sections further down the script.
	with st.sidebar:
	    st.header("⚙️ Settings")
	    uploaded_file = st.file_uploader("Upload MP3 or WAV", type=["mp3", "wav"])
	    model_size = st.selectbox("Model Size", ["tiny", "base", "small", "medium"])
	    layout = st.selectbox(
	        "Video Layout",
	        ["Horizontal (37 chars)", "Vertical (25 chars)"],
	    )
	    lines = st.selectbox("Lines per Subtitle", [1, 2], index=1)
	    language = st.selectbox("Language", list(language_map))
	    export_format = st.selectbox("Export Format", ["srt", "fcpxml"])

	    # The version picker is only shown when FCPXML export is selected.
	    fcpxml_version = None
	    if export_format == "fcpxml":
	        fcpxml_version = st.selectbox(
	            "FCPXML Version",
	            ["1.13", "1.12", "1.11", "1.8"],
	            index=3,
	        )

	# Seed the per-session defaults once; the presence of 'subtitle_data' marks
	# a session that has already been initialised.
	if "subtitle_data" not in st.session_state:
	    for state_key, initial_value in (
	        ("subtitle_data", ""),
	        ("text_dir", "rtl"),
	        ("generated", False),
	    ):
	        st.session_state[state_key] = initial_value

	# Transcribe the uploaded audio and store the rendered subtitles in session
	# state when the user presses the generate button.
	if uploaded_file and st.button("🔁 Generate Subtitle"):
	    with st.spinner("Transcribing with Whisper..."):
	        # The transcriber is given a file path, so the upload is spooled to
	        # disk. delete=False lets the file be closed (and therefore safely
	        # reopened by name on every platform) before transcription starts.
	        with tempfile.NamedTemporaryFile(delete=False) as temp_audio:
	            # getvalue() returns the whole upload regardless of the current
	            # read position of the stream.
	            temp_audio.write(uploaded_file.getvalue())
	        try:
	            whisper = WhisperModel(model_size, device="cpu", compute_type="int8")
	            segments_gen, _ = whisper.transcribe(temp_audio.name, language=language_map[language])
	            # transcribe() returns a generator; consume it while the audio
	            # file still exists.
	            segments = list(segments_gen)
	        finally:
	            # Always remove the spooled audio so repeated runs do not pile
	            # up temporary files.
	            os.unlink(temp_audio.name)
	        st.session_state.segments = segments

	        max_chars = 25 if "Vertical" in layout else 37
	        max_lines = int(lines)

	        if export_format == "srt":
	            st.session_state.subtitle_data = generate_srt(segments, max_chars, max_lines)
	        else:
	            st.session_state.subtitle_data = generate_fcpxml(segments, fcpxml_version)

	    st.session_state.generated = True

	# --- Main UI Output ---
	# Show the generated subtitles in an editable box and offer the (possibly
	# edited) text for download.
	if st.session_state.generated and st.session_state.subtitle_data:
	    generated_text = st.session_state.subtitle_data
	    # Derive the extension from the stored text rather than from the sidebar,
	    # whose selection may have changed since generation. Only the FCPXML
	    # output starts with an XML declaration.
	    is_fcpxml = generated_text.lstrip().startswith("<?xml")

	    col1, col2 = st.columns([1, 6])
	    with col1:
	        st.write("Text Direction:")
	        if st.button("⬅️ RTL"):
	            st.session_state.text_dir = "rtl"
	        if st.button("➡️ LTR"):
	            st.session_state.text_dir = "ltr"

	    with col2:
	        st.markdown("### ✏️ Edit Before Download")
	        # text_dir is only ever set to "rtl" or "ltr" by this script, so it is
	        # safe to place in the style rule that sets the editor's direction.
	        st.markdown(
	            f"<style>.stTextArea textarea {{ direction: {st.session_state.text_dir}; "
	            "font-family: monospace; font-size: 14px; }</style>",
	            unsafe_allow_html=True,
	        )
	        # A native widget returns the edited text to Python. Text typed into a
	        # raw HTML component stays in the browser and never reaches the
	        # download. It also needs no manual HTML escaping.
	        edited_text = st.text_area("Subtitle text", value=generated_text, height=300)

	        st.download_button(
	            label="⬇️ Download Subtitle",
	            data=edited_text,
	            file_name="subtitles." + ("fcpxml" if is_fcpxml else "srt"),
	            mime="text/plain",
	        )