# NOTE: web-page residue from the Hugging Face file view was pasted here
# (author "BiliSakura", commit 44a5ece: "Enable SSR mode in app.py by setting
# ssr_mode to False"); kept as a comment so the module parses as valid Python.
"""
SRT Processing Tool - Gradio Interface
Production-ready for Hugging Face Spaces
"""
import os
import tempfile
import gradio as gr
from tools import process_srt_file
from tools.audio_transcriber import transcribe_audio_to_srt
from dotenv import load_dotenv
# Load environment variables from .env if present
load_dotenv(override=True)
def process_srt_interface(
    file_path,
    operation,
    target_lang,
    provider,
    model,
    workers,
    max_chars,
    audio_path=None,
    input_type="SRT File",
):
    """
    Process an SRT (or audio) input according to the selected operation.

    Args:
        file_path: Path to the uploaded SRT file (used when input_type is "SRT File").
        operation: "Translate only", "Resegment only", or "Transcribe only".
        target_lang: Target language code (required for translation).
        provider: Translation provider ("Aliyun (DashScope)", "OpenAI", "OpenRouter").
        model: Model name; empty/None falls back to the provider default.
        workers: Number of concurrent translation workers.
        max_chars: Maximum characters per subtitle segment.
        audio_path: Path to the uploaded audio file (used when input_type is "Audio File").
        input_type: "SRT File" or "Audio File".

    Returns:
        Tuple of (download_file_path_or_None, status_message).
    """
    if input_type == "SRT File" and file_path is None:
        return None, "❌ Please upload an SRT file first."
    if input_type == "Audio File" and audio_path is None:
        return None, "❌ Please upload an audio file first."

    # Hoisted out of the try so the except handler can reference them
    # directly instead of probing locals().
    temp_srt_path = None
    temp_output_path = None
    try:
        # Step 1: Transcribe first when the input is audio.
        if input_type == "Audio File":
            with tempfile.NamedTemporaryFile(delete=False, suffix=".srt") as temp_srt:
                temp_srt_path = temp_srt.name
            try:
                transcribe_audio_to_srt(audio_path, temp_srt_path)
                # Downstream steps operate on the transcribed SRT.
                file_path = temp_srt_path
            except Exception as e:
                if temp_srt_path and os.path.exists(temp_srt_path):
                    os.remove(temp_srt_path)
                return None, f"❌ Transcription failed: {str(e)}"

        # Map UI provider labels to internal router values.
        provider_map = {
            "Aliyun (DashScope)": "dashscope",
            "OpenAI": "openai",
            "OpenRouter": "openrouter",
        }
        router = provider_map.get(provider, "dashscope")

        # Map UI operation labels to internal values.
        operation_map = {
            "Translate only": "translate",
            "Resegment only": "resegment",
            "Transcribe only": "none",  # Special case for just transcription
        }
        operation_value = operation_map.get(operation, "resegment")

        if operation_value == "none":
            # Transcription only: the transcribed SRT already is the result.
            temp_output_path = file_path
        else:
            # Validate inputs before doing any work.
            if operation_value == "translate" and not target_lang:
                # Remove the transcription temp file before bailing out
                # (previously leaked when audio input lacked a target language).
                if temp_srt_path and os.path.exists(temp_srt_path):
                    os.remove(temp_srt_path)
                return None, "❌ Target language is required for translation."
            # Create temporary output file.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".srt") as temp_output:
                temp_output_path = temp_output.name
            # Process the file.
            process_srt_file(
                file_path,
                temp_output_path,
                operation=operation_value,
                max_chars=int(max_chars),
                target_lang=target_lang if operation_value == "translate" else None,
                model=model if model else None,
                workers=int(workers),
                router=router,
            )

        # Derive a user-friendly output filename from the original input name.
        if input_type == "Audio File":
            input_filename = os.path.splitext(os.path.basename(audio_path))[0]
        else:
            input_filename = os.path.splitext(os.path.basename(file_path))[0]
        if operation_value == "translate":
            output_filename = f"{input_filename}_{target_lang}.srt"
        elif operation_value == "resegment":
            output_filename = f"{input_filename}_resentenced.srt"
        else:
            output_filename = f"{input_filename}.srt"

        # Copy the result to a stable temp path carrying the friendly name,
        # so the Gradio download widget shows a sensible filename.
        with open(temp_output_path, "r", encoding="utf-8") as f:
            output_content = f.read()
        download_path = os.path.join(tempfile.gettempdir(), output_filename)
        with open(download_path, "w", encoding="utf-8") as download_file:
            download_file.write(output_content)

        # Best-effort cleanup of intermediate temp files. Guard: when operation
        # is "none" with an SRT input, temp_output_path aliases the user's
        # upload and must not be removed.
        try:
            if operation_value != "none" or input_type == "Audio File":
                os.remove(temp_output_path)
            if temp_srt_path and os.path.exists(temp_srt_path):
                os.remove(temp_srt_path)
        except Exception:
            pass

        success_msg = f"✅ Processing complete! ({operation})"
        return download_path, success_msg
    except Exception as e:
        # Best-effort cleanup on unexpected failure.
        try:
            if temp_output_path and os.path.exists(temp_output_path):
                os.remove(temp_output_path)
            if temp_srt_path and os.path.exists(temp_srt_path):
                os.remove(temp_srt_path)
        except Exception:
            pass
        return None, f"❌ Processing failed: {str(e)}"
def create_interface():
    """Create and configure the Gradio interface.

    Builds the full Blocks layout (input selectors, translation and
    resegmentation settings, results panel) and wires the event handlers.

    Returns:
        The assembled gr.Blocks application, ready for `.launch()`.
    """
    with gr.Blocks(title="SRT Processing Tool", theme=gr.themes.Soft()) as app:
        gr.Markdown(
            """
# 🎬 SRT Processing Tool
Process and translate your subtitle files with AI-powered tools!
**Features:**
- 🎤 **Audio to SRT**: Transcribe audio files using NVIDIA Parakeet TDT
- 🔄 **Resegment**: SRT files to optimize character limits per segment
- 🌍 **Translate**: SRT files using AI (OpenAI, Aliyun DashScope, or OpenRouter)
- ⚡ **One-Stop**: Transcribe, resegment, and translate in one click!
"""
        )
        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### 📤 Upload & Settings")
                input_type = gr.Radio(
                    label="Input Type",
                    choices=["SRT File", "Audio File"],
                    value="SRT File",
                )
                uploaded_file = gr.File(
                    label="Upload SRT File",
                    file_types=[".srt"],
                    type="filepath",
                    visible=True,
                )
                audio_file = gr.Audio(
                    label="Upload Audio File",
                    type="filepath",
                    visible=False,
                )
                operation = gr.Radio(
                    label="Processing Operation",
                    choices=["Translate only", "Resegment only"],
                    value="Translate only",
                    info="Choose what operation to perform on the input",
                )
                with gr.Accordion("Translation Settings", open=True, visible=True) as translation_accordion:
                    target_lang = gr.Textbox(
                        label="Target Language Code",
                        placeholder="e.g., fr, es, de, zh",
                        value="zh",
                        info="ISO language code for translation",
                    )
                    provider = gr.Dropdown(
                        label="Translation Provider",
                        choices=["Aliyun (DashScope)", "OpenAI", "OpenRouter"],
                        value="Aliyun (DashScope)",
                        info="Choose the translation provider",
                    )
                    model = gr.Textbox(
                        label="Model Name",
                        placeholder="Leave blank for default",
                        value="qwen-max",
                        info="Model to use (defaults: qwen-max for DashScope, gpt-4.1 for OpenAI, openai/gpt-4o for OpenRouter)",
                    )
                    workers = gr.Slider(
                        label="Concurrent Workers",
                        minimum=1,
                        maximum=50,
                        value=25,
                        step=1,
                        info="Number of parallel translation requests",
                    )
                with gr.Accordion("Resegmentation Settings", open=True) as resegment_accordion:
                    max_chars = gr.Slider(
                        label="Maximum Characters per Segment",
                        minimum=10,
                        maximum=500,
                        value=125,
                        step=5,
                        info="Controls how the SRT is resegmented before translation",
                    )
                process_btn = gr.Button("🚀 Process File", variant="primary", size="lg")
                info_box = gr.Markdown(
                    """
**ℹ️ Note:** Translation automatically includes resegmentation for optimal chunk sizes.
**API Keys:** Set these as secrets in Hugging Face Spaces:
- `DASHSCOPE_API_KEY` for Aliyun DashScope
- `OPENAI_API_KEY` for OpenAI
- `OPENROUTER_API_KEY` for OpenRouter
"""
                )
            with gr.Column(scale=1):
                gr.Markdown("### 📥 Results")
                status_output = gr.Textbox(
                    label="Status",
                    interactive=False,
                    value="Waiting for file upload...",
                )
                output_file = gr.File(
                    label="Download Processed SRT",
                    visible=False,
                )

        # Toggle upload widgets and operation choices by input type.
        def update_input_visibility(selected_input_type):
            if selected_input_type == "SRT File":
                return (
                    gr.update(visible=True),  # uploaded_file
                    gr.update(visible=False),  # audio_file
                    gr.update(choices=["Translate only", "Resegment only"]),  # operation choices
                )
            else:
                return (
                    gr.update(visible=False),  # uploaded_file
                    gr.update(visible=True),  # audio_file
                    gr.update(choices=["Transcribe only", "Translate only", "Resegment only"]),  # operation choices
                )

        input_type.change(
            fn=update_input_visibility,
            inputs=[input_type],
            outputs=[uploaded_file, audio_file, operation],
        )

        # Show/hide the settings accordions per selected operation.
        def update_ui(selected_operation):
            """Update UI components visibility based on selected operation."""
            if selected_operation == "Translate only":
                return (
                    gr.update(visible=True, open=True),  # translation_accordion
                    gr.update(visible=True, open=True),  # resegment_accordion
                    gr.update(value="qwen-max"),  # model default
                )
            elif selected_operation == "Resegment only":
                return (
                    gr.update(visible=False),  # translation_accordion
                    gr.update(visible=True, open=True),  # resegment_accordion
                    gr.update(value=""),  # model empty
                )
            else:  # Transcribe only
                return (
                    gr.update(visible=False),  # translation_accordion
                    gr.update(visible=False),  # resegment_accordion
                    gr.update(value=""),  # model empty
                )

        operation.change(
            fn=update_ui,
            inputs=[operation],
            outputs=[translation_accordion, resegment_accordion, model],
        )

        # Fill the model field with the provider's default model name.
        def update_model_placeholder(selected_provider):
            """Update model placeholder text based on provider."""
            defaults = {
                "Aliyun (DashScope)": "qwen-max",
                "OpenAI": "gpt-4.1",
                "OpenRouter": "openai/gpt-4o",
            }
            return gr.update(value=defaults.get(selected_provider, ""))

        provider.change(
            fn=update_model_placeholder,
            inputs=[provider],
            outputs=[model],
        )

        # Process button click handler.
        def handle_process(srt_path, op, lang, prov, mod, wrk, chars, aud_path, in_type):
            """Handle the process button click."""
            result_file, message = process_srt_interface(
                srt_path, op, lang, prov, mod, wrk, chars, aud_path, in_type
            )
            if result_file:
                return (
                    gr.update(value=message, visible=True),
                    gr.update(value=result_file, visible=True, label=f"Download: {os.path.basename(result_file)}")
                )
            else:
                return (
                    gr.update(value=message, visible=True),
                    gr.update(visible=False)
                )

        process_btn.click(
            fn=handle_process,
            inputs=[uploaded_file, operation, target_lang, provider, model, workers, max_chars, audio_file, input_type],
            outputs=[status_output, output_file],
        )

        # Update status text when either upload widget changes.
        def update_upload_status(f):
            if f:
                return gr.update(value="✅ File uploaded! Configure settings and click 'Process File'.")
            return gr.update(value="Waiting for file upload...")

        uploaded_file.change(fn=update_upload_status, inputs=[uploaded_file], outputs=[status_output])
        audio_file.change(fn=update_upload_status, inputs=[audio_file], outputs=[status_output])

    # Single return after the Blocks context exits (the original had a
    # duplicate, unreachable `return app`).
    return app
# Create the Gradio interface at import time: Hugging Face Spaces serves the
# module-level `demo` variable directly.
demo = create_interface()
# For local development, launch the app ourselves.
if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",  # bind all interfaces (required inside containers)
        server_port=7860,  # standard Hugging Face Spaces port
        share=False,
        ssr_mode=False,  # server-side rendering disabled
    )