# NOTE: web-page residue from the Hugging Face file view was pasted here
# (author "BiliSakura", commit 44a5ece: "Enable SSR mode in app.py by setting
# ssr_mode to False"); kept as a comment so the module parses as valid Python.
"""
SRT Processing Tool - Gradio Interface
Production-ready for Hugging Face Spaces
"""
import os
import tempfile
import gradio as gr
from tools import process_srt_file
from tools.audio_transcriber import transcribe_audio_to_srt
from dotenv import load_dotenv
# Load environment variables from .env if present
load_dotenv(override=True)
def process_srt_interface(
    file_path,
    operation,
    target_lang,
    provider,
    model,
    workers,
    max_chars,
    audio_path=None,
    input_type="SRT File",
):
    """
    Process an SRT (or audio) input according to the selected operation.

    Args:
        file_path: Path to the uploaded SRT file (used when input_type is "SRT File").
        operation: "Translate only", "Resegment only", or "Transcribe only".
        target_lang: Target language code (required for translation).
        provider: Translation provider ("Aliyun (DashScope)", "OpenAI", "OpenRouter").
        model: Model name; empty/None falls back to the provider default.
        workers: Number of concurrent translation workers.
        max_chars: Maximum characters per subtitle segment.
        audio_path: Path to the uploaded audio file (used when input_type is "Audio File").
        input_type: "SRT File" or "Audio File".

    Returns:
        Tuple of (download_file_path_or_None, status_message).
    """
    if input_type == "SRT File" and file_path is None:
        return None, "❌ Please upload an SRT file first."
    if input_type == "Audio File" and audio_path is None:
        return None, "❌ Please upload an audio file first."

    # Hoisted out of the try so the except handler can reference them
    # directly instead of probing locals().
    temp_srt_path = None
    temp_output_path = None
    try:
        # Step 1: Transcribe first when the input is audio.
        if input_type == "Audio File":
            with tempfile.NamedTemporaryFile(delete=False, suffix=".srt") as temp_srt:
                temp_srt_path = temp_srt.name
            try:
                transcribe_audio_to_srt(audio_path, temp_srt_path)
                # Downstream steps operate on the transcribed SRT.
                file_path = temp_srt_path
            except Exception as e:
                if temp_srt_path and os.path.exists(temp_srt_path):
                    os.remove(temp_srt_path)
                return None, f"❌ Transcription failed: {str(e)}"

        # Map UI provider labels to internal router values.
        provider_map = {
            "Aliyun (DashScope)": "dashscope",
            "OpenAI": "openai",
            "OpenRouter": "openrouter",
        }
        router = provider_map.get(provider, "dashscope")

        # Map UI operation labels to internal values.
        operation_map = {
            "Translate only": "translate",
            "Resegment only": "resegment",
            "Transcribe only": "none",  # Special case for just transcription
        }
        operation_value = operation_map.get(operation, "resegment")

        if operation_value == "none":
            # Transcription only: the transcribed SRT already is the result.
            temp_output_path = file_path
        else:
            # Validate inputs before doing any work.
            if operation_value == "translate" and not target_lang:
                # Remove the transcription temp file before bailing out
                # (previously leaked when audio input lacked a target language).
                if temp_srt_path and os.path.exists(temp_srt_path):
                    os.remove(temp_srt_path)
                return None, "❌ Target language is required for translation."
            # Create temporary output file.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".srt") as temp_output:
                temp_output_path = temp_output.name
            # Process the file.
            process_srt_file(
                file_path,
                temp_output_path,
                operation=operation_value,
                max_chars=int(max_chars),
                target_lang=target_lang if operation_value == "translate" else None,
                model=model if model else None,
                workers=int(workers),
                router=router,
            )

        # Derive a user-friendly output filename from the original input name.
        if input_type == "Audio File":
            input_filename = os.path.splitext(os.path.basename(audio_path))[0]
        else:
            input_filename = os.path.splitext(os.path.basename(file_path))[0]
        if operation_value == "translate":
            output_filename = f"{input_filename}_{target_lang}.srt"
        elif operation_value == "resegment":
            output_filename = f"{input_filename}_resentenced.srt"
        else:
            output_filename = f"{input_filename}.srt"

        # Copy the result to a stable temp path carrying the friendly name,
        # so the Gradio download widget shows a sensible filename.
        with open(temp_output_path, "r", encoding="utf-8") as f:
            output_content = f.read()
        download_path = os.path.join(tempfile.gettempdir(), output_filename)
        with open(download_path, "w", encoding="utf-8") as download_file:
            download_file.write(output_content)

        # Best-effort cleanup of intermediate temp files. Guard: when operation
        # is "none" with an SRT input, temp_output_path aliases the user's
        # upload and must not be removed.
        try:
            if operation_value != "none" or input_type == "Audio File":
                os.remove(temp_output_path)
            if temp_srt_path and os.path.exists(temp_srt_path):
                os.remove(temp_srt_path)
        except Exception:
            pass

        success_msg = f"✅ Processing complete! ({operation})"
        return download_path, success_msg
    except Exception as e:
        # Best-effort cleanup on unexpected failure.
        try:
            if temp_output_path and os.path.exists(temp_output_path):
                os.remove(temp_output_path)
            if temp_srt_path and os.path.exists(temp_srt_path):
                os.remove(temp_srt_path)
        except Exception:
            pass
        return None, f"❌ Processing failed: {str(e)}"
def create_interface():
    """Create and configure the Gradio interface.

    Builds the full Blocks layout (input selectors, translation and
    resegmentation settings, results panel) and wires the event handlers.

    Returns:
        The assembled gr.Blocks application, ready for `.launch()`.
    """
    with gr.Blocks(title="SRT Processing Tool", theme=gr.themes.Soft()) as app:
        gr.Markdown(
            """
# 🎬 SRT Processing Tool
Process and translate your subtitle files with AI-powered tools!
**Features:**
- 🎤 **Audio to SRT**: Transcribe audio files using NVIDIA Parakeet TDT
- 🔄 **Resegment**: SRT files to optimize character limits per segment
- 🌍 **Translate**: SRT files using AI (OpenAI, Aliyun DashScope, or OpenRouter)
- ⚡ **One-Stop**: Transcribe, resegment, and translate in one click!
"""
        )
        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### 📤 Upload & Settings")
                input_type = gr.Radio(
                    label="Input Type",
                    choices=["SRT File", "Audio File"],
                    value="SRT File",
                )
                uploaded_file = gr.File(
                    label="Upload SRT File",
                    file_types=[".srt"],
                    type="filepath",
                    visible=True,
                )
                audio_file = gr.Audio(
                    label="Upload Audio File",
                    type="filepath",
                    visible=False,
                )
                operation = gr.Radio(
                    label="Processing Operation",
                    choices=["Translate only", "Resegment only"],
                    value="Translate only",
                    info="Choose what operation to perform on the input",
                )
                with gr.Accordion("Translation Settings", open=True, visible=True) as translation_accordion:
                    target_lang = gr.Textbox(
                        label="Target Language Code",
                        placeholder="e.g., fr, es, de, zh",
                        value="zh",
                        info="ISO language code for translation",
                    )
                    provider = gr.Dropdown(
                        label="Translation Provider",
                        choices=["Aliyun (DashScope)", "OpenAI", "OpenRouter"],
                        value="Aliyun (DashScope)",
                        info="Choose the translation provider",
                    )
                    model = gr.Textbox(
                        label="Model Name",
                        placeholder="Leave blank for default",
                        value="qwen-max",
                        info="Model to use (defaults: qwen-max for DashScope, gpt-4.1 for OpenAI, openai/gpt-4o for OpenRouter)",
                    )
                    workers = gr.Slider(
                        label="Concurrent Workers",
                        minimum=1,
                        maximum=50,
                        value=25,
                        step=1,
                        info="Number of parallel translation requests",
                    )
                with gr.Accordion("Resegmentation Settings", open=True) as resegment_accordion:
                    max_chars = gr.Slider(
                        label="Maximum Characters per Segment",
                        minimum=10,
                        maximum=500,
                        value=125,
                        step=5,
                        info="Controls how the SRT is resegmented before translation",
                    )
                process_btn = gr.Button("🚀 Process File", variant="primary", size="lg")
                info_box = gr.Markdown(
                    """
**ℹ️ Note:** Translation automatically includes resegmentation for optimal chunk sizes.
**API Keys:** Set these as secrets in Hugging Face Spaces:
- `DASHSCOPE_API_KEY` for Aliyun DashScope
- `OPENAI_API_KEY` for OpenAI
- `OPENROUTER_API_KEY` for OpenRouter
"""
                )
            with gr.Column(scale=1):
                gr.Markdown("### 📥 Results")
                status_output = gr.Textbox(
                    label="Status",
                    interactive=False,
                    value="Waiting for file upload...",
                )
                output_file = gr.File(
                    label="Download Processed SRT",
                    visible=False,
                )

        # Toggle upload widgets and operation choices by input type.
        def update_input_visibility(selected_input_type):
            if selected_input_type == "SRT File":
                return (
                    gr.update(visible=True),  # uploaded_file
                    gr.update(visible=False),  # audio_file
                    gr.update(choices=["Translate only", "Resegment only"]),  # operation choices
                )
            else:
                return (
                    gr.update(visible=False),  # uploaded_file
                    gr.update(visible=True),  # audio_file
                    gr.update(choices=["Transcribe only", "Translate only", "Resegment only"]),  # operation choices
                )

        input_type.change(
            fn=update_input_visibility,
            inputs=[input_type],
            outputs=[uploaded_file, audio_file, operation],
        )

        # Show/hide the settings accordions per selected operation.
        def update_ui(selected_operation):
            """Update UI components visibility based on selected operation."""
            if selected_operation == "Translate only":
                return (
                    gr.update(visible=True, open=True),  # translation_accordion
                    gr.update(visible=True, open=True),  # resegment_accordion
                    gr.update(value="qwen-max"),  # model default
                )
            elif selected_operation == "Resegment only":
                return (
                    gr.update(visible=False),  # translation_accordion
                    gr.update(visible=True, open=True),  # resegment_accordion
                    gr.update(value=""),  # model empty
                )
            else:  # Transcribe only
                return (
                    gr.update(visible=False),  # translation_accordion
                    gr.update(visible=False),  # resegment_accordion
                    gr.update(value=""),  # model empty
                )

        operation.change(
            fn=update_ui,
            inputs=[operation],
            outputs=[translation_accordion, resegment_accordion, model],
        )

        # Fill the model field with the provider's default model name.
        def update_model_placeholder(selected_provider):
            """Update model placeholder text based on provider."""
            defaults = {
                "Aliyun (DashScope)": "qwen-max",
                "OpenAI": "gpt-4.1",
                "OpenRouter": "openai/gpt-4o",
            }
            return gr.update(value=defaults.get(selected_provider, ""))

        provider.change(
            fn=update_model_placeholder,
            inputs=[provider],
            outputs=[model],
        )

        # Process button click handler.
        def handle_process(srt_path, op, lang, prov, mod, wrk, chars, aud_path, in_type):
            """Handle the process button click."""
            result_file, message = process_srt_interface(
                srt_path, op, lang, prov, mod, wrk, chars, aud_path, in_type
            )
            if result_file:
                return (
                    gr.update(value=message, visible=True),
                    gr.update(value=result_file, visible=True, label=f"Download: {os.path.basename(result_file)}")
                )
            else:
                return (
                    gr.update(value=message, visible=True),
                    gr.update(visible=False)
                )

        process_btn.click(
            fn=handle_process,
            inputs=[uploaded_file, operation, target_lang, provider, model, workers, max_chars, audio_file, input_type],
            outputs=[status_output, output_file],
        )

        # Update status text when either upload widget changes.
        def update_upload_status(f):
            if f:
                return gr.update(value="✅ File uploaded! Configure settings and click 'Process File'.")
            return gr.update(value="Waiting for file upload...")

        uploaded_file.change(fn=update_upload_status, inputs=[uploaded_file], outputs=[status_output])
        audio_file.change(fn=update_upload_status, inputs=[audio_file], outputs=[status_output])

    # Single return after the Blocks context exits (the original had a
    # duplicate, unreachable `return app`).
    return app
# Create the Gradio interface at import time: Hugging Face Spaces serves the
# module-level `demo` variable directly.
demo = create_interface()
# For local development, launch the app ourselves.
if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",  # bind all interfaces (required inside containers)
        server_port=7860,  # standard Hugging Face Spaces port
        share=False,
        ssr_mode=False,  # server-side rendering disabled
    )