"""
SRT Processing Tool - Gradio Interface
Production-ready for Hugging Face Spaces
"""

import contextlib
import os
import tempfile
from typing import Optional, Tuple

import gradio as gr
from dotenv import load_dotenv

from tools import process_srt_file
from tools.audio_transcriber import transcribe_audio_to_srt

# Load environment variables from .env if present
load_dotenv(override=True)

# UI provider label -> router value passed to process_srt_file.
_PROVIDER_ROUTERS = {
    "Aliyun (DashScope)": "dashscope",
    "OpenAI": "openai",
    "OpenRouter": "openrouter",
}

# UI operation label -> operation value passed to process_srt_file.
# "none" is handled locally: the (transcribed) SRT is returned untouched.
_OPERATION_MODES = {
    "Translate only": "translate",
    "Resegment only": "resegment",
    "Transcribe only": "none",
}

# Model pre-filled in the "Model Name" box for each provider.
_DEFAULT_MODELS = {
    "Aliyun (DashScope)": "qwen-max",
    "OpenAI": "gpt-4.1",
    "OpenRouter": "openai/gpt-4o",
}

# Operations offered for each input type.
_SRT_OPERATIONS = ["Translate only", "Resegment only"]
_AUDIO_OPERATIONS = ["Transcribe only", "Translate only", "Resegment only"]


def _new_temp_srt() -> str:
    """Create an empty temporary ``.srt`` file and return its path."""
    with tempfile.NamedTemporaryFile(delete=False, suffix=".srt") as handle:
        return handle.name


def _safe_filename_part(text: str) -> str:
    """Replace every character that is not alphanumeric, ``-`` or ``_`` with ``_``.

    Used on free-text user input before it becomes part of a file name, so
    path separators and ``..`` cannot escape the download directory.
    """
    return "".join(ch if ch.isalnum() or ch in "-_" else "_" for ch in text)


def process_srt_interface(
    file_path: Optional[str],
    operation: str,
    target_lang: Optional[str],
    provider: str,
    model: Optional[str],
    workers: float,
    max_chars: float,
    audio_path: Optional[str] = None,
    input_type: str = "SRT File",
) -> Tuple[Optional[str], str]:
    """
    Process an SRT (or audio) file based on user inputs.

    Args:
        file_path: Path to uploaded SRT file
        operation: "Translate only", "Resegment only", or "Transcribe only";
            any other value is treated as "Resegment only"
        target_lang: Target language code (required for translation)
        provider: Translation provider ("Aliyun (DashScope)", "OpenAI",
            "OpenRouter"); any other value falls back to DashScope
        model: Model name (optional, empty means provider default)
        workers: Number of concurrent workers
        max_chars: Maximum characters per segment
        audio_path: Path to uploaded audio file
        input_type: "SRT File" or "Audio File"

    Returns:
        Tuple of (output_file_path, message). On failure the path is None and
        the message describes the error; this function does not raise.
    """
    if input_type == "SRT File" and file_path is None:
        return None, "❌ Please upload an SRT file first."
    if input_type == "Audio File" and audio_path is None:
        return None, "❌ Please upload an audio file first."

    router = _PROVIDER_ROUTERS.get(provider, "dashscope")
    operation_value = _OPERATION_MODES.get(operation, "resegment")

    # Validate up front so a missing language never costs a full transcription
    # (and never leaves a temporary transcript behind).
    lang = (target_lang or "").strip()
    if operation_value == "translate" and not lang:
        return None, "❌ Target language is required for translation."

    # Only files created here are ever deleted; the user's upload is not ours.
    created_files = []
    try:
        # Step 1: Transcribe if input is audio
        if input_type == "Audio File":
            transcript_path = _new_temp_srt()
            created_files.append(transcript_path)
            try:
                transcribe_audio_to_srt(audio_path, transcript_path)
            except Exception as e:
                return None, f"❌ Transcription failed: {str(e)}"
            file_path = transcript_path
            name_source = audio_path
        else:
            name_source = file_path

        # Step 2: Translate / resegment, or pass the SRT through unchanged
        if operation_value == "none":
            result_path = file_path
        else:
            result_path = _new_temp_srt()
            created_files.append(result_path)
            process_srt_file(
                file_path,
                result_path,
                operation=operation_value,
                max_chars=int(max_chars),
                target_lang=lang if operation_value == "translate" else None,
                model=model if model else None,
                workers=int(workers),
                router=router,
            )

        # Step 3: Build a user-friendly download name from the original upload
        input_filename = os.path.splitext(os.path.basename(name_source))[0]
        if operation_value == "translate":
            output_filename = f"{input_filename}_{_safe_filename_part(lang)}.srt"
        elif operation_value == "resegment":
            output_filename = f"{input_filename}_resentenced.srt"
        else:
            output_filename = f"{input_filename}.srt"

        # Read first: a decoding error then surfaces before anything is created.
        with open(result_path, "r", encoding="utf-8") as f:
            output_content = f.read()

        # A fresh directory per request keeps concurrent users with identical
        # file names from overwriting each other's downloads. It is left in
        # place because the returned path must stay readable for the caller.
        download_dir = tempfile.mkdtemp(prefix="srt_download_")
        download_path = os.path.join(download_dir, output_filename)
        with open(download_path, "w", encoding="utf-8") as download_file:
            download_file.write(output_content)

        success_msg = f"✅ Processing complete! ({operation})"
        return download_path, success_msg

    except Exception as e:
        # UI boundary: report the failure as a status message instead of raising.
        return None, f"❌ Processing failed: {str(e)}"

    finally:
        # Best-effort cleanup of intermediate files on every exit path.
        for path in created_files:
            with contextlib.suppress(OSError):
                os.remove(path)


def create_interface():
    """Create and configure the Gradio interface.

    Returns:
        The configured ``gr.Blocks`` application (not yet launched).
    """
    with gr.Blocks(title="SRT Processing Tool", theme=gr.themes.Soft()) as app:
        gr.Markdown(
            """
            # 🎬 SRT Processing Tool

            Process and translate your subtitle files with AI-powered tools!

            **Features:**
            - 🎤 **Audio to SRT**: Transcribe audio files using NVIDIA Parakeet TDT
            - 🔄 **Resegment**: SRT files to optimize character limits per segment
            - 🌍 **Translate**: SRT files using AI (OpenAI, Aliyun DashScope, or OpenRouter)
            - ⚡ **One-Stop**: Transcribe, resegment, and translate in one click!
            """
        )

        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### 📤 Upload & Settings")

                input_type = gr.Radio(
                    label="Input Type",
                    choices=["SRT File", "Audio File"],
                    value="SRT File",
                )

                uploaded_file = gr.File(
                    label="Upload SRT File",
                    file_types=[".srt"],
                    type="filepath",
                    visible=True,
                )

                audio_file = gr.Audio(
                    label="Upload Audio File",
                    type="filepath",
                    visible=False,
                )

                operation = gr.Radio(
                    label="Processing Operation",
                    choices=list(_SRT_OPERATIONS),
                    value="Translate only",
                    info="Choose what operation to perform on the input",
                )

                with gr.Accordion(
                    "Translation Settings", open=True, visible=True
                ) as translation_accordion:
                    target_lang = gr.Textbox(
                        label="Target Language Code",
                        placeholder="e.g., fr, es, de, zh",
                        value="zh",
                        info="ISO language code for translation",
                    )

                    provider = gr.Dropdown(
                        label="Translation Provider",
                        choices=["Aliyun (DashScope)", "OpenAI", "OpenRouter"],
                        value="Aliyun (DashScope)",
                        info="Choose the translation provider",
                    )

                    model = gr.Textbox(
                        label="Model Name",
                        placeholder="Leave blank for default",
                        value="qwen-max",
                        info="Model to use (defaults: qwen-max for DashScope, gpt-4.1 for OpenAI, openai/gpt-4o for OpenRouter)",
                    )

                    workers = gr.Slider(
                        label="Concurrent Workers",
                        minimum=1,
                        maximum=50,
                        value=25,
                        step=1,
                        info="Number of parallel translation requests",
                    )

                with gr.Accordion("Resegmentation Settings", open=True) as resegment_accordion:
                    max_chars = gr.Slider(
                        label="Maximum Characters per Segment",
                        minimum=10,
                        maximum=500,
                        value=125,
                        step=5,
                        info="Controls how the SRT is resegmented before translation",
                    )

                process_btn = gr.Button("🚀 Process File", variant="primary", size="lg")

                info_box = gr.Markdown(
                    """
                    **ℹ️ Note:** Translation automatically includes resegmentation for optimal chunk sizes.

                    **API Keys:** Set these as secrets in Hugging Face Spaces:
                    - `DASHSCOPE_API_KEY` for Aliyun DashScope
                    - `OPENAI_API_KEY` for OpenAI
                    - `OPENROUTER_API_KEY` for OpenRouter
                    """
                )

            with gr.Column(scale=1):
                gr.Markdown("### 📥 Results")

                status_output = gr.Textbox(
                    label="Status",
                    interactive=False,
                    value="Waiting for file upload...",
                )

                output_file = gr.File(
                    label="Download Processed SRT",
                    visible=False,
                )

        # Update UI visibility based on input type
        def update_input_visibility(selected_input_type, current_operation):
            """Show the matching upload widget and refresh the operation choices."""
            is_srt = selected_input_type == "SRT File"
            choices = list(_SRT_OPERATIONS if is_srt else _AUDIO_OPERATIONS)
            if current_operation in choices:
                # Leave the value alone so no spurious change event resets the model box.
                operation_update = gr.update(choices=choices)
            else:
                # e.g. "Transcribe only" is not offered for SRT input: reset it.
                operation_update = gr.update(choices=choices, value=choices[0])
            return (
                gr.update(visible=is_srt),  # uploaded_file
                gr.update(visible=not is_srt),  # audio_file
                operation_update,  # operation choices
            )

        input_type.change(
            fn=update_input_visibility,
            inputs=[input_type, operation],
            outputs=[uploaded_file, audio_file, operation],
        )

        # Update UI visibility based on operation
        def update_ui(selected_operation, selected_provider):
            """Update UI components visibility based on selected operation."""
            if selected_operation == "Translate only":
                return (
                    gr.update(visible=True, open=True),  # translation_accordion
                    gr.update(visible=True, open=True),  # resegment_accordion
                    # Default must match the provider that is currently selected.
                    gr.update(value=_DEFAULT_MODELS.get(selected_provider, "qwen-max")),
                )
            if selected_operation == "Resegment only":
                return (
                    gr.update(visible=False),  # translation_accordion
                    gr.update(visible=True, open=True),  # resegment_accordion
                    gr.update(value=""),  # model empty
                )
            # Transcribe only
            return (
                gr.update(visible=False),  # translation_accordion
                gr.update(visible=False),  # resegment_accordion
                gr.update(value=""),  # model empty
            )

        operation.change(
            fn=update_ui,
            inputs=[operation, provider],
            outputs=[translation_accordion, resegment_accordion, model],
        )

        # Update model name based on provider
        def update_model_placeholder(selected_provider):
            """Fill the model box with the selected provider's default model."""
            return gr.update(value=_DEFAULT_MODELS.get(selected_provider, ""))

        provider.change(
            fn=update_model_placeholder,
            inputs=[provider],
            outputs=[model],
        )

        # Process button click handler
        def handle_process(srt_path, op, lang, prov, mod, wrk, chars, aud_path, in_type):
            """Handle the process button click."""
            result_file, message = process_srt_interface(
                srt_path, op, lang, prov, mod, wrk, chars, aud_path, in_type
            )

            if result_file:
                return (
                    gr.update(value=message, visible=True),
                    gr.update(
                        value=result_file,
                        visible=True,
                        label=f"Download: {os.path.basename(result_file)}",
                    ),
                )
            return (
                gr.update(value=message, visible=True),
                gr.update(visible=False),
            )

        process_btn.click(
            fn=handle_process,
            inputs=[
                uploaded_file,
                operation,
                target_lang,
                provider,
                model,
                workers,
                max_chars,
                audio_file,
                input_type,
            ],
            outputs=[status_output, output_file],
        )

        # Update status when file is uploaded
        def update_upload_status(f):
            """Reflect in the status box whether a file is currently uploaded."""
            if f:
                return gr.update(value="✅ File uploaded! Configure settings and click 'Process File'.")
            return gr.update(value="Waiting for file upload...")

        uploaded_file.change(fn=update_upload_status, inputs=[uploaded_file], outputs=[status_output])
        audio_file.change(fn=update_upload_status, inputs=[audio_file], outputs=[status_output])

    return app


# Create the Gradio interface
demo = create_interface()

# For Hugging Face Spaces, expose the demo variable
# For local development, launch the app
if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        ssr_mode=False,
    )