Spaces:
Running
Running
| """ | |
| SRT Processing Tool - Gradio Interface | |
| Production-ready for Hugging Face Spaces | |
| """ | |
| import os | |
| import tempfile | |
| import gradio as gr | |
| from tools import process_srt_file | |
| from tools.audio_transcriber import transcribe_audio_to_srt | |
| from dotenv import load_dotenv | |
| # Load environment variables from .env if present | |
| load_dotenv(override=True) | |
def process_srt_interface(
    file_path,
    operation,
    target_lang,
    provider,
    model,
    workers,
    max_chars,
    audio_path=None,
    input_type="SRT File",
):
    """
    Process an SRT (or audio) upload according to the selected operation.

    For audio input the file is first transcribed to a temporary SRT via
    ``transcribe_audio_to_srt``. The SRT is then passed to
    ``process_srt_file`` (translate / resegment) or, for "Transcribe only",
    used as-is. The result is copied into a fresh per-request directory under
    a descriptive filename so it can be offered for download.

    Args:
        file_path: Path to uploaded SRT file (used when input_type is "SRT File")
        operation: "Translate only", "Resegment only", or "Transcribe only";
            unknown values fall back to resegmenting
        target_lang: Target language code (required for translation)
        provider: Translation provider ("Aliyun (DashScope)", "OpenAI",
            "OpenRouter"); unknown values fall back to DashScope
        model: Model name (optional; empty means "let the backend decide")
        workers: Number of concurrent workers
        max_chars: Maximum characters per segment
        audio_path: Path to uploaded audio file (used when input_type is "Audio File")
        input_type: "SRT File" or "Audio File"

    Returns:
        Tuple of (output_file_path, status_message). ``output_file_path`` is
        None whenever the request could not be completed; errors are reported
        through the message rather than raised.
    """
    # Local imports: the module header does not import these.
    import re
    import shutil

    if input_type == "SRT File" and file_path is None:
        return None, "❌ Please upload an SRT file first."
    if input_type == "Audio File" and audio_path is None:
        return None, "❌ Please upload an audio file first."

    # Map provider names to internal router values
    provider_map = {
        "Aliyun (DashScope)": "dashscope",
        "OpenAI": "openai",
        "OpenRouter": "openrouter",
    }
    router = provider_map.get(provider, "dashscope")

    # Map operation names to internal values
    operation_map = {
        "Translate only": "translate",
        "Resegment only": "resegment",
        "Transcribe only": "none",  # Special case for just transcription
    }
    operation_value = operation_map.get(operation, "resegment")

    # Validate up front, before any temp file exists or a (slow) transcription
    # has been started, so a rejected request leaves nothing behind.
    target_lang = (target_lang or "").strip()
    if operation_value == "translate" and not target_lang:
        return None, "❌ Target language is required for translation."

    # Only files created by this call are tracked here and removed in
    # `finally`; the user's upload is never deleted.
    owned_temp_paths = []
    try:
        # Step 1: Transcribe if input is audio
        if input_type == "Audio File":
            with tempfile.NamedTemporaryFile(delete=False, suffix=".srt") as temp_srt:
                transcript_path = temp_srt.name
            owned_temp_paths.append(transcript_path)
            try:
                transcribe_audio_to_srt(audio_path, transcript_path)
            except Exception as e:
                return None, f"❌ Transcription failed: {e}"
            name_source = audio_path
            file_path = transcript_path
        else:
            name_source = file_path

        # Step 2: Translate / resegment, or pass the SRT through untouched
        if operation_value == "none":
            result_path = file_path
        else:
            with tempfile.NamedTemporaryFile(delete=False, suffix=".srt") as temp_output:
                result_path = temp_output.name
            owned_temp_paths.append(result_path)
            process_srt_file(
                file_path,
                result_path,
                operation=operation_value,
                max_chars=int(max_chars),
                target_lang=target_lang if operation_value == "translate" else None,
                model=model if model else None,
                workers=int(workers),
                router=router,
            )

        # Step 3: Build the download filename from the uploaded file's name
        input_filename = os.path.splitext(os.path.basename(name_source))[0]
        if operation_value == "translate":
            # target_lang is free-form user text: keep only characters that
            # are safe inside a filename so it cannot inject path separators.
            lang_tag = re.sub(r"[^A-Za-z0-9_-]+", "_", target_lang).strip("_")
            output_filename = f"{input_filename}_{lang_tag or 'translated'}.srt"
        elif operation_value == "resegment":
            output_filename = f"{input_filename}_resentenced.srt"
        else:
            output_filename = f"{input_filename}.srt"

        with open(result_path, "r", encoding="utf-8") as f:
            output_content = f.read()

        # A private directory per request: two users processing files with
        # the same name must not overwrite each other's download.
        download_dir = tempfile.mkdtemp(prefix="srt_download_")
        download_path = os.path.join(download_dir, output_filename)
        try:
            with open(download_path, "w", encoding="utf-8") as download_file:
                download_file.write(output_content)
        except Exception:
            shutil.rmtree(download_dir, ignore_errors=True)
            raise

        return download_path, f"✅ Processing complete! ({operation})"
    except Exception as e:
        return None, f"❌ Processing failed: {e}"
    finally:
        # Best-effort cleanup of intermediates; a failure to delete a temp
        # file must not mask the real result or error.
        for temp_path in owned_temp_paths:
            try:
                os.remove(temp_path)
            except OSError:
                pass
def create_interface():
    """Create and configure the Gradio interface.

    Builds a two-column Blocks layout (inputs and settings on the left,
    status and download on the right) and wires the event handlers that keep
    the widgets consistent with the selected input type, operation and
    provider.

    Returns:
        The configured ``gr.Blocks`` app (not yet launched).
    """
    # Operations offered per input type; transcription only makes sense for audio.
    srt_operations = ["Translate only", "Resegment only"]
    audio_operations = ["Transcribe only", "Translate only", "Resegment only"]
    # Default model per provider, shared by every handler that resets the model box.
    model_defaults = {
        "Aliyun (DashScope)": "qwen-max",
        "OpenAI": "gpt-4.1",
        "OpenRouter": "openai/gpt-4o",
    }

    with gr.Blocks(title="SRT Processing Tool", theme=gr.themes.Soft()) as app:
        gr.Markdown(
            """
            # 🎬 SRT Processing Tool

            Process and translate your subtitle files with AI-powered tools!

            **Features:**
            - 🎤 **Audio to SRT**: Transcribe audio files using NVIDIA Parakeet TDT
            - 🔄 **Resegment**: SRT files to optimize character limits per segment
            - 🌍 **Translate**: SRT files using AI (OpenAI, Aliyun DashScope, or OpenRouter)
            - ⚡ **One-Stop**: Transcribe, resegment, and translate in one click!
            """
        )
        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### 📤 Upload & Settings")
                input_type = gr.Radio(
                    label="Input Type",
                    choices=["SRT File", "Audio File"],
                    value="SRT File",
                )
                uploaded_file = gr.File(
                    label="Upload SRT File",
                    file_types=[".srt"],
                    type="filepath",
                    visible=True,
                )
                audio_file = gr.Audio(
                    label="Upload Audio File",
                    type="filepath",
                    visible=False,
                )
                operation = gr.Radio(
                    label="Processing Operation",
                    choices=srt_operations,
                    value="Translate only",
                    info="Choose what operation to perform on the input",
                )
                with gr.Accordion("Translation Settings", open=True, visible=True) as translation_accordion:
                    target_lang = gr.Textbox(
                        label="Target Language Code",
                        placeholder="e.g., fr, es, de, zh",
                        value="zh",
                        info="ISO language code for translation",
                    )
                    provider = gr.Dropdown(
                        label="Translation Provider",
                        choices=["Aliyun (DashScope)", "OpenAI", "OpenRouter"],
                        value="Aliyun (DashScope)",
                        info="Choose the translation provider",
                    )
                    model = gr.Textbox(
                        label="Model Name",
                        placeholder="Leave blank for default",
                        value="qwen-max",
                        info="Model to use (defaults: qwen-max for DashScope, gpt-4.1 for OpenAI, openai/gpt-4o for OpenRouter)",
                    )
                    workers = gr.Slider(
                        label="Concurrent Workers",
                        minimum=1,
                        maximum=50,
                        value=25,
                        step=1,
                        info="Number of parallel translation requests",
                    )
                with gr.Accordion("Resegmentation Settings", open=True) as resegment_accordion:
                    max_chars = gr.Slider(
                        label="Maximum Characters per Segment",
                        minimum=10,
                        maximum=500,
                        value=125,
                        step=5,
                        info="Controls how the SRT is resegmented before translation",
                    )
                process_btn = gr.Button("🚀 Process File", variant="primary", size="lg")
                gr.Markdown(
                    """
                    **ℹ️ Note:** Translation automatically includes resegmentation for optimal chunk sizes.

                    **API Keys:** Set these as secrets in Hugging Face Spaces:
                    - `DASHSCOPE_API_KEY` for Aliyun DashScope
                    - `OPENAI_API_KEY` for OpenAI
                    - `OPENROUTER_API_KEY` for OpenRouter
                    """
                )
            with gr.Column(scale=1):
                gr.Markdown("### 📥 Results")
                status_output = gr.Textbox(
                    label="Status",
                    interactive=False,
                    value="Waiting for file upload...",
                )
                output_file = gr.File(
                    label="Download Processed SRT",
                    visible=False,
                )

        # Update UI visibility based on input type
        def update_input_visibility(selected_input_type, current_operation):
            """Show the matching upload widget and offer only valid operations."""
            is_srt = selected_input_type == "SRT File"
            choices = srt_operations if is_srt else audio_operations
            # Keep the user's selection when it is still valid; otherwise
            # (e.g. "Transcribe only" after switching back to SRT input) fall
            # back to the first valid choice so no stale value is submitted.
            selected = current_operation if current_operation in choices else choices[0]
            return (
                gr.update(visible=is_srt),  # uploaded_file
                gr.update(visible=not is_srt),  # audio_file
                gr.update(choices=choices, value=selected),  # operation
            )

        input_type.change(
            fn=update_input_visibility,
            inputs=[input_type, operation],
            outputs=[uploaded_file, audio_file, operation],
        )

        # Update UI visibility based on operation
        def update_ui(selected_operation, selected_provider):
            """Update UI components visibility based on selected operation."""
            if selected_operation == "Translate only":
                return (
                    gr.update(visible=True, open=True),  # translation_accordion
                    gr.update(visible=True, open=True),  # resegment_accordion
                    # Default for the provider currently selected, so the model
                    # box never shows a model belonging to another provider.
                    gr.update(value=model_defaults.get(selected_provider, "")),
                )
            if selected_operation == "Resegment only":
                return (
                    gr.update(visible=False),  # translation_accordion
                    gr.update(visible=True, open=True),  # resegment_accordion
                    gr.update(value=""),  # model empty
                )
            # Transcribe only
            return (
                gr.update(visible=False),  # translation_accordion
                gr.update(visible=False),  # resegment_accordion
                gr.update(value=""),  # model empty
            )

        operation.change(
            fn=update_ui,
            inputs=[operation, provider],
            outputs=[translation_accordion, resegment_accordion, model],
        )

        # Reset the model box to the default of the newly selected provider
        def update_model_placeholder(selected_provider):
            """Set the model textbox to the selected provider's default model."""
            return gr.update(value=model_defaults.get(selected_provider, ""))

        provider.change(
            fn=update_model_placeholder,
            inputs=[provider],
            outputs=[model],
        )

        # Process button click handler
        def handle_process(srt_path, op, lang, prov, mod, wrk, chars, aud_path, in_type):
            """Run the processing and map its result onto the status/download widgets."""
            result_file, message = process_srt_interface(
                srt_path, op, lang, prov, mod, wrk, chars, aud_path, in_type
            )
            if result_file:
                return (
                    gr.update(value=message, visible=True),
                    gr.update(
                        value=result_file,
                        visible=True,
                        label=f"Download: {os.path.basename(result_file)}",
                    ),
                )
            # Failure: show the message and hide any previous download.
            return (
                gr.update(value=message, visible=True),
                gr.update(visible=False),
            )

        process_btn.click(
            fn=handle_process,
            inputs=[uploaded_file, operation, target_lang, provider, model, workers, max_chars, audio_file, input_type],
            outputs=[status_output, output_file],
        )

        # Update status when file is uploaded
        def update_upload_status(f):
            """Reflect whether a file is currently present in the upload widget."""
            if f:
                return gr.update(value="✅ File uploaded! Configure settings and click 'Process File'.")
            return gr.update(value="Waiting for file upload...")

        uploaded_file.change(fn=update_upload_status, inputs=[uploaded_file], outputs=[status_output])
        audio_file.change(fn=update_upload_status, inputs=[audio_file], outputs=[status_output])

    return app
# Build the app at import time: Hugging Face Spaces picks up the
# module-level `demo` variable.
demo = create_interface()

# Running the file directly (local development) starts the server itself.
if __name__ == "__main__":
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "ssr_mode": False,
    }
    demo.launch(**launch_options)