""" Transcriptinator - HuggingFace Spaces Gradio Interface Audio transcription with Gemini + OpenRouter """ import gradio as gr import os from transcribe_core import process_audio_file, get_audio_duration from ai_providers import GeminiProvider, OpenRouterProvider # Establish absolute paths for Hugging Face Spaces compatibility CURRENT_DIR = os.path.dirname(os.path.abspath(__file__)) OUTPUT_FOLDER = os.path.join(CURRENT_DIR, "outputs") def transcribe_audio(audio_file, gemini_key, openrouter_key, model_name): """ Main transcription function for Gradio interface. """ if not audio_file: return "❌ Please upload an audio file.", None if not gemini_key or len(gemini_key.strip()) < 10: return "❌ Please provide a valid Gemini API key.", None try: # Create Gemini provider for transcription gemini_provider = GeminiProvider(gemini_key, model_name) # Create OpenRouter provider for summary/ideas (optional) openrouter_provider = None if openrouter_key and len(openrouter_key.strip()) > 10: openrouter_provider = OpenRouterProvider(openrouter_key) # Get audio duration and file size for estimate duration = get_audio_duration(audio_file) duration_min = duration / 60 file_size_mb = os.path.getsize(audio_file) / (1024 * 1024) # Process the audio file - ensure this function in core uses absolute paths output_path, is_zip = process_audio_file( audio_file, gemini_provider, openrouter_provider, progress_callback=lambda msg, progress: None ) # Determine file type for success message if str(is_zip) == "True": file_type = "ZIP archive" file_desc = "Multiple transcript files (chunked audio)" else: file_type = "Markdown file" file_desc = "Single transcript file" text_provider = "OpenRouter (DeepSeek R1)" if openrouter_provider else "Gemini" success_msg = f"""✅ **Transcription Complete!** 📝 Original file: {os.path.basename(audio_file)} ⏱️ Duration: {duration_min:.1f} minutes 💾 Size: {file_size_mb:.1f} MB 🎙️ Transcription: Gemini ({model_name}) 💡 Summary/Ideas: {text_provider} 📄 Output: {file_type} {file_desc} Click below to download your transcript(s).""" # Return the absolute file path - Gradio handles the download via proxy return success_msg, output_path except Exception as e: error_msg = f"""❌ **Error during transcription:** {str(e)} **Common issues:** - Invalid API key - Audio file too large or corrupted - Network connection issues""" return error_msg, None # Create Gradio interface with gr.Blocks(title="Transcriptinator", theme=gr.themes.Soft()) as app: gr.Markdown(""" # 🎙️ Transcriptinator ### AI-Powered Audio Transcription **Powered by:** Gemini (transcription) + OpenRouter DeepSeek R1 (summarization) """) with gr.Row(): with gr.Column(scale=2): # Audio upload audio_input = gr.Audio( label="Upload Audio File", type="filepath", sources=["upload"], ) gr.Markdown(""" **Supported formats:** MP3, WAV, M4A, OGG, FLAC, WEBM **Large files (>30MB):** Automatically chunked and processed """) # Model selection model_dropdown = gr.Dropdown( choices=list(GeminiProvider.AVAILABLE_MODELS.keys()), value="Gemini 2.5 Flash", label="Gemini Model", info="Select which Gemini model to use for transcription" ) # API keys gemini_key_input = gr.Textbox( label="Gemini API Key (Required)", placeholder="Enter your Gemini API key...", type="password", info="Get one free at: https://aistudio.google.com/app/apikey" ) openrouter_key_input = gr.Textbox( label="OpenRouter API Key (Optional)", placeholder="Enter your OpenRouter key for better summaries...", type="password", info="Leave empty to use Gemini for all tasks | Get free at: https://openrouter.ai" ) # Submit button submit_btn = gr.Button("🚀 Transcribe Audio", variant="primary", size="lg") with gr.Column(scale=1): # Status output status_output = gr.Markdown(label="Status") # Download component - removed 'interactive=False' for better stability download_output = gr.File(label="📥 Download Transcript") # Information section ... (remains unchanged) gr.Markdown(""" --- ### 🎯 What you'll get: - 📝 **Full transcription** with timestamps and speaker detection - 📊 **Summary** in 2-3 sentences - 💡 **Key ideas** with descriptions - 📄 **Markdown file** ready to download """) # Connect the transcription function submit_btn.click( fn=transcribe_audio, inputs=[audio_input, gemini_key_input, openrouter_key_input, model_dropdown], outputs=[status_output, download_output] ) # Launch the app with queuing and allowed_paths for file access if __name__ == "__main__": os.makedirs(OUTPUT_FOLDER, exist_ok=True) app.queue().launch(allowed_paths=[OUTPUT_FOLDER])