Spaces:
Build error
Build error
| """ | |
| Transcriptinator - HuggingFace Spaces Gradio Interface | |
| Audio transcription with Gemini + OpenRouter | |
| """ | |
| import gradio as gr | |
| import os | |
| from transcribe_core import process_audio_file, get_audio_duration | |
| from ai_providers import GeminiProvider, OpenRouterProvider | |
# Resolve paths relative to this file rather than the working directory, so
# they stay valid however the app is started (e.g. on Hugging Face Spaces).
CURRENT_DIR = os.path.abspath(os.path.dirname(__file__))
# Sub-directory of CURRENT_DIR, named "outputs".
OUTPUT_FOLDER = os.path.join(CURRENT_DIR, "outputs")
def transcribe_audio(audio_file, gemini_key, openrouter_key, model_name):
    """
    Main transcription function for the Gradio interface.

    Validates the inputs, builds the AI providers, runs the transcription
    pipeline and formats a Markdown status message for the UI.

    Args:
        audio_file: Path of the uploaded audio file; falsy when nothing
            was uploaded.
        gemini_key: Gemini API key (required). Surrounding whitespace is
            ignored; fewer than 10 remaining characters is rejected.
        openrouter_key: Optional OpenRouter API key. When it is missing or
            shorter than 10 characters after stripping, no OpenRouter
            provider is created and ``None`` is passed to the pipeline.
        model_name: Model selection, passed through to ``GeminiProvider``
            and echoed in the status message.

    Returns:
        A ``(status_markdown, output_path)`` tuple. ``output_path`` is
        ``None`` whenever validation or processing fails.
    """
    # NOTE(review): the leading glyphs in the user-facing messages below look
    # like mis-decoded emoji; they are kept verbatim here - confirm against
    # the original file encoding.
    min_key_length = 10

    if not audio_file:
        return "β Please upload an audio file.", None

    # Normalise the keys once so that validation and the providers see the
    # same value (a pasted key often carries stray spaces or a newline).
    gemini_key = (gemini_key or "").strip()
    openrouter_key = (openrouter_key or "").strip()

    if len(gemini_key) < min_key_length:
        return "β Please provide a valid Gemini API key.", None

    try:
        # Gemini always performs the transcription itself.
        gemini_provider = GeminiProvider(gemini_key, model_name)

        # OpenRouter is optional and only used for summary/ideas. The same
        # minimum length as for the Gemini key applies.
        openrouter_provider = None
        if len(openrouter_key) >= min_key_length:
            openrouter_provider = OpenRouterProvider(openrouter_key)

        # Figures shown in the status message (duration is presumably
        # reported in seconds by get_audio_duration - TODO confirm).
        duration_min = get_audio_duration(audio_file) / 60
        file_size_mb = os.path.getsize(audio_file) / (1024 * 1024)

        # Progress updates are not surfaced in the UI, hence the no-op callback.
        output_path, is_zip = process_audio_file(
            audio_file,
            gemini_provider,
            openrouter_provider,
            progress_callback=lambda msg, progress: None,
        )

        # NOTE(review): the string comparison accepts both the bool True and
        # the text "True"; kept as-is because the return type of
        # process_audio_file is not visible from here.
        if str(is_zip) == "True":
            file_type = "ZIP archive"
            file_desc = "Multiple transcript files (chunked audio)"
        else:
            file_type = "Markdown file"
            file_desc = "Single transcript file"

        text_provider = "OpenRouter (DeepSeek R1)" if openrouter_provider else "Gemini"

        success_msg = "\n".join([
            "β **Transcription Complete!**",
            f"π Original file: {os.path.basename(audio_file)}",
            f"β±οΈ Duration: {duration_min:.1f} minutes",
            f"πΎ Size: {file_size_mb:.1f} MB",
            f"ποΈ Transcription: Gemini ({model_name})",
            f"π‘ Summary/Ideas: {text_provider}",
            f"π Output: {file_type}",
            f"{file_desc}",
            "Click below to download your transcript(s).",
        ])

        # The path is handed to the download component as the second output.
        return success_msg, output_path
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing,
        # but keep the traceback in the logs so it can be diagnosed.
        import logging

        logging.getLogger(__name__).exception(
            "Transcription failed for %s", os.path.basename(str(audio_file))
        )
        error_msg = "\n".join([
            "β **Error during transcription:**",
            f"{str(e)}",
            "**Common issues:**",
            "- Invalid API key",
            "- Audio file too large or corrupted",
            "- Network connection issues",
        ])
        return error_msg, None
# Create Gradio interface
# NOTE(review): the leading glyphs in several labels/headings below look like
# mis-decoded emoji; they are kept verbatim - confirm against the original
# file encoding.
with gr.Blocks(title="Transcriptinator", theme=gr.themes.Soft()) as app:
    gr.Markdown("""
    # ποΈ Transcriptinator
    ### AI-Powered Audio Transcription
    **Powered by:** Gemini (transcription) + OpenRouter DeepSeek R1 (summarization)
    """)

    with gr.Row():
        # Left column: all inputs and the submit button.
        with gr.Column(scale=2):
            # Audio upload; the handler receives a file path.
            audio_input = gr.Audio(
                label="Upload Audio File",
                type="filepath",
                sources=["upload"],
            )
            gr.Markdown("""
            **Supported formats:** MP3, WAV, M4A, OGG, FLAC, WEBM
            **Large files (>30MB):** Automatically chunked and processed
            """)

            # Model selection. The preferred default is only used when the
            # provider actually offers it; otherwise fall back to the first
            # available model so the dropdown never starts on an unknown value.
            _model_choices = list(GeminiProvider.AVAILABLE_MODELS)
            _default_model = "Gemini 2.5 Flash"
            if _model_choices and _default_model not in _model_choices:
                _default_model = _model_choices[0]
            model_dropdown = gr.Dropdown(
                choices=_model_choices,
                value=_default_model,
                label="Gemini Model",
                info="Select which Gemini model to use for transcription",
            )

            # API keys (masked input).
            gemini_key_input = gr.Textbox(
                label="Gemini API Key (Required)",
                placeholder="Enter your Gemini API key...",
                type="password",
                info="Get one free at: https://aistudio.google.com/app/apikey",
            )
            openrouter_key_input = gr.Textbox(
                label="OpenRouter API Key (Optional)",
                placeholder="Enter your OpenRouter key for better summaries...",
                type="password",
                info="Leave empty to use Gemini for all tasks | Get free at: https://openrouter.ai",
            )

            # Submit button
            submit_btn = gr.Button("π Transcribe Audio", variant="primary", size="lg")

        # Right column: status message and the downloadable result.
        with gr.Column(scale=1):
            # Status output
            status_output = gr.Markdown(label="Status")
            # Download component - 'interactive=False' intentionally not set
            download_output = gr.File(label="π₯ Download Transcript")

    # Information section
    gr.Markdown("""
    ---
    ### π― What you'll get:
    - π **Full transcription** with timestamps and speaker detection
    - π **Summary** in 2-3 sentences
    - π‘ **Key ideas** with descriptions
    - π **Markdown file** ready to download
    """)

    # Connect the transcription function: the inputs are passed positionally
    # in this order, and its two return values fill the status and download
    # components.
    submit_btn.click(
        fn=transcribe_audio,
        inputs=[audio_input, gemini_key_input, openrouter_key_input, model_dropdown],
        outputs=[status_output, download_output],
    )
# Script entry point: enable request queuing and start the server, allowing
# Gradio to serve files from OUTPUT_FOLDER.
if __name__ == "__main__":
    # Create the output directory up front (no error if it already exists).
    os.makedirs(OUTPUT_FOLDER, exist_ok=True)
    app.queue()
    app.launch(allowed_paths=[OUTPUT_FOLDER])