"""
Transcriptinator - HuggingFace Spaces Gradio Interface
Audio transcription with Gemini + OpenRouter
"""
import gradio as gr
import os
from transcribe_core import process_audio_file, get_audio_duration
from ai_providers import GeminiProvider, OpenRouterProvider
# Anchor all paths to the directory that contains this file (absolute),
# for Hugging Face Spaces compatibility.
CURRENT_DIR = os.path.split(os.path.abspath(__file__))[0]
OUTPUT_FOLDER = os.path.join(CURRENT_DIR, "outputs")
def transcribe_audio(audio_file, gemini_key, openrouter_key, model_name):
    """
    Main transcription function for Gradio interface.

    Validates the inputs, builds the AI providers, runs
    ``process_audio_file`` and reports the outcome as Markdown.

    Args:
        audio_file: Path to the uploaded audio file; falsy when nothing was
            uploaded.
        gemini_key: Gemini API key (required). Surrounding whitespace is
            ignored; fewer than 10 remaining characters is rejected.
        openrouter_key: OpenRouter API key (optional). Surrounding
            whitespace is ignored; an OpenRouter provider is only created
            when more than 10 characters remain.
        model_name: Gemini model selection, passed to ``GeminiProvider`` and
            echoed in the status message.

    Returns:
        A ``(status_message, output_path)`` tuple. ``output_path`` is the
        value returned by ``process_audio_file`` on success, and ``None``
        when validation fails or any exception is raised while processing.
    """
    if not audio_file:
        return "β Please upload an audio file.", None

    # Normalise both keys once so that the length checks and the providers
    # see the same value. Previously only the checks used the stripped key,
    # so a key pasted with stray whitespace passed validation but was handed
    # to the provider unmodified.
    gemini_key = (gemini_key or "").strip()
    openrouter_key = (openrouter_key or "").strip()

    if len(gemini_key) < 10:
        return "β Please provide a valid Gemini API key.", None

    try:
        # Create Gemini provider for transcription
        gemini_provider = GeminiProvider(gemini_key, model_name)

        # Create OpenRouter provider for summary/ideas (optional)
        openrouter_provider = None
        if len(openrouter_key) > 10:
            openrouter_provider = OpenRouterProvider(openrouter_key)

        # Get audio duration and file size for the status message
        duration = get_audio_duration(audio_file)
        duration_min = duration / 60
        file_size_mb = os.path.getsize(audio_file) / (1024 * 1024)

        # Process the audio file - ensure this function in core uses absolute paths
        output_path, is_zip = process_audio_file(
            audio_file,
            gemini_provider,
            openrouter_provider,
            # Progress updates are deliberately discarded.
            progress_callback=lambda msg, progress: None
        )

        # Determine file type for success message. The flag is compared by
        # its string form, so both True and the string "True" select the ZIP
        # branch (presumably a guard against a non-bool flag - TODO confirm
        # what process_audio_file actually returns).
        if str(is_zip) == "True":
            file_type = "ZIP archive"
            file_desc = "Multiple transcript files (chunked audio)"
        else:
            file_type = "Markdown file"
            file_desc = "Single transcript file"

        text_provider = "OpenRouter (DeepSeek R1)" if openrouter_provider else "Gemini"

        success_msg = f"""β
**Transcription Complete!**
π Original file: {os.path.basename(audio_file)}
β±οΈ Duration: {duration_min:.1f} minutes
πΎ Size: {file_size_mb:.1f} MB
ποΈ Transcription: Gemini ({model_name})
π‘ Summary/Ideas: {text_provider}
π Output: {file_type}
{file_desc}
Click below to download your transcript(s)."""

        # Return the absolute file path - Gradio handles the download via proxy
        return success_msg, output_path

    except Exception as e:
        # UI boundary: turn any failure into a readable status message
        # instead of letting the exception propagate to Gradio.
        error_msg = f"""β **Error during transcription:**
{e}
**Common issues:**
- Invalid API key
- Audio file too large or corrupted
- Network connection issues"""
        return error_msg, None
# Create Gradio interface
# The layout is declared top to bottom: a two-column row (inputs on the left,
# results on the right), an informational footer, then the event wiring.
with gr.Blocks(title="Transcriptinator", theme=gr.themes.Soft()) as app:
    # Page header
    gr.Markdown("""
# ποΈ Transcriptinator
### AI-Powered Audio Transcription
**Powered by:** Gemini (transcription) + OpenRouter DeepSeek R1 (summarization)
""")
    with gr.Row():
        # Left column (wider, scale=2): all user inputs
        with gr.Column(scale=2):
            # Audio upload
            # type="filepath": transcribe_audio treats this value as a path
            # on disk (it calls os.path.getsize / os.path.basename on it).
            audio_input = gr.Audio(
                label="Upload Audio File",
                type="filepath",
                sources=["upload"],
            )
            gr.Markdown("""
**Supported formats:** MP3, WAV, M4A, OGG, FLAC, WEBM
**Large files (>30MB):** Automatically chunked and processed
""")
            # Model selection
            # Choices are the keys of GeminiProvider.AVAILABLE_MODELS; the
            # default below presumably has to be one of those keys - verify
            # against ai_providers.
            model_dropdown = gr.Dropdown(
                choices=list(GeminiProvider.AVAILABLE_MODELS.keys()),
                value="Gemini 2.5 Flash",
                label="Gemini Model",
                info="Select which Gemini model to use for transcription"
            )
            # API keys
            gemini_key_input = gr.Textbox(
                label="Gemini API Key (Required)",
                placeholder="Enter your Gemini API key...",
                type="password",
                info="Get one free at: https://aistudio.google.com/app/apikey"
            )
            openrouter_key_input = gr.Textbox(
                label="OpenRouter API Key (Optional)",
                placeholder="Enter your OpenRouter key for better summaries...",
                type="password",
                info="Leave empty to use Gemini for all tasks | Get free at: https://openrouter.ai"
            )
            # Submit button
            submit_btn = gr.Button("π Transcribe Audio", variant="primary", size="lg")
        # Right column (narrower, scale=1): results
        with gr.Column(scale=1):
            # Status output
            # Receives the first element of transcribe_audio's return tuple.
            status_output = gr.Markdown(label="Status")
            # Download component - removed 'interactive=False' for better stability
            # Receives the second element (output path, or None on failure).
            download_output = gr.File(label="π₯ Download Transcript")
    # Information section ... (remains unchanged)
    gr.Markdown("""
---
### π― What you'll get:
- π **Full transcription** with timestamps and speaker detection
- π **Summary** in 2-3 sentences
- π‘ **Key ideas** with descriptions
- π **Markdown file** ready to download
""")
    # Connect the transcription function
    # The order of `inputs` must match transcribe_audio's positional
    # parameters (audio_file, gemini_key, openrouter_key, model_name), and
    # `outputs` its (status message, output path) return tuple.
    submit_btn.click(
        fn=transcribe_audio,
        inputs=[audio_input, gemini_key_input, openrouter_key_input, model_dropdown],
        outputs=[status_output, download_output]
    )
# Launch the app with queuing and allowed_paths for file access
if __name__ == "__main__":
    # Create the output folder up front so the directory passed to
    # allowed_paths exists before the server starts.
    os.makedirs(OUTPUT_FOLDER, exist_ok=True)
    app.queue().launch(allowed_paths=[OUTPUT_FOLDER])