"""Gradio front end for Whisper speech-to-text.

Loads a Hugging Face ``automatic-speech-recognition`` pipeline once at import
time and exposes it through a small Gradio Blocks UI with an upload tab.
"""

import os
from typing import Optional

import gradio as gr
import librosa
import torch
from transformers import pipeline

# Config
MODEL_NAME = "openai/whisper-small"
BATCH_SIZE = 8
YT_LENGTH_LIMIT_S = 3600  # 1 hour

# Candidate example clips; only the ones present on disk are offered in the UI.
EXAMPLE_AUDIO_FILES = [
    "sample_audio/sample1.wav",
    "sample_audio/sample2.mp3",
]

# CUDA device 0 when a GPU is visible, otherwise fall back to the CPU.
device = 0 if torch.cuda.is_available() else "cpu"

# Load model pipeline (done once, at import time, and shared by all requests).
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,
    device=device,
)


# Duration check
def is_too_long(audio_path: str, limit: float = YT_LENGTH_LIMIT_S) -> bool:
    """Return True when the audio at *audio_path* lasts longer than *limit*.

    Args:
        audio_path: Path of the audio file to inspect.
        limit: Maximum allowed duration in seconds.
    """
    return librosa.get_duration(path=audio_path) > limit


# Transcription logic
def transcribe(audio_path: Optional[str], task: str = "transcribe") -> str:
    """Run the Whisper pipeline on one audio file and return its text.

    Args:
        audio_path: Path of the audio file, or None when nothing was provided.
        task: Forwarded to generation as ``generate_kwargs={"task": task}``;
            the UI offers "transcribe" and "translate".

    Returns:
        The ``"text"`` field of the pipeline result.

    Raises:
        gr.Error: If the path is missing or not a regular file, or if the
            audio is longer than ``YT_LENGTH_LIMIT_S`` seconds.
    """
    # isfile (rather than exists) also rejects directories, which would
    # otherwise only fail later inside the audio loader.
    if audio_path is None or not os.path.isfile(audio_path):
        raise gr.Error("❌ Invalid file path or missing audio.")

    if is_too_long(audio_path):
        raise gr.Error("⚠️ Audio exceeds 1-hour limit. Please upload a shorter file.")

    result = pipe(
        audio_path,
        batch_size=BATCH_SIZE,
        generate_kwargs={"task": task},
        return_timestamps=True,
    )
    return result["text"]


# Wrapper with error handling
def handle_audio(audio_path: Optional[str], task: str) -> str:
    """Call :func:`transcribe` and turn any failure into a displayable string.

    This is the UI boundary: instead of propagating, every exception is
    rendered into the output textbox as ``"❌ Error: <detail>"``.
    """
    try:
        return transcribe(audio_path, task)
    except Exception as err:  # deliberate catch-all at the UI boundary
        # NOTE(review): str() of a gr.Error appears to be the repr of its
        # message (wrapped in quotes) - confirm for the installed Gradio
        # version. Prefer the raw ``message`` attribute when it is present.
        detail = getattr(err, "message", None) or str(err)
        return f"❌ Error: {detail}"


# Gradio App
with gr.Blocks() as demo:
    gr.Markdown(
        "# 🎙️ Whisper Speech-to-Text App\n"
        "Powered by 🤗 Transformers and Gradio.\n\n"
        "Upload an audio file or record with your mic."
    )

    with gr.Tabs():
        # Tab 1: Upload
        with gr.Tab("📁 Upload Audio"):
            with gr.Row():
                audio_upload = gr.Audio(type="filepath", label="Upload Audio File")
                task_option_upload = gr.Radio(
                    ["transcribe", "translate"],
                    value="transcribe",
                    label="Choose Task",
                )
            with gr.Row():
                transcribe_btn_upload = gr.Button("Transcribe")
            output_upload = gr.Textbox(label="📝 Transcription", lines=8)

            transcribe_btn_upload.click(
                handle_audio,
                inputs=[audio_upload, task_option_upload],
                outputs=output_upload,
            )

            # Only offer example clips that actually exist, and skip the
            # widget entirely when none do.
            # NOTE(review): how gr.Examples reacts to missing paths may differ
            # between Gradio versions - confirm whether this guard is needed.
            available_examples = [
                path for path in EXAMPLE_AUDIO_FILES if os.path.isfile(path)
            ]
            if available_examples:
                gr.Examples(
                    examples=available_examples,
                    inputs=[audio_upload],
                    label="Example Audio Files",
                )

        # Tab 2: Record (currently disabled)
        # NOTE(review): newer Gradio releases take `sources=["microphone"]`
        # instead of `source="microphone"` - check before re-enabling.
        # with gr.Tab("🎤 Record Audio"):
        #     with gr.Row():
        #         audio_record = gr.Audio(source="microphone", type="filepath", label="Record with Microphone")
        #         task_option_record = gr.Radio(["transcribe", "translate"], value="transcribe", label="Choose Task")
        #     with gr.Row():
        #         transcribe_btn_record = gr.Button("Transcribe")
        #     output_record = gr.Textbox(label="📝 Transcription", lines=8)
        #     transcribe_btn_record.click(handle_audio, inputs=[audio_record, task_option_record], outputs=output_record)

demo.launch()