Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import torch | |
| from transformers import pipeline | |
| import librosa | |
| import os | |
# --- Configuration ---------------------------------------------------------
# Checkpoint identifier handed to the transformers pipeline below.
MODEL_NAME = "openai/whisper-small"
# Forwarded as `batch_size` on each pipeline call made by transcribe().
BATCH_SIZE = 8
# Default duration cap used by is_too_long(), in seconds.
YT_LENGTH_LIMIT_S = 3600  # 1 hour

# Use CUDA device index 0 when a GPU is visible, otherwise run on the CPU.
if torch.cuda.is_available():
    device = 0
else:
    device = "cpu"

# --- Model -----------------------------------------------------------------
# Built once at import time and shared by every request.
# `chunk_length_s=30` asks the pipeline to process long audio in 30-second
# chunks.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,
    device=device,
)
# Duration check
def is_too_long(audio_path, limit=YT_LENGTH_LIMIT_S):
    """Tell whether the audio file at *audio_path* runs longer than *limit*.

    Args:
        audio_path: Path of the audio file, passed to ``librosa.get_duration``.
        limit: Maximum allowed duration; defaults to ``YT_LENGTH_LIMIT_S``.

    Returns:
        ``True`` when the measured duration is strictly greater than *limit*.
    """
    return librosa.get_duration(path=audio_path) > limit
# Transcription logic
def transcribe(audio_path, task="transcribe"):
    """Run the module-level Whisper pipeline on one audio file.

    Args:
        audio_path: Filesystem path of the audio to process, or ``None`` when
            the UI component holds no audio.
        task: Value forwarded to generation as ``task``; the UI offers
            ``"transcribe"`` and ``"translate"``.

    Returns:
        The ``"text"`` entry of the pipeline result.

    Raises:
        gr.Error: If *audio_path* is ``None`` or does not name a regular
            file, or if ``is_too_long`` reports the audio exceeds the limit.
    """
    # isfile (not exists): a directory path would otherwise pass this guard
    # and fail later inside the audio loader with a far less helpful message.
    if audio_path is None or not os.path.isfile(audio_path):
        raise gr.Error("❌ Invalid file path or missing audio.")

    if is_too_long(audio_path):
        raise gr.Error("⚠️ Audio exceeds 1-hour limit. Please upload a shorter file.")

    result = pipe(
        audio_path,
        batch_size=BATCH_SIZE,
        generate_kwargs={"task": task},
        return_timestamps=True,
    )
    return result["text"]
# Wrapper with error handling
def handle_audio(audio_path, task):
    """Call ``transcribe`` and turn any failure into a displayable message.

    This is the callback wired to the Transcribe button, so it never raises:
    whatever goes wrong is reported in the output textbox instead.

    Args:
        audio_path: Path of the audio file supplied by the UI (may be ``None``).
        task: ``"transcribe"`` or ``"translate"``, as chosen in the UI.

    Returns:
        The transcription text on success, otherwise a string of the form
        ``"❌ Error: <exception message>"``.
    """
    try:
        return transcribe(audio_path, task)
    except Exception as e:  # UI boundary: report the failure, don't propagate
        return f"❌ Error: {e}"
# Gradio App
# NOTE(review): the nesting below was reconstructed from a copy of this file
# whose indentation had been lost. In particular, confirm whether the output
# Textbox belongs inside the second Row next to the button.
with gr.Blocks() as demo:
    gr.Markdown(
        "# 🎙️ Whisper Speech-to-Text App\n"
        "Powered by 🤗 Transformers and Gradio.\n\n"
        "Upload an audio file or record with your mic."
    )

    with gr.Tabs():
        # Tab 1: Upload
        with gr.Tab("📁 Upload Audio"):
            with gr.Row():
                # type="filepath" makes the callback receive a path string,
                # which is what handle_audio/transcribe expect.
                audio_upload = gr.Audio(type="filepath", label="Upload Audio File")
                task_option_upload = gr.Radio(
                    ["transcribe", "translate"],
                    value="transcribe",
                    label="Choose Task",
                )
            with gr.Row():
                transcribe_btn_upload = gr.Button("Transcribe")
            output_upload = gr.Textbox(label="📝 Transcription", lines=8)

            # handle_audio returns either the transcript or an error message;
            # both are shown in the same textbox.
            transcribe_btn_upload.click(
                handle_audio,
                inputs=[audio_upload, task_option_upload],
                outputs=output_upload,
            )

            # Clickable sample files that pre-fill the audio input.
            gr.Examples(
                examples=[
                    "sample_audio/sample1.wav",
                    "sample_audio/sample2.mp3",
                ],
                inputs=[audio_upload],
                label="Example Audio Files",
            )

        # Tab 2: Record (currently disabled)
        # NOTE(review): newer Gradio releases spell the argument below as
        # sources=["microphone"]; confirm against the installed version
        # before re-enabling this tab.
        # with gr.Tab("🎤 Record Audio"):
        #     with gr.Row():
        #         audio_record = gr.Audio(source="microphone", type="filepath", label="Record with Microphone")
        #         task_option_record = gr.Radio(["transcribe", "translate"], value="transcribe", label="Choose Task")
        #     with gr.Row():
        #         transcribe_btn_record = gr.Button("Transcribe")
        #     output_record = gr.Textbox(label="📝 Transcription", lines=8)
        #     transcribe_btn_record.click(handle_audio, inputs=[audio_record, task_option_record], outputs=output_record)

demo.launch()