# Whisper speech-to-text Gradio app (Hugging Face Space).
# NOTE(review): the original upload carried HTML scrape residue above this line
# (Space status banner, file size, commit hashes, line-number gutter); it was
# not Python and has been replaced by this header.
import gradio as gr
import torch
from transformers import pipeline
import librosa
import os
# Config
MODEL_NAME = "openai/whisper-small"  # Hugging Face Hub id of the Whisper checkpoint
BATCH_SIZE = 8  # number of 30-second chunks batched per forward pass
YT_LENGTH_LIMIT_S = 3600 # 1 hour
# transformers pipelines accept a CUDA device index (int) or the string "cpu".
device = 0 if torch.cuda.is_available() else "cpu"
# Load model pipeline
# NOTE: constructed at import time — downloads/loads the model before the UI starts.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,  # Whisper's native window; long audio is chunked into 30s pieces
    device=device,
)
# Duration check
def is_too_long(audio_path, limit=YT_LENGTH_LIMIT_S):
    """Return True when the audio file at *audio_path* runs longer than *limit* seconds."""
    # librosa reads the container header where possible, so this is cheap.
    return librosa.get_duration(path=audio_path) > limit
# Transcription logic
def transcribe(audio_path, task="transcribe"):
    """Run Whisper ASR on *audio_path* and return the transcript text.

    *task* is passed through to the model: "transcribe" keeps the source
    language, "translate" outputs English. Raises gr.Error for a missing
    path or audio over the 1-hour limit.
    """
    # Guard clauses: reject missing/non-existent files, then over-long audio.
    path_ok = audio_path is not None and os.path.exists(audio_path)
    if not path_ok:
        raise gr.Error("β Invalid file path or missing audio.")
    if is_too_long(audio_path):
        raise gr.Error("β οΈ Audio exceeds 1-hour limit. Please upload a shorter file.")
    output = pipe(
        audio_path,
        batch_size=BATCH_SIZE,
        generate_kwargs={"task": task},
        return_timestamps=True,
    )
    return output["text"]
# Wrapper with error handling
def handle_audio(audio_path, task):
try:
return transcribe(audio_path, task)
except Exception as e:
return f"β Error: {str(e)}"
# Gradio App
# Gradio App
# Blocks layout: one tab set; Tab 1 is active, Tab 2 (mic recording) is
# commented out below.
with gr.Blocks() as demo:
    gr.Markdown("# ποΈ Whisper Speech-to-Text App\nPowered by π€ Transformers and Gradio.\n\nUpload an audio file or record with your mic.")
    with gr.Tabs():
        # Tab 1: Upload
        with gr.Tab("π Upload Audio"):
            # Row 1: file input + task selector side by side.
            with gr.Row():
                audio_upload = gr.Audio(type="filepath", label="Upload Audio File")
                task_option_upload = gr.Radio(["transcribe", "translate"], value="transcribe", label="Choose Task")
            # Row 2: trigger button + transcript output.
            with gr.Row():
                transcribe_btn_upload = gr.Button("Transcribe")
                output_upload = gr.Textbox(label="π Transcription", lines=8)
            # Wire the button to the error-handling wrapper.
            transcribe_btn_upload.click(handle_audio, inputs=[audio_upload, task_option_upload], outputs=output_upload)
            # Clickable sample files; paths are relative to the app's working dir.
            gr.Examples(
                examples=[
                    "sample_audio/sample1.wav",
                    "sample_audio/sample2.mp3"
                ],
                inputs=[audio_upload],
                label="Example Audio Files"
            )
        # # Tab 2: Record
        # # NOTE(review): `source="microphone"` looks like the pre-4.x Gradio API
        # # (4.x uses `sources=["microphone"]`) — confirm before re-enabling.
        # with gr.Tab("π€ Record Audio"):
        #     with gr.Row():
        #         audio_record = gr.Audio(source="microphone", type="filepath", label="Record with Microphone")
        #         task_option_record = gr.Radio(["transcribe", "translate"], value="transcribe", label="Choose Task")
        #     with gr.Row():
        #         transcribe_btn_record = gr.Button("Transcribe")
        #         output_record = gr.Textbox(label="π Transcription", lines=8)
        #     transcribe_btn_record.click(handle_audio, inputs=[audio_record, task_option_record], outputs=output_record)
demo.launch()