File size: 2,881 Bytes
1a5e2a2
 
 
6ae0891
1a5e2a2
 
6ae0891
 
1a5e2a2
6ae0891
1a5e2a2
 
6ae0891
1a5e2a2
 
 
 
 
 
 
6ae0891
 
 
 
 
 
1a5e2a2
 
6ae0891
1a5e2a2
6ae0891
 
1a5e2a2
6ae0891
1a5e2a2
 
6ae0891
1a5e2a2
 
 
 
 
 
6ae0891
1a5e2a2
6ae0891
1a5e2a2
 
6ae0891
 
 
 
 
1a5e2a2
6ae0891
 
 
1a5e2a2
6ae0891
 
 
 
 
 
 
 
 
3d9462c
 
 
 
 
 
 
 
 
1a5e2a2
6ae0891
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import gradio as gr
import torch
from transformers import pipeline
import librosa
import os

# Config
MODEL_NAME = "openai/whisper-small"  # model identifier handed to the pipeline
BATCH_SIZE = 8  # passed as batch_size when the pipeline is invoked
YT_LENGTH_LIMIT_S = 60 * 60  # 1 hour, expressed in seconds

# Use CUDA device 0 when torch reports CUDA as available; otherwise the CPU.
if torch.cuda.is_available():
    device = 0
else:
    device = "cpu"

# Load model pipeline (built once at module import and shared by all calls)
pipe = pipeline(
    model=MODEL_NAME,
    task="automatic-speech-recognition",
    device=device,
    chunk_length_s=30,
)

# Duration check
def is_too_long(audio_path, limit=YT_LENGTH_LIMIT_S):
    """Report whether the audio at *audio_path* runs longer than *limit*.

    Args:
        audio_path: Path of the audio file; passed to
            ``librosa.get_duration`` as ``path``.
        limit: Threshold the measured duration is compared against.
            Defaults to ``YT_LENGTH_LIMIT_S``.

    Returns:
        ``True`` when the duration is strictly greater than *limit*,
        ``False`` otherwise.
    """
    return librosa.get_duration(path=audio_path) > limit

# Transcription logic
def transcribe(audio_path, task="transcribe"):
    """Run the speech-recognition pipeline on a file and return its text.

    Args:
        audio_path: Filesystem path of the audio to process.
        task: Value forwarded to the pipeline as ``generate_kwargs["task"]``.

    Returns:
        The ``"text"`` entry of the pipeline result.

    Raises:
        gr.Error: If *audio_path* is ``None`` or does not exist, or if
            ``is_too_long`` reports the audio as exceeding the limit.
    """
    path_is_usable = audio_path is not None and os.path.exists(audio_path)
    if not path_is_usable:
        raise gr.Error("❌ Invalid file path or missing audio.")

    if is_too_long(audio_path):
        raise gr.Error("⚠️ Audio exceeds 1-hour limit. Please upload a shorter file.")

    generation_options = {"task": task}
    output = pipe(
        audio_path,
        batch_size=BATCH_SIZE,
        generate_kwargs=generation_options,
        return_timestamps=True,
    )
    return output["text"]

# Wrapper with error handling
def handle_audio(audio_path, task):
    """Call ``transcribe`` and turn any failure into a displayable message.

    Args:
        audio_path: Path of the audio file, forwarded to ``transcribe``.
        task: Task name, forwarded to ``transcribe``.

    Returns:
        The text returned by ``transcribe``; if any ``Exception`` is raised,
        the string ``"❌ Error: "`` followed by the exception's text.
    """
    try:
        transcript = transcribe(audio_path, task)
    except Exception as err:
        # Nothing propagates to the caller: the failure is reported as text.
        return f"❌ Error: {err!s}"
    else:
        return transcript

# Gradio App
# Builds a single-tab Blocks UI: an audio input plus a task selector feed
# `handle_audio`, whose returned string is shown in a textbox.
# The emoji in the user-facing strings below were stored as mojibake
# (UTF-8 bytes decoded through a single-byte codepage) and are restored here.
with gr.Blocks() as demo:
    gr.Markdown("# 🎙️ Whisper Speech-to-Text App\nPowered by 🤗 Transformers and Gradio.\n\nUpload an audio file or record with your mic.")

    with gr.Tabs():
        # Tab 1: Upload
        with gr.Tab("📁 Upload Audio"):
            with gr.Row():
                # type="filepath" makes the component hand a path string to the callback.
                audio_upload = gr.Audio(type="filepath", label="Upload Audio File")
                task_option_upload = gr.Radio(["transcribe", "translate"], value="transcribe", label="Choose Task")
            with gr.Row():
                transcribe_btn_upload = gr.Button("Transcribe")
                output_upload = gr.Textbox(label="📝 Transcription", lines=8)
            # Clicking the button runs handle_audio(audio_path, task) and writes the result to the textbox.
            transcribe_btn_upload.click(handle_audio, inputs=[audio_upload, task_option_upload], outputs=output_upload)

            # Example entries populate the audio input only; paths are relative to the working directory.
            gr.Examples(
                examples=[
                    "sample_audio/sample1.wav",
                    "sample_audio/sample2.mp3"
                ],
                inputs=[audio_upload],
                label="Example Audio Files"
            )

        # Disabled microphone tab, kept for reference.
        # NOTE(review): `source="microphone"` looks like the Gradio 3.x keyword; newer
        # releases appear to use `sources=["microphone"]` — confirm before re-enabling.
        # # Tab 2: Record
        # with gr.Tab("🎤 Record Audio"):
        #     with gr.Row():
        #         audio_record = gr.Audio(source="microphone", type="filepath", label="Record with Microphone")
        #         task_option_record = gr.Radio(["transcribe", "translate"], value="transcribe", label="Choose Task")
        #     with gr.Row():
        #         transcribe_btn_record = gr.Button("Transcribe")
        #         output_record = gr.Textbox(label="📝 Transcription", lines=8)
        #     transcribe_btn_record.click(handle_audio, inputs=[audio_record, task_option_record], outputs=output_record)

demo.launch()