sifujohn committed on
Commit
1a5e2a2
·
verified ·
1 Parent(s): 1e10d8f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -0
app.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import pipeline
4
+ from transformers.pipelines.audio_utils import ffmpeg_read
5
+ import time
6
+ import os
7
+
8
# Model configuration.
# MODEL_NAME may be any speech-recognition checkpoint the pipeline can load;
# "distil-whisper/distil-small.en" is an English-only alternative.
MODEL_NAME = "openai/whisper-small"
# Forwarded to the pipeline as ``batch_size`` on every transcription call.
BATCH_SIZE = 8
# Not referenced by any other code in this file.
YT_LENGTH_LIMIT_S = 3600

# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = 0
else:
    device = "cpu"

# Build the shared speech-recognition pipeline once at import time so every
# request reuses the same loaded model.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,
    device=device,
)
21
+
22
+ # Transcribe function
23
def transcribe(audio_path, task="transcribe"):
    """Transcribe (or translate) an audio file with the module-level pipeline.

    Args:
        audio_path: Filesystem path of the audio file to decode.
        task: Forwarded to generation as ``generate_kwargs["task"]``; the UI
            offers ``"transcribe"`` and ``"translate"``.

    Returns:
        The ``"text"`` entry of the pipeline result.

    Raises:
        gr.Error: If ``audio_path`` is ``None`` or is not an existing file.
    """
    # isfile (rather than exists) also rejects directories, which could not
    # be opened for reading below.
    if audio_path is None or not os.path.isfile(audio_path):
        raise gr.Error("Invalid file path.")

    sampling_rate = pipe.feature_extractor.sampling_rate

    # ffmpeg_read decodes an in-memory byte payload (it is piped to ffmpeg's
    # stdin), so the file contents must be read first; handing it the path
    # string itself fails.
    with open(audio_path, "rb") as audio_file:
        payload = audio_file.read()
    audio_array = ffmpeg_read(payload, sampling_rate)

    # Raw samples plus their rate, in the dict form the pipeline accepts.
    inputs = {"array": audio_array, "sampling_rate": sampling_rate}

    # Timestamps are requested from the pipeline, but only the plain text is
    # returned to the caller.
    result = pipe(
        inputs,
        batch_size=BATCH_SIZE,
        generate_kwargs={"task": task},
        return_timestamps=True,
    )
    return result["text"]
36
+
37
+ # Wrapper for file uploads
38
def handle_audio(audio_path, task):
    """Run ``transcribe`` and report any failure as text instead of raising.

    Args:
        audio_path: Path of the uploaded audio file, passed to ``transcribe``.
        task: Task name passed to ``transcribe``.

    Returns:
        The transcription on success; otherwise the exception message
        prefixed with "❌ Error: ".
    """
    try:
        text = transcribe(audio_path, task)
    except Exception as exc:
        # UI boundary: show the failure in the output box rather than raise.
        return f"❌ Error: {exc!s}"
    return text
43
+
44
+ # Gradio UI
45
# One tab holding an audio upload, a task selector, and a button that sends
# both values to handle_audio and writes the returned text into a textbox.
# NOTE(review): the original indentation was not preserved in this view, so
# the nesting of the layout containers below is reconstructed — confirm.
with gr.Blocks() as demo:
    gr.Markdown(
        "# 🎙️ Free Whisper Speech-to-Text App\nPowered by Open Source Whisper from Hugging Face."
    )

    with gr.Tabs():
        with gr.Tab("🎧 Upload Audio"):
            with gr.Row():
                # type="filepath" makes the component hand a path string to
                # the click handler.
                audio_input = gr.Audio(type="filepath", label="Upload audio file")
                task_option = gr.Radio(
                    choices=["transcribe", "translate"],
                    value="transcribe",
                    label="Choose Task",
                )
            transcribe_btn = gr.Button("Transcribe")
            result = gr.Textbox(label="📝 Transcription", lines=8)

            transcribe_btn.click(
                fn=handle_audio,
                inputs=[audio_input, task_option],
                outputs=result,
            )

# Start the Gradio server.
demo.launch()