Update app.py
app.py CHANGED
```diff
@@ -1,17 +1,16 @@
 import gradio as gr
 import torch
 from transformers import pipeline
-
-import time
+import librosa
 import os
 
-#
-MODEL_NAME = "openai/whisper-small"
+# Config
+MODEL_NAME = "openai/whisper-small"
 BATCH_SIZE = 8
-YT_LENGTH_LIMIT_S = 3600
+YT_LENGTH_LIMIT_S = 3600  # 1 hour
 device = 0 if torch.cuda.is_available() else "cpu"
 
-# Load
+# Load model pipeline
 pipe = pipeline(
     task="automatic-speech-recognition",
     model=MODEL_NAME,
```
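The first hunk swaps the unused `time` import for `librosa` and fleshes out the bare comments. The pipeline configured here is easy to smoke-test outside Gradio; a minimal sketch, assuming a short local recording `sample.wav` (an illustrative filename, not a file in this Space):

```python
# Standalone smoke test for the same pipeline configuration.
# "sample.wav" is an illustrative assumption, not part of this repo.
import torch
from transformers import pipeline

device = 0 if torch.cuda.is_available() else "cpu"  # CUDA device 0, else CPU
asr = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-small",
    device=device,
)

out = asr("sample.wav", return_timestamps=True)
print(out["text"])            # the full transcript as one string
for chunk in out["chunks"]:   # segment timestamps, from return_timestamps=True
    print(chunk["timestamp"], chunk["text"])
```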
```diff
@@ -19,40 +18,61 @@ pipe = pipeline(
     device=device,
 )
 
-#
+# Duration check
+def is_too_long(audio_path, limit=YT_LENGTH_LIMIT_S):
+    duration = librosa.get_duration(path=audio_path)
+    return duration > limit
+
+# Transcription logic
 def transcribe(audio_path, task="transcribe"):
     if audio_path is None or not os.path.exists(audio_path):
-        raise gr.Error("Invalid file path.")
-
-    # Read the audio file using ffmpeg_read
-    audio_array = ffmpeg_read(audio_path, pipe.feature_extractor.sampling_rate)
+        raise gr.Error("❌ Invalid file path or missing audio.")
 
-
-
+    if is_too_long(audio_path):
+        raise gr.Error("⚠️ Audio exceeds 1-hour limit. Please upload a shorter file.")
 
-
-    result = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)
+    result = pipe(audio_path, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)
     return result["text"]
 
-# Wrapper
+# Wrapper with error handling
 def handle_audio(audio_path, task):
     try:
         return transcribe(audio_path, task)
     except Exception as e:
         return f"❌ Error: {str(e)}"
 
-# Gradio
+# Gradio App
 with gr.Blocks() as demo:
-    gr.Markdown("# 🎙️
+    gr.Markdown("# 🎙️ Whisper Speech-to-Text App\nPowered by 🤗 Transformers and Gradio.\n\nUpload an audio file or record with your mic.")
 
     with gr.Tabs():
-
+        # Tab 1: Upload
+        with gr.Tab("📁 Upload Audio"):
+            with gr.Row():
+                audio_upload = gr.Audio(type="filepath", label="Upload Audio File")
+                task_option_upload = gr.Radio(["transcribe", "translate"], value="transcribe", label="Choose Task")
             with gr.Row():
-
-
-
-            result = gr.Textbox(label="📝 Transcription", lines=8)
+                transcribe_btn_upload = gr.Button("Transcribe")
+                output_upload = gr.Textbox(label="📝 Transcription", lines=8)
+            transcribe_btn_upload.click(handle_audio, inputs=[audio_upload, task_option_upload], outputs=output_upload)
 
-
+            gr.Examples(
+                examples=[
+                    "sample_audio/sample1.wav",
+                    "sample_audio/sample2.mp3"
+                ],
+                inputs=[audio_upload],
+                label="Example Audio Files"
+            )
+
+        # Tab 2: Record
+        with gr.Tab("🎤 Record Audio"):
+            with gr.Row():
+                audio_record = gr.Audio(source="microphone", type="filepath", label="Record with Microphone")
+                task_option_record = gr.Radio(["transcribe", "translate"], value="transcribe", label="Choose Task")
+            with gr.Row():
+                transcribe_btn_record = gr.Button("Transcribe")
+                output_record = gr.Textbox(label="📝 Transcription", lines=8)
+            transcribe_btn_record.click(handle_audio, inputs=[audio_record, task_option_record], outputs=output_record)
 
-demo.launch()
+demo.launch()
```
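The second hunk fixes the real bug: the old `transcribe` leaned on an `ffmpeg_read` helper that appears nowhere in the old imports, while the new version hands the file path straight to the pipeline and adds an `is_too_long` guard against files over `YT_LENGTH_LIMIT_S`. The guard can be verified in isolation; a small sketch, assuming `numpy` and `soundfile` are available (test-only assumptions, not imports of `app.py`):

```python
# Verify the duration guard against a synthetic 2-second file.
# numpy/soundfile are test-only assumptions; app.py needs only librosa here.
import numpy as np
import soundfile as sf
import librosa

def is_too_long(audio_path, limit):
    return librosa.get_duration(path=audio_path) > limit

sr = 16000
sf.write("tiny.wav", np.zeros(2 * sr, dtype=np.float32), sr)  # 2 s of silence

print(is_too_long("tiny.wav", limit=1))     # True: 2 s > 1 s
print(is_too_long("tiny.wav", limit=3600))  # False: far below the 1-hour cap
```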
The updated `app.py` in full:

```python
import gradio as gr
import torch
from transformers import pipeline
import librosa
import os

# Config
MODEL_NAME = "openai/whisper-small"
BATCH_SIZE = 8
YT_LENGTH_LIMIT_S = 3600  # 1 hour
device = 0 if torch.cuda.is_available() else "cpu"

# Load model pipeline
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    # (one unchanged argument is hidden by the diff view here)
    device=device,
)

# Duration check
def is_too_long(audio_path, limit=YT_LENGTH_LIMIT_S):
    duration = librosa.get_duration(path=audio_path)
    return duration > limit

# Transcription logic
def transcribe(audio_path, task="transcribe"):
    if audio_path is None or not os.path.exists(audio_path):
        raise gr.Error("❌ Invalid file path or missing audio.")

    if is_too_long(audio_path):
        raise gr.Error("⚠️ Audio exceeds 1-hour limit. Please upload a shorter file.")

    result = pipe(audio_path, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)
    return result["text"]

# Wrapper with error handling
def handle_audio(audio_path, task):
    try:
        return transcribe(audio_path, task)
    except Exception as e:
        return f"❌ Error: {str(e)}"

# Gradio App
with gr.Blocks() as demo:
    gr.Markdown("# 🎙️ Whisper Speech-to-Text App\nPowered by 🤗 Transformers and Gradio.\n\nUpload an audio file or record with your mic.")

    with gr.Tabs():
        # Tab 1: Upload
        with gr.Tab("📁 Upload Audio"):
            with gr.Row():
                audio_upload = gr.Audio(type="filepath", label="Upload Audio File")
                task_option_upload = gr.Radio(["transcribe", "translate"], value="transcribe", label="Choose Task")
            with gr.Row():
                transcribe_btn_upload = gr.Button("Transcribe")
                output_upload = gr.Textbox(label="📝 Transcription", lines=8)
            transcribe_btn_upload.click(handle_audio, inputs=[audio_upload, task_option_upload], outputs=output_upload)

            gr.Examples(
                examples=[
                    "sample_audio/sample1.wav",
                    "sample_audio/sample2.mp3"
                ],
                inputs=[audio_upload],
                label="Example Audio Files"
            )

        # Tab 2: Record
        with gr.Tab("🎤 Record Audio"):
            with gr.Row():
                audio_record = gr.Audio(source="microphone", type="filepath", label="Record with Microphone")
                task_option_record = gr.Radio(["transcribe", "translate"], value="transcribe", label="Choose Task")
            with gr.Row():
                transcribe_btn_record = gr.Button("Transcribe")
                output_record = gr.Textbox(label="📝 Transcription", lines=8)
            transcribe_btn_record.click(handle_audio, inputs=[audio_record, task_option_record], outputs=output_record)

demo.launch()
```
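One caveat: `gr.Audio(source="microphone")` is the Gradio 3.x spelling; Gradio 4 renamed the parameter to `sources=["microphone"]`, so this file presumably targets a 3.x `gradio` pin. Functionally, both tabs route through `handle_audio`, and the task radio maps onto Whisper's two decoding modes: "transcribe" keeps the source language, while "translate" always decodes to English. A standalone sketch of the difference (the French recording is an illustrative assumption):

```python
# Whisper task modes, mirroring the Radio choices in the UI.
# "speech_fr.wav" stands in for any non-English recording.
from transformers import pipeline

asr = pipeline("automatic-speech-recognition", model="openai/whisper-small")

in_french  = asr("speech_fr.wav", generate_kwargs={"task": "transcribe"})["text"]
in_english = asr("speech_fr.wav", generate_kwargs={"task": "translate"})["text"]
print(in_french)   # source-language transcript
print(in_english)  # English translation of the same audio
```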