sifujohn committed on
Commit
6ae0891
·
verified ·
1 Parent(s): e72a7f1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -25
app.py CHANGED
@@ -1,17 +1,16 @@
1
  import gradio as gr
2
  import torch
3
  from transformers import pipeline
4
- from transformers.pipelines.audio_utils import ffmpeg_read
5
- import time
6
  import os
7
 
8
- # Choose an open-source model (English only or multilingual)
9
- MODEL_NAME = "openai/whisper-small" # or try "distil-whisper/distil-small.en"
10
  BATCH_SIZE = 8
11
- YT_LENGTH_LIMIT_S = 3600
12
  device = 0 if torch.cuda.is_available() else "cpu"
13
 
14
- # Load open-source model
15
  pipe = pipeline(
16
  task="automatic-speech-recognition",
17
  model=MODEL_NAME,
@@ -19,40 +18,61 @@ pipe = pipeline(
19
  device=device,
20
  )
21
 
22
- # Transcribe function
 
 
 
 
 
23
  def transcribe(audio_path, task="transcribe"):
24
  if audio_path is None or not os.path.exists(audio_path):
25
- raise gr.Error("Invalid file path.")
26
-
27
- # Read the audio file using ffmpeg_read
28
- audio_array = ffmpeg_read(audio_path, pipe.feature_extractor.sampling_rate)
29
 
30
- # Ensure the audio data is in the correct format
31
- inputs = {"array": audio_array, "sampling_rate": pipe.feature_extractor.sampling_rate}
32
 
33
- # Transcribe the audio
34
- result = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)
35
  return result["text"]
36
 
37
- # Wrapper for file uploads
38
  def handle_audio(audio_path, task):
39
  try:
40
  return transcribe(audio_path, task)
41
  except Exception as e:
42
  return f"❌ Error: {str(e)}"
43
 
44
- # Gradio UI
45
  with gr.Blocks() as demo:
46
- gr.Markdown("# πŸŽ™οΈ Free Whisper Speech-to-Text App\nPowered by Open Source Whisper from Hugging Face.")
47
 
48
  with gr.Tabs():
49
- with gr.Tab("🎧 Upload Audio"):
 
 
 
 
50
  with gr.Row():
51
- audio_input = gr.Audio(type="filepath", label="Upload audio file")
52
- task_option = gr.Radio(["transcribe", "translate"], value="transcribe", label="Choose Task")
53
- transcribe_btn = gr.Button("Transcribe")
54
- result = gr.Textbox(label="πŸ“ Transcription", lines=8)
55
 
56
- transcribe_btn.click(handle_audio, inputs=[audio_input, task_option], outputs=result)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
- demo.launch()
 
1
  import gradio as gr
2
  import torch
3
  from transformers import pipeline
4
+ import librosa
 
5
  import os
6
 
7
+ # Config
8
+ MODEL_NAME = "openai/whisper-small"
9
  BATCH_SIZE = 8
10
+ YT_LENGTH_LIMIT_S = 3600 # 1 hour
11
  device = 0 if torch.cuda.is_available() else "cpu"
12
 
13
+ # Load model pipeline
14
  pipe = pipeline(
15
  task="automatic-speech-recognition",
16
  model=MODEL_NAME,
 
18
  device=device,
19
  )
20
 
21
# Duration guard used before transcription.
def is_too_long(audio_path, limit=YT_LENGTH_LIMIT_S):
    """Return True when the audio at *audio_path* runs longer than *limit* seconds."""
    return librosa.get_duration(path=audio_path) > limit
25
+
26
# Transcription logic
def transcribe(audio_path, task="transcribe"):
    """Run the ASR pipeline on *audio_path* and return the text.

    task: "transcribe" or "translate", forwarded to the model's generate step.
    Raises gr.Error for a missing file or one over the length limit.
    """
    file_missing = audio_path is None or not os.path.exists(audio_path)
    if file_missing:
        raise gr.Error("❌ Invalid file path or missing audio.")

    if is_too_long(audio_path):
        raise gr.Error("⚠️ Audio exceeds 1-hour limit. Please upload a shorter file.")

    output = pipe(
        audio_path,
        batch_size=BATCH_SIZE,
        generate_kwargs={"task": task},
        return_timestamps=True,
    )
    return output["text"]
36
 
37
# Wrapper with error handling: any failure is surfaced as text in the output box.
def handle_audio(audio_path, task):
    """Call transcribe(); on any exception return a readable error string instead."""
    try:
        text = transcribe(audio_path, task)
    except Exception as e:
        # NOTE(review): gr.Error is also caught here and rendered as plain text
        # rather than as a Gradio error popup — confirm that is intentional.
        return f"❌ Error: {str(e)}"
    return text
43
 
44
# Gradio App: two tabs (file upload, microphone recording) sharing handle_audio.
with gr.Blocks() as demo:
    gr.Markdown("# 🎙️ Whisper Speech-to-Text App\nPowered by 🤗 Transformers and Gradio.\n\nUpload an audio file or record with your mic.")

    with gr.Tabs():
        # Tab 1: Upload
        with gr.Tab("📁 Upload Audio"):
            with gr.Row():
                audio_upload = gr.Audio(type="filepath", label="Upload Audio File")
                task_option_upload = gr.Radio(["transcribe", "translate"], value="transcribe", label="Choose Task")
            with gr.Row():
                transcribe_btn_upload = gr.Button("Transcribe")
                output_upload = gr.Textbox(label="📝 Transcription", lines=8)
            transcribe_btn_upload.click(handle_audio, inputs=[audio_upload, task_option_upload], outputs=output_upload)

            # NOTE(review): these example files must exist in the Space repo,
            # otherwise the Examples component fails to load — verify they ship.
            gr.Examples(
                examples=[
                    "sample_audio/sample1.wav",
                    "sample_audio/sample2.mp3",
                ],
                inputs=[audio_upload],
                label="Example Audio Files",
            )

        # Tab 2: Record
        with gr.Tab("🎤 Record Audio"):
            with gr.Row():
                # Fix: Gradio 4.x removed the `source=` keyword in favor of
                # `sources=[...]`; the old keyword raises TypeError at startup.
                # (If this Space pins Gradio 3.x, revert to source="microphone".)
                audio_record = gr.Audio(sources=["microphone"], type="filepath", label="Record with Microphone")
                task_option_record = gr.Radio(["transcribe", "translate"], value="transcribe", label="Choose Task")
            with gr.Row():
                transcribe_btn_record = gr.Button("Transcribe")
                output_record = gr.Textbox(label="📝 Transcription", lines=8)
            transcribe_btn_record.click(handle_audio, inputs=[audio_record, task_option_record], outputs=output_record)

demo.launch()