prasanacodes committed on
Commit
7a5e7bc
·
verified ·
1 Parent(s): cff7092

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -5
app.py CHANGED
@@ -2,6 +2,7 @@
2
  import gradio as gr
3
  from transformers import pipeline
4
  import torch
 
5
 
6
  # --- Model Loading ---
7
  # We load the model once when the app starts, not on every function call.
@@ -21,6 +22,33 @@ transcriber = pipeline(
21
  )
22
 
23
  # --- Transcription Function ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  def transcribe_audio(audio_path):
25
  """
26
  This function takes an audio file path, transcribes it using the Whisper model,
@@ -41,6 +69,11 @@ def transcribe_audio(audio_path):
41
  print(f"An error occurred during transcription: {e}")
42
  return f"Sorry, an error occurred. Please try again. Details: {str(e)}"
43
 
 
 
 
 
 
44
  # --- Gradio Interface Definition ---
45
  # Title and description for the new Space
46
  title = "Custom Whisper Transcription App"
@@ -55,11 +88,9 @@ article = "<p style='text-align: center'><a href='https://huggingface.co/openai/
55
  # Create the Gradio interface with our custom function
56
  # We define the input as an Audio component and the output as a Textbox
57
  app_interface = gr.Interface(
58
- fn=transcribe_audio,
59
- inputs=gr.Audio(
60
- sources=["microphone", "upload"],
61
- type="filepath",
62
- label="Upload Audio or Record"
63
  ),
64
  outputs=gr.Textbox(label="Transcription Result"),
65
  title=title,
 
2
  import gradio as gr
3
  from transformers import pipeline
4
  import torch
5
+ import ffmpeg
6
 
7
  # --- Model Loading ---
8
  # We load the model once when the app starts, not on every function call.
 
22
  )
23
 
24
  # --- Transcription Function ---
25
+ def extract_audio_from_video(video_path, output_audio_path="temp_extracted_audio.wav"):
26
+ """
27
+ Extracts audio from a video file using python-ffmpeg.
28
+ """
29
+ print(f"\n[STEP 1/9] Extracting audio from video: {video_path}")
30
+
31
+ try:
32
+ (
33
+ ffmpeg
34
+ .input(video_path)
35
+ .output(
36
+ output_audio_path,
37
+ vn=None, # Disable video
38
+ acodec='mp3', # Audio codec
39
+ ab='192k', # Audio bitrate
40
+ ar='44100', # Sample rate
41
+ ac=2, # Audio channels
42
+ f='wav' # Output format
43
+ )
44
+ .run(overwrite_output=True, quiet=True)
45
+ )
46
+ print(f"✅ Audio extracted successfully to: {output_audio_path}")
47
+ return output_audio_path
48
+ except ffmpeg.Error as e:
49
+ print(f"Error: Failed to extract audio from video. stderr: {e.stderr.decode('utf8')}")
50
+ return None
51
+
52
  def transcribe_audio(audio_path):
53
  """
54
  This function takes an audio file path, transcribes it using the Whisper model,
 
69
  print(f"An error occurred during transcription: {e}")
70
  return f"Sorry, an error occurred. Please try again. Details: {str(e)}"
71
 
72
def main_run(video_path):
    """
    Gradio entry point: extract the audio track from an uploaded video and
    transcribe it with the Whisper pipeline.

    Args:
        video_path: Filepath of the uploaded video (from the gr.Video input);
            Gradio passes None when nothing was uploaded.

    Returns:
        The transcription text, or a human-readable error message string.
    """
    import os  # local import: only needed for temp-file cleanup

    # Guard: Gradio hands us None when the input component is empty.
    if not video_path:
        return "Please upload a video file first."

    original_audio_file = extract_audio_from_video(video_path)
    # extract_audio_from_video returns None on ffmpeg failure — fail with a
    # clear message instead of passing None into the transcriber.
    if original_audio_file is None:
        return "Sorry, audio could not be extracted from the video."

    try:
        return transcribe_audio(original_audio_file)
    finally:
        # Always remove the temporary extracted-audio file so repeated runs
        # don't leak files in the working directory.
        try:
            os.remove(original_audio_file)
        except OSError:
            pass
77
  # --- Gradio Interface Definition ---
78
  # Title and description for the new Space
79
  title = "Custom Whisper Transcription App"
 
88
  # Create the Gradio interface with our custom function
89
  # We define the input as an Audio component and the output as a Textbox
90
  app_interface = gr.Interface(
91
+ fn=main_run,
92
+ inputs=gr.Video(
93
+ label="Upload Video"
 
 
94
  ),
95
  outputs=gr.Textbox(label="Transcription Result"),
96
  title=title,