karthikmn committed on
Commit
09723b9
·
verified ·
1 Parent(s): af62f08

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -2
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import gradio as gr
2
  from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
3
  import torch
 
4
 
5
  # Load the pre-trained Wav2Vec 2.0 model and processor from Hugging Face
6
  processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-960h")
@@ -9,7 +10,7 @@ model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-960h")
9
  # Function to convert speech to text
10
  def speech_to_text(audio_file):
11
  # Load the audio file
12
- audio_input, _ = torchaudio.load(audio_file.name)
13
 
14
  # Preprocess the audio input (e.g., resample, normalize, etc.)
15
  input_values = processor(audio_input, return_tensors="pt").input_values
@@ -27,7 +28,7 @@ def speech_to_text(audio_file):
27
  # Set up the Gradio interface
28
  iface = gr.Interface(
29
  fn=speech_to_text, # Function to be executed
30
- inputs=gr.Audio(type="file"), # Allow audio file upload without the 'source' argument
31
  outputs=gr.Textbox(), # Display transcription in a text box
32
  title="Speech-to-Text Analyzer for Lecture Notes",
33
  description="Upload an audio file (e.g., lecture recording) to get the transcription of the speech."
 
1
  import gradio as gr
2
  from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
3
  import torch
4
+ import torchaudio
5
 
6
  # Load the pre-trained Wav2Vec 2.0 model and processor from Hugging Face
7
  processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-960h")
 
10
  # Function to convert speech to text
11
  def speech_to_text(audio_file):
12
  # Load the audio file
13
+ audio_input, _ = torchaudio.load(audio_file)
14
 
15
  # Preprocess the audio input (e.g., resample, normalize, etc.)
16
  input_values = processor(audio_input, return_tensors="pt").input_values
 
28
  # Set up the Gradio interface
29
  iface = gr.Interface(
30
  fn=speech_to_text, # Function to be executed
31
+ inputs=gr.Audio(type="filepath"), # Correct type for file upload
32
  outputs=gr.Textbox(), # Display transcription in a text box
33
  title="Speech-to-Text Analyzer for Lecture Notes",
34
  description="Upload an audio file (e.g., lecture recording) to get the transcription of the speech."