Spaces:

karthikmn
/

smart-notes

Runtime error

karthikmn committed on Jun 7, 2025

Commit

09723b9

verified ·

1 Parent(s): af62f08

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import gradio as gr
 from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
 import torch
 # Load the pre-trained Wav2Vec 2.0 model and processor from Hugging Face
 processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-960h")
@@ -9,7 +10,7 @@ model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-960h")
 # Function to convert speech to text
 def speech_to_text(audio_file):
     # Load the audio file
-    audio_input, _ = torchaudio.load(audio_file.name)
     # Preprocess the audio input (e.g., resample, normalize, etc.)
     input_values = processor(audio_input, return_tensors="pt").input_values
@@ -27,7 +28,7 @@ def speech_to_text(audio_file):
 # Set up the Gradio interface
 iface = gr.Interface(
     fn=speech_to_text,  # Function to be executed
-    inputs=gr.Audio(type="file"),  # Allow audio file upload without the 'source' argument
     outputs=gr.Textbox(),  # Display transcription in a text box
     title="Speech-to-Text Analyzer for Lecture Notes",
     description="Upload an audio file (e.g., lecture recording) to get the transcription of the speech."

 import gradio as gr
 from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
 import torch
+import torchaudio
 # Load the pre-trained Wav2Vec 2.0 model and processor from Hugging Face
 processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-960h")
 # Function to convert speech to text
 def speech_to_text(audio_file):
     # Load the audio file
+    audio_input, _ = torchaudio.load(audio_file)
     # Preprocess the audio input (e.g., resample, normalize, etc.)
     input_values = processor(audio_input, return_tensors="pt").input_values
 # Set up the Gradio interface
 iface = gr.Interface(
     fn=speech_to_text,  # Function to be executed
+    inputs=gr.Audio(type="filepath"),  # Correct type for file upload
     outputs=gr.Textbox(),  # Display transcription in a text box
     title="Speech-to-Text Analyzer for Lecture Notes",
     description="Upload an audio file (e.g., lecture recording) to get the transcription of the speech."