Spaces:

Shanuka01
/

ASR-wisper-large

Running

Shanuka01 committed on Nov 4, 2023

Commit

70cc84d

1 Parent(s): 2bd0dd9

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -5,10 +5,9 @@ import whisper
 model = whisper.load_model("large")
 def transcribe(audio_file):
-    # The audio_file parameter is a tuple with the filename and the file object
-    # We only need the file object which is the second element of the tuple
-    audio_data = audio_file[1]
-    audio = whisper.load_audio(audio_data)
     audio = whisper.pad_or_trim(audio)
     mel = whisper.log_mel_spectrogram(audio).to(model.device)
     options = whisper.DecodingOptions()
@@ -18,7 +17,7 @@ def transcribe(audio_file):
 # Create the Gradio interface
 iface = gr.Interface(
     fn=transcribe,
-    inputs=gr.Audio(label="Upload your audio file", type="file"),
     outputs="text",
     title="Whisper ASR",
     description="Upload an audio file and it will be transcribed using OpenAI's Whisper model."

 model = whisper.load_model("large")
 def transcribe(audio_file):
+    # whisper.load_audio is given a file path, so gr.Audio is configured with type="filepath"
+    # audio_file therefore directly contains the path to the uploaded file
+    audio = whisper.load_audio(audio_file)
     audio = whisper.pad_or_trim(audio)
     mel = whisper.log_mel_spectrogram(audio).to(model.device)
     options = whisper.DecodingOptions()
 # Create the Gradio interface
 iface = gr.Interface(
     fn=transcribe,
+    inputs=gr.Audio(label="Upload your audio file", type="filepath"),
     outputs="text",
     title="Whisper ASR",
     description="Upload an audio file and it will be transcribed using OpenAI's Whisper model."