Spaces:

NightPrince
/

ASR

Sleeping

NightPrince committed on Jan 8, 2025

Commit

0bbfec6

verified ·

1 Parent(s): 2726627

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -11,15 +11,19 @@ processor = Wav2Vec2Processor.from_pretrained(model_name)
 # Function to transcribe audio using the model
 def transcribe(audio):
     # Resample the audio to 16kHz if necessary
-    if audio.ndim > 1:  # If audio is stereo
-        audio = audio.mean(axis=1)  # Convert to mono
-    # Resample audio to 16kHz if it's not already
-    audio = librosa.resample(audio, orig_sr=audio.shape[0] / len(audio), target_sr=16000)
     # Process the audio to match the model's input format
-    inputs = processor(audio, return_tensors="pt", sampling_rate=16000)
     # Get the model's predictions
     with torch.no_grad():
@@ -35,8 +39,7 @@ def transcribe(audio):
 interface = gr.Interface(
     fn=transcribe,
     inputs=gr.Audio(type="numpy"),  # Take the audio input as numpy array
-    outputs="text",  # Output transcribed text
-    live=True  # Optional: live transcribing as you speak
 )
 # Launch the interface

 # Function to transcribe audio using the model
 def transcribe(audio):
+    # Extract audio data from the tuple (audio, sample_rate)
+    audio_data, sample_rate = audio
     # Resample the audio to 16kHz if necessary
+    if audio_data.ndim > 1:  # If audio is stereo
+        audio_data = audio_data.mean(axis=1)  # Convert to mono
+    # Ensure the audio is resampled to 16kHz if it's not already
+    if sample_rate != 16000:
+        audio_data = librosa.resample(audio_data, orig_sr=sample_rate, target_sr=16000)
     # Process the audio to match the model's input format
+    inputs = processor(audio_data, return_tensors="pt", sampling_rate=16000)
     # Get the model's predictions
     with torch.no_grad():
 interface = gr.Interface(
     fn=transcribe,
     inputs=gr.Audio(type="numpy"),  # Take the audio input as numpy array
+    outputs="text"  # Output transcribed text
 )
 # Launch the interface