Spaces:

NightPrince
/

ASR

Sleeping

NightPrince committed on Jan 8, 2025

Commit

98c9824

verified ·

1 Parent(s): ee196d5

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import gradio as gr
 from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
 import torch
 # Load the pre-trained model and processor.
 # NOTE(review): the checkpoint name suggests an English-to-Arabic speech
 # translation model rather than a CTC checkpoint -- confirm it is compatible
 # with Wav2Vec2ForCTC / Wav2Vec2Processor before relying on the output.
 model_name = "facebook/s2t-wav2vec2-large-en-ar"
@@ -9,13 +10,20 @@ processor = Wav2Vec2Processor.from_pretrained(model_name)
 # Define a function for the ASR model
 def transcribe(audio):
     """Transcribe speech samples with the module-level model and processor.

     Args:
         audio: Raw speech samples in a form accepted by the module-level
             ``processor``. The samples are declared to the processor as
             16 kHz audio; no resampling is performed here.

     Returns:
         The text decoded from the greedy (argmax) prediction for the first
         item of the batch.
     """
     # Process the audio
     inputs = processor(audio, return_tensors="pt", sampling_rate=16000)
     # Inference only: disable autograd so no graph is built or kept alive
     # for a forward pass whose gradients are never used.
     with torch.no_grad():
         logits = model(input_values=inputs.input_values).logits
     # Decode the predicted text
     predicted_ids = logits.argmax(dim=-1)
     return processor.decode(predicted_ids[0])
 # Define the Gradio interface

 import gradio as gr
 from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
 import torch
+import numpy as np
 # Load the pre-trained model and processor.
 # NOTE(review): the checkpoint name suggests an English-to-Arabic speech
 # translation model rather than a CTC checkpoint -- confirm it is compatible
 # with Wav2Vec2ForCTC / Wav2Vec2Processor before relying on the output.
 model_name = "facebook/s2t-wav2vec2-large-en-ar"
 # Define a function for the ASR model
 def _prepare_waveform(audio, target_rate=16000):
     """Return *audio* as a mono float32 waveform sampled at *target_rate*.

     Accepts either a NumPy array of samples or a ``(sample_rate, samples)``
     tuple. Any other input type is returned unchanged so the processor can
     handle (or reject) it exactly as before.
     """
     source_rate = target_rate
     if (
         isinstance(audio, tuple)
         and len(audio) == 2
         and isinstance(audio[0], (int, np.integer))
         and isinstance(audio[1], np.ndarray)
     ):
         # Gradio's numpy audio input arrives as (sample_rate, samples).
         source_rate, audio = audio
     if not isinstance(audio, np.ndarray):
         return audio

     is_integer_pcm = np.issubdtype(audio.dtype, np.integer)
     samples = np.squeeze(audio).astype(np.float32)
     if is_integer_pcm:
         # Scale integer PCM by the dtype's maximum so values fall in a
         # unit range instead of raw integer amplitudes.
         samples /= float(np.iinfo(audio.dtype).max)
     if samples.ndim > 1:
         # Downmix by averaging over the shortest axis (the channels).
         # Flattening would interleave the channels into one corrupted,
         # over-long signal.
         samples = samples.mean(axis=int(np.argmin(samples.shape)))
     samples = samples.reshape(-1)

     if source_rate != target_rate and samples.size > 1:
         # Simple linear-interpolation resampling; it keeps the block free of
         # extra dependencies at the cost of some aliasing when downsampling.
         n_out = max(1, int(round(samples.size * target_rate / float(source_rate))))
         old_times = np.arange(samples.size) / float(source_rate)
         new_times = np.arange(n_out) / float(target_rate)
         samples = np.interp(new_times, old_times, samples).astype(np.float32)
     return samples


 def transcribe(audio):
     """Transcribe speech with the module-level model and processor.

     Args:
         audio: A NumPy array of samples (mono or multi-channel), a
             ``(sample_rate, samples)`` tuple, or any other input the
             module-level ``processor`` accepts. ``None`` (nothing recorded)
             yields an empty transcription.

     Returns:
         The text decoded from the greedy (argmax) prediction for the first
         item of the batch, or ``""`` when *audio* is ``None``.
     """
     if audio is None:
         return ""
     # Convert the audio into a format compatible with the processor
     waveform = _prepare_waveform(audio, target_rate=16000)
     # Process the audio
     inputs = processor(waveform, return_tensors="pt", sampling_rate=16000)
     # Inference only: disable autograd so no graph is built or kept alive.
     with torch.no_grad():
         logits = model(input_values=inputs.input_values).logits
     # Decode the predicted text
     predicted_ids = logits.argmax(dim=-1)
     return processor.decode(predicted_ids[0])
 # Define the Gradio interface