Spaces:

peterkros
/

transcribeapi

Sleeping

peterkros committed on Sep 9, 2024

Commit

f0d0884

verified ·

1 Parent(s): 4ca61bc

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -13,11 +13,14 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model.to(device)
 def transcribe(audio):
-    # Gradio passes audio as a numpy array, so no need to load from file.
-    # If the input is a file path, load the audio from the file:
     if isinstance(audio, str):  # Assuming it's a file path
         audio, sampling_rate = sf.read(audio)
     # Process the audio to get input features
     input_features = processor(audio, sampling_rate=16000, return_tensors="pt").input_features.to(device)

 model.to(device)
 def transcribe(audio):
+    # Check if the input is a file path and load the audio from the file
     if isinstance(audio, str):  # Assuming it's a file path
         audio, sampling_rate = sf.read(audio)
+    # If the audio has more than one channel, convert it to mono by averaging the channels
+    if len(audio.shape) > 1:
+        audio = audio.mean(axis=1)
     # Process the audio to get input features
     input_features = processor(audio, sampling_rate=16000, return_tensors="pt").input_features.to(device)