Spaces:

MusIre
/

practiceAI

Sleeping

MusIre committed on Nov 27, 2023

Commit

6a6d2f9

1 Parent(s): 95dcc38

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -14,11 +14,27 @@ model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large")
 forced_decoder_ids = processor.get_decoder_prompt_ids(language="italian", task="transcribe")
 # Custom preprocessing function
-def preprocess_audio(audio_data):
-    # Apply any custom preprocessing to the audio data here if needed
     # Ensure that the input data is a valid format for the model
-    processed_data = processor(audio_data, return_tensors="pt", padding=True, truncation=True)
-    return processed_data
 # Function to perform ASR on audio data
 def transcribe_audio(audio_data):

 forced_decoder_ids = processor.get_decoder_prompt_ids(language="italian", task="transcribe")
 # Custom preprocessing function
+def preprocess_audio(audio_data, sampling_rate=16_000):
     # Ensure that the input data is a valid format for the model
+    # Convert the audio data to a numpy array with a correct shape
+    raw_speech = np.asarray(audio_data, dtype=np.float32)
+    # Pad or truncate the audio data to the required length
+    if len(raw_speech) > processor.feature_extractor.max_len:
+        raw_speech = raw_speech[:processor.feature_extractor.max_len]
+    else:
+        raw_speech = np.pad(raw_speech, (0, processor.feature_extractor.max_len - len(raw_speech)))
+    # Process the audio data using the Whisper processor
+    processed_data = processor(
+        raw_speech,
+        sampling_rate=sampling_rate,
+        return_tensors="pt",
+        padding=True,
+        truncation=True
+    )
+    return processed_data.input_features
 # Function to perform ASR on audio data
 def transcribe_audio(audio_data):