Update app.py
Browse files
app.py
CHANGED
|
@@ -51,7 +51,34 @@ def predict_emotion_from_audio(wav_filepath):
|
|
| 51 |
api_key = os.getenv("DeepAI_api_key")
|
| 52 |
# Define the API key for DeepAI Text to Image API
|
| 53 |
#api_key = 'dee3e3f2-d5cf-474c-8072-bd6bea47e865'
|
| 54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
# Predict emotion from audio
|
| 56 |
def get_predictions(audio_input):
|
| 57 |
emotion_prediction = predict_emotion_from_audio(audio_input)
|
|
|
|
| 51 |
# API key for the DeepAI Text-to-Image API, read from the environment so the
# secret is never committed to the repository.
api_key = os.getenv("DeepAI_api_key")
# NOTE(review): a hard-coded API key was previously committed here inside a
# comment. It has been removed, but it is still visible in git history —
# rotate/revoke that key on the DeepAI dashboard.
|
| 54 |
import torch
from transformers import Speech2TextProcessor, Speech2TextForConditionalGeneration

# Speech-to-text model/processor are loaded lazily: the original code ran
# everything at module import time, which (a) downloaded a large checkpoint on
# import and (b) crashed with NameError because `audio_input`/`sampling_rate`
# were never defined (the soundfile read was commented out).
_s2t_model = None
_s2t_processor = None


def _load_speech2text():
    """Load and cache the facebook/s2t-small-librispeech-asr model and processor."""
    global _s2t_model, _s2t_processor
    if _s2t_model is None:
        _s2t_model = Speech2TextForConditionalGeneration.from_pretrained(
            "facebook/s2t-small-librispeech-asr"
        )
        _s2t_processor = Speech2TextProcessor.from_pretrained(
            "facebook/s2t-small-librispeech-asr"
        )
    return _s2t_model, _s2t_processor


def transcribe_audio(audio_input, sampling_rate=16000):
    """Transcribe a mono audio waveform to English text.

    Parameters
    ----------
    audio_input : 1-D float array of raw audio samples,
        e.g. the first value returned by ``soundfile.read(path)``.
        # assumes mono, model-compatible audio — TODO confirm against callers
    sampling_rate : int
        Sample rate of ``audio_input`` in Hz; the model was trained on 16 kHz.

    Returns
    -------
    str
        The decoded transcription.
    """
    model, processor = _load_speech2text()
    # Convert the raw audio into model-ready PyTorch tensors.
    inputs = processor(audio_input, sampling_rate=sampling_rate, return_tensors="pt")
    with torch.no_grad():  # inference only — no gradients needed
        generated_ids = model.generate(
            inputs["input_features"],
            attention_mask=inputs["attention_mask"],
            # NOTE(review): the original passed
            # forced_bos_token_id=processor.tokenizer.bos_token_id; the
            # documented Speech2Text usage calls generate() without it — the
            # model config supplies the decoder start token.
        )
    # batch_decode returns one string per input sequence; we pass a single clip.
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
| 81 |
+
|
| 82 |
# Predict emotion from audio
|
| 83 |
def get_predictions(audio_input):
|
| 84 |
emotion_prediction = predict_emotion_from_audio(audio_input)
|