jfforero commited on
Commit
33519ac
·
verified ·
1 Parent(s): 42d0121

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -1
app.py CHANGED
@@ -51,7 +51,34 @@ def predict_emotion_from_audio(wav_filepath):
51
  api_key = os.getenv("DeepAI_api_key")
52
  # Define the API key for DeepAI Text to Image API
53
  #api_key = '<REDACTED — leaked credential; revoke this key and load it from the environment>'
54
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  # Predict emotion from audio
56
  def get_predictions(audio_input):
57
  emotion_prediction = predict_emotion_from_audio(audio_input)
 
51
  api_key = os.getenv("DeepAI_api_key")
52
  # Define the API key for DeepAI Text to Image API
53
  #api_key = '<REDACTED — leaked credential; revoke this key and load it from the environment>'
54
+
55
+ ####
56
+ import torch
57
+ from transformers import Speech2TextProcessor, Speech2TextForConditionalGeneration
58
+
59
+
60
+ # Load the pretrained model and processor
61
+ model = Speech2TextForConditionalGeneration.from_pretrained("facebook/s2t-small-librispeech-asr")
62
+ processor = Speech2TextProcessor.from_pretrained("facebook/s2t-small-librispeech-asr")
63
+
64
+ # Load your local audio file
65
+ #audio_input, sampling_rate = sf.read("/content/1001_IEO_DIS_HI.wav")
66
+
67
+ # Convert the audio to PyTorch tensors using the processor
68
+ inputs = processor(audio_input, sampling_rate=sampling_rate, return_tensors="pt")
69
+
70
+ # Generate transcription in English
71
+ generated_ids = model.generate(
72
+ inputs["input_features"],
73
+ attention_mask=inputs["attention_mask"],
74
+ forced_bos_token_id=processor.tokenizer.bos_token_id # Use the <s> token ID as the start of sequence token
75
+ )
76
+
77
+ # Decode the generated transcription
78
+ transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)
79
+
80
+ ###
81
+
82
  # Predict emotion from audio
83
  def get_predictions(audio_input):
84
  emotion_prediction = predict_emotion_from_audio(audio_input)