Update app.py
Browse files
app.py
CHANGED
|
@@ -49,35 +49,7 @@ def predict_emotion_from_audio(wav_filepath):
|
|
| 49 |
return None
|
| 50 |
|
| 51 |
api_key = os.getenv("DeepAI_api_key")
|
| 52 |
-
# Define the API key for DeepAI Text to Image API
|
| 53 |
-
#api_key = 'dee3e3f2-d5cf-474c-8072-bd6bea47e865'
|
| 54 |
|
| 55 |
-
####
|
| 56 |
-
import torch
|
| 57 |
-
from transformers import Speech2TextProcessor, Speech2TextForConditionalGeneration
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
# Load the pretrained model and processor
|
| 61 |
-
model = Speech2TextForConditionalGeneration.from_pretrained("facebook/s2t-small-librispeech-asr")
|
| 62 |
-
processor = Speech2TextProcessor.from_pretrained("facebook/s2t-small-librispeech-asr")
|
| 63 |
-
|
| 64 |
-
# Load your local audio file
|
| 65 |
-
#audio_input, sampling_rate = sf.read("/content/1001_IEO_DIS_HI.wav")
|
| 66 |
-
|
| 67 |
-
# Convert the audio to PyTorch tensors using the processor
|
| 68 |
-
inputs = processor(audio_input, sampling_rate=sampling_rate, return_tensors="pt")
|
| 69 |
-
|
| 70 |
-
# Generate transcription in English
|
| 71 |
-
generated_ids = model.generate(
|
| 72 |
-
inputs["input_features"],
|
| 73 |
-
attention_mask=inputs["attention_mask"],
|
| 74 |
-
forced_bos_token_id=processor.tokenizer.bos_token_id # Use the <s> token ID as the start of sequence token
|
| 75 |
-
)
|
| 76 |
-
|
| 77 |
-
# Decode the generated transcription
|
| 78 |
-
transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)
|
| 79 |
-
|
| 80 |
-
###
|
| 81 |
|
| 82 |
# Predict emotion from audio
|
| 83 |
def get_predictions(audio_input):
|
|
@@ -86,13 +58,6 @@ def get_predictions(audio_input):
|
|
| 86 |
image = generate_image(api_key, emotion_prediction)
|
| 87 |
return emotion_prediction, image
|
| 88 |
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
###
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
|
| 97 |
# Define a function to generate an image using DeepAI Text to Image API
|
| 98 |
def generate_image(api_key, text):
|
|
|
|
| 49 |
return None
|
| 50 |
|
| 51 |
api_key = os.getenv("DeepAI_api_key")
|
|
|
|
|
|
|
| 52 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
|
| 54 |
# Predict emotion from audio
|
| 55 |
def get_predictions(audio_input):
|
|
|
|
| 58 |
image = generate_image(api_key, emotion_prediction)
|
| 59 |
return emotion_prediction, image
|
| 60 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
|
| 62 |
# Define a function to generate an image using DeepAI Text to Image API
|
| 63 |
def generate_image(api_key, text):
|