jfforero commited on
Commit
0601bf8
·
verified ·
1 Parent(s): ea2051f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +101 -0
app.py CHANGED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ import librosa
4
+ import requests
5
+ from io import BytesIO
6
+ from PIL import Image
7
+ import os
8
+ from tensorflow.keras.models import load_model
9
+ from faster_whisper import WhisperModel
10
+
11
def load_emotion_model(model_path):
    """Load the Keras speech-emotion model from *model_path*.

    Returns the loaded model, or None if loading fails (the error is
    printed rather than raised so the app can still start).
    """
    loaded = None
    try:
        loaded = load_model(model_path)
    except Exception as e:
        print("Error loading emotion prediction model:", e)
    return loaded
19
+
20
# Pre-trained LSTM speech-emotion-recognition model (trained on RAVDESS).
model_path = 'mymodel_SER_LSTM_RAVDESS.h5'
# Global model instance; None if loading failed (callers must tolerate that).
model = load_emotion_model(model_path)
22
+
23
# Initialize WhisperModel for speech-to-text.
# "small" + int8 quantization keeps CPU-only inference reasonably fast.
model_size = "small"
model2 = WhisperModel(model_size, device="cpu", compute_type="int8")
26
+
27
def transcribe(wav_filepath):
    """Transcribe the audio file at *wav_filepath* with faster-whisper.

    Returns the concatenated text of all recognized segments.
    """
    segments, _ = model2.transcribe(wav_filepath, beam_size=5)
    pieces = [segment.text for segment in segments]
    return "".join(pieces)
31
+
32
def extract_mfcc(wav_file_name):
    """Extract a 40-coefficient MFCC feature vector from an audio file.

    The MFCCs are averaged over time, yielding a fixed-size (40,) vector.
    Returns None (after printing the error) if loading or feature
    extraction fails.
    """
    try:
        signal, rate = librosa.load(wav_file_name)
        coeffs = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=40)
        return np.mean(coeffs.T, axis=0)
    except Exception as e:
        print("Error extracting MFCC features:", e)
        return None
41
+
42
# RAVDESS emotion labels, keyed by 1-based class index — must match the
# output ordering of the loaded SER model (argmax + 1 is used as the key).
emotions = {1: 'neutral', 2: 'calm', 3: 'happy', 4: 'sad', 5: 'angry', 6: 'fearful', 7: 'disgust', 8: 'surprised'}
44
+
45
def predict_emotion_from_audio(wav_filepath):
    """Predict an emotion label for the audio file at *wav_filepath*.

    Returns the emotion name on success, the string
    "Error: Unable to extract features" when MFCC extraction fails,
    or None when prediction itself raises.
    """
    try:
        features = extract_mfcc(wav_filepath)
        if features is None:
            return "Error: Unable to extract features"
        # Model expects a (batch, timesteps, channels) = (1, 40, 1) tensor.
        reshaped = np.reshape(features, newshape=(1, 40, 1))
        scores = model.predict(reshaped)
        label_index = np.argmax(scores[0]) + 1  # emotions dict is 1-based
        return emotions[label_index]
    except Exception as e:
        print("Error predicting emotion:", e)
        return None
59
+
60
# DeepAI API key read from the environment; None if the variable is unset.
api_key = os.getenv("DeepAI_api_key")
61
+
62
# Function to generate an image using DeepAI Text to Image API
def generate_image(api_key, text):
    """Generate an image from *text* via DeepAI's text2img endpoint.

    Args:
        api_key: DeepAI API key (may be None; the API will then reject the call).
        text: Prompt to render.

    Returns:
        A PIL.Image on success, or None if the HTTP request fails, the
        response is not JSON / not successful, or 'output_url' is absent.
    """
    url = "https://api.deepai.org/api/text2img"
    headers = {'api-key': api_key}
    try:
        response = requests.post(
            url,
            data={'text': text},
            headers=headers,
            timeout=60,  # without a timeout a stalled API call hangs the worker forever
        )
        response.raise_for_status()
        response_data = response.json()  # raises ValueError on non-JSON bodies
        if 'output_url' not in response_data:
            return None
        image_url = response_data['output_url']
        image_response = requests.get(image_url, timeout=60)
        image_response.raise_for_status()
        return Image.open(BytesIO(image_response.content))
    except Exception as e:
        # Best-effort: the UI treats a None image as "no image generated".
        print("Error generating image:", e)
        return None
79
+
80
# Function to get predictions
def get_predictions(audio_input):
    """Run the full pipeline on a recorded audio file.

    Args:
        audio_input: Filesystem path to the recorded audio (Gradio filepath).

    Returns:
        Tuple of (emotion label, transcribed text, generated PIL image or None).
    """
    emotion_prediction = predict_emotion_from_audio(audio_input)
    transcribed_text = transcribe(audio_input)
    # predict_emotion_from_audio returns None on internal errors; fall back
    # to a neutral word so the string concatenation below cannot raise
    # "TypeError: can only concatenate str ... NoneType".
    emotion_for_prompt = emotion_prediction if emotion_prediction else "neutral"
    texto_imagen = (
        "Generate Patagonian Monsters' with a " + emotion_for_prompt
        + " attitude, representing the idea of: [" + transcribed_text
        + "]. Illustrate this using asemic writings in an old map style."
    )
    image = generate_image(api_key, texto_imagen)
    return emotion_prediction, transcribed_text, image
87
+
88
# Create the Gradio interface
interface = gr.Interface(
    fn=get_predictions,
    # Microphone-only input, delivered to get_predictions as a file path.
    inputs=gr.Audio(label="Input Audio", type="filepath", sources=["microphone"]),  # gr.Audio(label="Input Audio", type="filepath")
    outputs=[
        # Emotion and transcription outputs are hidden; only the image is shown.
        gr.Label("Acoustic Prediction", label="Acoustic Prediction", visible=False),
        gr.Label("Transcribed Text", label="Transcribed Text", visible=False),
        gr.Image(type='pil', label="Generated Image")
    ],
    title="Terra Australis Ignota",
    description="Terra Australis Ignota is a Multimedia project.\n\n The term refers to a common cartographic reference used during the 15th to 18th centuries, which alluded to territories about which little was known, such as Patagonia, or Antarctica, among other places. These territories were commonly attributed with fantastical and monstrous characteristics. The nature of the South America was depicted as exotic, wild, other, and, in this sense, monstrous.\n\n Record your voice with a sentence imagining/remembering Patagonia."
)

# Start the Gradio app (blocking call).
interface.launch()