jfforero committed on
Commit 445b628 · verified · 1 Parent(s): 4ee2028

Update app.py

Files changed (1)
  1. app.py +19 -21
app.py CHANGED
@@ -81,7 +81,7 @@ def predict_emotion_from_audio(wav_filepath):
     if test_point is not None:
         test_point = np.reshape(test_point, newshape=(1, 40, 1))
         predictions = model.predict(test_point)
-        predicted_emotion_label = np.argmax(predictions[0]) #
+        predicted_emotion_label = np.argmax(predictions[0])
         return emotions.get(predicted_emotion_label, "Unknown emotion")
     else:
         return "Error: Unable to extract features"
@@ -93,7 +93,7 @@ def predict_emotion_from_audio(wav_filepath):
 def analyze_sentiment(text):
     try:
         if not text or text.strip() == "":
-            return "No text to analyze", 0.0
+            return "neutral", 0.0
 
         analysis = TextBlob(text)
         polarity = analysis.sentiment.polarity
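The thresholding between this hunk and the next is not shown. A plausible sketch of how the TextBlob polarity score (a float in [-1.0, 1.0]) is mapped to a label; the cutoff values here are assumptions:

    def polarity_to_sentiment(polarity):
        # Hypothetical cutoffs; the app's real thresholds are outside this diff
        if polarity > 0.1:
            return "positive"
        elif polarity < -0.1:
            return "negative"
        return "neutral"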
@@ -108,15 +108,15 @@ def analyze_sentiment(text):
         return sentiment, polarity
     except Exception as e:
         print("Error analyzing sentiment:", e)
-        return "sentiment analysis error", 0.0
+        return "neutral", 0.0
 
-# Function to generate music with MusicGen
+# Function to generate music with MusicGen (using acoustic emotion prediction)
 def generate_music(transcribed_text, emotion_prediction):
     try:
         if processor is None or music_model is None:
             return None
 
-        # Create a prompt that combines the emotion and transcription
+        # Create a prompt that combines the acoustic emotion and transcription
         prompt = f"Background music that is {emotion_prediction} and represents: {transcribed_text}"
 
         # Limit prompt length to avoid model issues
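The generation code below the prompt is truncated in this diff. A minimal sketch of prompt-conditioned generation with the transformers MusicGen API; the checkpoint name, token budget, and WAV export are assumptions:

    import scipy.io.wavfile
    from transformers import AutoProcessor, MusicgenForConditionalGeneration

    processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
    music_model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")

    def generate_clip(prompt, out_path="musicgen_out.wav"):
        # Tokenize the prompt and sample roughly five seconds of audio
        inputs = processor(text=[prompt], padding=True, return_tensors="pt")
        audio_values = music_model.generate(**inputs, max_new_tokens=256)
        rate = music_model.config.audio_encoder.sampling_rate
        scipy.io.wavfile.write(out_path, rate=rate, data=audio_values[0, 0].numpy())
        return out_path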
@@ -151,14 +151,14 @@ def generate_music(transcribed_text, emotion_prediction):
 # --- DeepAI Image Generation (Text2Img) ---
 api_key = os.getenv("DeepAI_api_key")
 
-def generate_image(emotion_prediction, transcribed_text):
+def generate_image(sentiment_prediction, transcribed_text):
     try:
         if not api_key:
             # fallback white image if no API key
             return Image.new('RGB', (1024, 512), color='white')
 
-        # Create the prompt for text2img
-        prompt = f"Generate an equirectangular 360 image texture {emotion_prediction} attitude, representing the idea of: [{transcribed_text}]."
+        # Create the prompt for text2img using SENTIMENT analysis instead of acoustic emotion
+        prompt = f"Generate an equirectangular 360 image texture with {sentiment_prediction} sentiment, representing the idea of: [{transcribed_text}]."
 
         # Make request to DeepAI text2img API
         response = requests.post(
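The requests.post call is cut off at the hunk boundary. A sketch of the remaining request, assuming DeepAI's documented text2img endpoint and a JSON response carrying an output_url:

    import requests
    from io import BytesIO
    from PIL import Image

    def fetch_deepai_image(prompt, api_key):
        response = requests.post(
            "https://api.deepai.org/api/text2img",
            data={"text": prompt},
            headers={"api-key": api_key},
        )
        # Expected shape (assumed): {"id": "...", "output_url": "https://..."}
        output_url = response.json()["output_url"]
        return Image.open(BytesIO(requests.get(output_url).content))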
@@ -204,7 +204,6 @@ def create_texture_and_sphere_preview(image):
     fig.add_trace(go.Image(z=img_array), row=1, col=1)
 
     # Create a 3D sphere for the second subplot
-    # Since we can't directly apply the texture, we'll create a colored sphere
     u = np.linspace(0, 2 * np.pi, 50)
     v = np.linspace(0, np.pi, 25)
     u, v = np.meshgrid(u, v)
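The x, y, z arrays fed to go.Surface in the next hunk are computed between these hunks. For reference, the standard unit-sphere parametrization over this (u, v) grid:

    import numpy as np

    u = np.linspace(0, 2 * np.pi, 50)   # longitude, full circle
    v = np.linspace(0, np.pi, 25)       # latitude, pole to pole
    u, v = np.meshgrid(u, v)

    # Unit sphere: x, y, z each take the grid's shape
    x = np.cos(u) * np.sin(v)
    y = np.sin(u) * np.sin(v)
    z = np.cos(v)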
@@ -218,7 +217,7 @@ def create_texture_and_sphere_preview(image):
 
     fig.add_trace(go.Surface(
         x=x, y=y, z=z,
-        surfacecolor=z,  # Use z-coordinate for color
+        surfacecolor=z,
         colorscale='Viridis',
         showscale=False,
         opacity=0.8
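Mixing a 2D go.Image and a 3D go.Surface in one figure requires the subplot grid (created earlier in this function, outside the diff) to declare an xy cell and a scene cell. A sketch of that setup, with assumed subplot titles:

    from plotly.subplots import make_subplots

    fig = make_subplots(
        rows=1, cols=2,
        specs=[[{"type": "xy"}, {"type": "scene"}]],
        subplot_titles=("Equirectangular Texture", "Sphere Preview"),
    )

The go.Surface trace above would then target row=1, col=2.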
@@ -249,20 +248,19 @@ def create_texture_and_sphere_preview(image):
 
 # Function to get predictions
 def get_predictions(audio_input):
+    # Get acoustic emotion prediction (for music)
     emotion_prediction = predict_emotion_from_audio(audio_input)
-    transcribed_text = transcribe(audio_input)
 
-    # Handle case where emotion_prediction might be None
-    if emotion_prediction is None:
-        emotion_prediction = "Unknown"
+    # Get transcribed text
+    transcribed_text = transcribe(audio_input)
 
-    # Analyze sentiment of transcribed text
+    # Analyze sentiment of transcribed text (for image)
     sentiment, polarity = analyze_sentiment(transcribed_text)
 
-    # Generate image using text2img
-    image = generate_image(emotion_prediction, transcribed_text)
+    # Generate image using SENTIMENT analysis
+    image = generate_image(sentiment, transcribed_text)
 
-    # Generate music based on transcription and emotion
+    # Generate music using ACOUSTIC EMOTION prediction
     music_path = generate_music(transcribed_text, emotion_prediction)
 
     # Create visualization with both texture and sphere
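The end of get_predictions falls outside the diff, but the gr.Interface outputs below imply it returns six values in that order. A sketch, assuming fig is the figure from create_texture_and_sphere_preview:

    fig = create_texture_and_sphere_preview(image)
    # Order must match the outputs list of the gr.Interface below
    return emotion_prediction, transcribed_text, sentiment, image, music_path, fig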
@@ -275,15 +273,15 @@ interface = gr.Interface(
     fn=get_predictions,
     inputs=gr.Audio(label="Input Audio", type="filepath", sources=["microphone"]),
     outputs=[
-        gr.Label(label="Acoustic Prediction"),
+        gr.Label(label="Acoustic Emotion Prediction (for music)"),
         gr.Label(label="Transcribed Text"),
-        gr.Label(label="Sentiment Analysis"),
+        gr.Label(label="Sentiment Analysis (for image)"),
         gr.Image(type='pil', label="Generated Equirectangular Image"),
         gr.Audio(label="Generated Music", type="filepath"),
         gr.Plot(label="Texture and Sphere Preview")
     ],
     title="Affective Virtual Environments",
-    description="Create an AVE using your voice. Get emotion prediction, transcription, sentiment analysis, a generated equirectangular image, music, and a preview of how it would look as a texture on a sphere."
+    description="Create an AVE using your voice. Get emotion prediction (for music), transcription, sentiment analysis (for image), a generated equirectangular image, music, and a preview of how it would look as a texture on a sphere."
 )
 
 interface.launch()
 