jfforero committed on
Commit
fafe874
·
verified ·
1 Parent(s): 3703a9e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +75 -109
app.py CHANGED
@@ -19,11 +19,6 @@ import base64
19
  import plotly.graph_objects as go
20
  from plotly.subplots import make_subplots
21
 
22
-
23
-
24
-
25
-
26
-
27
  # Load the emotion prediction model
28
  def load_emotion_model(model_path):
29
  try:
@@ -189,7 +184,7 @@ def generate_image(sentiment_prediction, transcribed_text):
189
  try:
190
  if not api_key:
191
  # fallback white image if no API key
192
- return Image.new('RGB', (1024, 512), color='white')
193
 
194
  # Get specific prompt based on sentiment
195
  prompt = get_image_prompt(sentiment_prediction, transcribed_text)
@@ -199,8 +194,8 @@ def generate_image(sentiment_prediction, transcribed_text):
199
  "https://api.deepai.org/api/text2img",
200
  data={
201
  'text': prompt,
202
- 'width': 1024,
203
- 'height': 512,
204
  'image_generator_version': 'hd'
205
  },
206
  headers={'api-key': api_key}
@@ -214,109 +209,44 @@ def generate_image(sentiment_prediction, transcribed_text):
214
  else:
215
  print("Error in DeepAI response:", data)
216
  # Return a fallback image
217
- return Image.new('RGB', (1024, 512), color='white')
218
  except Exception as e:
219
  print("Error generating image:", e)
220
  # Return a fallback image
221
- return Image.new('RGB', (1024, 512), color='white')
222
 
223
- # Function to create a visualization with both the equirectangular image and a 3D sphere
224
- # Function to create a visualization with both the equirectangular image and a 3D sphere
225
- def create_texture_and_sphere_preview(image):
226
  try:
227
- # Convert PIL image to numpy array
228
- img_array = np.array(image)
229
- height, width = img_array.shape[0], img_array.shape[1]
230
-
231
- # Create a subplot with the equirectangular image and a 3D sphere
232
- fig = make_subplots(
233
- rows=1, cols=2,
234
- subplot_titles=("Equirectangular Texture", "3D Sphere with Texture Mapping"),
235
- specs=[[{"type": "image"}, {"type": "scatter3d"}]],
236
- horizontal_spacing=0.1
237
- )
238
-
239
- # Add the equirectangular image to the first subplot
240
- fig.add_trace(go.Image(z=img_array), row=1, col=1)
241
-
242
- # Create sphere coordinates
243
- u_res, v_res = 50, 25
244
- u = np.linspace(0, 2 * np.pi, u_res)
245
- v = np.linspace(0, np.pi, v_res)
246
- u, v = np.meshgrid(u, v)
247
-
248
- # Convert spherical coordinates to Cartesian coordinates
249
- x = np.sin(v) * np.cos(u)
250
- y = np.sin(v) * np.sin(u)
251
- z = np.cos(v)
252
-
253
- # Sample colors from the equirectangular image based on UV coordinates
254
- # This approximates texture mapping by sampling the image at the correct UV coordinates
255
- texture_colors = np.zeros((v_res, u_res, 3), dtype=np.uint8)
256
-
257
- for i in range(v_res):
258
- for j in range(u_res):
259
- # Convert spherical coordinates to image coordinates
260
- img_x = int((u[i, j] / (2 * np.pi)) * (width - 1))
261
- img_y = int((v[i, j] / np.pi) * (height - 1))
262
-
263
- # Ensure coordinates are within bounds
264
- img_x = max(0, min(img_x, width - 1))
265
- img_y = max(0, min(img_y, height - 1))
266
-
267
- # Get color from image
268
- if len(img_array.shape) == 3: # RGB image
269
- texture_colors[i, j] = img_array[img_y, img_x, :3]
270
- else: # Grayscale image
271
- texture_colors[i, j] = [img_array[img_y, img_x]] * 3
272
-
273
- # Convert colors to Plotly format (normalized to [0,1])
274
- surface_colors = texture_colors.astype(float) / 255.0
275
 
276
- # Create surface with sampled colors
277
- fig.add_trace(go.Surface(
278
- x=x, y=y, z=z,
279
- surfacecolor=surface_colors,
280
- showscale=False,
281
- opacity=1.0,
282
- lighting=dict(ambient=0.8, diffuse=0.8, specular=0.1, roughness=0.5),
283
- lightposition=dict(x=100, y=100, z=100)
284
- ), row=1, col=2)
285
 
286
- # Update layout
287
- fig.update_layout(
288
- height=500,
289
- title_text="Equirectangular Texture and 3D Sphere Preview",
290
- showlegend=False,
291
- scene2=dict(
292
- xaxis=dict(visible=False, showticklabels=False),
293
- yaxis=dict(visible=False, showticklabels=False),
294
- zaxis=dict(visible=False, showticklabels=False),
295
- aspectmode='data',
296
- camera=dict(
297
- eye=dict(x=1.8, y=1.8, z=1.8)
298
- ),
299
- bgcolor='rgba(0,0,0,0)'
300
- )
301
- )
302
-
303
- # Update axes for the image subplot
304
- fig.update_xaxes(visible=False, row=1, col=1)
305
- fig.update_yaxes(visible=False, row=1, col=1)
306
-
307
- return fig
308
 
 
309
  except Exception as e:
310
- print("Error creating texture and sphere preview:", e)
311
- return go.Figure()
312
 
313
- # Function to get predictions
314
- def get_predictions(audio_input):
315
  # Get acoustic emotion prediction (for music)
316
- emotion_prediction = predict_emotion_from_audio(audio_input)
317
 
318
  # Get transcribed text
319
- transcribed_text = transcribe(audio_input)
320
 
321
  # Analyze sentiment of transcribed text (for image)
322
  sentiment, polarity = analyze_sentiment(transcribed_text)
@@ -327,25 +257,61 @@ def get_predictions(audio_input):
327
  # Generate music using ACOUSTIC EMOTION prediction with specific prompt
328
  music_path = generate_music(transcribed_text, emotion_prediction)
329
 
330
- # Create visualization with both texture and sphere
331
- preview_fig = create_texture_and_sphere_preview(image)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
332
 
333
- return emotion_prediction, transcribed_text, f"Sentiment: {sentiment} (Polarity: {polarity:.2f})", image, music_path, preview_fig
334
 
335
  # Create the Gradio interface
336
  interface = gr.Interface(
337
  fn=get_predictions,
338
  inputs=gr.Audio(label="Input Audio", type="filepath", sources=["microphone"]),
339
  outputs=[
340
- gr.Label(label="Acoustic Emotion Prediction (for music)"),
341
- gr.Label(label="Transcribed Text"),
342
- gr.Label(label="Sentiment Analysis (for image)"),
343
- gr.Image(type='pil', label="Generated Equirectangular Image"),
344
- gr.Audio(label="Generated Music", type="filepath"),
345
- gr.Plot(label="Texture and Sphere Preview")
346
  ],
347
- title="Affective Virtual Environments",
348
- description="Create an AVE using your voice. Get emotion prediction (for music), transcription, sentiment analysis (for image), a generated equirectangular image, music, and a preview of how it would look as a texture on a sphere."
349
  )
350
 
351
  interface.launch()
 
19
  import plotly.graph_objects as go
20
  from plotly.subplots import make_subplots
21
 
 
 
 
 
 
22
  # Load the emotion prediction model
23
  def load_emotion_model(model_path):
24
  try:
 
184
  try:
185
  if not api_key:
186
  # fallback white image if no API key
187
+ return Image.new('RGB', (512, 258), color='white')
188
 
189
  # Get specific prompt based on sentiment
190
  prompt = get_image_prompt(sentiment_prediction, transcribed_text)
 
194
  "https://api.deepai.org/api/text2img",
195
  data={
196
  'text': prompt,
197
+ 'width': 512,
198
+ 'height': 258,
199
  'image_generator_version': 'hd'
200
  },
201
  headers={'api-key': api_key}
 
209
  else:
210
  print("Error in DeepAI response:", data)
211
  # Return a fallback image
212
+ return Image.new('RGB', (512, 258), color='white')
213
  except Exception as e:
214
  print("Error generating image:", e)
215
  # Return a fallback image
216
+ return Image.new('RGB', (512, 258), color='white')
217
 
# Function to split audio into chunks
def split_audio_into_chunks(audio_path, chunk_length=5):
    """Split an audio file into consecutive temporary WAV chunk files.

    The audio is loaded with ``librosa.load(audio_path, sr=None)`` and cut
    into windows of ``chunk_length`` seconds. Windows shorter than one
    second (in practice the trailing remainder) are discarded. Every kept
    window is written to its own ``.wav`` temporary file.

    Args:
        audio_path: Path of the audio file to split.
        chunk_length: Length of each chunk in seconds. Fractional values
            are accepted; the per-chunk sample count is truncated to an
            integer.

    Returns:
        A list with the paths of the chunk files, in playback order. The
        files are created with ``delete=False``, so the caller is
        responsible for removing them. An empty list is returned when
        anything goes wrong (the error is printed) or when no window is at
        least one second long.
    """
    import os  # local import: only needed to clean up after a failure

    chunks = []
    try:
        # Load audio file at its native sampling rate
        y, sr = librosa.load(audio_path, sr=None)

        # range() only accepts an integer step, so truncate explicitly to
        # support fractional chunk lengths.
        samples_per_chunk = int(chunk_length * sr)

        for start in range(0, len(y), samples_per_chunk):
            chunk = y[start:start + samples_per_chunk]
            if len(chunk) < sr:  # Skip chunks with less than 1 second of audio
                continue

            # Reserve a unique file name, then close the handle *before*
            # writing: reopening a still-open temporary file by name is not
            # possible on every platform.
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
                chunk_path = tmp_file.name

            # Register the path before writing so that a failed write is
            # cleaned up below as well.
            chunks.append(chunk_path)
            scipy.io.wavfile.write(chunk_path, sr, chunk)

        return chunks
    except Exception as e:
        print("Error splitting audio:", e)
        # Do not leave partially produced chunk files behind: the caller
        # only receives an empty list and could never delete them.
        for leftover in chunks:
            try:
                os.remove(leftover)
            except OSError:
                pass
        return []
242
 
243
+ # Function to process a single chunk
244
+ def process_chunk(chunk_path):
245
  # Get acoustic emotion prediction (for music)
246
+ emotion_prediction = predict_emotion_from_audio(chunk_path)
247
 
248
  # Get transcribed text
249
+ transcribed_text = transcribe(chunk_path)
250
 
251
  # Analyze sentiment of transcribed text (for image)
252
  sentiment, polarity = analyze_sentiment(transcribed_text)
 
257
  # Generate music using ACOUSTIC EMOTION prediction with specific prompt
258
  music_path = generate_music(transcribed_text, emotion_prediction)
259
 
260
+ return {
261
+ "emotion": emotion_prediction,
262
+ "transcription": transcribed_text,
263
+ "sentiment": f"Sentiment: {sentiment} (Polarity: {polarity:.2f})",
264
+ "image": image,
265
+ "music": music_path
266
+ }
267
+
# Function to get predictions for all chunks
def get_predictions(audio_input):
    """Run the per-chunk pipeline over an audio file and collect the outputs.

    The audio is split into 5-second chunks with
    ``split_audio_into_chunks``; each chunk is handed to ``process_chunk``.
    The temporary chunk files are deleted once processing is over, whether
    it succeeded or raised.

    Args:
        audio_input: Path of the input audio file (passed through to
            ``split_audio_into_chunks``).

    Returns:
        A 5-tuple ``(emotions, transcriptions, sentiments, images, music)``:
        three newline-joined strings with one ``"Chunk N: ..."`` line per
        chunk, the list of per-chunk images, and the music entry of the
        first chunk (``None`` if it is falsy). When the audio cannot be
        split, an error message is returned in the first slot together
        with empty strings and ``None`` for the rest.
    """
    import os  # local import: only needed to delete the temporary chunks

    # Split audio into 5-second chunks
    chunks = split_audio_into_chunks(audio_input, chunk_length=5)

    if not chunks:
        return "Error: Could not split audio into chunks", "", "", None, None

    # Process each chunk
    results = []
    try:
        for i, chunk_path in enumerate(chunks, start=1):
            print(f"Processing chunk {i}/{len(chunks)}")
            results.append(process_chunk(chunk_path))
    finally:
        # The chunk files were created with delete=False and would pile up
        # on disk otherwise.
        # NOTE(review): assumes nothing returned by process_chunk points
        # back at the chunk file itself - confirm against the full function.
        for chunk_path in chunks:
            try:
                os.remove(chunk_path)
            except OSError:
                pass

    # Combine the per-chunk outputs into one string per Gradio textbox
    emotion_str = "\n".join(
        f"Chunk {i}: {r['emotion']}" for i, r in enumerate(results, start=1)
    )
    transcription_str = "\n".join(
        f"Chunk {i}: {r['transcription']}" for i, r in enumerate(results, start=1)
    )
    sentiment_str = "\n".join(
        f"Chunk {i}: {r['sentiment']}" for i, r in enumerate(results, start=1)
    )

    # Create a gallery of images
    images = [r["image"] for r in results]

    # Return first music file for demo (the interface has a single audio output)
    # In a real application, you might want to combine all music chunks
    music_path = results[0]["music"] or None

    return emotion_str, transcription_str, sentiment_str, images, music_path
301
 
# Create the Gradio interface
# One output component per value returned by get_predictions, in the same order.
output_components = [
    gr.Textbox(label="Acoustic Emotion Predictions (for music)", lines=5),
    gr.Textbox(label="Transcribed Texts", lines=5),
    gr.Textbox(label="Sentiment Analyses (for image)", lines=5),
    gr.Gallery(label="Generated Equirectangular Images", columns=2),
    gr.Audio(label="Generated Music (First Chunk)", type="filepath"),
]

interface = gr.Interface(
    fn=get_predictions,
    # The recorded audio is handed to get_predictions as a file path.
    inputs=gr.Audio(label="Input Audio", type="filepath", sources=["microphone"]),
    outputs=output_components,
    title="Affective Virtual Environments - Chunked Processing",
    description="Process audio in 5-second chunks. Get emotion predictions, transcriptions, sentiment analyses, generated equirectangular images, and music for each chunk.",
)

# Start the app
interface.launch()