jfforero committed · verified
Commit de6513b · 1 Parent(s): fafe874

Update app.py

Files changed (1): app.py (+110 -76)
app.py CHANGED
@@ -19,6 +19,11 @@ import base64
 import plotly.graph_objects as go
 from plotly.subplots import make_subplots
 
+
+
+
+
+
 # Load the emotion prediction model
 def load_emotion_model(model_path):
     try:
@@ -184,7 +189,7 @@ def generate_image(sentiment_prediction, transcribed_text):
     try:
         if not api_key:
             # fallback white image if no API key
-            return Image.new('RGB', (512, 258), color='white')
+            return Image.new('RGB', (1024, 512), color='white')
 
         # Get specific prompt based on sentiment
         prompt = get_image_prompt(sentiment_prediction, transcribed_text)
@@ -194,8 +199,8 @@ def generate_image(sentiment_prediction, transcribed_text):
             "https://api.deepai.org/api/text2img",
             data={
                 'text': prompt,
-                'width': 512,
-                'height': 258,
+                'width': 1024,
+                'height': 512,
                 'image_generator_version': 'hd'
             },
             headers={'api-key': api_key}
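
Note on the size change in the two hunks above: an equirectangular panorama spans 360° horizontally and 180° vertically, so its texture needs a strict 2:1 width-to-height ratio. The old 512×258 fallback and request size was slightly off that ratio, while the new 1024×512 is a valid (and higher-resolution) panorama size. A tiny illustrative check, not part of the commit:

def is_equirect_size(width, height):
    # A full equirectangular panorama covers 360 x 180 degrees,
    # so a valid texture is exactly twice as wide as it is tall.
    return width == 2 * height

assert is_equirect_size(1024, 512)      # new size: valid 2:1 panorama
assert not is_equirect_size(512, 258)   # old size: just off 2:1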
@@ -209,44 +214,109 @@ def generate_image(sentiment_prediction, transcribed_text):
         else:
             print("Error in DeepAI response:", data)
             # Return a fallback image
-            return Image.new('RGB', (512, 258), color='white')
+            return Image.new('RGB', (1024, 512), color='white')
     except Exception as e:
         print("Error generating image:", e)
         # Return a fallback image
-        return Image.new('RGB', (512, 258), color='white')
+        return Image.new('RGB', (1024, 512), color='white')
 
-# Function to split audio into chunks
-def split_audio_into_chunks(audio_path, chunk_length=5):
-    """Split audio into chunks of specified length in seconds"""
+# Function to create a visualization with both the equirectangular image and a 3D sphere
+def create_texture_and_sphere_preview(image):
     try:
-        # Load audio file
-        y, sr = librosa.load(audio_path, sr=None)
+        # Convert PIL image to numpy array
+        img_array = np.array(image)
+        height, width = img_array.shape[0], img_array.shape[1]
+
+        # Create a subplot with the equirectangular image and a 3D sphere
+        fig = make_subplots(
+            rows=1, cols=2,
+            subplot_titles=("Equirectangular Texture", "3D Sphere with Texture Mapping"),
+            specs=[[{"type": "image"}, {"type": "scatter3d"}]],
+            horizontal_spacing=0.1
+        )
+
+        # Add the equirectangular image to the first subplot
+        fig.add_trace(go.Image(z=img_array), row=1, col=1)
+
+        # Create sphere coordinates
+        u_res, v_res = 50, 25
+        u = np.linspace(0, 2 * np.pi, u_res)
+        v = np.linspace(0, np.pi, v_res)
+        u, v = np.meshgrid(u, v)
+
+        # Convert spherical coordinates to Cartesian coordinates
+        x = np.sin(v) * np.cos(u)
+        y = np.sin(v) * np.sin(u)
+        z = np.cos(v)
+
+        # Sample colors from the equirectangular image based on UV coordinates
+        # This approximates texture mapping by sampling the image at the correct UV coordinates
+        texture_colors = np.zeros((v_res, u_res, 3), dtype=np.uint8)
+
+        for i in range(v_res):
+            for j in range(u_res):
+                # Convert spherical coordinates to image coordinates
+                img_x = int((u[i, j] / (2 * np.pi)) * (width - 1))
+                img_y = int((v[i, j] / np.pi) * (height - 1))
+
+                # Ensure coordinates are within bounds
+                img_x = max(0, min(img_x, width - 1))
+                img_y = max(0, min(img_y, height - 1))
+
+                # Get color from image
+                if len(img_array.shape) == 3:  # RGB image
+                    texture_colors[i, j] = img_array[img_y, img_x, :3]
+                else:  # Grayscale image
+                    texture_colors[i, j] = [img_array[img_y, img_x]] * 3
+
+        # Convert colors to Plotly format (normalized to [0,1])
+        surface_colors = texture_colors.astype(float) / 255.0
 
-        # Calculate number of samples per chunk
-        samples_per_chunk = chunk_length * sr
+        # Create surface with sampled colors
+        fig.add_trace(go.Surface(
+            x=x, y=y, z=z,
+            surfacecolor=surface_colors,
+            showscale=False,
+            opacity=1.0,
+            lighting=dict(ambient=0.8, diffuse=0.8, specular=0.1, roughness=0.5),
+            lightposition=dict(x=100, y=100, z=100)
+        ), row=1, col=2)
 
-        # Split into chunks
-        chunks = []
-        for i in range(0, len(y), samples_per_chunk):
-            chunk = y[i:i + samples_per_chunk]
-            if len(chunk) >= sr:  # Ensure chunk has at least 1 second of audio
-                # Save chunk to temporary file
-                with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
-                    scipy.io.wavfile.write(tmp_file.name, sr, chunk)
-                    chunks.append(tmp_file.name)
+        # Update layout
+        fig.update_layout(
+            height=500,
+            title_text="Equirectangular Texture and 3D Sphere Preview",
+            showlegend=False,
+            scene2=dict(
+                xaxis=dict(visible=False, showticklabels=False),
+                yaxis=dict(visible=False, showticklabels=False),
+                zaxis=dict(visible=False, showticklabels=False),
+                aspectmode='data',
+                camera=dict(
+                    eye=dict(x=1.8, y=1.8, z=1.8)
+                ),
+                bgcolor='rgba(0,0,0,0)'
+            )
+        )
+
+        # Update axes for the image subplot
+        fig.update_xaxes(visible=False, row=1, col=1)
+        fig.update_yaxes(visible=False, row=1, col=1)
+
+        return fig
 
-        return chunks
     except Exception as e:
-        print("Error splitting audio:", e)
-        return []
+        print("Error creating texture and sphere preview:", e)
+        return go.Figure()
 
-# Function to process a single chunk
-def process_chunk(chunk_path):
+# Function to get predictions
+def get_predictions(audio_input):
     # Get acoustic emotion prediction (for music)
-    emotion_prediction = predict_emotion_from_audio(chunk_path)
+    emotion_prediction = predict_emotion_from_audio(audio_input)
 
     # Get transcribed text
-    transcribed_text = transcribe(chunk_path)
+    transcribed_text = transcribe(audio_input)
 
     # Analyze sentiment of transcribed text (for image)
     sentiment, polarity = analyze_sentiment(transcribed_text)
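
The double loop in the new create_texture_and_sphere_preview is the core of the preview: each sphere vertex's longitude u and colatitude v are mapped to pixel coordinates in the equirectangular image (u/2π of the width, v/π of the height) and that texel's color is copied. The same sampling step can be written without Python-level loops; a vectorized sketch with a hypothetical helper name and NumPy fancy indexing (an illustration, not code from the commit):

import numpy as np

def sample_equirect(img_array, u, v):
    # Nearest-neighbour sampling of an equirectangular texture at spherical
    # coordinates u in [0, 2*pi] and v in [0, pi]; u and v are meshgrid arrays.
    height, width = img_array.shape[:2]
    img_x = np.clip((u / (2 * np.pi) * (width - 1)).astype(int), 0, width - 1)
    img_y = np.clip((v / np.pi * (height - 1)).astype(int), 0, height - 1)
    if img_array.ndim == 3:  # RGB image
        return img_array[img_y, img_x, :3]
    return np.stack([img_array[img_y, img_x]] * 3, axis=-1)  # grayscale -> RGB

One caveat worth knowing: Plotly documents go.Surface's surfacecolor as a 2D array of scalars mapped through a colorscale, so the (v_res, u_res, 3) RGB array built above may not render as a true-color texture; collapsing the samples to a single luminance channel is one common workaround.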
@@ -257,61 +327,25 @@ def process_chunk(chunk_path):
     # Generate music using ACOUSTIC EMOTION prediction with specific prompt
     music_path = generate_music(transcribed_text, emotion_prediction)
 
-    return {
-        "emotion": emotion_prediction,
-        "transcription": transcribed_text,
-        "sentiment": f"Sentiment: {sentiment} (Polarity: {polarity:.2f})",
-        "image": image,
-        "music": music_path
-    }
-
-# Function to get predictions for all chunks
-def get_predictions(audio_input):
-    # Split audio into 5-second chunks
-    chunks = split_audio_into_chunks(audio_input, chunk_length=5)
-
-    if not chunks:
-        return "Error: Could not split audio into chunks", "", "", None, None
-
-    # Process each chunk
-    results = []
-    for i, chunk_path in enumerate(chunks):
-        print(f"Processing chunk {i+1}/{len(chunks)}")
-        result = process_chunk(chunk_path)
-        results.append(result)
-
-    # Prepare outputs for Gradio
-    emotion_outputs = [f"Chunk {i+1}: {r['emotion']}" for i, r in enumerate(results)]
-    transcription_outputs = [f"Chunk {i+1}: {r['transcription']}" for i, r in enumerate(results)]
-    sentiment_outputs = [f"Chunk {i+1}: {r['sentiment']}" for i, r in enumerate(results)]
-
-    # Combine all outputs into strings
-    emotion_str = "\n".join(emotion_outputs)
-    transcription_str = "\n".join(transcription_outputs)
-    sentiment_str = "\n".join(sentiment_outputs)
-
-    # Create a gallery of images
-    images = [r["image"] for r in results]
-
-    # Return first music file for demo (Gradio can only display one audio file)
-    # In a real application, you might want to combine all music chunks
-    music_path = results[0]["music"] if results[0]["music"] else None
+    # Create visualization with both texture and sphere
+    preview_fig = create_texture_and_sphere_preview(image)
 
-    return emotion_str, transcription_str, sentiment_str, images, music_path
+    return emotion_prediction, transcribed_text, f"Sentiment: {sentiment} (Polarity: {polarity:.2f})", image, music_path, preview_fig
 
 # Create the Gradio interface
 interface = gr.Interface(
     fn=get_predictions,
     inputs=gr.Audio(label="Input Audio", type="filepath", sources=["microphone"]),
     outputs=[
-        gr.Textbox(label="Acoustic Emotion Predictions (for music)", lines=5),
-        gr.Textbox(label="Transcribed Texts", lines=5),
-        gr.Textbox(label="Sentiment Analyses (for image)", lines=5),
-        gr.Gallery(label="Generated Equirectangular Images", columns=2),
-        gr.Audio(label="Generated Music (First Chunk)", type="filepath")
+        gr.Label(label="Acoustic Emotion Prediction (for music)"),
+        gr.Label(label="Transcribed Text"),
+        gr.Label(label="Sentiment Analysis (for image)"),
+        gr.Image(type='pil', label="Generated Equirectangular Image"),
+        gr.Audio(label="Generated Music", type="filepath"),
+        gr.Plot(label="Texture and Sphere Preview")
     ],
-    title="Affective Virtual Environments - Chunked Processing",
-    description="Process audio in 5-second chunks. Get emotion predictions, transcriptions, sentiment analyses, generated equirectangular images, and music for each chunk."
+    title="Affective Virtual Environments",
+    description="Create an AVE using your voice. Get emotion prediction (for music), transcription, sentiment analysis (for image), a generated equirectangular image, music, and a preview of how it would look as a texture on a sphere."
 )
 
-interface.launch()
+interface.launch()
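
For readers comparing the two return statements: gr.Interface matches a function's returned tuple to the outputs list by position, so the new six-element return of get_predictions lines up one-to-one with the six components above. A minimal standalone illustration of that contract (placeholder function and labels, not from the commit):

import gradio as gr

def f(text):
    # The returned tuple is paired with `outputs` by position,
    # exactly as get_predictions' six values are paired above.
    return text.upper(), str(len(text))

demo = gr.Interface(fn=f, inputs=gr.Textbox(),
                    outputs=[gr.Label(label="Upper"), gr.Label(label="Length")])
# demo.launch()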
 
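
Because the DeepAI and audio-model calls require API keys, the quickest way to exercise just the new preview path is to hand create_texture_and_sphere_preview a synthetic 2:1 image. A minimal offline smoke test, an assumption rather than part of the commit:

import numpy as np
from PIL import Image

# Synthetic 1024x512 panorama: red ramps with longitude, blue with latitude,
# which makes the sphere's orientation easy to verify by eye.
grad = np.zeros((512, 1024, 3), dtype=np.uint8)
grad[..., 0] = np.linspace(0, 255, 1024, dtype=np.uint8)
grad[..., 2] = np.linspace(0, 255, 512, dtype=np.uint8)[:, None]

fig = create_texture_and_sphere_preview(Image.fromarray(grad))
fig.show()  # opens the two-panel texture + sphere figure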