jfforero committed on
Commit
1cfc495
·
verified ·
1 Parent(s): 342ac2d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -52
app.py CHANGED
@@ -16,6 +16,8 @@ import tempfile
16
  import base64
17
  import plotly.graph_objects as go
18
  from plotly.subplots import make_subplots
 
 
19
 
20
  # Load the emotion prediction model
21
  def load_emotion_model(model_path):
@@ -52,7 +54,7 @@ processor, music_model, device = load_musicgen_model()
52
  # Function to transcribe audio
53
  def transcribe(wav_filepath):
54
  try:
55
- segments, _ = model2.transcribe(wav_filepath, beam_size=5)
56
  return "".join([segment.text for segment in segments])
57
  except Exception as e:
58
  print("Error transcribing audio:", e)
@@ -186,66 +188,68 @@ def generate_image(emotion_prediction, transcribed_text):
186
  # Return a fallback image
187
  return Image.new('RGB', (1024, 512), color='white')
188
 
189
- # Function to create a visualization with both the equirectangular image and a 3D sphere
190
- def create_texture_and_sphere_preview(image):
191
  try:
192
- # Convert PIL image to numpy array for display
193
  img_array = np.array(image)
194
 
195
- # Create a subplot with the equirectangular image and a 3D sphere
196
- fig = make_subplots(
197
- rows=1, cols=2,
198
- subplot_titles=("Equirectangular Texture", "3D Sphere Preview"),
199
- specs=[[{"type": "image"}, {"type": "scatter3d"}]],
200
- horizontal_spacing=0.1
201
- )
202
 
203
- # Add the equirectangular image to the first subplot
204
- fig.add_trace(go.Image(z=img_array), row=1, col=1)
 
 
 
205
 
206
- # Create a 3D sphere for the second subplot
207
- # Since we can't directly apply the texture, we'll create a colored sphere
208
- u = np.linspace(0, 2 * np.pi, 50)
209
- v = np.linspace(0, np.pi, 25)
210
- u, v = np.meshgrid(u, v)
211
 
212
- x = np.sin(v) * np.cos(u)
213
- y = np.sin(v) * np.sin(u)
214
- z = np.cos(v)
215
 
216
- # Create a color pattern based on the sphere coordinates
217
- colorscale = [(0, 'red'), (0.5, 'green'), (1, 'blue')]
 
 
 
 
 
218
 
219
- fig.add_trace(go.Surface(
220
- x=x, y=y, z=z,
221
- surfacecolor=z, # Use z-coordinate for color
222
- colorscale='Viridis',
223
- showscale=False,
224
- opacity=0.8
225
- ), row=1, col=2)
226
 
227
- # Update layout
228
- fig.update_layout(
229
- height=400,
230
- title_text="Equirectangular Texture and 3D Sphere Preview",
231
- showlegend=False
232
- )
233
 
234
- # Update axes for the image subplot
235
- fig.update_xaxes(visible=False, row=1, col=1)
236
- fig.update_yaxes(visible=False, row=1, col=1)
237
 
238
- # Update 3D scene settings
239
- fig.update_scenes(
240
- aspectmode='data',
241
- row=1, col=2
242
- )
 
 
243
 
244
- return fig
245
 
 
 
 
 
 
 
246
  except Exception as e:
247
- print("Error creating texture and sphere preview:", e)
248
- return go.Figure()
 
249
 
250
  # Function to get predictions
251
  def get_predictions(audio_input):
@@ -265,10 +269,15 @@ def get_predictions(audio_input):
265
  # Generate music based on transcription and emotion
266
  music_path = generate_music(transcribed_text, emotion_prediction)
267
 
268
- # Create visualization with both texture and sphere
269
- preview_fig = create_texture_and_sphere_preview(image)
 
 
 
 
 
270
 
271
- return emotion_prediction, transcribed_text, f"Sentiment: {sentiment} (Polarity: {polarity:.2f})", image, music_path, preview_fig
272
 
273
  # Create the Gradio interface
274
  interface = gr.Interface(
@@ -280,10 +289,10 @@ interface = gr.Interface(
280
  gr.Label(label="Sentiment Analysis"),
281
  gr.Image(type='pil', label="Generated Equirectangular Image"),
282
  gr.Audio(label="Generated Music", type="filepath"),
283
- gr.Plot(label="Texture and Sphere Preview")
284
  ],
285
  title="Affective Virtual Environments",
286
- description="Create an AVE using your voice. Get emotion prediction, transcription, sentiment analysis, a generated equirectangular image, music, and a preview of how it would look as a texture on a sphere."
287
  )
288
 
289
  interface.launch()
 
16
  import base64
17
  import plotly.graph_objects as go
18
  from plotly.subplots import make_subplots
19
+ import matplotlib.pyplot as plt
20
+ from mpl_toolkits.mplot3d import Axes3D
21
 
22
  # Load the emotion prediction model
23
  def load_emotion_model(model_path):
 
54
  # Function to transcribe audio
55
  def transcribe(wav_filepath):
56
  try:
57
+ segments, _ = model2.transcribe(wav_filepath, beam_size=5)
58
  return "".join([segment.text for segment in segments])
59
  except Exception as e:
60
  print("Error transcribing audio:", e)
 
188
  # Return a fallback image
189
  return Image.new('RGB', (1024, 512), color='white')
190
 
191
+ # Function to create a sphere with the equirectangular texture using matplotlib
192
+ def create_sphere_with_texture(image):
193
  try:
194
+ # Convert PIL image to numpy array
195
  img_array = np.array(image)
196
 
197
+ # Create a figure with two subplots
198
+ fig = plt.figure(figsize=(12, 6))
 
 
 
 
 
199
 
200
+ # First subplot: equirectangular image
201
+ ax1 = fig.add_subplot(121)
202
+ ax1.imshow(img_array)
203
+ ax1.set_title('Equirectangular Texture')
204
+ ax1.axis('off')
205
 
206
+ # Second subplot: 3D sphere with texture
207
+ ax2 = fig.add_subplot(122, projection='3d')
 
 
 
208
 
209
+ # Define a grid matching the map size, then subsample it below for performance
210
+ theta = np.linspace(0, np.pi, img_array.shape[0])
211
+ phi = np.linspace(0, 2*np.pi, img_array.shape[1])
212
 
213
+ # Keep a reasonable number of points for performance
214
+ count = 100
215
+ theta_inds = np.linspace(0, img_array.shape[0] - 1, count).round().astype(int)
216
+ phi_inds = np.linspace(0, img_array.shape[1] - 1, count*2).round().astype(int)
217
+ theta = theta[theta_inds]
218
+ phi = phi[phi_inds]
219
+ img_sampled = img_array[np.ix_(theta_inds, phi_inds)]
220
 
221
+ # Create meshgrid
222
+ theta, phi = np.meshgrid(theta, phi, indexing='ij')
223
+ R = 1
 
 
 
 
224
 
225
+ # Sphere coordinates
226
+ x = R * np.sin(theta) * np.cos(phi)
227
+ y = R * np.sin(theta) * np.sin(phi)
228
+ z = R * np.cos(theta)
 
 
229
 
230
+ # Plot the sphere with texture
231
+ ax2.plot_surface(x, y, z, facecolors=img_sampled/255, rstride=1, cstride=1)
 
232
 
233
+ # Make the plot more spherical
234
+ ax2.set_box_aspect([1, 1, 1]) # Aspect ratio is 1:1:1
235
+ ax2.set_axis_off()
236
+ ax2.set_title('3D Sphere with Texture')
237
+
238
+ # Adjust viewing angle
239
+ ax2.view_init(elev=30, azim=45)
240
 
241
+ plt.tight_layout()
242
 
243
+ # Save the figure to a temporary file
244
+ with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp_file:
245
+ plt.savefig(tmp_file.name, dpi=100, bbox_inches='tight')
246
+ plt.close(fig)
247
+ return tmp_file.name
248
+
249
  except Exception as e:
250
+ print("Error creating sphere with texture:", e)
251
+ # Return a fallback image path
252
+ return None
253
 
254
  # Function to get predictions
255
  def get_predictions(audio_input):
 
269
  # Generate music based on transcription and emotion
270
  music_path = generate_music(transcribed_text, emotion_prediction)
271
 
272
+ # Create sphere with texture visualization
273
+ sphere_image_path = create_sphere_with_texture(image)
274
+
275
+ # Load the sphere image if it was created successfully
276
+ sphere_image = None
277
+ if sphere_image_path:
278
+ sphere_image = Image.open(sphere_image_path)
279
 
280
+ return emotion_prediction, transcribed_text, f"Sentiment: {sentiment} (Polarity: {polarity:.2f})", image, music_path, sphere_image
281
 
282
  # Create the Gradio interface
283
  interface = gr.Interface(
 
289
  gr.Label(label="Sentiment Analysis"),
290
  gr.Image(type='pil', label="Generated Equirectangular Image"),
291
  gr.Audio(label="Generated Music", type="filepath"),
292
+ gr.Image(type='pil', label="3D Sphere with Texture")
293
  ],
294
  title="Affective Virtual Environments",
295
+ description="Create an AVE using your voice. Get emotion prediction, transcription, sentiment analysis, a generated equirectangular image, music, and a 3D sphere with your texture applied."
296
  )
297
 
298
  interface.launch()