jfforero committed (verified) · commit 7b86b8d · parent 5fefbfa

Update app.py

Files changed (1): app.py (+108 -4)
app.py CHANGED
 
@@ -13,6 +13,8 @@ import torch
 import scipy.io.wavfile
 from transformers import AutoProcessor, MusicgenForConditionalGeneration
 import tempfile
+import base64
+import json
 
 # Load the emotion prediction model
 def load_emotion_model(model_path):
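Review note: create_sphere_viewer (added below) also calls BytesIO, which this hunk does not import, and json does not appear to be used anywhere in the visible diff. Assuming app.py does not already import BytesIO elsewhere, this hunk would presumably also need:

    from io import BytesIO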
 
@@ -183,6 +185,104 @@ def generate_image(emotion_prediction, transcribed_text):
         # Return a fallback image
         return Image.new('RGB', (1024, 512), color='white')
 
+# Function to create HTML for 3D sphere with texture
+def create_sphere_viewer(image):
+    try:
+        # Convert PIL image to base64
+        buffered = BytesIO()
+        image.save(buffered, format="PNG")
+        img_str = base64.b64encode(buffered.getvalue()).decode()
+
+        # Create HTML with Three.js for 3D sphere
+        html_content = f"""
+        <!DOCTYPE html>
+        <html>
+        <head>
+            <style>
+                body {{ margin: 0; overflow: hidden; }}
+                canvas {{ display: block; }}
+            </style>
+        </head>
+        <body>
+            <script src="https://cdnjs.cloudflare.com/ajax/libs/three.js/r128/three.min.js"></script>
+            <script>
+                // Set up scene
+                const scene = new THREE.Scene();
+                const camera = new THREE.PerspectiveCamera(75, window.innerWidth / window.innerHeight, 0.1, 1000);
+                const renderer = new THREE.WebGLRenderer();
+                renderer.setSize(window.innerWidth, window.innerHeight);
+                document.body.appendChild(renderer.domElement);
+
+                // Create sphere with texture
+                const geometry = new THREE.SphereGeometry(5, 60, 40);
+
+                // Convert base64 image to texture
+                const textureLoader = new THREE.TextureLoader();
+                const texture = textureLoader.load('data:image/png;base64,{img_str}');
+
+                // Flip the texture for proper equirectangular mapping
+                texture.wrapS = THREE.RepeatWrapping;
+                texture.repeat.x = -1;
+
+                const material = new THREE.MeshBasicMaterial({{
+                    map: texture,
+                    side: THREE.DoubleSide
+                }});
+
+                const sphere = new THREE.Mesh(geometry, material);
+                scene.add(sphere);
+
+                // Position camera
+                camera.position.z = 8;
+
+                // Add controls for rotation
+                let mouseX = 0;
+                let mouseY = 0;
+                let targetX = 0;
+                let targetY = 0;
+                const windowHalfX = window.innerWidth / 2;
+                const windowHalfY = window.innerHeight / 2;
+
+                document.addEventListener('mousemove', (event) => {{
+                    mouseX = (event.clientX - windowHalfX);
+                    mouseY = (event.clientY - windowHalfY);
+                }});
+
+                // Animation loop
+                function animate() {{
+                    requestAnimationFrame(animate);
+
+                    targetX = mouseX * 0.001;
+                    targetY = mouseY * 0.001;
+
+                    sphere.rotation.y += 0.05 * (targetX - sphere.rotation.y);
+                    sphere.rotation.x += 0.05 * (targetY - sphere.rotation.x);
+
+                    renderer.render(scene, camera);
+                }}
+
+                animate();
+
+                // Handle window resize
+                window.addEventListener('resize', () => {{
+                    camera.aspect = window.innerWidth / window.innerHeight;
+                    camera.updateProjectionMatrix();
+                    renderer.setSize(window.innerWidth, window.innerHeight);
+                }});
+            </script>
+        </body>
+        </html>
+        """
+
+        # Save HTML to temporary file
+        with tempfile.NamedTemporaryFile(suffix=".html", delete=False, mode='w') as f:
+            f.write(html_content)
+            return f.name
+
+    except Exception as e:
+        print("Error creating sphere viewer:", e)
+        return None
+
 # Function to get predictions
 def get_predictions(audio_input):
     emotion_prediction = predict_emotion_from_audio(audio_input)
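Review note: the viewer embeds the texture as a PNG data URI, so the generated HTML file is fully self-contained and can be opened directly in a browser. A minimal sketch of just that encoding step, runnable on its own (the helper name is hypothetical; Pillow and a 2:1 equirectangular test image are assumed):

    import base64
    from io import BytesIO
    from PIL import Image

    def image_to_data_uri(image):
        # Serialize the PIL image to PNG in memory, then base64-encode it
        buffered = BytesIO()
        image.save(buffered, format="PNG")
        return "data:image/png;base64," + base64.b64encode(buffered.getvalue()).decode()

    # 2:1 aspect ratio matches the fallback image used in app.py
    print(image_to_data_uri(Image.new("RGB", (1024, 512), "white"))[:48], "...")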
 
@@ -201,7 +301,10 @@ def get_predictions(audio_input):
     # Generate music based on transcription and emotion
     music_path = generate_music(transcribed_text, emotion_prediction)
 
-    return emotion_prediction, transcribed_text, f"Sentiment: {sentiment} (Polarity: {polarity:.2f})", image, music_path
+    # Create 3D sphere viewer with the generated image as texture
+    sphere_html_path = create_sphere_viewer(image)
+
+    return emotion_prediction, transcribed_text, f"Sentiment: {sentiment} (Polarity: {polarity:.2f})", image, music_path, sphere_html_path
 
 # Create the Gradio interface
 interface = gr.Interface(
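Review note: get_predictions now returns six values, so the interface's outputs list must grow to six components in matching order; the hunk below adds the sixth. Note also that create_sphere_viewer returns None on failure, which will be passed through to the new HTML output as-is.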
 
@@ -211,11 +314,12 @@ interface = gr.Interface(
         gr.Label(label="Acoustic Prediction"),
         gr.Label(label="Transcribed Text"),
         gr.Label(label="Sentiment Analysis"),
-        gr.Image(type='pil', label="Generated Image"),
-        gr.Audio(label="Generated Music", type="filepath")
+        gr.Image(type='pil', label="Generated Equirectangular Image"),
+        gr.Audio(label="Generated Music", type="filepath"),
+        gr.HTML(label="3D Sphere Viewer")  # Added HTML output for 3D sphere
     ],
     title="Affective Virtual Environments",
-    description="Create an AVE using your voice. Get emotion prediction, transcription, sentiment analysis, a generated image, and music."
+    description="Create an AVE using your voice. Get emotion prediction, transcription, sentiment analysis, a generated equirectangular image, music, and a 3D sphere viewer."
 )
 
 interface.launch()
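Review note: gr.HTML renders an HTML string, while create_sphere_viewer returns a temp-file path, so the component will likely display the path as plain text rather than the viewer. In addition, depending on the Gradio version, <script> tags passed to gr.HTML may not execute; embedding the document in an iframe via srcdoc is a common workaround. A hedged sketch of one way to adapt the return value (the wrapper function and iframe sizing are assumptions, not part of this commit):

    import html

    def sphere_viewer_component(image):
        # Read the generated viewer document back into a string
        path = create_sphere_viewer(image)
        if path is None:
            return "<p>3D viewer unavailable.</p>"
        with open(path) as f:
            doc = f.read()
        # Escape the document and hand it to an iframe so its scripts can run
        return f'<iframe srcdoc="{html.escape(doc)}" width="100%" height="512" frameborder="0"></iframe>'

    # get_predictions would then return sphere_viewer_component(image)
    # in place of sphere_html_path.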