Update app.py
app.py CHANGED

@@ -13,6 +13,8 @@ import torch
 import scipy.io.wavfile
 from transformers import AutoProcessor, MusicgenForConditionalGeneration
 import tempfile
+import base64
+import json
 
 # Load the emotion prediction model
 def load_emotion_model(model_path):
@@ -183,6 +185,104 @@ def generate_image(emotion_prediction, transcribed_text):
         # Return a fallback image
         return Image.new('RGB', (1024, 512), color='white')
 
+# Function to create HTML for 3D sphere with texture
+def create_sphere_viewer(image):
+    try:
+        # Convert PIL image to base64
+        buffered = BytesIO()
+        image.save(buffered, format="PNG")
+        img_str = base64.b64encode(buffered.getvalue()).decode()
+
+        # Create HTML with Three.js for 3D sphere
+        html_content = f"""
+        <!DOCTYPE html>
+        <html>
+        <head>
+            <style>
+                body {{ margin: 0; overflow: hidden; }}
+                canvas {{ display: block; }}
+            </style>
+        </head>
+        <body>
+            <script src="https://cdnjs.cloudflare.com/ajax/libs/three.js/r128/three.min.js"></script>
+            <script>
+                // Set up scene
+                const scene = new THREE.Scene();
+                const camera = new THREE.PerspectiveCamera(75, window.innerWidth / window.innerHeight, 0.1, 1000);
+                const renderer = new THREE.WebGLRenderer();
+                renderer.setSize(window.innerWidth, window.innerHeight);
+                document.body.appendChild(renderer.domElement);
+
+                // Create sphere with texture
+                const geometry = new THREE.SphereGeometry(5, 60, 40);
+
+                // Convert base64 image to texture
+                const textureLoader = new THREE.TextureLoader();
+                const texture = textureLoader.load('data:image/png;base64,{img_str}');
+
+                // Flip the texture for proper equirectangular mapping
+                texture.wrapS = THREE.RepeatWrapping;
+                texture.repeat.x = -1;
+
+                const material = new THREE.MeshBasicMaterial({{
+                    map: texture,
+                    side: THREE.DoubleSide
+                }});
+
+                const sphere = new THREE.Mesh(geometry, material);
+                scene.add(sphere);
+
+                // Position camera
+                camera.position.z = 8;
+
+                // Add controls for rotation
+                let mouseX = 0;
+                let mouseY = 0;
+                let targetX = 0;
+                let targetY = 0;
+                const windowHalfX = window.innerWidth / 2;
+                const windowHalfY = window.innerHeight / 2;
+
+                document.addEventListener('mousemove', (event) => {{
+                    mouseX = (event.clientX - windowHalfX);
+                    mouseY = (event.clientY - windowHalfY);
+                }});
+
+                // Animation loop
+                function animate() {{
+                    requestAnimationFrame(animate);
+
+                    targetX = mouseX * 0.001;
+                    targetY = mouseY * 0.001;
+
+                    sphere.rotation.y += 0.05 * (targetX - sphere.rotation.y);
+                    sphere.rotation.x += 0.05 * (targetY - sphere.rotation.x);
+
+                    renderer.render(scene, camera);
+                }}
+
+                animate();
+
+                // Handle window resize
+                window.addEventListener('resize', () => {{
+                    camera.aspect = window.innerWidth / window.innerHeight;
+                    camera.updateProjectionMatrix();
+                    renderer.setSize(window.innerWidth, window.innerHeight);
+                }});
+            </script>
+        </body>
+        </html>
+        """
+
+        # Save HTML to temporary file
+        with tempfile.NamedTemporaryFile(suffix=".html", delete=False, mode='w') as f:
+            f.write(html_content)
+            return f.name
+
+    except Exception as e:
+        print("Error creating sphere viewer:", e)
+        return None
+
 # Function to get predictions
 def get_predictions(audio_input):
     emotion_prediction = predict_emotion_from_audio(audio_input)
@@ -201,7 +301,10 @@ def get_predictions(audio_input):
     # Generate music based on transcription and emotion
     music_path = generate_music(transcribed_text, emotion_prediction)
 
-    return emotion_prediction, transcribed_text, f"Sentiment: {sentiment} (Polarity: {polarity:.2f})", image, music_path
+    # Create 3D sphere viewer with the generated image as texture
+    sphere_html_path = create_sphere_viewer(image)
+
+    return emotion_prediction, transcribed_text, f"Sentiment: {sentiment} (Polarity: {polarity:.2f})", image, music_path, sphere_html_path
 
 # Create the Gradio interface
 interface = gr.Interface(
@@ -211,11 +314,12 @@ interface = gr.Interface(
         gr.Label(label="Acoustic Prediction"),
         gr.Label(label="Transcribed Text"),
         gr.Label(label="Sentiment Analysis"),
-        gr.Image(type='pil', label="Generated Image"),
-        gr.Audio(label="Generated Music", type="filepath")
+        gr.Image(type='pil', label="Generated Equirectangular Image"),
+        gr.Audio(label="Generated Music", type="filepath"),
+        gr.HTML(label="3D Sphere Viewer")  # Added HTML output for 3D sphere
     ],
     title="Affective Virtual Environments",
-    description="Create an AVE using your voice. Get emotion prediction, transcription, sentiment analysis, a generated image, and music."
+    description="Create an AVE using your voice. Get emotion prediction, transcription, sentiment analysis, a generated equirectangular image, music, and a 3D sphere viewer."
 )
 
 interface.launch()
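A note on wiring these pieces together: `gr.HTML` renders an HTML string, while `create_sphere_viewer` returns the path of a temporary file, so the viewer output may come up blank as wired in this commit. The sketch below is not part of the diff; `sphere_viewer_html` is a hypothetical helper that reads the saved page back and inlines it in an iframe that `gr.HTML` can display. It also assumes `from io import BytesIO` already appears among app.py's unchanged imports, since `create_sphere_viewer` uses `BytesIO` but the diff only adds `base64` and `json`.

import base64

def sphere_viewer_html(sphere_html_path):
    """Wrap the saved Three.js page in an iframe so gr.HTML can render it."""
    if sphere_html_path is None:
        return "<p>Sphere viewer unavailable.</p>"
    with open(sphere_html_path, encoding="utf-8") as f:
        page = f.read()
    # Base64-encode the page so quotes and newlines survive inside the src attribute.
    encoded = base64.b64encode(page.encode("utf-8")).decode()
    return (f'<iframe src="data:text/html;base64,{encoded}" '
            'width="100%" height="512" style="border:none;"></iframe>')

With a helper along these lines, `get_predictions` could return `sphere_viewer_html(sphere_html_path)` in place of the raw path, leaving the rest of the outputs list unchanged.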