Update app.py
Browse files
app.py
CHANGED
|
@@ -16,6 +16,8 @@ import tempfile
|
|
| 16 |
import base64
|
| 17 |
import plotly.graph_objects as go
|
| 18 |
from plotly.subplots import make_subplots
|
|
|
|
|
|
|
| 19 |
|
| 20 |
# Load the emotion prediction model
|
| 21 |
def load_emotion_model(model_path):
|
|
@@ -52,7 +54,7 @@ processor, music_model, device = load_musicgen_model()
|
|
| 52 |
# Function to transcribe audio
|
| 53 |
def transcribe(wav_filepath):
|
| 54 |
try:
|
| 55 |
-
segments, _ = model2.
|
| 56 |
return "".join([segment.text for segment in segments])
|
| 57 |
except Exception as e:
|
| 58 |
print("Error transcribing audio:", e)
|
|
@@ -186,66 +188,68 @@ def generate_image(emotion_prediction, transcribed_text):
|
|
| 186 |
# Return a fallback image
|
| 187 |
return Image.new('RGB', (1024, 512), color='white')
|
| 188 |
|
| 189 |
-
# Function to create a
|
| 190 |
-
def
|
| 191 |
try:
|
| 192 |
-
# Convert PIL image to numpy array
|
| 193 |
img_array = np.array(image)
|
| 194 |
|
| 195 |
-
# Create a
|
| 196 |
-
fig =
|
| 197 |
-
rows=1, cols=2,
|
| 198 |
-
subplot_titles=("Equirectangular Texture", "3D Sphere Preview"),
|
| 199 |
-
specs=[[{"type": "image"}, {"type": "scatter3d"}]],
|
| 200 |
-
horizontal_spacing=0.1
|
| 201 |
-
)
|
| 202 |
|
| 203 |
-
#
|
| 204 |
-
fig.
|
|
|
|
|
|
|
|
|
|
| 205 |
|
| 206 |
-
#
|
| 207 |
-
|
| 208 |
-
u = np.linspace(0, 2 * np.pi, 50)
|
| 209 |
-
v = np.linspace(0, np.pi, 25)
|
| 210 |
-
u, v = np.meshgrid(u, v)
|
| 211 |
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
|
| 216 |
-
#
|
| 217 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 218 |
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
colorscale='Viridis',
|
| 223 |
-
showscale=False,
|
| 224 |
-
opacity=0.8
|
| 225 |
-
), row=1, col=2)
|
| 226 |
|
| 227 |
-
#
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
showlegend=False
|
| 232 |
-
)
|
| 233 |
|
| 234 |
-
#
|
| 235 |
-
|
| 236 |
-
fig.update_yaxes(visible=False, row=1, col=1)
|
| 237 |
|
| 238 |
-
#
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
|
|
|
|
|
|
| 243 |
|
| 244 |
-
|
| 245 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 246 |
except Exception as e:
|
| 247 |
-
print("Error creating
|
| 248 |
-
|
|
|
|
| 249 |
|
| 250 |
# Function to get predictions
|
| 251 |
def get_predictions(audio_input):
|
|
@@ -265,10 +269,15 @@ def get_predictions(audio_input):
|
|
| 265 |
# Generate music based on transcription and emotion
|
| 266 |
music_path = generate_music(transcribed_text, emotion_prediction)
|
| 267 |
|
| 268 |
-
# Create
|
| 269 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 270 |
|
| 271 |
-
return emotion_prediction, transcribed_text, f"Sentiment: {sentiment} (Polarity: {polarity:.2f})", image, music_path,
|
| 272 |
|
| 273 |
# Create the Gradio interface
|
| 274 |
interface = gr.Interface(
|
|
@@ -280,10 +289,10 @@ interface = gr.Interface(
|
|
| 280 |
gr.Label(label="Sentiment Analysis"),
|
| 281 |
gr.Image(type='pil', label="Generated Equirectangular Image"),
|
| 282 |
gr.Audio(label="Generated Music", type="filepath"),
|
| 283 |
-
gr.
|
| 284 |
],
|
| 285 |
title="Affective Virtual Environments",
|
| 286 |
-
description="Create an AVE using your voice. Get emotion prediction, transcription, sentiment analysis, a generated equirectangular image, music, and a
|
| 287 |
)
|
| 288 |
|
| 289 |
interface.launch()
|
|
|
|
| 16 |
import base64
|
| 17 |
import plotly.graph_objects as go
|
| 18 |
from plotly.subplots import make_subplots
|
| 19 |
+
import matplotlib.pyplot as plt
|
| 20 |
+
from mpl_toolkits.mplot3d import Axes3D
|
| 21 |
|
| 22 |
# Load the emotion prediction model
|
| 23 |
def load_emotion_model(model_path):
|
|
|
|
| 54 |
# Function to transcribe audio
|
| 55 |
def transcribe(wav_filepath):
    """Transcribe speech from an audio file using the loaded Whisper model.

    Parameters
    ----------
    wav_filepath : str
        Path to the WAV file to transcribe.

    Returns
    -------
    str
        The concatenated transcript text, or "" if transcription fails.
    """
    try:
        # BUG FIX: the method is `transcribe` — the previous `transscribe`
        # typo raised AttributeError on every call.
        segments, _ = model2.transcribe(wav_filepath, beam_size=5)
        return "".join(segment.text for segment in segments)
    except Exception as e:
        # Best-effort fallback: report the error and return an empty string
        # instead of None so downstream f-strings/joins keep working.
        print("Error transcribing audio:", e)
        return ""
|
|
|
|
| 188 |
# Return a fallback image
|
| 189 |
return Image.new('RGB', (1024, 512), color='white')
|
| 190 |
|
| 191 |
+
# Function to create a sphere with the equirectangular texture using matplotlib
|
| 192 |
+
def create_sphere_with_texture(image):
    """Render an equirectangular texture beside a textured 3D sphere preview.

    Parameters
    ----------
    image : PIL.Image.Image
        Equirectangular texture to display flat and wrap onto a sphere.
        Assumes an 8-bit RGB(A) image — TODO confirm against generate_image.

    Returns
    -------
    str | None
        Path to a temporary PNG with the side-by-side figure, or None on error.
    """
    try:
        # Convert PIL image to a numpy array (rows, cols, channels).
        img_array = np.array(image)

        # Side-by-side figure: flat texture left, 3D sphere right.
        fig = plt.figure(figsize=(12, 6))

        # First subplot: the raw equirectangular image.
        ax1 = fig.add_subplot(121)
        ax1.imshow(img_array)
        ax1.set_title('Equirectangular Texture')
        ax1.axis('off')

        # Second subplot: 3D sphere with the texture mapped on.
        ax2 = fig.add_subplot(122, projection='3d')

        # Spherical angles matching the image dimensions:
        # rows -> polar angle theta, columns -> azimuth phi.
        theta = np.linspace(0, np.pi, img_array.shape[0])
        phi = np.linspace(0, 2 * np.pi, img_array.shape[1])

        # Subsample to a manageable grid so plot_surface stays fast.
        count = 100
        theta_inds = np.linspace(0, img_array.shape[0] - 1, count).round().astype(int)
        phi_inds = np.linspace(0, img_array.shape[1] - 1, count * 2).round().astype(int)
        theta = theta[theta_inds]
        phi = phi[phi_inds]
        img_sampled = img_array[np.ix_(theta_inds, phi_inds)]

        # BUG FIX: use indexing='ij' so the meshgrid has shape
        # (len(theta), len(phi)) == img_sampled.shape[:2].  The default
        # 'xy' indexing transposes the grid to (len(phi), len(theta)),
        # so facecolors no longer lines up with the surface coordinates.
        theta, phi = np.meshgrid(theta, phi, indexing='ij')
        R = 1

        # Spherical -> Cartesian coordinates on a unit sphere.
        x = R * np.sin(theta) * np.cos(phi)
        y = R * np.sin(theta) * np.sin(phi)
        z = R * np.cos(theta)

        # Map pixel values (0-255) to [0, 1] for matplotlib facecolors.
        ax2.plot_surface(x, y, z, facecolors=img_sampled / 255.0,
                         rstride=1, cstride=1)

        # Make the plot look spherical rather than squashed.
        ax2.set_box_aspect([1, 1, 1])  # aspect ratio 1:1:1
        ax2.set_axis_off()
        ax2.set_title('3D Sphere with Texture')

        # Adjust viewing angle.
        ax2.view_init(elev=30, azim=45)

        plt.tight_layout()

        # Reserve a temp file name, then close the handle before savefig so
        # the write also succeeds on platforms that lock open files (Windows).
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp_file:
            output_path = tmp_file.name
        plt.savefig(output_path, dpi=100, bbox_inches='tight')
        plt.close(fig)
        return output_path

    except Exception as e:
        print("Error creating sphere with texture:", e)
        # Return a fallback image path
        return None
|
| 253 |
|
| 254 |
# Function to get predictions
|
| 255 |
def get_predictions(audio_input):
|
|
|
|
| 269 |
# Generate music based on transcription and emotion
|
| 270 |
music_path = generate_music(transcribed_text, emotion_prediction)
|
| 271 |
|
| 272 |
+
# Create sphere with texture visualization
|
| 273 |
+
sphere_image_path = create_sphere_with_texture(image)
|
| 274 |
+
|
| 275 |
+
# Load the sphere image if it was created successfully
|
| 276 |
+
sphere_image = None
|
| 277 |
+
if sphere_image_path:
|
| 278 |
+
sphere_image = Image.open(sphere_image_path)
|
| 279 |
|
| 280 |
+
return emotion_prediction, transcribed_text, f"Sentiment: {sentiment} (Polarity: {polarity:.2f})", image, music_path, sphere_image
|
| 281 |
|
| 282 |
# Create the Gradio interface
|
| 283 |
interface = gr.Interface(
|
|
|
|
| 289 |
gr.Label(label="Sentiment Analysis"),
|
| 290 |
gr.Image(type='pil', label="Generated Equirectangular Image"),
|
| 291 |
gr.Audio(label="Generated Music", type="filepath"),
|
| 292 |
+
gr.Image(type='pil', label="3D Sphere with Texture")
|
| 293 |
],
|
| 294 |
title="Affective Virtual Environments",
|
| 295 |
+
description="Create an AVE using your voice. Get emotion prediction, transcription, sentiment analysis, a generated equirectangular image, music, and a 3D sphere with your texture applied."
|
| 296 |
)
|
| 297 |
|
| 298 |
interface.launch()
|