Update app.py
app.py CHANGED
@@ -81,7 +81,7 @@ def predict_emotion_from_audio(wav_filepath):
     if test_point is not None:
         test_point = np.reshape(test_point, newshape=(1, 40, 1))
         predictions = model.predict(test_point)
-        predicted_emotion_label = np.argmax(predictions[0])
+        predicted_emotion_label = np.argmax(predictions[0])
         return emotions.get(predicted_emotion_label, "Unknown emotion")
     else:
         return "Error: Unable to extract features"
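The reshape to (1, 40, 1) above implies one 40-dimensional feature vector per clip. The commit never shows how test_point is produced; the sketch below is a common recipe (librosa MFCC means), offered as an assumption rather than the author's confirmed code. The function name extract_features and the duration/offset values are hypothetical.

import librosa
import numpy as np

# Hypothetical helper: the diff never shows how test_point is built.
# 40 MFCC means per clip is a common choice that matches the (1, 40, 1) reshape.
def extract_features(wav_filepath, n_mfcc=40):
    y, sr = librosa.load(wav_filepath, duration=3, offset=0.5)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    # Average over time to get a single 40-dim vector
    return np.mean(mfcc.T, axis=0)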
@@ -93,7 +93,7 @@ def predict_emotion_from_audio(wav_filepath):
 def analyze_sentiment(text):
     try:
         if not text or text.strip() == "":
-            return "
+            return "neutral", 0.0
 
         analysis = TextBlob(text)
         polarity = analysis.sentiment.polarity
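The mapping from polarity to a sentiment label (file lines 100-107) falls between this hunk and the next. For context, a minimal sketch of the whole function as it reads after the change; the cutoff values 0.1 and -0.1 are assumptions, since the diff elides the actual thresholds.

from textblob import TextBlob

def analyze_sentiment(text):
    try:
        if not text or text.strip() == "":
            return "neutral", 0.0
        polarity = TextBlob(text).sentiment.polarity  # float in [-1.0, 1.0]
        # Threshold values are illustrative; the commit elides them
        if polarity > 0.1:
            sentiment = "positive"
        elif polarity < -0.1:
            sentiment = "negative"
        else:
            sentiment = "neutral"
        return sentiment, polarity
    except Exception as e:
        print("Error analyzing sentiment:", e)
        return "neutral", 0.0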
@@ -108,15 +108,15 @@ def analyze_sentiment(text):
         return sentiment, polarity
     except Exception as e:
         print("Error analyzing sentiment:", e)
-        return "
+        return "neutral", 0.0
 
-# Function to generate music with MusicGen
+# Function to generate music with MusicGen (using acoustic emotion prediction)
 def generate_music(transcribed_text, emotion_prediction):
     try:
         if processor is None or music_model is None:
             return None
 
-        # Create a prompt that combines the emotion and transcription
+        # Create a prompt that combines the acoustic emotion and transcription
         prompt = f"Background music that is {emotion_prediction} and represents: {transcribed_text}"
 
         # Limit prompt length to avoid model issues
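The body of generate_music (file lines 123-150) is elided between this hunk and the next. The names processor and music_model suggest the documented transformers MusicGen usage; a sketch follows, in which the checkpoint name, the max_new_tokens budget, and the output path are assumptions.

import scipy.io.wavfile
from transformers import AutoProcessor, MusicgenForConditionalGeneration

# Checkpoint is an assumption; the diff only shows the names processor/music_model
processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
music_model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")

def generate_music_sketch(prompt):
    inputs = processor(text=[prompt], padding=True, return_tensors="pt")
    # max_new_tokens=256 yields roughly five seconds of audio
    audio_values = music_model.generate(**inputs, max_new_tokens=256)
    rate = music_model.config.audio_encoder.sampling_rate
    out_path = "musicgen_out.wav"  # hypothetical path
    scipy.io.wavfile.write(out_path, rate=rate, data=audio_values[0, 0].cpu().numpy())
    return out_path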
@@ -151,14 +151,14 @@ def generate_music(transcribed_text, emotion_prediction):
 # --- DeepAI Image Generation (Text2Img) ---
 api_key = os.getenv("DeepAI_api_key")
 
-def generate_image(
+def generate_image(sentiment_prediction, transcribed_text):
     try:
         if not api_key:
             # fallback white image if no API key
             return Image.new('RGB', (1024, 512), color='white')
 
-        # Create the prompt for text2img
-        prompt = f"Generate an equirectangular 360 image texture {
+        # Create the prompt for text2img using SENTIMENT analysis instead of acoustic emotion
+        prompt = f"Generate an equirectangular 360 image texture with {sentiment_prediction} sentiment, representing the idea of: [{transcribed_text}]."
 
         # Make request to DeepAI text2img API
         response = requests.post(
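The hunk cuts off at requests.post(. DeepAI's text2img endpoint takes the prompt as form data and returns JSON containing an output_url; the download-and-resize steps below are assumptions about the elided lines after 164, and fetch_deepai_image is a hypothetical name.

from io import BytesIO

import requests
from PIL import Image

def fetch_deepai_image(prompt, api_key):
    # Endpoint and payload follow DeepAI's documented text2img API
    response = requests.post(
        "https://api.deepai.org/api/text2img",
        data={"text": prompt},
        headers={"api-key": api_key},
        timeout=60,
    )
    response.raise_for_status()
    image_url = response.json()["output_url"]
    # Resizing to 2:1 matches the equirectangular fallback above (an assumption)
    image_bytes = requests.get(image_url, timeout=60).content
    return Image.open(BytesIO(image_bytes)).convert("RGB").resize((1024, 512))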
@@ -204,7 +204,6 @@ def create_texture_and_sphere_preview(image):
     fig.add_trace(go.Image(z=img_array), row=1, col=1)
 
     # Create a 3D sphere for the second subplot
-    # Since we can't directly apply the texture, we'll create a colored sphere
     u = np.linspace(0, 2 * np.pi, 50)
     v = np.linspace(0, np.pi, 25)
     u, v = np.meshgrid(u, v)
@@ -218,7 +217,7 @@ def create_texture_and_sphere_preview(image):
 
     fig.add_trace(go.Surface(
         x=x, y=y, z=z,
-        surfacecolor=z,
+        surfacecolor=z,
         colorscale='Viridis',
         showscale=False,
         opacity=0.8
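The x/y/z computation sits in the lines elided between the two hunks above; given the u/v grid, it is almost certainly the standard unit-sphere parameterization. A self-contained sketch of the two-panel preview follows; the subplot specs and figure assembly are assumptions, since the diff shows only fragments of the function.

import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def sphere_preview_sketch(image):
    fig = make_subplots(rows=1, cols=2,
                        specs=[[{"type": "xy"}, {"type": "surface"}]])
    fig.add_trace(go.Image(z=np.asarray(image)), row=1, col=1)

    # Parametric unit sphere, matching the u/v grid in the hunks above
    u = np.linspace(0, 2 * np.pi, 50)
    v = np.linspace(0, np.pi, 25)
    u, v = np.meshgrid(u, v)
    x, y, z = np.cos(u) * np.sin(v), np.sin(u) * np.sin(v), np.cos(v)

    # Color by height as a stand-in, since the texture is not applied directly
    fig.add_trace(go.Surface(x=x, y=y, z=z, surfacecolor=z,
                             colorscale='Viridis', showscale=False,
                             opacity=0.8), row=1, col=2)
    return fig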
@@ -249,20 +248,19 @@ def create_texture_and_sphere_preview(image):
 
 # Function to get predictions
 def get_predictions(audio_input):
+    # Get acoustic emotion prediction (for music)
     emotion_prediction = predict_emotion_from_audio(audio_input)
-    transcribed_text = transcribe(audio_input)
 
-    #
-
-    emotion_prediction = "Unknown"
+    # Get transcribed text
+    transcribed_text = transcribe(audio_input)
 
-    # Analyze sentiment of transcribed text
+    # Analyze sentiment of transcribed text (for image)
     sentiment, polarity = analyze_sentiment(transcribed_text)
 
-    # Generate image using
-    image = generate_image(
+    # Generate image using SENTIMENT analysis
+    image = generate_image(sentiment, transcribed_text)
 
-    # Generate music
+    # Generate music using ACOUSTIC EMOTION prediction
     music_path = generate_music(transcribed_text, emotion_prediction)
 
     # Create visualization with both texture and sphere
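get_predictions calls a transcribe helper that never appears in the diff. One common way to supply it on a Space is a transformers speech-recognition pipeline; the sketch below assumes that approach, and the Whisper checkpoint name is hypothetical.

from transformers import pipeline

# Hypothetical: the commit never shows how transcribe is implemented
asr = pipeline("automatic-speech-recognition", model="openai/whisper-base")

def transcribe(audio_filepath):
    try:
        return asr(audio_filepath)["text"]
    except Exception as e:
        print("Error transcribing audio:", e)
        return ""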
@@ -275,15 +273,15 @@ interface = gr.Interface(
     fn=get_predictions,
     inputs=gr.Audio(label="Input Audio", type="filepath", sources=["microphone"]),
     outputs=[
-        gr.Label(label="Acoustic Prediction"),
+        gr.Label(label="Acoustic Emotion Prediction (for music)"),
         gr.Label(label="Transcribed Text"),
-        gr.Label(label="Sentiment Analysis"),
+        gr.Label(label="Sentiment Analysis (for image)"),
         gr.Image(type='pil', label="Generated Equirectangular Image"),
         gr.Audio(label="Generated Music", type="filepath"),
         gr.Plot(label="Texture and Sphere Preview")
     ],
     title="Affective Virtual Environments",
-    description="Create an AVE using your voice. Get emotion prediction, transcription, sentiment analysis, a generated equirectangular image, music, and a preview of how it would look as a texture on a sphere."
+    description="Create an AVE using your voice. Get emotion prediction (for music), transcription, sentiment analysis (for image), a generated equirectangular image, music, and a preview of how it would look as a texture on a sphere."
 )
 
 interface.launch()