Update app.py
app.py CHANGED
@@ -19,6 +19,11 @@ import base64
import plotly.graph_objects as go
from plotly.subplots import make_subplots

+
+
+
+
+
# Load the emotion prediction model
def load_emotion_model(model_path):
    try:
@@ -184,7 +189,7 @@ def generate_image(sentiment_prediction, transcribed_text):
    try:
        if not api_key:
            # fallback white image if no API key
-            return Image.new('RGB', (
+            return Image.new('RGB', (1024, 512), color='white')

        # Get specific prompt based on sentiment
        prompt = get_image_prompt(sentiment_prediction, transcribed_text)
@@ -194,8 +199,8 @@ def generate_image(sentiment_prediction, transcribed_text):
            "https://api.deepai.org/api/text2img",
            data={
                'text': prompt,
-                'width':
-                'height':
+                'width': 1024,
+                'height': 512,
                'image_generator_version': 'hd'
            },
            headers={'api-key': api_key}
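The 1024x512 white fallback is now built at three separate return sites, and the same size is sent to DeepAI. A small helper could keep the 2:1 equirectangular dimensions in one place; this is only a sketch, and the constant and function names are illustrative rather than part of the commit:

    EQUIRECT_SIZE = (1024, 512)  # width:height = 2:1, the usual equirectangular aspect

    def fallback_image():
        # Plain white placeholder used whenever DeepAI generation fails or no API key is set
        return Image.new('RGB', EQUIRECT_SIZE, color='white')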
@@ -209,44 +214,109 @@ def generate_image(sentiment_prediction, transcribed_text):
        else:
            print("Error in DeepAI response:", data)
            # Return a fallback image
-            return Image.new('RGB', (
+            return Image.new('RGB', (1024, 512), color='white')
    except Exception as e:
        print("Error generating image:", e)
        # Return a fallback image
-        return Image.new('RGB', (
+        return Image.new('RGB', (1024, 512), color='white')

- # Function to
-
-
+ # Function to create a visualization with both the equirectangular image and a 3D sphere
+ # Function to create a visualization with both the equirectangular image and a 3D sphere
+ def create_texture_and_sphere_preview(image):
    try:
-        #
-
+        # Convert PIL image to numpy array
+        img_array = np.array(image)
+        height, width = img_array.shape[0], img_array.shape[1]
+
+        # Create a subplot with the equirectangular image and a 3D sphere
+        fig = make_subplots(
+            rows=1, cols=2,
+            subplot_titles=("Equirectangular Texture", "3D Sphere with Texture Mapping"),
+            specs=[[{"type": "image"}, {"type": "scatter3d"}]],
+            horizontal_spacing=0.1
+        )
+
+        # Add the equirectangular image to the first subplot
+        fig.add_trace(go.Image(z=img_array), row=1, col=1)
+
+        # Create sphere coordinates
+        u_res, v_res = 50, 25
+        u = np.linspace(0, 2 * np.pi, u_res)
+        v = np.linspace(0, np.pi, v_res)
+        u, v = np.meshgrid(u, v)
+
+        # Convert spherical coordinates to Cartesian coordinates
+        x = np.sin(v) * np.cos(u)
+        y = np.sin(v) * np.sin(u)
+        z = np.cos(v)
+
+        # Sample colors from the equirectangular image based on UV coordinates
+        # This approximates texture mapping by sampling the image at the correct UV coordinates
+        texture_colors = np.zeros((v_res, u_res, 3), dtype=np.uint8)
+
+        for i in range(v_res):
+            for j in range(u_res):
+                # Convert spherical coordinates to image coordinates
+                img_x = int((u[i, j] / (2 * np.pi)) * (width - 1))
+                img_y = int((v[i, j] / np.pi) * (height - 1))
+
+                # Ensure coordinates are within bounds
+                img_x = max(0, min(img_x, width - 1))
+                img_y = max(0, min(img_y, height - 1))
+
+                # Get color from image
+                if len(img_array.shape) == 3:  # RGB image
+                    texture_colors[i, j] = img_array[img_y, img_x, :3]
+                else:  # Grayscale image
+                    texture_colors[i, j] = [img_array[img_y, img_x]] * 3
+
+        # Convert colors to Plotly format (normalized to [0,1])
+        surface_colors = texture_colors.astype(float) / 255.0

-        #
-
+        # Create surface with sampled colors
+        fig.add_trace(go.Surface(
+            x=x, y=y, z=z,
+            surfacecolor=surface_colors,
+            showscale=False,
+            opacity=1.0,
+            lighting=dict(ambient=0.8, diffuse=0.8, specular=0.1, roughness=0.5),
+            lightposition=dict(x=100, y=100, z=100)
+        ), row=1, col=2)

-        #
-
-
-
-
-
-
-
-
+        # Update layout
+        fig.update_layout(
+            height=500,
+            title_text="Equirectangular Texture and 3D Sphere Preview",
+            showlegend=False,
+            scene2=dict(
+                xaxis=dict(visible=False, showticklabels=False),
+                yaxis=dict(visible=False, showticklabels=False),
+                zaxis=dict(visible=False, showticklabels=False),
+                aspectmode='data',
+                camera=dict(
+                    eye=dict(x=1.8, y=1.8, z=1.8)
+                ),
+                bgcolor='rgba(0,0,0,0)'
+            )
+        )
+
+        # Update axes for the image subplot
+        fig.update_xaxes(visible=False, row=1, col=1)
+        fig.update_yaxes(visible=False, row=1, col=1)
+
+        return fig

-        return chunks
    except Exception as e:
-        print("Error
-        return
+        print("Error creating texture and sphere preview:", e)
+        return go.Figure()

- # Function to
- def
+ # Function to get predictions
+ def get_predictions(audio_input):
    # Get acoustic emotion prediction (for music)
-    emotion_prediction = predict_emotion_from_audio(
+    emotion_prediction = predict_emotion_from_audio(audio_input)

    # Get transcribed text
-    transcribed_text = transcribe(
+    transcribed_text = transcribe(audio_input)

    # Analyze sentiment of transcribed text (for image)
    sentiment, polarity = analyze_sentiment(transcribed_text)
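The per-pixel Python loop added above can also be expressed as whole-array operations. A minimal NumPy sketch of an equivalent lookup (assuming the same u, v, img_array, width and height as in the new function; this is not part of the commit):

    # Map sphere angles to integer pixel coordinates for the whole grid at once
    img_x = np.clip(((u / (2 * np.pi)) * (width - 1)).astype(int), 0, width - 1)
    img_y = np.clip(((v / np.pi) * (height - 1)).astype(int), 0, height - 1)

    if img_array.ndim == 3:  # RGB image
        texture_colors = img_array[img_y, img_x, :3]
    else:  # grayscale image
        texture_colors = np.repeat(img_array[img_y, img_x][..., None], 3, axis=-1)

    surface_colors = texture_colors.astype(float) / 255.0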
@@ -257,61 +327,25 @@ def process_chunk(chunk_path):
    # Generate music using ACOUSTIC EMOTION prediction with specific prompt
    music_path = generate_music(transcribed_text, emotion_prediction)

-
-
-        "transcription": transcribed_text,
-        "sentiment": f"Sentiment: {sentiment} (Polarity: {polarity:.2f})",
-        "image": image,
-        "music": music_path
-    }
-
- # Function to get predictions for all chunks
- def get_predictions(audio_input):
-    # Split audio into 5-second chunks
-    chunks = split_audio_into_chunks(audio_input, chunk_length=5)
-
-    if not chunks:
-        return "Error: Could not split audio into chunks", "", "", None, None
-
-    # Process each chunk
-    results = []
-    for i, chunk_path in enumerate(chunks):
-        print(f"Processing chunk {i+1}/{len(chunks)}")
-        result = process_chunk(chunk_path)
-        results.append(result)
-
-    # Prepare outputs for Gradio
-    emotion_outputs = [f"Chunk {i+1}: {r['emotion']}" for i, r in enumerate(results)]
-    transcription_outputs = [f"Chunk {i+1}: {r['transcription']}" for i, r in enumerate(results)]
-    sentiment_outputs = [f"Chunk {i+1}: {r['sentiment']}" for i, r in enumerate(results)]
-
-    # Combine all outputs into strings
-    emotion_str = "\n".join(emotion_outputs)
-    transcription_str = "\n".join(transcription_outputs)
-    sentiment_str = "\n".join(sentiment_outputs)
-
-    # Create a gallery of images
-    images = [r["image"] for r in results]
-
-    # Return first music file for demo (Gradio can only display one audio file)
-    # In a real application, you might want to combine all music chunks
-    music_path = results[0]["music"] if results[0]["music"] else None
+    # Create visualization with both texture and sphere
+    preview_fig = create_texture_and_sphere_preview(image)

-    return
+    return emotion_prediction, transcribed_text, f"Sentiment: {sentiment} (Polarity: {polarity:.2f})", image, music_path, preview_fig

# Create the Gradio interface
interface = gr.Interface(
    fn=get_predictions,
    inputs=gr.Audio(label="Input Audio", type="filepath", sources=["microphone"]),
    outputs=[
-        gr.
-        gr.
-        gr.
-        gr.
-        gr.Audio(label="Generated Music
+        gr.Label(label="Acoustic Emotion Prediction (for music)"),
+        gr.Label(label="Transcribed Text"),
+        gr.Label(label="Sentiment Analysis (for image)"),
+        gr.Image(type='pil', label="Generated Equirectangular Image"),
+        gr.Audio(label="Generated Music", type="filepath"),
+        gr.Plot(label="Texture and Sphere Preview")
    ],
-    title="Affective Virtual Environments
-    description="
+    title="Affective Virtual Environments",
+    description="Create an AVE using your voice. Get emotion prediction (for music), transcription, sentiment analysis (for image), a generated equirectangular image, music, and a preview of how it would look as a texture on a sphere."
)

- interface.launch()
+ interface.launch()
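The new preview function can be exercised outside Gradio to check the texture mapping quickly. A small sketch (the gradient test image and output file name are illustrative, not part of the commit):

    from PIL import Image
    import numpy as np

    # Horizontal-gradient test image at the same 2:1 equirectangular aspect as the generated images
    gradient = np.tile(np.linspace(0, 255, 1024, dtype=np.uint8), (512, 1))
    test_image = Image.merge("RGB", [Image.fromarray(gradient)] * 3)

    fig = create_texture_and_sphere_preview(test_image)
    fig.write_html("sphere_preview.html")  # open in a browser to inspect the textured sphere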