Update app.py
app.py
@@ -19,11 +19,6 @@ import base64
 import plotly.graph_objects as go
 from plotly.subplots import make_subplots

-
-
-
-
-
 # Load the emotion prediction model
 def load_emotion_model(model_path):
     try:
@@ -189,7 +184,7 @@ def generate_image(sentiment_prediction, transcribed_text):
     try:
         if not api_key:
             # fallback white image if no API key
-            return Image.new('RGB', (
+            return Image.new('RGB', (512, 258), color='white')

         # Get specific prompt based on sentiment
         prompt = get_image_prompt(sentiment_prediction, transcribed_text)
@@ -199,8 +194,8 @@ def generate_image(sentiment_prediction, transcribed_text):
             "https://api.deepai.org/api/text2img",
             data={
                 'text': prompt,
-                'width':
-                'height':
+                'width': 512,
+                'height': 258,
                 'image_generator_version': 'hd'
             },
             headers={'api-key': api_key}
@@ -214,109 +209,44 @@ def generate_image(sentiment_prediction, transcribed_text):
         else:
             print("Error in DeepAI response:", data)
             # Return a fallback image
-            return Image.new('RGB', (
+            return Image.new('RGB', (512, 258), color='white')
     except Exception as e:
         print("Error generating image:", e)
         # Return a fallback image
-        return Image.new('RGB', (
+        return Image.new('RGB', (512, 258), color='white')

-# Function to
-
-
+# Function to split audio into chunks
+def split_audio_into_chunks(audio_path, chunk_length=5):
+    """Split audio into chunks of specified length in seconds"""
     try:
-        #
-
-        height, width = img_array.shape[0], img_array.shape[1]
-
-        # Create a subplot with the equirectangular image and a 3D sphere
-        fig = make_subplots(
-            rows=1, cols=2,
-            subplot_titles=("Equirectangular Texture", "3D Sphere with Texture Mapping"),
-            specs=[[{"type": "image"}, {"type": "scatter3d"}]],
-            horizontal_spacing=0.1
-        )
-
-        # Add the equirectangular image to the first subplot
-        fig.add_trace(go.Image(z=img_array), row=1, col=1)
-
-        # Create sphere coordinates
-        u_res, v_res = 50, 25
-        u = np.linspace(0, 2 * np.pi, u_res)
-        v = np.linspace(0, np.pi, v_res)
-        u, v = np.meshgrid(u, v)
-
-        # Convert spherical coordinates to Cartesian coordinates
-        x = np.sin(v) * np.cos(u)
-        y = np.sin(v) * np.sin(u)
-        z = np.cos(v)
-
-        # Sample colors from the equirectangular image based on UV coordinates
-        # This approximates texture mapping by sampling the image at the correct UV coordinates
-        texture_colors = np.zeros((v_res, u_res, 3), dtype=np.uint8)
-
-        for i in range(v_res):
-            for j in range(u_res):
-                # Convert spherical coordinates to image coordinates
-                img_x = int((u[i, j] / (2 * np.pi)) * (width - 1))
-                img_y = int((v[i, j] / np.pi) * (height - 1))
-
-                # Ensure coordinates are within bounds
-                img_x = max(0, min(img_x, width - 1))
-                img_y = max(0, min(img_y, height - 1))
-
-                # Get color from image
-                if len(img_array.shape) == 3:  # RGB image
-                    texture_colors[i, j] = img_array[img_y, img_x, :3]
-                else:  # Grayscale image
-                    texture_colors[i, j] = [img_array[img_y, img_x]] * 3
-
-        # Convert colors to Plotly format (normalized to [0,1])
-        surface_colors = texture_colors.astype(float) / 255.0
+        # Load audio file
+        y, sr = librosa.load(audio_path, sr=None)

-        #
-        fig.add_trace(go.Surface(
-            x=x, y=y, z=z,
-            surfacecolor=surface_colors,
-            showscale=False,
-            opacity=1.0,
-            lighting=dict(ambient=0.8, diffuse=0.8, specular=0.1, roughness=0.5),
-            lightposition=dict(x=100, y=100, z=100)
-        ), row=1, col=2)
+        # Calculate number of samples per chunk
+        samples_per_chunk = chunk_length * sr

-        #
-
-
-
-
-
-
-
-
-            aspectmode='data',
-            camera=dict(
-                eye=dict(x=1.8, y=1.8, z=1.8)
-            ),
-            bgcolor='rgba(0,0,0,0)'
-        )
-        )
-
-        # Update axes for the image subplot
-        fig.update_xaxes(visible=False, row=1, col=1)
-        fig.update_yaxes(visible=False, row=1, col=1)
-
-        return fig
+        # Split into chunks
+        chunks = []
+        for i in range(0, len(y), samples_per_chunk):
+            chunk = y[i:i + samples_per_chunk]
+            if len(chunk) >= sr:  # Ensure chunk has at least 1 second of audio
+                # Save chunk to temporary file
+                with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
+                    scipy.io.wavfile.write(tmp_file.name, sr, chunk)
+                    chunks.append(tmp_file.name)

+        return chunks
     except Exception as e:
-        print("Error
-        return
+        print("Error splitting audio:", e)
+        return []

-# Function to
-def
+# Function to process a single chunk
+def process_chunk(chunk_path):
     # Get acoustic emotion prediction (for music)
-    emotion_prediction = predict_emotion_from_audio(
+    emotion_prediction = predict_emotion_from_audio(chunk_path)

     # Get transcribed text
-    transcribed_text = transcribe(
+    transcribed_text = transcribe(chunk_path)

     # Analyze sentiment of transcribed text (for image)
     sentiment, polarity = analyze_sentiment(transcribed_text)
@@ -327,25 +257,61 @@ def get_predictions(audio_input):
     # Generate music using ACOUSTIC EMOTION prediction with specific prompt
     music_path = generate_music(transcribed_text, emotion_prediction)

-
-
+    return {
+        "emotion": emotion_prediction,
+        "transcription": transcribed_text,
+        "sentiment": f"Sentiment: {sentiment} (Polarity: {polarity:.2f})",
+        "image": image,
+        "music": music_path
+    }
+
+# Function to get predictions for all chunks
+def get_predictions(audio_input):
+    # Split audio into 5-second chunks
+    chunks = split_audio_into_chunks(audio_input, chunk_length=5)
+
+    if not chunks:
+        return "Error: Could not split audio into chunks", "", "", None, None
+
+    # Process each chunk
+    results = []
+    for i, chunk_path in enumerate(chunks):
+        print(f"Processing chunk {i+1}/{len(chunks)}")
+        result = process_chunk(chunk_path)
+        results.append(result)
+
+    # Prepare outputs for Gradio
+    emotion_outputs = [f"Chunk {i+1}: {r['emotion']}" for i, r in enumerate(results)]
+    transcription_outputs = [f"Chunk {i+1}: {r['transcription']}" for i, r in enumerate(results)]
+    sentiment_outputs = [f"Chunk {i+1}: {r['sentiment']}" for i, r in enumerate(results)]
+
+    # Combine all outputs into strings
+    emotion_str = "\n".join(emotion_outputs)
+    transcription_str = "\n".join(transcription_outputs)
+    sentiment_str = "\n".join(sentiment_outputs)
+
+    # Create a gallery of images
+    images = [r["image"] for r in results]
+
+    # Return first music file for demo (Gradio can only display one audio file)
+    # In a real application, you might want to combine all music chunks
+    music_path = results[0]["music"] if results[0]["music"] else None

-    return
+    return emotion_str, transcription_str, sentiment_str, images, music_path

 # Create the Gradio interface
 interface = gr.Interface(
     fn=get_predictions,
     inputs=gr.Audio(label="Input Audio", type="filepath", sources=["microphone"]),
     outputs=[
-        gr.
-        gr.
-        gr.
-        gr.
-        gr.Audio(label="Generated Music", type="filepath")
-        gr.Plot(label="Texture and Sphere Preview")
+        gr.Textbox(label="Acoustic Emotion Predictions (for music)", lines=5),
+        gr.Textbox(label="Transcribed Texts", lines=5),
+        gr.Textbox(label="Sentiment Analyses (for image)", lines=5),
+        gr.Gallery(label="Generated Equirectangular Images", columns=2),
+        gr.Audio(label="Generated Music (First Chunk)", type="filepath")
     ],
-    title="Affective Virtual Environments",
-    description="
+    title="Affective Virtual Environments - Chunked Processing",
+    description="Process audio in 5-second chunks. Get emotion predictions, transcriptions, sentiment analyses, generated equirectangular images, and music for each chunk."
 )

 interface.launch()
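
Note on the temporary wav format in the new split_audio_into_chunks: librosa.load yields float32 samples in [-1.0, 1.0], and scipy.io.wavfile.write stores such an array as a 32-bit float WAV. If predict_emotion_from_audio or transcribe expects 16-bit PCM input, the write could be routed through a conversion like this sketch (write_chunk_int16 is a hypothetical helper, not part of the commit):

import numpy as np
import scipy.io.wavfile

def write_chunk_int16(path, sr, chunk):
    # librosa gives float32 in [-1.0, 1.0]; scale and cast to 16-bit PCM
    # so the chunk files are plain integer wavs readable by most tools.
    pcm = (np.clip(chunk, -1.0, 1.0) * 32767).astype(np.int16)
    scipy.io.wavfile.write(path, sr, pcm)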
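Note on temp-file lifetime: the chunks are created with NamedTemporaryFile(delete=False) and nothing in the new code removes them, so every request leaves wav files behind in the temp directory. A minimal cleanup sketch (cleanup_chunks is a hypothetical helper; calling it in a try/finally inside get_predictions would be one natural placement):

import os

def cleanup_chunks(chunk_paths):
    # Delete the wav files written by split_audio_into_chunks once
    # all chunks have been processed.
    for path in chunk_paths:
        try:
            os.remove(path)
        except OSError:
            pass  # file already gone or not removable; nothing more to do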
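Note on the single-audio output: the new get_predictions returns only the first chunk's music, and the in-code comment already suggests combining all chunks in a real application. A minimal sketch of that combination, assuming each generate_music result is a wav file path and that 32 kHz is an acceptable common sample rate (soundfile, a librosa dependency, is assumed importable; combine_music_chunks is hypothetical):

import numpy as np
import librosa
import soundfile as sf

def combine_music_chunks(music_paths, out_path="combined_music.wav", sr=32000):
    # Resample every generated clip to one common rate and join end-to-end.
    segments = [librosa.load(p, sr=sr)[0] for p in music_paths if p]
    if not segments:
        return None
    sf.write(out_path, np.concatenate(segments), sr)
    return out_path

get_predictions could then return combine_music_chunks([r["music"] for r in results]) in place of only the first chunk's file.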