jfforero committed
Commit f3b54b2 · verified · 1 Parent(s): 9b34f1a

Update app.py

Files changed (1)
  1. app.py +90 -71
app.py CHANGED
@@ -30,6 +30,8 @@ import base64
 from io import BytesIO
 import struct
 import cv2
+import shutil
+from datetime import datetime
 
 # Load the emotion prediction model
 def load_emotion_model(model_path):
@@ -306,7 +308,7 @@ def upscale_image(image, target_width=4096, target_height=2048):
     )
     return Image.fromarray(upscaled)
 
-# ADD THE MISSING generate_image FUNCTION HERE
+# Function to generate image using DeepAI API
 def generate_image(sentiment_prediction, transcribed_text, chunk_idx, total_chunks):
     try:
         if not api_key:
@@ -514,14 +516,14 @@ def create_360_viewer_html(image_paths, audio_paths, output_path):
     else:
         audio_data_list.append(None)  # Placeholder for chunks without audio
 
     # Create the HTML content
     html_content = f"""
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
-       <title>360 Panorama Viewer with Audio</title>
+       <title>My AVE - 360 Panorama Viewer with Audio</title>
        <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/pannellum@2.5.6/build/pannellum.css"/>
        <style>
        body {{
@@ -676,7 +678,6 @@ def create_360_viewer_html(image_paths, audio_paths, output_path):
 
     return output_path
 
-
 # Update the process_and_display function
 def process_and_display(audio_input, generate_audio, chunk_duration):
     # Validate chunk duration
@@ -691,7 +692,7 @@ def process_and_display(audio_input, generate_audio, chunk_duration):
        <style>@keyframes spin {{ 0% {{ transform: rotate(0deg); }} 100% {{ transform: rotate(360deg); }} }}</style>
        <p style="font-size: 14px; color: #4a4a4a;">This may take several minutes depending on the audio length...</p>
    </div>
-    """)] + [gr.Group(visible=False)] * len(group_components) + [None] * (len(output_containers) * 6) + [None, ""]
+    """)] + [gr.Group(visible=False)] * len(group_components) + [None] * (len(output_containers) * 6) + [None, None, ""]
 
     results = get_predictions(audio_input, generate_audio, chunk_duration)
 
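Note: `process_and_display` is a generator, so the first `yield` above pushes an interim "loading" state to every wired output and a later `yield` overwrites it; the extra `None` keeps the placeholder list the same length as the output list now that a download button sits among the outputs. A toy sketch of the pattern (the `slow_task`, `status`, and `result` names are illustrative, not from this commit):

```python
import time
import gradio as gr

def slow_task():
    # First yield: interim state, rendered immediately
    yield "<p>Working...</p>", ""
    time.sleep(2)
    # Final yield: overwrites the interim state
    yield "", "done"

with gr.Blocks() as demo:
    status = gr.HTML()
    result = gr.Textbox()
    gr.Button("Run").click(fn=slow_task, outputs=[status, result])

demo.launch()
```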
@@ -729,12 +730,47 @@ def process_and_display(audio_input, generate_audio, chunk_duration):
 
     # Create 360 viewer HTML if we have 360 images
     viewer_html_path = None
+    download_button_html = ""
     if all_360_images:
-        with tempfile.NamedTemporaryFile(suffix=".html", delete=False) as tmp_file:
-            viewer_html_path = create_360_viewer_html(all_360_images, all_music_paths, tmp_file.name)
-
-    # Hide loading indicator and show results
-    yield [gr.HTML("")] + group_visibility + outputs + [viewer_html_path, ""]
+        # Create a timestamp for unique filenames
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        html_filename = f"MyAVE_{timestamp}.html"
+
+        # Create a temporary directory for our output
+        output_dir = tempfile.mkdtemp()
+        viewer_html_path = os.path.join(output_dir, html_filename)
+
+        # Create the HTML file
+        create_360_viewer_html(all_360_images, all_music_paths, viewer_html_path)
+
+        # Create a beautiful download button for the Gradio interface
+        download_button_html = f"""
+        <div style="text-align: center; margin: 20px;">
+            <a href="/file={viewer_html_path}" download="MyAVE.html">
+                <button style="
+                    background: linear-gradient(to bottom, #4CAF50, #45a049);
+                    color: white;
+                    border: none;
+                    padding: 15px 30px;
+                    text-align: center;
+                    text-decoration: none;
+                    display: inline-block;
+                    font-size: 18px;
+                    margin: 10px 2px;
+                    cursor: pointer;
+                    border-radius: 25px;
+                    box-shadow: 0 4px 8px rgba(0,0,0,0.2);
+                    transition: all 0.3s ease;
+                ">
+                    📥 Download Complete AVE Experience
+                </button>
+            </a>
+            <p style="font-size: 14px; color: #4a4a4a;">Download the complete 360° experience with all chunks</p>
+        </div>
+        """
+
+    # After processing, return the results along with other outputs
+    yield [gr.HTML("")] + group_visibility + outputs + [viewer_html_path, gr.HTML(download_button_html), js_output]
 
 # Update the clear_all function to handle the new outputs
 def clear_all():
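The rewritten branch swaps a bare `NamedTemporaryFile` for a fresh temp directory plus a timestamped filename, then links to the result through Gradio's `/file=` route. A minimal sketch of that pattern, assuming a hypothetical `write_viewer_html` helper; note that recent Gradio versions only serve `/file=` paths from their allowed set, so the file may also need to be returned through a `gr.File` output (as this commit does) or whitelisted via `launch(allowed_paths=[...])`:

```python
import os
import tempfile
from datetime import datetime

def write_viewer_html(html_text: str) -> str:
    """Hypothetical helper: write html_text under a unique temp path."""
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    out_dir = tempfile.mkdtemp()  # fresh directory, so filenames never collide
    path = os.path.join(out_dir, f"MyAVE_{timestamp}.html")
    with open(path, "w", encoding="utf-8") as f:
        f.write(html_text)
    # The commit then links to this path as: <a href="/file={path}" download="MyAVE.html">
    return path
```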
@@ -745,7 +781,8 @@ def clear_all():
     outputs.extend([gr.Group(visible=False)] * len(group_components))
 
     # For all output containers (set to None)
-    outputs.extend([None] * (len(output_containers) * 6))
+    for _ in output_containers:
+        outputs.extend([None, None, None, None, None, None])  # emotion, transcription, sentiment, image, image_360, music
 
     # For loading indicator (empty HTML)
     outputs.append(gr.HTML(""))
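`clear_all` has to emit exactly one value per component wired into `clear_btn.click(outputs=[...])`, in the same order; the explicit six-`None` loop makes that correspondence easier to audit than the old multiplication. A toy illustration of the contract (component names are illustrative, not from this commit):

```python
import gradio as gr

with gr.Blocks() as demo:
    text_out = gr.Textbox()
    file_out = gr.File()
    status = gr.HTML()

    def reset():
        # One value per wired output component, in the same order
        return "", None, ""

    gr.Button("Clear").click(fn=reset, outputs=[text_out, file_out, status])

demo.launch()
```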
@@ -759,6 +796,9 @@ def clear_all():
     # For viewer (set to None)
     outputs.append(None)
 
+    # For download button (empty HTML)
+    outputs.append(gr.HTML(""))
+
     # For JavaScript output (empty)
     outputs.append("")
 
@@ -771,9 +811,28 @@ def load_example_audio(example_name):
     return None
 
 # Create the Gradio interface with proper output handling
-
-# Update the Gradio interface with a more prominent download button
-with gr.Blocks(title="Affective Virtual Environments - Chunked Processing") as interface:
+with gr.Blocks(title="Affective Virtual Environments - Chunked Processing", css="""
+    .download-button {
+        background: linear-gradient(to bottom, #4CAF50, #45a049) !important;
+        color: white !important;
+        border: none !important;
+        padding: 15px 30px !important;
+        text-align: center !important;
+        text-decoration: none !important;
+        display: inline-block !important;
+        font-size: 18px !important;
+        margin: 10px 2px !important;
+        cursor: pointer !important;
+        border-radius: 25px !important;
+        box-shadow: 0 4px 8px rgba(0,0,0,0.2) !important;
+        transition: all 0.3s ease !important;
+    }
+    .download-button:hover {
+        background: linear-gradient(to bottom, #45a049, #4CAF50) !important;
+        box-shadow: 0 6px 12px rgba(0,0,0,0.3) !important;
+        transform: translateY(-2px) !important;
+    }
+""") as interface:
     gr.Markdown("# Affective Virtual Environments")
     gr.Markdown("Create an AVE using your voice. Audio is split into chunks, with separate predictions and generations for each segment.")
 
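`gr.Blocks(css=...)` injects the stylesheet into the page, but class-based rules like `.download-button` only fire if some component carries that class via `elem_classes`; the inline-styled HTML button built in `process_and_display` above does not appear to use it. A minimal sketch of the class hookup (illustrative names):

```python
import gradio as gr

CSS = """
.download-button { background: #4CAF50 !important; color: white !important; }
"""

with gr.Blocks(css=CSS) as demo:
    # elem_classes attaches the CSS class to this component's wrapper
    gr.Button("Download", elem_classes=["download-button"])

demo.launch()
```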
@@ -813,13 +872,7 @@ with gr.Blocks(title="Affective Virtual Environments - Chunked Processing") as i
     clear_btn = gr.Button("Clear All", variant="secondary")
 
     # Add a loading indicator
-    loading_indicator = gr.HTML("""
-    <div id="loading" style="display: none; text-align: center; margin: 20px;">
-        <p style="font-size: 18px; color: #4a4a4a;">Processing audio chunks...</p>
-        <div style="border: 4px solid #f3f3f3; border-top: 4px solid #3498db; border-radius: 50%; width: 30px; height: 30px; animation: spin 2s linear infinite; margin: 0 auto;"></div>
-        <style>@keyframes spin { 0% { transform: rotate(0deg); } 100% { transform: rotate(360deg); } }</style>
-    </div>
-    """)
+    loading_indicator = gr.HTML("")
 
     # Create output components for each chunk type
     output_containers = []
@@ -838,7 +891,7 @@ with gr.Blocks(title="Affective Virtual Environments - Chunked Processing") as i
                 image_360_output = gr.File(label="Download 360 Image", type="filepath")
             with gr.Row():
                 audio_output = gr.Audio(label="Generated Music")
-            gr.HTML("<hr style='margin: 20px 0; border: 1px solid #ccc;'>")
+            gr.Markdown("---")  # Horizontal line using Markdown
 
         group_components.append(chunk_group)
         output_containers.append({
@@ -850,47 +903,16 @@ with gr.Blocks(title="Affective Virtual Environments - Chunked Processing") as i
             'music': audio_output
         })
 
-    # Add component for 360 viewer with a more prominent download button
-    with gr.Group(visible=False) as viewer_group:
-        gr.Markdown("### 🎯 360 Viewer Results")
-        gr.Markdown("Your interactive 360 viewer is ready! Download the HTML file to view your generated environments in a web browser.")
-
-        # Create a row with a prominent download button
-        with gr.Row():
-            viewer_html_output = gr.File(
-                label="360 Viewer HTML File",
-                type="filepath",
-                interactive=False,
-                elem_classes=["prominent-download"]
-            )
-
-        # Add a custom CSS class to make the download button more prominent
-        gr.HTML("""
-        <style>
-        .prominent-download .gr-button {
-            background: linear-gradient(45deg, #FF3366, #FF9933) !important;
-            color: white !important;
-            font-size: 18px !important;
-            font-weight: bold !important;
-            padding: 15px 25px !important;
-            border-radius: 8px !important;
-            border: none !important;
-            box-shadow: 0 4px 8px rgba(0,0,0,0.2) !important;
-            transition: all 0.3s ease !important;
-        }
-        .prominent-download .gr-button:hover {
-            transform: scale(1.05) !important;
-            box-shadow: 0 6px 12px rgba(0,0,0,0.3) !important;
-        }
-        .prominent-download .gr-button:before {
-            content: "⬇️ " !important;
-            margin-right: 8px !important;
-        }
-        </style>
-        """)
-
-    # Add the viewer group to the group components
-    group_components.append(viewer_group)
+    # Add component for 360 viewer
+    viewer_html_output = gr.File(
+        label="Complete AVE Experience (HTML)",
+        type="filepath",
+        interactive=False,
+        visible=False
+    )
+
+    # Add a beautiful download button component
+    download_button = gr.HTML("")
 
     # Add a hidden HTML component for JavaScript execution
     js_output = gr.HTML(visible=False)
@@ -917,7 +939,7 @@ with gr.Blocks(title="Affective Virtual Environments - Chunked Processing") as i
             container['image'],
             container['image_360'],
             container['music']
-        ]] + [viewer_html_output, js_output]
+        ]] + [viewer_html_output, download_button, js_output]
     )
 
     clear_btn.click(
@@ -930,7 +952,7 @@ with gr.Blocks(title="Affective Virtual Environments - Chunked Processing") as i
             container['image'],
             container['image_360'],
             container['music']
-        ]] + [loading_indicator, chunk_duration_input, example_selector, viewer_html_output, js_output]
+        ]] + [loading_indicator, chunk_duration_input, example_selector, viewer_html_output, download_button, js_output]
     )
 
     load_example_btn.click(
@@ -938,10 +960,7 @@ with gr.Blocks(title="Affective Virtual Environments - Chunked Processing") as i
         inputs=[example_selector],
         outputs=[audio_input, example_selector]
     )
-
-    # Check if we're running on Hugging Face Spaces
-    is_spaces = os.getenv('SPACE_ID') is not None
-
-    # Launch with appropriate settings
-    interface.launch(share=True)  # Only share when not on Spaces
+
+# Launch the interface
+interface.launch()
 
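The removed launch block computed `is_spaces` but never used it, and its comment contradicted the unconditional `share=True`; plain `interface.launch()` is the sensible default on Spaces, where share links are not created anyway. If environment-conditional launching were ever wanted, a sketch along these lines would do it (assumes the `interface` object from the diff; Hugging Face Spaces sets `SPACE_ID` in the environment):

```python
import os

# Hugging Face Spaces sets SPACE_ID in the container environment
if os.getenv("SPACE_ID") is not None:
    interface.launch()            # Spaces provides the public URL itself
else:
    interface.launch(share=True)  # local run: request a temporary share link
```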