Update app.py
app.py
CHANGED
@@ -33,76 +33,470 @@ import cv2
 import shutil
 from datetime import datetime
 
-#
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    group_visibility = []
-    all_360_images = []  # Collect all 360 images for the viewer
-    all_music_paths = []  # Collect all music paths for the viewer
-
-#
-
-
-
-
-
-                result['transcription'],
-                result['sentiment'],
-                result['image'],
-                result['image_360'],
-                result['music']
-            ])
-            # Collect the 360-processed images and music
-            if result['image_360']:
-                all_360_images.append(result['image_360'])  # Use the 360-processed image
-                all_music_paths.append(result['music'])  # Can be None if no music generated
-        else:
-            # If we have more results than containers, just extend with None
-            group_visibility.append(gr.Group(visible=False))
-            outputs.extend([None] * 6)
-
-
-
-
-
-
-
-
-
-
-
-
-
-#
-
-
-
-#
-
-
-
-
-
-# Update the create_360_viewer_html function to include a download button in the HTML itself
+# Load the emotion prediction model
+def load_emotion_model(model_path):
+    try:
+        model = load_model(model_path)
+        print("Emotion model loaded successfully")
+        return model
+    except Exception as e:
+        print("Error loading emotion prediction model:", e)
+        return None
+
+model_path = 'mymodel_SER_LSTM_RAVDESS.h5'
+model = load_emotion_model(model_path)
+
+# Initialize WhisperModel
+model_size = "small"
+model2 = WhisperModel(model_size, device="cpu", compute_type="int8")
+
+# Load MusicGen model
+def load_musicgen_model():
+    try:
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+        processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
+        music_model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
+        music_model.to(device)
+        print("MusicGen model loaded successfully")
+        return processor, music_model, device
+    except Exception as e:
+        print("Error loading MusicGen model:", e)
+        return None, None, None
+
+processor, music_model, device = load_musicgen_model()
+
+# Function to chunk audio into segments
+def chunk_audio(audio_path, chunk_duration=10):
+    """Split audio into chunks and return list of chunk file paths"""
+    try:
+        # Load audio file
+        audio = AudioSegment.from_file(audio_path)
+        duration_ms = len(audio)
+        chunk_ms = chunk_duration * 1000
+
+        # Validate chunk duration
+        if chunk_duration <= 0:
+            raise ValueError("Chunk duration must be positive")
+
+        if chunk_duration > duration_ms / 1000:
+            # If chunk duration is longer than audio, return the whole audio
+            return [audio_path], 1
+
+        chunks = []
+        chunk_files = []
+
+        # Calculate number of chunks
+        num_chunks = math.ceil(duration_ms / chunk_ms)
+
+        for i in range(num_chunks):
+            start_ms = i * chunk_ms
+            end_ms = min((i + 1) * chunk_ms, duration_ms)
+
+            # Extract chunk
+            chunk = audio[start_ms:end_ms]
+            chunks.append(chunk)
+
+            # Save chunk to temporary file
+            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
+                chunk.export(tmp_file.name, format="wav")
+                chunk_files.append(tmp_file.name)
+
+        return chunk_files, num_chunks
+
+    except Exception as e:
+        print("Error chunking audio:", e)
+        # Return original file as single chunk if chunking fails
+        return [audio_path], 1
+
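Note: chunk_audio returns temporary .wav paths plus a chunk count, and falls back to returning the original path untouched when the requested chunk is longer than the audio or chunking fails. A minimal usage sketch ("sample.wav" is a placeholder path, not part of this commit):

    chunk_files, num_chunks = chunk_audio("sample.wav", chunk_duration=10)
    print(f"Got {num_chunks} chunk(s)")
    # The caller owns cleanup; skip the original path, since chunk_audio
    # may hand it back as-is instead of a temp file.
    for path in chunk_files:
        if path != "sample.wav":
            os.unlink(path)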
+# Function to transcribe audio
+def transcribe(wav_filepath):
+    try:
+        segments, _ = model2.transcribe(wav_filepath, beam_size=5)
+        return "".join([segment.text for segment in segments])
+    except Exception as e:
+        print("Error transcribing audio:", e)
+        return "Transcription failed"
+
+# Function to extract MFCC features from audio
+def extract_mfcc(wav_file_name):
+    try:
+        y, sr = librosa.load(wav_file_name)
+        mfccs = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40).T, axis=0)
+        return mfccs
+    except Exception as e:
+        print("Error extracting MFCC features:", e)
+        return None
+
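Note: extract_mfcc averages the MFCC matrix over time into a single 40-dimensional vector, which is exactly what the (1, 40, 1) reshape in predict_emotion_from_audio below expects. A quick shape check with a synthetic signal (illustrative only):

    y = np.zeros(22050, dtype=np.float32)  # one second of silence at 22050 Hz
    mfccs = np.mean(librosa.feature.mfcc(y=y, sr=22050, n_mfcc=40).T, axis=0)
    print(mfccs.shape)                           # (40,)
    print(np.reshape(mfccs, (1, 40, 1)).shape)   # (1, 40, 1), the LSTM input shape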
+# Emotions dictionary
+emotions = {0: 'neutral', 1: 'calm', 2: 'happy', 3: 'sad', 4: 'angry', 5: 'fearful', 6: 'disgust', 7: 'surprised'}
+
+# Function to predict emotion from audio
+def predict_emotion_from_audio(wav_filepath):
+    try:
+        if model is None:
+            return "Model not loaded"
+
+        test_point = extract_mfcc(wav_filepath)
+        if test_point is not None:
+            test_point = np.reshape(test_point, newshape=(1, 40, 1))
+            predictions = model.predict(test_point)
+            predicted_emotion_label = np.argmax(predictions[0])
+            return emotions.get(predicted_emotion_label, "Unknown emotion")
+        else:
+            return "Error: Unable to extract features"
+    except Exception as e:
+        print("Error predicting emotion:", e)
+        return "Prediction error"
+
+# Function to analyze sentiment from text
+def analyze_sentiment(text):
+    try:
+        if not text or text.strip() == "":
+            return "neutral", 0.0
+
+        analysis = TextBlob(text)
+        polarity = analysis.sentiment.polarity
+
+        if polarity > 0.1:
+            sentiment = "positive"
+        elif polarity < -0.1:
+            sentiment = "negative"
+        else:
+            sentiment = "neutral"
+
+        return sentiment, polarity
+    except Exception as e:
+        print("Error analyzing sentiment:", e)
+        return "neutral", 0.0
+
+# Function to get image prompt based on sentiment
+def get_image_prompt(sentiment, transcribed_text, chunk_idx, total_chunks):
+    base_prompt = f"Chunk {chunk_idx+1}/{total_chunks}: "
+
+    if sentiment == "positive":
+        return base_prompt + f"Generate a vibrant, uplifting equirectangular 360 image texture with bright colors, joyful atmosphere, and optimistic vibes representing: [{transcribed_text}]. The scene should evoke happiness and positivity."
+
+    elif sentiment == "negative":
+        return base_prompt + f"Generate a moody, dramatic equirectangular 360 image texture with dark tones, intense atmosphere, and emotional depth representing: [{transcribed_text}]. The scene should convey melancholy and intensity."
+
+    else:  # neutral
+        return base_prompt + f"Generate a balanced, serene equirectangular 360 image texture with harmonious colors, peaceful atmosphere, and calm vibes representing: [{transcribed_text}]. The scene should evoke tranquility and balance."
+
+# Function to get music prompt based on emotion
+def get_music_prompt(emotion, transcribed_text, chunk_idx, total_chunks):
+    base_prompt = f"Chunk {chunk_idx+1}/{total_chunks}: "
+
+    emotion_prompts = {
+        'neutral': f"Create ambient, background music with neutral tones, subtle melodies, and unobtrusive atmosphere that complements: {transcribed_text}. The music should be calm and balanced.",
+        'calm': f"Generate soothing, peaceful music with gentle melodies, soft instrumentation, and relaxing vibes that represents: {transcribed_text}. The music should evoke tranquility and serenity.",
+        'happy': f"Create joyful, upbeat music with cheerful melodies, bright instrumentation, and energetic rhythms that celebrates: {transcribed_text}. The music should evoke happiness and positivity.",
+        'sad': f"Generate emotional, melancholic music with poignant melodies, soft strings, and heartfelt atmosphere that reflects: {transcribed_text}. The music should evoke sadness and reflection.",
+        'angry': f"Create intense, powerful music with driving rhythms, aggressive instrumentation, and strong dynamics that expresses: {transcribed_text}. The music should evoke anger and intensity.",
+        'fearful': f"Generate suspenseful, tense music with eerie melodies, atmospheric sounds, and unsettling vibes that represents: {transcribed_text}. The music should evoke fear and anticipation.",
+        'disgust': f"Create dark, unsettling music with dissonant harmonies, unusual sounds, and uncomfortable atmosphere that reflects: {transcribed_text}. The music should evoke discomfort and unease.",
+        'surprised': f"Generate dynamic, unexpected music with sudden changes, playful melodies, and surprising elements that represents: {transcribed_text}. The music should evoke surprise and wonder."
+    }
+
+    return base_prompt + emotion_prompts.get(emotion.lower(),
+        f"Create background music with {emotion} atmosphere that represents: {transcribed_text}")
+
+# Function to generate music with MusicGen (using acoustic emotion prediction)
+def generate_music(transcribed_text, emotion_prediction, chunk_idx, total_chunks):
+    try:
+        if processor is None or music_model is None:
+            return None
+
+        # Get specific prompt based on emotion
+        prompt = get_music_prompt(emotion_prediction, transcribed_text, chunk_idx, total_chunks)
+
+        # Limit prompt length to avoid model issues
+        if len(prompt) > 200:
+            prompt = prompt[:200] + "..."
+
+        inputs = processor(
+            text=[prompt],
+            padding=True,
+            return_tensors="pt",
+        ).to(device)
+
+        # Generate audio
+        audio_values = music_model.generate(**inputs, max_new_tokens=512)
+
+        # Convert to numpy array and sample rate
+        sampling_rate = music_model.config.audio_encoder.sampling_rate
+        audio_data = audio_values[0, 0].cpu().numpy()
+
+        # Normalize audio data
+        audio_data = audio_data / np.max(np.abs(audio_data))
+
+        # Create a temporary file to save the audio
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
+            scipy.io.wavfile.write(tmp_file.name, rate=sampling_rate, data=audio_data)
+            return tmp_file.name
+
+    except Exception as e:
+        print("Error generating music:", e)
+        return None
+
+# --- DeepAI Image Generation (Text2Img) ---
+api_key = os.getenv("DeepAI_api_key")
+
+# Function to upscale image using Lanczos interpolation
+def upscale_image(image, target_width=4096, target_height=2048):
+    """
+    Upscale image using DeepAI's Torch-SRGAN API for super resolution
+    """
+    try:
+        if not api_key:
+            print("No API key available for upscaling")
+            # Fallback to OpenCV if no API key
+            img_array = np.array(image)
+            upscaled = cv2.resize(
+                img_array,
+                (target_width, target_height),
+                interpolation=cv2.INTER_LANCZOS4
+            )
+            return Image.fromarray(upscaled)
+
+        # Save the image to a temporary file
+        with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp_input:
+            image.save(tmp_input.name, "JPEG", quality=95)
+
+        # Make request to DeepAI torch-srgan API
+        response = requests.post(
+            "https://api.deepai.org/api/torch-srgan",
+            files={'image': open(tmp_input.name, 'rb')},
+            headers={'api-key': api_key}
+        )
+
+        data = response.json()
+
+        if 'output_url' in data:
+            # Download the upscaled image
+            img_resp = requests.get(data['output_url'])
+            upscaled_image = Image.open(BytesIO(img_resp.content))
+
+            # Ensure the image meets our target dimensions
+            if upscaled_image.size != (target_width, target_height):
+                upscaled_image = upscaled_image.resize(
+                    (target_width, target_height),
+                    Image.Resampling.LANCZOS
+                )
+
+            # Clean up temporary file
+            os.unlink(tmp_input.name)
+            return upscaled_image
+        else:
+            print("Error in DeepAI upscaling response:", data)
+            # Fallback to OpenCV if API fails
+            img_array = np.array(image)
+            upscaled = cv2.resize(
+                img_array,
+                (target_width, target_height),
+                interpolation=cv2.INTER_LANCZOS4
+            )
+            return Image.fromarray(upscaled)
+
+    except Exception as e:
+        print(f"Error upscaling image with DeepAI: {e}")
+        # Fallback to OpenCV if any error occurs
+        img_array = np.array(image)
+        upscaled = cv2.resize(
+            img_array,
+            (target_width, target_height),
+            interpolation=cv2.INTER_LANCZOS4
+        )
+        return Image.fromarray(upscaled)
+
+# Function to generate image using DeepAI API
+def generate_image(sentiment_prediction, transcribed_text, chunk_idx, total_chunks):
+    try:
+        if not api_key:
+            # fallback white image if no API key
+            base_image = Image.new('RGB', (1024, 512), color='white')
+        else:
+            # Get specific prompt based on sentiment
+            prompt = get_image_prompt(sentiment_prediction, transcribed_text, chunk_idx, total_chunks)
+
+            # Make request to DeepAI text2img API
+            response = requests.post(
+                "https://api.deepai.org/api/text2img",
+                data={
+                    'text': prompt,
+                    'width': 1024,
+                    'height': 512,
+                    'image_generator_version': 'hd'
+                },
+                headers={'api-key': api_key}
+            )
+
+            data = response.json()
+            if 'output_url' in data:
+                # Download the generated image
+                img_resp = requests.get(data['output_url'])
+                base_image = Image.open(BytesIO(img_resp.content))
+            else:
+                print("Error in DeepAI response:", data)
+                # Return a fallback image
+                base_image = Image.new('RGB', (1024, 512), color='white')
+
+        # Upscale the image for better quality in 360 viewer
+        upscaled_image = upscale_image(base_image)
+        return upscaled_image
+
+    except Exception as e:
+        print("Error generating image:", e)
+        # Return a fallback image
+        return Image.new('RGB', (1024, 512), color='white')
+
+# Function to process a single chunk
+def process_chunk(chunk_path, chunk_idx, total_chunks, generate_audio=True):
+    try:
+        # Get acoustic emotion prediction (for music)
+        emotion_prediction = predict_emotion_from_audio(chunk_path)
+
+        # Get transcribed text
+        transcribed_text = transcribe(chunk_path)
+
+        # Analyze sentiment of transcribed text (for image)
+        sentiment, polarity = analyze_sentiment(transcribed_text)
+
+        # Generate image using SENTIMENT analysis with specific prompt
+        image = generate_image(sentiment, transcribed_text, chunk_idx, total_chunks)
+
+        # Add 360 metadata to the image
+        image_with_360_path = add_360_metadata(image)
+
+        # Generate music only if audio generation is enabled
+        music_path = None
+        if generate_audio:
+            music_path = generate_music(transcribed_text, emotion_prediction, chunk_idx, total_chunks)
+
+        return {
+            'chunk_index': chunk_idx + 1,
+            'emotion': emotion_prediction,
+            'transcription': transcribed_text,
+            'sentiment': sentiment,
+            'image': image,  # Original image for display in Gradio
+            'image_360': image_with_360_path,  # Image with 360 metadata
+            'music': music_path
+        }
+    except Exception as e:
+        print(f"Error processing chunk {chunk_idx + 1}:", e)
+        # Return a fallback result with all required keys
+        return {
+            'chunk_index': chunk_idx + 1,
+            'emotion': "Error",
+            'transcription': "Transcription failed",
+            'sentiment': "Sentiment: error",
+            'image': Image.new('RGB', (1440, 770), color='white'),
+            'image_360': None,
+            'music': None
+        }
+
+# Function to get predictions for all chunks
+def get_predictions(audio_input, generate_audio=True, chunk_duration=10):
+    # Chunk the audio into segments
+    chunk_files, total_chunks = chunk_audio(audio_input, chunk_duration)
+
+    results = []
+
+    # Process each chunk
+    for i, chunk_path in enumerate(chunk_files):
+        print(f"Processing chunk {i+1}/{total_chunks} ({chunk_duration}s each)")
+        result = process_chunk(chunk_path, i, total_chunks, generate_audio)
+        results.append(result)
+
+    # Clean up temporary chunk files
+    for chunk_path in chunk_files:
+        try:
+            if chunk_path != audio_input:  # Don't delete original input file
+                os.unlink(chunk_path)
+        except:
+            pass
+
+    return results
+
+def create_xmp_block(width, height):
+    """Create XMP metadata block following ExifTool's exact format."""
+    xmp = (
+        f'<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?>\n'
+        f'<x:xmpmeta xmlns:x="adobe:ns:meta/" x:xmptk="ExifTool">\n'
+        f'<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">\n'
+        f'<rdf:Description rdf:about=""\n'
+        f'xmlns:GPano="http://ns.google.com/photos/1.0/panorama/"\n'
+        f'GPano:ProjectionType="equirectangular"\n'
+        f'GPano:UsePanoramaViewer="True"\n'
+        f'GPano:FullPanoWidthPixels="{width}"\n'
+        f'GPano:FullPanoHeightPixels="{height}"\n'
+        f'GPano:CroppedAreaImageWidthPixels="{width}"\n'
+        f'GPano:CroppedAreaImageHeightPixels="{height}"\n'
+        f'GPano:CroppedAreaLeftPixels="0"\n'
+        f'GPano:CroppedAreaTopPixels="0"/>\n'
+        f'</rdf:RDF>\n'
+        f'</x:xmpmeta>\n'
+        f'<?xpacket end="w"?>'
+    )
+    return xmp
+
+def write_xmp_to_jpg(input_path, output_path, width, height):
+    """Write XMP metadata to JPEG file following ExifTool's method."""
+    # Read the original JPEG
+    with open(input_path, 'rb') as f:
+        data = f.read()
+
+    # Find the start of image marker
+    if data[0:2] != b'\xFF\xD8':
+        raise ValueError("Not a valid JPEG file")
+
+    # Create XMP data
+    xmp_data = create_xmp_block(width, height)
+
+    # Create APP1 segment for XMP
+    app1_marker = b'\xFF\xE1'
+    xmp_header = b'http://ns.adobe.com/xap/1.0/\x00'
+    xmp_bytes = xmp_data.encode('utf-8')
+    length = len(xmp_header) + len(xmp_bytes) + 2  # +2 for length bytes
+    length_bytes = struct.pack('>H', length)
+
+    # Construct new file content
+    output = bytearray()
+    output.extend(data[0:2])  # SOI marker
+    output.extend(app1_marker)
+    output.extend(length_bytes)
+    output.extend(xmp_header)
+    output.extend(xmp_bytes)
+    output.extend(data[2:])  # Rest of the original file
+
+    # Write the new file
+    with open(output_path, 'wb') as f:
+        f.write(output)
+
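Note: in a JPEG, a segment's two-byte length field counts itself and the payload but not the 0xFFE1 marker, which is why the "+ 2" above is correct. A small sanity check one could run after write_xmp_to_jpg (assuming, as the code above does, that the XMP APP1 segment sits right after SOI):

    def first_segment_info(path):
        with open(path, 'rb') as f:
            f.read(2)                                # skip SOI (0xFFD8)
            marker = f.read(2)                       # expect APP1: b'\xff\xe1'
            length = struct.unpack('>H', f.read(2))[0]
        return marker, length

    # marker should be b'\xff\xe1' and length should equal
    # len(xmp_header) + len(xmp_bytes) + 2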
+def add_360_metadata(img):
+    """Add 360 photo metadata to a PIL Image and return the path to the processed image."""
+    try:
+        # First, ensure the image is upscaled to 4096x2048
+        target_width, target_height = 4096, 2048
+        if img.width != target_width or img.height != target_height:
+            img = img.resize((target_width, target_height), Image.Resampling.LANCZOS)
+
+        # Create a temporary file
+        with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp_file:
+            # First save as high-quality JPEG
+            img.save(tmp_file.name, "JPEG", quality=95)
+
+            # Then inject XMP metadata directly into JPEG file
+            write_xmp_to_jpg(tmp_file.name, tmp_file.name, img.width, img.height)
+
+            return tmp_file.name
+
+    except Exception as e:
+        print(f"Error adding 360 metadata: {str(e)}")
+        # Fallback: return the original image path
+        with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp_file:
+            img.save(tmp_file.name, "JPEG", quality=95)
+            return tmp_file.name
+
 def create_360_viewer_html(image_paths, audio_paths, output_path):
     """Create an HTML file with a 360 viewer and audio player for the given images and audio."""
     # Create a list of image data URIs
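The body of create_360_viewer_html is unchanged by this commit, so the diff only shows its first lines. Per the context comment above, it starts by embedding each image as a data URI so the exported HTML is self-contained. A sketch of that pattern (the actual implementation in app.py may differ):

    import base64

    def image_to_data_uri(path):
        # Base64-encode a JPEG so it can be inlined into the viewer HTML
        with open(path, 'rb') as f:
            encoded = base64.b64encode(f.read()).decode('ascii')
        return f"data:image/jpeg;base64,{encoded}"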
@@ -331,7 +725,107 @@ def create_360_viewer_html(image_paths, audio_paths, output_path):
 
     return output_path
 
-#
+# Update the process_and_display function
+def process_and_display(audio_input, generate_audio, chunk_duration):
+    # Validate chunk duration
+    if chunk_duration is None or chunk_duration <= 0:
+        chunk_duration = 10
+
+    # Show loading indicator
+    yield [gr.HTML(f"""
+    <div style="text-align: center; margin: 20px;">
+        <p style="font-size: 18px; color: #4a4a4a;">Processing audio in {chunk_duration}-second chunks...</p>
+        <div style="border: 4px solid #f3f3f3; border-top: 4px solid #3498db; border-radius: 50%; width: 30px; height: 30px; animation: spin 2s linear infinite; margin: 0 auto;"></div>
+        <style>@keyframes spin {{ 0% {{ transform: rotate(0deg); }} 100% {{ transform: rotate(360deg); }} }}</style>
+        <p style="font-size: 14px; color: #4a4a4a;">This may take several minutes depending on the audio length...</p>
+    </div>
+    """)] + [gr.Group(visible=False)] * len(group_components) + [None] * (len(output_containers) * 6) + [None, ""]
+
+    results = get_predictions(audio_input, generate_audio, chunk_duration)
+
+    # Initialize outputs list
+    outputs = []
+    group_visibility = []
+    all_360_images = []  # Collect all 360 images for the viewer
+    all_music_paths = []  # Collect all music paths for the viewer
+
+    # Process each result
+    for i, result in enumerate(results):
+        if i < len(output_containers):
+            group_visibility.append(gr.Group(visible=True))
+            outputs.extend([
+                result['emotion'],
+                result['transcription'],
+                result['sentiment'],
+                result['image'],
+                result['image_360'],
+                result['music']
+            ])
+            # Collect the 360-processed images and music
+            if result['image_360']:
+                all_360_images.append(result['image_360'])  # Use the 360-processed image
+                all_music_paths.append(result['music'])  # Can be None if no music generated
+        else:
+            # If we have more results than containers, just extend with None
+            group_visibility.append(gr.Group(visible=False))
+            outputs.extend([None] * 6)
+
+    # Hide remaining containers
+    for i in range(len(results), len(output_containers)):
+        group_visibility.append(gr.Group(visible=False))
+        outputs.extend([None] * 6)
+
+    # Create 360 viewer HTML if we have 360 images
+    viewer_html_path = None
+    if all_360_images:
+        # Create a timestamp for unique filenames
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        html_filename = f"MyAVE_{timestamp}.html"
+
+        # Create a temporary directory for our output
+        output_dir = tempfile.mkdtemp()
+        viewer_html_path = os.path.join(output_dir, html_filename)
+
+        # Create the HTML file
+        create_360_viewer_html(all_360_images, all_music_paths, viewer_html_path)
+
+    # After processing, return the results along with other outputs
+    yield [gr.HTML("")] + group_visibility + outputs + [viewer_html_path, js_output]
+
+# Update the clear_all function to handle the new outputs
+def clear_all():
+    # Create a list with None for all outputs
+    outputs = [None]  # For audio input
+
+    # For group components (set to invisible)
+    outputs.extend([gr.Group(visible=False)] * len(group_components))
+
+    # For all output containers (set to None)
+    for _ in output_containers:
+        outputs.extend([None, None, None, None, None, None])  # emotion, transcription, sentiment, image, image_360, music
+
+    # For loading indicator (empty HTML)
+    outputs.append(gr.HTML(""))
+
+    # For chunk duration (reset to 10)
+    outputs.append(10)
+
+    # For example selector (reset to None)
+    outputs.append(None)
+
+    # For viewer (set to None)
+    outputs.append(None)
+
+    # For JavaScript output (empty)
+    outputs.append("")
+
+    return outputs
+
+# Function to load example audio (placeholder - you need to implement this)
+def load_example_audio(example_name):
+    # This is a placeholder - you need to implement this function
+    # Return the path to the example audio file based on the example_name
+    return None
 
 # Create the Gradio interface with proper output handling
 with gr.Blocks(title="Affective Virtual Environments - Chunked Processing") as interface:
@@ -374,13 +868,7 @@ with gr.Blocks(title="Affective Virtual Environments - Chunked Processing") as i
     clear_btn = gr.Button("Clear All", variant="secondary")
 
     # Add a loading indicator
-    loading_indicator = gr.HTML("""
-    <div id="loading" style="display: none; text-align: center; margin: 20px;">
-        <p style="font-size: 18px; color: #4a4a4a;">Processing audio chunks...</p>
-        <div style="border: 4px solid #f3f3f3; border-top: 4px solid #3498db; border-radius: 50%; width: 30px; height: 30px; animation: spin 2s linear infinite; margin: 0 auto;"></div>
-        <style>@keyframes spin { 0% { transform: rotate(0deg); } 100% { transform: rotate(360deg); } }</style>
-    </div>
-    """)
+    loading_indicator = gr.HTML("")
 
     # Create output components for each chunk type
     output_containers = []