jfforero commited on
Commit
e6f2ff7
·
verified ·
1 Parent(s): eca105b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +118 -617
app.py CHANGED
@@ -30,471 +30,79 @@ import base64
30
  from io import BytesIO
31
  import struct
32
  import cv2
 
 
33
 
34
- # Load the emotion prediction model
35
- def load_emotion_model(model_path):
36
- try:
37
- model = load_model(model_path)
38
- print("Emotion model loaded successfully")
39
- return model
40
- except Exception as e:
41
- print("Error loading emotion prediction model:", e)
42
- return None
43
 
44
- model_path = 'mymodel_SER_LSTM_RAVDESS.h5'
45
- model = load_emotion_model(model_path)
46
-
47
- # Initialize WhisperModel
48
- model_size = "small"
49
- model2 = WhisperModel(model_size, device="cpu", compute_type="int8")
50
-
51
- # Load MusicGen model
52
- def load_musicgen_model():
53
- try:
54
- device = "cuda" if torch.cuda.is_available() else "cpu"
55
- processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
56
- music_model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
57
- music_model.to(device)
58
- print("MusicGen model loaded successfully")
59
- return processor, music_model, device
60
- except Exception as e:
61
- print("Error loading MusicGen model:", e)
62
- return None, None, None
63
-
64
- processor, music_model, device = load_musicgen_model()
65
-
66
- # Function to chunk audio into segments
67
- def chunk_audio(audio_path, chunk_duration=10):
68
- """Split audio into chunks and return list of chunk file paths"""
69
- try:
70
- # Load audio file
71
- audio = AudioSegment.from_file(audio_path)
72
- duration_ms = len(audio)
73
- chunk_ms = chunk_duration * 1000
74
-
75
- # Validate chunk duration
76
- if chunk_duration <= 0:
77
- raise ValueError("Chunk duration must be positive")
78
-
79
- if chunk_duration > duration_ms / 1000:
80
- # If chunk duration is longer than audio, return the whole audio
81
- return [audio_path], 1
82
-
83
- chunks = []
84
- chunk_files = []
85
-
86
- # Calculate number of chunks
87
- num_chunks = math.ceil(duration_ms / chunk_ms)
88
-
89
- for i in range(num_chunks):
90
- start_ms = i * chunk_ms
91
- end_ms = min((i + 1) * chunk_ms, duration_ms)
92
-
93
- # Extract chunk
94
- chunk = audio[start_ms:end_ms]
95
- chunks.append(chunk)
96
-
97
- # Save chunk to temporary file
98
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
99
- chunk.export(tmp_file.name, format="wav")
100
- chunk_files.append(tmp_file.name)
101
-
102
- return chunk_files, num_chunks
103
-
104
- except Exception as e:
105
- print("Error chunking audio:", e)
106
- # Return original file as single chunk if chunking fails
107
- return [audio_path], 1
108
-
109
- # Function to transcribe audio
110
- def transcribe(wav_filepath):
111
- try:
112
- segments, _ = model2.transcribe(wav_filepath, beam_size=5)
113
- return "".join([segment.text for segment in segments])
114
- except Exception as e:
115
- print("Error transcribing audio:", e)
116
- return "Transcription failed"
117
-
118
- # Function to extract MFCC features from audio
119
- def extract_mfcc(wav_file_name):
120
- try:
121
- y, sr = librosa.load(wav_file_name)
122
- mfccs = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40).T, axis=0)
123
- return mfccs
124
- except Exception as e:
125
- print("Error extracting MFCC features:", e)
126
- return None
127
-
128
- # Emotions dictionary
129
- emotions = {0: 'neutral', 1: 'calm', 2: 'happy', 3: 'sad', 4: 'angry', 5: 'fearful', 6: 'disgust', 7: 'surprised'}
130
-
131
- # Function to predict emotion from audio
132
- def predict_emotion_from_audio(wav_filepath):
133
- try:
134
- if model is None:
135
- return "Model not loaded"
136
-
137
- test_point = extract_mfcc(wav_filepath)
138
- if test_point is not None:
139
- test_point = np.reshape(test_point, newshape=(1, 40, 1))
140
- predictions = model.predict(test_point)
141
- predicted_emotion_label = np.argmax(predictions[0])
142
- return emotions.get(predicted_emotion_label, "Unknown emotion")
143
- else:
144
- return "Error: Unable to extract features"
145
- except Exception as e:
146
- print("Error predicting emotion:", e)
147
- return "Prediction error"
148
-
149
- # Function to analyze sentiment from text
150
- def analyze_sentiment(text):
151
- try:
152
- if not text or text.strip() == "":
153
- return "neutral", 0.0
154
-
155
- analysis = TextBlob(text)
156
- polarity = analysis.sentiment.polarity
157
-
158
- if polarity > 0.1:
159
- sentiment = "positive"
160
- elif polarity < -0.1:
161
- sentiment = "negative"
162
- else:
163
- sentiment = "neutral"
164
-
165
- return sentiment, polarity
166
- except Exception as e:
167
- print("Error analyzing sentiment:", e)
168
- return "neutral", 0.0
169
-
170
- # Function to get image prompt based on sentiment
171
- def get_image_prompt(sentiment, transcribed_text, chunk_idx, total_chunks):
172
- base_prompt = f"Chunk {chunk_idx+1}/{total_chunks}: "
173
-
174
- if sentiment == "positive":
175
- return base_prompt + f"Generate a vibrant, uplifting equirectangular 360 image texture with bright colors, joyful atmosphere, and optimistic vibes representing: [{transcribed_text}]. The scene should evoke happiness and positivity."
176
 
177
- elif sentiment == "negative":
178
- return base_prompt + f"Generate a moody, dramatic equirectangular 360 image texture with dark tones, intense atmosphere, and emotional depth representing: [{transcribed_text}]. The scene should convey melancholy and intensity."
 
 
 
 
 
 
 
179
 
180
- else: # neutral
181
- return base_prompt + f"Generate a balanced, serene equirectangular 360 image texture with harmonious colors, peaceful atmosphere, and calm vibes representing: [{transcribed_text}]. The scene should evoke tranquility and balance."
182
-
183
- # Function to get music prompt based on emotion
184
- def get_music_prompt(emotion, transcribed_text, chunk_idx, total_chunks):
185
- base_prompt = f"Chunk {chunk_idx+1}/{total_chunks}: "
186
 
187
- emotion_prompts = {
188
- 'neutral': f"Create ambient, background music with neutral tones, subtle melodies, and unobtrusive atmosphere that complements: {transcribed_text}. The music should be calm and balanced.",
189
- 'calm': f"Generate soothing, peaceful music with gentle melodies, soft instrumentation, and relaxing vibes that represents: {transcribed_text}. The music should evoke tranquility and serenity.",
190
- 'happy': f"Create joyful, upbeat music with cheerful melodies, bright instrumentation, and energetic rhythms that celebrates: {transcribed_text}. The music should evoke happiness and positivity.",
191
- 'sad': f"Generate emotional, melancholic music with poignant melodies, soft strings, and heartfelt atmosphere that reflects: {transcribed_text}. The music should evoke sadness and reflection.",
192
- 'angry': f"Create intense, powerful music with driving rhythms, aggressive instrumentation, and strong dynamics that expresses: {transcribed_text}. The music should evoke anger and intensity.",
193
- 'fearful': f"Generate suspenseful, tense music with eerie melodies, atmospheric sounds, and unsettling vibes that represents: {transcribed_text}. The music should evoke fear and anticipation.",
194
- 'disgust': f"Create dark, unsettling music with dissonant harmonies, unusual sounds, and uncomfortable atmosphere that reflects: {transcribed_text}. The music should evoke discomfort and unease.",
195
- 'surprised': f"Generate dynamic, unexpected music with sudden changes, playful melodies, and surprising elements that represents: {transcribed_text}. The music should evoke surprise and wonder."
196
- }
197
 
198
- return base_prompt + emotion_prompts.get(emotion.lower(),
199
- f"Create background music with {emotion} atmosphere that represents: {transcribed_text}")
200
-
201
- # Function to generate music with MusicGen (using acoustic emotion prediction)
202
- def generate_music(transcribed_text, emotion_prediction, chunk_idx, total_chunks):
203
- try:
204
- if processor is None or music_model is None:
205
- return None
206
-
207
- # Get specific prompt based on emotion
208
- prompt = get_music_prompt(emotion_prediction, transcribed_text, chunk_idx, total_chunks)
209
-
210
- # Limit prompt length to avoid model issues
211
- if len(prompt) > 200:
212
- prompt = prompt[:200] + "..."
213
-
214
- inputs = processor(
215
- text=[prompt],
216
- padding=True,
217
- return_tensors="pt",
218
- ).to(device)
219
-
220
- # Generate audio
221
- audio_values = music_model.generate(**inputs, max_new_tokens=512)
222
-
223
- # Convert to numpy array and sample rate
224
- sampling_rate = music_model.config.audio_encoder.sampling_rate
225
- audio_data = audio_values[0, 0].cpu().numpy()
226
-
227
- # Normalize audio data
228
- audio_data = audio_data / np.max(np.abs(audio_data))
229
-
230
- # Create a temporary file to save the audio
231
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
232
- scipy.io.wavfile.write(tmp_file.name, rate=sampling_rate, data=audio_data)
233
- return tmp_file.name
234
-
235
- except Exception as e:
236
- print("Error generating music:", e)
237
- return None
238
-
239
- # --- DeepAI Image Generation (Text2Img) ---
240
- api_key = os.getenv("DeepAI_api_key")
241
-
242
- # Function to upscale image using Lanczos interpolation
243
- def upscale_image(image, target_width=4096, target_height=2048):
244
- """
245
- Upscale image using DeepAI's Torch-SRGAN API for super resolution
246
- """
247
- try:
248
- if not api_key:
249
- print("No API key available for upscaling")
250
- # Fallback to OpenCV if no API key
251
- img_array = np.array(image)
252
- upscaled = cv2.resize(
253
- img_array,
254
- (target_width, target_height),
255
- interpolation=cv2.INTER_LANCZOS4
256
- )
257
- return Image.fromarray(upscaled)
258
-
259
- # Save the image to a temporary file
260
- with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp_input:
261
- image.save(tmp_input.name, "JPEG", quality=95)
262
-
263
- # Make request to DeepAI torch-srgan API
264
- response = requests.post(
265
- "https://api.deepai.org/api/torch-srgan",
266
- files={'image': open(tmp_input.name, 'rb')},
267
- headers={'api-key': api_key}
268
- )
269
-
270
- data = response.json()
271
-
272
- if 'output_url' in data:
273
- # Download the upscaled image
274
- img_resp = requests.get(data['output_url'])
275
- upscaled_image = Image.open(BytesIO(img_resp.content))
276
-
277
- # Ensure the image meets our target dimensions
278
- if upscaled_image.size != (target_width, target_height):
279
- upscaled_image = upscaled_image.resize(
280
- (target_width, target_height),
281
- Image.Resampling.LANCZOS
282
- )
283
-
284
- # Clean up temporary file
285
- os.unlink(tmp_input.name)
286
- return upscaled_image
287
- else:
288
- print("Error in DeepAI upscaling response:", data)
289
- # Fallback to OpenCV if API fails
290
- img_array = np.array(image)
291
- upscaled = cv2.resize(
292
- img_array,
293
- (target_width, target_height),
294
- interpolation=cv2.INTER_LANCZOS4
295
- )
296
- return Image.fromarray(upscaled)
297
-
298
- except Exception as e:
299
- print(f"Error upscaling image with DeepAI: {e}")
300
- # Fallback to OpenCV if any error occurs
301
- img_array = np.array(image)
302
- upscaled = cv2.resize(
303
- img_array,
304
- (target_width, target_height),
305
- interpolation=cv2.INTER_LANCZOS4
306
- )
307
- return Image.fromarray(upscaled)
308
-
309
- # Function to generate image using DeepAI API
310
- def generate_image(sentiment_prediction, transcribed_text, chunk_idx, total_chunks):
311
- try:
312
- if not api_key:
313
- # fallback white image if no API key
314
- base_image = Image.new('RGB', (1024,512), color='white')
315
  else:
316
- # Get specific prompt based on sentiment
317
- prompt = get_image_prompt(sentiment_prediction, transcribed_text, chunk_idx, total_chunks)
318
-
319
- # Make request to DeepAI text2img API
320
- response = requests.post(
321
- "https://api.deepai.org/api/text2img",
322
- data={
323
- 'text': prompt,
324
- 'width': 1024,
325
- 'height': 512,
326
- 'image_generator_version': 'hd'
327
- },
328
- headers={'api-key': api_key}
329
- )
330
-
331
- data = response.json()
332
- if 'output_url' in data:
333
- # Download the generated image
334
- img_resp = requests.get(data['output_url'])
335
- base_image = Image.open(BytesIO(img_resp.content))
336
- else:
337
- print("Error in DeepAI response:", data)
338
- # Return a fallback image
339
- base_image = Image.new('RGB', (1024,512), color='white')
340
-
341
- # Upscale the image for better quality in 360 viewer
342
- upscaled_image = upscale_image(base_image)
343
- return upscaled_image
344
-
345
- except Exception as e:
346
- print("Error generating image:", e)
347
- # Return a fallback image
348
- return Image.new('RGB', (1024,512), color='white')
349
-
350
- # Function to process a single chunk
351
- def process_chunk(chunk_path, chunk_idx, total_chunks, generate_audio=True):
352
- try:
353
- # Get acoustic emotion prediction (for music)
354
- emotion_prediction = predict_emotion_from_audio(chunk_path)
355
-
356
- # Get transcribed text
357
- transcribed_text = transcribe(chunk_path)
358
-
359
- # Analyze sentiment of transcribed text (for image)
360
- sentiment, polarity = analyze_sentiment(transcribed_text)
361
-
362
- # Generate image using SENTIMENT analysis with specific prompt
363
- image = generate_image(sentiment, transcribed_text, chunk_idx, total_chunks)
364
-
365
- # Add 360 metadata to the image
366
- image_with_360_path = add_360_metadata(image)
367
-
368
- # Generate music only if audio generation is enabled
369
- music_path = None
370
- if generate_audio:
371
- music_path = generate_music(transcribed_text, emotion_prediction, chunk_idx, total_chunks)
372
-
373
- return {
374
- 'chunk_index': chunk_idx + 1,
375
- 'emotion': emotion_prediction,
376
- 'transcription': transcribed_text,
377
- 'sentiment': sentiment,
378
- 'image': image, # Original image for display in Gradio
379
- 'image_360': image_with_360_path, # Image with 360 metadata
380
- 'music': music_path
381
- }
382
- except Exception as e:
383
- print(f"Error processing chunk {chunk_idx + 1}:", e)
384
- # Return a fallback result with all required keys
385
- return {
386
- 'chunk_index': chunk_idx + 1,
387
- 'emotion': "Error",
388
- 'transcription': "Transcription failed",
389
- 'sentiment': "Sentiment: error",
390
- 'image': Image.new('RGB', (1440, 770), color='white'),
391
- 'image_360': None,
392
- 'music': None
393
- }
394
-
395
- # Function to get predictions for all chunks
396
- def get_predictions(audio_input, generate_audio=True, chunk_duration=10):
397
- # Chunk the audio into segments
398
- chunk_files, total_chunks = chunk_audio(audio_input, chunk_duration)
399
-
400
- results = []
401
-
402
- # Process each chunk
403
- for i, chunk_path in enumerate(chunk_files):
404
- print(f"Processing chunk {i+1}/{total_chunks} ({chunk_duration}s each)")
405
- result = process_chunk(chunk_path, i, total_chunks, generate_audio)
406
- results.append(result)
407
-
408
- # Clean up temporary chunk files
409
- for chunk_path in chunk_files:
410
- try:
411
- if chunk_path != audio_input: # Don't delete original input file
412
- os.unlink(chunk_path)
413
- except:
414
- pass
415
-
416
- return results
417
-
418
- def create_xmp_block(width, height):
419
- """Create XMP metadata block following ExifTool's exact format."""
420
- xmp = (
421
- f'<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?>\n'
422
- f'<x:xmpmeta xmlns:x="adobe:ns:meta/" x:xmptk="ExifTool">\n'
423
- f'<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">\n'
424
- f'<rdf:Description rdf:about=""\n'
425
- f'xmlns:GPano="http://ns.google.com/photos/1.0/panorama/"\n'
426
- f'GPano:ProjectionType="equirectangular"\n'
427
- f'GPano:UsePanoramaViewer="True"\n'
428
- f'GPano:FullPanoWidthPixels="{width}"\n'
429
- f'GPano:FullPanoHeightPixels="{height}"\n'
430
- f'GPano:CroppedAreaImageWidthPixels="{width}"\n'
431
- f'GPano:CroppedAreaImageHeightPixels="{height}"\n'
432
- f'GPano:CroppedAreaLeftPixels="0"\n'
433
- f'GPano:CroppedAreaTopPixels="0"/>\n'
434
- f'</rdf:RDF>\n'
435
- f'</x:xmpmeta>\n'
436
- f'<?xpacket end="w"?>'
437
- )
438
- return xmp
439
-
440
- def write_xmp_to_jpg(input_path, output_path, width, height):
441
- """Write XMP metadata to JPEG file following ExifTool's method."""
442
- # Read the original JPEG
443
- with open(input_path, 'rb') as f:
444
- data = f.read()
445
-
446
- # Find the start of image marker
447
- if data[0:2] != b'\xFF\xD8':
448
- raise ValueError("Not a valid JPEG file")
449
-
450
- # Create XMP data
451
- xmp_data = create_xmp_block(width, height)
452
-
453
- # Create APP1 segment for XMP
454
- app1_marker = b'\xFF\xE1'
455
- xmp_header = b'http://ns.adobe.com/xap/1.0/\x00'
456
- xmp_bytes = xmp_data.encode('utf-8')
457
- length = len(xmp_header) + len(xmp_bytes) + 2 # +2 for length bytes
458
- length_bytes = struct.pack('>H', length)
459
 
460
- # Construct new file content
461
- output = bytearray()
462
- output.extend(data[0:2]) # SOI marker
463
- output.extend(app1_marker)
464
- output.extend(length_bytes)
465
- output.extend(xmp_header)
466
- output.extend(xmp_bytes)
467
- output.extend(data[2:]) # Rest of the original file
468
 
469
- # Write the new file
470
- with open(output_path, 'wb') as f:
471
- f.write(output)
472
-
473
- def add_360_metadata(img):
474
- """Add 360 photo metadata to a PIL Image and return the path to the processed image."""
475
- try:
476
- # First, ensure the image is upscaled to 4096x2048
477
- target_width, target_height = 4096, 2048
478
- if img.width != target_width or img.height != target_height:
479
- img = img.resize((target_width, target_height), Image.Resampling.LANCZOS)
480
 
481
- # Create a temporary file
482
- with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp_file:
483
- # First save as high-quality JPEG
484
- img.save(tmp_file.name, "JPEG", quality=95)
485
-
486
- # Then inject XMP metadata directly into JPEG file
487
- write_xmp_to_jpg(tmp_file.name, tmp_file.name, img.width, img.height)
488
-
489
- return tmp_file.name
490
-
491
- except Exception as e:
492
- print(f"Error adding 360 metadata: {str(e)}")
493
- # Fallback: return the original image path
494
- with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp_file:
495
- img.save(tmp_file.name, "JPEG", quality=95)
496
- return tmp_file.name
497
 
 
498
  def create_360_viewer_html(image_paths, audio_paths, output_path):
499
  """Create an HTML file with a 360 viewer and audio player for the given images and audio."""
500
  # Create a list of image data URIs
@@ -514,14 +122,14 @@ def create_360_viewer_html(image_paths, audio_paths, output_path):
514
  else:
515
  audio_data_list.append(None) # Placeholder for chunks without audio
516
 
517
- # Create the HTML content
518
  html_content = f"""
519
  <!DOCTYPE html>
520
  <html lang="en">
521
  <head>
522
  <meta charset="UTF-8">
523
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
524
- <title>360 Panorama Viewer with Audio</title>
525
  <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/pannellum@2.5.6/build/pannellum.css"/>
526
  <style>
527
  body {{
@@ -599,9 +207,46 @@ def create_360_viewer_html(image_paths, audio_paths, output_path):
599
  border-radius: 3px;
600
  border: 1px solid #ccc;
601
  }}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
602
  </style>
603
  </head>
604
  <body>
 
 
 
 
 
605
  <div id="controls">
606
  <select id="image-selector">
607
  {"".join([f'<option value="{i}">Chunk {i+1}</option>' for i in range(len(image_data_list))])}
@@ -657,6 +302,16 @@ def create_360_viewer_html(image_paths, audio_paths, output_path):
657
  }}
658
  }}
659
 
 
 
 
 
 
 
 
 
 
 
660
  // Load the first image initially
661
  loadPanorama(0);
662
 
@@ -676,135 +331,7 @@ def create_360_viewer_html(image_paths, audio_paths, output_path):
676
 
677
  return output_path
678
 
679
-
680
-
681
-
682
-
683
-
684
-
685
-
686
-
687
-
688
-
689
-
690
-
691
-
692
- # Update the process_and_display function
693
- def process_and_display(audio_input, generate_audio, chunk_duration):
694
- # Validate chunk duration
695
- if chunk_duration is None or chunk_duration <= 0:
696
- chunk_duration = 10
697
-
698
- # Show loading indicator
699
- yield [gr.HTML(f"""
700
- <div style="text-align: center; margin: 20px;">
701
- <p style="font-size: 18px; color: #4a4a4a;">Processing audio in {chunk_duration}-second chunks...</p>
702
- <div style="border: 4px solid #f3f3f3; border-top: 4px solid #3498db; border-radius: 50%; width: 30px; height: 30px; animation: spin 2s linear infinite; margin: 0 auto;"></div>
703
- <style>@keyframes spin {{ 0% {{ transform: rotate(0deg); }} 100% {{ transform: rotate(360deg); }} }}</style>
704
- <p style="font-size: 14px; color: #4a4a4a;">This may take several minutes depending on the audio length...</p>
705
- </div>
706
- """)] + [gr.Group(visible=False)] * len(group_components) + [None] * (len(output_containers) * 6) + [None, ""]
707
-
708
- results = get_predictions(audio_input, generate_audio, chunk_duration)
709
-
710
- # Initialize outputs list
711
- outputs = []
712
- group_visibility = []
713
- all_360_images = [] # Collect all 360 images for the viewer
714
- all_music_paths = [] # Collect all music paths for the viewer
715
-
716
- # Process each result
717
- for i, result in enumerate(results):
718
- if i < len(output_containers):
719
- group_visibility.append(gr.Group(visible=True))
720
- outputs.extend([
721
- result['emotion'],
722
- result['transcription'],
723
- result['sentiment'],
724
- result['image'],
725
- result['image_360'],
726
- result['music']
727
- ])
728
- # Collect the 360-processed images and music
729
- if result['image_360']:
730
- all_360_images.append(result['image_360']) # Use the 360-processed image
731
- all_music_paths.append(result['music']) # Can be None if no music generated
732
- else:
733
- # If we have more results than containers, just extend with None
734
- group_visibility.append(gr.Group(visible=False))
735
- outputs.extend([None] * 6)
736
-
737
- # Hide remaining containers
738
- for i in range(len(results), len(output_containers)):
739
- group_visibility.append(gr.Group(visible=False))
740
- outputs.extend([None] * 6)
741
-
742
- # Create 360 viewer HTML if we have 360 images
743
- viewer_html_path = None
744
- if all_360_images:
745
- with tempfile.NamedTemporaryFile(suffix=".html", delete=False) as tmp_file:
746
- viewer_html_path = create_360_viewer_html(all_360_images, all_music_paths, tmp_file.name)
747
-
748
- # After processing, return the results along with other outputs
749
- yield [gr.HTML("")] + group_visibility + outputs + [viewer_html_path, js_output, results]
750
-
751
- # Update the clear_all function to handle the new outputs
752
- def clear_all():
753
- # Create a list with None for all outputs
754
- outputs = [None] # For audio input
755
-
756
- # For group components (set to invisible)
757
- outputs.extend([gr.Group(visible=False)] * len(group_components))
758
-
759
- # For all output containers (set to None)
760
- outputs.extend([None] * (len(output_containers) * 6))
761
-
762
- # For loading indicator (empty HTML)
763
- outputs.append(gr.HTML(""))
764
-
765
- # For chunk duration (reset to 10)
766
- outputs.append(10)
767
-
768
- # For example selector (reset to None)
769
- outputs.append(None)
770
-
771
- # For viewer (set to None)
772
- outputs.append(None)
773
-
774
- # For JavaScript output (empty)
775
- outputs.append("")
776
-
777
- return outputs
778
-
779
- # Function to load example audio (placeholder - you need to implement this)
780
- def load_example_audio(example_name):
781
- # This is a placeholder - you need to implement this function
782
- # Return the path to the example audio file based on the example_name
783
- return None
784
-
785
- # Function to generate a shareable link
786
-
787
- def generate_share_link(audio_input=None, generate_audio=True, chunk_duration=10):
788
- try:
789
- # Check if we're on Hugging Face Spaces
790
- space_id = os.getenv('SPACE_ID')
791
-
792
- if space_id:
793
- space_url = f"https://huggingface.co/spaces/{space_id}"
794
- return f"Your Space is already public! Share this URL: {space_url}\n\nTo share specific results, ask others to process the same audio with the same settings."
795
- else:
796
- if hasattr(interface, 'share_url') and interface.share_url:
797
- return "Share this URL to let others use the app: " + interface.share_url + "\n\nTo share specific results, ask others to process the same audio with the same settings."
798
- else:
799
- return "Share link is not available. Make sure to set share=True when launching."
800
-
801
- except Exception as e:
802
- return f"Error generating share link: {str(e)}"
803
-
804
-
805
-
806
-
807
-
808
 
809
  # Create the Gradio interface with proper output handling
810
  with gr.Blocks(title="Affective Virtual Environments - Chunked Processing") as interface:
@@ -845,9 +372,6 @@ with gr.Blocks(title="Affective Virtual Environments - Chunked Processing") as i
845
  with gr.Row():
846
  process_btn = gr.Button("Process Audio", variant="primary")
847
  clear_btn = gr.Button("Clear All", variant="secondary")
848
- # Add share button
849
- share_btn = gr.Button("Generate Share Link", variant="secondary")
850
- share_output = gr.Textbox(label="Share Link", interactive=False)
851
 
852
  # Add a loading indicator
853
  loading_indicator = gr.HTML("""
@@ -887,11 +411,12 @@ with gr.Blocks(title="Affective Virtual Environments - Chunked Processing") as i
887
  'music': audio_output
888
  })
889
 
890
- # Add component for 360 viewer
891
  viewer_html_output = gr.File(
892
- label="Download 360 Viewer",
893
  type="filepath",
894
- interactive=False
 
895
  )
896
 
897
  # Add a hidden HTML component for JavaScript execution
@@ -909,9 +434,6 @@ with gr.Blocks(title="Affective Virtual Environments - Chunked Processing") as i
909
  return example_path, example_name
910
 
911
  # Set up the button clicks
912
-
913
- # Update the process_btn click handler to include results in the output
914
- # Remove the results_state component and simplify the process_btn click handler
915
  process_btn.click(
916
  fn=process_and_display,
917
  inputs=[audio_input, generate_audio_checkbox, chunk_duration_input],
@@ -924,10 +446,6 @@ with gr.Blocks(title="Affective Virtual Environments - Chunked Processing") as i
924
  container['music']
925
  ]] + [viewer_html_output, js_output]
926
  )
927
-
928
-
929
-
930
- # Remove the results_state component
931
 
932
  clear_btn.click(
933
  fn=clear_all,
@@ -947,23 +465,6 @@ with gr.Blocks(title="Affective Virtual Environments - Chunked Processing") as i
947
  inputs=[example_selector],
948
  outputs=[audio_input, example_selector]
949
  )
950
-
951
- # Update the share button to not expect results
952
- share_btn.click(
953
- fn=generate_share_link,
954
- inputs=[audio_input, generate_audio_checkbox, chunk_duration_input],
955
- outputs=[share_output]
956
- )
957
 
958
-
959
- # Check if we're running on Hugging Face Spaces
960
- is_spaces = os.getenv('SPACE_ID') is not None
961
-
962
- # Launch with appropriate settings
963
- if is_spaces:
964
- # On Spaces, don't use share=True as it's not supported
965
- interface.launch()
966
- else:
967
- # Running locally, use share=True to generate a public link
968
- interface.launch(share=True)
969
-
 
30
  from io import BytesIO
31
  import struct
32
  import cv2
33
+ import shutil
34
+ from datetime import datetime
35
 
36
+ # [Keep all your existing code until the process_and_display function]
 
 
 
 
 
 
 
 
37
 
38
+ # Update the process_and_display function to create a named HTML file
39
+ def process_and_display(audio_input, generate_audio, chunk_duration):
40
+ # Validate chunk duration
41
+ if chunk_duration is None or chunk_duration <= 0:
42
+ chunk_duration = 10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
+ # Show loading indicator
45
+ yield [gr.HTML(f"""
46
+ <div style="text-align: center; margin: 20px;">
47
+ <p style="font-size: 18px; color: #4a4a4a;">Processing audio in {chunk_duration}-second chunks...</p>
48
+ <div style="border: 4px solid #f3f3f3; border-top: 4px solid #3498db; border-radius: 50%; width: 30px; height: 30px; animation: spin 2s linear infinite; margin: 0 auto;"></div>
49
+ <style>@keyframes spin {{ 0% {{ transform: rotate(0deg); }} 100% {{ transform: rotate(360deg); }} }}</style>
50
+ <p style="font-size: 14px; color: #4a4a4a;">This may take several minutes depending on the audio length...</p>
51
+ </div>
52
+ """)] + [gr.Group(visible=False)] * len(group_components) + [None] * (len(output_containers) * 6) + [None, ""]
53
 
54
+ results = get_predictions(audio_input, generate_audio, chunk_duration)
 
 
 
 
 
55
 
56
+ # Initialize outputs list
57
+ outputs = []
58
+ group_visibility = []
59
+ all_360_images = [] # Collect all 360 images for the viewer
60
+ all_music_paths = [] # Collect all music paths for the viewer
 
 
 
 
 
61
 
62
+ # Process each result
63
+ for i, result in enumerate(results):
64
+ if i < len(output_containers):
65
+ group_visibility.append(gr.Group(visible=True))
66
+ outputs.extend([
67
+ result['emotion'],
68
+ result['transcription'],
69
+ result['sentiment'],
70
+ result['image'],
71
+ result['image_360'],
72
+ result['music']
73
+ ])
74
+ # Collect the 360-processed images and music
75
+ if result['image_360']:
76
+ all_360_images.append(result['image_360']) # Use the 360-processed image
77
+ all_music_paths.append(result['music']) # Can be None if no music generated
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  else:
79
+ # If we have more results than containers, just extend with None
80
+ group_visibility.append(gr.Group(visible=False))
81
+ outputs.extend([None] * 6)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
 
83
+ # Hide remaining containers
84
+ for i in range(len(results), len(output_containers)):
85
+ group_visibility.append(gr.Group(visible=False))
86
+ outputs.extend([None] * 6)
 
 
 
 
87
 
88
+ # Create 360 viewer HTML if we have 360 images
89
+ viewer_html_path = None
90
+ if all_360_images:
91
+ # Create a timestamp for unique filenames
92
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
93
+ html_filename = f"MyAVE_{timestamp}.html"
 
 
 
 
 
94
 
95
+ # Create a temporary directory for our output
96
+ output_dir = tempfile.mkdtemp()
97
+ viewer_html_path = os.path.join(output_dir, html_filename)
98
+
99
+ # Create the HTML file
100
+ create_360_viewer_html(all_360_images, all_music_paths, viewer_html_path)
101
+
102
+ # After processing, return the results along with other outputs
103
+ yield [gr.HTML("")] + group_visibility + outputs + [viewer_html_path, js_output, results]
 
 
 
 
 
 
 
104
 
105
+ # Update the create_360_viewer_html function to include a download button in the HTML itself
106
  def create_360_viewer_html(image_paths, audio_paths, output_path):
107
  """Create an HTML file with a 360 viewer and audio player for the given images and audio."""
108
  # Create a list of image data URIs
 
122
  else:
123
  audio_data_list.append(None) # Placeholder for chunks without audio
124
 
125
+ # Create the HTML content with a styled download button
126
  html_content = f"""
127
  <!DOCTYPE html>
128
  <html lang="en">
129
  <head>
130
  <meta charset="UTF-8">
131
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
132
+ <title>My AVE - 360 Panorama Viewer with Audio</title>
133
  <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/pannellum@2.5.6/build/pannellum.css"/>
134
  <style>
135
  body {{
 
207
  border-radius: 3px;
208
  border: 1px solid #ccc;
209
  }}
210
+ .download-btn {{
211
+ background: linear-gradient(to bottom, #4CAF50, #45a049);
212
+ color: white;
213
+ border: none;
214
+ padding: 12px 24px;
215
+ text-align: center;
216
+ text-decoration: none;
217
+ display: inline-block;
218
+ font-size: 16px;
219
+ margin: 10px 2px;
220
+ cursor: pointer;
221
+ border-radius: 25px;
222
+ box-shadow: 0 4px 8px rgba(0,0,0,0.2);
223
+ transition: all 0.3s ease;
224
+ }}
225
+ .download-btn:hover {{
226
+ background: linear-gradient(to bottom, #45a049, #4CAF50);
227
+ box-shadow: 0 6px 12px rgba(0,0,0,0.3);
228
+ transform: translateY(-2px);
229
+ }}
230
+ .header {{
231
+ display: flex;
232
+ justify-content: space-between;
233
+ align-items: center;
234
+ padding: 10px 20px;
235
+ background: rgba(0, 0, 0, 0.8);
236
+ color: white;
237
+ }}
238
+ .title {{
239
+ font-size: 24px;
240
+ font-weight: bold;
241
+ }}
242
  </style>
243
  </head>
244
  <body>
245
+ <div class="header">
246
+ <div class="title">My Affective Virtual Environment</div>
247
+ <button class="download-btn" onclick="downloadHTML()">Download This AVE</button>
248
+ </div>
249
+
250
  <div id="controls">
251
  <select id="image-selector">
252
  {"".join([f'<option value="{i}">Chunk {i+1}</option>' for i in range(len(image_data_list))])}
 
302
  }}
303
  }}
304
 
305
+ function downloadHTML() {{
306
+ // Create a download link for the current HTML file
307
+ const a = document.createElement('a');
308
+ a.href = window.location.href;
309
+ a.download = 'MyAVE.html';
310
+ document.body.appendChild(a);
311
+ a.click();
312
+ document.body.removeChild(a);
313
+ }}
314
+
315
  // Load the first image initially
316
  loadPanorama(0);
317
 
 
331
 
332
  return output_path
333
 
334
+ # [Keep the rest of your code but remove the share button and related functions]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
335
 
336
  # Create the Gradio interface with proper output handling
337
  with gr.Blocks(title="Affective Virtual Environments - Chunked Processing") as interface:
 
372
  with gr.Row():
373
  process_btn = gr.Button("Process Audio", variant="primary")
374
  clear_btn = gr.Button("Clear All", variant="secondary")
 
 
 
375
 
376
  # Add a loading indicator
377
  loading_indicator = gr.HTML("""
 
411
  'music': audio_output
412
  })
413
 
414
+ # Add component for 360 viewer with a fixed name
415
  viewer_html_output = gr.File(
416
+ label="Download Complete AVE Experience (HTML)",
417
  type="filepath",
418
+ interactive=False,
419
+ file_count="single"
420
  )
421
 
422
  # Add a hidden HTML component for JavaScript execution
 
434
  return example_path, example_name
435
 
436
  # Set up the button clicks
 
 
 
437
  process_btn.click(
438
  fn=process_and_display,
439
  inputs=[audio_input, generate_audio_checkbox, chunk_duration_input],
 
446
  container['music']
447
  ]] + [viewer_html_output, js_output]
448
  )
 
 
 
 
449
 
450
  clear_btn.click(
451
  fn=clear_all,
 
465
  inputs=[example_selector],
466
  outputs=[audio_input, example_selector]
467
  )
 
 
 
 
 
 
 
468
 
469
+ # Launch the interface
470
+ interface.launch()