jfforero committed
Commit fd5a39d · verified · 1 parent: 145a57f

Update app.py

Files changed (1):
  1. app.py (+242 −11)
app.py CHANGED
 
@@ -287,6 +287,9 @@ def process_chunk(chunk_path, chunk_idx, total_chunks, generate_audio=True):
         # Generate image using SENTIMENT analysis with specific prompt
         image = generate_image(sentiment, transcribed_text, chunk_idx, total_chunks)
 
+        # Add 360 metadata to the image
+        image_with_360_path = add_360_metadata(image)
+
         # Generate music only if audio generation is enabled
         music_path = None
         if generate_audio:
 
@@ -297,7 +300,8 @@ def process_chunk(chunk_path, chunk_idx, total_chunks, generate_audio=True):
             'emotion': emotion_prediction,
             'transcription': transcribed_text,
             'sentiment': sentiment,
-            'image': image,
+            'image': image,  # Original image for display in Gradio
+            'image_360': image_with_360_path,  # Image with 360 metadata
             'music': music_path
         }
     except Exception as e:
 
@@ -309,6 +313,7 @@ def process_chunk(chunk_path, chunk_idx, total_chunks, generate_audio=True):
             'transcription': "Transcription failed",
             'sentiment': "Sentiment: error",
             'image': Image.new('RGB', (1024, 512), color='white'),
+            'image_360': None,
             'music': None
         }
 
 
@@ -335,8 +340,192 @@ def get_predictions(audio_input, generate_audio=True, chunk_duration=10):
 
     return results
 
+#
+def create_xmp_block(width, height):
+    """Create XMP metadata block following ExifTool's exact format."""
+    xmp = (
+        f'<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?>\n'
+        f'<x:xmpmeta xmlns:x="adobe:ns:meta/" x:xmptk="ExifTool">\n'
+        f'<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">\n'
+        f'<rdf:Description rdf:about=""\n'
+        f'xmlns:GPano="http://ns.google.com/photos/1.0/panorama/"\n'
+        f'GPano:ProjectionType="equirectangular"\n'
+        f'GPano:UsePanoramaViewer="True"\n'
+        f'GPano:FullPanoWidthPixels="{width}"\n'
+        f'GPano:FullPanoHeightPixels="{height}"\n'
+        f'GPano:CroppedAreaImageWidthPixels="{width}"\n'
+        f'GPano:CroppedAreaImageHeightPixels="{height}"\n'
+        f'GPano:CroppedAreaLeftPixels="0"\n'
+        f'GPano:CroppedAreaTopPixels="0"/>\n'
+        f'</rdf:RDF>\n'
+        f'</x:xmpmeta>\n'
+        f'<?xpacket end="w"?>'
+    )
+    return xmp
+
+def write_xmp_to_jpg(input_path, output_path, width, height):
+    """Write XMP metadata to JPEG file following ExifTool's method."""
+    # Read the original JPEG
+    with open(input_path, 'rb') as f:
+        data = f.read()
+
+    # Find the start of image marker
+    if data[0:2] != b'\xFF\xD8':
+        raise ValueError("Not a valid JPEG file")
+
+    # Create XMP data
+    xmp_data = create_xmp_block(width, height)
+
+    # Create APP1 segment for XMP
+    app1_marker = b'\xFF\xE1'
+    xmp_header = b'http://ns.adobe.com/xap/1.0/\x00'
+    xmp_bytes = xmp_data.encode('utf-8')
+    length = len(xmp_header) + len(xmp_bytes) + 2  # +2 for length bytes
+    length_bytes = struct.pack('>H', length)
+
+    # Construct new file content
+    output = bytearray()
+    output.extend(data[0:2])  # SOI marker
+    output.extend(app1_marker)
+    output.extend(length_bytes)
+    output.extend(xmp_header)
+    output.extend(xmp_bytes)
+    output.extend(data[2:])  # Rest of the original file
+
+    # Write the new file
+    with open(output_path, 'wb') as f:
+        f.write(output)
+
+def add_360_metadata(img):
+    """Add 360 photo metadata to a PIL Image and return the path to the processed image."""
+    try:
+        # Verify the image
+        if img.width != 2 * img.height:
+            # Resize to 2:1 aspect ratio if needed
+            new_width = 2 * img.height
+            img = img.resize((new_width, img.height), Image.Resampling.LANCZOS)
+
+        # Create a temporary file
+        with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp_file:
+            # First save as high-quality JPEG
+            img.save(tmp_file.name, "JPEG", quality=95)
+
+            # Then inject XMP metadata directly into JPEG file
+            write_xmp_to_jpg(tmp_file.name, tmp_file.name, img.width, img.height)
+
+            return tmp_file.name
+
+    except Exception as e:
+        print(f"Error adding 360 metadata: {str(e)}")
+        # Fallback: return the original image path
+        with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp_file:
+            img.save(tmp_file.name, "JPEG", quality=95)
+            return tmp_file.name
+
 
 
+
+
+def create_360_viewer_html(image_paths, output_path):
+    """Create an HTML file with a 360 viewer for the given images."""
+    # Create a list of image data URIs
+    image_data_list = []
+    for img_path in image_paths:
+        with open(img_path, "rb") as f:
+            img_data = base64.b64encode(f.read()).decode("utf-8")
+        image_data_list.append(f"data:image/jpeg;base64,{img_data}")
+
+    # Create the HTML content
+    html_content = f"""
+    <!DOCTYPE html>
+    <html lang="en">
+    <head>
+        <meta charset="UTF-8">
+        <meta name="viewport" content="width=device-width, initial-scale=1.0">
+        <title>360 Panorama Viewer</title>
+        <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/pannellum@2.5.6/build/pannellum.css"/>
+        <style>
+            body {{
+                margin: 0;
+                overflow: hidden;
+            }}
+            #panorama {{
+                width: 100vw;
+                height: 100vh;
+            }}
+            .pnlm-hotspot.pnlm-info-hotspot {{
+                background-color: rgba(0, 150, 255, 0.8);
+                border-radius: 50%;
+                width: 30px;
+                height: 30px;
+            }}
+            .pnlm-hotspot.pnlm-info-hotspot .pnlm-sprite {{
+                filter: brightness(0) invert(1);
+            }}
+            .pnlm-tooltip {{
+                background-color: rgba(0, 0, 0, 0.7);
+                color: white;
+                border-radius: 3px;
+                padding: 5px 10px;
+            }}
+            #image-selector {{
+                position: absolute;
+                top: 10px;
+                left: 10px;
+                z-index: 1000;
+                background: rgba(0, 0, 0, 0.7);
+                color: white;
+                padding: 5px 10px;
+                border-radius: 3px;
+            }}
+        </style>
+    </head>
+    <body>
+        <select id="image-selector">
+            {"".join([f'<option value="{i}">Chunk {i+1}</option>' for i in range(len(image_data_list))])}
+        </select>
+        <div id="panorama"></div>
+
+        <script type="text/javascript" src="https://cdn.jsdelivr.net/npm/pannellum@2.5.6/build/pannellum.js"></script>
+        <script>
+            const images = {json.dumps(image_data_list)};
+            let currentViewer = null;
+
+            function loadPanorama(index) {{
+                if (currentViewer) {{
+                    currentViewer.destroy();
+                }}
+
+                currentViewer = pannellum.viewer('panorama', {{
+                    "type": "equirectangular",
+                    "panorama": images[index],
+                    "autoLoad": true,
+                    "autoRotate": -2,
+                    "showZoomCtrl": true,
+                    "showFullscreenCtrl": true,
+                    "hfov": 100
+                }});
+            }}
+
+            // Load the first image initially
+            loadPanorama(0);
+
+            // Handle image selection changes
+            document.getElementById('image-selector').addEventListener('change', function(e) {{
+                loadPanorama(parseInt(e.target.value));
+            }});
+        </script>
+    </body>
+    </html>
+    """
+
+    # Write the HTML to a file
+    with open(output_path, 'w') as f:
+        f.write(html_content)
+
+    return output_path
+
 
 # Replace the create_fade_transition function with this updated version
 def create_fade_transition(images, fade_duration=1.0, fps=24):
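Note on the new write_xmp_to_jpg: it splices an APP1 segment directly after the SOI marker — 0xFFE1, a big-endian length that counts the two length bytes plus the payload, the Adobe XMP namespace header, then the XMP packet — ahead of the JFIF/APP0 segment Pillow writes, an ordering most readers accept. A minimal sketch for sanity-checking a file produced this way (check_xmp_segment is an illustrative helper, not part of the commit):

import struct

XMP_NS = b'http://ns.adobe.com/xap/1.0/\x00'

def check_xmp_segment(path):
    """Sketch: verify the first segment after SOI is a GPano XMP APP1 block."""
    with open(path, 'rb') as f:
        data = f.read()
    assert data[0:2] == b'\xFF\xD8', "missing SOI marker"
    assert data[2:4] == b'\xFF\xE1', "first segment is not APP1"
    # The length field is big-endian and counts the two length bytes themselves
    (length,) = struct.unpack('>H', data[4:6])
    payload = data[6:4 + length]
    assert payload.startswith(XMP_NS), "not an XMP segment"
    xmp = payload[len(XMP_NS):].decode('utf-8')
    return 'GPano:ProjectionType="equirectangular"' in xmp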
 
@@ -419,7 +608,7 @@ def process_and_display(audio_input, generate_audio, chunk_duration):
         <style>@keyframes spin {{ 0% {{ transform: rotate(0deg); }} 100% {{ transform: rotate(360deg); }} }}</style>
         <p style="font-size: 14px; color: #4a4a4a;">This may take several minutes depending on the audio length...</p>
     </div>
-    """)] + [gr.Group(visible=False)] * len(group_components) + [None] * (len(output_containers) * 5) + [None, None]
+    """)] + [gr.Group(visible=False)] * len(group_components) + [None] * (len(output_containers) * 6) + [None, None, None]
 
     results = get_predictions(audio_input, generate_audio, chunk_duration)
 
 
@@ -427,6 +616,7 @@ def process_and_display(audio_input, generate_audio, chunk_duration):
     outputs = []
     group_visibility = []
     all_images = []  # Collect all generated images for the fade animation
+    all_360_images = []  # Collect all 360 images for the viewer
 
     # Process each result
     for i, result in enumerate(results):
 
@@ -437,19 +627,22 @@ def process_and_display(audio_input, generate_audio, chunk_duration):
                 result['transcription'],
                 result['sentiment'],
                 result['image'],
+                result['image_360'],
                 result['music']
             ])
-            # Collect the image for the fade animation
+            # Collect the images
             all_images.append(result['image'])
+            if result['image_360']:
+                all_360_images.append(result['image_360'])
         else:
             # If we have more results than containers, just extend with None
             group_visibility.append(gr.Group(visible=False))
-            outputs.extend([None] * 5)
+            outputs.extend([None] * 6)
 
     # Hide remaining containers
     for i in range(len(results), len(output_containers)):
         group_visibility.append(gr.Group(visible=False))
-        outputs.extend([None] * 5)
+        outputs.extend([None] * 6)
 
     # Create fade animation if we have multiple images
     fade_preview = None
 
@@ -458,8 +651,14 @@ def process_and_display(audio_input, generate_audio, chunk_duration):
         # Create a fade animation (GIF)
         fade_preview, fade_animation_path = create_fade_transition(all_images, fade_duration=1.5, fps=15)
 
+    # Create 360 viewer HTML if we have 360 images
+    viewer_html_path = None
+    if all_360_images:
+        with tempfile.NamedTemporaryFile(suffix=".html", delete=False) as tmp_file:
+            viewer_html_path = create_360_viewer_html(all_360_images, tmp_file.name)
+
     # Hide loading indicator and show results
-    yield [gr.HTML("")] + group_visibility + outputs + [fade_preview, fade_animation_path]
+    yield [gr.HTML("")] + group_visibility + outputs + [fade_preview, fade_animation_path, viewer_html_path]
 
 # Update the clear_all function to handle the new outputs
 def clear_all():
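Since create_360_viewer_html inlines each panorama as a base64 data URI, the HTML written to the temp file is self-contained apart from the Pannellum CDN assets, at the cost of growing to roughly 4/3 of the combined JPEG size. A quick usage sketch (the file paths are illustrative):

# Sketch: build a standalone viewer from two already-exported 360 JPEGs.
paths = ["/tmp/chunk1_360.jpg", "/tmp/chunk2_360.jpg"]  # illustrative paths
viewer = create_360_viewer_html(paths, "/tmp/viewer.html")
print(f"Open {viewer} in a browser to pan through each chunk")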
 
@@ -470,7 +669,7 @@ def clear_all():
     outputs.extend([gr.Group(visible=False)] * len(group_components))
 
     # For all output containers (set to None)
-    outputs.extend([None] * (len(output_containers) * 5))
+    outputs.extend([None] * (len(output_containers) * 6))  # Changed from 5 to 6
 
     # For loading indicator (empty HTML)
     outputs.append(gr.HTML(""))
 
@@ -481,9 +680,10 @@ def clear_all():
     # For example selector (reset to None)
     outputs.append(None)
 
-    # For fade preview and animation (set to None)
+    # For fade preview, animation, and viewer (set to None)
     outputs.append(None)
     outputs.append(None)
+    outputs.append(None)  # New output for viewer
 
     return outputs
 
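The 5 → 6 and [None, None] → [None, None, None] edits keep every list yielded by process_and_display aligned, value for value, with the component list wired into Gradio: the loading indicator, one visibility update per chunk group, six values per output container (emotion, transcription, sentiment, image, image_360, music), then the fade preview, fade file, and viewer file. A sketch of that invariant, assuming this app's group_components and output_containers lists:

# Sketch: expected length of each list yielded by process_and_display.
def expected_yield_len(group_components, output_containers):
    # 1 loading indicator + one visibility flag per group
    # + 6 values per container + fade preview, fade file, 360 viewer file
    return 1 + len(group_components) + 6 * len(output_containers) + 3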
 
 
@@ -574,7 +774,36 @@ with gr.Blocks(title="Affective Virtual Environments - Chunked Processing") as i
             type="binary",
             interactive=False
         )
-
+    # In your output_containers, add the image_360 output
+    with gr.Group(visible=False) as chunk_group:
+        gr.Markdown(f"### Chunk {i+1} Results")
+        with gr.Row():
+            emotion_output = gr.Label(label="Acoustic Emotion Prediction")
+            transcription_output = gr.Label(label="Transcribed Text")
+            sentiment_output = gr.Label(label="Sentiment Analysis")
+        with gr.Row():
+            image_output = gr.Image(label="Generated Equirectangular Image")
+            image_360_output = gr.File(label="Download 360 Image", type="filepath")  # New component
+        with gr.Row():
+            audio_output = gr.Audio(label="Generated Music")
+        gr.HTML("<hr style='margin: 20px 0; border: 1px solid #ccc;'>")
+
+    # Add the 360 viewer component
+    with gr.Row():
+        fade_preview_output = gr.Image(
+            label="Fade Animation Preview",
+            interactive=False
+        )
+        fade_animation_output = gr.File(
+            label="Download Fade Animation",
+            type="binary",
+            interactive=False
+        )
+        viewer_html_output = gr.File(  # New component
+            label="Download 360 Viewer",
+            type="filepath",
+            interactive=False
+        )
 
 
     # Function to handle example selection
 
@@ -624,8 +853,9 @@ with gr.Blocks(title="Affective Virtual Environments - Chunked Processing") as i
             container['transcription'],
             container['sentiment'],
             container['image'],
+            container['image_360'],  # New output
             container['music']
-        ]] + [fade_preview_output, fade_animation_output]
+        ]] + [fade_preview_output, fade_animation_output, viewer_html_output]  # Added viewer_html_output
     )
 
     # Set up the clear button
 
@@ -637,8 +867,9 @@ with gr.Blocks(title="Affective Virtual Environments - Chunked Processing") as i
             container['transcription'],
             container['sentiment'],
             container['image'],
+            container['image_360'],  # New output
             container['music']
-        ]] + [loading_indicator] + [chunk_duration_input] + [example_selector] + [fade_preview_output, fade_animation_output]
+        ]] + [loading_indicator] + [chunk_duration_input] + [example_selector] + [fade_preview_output, fade_animation_output, viewer_html_output]  # Added viewer_html_output
     )
 
     # Set up the example loading button
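For container['image_360'] to resolve in the submit and clear wiring above, each chunk's components must be registered under that key when the UI is built; the commit shows the new image_360_output component but not the surrounding bookkeeping. A hedged sketch of one way output_containers could be assembled (MAX_CHUNKS and the dict layout are assumptions, not code from this commit; the component labels are taken from the diff):

# Hedged sketch, not from the commit: register per-chunk components,
# including the new image_360 file output, so event wiring can look
# them up by key. MAX_CHUNKS is an assumed constant.
import gradio as gr

MAX_CHUNKS = 6  # assumed; the real app defines its own limit

output_containers = []
with gr.Blocks() as demo:
    for i in range(MAX_CHUNKS):
        with gr.Group(visible=False) as chunk_group:
            emotion_output = gr.Label(label="Acoustic Emotion Prediction")
            transcription_output = gr.Label(label="Transcribed Text")
            sentiment_output = gr.Label(label="Sentiment Analysis")
            image_output = gr.Image(label="Generated Equirectangular Image")
            image_360_output = gr.File(label="Download 360 Image", type="filepath")
            audio_output = gr.Audio(label="Generated Music")
        output_containers.append({
            'group': chunk_group,
            'emotion': emotion_output,
            'transcription': transcription_output,
            'sentiment': sentiment_output,
            'image': image_output,
            'image_360': image_360_output,  # new key this commit relies on
            'music': audio_output,
        })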