Darknsu committed on
Commit
ed387b2
·
verified ·
1 Parent(s): 403bc03

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +179 -111
main.py CHANGED
@@ -371,6 +371,11 @@
371
 
372
 
373
 
 
 
 
 
 
374
  import os
375
  import json
376
  import torch
@@ -386,6 +391,7 @@ from typing import List, Dict, Optional
386
  from huggingface_hub import hf_hub_download, list_repo_files
387
  import tempfile
388
  import shutil
 
389
 
390
  # Configuration
391
  VIS_CONFIG = {
@@ -473,6 +479,7 @@ class HFVideoDataSet(VideoDataSet):
473
 
474
  # Create temporary directory for this session
475
  self.temp_dir = tempfile.mkdtemp(prefix="hf_video_")
 
476
 
477
  # Download the specific video file if video_name is provided
478
  if video_name:
@@ -483,24 +490,39 @@ class HFVideoDataSet(VideoDataSet):
483
  # Ensure the temp directory exists
484
  os.makedirs(self.temp_dir, exist_ok=True)
485
 
486
- # Copy to temp directory with expected structure
487
  temp_file_path = os.path.join(self.temp_dir, f"{video_name}.npz")
 
488
  shutil.copy2(downloaded_path, temp_file_path)
489
- print(f"Video file ready: {temp_file_path}")
490
 
491
- # Verify file exists
492
  if not os.path.exists(temp_file_path):
493
  raise Exception(f"Failed to copy file to {temp_file_path}")
 
 
 
494
 
495
  except Exception as e:
496
  print(f"Error downloading video {video_name}: {str(e)}")
 
 
 
497
  raise e
498
 
499
  # Set the feature path to our temp directory
500
  opt['video_feature_all_test'] = self.temp_dir
 
501
 
502
  # Initialize parent class
503
- super().__init__(opt, subset, video_name)
 
 
 
 
 
 
 
 
504
 
505
  def __del__(self):
506
  # Clean up temporary directory
@@ -513,111 +535,121 @@ class HFVideoDataSet(VideoDataSet):
513
 
514
  def eval_frame(opt, model, dataset):
515
  """Evaluate model frame by frame"""
516
- test_loader = torch.utils.data.DataLoader(
517
- dataset,
518
- batch_size=opt['batch_size'],
519
- shuffle=False,
520
- num_workers=0,
521
- pin_memory=False
522
- )
523
-
524
- labels_cls = {video_name: [] for video_name in dataset.video_list}
525
- labels_reg = {video_name: [] for video_name in dataset.video_list}
526
- output_cls = {video_name: [] for video_name in dataset.video_list}
527
- output_reg = {video_name: [] for video_name in dataset.video_list}
528
-
529
- model.eval()
530
- with torch.no_grad():
531
- for n_iter, batch_data in enumerate(test_loader):
532
- try:
533
- if len(batch_data) == 4:
534
- input_data, cls_label, reg_label, _ = batch_data
535
- else:
536
- input_data, cls_label, reg_label = batch_data
537
-
538
- input_data = input_data.to(device)
539
- cls_label = cls_label.to(device) if cls_label is not None else None
540
- reg_label = reg_label.to(device) if reg_label is not None else None
541
-
542
- act_cls, act_reg, _ = model(input_data.float())
543
- act_cls = torch.softmax(act_cls, dim=-1)
544
-
545
- for b in range(input_data.size(0)):
546
- batch_idx = n_iter * opt['batch_size'] + b
547
- if batch_idx < len(dataset.inputs):
548
- video_name = dataset.inputs[batch_idx][0]
549
- output_cls[video_name].append(act_cls[b, :].detach().cpu().numpy())
550
- output_reg[video_name].append(act_reg[b, :].detach().cpu().numpy())
551
-
552
- if cls_label is not None:
553
- labels_cls[video_name].append(cls_label[b, :].cpu().numpy())
554
- if reg_label is not None:
555
- labels_reg[video_name].append(reg_label[b, :].cpu().numpy())
556
 
557
- except Exception as e:
558
- print(f"Error in batch {n_iter}: {str(e)}")
559
- continue
560
-
561
- # Stack arrays
562
- for video_name in dataset.video_list:
563
- if output_cls[video_name]:
564
- output_cls[video_name] = np.stack(output_cls[video_name], axis=0)
565
- output_reg[video_name] = np.stack(output_reg[video_name], axis=0)
566
- if labels_cls[video_name]:
567
- labels_cls[video_name] = np.stack(labels_cls[video_name], axis=0)
568
- if labels_reg[video_name]:
569
- labels_reg[video_name] = np.stack(labels_reg[video_name], axis=0)
570
-
571
- return output_cls, output_reg, labels_cls, labels_reg
 
 
 
 
 
 
 
 
 
572
 
573
  def eval_map_nms(opt, dataset, output_cls, output_reg):
574
  """Evaluate with Non-Maximum Suppression"""
575
- result_dict = {}
576
- anchors = opt['anchors']
577
-
578
- for video_name in dataset.video_list:
579
- if video_name not in output_cls or len(output_cls[video_name]) == 0:
580
- result_dict[video_name] = []
581
- continue
582
-
583
- duration = dataset.video_len[video_name]
584
- video_time = float(dataset.video_dict[video_name]["duration"])
585
- frame_to_time = 100.0 * video_time / duration
586
-
587
- proposal_dict = []
588
 
589
- for idx in range(min(duration, len(output_cls[video_name]))):
590
- cls_anc = output_cls[video_name][idx]
591
- reg_anc = output_reg[video_name][idx]
 
 
 
 
 
592
 
593
- for anc_idx in range(len(anchors)):
594
- if anc_idx >= len(cls_anc):
595
- continue
596
-
597
- cls = np.argwhere(cls_anc[anc_idx][:-1] > opt['threshold']).reshape(-1)
598
- if len(cls) == 0:
599
- continue
600
-
601
- ed = idx + anchors[anc_idx] * reg_anc[anc_idx][0]
602
- length = anchors[anc_idx] * np.exp(reg_anc[anc_idx][1])
603
- st = ed - length
604
 
605
- for cidx in range(len(cls)):
606
- label = cls[cidx]
607
- if label < len(dataset.label_name):
608
- tmp_dict = {
609
- "segment": [float(st * frame_to_time / 100.0), float(ed * frame_to_time / 100.0)],
610
- "score": float(cls_anc[anc_idx][label]),
611
- "label": dataset.label_name[label],
612
- "gentime": float(idx * frame_to_time / 100.0)
613
- }
614
- proposal_dict.append(tmp_dict)
615
-
616
- # Apply NMS
617
- proposal_dict = non_max_suppression(proposal_dict, overlapThresh=opt['soft_nms'])
618
- result_dict[video_name] = proposal_dict
619
-
620
- return result_dict
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
621
 
622
  def load_ground_truth(opt, video_name):
623
  """Load ground truth annotations if available"""
@@ -649,6 +681,8 @@ def load_ground_truth(opt, video_name):
649
 
650
  def process_video(video_name, split_number, progress=gr.Progress()):
651
  """Process a single video for action localization"""
 
 
652
  try:
653
  if not video_name or video_name in ["Error: Could not load videos from HF dataset", "Error loading videos"]:
654
  return "Error: Please select a valid video name"
@@ -694,14 +728,18 @@ def process_video(video_name, split_number, progress=gr.Progress()):
694
  model.load_state_dict(checkpoint)
695
 
696
  model.eval()
 
697
 
698
  progress(0.4, desc=f"Downloading video features for {video_name}...")
699
 
700
  # Create dataset with HF integration
701
  try:
702
  dataset = HFVideoDataSet(opt, subset='test', video_name=video_name)
 
703
  except Exception as e:
704
- return f"Error downloading or loading video '{video_name}': {str(e)}\n\nPlease check:\n1. Video name is correct\n2. File exists in HF dataset\n3. Network connection is stable"
 
 
705
 
706
  if len(dataset.video_list) == 0:
707
  return f"Error: No video found with name '{video_name}' in dataset after download"
@@ -709,11 +747,23 @@ def process_video(video_name, split_number, progress=gr.Progress()):
709
  progress(0.6, desc="Running inference...")
710
 
711
  # Run inference
712
- output_cls, output_reg, labels_cls, labels_reg = eval_frame(opt, model, dataset)
 
 
 
 
 
 
713
 
714
  progress(0.8, desc="Processing results...")
715
 
716
- result_dict = eval_map_nms(opt, dataset, output_cls, output_reg)
 
 
 
 
 
 
717
 
718
  # Load ground truth
719
  gt_segments, duration = load_ground_truth(opt, video_name)
@@ -796,21 +846,39 @@ def process_video(video_name, split_number, progress=gr.Progress()):
796
  output_text += f"F1-Score: {f1:.3f}\n"
797
 
798
  progress(1.0, desc="Complete!")
 
799
  return output_text
800
 
801
  except Exception as e:
802
- import traceback
803
  error_details = traceback.format_exc()
804
- return f"Error processing video: {str(e)}\n\nDetailed error:\n{error_details}\n\nPlease check:\n1. Model checkpoint exists\n2. Video exists in HF dataset\n3. All dependencies are installed"
 
 
 
 
 
 
 
 
 
805
 
806
  def refresh_video_list():
807
  """Refresh the list of available videos"""
808
- return gr.Dropdown(choices=get_available_videos_from_hf())
 
 
 
 
 
809
 
810
  # Initialize available videos
811
  print("Loading available videos from Hugging Face dataset...")
812
- available_videos = get_available_videos_from_hf()
813
- if not available_videos or available_videos == ["Error loading videos"]:
 
 
 
 
814
  available_videos = ["Error: Could not load videos from HF dataset"]
815
 
816
  print(f"Available videos: {len(available_videos)} videos found")
@@ -867,7 +935,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="🎬 Temporal Action Localization"
867
 
868
  # Event handlers
869
  refresh_btn.click(
870
- fn=lambda: gr.Dropdown(choices=get_available_videos_from_hf()),
871
  outputs=video_dropdown
872
  )
873
 
 
371
 
372
 
373
 
374
+
375
+
376
+
377
+
378
+
379
  import os
380
  import json
381
  import torch
 
391
  from huggingface_hub import hf_hub_download, list_repo_files
392
  import tempfile
393
  import shutil
394
+ import traceback
395
 
396
  # Configuration
397
  VIS_CONFIG = {
 
479
 
480
  # Create temporary directory for this session
481
  self.temp_dir = tempfile.mkdtemp(prefix="hf_video_")
482
+ print(f"Created temp directory: {self.temp_dir}")
483
 
484
  # Download the specific video file if video_name is provided
485
  if video_name:
 
490
  # Ensure the temp directory exists
491
  os.makedirs(self.temp_dir, exist_ok=True)
492
 
493
+ # Copy to temp directory with expected structure - FIX: Add proper path separator
494
  temp_file_path = os.path.join(self.temp_dir, f"{video_name}.npz")
495
+ print(f"Copying {downloaded_path} to {temp_file_path}")
496
  shutil.copy2(downloaded_path, temp_file_path)
 
497
 
498
+ # Verify file exists and print debug info
499
  if not os.path.exists(temp_file_path):
500
  raise Exception(f"Failed to copy file to {temp_file_path}")
501
+ else:
502
+ print(f"Video file ready: {temp_file_path}")
503
+ print(f"File size: {os.path.getsize(temp_file_path)} bytes")
504
 
505
  except Exception as e:
506
  print(f"Error downloading video {video_name}: {str(e)}")
507
+ # Clean up temp directory on error
508
+ if hasattr(self, 'temp_dir') and os.path.exists(self.temp_dir):
509
+ shutil.rmtree(self.temp_dir)
510
  raise e
511
 
512
  # Set the feature path to our temp directory
513
  opt['video_feature_all_test'] = self.temp_dir
514
+ print(f"Set video_feature_all_test to: {opt['video_feature_all_test']}")
515
 
516
  # Initialize parent class
517
+ try:
518
+ super().__init__(opt, subset, video_name)
519
+ print(f"Successfully initialized dataset with {len(self.video_list)} videos")
520
+ except Exception as e:
521
+ print(f"Error initializing parent VideoDataSet: {str(e)}")
522
+ # Clean up temp directory on error
523
+ if hasattr(self, 'temp_dir') and os.path.exists(self.temp_dir):
524
+ shutil.rmtree(self.temp_dir)
525
+ raise e
526
 
527
  def __del__(self):
528
  # Clean up temporary directory
 
535
 
536
  def eval_frame(opt, model, dataset):
537
  """Evaluate model frame by frame"""
538
+ try:
539
+ test_loader = torch.utils.data.DataLoader(
540
+ dataset,
541
+ batch_size=opt['batch_size'],
542
+ shuffle=False,
543
+ num_workers=0,
544
+ pin_memory=False
545
+ )
546
+
547
+ labels_cls = {video_name: [] for video_name in dataset.video_list}
548
+ labels_reg = {video_name: [] for video_name in dataset.video_list}
549
+ output_cls = {video_name: [] for video_name in dataset.video_list}
550
+ output_reg = {video_name: [] for video_name in dataset.video_list}
551
+
552
+ model.eval()
553
+ with torch.no_grad():
554
+ for n_iter, batch_data in enumerate(test_loader):
555
+ try:
556
+ if len(batch_data) == 4:
557
+ input_data, cls_label, reg_label, _ = batch_data
558
+ else:
559
+ input_data, cls_label, reg_label = batch_data
560
+
561
+ input_data = input_data.to(device)
562
+ cls_label = cls_label.to(device) if cls_label is not None else None
563
+ reg_label = reg_label.to(device) if reg_label is not None else None
564
+
565
+ act_cls, act_reg, _ = model(input_data.float())
566
+ act_cls = torch.softmax(act_cls, dim=-1)
567
+
568
+ for b in range(input_data.size(0)):
569
+ batch_idx = n_iter * opt['batch_size'] + b
570
+ if batch_idx < len(dataset.inputs):
571
+ video_name = dataset.inputs[batch_idx][0]
572
+ output_cls[video_name].append(act_cls[b, :].detach().cpu().numpy())
573
+ output_reg[video_name].append(act_reg[b, :].detach().cpu().numpy())
 
 
 
 
574
 
575
+ if cls_label is not None:
576
+ labels_cls[video_name].append(cls_label[b, :].cpu().numpy())
577
+ if reg_label is not None:
578
+ labels_reg[video_name].append(reg_label[b, :].cpu().numpy())
579
+
580
+ except Exception as e:
581
+ print(f"Error in batch {n_iter}: {str(e)}")
582
+ continue
583
+
584
+ # Stack arrays
585
+ for video_name in dataset.video_list:
586
+ if output_cls[video_name]:
587
+ output_cls[video_name] = np.stack(output_cls[video_name], axis=0)
588
+ output_reg[video_name] = np.stack(output_reg[video_name], axis=0)
589
+ if labels_cls[video_name]:
590
+ labels_cls[video_name] = np.stack(labels_cls[video_name], axis=0)
591
+ if labels_reg[video_name]:
592
+ labels_reg[video_name] = np.stack(labels_reg[video_name], axis=0)
593
+
594
+ return output_cls, output_reg, labels_cls, labels_reg
595
+
596
+ except Exception as e:
597
+ print(f"Error in eval_frame: {str(e)}")
598
+ raise e
599
 
600
  def eval_map_nms(opt, dataset, output_cls, output_reg):
601
  """Evaluate with Non-Maximum Suppression"""
602
+ try:
603
+ result_dict = {}
604
+ anchors = opt['anchors']
 
 
 
 
 
 
 
 
 
 
605
 
606
+ for video_name in dataset.video_list:
607
+ if video_name not in output_cls or len(output_cls[video_name]) == 0:
608
+ result_dict[video_name] = []
609
+ continue
610
+
611
+ duration = dataset.video_len[video_name]
612
+ video_time = float(dataset.video_dict[video_name]["duration"])
613
+ frame_to_time = 100.0 * video_time / duration
614
 
615
+ proposal_dict = []
616
+
617
+ for idx in range(min(duration, len(output_cls[video_name]))):
618
+ cls_anc = output_cls[video_name][idx]
619
+ reg_anc = output_reg[video_name][idx]
 
 
 
 
 
 
620
 
621
+ for anc_idx in range(len(anchors)):
622
+ if anc_idx >= len(cls_anc):
623
+ continue
624
+
625
+ cls = np.argwhere(cls_anc[anc_idx][:-1] > opt['threshold']).reshape(-1)
626
+ if len(cls) == 0:
627
+ continue
628
+
629
+ ed = idx + anchors[anc_idx] * reg_anc[anc_idx][0]
630
+ length = anchors[anc_idx] * np.exp(reg_anc[anc_idx][1])
631
+ st = ed - length
632
+
633
+ for cidx in range(len(cls)):
634
+ label = cls[cidx]
635
+ if label < len(dataset.label_name):
636
+ tmp_dict = {
637
+ "segment": [float(st * frame_to_time / 100.0), float(ed * frame_to_time / 100.0)],
638
+ "score": float(cls_anc[anc_idx][label]),
639
+ "label": dataset.label_name[label],
640
+ "gentime": float(idx * frame_to_time / 100.0)
641
+ }
642
+ proposal_dict.append(tmp_dict)
643
+
644
+ # Apply NMS
645
+ proposal_dict = non_max_suppression(proposal_dict, overlapThresh=opt['soft_nms'])
646
+ result_dict[video_name] = proposal_dict
647
+
648
+ return result_dict
649
+
650
+ except Exception as e:
651
+ print(f"Error in eval_map_nms: {str(e)}")
652
+ raise e
653
 
654
  def load_ground_truth(opt, video_name):
655
  """Load ground truth annotations if available"""
 
681
 
682
  def process_video(video_name, split_number, progress=gr.Progress()):
683
  """Process a single video for action localization"""
684
+ dataset = None # Initialize dataset variable
685
+
686
  try:
687
  if not video_name or video_name in ["Error: Could not load videos from HF dataset", "Error loading videos"]:
688
  return "Error: Please select a valid video name"
 
728
  model.load_state_dict(checkpoint)
729
 
730
  model.eval()
731
+ print("Model loaded successfully")
732
 
733
  progress(0.4, desc=f"Downloading video features for {video_name}...")
734
 
735
  # Create dataset with HF integration
736
  try:
737
  dataset = HFVideoDataSet(opt, subset='test', video_name=video_name)
738
+ print(f"Dataset created successfully with {len(dataset.video_list)} videos")
739
  except Exception as e:
740
+ error_msg = f"Error downloading or loading video '{video_name}': {str(e)}\n\nPlease check:\n1. Video name is correct\n2. File exists in HF dataset\n3. Network connection is stable"
741
+ print(error_msg)
742
+ return error_msg
743
 
744
  if len(dataset.video_list) == 0:
745
  return f"Error: No video found with name '{video_name}' in dataset after download"
 
747
  progress(0.6, desc="Running inference...")
748
 
749
  # Run inference
750
+ try:
751
+ output_cls, output_reg, labels_cls, labels_reg = eval_frame(opt, model, dataset)
752
+ print("Inference completed successfully")
753
+ except Exception as e:
754
+ error_msg = f"Error during inference: {str(e)}"
755
+ print(error_msg)
756
+ return error_msg
757
 
758
  progress(0.8, desc="Processing results...")
759
 
760
+ try:
761
+ result_dict = eval_map_nms(opt, dataset, output_cls, output_reg)
762
+ print("NMS processing completed")
763
+ except Exception as e:
764
+ error_msg = f"Error during NMS processing: {str(e)}"
765
+ print(error_msg)
766
+ return error_msg
767
 
768
  # Load ground truth
769
  gt_segments, duration = load_ground_truth(opt, video_name)
 
846
  output_text += f"F1-Score: {f1:.3f}\n"
847
 
848
  progress(1.0, desc="Complete!")
849
+ print("Processing completed successfully")
850
  return output_text
851
 
852
  except Exception as e:
 
853
  error_details = traceback.format_exc()
854
+ error_msg = f"Error processing video: {str(e)}\n\nDetailed error:\n{error_details}\n\nPlease check:\n1. Model checkpoint exists\n2. Video exists in HF dataset\n3. All dependencies are installed"
855
+ print(error_msg)
856
+ return error_msg
857
+ finally:
858
+ # Ensure cleanup happens even if there's an error
859
+ if dataset is not None and hasattr(dataset, '__del__'):
860
+ try:
861
+ dataset.__del__()
862
+ except Exception as e:
863
+ print(f"Warning: Error during dataset cleanup: {e}")
864
 
865
  def refresh_video_list():
866
  """Refresh the list of available videos"""
867
+ try:
868
+ new_videos = get_available_videos_from_hf()
869
+ return gr.Dropdown(choices=new_videos)
870
+ except Exception as e:
871
+ print(f"Error refreshing video list: {e}")
872
+ return gr.Dropdown(choices=["Error refreshing videos"])
873
 
874
  # Initialize available videos
875
  print("Loading available videos from Hugging Face dataset...")
876
+ try:
877
+ available_videos = get_available_videos_from_hf()
878
+ if not available_videos or available_videos == ["Error loading videos"]:
879
+ available_videos = ["Error: Could not load videos from HF dataset"]
880
+ except Exception as e:
881
+ print(f"Error loading initial video list: {e}")
882
  available_videos = ["Error: Could not load videos from HF dataset"]
883
 
884
  print(f"Available videos: {len(available_videos)} videos found")
 
935
 
936
  # Event handlers
937
  refresh_btn.click(
938
+ fn=refresh_video_list,
939
  outputs=video_dropdown
940
  )
941