Spaces:

Darknsu
/

SAT

Build error

App Files Files Community

Darknsu commited on Jun 22, 2025

Commit

efa906f

verified ·

1 Parent(s): 0b97748

Update main.py

Browse files

Files changed (1) hide show

main.py +611 -2

main.py CHANGED Viewed

@@ -375,6 +375,615 @@
 import os
 import json
@@ -418,7 +1027,7 @@ def download_npz_file(video_name: str) -> str:
     """
     try:
         # Construct the file path in the dataset repo
-        file_path = f"{video_name}.npz"
         # Check if file already exists in cache
         local_path = os.path.join(CACHE_DIR, f"{video_name}.npz")
@@ -455,7 +1064,7 @@ def get_available_videos_from_hf():
         # Filter for .npz files in the I3D subfolder
         videos = []
         for file in files:
-            if file.startswith(f"") and file.endswith('.npz'):
                 # Extract the full filename without extension
                 # For files like "I3D/OP02-R02-TurkeySandwich.npz"
                 video_name = os.path.basename(file).replace('.npz', '')

+# import os
+# import json
+# import torch
+# import numpy as np
+# import gradio as gr
+# import opts_egtea as opts
+# from dataset import VideoDataSet, calc_iou
+# from models import MYNET, SuppressNet
+# from loss_func import cls_loss_func, regress_loss_func
+# from eval import evaluation_detection
+# from iou_utils import non_max_suppression, check_overlap_proposal
+# from typing import List, Dict, Optional
+# from huggingface_hub import hf_hub_download, list_repo_files
+# import tempfile
+# import shutil
+# import traceback
+# # Configuration
+# VIS_CONFIG = {
+#     'iou_threshold': 0.3,
+#     'min_segment_duration': 1.0,
+# }
+# # Hugging Face Dataset Configuration
+# HF_DATASET_REPO = "Darknsu/EGTEA_Dataset"
+# HF_DATASET_SUBFOLDER = "I3D"  # Adjust this based on your dataset structure
+# # Determine device
+# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# print(f"Using device: {device}")
+# # Create local cache directory for downloaded files
+# CACHE_DIR = "./hf_cache"
+# os.makedirs(CACHE_DIR, exist_ok=True)
+# def download_npz_file(video_name: str) -> str:
+#     """
+#     Download .npz file from Hugging Face dataset repository
+#     Returns: Local path to the downloaded file
+#     """
+#     try:
+#         # Construct the file path in the dataset repo
+#         file_path = f"{video_name}.npz"
+#         # Check if file already exists in cache
+#         local_path = os.path.join(CACHE_DIR, f"{video_name}.npz")
+#         if os.path.exists(local_path):
+#             print(f"Using cached file: {local_path}")
+#             return local_path
+#         # Download from Hugging Face dataset
+#         print(f"Downloading {file_path} from {HF_DATASET_REPO}...")
+#         downloaded_path = hf_hub_download(
+#             repo_id=HF_DATASET_REPO,
+#             filename=file_path,
+#             repo_type="dataset",
+#             cache_dir=CACHE_DIR
+#         )
+#         # Copy to our expected location for easier access
+#         shutil.copy2(downloaded_path, local_path)
+#         print(f"File downloaded and cached: {local_path}")
+#         return local_path
+#     except Exception as e:
+#         raise Exception(f"Failed to download {video_name}.npz: {str(e)}")
+# def get_available_videos_from_hf():
+#     """Get list of available videos from Hugging Face dataset repository"""
+#     try:
+#         print("Fetching available videos from Hugging Face dataset...")
+#         files = list_repo_files(
+#             repo_id=HF_DATASET_REPO,
+#             repo_type="dataset"
+#         )
+#         # Filter for .npz files in the I3D subfolder
+#         videos = []
+#         for file in files:
+#             if file.startswith(f"") and file.endswith('.npz'):
+#                 # Extract the full filename without extension
+#                 # For files like "I3D/OP02-R02-TurkeySandwich.npz"
+#                 video_name = os.path.basename(file).replace('.npz', '')
+#                 videos.append(video_name)
+#         videos = sorted(videos)
+#         print(f"Found {len(videos)} videos in dataset: {videos[:5]}{'...' if len(videos) > 5 else ''}")
+#         return videos
+#     except Exception as e:
+#         print(f"Error fetching videos from HF dataset: {str(e)}")
+#         return ["Error loading videos"]
+# class HFVideoDataSet(VideoDataSet):
+#     """
+#     Modified VideoDataSet that downloads files from Hugging Face on demand
+#     """
+#     def __init__(self, opt, subset='test', video_name=None):
+#         # Store the original video_feature_all_test path
+#         self.original_feature_path = opt['video_feature_all_test']
+#         # Create temporary directory for this session
+#         self.temp_dir = tempfile.mkdtemp(prefix="hf_video_")
+#         print(f"Created temp directory: {self.temp_dir}")
+#         # Download the specific video file if video_name is provided
+#         if video_name:
+#             try:
+#                 print(f"Downloading features for video: {video_name}")
+#                 downloaded_path = download_npz_file(video_name)
+#                 # Ensure the temp directory exists
+#                 os.makedirs(self.temp_dir, exist_ok=True)
+#                 # Copy to temp directory with expected structure - FIX: Add proper path separator
+#                 temp_file_path = os.path.join(self.temp_dir, f"{video_name}.npz")
+#                 print(f"Copying {downloaded_path} to {temp_file_path}")
+#                 shutil.copy2(downloaded_path, temp_file_path)
+#                 # Verify file exists and print debug info
+#                 if not os.path.exists(temp_file_path):
+#                     raise Exception(f"Failed to copy file to {temp_file_path}")
+#                 else:
+#                     print(f"Video file ready: {temp_file_path}")
+#                     print(f"File size: {os.path.getsize(temp_file_path)} bytes")
+#             except Exception as e:
+#                 print(f"Error downloading video {video_name}: {str(e)}")
+#                 # Clean up temp directory on error
+#                 if hasattr(self, 'temp_dir') and os.path.exists(self.temp_dir):
+#                     shutil.rmtree(self.temp_dir)
+#                 raise e
+#         # Set the feature path to our temp directory
+#         opt['video_feature_all_test'] = self.temp_dir
+#         print(f"Set video_feature_all_test to: {opt['video_feature_all_test']}")
+#         # Initialize parent class
+#         try:
+#             super().__init__(opt, subset, video_name)
+#             print(f"Successfully initialized dataset with {len(self.video_list)} videos")
+#         except Exception as e:
+#             print(f"Error initializing parent VideoDataSet: {str(e)}")
+#             # Clean up temp directory on error
+#             if hasattr(self, 'temp_dir') and os.path.exists(self.temp_dir):
+#                 shutil.rmtree(self.temp_dir)
+#             raise e
+#     def __del__(self):
+#         # Clean up temporary directory
+#         try:
+#             if hasattr(self, 'temp_dir') and os.path.exists(self.temp_dir):
+#                 shutil.rmtree(self.temp_dir)
+#                 print(f"Cleaned up temp directory: {self.temp_dir}")
+#         except Exception as e:
+#             print(f"Warning: Could not clean up temp directory: {e}")
+# def eval_frame(opt, model, dataset):
+#     """Evaluate model frame by frame"""
+#     try:
+#         test_loader = torch.utils.data.DataLoader(
+#             dataset,
+#             batch_size=opt['batch_size'],
+#             shuffle=False,
+#             num_workers=0,
+#             pin_memory=False
+#         )
+#         labels_cls = {video_name: [] for video_name in dataset.video_list}
+#         labels_reg = {video_name: [] for video_name in dataset.video_list}
+#         output_cls = {video_name: [] for video_name in dataset.video_list}
+#         output_reg = {video_name: [] for video_name in dataset.video_list}
+#         model.eval()
+#         with torch.no_grad():
+#             for n_iter, batch_data in enumerate(test_loader):
+#                 try:
+#                     if len(batch_data) == 4:
+#                         input_data, cls_label, reg_label, _ = batch_data
+#                     else:
+#                         input_data, cls_label, reg_label = batch_data
+#                     input_data = input_data.to(device)
+#                     cls_label = cls_label.to(device) if cls_label is not None else None
+#                     reg_label = reg_label.to(device) if reg_label is not None else None
+#                     act_cls, act_reg, _ = model(input_data.float())
+#                     act_cls = torch.softmax(act_cls, dim=-1)
+#                     for b in range(input_data.size(0)):
+#                         batch_idx = n_iter * opt['batch_size'] + b
+#                         if batch_idx < len(dataset.inputs):
+#                             video_name = dataset.inputs[batch_idx][0]
+#                             output_cls[video_name].append(act_cls[b, :].detach().cpu().numpy())
+#                             output_reg[video_name].append(act_reg[b, :].detach().cpu().numpy())
+#                             if cls_label is not None:
+#                                 labels_cls[video_name].append(cls_label[b, :].cpu().numpy())
+#                             if reg_label is not None:
+#                                 labels_reg[video_name].append(reg_label[b, :].cpu().numpy())
+#                 except Exception as e:
+#                     print(f"Error in batch {n_iter}: {str(e)}")
+#                     continue
+#         # Stack arrays
+#         for video_name in dataset.video_list:
+#             if output_cls[video_name]:
+#                 output_cls[video_name] = np.stack(output_cls[video_name], axis=0)
+#                 output_reg[video_name] = np.stack(output_reg[video_name], axis=0)
+#             if labels_cls[video_name]:
+#                 labels_cls[video_name] = np.stack(labels_cls[video_name], axis=0)
+#             if labels_reg[video_name]:
+#                 labels_reg[video_name] = np.stack(labels_reg[video_name], axis=0)
+#         return output_cls, output_reg, labels_cls, labels_reg
+#     except Exception as e:
+#         print(f"Error in eval_frame: {str(e)}")
+#         raise e
+# def eval_map_nms(opt, dataset, output_cls, output_reg):
+#     """Evaluate with Non-Maximum Suppression"""
+#     try:
+#         result_dict = {}
+#         anchors = opt['anchors']
+#         for video_name in dataset.video_list:
+#             if video_name not in output_cls or len(output_cls[video_name]) == 0:
+#                 result_dict[video_name] = []
+#                 continue
+#             duration = dataset.video_len[video_name]
+#             video_time = float(dataset.video_dict[video_name]["duration"])
+#             frame_to_time = 100.0 * video_time / duration
+#             proposal_dict = []
+#             for idx in range(min(duration, len(output_cls[video_name]))):
+#                 cls_anc = output_cls[video_name][idx]
+#                 reg_anc = output_reg[video_name][idx]
+#                 for anc_idx in range(len(anchors)):
+#                     if anc_idx >= len(cls_anc):
+#                         continue
+#                     cls = np.argwhere(cls_anc[anc_idx][:-1] > opt['threshold']).reshape(-1)
+#                     if len(cls) == 0:
+#                         continue
+#                     ed = idx + anchors[anc_idx] * reg_anc[anc_idx][0]
+#                     length = anchors[anc_idx] * np.exp(reg_anc[anc_idx][1])
+#                     st = ed - length
+#                     for cidx in range(len(cls)):
+#                         label = cls[cidx]
+#                         if label < len(dataset.label_name):
+#                             tmp_dict = {
+#                                 "segment": [float(st * frame_to_time / 100.0), float(ed * frame_to_time / 100.0)],
+#                                 "score": float(cls_anc[anc_idx][label]),
+#                                 "label": dataset.label_name[label],
+#                                 "gentime": float(idx * frame_to_time / 100.0)
+#                             }
+#                             proposal_dict.append(tmp_dict)
+#             # Apply NMS
+#             proposal_dict = non_max_suppression(proposal_dict, overlapThresh=opt['soft_nms'])
+#             result_dict[video_name] = proposal_dict
+#         return result_dict
+#     except Exception as e:
+#         print(f"Error in eval_map_nms: {str(e)}")
+#         raise e
+# def load_ground_truth(opt, video_name):
+#     """Load ground truth annotations if available"""
+#     gt_segments = []
+#     duration = 0
+#     try:
+#         video_anno_file = opt["video_anno"].format(opt["split"])
+#         if os.path.exists(video_anno_file):
+#             with open(video_anno_file, 'r') as f:
+#                 anno_data = json.load(f)
+#             if video_name in anno_data['database']:
+#                 gt_annotations = anno_data['database'][video_name]['annotations']
+#                 duration = anno_data['database'][video_name]['duration']
+#                 for anno in gt_annotations:
+#                     start, end = anno['segment']
+#                     gt_segments.append({
+#                         'label': anno['label'],
+#                         'start': start,
+#                         'end': end,
+#                         'duration': end - start
+#                     })
+#     except Exception as e:
+#         print(f"Could not load ground truth: {str(e)}")
+#     return gt_segments, duration
+# def process_video(video_name, split_number, progress=gr.Progress()):
+#     """Process a single video for action localization"""
+#     dataset = None  # Initialize dataset variable
+#     try:
+#         if not video_name or video_name in ["Error: Could not load videos from HF dataset", "Error loading videos"]:
+#             return "Error: Please select a valid video name"
+#         progress(0.1, desc="Initializing...")
+#         # Parse options
+#         opt = opts.parse_opt()
+#         opt = vars(opt)
+#         opt['mode'] = 'test'
+#         opt['split'] = str(split_number)
+#         opt['checkpoint_path'] = './checkpoint'
+#         opt['video_feature_all_test'] = './data/I3D/'  # This will be overridden by HFVideoDataSet
+#         opt['anchors'] = [int(item) for item in opt['anchors'].split(',')]
+#         opt['batch_size'] = 1
+#         progress(0.2, desc="Checking model checkpoint...")
+#         # Check if required files exist
+#         checkpoint_path = './checkpoint/01_ckp_best.pth.tar'
+#         if not os.path.exists(checkpoint_path):
+#             # Try alternative locations
+#             alt_paths = ['./01_ckp_best.pth.tar', '01_ckp_best.pth.tar']
+#             checkpoint_path = None
+#             for alt_path in alt_paths:
+#                 if os.path.exists(alt_path):
+#                     checkpoint_path = alt_path
+#                     break
+#             if checkpoint_path is None:
+#                 return "Error: Model checkpoint not found. Please ensure '01_ckp_best.pth.tar' is in the repository."
+#         progress(0.3, desc="Loading model...")
+#         # Load model
+#         model = MYNET(opt).to(device)
+#         checkpoint = torch.load(checkpoint_path, map_location=device)
+#         # Handle different checkpoint formats
+#         if 'state_dict' in checkpoint:
+#             model.load_state_dict(checkpoint['state_dict'])
+#         else:
+#             model.load_state_dict(checkpoint)
+#         model.eval()
+#         print("Model loaded successfully")
+#         progress(0.4, desc=f"Downloading video features for {video_name}...")
+#         # Create dataset with HF integration
+#         try:
+#             dataset = HFVideoDataSet(opt, subset='test', video_name=video_name)
+#             print(f"Dataset created successfully with {len(dataset.video_list)} videos")
+#         except Exception as e:
+#             error_msg = f"Error downloading or loading video '{video_name}': {str(e)}\n\nPlease check:\n1. Video name is correct\n2. File exists in HF dataset\n3. Network connection is stable"
+#             print(error_msg)
+#             return error_msg
+#         if len(dataset.video_list) == 0:
+#             return f"Error: No video found with name '{video_name}' in dataset after download"
+#         progress(0.6, desc="Running inference...")
+#         # Run inference
+#         try:
+#             output_cls, output_reg, labels_cls, labels_reg = eval_frame(opt, model, dataset)
+#             print("Inference completed successfully")
+#         except Exception as e:
+#             error_msg = f"Error during inference: {str(e)}"
+#             print(error_msg)
+#             return error_msg
+#         progress(0.8, desc="Processing results...")
+#         try:
+#             result_dict = eval_map_nms(opt, dataset, output_cls, output_reg)
+#             print("NMS processing completed")
+#         except Exception as e:
+#             error_msg = f"Error during NMS processing: {str(e)}"
+#             print(error_msg)
+#             return error_msg
+#         # Load ground truth
+#         gt_segments, duration = load_ground_truth(opt, video_name)
+#         # Process predictions
+#         pred_segments = []
+#         for pred in result_dict.get(video_name, []):
+#             start, end = pred['segment']
+#             pred_segments.append({
+#                 'label': pred['label'],
+#                 'start': start,
+#                 'end': end,
+#                 'duration': end - start,
+#                 'score': pred['score']
+#             })
+#         progress(0.9, desc="Generating output...")
+#         # Generate output text
+#         output_text = f"Predicted Actions for Video: {video_name}\n"
+#         output_text += "=" * 50 + "\n\n"
+#         if pred_segments:
+#             output_text += "PREDICTED ACTIONS:\n"
+#             output_text += "-" * 30 + "\n"
+#             for i, pred in enumerate(pred_segments, 1):
+#                 output_text += f"{i}. {pred['label']}\n"
+#                 output_text += f"   Time: [{pred['start']:.2f}s - {pred['end']:.2f}s]\n"
+#                 output_text += f"   Duration: {pred['duration']:.2f}s\n"
+#                 output_text += f"   Confidence: {pred['score']:.3f}\n\n"
+#         else:
+#             output_text += "No actions detected above threshold.\n\n"
+#         # Add ground truth comparison if available
+#         if gt_segments:
+#             output_text += "\nGROUND TRUTH COMPARISON:\n"
+#             output_text += "-" * 30 + "\n"
+#             # Calculate basic metrics
+#             matched_count = 0
+#             total_pred = len(pred_segments)
+#             total_gt = len(gt_segments)
+#             for gt in gt_segments:
+#                 output_text += f"GT: {gt['label']} [{gt['start']:.2f}s - {gt['end']:.2f}s]\n"
+#                 # Find best matching prediction
+#                 best_match = None
+#                 best_iou = 0
+#                 for pred in pred_segments:
+#                     # Simple overlap calculation
+#                     overlap_start = max(gt['start'], pred['start'])
+#                     overlap_end = min(gt['end'], pred['end'])
+#                     if overlap_end > overlap_start:
+#                         overlap = overlap_end - overlap_start
+#                         union = (gt['end'] - gt['start']) + (pred['end'] - pred['start']) - overlap
+#                         iou = overlap / union if union > 0 else 0
+#                         if iou > best_iou:
+#                             best_iou = iou
+#                             best_match = pred
+#                 if best_match and best_iou > VIS_CONFIG['iou_threshold']:
+#                     matched_count += 1
+#                     output_text += f"    → Matched with: {best_match['label']} (IoU: {best_iou:.3f})\n"
+#                 else:
+#                     output_text += f"    → No match found\n"
+#                 output_text += "\n"
+#             # Summary statistics
+#             precision = matched_count / total_pred if total_pred > 0 else 0
+#             recall = matched_count / total_gt if total_gt > 0 else 0
+#             f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0
+#             output_text += f"\nSUMMARY STATISTICS:\n"
+#             output_text += f"Total Predictions: {total_pred}\n"
+#             output_text += f"Total Ground Truth: {total_gt}\n"
+#             output_text += f"Matched: {matched_count}\n"
+#             output_text += f"Precision: {precision:.3f}\n"
+#             output_text += f"Recall: {recall:.3f}\n"
+#             output_text += f"F1-Score: {f1:.3f}\n"
+#         progress(1.0, desc="Complete!")
+#         print("Processing completed successfully")
+#         return output_text
+#     except Exception as e:
+#         error_details = traceback.format_exc()
+#         error_msg = f"Error processing video: {str(e)}\n\nDetailed error:\n{error_details}\n\nPlease check:\n1. Model checkpoint exists\n2. Video exists in HF dataset\n3. All dependencies are installed"
+#         print(error_msg)
+#         return error_msg
+#     finally:
+#         # Ensure cleanup happens even if there's an error
+#         if dataset is not None and hasattr(dataset, '__del__'):
+#             try:
+#                 dataset.__del__()
+#             except Exception as e:
+#                 print(f"Warning: Error during dataset cleanup: {e}")
+# def refresh_video_list():
+#     """Refresh the list of available videos"""
+#     try:
+#         new_videos = get_available_videos_from_hf()
+#         return gr.Dropdown(choices=new_videos)
+#     except Exception as e:
+#         print(f"Error refreshing video list: {e}")
+#         return gr.Dropdown(choices=["Error refreshing videos"])
+# # Initialize available videos
+# print("Loading available videos from Hugging Face dataset...")
+# try:
+#     available_videos = get_available_videos_from_hf()
+#     if not available_videos or available_videos == ["Error loading videos"]:
+#         available_videos = ["Error: Could not load videos from HF dataset"]
+# except Exception as e:
+#     print(f"Error loading initial video list: {e}")
+#     available_videos = ["Error: Could not load videos from HF dataset"]
+# print(f"Available videos: {len(available_videos)} videos found")
+# # Gradio Interface
+# with gr.Blocks(theme=gr.themes.Soft(), title="🎬 Temporal Action Localization") as iface:
+#     gr.Markdown("""
+#     # 🎬 Temporal Action Localization
+#     This app performs temporal action localization on videos using I3D features loaded dynamically from Hugging Face datasets.
+#     **Features:**
+#     - ✅ Dynamic loading from HF dataset repository
+#     - ✅ Real-time inference with progress tracking
+#     - ✅ Ground truth comparison when available
+#     - ✅ Detailed action predictions with confidence scores
+#     """)
+#     with gr.Row():
+#         with gr.Column(scale=1):
+#             video_dropdown = gr.Dropdown(
+#                 label="Select Video",
+#                 choices=available_videos,
+#                 value=available_videos[0] if available_videos and "Error" not in available_videos[0] else None,
+#                 info="Videos loaded from Hugging Face dataset"
+#             )
+#             split_dropdown = gr.Dropdown(
+#                 label="Split Number",
+#                 choices=["1", "2", "3"],
+#                 value="1",
+#                 info="Dataset split for annotations"
+#             )
+#             refresh_btn = gr.Button("🔄 Refresh Video List", variant="secondary")
+#             submit_btn = gr.Button("🚀 Run Action Localization", variant="primary")
+#         with gr.Column(scale=2):
+#             output_text = gr.Textbox(
+#                 label="Action Predictions",
+#                 lines=25,
+#                 max_lines=50,
+#                 show_copy_button=True,
+#                 placeholder="Results will appear here..."
+#             )
+#     gr.Markdown(f"""
+#     **Dataset Source:** [{HF_DATASET_REPO}](https://huggingface.co/datasets/{HF_DATASET_REPO})
+#     **Requirements:**
+#     - Model checkpoint: `01_ckp_best.pth.tar` in repository root
+#     - Video features: Automatically downloaded from HF dataset
+#     """)
+#     # Event handlers
+#     refresh_btn.click(
+#         fn=refresh_video_list,
+#         outputs=video_dropdown
+#     )
+#     submit_btn.click(
+#         fn=process_video,
+#         inputs=[video_dropdown, split_dropdown],
+#         outputs=output_text
+#     )
+#     # Example
+#     if available_videos and "Error" not in available_videos[0]:
+#         gr.Examples(
+#             examples=[[available_videos[0], "1"]],
+#             inputs=[video_dropdown, split_dropdown],
+#             fn=process_video,
+#             outputs=output_text,
+#             cache_examples=False
+#         )
+# if __name__ == '__main__':
+#     print(f"Available videos: {len(available_videos)}")
+#     print(f"Using device: {device}")
+#     print(f"HF Dataset: {HF_DATASET_REPO}")
+#     iface.launch(
+#         server_name="0.0.0.0",
+#         server_port=7860,
+#         share=False
+#     )
 import os
 import json
     """
     try:
         # Construct the file path in the dataset repo
+        file_path = f"{HF_DATASET_SUBFOLDER}/{video_name}.npz"
         # Check if file already exists in cache
         local_path = os.path.join(CACHE_DIR, f"{video_name}.npz")
         # Filter for .npz files in the I3D subfolder
         videos = []
         for file in files:
+            if file.startswith(f"{HF_DATASET_SUBFOLDER}/") and file.endswith('.npz'):
                 # Extract the full filename without extension
                 # For files like "I3D/OP02-R02-TurkeySandwich.npz"
                 video_name = os.path.basename(file).replace('.npz', '')