Spaces:

Darknsu
/

SAT

Running

App Files Files Community

Darknsu committed on Jun 22, 2025

Commit

ef2f1f1

verified ·

1 Parent(s): 807bbf8

Update main.py

Browse files

Files changed (1) hide show

main.py +200 -91

main.py CHANGED Viewed

@@ -372,7 +372,6 @@
 import os
 import json
 import torch
@@ -386,7 +385,8 @@ from eval import evaluation_detection
 from iou_utils import non_max_suppression, check_overlap_proposal
 from typing import List, Dict, Optional
 from huggingface_hub import hf_hub_download, list_repo_files
-from pathlib import Path
 # Configuration
 VIS_CONFIG = {
@@ -394,40 +394,107 @@ VIS_CONFIG = {
     'min_segment_duration': 1.0,
 }
-# Cache directory for downloaded .npz files
-CACHE_DIR = Path("./data/I3D")
-CACHE_DIR.mkdir(parents=True, exist_ok=True)
-# Hugging Face dataset repository
 HF_DATASET_REPO = "Darknsu/EGTEA_Dataset"
-HF_NPZ_SUBFOLDER = "features"  # Adjust if .npz files are in a different subfolder
 # Determine device
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 print(f"Using device: {device}")
 def download_npz_file(video_name: str) -> str:
-    """Download .npz file from HF dataset repo or return cached path"""
-    npz_filename = f"{video_name}.npz"
-    cache_path = CACHE_DIR / npz_filename
-    if cache_path.exists():
-        return str(cache_path)
     try:
-        # Download the .npz file to cache
         downloaded_path = hf_hub_download(
             repo_id=HF_DATASET_REPO,
-            filename=f"{npz_filename}",
             repo_type="dataset",
-            cache_dir=CACHE_DIR,
-            local_dir=CACHE_DIR,
-            local_dir_use_symlinks=False
         )
-        return downloaded_path
     except Exception as e:
-        print(f"Error downloading {npz_filename}: {str(e)}")
-        return None
 def eval_frame(opt, model, dataset):
     """Evaluate model frame by frame"""
@@ -565,28 +632,38 @@ def load_ground_truth(opt, video_name):
     return gt_segments, duration
-def process_video(video_name, split_number):
     """Process a single video for action localization"""
     try:
         # Parse options
         opt = opts.parse_opt()
         opt = vars(opt)
         opt['mode'] = 'test'
         opt['split'] = str(split_number)
         opt['checkpoint_path'] = './checkpoint'
-        opt['video_feature_all_test'] = str(CACHE_DIR)  # Use cache directory
         opt['anchors'] = [int(item) for item in opt['anchors'].split(',')]
         opt['batch_size'] = 1
         # Check if required files exist
         checkpoint_path = './checkpoint/01_ckp_best.pth.tar'
         if not os.path.exists(checkpoint_path):
-            return "Error: Model checkpoint not found at ./checkpoint/01_ckp_best.pth.tar"
-        # Download or get cached .npz file
-        npz_path = download_npz_file(video_name)
-        if not npz_path:
-            return f"Error: Could not download feature file for {video_name}"
         # Load model
         model = MYNET(opt).to(device)
@@ -600,14 +677,21 @@ def process_video(video_name, split_number):
         model.eval()
-        # Create dataset
-        dataset = VideoDataSet(opt, subset='test', video_name=video_name)
         if len(dataset.video_list) == 0:
-            return f"Error: No video found with name '{video_name}' in dataset"
         # Run inference
         output_cls, output_reg, labels_cls, labels_reg = eval_frame(opt, model, dataset)
         result_dict = eval_map_nms(opt, dataset, output_cls, output_reg)
         # Load ground truth
@@ -625,6 +709,8 @@ def process_video(video_name, split_number):
                 'score': pred['score']
             })
         # Generate output text
         output_text = f"Predicted Actions for Video: {video_name}\n"
         output_text += "=" * 50 + "\n\n"
@@ -688,79 +774,102 @@ def process_video(video_name, split_number):
             output_text += f"Recall: {recall:.3f}\n"
             output_text += f"F1-Score: {f1:.3f}\n"
         return output_text
     except Exception as e:
-        return f"Error processing video: {str(e)}\n\nPlease check:\n1. Model checkpoint exists\n2. Feature file exists in HF dataset\n3. All dependencies are installed"
-def get_available_videos():
-    """Get list of available videos from HF dataset repo"""
-    try:
-        # List all files in the features subfolder
-        repo_files = list_repo_files(
-            repo_id=HF_DATASET_REPO,
-            repo_type="dataset",
-        )
-        # Filter for .npz files and extract video names
-        videos = [file.replace('.npz', '') for file in repo_files if file.endswith('.npz')]
-        return sorted(videos) if videos else ["No videos found"]
-    except Exception as e:
-        print(f"Error listing videos: {str(e)}")
-        return ["No videos found"]
 # Initialize available videos
-available_videos = get_available_videos()
 # Gradio Interface
-iface = gr.Interface(
-    fn=process_video,
-    inputs=[
-        gr.Dropdown(
-            label="Select Video",
-            choices=available_videos,
-            value=available_videos[0] if available_videos else None,
-            info="Choose from videos in HF dataset: Darknsu/EGTEA_Dataset"
-        ),
-        gr.Dropdown(
-            label="Split Number",
-            choices=["1", "2", "3"],
-            value="1",
-            info="Dataset split for annotations"
-        )
-    ],
-    outputs=[
-        gr.Textbox(
-            label="Action Predictions",
-            lines=20,
-            max_lines=50,
-            show_copy_button=True
-        )
-    ],
-    title="🎬 Temporal Action Localization",
-    description="""
-    This app performs temporal action localization on videos using I3D features from the EGTEA dataset.
-    **How to use:**
-    1. Select a video from the dropdown (videos are loaded from HF dataset: Darknsu/EGTEA_Dataset)
-    2. Choose the annotation split number
-    3. Click Submit to get action predictions
     **Requirements:**
-    - Model checkpoint: `01_ckp_best.pth.tar` in root directory
-    - Video features: Downloaded from HF dataset at runtime
-    """,
-    examples=[
-        [available_videos[0] if available_videos and available_videos[0] != "No videos found" else "example_video", "1"],
-    ] if available_videos and available_videos[0] != "No videos found" else None,
-    cache_examples=False,
-    theme=gr.themes.Soft()
-)
 if __name__ == '__main__':
-    print(f"Available videos: {available_videos}")
     print(f"Using device: {device}")
     iface.launch(
         server_name="0.0.0.0",
-        server_port=4444,
         share=False
     )

 import os
 import json
 import torch
 from iou_utils import non_max_suppression, check_overlap_proposal
 from typing import List, Dict, Optional
 from huggingface_hub import hf_hub_download, list_repo_files
+import tempfile
+import shutil
 # Configuration
 VIS_CONFIG = {
     'min_segment_duration': 1.0,
 }
+# Hugging Face Dataset Configuration
+# Feature files are requested as "<HF_DATASET_SUBFOLDER>/<video_name>.npz" from this dataset repo.
 HF_DATASET_REPO = "Darknsu/EGTEA_Dataset"
+HF_DATASET_SUBFOLDER = "I3D"  # Adjust this based on your dataset structure
 # Determine device
+# Use CUDA when torch reports it as available, otherwise fall back to the CPU.
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 print(f"Using device: {device}")
+# Create local cache directory for downloaded files
+# (runs at import time; exist_ok=True makes it a no-op when the directory already exists)
+CACHE_DIR = "./hf_cache"
+os.makedirs(CACHE_DIR, exist_ok=True)
 def download_npz_file(video_name: str) -> str:
+    """
+    Download .npz file from Hugging Face dataset repository
+    Returns: Local path to the downloaded file
+    """
     try:
+        # Construct the file path in the dataset repo
+        file_path = f"{HF_DATASET_SUBFOLDER}/{video_name}.npz"
+        # Check if file already exists in cache
+        local_path = os.path.join(CACHE_DIR, f"{video_name}.npz")
+        if os.path.exists(local_path):
+            print(f"Using cached file: {local_path}")
+            return local_path
+        # Download from Hugging Face dataset
+        print(f"Downloading {file_path} from {HF_DATASET_REPO}...")
         downloaded_path = hf_hub_download(
             repo_id=HF_DATASET_REPO,
+            filename=file_path,
             repo_type="dataset",
+            cache_dir=CACHE_DIR
+        )
+        # Copy to our expected location for easier access
+        shutil.copy2(downloaded_path, local_path)
+        print(f"File downloaded and cached: {local_path}")
+        return local_path
+    except Exception as e:
+        raise Exception(f"Failed to download {video_name}.npz: {str(e)}")
+def get_available_videos_from_hf():
+    """Get list of available videos from Hugging Face dataset repository"""
+    try:
+        print("Fetching available videos from Hugging Face dataset...")
+        files = list_repo_files(
+            repo_id=HF_DATASET_REPO,
+            repo_type="dataset"
         )
+        # Filter for .npz files in the I3D subfolder
+        videos = []
+        for file in files:
+            if file.startswith(f"{HF_DATASET_SUBFOLDER}/") and file.endswith('.npz'):
+                video_name = os.path.basename(file).replace('.npz', '')
+                videos.append(video_name)
+        videos = sorted(videos)
+        print(f"Found {len(videos)} videos in dataset")
+        return videos
     except Exception as e:
+        print(f"Error fetching videos from HF dataset: {str(e)}")
+        return ["Error loading videos"]
+class HFVideoDataSet(VideoDataSet):
+    """
+    Modified VideoDataSet that downloads files from Hugging Face on demand
+    """
+    def __init__(self, opt, subset='test', video_name=None):
+        # Store the original video_feature_all_test path
+        self.original_feature_path = opt['video_feature_all_test']
+        # Create temporary directory for this session
+        self.temp_dir = tempfile.mkdtemp(prefix="hf_video_")
+        opt['video_feature_all_test'] = self.temp_dir
+        # Download the specific video file if video_name is provided
+        if video_name:
+            try:
+                downloaded_path = download_npz_file(video_name)
+                # Copy to temp directory with expected structure
+                temp_file_path = os.path.join(self.temp_dir, f"{video_name}.npz")
+                shutil.copy2(downloaded_path, temp_file_path)
+                print(f"Video file ready: {temp_file_path}")
+            except Exception as e:
+                print(f"Warning: Could not download video {video_name}: {str(e)}")
+        # Initialize parent class
+        super().__init__(opt, subset, video_name)
+    def __del__(self):
+        # Clean up temporary directory
+        try:
+            if hasattr(self, 'temp_dir') and os.path.exists(self.temp_dir):
+                shutil.rmtree(self.temp_dir)
+        except:
+            pass
 def eval_frame(opt, model, dataset):
     """Evaluate model frame by frame"""
     return gt_segments, duration
+def process_video(video_name, split_number, progress=gr.Progress()):
     """Process a single video for action localization"""
     try:
+        progress(0.1, desc="Initializing...")
         # Parse options
         opt = opts.parse_opt()
         opt = vars(opt)
         opt['mode'] = 'test'
         opt['split'] = str(split_number)
         opt['checkpoint_path'] = './checkpoint'
+        opt['video_feature_all_test'] = './data/I3D/'  # This will be overridden by HFVideoDataSet
         opt['anchors'] = [int(item) for item in opt['anchors'].split(',')]
         opt['batch_size'] = 1
+        progress(0.2, desc="Checking model checkpoint...")
         # Check if required files exist
         checkpoint_path = './checkpoint/01_ckp_best.pth.tar'
         if not os.path.exists(checkpoint_path):
+            # Try alternative locations
+            alt_paths = ['./01_ckp_best.pth.tar', '01_ckp_best.pth.tar']
+            checkpoint_path = None
+            for alt_path in alt_paths:
+                if os.path.exists(alt_path):
+                    checkpoint_path = alt_path
+                    break
+            if checkpoint_path is None:
+                return "Error: Model checkpoint not found. Please ensure '01_ckp_best.pth.tar' is in the repository."
+        progress(0.3, desc="Loading model...")
         # Load model
         model = MYNET(opt).to(device)
         model.eval()
+        progress(0.4, desc=f"Downloading video features for {video_name}...")
+        # Create dataset with HF integration
+        dataset = HFVideoDataSet(opt, subset='test', video_name=video_name)
         if len(dataset.video_list) == 0:
+            return f"Error: No video found with name '{video_name}' in dataset or failed to download"
+        progress(0.6, desc="Running inference...")
         # Run inference
         output_cls, output_reg, labels_cls, labels_reg = eval_frame(opt, model, dataset)
+        progress(0.8, desc="Processing results...")
         result_dict = eval_map_nms(opt, dataset, output_cls, output_reg)
         # Load ground truth
                 'score': pred['score']
             })
+        progress(0.9, desc="Generating output...")
         # Generate output text
         output_text = f"Predicted Actions for Video: {video_name}\n"
         output_text += "=" * 50 + "\n\n"
             output_text += f"Recall: {recall:.3f}\n"
             output_text += f"F1-Score: {f1:.3f}\n"
+        progress(1.0, desc="Complete!")
         return output_text
     except Exception as e:
+        return f"Error processing video: {str(e)}\n\nPlease check:\n1. Model checkpoint exists\n2. Video exists in HF dataset\n3. All dependencies are installed"
+def refresh_video_list():
+    """Refresh the list of available videos"""
+    return gr.Dropdown(choices=get_available_videos_from_hf())
 # Initialize available videos
+print("Loading available videos from Hugging Face dataset...")
+available_videos = get_available_videos_from_hf()
+if not available_videos or available_videos == ["Error loading videos"]:
+    available_videos = ["Error: Could not load videos from HF dataset"]
+print(f"Available videos: {len(available_videos)} videos found")
 # Gradio Interface
+with gr.Blocks(theme=gr.themes.Soft(), title="🎬 Temporal Action Localization") as iface:
+    gr.Markdown("""
+    # 🎬 Temporal Action Localization
+    This app performs temporal action localization on videos using I3D features loaded dynamically from Hugging Face datasets.
+    **Features:**
+    - ✅ Dynamic loading from HF dataset repository
+    - ✅ Real-time inference with progress tracking
+    - ✅ Ground truth comparison when available
+    - ✅ Detailed action predictions with confidence scores
+    """)
+    with gr.Row():
+        with gr.Column(scale=1):
+            video_dropdown = gr.Dropdown(
+                label="Select Video",
+                choices=available_videos,
+                value=available_videos[0] if available_videos and "Error" not in available_videos[0] else None,
+                info="Videos loaded from Hugging Face dataset"
+            )
+            split_dropdown = gr.Dropdown(
+                label="Split Number",
+                choices=["1", "2", "3"],
+                value="1",
+                info="Dataset split for annotations"
+            )
+            refresh_btn = gr.Button("🔄 Refresh Video List", variant="secondary")
+            submit_btn = gr.Button("🚀 Run Action Localization", variant="primary")
+        with gr.Column(scale=2):
+            output_text = gr.Textbox(
+                label="Action Predictions",
+                lines=25,
+                max_lines=50,
+                show_copy_button=True,
+                placeholder="Results will appear here..."
+            )
+    gr.Markdown(f"""
+    **Dataset Source:** [{HF_DATASET_REPO}](https://huggingface.co/datasets/{HF_DATASET_REPO})
     **Requirements:**
+    - Model checkpoint: `01_ckp_best.pth.tar` in repository root
+    - Video features: Automatically downloaded from HF dataset
+    """)
+    # Event handlers
+    refresh_btn.click(
+        fn=lambda: gr.Dropdown(choices=get_available_videos_from_hf()),
+        outputs=video_dropdown
+    )
+    submit_btn.click(
+        fn=process_video,
+        inputs=[video_dropdown, split_dropdown],
+        outputs=output_text
+    )
+    # Example
+    if available_videos and "Error" not in available_videos[0]:
+        gr.Examples(
+            examples=[[available_videos[0], "1"]],
+            inputs=[video_dropdown, split_dropdown],
+            fn=process_video,
+            outputs=output_text,
+            cache_examples=False
+        )
 if __name__ == '__main__':
+    # Script entry point: print a short startup summary, then start the Gradio app.
+    print(f"Available videos: {len(available_videos)}")
     print(f"Using device: {device}")
+    print(f"HF Dataset: {HF_DATASET_REPO}")
+    # NOTE(review): 0.0.0.0 / 7860 presumably match what the hosting Space
+    # expects (listen on all interfaces, Gradio's usual port) — confirm.
     iface.launch(
         server_name="0.0.0.0",
+        server_port=7860,
         share=False
     )