Update dino_processor.py
dino_processor.py  +8 -22
CHANGED
@@ -12,7 +12,6 @@ from sklearn.cluster import KMeans
 from scipy.spatial.distance import cdist
 import matplotlib.pyplot as plt
 import shutil  # For cleaning up temporary directories
-from datetime import datetime
 
 # This will import the ViT model definitions from the other file
 import vision_transformer as vits
@@ -21,7 +20,7 @@ import vision_transformer as vits
 # (extract_frames, compute_embeddings, select_representative_frames, generate_attention_maps)
 # I will copy them here for completeness, but you can just leave them as they are.
 
-def extract_frames(video_path, output_dir, fps=
+def extract_frames(video_path, output_dir, fps=5):
     frames_dir = os.path.join(output_dir, "frames")
     os.makedirs(frames_dir, exist_ok=True)
     cap = cv2.VideoCapture(video_path)
@@ -63,7 +62,7 @@ def compute_embeddings(frame_paths, model, device, batch_size=32):
         embeddings.append(batch_embeddings.cpu().numpy())
     return np.concatenate(embeddings, axis=0), frame_names
 
-def select_representative_frames(embeddings, frame_names, n_clusters=
+def select_representative_frames(embeddings, frame_names, n_clusters=3, pca_dim=32):
     pca = PCA(n_components=pca_dim, svd_solver='full', random_state=404543)
     pca_results = pca.fit_transform(embeddings)
     kmeans = KMeans(n_clusters=n_clusters, random_state=404543, n_init=10)
@@ -112,28 +111,21 @@ def generate_attention_maps(frame_path, model, device, output_dir, frame_name):
     return overlay_path, attn_path
 
 # --- Main orchestrator function ---
-def process_video_with_dino(video_path):
+def process_video_with_dino(video_path, output_dir="dino_output"):
     """
     Main function to process a video and generate DINO attention maps.
-    Saves all outputs to a permanent, timestamped folder.
 
     Args:
         video_path (str): Path to the input video.
+        output_dir (str): Directory to save all intermediate and final files.
 
     Returns:
         list: A list of tuples, where each tuple contains (overlay_path, attention_map_path).
     """
-    #
-
-
-    os.makedirs(archive_dir, exist_ok=True)
-
-    # 2. Create a unique, timestamped directory for this specific run.
-    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
-    output_dir = os.path.join(archive_dir, timestamp)
+    # Clean up previous runs and create output directory
+    if os.path.exists(output_dir):
+        shutil.rmtree(output_dir)
     os.makedirs(output_dir, exist_ok=True)
-    print(f"Results for this run will be saved in: {output_dir}")
-    # --- MODIFICATION END ---
 
     device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
@@ -151,7 +143,7 @@ def process_video_with_dino(video_path):
     model.load_state_dict(state_dict, strict=True)
     print("DINO weights loaded successfully from torch.hub.")
 
-    # Step 1: Extract frames
+    # Step 1: Extract frames
     frame_paths = extract_frames(video_path, output_dir)
     if not frame_paths:
         raise ValueError("No frames were extracted from the video.")
@@ -168,13 +160,7 @@ def process_video_with_dino(video_path):
     for frame_name in selected_frames:
         frame_path = os.path.join(frames_dir, frame_name)
         frame_name_no_ext = os.path.splitext(frame_name)[0]
-        # The generated images will now be saved inside the unique timestamped folder
         overlay_path, attn_path = generate_attention_maps(frame_path, model, device, output_dir, frame_name_no_ext)
         results.append((overlay_path, attn_path))
 
-    # We no longer need the temporary frames, so we can clean them up to save space.
-    # The final images (overlays and heatmaps) will remain.
-    shutil.rmtree(frames_dir)
-    print(f"Cleaned up temporary frames directory: {frames_dir}")
-
     return results
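
The diff pins the sampling default to fps=5, but only the first three lines of extract_frames are visible above. As a rough illustration of how such a sampler typically works, here is a minimal sketch; the step computation, the fallback when the video reports no FPS, and the frame-naming scheme are assumptions, not the file's actual code.

import os
import cv2

def extract_frames_sketch(video_path, output_dir, fps=5):
    # Hypothetical reconstruction: keep roughly `fps` frames per second of video.
    frames_dir = os.path.join(output_dir, "frames")
    os.makedirs(frames_dir, exist_ok=True)
    cap = cv2.VideoCapture(video_path)
    video_fps = cap.get(cv2.CAP_PROP_FPS) or fps   # assumed fallback when metadata is missing
    step = max(1, round(video_fps / fps))          # keep every `step`-th decoded frame
    frame_paths, idx = [], 0
    while True:
        ok, frame = cap.read()
        if not ok:
            break
        if idx % step == 0:
            path = os.path.join(frames_dir, f"frame_{idx:06d}.jpg")  # naming scheme is assumed
            cv2.imwrite(path, frame)
            frame_paths.append(path)
        idx += 1
    cap.release()
    return frame_paths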
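The new signature fixes n_clusters=3 and pca_dim=32, and the visible body sets up PCA and KMeans over the frame embeddings. The file's cdist import suggests the usual centroid-nearest selection; below is a sketch under that assumption, where the first three statements mirror the diff and the selection step and return format are guesses.

from scipy.spatial.distance import cdist
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

def select_representative_frames_sketch(embeddings, frame_names, n_clusters=3, pca_dim=32):
    # First three statements mirror the diff; the rest is an assumed reconstruction.
    pca = PCA(n_components=pca_dim, svd_solver='full', random_state=404543)
    pca_results = pca.fit_transform(embeddings)
    kmeans = KMeans(n_clusters=n_clusters, random_state=404543, n_init=10)
    kmeans.fit(pca_results)
    # Distances from every frame to every centroid: shape (n_frames, n_clusters)
    dists = cdist(pca_results, kmeans.cluster_centers_)
    nearest = dists.argmin(axis=0)  # index of the closest frame per cluster
    return [frame_names[i] for i in nearest]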
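Net effect of the commit: instead of importing datetime to archive each run under a permanent timestamped folder and then deleting the extracted frames, process_video_with_dino now takes an output_dir argument (default "dino_output") that is wiped and recreated on every call, and the frames directory is kept. A minimal usage sketch against the new signature; "input.mp4" is a placeholder path.

# "input.mp4" is a placeholder; any readable video path works.
from dino_processor import process_video_with_dino

results = process_video_with_dino("input.mp4", output_dir="dino_output")
for overlay_path, attn_path in results:
    print(f"overlay: {overlay_path}  attention map: {attn_path}")

# Re-running wipes dino_output first (shutil.rmtree), so copy results
# elsewhere if you need to keep more than the latest run.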