Spaces:

throaway2854
/

AI_Video_Auto-Tagger

Running

App Files Community

throaway2854 committed 17 days ago

Commit

2e12519

verified ·

1 Parent(s): 2ffce11

Update app.py

Browse files

Files changed (1) hide show

app.py +69 -31

app.py CHANGED Viewed

@@ -399,51 +399,76 @@ class VideoTagger:
     ) -> Tuple[str, Dict]:
         """
         Tag a video by sampling every N-th frame and aggregating tags.
         Returns:
             combined_tags_str: one unique comma-separated tag string
             debug_info: dict with some stats
         """
         if not video_path or not os.path.exists(video_path):
             raise FileNotFoundError("Video file not found.")
         frame_interval = max(int(frame_interval), 1)
         self._load_model_if_needed()
         if progress is not None:
-            progress(0.0, desc="Opening video...")
         cap = cv2.VideoCapture(video_path)
         if not cap.isOpened():
             raise RuntimeError("Unable to open video file.")
         total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) or 0
         if total_frames <= 0:
             total_frames = 1
         frames_to_process = max(1, (total_frames + frame_interval - 1) // frame_interval)
         aggregated_general: Dict[str, float] = {}
         aggregated_character: Dict[str, float] = {}
         frame_idx = 0
         processed_frames = 0
         batch_tensors: List[np.ndarray] = []
         try:
             while True:
                 ret, frame = cap.read()
                 if not ret:
                     break
                 # Only process every N-th frame
                 if frame_idx % frame_interval == 0:
                     # frame is BGR uint8 from OpenCV
                     arr = self._prepare_frame_bgr(frame)  # (H, W, 3) float32
                     batch_tensors.append(arr)
                     # If batch is full, run inference
                     if len(batch_tensors) >= self.batch_size:
                         num_done = self._run_batch_and_aggregate(
@@ -455,18 +480,21 @@ class VideoTagger:
                         )
                         processed_frames += num_done
                         batch_tensors = []
                         if progress is not None:
                             ratio = min(processed_frames / frames_to_process, 0.99)
                             progress(
                                 ratio,
-                                desc=f"Processing frames {processed_frames}/{frames_to_process}...",
                             )
                 frame_idx += 1
         finally:
             cap.release()
         # Process any leftover frames in the last partial batch
         if batch_tensors:
             num_done = self._run_batch_and_aggregate(
@@ -477,42 +505,52 @@ class VideoTagger:
                 aggregated_character=aggregated_character,
             )
             processed_frames += num_done
         if progress is not None:
             progress(1.0, desc="Finalizing tags...")
         # Merge character + general tags, sorted by score (desc)
         all_tags_with_scores = {**aggregated_general, **aggregated_character}
         # Apply substitutions & exclusions BEFORE final dedup
         adjusted_all_tags: Dict[str, float] = {}
         normalized_subs = {k.strip(): v.strip() for k, v in tag_substitutes.items() if k and v}
         normalized_exclusions = {t.strip() for t in tag_exclusions if t}
         for tag, score in all_tags_with_scores.items():
             original_tag = tag.strip()
             if original_tag in normalized_exclusions:
                 continue
             new_tag = normalized_subs.get(original_tag, original_tag)
             if new_tag in normalized_exclusions:
                 continue
             if new_tag not in adjusted_all_tags or score > adjusted_all_tags[new_tag]:
                 adjusted_all_tags[new_tag] = score
         sorted_tags = sorted(
             adjusted_all_tags.items(),
             key=lambda kv: kv[1],
             reverse=True,
         )
         unique_tags = [tag for tag, _ in sorted_tags]
         combined_tags_str = ", ".join(unique_tags)
         debug_info = {
             "model_repo": self.model_repo,
             "frames_read": int(frame_idx),
@@ -529,7 +567,7 @@ class VideoTagger:
             "num_exclusions": len(normalized_exclusions),
             "batch_size": int(self.batch_size),
         }
         return combined_tags_str, debug_info

     ) -> Tuple[str, Dict]:
         """
         Tag a video by sampling every N-th frame and aggregating tags.
         Returns:
             combined_tags_str: one unique comma-separated tag string
             debug_info: dict with some stats
         """
         if not video_path or not os.path.exists(video_path):
             raise FileNotFoundError("Video file not found.")
         frame_interval = max(int(frame_interval), 1)
+        # Detect if this is the first time the model is being loaded
+        is_first_load = self.model is None
+        if progress is not None:
+            if is_first_load:
+                progress(0.0, desc="Loading model (first run may take a while)...")
+            else:
+                progress(0.0, desc="Opening video...")
+        # Lazy-load model and labels once per process
         self._load_model_if_needed()
         if progress is not None:
+            if is_first_load:
+                # Model just finished loading
+                progress(0.0, desc="Model loaded, opening video...")
+            else:
+                # Keep the message but make clear we're past model loading
+                progress(0.0, desc="Opening video...")
         cap = cv2.VideoCapture(video_path)
         if not cap.isOpened():
             raise RuntimeError("Unable to open video file.")
         total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) or 0
         if total_frames <= 0:
             total_frames = 1
         frames_to_process = max(1, (total_frames + frame_interval - 1) // frame_interval)
         aggregated_general: Dict[str, float] = {}
         aggregated_character: Dict[str, float] = {}
         frame_idx = 0
         processed_frames = 0
         batch_tensors: List[np.ndarray] = []
         try:
             while True:
                 ret, frame = cap.read()
                 if not ret:
                     break
                 # Only process every N-th frame
                 if frame_idx % frame_interval == 0:
                     # frame is BGR uint8 from OpenCV
                     arr = self._prepare_frame_bgr(frame)  # (H, W, 3) float32
                     batch_tensors.append(arr)
+                    # While building the FIRST batch, keep user informed
+                    if progress is not None and processed_frames == 0:
+                        frames_in_first_batch = min(self.batch_size, frames_to_process)
+                        progress(
+                            0.0,
+                            desc=(
+                                f"Collecting frames for first batch "
+                                f"({len(batch_tensors)}/{frames_in_first_batch})..."
+                            ),
+                        )
                     # If batch is full, run inference
                     if len(batch_tensors) >= self.batch_size:
                         num_done = self._run_batch_and_aggregate(
                         )
                         processed_frames += num_done
                         batch_tensors = []
                         if progress is not None:
                             ratio = min(processed_frames / frames_to_process, 0.99)
                             progress(
                                 ratio,
+                                desc=(
+                                    f"Processing frames {processed_frames}/"
+                                    f"{frames_to_process}..."
+                                ),
                             )
                 frame_idx += 1
         finally:
             cap.release()
         # Process any leftover frames in the last partial batch
         if batch_tensors:
             num_done = self._run_batch_and_aggregate(
                 aggregated_character=aggregated_character,
             )
             processed_frames += num_done
+            if progress is not None:
+                ratio = min(processed_frames / frames_to_process, 0.99)
+                progress(
+                    ratio,
+                    desc=(
+                        f"Processing frames {processed_frames}/"
+                        f"{frames_to_process} (final batch)..."
+                    ),
+                )
         if progress is not None:
             progress(1.0, desc="Finalizing tags...")
         # Merge character + general tags, sorted by score (desc)
         all_tags_with_scores = {**aggregated_general, **aggregated_character}
         # Apply substitutions & exclusions BEFORE final dedup
         adjusted_all_tags: Dict[str, float] = {}
         normalized_subs = {k.strip(): v.strip() for k, v in tag_substitutes.items() if k and v}
         normalized_exclusions = {t.strip() for t in tag_exclusions if t}
         for tag, score in all_tags_with_scores.items():
             original_tag = tag.strip()
             if original_tag in normalized_exclusions:
                 continue
             new_tag = normalized_subs.get(original_tag, original_tag)
             if new_tag in normalized_exclusions:
                 continue
             if new_tag not in adjusted_all_tags or score > adjusted_all_tags[new_tag]:
                 adjusted_all_tags[new_tag] = score
         sorted_tags = sorted(
             adjusted_all_tags.items(),
             key=lambda kv: kv[1],
             reverse=True,
         )
         unique_tags = [tag for tag, _ in sorted_tags]
         combined_tags_str = ", ".join(unique_tags)
         debug_info = {
             "model_repo": self.model_repo,
             "frames_read": int(frame_idx),
             "num_exclusions": len(normalized_exclusions),
             "batch_size": int(self.batch_size),
         }
         return combined_tags_str, debug_info