Spaces:

throaway2854
/

AI_Video_Auto-Tagger

Running

App Files Files Community

throaway2854 committed 17 days ago

Commit

f1419d3

verified ·

1 Parent(s): 82b4129

Update app.py

Browse files

Files changed (1) hide show

app.py +76 -91

app.py CHANGED Viewed

@@ -399,33 +399,22 @@ class VideoTagger:
     ) -> Tuple[str, Dict]:
         """
         Tag a video by sampling every N-th frame and aggregating tags.
-        Returns:
-            combined_tags_str: one unique comma-separated tag string
-            debug_info: dict with some stats
         """
         if not video_path or not os.path.exists(video_path):
             raise FileNotFoundError("Video file not found.")
         frame_interval = max(int(frame_interval), 1)
-        # Detect if this is the first time the model is being loaded
         is_first_load = self.model is None
-        if progress is not None:
-            if is_first_load:
-                progress(0.0, desc="Loading model (first run may take a while)...")
-            else:
-                progress(0.0, desc="Opening video...")
-        # Lazy-load model and labels once per process
         self._load_model_if_needed()
-        if progress is not None:
-            if is_first_load:
-                progress(0.0, desc="Model loaded, opening video...")
-            else:
-                progress(0.0, desc="Opening video...")
         cap = cv2.VideoCapture(video_path)
         if not cap.isOpened():
@@ -435,15 +424,17 @@ class VideoTagger:
         if total_frames <= 0:
             total_frames = 1
-        # Number of frames we will actually process (sampled every N frames)
-        frames_to_process = max(1, (total_frames + frame_interval - 1) // frame_interval)
         aggregated_general: Dict[str, float] = {}
         aggregated_character: Dict[str, float] = {}
-        frame_idx = 0                 # index over all video frames
-        processed_frames = 0          # count of sampled frames fully processed by the model
         batch_tensors: List[np.ndarray] = []
         try:
             while True:
@@ -452,115 +443,109 @@ class VideoTagger:
                     break
                 if frame_idx % frame_interval == 0:
-                    # This is a sampled frame
-                    sampled_index = processed_frames + len(batch_tensors) + 1  # 1-based index among sampled frames
-                    arr = self._prepare_frame_bgr(frame)  # (H, W, 3) float32
-                    batch_tensors.append(arr)
-                    if progress is not None:
-                        # Show which sampled frame we're preparing, and which raw video frame it is.
-                        ratio = min(
-                            (processed_frames + len(batch_tensors)) / frames_to_process,
-                            0.99,
-                        )
                         progress(
-                            ratio,
                             desc=(
-                                f"Preparing sampled frame {sampled_index}/{frames_to_process} "
-                                f"(video frame {frame_idx + 1}/{total_frames})..."
                             ),
                         )
-                    # If batch is full, run inference on it
                     if len(batch_tensors) >= self.batch_size:
-                        # Inform the user we're now running the model on this batch
-                        if progress is not None:
-                            start_sample = processed_frames + 1
-                            end_sample = processed_frames + len(batch_tensors)
-                            ratio = min(
-                                (processed_frames + len(batch_tensors)) / frames_to_process,
-                                0.99,
-                            )
                             progress(
-                                ratio,
                                 desc=(
-                                    f"Running model on batch: sampled frames "
-                                    f"{start_sample}-{end_sample}/{frames_to_process}..."
                                 ),
                             )
-                        num_done = self._run_batch_and_aggregate(
                             batch_tensors,
-                            general_thresh=general_thresh,
-                            character_thresh=character_thresh,
-                            aggregated_general=aggregated_general,
-                            aggregated_character=aggregated_character,
                         )
-                        processed_frames += num_done
                         batch_tensors = []
-                        if progress is not None:
-                            ratio = min(processed_frames / frames_to_process, 0.99)
                             progress(
-                                ratio,
                                 desc=(
-                                    f"Finished processing sampled frames "
-                                    f"{processed_frames}/{frames_to_process}..."
                                 ),
                             )
                 frame_idx += 1
         finally:
             cap.release()
-        # Process any leftover frames in the last partial batch
         if batch_tensors:
-            if progress is not None:
-                start_sample = processed_frames + 1
-                end_sample = processed_frames + len(batch_tensors)
-                ratio = min(
-                    (processed_frames + len(batch_tensors)) / frames_to_process,
-                    0.99,
-                )
                 progress(
-                    ratio,
                     desc=(
-                        f"Running model on final batch: sampled frames "
-                        f"{start_sample}-{end_sample}/{frames_to_process}..."
                     ),
                 )
-            num_done = self._run_batch_and_aggregate(
                 batch_tensors,
-                general_thresh=general_thresh,
-                character_thresh=character_thresh,
-                aggregated_general=aggregated_general,
-                aggregated_character=aggregated_character,
             )
-            processed_frames += num_done
-            if progress is not None:
-                ratio = min(processed_frames / frames_to_process, 0.99)
                 progress(
-                    ratio,
                     desc=(
-                        f"Finished processing all sampled frames "
-                        f"{processed_frames}/{frames_to_process}..."
                     ),
                 )
-        if progress is not None:
             progress(1.0, desc="Finalizing tags...")
-        # Merge character + general tags, sorted by score (desc)
         all_tags_with_scores = {**aggregated_general, **aggregated_character}
-        # Apply substitutions & exclusions BEFORE final dedup
-        adjusted_all_tags: Dict[str, float] = {}
         normalized_subs = {k.strip(): v.strip() for k, v in tag_substitutes.items() if k and v}
         normalized_exclusions = {t.strip() for t in tag_exclusions if t}
         for tag, score in all_tags_with_scores.items():
             original_tag = tag.strip()
@@ -588,17 +573,17 @@ class VideoTagger:
             "model_repo": self.model_repo,
             "frames_read": int(frame_idx),
             "frames_processed": int(processed_frames),
-            "estimated_total_frames": int(total_frames),
-            "estimated_frames_to_process": int(frames_to_process),
-            "num_general_tags_raw": len(aggregated_general),
-            "num_character_tags_raw": len(aggregated_character),
-            "total_unique_tags_after_control": len(unique_tags),
             "frame_interval": int(frame_interval),
             "general_threshold": float(general_thresh),
             "character_threshold": float(character_thresh),
             "num_substitution_rules": len(normalized_subs),
             "num_exclusions": len(normalized_exclusions),
-            "batch_size": int(self.batch_size),
         }
         return combined_tags_str, debug_info

     ) -> Tuple[str, Dict]:
         """
         Tag a video by sampling every N-th frame and aggregating tags.
         """
         if not video_path or not os.path.exists(video_path):
             raise FileNotFoundError("Video file not found.")
         frame_interval = max(int(frame_interval), 1)
         is_first_load = self.model is None
+        if progress:
+            progress(0.0, desc="Loading model..." if is_first_load else "Opening video...")
+        # Lazy-load model & labels once per process
         self._load_model_if_needed()
+        if progress and is_first_load:
+            progress(0.0, desc="Model loaded. Opening video...")
         cap = cv2.VideoCapture(video_path)
         if not cap.isOpened():
         if total_frames <= 0:
             total_frames = 1
+        # How many frames we will actually process (sampled every N frames)
+        sampled_frames = max(1, (total_frames + frame_interval - 1) // frame_interval)
+        total_batches = max(1, (sampled_frames + self.batch_size - 1) // self.batch_size)
         aggregated_general: Dict[str, float] = {}
         aggregated_character: Dict[str, float] = {}
+        frame_idx = 0               # raw video frame index
+        processed_frames = 0        # sampled frames fully processed by the model
         batch_tensors: List[np.ndarray] = []
+        current_batch = 1
         try:
             while True:
                     break
                 if frame_idx % frame_interval == 0:
+                    # This is a sampled frame – add to current batch
+                    batch_tensors.append(self._prepare_frame_bgr(frame))
+                    # For the current batch, compute how many sampled frames it *should* contain
+                    remaining_frames = sampled_frames - processed_frames
+                    current_batch_size = min(self.batch_size, remaining_frames)
+                    # While we are still building the batch, keep percent based on *completed* frames only
+                    if progress:
+                        pct = processed_frames / sampled_frames
                         progress(
+                            pct,
                             desc=(
+                                f"Preparing batch {current_batch}/{total_batches} "
+                                f"({len(batch_tensors)}/{current_batch_size} frames)..."
                             ),
                         )
+                    # If batch is full, run inference
                     if len(batch_tensors) >= self.batch_size:
+                        if progress:
+                            beg = processed_frames + 1
+                            end = processed_frames + len(batch_tensors)
+                            pct = processed_frames / sampled_frames  # still only count completed frames
                             progress(
+                                pct,
                                 desc=(
+                                    f"Processing batch {current_batch}/{total_batches} "
+                                    f"(frames {beg}-{end}/{sampled_frames})..."
                                 ),
                             )
+                        done = self._run_batch_and_aggregate(
                             batch_tensors,
+                            general_thresh,
+                            character_thresh,
+                            aggregated_general,
+                            aggregated_character,
                         )
+                        processed_frames += done
                         batch_tensors = []
+                        if current_batch < total_batches:
+                            current_batch += 1
+                        if progress:
+                            pct = processed_frames / sampled_frames
                             progress(
+                                pct,
                                 desc=(
+                                    f"Completed batch {current_batch - 1}/{total_batches} "
+                                    f"({processed_frames}/{sampled_frames} frames processed)"
                                 ),
                             )
                 frame_idx += 1
         finally:
             cap.release()
+        # Process any leftover frames in the final partial batch
         if batch_tensors:
+            if progress:
+                beg = processed_frames + 1
+                end = processed_frames + len(batch_tensors)
+                pct = processed_frames / sampled_frames  # still only completed frames
                 progress(
+                    pct,
                     desc=(
+                        f"Processing final batch {current_batch}/{total_batches} "
+                        f"(frames {beg}-{end}/{sampled_frames})..."
                     ),
                 )
+            done = self._run_batch_and_aggregate(
                 batch_tensors,
+                general_thresh,
+                character_thresh,
+                aggregated_general,
+                aggregated_character,
             )
+            processed_frames += done
+            if progress:
+                pct = processed_frames / sampled_frames
                 progress(
+                    pct,
                     desc=(
+                        f"Completed batch {current_batch}/{total_batches} "
+                        f"({processed_frames}/{sampled_frames} frames processed)"
                     ),
                 )
+        if progress:
             progress(1.0, desc="Finalizing tags...")
+        # Merge & finalize tags
         all_tags_with_scores = {**aggregated_general, **aggregated_character}
         normalized_subs = {k.strip(): v.strip() for k, v in tag_substitutes.items() if k and v}
         normalized_exclusions = {t.strip() for t in tag_exclusions if t}
+        adjusted_all_tags: Dict[str, float] = {}
         for tag, score in all_tags_with_scores.items():
             original_tag = tag.strip()
             "model_repo": self.model_repo,
             "frames_read": int(frame_idx),
             "frames_processed": int(processed_frames),
+            "sampled_frames": int(sampled_frames),
+            "total_batches": int(total_batches),
+            "batch_size": int(self.batch_size),
             "frame_interval": int(frame_interval),
             "general_threshold": float(general_thresh),
             "character_threshold": float(character_thresh),
+            "num_general_tags_raw": len(aggregated_general),
+            "num_character_tags_raw": len(aggregated_character),
+            "total_unique_tags_after_control": len(unique_tags),
             "num_substitution_rules": len(normalized_subs),
             "num_exclusions": len(normalized_exclusions),
         }
         return combined_tags_str, debug_info