Spaces:

vidhi0405
/

VideoToText

Sleeping

App Files Community

vidhi0405 committed 10 days ago

Commit

bc48923

1 Parent(s): 921c0b3

commit 2

Browse files

Files changed (1) hide show

huggingface_exact_approach.py +69 -10

huggingface_exact_approach.py CHANGED Viewed

@@ -283,8 +283,7 @@ class VideoHighlightDetector:
             outputs = self.model.generate(
                 **inputs,
                 max_new_tokens=8,  # Force very short responses
-                do_sample=False,    # Use greedy decoding for consistency
-                temperature=0.1     # Very low temperature for strict adherence
             )
             response = self.processor.decode(outputs[0], skip_special_tokens=True)
@@ -311,6 +310,44 @@ class VideoHighlightDetector:
         except Exception as e:
             print(f"   ❌ Error processing segment: {str(e)}")
             return False
     def _concatenate_scenes(
         self,
@@ -476,6 +513,8 @@ class VideoHighlightDetector:
         kept_segments1 = []
         kept_segments2 = []
         segments_processed = 0
         total_segments = int(duration / segment_length)
@@ -509,14 +548,21 @@ class VideoHighlightDetector:
             if self.process_segment(segment_path, highlights1):
                 print("   ✅ KEEPING SEGMENT FOR SET 1")
                 kept_segments1.append((start_time, end_time))
             else:
                 print("   ❌ REJECTING SEGMENT FOR SET 1")
             if self.process_segment(segment_path, highlights2):
                 print("   ✅ KEEPING SEGMENT FOR SET 2")
                 kept_segments2.append((start_time, end_time))
             else:
                 print("   ❌ REJECTING SEGMENT FOR SET 2")
             # Clean up segment file
             os.remove(segment_path)
@@ -546,13 +592,26 @@ class VideoHighlightDetector:
         print(f"🏆 Selected Set {selected_set} with {len(final_segments)} segments ({percent_used:.1f}% of video)")
         if not final_segments:
-            return {
-                "error": "No highlights detected in the video with either set of criteria",
-                "video_description": video_desc,
-                "highlights1": highlights1,
-                "highlights2": highlights2,
-                "total_segments": total_segments
-            }
         # Step 4: Create final video
         print(f"🎬 Step 4: Creating final highlights video...")

             outputs = self.model.generate(
                 **inputs,
                 max_new_tokens=8,  # Force very short responses
+                do_sample=False    # Use greedy decoding for consistency
             )
             response = self.processor.decode(outputs[0], skip_special_tokens=True)
         except Exception as e:
             print(f"   ❌ Error processing segment: {str(e)}")
             return False
+    def score_segment(self, video_path: str, highlight_types: str) -> int:
+        """Fallback scorer (1-10) used when strict YES/NO rejects all segments.
+
+        Asks the model to rate how well the clip at ``video_path`` matches
+        ``highlight_types`` and parses the first standalone integer from 1 to
+        10 out of the reply. In the call sites visible in this diff it is
+        invoked for each segment the YES/NO check rejects; the collected
+        scores are only consulted when no segment was kept.
+
+        Args:
+            video_path: Path of the video segment, passed to the chat template
+                as a ``video`` content item.
+            highlight_types: Criteria text interpolated into the user prompt.
+
+        Returns:
+            The parsed score (1-10), or 1 when the reply contains no such
+            number or when any exception is raised while scoring.
+        """
+        # System turn sets the scorer role; the user turn carries the clip
+        # plus the rating instructions.
+        messages = [
+            {
+                "role": "system",
+                "content": [{"type": "text", "text": "You are a video highlight scorer. Score relevance conservatively."}]
+            },
+            {
+                "role": "user",
+                "content": [
+                    {"type": "video", "path": video_path},
+                    {"type": "text", "text": (
+                        f"Highlight criteria:\n{highlight_types}\n\n"
+                        "Rate how much this segment matches the criteria on a scale of 1 to 10. "
+                        "Return one number only."
+                    )}
+                ]
+            }
+        ]
+        try:
+            # Tokenize the chat prompt into "pt" tensors and move them to
+            # self.device.
+            inputs = self.processor.apply_chat_template(
+                messages,
+                add_generation_prompt=True,
+                tokenize=True,
+                return_dict=True,
+                return_tensors="pt"
+            ).to(self.device)
+            # Sampling is disabled and the reply is capped at 8 new tokens.
+            outputs = self.model.generate(**inputs, max_new_tokens=8, do_sample=False)
+            response = self.processor.decode(outputs[0], skip_special_tokens=True)
+            # Helper defined outside this hunk; presumably isolates the
+            # assistant's reply from the decoded text (TODO confirm).
+            response = self._extract_assistant_text(response)
+            # First standalone 1-10 integer; \b keeps digits inside longer
+            # numbers (e.g. "100") from matching.
+            match = re.search(r"\b([1-9]|10)\b", response)
+            if match:
+                return int(match.group(1))
+            # No usable number in the reply: fall back to the lowest score.
+            return 1
+        except Exception:
+            # Any failure is swallowed without logging and treated as the
+            # lowest score.
+            return 1
     def _concatenate_scenes(
         self,
         kept_segments1 = []
         kept_segments2 = []
+        scored_segments1 = []
+        scored_segments2 = []
         segments_processed = 0
         total_segments = int(duration / segment_length)
             if self.process_segment(segment_path, highlights1):
                 print("   ✅ KEEPING SEGMENT FOR SET 1")
                 kept_segments1.append((start_time, end_time))
+                score1 = 10
             else:
                 print("   ❌ REJECTING SEGMENT FOR SET 1")
+                score1 = self.score_segment(segment_path, highlights1)
             if self.process_segment(segment_path, highlights2):
                 print("   ✅ KEEPING SEGMENT FOR SET 2")
                 kept_segments2.append((start_time, end_time))
+                score2 = 10
             else:
                 print("   ❌ REJECTING SEGMENT FOR SET 2")
+                score2 = self.score_segment(segment_path, highlights2)
+            scored_segments1.append({"start": start_time, "end": end_time, "score": score1})
+            scored_segments2.append({"start": start_time, "end": end_time, "score": score2})
             # Clean up segment file
             os.remove(segment_path)
         print(f"🏆 Selected Set {selected_set} with {len(final_segments)} segments ({percent_used:.1f}% of video)")
         if not final_segments:
+            print("⚠️ No strict YES segments found. Falling back to score-based selection.")
+            avg1 = sum(s["score"] for s in scored_segments1) / len(scored_segments1) if scored_segments1 else 0
+            avg2 = sum(s["score"] for s in scored_segments2) / len(scored_segments2) if scored_segments2 else 0
+            if avg2 >= avg1:
+                selected_set = "2"
+                fallback_scores = sorted(scored_segments2, key=lambda x: x["score"], reverse=True)
+            else:
+                selected_set = "1"
+                fallback_scores = sorted(scored_segments1, key=lambda x: x["score"], reverse=True)
+            selected_by_threshold = [s for s in fallback_scores if s["score"] >= 6]
+            if not selected_by_threshold:
+                top_n = max(1, min(3, len(fallback_scores)))
+                selected_by_threshold = fallback_scores[:top_n]
+            final_segments = [(s["start"], s["end"]) for s in selected_by_threshold]
+            selected_duration = sum(end - start for start, end in final_segments)
+            percent_used = (selected_duration / total_duration) * 100 if total_duration > 0 else 0
+            print(f"🏆 Fallback selected Set {selected_set} with {len(final_segments)} segments ({percent_used:.1f}% of video)")
         # Step 4: Create final video
         print(f"🎬 Step 4: Creating final highlights video...")