Spaces:
Sleeping
Sleeping
commit 2
Browse files- huggingface_exact_approach.py +69 -10
huggingface_exact_approach.py
CHANGED
|
@@ -283,8 +283,7 @@ class VideoHighlightDetector:
|
|
| 283 |
outputs = self.model.generate(
|
| 284 |
**inputs,
|
| 285 |
max_new_tokens=8, # Force very short responses
|
| 286 |
-
do_sample=False
|
| 287 |
-
temperature=0.1 # Very low temperature for strict adherence
|
| 288 |
)
|
| 289 |
response = self.processor.decode(outputs[0], skip_special_tokens=True)
|
| 290 |
|
|
@@ -311,6 +310,44 @@ class VideoHighlightDetector:
|
|
| 311 |
except Exception as e:
|
| 312 |
print(f" β Error processing segment: {str(e)}")
|
| 313 |
return False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 314 |
|
| 315 |
def _concatenate_scenes(
|
| 316 |
self,
|
|
@@ -476,6 +513,8 @@ class VideoHighlightDetector:
|
|
| 476 |
|
| 477 |
kept_segments1 = []
|
| 478 |
kept_segments2 = []
|
|
|
|
|
|
|
| 479 |
segments_processed = 0
|
| 480 |
total_segments = int(duration / segment_length)
|
| 481 |
|
|
@@ -509,14 +548,21 @@ class VideoHighlightDetector:
|
|
| 509 |
if self.process_segment(segment_path, highlights1):
|
| 510 |
print(" ✅ KEEPING SEGMENT FOR SET 1")
|
| 511 |
kept_segments1.append((start_time, end_time))
|
|
|
|
| 512 |
else:
|
| 513 |
print(" β REJECTING SEGMENT FOR SET 1")
|
| 514 |
-
|
|
|
|
| 515 |
if self.process_segment(segment_path, highlights2):
|
| 516 |
print(" ✅ KEEPING SEGMENT FOR SET 2")
|
| 517 |
kept_segments2.append((start_time, end_time))
|
|
|
|
| 518 |
else:
|
| 519 |
print(" β REJECTING SEGMENT FOR SET 2")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 520 |
|
| 521 |
# Clean up segment file
|
| 522 |
os.remove(segment_path)
|
|
@@ -546,13 +592,26 @@ class VideoHighlightDetector:
|
|
| 546 |
print(f"π Selected Set {selected_set} with {len(final_segments)} segments ({percent_used:.1f}% of video)")
|
| 547 |
|
| 548 |
if not final_segments:
|
| 549 |
-
|
| 550 |
-
|
| 551 |
-
|
| 552 |
-
|
| 553 |
-
|
| 554 |
-
"
|
| 555 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 556 |
|
| 557 |
# Step 4: Create final video
|
| 558 |
print(f"π¬ Step 4: Creating final highlights video...")
|
|
|
|
| 283 |
outputs = self.model.generate(
|
| 284 |
**inputs,
|
| 285 |
max_new_tokens=8, # Force very short responses
|
| 286 |
+
do_sample=False # Use greedy decoding for consistency
|
|
|
|
| 287 |
)
|
| 288 |
response = self.processor.decode(outputs[0], skip_special_tokens=True)
|
| 289 |
|
|
|
|
| 310 |
except Exception as e:
|
| 311 |
print(f" β Error processing segment: {str(e)}")
|
| 312 |
return False
|
| 313 |
+
|
| 314 |
+
def score_segment(self, video_path: str, highlight_types: str) -> int:
    """Fallback scorer (1-10) used when strict YES/NO rejects all segments.

    Asks the model to rate how well the video segment matches the given
    highlight criteria and parses the first integer in the range 1-10 from
    the reply.

    Args:
        video_path: Path of the video segment handed to the chat template.
        highlight_types: Text describing the highlight criteria; embedded
            verbatim in the user prompt.

    Returns:
        The parsed score (1-10). Returns 1 when the reply contains no
        number in that range or when any step raises, so callers can always
        rank segments without handling exceptions.

    NOTE(review): the user prompt itself contains "1 to 10". This relies on
    ``self._extract_assistant_text`` removing the echoed prompt from the
    decoded output; otherwise the regex below would match the prompt's "1".
    Confirm against that helper.
    """
    # Function-scope import: the module-level import block is not visible
    # from here, and a missing `re` would otherwise surface only as a
    # swallowed NameError that silently scores every segment as 1.
    import re

    messages = [
        {
            "role": "system",
            "content": [{"type": "text", "text": "You are a video highlight scorer. Score relevance conservatively."}],
        },
        {
            "role": "user",
            "content": [
                {"type": "video", "path": video_path},
                {"type": "text", "text": (
                    f"Highlight criteria:\n{highlight_types}\n\n"
                    "Rate how much this segment matches the criteria on a scale of 1 to 10. "
                    "Return one number only."
                )},
            ],
        },
    ]

    try:
        inputs = self.processor.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=True,
            return_dict=True,
            return_tensors="pt",
        ).to(self.device)
        # Greedy decoding with a tiny token budget: only a single number is expected.
        outputs = self.model.generate(**inputs, max_new_tokens=8, do_sample=False)
        response = self.processor.decode(outputs[0], skip_special_tokens=True)
        response = self._extract_assistant_text(response)
        match = re.search(r"\b([1-9]|10)\b", response)
        if match:
            return int(match.group(1))
        return 1
    except Exception as e:
        # Deliberate best-effort: scoring is a fallback path and must not
        # abort the run, but report the failure so a broken model call is
        # distinguishable from a genuine low score.
        print(f"    Error scoring segment: {e}")
        return 1
|
| 351 |
|
| 352 |
def _concatenate_scenes(
|
| 353 |
self,
|
|
|
|
| 513 |
|
| 514 |
kept_segments1 = []
|
| 515 |
kept_segments2 = []
|
| 516 |
+
scored_segments1 = []
|
| 517 |
+
scored_segments2 = []
|
| 518 |
segments_processed = 0
|
| 519 |
total_segments = int(duration / segment_length)
|
| 520 |
|
|
|
|
| 548 |
if self.process_segment(segment_path, highlights1):
|
| 549 |
print(" ✅ KEEPING SEGMENT FOR SET 1")
|
| 550 |
kept_segments1.append((start_time, end_time))
|
| 551 |
+
score1 = 10
|
| 552 |
else:
|
| 553 |
print(" β REJECTING SEGMENT FOR SET 1")
|
| 554 |
+
score1 = self.score_segment(segment_path, highlights1)
|
| 555 |
+
|
| 556 |
if self.process_segment(segment_path, highlights2):
|
| 557 |
print(" ✅ KEEPING SEGMENT FOR SET 2")
|
| 558 |
kept_segments2.append((start_time, end_time))
|
| 559 |
+
score2 = 10
|
| 560 |
else:
|
| 561 |
print(" β REJECTING SEGMENT FOR SET 2")
|
| 562 |
+
score2 = self.score_segment(segment_path, highlights2)
|
| 563 |
+
|
| 564 |
+
scored_segments1.append({"start": start_time, "end": end_time, "score": score1})
|
| 565 |
+
scored_segments2.append({"start": start_time, "end": end_time, "score": score2})
|
| 566 |
|
| 567 |
# Clean up segment file
|
| 568 |
os.remove(segment_path)
|
|
|
|
| 592 |
print(f"π Selected Set {selected_set} with {len(final_segments)} segments ({percent_used:.1f}% of video)")
|
| 593 |
|
| 594 |
if not final_segments:
|
| 595 |
+
print("β οΈ No strict YES segments found. Falling back to score-based selection.")
|
| 596 |
+
avg1 = sum(s["score"] for s in scored_segments1) / len(scored_segments1) if scored_segments1 else 0
|
| 597 |
+
avg2 = sum(s["score"] for s in scored_segments2) / len(scored_segments2) if scored_segments2 else 0
|
| 598 |
+
|
| 599 |
+
if avg2 >= avg1:
|
| 600 |
+
selected_set = "2"
|
| 601 |
+
fallback_scores = sorted(scored_segments2, key=lambda x: x["score"], reverse=True)
|
| 602 |
+
else:
|
| 603 |
+
selected_set = "1"
|
| 604 |
+
fallback_scores = sorted(scored_segments1, key=lambda x: x["score"], reverse=True)
|
| 605 |
+
|
| 606 |
+
selected_by_threshold = [s for s in fallback_scores if s["score"] >= 6]
|
| 607 |
+
if not selected_by_threshold:
|
| 608 |
+
top_n = max(1, min(3, len(fallback_scores)))
|
| 609 |
+
selected_by_threshold = fallback_scores[:top_n]
|
| 610 |
+
|
| 611 |
+
final_segments = [(s["start"], s["end"]) for s in selected_by_threshold]
|
| 612 |
+
selected_duration = sum(end - start for start, end in final_segments)
|
| 613 |
+
percent_used = (selected_duration / total_duration) * 100 if total_duration > 0 else 0
|
| 614 |
+
print(f"π Fallback selected Set {selected_set} with {len(final_segments)} segments ({percent_used:.1f}% of video)")
|
| 615 |
|
| 616 |
# Step 4: Create final video
|
| 617 |
print(f"π¬ Step 4: Creating final highlights video...")
|