Spaces:

jhh6576
/

Comic123

Sleeping

App Files Files Community

Comic123 / backend /keyframes /keyframes_smart.py

3v324v23

Update Comic123 with local comic folder files

83e35a7 3 months ago

raw

history blame contribute delete

7.84 kB

	"""
	Smart keyframe generation with eye detection and emotion matching
	"""

	import os
	import tempfile
	from typing import List

	import cv2
	import numpy as np
	import srt

	from backend.eye_state_detector import EyeStateDetector, enhance_frame_selection
	from backend.utils import copy_and_rename_file

	def generate_keyframes_smart(video_path: str, story_subs: List, max_frames: int = 48):
	    """
	    Generate keyframes with smart selection (no half-closed eyes)

	    For each of the first ``max_frames`` subtitles, several candidate frames
	    are sampled from the subtitle's time span and the best-scoring one is
	    written to ``frames/final/frameNNN.png``. Any ``.png`` already present in
	    that directory is deleted first. If fewer than 10 frames (and fewer than
	    ``max_frames``) were produced, a fallback pass samples the whole video.

	    Args:
	        video_path: Path to video file
	        story_subs: List of subtitle objects for key story moments; each is
	            read through its ``content``, ``start`` and ``end`` attributes
	        max_frames: Maximum number of frames to extract (default 48)

	    Returns:
	        True if at least one ``.png`` exists in ``frames/final`` afterwards,
	        False otherwise (including when the video cannot be opened).
	    """

	    print(f"🎯 Generating {len(story_subs)} smart keyframes (avoiding closed eyes)")

	    # Initialize eye detector
	    eye_detector = EyeStateDetector()

	    # Ensure output directory exists
	    final_dir = "frames/final"
	    os.makedirs(final_dir, exist_ok=True)

	    # Clear existing frames so output of a previous run is never mixed in
	    for f in os.listdir(final_dir):
	        if f.endswith('.png'):
	            os.remove(os.path.join(final_dir, f))

	    # Open video; the finally clause guarantees the capture is released on
	    # every exit path (early return, normal completion, or interruption).
	    cap = cv2.VideoCapture(video_path)
	    extracted_count = 0
	    try:
	        if not cap.isOpened():
	            print(f"❌ Failed to open video: {video_path}")
	            return False

	        fps = cap.get(cv2.CAP_PROP_FPS)
	        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

	        print(f"📹 Video: {fps} fps, {total_frames} total frames")
	        print("👁️ Smart frame selection enabled (avoiding half-closed eyes)")

	        # Number of segments that will actually be processed (for progress output)
	        segment_total = min(len(story_subs), max_frames)

	        for i, sub in enumerate(story_subs[:max_frames]):
	            try:
	                print(f"\n📝 Processing segment {i+1}/{segment_total}: {sub.content[:40]}...")

	                # Extract multiple candidate frames for this subtitle
	                candidates = extract_candidate_frames(
	                    cap, sub, fps,
	                    num_candidates=5  # Extract 5 frames to choose from
	                )

	                if candidates:
	                    # Select best frame (no half-closed eyes)
	                    best_frame, eye_state = select_best_candidate(candidates, eye_detector)

	                    if best_frame is not None:
	                        # Output files are numbered by successful extractions,
	                        # not by subtitle index, so numbering has no gaps.
	                        output_path = os.path.join(final_dir, f"frame{extracted_count:03d}.png")
	                        cv2.imwrite(output_path, best_frame)
	                        extracted_count += 1

	                        print(f" ✅ Selected frame with {eye_state['state']} eyes (confidence: {eye_state['confidence']:.2f})")
	                    else:
	                        print(" ⚠️ No suitable frame found (all had closed/half-closed eyes)")
	                else:
	                    print(" ⚠️ Failed to extract candidate frames")

	            except Exception as e:
	                # Best effort: one bad segment must not abort the whole run
	                print(f" ❌ Error processing segment {i+1}: {e}")
	    finally:
	        cap.release()

	    # If we didn't get enough frames, extract more with relaxed criteria
	    if extracted_count < max_frames and extracted_count < 10:
	        print(f"\n⚠️ Only extracted {extracted_count} frames, extracting more with relaxed criteria...")
	        _extract_additional_frames(video_path, final_dir, extracted_count, max_frames)

	    # Final count (re-read from disk so fallback frames are included)
	    final_frames = len([f for f in os.listdir(final_dir) if f.endswith('.png')])
	    print(f"\n✅ Total frames extracted: {final_frames}")
	    print("👁️ All frames checked for eye quality")

	    return final_frames > 0


	def extract_candidate_frames(cap, subtitle, fps, num_candidates=5):
	    """Extract multiple candidate frames from a subtitle segment.

	    Timestamps are spread evenly between 20% and 80% of the subtitle's
	    duration (or the midpoint only, when the segment is shorter than 0.5 s
	    or a single candidate is requested). Each timestamp is converted to a
	    frame index with ``fps``; an index that was already requested is skipped
	    so the same frame is never decoded (and later analysed) twice.

	    Args:
	        cap: Opened video capture; used through ``set`` and ``read``
	        subtitle: Object whose ``start`` and ``end`` provide ``total_seconds()``
	        fps: Frames per second used to convert timestamps to frame indices
	        num_candidates: Number of timestamps to sample (default 5)

	    Returns:
	        List of successfully read frames, in timestamp order. May contain
	        fewer than ``num_candidates`` entries (failed reads, duplicate
	        indices) and may be empty.
	    """

	    candidates = []

	    # Calculate time range
	    start_time = subtitle.start.total_seconds()
	    end_time = subtitle.end.total_seconds()
	    duration = end_time - start_time

	    # If duration is very short, just get middle frame
	    if duration < 0.5:
	        num_candidates = 1

	    # Frame indices already requested; short segments or low fps can map
	    # several timestamps onto the same frame.
	    requested = set()

	    # Extract frames evenly distributed across the duration
	    for i in range(num_candidates):
	        # Calculate timestamp (avoid very start/end to reduce motion blur)
	        if num_candidates == 1:
	            time_offset = duration / 2
	        else:
	            # Distribute between 20% and 80% of duration
	            time_offset = 0.2 * duration + (i / (num_candidates - 1)) * 0.6 * duration

	        timestamp = start_time + time_offset
	        frame_num = int(timestamp * fps)

	        if frame_num in requested:
	            continue
	        requested.add(frame_num)

	        # Seek, then decode the frame at that position
	        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
	        ret, frame = cap.read()

	        if ret and frame is not None:
	            candidates.append(frame)

	    return candidates


	def select_best_candidate(candidates: List[np.ndarray], eye_detector: EyeStateDetector):
	    """Select the best frame from candidates based on eye state.

	    Every candidate is written to a temporary PNG (the detector is called
	    with a file path), scored with ``calculate_frame_score``, and the
	    highest-scoring frame wins. On equal scores the earliest candidate is
	    kept.

	    The temporary images live in a private temporary directory that is
	    removed when the function exits, even if the detector raises, so
	    concurrent runs cannot overwrite each other's files and nothing is
	    left behind in the working directory.

	    Args:
	        candidates: Frames to choose from
	        eye_detector: Detector providing ``check_eyes_state(path)``

	    Returns:
	        Tuple ``(best_frame, best_state)``; ``(None, None)`` when
	        ``candidates`` is empty.
	    """

	    best_frame = None
	    best_score = -1
	    best_state = None

	    with tempfile.TemporaryDirectory(prefix="keyframe_candidates_") as temp_dir:
	        for i, frame in enumerate(candidates):
	            # Save temp frame for analysis
	            temp_path = os.path.join(temp_dir, f"temp_candidate_{i}.png")
	            cv2.imwrite(temp_path, frame)

	            # Check eye state
	            eye_state = eye_detector.check_eyes_state(temp_path)

	            # Calculate score
	            score = calculate_frame_score(eye_state)

	            # Strict comparison: a later candidate must beat, not tie, the best
	            if score > best_score:
	                best_score = score
	                best_frame = frame
	                best_state = eye_state

	    return best_frame, best_state


	def calculate_frame_score(eye_state):
	    """Return a quality score for a frame from its eye-state report.

	    The score is the sum of three parts: points for the reported state,
	    three times the reported confidence, and a flat 5-point bonus when the
	    frame is flagged as suitable for the comic. Higher is better.

	    Args:
	        eye_state: Mapping with the keys ``'state'``, ``'confidence'`` and
	            ``'suitable_for_comic'``

	    Returns:
	        The score as a float.
	    """

	    # Points per eye state (most important component). Any state that is
	    # not listed here -- i.e. closed -- earns no points.
	    points_by_state = {
	        'open': 10.0,
	        'partially_open': 7.0,
	        'unknown': 5.0,  # Might be okay (no face detected)
	        'half_closed': 2.0,
	    }

	    total = 0.0
	    total += points_by_state.get(eye_state['state'], 0.0)

	    # Confidence bonus
	    confidence_bonus = eye_state['confidence'] * 3.0
	    total += confidence_bonus

	    # Suitability bonus
	    if not eye_state['suitable_for_comic']:
	        return total

	    return total + 5.0


	def _extract_additional_frames(video_path: str, output_dir: str, start_count: int, target_count: int):
	    """Extract additional frames with relaxed eye criteria.

	    Samples frames spread over the whole video and saves every frame whose
	    eye state is neither ``'closed'`` nor ``'half_closed'`` as
	    ``frameNNN.png`` in ``output_dir``, numbering from ``start_count``, until
	    ``target_count`` frames exist or the sampling positions are exhausted.

	    Up to three times the number of missing frames are examined. The first
	    pass uses evenly spaced positions; later passes are shifted by a fraction
	    of the spacing so they look at new frames instead of revisiting the same
	    ones, and no frame index is examined twice.

	    Args:
	        video_path: Path to video file
	        output_dir: Directory the accepted frames are written to
	        start_count: Number of frames already present (first new index)
	        target_count: Total number of frames wanted

	    Returns:
	        None. Returns silently if the video cannot be opened.
	    """

	    cap = cv2.VideoCapture(video_path)
	    if not cap.isOpened():
	        return

	    count = start_count
	    try:
	        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
	        needed = target_count - start_count

	        # Nothing to sample when no frames are missing or the container
	        # reports no frame count (which would otherwise divide by zero).
	        if needed > 0 and total_frames > 0:
	            eye_detector = EyeStateDetector()

	            # Private scratch directory: removed on exit even if the detector
	            # raises, and never shared with a concurrent run.
	            with tempfile.TemporaryDirectory(prefix="keyframe_check_") as temp_dir:
	                positions = _fallback_frame_indices(total_frames, needed)
	                for attempt, frame_num in enumerate(positions):
	                    if count >= target_count:
	                        break

	                    cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
	                    ret, frame = cap.read()
	                    if not ret:
	                        continue

	                    # The detector is called with a file path, so the frame
	                    # is written to disk first
	                    temp_path = os.path.join(temp_dir, f"temp_check_{attempt}.png")
	                    cv2.imwrite(temp_path, frame)
	                    eye_state = eye_detector.check_eyes_state(temp_path)

	                    # Drop the scratch image right away to keep disk use flat
	                    if os.path.exists(temp_path):
	                        os.remove(temp_path)

	                    # Relaxed criteria: accept anything not closed or half-closed
	                    if eye_state['state'] not in ('closed', 'half_closed'):
	                        output_path = os.path.join(output_dir, f"frame{count:03d}.png")
	                        cv2.imwrite(output_path, frame)
	                        count += 1
	                        print(f" ✅ Added frame {count} ({eye_state['state']} eyes)")
	    finally:
	        cap.release()

	    print(f" ✅ Extracted {count - start_count} additional frames")


	def _fallback_frame_indices(total_frames: int, needed: int, passes: int = 3):
	    """Yield distinct frame indices spread over the video, coarsest pass first.

	    Pass 0 yields ``needed`` evenly spaced indices starting at frame 0. Each
	    later pass is offset by ``1/passes`` of the spacing, so it lands between
	    the positions of the earlier passes. Indices already yielded are skipped.
	    Requires ``total_frames > 0`` and ``needed > 0``.
	    """
	    step = total_frames / needed
	    yielded = set()
	    for pass_index in range(passes):
	        offset = pass_index * step / passes
	        for slot in range(needed):
	            # Clamp defensively against float rounding at the very end
	            frame_num = min(int(slot * step + offset), total_frames - 1)
	            if frame_num not in yielded:
	                yielded.add(frame_num)
	                yield frame_num