| """ |
| Story Analyzer and Summarizer |
| Analyzes video content to create compelling comic summaries with emotion matching |
| """ |
|
|
| import cv2 |
| import numpy as np |
| import os |
| import json |
from typing import List, Dict, Optional
| import srt |
| from datetime import timedelta |
from html import escape as html_escape  # escape dialogue text in generated HTML
|
|
| |
| try: |
| from transformers import pipeline |
| TRANSFORMERS_AVAILABLE = True |
| except ImportError: |
| TRANSFORMERS_AVAILABLE = False |
| print("⚠️ Transformers not available, using basic analysis") |
|
|
| class EmotionDetector: |
| """Detect facial emotions in frames""" |
| |
| def __init__(self): |
| self.face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml') |
| |
| |
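        # Keyword lists for the text-based fallback when no model is available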
| self.emotion_keywords = { |
| 'happy': ['happy', 'joy', 'laugh', 'smile', 'excited', 'wonderful', 'great', 'amazing', 'love', 'yes', 'haha', 'yay'], |
| 'sad': ['sad', 'cry', 'tear', 'sorry', 'miss', 'lonely', 'depressed', 'unhappy', 'grief', 'mourn'], |
| 'angry': ['angry', 'mad', 'furious', 'hate', 'annoyed', 'frustrated', 'rage', 'damn', 'hell', 'stupid'], |
| 'surprised': ['surprised', 'shock', 'wow', 'oh', 'what', 'really', 'seriously', 'unbelievable', 'amazing'], |
| 'fear': ['afraid', 'scared', 'fear', 'terrified', 'nervous', 'worry', 'anxious', 'panic', 'help'], |
| 'neutral': ['okay', 'fine', 'yes', 'no', 'maybe', 'think', 'know', 'understand'] |
| } |
| |
| |
        # Optionally load a transformer-based facial-emotion classifier
        self.emotion_model = None
        if TRANSFORMERS_AVAILABLE:
            try:
                self.emotion_model = pipeline("image-classification", model="dima806/facial_emotions_image_detection")
                print("✅ Emotion detection model loaded")
            except Exception:
                print("⚠️ Using keyword-based emotion detection")
| |
| def detect_facial_emotion(self, image_path: str) -> Dict[str, float]: |
| """Detect emotion from facial expression""" |
| img = cv2.imread(image_path) |
| if img is None: |
| return {'neutral': 1.0} |
| |
| |
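        # Prefer the transformer classifier when it loaded successfully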
| if self.emotion_model: |
            try:
                from PIL import Image
                pil_img = Image.open(image_path).convert("RGB")
                results = self.emotion_model(pil_img)

                # Map each predicted label to its confidence score
                emotions = {}
                for result in results:
                    emotion = result['label'].lower()
                    emotions[emotion] = result['score']

                return emotions
            except Exception:
                pass  # fall back to the cascade-based heuristic below
| |
| |
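        # Fallback: detect a face with a Haar cascade and apply a coarse heuristic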
| gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) |
| faces = self.face_cascade.detectMultiScale(gray, 1.1, 4) |
| |
| if len(faces) == 0: |
| return {'neutral': 1.0} |
| |
| |
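        # Analyze only the largest detected face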
| faces = sorted(faces, key=lambda x: x[2] * x[3], reverse=True) |
| x, y, w, h = faces[0] |
| face_roi = gray[y:y+h, x:x+w] |
| |
| |
| emotions = self._analyze_face_features(face_roi) |
| return emotions |
| |
| def _analyze_face_features(self, face_roi): |
| """Analyze facial features for emotion (basic heuristic)""" |
| h, w = face_roi.shape |
| |
| |
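        # Split the face into the brow/eye region and the mouth region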
| upper_face = face_roi[0:int(h*0.5), :] |
| lower_face = face_roi[int(h*0.5):, :] |
| |
| |
| upper_variance = np.var(upper_face) |
| lower_variance = np.var(lower_face) |
| |
| |
| emotions = {'neutral': 0.4} |
| |
| |
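        # Very rough heuristic: strong mouth-region texture suggests a smile,
        # strong brow-region texture suggests raised eyebrows (surprise)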
| if lower_variance > upper_variance * 1.5: |
| emotions['happy'] = 0.6 |
| elif upper_variance > lower_variance * 1.5: |
| emotions['surprised'] = 0.5 |
| else: |
| emotions['neutral'] = 0.8 |
| |
| return emotions |
| |
| def detect_text_emotion(self, text: str) -> Dict[str, float]: |
| """Detect emotion from text/dialogue""" |
| if not text: |
| return {'neutral': 1.0} |
| |
| text_lower = text.lower() |
| emotion_scores = {} |
| |
| |
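        # Score each emotion by keyword hits, capped at 1.0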
| for emotion, keywords in self.emotion_keywords.items(): |
| score = 0 |
| for keyword in keywords: |
| if keyword in text_lower: |
| score += 1 |
| |
| if score > 0: |
| emotion_scores[emotion] = min(score * 0.3, 1.0) |
| |
| |
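        # Punctuation cues; note that 'excited' has no dedicated bubble
        # style downstream, so it falls back to the neutral style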
| if '!' in text: |
| emotion_scores['excited'] = emotion_scores.get('excited', 0) + 0.3 |
| if '?' in text: |
| emotion_scores['surprised'] = emotion_scores.get('surprised', 0) + 0.2 |
| if '...' in text: |
| emotion_scores['sad'] = emotion_scores.get('sad', 0) + 0.2 |
| |
| |
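        # Normalize so the scores sum to 1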
| if emotion_scores: |
| total = sum(emotion_scores.values()) |
| emotion_scores = {k: v/total for k, v in emotion_scores.items()} |
| else: |
| emotion_scores = {'neutral': 1.0} |
| |
| return emotion_scores |
|
|
| class StoryAnalyzer: |
| """Analyze story structure and extract key moments""" |
| |
| def __init__(self): |
| self.emotion_detector = EmotionDetector() |
| |
| |
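        # Keyword cues for the classic four story beats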
| self.story_elements = { |
| 'introduction': ['begin', 'start', 'once', 'first', 'meet', 'introduce', 'hello'], |
| 'conflict': ['but', 'however', 'problem', 'challenge', 'difficult', 'trouble', 'fight', 'argue'], |
| 'climax': ['finally', 'suddenly', 'then', 'biggest', 'most', 'intense', 'peak', 'critical'], |
| 'resolution': ['end', 'finally', 'resolve', 'solve', 'peace', 'happy', 'conclude', 'finish'] |
| } |
| |
| |
        # Optionally load an abstractive summarization model
        self.summarizer = None
        if TRANSFORMERS_AVAILABLE:
            try:
                self.summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
                print("✅ Story summarization model loaded")
            except Exception:
                print("⚠️ Using rule-based summarization")
| |
| def analyze_story(self, subtitles: List[srt.Subtitle], frames: List[str]) -> Dict: |
| """Analyze the complete story structure""" |
| |
| |
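        # Concatenate all dialogue for summarization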
| full_text = ' '.join([sub.content for sub in subtitles]) |
| |
| |
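        # Segment the subtitles into acts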
| story_arc = self._identify_story_arc(subtitles) |
| |
| |
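        # Pick the most important moments from each act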
| key_moments = self._extract_key_moments(subtitles, story_arc) |
| |
| |
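        # Pair each moment with a frame and estimate its emotion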
| emotional_moments = self._match_emotions(key_moments, frames) |
| |
| |
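        # Produce a short text summary of the whole story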
| summary = self._create_summary(full_text, key_moments) |
| |
| return { |
| 'story_arc': story_arc, |
| 'key_moments': key_moments, |
| 'emotional_moments': emotional_moments, |
| 'summary': summary, |
| 'total_duration': subtitles[-1].end if subtitles else timedelta(0) |
| } |
| |
| def _identify_story_arc(self, subtitles: List[srt.Subtitle]) -> Dict: |
| """Identify the story structure""" |
| total_subs = len(subtitles) |
| if total_subs == 0: |
| return {} |
| |
| |
        # Rough three-act split (25% / 50% / 25%); the climax covers the
        # first 10% of the final act, and the resolution picks up after it
        act1_end = int(total_subs * 0.25)
        act2_end = int(total_subs * 0.75)
        climax_end = min(act2_end + int(total_subs * 0.1), total_subs)

        story_arc = {
            'introduction': subtitles[:act1_end],
            'development': subtitles[act1_end:act2_end],
            'climax': subtitles[act2_end:climax_end],
            'resolution': subtitles[climax_end:]
        }
| |
| return story_arc |
| |
| def _extract_key_moments(self, subtitles: List[srt.Subtitle], story_arc: Dict) -> List[Dict]: |
| """Extract 10-15 key moments from the story""" |
| key_moments = [] |
| |
| |
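        # Fixed budget of moments per act (2 + 6 + 3 + 2 = 13, trimmed to 15 max)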
| phases = ['introduction', 'development', 'climax', 'resolution'] |
| moments_per_phase = { |
| 'introduction': 2, |
| 'development': 6, |
| 'climax': 3, |
| 'resolution': 2 |
| } |
| |
| for phase in phases: |
| if phase not in story_arc: |
| continue |
| |
| phase_subs = story_arc[phase] |
| if not phase_subs: |
| continue |
| |
| |
| num_moments = moments_per_phase.get(phase, 3) |
| selected = self._select_important_subtitles(phase_subs, num_moments) |
| |
| for sub in selected: |
| key_moments.append({ |
| 'subtitle': sub, |
| 'phase': phase, |
| 'timestamp': sub.start, |
| 'text': sub.content, |
| 'importance': self._calculate_importance(sub) |
| }) |
| |
| |
| key_moments.sort(key=lambda x: x['timestamp']) |
| |
| |
| return key_moments[:15] |
| |
| def _select_important_subtitles(self, subtitles: List[srt.Subtitle], num: int) -> List[srt.Subtitle]: |
| """Select the most important subtitles from a list""" |
| if len(subtitles) <= num: |
| return subtitles |
| |
| |
| scored_subs = [] |
| for sub in subtitles: |
| score = self._calculate_importance(sub) |
| scored_subs.append((sub, score)) |
| |
| |
| scored_subs.sort(key=lambda x: x[1], reverse=True) |
| selected = [sub for sub, score in scored_subs[:num]] |
| |
| |
| selected.sort(key=lambda x: x.start) |
| |
| return selected |
| |
| def _calculate_importance(self, subtitle: srt.Subtitle) -> float: |
| """Calculate importance score for a subtitle""" |
| text = subtitle.content.lower() |
| score = 1.0 |
| |
| |
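        # Longer lines, emphatic punctuation, and emotion/action/story
        # keywords all add weight to the base score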
| score += len(text.split()) * 0.1 |
| |
| |
| if '!' in text: |
| score += 0.5 |
| if '?' in text: |
| score += 0.3 |
| |
| |
| emotion_words = ['love', 'hate', 'fear', 'happy', 'sad', 'angry', 'surprise'] |
| for word in emotion_words: |
| if word in text: |
| score += 0.5 |
| |
| |
| action_words = ['fight', 'run', 'escape', 'save', 'help', 'stop', 'go'] |
| for word in action_words: |
| if word in text: |
| score += 0.4 |
| |
| |
| for element, keywords in self.story_elements.items(): |
| for keyword in keywords: |
| if keyword in text: |
| score += 0.3 |
| |
| return score |
| |
| def _match_emotions(self, key_moments: List[Dict], frames: List[str]) -> List[Dict]: |
| """Match emotions between text and facial expressions""" |
| emotional_moments = [] |
| |
| for moment in key_moments: |
| |
| text_emotions = self.emotion_detector.detect_text_emotion(moment['text']) |
| |
| |
| best_frame = self._find_best_frame(moment['timestamp'], frames) |
| |
| if best_frame: |
| |
| facial_emotions = self.emotion_detector.detect_facial_emotion(best_frame) |
| |
| |
| combined_emotions = self._combine_emotions(text_emotions, facial_emotions) |
| |
| emotional_moments.append({ |
| 'moment': moment, |
| 'frame': best_frame, |
| 'text_emotions': text_emotions, |
| 'facial_emotions': facial_emotions, |
| 'combined_emotions': combined_emotions, |
| 'dominant_emotion': max(combined_emotions.items(), key=lambda x: x[1])[0] |
| }) |
| else: |
| emotional_moments.append({ |
| 'moment': moment, |
| 'frame': None, |
| 'text_emotions': text_emotions, |
| 'facial_emotions': {'neutral': 1.0}, |
| 'combined_emotions': text_emotions, |
| 'dominant_emotion': max(text_emotions.items(), key=lambda x: x[1])[0] |
| }) |
| |
| return emotional_moments |
| |
    def _find_best_frame(self, timestamp: timedelta, frames: List[str]) -> Optional[str]:
        """Find the frame closest to the given timestamp.

        Assumes frames were extracted at roughly 2 fps; the index is
        clamped to the last frame instead of wrapping past the end.
        """
        total_frames = len(frames)
        if total_frames == 0:
            return None

        frame_index = int(timestamp.total_seconds() * 2)
        return frames[min(frame_index, total_frames - 1)]
| |
| def _combine_emotions(self, text_emotions: Dict[str, float], |
| facial_emotions: Dict[str, float]) -> Dict[str, float]: |
| """Combine text and facial emotions""" |
| combined = {} |
| |
| |
| all_emotions = set(text_emotions.keys()) | set(facial_emotions.keys()) |
| |
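        # Weight facial evidence slightly higher than text (0.6 vs 0.4)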
| for emotion in all_emotions: |
| text_score = text_emotions.get(emotion, 0) |
| facial_score = facial_emotions.get(emotion, 0) |
| combined[emotion] = (facial_score * 0.6) + (text_score * 0.4) |
| |
| |
| total = sum(combined.values()) |
| if total > 0: |
| combined = {k: v/total for k, v in combined.items()} |
| |
| return combined |
| |
| def _create_summary(self, full_text: str, key_moments: List[Dict]) -> str: |
| """Create a concise summary of the story""" |
| if self.summarizer and len(full_text) > 100: |
            try:
                # Truncate inputs that exceed the model's context window
                summary = self.summarizer(full_text, max_length=130, min_length=30,
                                          do_sample=False, truncation=True)
                return summary[0]['summary_text']
            except Exception:
                pass  # fall back to the rule-based summary below
| |
| |
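        # Fallback: stitch together the first few key moments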
| summary_parts = [] |
| for moment in key_moments[:5]: |
| summary_parts.append(moment['text']) |
| |
| return ' '.join(summary_parts) |
|
|
| class SmartComicGenerator: |
| """Generate comics with emotion-matched panels and story summarization""" |
| |
| def __init__(self): |
| self.story_analyzer = StoryAnalyzer() |
| self.panel_target = 12 |
| |
| def generate_smart_comic(self, video_path: str, output_dir: str = 'output'): |
| """Generate a smart comic with emotion matching and story summary""" |
| print("🎬 Generating Smart Comic with Emotion Matching...") |
| |
| |
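        # Inputs are currently read from fixed paths ('test1.srt', 'frames/');
        # video_path is accepted for future use but not consumed here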
| subtitles = self._load_subtitles() |
| |
| |
| frames = self._get_frames() |
| |
| |
| print("📖 Analyzing story structure...") |
| story_analysis = self.story_analyzer.analyze_story(subtitles, frames) |
| |
| |
| print("🎭 Matching emotions and selecting key moments...") |
| comic_panels = self._select_comic_panels(story_analysis) |
| |
| |
| print("📐 Creating comic layout...") |
        comic_data = self._create_comic_layout(comic_panels, story_analysis['summary'])
| |
| |
| self._save_comic(comic_data, output_dir) |
| |
| print(f"✅ Smart comic generated with {len(comic_panels)} panels!") |
| print(f"📝 Story summary: {story_analysis['summary'][:100]}...") |
| |
| return comic_data |
| |
| def _load_subtitles(self) -> List[srt.Subtitle]: |
| """Load subtitles from file""" |
        if os.path.exists('test1.srt'):
            with open('test1.srt', 'r', encoding='utf-8') as f:
                return list(srt.parse(f.read()))
| return [] |
| |
| def _get_frames(self) -> List[str]: |
| """Get all available frames""" |
| frames_dir = 'frames/final' if os.path.exists('frames/final') else 'frames' |
| frames = [] |
| |
| if os.path.exists(frames_dir): |
| frames = [os.path.join(frames_dir, f) for f in sorted(os.listdir(frames_dir)) |
| if f.endswith('.png')] |
| |
| return frames |
| |
| def _select_comic_panels(self, story_analysis: Dict) -> List[Dict]: |
| """Select the best panels for the comic""" |
| emotional_moments = story_analysis['emotional_moments'] |
| |
| |
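        # Track which emotions are already represented to keep panels varied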
| selected_panels = [] |
| used_emotions = set() |
| |
| |
| phases_needed = ['introduction', 'development', 'climax', 'resolution'] |
| |
| for phase in phases_needed: |
| phase_moments = [m for m in emotional_moments |
| if m['moment']['phase'] == phase] |
| |
| if phase_moments: |
| |
| for moment in phase_moments: |
| emotion = moment['dominant_emotion'] |
| |
| |
| if emotion not in used_emotions or moment['moment']['importance'] > 3: |
| selected_panels.append(moment) |
| used_emotions.add(emotion) |
| |
                    if len(selected_panels) >= self.panel_target:
                        break

            if len(selected_panels) >= self.panel_target:
                break
| |
| |
| if len(selected_panels) < self.panel_target: |
| remaining = [m for m in emotional_moments if m not in selected_panels] |
| remaining.sort(key=lambda x: x['moment']['importance'], reverse=True) |
| selected_panels.extend(remaining[:self.panel_target - len(selected_panels)]) |
| |
| |
| selected_panels.sort(key=lambda x: x['moment']['timestamp']) |
| |
| |
| return selected_panels[:self.panel_target] |
| |
    def _create_comic_layout(self, comic_panels: List[Dict], summary: str = '') -> Dict:
| """Create comic layout with panels and emotion-matched bubbles""" |
| pages = [] |
| panels_per_page = 4 |
| |
| for i in range(0, len(comic_panels), panels_per_page): |
| page_panels = comic_panels[i:i+panels_per_page] |
| |
| page = { |
| 'width': 800, |
| 'height': 600, |
| 'panels': [], |
| 'bubbles': [] |
| } |
| |
| |
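            # 2x2 grid of (x, y, width, height) slots on an 800x600 page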
| positions = [ |
| (10, 10, 380, 280), |
| (410, 10, 380, 280), |
| (10, 310, 380, 280), |
| (410, 310, 380, 280) |
| ] |
| |
| for j, panel_data in enumerate(page_panels): |
| if j >= 4: |
| break |
| |
| x, y, w, h = positions[j] |
| |
| |
| page['panels'].append({ |
| 'x': x, 'y': y, |
| 'width': w, 'height': h, |
                    'image': panel_data['frame'] or 'frames/frame000.png',
| 'emotion': panel_data['dominant_emotion'] |
| }) |
| |
| |
| bubble_style = self._get_bubble_style(panel_data['dominant_emotion']) |
| |
| page['bubbles'].append({ |
| 'id': f'bubble_{i+j}', |
| 'x': x + 20, |
| 'y': y + 20, |
| 'width': 180, |
| 'height': 80, |
| 'text': panel_data['moment']['text'], |
| 'emotion': panel_data['dominant_emotion'], |
| 'style': bubble_style |
| }) |
| |
| pages.append(page) |
| |
        return {
            'pages': pages,
            'summary': summary,
            'total_panels': len(comic_panels)
        }
| |
| def _get_bubble_style(self, emotion: str) -> Dict: |
| """Get bubble style based on emotion""" |
| styles = { |
| 'happy': { |
| 'border_color': '#4CAF50', |
| 'background': '#E8F5E9', |
| 'font_size': 16, |
| 'border_width': 3 |
| }, |
| 'sad': { |
| 'border_color': '#2196F3', |
| 'background': '#E3F2FD', |
| 'font_size': 14, |
| 'border_width': 2 |
| }, |
| 'angry': { |
| 'border_color': '#F44336', |
| 'background': '#FFEBEE', |
| 'font_size': 18, |
| 'border_width': 4, |
| 'jagged': True |
| }, |
| 'surprised': { |
| 'border_color': '#FF9800', |
| 'background': '#FFF3E0', |
| 'font_size': 16, |
| 'border_width': 3, |
| 'exclamation': True |
| }, |
| 'fear': { |
| 'border_color': '#9C27B0', |
| 'background': '#F3E5F5', |
| 'font_size': 14, |
| 'border_width': 2, |
| 'wavy': True |
| }, |
| 'neutral': { |
| 'border_color': '#333', |
| 'background': '#FFF', |
| 'font_size': 14, |
| 'border_width': 2 |
| } |
| } |
| |
| return styles.get(emotion, styles['neutral']) |
| |
| def _save_comic(self, comic_data: Dict, output_dir: str): |
| """Save comic data and generate HTML""" |
| os.makedirs(output_dir, exist_ok=True) |
| |
| |
| with open(os.path.join(output_dir, 'smart_comic.json'), 'w') as f: |
| json.dump(comic_data, f, indent=2, default=str) |
| |
| |
| html = self._generate_html(comic_data) |
| with open(os.path.join(output_dir, 'smart_comic.html'), 'w') as f: |
| f.write(html) |
| |
| def _generate_html(self, comic_data: Dict) -> str: |
| """Generate HTML for the smart comic""" |
| html = """<!DOCTYPE html> |
| <html> |
| <head> |
| <title>Smart Comic - Emotion Matched</title> |
| <style> |
| body { margin: 0; padding: 20px; background: #f0f0f0; font-family: Arial, sans-serif; } |
| .comic-page { position: relative; background: white; margin: 20px auto; box-shadow: 0 4px 20px rgba(0,0,0,0.1); } |
| .comic-panel { position: absolute; border: 3px solid #333; overflow: hidden; } |
| .comic-panel img { width: 100%; height: 100%; object-fit: contain; background: #000; } |
| .speech-bubble { position: absolute; border-radius: 20px; padding: 15px; font-family: 'Comic Sans MS', cursive; font-weight: bold; text-align: center; z-index: 10; } |
| .emotion-happy { animation: bounce 2s infinite; } |
| .emotion-sad { opacity: 0.8; } |
| .emotion-angry { animation: shake 0.5s infinite; } |
| .emotion-surprised { transform: scale(1.1); } |
| @keyframes bounce { 0%, 100% { transform: translateY(0); } 50% { transform: translateY(-5px); } } |
| @keyframes shake { 0%, 100% { transform: translateX(0); } 25% { transform: translateX(-2px); } 75% { transform: translateX(2px); } } |
| </style> |
| </head> |
| <body> |
| """ |
| |
| for page in comic_data['pages']: |
| html += f'<div class="comic-page" style="width:{page["width"]}px;height:{page["height"]}px;">\n' |
| |
| for panel in page['panels']: |
| html += f'<div class="comic-panel" style="left:{panel["x"]}px;top:{panel["y"]}px;width:{panel["width"]}px;height:{panel["height"]}px;">' |
| html += f'<img src="{panel["image"]}" alt="Panel">' |
| html += '</div>\n' |
| |
| for bubble in page['bubbles']: |
| style = bubble.get('style', {}) |
| emotion_class = f"emotion-{bubble.get('emotion', 'neutral')}" |
| |
| html += f'<div class="speech-bubble {emotion_class}" style="' |
| html += f'left:{bubble["x"]}px;top:{bubble["y"]}px;' |
| html += f'width:{bubble["width"]}px;height:{bubble["height"]}px;' |
| html += f'border: {style.get("border_width", 2)}px solid {style.get("border_color", "#333")};' |
| html += f'background: {style.get("background", "#FFF")};' |
| html += f'font-size: {style.get("font_size", 14)}px;">' |
| html += bubble["text"] |
| html += '</div>\n' |
| |
| html += '</div>\n' |
| |
| html += """ |
| </body> |
| </html>""" |
| |
| return html |
|
|
| |
| def test_smart_comic(video_path='video/sample.mp4'): |
| """Test the smart comic generation""" |
| generator = SmartComicGenerator() |
| comic_data = generator.generate_smart_comic(video_path) |
| print("✅ Smart comic generated successfully!") |
| return comic_data |
|
|
| if __name__ == "__main__": |
| test_smart_comic() |