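# NOTE(review): stray page-header text ("Spaces: Paused / Paused") preceded the
# module here; it does not appear to be part of the source - confirm and remove.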
"""
ShortSmith v2 - Domain Presets Module

Content domain configurations with optimized weights for:
- Sports (audio-heavy: crowd noise, commentary)
- Vlogs (visual-heavy: expressions, reactions)
- Music (balanced: beat drops, performance)
- Podcasts (audio-heavy: speech, emphasis)
- Gaming (balanced: action, audio cues)
- General (balanced fallback used for unknown domains)
"""
from dataclasses import dataclass
from enum import Enum
from typing import Dict, Optional

from utils.logger import get_logger
# Module-level logger used by DomainPreset.__post_init__ and get_domain_preset.
logger = get_logger("scoring.domain_presets")
class Domain(Enum):
    """Supported content domains.

    Each member's value is the lowercase string identifier for the domain,
    so a member can be looked up by value, e.g. ``Domain("sports")``.
    """

    SPORTS = "sports"
    VLOGS = "vlogs"
    MUSIC = "music"
    PODCASTS = "podcasts"
    GAMING = "gaming"
    GENERAL = "general"
@dataclass
class DomainPreset:
    """
    Configuration preset for a content domain.

    Weights determine how much each signal contributes to the final score.
    All weights should sum to 1.0 for proper normalization; ``__post_init__``
    rescales them when a positive total differs from 1.0 by more than 0.01.

    The class must be a dataclass: it is constructed with keyword arguments
    and relies on ``__post_init__`` being invoked by the generated ``__init__``.
    """

    name: str
    visual_weight: float  # Weight for visual analysis scores
    audio_weight: float  # Weight for audio analysis scores
    motion_weight: float  # Weight for motion detection scores
    person_weight: float  # Weight for target person visibility

    # Thresholds
    hype_threshold: float  # Minimum score to consider a highlight
    peak_threshold: float  # Threshold for peak detection

    # Audio-specific settings
    prefer_speech: bool  # Prioritize speech segments
    prefer_beats: bool  # Prioritize beat drops/music

    # Description for UI
    description: str

    def __post_init__(self):
        """Validate and normalize weights.

        If the four weights have a positive total that is more than 0.01
        away from 1.0, each weight is divided by that total in place. A
        non-positive total is left untouched (nothing to scale by).
        """
        total = (
            self.visual_weight
            + self.audio_weight
            + self.motion_weight
            + self.person_weight
        )
        if total > 0 and abs(total - 1.0) > 0.01:
            # Normalize
            self.visual_weight /= total
            self.audio_weight /= total
            self.motion_weight /= total
            self.person_weight /= total
            logger.debug(f"Normalized weights for {self.name}")

    def get_weights(self) -> Dict[str, float]:
        """Get weights as dictionary.

        Returns:
            A new dict with the keys ``"visual"``, ``"audio"``, ``"motion"``
            and ``"person"`` mapped to the corresponding weight fields.
        """
        return {
            "visual": self.visual_weight,
            "audio": self.audio_weight,
            "motion": self.motion_weight,
            "person": self.person_weight,
        }

    def adjust_for_person_filter(self, enabled: bool) -> "DomainPreset":
        """
        Adjust weights when person filtering is enabled/disabled.

        When person filtering is enabled, the preset keeps the weight it
        allocates to person visibility. When it is disabled, that weight is
        split equally between the visual, audio and motion weights.

        Args:
            enabled: Whether person filtering is active.

        Returns:
            ``self`` when ``enabled`` is true or ``person_weight`` is not
            positive; otherwise a new ``DomainPreset`` with ``person_weight``
            set to 0.0 and every other field carried over.
        """
        if not enabled and self.person_weight > 0:
            # Redistribute person weight evenly so the total stays the same.
            extra = self.person_weight / 3
            return DomainPreset(
                name=self.name,
                visual_weight=self.visual_weight + extra,
                audio_weight=self.audio_weight + extra,
                motion_weight=self.motion_weight + extra,
                person_weight=0.0,
                hype_threshold=self.hype_threshold,
                peak_threshold=self.peak_threshold,
                prefer_speech=self.prefer_speech,
                prefer_beats=self.prefer_beats,
                description=self.description,
            )
        return self
# Predefined domain presets, one per Domain member.
# Each preset's four weights are written so that they already sum to 1.0.
PRESETS: Dict[Domain, DomainPreset] = {
    Domain.SPORTS: DomainPreset(
        name="Sports",
        visual_weight=0.30,
        audio_weight=0.45,
        motion_weight=0.15,
        person_weight=0.10,
        hype_threshold=0.4,
        peak_threshold=0.7,
        prefer_speech=False,
        prefer_beats=False,
        description="Optimized for sports content: crowd reactions, commentary highlights, action moments",
    ),
    Domain.VLOGS: DomainPreset(
        name="Vlogs",
        visual_weight=0.55,
        audio_weight=0.20,
        motion_weight=0.10,
        person_weight=0.15,
        hype_threshold=0.35,
        peak_threshold=0.65,
        prefer_speech=True,
        prefer_beats=False,
        description="Optimized for vlogs: facial expressions, reactions, storytelling moments",
    ),
    Domain.MUSIC: DomainPreset(
        name="Music",
        visual_weight=0.35,
        audio_weight=0.45,
        motion_weight=0.10,
        person_weight=0.10,
        hype_threshold=0.4,
        peak_threshold=0.7,
        prefer_speech=False,
        prefer_beats=True,
        description="Optimized for music content: beat drops, performance peaks, visual spectacle",
    ),
    Domain.PODCASTS: DomainPreset(
        name="Podcasts",
        visual_weight=0.10,
        audio_weight=0.75,
        motion_weight=0.05,
        person_weight=0.10,
        hype_threshold=0.3,
        peak_threshold=0.6,
        prefer_speech=True,
        prefer_beats=False,
        description="Optimized for podcasts: key statements, emotional moments, important points",
    ),
    Domain.GAMING: DomainPreset(
        name="Gaming",
        visual_weight=0.40,
        audio_weight=0.35,
        motion_weight=0.15,
        person_weight=0.10,
        hype_threshold=0.4,
        peak_threshold=0.7,
        prefer_speech=False,
        prefer_beats=False,
        description="Optimized for gaming: action sequences, reactions, achievement moments",
    ),
    Domain.GENERAL: DomainPreset(
        name="General",
        visual_weight=0.40,
        audio_weight=0.35,
        motion_weight=0.15,
        person_weight=0.10,
        hype_threshold=0.35,
        peak_threshold=0.65,
        prefer_speech=False,
        prefer_beats=False,
        description="Balanced preset for general content",
    ),
}
def get_domain_preset(
    domain: str | Domain,
    person_filter_enabled: bool = False,
) -> DomainPreset:
    """
    Get the preset configuration for a domain.

    Args:
        domain: Domain name or enum value. Strings are matched against the
            enum values case-insensitively, ignoring surrounding whitespace.
        person_filter_enabled: Whether person filtering is active.

    Returns:
        DomainPreset for the specified domain. Unknown domains fall back to
        the GENERAL preset (with a warning for unrecognized strings). When
        person filtering is enabled the shared object stored in ``PRESETS``
        is returned as-is; when it is disabled and the preset has a positive
        person weight, a new preset with that weight redistributed is
        returned instead.
    """
    # Convert string to enum if needed
    if isinstance(domain, str):
        try:
            domain = Domain(domain.strip().lower())
        except ValueError:
            logger.warning(f"Unknown domain '{domain}', using GENERAL")
            domain = Domain.GENERAL

    preset = PRESETS.get(domain, PRESETS[Domain.GENERAL])

    # adjust_for_person_filter returns the preset itself when the filter is
    # enabled, so a single call covers both branches.
    return preset.adjust_for_person_filter(person_filter_enabled)
def list_domains() -> list[Dict[str, str]]:
    """
    List available domains with descriptions.

    Returns:
        List of domain info dictionaries, one per entry in ``PRESETS`` in
        its insertion order, each with the keys ``"id"`` (the enum value),
        ``"name"`` and ``"description"``.
    """
    return [
        {
            "id": domain.value,
            "name": preset.name,
            "description": preset.description,
        }
        for domain, preset in PRESETS.items()
    ]
def create_custom_preset(
    name: str,
    visual: float = 0.4,
    audio: float = 0.35,
    motion: float = 0.15,
    person: float = 0.1,
    **kwargs,
) -> DomainPreset:
    """
    Create a custom domain preset.

    Args:
        name: Preset name
        visual: Visual weight
        audio: Audio weight
        motion: Motion weight
        person: Person weight
        **kwargs: Additional preset parameters. Recognized keys are
            ``hype_threshold`` (default 0.35), ``peak_threshold`` (default
            0.65), ``prefer_speech`` (default False), ``prefer_beats``
            (default False) and ``description`` (default
            ``"Custom preset: <name>"``). Any other key is ignored.

    Returns:
        Custom DomainPreset
    """
    return DomainPreset(
        name=name,
        visual_weight=visual,
        audio_weight=audio,
        motion_weight=motion,
        person_weight=person,
        hype_threshold=kwargs.get("hype_threshold", 0.35),
        peak_threshold=kwargs.get("peak_threshold", 0.65),
        prefer_speech=kwargs.get("prefer_speech", False),
        prefer_beats=kwargs.get("prefer_beats", False),
        description=kwargs.get("description", f"Custom preset: {name}"),
    )
# Export public interface
__all__ = [
    "Domain",
    "DomainPreset",
    "PRESETS",
    "get_domain_preset",
    "list_domains",
    "create_custom_preset",
]