# Source path: dev_caio/scoring/domain_presets.py
# Hosting-page metadata: uploaded by Chaitanya-aitf ("Upload 30 files"),
# commit c4ee290 (verified).
"""
ShortSmith v2 - Domain Presets Module
Content domain configurations with optimized weights for:
- Sports (audio-heavy: crowd noise, commentary)
- Vlogs (visual-heavy: expressions, reactions)
- Music (balanced: beat drops, performance)
- Podcasts (audio-heavy: speech, emphasis)
- Gaming (balanced: action, audio cues)
"""
from dataclasses import dataclass
from typing import Dict, Optional
from enum import Enum
from utils.logger import get_logger
# Module-level logger shared by everything in this file, obtained from the
# project's logging helper under the name "scoring.domain_presets".
logger = get_logger("scoring.domain_presets")
class Domain(Enum):
    """Content domains for which a scoring preset exists.

    Each member's value is the lowercase string identifier of the domain, so
    a member can be looked up from that string with ``Domain("sports")``.
    """

    # Domains with a dedicated, tuned preset.
    SPORTS = "sports"
    VLOGS = "vlogs"
    MUSIC = "music"
    PODCASTS = "podcasts"
    GAMING = "gaming"

    # Catch-all domain.
    GENERAL = "general"
@dataclass
class DomainPreset:
    """
    Configuration preset for a content domain.

    The four weights determine how much each signal contributes to the final
    score. They are expected to sum to 1.0; on construction they are rescaled
    to sum to 1.0 whenever the sum is positive and off by more than 0.01.

    Raises:
        ValueError: If any weight is negative or NaN.
    """

    name: str
    visual_weight: float  # Weight for visual analysis scores
    audio_weight: float  # Weight for audio analysis scores
    motion_weight: float  # Weight for motion detection scores
    person_weight: float  # Weight for target person visibility

    # Thresholds
    hype_threshold: float  # Minimum score to consider a highlight
    peak_threshold: float  # Threshold for peak detection

    # Audio-specific settings
    prefer_speech: bool  # Prioritize speech segments
    prefer_beats: bool  # Prioritize beat drops/music

    # Description for UI
    description: str

    def __post_init__(self):
        """Validate the weights and normalize them so they sum to 1.0.

        Raises:
            ValueError: If any weight is negative or NaN.
        """
        weights = (
            ("visual_weight", self.visual_weight),
            ("audio_weight", self.audio_weight),
            ("motion_weight", self.motion_weight),
            ("person_weight", self.person_weight),
        )
        for field_name, value in weights:
            # `not value >= 0` is true for negatives AND for NaN. Either one
            # would survive the division below and yield weights outside
            # [0, 1] even though their sum looks correct.
            if not value >= 0:
                raise ValueError(
                    f"{field_name} for preset '{self.name}' must be "
                    f"non-negative, got {value}"
                )

        total = (
            self.visual_weight
            + self.audio_weight
            + self.motion_weight
            + self.person_weight
        )
        if total == 0:
            # Nothing to scale by; keep the zeros but make the situation
            # visible instead of passing silently.
            logger.warning(
                f"All weights are zero for {self.name}; cannot normalize"
            )
            return

        # Small deviations (<= 0.01) are tolerated as rounding noise.
        if abs(total - 1.0) > 0.01:
            self.visual_weight /= total
            self.audio_weight /= total
            self.motion_weight /= total
            self.person_weight /= total
            logger.debug(f"Normalized weights for {self.name}")

    def get_weights(self) -> Dict[str, float]:
        """Return the four signal weights keyed by signal name.

        Returns:
            Dict with the keys "visual", "audio", "motion" and "person".
        """
        return {
            "visual": self.visual_weight,
            "audio": self.audio_weight,
            "motion": self.motion_weight,
            "person": self.person_weight,
        }

    def adjust_for_person_filter(self, enabled: bool) -> "DomainPreset":
        """
        Return the preset to use for the given person-filter state.

        Args:
            enabled: Whether person filtering is active.

        Returns:
            This same instance when person filtering is enabled or when the
            preset has no positive person weight. Otherwise a new preset in
            which the person weight is set to 0.0 and its former value is
            split equally between the visual, audio and motion weights; all
            other fields are carried over unchanged.
        """
        if enabled or not self.person_weight > 0:
            return self

        # Local import: the module only imports `dataclass` at the top.
        from dataclasses import replace

        # Equal three-way split of the weight that person visibility held.
        share = self.person_weight / 3
        # `replace` copies every remaining field, so fields added to the
        # dataclass later are carried over without touching this method.
        return replace(
            self,
            visual_weight=self.visual_weight + share,
            audio_weight=self.audio_weight + share,
            motion_weight=self.motion_weight + share,
            person_weight=0.0,
        )
# Built-in presets, one per Domain member. In every entry the four weights
# (visual + audio + motion + person) add up to 1.0.
PRESETS: Dict[Domain, DomainPreset] = {
    # Audio carries the largest weight.
    Domain.SPORTS: DomainPreset(
        name="Sports",
        description="Optimized for sports content: crowd reactions, commentary highlights, action moments",
        visual_weight=0.30, audio_weight=0.45, motion_weight=0.15, person_weight=0.10,
        hype_threshold=0.4, peak_threshold=0.7,
        prefer_speech=False, prefer_beats=False,
    ),
    # Visual carries the largest weight; speech segments are preferred.
    Domain.VLOGS: DomainPreset(
        name="Vlogs",
        description="Optimized for vlogs: facial expressions, reactions, storytelling moments",
        visual_weight=0.55, audio_weight=0.20, motion_weight=0.10, person_weight=0.15,
        hype_threshold=0.35, peak_threshold=0.65,
        prefer_speech=True, prefer_beats=False,
    ),
    # The only preset that prefers beats.
    Domain.MUSIC: DomainPreset(
        name="Music",
        description="Optimized for music content: beat drops, performance peaks, visual spectacle",
        visual_weight=0.35, audio_weight=0.45, motion_weight=0.10, person_weight=0.10,
        hype_threshold=0.4, peak_threshold=0.7,
        prefer_speech=False, prefer_beats=True,
    ),
    # Strongest audio weighting and the lowest thresholds of all presets.
    Domain.PODCASTS: DomainPreset(
        name="Podcasts",
        description="Optimized for podcasts: key statements, emotional moments, important points",
        visual_weight=0.10, audio_weight=0.75, motion_weight=0.05, person_weight=0.10,
        hype_threshold=0.3, peak_threshold=0.6,
        prefer_speech=True, prefer_beats=False,
    ),
    # Same weights as GENERAL, with the higher 0.4 / 0.7 thresholds.
    Domain.GAMING: DomainPreset(
        name="Gaming",
        description="Optimized for gaming: action sequences, reactions, achievement moments",
        visual_weight=0.40, audio_weight=0.35, motion_weight=0.15, person_weight=0.10,
        hype_threshold=0.4, peak_threshold=0.7,
        prefer_speech=False, prefer_beats=False,
    ),
    # Fallback preset used when a domain cannot be resolved.
    Domain.GENERAL: DomainPreset(
        name="General",
        description="Balanced preset for general content",
        visual_weight=0.40, audio_weight=0.35, motion_weight=0.15, person_weight=0.10,
        hype_threshold=0.35, peak_threshold=0.65,
        prefer_speech=False, prefer_beats=False,
    ),
}
def get_domain_preset(
    domain: str | Domain,
    person_filter_enabled: bool = False,
) -> DomainPreset:
    """
    Get the preset configuration for a domain.

    String names are matched case-insensitively and with surrounding
    whitespace ignored. Anything that cannot be resolved to a ``Domain``
    member falls back to ``Domain.GENERAL`` and is reported with a warning.

    Args:
        domain: Domain name or enum value
        person_filter_enabled: Whether person filtering is active

    Returns:
        DomainPreset for the specified domain. When person filtering is
        disabled, the preset's person weight is redistributed via
        ``adjust_for_person_filter``.
    """
    if isinstance(domain, str):
        try:
            # Strip before lookup so names such as "sports " are not
            # silently downgraded to GENERAL.
            domain = Domain(domain.strip().lower())
        except ValueError:
            logger.warning(f"Unknown domain '{domain}', using GENERAL")
            domain = Domain.GENERAL
    elif not isinstance(domain, Domain):
        # Neither a name nor an enum member (e.g. None): fall back the same
        # way the string path does, warning included.
        logger.warning(f"Unknown domain {domain!r}, using GENERAL")
        domain = Domain.GENERAL

    preset = PRESETS.get(domain, PRESETS[Domain.GENERAL])

    # adjust_for_person_filter returns the preset itself when filtering is
    # enabled, so a single call covers both cases.
    return preset.adjust_for_person_filter(person_filter_enabled)
def list_domains() -> list[Dict[str, str]]:
    """
    Describe every registered domain preset.

    Returns:
        One dictionary per entry in PRESETS, in PRESETS order, with the keys
        "id" (the Domain value), "name" and "description" (both taken from
        the preset).
    """
    catalog = []
    for key, entry in PRESETS.items():
        info = {
            "id": key.value,
            "name": entry.name,
            "description": entry.description,
        }
        catalog.append(info)
    return catalog
def create_custom_preset(
    name: str,
    visual: float = 0.4,
    audio: float = 0.35,
    motion: float = 0.15,
    person: float = 0.1,
    **kwargs,
) -> DomainPreset:
    """
    Create a custom domain preset.

    Args:
        name: Preset name
        visual: Visual weight
        audio: Audio weight
        motion: Motion weight
        person: Person weight
        **kwargs: Optional overrides for ``hype_threshold`` (default 0.35),
            ``peak_threshold`` (default 0.65), ``prefer_speech`` (default
            False), ``prefer_beats`` (default False) and ``description``
            (default "Custom preset: <name>"). Any other key is ignored and
            reported with a warning.

    Returns:
        Custom DomainPreset
    """
    options = {
        "hype_threshold": 0.35,
        "peak_threshold": 0.65,
        "prefer_speech": False,
        "prefer_beats": False,
        "description": f"Custom preset: {name}",
    }

    # A misspelled option (e.g. "hype_treshold") would otherwise vanish and
    # the default would be used without any hint to the caller.
    unknown = sorted(set(kwargs) - set(options))
    if unknown:
        logger.warning(
            f"Ignoring unknown preset parameters for '{name}': "
            f"{', '.join(unknown)}"
        )

    # Apply only the recognized overrides on top of the defaults.
    options.update(
        (key, value) for key, value in kwargs.items() if key in options
    )

    return DomainPreset(
        name=name,
        visual_weight=visual,
        audio_weight=audio,
        motion_weight=motion,
        person_weight=person,
        **options,
    )
# Export public interface: the names made available by
# `from <this module> import *`. The module-level `logger` is not exported.
__all__ = [
    "Domain",
    "DomainPreset",
    "PRESETS",
    "get_domain_preset",
    "list_domains",
    "create_custom_preset",
]