"""
ScriptProcessor - Orchestrates VO script processing using Gemini prompts.

Flow:
1. split() - Uses vo_segment_splitter.md to break VO into segments
2. get_metadata() - Uses vo_segment_enricher.md to enrich each segment
3. match_video() - Uses vo_video_matcher.md to find matching library video
4. generate_prompt() - Uses vo_video_generator.md to create video gen spec
"""

import json
import os
import re
from typing import List, Dict, Optional
from pathlib import Path

from src.logger_config import logger
from google_src import ai_studio_sdk


# Matches a markdown fenced block anywhere in a response, with an optional
# language tag (```json, ```JSON, or a bare ```), capturing the fenced content.
_FENCED_BLOCK_RE = re.compile(r"```[A-Za-z0-9_-]*\s*(.*?)```", re.DOTALL)


class ScriptProcessor:
    """Processes voice-over scripts through prompt-based pipeline.

    Prompt templates are read once, at construction time, from the
    ``prompt`` directory located two levels above this file. Each pipeline
    step fills placeholders in its template, sends the result to Gemini via
    ``ai_studio_sdk.generate`` and parses the reply as JSON.
    """

    def __init__(self):
        self._prompt_dir = Path(__file__).parent.parent / "prompt"
        self._prompts = {}
        self._load_prompts()

    def _load_prompts(self):
        """Load all prompt templates from markdown files.

        A missing file is logged as a warning and recorded as an empty
        template so that construction never fails; the step that needs the
        template raises when it is actually used.
        """
        prompt_files = {
            "split": "vo_segment_splitter.md",
            "metadata": "vo_segment_enricher.md",
            "match": "vo_video_matcher.md",
            "prompt_gen": "vo_video_generator.md",
        }

        for key, filename in prompt_files.items():
            filepath = self._prompt_dir / filename
            if filepath.exists():
                # Explicit encoding: the platform default is not UTF-8
                # everywhere, which would garble or reject the templates.
                self._prompts[key] = filepath.read_text(encoding="utf-8")
                logger.debug(f"Loaded prompt: {filename}")
            else:
                logger.warning(f"Prompt file not found: {filepath}")
                self._prompts[key] = ""

    def _render(self, key: str, values: Dict[str, str]) -> str:
        """Fill the placeholders of the prompt template stored under ``key``.

        Args:
            key: Template key as registered by ``_load_prompts``.
            values: Mapping of literal placeholder text to its replacement.

        Returns:
            The template with every placeholder occurrence substituted.

        Raises:
            ValueError: If the template is missing or empty.
        """
        template = self._prompts.get(key, "")
        if not template:
            # Fail before spending an API call on an empty prompt.
            raise ValueError(
                f"Prompt template '{key}' is missing or empty "
                f"(expected under {self._prompt_dir})"
            )
        if not values:
            return template

        # Substitute in a single pass so that inserted text is never scanned
        # again: a value that happens to contain another placeholder must
        # stay literal. Longest placeholder first so that a placeholder which
        # is a prefix of another cannot shadow it.
        placeholders = sorted(values, key=len, reverse=True)
        pattern = re.compile("|".join(re.escape(p) for p in placeholders))
        # A function replacement keeps backslashes in the JSON values literal.
        return pattern.sub(lambda m: values[m.group(0)], template)

    def _call_gemini(self, prompt: str) -> str:
        """Call Gemini and return response text.

        Args:
            prompt: Fully rendered prompt text.

        Returns:
            The response with surrounding whitespace removed.

        Raises:
            ValueError: If the response is empty or whitespace only.
        """
        response = ai_studio_sdk.generate(prompt)
        text = response.strip() if response else ""
        if not text:
            raise ValueError("Gemini returned empty response")
        return text

    def _parse_json(self, text: str) -> dict | list:
        """Parse JSON from Gemini response, handling markdown code blocks.

        A leading ```` ```json ```` / ```` ``` ```` fence and a trailing
        ```` ``` ```` fence are stripped first. If the result is not valid
        JSON, the first fenced block found anywhere in the response is tried
        instead, which covers replies that wrap the block in prose or use a
        differently cased language tag.

        Args:
            text: Raw response text.

        Returns:
            The decoded JSON value.

        Raises:
            json.JSONDecodeError: If no valid JSON can be extracted
                (a subclass of ValueError).
        """
        stripped = text.strip()

        candidate = stripped
        if candidate.startswith("```json"):
            candidate = candidate[7:]
        elif candidate.startswith("```"):
            candidate = candidate[3:]
        if candidate.endswith("```"):
            candidate = candidate[:-3]

        try:
            return json.loads(candidate.strip())
        except json.JSONDecodeError:
            fenced = _FENCED_BLOCK_RE.search(stripped)
            if fenced is None:
                raise
            return json.loads(fenced.group(1).strip())

    @staticmethod
    def _split_csv(value) -> List[str]:
        """Turn a comma-separated cell value into a list of trimmed items.

        ``None`` yields an empty list rather than the string ``"None"``;
        a list or tuple is accepted as-is and only trimmed and filtered.
        """
        if value is None:
            return []
        if isinstance(value, (list, tuple)):
            parts = [str(item) for item in value]
        else:
            parts = str(value).split(",")
        return [part.strip() for part in parts if part.strip()]

    def split(self, vo_script: str) -> List[str]:
        """
        Split voice-over script into segments.
        Uses vo_segment_splitter.md prompt.

        Args:
            vo_script: Full voice-over script text.

        Returns:
            List of segment strings.

        Raises:
            ValueError: If the template is missing, the response is empty or
                not valid JSON, or the decoded value is not a list.
        """
        logger.debug("ScriptProcessor: Splitting VO script into segments")

        prompt = self._render("split", {"{VO_SCRIPT}": vo_script})
        response = self._call_gemini(prompt)
        segments = self._parse_json(response)

        if not isinstance(segments, list):
            raise ValueError(f"Expected list of segments, got: {type(segments)}")

        logger.debug(f"ScriptProcessor: Split into {len(segments)} segments")
        return segments

    def get_metadata(self, segment: str) -> Dict:
        """
        Get video metadata for a segment.
        Uses vo_segment_enricher.md prompt.

        Args:
            segment: Single segment text.

        Returns:
            Metadata dict with visual_intent, category, subjects, etc.

        Raises:
            ValueError: If the template is missing, the response is empty or
                not valid JSON, or the decoded value is not a dict.
        """
        logger.debug(f"ScriptProcessor: Getting metadata for: {segment[:50]}...")

        prompt = self._render("metadata", {"{SEGMENT_TEXT}": segment})
        response = self._call_gemini(prompt)
        metadata = self._parse_json(response)

        if not isinstance(metadata, dict):
            raise ValueError(f"Expected metadata dict, got: {type(metadata)}")

        logger.debug(f"ScriptProcessor: Metadata category={metadata.get('category')}")
        return metadata

    def match_video(self, metadata: Dict, library_items: List[Dict]) -> Dict:
        """
        Try to match segment metadata against video library.
        Uses vo_video_matcher.md prompt.

        Args:
            metadata: Segment metadata from get_metadata().
            library_items: List of video library items with match_keys.

        Returns:
            Match result dict with decision, matched_video_id, confidence, reason.

        Raises:
            ValueError: If the template is missing, the response is empty or
                not valid JSON, or the decoded value is not a dict.
        """
        logger.debug(f"ScriptProcessor: Matching against {len(library_items)} library items")

        prompt = self._render(
            "match",
            {
                "{SEGMENT_METADATA_JSON}": json.dumps(metadata, indent=2),
                "{VIDEO_LIBRARY_MATCH_KEYS_ARRAY_JSON}": json.dumps(library_items, indent=2),
            },
        )
        response = self._call_gemini(prompt)
        result = self._parse_json(response)

        if not isinstance(result, dict):
            raise ValueError(f"Expected match result dict, got: {type(result)}")

        decision = result.get("decision", "no_match")
        confidence = result.get("confidence", 0)
        logger.debug(f"ScriptProcessor: Match decision={decision}, confidence={confidence}")
        return result

    def generate_prompt(self, metadata: Dict) -> Dict:
        """
        Generate video generation specification.
        Uses vo_video_generator.md prompt.

        Args:
            metadata: Segment metadata from get_metadata().

        Returns:
            Video generation spec with scene_prompt, match_keys, video_parameters, etc.

        Raises:
            ValueError: If the template is missing, the response is empty or
                not valid JSON, or the decoded value is not a dict.
        """
        # The values below are only used for logging; coerce defensively so a
        # null or non-string field cannot abort the step from a debug line.
        segment_text = str(metadata.get("segment_text") or "")
        logger.debug(f"ScriptProcessor: Generating video prompt for: {segment_text[:50]}...")

        prompt = self._render(
            "prompt_gen",
            {"{SEGMENT_METADATA_JSON}": json.dumps(metadata, indent=2)},
        )
        response = self._call_gemini(prompt)
        spec = self._parse_json(response)

        if not isinstance(spec, dict):
            raise ValueError(f"Expected spec dict, got: {type(spec)}")

        video_parameters = spec.get("video_parameters")
        style = video_parameters.get("style") if isinstance(video_parameters, dict) else None
        logger.debug(f"ScriptProcessor: Generated prompt for scene_type={style}")
        return spec

    def extract_match_keys(self, video_rows: List[Dict]) -> List[Dict]:
        """
        Extract match keys from raw video library rows.

        Args:
            video_rows: List of dicts (raw rows from Google Sheet)

        Returns:
            List of dicts with video_id and match_keys. ``video_id`` is the
            row's VIDEO_LINK, or ``vid_<index>`` when that cell is absent or
            empty.
        """
        result = []
        for idx, row in enumerate(video_rows):
            # `or` rather than a .get() default: a present-but-empty cell
            # must fall back to the positional id as well.
            video_id = row.get("VIDEO_LINK") or f"vid_{idx}"

            match_keys = {
                "abstract_level": row.get("ABSTRACT_LEVEL", ""),
                "scene_prompt": row.get("SCENE_PROMPT", ""),
                "system_prompt": row.get("SYSTEM_PROMPT", ""),
                "negative_prompt": row.get("NEGATIVE_PROMPT", ""),
                "segment_text": row.get("SEGMENT_TEXT", ""),
                "visual_intent": row.get("VISUAL_INTENT", ""),
                "category": row.get("CATEGORY", ""),
                "sub_category": row.get("SUB_CATEGORY", ""),
                "scene_type": row.get("SCENE_TYPE", ""),
                # Parse comma-separated fields back to lists
                "subjects": self._split_csv(row.get("SUBJECTS", "")),
                "environment": self._split_csv(row.get("ENVIRONMENT", "")),
                "tone": self._split_csv(row.get("TONE", "")),
            }

            result.append({
                "video_id": video_id,
                "match_keys": match_keys,
            })

        return result

    def process_segment(self, segment: str, library_items: Optional[List[Dict]] = None) -> Dict:
        """
        Process a single segment end-to-end.

        Args:
            segment: Segment text.
            library_items: Optional list of library items for matching.

        Returns:
            Dict with:
            - segment: The input segment text
            - metadata: Segment metadata
            - match: Match result (or None if library empty)
            - prompt_spec: Video gen spec (or None if matched)
        """
        result = {"segment": segment, "metadata": None, "match": None, "prompt_spec": None}

        # Get metadata
        result["metadata"] = self.get_metadata(segment)

        # Try matching if library not empty
        if library_items:
            result["match"] = self.match_video(result["metadata"], library_items)

            if result["match"].get("decision") == "reuse":
                logger.debug(f"ScriptProcessor: Reusing video {result['match'].get('matched_video_id')}")
                return result

        # Generate prompt for new video
        result["prompt_spec"] = self.generate_prompt(result["metadata"])
        return result


# Module-level singleton
_script_processor: Optional[ScriptProcessor] = None


def get_script_processor() -> ScriptProcessor:
    """Get singleton ScriptProcessor instance."""
    global _script_processor
    if _script_processor is None:
        _script_processor = ScriptProcessor()
    return _script_processor


def reset_script_processor() -> None:
    """Reset singleton (useful for testing)."""
    global _script_processor
    _script_processor = None