Tools / src /asset_manager /script_processor.py
jebin2's picture
refactor: Centralize logger import to src.logger_config across various modules.
f20025d
"""
ScriptProcessor - Orchestrates VO script processing using Gemini prompts.
Flow:
1. split() - Uses vo_segment_splitter.md to break VO into segments
2. get_metadata() - Uses vo_segment_enricher.md to enrich each segment
3. match_video() - Uses vo_video_matcher.md to find matching library video
4. generate_prompt() - Uses vo_video_generator.md to create video gen spec
"""
import json
import os
from typing import List, Dict, Optional
from pathlib import Path
from src.logger_config import logger
from google_src import ai_studio_sdk
class ScriptProcessor:
"""Processes voice-over scripts through prompt-based pipeline."""
def __init__(self):
self._prompt_dir = Path(__file__).parent.parent / "prompt"
self._prompts = {}
self._load_prompts()
def _load_prompts(self):
"""Load all prompt templates from markdown files."""
prompt_files = {
"split": "vo_segment_splitter.md",
"metadata": "vo_segment_enricher.md",
"match": "vo_video_matcher.md",
"prompt_gen": "vo_video_generator.md"
}
for key, filename in prompt_files.items():
filepath = self._prompt_dir / filename
if filepath.exists():
self._prompts[key] = filepath.read_text()
logger.debug(f"Loaded prompt: {filename}")
else:
logger.warning(f"Prompt file not found: {filepath}")
self._prompts[key] = ""
def _call_gemini(self, prompt: str) -> str:
"""Call Gemini and return response text."""
response = ai_studio_sdk.generate(prompt)
if not response:
raise ValueError("Gemini returned empty response")
return response.strip()
def _parse_json(self, text: str) -> dict | list:
"""Parse JSON from Gemini response, handling markdown code blocks."""
# Strip markdown code blocks if present
text = text.strip()
if text.startswith("```json"):
text = text[7:]
elif text.startswith("```"):
text = text[3:]
if text.endswith("```"):
text = text[:-3]
return json.loads(text.strip())
def split(self, vo_script: str) -> List[str]:
"""
Split voice-over script into segments.
Uses vo_segment_splitter.md prompt.
Args:
vo_script: Full voice-over script text.
Returns:
List of segment strings.
"""
logger.debug("ScriptProcessor: Splitting VO script into segments")
prompt = self._prompts["split"].replace("{VO_SCRIPT}", vo_script)
response = self._call_gemini(prompt)
segments = self._parse_json(response)
if not isinstance(segments, list):
raise ValueError(f"Expected list of segments, got: {type(segments)}")
logger.debug(f"ScriptProcessor: Split into {len(segments)} segments")
return segments
def get_metadata(self, segment: str) -> Dict:
"""
Get video metadata for a segment.
Uses vo_segment_enricher.md prompt.
Args:
segment: Single segment text.
Returns:
Metadata dict with visual_intent, category, subjects, etc.
"""
logger.debug(f"ScriptProcessor: Getting metadata for: {segment[:50]}...")
prompt = self._prompts["metadata"].replace("{SEGMENT_TEXT}", segment)
response = self._call_gemini(prompt)
metadata = self._parse_json(response)
if not isinstance(metadata, dict):
raise ValueError(f"Expected metadata dict, got: {type(metadata)}")
logger.debug(f"ScriptProcessor: Metadata category={metadata.get('category')}")
return metadata
def match_video(self, metadata: Dict, library_items: List[Dict]) -> Dict:
"""
Try to match segment metadata against video library.
Uses vo_video_matcher.md prompt.
Args:
metadata: Segment metadata from get_metadata().
library_items: List of video library items with match_keys.
Returns:
Match result dict with decision, matched_video_id, confidence, reason.
"""
logger.debug(f"ScriptProcessor: Matching against {len(library_items)} library items")
prompt = self._prompts["match"]
prompt = prompt.replace("{SEGMENT_METADATA_JSON}", json.dumps(metadata, indent=2))
prompt = prompt.replace("{VIDEO_LIBRARY_MATCH_KEYS_ARRAY_JSON}", json.dumps(library_items, indent=2))
response = self._call_gemini(prompt)
result = self._parse_json(response)
if not isinstance(result, dict):
raise ValueError(f"Expected match result dict, got: {type(result)}")
decision = result.get("decision", "no_match")
confidence = result.get("confidence", 0)
logger.debug(f"ScriptProcessor: Match decision={decision}, confidence={confidence}")
return result
def generate_prompt(self, metadata: Dict) -> Dict:
"""
Generate video generation specification.
Uses vo_video_generator.md prompt.
Args:
metadata: Segment metadata from get_metadata().
Returns:
Video generation spec with scene_prompt, match_keys, video_parameters, etc.
"""
logger.debug(f"ScriptProcessor: Generating video prompt for: {metadata.get('segment_text', '')[:50]}...")
prompt = self._prompts["prompt_gen"].replace("{SEGMENT_METADATA_JSON}", json.dumps(metadata, indent=2))
response = self._call_gemini(prompt)
spec = self._parse_json(response)
if not isinstance(spec, dict):
raise ValueError(f"Expected spec dict, got: {type(spec)}")
logger.debug(f"ScriptProcessor: Generated prompt for scene_type={spec.get('video_parameters', {}).get('style')}")
return spec
def extract_match_keys(self, video_rows: List[Dict]) -> List[Dict]:
"""
Extract match keys from raw video library rows.
Args:
video_rows: List of dicts (raw rows from Google Sheet)
Returns:
List of dicts with video_id and match_keys.
"""
result = []
for idx, row in enumerate(video_rows):
video_id = row.get("VIDEO_LINK", f"vid_{idx}")
# Parse comma-separated fields back to lists
subjects = [s.strip() for s in str(row.get("SUBJECTS", "")).split(",") if s.strip()]
environment = [e.strip() for e in str(row.get("ENVIRONMENT", "")).split(",") if e.strip()]
tone = [t.strip() for t in str(row.get("TONE", "")).split(",") if t.strip()]
match_keys = {
"abstract_level": row.get("ABSTRACT_LEVEL", ""),
"scene_prompt": row.get("SCENE_PROMPT", ""),
"system_prompt": row.get("SYSTEM_PROMPT", ""),
"negative_prompt": row.get("NEGATIVE_PROMPT", ""),
"segment_text": row.get("SEGMENT_TEXT", ""),
"visual_intent": row.get("VISUAL_INTENT", ""),
"category": row.get("CATEGORY", ""),
"sub_category": row.get("SUB_CATEGORY", ""),
"scene_type": row.get("SCENE_TYPE", ""),
"subjects": subjects,
"environment": environment,
"tone": tone,
}
result.append({
"video_id": video_id,
"match_keys": match_keys,
})
return result
def process_segment(self, segment: str, library_items: Optional[List[Dict]] = None) -> Dict:
"""
Process a single segment end-to-end.
Args:
segment: Segment text.
library_items: Optional list of library items for matching.
Returns:
Dict with:
- metadata: Segment metadata
- match: Match result (or None if library empty)
- prompt_spec: Video gen spec (or None if matched)
"""
result = {"segment": segment, "metadata": None, "match": None, "prompt_spec": None}
# Get metadata
result["metadata"] = self.get_metadata(segment)
# Try matching if library not empty
if library_items and len(library_items) > 0:
result["match"] = self.match_video(result["metadata"], library_items)
if result["match"].get("decision") == "reuse":
logger.debug(f"ScriptProcessor: Reusing video {result['match'].get('matched_video_id')}")
return result
# Generate prompt for new video
result["prompt_spec"] = self.generate_prompt(result["metadata"])
return result
# Module-level singleton: None until get_script_processor() creates the
# instance; reset_script_processor() sets it back to None.
_script_processor: Optional[ScriptProcessor] = None
def get_script_processor() -> ScriptProcessor:
    """Return the shared ScriptProcessor, building it on first use."""
    global _script_processor
    instance = _script_processor
    if instance is not None:
        return instance
    # First request (or first since a reset): create and cache the instance.
    instance = ScriptProcessor()
    _script_processor = instance
    return instance
def reset_script_processor() -> None:
    """Clear the module-level singleton (useful for testing).

    After this call the stored instance is None, so the next
    get_script_processor() call constructs a new ScriptProcessor.
    """
    global _script_processor
    _script_processor = None