|
|
""" |
|
|
ScriptProcessor - Orchestrates VO script processing using Gemini prompts. |
|
|
|
|
|
Flow: |
|
|
1. split() - Uses vo_segment_splitter.md to break VO into segments |
|
|
2. get_metadata() - Uses vo_segment_enricher.md to enrich each segment |
|
|
3. match_video() - Uses vo_video_matcher.md to find matching library video |
|
|
4. generate_prompt() - Uses vo_video_generator.md to create video gen spec |
|
|
""" |
|
|
|
|
|
import json |
|
|
import os |
|
|
from typing import List, Dict, Optional |
|
|
from pathlib import Path |
|
|
|
|
|
from src.logger_config import logger |
|
|
from google_src import ai_studio_sdk |
|
|
|
|
|
|
|
|
class ScriptProcessor: |
|
|
"""Processes voice-over scripts through prompt-based pipeline.""" |
|
|
|
|
|
def __init__(self): |
|
|
self._prompt_dir = Path(__file__).parent.parent / "prompt" |
|
|
self._prompts = {} |
|
|
self._load_prompts() |
|
|
|
|
|
def _load_prompts(self): |
|
|
"""Load all prompt templates from markdown files.""" |
|
|
prompt_files = { |
|
|
"split": "vo_segment_splitter.md", |
|
|
"metadata": "vo_segment_enricher.md", |
|
|
"match": "vo_video_matcher.md", |
|
|
"prompt_gen": "vo_video_generator.md" |
|
|
} |
|
|
|
|
|
for key, filename in prompt_files.items(): |
|
|
filepath = self._prompt_dir / filename |
|
|
if filepath.exists(): |
|
|
self._prompts[key] = filepath.read_text() |
|
|
logger.debug(f"Loaded prompt: {filename}") |
|
|
else: |
|
|
logger.warning(f"Prompt file not found: {filepath}") |
|
|
self._prompts[key] = "" |
|
|
|
|
|
def _call_gemini(self, prompt: str) -> str: |
|
|
"""Call Gemini and return response text.""" |
|
|
response = ai_studio_sdk.generate(prompt) |
|
|
if not response: |
|
|
raise ValueError("Gemini returned empty response") |
|
|
return response.strip() |
|
|
|
|
|
def _parse_json(self, text: str) -> dict | list: |
|
|
"""Parse JSON from Gemini response, handling markdown code blocks.""" |
|
|
|
|
|
text = text.strip() |
|
|
if text.startswith("```json"): |
|
|
text = text[7:] |
|
|
elif text.startswith("```"): |
|
|
text = text[3:] |
|
|
if text.endswith("```"): |
|
|
text = text[:-3] |
|
|
|
|
|
return json.loads(text.strip()) |
|
|
|
|
|
def split(self, vo_script: str) -> List[str]: |
|
|
""" |
|
|
Split voice-over script into segments. |
|
|
|
|
|
Uses vo_segment_splitter.md prompt. |
|
|
|
|
|
Args: |
|
|
vo_script: Full voice-over script text. |
|
|
|
|
|
Returns: |
|
|
List of segment strings. |
|
|
""" |
|
|
logger.debug("ScriptProcessor: Splitting VO script into segments") |
|
|
|
|
|
prompt = self._prompts["split"].replace("{VO_SCRIPT}", vo_script) |
|
|
response = self._call_gemini(prompt) |
|
|
|
|
|
segments = self._parse_json(response) |
|
|
|
|
|
if not isinstance(segments, list): |
|
|
raise ValueError(f"Expected list of segments, got: {type(segments)}") |
|
|
|
|
|
logger.debug(f"ScriptProcessor: Split into {len(segments)} segments") |
|
|
return segments |
|
|
|
|
|
def get_metadata(self, segment: str) -> Dict: |
|
|
""" |
|
|
Get video metadata for a segment. |
|
|
|
|
|
Uses vo_segment_enricher.md prompt. |
|
|
|
|
|
Args: |
|
|
segment: Single segment text. |
|
|
|
|
|
Returns: |
|
|
Metadata dict with visual_intent, category, subjects, etc. |
|
|
""" |
|
|
logger.debug(f"ScriptProcessor: Getting metadata for: {segment[:50]}...") |
|
|
|
|
|
prompt = self._prompts["metadata"].replace("{SEGMENT_TEXT}", segment) |
|
|
response = self._call_gemini(prompt) |
|
|
|
|
|
metadata = self._parse_json(response) |
|
|
|
|
|
if not isinstance(metadata, dict): |
|
|
raise ValueError(f"Expected metadata dict, got: {type(metadata)}") |
|
|
|
|
|
logger.debug(f"ScriptProcessor: Metadata category={metadata.get('category')}") |
|
|
return metadata |
|
|
|
|
|
def match_video(self, metadata: Dict, library_items: List[Dict]) -> Dict: |
|
|
""" |
|
|
Try to match segment metadata against video library. |
|
|
|
|
|
Uses vo_video_matcher.md prompt. |
|
|
|
|
|
Args: |
|
|
metadata: Segment metadata from get_metadata(). |
|
|
library_items: List of video library items with match_keys. |
|
|
|
|
|
Returns: |
|
|
Match result dict with decision, matched_video_id, confidence, reason. |
|
|
""" |
|
|
logger.debug(f"ScriptProcessor: Matching against {len(library_items)} library items") |
|
|
|
|
|
prompt = self._prompts["match"] |
|
|
prompt = prompt.replace("{SEGMENT_METADATA_JSON}", json.dumps(metadata, indent=2)) |
|
|
prompt = prompt.replace("{VIDEO_LIBRARY_MATCH_KEYS_ARRAY_JSON}", json.dumps(library_items, indent=2)) |
|
|
|
|
|
response = self._call_gemini(prompt) |
|
|
result = self._parse_json(response) |
|
|
|
|
|
if not isinstance(result, dict): |
|
|
raise ValueError(f"Expected match result dict, got: {type(result)}") |
|
|
|
|
|
decision = result.get("decision", "no_match") |
|
|
confidence = result.get("confidence", 0) |
|
|
logger.debug(f"ScriptProcessor: Match decision={decision}, confidence={confidence}") |
|
|
|
|
|
return result |
|
|
|
|
|
def generate_prompt(self, metadata: Dict) -> Dict: |
|
|
""" |
|
|
Generate video generation specification. |
|
|
|
|
|
Uses vo_video_generator.md prompt. |
|
|
|
|
|
Args: |
|
|
metadata: Segment metadata from get_metadata(). |
|
|
|
|
|
Returns: |
|
|
Video generation spec with scene_prompt, match_keys, video_parameters, etc. |
|
|
""" |
|
|
logger.debug(f"ScriptProcessor: Generating video prompt for: {metadata.get('segment_text', '')[:50]}...") |
|
|
|
|
|
prompt = self._prompts["prompt_gen"].replace("{SEGMENT_METADATA_JSON}", json.dumps(metadata, indent=2)) |
|
|
response = self._call_gemini(prompt) |
|
|
|
|
|
spec = self._parse_json(response) |
|
|
|
|
|
if not isinstance(spec, dict): |
|
|
raise ValueError(f"Expected spec dict, got: {type(spec)}") |
|
|
|
|
|
logger.debug(f"ScriptProcessor: Generated prompt for scene_type={spec.get('video_parameters', {}).get('style')}") |
|
|
return spec |
|
|
|
|
|
def extract_match_keys(self, video_rows: List[Dict]) -> List[Dict]: |
|
|
""" |
|
|
Extract match keys from raw video library rows. |
|
|
|
|
|
Args: |
|
|
video_rows: List of dicts (raw rows from Google Sheet) |
|
|
|
|
|
Returns: |
|
|
List of dicts with video_id and match_keys. |
|
|
""" |
|
|
result = [] |
|
|
for idx, row in enumerate(video_rows): |
|
|
video_id = row.get("VIDEO_LINK", f"vid_{idx}") |
|
|
|
|
|
|
|
|
subjects = [s.strip() for s in str(row.get("SUBJECTS", "")).split(",") if s.strip()] |
|
|
environment = [e.strip() for e in str(row.get("ENVIRONMENT", "")).split(",") if e.strip()] |
|
|
tone = [t.strip() for t in str(row.get("TONE", "")).split(",") if t.strip()] |
|
|
|
|
|
match_keys = { |
|
|
"abstract_level": row.get("ABSTRACT_LEVEL", ""), |
|
|
"scene_prompt": row.get("SCENE_PROMPT", ""), |
|
|
"system_prompt": row.get("SYSTEM_PROMPT", ""), |
|
|
"negative_prompt": row.get("NEGATIVE_PROMPT", ""), |
|
|
"segment_text": row.get("SEGMENT_TEXT", ""), |
|
|
"visual_intent": row.get("VISUAL_INTENT", ""), |
|
|
"category": row.get("CATEGORY", ""), |
|
|
"sub_category": row.get("SUB_CATEGORY", ""), |
|
|
"scene_type": row.get("SCENE_TYPE", ""), |
|
|
"subjects": subjects, |
|
|
"environment": environment, |
|
|
"tone": tone, |
|
|
} |
|
|
|
|
|
result.append({ |
|
|
"video_id": video_id, |
|
|
"match_keys": match_keys, |
|
|
}) |
|
|
|
|
|
return result |
|
|
|
|
|
def process_segment(self, segment: str, library_items: Optional[List[Dict]] = None) -> Dict: |
|
|
""" |
|
|
Process a single segment end-to-end. |
|
|
|
|
|
Args: |
|
|
segment: Segment text. |
|
|
library_items: Optional list of library items for matching. |
|
|
|
|
|
Returns: |
|
|
Dict with: |
|
|
- metadata: Segment metadata |
|
|
- match: Match result (or None if library empty) |
|
|
- prompt_spec: Video gen spec (or None if matched) |
|
|
""" |
|
|
result = {"segment": segment, "metadata": None, "match": None, "prompt_spec": None} |
|
|
|
|
|
|
|
|
result["metadata"] = self.get_metadata(segment) |
|
|
|
|
|
|
|
|
if library_items and len(library_items) > 0: |
|
|
result["match"] = self.match_video(result["metadata"], library_items) |
|
|
|
|
|
if result["match"].get("decision") == "reuse": |
|
|
logger.debug(f"ScriptProcessor: Reusing video {result['match'].get('matched_video_id')}") |
|
|
return result |
|
|
|
|
|
|
|
|
result["prompt_spec"] = self.generate_prompt(result["metadata"]) |
|
|
return result |
|
|
|
|
|
|
|
|
|
|
|
# Module-wide shared instance; stays None until get_script_processor() is
# first called, and is cleared again by reset_script_processor().
_script_processor: Optional[ScriptProcessor] = None


def get_script_processor() -> ScriptProcessor:
    """Return the shared ScriptProcessor, creating it on first use."""
    global _script_processor
    if _script_processor is not None:
        return _script_processor
    _script_processor = ScriptProcessor()
    return _script_processor
|
|
|
|
|
|
|
|
def reset_script_processor() -> None:
    """Drop the shared ScriptProcessor instance (useful for testing).

    Rebinds the module-level ``_script_processor`` to None.
    """
    global _script_processor
    _script_processor = None
|
|
|