# Spaces: Elvoro / Tools (Running)
# App · Files · Community
# File: Tools / src / asset_manager / script_processor.py (9.61 kB)
# Last commit: f20025d by jebin2, 3 days ago
#   "refactor: Centralize logger import to src.logger_config across various modules."

	"""
	ScriptProcessor - Orchestrates VO script processing using Gemini prompts.

	Flow:
	1. split() - Uses vo_segment_splitter.md to break VO into segments
	2. get_metadata() - Uses vo_segment_enricher.md to enrich each segment
	3. match_video() - Uses vo_video_matcher.md to find matching library video
	4. generate_prompt() - Uses vo_video_generator.md to create video gen spec
	"""

	import json
	import os
	from typing import List, Dict, Optional
	from pathlib import Path

	from src.logger_config import logger
	from google_src import ai_studio_sdk


	class ScriptProcessor:
	"""Processes voice-over scripts through prompt-based pipeline."""

	def __init__(self):
	self._prompt_dir = Path(__file__).parent.parent / "prompt"
	self._prompts = {}
	self._load_prompts()

	def _load_prompts(self):
	"""Load all prompt templates from markdown files."""
	prompt_files = {
	"split": "vo_segment_splitter.md",
	"metadata": "vo_segment_enricher.md",
	"match": "vo_video_matcher.md",
	"prompt_gen": "vo_video_generator.md"
	}

	for key, filename in prompt_files.items():
	filepath = self._prompt_dir / filename
	if filepath.exists():
	self._prompts[key] = filepath.read_text()
	logger.debug(f"Loaded prompt: {filename}")
	else:
	logger.warning(f"Prompt file not found: {filepath}")
	self._prompts[key] = ""

	def _call_gemini(self, prompt: str) -> str:
	"""Call Gemini and return response text."""
	response = ai_studio_sdk.generate(prompt)
	if not response:
	raise ValueError("Gemini returned empty response")
	return response.strip()

	def _parse_json(self, text: str) -> dict \| list:
	"""Parse JSON from Gemini response, handling markdown code blocks."""
	# Strip markdown code blocks if present
	text = text.strip()
	if text.startswith("```json"):
	text = text[7:]
	elif text.startswith("```"):
	text = text[3:]
	if text.endswith("```"):
	text = text[:-3]

	return json.loads(text.strip())

	def split(self, vo_script: str) -> List[str]:
	"""
	Split voice-over script into segments.

	Uses vo_segment_splitter.md prompt.

	Args:
	vo_script: Full voice-over script text.

	Returns:
	List of segment strings.
	"""
	logger.debug("ScriptProcessor: Splitting VO script into segments")

	prompt = self._prompts["split"].replace("{VO_SCRIPT}", vo_script)
	response = self._call_gemini(prompt)

	segments = self._parse_json(response)

	if not isinstance(segments, list):
	raise ValueError(f"Expected list of segments, got: {type(segments)}")

	logger.debug(f"ScriptProcessor: Split into {len(segments)} segments")
	return segments

	def get_metadata(self, segment: str) -> Dict:
	"""
	Get video metadata for a segment.

	Uses vo_segment_enricher.md prompt.

	Args:
	segment: Single segment text.

	Returns:
	Metadata dict with visual_intent, category, subjects, etc.
	"""
	logger.debug(f"ScriptProcessor: Getting metadata for: {segment[:50]}...")

	prompt = self._prompts["metadata"].replace("{SEGMENT_TEXT}", segment)
	response = self._call_gemini(prompt)

	metadata = self._parse_json(response)

	if not isinstance(metadata, dict):
	raise ValueError(f"Expected metadata dict, got: {type(metadata)}")

	logger.debug(f"ScriptProcessor: Metadata category={metadata.get('category')}")
	return metadata

	def match_video(self, metadata: Dict, library_items: List[Dict]) -> Dict:
	"""
	Try to match segment metadata against video library.

	Uses vo_video_matcher.md prompt.

	Args:
	metadata: Segment metadata from get_metadata().
	library_items: List of video library items with match_keys.

	Returns:
	Match result dict with decision, matched_video_id, confidence, reason.
	"""
	logger.debug(f"ScriptProcessor: Matching against {len(library_items)} library items")

	prompt = self._prompts["match"]
	prompt = prompt.replace("{SEGMENT_METADATA_JSON}", json.dumps(metadata, indent=2))
	prompt = prompt.replace("{VIDEO_LIBRARY_MATCH_KEYS_ARRAY_JSON}", json.dumps(library_items, indent=2))

	response = self._call_gemini(prompt)
	result = self._parse_json(response)

	if not isinstance(result, dict):
	raise ValueError(f"Expected match result dict, got: {type(result)}")

	decision = result.get("decision", "no_match")
	confidence = result.get("confidence", 0)
	logger.debug(f"ScriptProcessor: Match decision={decision}, confidence={confidence}")

	return result

	def generate_prompt(self, metadata: Dict) -> Dict:
	"""
	Generate video generation specification.

	Uses vo_video_generator.md prompt.

	Args:
	metadata: Segment metadata from get_metadata().

	Returns:
	Video generation spec with scene_prompt, match_keys, video_parameters, etc.
	"""
	logger.debug(f"ScriptProcessor: Generating video prompt for: {metadata.get('segment_text', '')[:50]}...")

	prompt = self._prompts["prompt_gen"].replace("{SEGMENT_METADATA_JSON}", json.dumps(metadata, indent=2))
	response = self._call_gemini(prompt)

	spec = self._parse_json(response)

	if not isinstance(spec, dict):
	raise ValueError(f"Expected spec dict, got: {type(spec)}")

	logger.debug(f"ScriptProcessor: Generated prompt for scene_type={spec.get('video_parameters', {}).get('style')}")
	return spec

	def extract_match_keys(self, video_rows: List[Dict]) -> List[Dict]:
	"""
	Extract match keys from raw video library rows.

	Args:
	video_rows: List of dicts (raw rows from Google Sheet)

	Returns:
	List of dicts with video_id and match_keys.
	"""
	result = []
	for idx, row in enumerate(video_rows):
	video_id = row.get("VIDEO_LINK", f"vid_{idx}")

	# Parse comma-separated fields back to lists
	subjects = [s.strip() for s in str(row.get("SUBJECTS", "")).split(",") if s.strip()]
	environment = [e.strip() for e in str(row.get("ENVIRONMENT", "")).split(",") if e.strip()]
	tone = [t.strip() for t in str(row.get("TONE", "")).split(",") if t.strip()]

	match_keys = {
	"abstract_level": row.get("ABSTRACT_LEVEL", ""),
	"scene_prompt": row.get("SCENE_PROMPT", ""),
	"system_prompt": row.get("SYSTEM_PROMPT", ""),
	"negative_prompt": row.get("NEGATIVE_PROMPT", ""),
	"segment_text": row.get("SEGMENT_TEXT", ""),
	"visual_intent": row.get("VISUAL_INTENT", ""),
	"category": row.get("CATEGORY", ""),
	"sub_category": row.get("SUB_CATEGORY", ""),
	"scene_type": row.get("SCENE_TYPE", ""),
	"subjects": subjects,
	"environment": environment,
	"tone": tone,
	}

	result.append({
	"video_id": video_id,
	"match_keys": match_keys,
	})

	return result

	def process_segment(self, segment: str, library_items: Optional[List[Dict]] = None) -> Dict:
	"""
	Process a single segment end-to-end.

	Args:
	segment: Segment text.
	library_items: Optional list of library items for matching.

	Returns:
	Dict with:
	- metadata: Segment metadata
	- match: Match result (or None if library empty)
	- prompt_spec: Video gen spec (or None if matched)
	"""
	result = {"segment": segment, "metadata": None, "match": None, "prompt_spec": None}

	# Get metadata
	result["metadata"] = self.get_metadata(segment)

	# Try matching if library not empty
	if library_items and len(library_items) > 0:
	result["match"] = self.match_video(result["metadata"], library_items)

	if result["match"].get("decision") == "reuse":
	logger.debug(f"ScriptProcessor: Reusing video {result['match'].get('matched_video_id')}")
	return result

	# Generate prompt for new video
	result["prompt_spec"] = self.generate_prompt(result["metadata"])
	return result


	# Module-level singleton: holds the shared ScriptProcessor created by
	# get_script_processor(); None until first use and after
	# reset_script_processor() is called.
	_script_processor: Optional[ScriptProcessor] = None


	def get_script_processor() -> ScriptProcessor:
	    """Return the shared ScriptProcessor, creating it on the first call."""
	    global _script_processor
	    instance = _script_processor
	    if instance is None:
	        # First use (or first use after a reset): build and remember it.
	        instance = ScriptProcessor()
	        _script_processor = instance
	    return instance


	def reset_script_processor() -> None:
	    """Discard the cached singleton so the next getter call builds a new one.

	    Useful for testing.
	    """
	    global _script_processor
	    _script_processor = None