| """ |
| Phase 3: Validation to ensure generators use ONLY semantic plan, not raw prompt. |
| |
| This module provides validation functions to detect prompt leakage. |
| """ |
|
|
| from typing import Dict, Any, List, Tuple |
|
|
|
|
def extract_keywords(text: str) -> set:
    """Extract meaningful keywords from text (simple heuristic).

    The text is lower-cased, commas and periods are treated as word
    separators, and ASCII punctuation still attached to the edges of a token
    (e.g. ``"forest!"`` or ``"(misty)"``) is stripped so the same word
    compares equal regardless of the punctuation around it. Punctuation
    inside a token (``"don't"``, ``"well-known"``) is kept. Tokens of three
    characters or fewer and common English stopwords are discarded.

    Args:
        text: Free-form text. Empty or ``None`` input yields an empty set.

    Returns:
        The set of distinct lower-cased keywords found in ``text``.
    """
    import string

    stopwords = {
        "the", "a", "an", "and", "or", "but", "in", "on", "at", "to", "for",
        "of", "with", "by", "from", "as", "is", "are", "was", "were", "be",
        "been", "being", "have", "has", "had", "do", "does", "did", "will",
        "would", "should", "could", "may", "might", "must", "can", "this",
        "that", "these", "those", "it", "its", "they", "them", "their",
    }

    if not text:
        return set()

    words = text.lower().replace(",", " ").replace(".", " ").split()
    # Strip edge punctuation BEFORE filtering, so "they?" is recognised as a
    # stopword and "cat!" is measured as three characters, not four.
    cleaned = (word.strip(string.punctuation) for word in words)
    return {word for word in cleaned if len(word) > 3 and word not in stopwords}
|
|
|
|
def detect_prompt_leakage(
    original_prompt: str,
    plan: Dict[str, Any],
    text_prompt: str,
    image_prompt: str,
    audio_prompt: str,
) -> Dict[str, Any]:
    """
    Report keywords that reached a generator prompt straight from the
    original prompt without appearing in the semantic plan.

    A keyword counts as leaked for a modality when it occurs in the original
    prompt, does not occur in the plan's text, and occurs in that modality's
    generator prompt.

    Returns:
        {
            "has_leakage": bool,             # True if any modality leaks
            "leakage_details": {
                "text": [...],               # sorted leaked keywords
                "image": [...],
                "audio": [...]
            },
            "plan_coverage": float,          # share of original keywords present in the plan (4 d.p.)
            "original_keywords_count": int,  # distinct keywords in the original prompt
            "plan_keywords_count": int       # distinct keywords in the plan
        }
    """
    prompt_terms = extract_keywords(original_prompt)
    plan_terms = extract_keywords(_plan_to_text(plan))

    # Words from the original prompt that the plan never mentions; a generator
    # prompt can only contain these if it drew on the raw prompt.
    unplanned_terms = prompt_terms - plan_terms

    generator_prompts = (
        ("text", text_prompt),
        ("image", image_prompt),
        ("audio", audio_prompt),
    )
    leaked = {
        modality: sorted(extract_keywords(prompt) & unplanned_terms)
        for modality, prompt in generator_prompts
    }

    # max(..., 1) avoids dividing by zero when the original prompt has no keywords.
    covered_count = len(plan_terms & prompt_terms)
    coverage = covered_count / max(len(prompt_terms), 1)

    return {
        "has_leakage": any(leaked.values()),
        "leakage_details": {
            "text": leaked["text"],
            "image": leaked["image"],
            "audio": leaked["audio"],
        },
        "plan_coverage": round(coverage, 4),
        "original_keywords_count": len(prompt_terms),
        "plan_keywords_count": len(plan_terms),
    }
|
|
|
|
| def _plan_to_text(plan: Dict[str, Any]) -> str: |
| """Convert semantic plan to text representation for keyword extraction.""" |
| parts = [] |
| |
| if isinstance(plan, dict): |
| |
| for key, value in plan.items(): |
| if isinstance(value, str): |
| parts.append(value) |
| elif isinstance(value, list): |
| parts.extend(str(v) for v in value if isinstance(v, str)) |
| elif isinstance(value, dict): |
| |
| parts.append(_plan_to_text(value)) |
| |
| return " ".join(parts) |
|
|
|
|
def validate_conditioning_strictness(
    original_prompt: str,
    plan: Dict[str, Any],
    modality_prompts: Dict[str, str],
) -> Tuple[bool, str]:
    """
    Validate that modality prompts are derived strictly from plan.

    Reads the ``"text_prompt"``, ``"image_prompt"`` and ``"audio_prompt"``
    entries of ``modality_prompts`` and runs ``detect_prompt_leakage`` on
    them. A missing entry, or one whose value is ``None`` or empty, is
    treated as an empty prompt.

    Returns:
        (is_valid, reason) -- ``(True, "All prompts derived from plan")``
        when no leakage is found; otherwise ``False`` and a ``"; "``-joined
        description of the leaked keywords for each offending modality.
    """
    # ``dict.get(key, "")`` only falls back when the key is absent; ``or ""``
    # also covers a key that is present with a ``None`` value.
    validation = detect_prompt_leakage(
        original_prompt=original_prompt,
        plan=plan,
        text_prompt=modality_prompts.get("text_prompt") or "",
        image_prompt=modality_prompts.get("image_prompt") or "",
        audio_prompt=modality_prompts.get("audio_prompt") or "",
    )

    if not validation["has_leakage"]:
        return True, "All prompts derived from plan"

    leakage = validation["leakage_details"]
    labelled_modalities = (("text", "Text"), ("image", "Image"), ("audio", "Audio"))
    reasons = [
        f"{label} prompt leaks: {leakage[key]}"
        for key, label in labelled_modalities
        if leakage[key]
    ]
    return False, "; ".join(reasons)
|
|
|
|
def check_plan_completeness(plan: Dict[str, Any]) -> Dict[str, bool]:
    """Report which conditioning-relevant sections of the plan are populated.

    Each flag is True when at least one of its alternative keys holds a
    truthy value:

    * ``has_scene``    -- ``scene_summary`` or ``scene``
    * ``has_visual``   -- ``visual_attributes`` or ``visual_elements``
    * ``has_audio``    -- ``audio_elements`` or ``audio_intent``
    * ``has_entities`` -- ``primary_entities`` or ``main_subjects``

    ``is_complete`` requires a scene plus at least one of visual or audio.
    A plan that is not a dict yields False for every flag.
    """
    flag_names = ("has_scene", "has_visual", "has_audio", "has_entities", "is_complete")

    if not isinstance(plan, dict):
        return {name: False for name in flag_names}

    def populated(*candidate_keys: str) -> bool:
        # True as soon as one of the alternative keys has a truthy value.
        return any(plan.get(key) for key in candidate_keys)

    scene_ok = populated("scene_summary", "scene")
    visual_ok = populated("visual_attributes", "visual_elements")
    audio_ok = populated("audio_elements", "audio_intent")
    entities_ok = populated("primary_entities", "main_subjects")

    flags = (scene_ok, visual_ok, audio_ok, entities_ok, scene_ok and (visual_ok or audio_ok))
    return dict(zip(flag_names, flags))
|
|