MultiModal-Coherence-AI / src /planner /semantic_compiler.py
pratik-250620's picture
Upload folder using huggingface_hub
6835659 verified
from typing import Any, Dict, List
def _get_nested(plan: Dict[str, Any], *keys: str) -> Any:
cur: Any = plan
for key in keys:
if not isinstance(cur, dict) or key not in cur:
return None
cur = cur[key]
return cur
def _as_list(value: Any) -> List[str]:
if value is None:
return []
if isinstance(value, list):
return [str(v) for v in value if str(v).strip()]
if isinstance(value, str):
return [value] if value.strip() else []
return []
def _join(parts: List[str]) -> str:
cleaned = [p.strip() for p in parts if isinstance(p, str) and p.strip()]
return ", ".join(cleaned)
def compile_semantic_prompts(plan: Dict[str, Any]) -> Dict[str, str]:
    """
    Build the fixed text, image and audio prompts for a semantic plan.

    Each field is read from its top-level key in ``plan`` first. When that
    yields nothing, the nested fallback locations are read and concatenated
    in the order given. The templates are static: apart from the fixed
    instruction sentences, only values taken from the plan are inserted.

    Returns a dict with the keys ``"text"``, ``"image"`` and ``"audio"``.
    """

    def _resolve(top_key, *fallback_paths):
        # Prefer the flat key; otherwise gather every nested fallback in order.
        direct = _as_list(plan.get(top_key))
        if direct:
            return direct
        gathered = []
        for path in fallback_paths:
            gathered.extend(_as_list(_get_nested(plan, *path)))
        return gathered

    scene = plan.get("scene_summary") or ""

    entities = _resolve(
        "primary_entities",
        ("core_semantics", "main_subjects"),
    )
    visual_items = _resolve(
        "visual_attributes",
        ("style_controls", "visual_style"),
        ("style_controls", "color_palette"),
        ("style_controls", "lighting"),
        ("style_controls", "camera"),
        ("image_constraints", "environment_details"),
        ("image_constraints", "composition"),
    )
    moods = _resolve(
        "mood_emotion",
        ("style_controls", "mood_emotion"),
    )
    sounds = _resolve(
        "audio_elements",
        ("audio_constraints", "sound_sources"),
        ("audio_constraints", "ambience"),
    )
    styles = _resolve(
        "style",
        ("style_controls", "visual_style"),
    )

    # Render each list once; the same text is reused across prompts.
    entity_text = _join(entities)
    mood_text = _join(moods)
    style_text = _join(styles)

    text_lines = [
        "Describe the following scene clearly and literally:",
        f"Scene: {scene}",
        f"Entities: {entity_text}",
        f"Mood: {mood_text}",
        f"Style: {style_text}",
        "Do not add new elements.",
    ]
    image_sentences = [
        f"{scene}.",
        f"Visual elements: {_join(visual_items)}.",
        f"Entities present: {entity_text}.",
        f"Style: {style_text}.",
        "No extra objects, no text, no symbols.",
    ]
    audio_sentences = [
        f"Audio scene matching: {scene}.",
        f"Sound elements: {_join(sounds)}.",
        f"Mood: {mood_text}.",
        "No music unless explicitly stated.",
    ]

    prompts: Dict[str, str] = {}
    prompts["text"] = "\n".join(text_lines)
    prompts["image"] = " ".join(image_sentences)
    prompts["audio"] = " ".join(audio_sentences)
    return prompts