# Spaces: pratik-250620 / MultiModal-Coherence-AI (Running)
# File size: 2,915 Bytes
from typing import Any, Dict, List


def _get_nested(plan: Dict[str, Any], *keys: str) -> Any:
    cur: Any = plan
    for key in keys:
        if not isinstance(cur, dict) or key not in cur:
            return None
        cur = cur[key]
    return cur


def _as_list(value: Any) -> List[str]:
    if value is None:
        return []
    if isinstance(value, list):
        return [str(v) for v in value if str(v).strip()]
    if isinstance(value, str):
        return [value] if value.strip() else []
    return []


def _join(parts: List[str]) -> str:
    cleaned = [p.strip() for p in parts if isinstance(p, str) and p.strip()]
    return ", ".join(cleaned)


def compile_semantic_prompts(plan: Dict[str, Any]) -> Dict[str, str]:
    """
    Build the text, image and audio prompts for a semantic plan.

    Every field is read from its top-level key in ``plan`` first. When that
    yields nothing, the listed nested locations are read and concatenated
    instead. The prompts are fixed templates filled only with those values.

    Returns a dict with the keys ``"text"``, ``"image"`` and ``"audio"``.
    """

    def _pick(top_key: str, *paths: Any) -> List[str]:
        # The top-level key wins; nested sections are only consulted when it is empty.
        found = _as_list(plan.get(top_key))
        if found:
            return found
        merged: List[str] = []
        for section, field in paths:
            merged.extend(_as_list(_get_nested(plan, section, field)))
        return merged

    summary = plan.get("scene_summary") or ""

    entities = _pick("primary_entities", ("core_semantics", "main_subjects"))
    visual_items = _pick(
        "visual_attributes",
        ("style_controls", "visual_style"),
        ("style_controls", "color_palette"),
        ("style_controls", "lighting"),
        ("style_controls", "camera"),
        ("image_constraints", "environment_details"),
        ("image_constraints", "composition"),
    )
    moods = _pick("mood_emotion", ("style_controls", "mood_emotion"))
    sounds = _pick(
        "audio_elements",
        ("audio_constraints", "sound_sources"),
        ("audio_constraints", "ambience"),
    )
    styles = _pick("style", ("style_controls", "visual_style"))

    # Render each list once; several prompts share the same rendered text.
    entities_txt = _join(entities)
    visuals_txt = _join(visual_items)
    moods_txt = _join(moods)
    sounds_txt = _join(sounds)
    styles_txt = _join(styles)

    text_lines = [
        "Describe the following scene clearly and literally:",
        f"Scene: {summary}",
        f"Entities: {entities_txt}",
        f"Mood: {moods_txt}",
        f"Style: {styles_txt}",
        "Do not add new elements.",
    ]
    image_sentences = [
        f"{summary}.",
        f"Visual elements: {visuals_txt}.",
        f"Entities present: {entities_txt}.",
        f"Style: {styles_txt}.",
        "No extra objects, no text, no symbols.",
    ]
    audio_sentences = [
        f"Audio scene matching: {summary}.",
        f"Sound elements: {sounds_txt}.",
        f"Mood: {moods_txt}.",
        "No music unless explicitly stated.",
    ]

    prompts: Dict[str, str] = {}
    prompts["text"] = "\n".join(text_lines)
    prompts["image"] = " ".join(image_sentences)
    prompts["audio"] = " ".join(audio_sentences)
    return prompts