Spaces:

ZENLLC
/

Section2.11.2

Sleeping

Section2.11.2

File size: 23,450 Bytes

dad0db3

import base64
import os
import re
from io import BytesIO
from typing import List, Dict, Optional, Tuple

import gradio as gr
from openai import OpenAI
from PIL import Image

# -------------------------------------------------------------------
# App Metadata
# -------------------------------------------------------------------

# Title used both as the gr.Blocks page title and as the top Markdown heading.
APP_TITLE = "ZEN AI Co. Module 2 | Agent Assembler"
# Markdown blurb rendered directly below the heading in the UI.
APP_DESCRIPTION = """
OpenAI-only teaching rig for building AI model UIs.

• Uses GPT-5 for text generation.
• Uses DALL·E 3 (with fallback to gpt-image-1) for image generation.
• Lets you edit the system prompt, role, tone, and output format.
• Provides sliders and controls to experiment with behavior.
• Automatically generates images when the user asks for one, with an option
  to always generate images as well.
"""

# Initial positions of the "Sampling (Experiment Zone)" sliders.
# Only the max-token value is sent to the API as a parameter; the other four
# are folded into the system prompt as behavioral hints.
DEFAULT_TEMPERATURE = 0.7
DEFAULT_TOP_P = 1.0
DEFAULT_MAX_TOKENS = 1024
DEFAULT_PRESENCE_PENALTY = 0.0
DEFAULT_FREQUENCY_PENALTY = 0.0

# -------------------------------------------------------------------
# OpenAI Client Helper
# -------------------------------------------------------------------

def get_openai_client(key_override: Optional[str] = None) -> OpenAI:
    """
    Build an OpenAI client from the first non-blank API key available.

    Lookup order:
    1) ``key_override`` (the key typed into the UI), then
    2) the OPENAI_API_KEY environment variable.

    Both candidates are whitespace-stripped. Raises ValueError when neither
    yields a usable key.
    """
    ui_key = (key_override or "").strip()
    # Only consult the environment when the UI did not provide a key.
    resolved_key = ui_key if ui_key else os.getenv("OPENAI_API_KEY", "").strip()

    if resolved_key:
        return OpenAI(api_key=resolved_key)

    raise ValueError(
        "OpenAI API key missing. Set OPENAI_API_KEY env var "
        "or paste it into the sidebar."
    )

# -------------------------------------------------------------------
# Prompt & Style Helpers
# -------------------------------------------------------------------

def build_system_instructions(
    user_system_prompt: str,
    assistant_role: str,
    output_mode: str,
    tone: str,
    temperature: float,
    top_p: float,
    presence_penalty: float,
    frequency_penalty: float,
) -> str:
    """
    Build a system prompt string combining user-provided base instructions
    with role + format + tone + "virtual sampling" metadata.

    We encode the slider settings as behavior hints because some GPT-5 variants
    do not accept temperature/top_p/penalties as API parameters.

    Args:
        user_system_prompt: Free-form base instructions; stripped, and left
            out entirely when blank or None.
        assistant_role: Key into the role table (e.g. "General Assistant").
        output_mode: Key into the output-format table (e.g. "Standard Chat").
        tone: Key into the tone table (e.g. "Neutral").
        temperature, top_p, presence_penalty, frequency_penalty: Slider
            values, rendered with two decimals into the hint section.

    Returns:
        The sections joined by single newlines, with no blank lines. A
        role/mode/tone value that is not in its table contributes no line at
        all (rather than a dangling "LABEL: " with nothing after it).
    """
    role_map = {
        "General Assistant": "Behave as a highly capable, calm general-purpose AI assistant.",
        "Teacher / Instructor": "Behave as a patient educator. Explain concepts step-by-step and check for understanding.",
        "Engineer / Architect": "Behave as a senior engineer and systems architect. Be explicit, structured, and precise.",
        "Storyteller / Creative": "Behave as a creative storyteller. Use vivid but clear language while staying coherent.",
    }

    output_map = {
        "Standard Chat": "Respond like a normal chat, with paragraphs kept short and skimmable.",
        "Executive Report": (
            "Respond as an executive brief with headings, short sections, and bullet points "
            "highlighting key decisions and risks."
        ),
        "Infographic Outline": (
            "Respond as an infographic blueprint with section titles and short bullet lines. "
            "Focus on clarity and label-friendly phrases."
        ),
        "Bullet Summary": (
            "Respond as a tight bullet summary (5–10 bullets) capturing only the most important details."
        ),
    }

    tone_map = {
        "Neutral": "Keep the tone neutral and globally understandable.",
        "Bold / Visionary": "Use confident, forward-looking language, but stay concrete and honest.",
        "Minimalist": "Be extremely concise. Prefer fewer words and high information density.",
    }

    sampling_hint = (
        "SAMPLING HINTS FROM UI SLIDERS:\n"
        f"- Temperature slider: {temperature:.2f} (higher = more creative and speculative).\n"
        f"- Top-p slider: {top_p:.2f} (lower = more conservative).\n"
        f"- Presence penalty slider: {presence_penalty:.2f} (higher = encourage new topics).\n"
        f"- Frequency penalty slider: {frequency_penalty:.2f} (higher = reduce repetition).\n"
        "You must interpret these values as behavioral guidance even if the underlying "
        "model ignores sampling parameters."
    )

    sections: List[str] = []

    base_prompt = (user_system_prompt or "").strip()
    if base_prompt:
        sections.append(base_prompt)

    # Emit a labelled line only when the selection maps to real guidance, so
    # an unknown/None choice never leaves an empty "LABEL: " in the prompt.
    labelled_guidance = (
        ("ASSISTANT ROLE", role_map.get(assistant_role)),
        ("OUTPUT MODE", output_map.get(output_mode)),
        ("TONE", tone_map.get(tone)),
    )
    for label, guidance in labelled_guidance:
        if guidance:
            sections.append(f"{label}: {guidance}")

    sections.append(sampling_hint)
    sections.append("Always output clean Markdown.")
    return "\n".join(sections)


def history_to_openai_messages(
    history_messages: List[Dict[str, str]],
    user_message: str,
    system_instructions: str,
) -> List[Dict[str, str]]:
    """
    Assemble the message list for one chat-completion request.

    Layout of the result:
    1) a system message, only when ``system_instructions`` is non-empty;
    2) every history entry whose role is user/assistant/system and whose
       content is truthy, reduced to just its ``role`` and ``content`` keys;
    3) the new ``user_message`` as the final user turn.
    """
    accepted_roles = ("user", "assistant", "system")

    leading = (
        [{"role": "system", "content": system_instructions}]
        if system_instructions
        else []
    )

    # Pull (role, content) once per entry; any other keys are dropped.
    role_content_pairs = (
        (entry.get("role"), entry.get("content", "")) for entry in history_messages
    )
    replayed = [
        {"role": role, "content": content}
        for role, content in role_content_pairs
        if role in accepted_roles and content
    ]

    return leading + replayed + [{"role": "user", "content": user_message}]

# -------------------------------------------------------------------
# Text & Image Generation Helpers
# -------------------------------------------------------------------

def call_openai_text(
    openai_key: Optional[str],
    messages: List[Dict[str, str]],
    max_tokens: int,
) -> str:
    """
    Call GPT-5 via Chat Completions using only supported parameters:
    - model
    - messages
    - max_completion_tokens

    Returns:
        The text of the first choice. An empty string is returned when the
        response carries no choices or the message content is None, so the
        declared ``str`` return type always holds and callers can safely
        concatenate onto the result.

    Raises:
        ValueError: from get_openai_client when no API key is available.
        Exception: any error raised by the OpenAI client call propagates.
    """
    client = get_openai_client(openai_key)
    completion = client.chat.completions.create(
        model="gpt-5",  # change to exact variant you have (e.g. "gpt-5.1-mini") if needed
        messages=messages,
        max_completion_tokens=max_tokens,
    )

    if not completion.choices:
        return ""

    # The API types message.content as nullable; normalise None to "".
    return completion.choices[0].message.content or ""


def _image_request_options(model_name: str, size: str) -> Dict[str, str]:
    """
    Return the model-specific keyword arguments for ``client.images.generate``.

    DALL·E 3 and gpt-image-1 accept different values: gpt-image-1 has no
    ``response_format`` parameter (it returns base64 by default), uses
    "high" rather than "hd" for quality, and offers 1536-pixel instead of
    1792-pixel rectangular sizes.
    """
    if model_name == "gpt-image-1":
        # Translate the DALL·E 3 rectangular sizes to the closest supported ones.
        gpt_image_sizes = {
            "1792x1024": "1536x1024",
            "1024x1792": "1024x1536",
        }
        return {
            "size": gpt_image_sizes.get(size, size),
            "quality": "high",
        }

    return {
        "size": size,
        "quality": "hd",               # high quality
        "response_format": "b64_json", # ensure base64 output
    }


def call_openai_image_with_fallback(
    openai_key: Optional[str],
    prompt: str,
    size: str = "1024x1024",
) -> Optional[Image.Image]:
    """
    Try DALL·E 3 first. If it fails, fall back to gpt-image-1.

    Each model is called with its own parameter set (see
    ``_image_request_options``); sending the DALL·E 3 arguments to
    gpt-image-1 would make the fallback request invalid.

    Args:
        openai_key: Optional key override forwarded to get_openai_client.
        prompt: Text prompt for the image.
        size: Size expressed in DALL·E 3 terms ("1024x1024", "1792x1024",
            "1024x1792"); mapped to an equivalent for gpt-image-1.

    Returns:
        A PIL image decoded from the first model that returns base64 data,
        or None when every model answered without usable data and none
        raised.

    Raises:
        Exception: the most recent error raised by a model attempt, when no
            model produced an image.
    """
    client = get_openai_client(openai_key)
    last_error: Optional[Exception] = None

    for model_name in ("dall-e-3", "gpt-image-1"):
        try:
            response = client.images.generate(
                model=model_name,
                prompt=prompt,
                n=1,
                **_image_request_options(model_name, size),
            )
            if not response.data:
                continue

            b64 = getattr(response.data[0], "b64_json", None)
            if not b64:
                # No base64 data; try next model
                continue

            return Image.open(BytesIO(base64.b64decode(b64)))
        except Exception as e:
            # Deliberately broad: any failure should move on to the next
            # model; the error is re-raised below if nothing succeeds.
            last_error = e
            continue

    if last_error:
        # Bubble up the last error so caller can log it or display a message
        raise last_error

    return None

# -------------------------------------------------------------------
# Starter Prompts
# -------------------------------------------------------------------

# Dropdown label -> full prompt text loaded into the message box.
STARTER_PROMPTS: Dict[str, str] = {
    "Explain AI Literacy to a 13-year-old": (
        "Explain what AI literacy is to a 13-year-old who loves YouTube and video games. "
        "Use examples from their world and end with 3 practical things they can do this week."
    ),
    "Executive Brief: AI Strategy for a Nonprofit": (
        "Create an executive brief for a youth-serving nonprofit that wants to adopt AI tools. "
        "Include priorities, risks, and quick wins in under 800 words."
    ),
    "Infographic Outline: ZEN AI Pioneer Program": (
        "Create an infographic outline that explains the ZEN AI Pioneer Program: "
        "what it is, who it serves, what makes it historic, and 3 key stats. "
        "Make the sections short and label-friendly."
    ),
    "Creative Image Prompt: Futuristic ZEN AI Lab": (
        "Describe a futuristic but realistic ZEN AI Co. lab where youth are building their own AI tools. "
        "Focus on what the scene looks like so it can be turned into an illustration. "
        "End with a separate final paragraph that is ONLY the pure image prompt text."
    ),
    "Debugging Prompt: Why is my model hallucinating?": (
        "I built a small AI app and the model is hallucinating facts about my organization. "
        "Explain why that happens and propose a 3-layer mitigation strategy (prompting, retrieval, UX)."
    ),
}


def get_starter_prompt(choice: str) -> str:
    """
    Look up the starter prompt for a dropdown label.

    Returns the prompt text, or an empty string when ``choice`` is not one of
    the STARTER_PROMPTS keys.
    """
    try:
        return STARTER_PROMPTS[choice]
    except KeyError:
        return ""

# -------------------------------------------------------------------
# Image Intent Detection
# -------------------------------------------------------------------

# Explicit opt-outs ("don't generate an image", "no images", ...). Checked
# first so that they override any image keyword in the same message.
_IMAGE_OPT_OUT_RE = re.compile(
    r"\b(?:do\s+not|don['’]?t)\s+(?:generate|create|make|draw)\s+"
    r"(?:an?\s+|any\s+)?(?:images?|pictures?)\b"
    r"|\bno\s+images?\b"
    r"|\bwithout\s+(?:an?\s+|any\s+)?images?\b"
)

# Image requests. Every alternative is anchored on word boundaries so that
# keywords embedded in longer words ("analogous", "imposter", "withdraw")
# do not trigger image generation.
_IMAGE_REQUEST_RE = re.compile(
    r"\b(?:generate|create|make)\s+an?\s+(?:image|picture)\b"
    r"|\b(?:image|picture)\s+of\b"
    r"|\bdraw\b"
    r"|\b(?:illustration|infographic|poster|logo|thumbnail)s?\b"
    r"|\b(?:cover|album)\s+art(?:work)?\b"
)


def wants_image_from_text(text: str) -> bool:
    """
    Heuristic to decide if the user is asking for an image.

    Triggers on whole-word phrases like:
    - "generate an image" / "create an image" / "make an image"
    - "generate a picture" / "create a picture"
    - "image of" / "picture of"
    - "draw"
    - "illustration", "infographic", "poster", "logo", "thumbnail"
      (singular or plural)
    - "cover art", "album art"

    But avoids when user explicitly says they do NOT want an image
    ("don't generate an image", "do not create an image", "no images",
    "without an image", ...).

    Matching is case-insensitive. Empty or None input returns False.
    """
    lowered = (text or "").lower()

    if _IMAGE_OPT_OUT_RE.search(lowered):
        return False

    return _IMAGE_REQUEST_RE.search(lowered) is not None

# -------------------------------------------------------------------
# Core Chat Logic
# -------------------------------------------------------------------

def agent_assembler_chat(
    user_message: str,
    chat_history: List[Dict[str, str]],
    openai_key_ui: str,
    system_prompt_ui: str,
    assistant_role: str,
    output_mode: str,
    tone: str,
    temperature: float,
    top_p: float,
    max_tokens: int,
    presence_penalty: float,
    frequency_penalty: float,
    always_generate_image: bool,
    image_style: str,
    image_aspect: str,
) -> Tuple[List[Dict[str, str]], Optional[Image.Image]]:
    """
    Main callback: GPT-5 text + optional image generation.

    - Detects image intent from user text automatically.
    - Optionally always generates an image if the toggle is on.
    - chat_history is a list of messages: [{role, content}, ...]

    Args:
        user_message: The new user turn. Blank or None leaves the history
            unchanged.
        chat_history: Prior messages; never mutated, a new list is returned.
        openai_key_ui: Optional API key override from the UI.
        system_prompt_ui, assistant_role, output_mode, tone, temperature,
        top_p, presence_penalty, frequency_penalty: Forwarded to
            build_system_instructions.
        max_tokens: Forwarded to call_openai_text.
        always_generate_image: Generate an image even without image intent.
        image_style: Style label appended to the image prompt.
        image_aspect: Aspect label mapped to an image size; unknown labels
            fall back to 1024x1024.

    Returns:
        ``(history, image)``: the history extended with the user turn and the
        assistant turn, and the generated PIL image or None.

    API failures are not raised; they are reported as text inside the
    assistant message.
    """
    # Tolerate None for the message/history (e.g. programmatic callers).
    user_message = user_message or ""
    chat_history = list(chat_history or [])

    if not user_message.strip():
        return chat_history, None

    # Build system instructions (including slider hints)
    system_instructions = build_system_instructions(
        user_system_prompt=system_prompt_ui,
        assistant_role=assistant_role,
        output_mode=output_mode,
        tone=tone,
        temperature=temperature,
        top_p=top_p,
        presence_penalty=presence_penalty,
        frequency_penalty=frequency_penalty,
    )

    # Prepare messages for OpenAI
    openai_messages = history_to_openai_messages(
        history_messages=chat_history,
        user_message=user_message,
        system_instructions=system_instructions,
    )

    # Call GPT-5
    try:
        ai_reply = call_openai_text(
            openai_key=openai_key_ui,
            messages=openai_messages,
            max_tokens=max_tokens,
        )
    except Exception as e:
        ai_reply = (
            "There was an error calling GPT-5.\n\n"
            f"Short message: `{e}`\n\n"
            "Check that your API key is valid and that the model name matches "
            "what is available in your OpenAI account."
        )
    else:
        if not ai_reply:
            # An empty/None reply would show a blank bubble and, for None,
            # break the string concatenation used for image notes below.
            ai_reply = (
                "_The model returned an empty reply. Try a higher "
                "\"Max completion tokens\" value or rephrase your message._"
            )

    # Update history; keep a handle on the assistant entry so image notes can
    # be attached to it directly.
    assistant_entry = {"role": "assistant", "content": ai_reply}
    chat_history = chat_history + [
        {"role": "user", "content": user_message},
        assistant_entry,
    ]

    generated_image: Optional[Image.Image] = None

    # Decide whether to generate an image
    if always_generate_image or wants_image_from_text(user_message):
        # Map aspect label to image size
        aspect_to_size = {
            "Square (1:1)": "1024x1024",
            "Portrait (9:16)": "1024x1792",
            "Landscape (16:9)": "1792x1024",
        }
        size = aspect_to_size.get(image_aspect, "1024x1024")

        # Build image prompt
        image_prompt = (
            f"{user_message.strip()}\n\n"
            f"IMAGE STYLE: {image_style}. "
            "High readability, clean composition, suitable for presentations or infographics."
        )

        try:
            generated_image = call_openai_image_with_fallback(
                openai_key=openai_key_ui,
                prompt=image_prompt,
                size=size,
            )
        except Exception as e:
            # Attach error note to latest assistant message
            assistant_entry["content"] += (
                f"\n\n_Image generation failed: `{e}`. "
                "Check your OpenAI key and dalle-3 / gpt-image-1 availability._"
            )
        else:
            if generated_image is None:
                # No explicit exception but no image either
                assistant_entry["content"] += (
                    "\n\n_Image generation returned no data. "
                    "Check your OpenAI key and image model availability._"
                )

    return chat_history, generated_image


def clear_chat() -> Tuple[List[Dict[str, str]], Optional[Image.Image]]:
    """
    Reset callback: hand back an empty message list and no image.
    """
    fresh_history: List[Dict[str, str]] = []
    return fresh_history, None

# -------------------------------------------------------------------
# Gradio UI
# -------------------------------------------------------------------

# Initial contents of the "System / Model Instructions" textbox. Whatever text
# is in that box at send time becomes the base of the system prompt.
DEFAULT_SYSTEM_PROMPT = """You are ZEN AI Co.'s Agent Assembler.

You help people understand how large language models and image models work
by giving clear, practical, and honest answers. You are allowed to:

- Explain how prompts, system messages, and parameters change behavior.
- Suggest better prompts and show before/after improvements.
- Design prompts for text and images that are copy-paste-ready.
- Produce reports, outlines, and infographic blueprints.

You must:

- Avoid making up API capabilities that do not exist.
- Be honest when you don't know something or lack context.
- Keep spelling and formatting very precise, especially in prompts and labels.
"""

def build_interface() -> gr.Blocks:
    """
    Construct the Gradio Blocks app and wire up its events.

    Layout: a left column with the chatbot, the latest generated image, the
    message box, Send/Clear buttons and the starter-prompt picker; a right
    column with the API key, system prompt, behavior radios, sampling sliders
    and image options.

    Events: Send and Enter both run agent_assembler_chat and then copy the
    chatbot value into the shared state while clearing the message box;
    Clear resets chatbot, image and state; the starter button loads the
    selected prompt into the message box.

    Returns the un-launched ``gr.Blocks`` instance.
    """
    with gr.Blocks(title=APP_TITLE) as demo:
        gr.Markdown(f"# {APP_TITLE}")
        gr.Markdown(APP_DESCRIPTION)

        with gr.Row():
            # ---- Left column: conversation, image preview, input ----
            with gr.Column(scale=3):
                # NOTE(review): the handlers exchange {role, content} dicts;
                # some Gradio releases default Chatbot to a legacy pair format
                # and need type="messages" — confirm against the pinned version.
                chatbot = gr.Chatbot(label="Agent Assembler Chat", height=520)
                image_out = gr.Image(
                    label="Latest Generated Image (DALL·E 3 / gpt-image-1)",
                    height=320,
                    interactive=False,
                )

                user_input = gr.Textbox(
                    label="Your message",
                    placeholder="Ask a question, request a report, or describe a scene for image generation...",
                    lines=3,
                )

                with gr.Row():
                    send_btn = gr.Button("Send", variant="primary")
                    clear_btn = gr.Button("Clear Chat")

                gr.Markdown("### Starter Prompts")
                starter_choice = gr.Dropdown(
                    label="Pick a starter prompt to auto-fill the input",
                    choices=list(STARTER_PROMPTS.keys()),
                    value="Explain AI Literacy to a 13-year-old",
                )
                starter_btn = gr.Button("Load Starter Prompt")

            # ---- Right column: controls / teaching panel ----
            with gr.Column(scale=2):
                gr.Markdown("## API & System Prompt")

                openai_key_ui = gr.Textbox(
                    label="OpenAI API Key (optional; otherwise uses OPENAI_API_KEY env var)",
                    type="password",
                )

                system_prompt_ui = gr.Textbox(
                    label="System / Model Instructions",
                    value=DEFAULT_SYSTEM_PROMPT,
                    lines=10,
                )

                gr.Markdown("## Behavior & Style")

                assistant_role = gr.Radio(
                    label="Assistant Role",
                    choices=[
                        "General Assistant",
                        "Teacher / Instructor",
                        "Engineer / Architect",
                        "Storyteller / Creative",
                    ],
                    value="General Assistant",
                )

                output_mode = gr.Radio(
                    label="Output Format",
                    choices=[
                        "Standard Chat",
                        "Executive Report",
                        "Infographic Outline",
                        "Bullet Summary",
                    ],
                    value="Standard Chat",
                )

                tone = gr.Radio(
                    label="Tone",
                    choices=["Neutral", "Bold / Visionary", "Minimalist"],
                    value="Neutral",
                )

                gr.Markdown(
                    "## Sampling (Experiment Zone)\n"
                    "These are teaching controls; for some GPT-5 variants they only influence behavior via the system prompt."
                )

                temperature = gr.Slider(
                    label="Temperature (creativity / randomness)",
                    minimum=0.0,
                    maximum=1.5,
                    value=DEFAULT_TEMPERATURE,
                    step=0.05,
                )

                top_p = gr.Slider(
                    label="Top-p (nucleus sampling)",
                    minimum=0.1,
                    maximum=1.0,
                    value=DEFAULT_TOP_P,
                    step=0.05,
                )

                max_tokens = gr.Slider(
                    label="Max completion tokens",
                    minimum=128,
                    maximum=4096,
                    value=DEFAULT_MAX_TOKENS,
                    step=128,
                )

                presence_penalty = gr.Slider(
                    label="Presence penalty (encourage new topics)",
                    minimum=-2.0,
                    maximum=2.0,
                    value=DEFAULT_PRESENCE_PENALTY,
                    step=0.1,
                )

                frequency_penalty = gr.Slider(
                    label="Frequency penalty (discourage repetition)",
                    minimum=-2.0,
                    maximum=2.0,
                    value=DEFAULT_FREQUENCY_PENALTY,
                    step=0.1,
                )

                gr.Markdown("## Image Generation")

                always_generate_image = gr.Checkbox(
                    label="Always generate an image for each message (in addition to auto-detect intent)",
                    value=False,
                )

                image_style = gr.Radio(
                    label="Image Style",
                    choices=[
                        "Futuristic glass UI dashboard",
                        "Clean infographic illustration",
                        "Soft watercolor concept art",
                        "High-contrast comic / graphic novel",
                        "Photorealistic lab / studio scene",
                    ],
                    value="Clean infographic illustration",
                )

                image_aspect = gr.Radio(
                    label="Aspect Ratio",
                    choices=[
                        "Square (1:1)",
                        "Portrait (9:16)",
                        "Landscape (16:9)",
                    ],
                    value="Square (1:1)",
                )

        # Shared chat state: list of messages (dicts)
        chat_state = gr.State([])

        # Component order must match agent_assembler_chat's parameter order.
        chat_inputs = [
            user_input,
            chat_state,
            openai_key_ui,
            system_prompt_ui,
            assistant_role,
            output_mode,
            tone,
            temperature,
            top_p,
            max_tokens,
            presence_penalty,
            frequency_penalty,
            always_generate_image,
            image_style,
            image_aspect,
        ]
        chat_outputs = [chatbot, image_out]

        def _sync_state_and_clear_input(msgs):
            # Follow-up step: mirror the chatbot value into state, empty the box.
            return msgs, ""

        def _empty_state():
            return []

        # The Send button and pressing Enter share one handler chain.
        for register_event in (send_btn.click, user_input.submit):
            register_event(
                fn=agent_assembler_chat,
                inputs=chat_inputs,
                outputs=chat_outputs,
            ).then(
                fn=_sync_state_and_clear_input,
                inputs=chatbot,
                outputs=[chat_state, user_input],
            )

        # Clear chat: reset the visible widgets first, then the stored history.
        clear_btn.click(
            fn=clear_chat,
            inputs=None,
            outputs=[chatbot, image_out],
        ).then(
            fn=_empty_state,
            inputs=None,
            outputs=chat_state,
        )

        # Starter prompt loader
        starter_btn.click(
            fn=get_starter_prompt,
            inputs=[starter_choice],
            outputs=[user_input],
        )

    return demo


if __name__ == "__main__":
    demo = build_interface()
    demo.launch()