import base64
import os
from io import BytesIO
from typing import Dict, List, Optional, Tuple

import gradio as gr
from openai import OpenAI
from PIL import Image

# -------------------------------------------------------------------
# App Metadata
# -------------------------------------------------------------------

APP_TITLE = "ZEN AI Co. Module 2 | Agent Assembler"

APP_DESCRIPTION = """
OpenAI-only teaching rig for building AI model UIs.

• Uses GPT-5 for text generation.
• Uses DALL·E 3 (with fallback to gpt-image-1) for image generation.
• Lets you edit the system prompt, role, tone, and output format.
• Provides sliders and controls to experiment with behavior.
• Automatically generates images when the user asks for one, with an option
  to always generate images as well.
"""

DEFAULT_TEMPERATURE = 0.7
DEFAULT_TOP_P = 1.0
DEFAULT_MAX_TOKENS = 1024
DEFAULT_PRESENCE_PENALTY = 0.0
DEFAULT_FREQUENCY_PENALTY = 0.0


# -------------------------------------------------------------------
# OpenAI Client Helper
# -------------------------------------------------------------------

def get_openai_client(key_override: Optional[str] = None) -> OpenAI:
    """Return an OpenAI client.

    The key is taken from, in order of priority:
      1) ``key_override`` (the UI sidebar field), or
      2) the ``OPENAI_API_KEY`` environment variable.

    Raises:
        ValueError: if neither source provides a non-empty key.
    """
    api_key = (key_override or "").strip() or os.getenv("OPENAI_API_KEY", "").strip()
    if not api_key:
        raise ValueError(
            "OpenAI API key missing. Set OPENAI_API_KEY env var "
            "or paste it into the sidebar."
        )
    return OpenAI(api_key=api_key)


# -------------------------------------------------------------------
# Prompt & Style Helpers
# -------------------------------------------------------------------

def build_system_instructions(
    user_system_prompt: str,
    assistant_role: str,
    output_mode: str,
    tone: str,
    temperature: float,
    top_p: float,
    presence_penalty: float,
    frequency_penalty: float,
) -> str:
    """Build a system prompt combining the user-provided base instructions
    with role + format + tone + "virtual sampling" metadata.

    The slider settings are encoded as behavior hints in the prompt text
    because some GPT-5 variants do not accept temperature/top_p/penalties
    as API parameters.
    """
    role_map = {
        "General Assistant": "Behave as a highly capable, calm general-purpose AI assistant.",
        "Teacher / Instructor": "Behave as a patient educator. Explain concepts step-by-step and check for understanding.",
        "Engineer / Architect": "Behave as a senior engineer and systems architect. Be explicit, structured, and precise.",
        "Storyteller / Creative": "Behave as a creative storyteller. Use vivid but clear language while staying coherent.",
    }

    output_map = {
        "Standard Chat": "Respond like a normal chat, with paragraphs kept short and skimmable.",
        "Executive Report": (
            "Respond as an executive brief with headings, short sections, and bullet points "
            "highlighting key decisions and risks."
        ),
        "Infographic Outline": (
            "Respond as an infographic blueprint with section titles and short bullet lines. "
            "Focus on clarity and label-friendly phrases."
        ),
        "Bullet Summary": (
            "Respond as a tight bullet summary (5–10 bullets) capturing only the most important details."
        ),
    }

    tone_map = {
        "Neutral": "Keep the tone neutral and globally understandable.",
        "Bold / Visionary": "Use confident, forward-looking language, but stay concrete and honest.",
        "Minimalist": "Be extremely concise. Prefer fewer words and high information density.",
    }

    sampling_hint = (
        "SAMPLING HINTS FROM UI SLIDERS:\n"
        f"- Temperature slider: {temperature:.2f} (higher = more creative and speculative).\n"
        f"- Top-p slider: {top_p:.2f} (lower = more conservative).\n"
        f"- Presence penalty slider: {presence_penalty:.2f} (higher = encourage new topics).\n"
        f"- Frequency penalty slider: {frequency_penalty:.2f} (higher = reduce repetition).\n"
        "You must interpret these values as behavioral guidance even if the underlying "
        "model ignores sampling parameters."
    )

    parts = [
        (user_system_prompt or "").strip(),
        "",
        f"ASSISTANT ROLE: {role_map.get(assistant_role, '')}",
        f"OUTPUT MODE: {output_map.get(output_mode, '')}",
        f"TONE: {tone_map.get(tone, '')}",
        "",
        sampling_hint,
        "",
        "Always output clean Markdown.",
    ]
    # NOTE: blank spacer entries are filtered out by the strip() test below,
    # so sections are separated by single newlines.
    return "\n".join(p for p in parts if p.strip())


def history_to_openai_messages(
    history_messages: List[Dict[str, str]],
    user_message: str,
    system_instructions: str,
) -> List[Dict[str, str]]:
    """Convert Chatbot-style history (list of ``{role, content}`` dicts) into
    an OpenAI messages list with a prepended system message and the new
    user query appended.

    Entries with unknown roles or empty content are silently skipped.
    """
    messages: List[Dict[str, str]] = []
    if system_instructions:
        messages.append({"role": "system", "content": system_instructions})

    # Reuse existing history
    for msg in history_messages:
        role = msg.get("role")
        content = msg.get("content", "")
        if role in ("user", "assistant", "system") and content:
            messages.append({"role": role, "content": content})

    # New query
    messages.append({"role": "user", "content": user_message})
    return messages


# -------------------------------------------------------------------
# Text & Image Generation Helpers
# -------------------------------------------------------------------

def call_openai_text(
    openai_key: Optional[str],
    messages: List[Dict[str, str]],
    max_tokens: int,
) -> str:
    """Call GPT-5 via Chat Completions using only supported parameters:
    model, messages, and max_completion_tokens.

    Returns the assistant reply text (never ``None``).
    """
    client = get_openai_client(openai_key)
    completion = client.chat.completions.create(
        model="gpt-5",  # change to exact variant you have (e.g. "gpt-5.1-mini") if needed
        messages=messages,
        max_completion_tokens=max_tokens,
    )
    # message.content may be None (e.g. refusals); coerce to "" so callers
    # can safely concatenate follow-up notes onto the reply.
    return completion.choices[0].message.content or ""


# dall-e-3 and gpt-image-1 support different non-square sizes:
# dall-e-3 uses 1792x1024 / 1024x1792, gpt-image-1 uses 1536x1024 / 1024x1536.
_GPT_IMAGE_1_SIZE_MAP = {
    "1792x1024": "1536x1024",
    "1024x1792": "1024x1536",
}


def call_openai_image_with_fallback(
    openai_key: Optional[str],
    prompt: str,
    size: str = "1024x1024",
) -> Optional[Image.Image]:
    """Generate an image, trying DALL·E 3 first and falling back to
    gpt-image-1.

    Each model gets the parameters it actually supports:
      - dall-e-3: quality="hd", explicit response_format="b64_json".
      - gpt-image-1: quality="high" (its scale is low/medium/high/auto),
        no response_format (it always returns base64), and a remapped size
        for non-square aspect ratios.

    Returns:
        A PIL image, or ``None`` if no model returned data and no model
        raised.

    Raises:
        Exception: the last model error, if every attempt raised.
    """
    client = get_openai_client(openai_key)
    last_error: Optional[Exception] = None

    for model_name in ("dall-e-3", "gpt-image-1"):
        try:
            if model_name == "dall-e-3":
                extra = {
                    "size": size,
                    "quality": "hd",                 # high quality
                    "response_format": "b64_json",   # ensure base64 output
                }
            else:
                extra = {
                    "size": _GPT_IMAGE_1_SIZE_MAP.get(size, size),
                    "quality": "high",
                }
            response = client.images.generate(
                model=model_name,
                prompt=prompt,
                n=1,
                **extra,
            )
            if not response.data:
                continue
            b64 = getattr(response.data[0], "b64_json", None)
            if not b64:
                # No base64 data; try next model
                continue
            img_bytes = base64.b64decode(b64)
            return Image.open(BytesIO(img_bytes))
        except Exception as e:
            last_error = e
            # Try next model in the list if available
            continue

    if last_error:
        # Bubble up the last error so caller can log it or display a message
        raise last_error
    return None


# -------------------------------------------------------------------
# Starter Prompts
# -------------------------------------------------------------------

STARTER_PROMPTS = {
    "Explain AI Literacy to a 13-year-old":
        "Explain what AI literacy is to a 13-year-old who loves YouTube and video games. "
        "Use examples from their world and end with 3 practical things they can do this week.",
    "Executive Brief: AI Strategy for a Nonprofit":
        "Create an executive brief for a youth-serving nonprofit that wants to adopt AI tools. "
        "Include priorities, risks, and quick wins in under 800 words.",
    "Infographic Outline: ZEN AI Pioneer Program":
        "Create an infographic outline that explains the ZEN AI Pioneer Program: "
        "what it is, who it serves, what makes it historic, and 3 key stats. "
        "Make the sections short and label-friendly.",
    "Creative Image Prompt: Futuristic ZEN AI Lab":
        "Describe a futuristic but realistic ZEN AI Co. lab where youth are building their own AI tools. "
        "Focus on what the scene looks like so it can be turned into an illustration. "
        "End with a separate final paragraph that is ONLY the pure image prompt text.",
    "Debugging Prompt: Why is my model hallucinating?":
        "I built a small AI app and the model is hallucinating facts about my organization. "
        "Explain why that happens and propose a 3-layer mitigation strategy (prompting, retrieval, UX).",
}


def get_starter_prompt(choice: str) -> str:
    """Return the starter prompt text for a dropdown choice ('' if unknown)."""
    return STARTER_PROMPTS.get(choice, "")


# -------------------------------------------------------------------
# Image Intent Detection
# -------------------------------------------------------------------

def wants_image_from_text(text: str) -> bool:
    """Heuristically decide whether the user is asking for an image.

    Triggers on phrases like "generate an image", "picture of", "draw",
    "infographic", "poster", "logo", etc., but returns False when the user
    explicitly opts out ("no image", "without an image", ...).
    """
    t = text.lower()

    # Negative patterns win: an explicit opt-out suppresses generation.
    negative_patterns = [
        "don't generate an image",
        "dont generate an image",
        "don't create an image",
        "dont create an image",
        "no image",
        "no images",
        "without an image",
    ]
    if any(p in t for p in negative_patterns):
        return False

    positive_patterns = [
        "generate an image",
        "create an image",
        "make an image",
        "generate a picture",
        "create a picture",
        "picture of",
        "image of",
        "draw ",
        "draw an",
        "draw a",
        "illustration",
        "infographic",
        "poster",
        "logo",
        "cover art",
        "thumbnail",
        "album art",
    ]
    return any(p in t for p in positive_patterns)


# -------------------------------------------------------------------
# Core Chat Logic
# -------------------------------------------------------------------

def agent_assembler_chat(
    user_message: str,
    chat_history: List[Dict[str, str]],
    openai_key_ui: str,
    system_prompt_ui: str,
    assistant_role: str,
    output_mode: str,
    tone: str,
    temperature: float,
    top_p: float,
    max_tokens: int,
    presence_penalty: float,
    frequency_penalty: float,
    always_generate_image: bool,
    image_style: str,
    image_aspect: str,
) -> Tuple[List[Dict[str, str]], Optional[Image.Image]]:
    """Main callback: GPT-5 text + optional image generation.

    - Detects image intent from the user text automatically.
    - Optionally always generates an image if the toggle is on.
    - ``chat_history`` is a list of ``{role, content}`` message dicts.

    Returns the updated history and the generated image (or ``None``).
    API errors are rendered into the chat instead of raising, so the UI
    never crashes on a bad key or unavailable model.
    """
    if not user_message.strip():
        return chat_history, None

    # Build system instructions (including slider hints)
    system_instructions = build_system_instructions(
        user_system_prompt=system_prompt_ui,
        assistant_role=assistant_role,
        output_mode=output_mode,
        tone=tone,
        temperature=temperature,
        top_p=top_p,
        presence_penalty=presence_penalty,
        frequency_penalty=frequency_penalty,
    )

    # Prepare messages for OpenAI
    openai_messages = history_to_openai_messages(
        history_messages=chat_history,
        user_message=user_message,
        system_instructions=system_instructions,
    )

    # Call GPT-5
    try:
        ai_reply = call_openai_text(
            openai_key=openai_key_ui,
            messages=openai_messages,
            max_tokens=max_tokens,
        )
    except Exception as e:
        ai_reply = (
            "There was an error calling GPT-5.\n\n"
            f"Short message: `{e}`\n\n"
            "Check that your API key is valid and that the model name matches "
            "what is available in your OpenAI account."
        )

    # Update history (copy, don't mutate the caller's list in place)
    chat_history = chat_history + [
        {"role": "user", "content": user_message},
        {"role": "assistant", "content": ai_reply},
    ]

    # Decide whether to generate an image
    auto_image = wants_image_from_text(user_message)
    should_generate_image = always_generate_image or auto_image

    generated_image: Optional[Image.Image] = None
    if should_generate_image:
        # Map aspect label to image size (dall-e-3 sizes; the fallback model
        # remaps these internally — see call_openai_image_with_fallback).
        aspect_to_size = {
            "Square (1:1)": "1024x1024",
            "Portrait (9:16)": "1024x1792",
            "Landscape (16:9)": "1792x1024",
        }
        size = aspect_to_size.get(image_aspect, "1024x1024")

        # Build image prompt
        image_prompt = (
            f"{user_message.strip()}\n\n"
            f"IMAGE STYLE: {image_style}. "
            "High readability, clean composition, suitable for presentations or infographics."
        )

        try:
            generated_image = call_openai_image_with_fallback(
                openai_key=openai_key_ui,
                prompt=image_prompt,
                size=size,
            )
            if generated_image is None:
                # No explicit exception but no image either
                if chat_history and chat_history[-1].get("role") == "assistant":
                    chat_history[-1]["content"] += (
                        "\n\n_Image generation returned no data. "
                        "Check your OpenAI key and image model availability._"
                    )
        except Exception as e:
            # Attach error note to latest assistant message
            if chat_history and chat_history[-1].get("role") == "assistant":
                chat_history[-1]["content"] += (
                    f"\n\n_Image generation failed: `{e}`. "
                    "Check your OpenAI key and dalle-3 / gpt-image-1 availability._"
                )

    return chat_history, generated_image


def clear_chat() -> Tuple[List[Dict[str, str]], Optional[Image.Image]]:
    """Clear chat history and the displayed image."""
    return [], None


# -------------------------------------------------------------------
# Gradio UI
# -------------------------------------------------------------------

DEFAULT_SYSTEM_PROMPT = """You are ZEN AI Co.'s Agent Assembler.

You help people understand how large language models and image models work by giving clear, practical, and honest answers.

You are allowed to:
- Explain how prompts, system messages, and parameters change behavior.
- Suggest better prompts and show before/after improvements.
- Design prompts for text and images that are copy-paste-ready.
- Produce reports, outlines, and infographic blueprints.

You must:
- Avoid making up API capabilities that do not exist.
- Be honest when you don't know something or lack context.
- Keep spelling and formatting very precise, especially in prompts and labels.
"""


def build_interface() -> gr.Blocks:
    """Assemble and return the full Gradio Blocks UI.

    Layout: left column = chat + image + input; right column = API key,
    system prompt, behavior radios, sampling sliders, and image controls.
    """
    with gr.Blocks(title=APP_TITLE) as demo:
        gr.Markdown(f"# {APP_TITLE}")
        gr.Markdown(APP_DESCRIPTION)

        with gr.Row():
            # LEFT: Chat + image + input
            with gr.Column(scale=3):
                chatbot = gr.Chatbot(
                    label="Agent Assembler Chat",
                    height=520,
                    # History is openai-style {role, content} dicts, so the
                    # Chatbot must use the "messages" format (the default
                    # tuple format would fail to render them).
                    type="messages",
                )
                image_out = gr.Image(
                    label="Latest Generated Image (DALL·E 3 / gpt-image-1)",
                    height=320,
                    interactive=False,
                )
                user_input = gr.Textbox(
                    label="Your message",
                    placeholder="Ask a question, request a report, or describe a scene for image generation...",
                    lines=3,
                )
                with gr.Row():
                    send_btn = gr.Button("Send", variant="primary")
                    clear_btn = gr.Button("Clear Chat")

                gr.Markdown("### Starter Prompts")
                starter_choice = gr.Dropdown(
                    label="Pick a starter prompt to auto-fill the input",
                    choices=list(STARTER_PROMPTS.keys()),
                    value="Explain AI Literacy to a 13-year-old",
                )
                starter_btn = gr.Button("Load Starter Prompt")

            # RIGHT: Controls / teaching panel
            with gr.Column(scale=2):
                gr.Markdown("## API & System Prompt")
                openai_key_ui = gr.Textbox(
                    label="OpenAI API Key (optional; otherwise uses OPENAI_API_KEY env var)",
                    type="password",
                )
                system_prompt_ui = gr.Textbox(
                    label="System / Model Instructions",
                    value=DEFAULT_SYSTEM_PROMPT,
                    lines=10,
                )

                gr.Markdown("## Behavior & Style")
                assistant_role = gr.Radio(
                    label="Assistant Role",
                    choices=[
                        "General Assistant",
                        "Teacher / Instructor",
                        "Engineer / Architect",
                        "Storyteller / Creative",
                    ],
                    value="General Assistant",
                )
                output_mode = gr.Radio(
                    label="Output Format",
                    choices=[
                        "Standard Chat",
                        "Executive Report",
                        "Infographic Outline",
                        "Bullet Summary",
                    ],
                    value="Standard Chat",
                )
                tone = gr.Radio(
                    label="Tone",
                    choices=[
                        "Neutral",
                        "Bold / Visionary",
                        "Minimalist",
                    ],
                    value="Neutral",
                )

                gr.Markdown(
                    "## Sampling (Experiment Zone)\n"
                    "These are teaching controls; for some GPT-5 variants they only influence behavior via the system prompt."
                )
                temperature = gr.Slider(
                    label="Temperature (creativity / randomness)",
                    minimum=0.0,
                    maximum=1.5,
                    value=DEFAULT_TEMPERATURE,
                    step=0.05,
                )
                top_p = gr.Slider(
                    label="Top-p (nucleus sampling)",
                    minimum=0.1,
                    maximum=1.0,
                    value=DEFAULT_TOP_P,
                    step=0.05,
                )
                max_tokens = gr.Slider(
                    label="Max completion tokens",
                    minimum=128,
                    maximum=4096,
                    value=DEFAULT_MAX_TOKENS,
                    step=128,
                )
                presence_penalty = gr.Slider(
                    label="Presence penalty (encourage new topics)",
                    minimum=-2.0,
                    maximum=2.0,
                    value=DEFAULT_PRESENCE_PENALTY,
                    step=0.1,
                )
                frequency_penalty = gr.Slider(
                    label="Frequency penalty (discourage repetition)",
                    minimum=-2.0,
                    maximum=2.0,
                    value=DEFAULT_FREQUENCY_PENALTY,
                    step=0.1,
                )

                gr.Markdown("## Image Generation")
                always_generate_image = gr.Checkbox(
                    label="Always generate an image for each message (in addition to auto-detect intent)",
                    value=False,
                )
                image_style = gr.Radio(
                    label="Image Style",
                    choices=[
                        "Futuristic glass UI dashboard",
                        "Clean infographic illustration",
                        "Soft watercolor concept art",
                        "High-contrast comic / graphic novel",
                        "Photorealistic lab / studio scene",
                    ],
                    value="Clean infographic illustration",
                )
                image_aspect = gr.Radio(
                    label="Aspect Ratio",
                    choices=[
                        "Square (1:1)",
                        "Portrait (9:16)",
                        "Landscape (16:9)",
                    ],
                    value="Square (1:1)",
                )

        # Shared chat state: list of messages (dicts)
        chat_state = gr.State([])

        # All the inputs the main callback needs, in signature order.
        chat_inputs = [
            user_input,
            chat_state,
            openai_key_ui,
            system_prompt_ui,
            assistant_role,
            output_mode,
            tone,
            temperature,
            top_p,
            max_tokens,
            presence_penalty,
            frequency_penalty,
            always_generate_image,
            image_style,
            image_aspect,
        ]

        # Send button: main call, then sync state and clear the input box.
        send_btn.click(
            fn=agent_assembler_chat,
            inputs=chat_inputs,
            outputs=[chatbot, image_out],
        ).then(
            fn=lambda msgs: (msgs, ""),  # sync state, clear input
            inputs=chatbot,
            outputs=[chat_state, user_input],
        )

        # Submit on Enter: identical wiring to the Send button.
        user_input.submit(
            fn=agent_assembler_chat,
            inputs=chat_inputs,
            outputs=[chatbot, image_out],
        ).then(
            fn=lambda msgs: (msgs, ""),
            inputs=chatbot,
            outputs=[chat_state, user_input],
        )

        # Clear chat display, then reset the backing state.
        clear_btn.click(
            fn=clear_chat,
            inputs=None,
            outputs=[chatbot, image_out],
        ).then(
            fn=lambda: [],
            inputs=None,
            outputs=chat_state,
        )

        # Starter prompt loader
        starter_btn.click(
            fn=get_starter_prompt,
            inputs=[starter_choice],
            outputs=[user_input],
        )

    return demo


if __name__ == "__main__":
    demo = build_interface()
    demo.launch()