# NOTE(review): removed Hugging Face Spaces page banner ("Spaces: Sleeping")
# that was captured with the source; it is not part of the app and is not
# valid Python.
| import os | |
| import base64 | |
| from io import BytesIO | |
| from typing import List, Dict, Optional, Tuple | |
| import gradio as gr | |
| from openai import OpenAI | |
| from PIL import Image | |
# -------------------------------------------------------------------
# App Metadata
# -------------------------------------------------------------------
# Title shown in the browser tab and as the page heading (see build_interface).
APP_TITLE = "ZEN AI Co. Module 2 | Agent Assembler"
# Rendered verbatim as Markdown under the title; this is user-facing text.
APP_DESCRIPTION = """
OpenAI-only teaching rig for building AI model UIs.
• Uses GPT-5 for text generation.
• Uses DALL·E 3 (with fallback to gpt-image-1) for image generation.
• Lets you edit the system prompt, role, tone, and output format.
• Provides sliders and controls to experiment with behavior.
• Automatically generates images when the user asks for one, with an option
to always generate images as well.
"""
# Initial values for the sampling sliders in the UI. The sliders feed
# build_system_instructions (as behavior hints), not the API call itself.
DEFAULT_TEMPERATURE = 0.7
DEFAULT_TOP_P = 1.0
DEFAULT_MAX_TOKENS = 1024
DEFAULT_PRESENCE_PENALTY = 0.0
DEFAULT_FREQUENCY_PENALTY = 0.0
# -------------------------------------------------------------------
# OpenAI Client Helper
# -------------------------------------------------------------------
def get_openai_client(key_override: Optional[str] = None) -> OpenAI:
    """
    Construct an OpenAI client.

    The API key is resolved in priority order:
      1) `key_override` (the value pasted into the UI sidebar), if non-blank;
      2) the OPENAI_API_KEY environment variable.

    Raises:
        ValueError: if neither source yields a non-blank key.
    """
    for candidate in (key_override or "", os.getenv("OPENAI_API_KEY", "")):
        candidate = candidate.strip()
        if candidate:
            return OpenAI(api_key=candidate)
    raise ValueError(
        "OpenAI API key missing. Set OPENAI_API_KEY env var "
        "or paste it into the sidebar."
    )
# -------------------------------------------------------------------
# Prompt & Style Helpers
# -------------------------------------------------------------------
def build_system_instructions(
    user_system_prompt: str,
    assistant_role: str,
    output_mode: str,
    tone: str,
    temperature: float,
    top_p: float,
    presence_penalty: float,
    frequency_penalty: float,
) -> str:
    """
    Compose the full system prompt sent to the model.

    Combines the user's base instructions with the selected role, output
    format, and tone, plus a "virtual sampling" section that encodes the UI
    slider values as plain-text behavior hints — some GPT-5 variants do not
    accept temperature/top_p/penalties as API parameters, so the hints are
    the only way the sliders can influence output.

    Returns a newline-joined string; blank segments are dropped.
    """
    role_hints = {
        "General Assistant": "Behave as a highly capable, calm general-purpose AI assistant.",
        "Teacher / Instructor": "Behave as a patient educator. Explain concepts step-by-step and check for understanding.",
        "Engineer / Architect": "Behave as a senior engineer and systems architect. Be explicit, structured, and precise.",
        "Storyteller / Creative": "Behave as a creative storyteller. Use vivid but clear language while staying coherent.",
    }
    format_hints = {
        "Standard Chat": "Respond like a normal chat, with paragraphs kept short and skimmable.",
        "Executive Report": (
            "Respond as an executive brief with headings, short sections, and bullet points "
            "highlighting key decisions and risks."
        ),
        "Infographic Outline": (
            "Respond as an infographic blueprint with section titles and short bullet lines. "
            "Focus on clarity and label-friendly phrases."
        ),
        "Bullet Summary": (
            "Respond as a tight bullet summary (5–10 bullets) capturing only the most important details."
        ),
    }
    tone_hints = {
        "Neutral": "Keep the tone neutral and globally understandable.",
        "Bold / Visionary": "Use confident, forward-looking language, but stay concrete and honest.",
        "Minimalist": "Be extremely concise. Prefer fewer words and high information density.",
    }
    # The slider section is assembled line-by-line; note the last two source
    # strings are one sentence split across lines (no newline between them).
    sampling_section = "\n".join([
        "SAMPLING HINTS FROM UI SLIDERS:",
        f"- Temperature slider: {temperature:.2f} (higher = more creative and speculative).",
        f"- Top-p slider: {top_p:.2f} (lower = more conservative).",
        f"- Presence penalty slider: {presence_penalty:.2f} (higher = encourage new topics).",
        f"- Frequency penalty slider: {frequency_penalty:.2f} (higher = reduce repetition).",
        "You must interpret these values as behavioral guidance even if the underlying "
        "model ignores sampling parameters.",
    ])
    segments = [
        (user_system_prompt or "").strip(),
        "",
        f"ASSISTANT ROLE: {role_hints.get(assistant_role, '')}",
        f"OUTPUT MODE: {format_hints.get(output_mode, '')}",
        f"TONE: {tone_hints.get(tone, '')}",
        "",
        sampling_section,
        "",
        "Always output clean Markdown.",
    ]
    return "\n".join(segment for segment in segments if segment.strip())
def history_to_openai_messages(
    history_messages: List[Dict[str, str]],
    user_message: str,
    system_instructions: str,
) -> List[Dict[str, str]]:
    """
    Assemble the OpenAI `messages` payload for one request.

    Order: optional system message first, then every valid prior turn from
    the Chatbot-style history (entries with a recognized role and non-empty
    content), and finally the new user query.
    """
    allowed_roles = ("user", "assistant", "system")
    replayed = [
        {"role": entry.get("role"), "content": entry.get("content", "")}
        for entry in history_messages
        if entry.get("role") in allowed_roles and entry.get("content", "")
    ]
    prefix = (
        [{"role": "system", "content": system_instructions}]
        if system_instructions
        else []
    )
    return prefix + replayed + [{"role": "user", "content": user_message}]
# -------------------------------------------------------------------
# Text & Image Generation Helpers
# -------------------------------------------------------------------
def call_openai_text(
    openai_key: Optional[str],
    messages: List[Dict[str, str]],
    max_tokens: int,
) -> str:
    """
    Run one GPT-5 Chat Completions request and return the reply text.

    Only parameters supported across GPT-5 variants are sent:
    model, messages, and max_completion_tokens.
    """
    response = get_openai_client(openai_key).chat.completions.create(
        # Change to the exact variant you have (e.g. "gpt-5.1-mini") if needed.
        model="gpt-5",
        messages=messages,
        max_completion_tokens=max_tokens,
    )
    return response.choices[0].message.content
def call_openai_image_with_fallback(
    openai_key: Optional[str],
    prompt: str,
    size: str = "1024x1024",
) -> Optional[Image.Image]:
    """
    Generate an image, trying DALL·E 3 first and gpt-image-1 second.

    Each model receives only the parameters it supports:
      - dall-e-3 accepts quality="hd" and needs response_format="b64_json"
        to return base64 instead of a URL;
      - gpt-image-1 always returns base64, rejects the `response_format`
        parameter, and uses quality values like "high" (not "hd").

    Previously both models were called with the dall-e-3-only kwargs, so the
    gpt-image-1 fallback always failed with an invalid-parameter error.

    Returns:
        A PIL image on success; None if no model returned image data without
        erroring.

    Raises:
        Exception: re-raises the last API error when every attempt failed.
    """
    client = get_openai_client(openai_key)
    # Per-model parameter overlays; dict order keeps dall-e-3 as the
    # preferred model.
    model_kwargs = {
        "dall-e-3": {"quality": "hd", "response_format": "b64_json"},
        "gpt-image-1": {"quality": "high"},
    }
    last_error: Optional[Exception] = None
    for model_name, extra in model_kwargs.items():
        # NOTE(review): `size` is passed through unchanged; dall-e-3 sizes
        # like "1792x1024" may not be accepted by gpt-image-1 — confirm
        # against the Images API if the fallback is exercised often.
        try:
            response = client.images.generate(
                model=model_name,
                prompt=prompt,
                size=size,
                n=1,
                **extra,
            )
        except Exception as e:
            last_error = e
            continue  # try the next model in the list
        if not response.data:
            continue
        b64 = getattr(response.data[0], "b64_json", None)
        if not b64:
            # URL-only or missing payload; try the next model.
            continue
        return Image.open(BytesIO(base64.b64decode(b64)))
    if last_error:
        # Bubble up the last error so the caller can log or display it.
        raise last_error
    return None
# -------------------------------------------------------------------
# Starter Prompts
# -------------------------------------------------------------------
# Dropdown label -> ready-to-send prompt text for the input box.
STARTER_PROMPTS = {
    "Explain AI Literacy to a 13-year-old": (
        "Explain what AI literacy is to a 13-year-old who loves YouTube and video games. "
        "Use examples from their world and end with 3 practical things they can do this week."
    ),
    "Executive Brief: AI Strategy for a Nonprofit": (
        "Create an executive brief for a youth-serving nonprofit that wants to adopt AI tools. "
        "Include priorities, risks, and quick wins in under 800 words."
    ),
    "Infographic Outline: ZEN AI Pioneer Program": (
        "Create an infographic outline that explains the ZEN AI Pioneer Program: "
        "what it is, who it serves, what makes it historic, and 3 key stats. "
        "Make the sections short and label-friendly."
    ),
    "Creative Image Prompt: Futuristic ZEN AI Lab": (
        "Describe a futuristic but realistic ZEN AI Co. lab where youth are building their own AI tools. "
        "Focus on what the scene looks like so it can be turned into an illustration. "
        "End with a separate final paragraph that is ONLY the pure image prompt text."
    ),
    "Debugging Prompt: Why is my model hallucinating?": (
        "I built a small AI app and the model is hallucinating facts about my organization. "
        "Explain why that happens and propose a 3-layer mitigation strategy (prompting, retrieval, UX)."
    ),
}


def get_starter_prompt(choice: str) -> str:
    """Return the starter prompt for *choice*, or "" for an unknown label."""
    try:
        return STARTER_PROMPTS[choice]
    except KeyError:
        return ""
# -------------------------------------------------------------------
# Image Intent Detection
# -------------------------------------------------------------------
def wants_image_from_text(text: str) -> bool:
    """
    Heuristically decide whether *text* is asking for an image.

    Case-insensitive substring matching. Explicit opt-outs (e.g. "no image",
    "without an image") always win and return False; otherwise any of the
    request phrases ("generate an image", "picture of", "draw ", "poster",
    "logo", "infographic", ...) returns True.
    """
    lowered = text.lower()
    opt_out_phrases = (
        "don't generate an image",
        "dont generate an image",
        "don't create an image",
        "dont create an image",
        "no image",
        "no images",
        "without an image",
    )
    # Opt-outs take priority over any positive match below.
    for phrase in opt_out_phrases:
        if phrase in lowered:
            return False
    request_phrases = (
        "generate an image",
        "create an image",
        "make an image",
        "generate a picture",
        "create a picture",
        "picture of",
        "image of",
        "draw ",
        "draw an",
        "draw a",
        "illustration",
        "infographic",
        "poster",
        "logo",
        "cover art",
        "thumbnail",
        "album art",
    )
    return any(phrase in lowered for phrase in request_phrases)
# -------------------------------------------------------------------
# Core Chat Logic
# -------------------------------------------------------------------
def agent_assembler_chat(
    user_message: str,
    chat_history: List[Dict[str, str]],
    openai_key_ui: str,
    system_prompt_ui: str,
    assistant_role: str,
    output_mode: str,
    tone: str,
    temperature: float,
    top_p: float,
    max_tokens: int,
    presence_penalty: float,
    frequency_penalty: float,
    always_generate_image: bool,
    image_style: str,
    image_aspect: str,
) -> Tuple[List[Dict[str, str]], Optional[Image.Image]]:
    """
    Handle one chat turn: GPT-5 text reply plus optional image generation.

    The image step runs when the user's text looks like an image request
    (wants_image_from_text) or the "always generate" toggle is on. API
    failures never raise; they are reported inside the assistant message so
    the UI stays responsive.

    Returns:
        (extended message history, generated PIL image or None).
    """
    if not user_message.strip():
        # Ignore blank submissions without touching history or image.
        return chat_history, None

    # System prompt including the slider-derived behavior hints.
    instructions = build_system_instructions(
        user_system_prompt=system_prompt_ui,
        assistant_role=assistant_role,
        output_mode=output_mode,
        tone=tone,
        temperature=temperature,
        top_p=top_p,
        presence_penalty=presence_penalty,
        frequency_penalty=frequency_penalty,
    )
    payload = history_to_openai_messages(
        history_messages=chat_history,
        user_message=user_message,
        system_instructions=instructions,
    )

    try:
        reply = call_openai_text(
            openai_key=openai_key_ui,
            messages=payload,
            max_tokens=max_tokens,
        )
    except Exception as e:
        # Surface the failure as the assistant's reply instead of crashing.
        reply = (
            "There was an error calling GPT-5.\n\n"
            f"Short message: `{e}`\n\n"
            "Check that your API key is valid and that the model name matches "
            "what is available in your OpenAI account."
        )

    updated_history = chat_history + [
        {"role": "user", "content": user_message},
        {"role": "assistant", "content": reply},
    ]

    generated_image: Optional[Image.Image] = None
    if always_generate_image or wants_image_from_text(user_message):
        size_by_aspect = {
            "Square (1:1)": "1024x1024",
            "Portrait (9:16)": "1024x1792",
            "Landscape (16:9)": "1792x1024",
        }
        image_prompt = (
            f"{user_message.strip()}\n\n"
            f"IMAGE STYLE: {image_style}. "
            "High readability, clean composition, suitable for presentations or infographics."
        )
        note: Optional[str] = None
        try:
            generated_image = call_openai_image_with_fallback(
                openai_key=openai_key_ui,
                prompt=image_prompt,
                size=size_by_aspect.get(image_aspect, "1024x1024"),
            )
            if generated_image is None:
                # No exception, but also no image data.
                note = (
                    "\n\n_Image generation returned no data. "
                    "Check your OpenAI key and image model availability._"
                )
        except Exception as e:
            note = (
                f"\n\n_Image generation failed: `{e}`. "
                "Check your OpenAI key and dalle-3 / gpt-image-1 availability._"
            )
        # Attach any failure note to the latest assistant message.
        if note and updated_history and updated_history[-1].get("role") == "assistant":
            updated_history[-1]["content"] += note

    return updated_history, generated_image
def clear_chat() -> Tuple[List[Dict[str, str]], Optional[Image.Image]]:
    """Reset the UI: empty message history and no displayed image."""
    return [], None
# -------------------------------------------------------------------
# Gradio UI
# -------------------------------------------------------------------
# Initial content of the editable "System / Model Instructions" textbox in
# build_interface. This is runtime data sent verbatim to the model, so the
# text is left exactly as written.
DEFAULT_SYSTEM_PROMPT = """You are ZEN AI Co.'s Agent Assembler.
You help people understand how large language models and image models work
by giving clear, practical, and honest answers. You are allowed to:
- Explain how prompts, system messages, and parameters change behavior.
- Suggest better prompts and show before/after improvements.
- Design prompts for text and images that are copy-paste-ready.
- Produce reports, outlines, and infographic blueprints.
You must:
- Avoid making up API capabilities that do not exist.
- Be honest when you don't know something or lack context.
- Keep spelling and formatting very precise, especially in prompts and labels.
"""
def build_interface() -> gr.Blocks:
    """
    Assemble the Gradio Blocks UI and wire all event handlers.

    Layout: chat + image + input on the left; API key, system prompt,
    behavior/tone radios, sampling sliders, and image controls on the right.
    Chat history lives in a gr.State as a list of {"role", "content"} dicts —
    the same shape agent_assembler_chat consumes and produces.
    """
    with gr.Blocks(title=APP_TITLE) as demo:
        gr.Markdown(f"# {APP_TITLE}")
        gr.Markdown(APP_DESCRIPTION)
        with gr.Row():
            # LEFT: Chat + image + input
            with gr.Column(scale=3):
                chatbot = gr.Chatbot(
                    label="Agent Assembler Chat",
                    height=520,
                    # BUG FIX: agent_assembler_chat returns OpenAI-style
                    # {"role", "content"} dicts. Without type="messages",
                    # Gradio defaults to the legacy tuple format and the
                    # history does not render correctly.
                    type="messages",
                )
                image_out = gr.Image(
                    label="Latest Generated Image (DALL·E 3 / gpt-image-1)",
                    height=320,
                    interactive=False,
                )
                user_input = gr.Textbox(
                    label="Your message",
                    placeholder="Ask a question, request a report, or describe a scene for image generation...",
                    lines=3,
                )
                with gr.Row():
                    send_btn = gr.Button("Send", variant="primary")
                    clear_btn = gr.Button("Clear Chat")
                gr.Markdown("### Starter Prompts")
                starter_choice = gr.Dropdown(
                    label="Pick a starter prompt to auto-fill the input",
                    choices=list(STARTER_PROMPTS.keys()),
                    value="Explain AI Literacy to a 13-year-old",
                )
                starter_btn = gr.Button("Load Starter Prompt")
            # RIGHT: Controls / teaching panel
            with gr.Column(scale=2):
                gr.Markdown("## API & System Prompt")
                openai_key_ui = gr.Textbox(
                    label="OpenAI API Key (optional; otherwise uses OPENAI_API_KEY env var)",
                    type="password",
                )
                system_prompt_ui = gr.Textbox(
                    label="System / Model Instructions",
                    value=DEFAULT_SYSTEM_PROMPT,
                    lines=10,
                )
                gr.Markdown("## Behavior & Style")
                assistant_role = gr.Radio(
                    label="Assistant Role",
                    choices=[
                        "General Assistant",
                        "Teacher / Instructor",
                        "Engineer / Architect",
                        "Storyteller / Creative",
                    ],
                    value="General Assistant",
                )
                output_mode = gr.Radio(
                    label="Output Format",
                    choices=[
                        "Standard Chat",
                        "Executive Report",
                        "Infographic Outline",
                        "Bullet Summary",
                    ],
                    value="Standard Chat",
                )
                tone = gr.Radio(
                    label="Tone",
                    choices=[
                        "Neutral",
                        "Bold / Visionary",
                        "Minimalist",
                    ],
                    value="Neutral",
                )
                gr.Markdown("## Sampling (Experiment Zone)\n"
                            "These are teaching controls; for some GPT-5 variants they only influence behavior via the system prompt.")
                temperature = gr.Slider(
                    label="Temperature (creativity / randomness)",
                    minimum=0.0,
                    maximum=1.5,
                    value=DEFAULT_TEMPERATURE,
                    step=0.05,
                )
                top_p = gr.Slider(
                    label="Top-p (nucleus sampling)",
                    minimum=0.1,
                    maximum=1.0,
                    value=DEFAULT_TOP_P,
                    step=0.05,
                )
                max_tokens = gr.Slider(
                    label="Max completion tokens",
                    minimum=128,
                    maximum=4096,
                    value=DEFAULT_MAX_TOKENS,
                    step=128,
                )
                presence_penalty = gr.Slider(
                    label="Presence penalty (encourage new topics)",
                    minimum=-2.0,
                    maximum=2.0,
                    value=DEFAULT_PRESENCE_PENALTY,
                    step=0.1,
                )
                frequency_penalty = gr.Slider(
                    label="Frequency penalty (discourage repetition)",
                    minimum=-2.0,
                    maximum=2.0,
                    value=DEFAULT_FREQUENCY_PENALTY,
                    step=0.1,
                )
                gr.Markdown("## Image Generation")
                always_generate_image = gr.Checkbox(
                    label="Always generate an image for each message (in addition to auto-detect intent)",
                    value=False,
                )
                image_style = gr.Radio(
                    label="Image Style",
                    choices=[
                        "Futuristic glass UI dashboard",
                        "Clean infographic illustration",
                        "Soft watercolor concept art",
                        "High-contrast comic / graphic novel",
                        "Photorealistic lab / studio scene",
                    ],
                    value="Clean infographic illustration",
                )
                image_aspect = gr.Radio(
                    label="Aspect Ratio",
                    choices=[
                        "Square (1:1)",
                        "Portrait (9:16)",
                        "Landscape (16:9)",
                    ],
                    value="Square (1:1)",
                )

        # Shared chat state: list of {"role", "content"} message dicts.
        chat_state = gr.State([])

        # The send button and the textbox's Enter key run the same pipeline,
        # so the 15-item inputs list is defined once instead of duplicated.
        chat_inputs = [
            user_input,
            chat_state,
            openai_key_ui,
            system_prompt_ui,
            assistant_role,
            output_mode,
            tone,
            temperature,
            top_p,
            max_tokens,
            presence_penalty,
            frequency_penalty,
            always_generate_image,
            image_style,
            image_aspect,
        ]

        def _sync_and_clear(msgs):
            # Mirror the rendered chat into state and empty the input box.
            return msgs, ""

        for trigger in (send_btn.click, user_input.submit):
            trigger(
                fn=agent_assembler_chat,
                inputs=chat_inputs,
                outputs=[chatbot, image_out],
            ).then(
                fn=_sync_and_clear,
                inputs=chatbot,
                outputs=[chat_state, user_input],
            )

        # Clear chat: wipe the rendered chat + image, then reset the state.
        clear_btn.click(
            fn=clear_chat,
            inputs=None,
            outputs=[chatbot, image_out],
        ).then(
            fn=lambda: [],
            inputs=None,
            outputs=chat_state,
        )

        # Starter prompt loader: copy the selected prompt into the input box.
        starter_btn.click(
            fn=get_starter_prompt,
            inputs=[starter_choice],
            outputs=[user_input],
        )
    return demo
if __name__ == "__main__":
    # Build the Blocks app and start the local Gradio server.
    demo = build_interface()
    demo.launch()