ZENLLC committed on
Commit
9d0ccc3
·
verified ·
1 Parent(s): d199647

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +601 -0
app.py ADDED
@@ -0,0 +1,601 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import base64
3
+ from io import BytesIO
4
+ from typing import List, Tuple, Optional
5
+
6
+ import gradio as gr
7
+ from openai import OpenAI
8
+ from google import genai
9
+ from google.genai import types
10
+ from PIL import Image
11
+
12
# -------------------------------------------------------------------
# Config
# -------------------------------------------------------------------

# Page title (browser tab + header) and intro blurb rendered at the top
# of the Gradio UI.
APP_TITLE = "ZEN AI Co. Module 2 | Agent Assembler"
APP_DESCRIPTION = """
Multi-model agent that can chat, draft reports, generate infographic briefs,
and create images using GPT-5, Gemini 2.5 Pro, Gemini 3 Pro, Nano Banana,
Nano Banana Pro, and DALL·E 3.
"""

# Reasonable defaults if user doesn't touch sliders
DEFAULT_TEMPERATURE = 0.6
DEFAULT_MAX_TOKENS = 1024
26
+
27
+ # -------------------------------------------------------------------
28
+ # Helpers: API clients
29
+ # -------------------------------------------------------------------
30
+
31
def get_openai_client(key_override: Optional[str] = None) -> OpenAI:
    """
    Build an OpenAI client.

    The API key is resolved from two places, in priority order:
      1) ``key_override`` pasted into the UI, or
      2) the ``OPENAI_API_KEY`` environment variable.

    Raises:
        ValueError: if neither source yields a non-empty key.
    """
    candidates = (
        (key_override or "").strip(),
        os.getenv("OPENAI_API_KEY", "").strip(),
    )
    api_key = next((key for key in candidates if key), "")
    if not api_key:
        raise ValueError(
            "OpenAI API key missing. "
            "Either set OPENAI_API_KEY env var or paste it in the sidebar."
        )
    return OpenAI(api_key=api_key)
46
+
47
+
48
def get_google_client(key_override: Optional[str] = None) -> genai.Client:
    """
    Build a Google GenAI client.

    The API key is resolved from two places, in priority order:
      1) ``key_override`` pasted into the UI, or
      2) the ``GOOGLE_API_KEY`` environment variable.

    Raises:
        ValueError: if neither source yields a non-empty key.
    """
    candidates = (
        (key_override or "").strip(),
        os.getenv("GOOGLE_API_KEY", "").strip(),
    )
    api_key = next((key for key in candidates if key), "")
    if not api_key:
        raise ValueError(
            "Google Gemini API key missing. "
            "Either set GOOGLE_API_KEY env var or paste it in the sidebar."
        )
    return genai.Client(api_key=api_key)
61
+
62
+ # -------------------------------------------------------------------
63
+ # Helpers: Prompt & style shaping
64
+ # -------------------------------------------------------------------
65
+
66
def build_system_instructions(
    base_instructions: str,
    theme: str,
    output_mode: str,
    tone: str,
) -> str:
    """
    Compose the final system prompt from the base agent instructions plus
    the theme / output-mode / tone directives selected in the UI.

    Unknown selections resolve to an empty directive body; blank sections
    are dropped from the joined result.
    """
    themes = {
        "ZEN Dark": "Use a sleek, modern, slightly futuristic tone. Be concise but high signal.",
        "ZEN Light": "Use a clear, friendly, educational tone suitable for learners of all ages.",
        "Research / Technical": "Write like a senior research engineer: rigorous, structured, and explicit.",
        "Youth AI Pioneer": "Explain things in simple, motivating language suitable for ages 11–18, "
        "but never dumb it down.",
    }

    outputs = {
        "Standard Chat": "Respond like a normal assistant, but keep paragraphs tight and skimmable.",
        "Executive Report": "Respond as a structured executive brief with headings, bullets, and 1–2 sentence insights.",
        "Infographic Outline": "Respond as a bullet-point infographic blueprint with short, punchy lines and clear sections.",
        "Bullet Summary": "Respond as a compact bullet summary with 5–10 bullets max.",
    }

    tones = {
        "Neutral": "Keep style neutral and globally understandable.",
        "Bold / Visionary": "Lean into visionary, high-energy language while staying precise and concrete.",
        "Minimalist": "Be extremely concise; prioritize clarity over flourish.",
    }

    # (label, lookup table, selected key) triples for the three style axes.
    directive_specs = (
        ("STYLE THEME", themes, theme),
        ("OUTPUT MODE", outputs, output_mode),
        ("TONE", tones, tone),
    )
    directives = [
        f"{label}: {table.get(key, '')}" for label, table, key in directive_specs
    ]

    sections = (
        [base_instructions.strip(), ""]
        + directives
        + ["", "Always format results cleanly in Markdown."]
    )
    return "\n".join(section for section in sections if section.strip())
107
+
108
+
109
def history_to_messages(
    history: List[Tuple[str, str]],
    user_message: str,
    system_instructions: str,
) -> List[dict]:
    """
    Translate a Gradio tuple-style chat history into OpenAI chat-completions
    message dicts, appending the new user turn at the end.

    Empty turns (falsy user or assistant strings) are skipped.
    """
    messages: List[dict] = (
        [{"role": "system", "content": system_instructions}]
        if system_instructions
        else []
    )

    for user_turn, assistant_turn in history:
        for role, text in (("user", user_turn), ("assistant", assistant_turn)):
            if text:
                messages.append({"role": role, "content": text})

    messages.append({"role": "user", "content": user_message})
    return messages
129
+
130
+
131
def history_to_gemini_prompt(
    history: List[Tuple[str, str]],
    user_message: str,
    system_instructions: str,
) -> str:
    """
    Flatten the chat history into one plain-text transcript for Gemini,
    ending with an open "ASSISTANT:" turn for the model to complete.
    """
    segments: List[str] = []
    if system_instructions:
        segments.append(f"SYSTEM:\n{system_instructions}\n")

    for user_turn, assistant_turn in history:
        if user_turn:
            segments.append(f"USER: {user_turn}")
        if assistant_turn:
            segments.append(f"ASSISTANT: {assistant_turn}")

    segments.extend([f"USER: {user_message}", "ASSISTANT:"])
    return "\n\n".join(segments)
152
+
153
+ # -------------------------------------------------------------------
154
+ # Helpers: Model calls (text)
155
+ # -------------------------------------------------------------------
156
+
157
def call_openai_text(
    openai_key: Optional[str],
    messages: List[dict],
    temperature: float,
    max_tokens: int,
) -> str:
    """
    Send a Chat Completions request to GPT-5 and return the reply text.

    Args:
        openai_key: optional UI-supplied key (falls back to OPENAI_API_KEY).
        messages: OpenAI-style message dicts (system/user/assistant).
        temperature: sampling temperature forwarded to the API.
        max_tokens: cap on generated tokens.

    Returns:
        The assistant message content, or "" if the API returns none.
    """
    client = get_openai_client(openai_key)
    completion = client.chat.completions.create(
        model="gpt-5",  # You can change to gpt-5.1 or whatever is available in your project
        messages=messages,
        temperature=temperature,
        # GPT-5-family models reject the legacy `max_tokens` parameter on
        # Chat Completions; `max_completion_tokens` is the supported cap.
        max_completion_tokens=max_tokens,
    )
    # NOTE(review): reasoning-class models may also restrict `temperature`
    # to its default — confirm against the current API reference.
    return completion.choices[0].message.content or ""
171
+
172
+
173
def call_gemini_text(
    google_key: Optional[str],
    model_id: str,
    prompt: str,
    temperature: float,
    max_tokens: int,
) -> str:
    """
    Send a single-turn text request to a Gemini model.

    Args:
        google_key: optional UI-supplied key (falls back to GOOGLE_API_KEY).
        model_id: Gemini model id (e.g. "gemini-2.5-pro").
        prompt: flattened transcript text (see history_to_gemini_prompt).
        temperature: sampling temperature.
        max_tokens: cap on generated output tokens.

    Returns:
        The response text, or "" when the response carries no text
        (e.g. a blocked or empty candidate), keeping the declared
        `-> str` contract instead of leaking None to callers.
    """
    client = get_google_client(google_key)
    response = client.models.generate_content(
        model=model_id,
        contents=[prompt],
        config=types.GenerateContentConfig(
            temperature=temperature,
            max_output_tokens=max_tokens,
        ),
    )
    # `response.text` is None when no text part is present; normalize to "".
    return response.text or ""
190
+
191
+
192
def call_hybrid_text(
    openai_key: Optional[str],
    google_key: Optional[str],
    gemini_model_id: str,
    messages: List[dict],
    gemini_prompt: str,
    temperature: float,
    max_tokens: int,
) -> str:
    """
    Query both GPT-5 and the selected Gemini model, then return a fused
    Markdown answer showing each model's perspective. A failure in either
    backend is reported inline rather than aborting the whole call.
    """

    def _attempt(label: str, call) -> str:
        # Run one backend call; convert any exception into an inline note.
        try:
            return call()
        except Exception as exc:
            return f"[{label} call failed: {exc}]"

    gpt_answer = _attempt(
        "GPT-5",
        lambda: call_openai_text(openai_key, messages, temperature, max_tokens),
    )
    gemini_answer = _attempt(
        "Gemini",
        lambda: call_gemini_text(
            google_key, gemini_model_id, gemini_prompt, temperature, max_tokens
        ),
    )

    return (
        f"### GPT-5 Perspective\n{gpt_answer}\n\n"
        f"### Gemini Perspective\n{gemini_answer}"
    )
223
+
224
+ # -------------------------------------------------------------------
225
+ # Helpers: Image generation
226
+ # -------------------------------------------------------------------
227
+
228
def call_openai_dalle(
    openai_key: Optional[str],
    prompt: str,
    size: str = "1024x1024",
) -> Optional[Image.Image]:
    """
    Generate an image with DALL·E 3 via the OpenAI Images API.

    Args:
        openai_key: optional UI-supplied key (falls back to OPENAI_API_KEY).
        prompt: image description.
        size: output resolution string accepted by the API.

    Returns:
        The decoded PIL image, or None if the API returns no image data.
    """
    client = get_openai_client(openai_key)
    response = client.images.generate(
        model="dall-e-3",
        prompt=prompt,
        size=size,
        n=1,
        # BUGFIX: the Images API defaults to returning a URL for dall-e-3,
        # which left `b64_json` as None and crashed the decode below.
        # Explicitly request base64 so there is data to decode.
        response_format="b64_json",
    )
    if not response.data:
        return None

    img_data = response.data[0].b64_json
    if not img_data:
        # Defensive: treat a missing payload as "no image" rather than crash.
        return None
    img_bytes = base64.b64decode(img_data)
    return Image.open(BytesIO(img_bytes))
250
+
251
+
252
def call_gemini_image(
    google_key: Optional[str],
    model_id: str,
    prompt: str,
) -> Optional[Image.Image]:
    """
    Generate an image with Nano Banana (gemini-2.5-flash-image) or
    Nano Banana Pro (gemini-3-pro-image-preview) via the Google GenAI SDK.

    Returns:
        The first inline image found in the response, or None if the
        response contains no image parts.
    """
    client = get_google_client(google_key)
    response = client.models.generate_content(
        model=model_id,
        contents=[prompt],
    )

    # Walk candidate parts looking for inline image data. `candidates` and
    # `parts` can be None on blocked/empty responses, so guard both.
    for candidate in response.candidates or []:
        content = getattr(candidate, "content", None)
        if content is None:
            continue
        for part in content.parts or []:
            inline = getattr(part, "inline_data", None)
            data = getattr(inline, "data", None) if inline else None
            if not data:
                continue
            # BUGFIX: the google-genai SDK delivers inline_data.data as raw
            # bytes already; base64-decoding raw bytes corrupts the image.
            # Only decode when the payload is still a base64 string.
            img_bytes = base64.b64decode(data) if isinstance(data, str) else data
            return Image.open(BytesIO(img_bytes))

    return None
276
+
277
+ # -------------------------------------------------------------------
278
+ # Core chat function used by Gradio
279
+ # -------------------------------------------------------------------
280
+
281
def _resolve_gemini_model_id(gemini_model_choice: str) -> str:
    """Map the UI radio label to the Gemini API model id."""
    if gemini_model_choice == "Gemini 2.5 Pro":
        return "gemini-2.5-pro"
    return "gemini-3-pro-preview"


def _route_text_call(
    model_family: str,
    gemini_model_choice: str,
    openai_key_ui: str,
    google_key_ui: str,
    messages: List[dict],
    gemini_prompt: str,
    temperature: float,
    max_tokens: int,
) -> str:
    """Dispatch the text request to GPT-5, a single Gemini model, or hybrid."""
    if model_family == "OpenAI: GPT-5":
        return call_openai_text(
            openai_key=openai_key_ui,
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens,
        )

    model_id = _resolve_gemini_model_id(gemini_model_choice)
    if model_family.startswith("Google Gemini"):
        return call_gemini_text(
            google_key=google_key_ui,
            model_id=model_id,
            prompt=gemini_prompt,
            temperature=temperature,
            max_tokens=max_tokens,
        )

    # Hybrid mode: fuse GPT-5 and Gemini answers.
    return call_hybrid_text(
        openai_key=openai_key_ui,
        google_key=google_key_ui,
        gemini_model_id=model_id,
        messages=messages,
        gemini_prompt=gemini_prompt,
        temperature=temperature,
        max_tokens=max_tokens,
    )


def _dispatch_image_call(
    image_backend: str,
    openai_key_ui: str,
    google_key_ui: str,
    image_prompt: str,
) -> Optional[Image.Image]:
    """Route the image request to DALL·E 3, Nano Banana, or Nano Banana Pro."""
    if image_backend == "DALL·E 3 (OpenAI)":
        return call_openai_dalle(openai_key=openai_key_ui, prompt=image_prompt)
    if image_backend == "Nano Banana (Gemini 2.5 Flash Image)":
        return call_gemini_image(
            google_key=google_key_ui,
            model_id="gemini-2.5-flash-image",
            prompt=image_prompt,
        )
    # Nano Banana Pro
    return call_gemini_image(
        google_key=google_key_ui,
        model_id="gemini-3-pro-image-preview",
        prompt=image_prompt,
    )


def agent_assembler_chat(
    user_message: str,
    chat_history: List[Tuple[str, str]],
    openai_key_ui: str,
    google_key_ui: str,
    model_family: str,
    gemini_model_choice: str,
    output_mode: str,
    theme: str,
    tone: str,
    temperature: float,
    max_tokens: int,
    generate_image: bool,
    image_backend: str,
) -> Tuple[List[Tuple[str, str]], Optional[Image.Image]]:
    """
    Main Gradio callback.

    Builds the system prompt from the selected style controls, routes the
    message to the chosen text model(s), optionally generates a companion
    image, and returns the updated chat history plus the image (or None).
    Blank messages are ignored.
    """
    if not user_message.strip():
        return chat_history, None

    base_system = (
        "You are ZEN AI Co.'s **Agent Assembler**, a multi-model orchestrator. "
        "You can:\n"
        "- Hold deep, contextual conversations about AI literacy, automation, and education.\n"
        "- Generate executive reports and structured briefs.\n"
        "- Produce detailed infographic blueprints with clear sections and labels.\n"
        "- Collaborate with image models by designing precise, typo-free prompts.\n"
        "\n"
        "Always:\n"
        "- Avoid hallucinating APIs or capabilities you don't actually have.\n"
        "- Make outputs copy-paste-ready for real projects.\n"
        "- Keep spelling and formatting extremely precise."
    )

    system_instructions = build_system_instructions(
        base_instructions=base_system,
        theme=theme,
        output_mode=output_mode,
        tone=tone,
    )

    # Prepare conversations for both stacks.
    messages = history_to_messages(chat_history, user_message, system_instructions)
    gemini_prompt = history_to_gemini_prompt(chat_history, user_message, system_instructions)

    ai_reply = _route_text_call(
        model_family=model_family,
        gemini_model_choice=gemini_model_choice,
        openai_key_ui=openai_key_ui,
        google_key_ui=google_key_ui,
        messages=messages,
        gemini_prompt=gemini_prompt,
        temperature=temperature,
        max_tokens=max_tokens,
    )

    # Update chat history (copy, don't mutate the incoming state in place).
    chat_history = chat_history + [(user_message, ai_reply)]

    # Optional image generation.
    generated_image: Optional[Image.Image] = None
    if generate_image:
        # Build an image-oriented prompt from the last user query + output mode.
        image_prompt = (
            f"{user_message.strip()}\n\n"
            f"Image intent: {output_mode}. "
            "Render clean, readable text if any labels are required. "
            "Use a style that would fit the ZEN AI Co. brand."
        )

        try:
            generated_image = _dispatch_image_call(
                image_backend, openai_key_ui, google_key_ui, image_prompt
            )
        except Exception as e:
            # Append a note to the assistant message if image generation fails.
            chat_history[-1] = (
                chat_history[-1][0],
                chat_history[-1][1] + f"\n\n_Image generation failed: {e}_",
            )

    return chat_history, generated_image
404
+
405
+
406
def clear_chat():
    """Reset the chat panel: empty history and no generated image."""
    return ([], None)
408
+
409
+ # -------------------------------------------------------------------
410
+ # Gradio UI
411
+ # -------------------------------------------------------------------
412
+
413
def build_interface() -> gr.Blocks:
    """
    Assemble the full Gradio Blocks UI and wire all event handlers.

    Layout: chat + generated image on the left; API keys, model routing,
    style, generation, and image controls on the right. Chat history is
    kept in a gr.State and mirrored into the Chatbot component.
    """
    with gr.Blocks(title=APP_TITLE) as demo:
        gr.Markdown(f"# {APP_TITLE}")
        gr.Markdown(APP_DESCRIPTION)

        with gr.Row():
            # Left: Chat + image output
            with gr.Column(scale=3):
                chatbot = gr.Chatbot(
                    label="Agent Assembler Chat",
                    # BUGFIX: the callbacks build history as (user, bot)
                    # tuples, so the component must use tuple format.
                    # type="messages" expects role/content dicts and would
                    # not render the tuple history returned by
                    # agent_assembler_chat.
                    type="tuples",
                    height=520,
                )
                image_out = gr.Image(
                    label="Latest Generated Image",
                    height=320,
                    interactive=False,
                )
                user_input = gr.Textbox(
                    label="Your message",
                    placeholder="Ask for a chat, a report, an infographic outline, or an image...",
                    lines=3,
                )

                with gr.Row():
                    send_btn = gr.Button("Send", variant="primary")
                    clear_btn = gr.Button("Clear")

            # Right: Control panel
            with gr.Column(scale=2):
                gr.Markdown("## API Keys")
                openai_key_ui = gr.Textbox(
                    label="OpenAI API Key (optional, otherwise uses OPENAI_API_KEY env var)",
                    type="password",
                )
                google_key_ui = gr.Textbox(
                    label="Google Gemini API Key (optional, otherwise uses GOOGLE_API_KEY env var)",
                    type="password",
                )

                gr.Markdown("## Model & Style")

                model_family = gr.Radio(
                    label="Primary Model Routing",
                    choices=[
                        "OpenAI: GPT-5",
                        "Google Gemini: Single",
                        "Hybrid: GPT-5 + Gemini",
                    ],
                    value="Hybrid: GPT-5 + Gemini",
                )

                gemini_model_choice = gr.Radio(
                    label="Gemini Model",
                    choices=["Gemini 2.5 Pro", "Gemini 3 Pro (preview)"],
                    value="Gemini 3 Pro (preview)",
                )

                output_mode = gr.Radio(
                    label="Output Mode",
                    choices=[
                        "Standard Chat",
                        "Executive Report",
                        "Infographic Outline",
                        "Bullet Summary",
                    ],
                    value="Standard Chat",
                )

                theme = gr.Radio(
                    label="Theme (response style)",
                    choices=[
                        "ZEN Dark",
                        "ZEN Light",
                        "Research / Technical",
                        "Youth AI Pioneer",
                    ],
                    value="ZEN Dark",
                )

                tone = gr.Radio(
                    label="Tone",
                    choices=["Neutral", "Bold / Visionary", "Minimalist"],
                    value="Neutral",
                )

                gr.Markdown("## Generation Controls")

                temperature = gr.Slider(
                    label="Temperature (creativity)",
                    minimum=0.0,
                    maximum=1.5,
                    value=DEFAULT_TEMPERATURE,
                    step=0.05,
                )

                max_tokens = gr.Slider(
                    label="Max Tokens (text length)",
                    minimum=128,
                    maximum=4096,
                    value=DEFAULT_MAX_TOKENS,
                    step=128,
                )

                gr.Markdown("## Image Generation")

                generate_image = gr.Checkbox(
                    label="Also generate an image for this message",
                    value=False,
                )

                image_backend = gr.Radio(
                    label="Image Backend",
                    choices=[
                        "DALL·E 3 (OpenAI)",
                        "Nano Banana (Gemini 2.5 Flash Image)",
                        "Nano Banana Pro (Gemini 3 Pro Image Preview)",
                    ],
                    value="Nano Banana Pro (Gemini 3 Pro Image Preview)",
                )

        # State holding the tuple-format chat history between turns.
        chat_state = gr.State([])

        # Shared input list for both the Send button and textbox submit.
        chat_inputs = [
            user_input,
            chat_state,
            openai_key_ui,
            google_key_ui,
            model_family,
            gemini_model_choice,
            output_mode,
            theme,
            tone,
            temperature,
            max_tokens,
            generate_image,
            image_backend,
        ]

        # Wire up events: run the agent, then sync state + clear the textbox.
        send_btn.click(
            fn=agent_assembler_chat,
            inputs=chat_inputs,
            outputs=[chatbot, image_out],
        ).then(
            fn=lambda h: (h, ""),  # update state + clear box
            inputs=chatbot,
            outputs=[chat_state, user_input],
        )

        user_input.submit(
            fn=agent_assembler_chat,
            inputs=chat_inputs,
            outputs=[chatbot, image_out],
        ).then(
            fn=lambda h: (h, ""),  # update state + clear box
            inputs=chatbot,
            outputs=[chat_state, user_input],
        )

        clear_btn.click(
            fn=clear_chat,
            inputs=None,
            outputs=[chatbot, image_out],
        ).then(
            fn=lambda: [],
            inputs=None,
            outputs=chat_state,
        )

    return demo
597
+
598
+
599
+ if __name__ == "__main__":
600
+ demo = build_interface()
601
+ demo.launch()