ZENLLC committed on
Commit
131a626
·
verified ·
1 Parent(s): 354f1d9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +132 -39
app.py CHANGED
@@ -16,13 +16,11 @@ APP_DESCRIPTION = """
16
  OpenAI-only teaching rig for building AI model UIs.
17
 
18
  • Uses GPT-5 for text generation.
19
- • Uses DALL·E 3 for image generation.
20
  • Lets you edit the system prompt, role, tone, and output format.
21
  • Provides sliders and controls to experiment with behavior.
22
- Includes starter prompts to show different use cases (chat, reports, infographics, visuals).
23
-
24
- NOTE: Some newer GPT-5 variants ignore temperature/top-p/penalty parameters.
25
- This app keeps the controls for teaching, and encodes their values into the instructions instead.
26
  """
27
 
28
  DEFAULT_TEMPERATURE = 0.7
@@ -66,7 +64,9 @@ def build_system_instructions(
66
  """
67
  Build a system prompt string combining user-provided base instructions
68
  with role + format + tone + "virtual sampling" metadata.
69
- (We encode the slider settings here since GPT-5 may not support those params directly.)
 
 
70
  """
71
  role_map = {
72
  "General Assistant": "Behave as a highly capable, calm general-purpose AI assistant.",
@@ -145,7 +145,7 @@ def history_to_openai_messages(
145
  return messages
146
 
147
  # -------------------------------------------------------------------
148
- # OpenAI Text & Image Calls
149
  # -------------------------------------------------------------------
150
 
151
  def call_openai_text(
@@ -158,9 +158,6 @@ def call_openai_text(
158
  - model
159
  - messages
160
  - max_completion_tokens
161
-
162
- Newer GPT-5 variants may not support custom temperature/top_p/penalties,
163
- so we rely on the system prompt for behavior control instead.
164
  """
165
  client = get_openai_client(openai_key)
166
  completion = client.chat.completions.create(
@@ -171,27 +168,48 @@ def call_openai_text(
171
  return completion.choices[0].message.content
172
 
173
 
174
- def call_openai_dalle(
175
  openai_key: Optional[str],
176
  prompt: str,
177
  size: str = "1024x1024",
178
  ) -> Optional[Image.Image]:
179
  """
180
- Use DALL·E 3 to generate a PIL image.
 
181
  """
182
  client = get_openai_client(openai_key)
183
- response = client.images.generate(
184
- model="dall-e-3",
185
- prompt=prompt,
186
- size=size,
187
- n=1,
188
- )
189
- if not response.data:
190
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
 
192
- img_data = response.data[0].b64_json
193
- img_bytes = base64.b64decode(img_data)
194
- return Image.open(BytesIO(img_bytes))
195
 
196
  # -------------------------------------------------------------------
197
  # Starter Prompts
@@ -224,6 +242,67 @@ STARTER_PROMPTS = {
224
  def get_starter_prompt(choice: str) -> str:
225
  return STARTER_PROMPTS.get(choice, "")
226
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
227
  # -------------------------------------------------------------------
228
  # Core Chat Logic
229
  # -------------------------------------------------------------------
@@ -241,13 +320,16 @@ def agent_assembler_chat(
241
  max_tokens: int,
242
  presence_penalty: float,
243
  frequency_penalty: float,
244
- generate_image: bool,
245
  image_style: str,
246
  image_aspect: str,
247
  ) -> Tuple[List[Dict[str, str]], Optional[Image.Image]]:
248
  """
249
- Main callback: GPT-5 text + optional DALL·E 3 image.
250
- chat_history is a list of messages: [{role, content}, ...]
 
 
 
251
  """
252
  if not user_message.strip():
253
  return chat_history, None
@@ -271,7 +353,7 @@ def agent_assembler_chat(
271
  system_instructions=system_instructions,
272
  )
273
 
274
- # Call GPT-5 (minimal parameters)
275
  try:
276
  ai_reply = call_openai_text(
277
  openai_key=openai_key_ui,
@@ -292,10 +374,13 @@ def agent_assembler_chat(
292
  {"role": "assistant", "content": ai_reply},
293
  ]
294
 
295
- # Optional image generation
 
 
 
296
  generated_image: Optional[Image.Image] = None
297
- if generate_image:
298
- # Map aspect label to DALL·E size
299
  aspect_to_size = {
300
  "Square (1:1)": "1024x1024",
301
  "Portrait (9:16)": "1024x1792",
@@ -311,17 +396,24 @@ def agent_assembler_chat(
311
  )
312
 
313
  try:
314
- generated_image = call_openai_dalle(
315
  openai_key=openai_key_ui,
316
  prompt=image_prompt,
317
  size=size,
318
  )
 
 
 
 
 
 
 
319
  except Exception as e:
320
  # Attach error note to latest assistant message
321
  if chat_history and chat_history[-1].get("role") == "assistant":
322
  chat_history[-1]["content"] += (
323
  f"\n\n_Image generation failed: `{e}`. "
324
- "Check your OpenAI key and dalle-3 availability._"
325
  )
326
 
327
  return chat_history, generated_image
@@ -367,7 +459,7 @@ def build_interface() -> gr.Blocks:
367
  height=520,
368
  )
369
  image_out = gr.Image(
370
- label="Latest Generated Image (DALL·E 3)",
371
  height=320,
372
  interactive=False,
373
  )
@@ -439,7 +531,8 @@ def build_interface() -> gr.Blocks:
439
  value="Neutral",
440
  )
441
 
442
- gr.Markdown("## Sampling (Experiment Zone)")
 
443
 
444
  temperature = gr.Slider(
445
  label="Temperature (creativity / randomness)",
@@ -481,10 +574,10 @@ def build_interface() -> gr.Blocks:
481
  step=0.1,
482
  )
483
 
484
- gr.Markdown("## Image Generation (DALL·E 3)")
485
 
486
- generate_image = gr.Checkbox(
487
- label="Also generate an image for this message",
488
  value=False,
489
  )
490
 
@@ -529,7 +622,7 @@ def build_interface() -> gr.Blocks:
529
  max_tokens,
530
  presence_penalty,
531
  frequency_penalty,
532
- generate_image,
533
  image_style,
534
  image_aspect,
535
  ],
@@ -556,7 +649,7 @@ def build_interface() -> gr.Blocks:
556
  max_tokens,
557
  presence_penalty,
558
  frequency_penalty,
559
- generate_image,
560
  image_style,
561
  image_aspect,
562
  ],
 
16
  OpenAI-only teaching rig for building AI model UIs.
17
 
18
  • Uses GPT-5 for text generation.
19
+ • Uses DALL·E 3 (with fallback to gpt-image-1) for image generation.
20
  • Lets you edit the system prompt, role, tone, and output format.
21
  • Provides sliders and controls to experiment with behavior.
22
+ Automatically generates images when the user asks for one, with an option
23
+ to always generate images as well.
 
 
24
  """
25
 
26
  DEFAULT_TEMPERATURE = 0.7
 
64
  """
65
  Build a system prompt string combining user-provided base instructions
66
  with role + format + tone + "virtual sampling" metadata.
67
+
68
+ We encode the slider settings as behavior hints because some GPT-5 variants
69
+ do not accept temperature/top_p/penalties as API parameters.
70
  """
71
  role_map = {
72
  "General Assistant": "Behave as a highly capable, calm general-purpose AI assistant.",
 
145
  return messages
146
 
147
  # -------------------------------------------------------------------
148
+ # Text & Image Generation Helpers
149
  # -------------------------------------------------------------------
150
 
151
  def call_openai_text(
 
158
  - model
159
  - messages
160
  - max_completion_tokens
 
 
 
161
  """
162
  client = get_openai_client(openai_key)
163
  completion = client.chat.completions.create(
 
168
  return completion.choices[0].message.content
169
 
170
 
171
+ def call_openai_image_with_fallback(
172
  openai_key: Optional[str],
173
  prompt: str,
174
  size: str = "1024x1024",
175
  ) -> Optional[Image.Image]:
176
  """
177
+ Try DALL·E 3 first. If it fails, fall back to gpt-image-1.
178
+ We explicitly request base64 output and handle missing b64_json safely.
179
  """
180
  client = get_openai_client(openai_key)
181
+ last_error: Optional[Exception] = None
182
+
183
+ for model_name in ["dall-e-3", "gpt-image-1"]:
184
+ try:
185
+ response = client.images.generate(
186
+ model=model_name,
187
+ prompt=prompt,
188
+ size=size,
189
+ n=1,
190
+ quality="hd", # high quality
191
+ response_format="b64_json", # ensure base64 output
192
+ )
193
+ if not response.data:
194
+ continue
195
+
196
+ b64 = getattr(response.data[0], "b64_json", None)
197
+ if not b64:
198
+ # No base64 data; try next model
199
+ continue
200
+
201
+ img_bytes = base64.b64decode(b64)
202
+ return Image.open(BytesIO(img_bytes))
203
+ except Exception as e:
204
+ last_error = e
205
+ # Try next model in the list if available
206
+ continue
207
+
208
+ if last_error:
209
+ # Bubble up the last error so caller can log it or display a message
210
+ raise last_error
211
 
212
+ return None
 
 
213
 
214
  # -------------------------------------------------------------------
215
  # Starter Prompts
 
242
  def get_starter_prompt(choice: str) -> str:
243
  return STARTER_PROMPTS.get(choice, "")
244
 
245
+ # -------------------------------------------------------------------
246
+ # Image Intent Detection
247
+ # -------------------------------------------------------------------
248
+
249
+ def wants_image_from_text(text: str) -> bool:
250
+ """
251
+ Heuristic to decide if the user is asking for an image.
252
+
253
+ Triggers on phrases like:
254
+ - "generate an image"
255
+ - "create an image"
256
+ - "make an image"
257
+ - "image of"
258
+ - "picture of"
259
+ - "draw"
260
+ - "illustration"
261
+ - "infographic"
262
+ - "poster"
263
+ - "logo"
264
+ - "cover art"
265
+ - "thumbnail"
266
+
267
+ But avoids when user explicitly says they do NOT want an image.
268
+ """
269
+ t = text.lower()
270
+
271
+ # Negative patterns
272
+ negative_patterns = [
273
+ "don't generate an image",
274
+ "dont generate an image",
275
+ "don't create an image",
276
+ "dont create an image",
277
+ "no image",
278
+ "no images",
279
+ "without an image",
280
+ ]
281
+ if any(p in t for p in negative_patterns):
282
+ return False
283
+
284
+ positive_patterns = [
285
+ "generate an image",
286
+ "create an image",
287
+ "make an image",
288
+ "generate a picture",
289
+ "create a picture",
290
+ "picture of",
291
+ "image of",
292
+ "draw ",
293
+ "draw an",
294
+ "draw a",
295
+ "illustration",
296
+ "infographic",
297
+ "poster",
298
+ "logo",
299
+ "cover art",
300
+ "thumbnail",
301
+ "album art",
302
+ ]
303
+
304
+ return any(p in t for p in positive_patterns)
305
+
306
  # -------------------------------------------------------------------
307
  # Core Chat Logic
308
  # -------------------------------------------------------------------
 
320
  max_tokens: int,
321
  presence_penalty: float,
322
  frequency_penalty: float,
323
+ always_generate_image: bool,
324
  image_style: str,
325
  image_aspect: str,
326
  ) -> Tuple[List[Dict[str, str]], Optional[Image.Image]]:
327
  """
328
+ Main callback: GPT-5 text + optional image generation.
329
+
330
+ - Detects image intent from user text automatically.
331
+ - Optionally always generates an image if the toggle is on.
332
+ - chat_history is a list of messages: [{role, content}, ...]
333
  """
334
  if not user_message.strip():
335
  return chat_history, None
 
353
  system_instructions=system_instructions,
354
  )
355
 
356
+ # Call GPT-5
357
  try:
358
  ai_reply = call_openai_text(
359
  openai_key=openai_key_ui,
 
374
  {"role": "assistant", "content": ai_reply},
375
  ]
376
 
377
+ # Decide whether to generate an image
378
+ auto_image = wants_image_from_text(user_message)
379
+ should_generate_image = always_generate_image or auto_image
380
+
381
  generated_image: Optional[Image.Image] = None
382
+ if should_generate_image:
383
+ # Map aspect label to image size
384
  aspect_to_size = {
385
  "Square (1:1)": "1024x1024",
386
  "Portrait (9:16)": "1024x1792",
 
396
  )
397
 
398
  try:
399
+ generated_image = call_openai_image_with_fallback(
400
  openai_key=openai_key_ui,
401
  prompt=image_prompt,
402
  size=size,
403
  )
404
+ if generated_image is None:
405
+ # No explicit exception but no image either
406
+ if chat_history and chat_history[-1].get("role") == "assistant":
407
+ chat_history[-1]["content"] += (
408
+ "\n\n_Image generation returned no data. "
409
+ "Check your OpenAI key and image model availability._"
410
+ )
411
  except Exception as e:
412
  # Attach error note to latest assistant message
413
  if chat_history and chat_history[-1].get("role") == "assistant":
414
  chat_history[-1]["content"] += (
415
  f"\n\n_Image generation failed: `{e}`. "
416
+ "Check your OpenAI key and dalle-3 / gpt-image-1 availability._"
417
  )
418
 
419
  return chat_history, generated_image
 
459
  height=520,
460
  )
461
  image_out = gr.Image(
462
+ label="Latest Generated Image (DALL·E 3 / gpt-image-1)",
463
  height=320,
464
  interactive=False,
465
  )
 
531
  value="Neutral",
532
  )
533
 
534
+ gr.Markdown("## Sampling (Experiment Zone)\n"
535
+ "These are teaching controls; for some GPT-5 variants they only influence behavior via the system prompt.")
536
 
537
  temperature = gr.Slider(
538
  label="Temperature (creativity / randomness)",
 
574
  step=0.1,
575
  )
576
 
577
+ gr.Markdown("## Image Generation")
578
 
579
+ always_generate_image = gr.Checkbox(
580
+ label="Always generate an image for each message (in addition to auto-detect intent)",
581
  value=False,
582
  )
583
 
 
622
  max_tokens,
623
  presence_penalty,
624
  frequency_penalty,
625
+ always_generate_image,
626
  image_style,
627
  image_aspect,
628
  ],
 
649
  max_tokens,
650
  presence_penalty,
651
  frequency_penalty,
652
+ always_generate_image,
653
  image_style,
654
  image_aspect,
655
  ],