ZENLLC committed on
Commit
131a626
·
verified ·
1 Parent(s): 354f1d9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +132 -39
app.py CHANGED
@@ -16,13 +16,11 @@ APP_DESCRIPTION = """
16
  OpenAI-only teaching rig for building AI model UIs.
17
 
18
  • Uses GPT-5 for text generation.
19
- • Uses DALL·E 3 for image generation.
20
  • Lets you edit the system prompt, role, tone, and output format.
21
  • Provides sliders and controls to experiment with behavior.
22
- Includes starter prompts to show different use cases (chat, reports, infographics, visuals).
23
-
24
- NOTE: Some newer GPT-5 variants ignore temperature/top-p/penalty parameters.
25
- This app keeps the controls for teaching, and encodes their values into the instructions instead.
26
  """
27
 
28
  DEFAULT_TEMPERATURE = 0.7
@@ -66,7 +64,9 @@ def build_system_instructions(
66
  """
67
  Build a system prompt string combining user-provided base instructions
68
  with role + format + tone + "virtual sampling" metadata.
69
- (We encode the slider settings here since GPT-5 may not support those params directly.)
 
 
70
  """
71
  role_map = {
72
  "General Assistant": "Behave as a highly capable, calm general-purpose AI assistant.",
@@ -145,7 +145,7 @@ def history_to_openai_messages(
145
  return messages
146
 
147
  # -------------------------------------------------------------------
148
- # OpenAI Text & Image Calls
149
  # -------------------------------------------------------------------
150
 
151
  def call_openai_text(
@@ -158,9 +158,6 @@ def call_openai_text(
158
  - model
159
  - messages
160
  - max_completion_tokens
161
-
162
- Newer GPT-5 variants may not support custom temperature/top_p/penalties,
163
- so we rely on the system prompt for behavior control instead.
164
  """
165
  client = get_openai_client(openai_key)
166
  completion = client.chat.completions.create(
@@ -171,27 +168,48 @@ def call_openai_text(
171
  return completion.choices[0].message.content
172
 
173
 
174
- def call_openai_dalle(
175
  openai_key: Optional[str],
176
  prompt: str,
177
  size: str = "1024x1024",
178
  ) -> Optional[Image.Image]:
179
  """
180
- Use DALL·E 3 to generate a PIL image.
 
181
  """
182
  client = get_openai_client(openai_key)
183
- response = client.images.generate(
184
- model="dall-e-3",
185
- prompt=prompt,
186
- size=size,
187
- n=1,
188
- )
189
- if not response.data:
190
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
 
192
- img_data = response.data[0].b64_json
193
- img_bytes = base64.b64decode(img_data)
194
- return Image.open(BytesIO(img_bytes))
195
 
196
  # -------------------------------------------------------------------
197
  # Starter Prompts
@@ -224,6 +242,67 @@ STARTER_PROMPTS = {
224
  def get_starter_prompt(choice: str) -> str:
225
  return STARTER_PROMPTS.get(choice, "")
226
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
227
  # -------------------------------------------------------------------
228
  # Core Chat Logic
229
  # -------------------------------------------------------------------
@@ -241,13 +320,16 @@ def agent_assembler_chat(
241
  max_tokens: int,
242
  presence_penalty: float,
243
  frequency_penalty: float,
244
- generate_image: bool,
245
  image_style: str,
246
  image_aspect: str,
247
  ) -> Tuple[List[Dict[str, str]], Optional[Image.Image]]:
248
  """
249
- Main callback: GPT-5 text + optional DALL·E 3 image.
250
- chat_history is a list of messages: [{role, content}, ...]
 
 
 
251
  """
252
  if not user_message.strip():
253
  return chat_history, None
@@ -271,7 +353,7 @@ def agent_assembler_chat(
271
  system_instructions=system_instructions,
272
  )
273
 
274
- # Call GPT-5 (minimal parameters)
275
  try:
276
  ai_reply = call_openai_text(
277
  openai_key=openai_key_ui,
@@ -292,10 +374,13 @@ def agent_assembler_chat(
292
  {"role": "assistant", "content": ai_reply},
293
  ]
294
 
295
- # Optional image generation
 
 
 
296
  generated_image: Optional[Image.Image] = None
297
- if generate_image:
298
- # Map aspect label to DALL·E size
299
  aspect_to_size = {
300
  "Square (1:1)": "1024x1024",
301
  "Portrait (9:16)": "1024x1792",
@@ -311,17 +396,24 @@ def agent_assembler_chat(
311
  )
312
 
313
  try:
314
- generated_image = call_openai_dalle(
315
  openai_key=openai_key_ui,
316
  prompt=image_prompt,
317
  size=size,
318
  )
 
 
 
 
 
 
 
319
  except Exception as e:
320
  # Attach error note to latest assistant message
321
  if chat_history and chat_history[-1].get("role") == "assistant":
322
  chat_history[-1]["content"] += (
323
  f"\n\n_Image generation failed: `{e}`. "
324
- "Check your OpenAI key and dalle-3 availability._"
325
  )
326
 
327
  return chat_history, generated_image
@@ -367,7 +459,7 @@ def build_interface() -> gr.Blocks:
367
  height=520,
368
  )
369
  image_out = gr.Image(
370
- label="Latest Generated Image (DALL·E 3)",
371
  height=320,
372
  interactive=False,
373
  )
@@ -439,7 +531,8 @@ def build_interface() -> gr.Blocks:
439
  value="Neutral",
440
  )
441
 
442
- gr.Markdown("## Sampling (Experiment Zone)")
 
443
 
444
  temperature = gr.Slider(
445
  label="Temperature (creativity / randomness)",
@@ -481,10 +574,10 @@ def build_interface() -> gr.Blocks:
481
  step=0.1,
482
  )
483
 
484
- gr.Markdown("## Image Generation (DALL·E 3)")
485
 
486
- generate_image = gr.Checkbox(
487
- label="Also generate an image for this message",
488
  value=False,
489
  )
490
 
@@ -529,7 +622,7 @@ def build_interface() -> gr.Blocks:
529
  max_tokens,
530
  presence_penalty,
531
  frequency_penalty,
532
- generate_image,
533
  image_style,
534
  image_aspect,
535
  ],
@@ -556,7 +649,7 @@ def build_interface() -> gr.Blocks:
556
  max_tokens,
557
  presence_penalty,
558
  frequency_penalty,
559
- generate_image,
560
  image_style,
561
  image_aspect,
562
  ],
 
16
  OpenAI-only teaching rig for building AI model UIs.
17
 
18
  • Uses GPT-5 for text generation.
19
+ • Uses DALL·E 3 (with fallback to gpt-image-1) for image generation.
20
  • Lets you edit the system prompt, role, tone, and output format.
21
  • Provides sliders and controls to experiment with behavior.
22
+ Automatically generates images when the user asks for one, with an option
23
+ to always generate images as well.
 
 
24
  """
25
 
26
  DEFAULT_TEMPERATURE = 0.7
 
64
  """
65
  Build a system prompt string combining user-provided base instructions
66
  with role + format + tone + "virtual sampling" metadata.
67
+
68
+ We encode the slider settings as behavior hints because some GPT-5 variants
69
+ do not accept temperature/top_p/penalties as API parameters.
70
  """
71
  role_map = {
72
  "General Assistant": "Behave as a highly capable, calm general-purpose AI assistant.",
 
145
  return messages
146
 
147
  # -------------------------------------------------------------------
148
+ # Text & Image Generation Helpers
149
  # -------------------------------------------------------------------
150
 
151
  def call_openai_text(
 
158
  - model
159
  - messages
160
  - max_completion_tokens
 
 
 
161
  """
162
  client = get_openai_client(openai_key)
163
  completion = client.chat.completions.create(
 
168
  return completion.choices[0].message.content
169
 
170
 
171
+ def call_openai_image_with_fallback(
172
  openai_key: Optional[str],
173
  prompt: str,
174
  size: str = "1024x1024",
175
  ) -> Optional[Image.Image]:
176
  """
177
+ Try DALL·E 3 first. If it fails, fall back to gpt-image-1.
178
+ We explicitly request base64 output and handle missing b64_json safely.
179
  """
180
  client = get_openai_client(openai_key)
181
+ last_error: Optional[Exception] = None
182
+
183
+ for model_name in ["dall-e-3", "gpt-image-1"]:
184
+ try:
185
+ response = client.images.generate(
186
+ model=model_name,
187
+ prompt=prompt,
188
+ size=size,
189
+ n=1,
190
+ quality="hd", # high quality
191
+ response_format="b64_json", # ensure base64 output
192
+ )
193
+ if not response.data:
194
+ continue
195
+
196
+ b64 = getattr(response.data[0], "b64_json", None)
197
+ if not b64:
198
+ # No base64 data; try next model
199
+ continue
200
+
201
+ img_bytes = base64.b64decode(b64)
202
+ return Image.open(BytesIO(img_bytes))
203
+ except Exception as e:
204
+ last_error = e
205
+ # Try next model in the list if available
206
+ continue
207
+
208
+ if last_error:
209
+ # Bubble up the last error so caller can log it or display a message
210
+ raise last_error
211
 
212
+ return None
 
 
213
 
214
  # -------------------------------------------------------------------
215
  # Starter Prompts
 
242
  def get_starter_prompt(choice: str) -> str:
243
  return STARTER_PROMPTS.get(choice, "")
244
 
245
+ # -------------------------------------------------------------------
246
+ # Image Intent Detection
247
+ # -------------------------------------------------------------------
248
+
249
+ def wants_image_from_text(text: str) -> bool:
250
+ """
251
+ Heuristic to decide if the user is asking for an image.
252
+
253
+ Triggers on phrases like:
254
+ - "generate an image"
255
+ - "create an image"
256
+ - "make an image"
257
+ - "image of"
258
+ - "picture of"
259
+ - "draw"
260
+ - "illustration"
261
+ - "infographic"
262
+ - "poster"
263
+ - "logo"
264
+ - "cover art"
265
+ - "thumbnail"
266
+
267
+ But avoids when user explicitly says they do NOT want an image.
268
+ """
269
+ t = text.lower()
270
+
271
+ # Negative patterns
272
+ negative_patterns = [
273
+ "don't generate an image",
274
+ "dont generate an image",
275
+ "don't create an image",
276
+ "dont create an image",
277
+ "no image",
278
+ "no images",
279
+ "without an image",
280
+ ]
281
+ if any(p in t for p in negative_patterns):
282
+ return False
283
+
284
+ positive_patterns = [
285
+ "generate an image",
286
+ "create an image",
287
+ "make an image",
288
+ "generate a picture",
289
+ "create a picture",
290
+ "picture of",
291
+ "image of",
292
+ "draw ",
293
+ "draw an",
294
+ "draw a",
295
+ "illustration",
296
+ "infographic",
297
+ "poster",
298
+ "logo",
299
+ "cover art",
300
+ "thumbnail",
301
+ "album art",
302
+ ]
303
+
304
+ return any(p in t for p in positive_patterns)
305
+
306
  # -------------------------------------------------------------------
307
  # Core Chat Logic
308
  # -------------------------------------------------------------------
 
320
  max_tokens: int,
321
  presence_penalty: float,
322
  frequency_penalty: float,
323
+ always_generate_image: bool,
324
  image_style: str,
325
  image_aspect: str,
326
  ) -> Tuple[List[Dict[str, str]], Optional[Image.Image]]:
327
  """
328
+ Main callback: GPT-5 text + optional image generation.
329
+
330
+ - Detects image intent from user text automatically.
331
+ - Optionally always generates an image if the toggle is on.
332
+ - chat_history is a list of messages: [{role, content}, ...]
333
  """
334
  if not user_message.strip():
335
  return chat_history, None
 
353
  system_instructions=system_instructions,
354
  )
355
 
356
+ # Call GPT-5
357
  try:
358
  ai_reply = call_openai_text(
359
  openai_key=openai_key_ui,
 
374
  {"role": "assistant", "content": ai_reply},
375
  ]
376
 
377
+ # Decide whether to generate an image
378
+ auto_image = wants_image_from_text(user_message)
379
+ should_generate_image = always_generate_image or auto_image
380
+
381
  generated_image: Optional[Image.Image] = None
382
+ if should_generate_image:
383
+ # Map aspect label to image size
384
  aspect_to_size = {
385
  "Square (1:1)": "1024x1024",
386
  "Portrait (9:16)": "1024x1792",
 
396
  )
397
 
398
  try:
399
+ generated_image = call_openai_image_with_fallback(
400
  openai_key=openai_key_ui,
401
  prompt=image_prompt,
402
  size=size,
403
  )
404
+ if generated_image is None:
405
+ # No explicit exception but no image either
406
+ if chat_history and chat_history[-1].get("role") == "assistant":
407
+ chat_history[-1]["content"] += (
408
+ "\n\n_Image generation returned no data. "
409
+ "Check your OpenAI key and image model availability._"
410
+ )
411
  except Exception as e:
412
  # Attach error note to latest assistant message
413
  if chat_history and chat_history[-1].get("role") == "assistant":
414
  chat_history[-1]["content"] += (
415
  f"\n\n_Image generation failed: `{e}`. "
416
+ "Check your OpenAI key and dalle-3 / gpt-image-1 availability._"
417
  )
418
 
419
  return chat_history, generated_image
 
459
  height=520,
460
  )
461
  image_out = gr.Image(
462
+ label="Latest Generated Image (DALL·E 3 / gpt-image-1)",
463
  height=320,
464
  interactive=False,
465
  )
 
531
  value="Neutral",
532
  )
533
 
534
+ gr.Markdown("## Sampling (Experiment Zone)\n"
535
+ "These are teaching controls; for some GPT-5 variants they only influence behavior via the system prompt.")
536
 
537
  temperature = gr.Slider(
538
  label="Temperature (creativity / randomness)",
 
574
  step=0.1,
575
  )
576
 
577
+ gr.Markdown("## Image Generation")
578
 
579
+ always_generate_image = gr.Checkbox(
580
+ label="Always generate an image for each message (in addition to auto-detect intent)",
581
  value=False,
582
  )
583
 
 
622
  max_tokens,
623
  presence_penalty,
624
  frequency_penalty,
625
+ always_generate_image,
626
  image_style,
627
  image_aspect,
628
  ],
 
649
  max_tokens,
650
  presence_penalty,
651
  frequency_penalty,
652
+ always_generate_image,
653
  image_style,
654
  image_aspect,
655
  ],