ZENLLC committed on
Commit
9d0ccc3
·
verified ·
1 Parent(s): d199647

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +601 -0
app.py ADDED
@@ -0,0 +1,601 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import base64
3
+ from io import BytesIO
4
+ from typing import List, Tuple, Optional
5
+
6
+ import gradio as gr
7
+ from openai import OpenAI
8
+ from google import genai
9
+ from google.genai import types
10
+ from PIL import Image
11
+
12
# -------------------------------------------------------------------
# Config
# -------------------------------------------------------------------

# Page title (browser tab + header) and intro blurb rendered at the top
# of the Gradio UI.
APP_TITLE = "ZEN AI Co. Module 2 | Agent Assembler"
APP_DESCRIPTION = """
Multi-model agent that can chat, draft reports, generate infographic briefs,
and create images using GPT-5, Gemini 2.5 Pro, Gemini 3 Pro, Nano Banana,
Nano Banana Pro, and DALL·E 3.
"""

# Reasonable defaults if user doesn't touch sliders
DEFAULT_TEMPERATURE = 0.6
DEFAULT_MAX_TOKENS = 1024
26
+
27
+ # -------------------------------------------------------------------
28
+ # Helpers: API clients
29
+ # -------------------------------------------------------------------
30
+
31
def get_openai_client(key_override: Optional[str] = None) -> OpenAI:
    """
    Build an OpenAI client.

    The API key is resolved from two places, in priority order:
      1) ``key_override`` pasted into the UI, or
      2) the ``OPENAI_API_KEY`` environment variable.

    Raises:
        ValueError: if neither source yields a non-empty key.
    """
    candidates = (
        (key_override or "").strip(),
        os.getenv("OPENAI_API_KEY", "").strip(),
    )
    api_key = next((key for key in candidates if key), "")
    if not api_key:
        raise ValueError(
            "OpenAI API key missing. "
            "Either set OPENAI_API_KEY env var or paste it in the sidebar."
        )
    return OpenAI(api_key=api_key)
46
+
47
+
48
def get_google_client(key_override: Optional[str] = None) -> genai.Client:
    """
    Build a Google GenAI client.

    The API key is resolved from two places, in priority order:
      1) ``key_override`` pasted into the UI, or
      2) the ``GOOGLE_API_KEY`` environment variable.

    Raises:
        ValueError: if neither source yields a non-empty key.
    """
    candidates = (
        (key_override or "").strip(),
        os.getenv("GOOGLE_API_KEY", "").strip(),
    )
    api_key = next((key for key in candidates if key), "")
    if not api_key:
        raise ValueError(
            "Google Gemini API key missing. "
            "Either set GOOGLE_API_KEY env var or paste it in the sidebar."
        )
    return genai.Client(api_key=api_key)
61
+
62
+ # -------------------------------------------------------------------
63
+ # Helpers: Prompt & style shaping
64
+ # -------------------------------------------------------------------
65
+
66
def build_system_instructions(
    base_instructions: str,
    theme: str,
    output_mode: str,
    tone: str,
) -> str:
    """
    Compose the final system prompt from the base agent instructions plus
    the theme / output-mode / tone directives selected in the UI.

    Unknown selections resolve to an empty directive body; blank sections
    are dropped from the joined result.
    """
    themes = {
        "ZEN Dark": "Use a sleek, modern, slightly futuristic tone. Be concise but high signal.",
        "ZEN Light": "Use a clear, friendly, educational tone suitable for learners of all ages.",
        "Research / Technical": "Write like a senior research engineer: rigorous, structured, and explicit.",
        "Youth AI Pioneer": "Explain things in simple, motivating language suitable for ages 11–18, "
        "but never dumb it down.",
    }

    outputs = {
        "Standard Chat": "Respond like a normal assistant, but keep paragraphs tight and skimmable.",
        "Executive Report": "Respond as a structured executive brief with headings, bullets, and 1–2 sentence insights.",
        "Infographic Outline": "Respond as a bullet-point infographic blueprint with short, punchy lines and clear sections.",
        "Bullet Summary": "Respond as a compact bullet summary with 5–10 bullets max.",
    }

    tones = {
        "Neutral": "Keep style neutral and globally understandable.",
        "Bold / Visionary": "Lean into visionary, high-energy language while staying precise and concrete.",
        "Minimalist": "Be extremely concise; prioritize clarity over flourish.",
    }

    # (label, lookup table, selected key) triples for the three style axes.
    directive_specs = (
        ("STYLE THEME", themes, theme),
        ("OUTPUT MODE", outputs, output_mode),
        ("TONE", tones, tone),
    )
    directives = [
        f"{label}: {table.get(key, '')}" for label, table, key in directive_specs
    ]

    sections = (
        [base_instructions.strip(), ""]
        + directives
        + ["", "Always format results cleanly in Markdown."]
    )
    return "\n".join(section for section in sections if section.strip())
107
+
108
+
109
def history_to_messages(
    history: List[Tuple[str, str]],
    user_message: str,
    system_instructions: str,
) -> List[dict]:
    """
    Translate a Gradio tuple-style chat history into OpenAI chat-completions
    message dicts, appending the new user turn at the end.

    Empty turns (falsy user or assistant strings) are skipped.
    """
    messages: List[dict] = (
        [{"role": "system", "content": system_instructions}]
        if system_instructions
        else []
    )

    for user_turn, assistant_turn in history:
        for role, text in (("user", user_turn), ("assistant", assistant_turn)):
            if text:
                messages.append({"role": role, "content": text})

    messages.append({"role": "user", "content": user_message})
    return messages
129
+
130
+
131
def history_to_gemini_prompt(
    history: List[Tuple[str, str]],
    user_message: str,
    system_instructions: str,
) -> str:
    """
    Flatten the chat history into one plain-text transcript for Gemini,
    ending with an open "ASSISTANT:" turn for the model to complete.
    """
    segments: List[str] = []
    if system_instructions:
        segments.append(f"SYSTEM:\n{system_instructions}\n")

    for user_turn, assistant_turn in history:
        if user_turn:
            segments.append(f"USER: {user_turn}")
        if assistant_turn:
            segments.append(f"ASSISTANT: {assistant_turn}")

    segments.extend([f"USER: {user_message}", "ASSISTANT:"])
    return "\n\n".join(segments)
152
+
153
+ # -------------------------------------------------------------------
154
+ # Helpers: Model calls (text)
155
+ # -------------------------------------------------------------------
156
+
157
def call_openai_text(
    openai_key: Optional[str],
    messages: List[dict],
    temperature: float,
    max_tokens: int,
) -> str:
    """
    Send a Chat Completions request to GPT-5 and return the reply text.

    Args:
        openai_key: optional UI-supplied key (falls back to OPENAI_API_KEY).
        messages: OpenAI-style message dicts (system/user/assistant).
        temperature: sampling temperature forwarded to the API.
        max_tokens: cap on generated tokens.

    Returns:
        The assistant message content, or "" if the API returns none.
    """
    client = get_openai_client(openai_key)
    completion = client.chat.completions.create(
        model="gpt-5",  # You can change to gpt-5.1 or whatever is available in your project
        messages=messages,
        temperature=temperature,
        # GPT-5-family models reject the legacy `max_tokens` parameter on
        # Chat Completions; `max_completion_tokens` is the supported cap.
        max_completion_tokens=max_tokens,
    )
    # NOTE(review): reasoning-class models may also restrict `temperature`
    # to its default — confirm against the current API reference.
    return completion.choices[0].message.content or ""
171
+
172
+
173
def call_gemini_text(
    google_key: Optional[str],
    model_id: str,
    prompt: str,
    temperature: float,
    max_tokens: int,
) -> str:
    """
    Send a single-turn text request to a Gemini model.

    Args:
        google_key: optional UI-supplied key (falls back to GOOGLE_API_KEY).
        model_id: Gemini model id (e.g. "gemini-2.5-pro").
        prompt: flattened transcript text (see history_to_gemini_prompt).
        temperature: sampling temperature.
        max_tokens: cap on generated output tokens.

    Returns:
        The response text, or "" when the response carries no text
        (e.g. a blocked or empty candidate), keeping the declared
        `-> str` contract instead of leaking None to callers.
    """
    client = get_google_client(google_key)
    response = client.models.generate_content(
        model=model_id,
        contents=[prompt],
        config=types.GenerateContentConfig(
            temperature=temperature,
            max_output_tokens=max_tokens,
        ),
    )
    # `response.text` is None when no text part is present; normalize to "".
    return response.text or ""
190
+
191
+
192
def call_hybrid_text(
    openai_key: Optional[str],
    google_key: Optional[str],
    gemini_model_id: str,
    messages: List[dict],
    gemini_prompt: str,
    temperature: float,
    max_tokens: int,
) -> str:
    """
    Query both GPT-5 and the selected Gemini model, then return a fused
    Markdown answer showing each model's perspective. A failure in either
    backend is reported inline rather than aborting the whole call.
    """

    def _attempt(label: str, call) -> str:
        # Run one backend call; convert any exception into an inline note.
        try:
            return call()
        except Exception as exc:
            return f"[{label} call failed: {exc}]"

    gpt_answer = _attempt(
        "GPT-5",
        lambda: call_openai_text(openai_key, messages, temperature, max_tokens),
    )
    gemini_answer = _attempt(
        "Gemini",
        lambda: call_gemini_text(
            google_key, gemini_model_id, gemini_prompt, temperature, max_tokens
        ),
    )

    return (
        f"### GPT-5 Perspective\n{gpt_answer}\n\n"
        f"### Gemini Perspective\n{gemini_answer}"
    )
223
+
224
+ # -------------------------------------------------------------------
225
+ # Helpers: Image generation
226
+ # -------------------------------------------------------------------
227
+
228
def call_openai_dalle(
    openai_key: Optional[str],
    prompt: str,
    size: str = "1024x1024",
) -> Optional[Image.Image]:
    """
    Generate an image with DALL·E 3 via the OpenAI Images API.

    Args:
        openai_key: optional UI-supplied key (falls back to OPENAI_API_KEY).
        prompt: image description.
        size: output resolution string accepted by the API.

    Returns:
        The decoded PIL image, or None if the API returns no image data.
    """
    client = get_openai_client(openai_key)
    response = client.images.generate(
        model="dall-e-3",
        prompt=prompt,
        size=size,
        n=1,
        # BUGFIX: the Images API defaults to returning a URL for dall-e-3,
        # which left `b64_json` as None and crashed the decode below.
        # Explicitly request base64 so there is data to decode.
        response_format="b64_json",
    )
    if not response.data:
        return None

    img_data = response.data[0].b64_json
    if not img_data:
        # Defensive: treat a missing payload as "no image" rather than crash.
        return None
    img_bytes = base64.b64decode(img_data)
    return Image.open(BytesIO(img_bytes))
250
+
251
+
252
def call_gemini_image(
    google_key: Optional[str],
    model_id: str,
    prompt: str,
) -> Optional[Image.Image]:
    """
    Generate an image with Nano Banana (gemini-2.5-flash-image) or
    Nano Banana Pro (gemini-3-pro-image-preview) via the Google GenAI SDK.

    Returns:
        The first inline image found in the response, or None if the
        response contains no image parts.
    """
    client = get_google_client(google_key)
    response = client.models.generate_content(
        model=model_id,
        contents=[prompt],
    )

    # Walk candidate parts looking for inline image data. `candidates` and
    # `parts` can be None on blocked/empty responses, so guard both.
    for candidate in response.candidates or []:
        content = getattr(candidate, "content", None)
        if content is None:
            continue
        for part in content.parts or []:
            inline = getattr(part, "inline_data", None)
            data = getattr(inline, "data", None) if inline else None
            if not data:
                continue
            # BUGFIX: the google-genai SDK delivers inline_data.data as raw
            # bytes already; base64-decoding raw bytes corrupts the image.
            # Only decode when the payload is still a base64 string.
            img_bytes = base64.b64decode(data) if isinstance(data, str) else data
            return Image.open(BytesIO(img_bytes))

    return None
276
+
277
+ # -------------------------------------------------------------------
278
+ # Core chat function used by Gradio
279
+ # -------------------------------------------------------------------
280
+
281
def _resolve_gemini_model_id(gemini_model_choice: str) -> str:
    """Map the UI radio label to the Gemini API model id."""
    if gemini_model_choice == "Gemini 2.5 Pro":
        return "gemini-2.5-pro"
    return "gemini-3-pro-preview"


def _route_text_call(
    model_family: str,
    gemini_model_choice: str,
    openai_key_ui: str,
    google_key_ui: str,
    messages: List[dict],
    gemini_prompt: str,
    temperature: float,
    max_tokens: int,
) -> str:
    """Dispatch the text request to GPT-5, a single Gemini model, or hybrid."""
    if model_family == "OpenAI: GPT-5":
        return call_openai_text(
            openai_key=openai_key_ui,
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens,
        )

    model_id = _resolve_gemini_model_id(gemini_model_choice)
    if model_family.startswith("Google Gemini"):
        return call_gemini_text(
            google_key=google_key_ui,
            model_id=model_id,
            prompt=gemini_prompt,
            temperature=temperature,
            max_tokens=max_tokens,
        )

    # Hybrid mode: fuse GPT-5 and Gemini answers.
    return call_hybrid_text(
        openai_key=openai_key_ui,
        google_key=google_key_ui,
        gemini_model_id=model_id,
        messages=messages,
        gemini_prompt=gemini_prompt,
        temperature=temperature,
        max_tokens=max_tokens,
    )


def _dispatch_image_call(
    image_backend: str,
    openai_key_ui: str,
    google_key_ui: str,
    image_prompt: str,
) -> Optional[Image.Image]:
    """Route the image request to DALL·E 3, Nano Banana, or Nano Banana Pro."""
    if image_backend == "DALL·E 3 (OpenAI)":
        return call_openai_dalle(openai_key=openai_key_ui, prompt=image_prompt)
    if image_backend == "Nano Banana (Gemini 2.5 Flash Image)":
        return call_gemini_image(
            google_key=google_key_ui,
            model_id="gemini-2.5-flash-image",
            prompt=image_prompt,
        )
    # Nano Banana Pro
    return call_gemini_image(
        google_key=google_key_ui,
        model_id="gemini-3-pro-image-preview",
        prompt=image_prompt,
    )


def agent_assembler_chat(
    user_message: str,
    chat_history: List[Tuple[str, str]],
    openai_key_ui: str,
    google_key_ui: str,
    model_family: str,
    gemini_model_choice: str,
    output_mode: str,
    theme: str,
    tone: str,
    temperature: float,
    max_tokens: int,
    generate_image: bool,
    image_backend: str,
) -> Tuple[List[Tuple[str, str]], Optional[Image.Image]]:
    """
    Main Gradio callback.

    Builds the system prompt from the selected style controls, routes the
    message to the chosen text model(s), optionally generates a companion
    image, and returns the updated chat history plus the image (or None).
    Blank messages are ignored.
    """
    if not user_message.strip():
        return chat_history, None

    base_system = (
        "You are ZEN AI Co.'s **Agent Assembler**, a multi-model orchestrator. "
        "You can:\n"
        "- Hold deep, contextual conversations about AI literacy, automation, and education.\n"
        "- Generate executive reports and structured briefs.\n"
        "- Produce detailed infographic blueprints with clear sections and labels.\n"
        "- Collaborate with image models by designing precise, typo-free prompts.\n"
        "\n"
        "Always:\n"
        "- Avoid hallucinating APIs or capabilities you don't actually have.\n"
        "- Make outputs copy-paste-ready for real projects.\n"
        "- Keep spelling and formatting extremely precise."
    )

    system_instructions = build_system_instructions(
        base_instructions=base_system,
        theme=theme,
        output_mode=output_mode,
        tone=tone,
    )

    # Prepare conversations for both stacks.
    messages = history_to_messages(chat_history, user_message, system_instructions)
    gemini_prompt = history_to_gemini_prompt(chat_history, user_message, system_instructions)

    ai_reply = _route_text_call(
        model_family=model_family,
        gemini_model_choice=gemini_model_choice,
        openai_key_ui=openai_key_ui,
        google_key_ui=google_key_ui,
        messages=messages,
        gemini_prompt=gemini_prompt,
        temperature=temperature,
        max_tokens=max_tokens,
    )

    # Update chat history (copy, don't mutate the incoming state in place).
    chat_history = chat_history + [(user_message, ai_reply)]

    # Optional image generation.
    generated_image: Optional[Image.Image] = None
    if generate_image:
        # Build an image-oriented prompt from the last user query + output mode.
        image_prompt = (
            f"{user_message.strip()}\n\n"
            f"Image intent: {output_mode}. "
            "Render clean, readable text if any labels are required. "
            "Use a style that would fit the ZEN AI Co. brand."
        )

        try:
            generated_image = _dispatch_image_call(
                image_backend, openai_key_ui, google_key_ui, image_prompt
            )
        except Exception as e:
            # Append a note to the assistant message if image generation fails.
            chat_history[-1] = (
                chat_history[-1][0],
                chat_history[-1][1] + f"\n\n_Image generation failed: {e}_",
            )

    return chat_history, generated_image
404
+
405
+
406
def clear_chat():
    """Reset the chat panel: empty history and no generated image."""
    return ([], None)
408
+
409
+ # -------------------------------------------------------------------
410
+ # Gradio UI
411
+ # -------------------------------------------------------------------
412
+
413
def build_interface() -> gr.Blocks:
    """
    Assemble the full Gradio Blocks UI and wire all event handlers.

    Layout: chat + generated image on the left; API keys, model routing,
    style, generation, and image controls on the right. Chat history is
    kept in a gr.State and mirrored into the Chatbot component.
    """
    with gr.Blocks(title=APP_TITLE) as demo:
        gr.Markdown(f"# {APP_TITLE}")
        gr.Markdown(APP_DESCRIPTION)

        with gr.Row():
            # Left: Chat + image output
            with gr.Column(scale=3):
                chatbot = gr.Chatbot(
                    label="Agent Assembler Chat",
                    # BUGFIX: the callbacks build history as (user, bot)
                    # tuples, so the component must use tuple format.
                    # type="messages" expects role/content dicts and would
                    # not render the tuple history returned by
                    # agent_assembler_chat.
                    type="tuples",
                    height=520,
                )
                image_out = gr.Image(
                    label="Latest Generated Image",
                    height=320,
                    interactive=False,
                )
                user_input = gr.Textbox(
                    label="Your message",
                    placeholder="Ask for a chat, a report, an infographic outline, or an image...",
                    lines=3,
                )

                with gr.Row():
                    send_btn = gr.Button("Send", variant="primary")
                    clear_btn = gr.Button("Clear")

            # Right: Control panel
            with gr.Column(scale=2):
                gr.Markdown("## API Keys")
                openai_key_ui = gr.Textbox(
                    label="OpenAI API Key (optional, otherwise uses OPENAI_API_KEY env var)",
                    type="password",
                )
                google_key_ui = gr.Textbox(
                    label="Google Gemini API Key (optional, otherwise uses GOOGLE_API_KEY env var)",
                    type="password",
                )

                gr.Markdown("## Model & Style")

                model_family = gr.Radio(
                    label="Primary Model Routing",
                    choices=[
                        "OpenAI: GPT-5",
                        "Google Gemini: Single",
                        "Hybrid: GPT-5 + Gemini",
                    ],
                    value="Hybrid: GPT-5 + Gemini",
                )

                gemini_model_choice = gr.Radio(
                    label="Gemini Model",
                    choices=["Gemini 2.5 Pro", "Gemini 3 Pro (preview)"],
                    value="Gemini 3 Pro (preview)",
                )

                output_mode = gr.Radio(
                    label="Output Mode",
                    choices=[
                        "Standard Chat",
                        "Executive Report",
                        "Infographic Outline",
                        "Bullet Summary",
                    ],
                    value="Standard Chat",
                )

                theme = gr.Radio(
                    label="Theme (response style)",
                    choices=[
                        "ZEN Dark",
                        "ZEN Light",
                        "Research / Technical",
                        "Youth AI Pioneer",
                    ],
                    value="ZEN Dark",
                )

                tone = gr.Radio(
                    label="Tone",
                    choices=["Neutral", "Bold / Visionary", "Minimalist"],
                    value="Neutral",
                )

                gr.Markdown("## Generation Controls")

                temperature = gr.Slider(
                    label="Temperature (creativity)",
                    minimum=0.0,
                    maximum=1.5,
                    value=DEFAULT_TEMPERATURE,
                    step=0.05,
                )

                max_tokens = gr.Slider(
                    label="Max Tokens (text length)",
                    minimum=128,
                    maximum=4096,
                    value=DEFAULT_MAX_TOKENS,
                    step=128,
                )

                gr.Markdown("## Image Generation")

                generate_image = gr.Checkbox(
                    label="Also generate an image for this message",
                    value=False,
                )

                image_backend = gr.Radio(
                    label="Image Backend",
                    choices=[
                        "DALL·E 3 (OpenAI)",
                        "Nano Banana (Gemini 2.5 Flash Image)",
                        "Nano Banana Pro (Gemini 3 Pro Image Preview)",
                    ],
                    value="Nano Banana Pro (Gemini 3 Pro Image Preview)",
                )

        # State holding the tuple-format chat history between turns.
        chat_state = gr.State([])

        # Shared input list for both the Send button and textbox submit.
        chat_inputs = [
            user_input,
            chat_state,
            openai_key_ui,
            google_key_ui,
            model_family,
            gemini_model_choice,
            output_mode,
            theme,
            tone,
            temperature,
            max_tokens,
            generate_image,
            image_backend,
        ]

        # Wire up events: run the agent, then sync state + clear the textbox.
        send_btn.click(
            fn=agent_assembler_chat,
            inputs=chat_inputs,
            outputs=[chatbot, image_out],
        ).then(
            fn=lambda h: (h, ""),  # update state + clear box
            inputs=chatbot,
            outputs=[chat_state, user_input],
        )

        user_input.submit(
            fn=agent_assembler_chat,
            inputs=chat_inputs,
            outputs=[chatbot, image_out],
        ).then(
            fn=lambda h: (h, ""),  # update state + clear box
            inputs=chatbot,
            outputs=[chat_state, user_input],
        )

        clear_btn.click(
            fn=clear_chat,
            inputs=None,
            outputs=[chatbot, image_out],
        ).then(
            fn=lambda: [],
            inputs=None,
            outputs=chat_state,
        )

    return demo
597
+
598
+
599
+ if __name__ == "__main__":
600
+ demo = build_interface()
601
+ demo.launch()