Update app.py

app.py CHANGED
@@ -37,7 +37,7 @@ BLOCKLIST = ["<script", "</script>", "{{", "}}"]
 
 
 # -----------------------------
-# Lazy Imports
+# Lazy Imports
 # -----------------------------
 def _lazy_import_openai():
     try:
@@ -56,7 +56,7 @@ def _lazy_import_gemini():
 
 
 # -----------------------------
-#
+# Utils
 # -----------------------------
 def is_blocked(text: str) -> bool:
     if not text:
@@ -66,7 +66,6 @@ def is_blocked(text: str) -> bool:
 
 
 def pil_to_base64(image) -> str:
-    """Convert PIL image to base64 JPEG for potential API usage."""
     buffer = BytesIO()
     image.convert("RGB").save(buffer, format="JPEG", quality=92)
     return base64.b64encode(buffer.getvalue()).decode("utf-8")
@@ -77,16 +76,10 @@ def approx_tokens_from_chars(text: str) -> int:
 
 
 def estimate_cost(provider_label: str, model: str, prompt: str, reply: str) -> float:
-    """
-    Super rough cost estimator. Tune to your account reality.
-    Using blended, illustrative CPMs for demo purposes only.
-    """
     toks = approx_tokens_from_chars(prompt) + approx_tokens_from_chars(reply)
     if provider_label.startswith("OpenAI"):
-
-
-        # Google/Gemini placeholder: $5 / 1M tokens
-        return round(toks / 1_000_000.0 * 5.0, 4)
+        return round(toks / 1_000_000.0 * 7.5, 4)  # illustrative
+    return round(toks / 1_000_000.0 * 5.0, 4)  # illustrative
 
 
 # -----------------------------
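For scale, the estimator above is deliberately coarse: it sums approximate token counts for the prompt and the reply, then applies a flat per-million-token rate. A quick sanity check, assuming approx_tokens_from_chars uses the common len(text) // 4 heuristic (that helper is not shown in this diff, and the provider labels below are illustrative stand-ins):

# Hypothetical check; assumes approx_tokens_from_chars(text) == len(text) // 4
prompt, reply = "x" * 2_000, "y" * 2_000  # ~500 tokens each, ~1_000 total
assert estimate_cost("OpenAI (GPT)", "gpt-4o", prompt, reply) == 0.0075  # 1_000 / 1e6 * 7.5
assert estimate_cost("Gemini", "gemini-1.5-pro", prompt, reply) == 0.005  # 1_000 / 1e6 * 5.0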
@@ -102,21 +95,36 @@ def call_openai_chat(
     max_tokens: int,
 ) -> str:
     """
-    Calls OpenAI Chat Completions.
+    Calls OpenAI Chat Completions. Auto-switches from `max_tokens` to
+    `max_completion_tokens` if the model requires it.
     """
     OpenAI = _lazy_import_openai()
     client = OpenAI(api_key=api_key)
 
-    messages = [{"role": "system", "content": system_prompt.strip() or SYSTEM_DEFAULT}]
-    messages.extend(history_messages)
+    messages = [{"role": "system", "content": (system_prompt.strip() or SYSTEM_DEFAULT)}]
+    messages.extend(history_messages or [])
     messages.append({"role": "user", "content": user_message})
 
-    resp = client.chat.completions.create(
+    kwargs = dict(
         model=(model.strip() or DEFAULT_OPENAI_MODEL),
         messages=messages,
         temperature=float(temperature),
-        max_tokens=int(max_tokens),
     )
+
+    # First try with legacy param
+    try:
+        kwargs["max_tokens"] = int(max_tokens)
+        resp = client.chat.completions.create(**kwargs)
+    except Exception as e:
+        msg = str(e)
+        # Auto-retry with new param when model rejects max_tokens
+        if "max_tokens" in msg and ("max_completion_tokens" in msg or "Unsupported parameter" in msg):
+            kwargs.pop("max_tokens", None)
+            kwargs["max_completion_tokens"] = int(max_tokens)
+            resp = client.chat.completions.create(**kwargs)
+        else:
+            raise
+
     return resp.choices[0].message.content
 
 
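The try/except shape in the hunk above generalizes beyond this app. A minimal standalone sketch of the same pattern, assuming the openai v1 client; the helper name and the error-string matching are illustrative, not part of this commit:

from openai import OpenAI

def create_with_token_fallback(client: OpenAI, **kwargs):
    # Try the legacy max_tokens parameter first; some newer models reject it
    # and require max_completion_tokens, so retry once under the new name.
    try:
        return client.chat.completions.create(**kwargs)
    except Exception as e:
        if "max_tokens" in kwargs and "max_completion_tokens" in str(e):
            kwargs["max_completion_tokens"] = kwargs.pop("max_tokens")
            return client.chat.completions.create(**kwargs)
        raise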
@@ -128,9 +136,6 @@ def call_gemini_generate(
     image=None,
     temperature: float = 0.4,
 ) -> str:
-    """
-    Calls Gemini/Nano-Banana. Supports optional image (PIL) as part.
-    """
     genai = _lazy_import_gemini()
     genai.configure(api_key=api_key)
 
@@ -150,22 +155,17 @@ def call_gemini_generate(
 
     parts: List[Any] = [user_message or ""]
     if image is not None:
-        # google-generativeai accepts PIL image directly as a part
        parts.append(image)
 
     resp = model_obj.generate_content(parts)
 
-    # Prefer .text if available
     if hasattr(resp, "text") and resp.text:
         return resp.text
-
-    # Fallback: candidates/parts
     cand = getattr(resp, "candidates", None)
     if cand and getattr(cand[0], "content", None):
         parts = getattr(cand[0].content, "parts", None)
         if parts and hasattr(parts[0], "text"):
             return parts[0].text
-
     return "(No response text returned.)"
 
 
@@ -173,9 +173,6 @@ def call_gemini_generate(
 # Orchestration
 # -----------------------------
 def to_openai_history(gradio_history: List[Tuple[str, str]]) -> List[Dict[str, str]]:
-    """
-    Convert Gradio Chatbot history ([(user, assistant), ...]) to OpenAI messages.
-    """
     oai: List[Dict[str, str]] = []
     for user_msg, ai_msg in gradio_history or []:
         if user_msg:
@@ -197,9 +194,6 @@ def infer(
     max_tokens: int,
     history: List[Tuple[str, str]],
 ):
-    """
-    Main entry: routes to the chosen provider, returns updated chat, latency, cost.
-    """
     if not (user_message and user_message.strip()):
         raise gr.Error("Please enter a prompt (or pick a starter prompt).")
     if is_blocked(user_message):
@@ -296,7 +290,7 @@ with gr.Blocks(fill_height=True, theme=gr.themes.Soft(), title="ZEN Dual-Engine
         )
         with gr.Row():
             temperature = gr.Slider(0.0, 1.0, value=0.5, step=0.05, label="Temperature")
-            max_tokens = gr.Slider(128, 4096, value=1024, step=64, label="Max tokens (OpenAI path)")
+            max_tokens = gr.Slider(128, 4096, value=1024, step=64, label="Max completion tokens (OpenAI path)")
 
         with gr.Row():
             send = gr.Button("🚀 Generate", variant="primary")
@@ -306,7 +300,7 @@ with gr.Blocks(fill_height=True, theme=gr.themes.Soft(), title="ZEN Dual-Engine
         chat = gr.Chatbot(
             label="Conversation",
             height=420,
-            type="messages",
+            type="messages",
             avatar_images=(None, None),
         )
 
@@ -358,6 +352,6 @@ with gr.Blocks(fill_height=True, theme=gr.themes.Soft(), title="ZEN Dual-Engine
         return [], 0, 0.0, None, ""
     clear.click(on_clear, outputs=[chat, latency, cost, image, user_message])
 
-# Main
+# Main
 if __name__ == "__main__":
     demo.queue(max_size=64).launch()