Spaces:

LongeneckerPMO
/

openi_test

Sleeping

resumesearch committed on Jun 19, 2025

Commit

98c7bc9

verified ·

1 Parent(s): 260eb5d

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -70,6 +70,7 @@ def token_cost(model: str, p: int, c: int) -> float:
 # ────────────────────────────────
 def safe_chat_stream(convo: list[dict], max_ctx: int, max_rep: int, models: list[str]):
     last_exc = None
     for m in models:
         try:
@@ -84,12 +85,20 @@ def safe_chat_stream(convo: list[dict], max_ctx: int, max_rep: int, models: list
             for chunk in stream:
                 delta = chunk.choices[0].delta.content or ""
                 reply += delta
-                yield reply, None
-            usage = client.chat.completions.create(
-                model=m,
-                messages=convo + [{"role": "assistant", "content": reply}],
-                max_tokens=0,
-            ).usage
             yield reply, usage
             return
         except Exception as e:

 # ────────────────────────────────
 def safe_chat_stream(convo: list[dict], max_ctx: int, max_rep: int, models: list[str]):
+    """Stream reply; after completion return usage safely (avoids max_tokens=0 bug)."""
     last_exc = None
     for m in models:
         try:
             for chunk in stream:
                 delta = chunk.choices[0].delta.content or ""
                 reply += delta
+                yield reply, None  # still streaming
+            # --- Retrieve usage tokens in a way that never requests max_tokens=0 ---
+            try:
+                usage_resp = client.chat.completions.create(
+                    model=m,
+                    messages=convo + [{"role": "assistant", "content": reply}],
+                    max_tokens=1,             # 0 can trigger 400 on some models/tiers
+                    temperature=0,
+                )
+                usage = usage_resp.usage
+            except Exception:
+                # fallback: estimate usage roughly if call above fails
+                usage = None
             yield reply, usage
             return
         except Exception as e: