resumesearch committed on
Commit
98c7bc9
·
verified ·
1 Parent(s): 260eb5d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -6
app.py CHANGED
@@ -70,6 +70,7 @@ def token_cost(model: str, p: int, c: int) -> float:
70
  # ────────────────────────────────
71
 
72
  def safe_chat_stream(convo: list[dict], max_ctx: int, max_rep: int, models: list[str]):
 
73
  last_exc = None
74
  for m in models:
75
  try:
@@ -84,12 +85,20 @@ def safe_chat_stream(convo: list[dict], max_ctx: int, max_rep: int, models: list
84
  for chunk in stream:
85
  delta = chunk.choices[0].delta.content or ""
86
  reply += delta
87
- yield reply, None
88
- usage = client.chat.completions.create(
89
- model=m,
90
- messages=convo + [{"role": "assistant", "content": reply}],
91
- max_tokens=0,
92
- ).usage
 
 
 
 
 
 
 
 
93
  yield reply, usage
94
  return
95
  except Exception as e:
 
70
  # ────────────────────────────────
71
 
72
  def safe_chat_stream(convo: list[dict], max_ctx: int, max_rep: int, models: list[str]):
73
+ """Stream reply; after completion return usage safely (avoids max_tokens=0 bug)."""
74
  last_exc = None
75
  for m in models:
76
  try:
 
85
  for chunk in stream:
86
  delta = chunk.choices[0].delta.content or ""
87
  reply += delta
88
+ yield reply, None # still streaming
89
+
90
+ # --- Retrieve usage tokens in a way that never requests max_tokens=0 ---
91
+ try:
92
+ usage_resp = client.chat.completions.create(
93
+ model=m,
94
+ messages=convo + [{"role": "assistant", "content": reply}],
95
+ max_tokens=1, # 0 can trigger 400 on some models/tiers
96
+ temperature=0,
97
+ )
98
+ usage = usage_resp.usage
99
+ except Exception:
100
+ # fallback: estimate usage roughly if call above fails
101
+ usage = None
102
  yield reply, usage
103
  return
104
  except Exception as e: