resumesearch committed on
Commit
cd848e7
·
verified ·
1 Parent(s): 98c7bc9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +164 -77
app.py CHANGED
@@ -4,10 +4,11 @@ import tiktoken
4
  import gradio as gr
5
  from openai import OpenAI
6
 
7
- """
8
- CodeBot – Streaming Coding Assistant (Polished UX)
9
  -------------------------------------------------
10
- • OpenAI Python SDK ≥ 1.0.0 • Gradio ≥ 5.34.1 • tiktoken
 
 
11
 
12
  This version keeps every original feature **without breaking** behaviour, then layers:
13
  – OpenAI streaming
@@ -15,68 +16,101 @@ This version keeps every original feature **without breaking** behaviour, then l
15
  – Advanced‑settings accordion + dark‑mode toggle
16
  – Queue & rate‑limit safety
17
  – Optional file‑upload support
18
- All additions are strictly additive—comment them out and the legacy path still runs.
 
19
  """
20
 
21
  # ────────────────────────────────
22
- # 1 · Initialisation & constants
23
  # ────────────────────────────────
 
24
  client = OpenAI(api_key=os.getenv("OPENAI_API_KEY", "").strip())
25
 
26
- _env_models = os.getenv("OPENAI_MODEL_LIST", "gpt-4-32k,gpt-4,gpt-3.5-turbo")
27
- ALL_MODELS: list[str] = [m.strip() for m in _env_models.split(",") if m.strip()]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
- DEFAULT_MAX_CONTEXT = 32_768 # tokens
 
 
 
30
  BUFFER_TOKENS = 500 # reserve for model reply
31
  DEFAULT_REPLY_MAX = 2_048 # tokens
32
  TEMPERATURE = 0.3
33
 
34
- # Rough pricing map (USD / 1 000 tokens)
35
- PRICES = {
36
- "gpt-4-32k": (0.01, 0.03),
37
- "gpt-4": (0.03, 0.06),
38
- "gpt-3.5-turbo": (0.001, 0.002),
39
- }
40
-
41
  # ────────────────────────────────
42
- # 2 · Helpers
43
  # ────────────────────────────────
 
44
  @functools.lru_cache(maxsize=128)
45
  def count_tokens(text: str, model: str) -> int:
46
- enc = tiktoken.encoding_for_model(model)
 
 
 
 
 
47
  return len(enc.encode(text))
48
 
49
-
50
  def trim_conversation(convo: list[dict], model: str, max_context: int) -> list[dict]:
51
- kept = [convo[0]]
52
  total = count_tokens(convo[0]["content"], model)
53
- for msg in reversed(convo[1:]):
 
54
  t = count_tokens(msg["content"], model)
 
55
  if total + t + BUFFER_TOKENS > max_context:
56
  break
57
- kept.insert(1, msg)
58
  total += t
59
  return kept
60
 
61
-
62
  def token_cost(model: str, p: int, c: int) -> float:
63
- if model not in PRICES:
 
64
  return 0.0
65
- return round(((p * PRICES[model][0]) + (c * PRICES[model][1])) / 1000, 4)
66
-
67
 
68
  # ────────────────────────────────
69
- # 3 · OpenAI helpers (streaming)
70
  # ────────────────────────────────
71
-
72
  def safe_chat_stream(convo: list[dict], max_ctx: int, max_rep: int, models: list[str]):
73
  """Stream reply; after completion return usage safely (avoids max_tokens=0 bug)."""
74
  last_exc = None
75
  for m in models:
76
  try:
 
 
 
 
77
  stream = client.chat.completions.create(
78
  model=m,
79
- messages=convo,
80
  max_tokens=max_rep,
81
  temperature=TEMPERATURE,
82
  stream=True,
@@ -85,48 +119,58 @@ def safe_chat_stream(convo: list[dict], max_ctx: int, max_rep: int, models: list
85
  for chunk in stream:
86
  delta = chunk.choices[0].delta.content or ""
87
  reply += delta
88
- yield reply, None # still streaming
89
 
90
  # --- Retrieve usage tokens in a way that never requests max_tokens=0 ---
91
  try:
 
 
 
92
  usage_resp = client.chat.completions.create(
93
  model=m,
94
- messages=convo + [{"role": "assistant", "content": reply}],
95
- max_tokens=1, # 0 can trigger 400 on some models/tiers
96
  temperature=0,
97
  )
98
  usage = usage_resp.usage
99
  except Exception:
100
  # fallback: estimate usage roughly if call above fails
101
- usage = None
102
- yield reply, usage
 
 
 
 
103
  return
104
  except Exception as e:
105
  msg = str(e).lower()
106
  if "context length" in msg:
107
- convo = trim_conversation(convo, m, max_ctx)
108
- continue
 
 
109
  if "model_not_found" in msg or "does not exist" in msg or "404" in msg:
110
  last_exc = e
111
- continue
112
  last_exc = e
113
- break
114
- raise last_exc or RuntimeError("All models failed")
 
115
 
116
 
117
  # ────────────────────────────────
118
- # 4 · Gradio generators
119
  # ────────────────────────────────
120
-
121
  def chat_stream(user_msg: str, hist: list[tuple[str, str]], sys_prompt: str, sel_model: str, ctx: int, rep: int):
122
  user_msg = (user_msg or "").strip()
123
  if not user_msg:
124
- yield hist, ""
125
  return
 
126
  if not client.api_key:
127
  hist = hist or []
128
- hist.append((user_msg, "❌ OPENAI_API_KEY not set."))
129
- yield hist, ""
130
  return
131
 
132
  convo = [{"role": "system", "content": sys_prompt}]
@@ -136,76 +180,119 @@ def chat_stream(user_msg: str, hist: list[tuple[str, str]], sys_prompt: str, sel
136
  convo.append({"role": "user", "content": user_msg})
137
 
138
  hist = hist or []
139
- hist.append((user_msg, ""))
140
- yield hist, ""
141
 
142
- models = [sel_model] + [m for m in ALL_MODELS if m != sel_model]
 
143
 
 
144
  try:
145
- acc, usage_final = "", None
146
- for part, usage in safe_chat_stream(convo, ctx, rep, models):
 
 
 
147
  acc = part
148
  hist[-1] = (user_msg, acc)
149
  if usage:
150
  usage_final = usage
151
- yield hist, ""
 
 
152
  if usage_final:
153
  pt, ct = usage_final.prompt_tokens, usage_final.completion_tokens
154
- cost = token_cost(sel_model, pt, ct)
155
- meta = f"\n\n---\nπŸ”’Β {pt+ct} tokens (prompt {pt} / completion {ct}) Β· πŸ’²{cost} USD"
156
  hist[-1] = (user_msg, acc + meta)
157
- yield hist, ""
 
 
 
158
  except Exception as e:
159
  hist[-1] = (user_msg, f"❌ OpenAI error: {e}")
160
- yield hist, ""
161
-
162
 
163
  def clear_chat():
164
- return []
165
-
166
 
167
  # ────────────────────────────────
168
- # 5 · UI
169
  # ────────────────────────────────
170
- with gr.Blocks(title="πŸ€–Β CodeBot", theme=gr.themes.Soft()) as demo:
171
-
172
  gr.HTML("""
173
  <script>document.addEventListener('keydown',e=>{if(e.key==='d'&&e.ctrlKey){document.documentElement.classList.toggle('dark');}});</script>
174
  """)
 
175
 
176
- gr.Markdown("""## CodeBot – Ask me about Python, C#, SQL …""")
 
177
 
178
- with gr.Accordion("Advanced β–Ύ", open=False):
179
  with gr.Row():
180
- mdl = gr.Dropdown(ALL_MODELS, value=ALL_MODELS[0], label="Model")
181
- ctx_s = gr.Slider(1000, DEFAULT_MAX_CONTEXT, step=256, value=DEFAULT_MAX_CONTEXT, label="Max context")
182
- rep_s = gr.Slider(100, 8192, step=100, value=DEFAULT_REPLY_MAX, label="Max reply")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183
 
184
  ex_list = [
185
  "How do I implement quicksort in Python?",
186
  "Show me a C# LINQ group-by example.",
187
  "Explain async/await in Python.",
 
 
188
  ]
189
  with gr.Row():
190
- ex_drop = gr.Dropdown(ex_list, label="Examples")
191
- ex_btn = gr.Button("Load")
192
-
193
- sys_txt = gr.Textbox("You are CodeBot, an expert software engineer …", lines=3, label="System prompt")
194
-
195
- chat = gr.Chatbot(value=[("", "πŸ‘‹Β Hello!Β I'mΒ CodeBot.")], label="Conversation", height=500)
196
-
 
 
 
197
  with gr.Row():
198
- usr_in = gr.Textbox(placeholder="Ask me anything…", show_label=False)
199
  send = gr.Button("Send", variant="primary")
200
- clr = gr.Button("Clear", variant="secondary")
201
 
202
  ex_btn.click(lambda q: q or "", inputs=ex_drop, outputs=usr_in)
203
-
204
- send.click(chat_stream, inputs=[usr_in, chat, sys_txt, mdl, ctx_s, rep_s], outputs=[chat, usr_in])
205
- clr.click(clear_chat, outputs=chat)
206
 
207
  # Queue for concurrency safety (comment out if unused)
208
  demo.queue(max_size=32, default_concurrency_limit=int(os.getenv("CODEBOT_CONCURRENCY", "2")))
209
 
210
  if __name__ == "__main__":
211
- demo.launch()
 
4
  import gradio as gr
5
  from openai import OpenAI
6
 
7
+ """CodeBot – Streaming Coding Assistant (Polished UX)
 
8
  -------------------------------------------------
9
+ • OpenAI Python SDK ≥ 1.0.0
10
+ • Gradio ≥ 5.34.1
11
+ • tiktoken
12
 
13
  This version keeps every original feature **without breaking** behaviour, then layers:
14
  – OpenAI streaming
 
16
  – Advanced‑settings accordion + dark‑mode toggle
17
  – Queue & rate‑limit safety
18
  – Optional file‑upload support
19
+ – **Improved UI clarity for model selection and status**
20
+ - **Updated to include smarter OpenAI models**
21
  """
22
 
23
  # ────────────────────────────────
24
+ # 1 · Initialisation & constants
25
  # ────────────────────────────────
26
+
27
  client = OpenAI(api_key=os.getenv("OPENAI_API_KEY", "").strip())
28
 
29
+ # Define model details including pricing and max context
30
+ # Refer to OpenAI's official pricing and model docs for the most current information:
31
+ # https://platform.openai.com/docs/models/overview
32
+ # https://openai.com/api/pricing/
33
+ MODEL_DETAILS = {
34
+ # GPT-4o family (latest and generally recommended for most tasks)
35
+ "gpt-4o": {"input_price": 5.00, "output_price": 15.00, "max_context": 128_000}, # Corrected pricing based on up-to-date info, assuming text only for simplicity
36
+ "gpt-4o-mini": {"input_price": 0.15, "output_price": 0.60, "max_context": 128_000},
37
+
38
+ # Reasoning models (good for complex logic, coding, math)
39
+ "o3": {"input_price": 2.00, "output_price": 8.00, "max_context": 200_000},
40
+ "o3-pro": {"input_price": 20.00, "output_price": 80.00, "max_context": 200_000},
41
+ "o4-mini": {"input_price": 1.10, "output_price": 4.40, "max_context": 200_000},
42
+
43
+ # Older GPT-4 models (still available but consider migrating to -4o)
44
+ "gpt-4-32k": {"input_price": 0.03, "output_price": 0.06, "max_context": 32_768},
45
+ "gpt-4": {"input_price": 0.03, "output_price": 0.06, "max_context": 8_192}, # Price here may be for older versions, current GPT-4 Turbo is usually cheaper
46
+ "gpt-3.5-turbo": {"input_price": 0.001, "output_price": 0.002, "max_context": 16_385},
47
+ }
48
+
49
+ # Ensure models from environment variable are prioritized if set, otherwise use a default sensible list
50
+ _env_models = os.getenv("OPENAI_MODEL_LIST", "gpt-4o,gpt-4o-mini,o3,gpt-3.5-turbo")
51
+ ALL_MODELS: list[str] = [m.strip() for m in _env_models.split(",") if m.strip() and m.strip() in MODEL_DETAILS]
52
+
53
+ # Add any models from MODEL_DETAILS that weren't in the env variable, ensuring no duplicates
54
+ for model in MODEL_DETAILS:
55
+ if model not in ALL_MODELS:
56
+ ALL_MODELS.append(model)
57
 
58
+ if not ALL_MODELS:
59
+ ALL_MODELS = list(MODEL_DETAILS.keys()) # Fallback if env variable is empty or invalid
60
+
61
+ DEFAULT_MAX_CONTEXT = MODEL_DETAILS.get(ALL_MODELS[0], {}).get("max_context", 128_000)
62
  BUFFER_TOKENS = 500 # reserve for model reply
63
  DEFAULT_REPLY_MAX = 2_048 # tokens
64
  TEMPERATURE = 0.3
65
 
 
 
 
 
 
 
 
66
  # ────────────────────────────────
67
+ # 2 · Helpers
68
  # ────────────────────────────────
69
+
70
  @functools.lru_cache(maxsize=128)
71
  def count_tokens(text: str, model: str) -> int:
72
+ try:
73
+ enc = tiktoken.encoding_for_model(model)
74
+ except KeyError:
75
+ # Fallback for models not directly supported by tiktoken (e.g., brand new ones)
76
+ # Use a common encoding like 'cl100k_base' or raise an error if strictness is needed.
77
+ enc = tiktoken.get_encoding("cl100k_base")
78
  return len(enc.encode(text))
79
 
 
80
  def trim_conversation(convo: list[dict], model: str, max_context: int) -> list[dict]:
81
+ kept = [convo[0]] # Always keep the system prompt
82
  total = count_tokens(convo[0]["content"], model)
83
+
84
+ for msg in reversed(convo[1:]): # Iterate from most recent user/assistant messages
85
  t = count_tokens(msg["content"], model)
86
+ # Check if adding this message exceeds context, reserving buffer for reply
87
  if total + t + BUFFER_TOKENS > max_context:
88
  break
89
+ kept.insert(1, msg) # Insert at position 1 to maintain chronological order after system prompt
90
  total += t
91
  return kept
92
 
 
93
  def token_cost(model: str, p: int, c: int) -> float:
94
+ details = MODEL_DETAILS.get(model)
95
+ if not details:
96
  return 0.0
97
+ return round(((p * details["input_price"]) + (c * details["output_price"])) / 1_000_000, 6) # Corrected to per 1M tokens
 
98
 
99
  # ────────────────────────────────
100
+ # 3 · OpenAI helpers (streaming)
101
  # ────────────────────────────────
 
102
  def safe_chat_stream(convo: list[dict], max_ctx: int, max_rep: int, models: list[str]):
103
  """Stream reply; after completion return usage safely (avoids max_tokens=0 bug)."""
104
  last_exc = None
105
  for m in models:
106
  try:
107
+ # Ensure the selected model is valid and its max context is used
108
+ current_model_max_context = MODEL_DETAILS.get(m, {}).get("max_context", max_ctx)
109
+ trimmed_convo = trim_conversation(convo, m, current_model_max_context)
110
+
111
  stream = client.chat.completions.create(
112
  model=m,
113
+ messages=trimmed_convo,
114
  max_tokens=max_rep,
115
  temperature=TEMPERATURE,
116
  stream=True,
 
119
  for chunk in stream:
120
  delta = chunk.choices[0].delta.content or ""
121
  reply += delta
122
+ yield reply, None, m # Yield reply, None for usage, and the model name while streaming
123
 
124
  # --- Retrieve usage tokens in a way that never requests max_tokens=0 ---
125
  try:
126
+ # To get accurate usage, ideally you'd send the full conversation + reply back
127
+ # This call is mainly to get token usage if the stream doesn't provide it directly
128
+ # (some newer SDK versions might have it on stream.usage)
129
  usage_resp = client.chat.completions.create(
130
  model=m,
131
+ messages=trimmed_convo + [{"role": "assistant", "content": reply}],
132
+ max_tokens=1, # 0 can trigger 400 on some models/tiers
133
  temperature=0,
134
  )
135
  usage = usage_resp.usage
136
  except Exception:
137
  # fallback: estimate usage roughly if call above fails
138
+ # This estimation is crude but better than nothing
139
+ prompt_tokens_est = count_tokens(" ".join([msg["content"] for msg in trimmed_convo]), m)
140
+ completion_tokens_est = count_tokens(reply, m)
141
+ usage = type('obj', (object,), {'prompt_tokens': prompt_tokens_est, 'completion_tokens': completion_tokens_est})()
142
+
143
+ yield reply, usage, m # Yield final reply, usage, and the model name
144
  return
145
  except Exception as e:
146
  msg = str(e).lower()
147
  if "context length" in msg:
148
+ # If context length error, try trimming more aggressively or try next model
149
+ convo = trim_conversation(convo, m, max_ctx * 0.8) # Try 80% of max context
150
+ last_exc = e
151
+ continue # Try the same model again with more aggressive trimming
152
  if "model_not_found" in msg or "does not exist" in msg or "404" in msg:
153
  last_exc = e
154
+ continue # Try the next model in the list
155
  last_exc = e
156
+ break # For other errors, break and re-raise
157
+
158
+ raise last_exc or RuntimeError("All models failed or an unexpected error occurred.")
159
 
160
 
161
  # ────────────────────────────────
162
+ # 4 · Gradio generators
163
  # ────────────────────────────────
 
164
  def chat_stream(user_msg: str, hist: list[tuple[str, str]], sys_prompt: str, sel_model: str, ctx: int, rep: int):
165
  user_msg = (user_msg or "").strip()
166
  if not user_msg:
167
+ yield hist, "", "Please enter a message.", "" # Clear user input and show message
168
  return
169
+
170
  if not client.api_key:
171
  hist = hist or []
172
+ hist.append((user_msg, "❌ OPENAI_API_KEY not set. Please set your API key in environment variables."))
173
+ yield hist, "", "API Key Not Set", ""
174
  return
175
 
176
  convo = [{"role": "system", "content": sys_prompt}]
 
180
  convo.append({"role": "user", "content": user_msg})
181
 
182
  hist = hist or []
183
+ hist.append((user_msg, "")) # Append user message, assistant's reply will be filled in
 
184
 
185
+ status_message = f"Using model: **{sel_model}**"
186
+ yield hist, "", status_message, "" # Update status immediately
187
 
188
+ models_to_try = [sel_model] + [m for m in ALL_MODELS if m != sel_model]
189
  try:
190
+ acc = ""
191
+ usage_final = None
192
+ used_model = sel_model # Store the actual model that succeeded
193
+
194
+ for part, usage, model_name in safe_chat_stream(convo, ctx, rep, models_to_try):
195
  acc = part
196
  hist[-1] = (user_msg, acc)
197
  if usage:
198
  usage_final = usage
199
+ used_model = model_name # Update to the actual model that generated the response
200
+ yield hist, "", f"Using model: **{used_model}**", "" # Continuously update status
201
+
202
  if usage_final:
203
  pt, ct = usage_final.prompt_tokens, usage_final.completion_tokens
204
+ cost = token_cost(used_model, pt, ct)
205
+ meta = f"\n\n---\nπŸ”’ {pt+ct} tokens (prompt {pt} / completion {ct}) Β· πŸ’²{cost:.6f} USD"
206
  hist[-1] = (user_msg, acc + meta)
207
+ yield hist, "", f"Completed with model: **{used_model}** {meta}", ""
208
+ else:
209
+ yield hist, "", f"Completed with model: **{used_model}** (Usage details not available)", ""
210
+
211
  except Exception as e:
212
  hist[-1] = (user_msg, f"❌ OpenAI error: {e}")
213
+ yield hist, "", f"Error with model: **{sel_model}** - {e}", ""
 
214
 
215
  def clear_chat():
216
+ return [], "", "", "" # Also clear status and user input
 
217
 
218
  # ────────────────────────────────
219
+ # 5 · UI
220
  # ────────────────────────────────
221
+ with gr.Blocks(title="πŸ€– CodeBot", theme=gr.themes.Soft()) as demo:
 
222
  gr.HTML("""
223
  <script>document.addEventListener('keydown',e=>{if(e.key==='d'&&e.ctrlKey){document.documentElement.classList.toggle('dark');}});</script>
224
  """)
225
+ gr.Markdown("## CodeBot – Ask me about Python, C#, SQL …")
226
 
227
+ # Status message display
228
+ status_display = gr.Markdown(value="Ready.", elem_id="status_display")
229
 
230
+ with gr.Accordion("Advanced Settings β–Ύ", open=False):
231
  with gr.Row():
232
+ mdl = gr.Dropdown(
233
+ ALL_MODELS,
234
+ value=ALL_MODELS[0],
235
+ label="Model",
236
+ info="Select the OpenAI model to use for generation."
237
+ )
238
+ # Dynamically update max context slider based on selected model
239
+ ctx_s = gr.Slider(
240
+ minimum=1000,
241
+ maximum=max(mdl_data["max_context"] for mdl_data in MODEL_DETAILS.values()),
242
+ step=256,
243
+ value=DEFAULT_MAX_CONTEXT,
244
+ label="Max Context Tokens",
245
+ info="Maximum number of tokens for the entire conversation context (history + current message)."
246
+ )
247
+ rep_s = gr.Slider(
248
+ minimum=100,
249
+ maximum=4096, # Set a reasonable max reply limit, avoid setting it to full context
250
+ step=100,
251
+ value=DEFAULT_REPLY_MAX,
252
+ label="Max Reply Tokens",
253
+ info="Maximum number of tokens the model will generate in its response."
254
+ )
255
+
256
+ # Function to update max context slider based on dropdown selection
257
+ def update_max_context_slider(selected_model):
258
+ return MODEL_DETAILS.get(selected_model, {}).get("max_context", DEFAULT_MAX_CONTEXT)
259
+
260
+ mdl.change(
261
+ fn=update_max_context_slider,
262
+ inputs=mdl,
263
+ outputs=ctx_s
264
+ )
265
 
266
  ex_list = [
267
  "How do I implement quicksort in Python?",
268
  "Show me a C# LINQ group-by example.",
269
  "Explain async/await in Python.",
270
+ "What are the key differences between SQL and NoSQL databases?",
271
+ "Write a simple 'Hello, World!' program in Rust."
272
  ]
273
  with gr.Row():
274
+ ex_drop = gr.Dropdown(ex_list, label="Examples", info="Quickly load a common coding query.")
275
+ ex_btn = gr.Button("Load Example")
276
+
277
+ sys_txt = gr.Textbox(
278
+ "You are CodeBot, an expert software engineer specializing in Python, C#, and SQL. Provide clear, concise, and accurate code examples and explanations. Always consider context and best practices.",
279
+ lines=3,
280
+ label="System Prompt",
281
+ info="This prompt guides the AI's behavior and personality. Adjust it for different roles."
282
+ )
283
+ chat = gr.Chatbot(value=[("", "πŸ‘‹ Hello! I'm CodeBot. How can I help you today?")], label="Conversation", height=500)
284
  with gr.Row():
285
+ usr_in = gr.Textbox(placeholder="Ask me anything…", show_label=False, container=False)
286
  send = gr.Button("Send", variant="primary")
287
+ clr = gr.Button("Clear Chat", variant="secondary")
288
 
289
  ex_btn.click(lambda q: q or "", inputs=ex_drop, outputs=usr_in)
290
+ send.click(chat_stream, inputs=[usr_in, chat, sys_txt, mdl, ctx_s, rep_s], outputs=[chat, usr_in, status_display])
291
+ usr_in.submit(chat_stream, inputs=[usr_in, chat, sys_txt, mdl, ctx_s, rep_s], outputs=[chat, usr_in, status_display]) # Allow pressing Enter
292
+ clr.click(clear_chat, outputs=[chat, usr_in, status_display, ex_drop]) # Clear examples dropdown too for full reset
293
 
294
  # Queue for concurrency safety (comment out if unused)
295
  demo.queue(max_size=32, default_concurrency_limit=int(os.getenv("CODEBOT_CONCURRENCY", "2")))
296
 
297
  if __name__ == "__main__":
298
+ demo.launch()