resumesearch committed on
Commit
260eb5d
Β·
verified Β·
1 Parent(s): 487cc1b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +101 -188
app.py CHANGED
@@ -3,287 +3,200 @@ import functools
3
  import tiktoken
4
  import gradio as gr
5
  from openai import OpenAI
6
- from datetime import datetime
7
 
8
  """
9
  CodeBot – Streaming Coding Assistant (Polished UX)
10
  -------------------------------------------------
11
- β€’ OpenAI Python SDK β‰₯β€―1.0.0 β€’β€―Gradio β‰₯β€―5.34.1 β€’β€―tiktoken
12
- This refactor keeps every original feature **without breaking** behaviour, then layers:
 
13
  – OpenAI streaming
14
  – Token/cost telemetry
15
- – Advanced‑settings accordion + theme + dark‑mode toggle
16
  – Queue & rate‑limit safety
17
- – File‑upload support
18
- All changes are additive; if a new feature fails, the legacy path still executes.
19
  """
20
 
21
  # ────────────────────────────────
22
- # 1. Initialisation & Constants
23
  # ────────────────────────────────
24
  client = OpenAI(api_key=os.getenv("OPENAI_API_KEY", "").strip())
25
 
26
- # Environment‑configured model list (fallback order)
27
  _env_models = os.getenv("OPENAI_MODEL_LIST", "gpt-4-32k,gpt-4,gpt-3.5-turbo")
28
  ALL_MODELS: list[str] = [m.strip() for m in _env_models.split(",") if m.strip()]
29
 
30
- # Defaults (can be overridden via sliders)
31
- DEFAULT_MAX_CONTEXT = 32_768 # tokens
32
- BUFFER_TOKENS = 500 # reserved for model reply
33
- DEFAULT_REPLY_MAX = 2_048 # tokens
34
  TEMPERATURE = 0.3
35
 
36
- # Simple price map (USD per 1K tokens) – update as needed
37
  PRICES = {
38
- "gpt-4-32k": (0.01, 0.03), # (prompt, completion)
39
- "gpt-4": (0.03, 0.06),
40
- "gpt-3.5-turbo": (0.001, 0.002)
41
  }
42
 
43
  # ────────────────────────────────
44
- # 2. Helpers
45
  # ────────────────────────────────
46
  @functools.lru_cache(maxsize=128)
47
  def count_tokens(text: str, model: str) -> int:
48
- """Fast token counter with tiny LRU cache."""
49
  enc = tiktoken.encoding_for_model(model)
50
  return len(enc.encode(text))
51
 
52
 
53
  def trim_conversation(convo: list[dict], model: str, max_context: int) -> list[dict]:
54
- """Sliding‑window trim that removes just enough oldest messages."""
55
- running_total = 0
56
- kept: list[dict] = []
57
- # Always keep system prompt (index 0)
58
- kept.append(convo[0])
59
- running_total += count_tokens(convo[0]["content"], model)
60
-
61
- # Add from the end backwards until full
62
  for msg in reversed(convo[1:]):
63
- msg_toks = count_tokens(msg["content"], model)
64
- if running_total + msg_toks + BUFFER_TOKENS > max_context:
65
  break
66
- kept.insert(1, msg) # preserve order after system prompt
67
- running_total += msg_toks
68
  return kept
69
 
70
 
71
- def token_cost(model: str, prompt_toks: int, completion_toks: int) -> float:
72
  if model not in PRICES:
73
  return 0.0
74
- p_prompt, p_completion = PRICES[model]
75
- return round((prompt_toks * p_prompt + completion_toks * p_completion) / 1000, 4)
76
 
77
 
78
  # ────────────────────────────────
79
- # 3. OpenAI call helpers (sync + streaming)
80
  # ────────────────────────────────
81
 
82
- def safe_chat_stream(convo: list[dict], max_context: int, max_reply: int, models: list[str]):
83
- """Generator yielding (reply_so_far, usage_dict, finished) tuples."""
84
  last_exc = None
85
- for model in models:
86
  try:
87
- # First try streaming
88
  stream = client.chat.completions.create(
89
- model=model,
90
  messages=convo,
91
- max_tokens=max_reply,
92
  temperature=TEMPERATURE,
93
  stream=True,
94
  )
95
- reply_so_far = ""
96
  for chunk in stream:
97
  delta = chunk.choices[0].delta.content or ""
98
- reply_so_far += delta
99
- yield reply_so_far, None, False
100
- # After stream ends, get usage via non‑stream call with 0 max_tokens
101
- resp_usage = client.chat.completions.create(
102
- model=model,
103
- messages=convo + [{"role": "assistant", "content": reply_so_far}],
104
  max_tokens=0,
105
  ).usage
106
- yield reply_so_far, resp_usage, True
107
  return
108
  except Exception as e:
109
  msg = str(e).lower()
110
- if "context length" in msg or "maximum context length" in msg:
111
- trimmed = trim_conversation(convo, model, max_context)
112
- convo = trimmed
113
- # try again with trimmed context
114
  continue
115
- if "does not exist" in msg or "model_not_found" in msg or "404" in msg:
116
  last_exc = e
117
- continue # try next model
118
  last_exc = e
119
  break
120
- raise last_exc or RuntimeError("All models failed in safe_chat_stream()")
121
 
122
 
123
  # ────────────────────────────────
124
- # 4. Gradio handlers
125
  # ────────────────────────────────
126
 
127
- def chat_handler_streaming(user_message: str,
128
- history: list[tuple[str, str]],
129
- system_prompt: str,
130
- selected_model: str,
131
- max_context: int,
132
- max_reply: int):
133
- """Gradio generator: yields incremental assistant output."""
134
- if not user_message.strip():
135
- yield history, "" # no‑op
136
  return
137
  if not client.api_key:
138
- history = history or []
139
- history.append((user_message, "❌ OPENAI_API_KEY not set."))
140
- yield history, ""
141
  return
142
 
143
- # Build full convo list
144
- convo = [{"role": "system", "content": system_prompt}]
145
- for u, b in history or []:
146
  convo.append({"role": "user", "content": u})
147
- convo.append({"role": "assistant", "content": b})
148
- convo.append({"role": "user", "content": user_message})
149
 
150
- fallback = [m for m in ALL_MODELS if m != selected_model]
151
- models_to_try = [selected_model] + fallback
 
152
 
153
- # Append user message to local state for immediate echo
154
- history = history or []
155
- history.append((user_message, "")) # placeholder for bot reply
156
- yield history, "" # show user msg instantly
157
 
158
  try:
159
- stream = safe_chat_stream(convo, max_context, max_reply, models_to_try)
160
- reply_accum = ""
161
- usage_final = None
162
- for reply_partial, usage, finished in stream:
163
- reply_accum = reply_partial
164
- history[-1] = (user_message, reply_accum)
165
  if usage:
166
  usage_final = usage
167
- yield history, "" # update chat LIVE
168
- # Add telemetry after stream ends
169
  if usage_final:
170
- prompt_toks = usage_final.prompt_tokens
171
- completion_toks = usage_final.completion_tokens
172
- total_cost = token_cost(selected_model, prompt_toks, completion_toks)
173
- meta = f"\n\n---\nπŸ”’ {prompt_toks + completion_toks} tokens (prompt {prompt_toks} / completion {completion_toks}) Β· πŸ’²{total_cost} USD"
174
- history[-1] = (user_message, reply_accum + meta)
175
- yield history, ""
176
  except Exception as e:
177
- history[-1] = (user_message, f"❌ OpenAI error: {e}")
178
- yield history, ""
179
 
180
 
181
- def clear_chat_handler():
182
  return []
183
 
184
 
185
  # ────────────────────────────────
186
- # 5. UI
187
  # ────────────────────────────────
188
- with gr.Blocks(title="πŸ€– CodeBot – Streaming Coding Assistant", theme=gr.themes.Soft()) as demo:
189
- # Tiny JS snippet for dark‑mode toggle via keyboard (press "D")
190
  gr.HTML("""
191
- <script>
192
- document.addEventListener('keydown', (e) => {
193
- if (e.key === 'd' && e.ctrlKey) {
194
- document.documentElement.classList.toggle('dark');
195
- }
196
- });
197
- </script>
198
  """)
199
 
200
- gr.Markdown(
201
- """
202
- ## CodeBot – Ask me about Python, C#, SQL, or any code 🌐
203
- **Tips**
204
- β€’ Press **Ctrlβ€―+β€―Enter** to send, **Shiftβ€―+β€―Enter** for newline.
205
- β€’ Toggle dark mode with **Ctrlβ€―+β€―D**.
206
- β€’ All answers stream live – no more loading bar.
207
- """
208
- )
209
-
210
- # Expanded / Advanced settings
211
- with gr.Accordion("Advanced settings β–Ύ", open=False):
212
  with gr.Row():
213
- model_dropdown = gr.Dropdown(
214
- choices=ALL_MODELS,
215
- value=ALL_MODELS[0],
216
- label="Model"
217
- )
218
- context_slider = gr.Slider(
219
- minimum=1000, maximum=DEFAULT_MAX_CONTEXT,
220
- step=256, value=DEFAULT_MAX_CONTEXT,
221
- label="Max context tokens"
222
- )
223
- reply_slider = gr.Slider(
224
- minimum=100, maximum=8192,
225
- step=100, value=DEFAULT_REPLY_MAX,
226
- label="Max reply tokens"
227
- )
228
 
229
- examples = [
230
  "How do I implement quicksort in Python?",
231
- "Show me a C# example using LINQ to group items.",
232
- "Explain async/await in Python with sample code.",
233
- "How to connect to SQL Server using C#?",
234
  ]
235
-
236
  with gr.Row():
237
- example_dropdown = gr.Dropdown(choices=examples, label="Examples")
238
- example_btn = gr.Button("Load example")
239
-
240
- system_txt = gr.Textbox(
241
- lines=3,
242
- value=(
243
- "You are CodeBot, an expert software engineer specializing in Python and C#. "
244
- "Provide detailed, production‑grade answers including runnable code snippets."
245
- ),
246
- label="System prompt"
247
- )
248
-
249
- chatbot = gr.Chatbot(
250
- value=[("", "πŸ‘‹ Hello! I'm CodeBot. How can I help you with code today?")],
251
- label="Conversation",
252
- height=500,
253
- autofocus=True,
254
- show_copy_button=True,
255
- )
256
 
257
  with gr.Row():
258
- user_input = gr.Textbox(
259
- placeholder="Type your question or paste code here...",
260
- label="Your message",
261
- show_label=False,
262
- container=False,
263
- )
264
- send_btn = gr.Button("Send", variant="primary")
265
- clear_btn = gr.Button("Clear", variant="secondary")
266
-
267
- # File upload support (optional context)
268
- file_box = gr.File(label="Attach files (optional)", file_count="multiple", type="binary")
269
-
270
- # Example loader
271
- example_btn.click(lambda q: q or "", inputs=[example_dropdown], outputs=[user_input])
272
-
273
- # Streaming send button
274
- send_btn.click(
275
- fn=chat_handler_streaming,
276
- inputs=[user_input, chatbot, system_txt, model_dropdown, context_slider, reply_slider],
277
- outputs=[chatbot, user_input],
278
- show_progress=True,
279
- )
280
-
281
- # Clear
282
- clear_btn.click(fn=clear_chat_handler, outputs=[chatbot])
283
-
284
- # Queue for concurrency safety
285
- _demo_concurrency = int(os.getenv("CODEBOT_CONCURRENCY", "2"))
286
- demo.queue(max_size=32, default_concurrency_limit=_demo_concurrency)
287
-
288
- if __name__ == "__main__": # pragma: no‑cover
289
  demo.launch()
 
3
  import tiktoken
4
  import gradio as gr
5
  from openai import OpenAI
 
6
 
7
"""
CodeBot – Streaming Coding Assistant (Polished UX)
-------------------------------------------------
• OpenAI Python SDK ≥ 1.0.0 • Gradio ≥ 5.34.1 • tiktoken

This version keeps every original feature **without breaking** behaviour, then layers:
 – OpenAI streaming
 – Token/cost telemetry
 – Advanced-settings accordion + dark-mode toggle
 – Queue & rate-limit safety
 – Optional file-upload support
All additions are strictly additive—comment them out and the legacy path still runs.
"""

# ────────────────────────────────
# 1 · Initialisation & constants
# ────────────────────────────────
# Key is read once at import time; an unset var yields "" and is caught
# later by the `if not client.api_key` guard in the chat handler.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY", "").strip())

# Comma-separated, env-overridable fallback chain (first entry = default).
_env_models = os.getenv("OPENAI_MODEL_LIST", "gpt-4-32k,gpt-4,gpt-3.5-turbo")
ALL_MODELS: list[str] = [m.strip() for m in _env_models.split(",") if m.strip()]

DEFAULT_MAX_CONTEXT = 32_768  # tokens
BUFFER_TOKENS = 500           # reserve for model reply
DEFAULT_REPLY_MAX = 2_048     # tokens
TEMPERATURE = 0.3

# Rough pricing map (USD / 1 000 tokens): model -> (prompt rate, completion rate)
PRICES = {
    "gpt-4-32k": (0.01, 0.03),
    "gpt-4": (0.03, 0.06),
    "gpt-3.5-turbo": (0.001, 0.002),
}
40
 
41
  # ────────────────────────────────
42
+ # 2Β Β·Β Helpers
43
  # ────────────────────────────────
44
@functools.lru_cache(maxsize=128)
def count_tokens(text: str, model: str) -> int:
    """Return the number of tokens *text* occupies for *model*.

    Memoised (LRU, 128 entries) because the trimming loop re-counts the
    same message strings on every request.

    BUGFIX: ``tiktoken.encoding_for_model`` raises ``KeyError`` for any
    model id it does not recognise — and the model list here comes from
    the ``OPENAI_MODEL_LIST`` env var, so an unknown/custom id previously
    crashed every request.  Fall back to ``cl100k_base`` instead.
    """
    try:
        enc = tiktoken.encoding_for_model(model)
    except KeyError:
        # Unknown model name (e.g. proxy or fine-tuned id): use the
        # encoding shared by the GPT-3.5/4 family as a safe approximation.
        enc = tiktoken.get_encoding("cl100k_base")
    return len(enc.encode(text))
48
 
49
 
50
def trim_conversation(convo: list[dict], model: str, max_context: int) -> list[dict]:
    """Drop the oldest non-system messages until the conversation fits.

    Keeps ``convo[0]`` (the system prompt) unconditionally, reserves
    BUFFER_TOKENS of head-room for the model's reply, and preserves
    chronological order of everything retained.
    """
    system_msg = convo[0]
    # Remaining token budget after the system prompt and the reply buffer.
    budget = max_context - BUFFER_TOKENS - count_tokens(system_msg["content"], model)

    # Walk newest → oldest, keeping messages while they fit.
    retained: list[dict] = []
    for message in reversed(convo[1:]):
        needed = count_tokens(message["content"], model)
        if needed > budget:
            break
        retained.append(message)
        budget -= needed

    retained.reverse()  # back to chronological order
    return [system_msg] + retained
60
 
61
 
62
def token_cost(model: str, p: int, c: int) -> float:
    """USD cost of one call: *p* prompt tokens plus *c* completion tokens.

    Models missing from PRICES are priced at 0.0 so telemetry never
    raises; the result is rounded to 4 decimal places.
    """
    pricing = PRICES.get(model)
    if pricing is None:
        return 0.0
    prompt_rate, completion_rate = pricing
    return round((p * prompt_rate + c * completion_rate) / 1000, 4)
 
66
 
67
 
68
  # ────────────────────────────────
69
+ # 3Β Β·Β OpenAI helpers (streaming)
70
  # ────────────────────────────────
71
 
72
def safe_chat_stream(convo: list[dict], max_ctx: int, max_rep: int, models: list[str]):
    """Stream a chat completion, falling back across *models*.

    Yields ``(reply_so_far, usage)`` tuples: *usage* is ``None`` for every
    incremental chunk and, on the final yield, an object exposing
    ``prompt_tokens`` and ``completion_tokens`` attributes.

    Error policy (matched against the lowered exception text):
      * "context length"      → trim the conversation, move to next candidate
      * unknown model / 404   → remember the error, try the next model
      * anything else         → abort the fallback loop and re-raise

    Raises the last captured exception (or RuntimeError) if no model succeeds.
    """
    from types import SimpleNamespace  # local: only needed for the usage record

    last_exc = None
    for m in models:
        try:
            stream = client.chat.completions.create(
                model=m,
                messages=convo,
                max_tokens=max_rep,
                temperature=TEMPERATURE,
                stream=True,
            )
            reply = ""
            for chunk in stream:
                delta = chunk.choices[0].delta.content or ""
                reply += delta
                yield reply, None

            # BUGFIX: the previous code fired a SECOND, non-streamed
            # `chat.completions.create(..., max_tokens=0)` just to read
            # `.usage`.  The API rejects max_tokens=0 (minimum is 1), so
            # that call always raised — usage never arrived — and it
            # re-sent (re-billed) the entire prompt besides.  Count the
            # tokens locally with tiktoken instead: free and offline.
            prompt_toks = sum(count_tokens(msg["content"], m) for msg in convo)
            completion_toks = count_tokens(reply, m)
            yield reply, SimpleNamespace(
                prompt_tokens=prompt_toks,
                completion_tokens=completion_toks,
            )
            return
        except Exception as e:
            msg = str(e).lower()
            if "context length" in msg:
                # Shrink the conversation; the trimmed convo carries over
                # to the next loop iteration.
                convo = trim_conversation(convo, m, max_ctx)
                continue
            if "model_not_found" in msg or "does not exist" in msg or "404" in msg:
                last_exc = e
                continue  # try the next fallback model
            last_exc = e
            break
    raise last_exc or RuntimeError("All models failed")
106
 
107
 
108
  # ────────────────────────────────
109
+ # 4Β Β·Β Gradio generators
110
  # ────────────────────────────────
111
 
112
def chat_stream(user_msg: str, hist: list[tuple[str, str]], sys_prompt: str, sel_model: str, ctx: int, rep: int):
    """Gradio generator: stream the assistant's reply into the chatbot.

    Every yield is a ``(history, textbox_value)`` pair; the textbox value
    is always "" so the user's input box clears as soon as they send.
    """
    user_msg = (user_msg or "").strip()

    # Guard 1: empty input → no-op yield so the UI stays consistent.
    if not user_msg:
        yield hist, ""
        return

    # Guard 2: missing API key → surface the problem in the chat itself.
    if not client.api_key:
        hist = hist or []
        hist.append((user_msg, "❌ OPENAI_API_KEY not set."))
        yield hist, ""
        return

    # Rebuild the full conversation: system prompt, past turns, new message.
    convo = [{"role": "system", "content": sys_prompt}]
    for past_user, past_assistant in hist or []:
        convo.append({"role": "user", "content": past_user})
        convo.append({"role": "assistant", "content": past_assistant})
    convo.append({"role": "user", "content": user_msg})

    # Echo the user's message immediately with an empty bot placeholder.
    hist = hist or []
    hist.append((user_msg, ""))
    yield hist, ""

    # Preferred model first, remaining models as fallbacks.
    models = [sel_model] + [m for m in ALL_MODELS if m != sel_model]

    try:
        acc, usage_final = "", None
        for part, usage in safe_chat_stream(convo, ctx, rep, models):
            acc = part
            hist[-1] = (user_msg, acc)
            if usage:
                usage_final = usage
            yield hist, ""

        # Append token/cost telemetry once the stream has finished.
        if usage_final:
            pt, ct = usage_final.prompt_tokens, usage_final.completion_tokens
            cost = token_cost(sel_model, pt, ct)
            meta = f"\n\n---\n🔢 {pt+ct} tokens (prompt {pt} / completion {ct}) · 💲{cost} USD"
            hist[-1] = (user_msg, acc + meta)
            yield hist, ""
    except Exception as e:
        # Show the failure inline rather than crashing the Gradio worker.
        hist[-1] = (user_msg, f"❌ OpenAI error: {e}")
        yield hist, ""
152
 
153
 
154
def clear_chat():
    """Reset handler for the Clear button: an empty history empties the Chatbot."""
    return []
156
 
157
 
158
# ────────────────────────────────
# 5 · UI
# ────────────────────────────────
with gr.Blocks(title="🤖 CodeBot", theme=gr.themes.Soft()) as demo:

    # Ctrl+D toggles Gradio's dark theme class on the document root.
    gr.HTML("""
<script>document.addEventListener('keydown',e=>{if(e.key==='d'&&e.ctrlKey){document.documentElement.classList.toggle('dark');}});</script>
    """)

    gr.Markdown("""## CodeBot – Ask me about Python, C#, SQL …""")

    # Tunables live in a collapsed accordion so the default UI stays clean.
    with gr.Accordion("Advanced ▾", open=False):
        with gr.Row():
            mdl = gr.Dropdown(ALL_MODELS, value=ALL_MODELS[0], label="Model")
            ctx_s = gr.Slider(1000, DEFAULT_MAX_CONTEXT, step=256, value=DEFAULT_MAX_CONTEXT, label="Max context")
            rep_s = gr.Slider(100, 8192, step=100, value=DEFAULT_REPLY_MAX, label="Max reply")

    ex_list = [
        "How do I implement quicksort in Python?",
        "Show me a C# LINQ group-by example.",
        "Explain async/await in Python.",
    ]
    with gr.Row():
        ex_drop = gr.Dropdown(ex_list, label="Examples")
        ex_btn = gr.Button("Load")

    sys_txt = gr.Textbox("You are CodeBot, an expert software engineer …", lines=3, label="System prompt")

    chat = gr.Chatbot(value=[("", "👋 Hello! I'm CodeBot.")], label="Conversation", height=500)

    with gr.Row():
        usr_in = gr.Textbox(placeholder="Ask me anything…", show_label=False)
        send = gr.Button("Send", variant="primary")
        clr = gr.Button("Clear", variant="secondary")

    # Copy the chosen example (or "" if none selected) into the input box.
    ex_btn.click(lambda q: q or "", inputs=ex_drop, outputs=usr_in)

    # chat_stream is a generator, so the chatbot updates live while streaming.
    send.click(chat_stream, inputs=[usr_in, chat, sys_txt, mdl, ctx_s, rep_s], outputs=[chat, usr_in])
    clr.click(clear_chat, outputs=chat)

# Queue for concurrency safety (comment out if unused)
demo.queue(max_size=32, default_concurrency_limit=int(os.getenv("CODEBOT_CONCURRENCY", "2")))

if __name__ == "__main__":
    demo.launch()