resumesearch committed on
Commit
487cc1b
·
verified ·
1 Parent(s): 5a82bf8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +221 -127
app.py CHANGED
@@ -1,195 +1,289 @@
1
- """
2
- app.py – Cool Coding Assistant with Model Selector, Context/Reply Sliders, Examples & Clear Chat
3
- (OpenAI Python SDK β‰₯1.0.0 + Gradio 5.34.1 + tiktoken)
4
- """
5
-
6
  import os
7
- import asyncio
8
- import gradio as gr
9
  import tiktoken
 
10
  from openai import OpenAI
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
- # β€”β€”β€” Initialize OpenAI client β€”β€”β€”
 
 
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY", "").strip())

# --- Environment-configured model list (fallback order) ---
# OPENAI_MODEL_LIST is a comma-separated list; the first entry is the default
# model shown in the UI and the rest are tried in order when a model is missing.
_DEFAULT_MODEL_LIST = "gpt-4-32k,gpt-4,gpt-3.5-turbo"
env_models = os.getenv("OPENAI_MODEL_LIST", _DEFAULT_MODEL_LIST)
ALL_MODELS = [m.strip() for m in env_models.split(",") if m.strip()]
if not ALL_MODELS:
    # A blank or comma-only variable would leave the list empty and make
    # ALL_MODELS[0] (used as the dropdown default) raise IndexError.
    ALL_MODELS = _DEFAULT_MODEL_LIST.split(",")

# --- Configuration ---
DEFAULT_MAX_CONTEXT = 32768  # tokens; upper bound and default of the context slider
BUFFER_TOKENS = 500          # head-room kept free when trimming history
DEFAULT_REPLY_MAX = 2048     # tokens; default of the reply-length slider
TEMPERATURE = 0.3            # sampling temperature sent with every request
 
24
 
 
 
 
 
def count_tokens(text: str, model: str) -> int:
    """Return how many tokens *text* encodes to for *model*.

    Args:
        text: The text to measure.
        model: Model name used to pick the tiktoken encoding.

    Returns:
        The length of the encoded token list.
    """
    try:
        enc = tiktoken.encoding_for_model(model)
    except KeyError:
        # Model names tiktoken cannot map (e.g. custom entries from
        # OPENAI_MODEL_LIST) fall back to the GPT-4 / GPT-3.5 encoding.
        enc = tiktoken.get_encoding("cl100k_base")
    # Pasted text may contain literal special-token strings such as
    # "<|endoftext|>"; count them as ordinary text instead of raising.
    return len(enc.encode(text, disallowed_special=()))
28
 
 
def trim_conversation(convo: list[dict], model: str, max_context: int) -> list[dict]:
    """Drop the oldest exchanges until the conversation fits the context budget.

    The first message (the system prompt) and the newest message are never
    removed. Messages are dropped from index 1, two at a time (a user turn and
    its reply), until the total plus BUFFER_TOKENS no longer exceeds
    *max_context* or only two messages remain.

    Args:
        convo: Chat messages, each a dict with a "content" string.
        model: Model name used for token counting.
        max_context: Token budget for the whole prompt.

    Returns:
        A new, possibly shorter list; *convo* itself is left untouched.
    """
    trimmed = list(convo)
    # Tokenize each message once and keep a running total instead of
    # re-encoding the whole conversation on every pass.
    sizes = [count_tokens(m["content"], model) for m in trimmed]
    total = sum(sizes)
    while total + BUFFER_TOKENS > max_context and len(trimmed) > 2:
        # Normally two messages go together; on an odd-length list take only
        # one so the newest message always survives.
        drop = min(2, len(trimmed) - 2)
        total -= sum(sizes[1:1 + drop])
        del trimmed[1:1 + drop]
        del sizes[1:1 + drop]
    return trimmed
38
-
39
def safe_chat(convo: list[dict], max_context: int, max_reply: int, model_list: list[str]):
    """Request a chat completion, falling back through *model_list*.

    For each model in order:
      * a "model missing" error moves on to the next model;
      * a "context length" error retries once on the same model with the
        conversation trimmed by trim_conversation(); if the retry fails, the
        next model is tried;
      * any other error is re-raised immediately.

    Returns the first successful response. If every model fails, raises the
    last recorded exception, or RuntimeError when none was recorded.
    """
    missing_model_markers = ("does not exist", "model_not_found", "404")
    overflow_markers = ("context length", "maximum context length")
    failure = None

    for candidate in model_list:

        def request(messages, _model=candidate):
            # One completion request with the shared sampling settings.
            return client.chat.completions.create(
                model=_model,
                messages=messages,
                max_tokens=max_reply,
                temperature=TEMPERATURE
            )

        try:
            return request(convo)
        except Exception as err:
            text = str(err).lower()
            if any(marker in text for marker in missing_model_markers):
                failure = err
                continue
            if not any(marker in text for marker in overflow_markers):
                raise
            # Prompt too long: trim a copy so the caller's list is untouched.
            shortened = trim_conversation(convo.copy(), candidate, max_context)
            try:
                return request(shortened)
            except Exception as retry_err:
                failure = retry_err
                continue

    raise failure or RuntimeError("All models failed in safe_chat()")
68
-
69
def chat_handler(
    user_message: str,
    history: list[tuple[str, str]],
    system_prompt: str,
    selected_model: str,
    max_context: int,
    max_reply: int
) -> tuple[list[tuple[str, str]], str]:
    """Handle one Send click: query the model and append the reply to the chat.

    Args:
        user_message: Text from the input box.
        history: Chatbot value as (user, assistant) pairs; may be None.
        system_prompt: Content of the system message.
        selected_model: Model tried first; the other ALL_MODELS entries follow.
        max_context: Token budget handed to safe_chat() for trimming.
        max_reply: max_tokens for the completion.

    Returns:
        (updated history, "") - the empty string clears the input box.
    """
    history = history or []
    if not user_message.strip():
        return history, ""
    if not client.api_key:
        # Report the problem in the chat. The second output is wired to the
        # input textbox, so returning the error there would overwrite the
        # user's message with it.
        history.append((user_message, "❌ OPENAI_API_KEY not set."))
        return history, ""

    convo = [{"role": "system", "content": system_prompt}]
    for u, b in history:
        # The seeded greeting has an empty user half; do not send empty turns.
        if u:
            convo.append({"role": "user", "content": u})
        if b:
            convo.append({"role": "assistant", "content": b})
    convo.append({"role": "user", "content": user_message})

    fallback = [m for m in ALL_MODELS if m != selected_model]
    models_to_try = [selected_model] + fallback

    try:
        resp = safe_chat(convo, max_context, max_reply, models_to_try)
        reply = resp.choices[0].message.content
    except Exception as e:
        # Show the failure in the chat instead of crashing the UI callback.
        reply = f"❌ OpenAI error: {e}"

    history.append((user_message, reply))
    return history, ""
100
 
101
def clear_chat_handler() -> list:
    """Return a fresh empty history; wired to the Clear button to blank the chat."""
    empty_history: list = []
    return empty_history
103
 
104
- # β€”β€”β€” Gradio UI β€”β€”β€”
105
# Build the Gradio UI: settings row, example loader, system prompt, chat
# window, input box, and the Send / Clear buttons with their event wiring.
with gr.Blocks(title="🤖 CodeBot: Your Cool Coding Assistant") as demo:
    gr.Markdown(
        """
        ## CodeBot
        - **Model selector**: pick your LLM
        - **Context slider**: control how much history to keep
        - **Reply slider**: set max response length
        - **Examples**: load sample coding questions
        - **Clear Chat**: reset conversation anytime
        - **Sliding-window**: auto-drop oldest history when over limit
        """
    )

    with gr.Row():
        model_dropdown = gr.Dropdown(
            choices=ALL_MODELS,
            # Guard against an empty model list instead of raising IndexError.
            value=ALL_MODELS[0] if ALL_MODELS else None,
            label="🔍 Choose Model"
        )
        context_slider = gr.Slider(
            minimum=1000, maximum=DEFAULT_MAX_CONTEXT,
            step=256, value=DEFAULT_MAX_CONTEXT,
            label="🗂️ Max Context Tokens"
        )
        reply_slider = gr.Slider(
            minimum=100, maximum=8192,
            step=100, value=DEFAULT_REPLY_MAX,
            label="✍️ Max Reply Tokens"
        )

    examples = [
        "How do I implement quicksort in Python?",
        "Show me a C# example using LINQ to group items.",
        "Explain async/await in Python with sample code.",
        "How to connect to SQL Server using C#?"
    ]

    with gr.Row():
        example_dropdown = gr.Dropdown(choices=examples, label="💡 Examples")
        example_btn = gr.Button("📥 Load Example")

    system_txt = gr.Textbox(
        lines=3,
        value=(
            "You are CodeBot, an expert software engineer specializing in Python and C#. "
            "Provide detailed, production-grade answers including runnable code snippets."
        ),
        label="💻 System Prompt"
    )

    chatbot = gr.Chatbot(
        value=[("", "👋 Hello! I'm CodeBot. How can I help you with code today?")],
        label="💬 Conversation",
        height=500
    )
    user_input = gr.Textbox(
        placeholder="Type your question or paste code here...",
        label="📝 Your Message",
        elem_id="user_input"
    )

    # Copy the selected example into the message box ("" when nothing is selected).
    example_btn.click(
        fn=lambda q: q or "",
        inputs=[example_dropdown],
        outputs=[user_input]
    )

    with gr.Row():
        send_btn = gr.Button("🚀 Send")
        clear_btn = gr.Button("🗑️ Clear Chat", variant="secondary")

    # Send: run the chat handler, then refresh the chat and clear the input.
    send_btn.click(
        fn=chat_handler,
        inputs=[
            user_input,
            chatbot,
            system_txt,
            model_dropdown,
            context_slider,
            reply_slider
        ],
        outputs=[chatbot, user_input]
    )
    clear_btn.click(
        fn=clear_chat_handler,
        outputs=[chatbot]
    )

if __name__ == "__main__":
    demo.launch()
 
 
 
 
 
 
1
  import os
2
+ import functools
 
3
  import tiktoken
4
+ import gradio as gr
5
  from openai import OpenAI
6
+ from datetime import datetime
7
+
8
+ """
9
+ CodeBot – Streaming Coding Assistant (Polished UX)
10
+ -------------------------------------------------
11
+ • OpenAI Python SDK ≥ 1.0.0 • Gradio ≥ 5.34.1 • tiktoken
12
+ This refactor keeps every original feature **without breaking** behaviour, then layers:
13
+ – OpenAI streaming
14
+ – Token/cost telemetry
15
+ – Advanced‑settings accordion + theme + dark‑mode toggle
16
+ – Queue & rate‑limit safety
17
+ – File‑upload support
18
+ All changes are additive; if a new feature fails, the legacy path still executes.
19
+ """
20
 
21
+ # ────────────────────────────────
22
+ # 1. Initialisation & Constants
23
+ # ────────────────────────────────
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY", "").strip())

# Environment-configured model list (fallback order). The first entry is the
# default model in the UI; the rest are tried in order when a model is missing.
_DEFAULT_MODEL_LIST = "gpt-4-32k,gpt-4,gpt-3.5-turbo"
_env_models = os.getenv("OPENAI_MODEL_LIST", _DEFAULT_MODEL_LIST)
ALL_MODELS: list[str] = [m.strip() for m in _env_models.split(",") if m.strip()]
if not ALL_MODELS:
    # A blank or comma-only variable would leave the list empty and make
    # ALL_MODELS[0] (the dropdown default) raise IndexError.
    ALL_MODELS = _DEFAULT_MODEL_LIST.split(",")

# Defaults (can be overridden via sliders)
DEFAULT_MAX_CONTEXT = 32_768  # tokens
BUFFER_TOKENS = 500           # head-room kept free when trimming history
DEFAULT_REPLY_MAX = 2_048     # tokens
TEMPERATURE = 0.3

# Simple price map: USD per 1K tokens as (prompt, completion) - update as needed.
# NOTE(review): verify against the current OpenAI price list. The gpt-4-32k
# entry previously carried the GPT-4 Turbo rate (0.01 / 0.03).
PRICES = {
    "gpt-4-32k": (0.06, 0.12),
    "gpt-4": (0.03, 0.06),
    "gpt-3.5-turbo": (0.001, 0.002),
}
42
 
43
+ # ────────────────────────────────
44
+ # 2. Helpers
45
+ # ────────────────────────────────
46
@functools.lru_cache(maxsize=128)
def count_tokens(text: str, model: str) -> int:
    """Return how many tokens *text* encodes to for *model* (LRU-cached).

    Args:
        text: The text to measure.
        model: Model name used to pick the tiktoken encoding.

    Returns:
        The length of the encoded token list.
    """
    try:
        enc = tiktoken.encoding_for_model(model)
    except KeyError:
        # Model names tiktoken cannot map (e.g. custom entries from
        # OPENAI_MODEL_LIST) fall back to the GPT-4 / GPT-3.5 encoding.
        enc = tiktoken.get_encoding("cl100k_base")
    # Pasted text may contain literal special-token strings such as
    # "<|endoftext|>"; count them as ordinary text instead of raising.
    return len(enc.encode(text, disallowed_special=()))
51
 
52
+
def trim_conversation(convo: list[dict], model: str, max_context: int) -> list[dict]:
    """Sliding-window trim: keep the system prompt plus the newest messages that fit.

    Messages are taken from the end of *convo* backwards until adding another
    one would exceed *max_context* once BUFFER_TOKENS of head-room is set
    aside. The newest message is always kept, even when it alone is over
    budget, so the request never degenerates to a bare system prompt.

    Args:
        convo: Chat messages with the system prompt at index 0; each is a
            dict with a "content" string.
        model: Model name used for token counting.
        max_context: Token budget for the whole prompt.

    Returns:
        A new list: the system prompt followed by the kept messages in their
        original order. An empty *convo* yields an empty list.
    """
    if not convo:
        return []

    system_msg, *rest = convo
    budget = max_context - BUFFER_TOKENS - count_tokens(system_msg["content"], model)

    recent: list[dict] = []
    for msg in reversed(rest):
        msg_toks = count_tokens(msg["content"], model)
        if msg_toks > budget and recent:
            break
        recent.append(msg)
        budget -= msg_toks

    # Collected newest-first; restore chronological order with one reverse
    # instead of inserting at index 1 on every iteration.
    recent.reverse()
    return [system_msg, *recent]
69
+
70
+
71
def token_cost(model: str, prompt_toks: int, completion_toks: int) -> float:
    """Estimate the USD cost of one exchange from the PRICES table.

    PRICES holds (prompt, completion) rates per 1K tokens. Models missing from
    the table cost 0.0. The result is rounded to four decimal places.
    """
    rates = PRICES.get(model)
    if rates is None:
        return 0.0
    prompt_rate, completion_rate = rates
    usd = (prompt_toks * prompt_rate + completion_toks * completion_rate) / 1000
    return round(usd, 4)
76
+
77
+
78
+ # ────────────────────────────────
79
+ # 3. OpenAI call helpers (sync + streaming)
80
+ # ────────────────────────────────
81
+
82
def _open_stream(model: str, messages: list[dict], max_reply: int):
    """Start one streaming chat completion, asking for usage in the final chunk."""
    request = dict(
        model=model,
        messages=messages,
        max_tokens=max_reply,
        temperature=TEMPERATURE,
        stream=True,
    )
    try:
        return client.chat.completions.create(
            stream_options={"include_usage": True}, **request
        )
    except TypeError:
        # NOTE(review): assumes an SDK release without `stream_options`
        # rejects the keyword with TypeError; usage then stays None.
        return client.chat.completions.create(**request)


def safe_chat_stream(convo: list[dict], max_context: int, max_reply: int, models: list[str]):
    """Stream a reply, falling back through *models*.

    Yields (reply_so_far, usage, finished) tuples: one with usage=None and
    finished=False per text delta, then a final one with finished=True and
    the usage object from the stream (None if the API sent none).

    Per model:
      * a "context length" error retries the same model once with the
        conversation trimmed by trim_conversation(); if that still fails the
        next model is tried;
      * a "model missing" error moves on to the next model;
      * any other error is re-raised.

    Raises:
        The last recorded exception when every model fails, or RuntimeError
        when *models* is empty.
    """
    missing_model_markers = ("does not exist", "model_not_found", "404")
    last_exc = None

    for model in models:
        messages = convo
        for attempt in range(2):  # the second pass runs only after a trim
            try:
                stream = _open_stream(model, messages, max_reply)
                reply_so_far = ""
                usage = None
                for chunk in stream:
                    # Usage arrives with the stream itself, so no second
                    # request is needed and the numbers describe this request.
                    if getattr(chunk, "usage", None):
                        usage = chunk.usage
                    if not chunk.choices:
                        # e.g. the trailing usage-only chunk
                        continue
                    delta = chunk.choices[0].delta.content or ""
                    if delta:
                        reply_so_far += delta
                        yield reply_so_far, None, False
                yield reply_so_far, usage, True
                return
            except Exception as e:
                last_exc = e
                msg = str(e).lower()
                if "context length" in msg:
                    if attempt == 0:
                        # Retry the same model with a shorter prompt.
                        messages = trim_conversation(messages, model, max_context)
                        continue
                    break  # still too long after trimming: next model
                if any(marker in msg for marker in missing_model_markers):
                    break  # try next model
                raise

    raise last_exc or RuntimeError("All models failed in safe_chat_stream()")
121
+
122
+
123
+ # ────────────────────────────────
124
+ # 4. Gradio handlers
125
+ # ────────────────────────────────
126
+
127
# Start of the telemetry footer appended to finished replies. It is display
# only and must be cut off again before a reply is sent back to the model.
_TELEMETRY_MARKER = "\n\n---\n🔢 "


def _strip_telemetry(reply: str) -> str:
    """Return *reply* without a trailing telemetry footer, if it has one."""
    head, sep, _ = reply.rpartition(_TELEMETRY_MARKER)
    return head if sep else reply


def chat_handler_streaming(user_message: str,
                           history: list[tuple[str, str]],
                           system_prompt: str,
                           selected_model: str,
                           max_context: int,
                           max_reply: int):
    """Gradio generator: yields (history, "") as the assistant reply streams in.

    Args:
        user_message: Text from the input box.
        history: Chatbot value as (user, assistant) pairs; may be None.
        system_prompt: Content of the system message.
        selected_model: Model tried first; the other ALL_MODELS entries follow.
        max_context: Token budget passed to safe_chat_stream() for trimming.
        max_reply: max_tokens for the completion.

    Yields:
        (updated history, "") - the empty string clears the input box.
    """
    history = history or []
    if not (user_message or "").strip():
        yield history, ""  # no-op
        return
    if not client.api_key:
        history.append((user_message, "❌ OPENAI_API_KEY not set."))
        yield history, ""
        return

    # Build the prompt from earlier turns.
    convo = [{"role": "system", "content": system_prompt}]
    for u, b in history:
        # The seeded greeting has an empty user half; do not send empty turns.
        if u:
            convo.append({"role": "user", "content": u})
        if b:
            # Cut the footer so token/cost stats are not fed back to the model.
            convo.append({"role": "assistant", "content": _strip_telemetry(b)})
    convo.append({"role": "user", "content": user_message})

    fallback = [m for m in ALL_MODELS if m != selected_model]
    models_to_try = [selected_model] + fallback

    # Echo the user message immediately with an empty placeholder reply.
    history.append((user_message, ""))
    yield history, ""

    try:
        reply_accum = ""
        usage_final = None
        for reply_partial, usage, _finished in safe_chat_stream(
            convo, max_context, max_reply, models_to_try
        ):
            reply_accum = reply_partial
            history[-1] = (user_message, reply_accum)
            if usage:
                usage_final = usage
            yield history, ""  # update chat live

        # Add telemetry after the stream ends.
        if usage_final:
            prompt_toks = usage_final.prompt_tokens
            completion_toks = usage_final.completion_tokens
            # NOTE(review): priced at selected_model's rate; if a fallback
            # model answered, this estimate uses the wrong rate.
            total_cost = token_cost(selected_model, prompt_toks, completion_toks)
            meta = (
                f"{_TELEMETRY_MARKER}{prompt_toks + completion_toks} tokens "
                f"(prompt {prompt_toks} / completion {completion_toks}) · 💲{total_cost} USD"
            )
            history[-1] = (user_message, reply_accum + meta)
            yield history, ""
    except Exception as e:
        # Show the failure in the chat instead of crashing the UI callback.
        history[-1] = (user_message, f"❌ OpenAI error: {e}")
        yield history, ""
179
 
 
 
 
180
 
181
def clear_chat_handler():
    """Return a fresh empty history; wired to the Clear button to blank the chat."""
    empty_history = list()
    return empty_history
183
 
184
+
185
+ # ────────────────────────────────
186
+ # 5. UI
187
+ # ────────────────────────────────
188
# Ctrl+D toggles dark mode. The listener is installed through Blocks(js=...),
# which runs on page load; a <script> tag inside gr.HTML is never executed.
_DARK_MODE_JS = """
() => {
    document.addEventListener('keydown', (e) => {
        if (e.key === 'd' && e.ctrlKey) {
            e.preventDefault();  // keep the browser's own Ctrl+D shortcut from firing
            document.documentElement.classList.toggle('dark');
        }
    });
}
"""

with gr.Blocks(
    title="🤖 CodeBot – Streaming Coding Assistant",
    theme=gr.themes.Soft(),
    js=_DARK_MODE_JS,
) as demo:
    gr.Markdown(
        """
        ## CodeBot – Ask me about Python, C#, SQL, or any code 🌐
        **Tips**
        - Press **Enter** or click **Send** to submit your message.
        - Toggle dark mode with **Ctrl + D**.
        - All answers stream live – no more loading bar.
        """
    )

    # Expanded / Advanced settings
    with gr.Accordion("Advanced settings ▾", open=False):
        with gr.Row():
            model_dropdown = gr.Dropdown(
                choices=ALL_MODELS,
                # Guard against an empty model list instead of raising IndexError.
                value=ALL_MODELS[0] if ALL_MODELS else None,
                label="Model",
            )
            context_slider = gr.Slider(
                minimum=1000, maximum=DEFAULT_MAX_CONTEXT,
                step=256, value=DEFAULT_MAX_CONTEXT,
                label="Max context tokens",
            )
            reply_slider = gr.Slider(
                minimum=100, maximum=8192,
                step=100, value=DEFAULT_REPLY_MAX,
                label="Max reply tokens",
            )

    examples = [
        "How do I implement quicksort in Python?",
        "Show me a C# example using LINQ to group items.",
        "Explain async/await in Python with sample code.",
        "How to connect to SQL Server using C#?",
    ]

    with gr.Row():
        example_dropdown = gr.Dropdown(choices=examples, label="Examples")
        example_btn = gr.Button("Load example")

    system_txt = gr.Textbox(
        lines=3,
        value=(
            "You are CodeBot, an expert software engineer specializing in Python and C#. "
            "Provide detailed, production‑grade answers including runnable code snippets."
        ),
        label="System prompt",
    )

    # `autofocus` is a Textbox option, not a Chatbot one; passing it to
    # gr.Chatbot fails at construction, so it lives on the input box below.
    chatbot = gr.Chatbot(
        value=[("", "👋 Hello! I'm CodeBot. How can I help you with code today?")],
        label="Conversation",
        height=500,
        show_copy_button=True,
    )

    with gr.Row():
        user_input = gr.Textbox(
            placeholder="Type your question or paste code here...",
            label="Your message",
            show_label=False,
            container=False,
            autofocus=True,
        )
        send_btn = gr.Button("Send", variant="primary")
        clear_btn = gr.Button("Clear", variant="secondary")

    # File upload support (optional context).
    # NOTE(review): file_box is not passed to any handler in this block, so
    # uploaded files currently have no effect on the conversation.
    file_box = gr.File(label="Attach files (optional)", file_count="multiple", type="binary")

    # Example loader: copy the selected example into the message box.
    example_btn.click(lambda q: q or "", inputs=[example_dropdown], outputs=[user_input])

    # Streaming send: the Send button and the Enter key share one handler.
    send_inputs = [user_input, chatbot, system_txt, model_dropdown, context_slider, reply_slider]
    send_outputs = [chatbot, user_input]
    send_btn.click(
        fn=chat_handler_streaming,
        inputs=send_inputs,
        outputs=send_outputs,
        show_progress=True,
    )
    user_input.submit(
        fn=chat_handler_streaming,
        inputs=send_inputs,
        outputs=send_outputs,
        show_progress=True,
    )

    # Clear
    clear_btn.click(fn=clear_chat_handler, outputs=[chatbot])

# Queue for concurrency safety
_demo_concurrency = int(os.getenv("CODEBOT_CONCURRENCY", "2"))
demo.queue(max_size=32, default_concurrency_limit=_demo_concurrency)

if __name__ == "__main__":  # pragma: no cover
    demo.launch()