resumesearch committed on
Commit
1e12ce1
·
verified ·
1 Parent(s): f9d8c5e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -81
app.py CHANGED
@@ -1,62 +1,53 @@
1
  """
2
- app.py – Advanced Chatbot with Multi-Model Fallback & Long-Input Safety
3
  (OpenAI Python SDK β‰₯1.0.0)
4
  """
5
 
6
  import os
7
- import asyncio
8
  import gradio as gr
9
  import tiktoken
10
  from openai import OpenAI
11
 
12
- # 1) Init OpenAI client
13
  client = OpenAI(api_key=os.getenv("OPENAI_API_KEY", "").strip())
14
 
15
- # 2) Read prioritized model list from env (comma-separated)
16
- # Default: gpt-4-32k β†’ gpt-4 β†’ gpt-3.5-turbo
17
  model_list = os.getenv("OPENAI_MODEL_LIST", "gpt-4-32k,gpt-4,gpt-3.5-turbo")
18
  MODELS = [m.strip() for m in model_list.split(",") if m.strip()]
19
 
20
- # 3) Token-limit & summarization settings
21
- MAX_CONTEXT = 32768 # e.g. for gpt-4-32k
22
- BUFFER_TOKENS = 500 # reserved for the model’s reply
23
- SUMMARY_MAX = 1024 # each chunk’s summary limit
24
- REPLY_MAX = 2048 # tokens for the final answer
25
- TEMPERATURE = 0.3
26
 
27
  def count_tokens(text: str, model: str) -> int:
 
28
  enc = tiktoken.encoding_for_model(model)
29
  return len(enc.encode(text))
30
 
31
- def chunk_text(text: str, max_toks: int, model: str) -> list[str]:
32
- words, chunks, cur = text.split(), [], []
33
- for w in words:
34
- cur.append(w)
35
- if count_tokens(" ".join(cur), model) >= max_toks:
36
- last = cur.pop()
37
- chunks.append(" ".join(cur))
38
- cur = [last]
39
- if cur:
40
- chunks.append(" ".join(cur))
41
- return chunks
42
-
43
- async def summarize_chunk(chunk: str, model: str) -> str:
44
- resp = await client.chat.completions.create(
45
- model=model,
46
- messages=[
47
- {"role":"system","content":"You are a concise summarizer."},
48
- {"role":"user", "content":f"Summarize this text briefly, preserving key details:\n\n{chunk}"}
49
- ],
50
- max_tokens=SUMMARY_MAX,
51
- temperature=0.0
52
- )
53
- return resp.choices[0].message.content.strip()
54
-
55
- def safe_chat(convo: list[dict], max_reply: int):
56
  """
57
- 1) Try each model in MODELS in order
58
- 2) On model_not_found β†’ try next
59
- 3) On context-length β†’ summarize last user msg & retry that same model once
60
  """
61
  last_exc = None
62
 
@@ -65,69 +56,54 @@ def safe_chat(convo: list[dict], max_reply: int):
65
  return client.chat.completions.create(
66
  model=model,
67
  messages=convo,
68
- max_tokens=max_reply,
69
  temperature=TEMPERATURE
70
  )
71
  except Exception as e:
72
- text = str(e).lower()
73
- # MODEL NOT FOUND β†’ skip to next
74
- if "does not exist" in text or "model_not_found" in text or "404" in text:
75
  last_exc = e
76
  continue
77
-
78
- # CONTEXT-LENGTH ERROR β†’ summarize + retry this same model once
79
- if "maximum context length" in text or "context length" in text:
80
- used = count_tokens("".join(m["content"] for m in convo[:-1]), model)
81
- allowed = MAX_CONTEXT - used - BUFFER_TOKENS
82
- if allowed < 100:
83
- last_exc = RuntimeError("Input too large even after trimming.")
84
- break
85
-
86
- # chunk & summarize the last message
87
- last_msg = convo[-1]["content"]
88
- pieces = chunk_text(last_msg, allowed // 2, model)
89
- summaries = asyncio.get_event_loop().run_until_complete(
90
- asyncio.gather(*(summarize_chunk(p, model) for p in pieces))
91
- )
92
- convo[-1]["content"] = " ".join(summaries)
93
-
94
- # retry once on this model
95
  try:
96
  return client.chat.completions.create(
97
  model=model,
98
- messages=convo,
99
- max_tokens=max_reply,
100
  temperature=TEMPERATURE
101
  )
102
  except Exception as e2:
103
  last_exc = e2
104
  continue
105
-
106
- # any other error β†’ bubble up
107
  raise
108
 
109
- # if none worked:
110
  raise last_exc or RuntimeError("All models failed in safe_chat()")
111
 
112
  def chat_handler(
113
  user_message: str,
114
- history: list[tuple[str,str]],
115
  system_prompt: str
116
- ) -> tuple[list[tuple[str,str]], str]:
 
117
  if not user_message.strip():
118
  return history, ""
119
  if not client.api_key:
120
  return history, "❌ OPENAI_API_KEY not set."
121
 
122
- # Build convo payload
123
- convo = [{"role":"system","content":system_prompt}]
124
  for u, b in history or []:
125
- convo.append({"role":"user", "content":u})
126
- convo.append({"role":"assistant", "content":b})
127
- convo.append({"role":"user","content":user_message})
128
 
129
  try:
130
- resp = safe_chat(convo, max_reply=REPLY_MAX)
131
  reply = resp.choices[0].message.content
132
  except Exception as e:
133
  reply = f"❌ OpenAI error: {e}"
@@ -137,12 +113,13 @@ def chat_handler(
137
  return history, ""
138
 
139
  # β€”β€”β€” Gradio UI β€”β€”β€”
140
- with gr.Blocks(title="πŸ€– Advanced Chatbot") as demo:
141
- gr.Markdown(f"**πŸ”— Models to try (in order):** {', '.join(MODELS)}")
142
  gr.Markdown(
143
  """
144
- Paste arbitrarily long code or text; the bot will auto-summarize overflow.
145
- It will also automatically fall back if a model isn’t available.
 
 
146
  """
147
  )
148
 
@@ -155,9 +132,9 @@ with gr.Blocks(title="πŸ€– Advanced Chatbot") as demo:
155
  label="System Prompt"
156
  )
157
 
158
- chatbot = gr.Chatbot(label="Conversation")
159
- user_input = gr.Textbox(placeholder="Type your message here…", label="You")
160
- send_btn = gr.Button("Send")
161
 
162
  send_btn.click(
163
  fn=chat_handler,
@@ -167,3 +144,4 @@ with gr.Blocks(title="πŸ€– Advanced Chatbot") as demo:
167
 
168
  if __name__ == "__main__":
169
  demo.launch()
 
 
1
  """
2
+ app.py – Advanced Chatbot with Sliding-Window Context Management
3
  (OpenAI Python SDK β‰₯1.0.0)
4
  """
5
 
6
  import os
 
7
  import gradio as gr
8
  import tiktoken
9
  from openai import OpenAI
10
 
11
+ # β€”β€”β€” Initialize OpenAI client β€”β€”β€”
12
  client = OpenAI(api_key=os.getenv("OPENAI_API_KEY", "").strip())
13
 
14
+ # β€”β€”β€” Model fallback list (highest quality first) β€”β€”β€”
15
+ # Comma-separated in env, e.g. "gpt-4-32k,gpt-4,gpt-3.5-turbo"
16
  model_list = os.getenv("OPENAI_MODEL_LIST", "gpt-4-32k,gpt-4,gpt-3.5-turbo")
17
  MODELS = [m.strip() for m in model_list.split(",") if m.strip()]
18
 
19
+ # β€”β€”β€” Configuration β€”β€”β€”
20
+ MAX_CONTEXT = 32768 # model’s max context window (e.g. GPT-4-32k)
21
+ BUFFER_TOKENS = 500 # reserved tokens for the reply
22
+ REPLY_MAX = 2048 # max tokens in the answer
23
+ TEMPERATURE = 0.3 # creativity vs. determinism
 
24
 
25
  def count_tokens(text: str, model: str) -> int:
26
+ """Return the number of tokens for given text under specified model."""
27
  enc = tiktoken.encoding_for_model(model)
28
  return len(enc.encode(text))
29
 
30
+ def trim_conversation(convo: list[dict], model: str) -> list[dict]:
31
+ """
32
+ Slide the window: drop oldest user/assistant turns
33
+ until total tokens + buffer ≀ MAX_CONTEXT.
34
+ """
35
+ # compute initial usage
36
+ tokens = [count_tokens(m["content"], model) for m in convo]
37
+ total = sum(tokens)
38
+ # pop oldest turns (after system prompt) while over budget
39
+ while total + BUFFER_TOKENS > MAX_CONTEXT and len(convo) > 2:
40
+ convo.pop(1) # remove oldest user
41
+ convo.pop(1) # remove that assistant reply
42
+ tokens = [count_tokens(m["content"], model) for m in convo]
43
+ total = sum(tokens)
44
+ return convo
45
+
46
+ def safe_chat(convo: list[dict]):
 
 
 
 
 
 
 
 
47
  """
48
+ 1) Try each model in MODELS in order.
49
+ 2) If model-not-found, skip to next.
50
+ 3) If context-length error, trim history and retry same model once.
51
  """
52
  last_exc = None
53
 
 
56
  return client.chat.completions.create(
57
  model=model,
58
  messages=convo,
59
+ max_tokens=REPLY_MAX,
60
  temperature=TEMPERATURE
61
  )
62
  except Exception as e:
63
+ msg = str(e).lower()
64
+ # model not found β†’ fallback
65
+ if "does not exist" in msg or "model_not_found" in msg or "404" in msg:
66
  last_exc = e
67
  continue
68
+ # context-length error β†’ trim & retry
69
+ if "context length" in msg or "maximum context length" in msg:
70
+ trimmed = trim_conversation(convo.copy(), model)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  try:
72
  return client.chat.completions.create(
73
  model=model,
74
+ messages=trimmed,
75
+ max_tokens=REPLY_MAX,
76
  temperature=TEMPERATURE
77
  )
78
  except Exception as e2:
79
  last_exc = e2
80
  continue
81
+ # other errors β†’ re-raise
 
82
  raise
83
 
84
+ # none succeeded
85
  raise last_exc or RuntimeError("All models failed in safe_chat()")
86
 
87
  def chat_handler(
88
  user_message: str,
89
+ history: list[tuple[str, str]],
90
  system_prompt: str
91
+ ) -> tuple[list[tuple[str, str]], str]:
92
+ """Gradio handler: builds convo, calls safe_chat, updates history."""
93
  if not user_message.strip():
94
  return history, ""
95
  if not client.api_key:
96
  return history, "❌ OPENAI_API_KEY not set."
97
 
98
+ # Build the conversation payload
99
+ convo = [{"role": "system", "content": system_prompt}]
100
  for u, b in history or []:
101
+ convo.append({"role": "user", "content": u})
102
+ convo.append({"role": "assistant", "content": b})
103
+ convo.append({"role": "user", "content": user_message})
104
 
105
  try:
106
+ resp = safe_chat(convo)
107
  reply = resp.choices[0].message.content
108
  except Exception as e:
109
  reply = f"❌ OpenAI error: {e}"
 
113
  return history, ""
114
 
115
  # β€”β€”β€” Gradio UI β€”β€”β€”
116
+ with gr.Blocks(title="πŸ€– Advanced Chatbot (Sliding-Window Context)") as demo:
 
117
  gr.Markdown(
118
  """
119
+ # Advanced Chatbot
120
+ This bot preserves your newest input by **sliding** out the oldest history
121
+ when you exceed the model's context windowβ€”no summarization required.
122
+ It also **automatically falls back** through multiple models if one isn’t available.
123
  """
124
  )
125
 
 
132
  label="System Prompt"
133
  )
134
 
135
+ chatbot = gr.Chatbot(label="Conversation")
136
+ user_input = gr.Textbox(placeholder="Type your message here...", label="You")
137
+ send_btn = gr.Button("Send")
138
 
139
  send_btn.click(
140
  fn=chat_handler,
 
144
 
145
  if __name__ == "__main__":
146
  demo.launch()
147
+