resumesearch committed on
Commit
f9d8c5e
Β·
verified Β·
1 Parent(s): 6d260b0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +76 -57
app.py CHANGED
@@ -1,5 +1,5 @@
1
  """
2
- app.py – Advanced Chatbot with Automatic Long-Input Handling
3
  (OpenAI Python SDK β‰₯1.0.0)
4
  """
5
 
@@ -9,22 +9,26 @@ import gradio as gr
9
  import tiktoken
10
  from openai import OpenAI
11
 
12
- # Instantiate the new client
13
  client = OpenAI(api_key=os.getenv("OPENAI_API_KEY", "").strip())
14
 
15
- # Configuration
16
- MODEL_NAME = "gpt-4-32k"
17
- MAX_CONTEXT = 32768
18
- SUMMARY_MAX = 1024
19
- REPLY_MAX = 2048
20
- TEMPERATURE = 0.3
21
- BUFFER_TOKENS = 500 # leave room for the model's answer
22
 
23
- def count_tokens(text: str, model: str = MODEL_NAME) -> int:
 
 
 
 
 
 
 
24
  enc = tiktoken.encoding_for_model(model)
25
  return len(enc.encode(text))
26
 
27
- def chunk_text(text: str, max_toks: int, model: str = MODEL_NAME) -> list[str]:
28
  words, chunks, cur = text.split(), [], []
29
  for w in words:
30
  cur.append(w)
@@ -36,59 +40,74 @@ def chunk_text(text: str, max_toks: int, model: str = MODEL_NAME) -> list[str]:
36
  chunks.append(" ".join(cur))
37
  return chunks
38
 
39
- async def summarize_chunk(chunk: str) -> str:
40
  resp = await client.chat.completions.create(
41
- model=MODEL_NAME,
42
  messages=[
43
- {"role":"system", "content":"You are a concise summarizer."},
44
- {"role":"user", "content":f"Summarize this text briefly, preserving key details:\n\n{chunk}"}
45
  ],
46
  max_tokens=SUMMARY_MAX,
47
- temperature=0.0,
48
  )
49
  return resp.choices[0].message.content.strip()
50
 
51
  def safe_chat(convo: list[dict], max_reply: int):
52
  """
53
- 1) Try a normal chat
54
- 2) On context-length error, summarize only the last user message
55
- 3) Retry once with the summaries in place
56
  """
57
- try:
58
- return client.chat.completions.create(
59
- model=MODEL_NAME,
60
- messages=convo,
61
- max_tokens=max_reply,
62
- temperature=TEMPERATURE
63
- )
64
- except Exception as e:
65
- text = str(e).lower()
66
- if "context length" not in text and "maximum context length" not in text:
67
- # Not a token-limit issue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  raise
69
 
70
- # How many tokens have we used so far?
71
- used = count_tokens("".join(m["content"] for m in convo[:-1]), MODEL_NAME)
72
- allowed = MAX_CONTEXT - used - BUFFER_TOKENS
73
- if allowed < 100:
74
- raise RuntimeError("Even after trimming, input is too large.")
75
-
76
- # Chunk & summarize the last message
77
- last_msg = convo[-1]["content"]
78
- bits = chunk_text(last_msg, max_toks=allowed // 2, model=MODEL_NAME)
79
- summaries = asyncio.get_event_loop().run_until_complete(
80
- asyncio.gather(*(summarize_chunk(b) for b in bits))
81
- )
82
-
83
- convo[-1]["content"] = " ".join(summaries)
84
-
85
- # Retry once
86
- return client.chat.completions.create(
87
- model=MODEL_NAME,
88
- messages=convo,
89
- max_tokens=max_reply,
90
- temperature=TEMPERATURE
91
- )
92
 
93
  def chat_handler(
94
  user_message: str,
@@ -100,12 +119,12 @@ def chat_handler(
100
  if not client.api_key:
101
  return history, "❌ OPENAI_API_KEY not set."
102
 
103
- # Build the full conversation
104
  convo = [{"role":"system","content":system_prompt}]
105
  for u, b in history or []:
106
  convo.append({"role":"user", "content":u})
107
  convo.append({"role":"assistant", "content":b})
108
- convo.append({"role":"user", "content":user_message})
109
 
110
  try:
111
  resp = safe_chat(convo, max_reply=REPLY_MAX)
@@ -118,12 +137,12 @@ def chat_handler(
118
  return history, ""
119
 
120
  # β€”β€”β€” Gradio UI β€”β€”β€”
121
- with gr.Blocks(title="πŸ€– Advanced Chatbot (Long-Input Safe)") as demo:
 
122
  gr.Markdown(
123
  """
124
- # Advanced Chatbot
125
  Paste arbitrarily long code or text; the bot will auto-summarize overflow.
126
- Expert in Python & C# with production-grade answers.
127
  """
128
  )
129
 
 
1
  """
2
+ app.py – Advanced Chatbot with Multi-Model Fallback & Long-Input Safety
3
  (OpenAI Python SDK β‰₯1.0.0)
4
  """
5
 
 
9
  import tiktoken
10
  from openai import OpenAI
11
 
12
+ # 1) Init OpenAI client
13
  client = OpenAI(api_key=os.getenv("OPENAI_API_KEY", "").strip())
14
 
15
+ # 2) Read prioritized model list from env (comma-separated)
16
+ # Default: gpt-4-32k β†’ gpt-4 β†’ gpt-3.5-turbo
17
+ model_list = os.getenv("OPENAI_MODEL_LIST", "gpt-4-32k,gpt-4,gpt-3.5-turbo")
18
+ MODELS = [m.strip() for m in model_list.split(",") if m.strip()]
 
 
 
19
 
20
+ # 3) Token-limit & summarization settings
21
+ MAX_CONTEXT = 32768 # e.g. for gpt-4-32k
22
+ BUFFER_TOKENS = 500 # reserved for the model’s reply
23
+ SUMMARY_MAX = 1024 # each chunk’s summary limit
24
+ REPLY_MAX = 2048 # tokens for the final answer
25
+ TEMPERATURE = 0.3
26
+
27
def count_tokens(text: str, model: str) -> int:
    """Return the number of tokens *text* encodes to for *model*.

    tiktoken.encoding_for_model raises KeyError for model names it does
    not recognize (e.g. newly released models); fall back to the
    cl100k_base encoding instead of letting that error escape.

    Args:
        text: The text to measure.
        model: Chat model name used to pick the tokenizer.

    Returns:
        Token count of *text* under the chosen encoding.
    """
    try:
        enc = tiktoken.encoding_for_model(model)
    except KeyError:
        enc = tiktoken.get_encoding("cl100k_base")
    return len(enc.encode(text))
30
 
31
+ def chunk_text(text: str, max_toks: int, model: str) -> list[str]:
32
  words, chunks, cur = text.split(), [], []
33
  for w in words:
34
  cur.append(w)
 
40
  chunks.append(" ".join(cur))
41
  return chunks
42
 
43
async def summarize_chunk(chunk: str, model: str) -> str:
    """Summarize one text chunk with the given model.

    ``client`` is the *synchronous* OpenAI client, so its ``create`` call
    must be pushed onto a worker thread via ``asyncio.to_thread`` —
    awaiting the sync call directly raises ``TypeError`` because the
    returned ``ChatCompletion`` object is not awaitable.

    Args:
        chunk: Text to condense.
        model: Chat model name used for the summarization call.

    Returns:
        The stripped summary text from the first choice.
    """
    resp = await asyncio.to_thread(
        client.chat.completions.create,
        model=model,
        messages=[
            {"role": "system", "content": "You are a concise summarizer."},
            {"role": "user", "content": f"Summarize this text briefly, preserving key details:\n\n{chunk}"},
        ],
        max_tokens=SUMMARY_MAX,
        temperature=0.0,  # deterministic summaries
    )
    return resp.choices[0].message.content.strip()
54
 
55
def safe_chat(convo: list[dict], max_reply: int):
    """Send *convo* to the first model in MODELS that accepts it.

    Strategy:
      1) Try each model in MODELS in priority order.
      2) On a model-not-found error, fall through to the next model.
      3) On a context-length error, chunk-and-summarize the last user
         message and retry the *same* model once.

    Args:
        convo: Chat messages as {"role", "content"} dicts. NOTE: on the
            context-length path the last message's content is replaced
            in place by its summary.
        max_reply: ``max_tokens`` budget for the final completion.

    Returns:
        The ChatCompletion response of the first successful call.

    Raises:
        The original exception when it is neither model-not-found nor
        context-length, or the last captured exception if every model
        fails.
    """
    last_exc = None

    for model in MODELS:
        try:
            return client.chat.completions.create(
                model=model,
                messages=convo,
                max_tokens=max_reply,
                temperature=TEMPERATURE,
            )
        except Exception as e:
            text = str(e).lower()

            # Model unavailable -> remember the error and try the next one.
            if "does not exist" in text or "model_not_found" in text or "404" in text:
                last_exc = e
                continue

            # Context-length overflow -> shrink the last user message.
            if "maximum context length" in text or "context length" in text:
                # NOTE(review): MAX_CONTEXT is fixed at 32768 regardless of
                # which model is in use — confirm per-model context limits.
                used = count_tokens("".join(m["content"] for m in convo[:-1]), model)
                allowed = MAX_CONTEXT - used - BUFFER_TOKENS
                if allowed < 100:
                    last_exc = RuntimeError("Input too large even after trimming.")
                    break

                # Chunk the oversized message and summarize the pieces
                # concurrently. asyncio.run creates (and tears down) a
                # fresh event loop; the previous get_event_loop()/
                # run_until_complete pattern is deprecated and raises
                # RuntimeError in worker threads with no current loop —
                # exactly where Gradio runs sync handlers.
                last_msg = convo[-1]["content"]
                pieces = chunk_text(last_msg, allowed // 2, model)

                async def _summarize_all():
                    return await asyncio.gather(
                        *(summarize_chunk(p, model) for p in pieces)
                    )

                summaries = asyncio.run(_summarize_all())
                convo[-1]["content"] = " ".join(summaries)

                # Retry this model once with the condensed message.
                try:
                    return client.chat.completions.create(
                        model=model,
                        messages=convo,
                        max_tokens=max_reply,
                        temperature=TEMPERATURE,
                    )
                except Exception as e2:
                    last_exc = e2
                    continue

            # Any other error -> bubble up unchanged.
            raise

    # Every model failed (or list was empty).
    raise last_exc or RuntimeError("All models failed in safe_chat()")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
 
112
  def chat_handler(
113
  user_message: str,
 
119
  if not client.api_key:
120
  return history, "❌ OPENAI_API_KEY not set."
121
 
122
+ # Build convo payload
123
  convo = [{"role":"system","content":system_prompt}]
124
  for u, b in history or []:
125
  convo.append({"role":"user", "content":u})
126
  convo.append({"role":"assistant", "content":b})
127
+ convo.append({"role":"user","content":user_message})
128
 
129
  try:
130
  resp = safe_chat(convo, max_reply=REPLY_MAX)
 
137
  return history, ""
138
 
139
  # β€”β€”β€” Gradio UI β€”β€”β€”
140
+ with gr.Blocks(title="πŸ€– Advanced Chatbot") as demo:
141
+ gr.Markdown(f"**πŸ”— Models to try (in order):** {', '.join(MODELS)}")
142
  gr.Markdown(
143
  """
 
144
  Paste arbitrarily long code or text; the bot will auto-summarize overflow.
145
+ It will also automatically fall back if a model isn’t available.
146
  """
147
  )
148