Update app.py
app.py
CHANGED
@@ -1,5 +1,5 @@
 """
-app.py — Advanced Chatbot with
+app.py — Advanced Chatbot with Multi-Model Fallback & Long-Input Safety
 (OpenAI Python SDK ≥1.0.0)
 """
 
@@ -9,22 +9,26 @@ import gradio as gr
 import tiktoken
 from openai import OpenAI
 
-#
+# 1) Init OpenAI client
 client = OpenAI(api_key=os.getenv("OPENAI_API_KEY", "").strip())
 
-#
-
-
-
-REPLY_MAX = 2048
-TEMPERATURE = 0.3
-BUFFER_TOKENS = 500  # leave room for the model's answer
+# 2) Read prioritized model list from env (comma-separated)
+# Default: gpt-4-32k → gpt-4 → gpt-3.5-turbo
+model_list = os.getenv("OPENAI_MODEL_LIST", "gpt-4-32k,gpt-4,gpt-3.5-turbo")
+MODELS = [m.strip() for m in model_list.split(",") if m.strip()]
 
-
+# 3) Token-limit & summarization settings
+MAX_CONTEXT = 32768     # e.g. for gpt-4-32k
+BUFFER_TOKENS = 500     # reserved for the model's reply
+SUMMARY_MAX = 1024      # each chunk's summary limit
+REPLY_MAX = 2048        # tokens for the final answer
+TEMPERATURE = 0.3
+
+def count_tokens(text: str, model: str) -> int:
     enc = tiktoken.encoding_for_model(model)
     return len(enc.encode(text))
 
-def chunk_text(text: str, max_toks: int, model: str = MODEL_NAME) -> list[str]:
+def chunk_text(text: str, max_toks: int, model: str) -> list[str]:
     words, chunks, cur = text.split(), [], []
     for w in words:
         cur.append(w)
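A note on the configuration hunk above: OPENAI_MODEL_LIST is a plain comma-separated string, so the fallback order can be changed per deployment without touching code. A minimal sketch of how the parsing behaves (the override value is purely illustrative):

import os

os.environ["OPENAI_MODEL_LIST"] = " gpt-4o , gpt-4o-mini ,"  # hypothetical override
model_list = os.getenv("OPENAI_MODEL_LIST", "gpt-4-32k,gpt-4,gpt-3.5-turbo")
MODELS = [m.strip() for m in model_list.split(",") if m.strip()]
print(MODELS)  # ['gpt-4o', 'gpt-4o-mini'] (whitespace and empty entries are dropped)

One caveat worth keeping in mind: MAX_CONTEXT stays at 32768 whichever model ends up serving the request, so the trim budget is overestimated when the fallback lands on a smaller-context model such as gpt-3.5-turbo.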
@@ -36,59 +40,74 @@ def chunk_text(text: str, max_toks: int, model: str = MODEL_NAME) -> list[str]:
     chunks.append(" ".join(cur))
     return chunks
 
-async def summarize_chunk(chunk: str) -> str:
+async def summarize_chunk(chunk: str, model: str) -> str:
     resp = await client.chat.completions.create(
-        model=
+        model=model,
         messages=[
-            {"role":"system",
-            {"role":"user",
+            {"role":"system","content":"You are a concise summarizer."},
+            {"role":"user", "content":f"Summarize this text briefly, preserving key details:\n\n{chunk}"}
         ],
         max_tokens=SUMMARY_MAX,
-        temperature=0.0
+        temperature=0.0
     )
     return resp.choices[0].message.content.strip()
 
 def safe_chat(convo: list[dict], max_reply: int):
     """
-    1) Try
-    2) On
-    3)
+    1) Try each model in MODELS in order
+    2) On model_not_found → try next
+    3) On context-length → summarize last user msg & retry that same model once
     """
-
-
-
-
-
-
-
-
-
-
-
+    last_exc = None
+
+    for model in MODELS:
+        try:
+            return client.chat.completions.create(
+                model=model,
+                messages=convo,
+                max_tokens=max_reply,
+                temperature=TEMPERATURE
+            )
+        except Exception as e:
+            text = str(e).lower()
+            # MODEL NOT FOUND → skip to next
+            if "does not exist" in text or "model_not_found" in text or "404" in text:
+                last_exc = e
+                continue
+
+            # CONTEXT-LENGTH ERROR → summarize + retry this same model once
+            if "maximum context length" in text or "context length" in text:
+                used = count_tokens("".join(m["content"] for m in convo[:-1]), model)
+                allowed = MAX_CONTEXT - used - BUFFER_TOKENS
+                if allowed < 100:
+                    last_exc = RuntimeError("Input too large even after trimming.")
+                    break
+
+                # chunk & summarize the last message
+                last_msg = convo[-1]["content"]
+                pieces = chunk_text(last_msg, allowed // 2, model)
+                summaries = asyncio.get_event_loop().run_until_complete(
+                    asyncio.gather(*(summarize_chunk(p, model) for p in pieces))
+                )
+                convo[-1]["content"] = " ".join(summaries)
+
+                # retry once on this model
+                try:
+                    return client.chat.completions.create(
+                        model=model,
+                        messages=convo,
+                        max_tokens=max_reply,
+                        temperature=TEMPERATURE
+                    )
+                except Exception as e2:
+                    last_exc = e2
+                    continue
+
+            # any other error → bubble up
             raise
 
-
-
-    allowed = MAX_CONTEXT - used - BUFFER_TOKENS
-    if allowed < 100:
-        raise RuntimeError("Even after trimming, input is too large.")
-
-    # Chunk & summarize the last message
-    last_msg = convo[-1]["content"]
-    bits = chunk_text(last_msg, max_toks=allowed // 2, model=MODEL_NAME)
-    summaries = asyncio.get_event_loop().run_until_complete(
-        asyncio.gather(*(summarize_chunk(b) for b in bits))
-    )
-
-    convo[-1]["content"] = " ".join(summaries)
-
-    # Retry once
-    return client.chat.completions.create(
-        model=MODEL_NAME,
-        messages=convo,
-        max_tokens=max_reply,
-        temperature=TEMPERATURE
-    )
+    # if none worked:
+    raise last_exc or RuntimeError("All models failed in safe_chat()")
 
 def chat_handler(
     user_message: str,
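Two caveats on the hunk above. First, summarize_chunk awaits client.chat.completions.create, but client is the synchronous OpenAI client, whose create call returns a plain response rather than an awaitable; the SDK's AsyncOpenAI client is the awaitable variant. Second, asyncio.get_event_loop().run_until_complete is deprecated when no loop is running and raises inside a running one. A sketch of the same fan-out with the async client; the names mirror the diff, but this is a suggested shape, not what the commit ships:

import asyncio
from openai import AsyncOpenAI

aclient = AsyncOpenAI()  # reads OPENAI_API_KEY from the environment

async def summarize_chunk(chunk: str, model: str) -> str:
    # awaitable only on the async client
    resp = await aclient.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are a concise summarizer."},
            {"role": "user", "content": f"Summarize this text briefly, preserving key details:\n\n{chunk}"},
        ],
        max_tokens=1024,  # SUMMARY_MAX in the diff
        temperature=0.0,
    )
    return resp.choices[0].message.content.strip()

async def _summarize_all(pieces: list[str], model: str) -> list[str]:
    return list(await asyncio.gather(*(summarize_chunk(p, model) for p in pieces)))

def summarize_all(pieces: list[str], model: str) -> list[str]:
    # asyncio.run creates and closes its own event loop; safe in plain sync code
    return asyncio.run(_summarize_all(pieces, model))

On the error handling: matching substrings of str(e) works but is brittle across SDK versions; openai>=1.0 raises typed exceptions (openai.NotFoundError for a missing model, openai.BadRequestError for an overlong context), which would make the two branches explicit.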
@@ -100,12 +119,12 @@ def chat_handler(
     if not client.api_key:
         return history, "❌ OPENAI_API_KEY not set."
 
-    # Build
+    # Build convo payload
     convo = [{"role":"system","content":system_prompt}]
     for u, b in history or []:
         convo.append({"role":"user", "content":u})
         convo.append({"role":"assistant", "content":b})
-    convo.append({"role":"user",
+    convo.append({"role":"user","content":user_message})
 
     try:
         resp = safe_chat(convo, max_reply=REPLY_MAX)
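For reference, chat_handler assumes Gradio's tuple-style chat history, a list of (user, assistant) pairs. A tiny worked example of the payload the loop above builds, with illustrative values:

# hypothetical inputs
system_prompt = "You are a helpful assistant."
history = [("Hi", "Hello! How can I help?")]
user_message = "Now summarize our chat."

convo = [{"role": "system", "content": system_prompt}]
for u, b in history or []:
    convo.append({"role": "user", "content": u})
    convo.append({"role": "assistant", "content": b})
convo.append({"role": "user", "content": user_message})

# convo now holds four messages, in order:
# system, user("Hi"), assistant("Hello! How can I help?"), user("Now summarize our chat.")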
@@ -118,12 +137,12 @@ def chat_handler(
         return history, ""
 
 # ─── Gradio UI ───
-with gr.Blocks(title="🤖 Advanced Chatbot
+with gr.Blocks(title="🤖 Advanced Chatbot") as demo:
+    gr.Markdown(f"**Models to try (in order):** {', '.join(MODELS)}")
     gr.Markdown(
         """
-        # Advanced Chatbot
         Paste arbitrarily long code or text; the bot will auto-summarize overflow.
-
+        It will also automatically fall back if a model isn't available.
         """
     )
 
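The diff ends inside the gr.Blocks context, so the wiring of the chatbot components and the launch call are presumably unchanged further down the file. For completeness, the conventional closing of a Blocks app on Spaces looks like this (not shown in this commit, so only the usual pattern):

if __name__ == "__main__":
    demo.launch()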