resumesearch committed on
Commit
1e12ce1
·
verified ·
1 Parent(s): f9d8c5e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -81
app.py CHANGED
@@ -1,62 +1,53 @@
1
  """
2
- app.py – Advanced Chatbot with Multi-Model Fallback & Long-Input Safety
3
  (OpenAI Python SDK β‰₯1.0.0)
4
  """
5
 
6
  import os
7
- import asyncio
8
  import gradio as gr
9
  import tiktoken
10
  from openai import OpenAI
11
 
12
- # 1) Init OpenAI client
13
  client = OpenAI(api_key=os.getenv("OPENAI_API_KEY", "").strip())
14
 
15
- # 2) Read prioritized model list from env (comma-separated)
16
- # Default: gpt-4-32k β†’ gpt-4 β†’ gpt-3.5-turbo
17
  model_list = os.getenv("OPENAI_MODEL_LIST", "gpt-4-32k,gpt-4,gpt-3.5-turbo")
18
  MODELS = [m.strip() for m in model_list.split(",") if m.strip()]
19
 
20
- # 3) Token-limit & summarization settings
21
- MAX_CONTEXT = 32768 # e.g. for gpt-4-32k
22
- BUFFER_TOKENS = 500 # reserved for the model’s reply
23
- SUMMARY_MAX = 1024 # each chunk’s summary limit
24
- REPLY_MAX = 2048 # tokens for the final answer
25
- TEMPERATURE = 0.3
26
 
27
  def count_tokens(text: str, model: str) -> int:
 
28
  enc = tiktoken.encoding_for_model(model)
29
  return len(enc.encode(text))
30
 
31
- def chunk_text(text: str, max_toks: int, model: str) -> list[str]:
32
- words, chunks, cur = text.split(), [], []
33
- for w in words:
34
- cur.append(w)
35
- if count_tokens(" ".join(cur), model) >= max_toks:
36
- last = cur.pop()
37
- chunks.append(" ".join(cur))
38
- cur = [last]
39
- if cur:
40
- chunks.append(" ".join(cur))
41
- return chunks
42
-
43
- async def summarize_chunk(chunk: str, model: str) -> str:
44
- resp = await client.chat.completions.create(
45
- model=model,
46
- messages=[
47
- {"role":"system","content":"You are a concise summarizer."},
48
- {"role":"user", "content":f"Summarize this text briefly, preserving key details:\n\n{chunk}"}
49
- ],
50
- max_tokens=SUMMARY_MAX,
51
- temperature=0.0
52
- )
53
- return resp.choices[0].message.content.strip()
54
-
55
- def safe_chat(convo: list[dict], max_reply: int):
56
  """
57
- 1) Try each model in MODELS in order
58
- 2) On model_not_found β†’ try next
59
- 3) On context-length β†’ summarize last user msg & retry that same model once
60
  """
61
  last_exc = None
62
 
@@ -65,69 +56,54 @@ def safe_chat(convo: list[dict], max_reply: int):
65
  return client.chat.completions.create(
66
  model=model,
67
  messages=convo,
68
- max_tokens=max_reply,
69
  temperature=TEMPERATURE
70
  )
71
  except Exception as e:
72
- text = str(e).lower()
73
- # MODEL NOT FOUND β†’ skip to next
74
- if "does not exist" in text or "model_not_found" in text or "404" in text:
75
  last_exc = e
76
  continue
77
-
78
- # CONTEXT-LENGTH ERROR β†’ summarize + retry this same model once
79
- if "maximum context length" in text or "context length" in text:
80
- used = count_tokens("".join(m["content"] for m in convo[:-1]), model)
81
- allowed = MAX_CONTEXT - used - BUFFER_TOKENS
82
- if allowed < 100:
83
- last_exc = RuntimeError("Input too large even after trimming.")
84
- break
85
-
86
- # chunk & summarize the last message
87
- last_msg = convo[-1]["content"]
88
- pieces = chunk_text(last_msg, allowed // 2, model)
89
- summaries = asyncio.get_event_loop().run_until_complete(
90
- asyncio.gather(*(summarize_chunk(p, model) for p in pieces))
91
- )
92
- convo[-1]["content"] = " ".join(summaries)
93
-
94
- # retry once on this model
95
  try:
96
  return client.chat.completions.create(
97
  model=model,
98
- messages=convo,
99
- max_tokens=max_reply,
100
  temperature=TEMPERATURE
101
  )
102
  except Exception as e2:
103
  last_exc = e2
104
  continue
105
-
106
- # any other error β†’ bubble up
107
  raise
108
 
109
- # if none worked:
110
  raise last_exc or RuntimeError("All models failed in safe_chat()")
111
 
112
  def chat_handler(
113
  user_message: str,
114
- history: list[tuple[str,str]],
115
  system_prompt: str
116
- ) -> tuple[list[tuple[str,str]], str]:
 
117
  if not user_message.strip():
118
  return history, ""
119
  if not client.api_key:
120
  return history, "❌ OPENAI_API_KEY not set."
121
 
122
- # Build convo payload
123
- convo = [{"role":"system","content":system_prompt}]
124
  for u, b in history or []:
125
- convo.append({"role":"user", "content":u})
126
- convo.append({"role":"assistant", "content":b})
127
- convo.append({"role":"user","content":user_message})
128
 
129
  try:
130
- resp = safe_chat(convo, max_reply=REPLY_MAX)
131
  reply = resp.choices[0].message.content
132
  except Exception as e:
133
  reply = f"❌ OpenAI error: {e}"
@@ -137,12 +113,13 @@ def chat_handler(
137
  return history, ""
138
 
139
  # β€”β€”β€” Gradio UI β€”β€”β€”
140
- with gr.Blocks(title="πŸ€– Advanced Chatbot") as demo:
141
- gr.Markdown(f"**πŸ”— Models to try (in order):** {', '.join(MODELS)}")
142
  gr.Markdown(
143
  """
144
- Paste arbitrarily long code or text; the bot will auto-summarize overflow.
145
- It will also automatically fall back if a model isn’t available.
 
 
146
  """
147
  )
148
 
@@ -155,9 +132,9 @@ with gr.Blocks(title="πŸ€– Advanced Chatbot") as demo:
155
  label="System Prompt"
156
  )
157
 
158
- chatbot = gr.Chatbot(label="Conversation")
159
- user_input = gr.Textbox(placeholder="Type your message here…", label="You")
160
- send_btn = gr.Button("Send")
161
 
162
  send_btn.click(
163
  fn=chat_handler,
@@ -167,3 +144,4 @@ with gr.Blocks(title="πŸ€– Advanced Chatbot") as demo:
167
 
168
  if __name__ == "__main__":
169
  demo.launch()
 
 
1
  """
2
+ app.py – Advanced Chatbot with Sliding-Window Context Management
3
  (OpenAI Python SDK β‰₯1.0.0)
4
  """
5
 
6
  import os
 
7
  import gradio as gr
8
  import tiktoken
9
  from openai import OpenAI
10
 
11
+ # β€”β€”β€” Initialize OpenAI client β€”β€”β€”
12
  client = OpenAI(api_key=os.getenv("OPENAI_API_KEY", "").strip())
13
 
14
+ # β€”β€”β€” Model fallback list (highest quality first) β€”β€”β€”
15
+ # Comma-separated in env, e.g. "gpt-4-32k,gpt-4,gpt-3.5-turbo"
16
  model_list = os.getenv("OPENAI_MODEL_LIST", "gpt-4-32k,gpt-4,gpt-3.5-turbo")
17
  MODELS = [m.strip() for m in model_list.split(",") if m.strip()]
18
 
19
+ # β€”β€”β€” Configuration β€”β€”β€”
20
+ MAX_CONTEXT = 32768 # model’s max context window (e.g. GPT-4-32k)
21
+ BUFFER_TOKENS = 500 # reserved tokens for the reply
22
+ REPLY_MAX = 2048 # max tokens in the answer
23
+ TEMPERATURE = 0.3 # creativity vs. determinism
 
24
 
25
  def count_tokens(text: str, model: str) -> int:
26
+ """Return the number of tokens for given text under specified model."""
27
  enc = tiktoken.encoding_for_model(model)
28
  return len(enc.encode(text))
29
 
30
+ def trim_conversation(convo: list[dict], model: str) -> list[dict]:
31
+ """
32
+ Slide the window: drop oldest user/assistant turns
33
+ until total tokens + buffer ≀ MAX_CONTEXT.
34
+ """
35
+ # compute initial usage
36
+ tokens = [count_tokens(m["content"], model) for m in convo]
37
+ total = sum(tokens)
38
+ # pop oldest turns (after system prompt) while over budget
39
+ while total + BUFFER_TOKENS > MAX_CONTEXT and len(convo) > 2:
40
+ convo.pop(1) # remove oldest user
41
+ convo.pop(1) # remove that assistant reply
42
+ tokens = [count_tokens(m["content"], model) for m in convo]
43
+ total = sum(tokens)
44
+ return convo
45
+
46
+ def safe_chat(convo: list[dict]):
 
 
 
 
 
 
 
 
47
  """
48
+ 1) Try each model in MODELS in order.
49
+ 2) If model-not-found, skip to next.
50
+ 3) If context-length error, trim history and retry same model once.
51
  """
52
  last_exc = None
53
 
 
56
  return client.chat.completions.create(
57
  model=model,
58
  messages=convo,
59
+ max_tokens=REPLY_MAX,
60
  temperature=TEMPERATURE
61
  )
62
  except Exception as e:
63
+ msg = str(e).lower()
64
+ # model not found β†’ fallback
65
+ if "does not exist" in msg or "model_not_found" in msg or "404" in msg:
66
  last_exc = e
67
  continue
68
+ # context-length error β†’ trim & retry
69
+ if "context length" in msg or "maximum context length" in msg:
70
+ trimmed = trim_conversation(convo.copy(), model)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  try:
72
  return client.chat.completions.create(
73
  model=model,
74
+ messages=trimmed,
75
+ max_tokens=REPLY_MAX,
76
  temperature=TEMPERATURE
77
  )
78
  except Exception as e2:
79
  last_exc = e2
80
  continue
81
+ # other errors β†’ re-raise
 
82
  raise
83
 
84
+ # none succeeded
85
  raise last_exc or RuntimeError("All models failed in safe_chat()")
86
 
87
  def chat_handler(
88
  user_message: str,
89
+ history: list[tuple[str, str]],
90
  system_prompt: str
91
+ ) -> tuple[list[tuple[str, str]], str]:
92
+ """Gradio handler: builds convo, calls safe_chat, updates history."""
93
  if not user_message.strip():
94
  return history, ""
95
  if not client.api_key:
96
  return history, "❌ OPENAI_API_KEY not set."
97
 
98
+ # Build the conversation payload
99
+ convo = [{"role": "system", "content": system_prompt}]
100
  for u, b in history or []:
101
+ convo.append({"role": "user", "content": u})
102
+ convo.append({"role": "assistant", "content": b})
103
+ convo.append({"role": "user", "content": user_message})
104
 
105
  try:
106
+ resp = safe_chat(convo)
107
  reply = resp.choices[0].message.content
108
  except Exception as e:
109
  reply = f"❌ OpenAI error: {e}"
 
113
  return history, ""
114
 
115
  # β€”β€”β€” Gradio UI β€”β€”β€”
116
+ with gr.Blocks(title="πŸ€– Advanced Chatbot (Sliding-Window Context)") as demo:
 
117
  gr.Markdown(
118
  """
119
+ # Advanced Chatbot
120
+ This bot preserves your newest input by **sliding** out the oldest history
121
+ when you exceed the model's context windowβ€”no summarization required.
122
+ It also **automatically falls back** through multiple models if one isn’t available.
123
  """
124
  )
125
 
 
132
  label="System Prompt"
133
  )
134
 
135
+ chatbot = gr.Chatbot(label="Conversation")
136
+ user_input = gr.Textbox(placeholder="Type your message here...", label="You")
137
+ send_btn = gr.Button("Send")
138
 
139
  send_btn.click(
140
  fn=chat_handler,
 
144
 
145
  if __name__ == "__main__":
146
  demo.launch()
147
+