Update app.py
app.py
CHANGED
@@ -1,5 +1,5 @@
 """
-app.py — Advanced Chatbot with
+app.py — Advanced Chatbot with Multi-Model Fallback & Long-Input Safety
 (OpenAI Python SDK ≥1.0.0)
 """
 
@@ -9,22 +9,26 @@ import gradio as gr
 import tiktoken
 from openai import OpenAI
 
-#
+# 1) Init OpenAI client
 client = OpenAI(api_key=os.getenv("OPENAI_API_KEY", "").strip())
 
-#
-
-
-
-REPLY_MAX = 2048
-TEMPERATURE = 0.3
-BUFFER_TOKENS = 500  # leave room for the model's answer
+# 2) Read prioritized model list from env (comma-separated)
+# Default: gpt-4-32k → gpt-4 → gpt-3.5-turbo
+model_list = os.getenv("OPENAI_MODEL_LIST", "gpt-4-32k,gpt-4,gpt-3.5-turbo")
+MODELS = [m.strip() for m in model_list.split(",") if m.strip()]
 
-
+# 3) Token-limit & summarization settings
+MAX_CONTEXT = 32768     # e.g. for gpt-4-32k
+BUFFER_TOKENS = 500     # reserved for the model's reply
+SUMMARY_MAX = 1024      # each chunk's summary limit
+REPLY_MAX = 2048        # tokens for the final answer
+TEMPERATURE = 0.3
+
+def count_tokens(text: str, model: str) -> int:
     enc = tiktoken.encoding_for_model(model)
     return len(enc.encode(text))
 
-def chunk_text(text: str, max_toks: int, model: str = MODEL_NAME) -> list[str]:
+def chunk_text(text: str, max_toks: int, model: str) -> list[str]:
     words, chunks, cur = text.split(), [], []
     for w in words:
         cur.append(w)
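A note on the configuration hunk above: OPENAI_MODEL_LIST is a plain comma-separated string, so the fallback order can be changed per deployment without touching code. A minimal sketch of how the parsing behaves (the override value is purely illustrative):

import os

os.environ["OPENAI_MODEL_LIST"] = " gpt-4o , gpt-4o-mini ,"  # hypothetical override
model_list = os.getenv("OPENAI_MODEL_LIST", "gpt-4-32k,gpt-4,gpt-3.5-turbo")
MODELS = [m.strip() for m in model_list.split(",") if m.strip()]
print(MODELS)  # ['gpt-4o', 'gpt-4o-mini'] (whitespace and empty entries are dropped)

One caveat worth keeping in mind: MAX_CONTEXT stays at 32768 whichever model ends up serving the request, so the trim budget is overestimated when the fallback lands on a smaller-context model such as gpt-3.5-turbo.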
@@ -36,59 +40,74 @@ def chunk_text(text: str, max_toks: int, model: str = MODEL_NAME) -> list[str]:
     chunks.append(" ".join(cur))
     return chunks
 
-async def summarize_chunk(chunk: str) -> str:
+async def summarize_chunk(chunk: str, model: str) -> str:
     resp = await client.chat.completions.create(
-        model=
+        model=model,
         messages=[
-            {"role":"system",
-            {"role":"user",
+            {"role":"system","content":"You are a concise summarizer."},
+            {"role":"user", "content":f"Summarize this text briefly, preserving key details:\n\n{chunk}"}
         ],
         max_tokens=SUMMARY_MAX,
-        temperature=0.0
+        temperature=0.0
     )
     return resp.choices[0].message.content.strip()
 
 def safe_chat(convo: list[dict], max_reply: int):
     """
-    1) Try
-    2) On
-    3)
+    1) Try each model in MODELS in order
+    2) On model_not_found → try next
+    3) On context-length → summarize last user msg & retry that same model once
     """
-
-
-
-
-
-
-
-
-
-
-
+    last_exc = None
+
+    for model in MODELS:
+        try:
+            return client.chat.completions.create(
+                model=model,
+                messages=convo,
+                max_tokens=max_reply,
+                temperature=TEMPERATURE
+            )
+        except Exception as e:
+            text = str(e).lower()
+            # MODEL NOT FOUND → skip to next
+            if "does not exist" in text or "model_not_found" in text or "404" in text:
+                last_exc = e
+                continue
+
+            # CONTEXT-LENGTH ERROR → summarize + retry this same model once
+            if "maximum context length" in text or "context length" in text:
+                used = count_tokens("".join(m["content"] for m in convo[:-1]), model)
+                allowed = MAX_CONTEXT - used - BUFFER_TOKENS
+                if allowed < 100:
+                    last_exc = RuntimeError("Input too large even after trimming.")
+                    break
+
+                # chunk & summarize the last message
+                last_msg = convo[-1]["content"]
+                pieces = chunk_text(last_msg, allowed // 2, model)
+                summaries = asyncio.get_event_loop().run_until_complete(
+                    asyncio.gather(*(summarize_chunk(p, model) for p in pieces))
+                )
+                convo[-1]["content"] = " ".join(summaries)
+
+                # retry once on this model
+                try:
+                    return client.chat.completions.create(
+                        model=model,
+                        messages=convo,
+                        max_tokens=max_reply,
+                        temperature=TEMPERATURE
+                    )
+                except Exception as e2:
+                    last_exc = e2
+                    continue
+
+            # any other error → bubble up
             raise
 
-
-
-    allowed = MAX_CONTEXT - used - BUFFER_TOKENS
-    if allowed < 100:
-        raise RuntimeError("Even after trimming, input is too large.")
-
-    # Chunk & summarize the last message
-    last_msg = convo[-1]["content"]
-    bits = chunk_text(last_msg, max_toks=allowed // 2, model=MODEL_NAME)
-    summaries = asyncio.get_event_loop().run_until_complete(
-        asyncio.gather(*(summarize_chunk(b) for b in bits))
-    )
-
-    convo[-1]["content"] = " ".join(summaries)
-
-    # Retry once
-    return client.chat.completions.create(
-        model=MODEL_NAME,
-        messages=convo,
-        max_tokens=max_reply,
-        temperature=TEMPERATURE
-    )
+    # if none worked:
+    raise last_exc or RuntimeError("All models failed in safe_chat()")
 
 def chat_handler(
     user_message: str,
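Two caveats on the hunk above. First, summarize_chunk awaits client.chat.completions.create, but client is the synchronous OpenAI client, whose create call returns a plain response rather than an awaitable; the SDK's AsyncOpenAI client is the awaitable variant. Second, asyncio.get_event_loop().run_until_complete is deprecated when no loop is running and raises inside a running one. A sketch of the same fan-out with the async client; the names mirror the diff, but this is a suggested shape, not what the commit ships:

import asyncio
from openai import AsyncOpenAI

aclient = AsyncOpenAI()  # reads OPENAI_API_KEY from the environment

async def summarize_chunk(chunk: str, model: str) -> str:
    # awaitable only on the async client
    resp = await aclient.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are a concise summarizer."},
            {"role": "user", "content": f"Summarize this text briefly, preserving key details:\n\n{chunk}"},
        ],
        max_tokens=1024,  # SUMMARY_MAX in the diff
        temperature=0.0,
    )
    return resp.choices[0].message.content.strip()

async def _summarize_all(pieces: list[str], model: str) -> list[str]:
    return list(await asyncio.gather(*(summarize_chunk(p, model) for p in pieces)))

def summarize_all(pieces: list[str], model: str) -> list[str]:
    # asyncio.run creates and closes its own event loop; safe in plain sync code
    return asyncio.run(_summarize_all(pieces, model))

On the error handling: matching substrings of str(e) works but is brittle across SDK versions; openai>=1.0 raises typed exceptions (openai.NotFoundError for a missing model, openai.BadRequestError for an overlong context), which would make the two branches explicit.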
@@ -100,12 +119,12 @@ def chat_handler(
     if not client.api_key:
         return history, "❌ OPENAI_API_KEY not set."
 
-    # Build
+    # Build convo payload
     convo = [{"role":"system","content":system_prompt}]
     for u, b in history or []:
         convo.append({"role":"user", "content":u})
         convo.append({"role":"assistant", "content":b})
-    convo.append({"role":"user",
+    convo.append({"role":"user","content":user_message})
 
     try:
         resp = safe_chat(convo, max_reply=REPLY_MAX)
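For reference, chat_handler assumes Gradio's tuple-style chat history, a list of (user, assistant) pairs. A tiny worked example of the payload the loop above builds, with illustrative values:

# hypothetical inputs
system_prompt = "You are a helpful assistant."
history = [("Hi", "Hello! How can I help?")]
user_message = "Now summarize our chat."

convo = [{"role": "system", "content": system_prompt}]
for u, b in history or []:
    convo.append({"role": "user", "content": u})
    convo.append({"role": "assistant", "content": b})
convo.append({"role": "user", "content": user_message})

# convo now holds four messages, in order:
# system, user("Hi"), assistant("Hello! How can I help?"), user("Now summarize our chat.")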
@@ -118,12 +137,12 @@ def chat_handler(
         return history, ""
 
 # ─── Gradio UI ───
-with gr.Blocks(title="🤖 Advanced Chatbot
+with gr.Blocks(title="🤖 Advanced Chatbot") as demo:
+    gr.Markdown(f"**Models to try (in order):** {', '.join(MODELS)}")
     gr.Markdown(
         """
-        # Advanced Chatbot
         Paste arbitrarily long code or text; the bot will auto-summarize overflow.
-
+        It will also automatically fall back if a model isn't available.
         """
     )
 
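The diff ends inside the gr.Blocks context, so the wiring of the chatbot components and the launch call are presumably unchanged further down the file. For completeness, the conventional closing of a Blocks app on Spaces looks like this (not shown in this commit, so only the usual pattern):

if __name__ == "__main__":
    demo.launch()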