resumesearch committed on
Commit
e4b7b93
Β·
verified Β·
1 Parent(s): 1e12ce1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +75 -49
app.py CHANGED
@@ -1,9 +1,10 @@
1
  """
2
- app.py – Advanced Chatbot with Sliding-Window Context Management
3
  (OpenAI Python SDK β‰₯1.0.0)
4
  """
5
 
6
  import os
 
7
  import gradio as gr
8
  import tiktoken
9
  from openai import OpenAI
@@ -11,83 +12,80 @@ from openai import OpenAI
11
  # β€”β€”β€” Initialize OpenAI client β€”β€”β€”
12
  client = OpenAI(api_key=os.getenv("OPENAI_API_KEY", "").strip())
13
 
14
- # β€”β€”β€” Model fallback list (highest quality first) β€”β€”β€”
15
- # Comma-separated in env, e.g. "gpt-4-32k,gpt-4,gpt-3.5-turbo"
16
- model_list = os.getenv("OPENAI_MODEL_LIST", "gpt-4-32k,gpt-4,gpt-3.5-turbo")
17
- MODELS = [m.strip() for m in model_list.split(",") if m.strip()]
18
 
19
- # β€”β€”β€” Configuration β€”β€”β€”
20
- MAX_CONTEXT = 32768 # model’s max context window (e.g. GPT-4-32k)
21
- BUFFER_TOKENS = 500 # reserved tokens for the reply
22
- REPLY_MAX = 2048 # max tokens in the answer
23
- TEMPERATURE = 0.3 # creativity vs. determinism
24
 
25
  def count_tokens(text: str, model: str) -> int:
26
- """Return the number of tokens for given text under specified model."""
27
  enc = tiktoken.encoding_for_model(model)
28
  return len(enc.encode(text))
29
 
30
- def trim_conversation(convo: list[dict], model: str) -> list[dict]:
31
  """
32
- Slide the window: drop oldest user/assistant turns
33
- until total tokens + buffer ≀ MAX_CONTEXT.
34
  """
35
- # compute initial usage
36
  tokens = [count_tokens(m["content"], model) for m in convo]
37
- total = sum(tokens)
38
- # pop oldest turns (after system prompt) while over budget
39
- while total + BUFFER_TOKENS > MAX_CONTEXT and len(convo) > 2:
40
- convo.pop(1) # remove oldest user
41
- convo.pop(1) # remove that assistant reply
42
  tokens = [count_tokens(m["content"], model) for m in convo]
43
- total = sum(tokens)
44
  return convo
45
 
46
- def safe_chat(convo: list[dict]):
47
  """
48
- 1) Try each model in MODELS in order.
49
- 2) If model-not-found, skip to next.
50
- 3) If context-length error, trim history and retry same model once.
51
  """
52
  last_exc = None
53
-
54
- for model in MODELS:
55
  try:
56
  return client.chat.completions.create(
57
  model=model,
58
  messages=convo,
59
- max_tokens=REPLY_MAX,
60
  temperature=TEMPERATURE
61
  )
62
  except Exception as e:
63
  msg = str(e).lower()
64
- # model not found β†’ fallback
65
  if "does not exist" in msg or "model_not_found" in msg or "404" in msg:
66
  last_exc = e
67
  continue
68
- # context-length error β†’ trim & retry
69
  if "context length" in msg or "maximum context length" in msg:
70
- trimmed = trim_conversation(convo.copy(), model)
71
  try:
72
  return client.chat.completions.create(
73
  model=model,
74
  messages=trimmed,
75
- max_tokens=REPLY_MAX,
76
  temperature=TEMPERATURE
77
  )
78
  except Exception as e2:
79
  last_exc = e2
80
  continue
81
- # other errors β†’ re-raise
82
  raise
83
-
84
- # none succeeded
85
  raise last_exc or RuntimeError("All models failed in safe_chat()")
86
 
87
  def chat_handler(
88
  user_message: str,
89
  history: list[tuple[str, str]],
90
- system_prompt: str
 
 
 
91
  ) -> tuple[list[tuple[str, str]], str]:
92
  """Gradio handler: builds convo, calls safe_chat, updates history."""
93
  if not user_message.strip():
@@ -96,14 +94,18 @@ def chat_handler(
96
  return history, "❌ OPENAI_API_KEY not set."
97
 
98
  # Build the conversation payload
99
- convo = [{"role": "system", "content": system_prompt}]
100
  for u, b in history or []:
101
- convo.append({"role": "user", "content": u})
102
- convo.append({"role": "assistant", "content": b})
103
- convo.append({"role": "user", "content": user_message})
 
 
 
 
104
 
105
  try:
106
- resp = safe_chat(convo)
107
  reply = resp.choices[0].message.content
108
  except Exception as e:
109
  reply = f"❌ OpenAI error: {e}"
@@ -113,16 +115,34 @@ def chat_handler(
113
  return history, ""
114
 
115
  # β€”β€”β€” Gradio UI β€”β€”β€”
116
- with gr.Blocks(title="πŸ€– Advanced Chatbot (Sliding-Window Context)") as demo:
117
  gr.Markdown(
118
  """
119
  # Advanced Chatbot
120
- This bot preserves your newest input by **sliding** out the oldest history
121
- when you exceed the model's context windowβ€”no summarization required.
122
- It also **automatically falls back** through multiple models if one isn’t available.
 
123
  """
124
  )
125
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
  system_txt = gr.Textbox(
127
  lines=3,
128
  value=(
@@ -132,16 +152,22 @@ with gr.Blocks(title="πŸ€– Advanced Chatbot (Sliding-Window Context)") as demo:
132
  label="System Prompt"
133
  )
134
 
135
- chatbot = gr.Chatbot(label="Conversation")
136
  user_input = gr.Textbox(placeholder="Type your message here...", label="You")
137
- send_btn = gr.Button("Send")
138
 
139
  send_btn.click(
140
  fn=chat_handler,
141
- inputs=[user_input, chatbot, system_txt],
 
 
 
 
 
 
 
142
  outputs=[chatbot, user_input]
143
  )
144
 
145
  if __name__ == "__main__":
146
  demo.launch()
147
-
 
1
  """
2
+ app.py – Advanced Chatbot with Model Selector, Context & Reply Token Sliders
3
  (OpenAI Python SDK β‰₯1.0.0)
4
  """
5
 
6
  import os
7
+ import asyncio
8
  import gradio as gr
9
  import tiktoken
10
  from openai import OpenAI
 
12
  # β€”β€”β€” Initialize OpenAI client β€”β€”β€”
13
  client = OpenAI(api_key=os.getenv("OPENAI_API_KEY", "").strip())
14
 
15
+ # β€”β€”β€” Environment-configured model list (fallback order) β€”β€”β€”
16
+ env_models = os.getenv("OPENAI_MODEL_LIST", "gpt-4-32k,gpt-4,gpt-3.5-turbo")
17
+ ALL_MODELS = [m.strip() for m in env_models.split(",") if m.strip()]
 
18
 
19
+ # β€”β€”β€” Default limits β€”β€”β€”
20
+ DEFAULT_MAX_CONTEXT = 32768 # tokens
21
+ DEFAULT_REPLY_MAX = 2048 # tokens
22
+ BUFFER_TOKENS = 500 # reserved for the model’s answer
23
+ TEMPERATURE = 0.3
24
 
25
  def count_tokens(text: str, model: str) -> int:
 
26
  enc = tiktoken.encoding_for_model(model)
27
  return len(enc.encode(text))
28
 
29
+ def trim_conversation(convo: list[dict], model: str, max_context: int) -> list[dict]:
30
  """
31
+ Slide-window: drop the oldest user/assistant turns until
32
+ total tokens + BUFFER_TOKENS ≀ max_context.
33
  """
 
34
  tokens = [count_tokens(m["content"], model) for m in convo]
35
+ total = sum(tokens)
36
+ while total + BUFFER_TOKENS > max_context and len(convo) > 2:
37
+ convo.pop(1) # oldest user
38
+ convo.pop(1) # its assistant reply
 
39
  tokens = [count_tokens(m["content"], model) for m in convo]
40
+ total = sum(tokens)
41
  return convo
42
 
43
+ def safe_chat(convo: list[dict], max_context: int, max_reply: int, model_list: list[str]):
44
  """
45
+ Try each model in model_list:
46
+ - on model-not-found: skip to next
47
+ - on context-length: trim history & retry same model once
48
  """
49
  last_exc = None
50
+ for model in model_list:
 
51
  try:
52
  return client.chat.completions.create(
53
  model=model,
54
  messages=convo,
55
+ max_tokens=max_reply,
56
  temperature=TEMPERATURE
57
  )
58
  except Exception as e:
59
  msg = str(e).lower()
60
+ # Model unavailable β†’ fallback
61
  if "does not exist" in msg or "model_not_found" in msg or "404" in msg:
62
  last_exc = e
63
  continue
64
+ # Context-length error β†’ slide window & retry
65
  if "context length" in msg or "maximum context length" in msg:
66
+ trimmed = trim_conversation(convo.copy(), model, max_context)
67
  try:
68
  return client.chat.completions.create(
69
  model=model,
70
  messages=trimmed,
71
+ max_tokens=max_reply,
72
  temperature=TEMPERATURE
73
  )
74
  except Exception as e2:
75
  last_exc = e2
76
  continue
77
+ # Other errors β†’ bubble up
78
  raise
79
+ # All models failed
 
80
  raise last_exc or RuntimeError("All models failed in safe_chat()")
81
 
82
  def chat_handler(
83
  user_message: str,
84
  history: list[tuple[str, str]],
85
+ system_prompt: str,
86
+ selected_model: str,
87
+ max_context: int,
88
+ max_reply: int
89
  ) -> tuple[list[tuple[str, str]], str]:
90
  """Gradio handler: builds convo, calls safe_chat, updates history."""
91
  if not user_message.strip():
 
94
  return history, "❌ OPENAI_API_KEY not set."
95
 
96
  # Build the conversation payload
97
+ convo = [{"role":"system","content":system_prompt}]
98
  for u, b in history or []:
99
+ convo.append({"role":"user", "content":u})
100
+ convo.append({"role":"assistant", "content":b})
101
+ convo.append({"role":"user","content":user_message})
102
+
103
+ # Prepare model list: user choice first, then the rest
104
+ fallback = [m for m in ALL_MODELS if m != selected_model]
105
+ models_to_try = [selected_model] + fallback
106
 
107
  try:
108
+ resp = safe_chat(convo, max_context, max_reply, models_to_try)
109
  reply = resp.choices[0].message.content
110
  except Exception as e:
111
  reply = f"❌ OpenAI error: {e}"
 
115
  return history, ""
116
 
117
  # β€”β€”β€” Gradio UI β€”β€”β€”
118
+ with gr.Blocks(title="πŸ€– Advanced Chatbot") as demo:
119
  gr.Markdown(
120
  """
121
  # Advanced Chatbot
122
+ - **Model selector**: pick any supported OpenAI model
123
+ - **Context slider**: adjust how many tokens of history to keep
124
+ - **Reply slider**: adjust maximum tokens in the answer
125
+ - **Sliding-window**: oldest history drops automatically when over limit
126
  """
127
  )
128
 
129
+ with gr.Row():
130
+ model_dropdown = gr.Dropdown(
131
+ choices=ALL_MODELS,
132
+ value=ALL_MODELS[0],
133
+ label="Choose Model"
134
+ )
135
+ context_slider = gr.Slider(
136
+ minimum=1000, maximum=DEFAULT_MAX_CONTEXT,
137
+ step=256, value=DEFAULT_MAX_CONTEXT,
138
+ label="Max Context Tokens"
139
+ )
140
+ reply_slider = gr.Slider(
141
+ minimum=100, maximum=8192,
142
+ step=100, value=DEFAULT_REPLY_MAX,
143
+ label="Max Reply Tokens"
144
+ )
145
+
146
  system_txt = gr.Textbox(
147
  lines=3,
148
  value=(
 
152
  label="System Prompt"
153
  )
154
 
155
+ chatbot = gr.Chatbot(label="Conversation")
156
  user_input = gr.Textbox(placeholder="Type your message here...", label="You")
157
+ send_btn = gr.Button("Send")
158
 
159
  send_btn.click(
160
  fn=chat_handler,
161
+ inputs=[
162
+ user_input,
163
+ chatbot,
164
+ system_txt,
165
+ model_dropdown,
166
+ context_slider,
167
+ reply_slider
168
+ ],
169
  outputs=[chatbot, user_input]
170
  )
171
 
172
  if __name__ == "__main__":
173
  demo.launch()