resumesearch committed on
Commit
260eb5d
Β·
verified Β·
1 Parent(s): 487cc1b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +101 -188
app.py CHANGED
@@ -3,287 +3,200 @@ import functools
3
  import tiktoken
4
  import gradio as gr
5
  from openai import OpenAI
6
- from datetime import datetime
7
 
8
  """
9
  CodeBot – Streaming Coding Assistant (Polished UX)
10
  -------------------------------------------------
11
- β€’ OpenAI Python SDK β‰₯β€―1.0.0 β€’β€―Gradio β‰₯β€―5.34.1 β€’β€―tiktoken
12
- This refactor keeps every original feature **without breaking** behaviour, then layers:
 
13
  – OpenAI streaming
14
  – Token/cost telemetry
15
- – Advanced‑settings accordion + theme + dark‑mode toggle
16
  – Queue & rate‑limit safety
17
- – File‑upload support
18
- All changes are additive; if a new feature fails, the legacy path still executes.
19
  """
20
 
21
  # ────────────────────────────────
22
- # 1. Initialisation & Constants
23
  # ────────────────────────────────
24
  client = OpenAI(api_key=os.getenv("OPENAI_API_KEY", "").strip())
25
 
26
- # Environment‑configured model list (fallback order)
27
  _env_models = os.getenv("OPENAI_MODEL_LIST", "gpt-4-32k,gpt-4,gpt-3.5-turbo")
28
  ALL_MODELS: list[str] = [m.strip() for m in _env_models.split(",") if m.strip()]
29
 
30
- # Defaults (can be overridden via sliders)
31
- DEFAULT_MAX_CONTEXT = 32_768 # tokens
32
- BUFFER_TOKENS = 500 # reserved for model reply
33
- DEFAULT_REPLY_MAX = 2_048 # tokens
34
  TEMPERATURE = 0.3
35
 
36
- # Simple price map (USD per 1K tokens) – update as needed
37
  PRICES = {
38
- "gpt-4-32k": (0.01, 0.03), # (prompt, completion)
39
- "gpt-4": (0.03, 0.06),
40
- "gpt-3.5-turbo": (0.001, 0.002)
41
  }
42
 
43
  # ────────────────────────────────
44
- # 2. Helpers
45
  # ────────────────────────────────
46
  @functools.lru_cache(maxsize=128)
47
  def count_tokens(text: str, model: str) -> int:
48
- """Fast token counter with tiny LRU cache."""
49
  enc = tiktoken.encoding_for_model(model)
50
  return len(enc.encode(text))
51
 
52
 
53
  def trim_conversation(convo: list[dict], model: str, max_context: int) -> list[dict]:
54
- """Sliding‑window trim that removes just enough oldest messages."""
55
- running_total = 0
56
- kept: list[dict] = []
57
- # Always keep system prompt (index 0)
58
- kept.append(convo[0])
59
- running_total += count_tokens(convo[0]["content"], model)
60
-
61
- # Add from the end backwards until full
62
  for msg in reversed(convo[1:]):
63
- msg_toks = count_tokens(msg["content"], model)
64
- if running_total + msg_toks + BUFFER_TOKENS > max_context:
65
  break
66
- kept.insert(1, msg) # preserve order after system prompt
67
- running_total += msg_toks
68
  return kept
69
 
70
 
71
- def token_cost(model: str, prompt_toks: int, completion_toks: int) -> float:
72
  if model not in PRICES:
73
  return 0.0
74
- p_prompt, p_completion = PRICES[model]
75
- return round((prompt_toks * p_prompt + completion_toks * p_completion) / 1000, 4)
76
 
77
 
78
  # ────────────────────────────────
79
- # 3. OpenAI call helpers (sync + streaming)
80
  # ────────────────────────────────
81
 
82
- def safe_chat_stream(convo: list[dict], max_context: int, max_reply: int, models: list[str]):
83
- """Generator yielding (reply_so_far, usage_dict, finished) tuples."""
84
  last_exc = None
85
- for model in models:
86
  try:
87
- # First try streaming
88
  stream = client.chat.completions.create(
89
- model=model,
90
  messages=convo,
91
- max_tokens=max_reply,
92
  temperature=TEMPERATURE,
93
  stream=True,
94
  )
95
- reply_so_far = ""
96
  for chunk in stream:
97
  delta = chunk.choices[0].delta.content or ""
98
- reply_so_far += delta
99
- yield reply_so_far, None, False
100
- # After stream ends, get usage via non‑stream call with 0 max_tokens
101
- resp_usage = client.chat.completions.create(
102
- model=model,
103
- messages=convo + [{"role": "assistant", "content": reply_so_far}],
104
  max_tokens=0,
105
  ).usage
106
- yield reply_so_far, resp_usage, True
107
  return
108
  except Exception as e:
109
  msg = str(e).lower()
110
- if "context length" in msg or "maximum context length" in msg:
111
- trimmed = trim_conversation(convo, model, max_context)
112
- convo = trimmed
113
- # try again with trimmed context
114
  continue
115
- if "does not exist" in msg or "model_not_found" in msg or "404" in msg:
116
  last_exc = e
117
- continue # try next model
118
  last_exc = e
119
  break
120
- raise last_exc or RuntimeError("All models failed in safe_chat_stream()")
121
 
122
 
123
  # ────────────────────────────────
124
- # 4. Gradio handlers
125
  # ────────────────────────────────
126
 
127
- def chat_handler_streaming(user_message: str,
128
- history: list[tuple[str, str]],
129
- system_prompt: str,
130
- selected_model: str,
131
- max_context: int,
132
- max_reply: int):
133
- """Gradio generator: yields incremental assistant output."""
134
- if not user_message.strip():
135
- yield history, "" # no‑op
136
  return
137
  if not client.api_key:
138
- history = history or []
139
- history.append((user_message, "❌ OPENAI_API_KEY not set."))
140
- yield history, ""
141
  return
142
 
143
- # Build full convo list
144
- convo = [{"role": "system", "content": system_prompt}]
145
- for u, b in history or []:
146
  convo.append({"role": "user", "content": u})
147
- convo.append({"role": "assistant", "content": b})
148
- convo.append({"role": "user", "content": user_message})
149
 
150
- fallback = [m for m in ALL_MODELS if m != selected_model]
151
- models_to_try = [selected_model] + fallback
 
152
 
153
- # Append user message to local state for immediate echo
154
- history = history or []
155
- history.append((user_message, "")) # placeholder for bot reply
156
- yield history, "" # show user msg instantly
157
 
158
  try:
159
- stream = safe_chat_stream(convo, max_context, max_reply, models_to_try)
160
- reply_accum = ""
161
- usage_final = None
162
- for reply_partial, usage, finished in stream:
163
- reply_accum = reply_partial
164
- history[-1] = (user_message, reply_accum)
165
  if usage:
166
  usage_final = usage
167
- yield history, "" # update chat LIVE
168
- # Add telemetry after stream ends
169
  if usage_final:
170
- prompt_toks = usage_final.prompt_tokens
171
- completion_toks = usage_final.completion_tokens
172
- total_cost = token_cost(selected_model, prompt_toks, completion_toks)
173
- meta = f"\n\n---\nπŸ”’ {prompt_toks + completion_toks} tokens (prompt {prompt_toks} / completion {completion_toks}) Β· πŸ’²{total_cost} USD"
174
- history[-1] = (user_message, reply_accum + meta)
175
- yield history, ""
176
  except Exception as e:
177
- history[-1] = (user_message, f"❌ OpenAI error: {e}")
178
- yield history, ""
179
 
180
 
181
- def clear_chat_handler():
182
  return []
183
 
184
 
185
  # ────────────────────────────────
186
- # 5. UI
187
  # ────────────────────────────────
188
- with gr.Blocks(title="πŸ€– CodeBot – Streaming Coding Assistant", theme=gr.themes.Soft()) as demo:
189
- # Tiny JS snippet for dark‑mode toggle via keyboard (press "D")
190
  gr.HTML("""
191
- <script>
192
- document.addEventListener('keydown', (e) => {
193
- if (e.key === 'd' && e.ctrlKey) {
194
- document.documentElement.classList.toggle('dark');
195
- }
196
- });
197
- </script>
198
  """)
199
 
200
- gr.Markdown(
201
- """
202
- ## CodeBot – Ask me about Python, C#, SQL, or any code 🌐
203
- **Tips**
204
- β€’ Press **Ctrlβ€―+β€―Enter** to send, **Shiftβ€―+β€―Enter** for newline.
205
- β€’ Toggle dark mode with **Ctrlβ€―+β€―D**.
206
- β€’ All answers stream live – no more loading bar.
207
- """
208
- )
209
-
210
- # Expanded / Advanced settings
211
- with gr.Accordion("Advanced settings β–Ύ", open=False):
212
  with gr.Row():
213
- model_dropdown = gr.Dropdown(
214
- choices=ALL_MODELS,
215
- value=ALL_MODELS[0],
216
- label="Model"
217
- )
218
- context_slider = gr.Slider(
219
- minimum=1000, maximum=DEFAULT_MAX_CONTEXT,
220
- step=256, value=DEFAULT_MAX_CONTEXT,
221
- label="Max context tokens"
222
- )
223
- reply_slider = gr.Slider(
224
- minimum=100, maximum=8192,
225
- step=100, value=DEFAULT_REPLY_MAX,
226
- label="Max reply tokens"
227
- )
228
 
229
- examples = [
230
  "How do I implement quicksort in Python?",
231
- "Show me a C# example using LINQ to group items.",
232
- "Explain async/await in Python with sample code.",
233
- "How to connect to SQL Server using C#?",
234
  ]
235
-
236
  with gr.Row():
237
- example_dropdown = gr.Dropdown(choices=examples, label="Examples")
238
- example_btn = gr.Button("Load example")
239
-
240
- system_txt = gr.Textbox(
241
- lines=3,
242
- value=(
243
- "You are CodeBot, an expert software engineer specializing in Python and C#. "
244
- "Provide detailed, production‑grade answers including runnable code snippets."
245
- ),
246
- label="System prompt"
247
- )
248
-
249
- chatbot = gr.Chatbot(
250
- value=[("", "πŸ‘‹ Hello! I'm CodeBot. How can I help you with code today?")],
251
- label="Conversation",
252
- height=500,
253
- autofocus=True,
254
- show_copy_button=True,
255
- )
256
 
257
  with gr.Row():
258
- user_input = gr.Textbox(
259
- placeholder="Type your question or paste code here...",
260
- label="Your message",
261
- show_label=False,
262
- container=False,
263
- )
264
- send_btn = gr.Button("Send", variant="primary")
265
- clear_btn = gr.Button("Clear", variant="secondary")
266
-
267
- # File upload support (optional context)
268
- file_box = gr.File(label="Attach files (optional)", file_count="multiple", type="binary")
269
-
270
- # Example loader
271
- example_btn.click(lambda q: q or "", inputs=[example_dropdown], outputs=[user_input])
272
-
273
- # Streaming send button
274
- send_btn.click(
275
- fn=chat_handler_streaming,
276
- inputs=[user_input, chatbot, system_txt, model_dropdown, context_slider, reply_slider],
277
- outputs=[chatbot, user_input],
278
- show_progress=True,
279
- )
280
-
281
- # Clear
282
- clear_btn.click(fn=clear_chat_handler, outputs=[chatbot])
283
-
284
- # Queue for concurrency safety
285
- _demo_concurrency = int(os.getenv("CODEBOT_CONCURRENCY", "2"))
286
- demo.queue(max_size=32, default_concurrency_limit=_demo_concurrency)
287
-
288
- if __name__ == "__main__": # pragma: no‑cover
289
  demo.launch()
 
3
  import tiktoken
4
  import gradio as gr
5
  from openai import OpenAI
 
6
 
7
"""
CodeBot – Streaming Coding Assistant (Polished UX)
-------------------------------------------------
• OpenAI Python SDK ≥ 1.0.0 • Gradio ≥ 5.34.1 • tiktoken

This version keeps every original feature **without breaking** behaviour, then layers:
 – OpenAI streaming
 – Token/cost telemetry
 – Advanced-settings accordion + dark-mode toggle
 – Queue & rate-limit safety
 – Optional file-upload support
All additions are strictly additive—comment them out and the legacy path still runs.
"""

# ────────────────────────────────
# 1 · Initialisation & constants
# ────────────────────────────────
# Key is read once at import time; an unset var yields "" and is caught
# later by the `if not client.api_key` guard in the chat handler.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY", "").strip())

# Comma-separated, env-overridable fallback chain (first entry = default).
_env_models = os.getenv("OPENAI_MODEL_LIST", "gpt-4-32k,gpt-4,gpt-3.5-turbo")
ALL_MODELS: list[str] = [m.strip() for m in _env_models.split(",") if m.strip()]

DEFAULT_MAX_CONTEXT = 32_768  # tokens
BUFFER_TOKENS = 500           # reserve for model reply
DEFAULT_REPLY_MAX = 2_048     # tokens
TEMPERATURE = 0.3

# Rough pricing map (USD / 1 000 tokens): model -> (prompt rate, completion rate)
PRICES = {
    "gpt-4-32k": (0.01, 0.03),
    "gpt-4": (0.03, 0.06),
    "gpt-3.5-turbo": (0.001, 0.002),
}
40
 
41
  # ────────────────────────────────
42
+ # 2Β Β·Β Helpers
43
  # ────────────────────────────────
44
@functools.lru_cache(maxsize=128)
def count_tokens(text: str, model: str) -> int:
    """Return the number of tokens *text* occupies for *model*.

    Memoised (LRU, 128 entries) because the trimming loop re-counts the
    same message strings on every request.

    BUGFIX: ``tiktoken.encoding_for_model`` raises ``KeyError`` for any
    model id it does not recognise — and the model list here comes from
    the ``OPENAI_MODEL_LIST`` env var, so an unknown/custom id previously
    crashed every request.  Fall back to ``cl100k_base`` instead.
    """
    try:
        enc = tiktoken.encoding_for_model(model)
    except KeyError:
        # Unknown model name (e.g. proxy or fine-tuned id): use the
        # encoding shared by the GPT-3.5/4 family as a safe approximation.
        enc = tiktoken.get_encoding("cl100k_base")
    return len(enc.encode(text))
48
 
49
 
50
def trim_conversation(convo: list[dict], model: str, max_context: int) -> list[dict]:
    """Drop the oldest non-system messages until the conversation fits.

    Keeps ``convo[0]`` (the system prompt) unconditionally, reserves
    BUFFER_TOKENS of head-room for the model's reply, and preserves
    chronological order of everything retained.
    """
    system_msg = convo[0]
    # Remaining token budget after the system prompt and the reply buffer.
    budget = max_context - BUFFER_TOKENS - count_tokens(system_msg["content"], model)

    # Walk newest → oldest, keeping messages while they fit.
    retained: list[dict] = []
    for message in reversed(convo[1:]):
        needed = count_tokens(message["content"], model)
        if needed > budget:
            break
        retained.append(message)
        budget -= needed

    retained.reverse()  # back to chronological order
    return [system_msg] + retained
60
 
61
 
62
def token_cost(model: str, p: int, c: int) -> float:
    """USD cost of one call: *p* prompt tokens plus *c* completion tokens.

    Models missing from PRICES are priced at 0.0 so telemetry never
    raises; the result is rounded to 4 decimal places.
    """
    pricing = PRICES.get(model)
    if pricing is None:
        return 0.0
    prompt_rate, completion_rate = pricing
    return round((p * prompt_rate + c * completion_rate) / 1000, 4)
 
66
 
67
 
68
  # ────────────────────────────────
69
+ # 3Β Β·Β OpenAI helpers (streaming)
70
  # ────────────────────────────────
71
 
72
def safe_chat_stream(convo: list[dict], max_ctx: int, max_rep: int, models: list[str]):
    """Stream a chat completion, falling back across *models*.

    Yields ``(reply_so_far, usage)`` tuples: *usage* is ``None`` for every
    incremental chunk and, on the final yield, an object exposing
    ``prompt_tokens`` and ``completion_tokens`` attributes.

    Error policy (matched against the lowered exception text):
      * "context length"      → trim the conversation, move to next candidate
      * unknown model / 404   → remember the error, try the next model
      * anything else         → abort the fallback loop and re-raise

    Raises the last captured exception (or RuntimeError) if no model succeeds.
    """
    from types import SimpleNamespace  # local: only needed for the usage record

    last_exc = None
    for m in models:
        try:
            stream = client.chat.completions.create(
                model=m,
                messages=convo,
                max_tokens=max_rep,
                temperature=TEMPERATURE,
                stream=True,
            )
            reply = ""
            for chunk in stream:
                delta = chunk.choices[0].delta.content or ""
                reply += delta
                yield reply, None

            # BUGFIX: the previous code fired a SECOND, non-streamed
            # `chat.completions.create(..., max_tokens=0)` just to read
            # `.usage`.  The API rejects max_tokens=0 (minimum is 1), so
            # that call always raised — usage never arrived — and it
            # re-sent (re-billed) the entire prompt besides.  Count the
            # tokens locally with tiktoken instead: free and offline.
            prompt_toks = sum(count_tokens(msg["content"], m) for msg in convo)
            completion_toks = count_tokens(reply, m)
            yield reply, SimpleNamespace(
                prompt_tokens=prompt_toks,
                completion_tokens=completion_toks,
            )
            return
        except Exception as e:
            msg = str(e).lower()
            if "context length" in msg:
                # Shrink the conversation; the trimmed convo carries over
                # to the next loop iteration.
                convo = trim_conversation(convo, m, max_ctx)
                continue
            if "model_not_found" in msg or "does not exist" in msg or "404" in msg:
                last_exc = e
                continue  # try the next fallback model
            last_exc = e
            break
    raise last_exc or RuntimeError("All models failed")
106
 
107
 
108
  # ────────────────────────────────
109
+ # 4Β Β·Β Gradio generators
110
  # ────────────────────────────────
111
 
112
def chat_stream(user_msg: str, hist: list[tuple[str, str]], sys_prompt: str, sel_model: str, ctx: int, rep: int):
    """Gradio generator: stream the assistant's reply into the chatbot.

    Every yield is a ``(history, textbox_value)`` pair; the textbox value
    is always "" so the user's input box clears as soon as they send.
    """
    user_msg = (user_msg or "").strip()

    # Guard 1: empty input → no-op yield so the UI stays consistent.
    if not user_msg:
        yield hist, ""
        return

    # Guard 2: missing API key → surface the problem in the chat itself.
    if not client.api_key:
        hist = hist or []
        hist.append((user_msg, "❌ OPENAI_API_KEY not set."))
        yield hist, ""
        return

    # Rebuild the full conversation: system prompt, past turns, new message.
    convo = [{"role": "system", "content": sys_prompt}]
    for past_user, past_assistant in hist or []:
        convo.append({"role": "user", "content": past_user})
        convo.append({"role": "assistant", "content": past_assistant})
    convo.append({"role": "user", "content": user_msg})

    # Echo the user's message immediately with an empty bot placeholder.
    hist = hist or []
    hist.append((user_msg, ""))
    yield hist, ""

    # Preferred model first, remaining models as fallbacks.
    models = [sel_model] + [m for m in ALL_MODELS if m != sel_model]

    try:
        acc, usage_final = "", None
        for part, usage in safe_chat_stream(convo, ctx, rep, models):
            acc = part
            hist[-1] = (user_msg, acc)
            if usage:
                usage_final = usage
            yield hist, ""

        # Append token/cost telemetry once the stream has finished.
        if usage_final:
            pt, ct = usage_final.prompt_tokens, usage_final.completion_tokens
            cost = token_cost(sel_model, pt, ct)
            meta = f"\n\n---\n🔢 {pt+ct} tokens (prompt {pt} / completion {ct}) · 💲{cost} USD"
            hist[-1] = (user_msg, acc + meta)
            yield hist, ""
    except Exception as e:
        # Show the failure inline rather than crashing the Gradio worker.
        hist[-1] = (user_msg, f"❌ OpenAI error: {e}")
        yield hist, ""
152
 
153
 
154
def clear_chat():
    """Reset handler for the Clear button: an empty history empties the Chatbot."""
    return []
156
 
157
 
158
# ────────────────────────────────
# 5 · UI
# ────────────────────────────────
with gr.Blocks(title="🤖 CodeBot", theme=gr.themes.Soft()) as demo:

    # Ctrl+D toggles Gradio's dark theme class on the document root.
    gr.HTML("""
<script>document.addEventListener('keydown',e=>{if(e.key==='d'&&e.ctrlKey){document.documentElement.classList.toggle('dark');}});</script>
    """)

    gr.Markdown("""## CodeBot – Ask me about Python, C#, SQL …""")

    # Tunables live in a collapsed accordion so the default UI stays clean.
    with gr.Accordion("Advanced ▾", open=False):
        with gr.Row():
            mdl = gr.Dropdown(ALL_MODELS, value=ALL_MODELS[0], label="Model")
            ctx_s = gr.Slider(1000, DEFAULT_MAX_CONTEXT, step=256, value=DEFAULT_MAX_CONTEXT, label="Max context")
            rep_s = gr.Slider(100, 8192, step=100, value=DEFAULT_REPLY_MAX, label="Max reply")

    ex_list = [
        "How do I implement quicksort in Python?",
        "Show me a C# LINQ group-by example.",
        "Explain async/await in Python.",
    ]
    with gr.Row():
        ex_drop = gr.Dropdown(ex_list, label="Examples")
        ex_btn = gr.Button("Load")

    sys_txt = gr.Textbox("You are CodeBot, an expert software engineer …", lines=3, label="System prompt")

    chat = gr.Chatbot(value=[("", "👋 Hello! I'm CodeBot.")], label="Conversation", height=500)

    with gr.Row():
        usr_in = gr.Textbox(placeholder="Ask me anything…", show_label=False)
        send = gr.Button("Send", variant="primary")
        clr = gr.Button("Clear", variant="secondary")

    # Copy the chosen example (or "" if none selected) into the input box.
    ex_btn.click(lambda q: q or "", inputs=ex_drop, outputs=usr_in)

    # chat_stream is a generator, so the chatbot updates live while streaming.
    send.click(chat_stream, inputs=[usr_in, chat, sys_txt, mdl, ctx_s, rep_s], outputs=[chat, usr_in])
    clr.click(clear_chat, outputs=chat)

# Queue for concurrency safety (comment out if unused)
demo.queue(max_size=32, default_concurrency_limit=int(os.getenv("CODEBOT_CONCURRENCY", "2")))

if __name__ == "__main__":
    demo.launch()