zakerytclarke committed on
Commit
b00bb52
·
verified ·
1 Parent(s): e4379b8

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +204 -81
src/streamlit_app.py CHANGED
@@ -2,14 +2,27 @@ import os
2
  import time
3
  import threading
4
  import requests
 
 
5
  import streamlit as st
6
  import torch
7
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, TextIteratorStreamer
8
 
 
 
 
 
 
 
 
 
 
 
 
9
  # Optional LangSmith
10
  try:
11
  from langsmith import Client as LangSmithClient
12
- except:
13
  LangSmithClient = None
14
 
15
  # =========================
@@ -23,6 +36,7 @@ LOGO_URL = "https://teapotai.com/assets/logo.gif"
23
 
24
  st.set_page_config(page_title="TeapotAI Chat", page_icon="🫖", layout="centered")
25
 
 
26
  # =========================
27
  # LOAD MODEL (CACHED)
28
  # =========================
@@ -34,8 +48,10 @@ def load_model():
34
  model.to(device).eval()
35
  return tokenizer, model, device
36
 
 
37
  tokenizer, model, device = load_model()
38
 
 
39
  # =========================
40
  # LANGSMITH (OPTIONAL)
41
  # =========================
@@ -46,8 +62,10 @@ def get_langsmith():
46
  return LangSmithClient()
47
  return None
48
 
 
49
  ls_client = get_langsmith()
50
 
 
51
  # =========================
52
  # SESSION STATE
53
  # =========================
@@ -56,16 +74,18 @@ if "messages" not in st.session_state:
56
  if "pending_response" not in st.session_state:
57
  st.session_state.pending_response = None
58
 
 
59
  # =========================
60
  # HEADER (LOGO)
61
  # =========================
62
  col1, col2 = st.columns([1, 6])
63
  with col1:
64
- st.image(LOGO_URL, use_column_width=True)
65
  with col2:
66
  st.markdown("## TeapotAI Chat")
67
  st.caption("Fast grounded answers with clean web context")
68
 
 
69
  # =========================
70
  # SIDEBAR SETTINGS
71
  # =========================
@@ -84,17 +104,94 @@ with st.sidebar:
84
  height=180,
85
  )
86
 
87
- st.markdown("### Local Context (Appended)")
88
- local_context = st.text_area(
89
  "Paste additional context (optional)",
90
- height=160,
91
- placeholder="This will be appended after web content..."
 
 
 
 
 
 
92
  )
93
 
94
- use_web = st.checkbox("Use web search", value=True)
95
 
96
  # =========================
97
- # WEB SEARCH (SNIPPETS ONLY)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
  # =========================
99
  def web_search_snippets(query: str):
100
  api_key = os.getenv("BRAVE_API_KEY") or st.secrets.get("BRAVE_API_KEY", None)
@@ -113,7 +210,7 @@ def web_search_snippets(query: str):
113
  timeout=6,
114
  )
115
  data = r.json()
116
- except:
117
  return "", 0.0
118
  t1 = time.perf_counter()
119
 
@@ -124,30 +221,31 @@ def web_search_snippets(query: str):
124
  if desc:
125
  snippets.append(desc)
126
 
127
- # Paragraph-separated ONLY (no title, no URL)
128
- clean_context = "\n\n".join(snippets)
129
  return clean_context, (t1 - t0)
130
 
 
131
  # =========================
132
- # TRUNCATE TO LAST 512 TOKENS
133
  # =========================
134
- def truncate_context(web_ctx, local_ctx, system, question):
135
- ordered_context = (
136
- f"{web_ctx}\n\n{local_ctx}".strip()
137
- )
138
 
139
  base = f"\n{system}\n{question}\n"
140
  base_tokens = tokenizer.encode(base)
141
  budget = MAX_INPUT_TOKENS - len(base_tokens)
142
 
143
- ctx_tokens = tokenizer.encode(ordered_context)
 
 
 
144
  if len(ctx_tokens) <= budget:
145
  return ordered_context
146
 
147
- # Keep MOST RECENT tokens (tail truncation)
148
- truncated = ctx_tokens[-budget:]
149
  return tokenizer.decode(truncated, skip_special_tokens=True)
150
 
 
151
  # =========================
152
  # STREAM GENERATION
153
  # =========================
@@ -164,7 +262,7 @@ def stream_generate(prompt: str):
164
  streamer=streamer,
165
  )
166
 
167
- thread = threading.Thread(target=run)
168
  thread.start()
169
 
170
  text = ""
@@ -172,6 +270,7 @@ def stream_generate(prompt: str):
172
  text += chunk
173
  yield text
174
 
 
175
  # =========================
176
  # FEEDBACK HANDLER (Native st.feedback)
177
  # =========================
@@ -189,80 +288,89 @@ def handle_feedback(idx: int):
189
  score=score,
190
  comment="thumbs_up" if score else "thumbs_down",
191
  )
192
- except:
193
  pass
194
 
 
195
  # =========================
196
  # RENDER CHAT HISTORY
197
  # =========================
198
  for i, msg in enumerate(st.session_state.messages):
199
  with st.chat_message(msg["role"]):
200
- st.markdown(msg["content"])
 
 
 
 
 
 
201
 
202
- if msg["role"] == "assistant":
203
- # Metrics row
204
  st.caption(
205
  f"🔎 {msg['search_time']:.2f}s search • "
206
  f"🧠 {msg['gen_time']:.2f}s generate • "
207
  f"⚡ {msg['tps']:.1f} tok/s • "
208
- f"🧮 {msg['tokens']} tokens"
209
  )
210
 
211
- # Inspectable context (clean UX)
212
- with st.expander("🔍 Inspect Context Used"):
213
- st.markdown("**Web Content:**")
214
- st.write(msg["web_context"] or "_None_")
215
- st.markdown("**Local Context:**")
216
- st.write(msg["local_context"] or "_None_")
217
- st.markdown("**Final Truncated Context (512 tokens tail):**")
218
- st.write(msg["final_context"])
219
-
220
- # Native thumbs feedback
221
- key = f"feedback_{i}"
222
- st.session_state.setdefault(key, msg.get("feedback"))
223
- st.feedback(
224
- "thumbs",
225
- key=key,
226
- disabled=msg.get("feedback") is not None,
227
- on_change=handle_feedback,
228
- args=(i,),
229
- )
 
 
230
 
231
  # =========================
232
- # USER INPUT (FIXED ORDER)
233
  # =========================
234
  query = st.chat_input("Ask a question...")
235
 
236
  if query:
237
- # 1️⃣ Immediately show user message FIRST (fix streaming race)
238
  st.session_state.messages.append({"role": "user", "content": query})
239
  st.rerun()
240
 
 
241
  # =========================
242
- # GENERATE AFTER RERUN (Prevents premature streaming)
243
  # =========================
244
  if (
245
  st.session_state.messages
246
  and st.session_state.messages[-1]["role"] == "user"
247
  and st.session_state.pending_response is None
248
  ):
249
- query = st.session_state.messages[-1]["content"]
250
 
251
- # --- Web Search ---
252
- web_ctx = ""
253
- search_time = 0.0
254
- if use_web:
255
- web_ctx, search_time = web_search_snippets(query)
256
 
257
  # --- Strict Order Context ---
258
  final_context = truncate_context(
259
- web_ctx,
260
- local_context,
261
- system_prompt,
262
- query,
263
  )
264
 
265
- prompt = f"{final_context}\n{system_prompt}\n{query}\n"
 
 
 
 
266
 
267
  # LangSmith run
268
  run_id = None
@@ -275,51 +383,66 @@ if (
275
  "web_content": web_ctx,
276
  "local_context": local_context,
277
  "system_prompt": system_prompt,
278
- "question": query,
279
  "final_context": final_context,
 
280
  },
281
  )
282
  run_id = run.id
283
- except:
284
  pass
285
 
286
- # --- Stream UI ---
287
  with st.chat_message("assistant"):
288
- placeholder = st.empty()
289
- start = time.perf_counter()
290
- final_text = ""
291
-
292
- for partial in stream_generate(prompt):
293
- final_text = partial
294
- placeholder.markdown(final_text)
295
-
296
- gen_time = time.perf_counter() - start
297
- tokens = len(tokenizer.encode(final_text))
298
- tps = tokens / gen_time if gen_time > 0 else 0.0
299
-
300
- st.caption(
301
- f"🔎 {search_time:.2f}s search • "
302
- f"🧠 {gen_time:.2f}s generate • "
303
- f" {tps:.1f} tok/s • "
304
- f"🧮 {tokens} tokens"
305
- )
 
 
 
 
 
 
 
 
 
 
306
 
307
  if ls_client and run_id:
308
  try:
309
  ls_client.update_run(run_id, outputs={"answer": final_text})
310
- except:
311
  pass
312
 
313
  st.session_state.messages.append(
314
  {
315
  "role": "assistant",
316
  "content": final_text,
 
 
317
  "web_context": web_ctx,
318
  "local_context": local_context,
319
  "final_context": final_context,
 
320
  "search_time": search_time,
321
  "gen_time": gen_time,
322
- "tokens": tokens,
 
323
  "tps": tps,
324
  "run_id": run_id,
325
  "feedback": None,
 
2
  import time
3
  import threading
4
  import requests
5
+ import io
6
+
7
  import streamlit as st
8
  import torch
9
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, TextIteratorStreamer
10
 
11
+ # Optional parsing libs (best-effort)
12
+ try:
13
+ from pypdf import PdfReader # pip install pypdf
14
+ except Exception:
15
+ PdfReader = None
16
+
17
+ try:
18
+ import pandas as pd # pip install pandas
19
+ except Exception:
20
+ pd = None
21
+
22
  # Optional LangSmith
23
  try:
24
  from langsmith import Client as LangSmithClient
25
+ except Exception:
26
  LangSmithClient = None
27
 
28
  # =========================
 
36
 
37
  st.set_page_config(page_title="TeapotAI Chat", page_icon="🫖", layout="centered")
38
 
39
+
40
  # =========================
41
  # LOAD MODEL (CACHED)
42
  # =========================
 
48
  model.to(device).eval()
49
  return tokenizer, model, device
50
 
51
+
52
  tokenizer, model, device = load_model()
53
 
54
+
55
  # =========================
56
  # LANGSMITH (OPTIONAL)
57
  # =========================
 
62
  return LangSmithClient()
63
  return None
64
 
65
+
66
  ls_client = get_langsmith()
67
 
68
+
69
  # =========================
70
  # SESSION STATE
71
  # =========================
 
74
  if "pending_response" not in st.session_state:
75
  st.session_state.pending_response = None
76
 
77
+
78
  # =========================
79
  # HEADER (LOGO)
80
  # =========================
81
  col1, col2 = st.columns([1, 6])
82
  with col1:
83
+ st.image(LOGO_URL, use_container_width=True)
84
  with col2:
85
  st.markdown("## TeapotAI Chat")
86
  st.caption("Fast grounded answers with clean web context")
87
 
88
+
89
  # =========================
90
  # SIDEBAR SETTINGS
91
  # =========================
 
104
  height=180,
105
  )
106
 
107
+ st.markdown("### Local Context")
108
+ local_context_text = st.text_area(
109
  "Paste additional context (optional)",
110
+ height=140,
111
+ placeholder="This will be appended after web content...",
112
+ )
113
+
114
+ uploaded_files = st.file_uploader(
115
+ "Upload files to add to Local Context (pdf, txt, csv, md, json, etc.)",
116
+ type=None,
117
+ accept_multiple_files=True,
118
  )
119
 
 
120
 
121
  # =========================
122
+ # FILE PARSING -> STRING
123
+ # =========================
124
+ def _safe_decode(b: bytes) -> str:
125
+ # best effort decode without throwing
126
+ for enc in ("utf-8", "utf-16", "latin-1"):
127
+ try:
128
+ return b.decode(enc)
129
+ except Exception:
130
+ pass
131
+ return b.decode("utf-8", errors="ignore")
132
+
133
+
134
def parse_uploaded_file_to_text(file) -> str:
    """Convert an uploaded file into plain text for the local context.

    Supports PDF (via ``pypdf``, if installed) and CSV (via ``pandas``,
    if installed); anything else is decoded as text on a best-effort
    basis.  Parse failures are reported inline as a bracketed message
    rather than raised, so a bad upload never crashes the app.

    Args:
        file: Streamlit ``UploadedFile``-like object exposing ``.name``
            and ``.getvalue()``.

    Returns:
        Extracted text, or an ``[filename] ... error`` message on failure.
    """
    name = (file.name or "").lower()
    raw = file.getvalue()

    # PDF: extract text page by page, skipping pages with no text layer.
    if name.endswith(".pdf"):
        if not PdfReader:
            return (
                f"[{file.name}] PDF parsing not available (install pypdf). "
                f"Raw bytes={len(raw)}"
            )
        try:
            reader = PdfReader(io.BytesIO(raw))
            parts = []
            for page in reader.pages:
                txt = (page.extract_text() or "").strip()
                if txt:
                    parts.append(txt)
            return "\n\n".join(parts).strip()
        except Exception as e:
            return f"[{file.name}] PDF parse error: {e}"

    # CSV: round-trip through pandas to normalize the content.
    if name.endswith(".csv"):
        if not pd:
            return (
                f"[{file.name}] CSV parsing not available (install pandas). "
                f"Raw bytes={len(raw)}"
            )
        try:
            df = pd.read_csv(io.BytesIO(raw))
            # Keep it compact but readable
            return df.to_csv(index=False)
        except Exception as e:
            # fallback: raw text
            return f"[{file.name}] CSV parse error ({e}). Raw:\n{_safe_decode(raw)}"

    # JSON / TXT / MD / others -> best-effort decode
    return _safe_decode(raw).strip()
174
+
175
+
176
def build_local_context(text_area: str, files) -> str:
    """Combine the pasted text and all uploaded files into one context string.

    The free-text box comes first, followed by each file's parsed content
    under a ``--- FILE: name ---`` banner.  Empty pieces are dropped.
    """
    pieces = []

    pasted = text_area.strip()
    if pasted:
        pieces.append(pasted)

    for upload in files or []:
        body = parse_uploaded_file_to_text(upload).strip()
        if body:
            pieces.append(f"\n\n--- FILE: {upload.name} ---\n{body}")

    return "\n\n".join(pieces).strip()
188
+
189
+
190
+ local_context = build_local_context(local_context_text, uploaded_files)
191
+
192
+
193
+ # =========================
194
+ # WEB SEARCH (SNIPPETS ONLY) - ALWAYS ON
195
  # =========================
196
  def web_search_snippets(query: str):
197
  api_key = os.getenv("BRAVE_API_KEY") or st.secrets.get("BRAVE_API_KEY", None)
 
210
  timeout=6,
211
  )
212
  data = r.json()
213
+ except Exception:
214
  return "", 0.0
215
  t1 = time.perf_counter()
216
 
 
221
  if desc:
222
  snippets.append(desc)
223
 
224
+ clean_context = "\n\n".join(snippets) # paragraph-separated only
 
225
  return clean_context, (t1 - t0)
226
 
227
+
228
  # =========================
229
+ # TRUNCATE TO LAST 512 TOKENS (TAIL)
230
  # =========================
231
def truncate_context(web_ctx: str, local_ctx: str, system: str, question: str) -> str:
    """Fit web + local context into the model's token budget (tail-kept).

    Web content is placed before local content; if the combined context
    exceeds MAX_INPUT_TOKENS minus what the system prompt and question
    consume, only the most recent (trailing) tokens are kept.  Returns ""
    when the system prompt and question alone exhaust the budget.
    """
    combined = f"{web_ctx}\n\n{local_ctx}".strip()

    # Tokens already spoken for by the fixed parts of the prompt.
    reserved = len(tokenizer.encode(f"\n{system}\n{question}\n"))
    remaining = MAX_INPUT_TOKENS - reserved
    if remaining <= 0:
        return ""

    if not combined:
        return combined

    tokens = tokenizer.encode(combined)
    if len(tokens) <= remaining:
        return combined

    # Tail truncation: drop the oldest tokens, keep the newest.
    return tokenizer.decode(tokens[-remaining:], skip_special_tokens=True)
247
 
248
+
249
  # =========================
250
  # STREAM GENERATION
251
  # =========================
 
262
  streamer=streamer,
263
  )
264
 
265
+ thread = threading.Thread(target=run, daemon=True)
266
  thread.start()
267
 
268
  text = ""
 
270
  text += chunk
271
  yield text
272
 
273
+
274
  # =========================
275
  # FEEDBACK HANDLER (Native st.feedback)
276
  # =========================
 
288
  score=score,
289
  comment="thumbs_up" if score else "thumbs_down",
290
  )
291
+ except Exception:
292
  pass
293
 
294
+
295
  # =========================
296
  # RENDER CHAT HISTORY
297
  # =========================
298
  for i, msg in enumerate(st.session_state.messages):
299
  with st.chat_message(msg["role"]):
300
+ if msg["role"] == "user":
301
+ st.markdown(msg["content"])
302
+ continue
303
+
304
+ # Assistant messages: collapsed-by-default expander = "whole message response be the dropdown"
305
+ with st.expander("🫖 Assistant response (click to expand)", expanded=False):
306
+ st.markdown(msg["content"])
307
 
 
 
308
  st.caption(
309
  f"🔎 {msg['search_time']:.2f}s search • "
310
  f"🧠 {msg['gen_time']:.2f}s generate • "
311
  f"⚡ {msg['tps']:.1f} tok/s • "
312
+ f"🧾 in={msg['input_tokens']} • out={msg['output_tokens']}"
313
  )
314
 
315
+ # Show EXACT prompt passed into the model (and the parts)
316
+ st.markdown("---")
317
+ st.markdown("#### Prompt & Inputs (exactly what was passed to the model)")
318
+ st.markdown("**System prompt:**")
319
+ st.code(msg.get("system_prompt", ""), language="text")
320
+ st.markdown("**Question:**")
321
+ st.code(msg.get("question", ""), language="text")
322
+ st.markdown("**Full model input (prompt):**")
323
+ st.code(msg.get("prompt", ""), language="text")
324
+
325
+ # Native thumbs feedback (outside expander so it's still reachable)
326
+ key = f"feedback_{i}"
327
+ st.session_state.setdefault(key, msg.get("feedback"))
328
+ st.feedback(
329
+ "thumbs",
330
+ key=key,
331
+ disabled=msg.get("feedback") is not None,
332
+ on_change=handle_feedback,
333
+ args=(i,),
334
+ )
335
+
336
 
337
  # =========================
338
+ # USER INPUT
339
  # =========================
340
  query = st.chat_input("Ask a question...")
341
 
342
  if query:
343
+ # show user message first
344
  st.session_state.messages.append({"role": "user", "content": query})
345
  st.rerun()
346
 
347
+
348
  # =========================
349
+ # GENERATE AFTER RERUN
350
  # =========================
351
  if (
352
  st.session_state.messages
353
  and st.session_state.messages[-1]["role"] == "user"
354
  and st.session_state.pending_response is None
355
  ):
356
+ question = st.session_state.messages[-1]["content"]
357
 
358
+ # --- Web Search (always on) ---
359
+ web_ctx, search_time = web_search_snippets(question)
 
 
 
360
 
361
  # --- Strict Order Context ---
362
  final_context = truncate_context(
363
+ web_ctx=web_ctx,
364
+ local_ctx=local_context,
365
+ system=system_prompt,
366
+ question=question,
367
  )
368
 
369
+ # IMPORTANT: prompt is EXACTLY what we pass to the model
370
+ prompt = f"{final_context}\n{system_prompt}\n{question}\n".strip() + "\n"
371
+
372
+ # Token accounting (split input vs output)
373
+ input_tokens = len(tokenizer.encode(prompt))
374
 
375
  # LangSmith run
376
  run_id = None
 
383
  "web_content": web_ctx,
384
  "local_context": local_context,
385
  "system_prompt": system_prompt,
386
+ "question": question,
387
  "final_context": final_context,
388
+ "prompt": prompt,
389
  },
390
  )
391
  run_id = run.id
392
+ except Exception:
393
  pass
394
 
395
+ # --- Stream UI: assistant response itself is a dropdown ---
396
  with st.chat_message("assistant"):
397
+ with st.expander("🫖 Assistant response (click to expand)", expanded=False):
398
+ placeholder = st.empty()
399
+ start = time.perf_counter()
400
+ final_text = ""
401
+
402
+ for partial in stream_generate(prompt):
403
+ final_text = partial
404
+ placeholder.markdown(final_text)
405
+
406
+ gen_time = time.perf_counter() - start
407
+ output_tokens = len(tokenizer.encode(final_text))
408
+ tps = output_tokens / gen_time if gen_time > 0 else 0.0
409
+
410
+ st.caption(
411
+ f"🔎 {search_time:.2f}s search • "
412
+ f"🧠 {gen_time:.2f}s generate • "
413
+ f" {tps:.1f} tok/s • "
414
+ f"🧾 in={input_tokens} • out={output_tokens}"
415
+ )
416
+
417
+ st.markdown("---")
418
+ st.markdown("#### Prompt & Inputs (exactly what was passed to the model)")
419
+ st.markdown("**System prompt:**")
420
+ st.code(system_prompt, language="text")
421
+ st.markdown("**Question:**")
422
+ st.code(question, language="text")
423
+ st.markdown("**Full model input (prompt):**")
424
+ st.code(prompt, language="text")
425
 
426
  if ls_client and run_id:
427
  try:
428
  ls_client.update_run(run_id, outputs={"answer": final_text})
429
+ except Exception:
430
  pass
431
 
432
  st.session_state.messages.append(
433
  {
434
  "role": "assistant",
435
  "content": final_text,
436
+ "system_prompt": system_prompt,
437
+ "question": question,
438
  "web_context": web_ctx,
439
  "local_context": local_context,
440
  "final_context": final_context,
441
+ "prompt": prompt,
442
  "search_time": search_time,
443
  "gen_time": gen_time,
444
+ "input_tokens": input_tokens,
445
+ "output_tokens": output_tokens,
446
  "tps": tps,
447
  "run_id": run_id,
448
  "feedback": None,