Update src/streamlit_app.py

src/streamlit_app.py · CHANGED · +96 -107
@@ -43,11 +43,11 @@ st.set_page_config(page_title="TeapotAI Chat", page_icon="🫖", layout="centered")
 # =========================
 @st.cache_resource
 def load_model():
-
-
-
-
-    return
+    tok = AutoTokenizer.from_pretrained(MODEL_NAME)
+    mdl = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
+    dev = "cuda" if torch.cuda.is_available() else "cpu"
+    mdl.to(dev).eval()
+    return tok, mdl, dev


 tokenizer, model, device = load_model()

@@ -72,30 +72,27 @@ ls_client = get_langsmith()
 # =========================
 if "messages" not in st.session_state:
     st.session_state.messages = []
-if "pending_response" not in st.session_state:
-    st.session_state.pending_response = None


 # =========================
-# HEADER
+# HEADER
 # =========================
 col1, col2 = st.columns([1, 6])
 with col1:
-    # IMPORTANT: use_column_width=True (works on your Streamlit version)
     st.image(LOGO_URL, use_column_width=True)
 with col2:
     st.markdown("## TeapotAI Chat")
-    st.caption("
+    st.caption("Grounded answers with web context")


 # =========================
-# SIDEBAR
+# SIDEBAR
 # =========================
 with st.sidebar:
     st.markdown("### Settings")

     system_prompt = st.text_area(
-        "System
+        "System prompt",
         value=(
             "You are Teapot, an open-source AI assistant optimized for low-end devices, "
             "providing short, accurate responses without hallucinating while excelling at "

@@ -103,21 +100,20 @@ with st.sidebar:
             "If the context does not answer the question, reply exactly: "
             "'I am sorry but I don't have any information on that'."
         ),
-        height=
+        height=160,
     )

-    st.markdown("### Local Context (Text)")
     local_context_text = st.text_area(
-        "
-        height=
-        placeholder="
+        "Local context (optional)",
+        height=120,
+        placeholder="Extra context to append after web snippets…",
     )

-    st.markdown("### Local Context (File Upload)")
     uploaded_files = st.file_uploader(
-        "Upload
+        "Upload context files",
         accept_multiple_files=True,
         type=None,
+        help="PDF, TXT, CSV, MD, JSON, etc.",
     )


@@ -137,20 +133,18 @@ def parse_file_to_text(file) -> str:
     name = (file.name or "").lower()
     raw = file.getvalue()

-    # PDF
     if name.endswith(".pdf") and PdfReader:
         try:
             reader = PdfReader(io.BytesIO(raw))
-
-            for
-
-            if
-
-            return "\n\n".join(
+            parts = []
+            for p in reader.pages:
+                t = (p.extract_text() or "").strip()
+                if t:
+                    parts.append(t)
+            return "\n\n".join(parts).strip()
         except Exception as e:
             return f"[PDF parse error: {e}]"

-    # CSV
     if name.endswith(".csv") and pd:
         try:
             df = pd.read_csv(io.BytesIO(raw))

@@ -158,13 +152,11 @@ def parse_file_to_text(file) -> str:
         except Exception:
             return safe_decode(raw)

-
-    return safe_decode(raw)
+    return safe_decode(raw).strip()


 def build_local_context(text_block: str, files) -> str:
     chunks = []
-
     if text_block and text_block.strip():
         chunks.append(text_block.strip())

@@ -172,7 +164,7 @@ def build_local_context(text_block: str, files) -> str:
     for f in files:
         parsed = parse_file_to_text(f)
         if parsed and parsed.strip():
-            chunks.append(f"\n\n---
+            chunks.append(f"\n\n--- {f.name} ---\n{parsed.strip()}")

     return "\n\n".join(chunks).strip()

@@ -206,8 +198,7 @@ def web_search_snippets(query: str):

     snippets = []
     for item in data.get("web", {}).get("results", [])[:TOP_K_SEARCH]:
-        desc = item.get("description", "")
-        desc = desc.replace("<strong>", "").replace("</strong>", "").strip()
+        desc = (item.get("description") or "").replace("<strong>", "").replace("</strong>", "").strip()
         if desc:
             snippets.append(desc)

@@ -217,22 +208,24 @@
 # =========================
 # CONTEXT TRUNCATION (TAIL)
 # =========================
-def truncate_context(web_ctx, local_ctx, system, question):
-
+def truncate_context(web_ctx: str, local_ctx: str, system: str, question: str) -> str:
+    ctx = f"{web_ctx}\n\n{local_ctx}".strip()

     base = f"\n{system}\n{question}\n"
     base_tokens = tokenizer.encode(base)
     budget = MAX_INPUT_TOKENS - len(base_tokens)
-
     if budget <= 0:
         return ""

-    ctx_tokens = tokenizer.encode(
+    ctx_tokens = tokenizer.encode(ctx) if ctx else []
     if len(ctx_tokens) <= budget:
-        return
+        return ctx
+
+    return tokenizer.decode(ctx_tokens[-budget:], skip_special_tokens=True)
+

-
-    return tokenizer.
+def count_tokens(text: str) -> int:
+    return len(tokenizer.encode(text)) if text else 0


 # =========================

@@ -251,23 +244,22 @@ def stream_generate(prompt: str):
         streamer=streamer,
     )

-
-    thread.start()
+    threading.Thread(target=run, daemon=True).start()

-
+    acc = ""
     for chunk in streamer:
-
-        yield
+        acc += chunk
+        yield acc


 # =========================
 # FEEDBACK HANDLER
 # =========================
 def handle_feedback(idx: int):
-    val = st.session_state
-
-    msg["feedback"] = val
+    val = st.session_state.get(f"fb_{idx}")
+    st.session_state.messages[idx]["feedback"] = val

+    msg = st.session_state.messages[idx]
     if ls_client and msg.get("run_id"):
         score = 1 if val == "👍" else 0
         try:

@@ -282,41 +274,41 @@ def handle_feedback(idx: int):


 # =========================
-# RENDER
+# RENDER HISTORY
 # =========================
 for i, msg in enumerate(st.session_state.messages):
     with st.chat_message(msg["role"]):
-
-        st.markdown(msg["content"])
-        continue
-
-        # Entire response as collapsed dropdown (less visible inspector)
-        with st.expander("🫖 Assistant response (click to expand)", expanded=False):
-            st.markdown(msg["content"])
+        st.markdown(msg["content"])

+        if msg["role"] == "assistant":
+            # Light, normal-looking stats
             st.caption(
-                f"
-                f"
-                f"⚡ {msg['tps']:.1f} tok/s • "
-                f"🧾 in={msg['input_tokens']} • out={msg['output_tokens']}"
+                f"{msg['search_time']:.2f}s search • {msg['gen_time']:.2f}s gen • "
+                f"{msg['tps']:.1f} tok/s • in {msg['input_tokens']} • out {msg['output_tokens']}"
             )

-
-            st.
-
-
-
-
-
-
-
-
-
+            # Small inspector (collapsed)
+            with st.expander("Inspect context"):
+                st.markdown("**System**")
+                st.code(msg.get("system_prompt", ""), language="text")
+                st.markdown("**Question**")
+                st.code(msg.get("question", ""), language="text")
+                st.markdown("**Prompt (sent to model)**")
+                st.code(msg.get("prompt", ""), language="text")
+
+            key = f"fb_{i}"
+            st.session_state.setdefault(key, msg.get("feedback"))
+            st.feedback(
+                "thumbs",
+                key=key,
+                disabled=msg.get("feedback") is not None,
+                on_change=handle_feedback,
+                args=(i,),
+            )


 # =========================
-#
+# INPUT
 # =========================
 query = st.chat_input("Ask a question...")

@@ -326,29 +318,21 @@ if query:


 # =========================
-# GENERATE
+# GENERATE
 # =========================
-if (
-    st.session_state.messages
-    and st.session_state.messages[-1]["role"] == "user"
-    and st.session_state.pending_response is None
-):
+if st.session_state.messages and st.session_state.messages[-1]["role"] == "user":
     question = st.session_state.messages[-1]["content"]

-    #
+    # web search
     web_ctx, search_time = web_search_snippets(question)

-
-
-        local_context,
-        system_prompt,
-        question,
-    )
+    # truncate final context
+    final_context = truncate_context(web_ctx, local_context, system_prompt, question)

-    #
+    # prompt sent to model
     prompt = f"{final_context}\n{system_prompt}\n{question}\n"

-    input_tokens =
+    input_tokens = count_tokens(prompt)

     # LangSmith run (optional)
     run_id = None

@@ -358,36 +342,40 @@ if (
                 name="teapot_chat",
                 run_type="llm",
                 inputs={
-                    "
+                    "system_prompt": system_prompt,
                     "question": question,
+                    "prompt": prompt,
                 },
             )
             run_id = run.id
         except Exception:
             pass

+    # stream normally in chat
     with st.chat_message("assistant"):
-
-
-
-        final_text = ""
+        placeholder = st.empty()
+        start = time.perf_counter()
+        final_text = ""

-
-
-
+        for partial in stream_generate(prompt):
+            final_text = partial
+            placeholder.markdown(final_text)

-
-
-
+        gen_time = time.perf_counter() - start
+        output_tokens = count_tokens(final_text)
+        tps = output_tokens / gen_time if gen_time > 0 else 0.0

-
-
-
-
-            f"🧾 in={input_tokens} • out={output_tokens}"
-        )
+        st.caption(
+            f"{search_time:.2f}s search • {gen_time:.2f}s gen • "
+            f"{tps:.1f} tok/s • in {input_tokens} • out {output_tokens}"
+        )

-
+        with st.expander("Inspect context"):
+            st.markdown("**System**")
+            st.code(system_prompt, language="text")
+            st.markdown("**Question**")
+            st.code(question, language="text")
+            st.markdown("**Prompt (sent to model)**")
             st.code(prompt, language="text")

         if ls_client and run_id:

@@ -400,6 +388,8 @@ if (
         {
             "role": "assistant",
             "content": final_text,
+            "system_prompt": system_prompt,
+            "question": question,
             "prompt": prompt,
             "search_time": search_time,
             "gen_time": gen_time,

@@ -411,5 +401,4 @@
         }
     )

-    st.session_state.pending_response = None
     st.rerun()
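
Note: the stream_generate hunk above only shows the thread start and the yield loop. A minimal self-contained sketch of the TextIteratorStreamer pattern it appears to build on; the checkpoint name and max_new_tokens here are placeholders, not this app's actual values:

import threading

from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, TextIteratorStreamer

MODEL_NAME = "google/flan-t5-small"  # placeholder checkpoint, not the app's model

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)


def stream_generate(prompt: str):
    inputs = tokenizer(prompt, return_tensors="pt")
    streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)

    def run():
        # generate() blocks until done, so it runs in a daemon thread
        # while the caller consumes decoded text from the streamer
        model.generate(**inputs, max_new_tokens=128, streamer=streamer)

    threading.Thread(target=run, daemon=True).start()

    acc = ""
    for chunk in streamer:  # decoded text arrives piece by piece
        acc += chunk
        yield acc  # yield the running total, as the diff's loop does


for partial in stream_generate("Translate to French: I like tea."):
    print(partial)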
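Note: the reworked truncate_context keeps the tail of the combined web-plus-local context whenever it overflows the token budget (MAX_INPUT_TOKENS minus the encoded system prompt and question). A toy check of that rule, with a whitespace split standing in for tokenizer.encode/decode:

# Tail truncation in miniature: past the budget, only the LAST tokens survive,
# so the most recently appended context (here, the local notes) is what is kept.
def tail_truncate(text: str, budget: int) -> str:
    tokens = text.split()
    if len(tokens) <= budget:
        return text
    return " ".join(tokens[-budget:])


ctx = "web snippet one . web snippet two . local notes"
print(tail_truncate(ctx, budget=4))  # -> two . local notes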
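Note: only the response-parsing half of web_search_snippets appears in the diff. A hedged sketch of the Brave Web Search request that would produce the data dict parsed above; the endpoint and header follow Brave's public API docs, while the key handling, TOP_K_SEARCH value, timeout, and join separator are assumptions:

import os
import time

import requests

TOP_K_SEARCH = 3  # assumed value


def web_search_snippets(query: str):
    start = time.perf_counter()
    resp = requests.get(
        "https://api.search.brave.com/res/v1/web/search",
        params={"q": query},
        headers={"X-Subscription-Token": os.environ["BRAVE_API_KEY"]},
        timeout=10,
    )
    data = resp.json()

    snippets = []
    for item in data.get("web", {}).get("results", [])[:TOP_K_SEARCH]:
        desc = (item.get("description") or "").replace("<strong>", "").replace("</strong>", "").strip()
        if desc:
            snippets.append(desc)

    # callers unpack a (context, elapsed seconds) pair: web_ctx, search_time = ...
    return "\n".join(snippets), time.perf_counter() - start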