Spaces:

teapotai
/

tinyteapotchat

Running

App Files Files Community

zakerytclarke committed on Feb 23

Commit

3bbf8f3

verified ·

1 Parent(s): 0f3b8dd

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +110 -60

src/streamlit_app.py CHANGED Viewed

@@ -1,6 +1,5 @@
 import os
 import time
-import threading
 import requests
 import streamlit as st
@@ -50,7 +49,6 @@ tokenizer, model, device = load_model()
 # =========================
 @st.cache_resource
 def get_langsmith():
-    key = os.getenv("LANGCHAIN_API_KEY") or os.getenv("LANGSMITH_API_KEY") or os.getenv("LANGCHAIN_TRACING_V2")
     if (os.getenv("LANGCHAIN_API_KEY") or os.getenv("LANGSMITH_API_KEY")) and LangSmithClient:
         return LangSmithClient()
     return None
@@ -59,6 +57,46 @@ def get_langsmith():
 ls_client = get_langsmith()
 # =========================
 # SESSION STATE
 # =========================
@@ -66,18 +104,27 @@ if "messages" not in st.session_state:
     st.session_state.messages = []
 if "needs_answer" not in st.session_state:
     st.session_state.needs_answer = False
 # =========================
 # HEADER (prevent logo flash)
-# Use a fixed pixel width to avoid layout shift / big flash.
 # =========================
 col1, col2 = st.columns([1, 7], vertical_alignment="center")
 with col1:
-    st.image(LOGO_URL, width=56)  # fixed width prevents "flash huge"
 with col2:
     st.markdown("## TeapotAI Chat")
-    st.caption("Teapot is a 77 million parameter LLM designed to generate ")
 # =========================
@@ -88,13 +135,7 @@ with st.sidebar:
     system_prompt = st.text_area(
         "System prompt",
-        value=(
-            "You are Teapot, an open-source AI assistant optimized for running on low-end cpu devices, "
-            "providing short, accurate responses without hallucinating while excelling at "
-            "information extraction and text summarization. "
-            "If the context does not answer the question, reply exactly: "
-            "'I am sorry but I don't have any information on that'."
-        ),
         height=160,
     )
@@ -104,6 +145,21 @@ with st.sidebar:
         placeholder="Extra context appended after web snippets…",
     )
 # =========================
 # WEB SEARCH (ALWAYS ON)
@@ -162,9 +218,9 @@ def count_tokens(text: str) -> int:
 # =========================
 # LANGSMITH-TRACED ANSWER FUNCTION
-# (signature exactly: context, system_prompt, question -> answer)
 # =========================
 if traceable:
     @traceable(name="teapot_answer")
     def traced_answer(context: str, system_prompt: str, question: str) -> str:
         prompt = f"{context}\n{system_prompt}\n{question}\n"
@@ -176,9 +232,10 @@ if traceable:
                 do_sample=False,
                 num_beams=1,
             )
-        text = tokenizer.decode(out[0], skip_special_tokens=True)
-        return text
 else:
     def traced_answer(context: str, system_prompt: str, question: str) -> str:
         prompt = f"{context}\n{system_prompt}\n{question}\n"
         inputs = tokenizer(prompt, return_tensors="pt").to(device)
@@ -193,7 +250,6 @@ else:
 def get_trace_id_if_available() -> str | None:
-    # Works when running inside a @traceable function call
     if not get_current_run_tree:
         return None
     try:
@@ -217,7 +273,6 @@ def handle_feedback(idx: int):
     if ls_client and trace_id:
         score = 1 if val == "👍" else 0
         try:
-            # LangSmith SDK supports trace_id= for feedback association
             ls_client.create_feedback(
                 trace_id=trace_id,
                 key="thumb_rating",
@@ -230,6 +285,8 @@ def handle_feedback(idx: int):
 # =========================
 # RENDER HISTORY
 # =========================
 for i, msg in enumerate(st.session_state.messages):
     with st.chat_message(msg["role"]):
@@ -237,17 +294,11 @@ for i, msg in enumerate(st.session_state.messages):
             st.markdown(msg["content"])
             continue
-        # Assistant
-        # Info icon popover with full prompt/context
-        # (st.popover is stable in your Streamlit range; no rerun on open/close)
-        c1, c2 = st.columns([1, 12], vertical_alignment="center")
-        with c1:
-            st.markdown(msg["content"])
-        with c2:
             key = f"fb_{i}"
             st.session_state.setdefault(key, msg.get("feedback"))
             st.feedback(
@@ -258,18 +309,10 @@ for i, msg in enumerate(st.session_state.messages):
                 args=(i,),
             )
-        c3, c4 = st.columns([1, 12], vertical_alignment="center")
-        with c3:
-           st.caption(
-                f"🔎 {msg['search_time']:.2f}s  (search)"
-                f"🧠 {msg['gen_time']:.2f}s (generation) "
-                f"⚡ {msg['tps']:.1f} tok/s  "
-                f"🧾 {msg['input_tokens']} input tokens • {msg['output_tokens']} output tokens"
-            )
-        with c4:
             with st.popover("ℹ️", help="Inspect"):
                 st.markdown("**Context**")
                 st.code(msg.get("context", ""), language="text")
@@ -277,11 +320,16 @@ for i, msg in enumerate(st.session_state.messages):
                 st.code(msg.get("system_prompt", ""), language="text")
                 st.markdown("**Question**")
                 st.code(msg.get("question", ""), language="text")
 # =========================
@@ -313,32 +361,40 @@ if (
     prompt = f"{context}\n{system_prompt}\n{question}\n"
     input_tokens = count_tokens(prompt)
-    # Run traced answer (returns answer; trace_id obtained from current run tree)
     with st.chat_message("assistant"):
-        placeholder = st.empty()
         start = time.perf_counter()
-        # Generate full answer first (traced), then "stream" it to UI quickly.
-        # This keeps LangSmith tracing simple/reliable while still giving a streaming UX.
         answer = traced_answer(context, system_prompt, question)
         trace_id = get_trace_id_if_available()
-        # Typewriter-ish stream (fast, looks normal)
         buf = ""
         for ch in answer:
             buf += ch
             placeholder.markdown(buf)
-            # small delay; tune if you want faster/slower
             time.sleep(0.002)
         gen_time = time.perf_counter() - start
         output_tokens = count_tokens(answer)
         tps = output_tokens / gen_time if gen_time > 0 else 0.0
-        # Metrics + info popover for this live message
-        c1, c2 = st.columns([1, 12], vertical_alignment="center")
-        with c1:
             with st.popover("ℹ️", help="Inspect"):
                 st.markdown("**Context**")
                 st.code(context, language="text")
@@ -348,13 +404,7 @@ if (
                 st.code(question, language="text")
                 st.markdown("**Prompt**")
                 st.code(prompt, language="text")
-        with c2:
-            st.caption(
-                f"🔎 {search_time:.2f}s (search) "
-                f"🧠 {gen_time:.2f}s (generation) "
-                f"⚡ {tps:.1f} tok/s  "
-                f"🧾 {input_tokens} input tokens • {output_tokens} output tokens"
-            )
     # Persist assistant message
     st.session_state.messages.append(

 import os
 import time
 import requests
 import streamlit as st
 # =========================
 @st.cache_resource
 def get_langsmith():
     """Return a LangSmith client when an API key is configured, else ``None``.
     A client is built only if ``LANGCHAIN_API_KEY`` or ``LANGSMITH_API_KEY``
     holds a non-empty value and ``LangSmithClient`` is truthy.
     """
     api_key = os.getenv("LANGCHAIN_API_KEY") or os.getenv("LANGSMITH_API_KEY")
     if not api_key or not LangSmithClient:
         return None
     return LangSmithClient()
 ls_client = get_langsmith()
+# =========================
+# SAMPLE SEED (with full debug fields)
+# =========================
+# Canned user/assistant exchange used to pre-populate the chat.
+SAMPLE_QUESTION = "who are you"
+# Default text for the sidebar "System prompt" box; also reused by the sample below.
+DEFAULT_SYSTEM_PROMPT = (
+    "You are Teapot, an open-source AI assistant optimized for running on low-end cpu devices, "
+    "providing short, accurate responses without hallucinating while excelling at "
+    "information extraction and text summarization. "
+    "If the context does not answer the question, reply exactly: "
+    "'I am sorry but I don't have any information on that'."
+)
+SAMPLE_SYSTEM_PROMPT = DEFAULT_SYSTEM_PROMPT
+SAMPLE_CONTEXT = (
+    "Teapot is an open-source AI assistant optimized for running on low-end cpu devices."
+)
+SAMPLE_ANSWER = "I am Teapot, an open-source AI assistant optimized for running on low-end cpu devices."
+# Newline-joined in the order context, system prompt, question.
+SAMPLE_PROMPT = f"{SAMPLE_CONTEXT}\n{SAMPLE_SYSTEM_PROMPT}\n{SAMPLE_QUESTION}\n"
+SAMPLE_USER_MSG = {"role": "user", "content": SAMPLE_QUESTION}
+# Assistant entry with the debug fields the history renderer looks up
+# (context, system_prompt, question, prompt, timings, token counts, feedback).
+SAMPLE_ASSISTANT_MSG = {
+    "role": "assistant",
+    "content": SAMPLE_ANSWER,
+    "context": SAMPLE_CONTEXT,
+    "system_prompt": SAMPLE_SYSTEM_PROMPT,
+    "question": SAMPLE_QUESTION,
+    "prompt": SAMPLE_PROMPT,
+    # Hard-coded display values; nothing below is measured at runtime.
+    "search_time": 0.37,
+    "gen_time": 0.67,
+    "input_tokens": 245,
+    "output_tokens": 24,
+    "tps": 35.9,
+    # No trace and no rating are attached to the canned answer.
+    "trace_id": None,
+    "feedback": None,
+}
 # =========================
 # SESSION STATE
 # =========================
     st.session_state.messages = []
 if "needs_answer" not in st.session_state:
     st.session_state.needs_answer = False
+if "seeded" not in st.session_state:
+    st.session_state.seeded = False
+# Seed exactly once on first load
+if (not st.session_state.seeded) and (len(st.session_state.messages) == 0):
+    st.session_state.messages = [SAMPLE_USER_MSG, SAMPLE_ASSISTANT_MSG]
+    st.session_state.seeded = True
 # =========================
 # HEADER (prevent logo flash)
 # =========================
 # Two columns weighted 1:7 — logo on the left, title and tagline on the right.
 col1, col2 = st.columns([1, 7], vertical_alignment="center")
 with col1:
+    # Fixed 56px width: this is what the "prevent logo flash" banner refers to.
+    st.image(LOGO_URL, width=56)
 with col2:
     st.markdown("## TeapotAI Chat")
+    st.caption(
+        "Teapot is a 77M-parameter LLM optimized for fast CPU inference that only generates answers "
+        "from the provided context to minimize hallucinations."
+    )
 # =========================
     # Text area for the system prompt, initially filled with DEFAULT_SYSTEM_PROMPT.
     system_prompt = st.text_area(
         "System prompt",
+        value=DEFAULT_SYSTEM_PROMPT,
         height=160,
     )
         placeholder="Extra context appended after web snippets…",
     )
+    st.markdown("### Conversation")
+    c1, c2 = st.columns(2)
+    with c1:
+        if st.button("Load sample"):
+            st.session_state.messages = [SAMPLE_USER_MSG, SAMPLE_ASSISTANT_MSG]
+            st.session_state.needs_answer = False
+            st.session_state.seeded = True
+            st.rerun()
+    with c2:
+        if st.button("Clear chat"):
+            st.session_state.messages = []
+            st.session_state.needs_answer = False
+            st.session_state.seeded = True
+            st.rerun()
 # =========================
 # WEB SEARCH (ALWAYS ON)
 # =========================
 # LANGSMITH-TRACED ANSWER FUNCTION
 # =========================
 if traceable:
     @traceable(name="teapot_answer")
     def traced_answer(context: str, system_prompt: str, question: str) -> str:
         prompt = f"{context}\n{system_prompt}\n{question}\n"
                 do_sample=False,
                 num_beams=1,
             )
+        return tokenizer.decode(out[0], skip_special_tokens=True)
 else:
     def traced_answer(context: str, system_prompt: str, question: str) -> str:
         prompt = f"{context}\n{system_prompt}\n{question}\n"
         inputs = tokenizer(prompt, return_tensors="pt").to(device)
 def get_trace_id_if_available() -> str | None:
     if not get_current_run_tree:
         return None
     try:
     if ls_client and trace_id:
         score = 1 if val == "👍" else 0
         try:
             ls_client.create_feedback(
                 trace_id=trace_id,
                 key="thumb_rating",
 # =========================
 # RENDER HISTORY
+# Row 1: message + feedback
+# Row 2: inspect + debug metrics
 # =========================
 for i, msg in enumerate(st.session_state.messages):
     with st.chat_message(msg["role"]):
             st.markdown(msg["content"])
             continue
+        # Row 1
+        msg_col, fb_col = st.columns([14, 1], vertical_alignment="center")
+        with msg_col:
+            st.markdown(msg.get("content", ""))
+        with fb_col:
             key = f"fb_{i}"
             st.session_state.setdefault(key, msg.get("feedback"))
             st.feedback(
                 args=(i,),
             )
+        # Row 2
+        inspect_col, metrics_col = st.columns([1, 12], vertical_alignment="center")
+        with inspect_col:
             with st.popover("ℹ️", help="Inspect"):
                 st.markdown("**Context**")
                 st.code(msg.get("context", ""), language="text")
                 st.code(msg.get("system_prompt", ""), language="text")
                 st.markdown("**Question**")
                 st.code(msg.get("question", ""), language="text")
+                st.markdown("**Prompt**")
+                st.code(msg.get("prompt", ""), language="text")
+        with metrics_col:
+            st.caption(
+                f"🔎 {msg.get('search_time', 0.0):.2f}s (search) "
+                f"🧠 {msg.get('gen_time', 0.0):.2f}s (generation) "
+                f"⚡ {msg.get('tps', 0.0):.1f} tok/s  "
+                f"🧾 {msg.get('input_tokens', 0)} input tokens • {msg.get('output_tokens', 0)} output tokens"
+            )
 # =========================
     prompt = f"{context}\n{system_prompt}\n{question}\n"
     input_tokens = count_tokens(prompt)
+    # Run traced answer
     with st.chat_message("assistant"):
+        # Row 1: message + feedback (feedback disabled until persisted)
+        msg_col, fb_col = st.columns([14, 1], vertical_alignment="center")
+        with msg_col:
+            placeholder = st.empty()
+        with fb_col:
+            st.feedback("thumbs", key="live_fb", disabled=True)
         start = time.perf_counter()
         answer = traced_answer(context, system_prompt, question)
         trace_id = get_trace_id_if_available()
+        # Stream into the message column
         buf = ""
         for ch in answer:
             buf += ch
             placeholder.markdown(buf)
             time.sleep(0.002)
         gen_time = time.perf_counter() - start
         output_tokens = count_tokens(answer)
         tps = output_tokens / gen_time if gen_time > 0 else 0.0
+        # Row 2: inspect + metrics
+        inspect_col, metrics_col = st.columns([12, 1], vertical_alignment="center")
+        with inspect_col:
+            st.caption(
+                f"🔎 {search_time:.2f}s (search) "
+                f"🧠 {gen_time:.2f}s (generation) "
+                f"⚡ {tps:.1f} tok/s  "
+                f"🧾 {input_tokens} input tokens • {output_tokens} output tokens"
+            )
+        with metrics_col:
             with st.popover("ℹ️", help="Inspect"):
                 st.markdown("**Context**")
                 st.code(context, language="text")
                 st.code(question, language="text")
                 st.markdown("**Prompt**")
                 st.code(prompt, language="text")
     # Persist assistant message
     st.session_state.messages.append(