zakerytclarke committed on
Commit
2c84c9e
·
verified ·
1 Parent(s): 0b7fcb4

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +167 -99
src/streamlit_app.py CHANGED
@@ -1,49 +1,112 @@
1
  import os
2
  import re
3
- import threading
4
  from typing import List, Dict
5
 
6
  import requests
7
  import streamlit as st
8
  import torch
9
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, TextIteratorStreamer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
 
12
  # -----------------------
13
  # Config
14
  # -----------------------
15
- MODEL_NAME = "teapotai/tinyteapot"
16
  BRAVE_ENDPOINT = "https://api.search.brave.com/res/v1/web/search"
17
  TOP_K = 3
18
  TIMEOUT_SECS = 15
19
 
 
 
20
 
21
- # -----------------------
22
- # Model load (cached)
23
- # -----------------------
24
- @st.cache_resource
25
- def load_model():
26
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
27
- model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
28
 
29
- device = "cuda" if torch.cuda.is_available() else "cpu"
30
- model.to(device)
31
- model.eval()
32
- return tokenizer, model, device
 
 
 
 
 
 
 
33
 
34
 
35
  # -----------------------
36
- # Brave Search
37
  # -----------------------
 
 
 
 
 
38
  def brave_search_snippets(query: str, top_k: int = 3) -> List[Dict[str, str]]:
39
- brave_api_key = os.getenv("BRAVE_API_KEY")
40
  if not brave_api_key:
41
- raise RuntimeError("Missing BRAVE_API_KEY env var.")
42
 
43
- headers = {
44
- "Accept": "application/json",
45
- "X-Subscription-Token": brave_api_key,
46
- }
47
  params = {"q": query, "count": top_k}
48
 
49
  resp = requests.get(
@@ -69,8 +132,7 @@ def brave_search_snippets(query: str, top_k: int = 3) -> List[Dict[str, str]]:
69
 
70
  def format_context_from_results(results: List[Dict[str, str]]) -> str:
71
  """
72
- Stable, explicit formatting. If you want it to match some *other* exact template,
73
- change only this function.
74
  """
75
  if not results:
76
  return ""
@@ -81,6 +143,10 @@ def format_context_from_results(results: List[Dict[str, str]]) -> str:
81
  url = re.sub(r"\s+", " ", r.get("url", "")).strip()
82
  snippet = re.sub(r"\s+", " ", r.get("snippet", "")).strip()
83
 
 
 
 
 
84
  blocks.append(
85
  f"[{i}] {title}\n"
86
  f"URL: {url}\n"
@@ -90,98 +156,109 @@ def format_context_from_results(results: List[Dict[str, str]]) -> str:
90
 
91
 
92
  # -----------------------
93
- # TinyTeapot generation (streaming)
94
  # -----------------------
95
- def build_prompt(context: str, system_prompt: str, question: str) -> str:
96
- # EXACTLY your format: context + system_prompt + question
97
- return f"{context}\n{system_prompt}\n{question}\n"
98
-
99
-
100
- def stream_generate(tokenizer, model, device, prompt: str, max_new_tokens: int, temperature: float, top_p: float):
101
- inputs = tokenizer(prompt, return_tensors="pt").to(device)
102
-
103
- do_sample = float(temperature) > 0.0
104
- gen_kwargs = dict(
105
- **inputs,
106
- max_new_tokens=int(max_new_tokens),
107
- do_sample=do_sample,
108
- temperature=float(temperature) if do_sample else None,
109
- top_p=float(top_p) if do_sample else None,
110
- num_beams=1,
111
- )
112
 
113
- # Transformers streamer: yields decoded text pieces as generation proceeds
114
- streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)
 
115
 
116
- def _run():
117
- # Remove None args (generate doesn't like None for some models)
118
- clean_kwargs = {k: v for k, v in gen_kwargs.items() if v is not None}
119
- model.generate(**clean_kwargs, streamer=streamer)
 
 
120
 
121
- t = threading.Thread(target=_run, daemon=True)
122
- t.start()
123
 
124
- partial = ""
125
- for piece in streamer:
126
- partial += piece
127
- yield partial
 
 
 
 
 
 
 
 
 
128
 
129
 
130
  # -----------------------
131
- # Streamlit UI
132
  # -----------------------
133
- st.set_page_config(page_title="TinyTeapot + Brave Search", page_icon="🫖", layout="centered")
134
-
135
- st.title("🫖 TinyTeapot + Brave Search (Top 3)")
136
-
137
- default_system_prompt = (
138
- "You are Teapot, an open-source AI assistant optimized for low-end devices, "
139
- "providing short, accurate responses without hallucinating while excelling at "
140
- "information extraction and text summarization. "
141
- "If the context does not answer the question, reply exactly: "
142
- "'I am sorry but I don't have any information on that'."
143
- )
144
 
145
  with st.sidebar:
146
  st.header("Settings")
147
- system_prompt = st.text_area("System prompt", value=default_system_prompt, height=140)
148
- max_new_tokens = st.slider("Max new tokens", 1, 512, 128, 1)
149
- temperature = st.slider("Temperature (0 = greedy)", 0.0, 2.0, 0.0, 0.1)
150
- top_p = st.slider("Top-p", 0.1, 1.0, 0.95, 0.05)
 
 
 
 
 
151
  show_sources = st.checkbox("Show sources/context", value=True)
152
 
153
- # Session state for chat history
 
 
 
 
 
 
 
 
 
 
154
  if "messages" not in st.session_state:
155
- st.session_state.messages = [] # list of {"role": "user"/"assistant", "content": str}
156
 
157
- # Render chat history
158
  for m in st.session_state.messages:
159
  with st.chat_message(m["role"]):
160
  st.markdown(m["content"])
161
 
162
- # Chat input
163
- question = st.chat_input("Ask a question (the app will Brave-search top 3 snippets)…")
164
 
165
  if question:
166
- # Add user message
167
  st.session_state.messages.append({"role": "user", "content": question})
168
  with st.chat_message("user"):
169
  st.markdown(question)
170
 
171
- tokenizer, model, device = load_model()
172
-
173
- # Get Brave context
174
  try:
175
  results = brave_search_snippets(question, top_k=TOP_K)
176
  context = format_context_from_results(results)
177
- except Exception as e:
178
- # If Brave fails, keep context empty so your system prompt triggers the exact refusal.
179
- context = ""
180
  results = []
181
- # You can uncomment this if you want to show the error:
182
- # st.warning(f"Brave Search failed: {e}")
183
 
184
- prompt = build_prompt(context=context, system_prompt=system_prompt, question=question)
 
 
 
 
 
 
185
 
186
  with st.chat_message("assistant"):
187
  if show_sources:
@@ -192,18 +269,9 @@ if question:
192
  st.write("(No search context returned.)")
193
 
194
  placeholder = st.empty()
195
- final = ""
196
-
197
- for partial in stream_generate(
198
- tokenizer=tokenizer,
199
- model=model,
200
- device=device,
201
- prompt=prompt,
202
- max_new_tokens=max_new_tokens,
203
- temperature=temperature,
204
- top_p=top_p,
205
- ):
206
- final = partial
207
- placeholder.markdown(final)
208
-
209
- st.session_state.messages.append({"role": "assistant", "content": final})
 
1
  import os
2
  import re
3
+ import time
4
  from typing import List, Dict
5
 
6
  import requests
7
  import streamlit as st
8
  import torch
9
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
10
+
11
+ from teapotai import TeapotAI
12
+
13
+
14
+ # -----------------------
15
+ # Branding / Theme
16
+ # -----------------------
17
+ TEAPOT_LOGO_GIF = "https://teapotai.com/assets/logo.gif"
18
+
19
+ TEA_BG = "#fbf7ef" # warm off-white
20
+ TEA_PANEL = "#fffaf2" # slightly brighter
21
+ TEA_TEXT = "#1f2937" # slate-ish
22
+ TEA_MUTED = "#6b7280" # gray
23
+ TEA_ACCENT = "#c0841d" # warm amber
24
+ TEA_BORDER = "rgba(31, 41, 55, 0.10)"
25
+
26
+ st.set_page_config(
27
+ page_title="TeapotAI Chat",
28
+ page_icon="🫖",
29
+ layout="centered",
30
+ )
31
+
32
+ CUSTOM_CSS = f"""
33
+ <style>
34
+ /* App background */
35
+ .stApp {{
36
+ background: {TEA_BG};
37
+ color: {TEA_TEXT};
38
+ }}
39
+
40
+ /* Sidebar */
41
+ section[data-testid="stSidebar"] {{
42
+ background: {TEA_PANEL};
43
+ border-right: 1px solid {TEA_BORDER};
44
+ }}
45
+
46
+ /* Chat bubbles */
47
+ div[data-testid="stChatMessage"] {{
48
+ border-radius: 16px;
49
+ padding: 8px 10px;
50
+ }}
51
+
52
+ /* Inputs */
53
+ .stTextInput > div > div, .stTextArea > div > div {{
54
+ border-radius: 12px !important;
55
+ }}
56
+
57
+ /* Buttons */
58
+ .stButton button {{
59
+ border-radius: 12px;
60
+ border: 1px solid {TEA_BORDER};
61
+ }}
62
+
63
+ /* Accent-ish links */
64
+ a {{
65
+ color: {TEA_ACCENT} !important;
66
+ }}
67
+ </style>
68
+ """
69
+ st.markdown(CUSTOM_CSS, unsafe_allow_html=True)
70
 
71
 
72
  # -----------------------
73
  # Config
74
  # -----------------------
 
75
  BRAVE_ENDPOINT = "https://api.search.brave.com/res/v1/web/search"
76
  TOP_K = 3
77
  TIMEOUT_SECS = 15
78
 
79
+ MODEL_TINY = "teapotai/tinyteapot"
80
+ MODEL_LLM = "teapotai/teapotllm"
81
 
 
 
 
 
 
 
 
82
 
83
+ DEFAULT_SYSTEM_PROMPT = (
84
+ "You are Teapot, an open-source AI assistant optimized for low-end devices, "
85
+ "providing short, accurate responses without hallucinating while excelling at "
86
+ "information extraction and text summarization. "
87
+ "If the context does not answer the question, reply exactly: "
88
+ "'I am sorry but I don't have any information on that'."
89
+ )
90
+
91
+ DEFAULT_DOCUMENTS = [
92
+ """Teapot (Tiny Teapot) is an open-source small language model (~77 million parameters) fine-tuned on synthetic data and optimized to run locally on resource-constrained devices such as smartphones and CPUs. Teapot is trained to only answer using context from documents, reducing hallucinations. Teapot can perform a variety of tasks, including hallucination-resistant Question Answering (QnA), Retrieval-Augmented Generation (RAG), and JSON extraction. TeapotLLM is a fine tune of flan-t5-large that was trained on synthetic data generated by Deepseek v3 TeapotLLM can be hosted on low-power devices with as little as 2GB of CPU RAM such as a Raspberry Pi. Teapot is a model built by and for the community."""
93
+ ]
94
 
95
 
96
  # -----------------------
97
+ # Helpers
98
  # -----------------------
99
def get_brave_key() -> str:
    """Return the Brave Search API key, or None when it is not configured.

    Lookup order: Streamlit secrets (Streamlit Cloud deployments) first,
    then the BRAVE_API_KEY environment variable (local runs). Callers
    treat a falsy result as "missing" and raise their own error.
    """
    # NOTE(review): `"BRAVE_API_KEY" in st.secrets` raises FileNotFoundError
    # (StreamlitSecretNotFoundError subclasses it) when no secrets.toml
    # exists, which would crash env-var-only local runs — guard the lookup.
    try:
        if hasattr(st, "secrets") and "BRAVE_API_KEY" in st.secrets:
            return st.secrets.get("BRAVE_API_KEY")
    except FileNotFoundError:
        pass
    return os.getenv("BRAVE_API_KEY")
102
+
103
+
104
  def brave_search_snippets(query: str, top_k: int = 3) -> List[Dict[str, str]]:
105
+ brave_api_key = get_brave_key()
106
  if not brave_api_key:
107
+ raise RuntimeError("Missing BRAVE_API_KEY (set env var or Streamlit secrets).")
108
 
109
+ headers = {"Accept": "application/json", "X-Subscription-Token": brave_api_key}
 
 
 
110
  params = {"q": query, "count": top_k}
111
 
112
  resp = requests.get(
 
132
 
133
  def format_context_from_results(results: List[Dict[str, str]]) -> str:
134
  """
135
+ Stable formatting; plus you asked to strip <strong> tags.
 
136
  """
137
  if not results:
138
  return ""
 
143
  url = re.sub(r"\s+", " ", r.get("url", "")).strip()
144
  snippet = re.sub(r"\s+", " ", r.get("snippet", "")).strip()
145
 
146
+ # strip <strong> tags specifically, as requested
147
+ title = title.replace("<strong>", "").replace("</strong>", "")
148
+ snippet = snippet.replace("<strong>", "").replace("</strong>", "")
149
+
150
  blocks.append(
151
  f"[{i}] {title}\n"
152
  f"URL: {url}\n"
 
156
 
157
 
158
  # -----------------------
159
+ # Model / TeapotAI loader
160
  # -----------------------
161
@st.cache_resource
def load_teapot_ai(model_name: str) -> TeapotAI:
    """Build and cache a TeapotAI instance for *model_name*.

    Cached per model_name via st.cache_resource, so each model is
    downloaded and initialized at most once per server process.
    TinyTeapot is warmed on startup by an eager call at module level.
    """
    tok = AutoTokenizer.from_pretrained(model_name)
    seq2seq = AutoModelForSeq2SeqLM.from_pretrained(model_name)

    # Prefer GPU when one is present; otherwise stay on CPU.
    target_device = "cuda" if torch.cuda.is_available() else "cpu"
    seq2seq.to(target_device)
    seq2seq.eval()

    return TeapotAI(
        tokenizer=tok,
        model=seq2seq,
        documents=DEFAULT_DOCUMENTS,
    )
179
 
 
 
180
 
181
def typewriter_render(text: str, container, speed_chars_per_sec: float = 250.0):
    """Simulate streaming by revealing *text* one character at a time.

    TeapotAI.query returns the full answer at once (no token stream), so
    this re-renders growing prefixes into *container* to mimic streaming.
    An empty text renders once as "" and returns immediately.
    """
    if not text:
        container.markdown("")
        return

    # Clamp the speed so a zero/negative value never divides by zero or
    # sleeps longer than one second per character.
    pause = 1.0 / max(speed_chars_per_sec, 1.0)
    for end in range(1, len(text) + 1):
        container.markdown(text[:end])
        time.sleep(pause)
194
 
195
 
196
  # -----------------------
197
+ # UI
198
  # -----------------------
199
+ # Header with logo
200
+ col1, col2 = st.columns([1, 3], vertical_alignment="center")
201
+ with col1:
202
+ # Streamlit will fetch the gif directly
203
+ st.image(TEAPOT_LOGO_GIF, use_container_width=True)
204
+ with col2:
205
+ st.markdown("## TeapotAI Chat")
206
+ st.caption("Brave Search (top 3 snippets) → context → TeapotAI.query()")
 
 
 
207
 
208
  with st.sidebar:
209
  st.header("Settings")
210
+
211
+ model_choice = st.radio(
212
+ "Model",
213
+ options=[MODEL_TINY, MODEL_LLM],
214
+ index=0,
215
+ help="TinyTeapot loads by default. Switching loads the other model (cached).",
216
+ )
217
+
218
+ system_prompt = st.text_area("System prompt", value=DEFAULT_SYSTEM_PROMPT, height=140)
219
  show_sources = st.checkbox("Show sources/context", value=True)
220
 
221
+ # Optional: “typing” effect
222
+ typing_effect = st.checkbox("Typing effect", value=True)
223
+
224
+
225
+ # Load TinyTeapot on startup, regardless of current selection (your requirement)
226
+ _ = load_teapot_ai(MODEL_TINY)
227
+
228
+ # Load selected model (cached after first load)
229
+ teapot_ai = load_teapot_ai(model_choice)
230
+
231
+ # Chat state
232
  if "messages" not in st.session_state:
233
+ st.session_state.messages = [] # [{"role": "user"/"assistant", "content": str}]
234
 
235
+ # Render history
236
  for m in st.session_state.messages:
237
  with st.chat_message(m["role"]):
238
  st.markdown(m["content"])
239
 
240
+ question = st.chat_input("Ask a question… (@sources are fetched via Brave)")
 
241
 
242
  if question:
 
243
  st.session_state.messages.append({"role": "user", "content": question})
244
  with st.chat_message("user"):
245
  st.markdown(question)
246
 
247
+ # Brave context
 
 
248
  try:
249
  results = brave_search_snippets(question, top_k=TOP_K)
250
  context = format_context_from_results(results)
251
+ except Exception:
 
 
252
  results = []
253
+ context = ""
 
254
 
255
+ # TeapotAI query (context comes from Brave)
256
+ # NOTE: you explicitly want context="" param to hold Brave results after stripping strong tags.
257
+ answer = teapot_ai.query(
258
+ query=question,
259
+ context=context,
260
+ system_prompt=system_prompt,
261
+ )
262
 
263
  with st.chat_message("assistant"):
264
  if show_sources:
 
269
  st.write("(No search context returned.)")
270
 
271
  placeholder = st.empty()
272
+ if typing_effect:
273
+ typewriter_render(answer, placeholder, speed_chars_per_sec=350.0)
274
+ else:
275
+ placeholder.markdown(answer)
276
+
277
+ st.session_state.messages.append({"role": "assistant", "content": answer})