Update src/streamlit_app.py
src/streamlit_app.py  CHANGED  (+207 -38)
@@ -1,40 +1,209 @@
-import altair as alt
-import numpy as np
-import pandas as pd
+import os
+import re
+import threading
+from typing import List, Dict
+
+import requests
 import streamlit as st
+import torch
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, TextIteratorStreamer
 
-"""
-# Welcome to Streamlit!
-
-Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
-If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
-forums](https://discuss.streamlit.io).
-
-In the meantime, below is an example of what you can do with just a few lines of code:
-"""
-
-num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
-num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
-
-indices = np.linspace(0, 1, num_points)
-theta = 2 * np.pi * num_turns * indices
-radius = indices
-
-x = radius * np.cos(theta)
-y = radius * np.sin(theta)
-
-df = pd.DataFrame({
-    "x": x,
-    "y": y,
-    "idx": indices,
-    "rand": np.random.randn(num_points),
-})
-
-st.altair_chart(alt.Chart(df, height=700, width=700)
-    .mark_point(filled=True)
-    .encode(
-        x=alt.X("x", axis=None),
-        y=alt.Y("y", axis=None),
-        color=alt.Color("idx", legend=None, scale=alt.Scale()),
-        size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
-    ))
+
+# -----------------------
+# Config
+# -----------------------
+MODEL_NAME = "teapotai/tinyteapot"
+BRAVE_ENDPOINT = "https://api.search.brave.com/res/v1/web/search"
+TOP_K = 3
+TIMEOUT_SECS = 15
+
+
+# -----------------------
+# Model load (cached)
+# -----------------------
+@st.cache_resource
+def load_model():
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+    model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
+
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    model.to(device)
+    model.eval()
+    return tokenizer, model, device
+
+
+# -----------------------
+# Brave Search
+# -----------------------
+def brave_search_snippets(query: str, top_k: int = 3) -> List[Dict[str, str]]:
+    brave_api_key = os.getenv("BRAVE_API_KEY")
+    if not brave_api_key:
+        raise RuntimeError("Missing BRAVE_API_KEY env var.")
+
+    headers = {
+        "Accept": "application/json",
+        "X-Subscription-Token": brave_api_key,
+    }
+    params = {"q": query, "count": top_k}
+
+    resp = requests.get(
+        BRAVE_ENDPOINT,
+        headers=headers,
+        params=params,
+        timeout=TIMEOUT_SECS,
+    )
+    resp.raise_for_status()
+    data = resp.json()
+
+    results = []
+    web = data.get("web") or {}
+    items = web.get("results") or []
+    for item in items[:top_k]:
+        title = (item.get("title") or "").strip()
+        url = (item.get("url") or "").strip()
+        snippet = (item.get("description") or "").strip()
+        if title or url or snippet:
+            results.append({"title": title, "url": url, "snippet": snippet})
+    return results
+
+
+def format_context_from_results(results: List[Dict[str, str]]) -> str:
+    """
+    Stable, explicit formatting. If you want it to match some *other* exact template,
+    change only this function.
+    """
+    if not results:
+        return ""
+
+    blocks = []
+    for i, r in enumerate(results, start=1):
+        title = re.sub(r"\s+", " ", r.get("title", "")).strip()
+        url = re.sub(r"\s+", " ", r.get("url", "")).strip()
+        snippet = re.sub(r"\s+", " ", r.get("snippet", "")).strip()
+
+        blocks.append(
+            f"[{i}] {title}\n"
+            f"URL: {url}\n"
+            f"Snippet: {snippet}"
+        )
+    return "\n\n".join(blocks)
+
+
+# -----------------------
+# TinyTeapot generation (streaming)
+# -----------------------
+def build_prompt(context: str, system_prompt: str, question: str) -> str:
+    # EXACTLY your format: context + system_prompt + question
+    return f"{context}\n{system_prompt}\n{question}\n"
+
+
+def stream_generate(tokenizer, model, device, prompt: str, max_new_tokens: int, temperature: float, top_p: float):
+    inputs = tokenizer(prompt, return_tensors="pt").to(device)
+
+    do_sample = float(temperature) > 0.0
+    gen_kwargs = dict(
+        **inputs,
+        max_new_tokens=int(max_new_tokens),
+        do_sample=do_sample,
+        temperature=float(temperature) if do_sample else None,
+        top_p=float(top_p) if do_sample else None,
+        num_beams=1,
+    )
+
+    # Transformers streamer: yields decoded text pieces as generation proceeds
+    streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)
+
+    def _run():
+        # Remove None args (generate doesn't like None for some models)
+        clean_kwargs = {k: v for k, v in gen_kwargs.items() if v is not None}
+        model.generate(**clean_kwargs, streamer=streamer)
+
+    t = threading.Thread(target=_run, daemon=True)
+    t.start()
+
+    partial = ""
+    for piece in streamer:
+        partial += piece
+        yield partial
+
+
+# -----------------------
+# Streamlit UI
+# -----------------------
+st.set_page_config(page_title="TinyTeapot + Brave Search", page_icon="🫖", layout="centered")
+
+st.title("🫖 TinyTeapot + Brave Search (Top 3)")
+
+default_system_prompt = (
+    "You are Teapot, an open-source AI assistant optimized for low-end devices, "
+    "providing short, accurate responses without hallucinating while excelling at "
+    "information extraction and text summarization. "
+    "If the context does not answer the question, reply exactly: "
+    "'I am sorry but I don't have any information on that'."
+)
+
+with st.sidebar:
+    st.header("Settings")
+    system_prompt = st.text_area("System prompt", value=default_system_prompt, height=140)
+    max_new_tokens = st.slider("Max new tokens", 1, 512, 128, 1)
+    temperature = st.slider("Temperature (0 = greedy)", 0.0, 2.0, 0.0, 0.1)
+    top_p = st.slider("Top-p", 0.1, 1.0, 0.95, 0.05)
+    show_sources = st.checkbox("Show sources/context", value=True)
+
+# Session state for chat history
+if "messages" not in st.session_state:
+    st.session_state.messages = []  # list of {"role": "user"/"assistant", "content": str}
+
+# Render chat history
+for m in st.session_state.messages:
+    with st.chat_message(m["role"]):
+        st.markdown(m["content"])
+
+# Chat input
+question = st.chat_input("Ask a question (the app will Brave-search top 3 snippets)…")
+
+if question:
+    # Add user message
+    st.session_state.messages.append({"role": "user", "content": question})
+    with st.chat_message("user"):
+        st.markdown(question)
+
+    tokenizer, model, device = load_model()
+
+    # Get Brave context
+    try:
+        results = brave_search_snippets(question, top_k=TOP_K)
+        context = format_context_from_results(results)
+    except Exception as e:
+        # If Brave fails, keep context empty so your system prompt triggers the exact refusal.
+        context = ""
+        results = []
+        # You can uncomment this if you want to show the error:
+        # st.warning(f"Brave Search failed: {e}")
+
+    prompt = build_prompt(context=context, system_prompt=system_prompt, question=question)
+
+    with st.chat_message("assistant"):
+        if show_sources:
+            with st.expander("Sources / Context used", expanded=False):
+                if context.strip():
+                    st.code(context)
+                else:
+                    st.write("(No search context returned.)")
+
+        placeholder = st.empty()
+        final = ""
+
+        for partial in stream_generate(
+            tokenizer=tokenizer,
+            model=model,
+            device=device,
+            prompt=prompt,
+            max_new_tokens=max_new_tokens,
+            temperature=temperature,
+            top_p=top_p,
+        ):
+            final = partial
+            placeholder.markdown(final)
+
+    st.session_state.messages.append({"role": "assistant", "content": final})
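Note: the Brave call in brave_search_snippets can be exercised on its own before wiring it into the app. A minimal sketch, assuming a valid BRAVE_API_KEY is exported; the query string is illustrative:

import os
import requests

resp = requests.get(
    "https://api.search.brave.com/res/v1/web/search",
    headers={
        "Accept": "application/json",
        "X-Subscription-Token": os.environ["BRAVE_API_KEY"],
    },
    params={"q": "what is streamlit", "count": 3},  # query is illustrative
    timeout=15,
)
resp.raise_for_status()
# The app reads title/url/description from data["web"]["results"]
for item in (resp.json().get("web") or {}).get("results") or []:
    print(item.get("title"), "-", item.get("url"))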
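The prompt the model sees is simply the three parts newline-joined, in the order context, system prompt, question. A hand-worked example with one invented search result so the layout is visible; the result values are made up:

# One fake result, laid out the way format_context_from_results renders it.
context = (
    "[1] Streamlit docs\n"
    "URL: https://docs.streamlit.io\n"
    "Snippet: Streamlit turns Python scripts into shareable web apps."
)
system_prompt = "You are Teapot, ..."  # trimmed for the example
question = "What is Streamlit?"

prompt = f"{context}\n{system_prompt}\n{question}\n"  # same shape as build_prompt()
print(prompt)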
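The streaming pattern in stream_generate, where generate() runs on a worker thread while the main thread consumes the TextIteratorStreamer, also works outside Streamlit. A minimal sketch, assuming the teapotai/tinyteapot weights download cleanly; the prompt is illustrative:

import threading

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, TextIteratorStreamer

tokenizer = AutoTokenizer.from_pretrained("teapotai/tinyteapot")
model = AutoModelForSeq2SeqLM.from_pretrained("teapotai/tinyteapot")

inputs = tokenizer("What is a teapot?", return_tensors="pt")
streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)

# generate() blocks, so it runs on a daemon thread while we print pieces.
thread = threading.Thread(
    target=model.generate,
    kwargs=dict(**inputs, max_new_tokens=64, streamer=streamer),
    daemon=True,
)
thread.start()

for piece in streamer:  # yields decoded text chunks as they are produced
    print(piece, end="", flush=True)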
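To try the Space locally (assuming the usual layout and that streamlit, torch, transformers, and requests are installed), export BRAVE_API_KEY and run `streamlit run src/streamlit_app.py`. With the temperature slider left at 0 the app decodes greedily (do_sample=False), so answers are reproducible for a fixed search context.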