Spaces:
Sleeping
Sleeping
Deploy TinyModel1Space from GitHub Actions
Browse files- README.md +4 -0
- scripts/google_cse_client.py +116 -0
- scripts/universal_brain_chat.py +163 -5
README.md
CHANGED
|
@@ -19,10 +19,14 @@ pinned: false
|
|
| 19 |
### Secrets (recommended)
|
| 20 |
|
| 21 |
- `HF_TOKEN` — read token so Hub model downloads are reliable (same as classic classifier Space).
|
|
|
|
| 22 |
|
| 23 |
### Optional environment variables
|
| 24 |
|
| 25 |
- `HORIZON2_MODEL` — Hugging Face id for the **generative** instruct model (default in code: SmolLM2-360M-Instruct if unset in image).
|
|
|
|
|
|
|
|
|
|
| 26 |
|
| 27 |
### Note
|
| 28 |
|
|
|
|
| 19 |
### Secrets (recommended)
|
| 20 |
|
| 21 |
- `HF_TOKEN` — read token so Hub model downloads are reliable (same as classic classifier Space).
|
| 22 |
+
- `GOOGLE_CSE_API_KEY` — Google Cloud API key restricted to **Custom Search API** (enables `/web` and smart-route **web search** in chat).
|
| 23 |
|
| 24 |
### Optional environment variables
|
| 25 |
|
| 26 |
- `HORIZON2_MODEL` — Hugging Face id for the **generative** instruct model (default in code: SmolLM2-360M-Instruct if unset in image).
|
| 27 |
+
- `GOOGLE_CSE_CX` — Programmable Search Engine id (`cx` from the [PSE control panel](https://programmablesearchengine.google.com/controlpanel/all)); required together with `GOOGLE_CSE_API_KEY` for web search.
|
| 28 |
+
- `GOOGLE_CSE_NUM` — results per request, 1–10 (default 5).
|
| 29 |
+
- `GOOGLE_CSE_SAFE` — optional safe-search level for Google `cse.list` (e.g. `off`, `active`).
|
| 30 |
|
| 31 |
### Note
|
| 32 |
|
scripts/google_cse_client.py
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Google Programmable Search Engine (Custom Search JSON API) — minimal stdlib client.
|
| 2 |
+
|
| 3 |
+
Env (see also `universal_brain_chat` / Space README):
|
| 4 |
+
GOOGLE_CSE_API_KEY — required
|
| 5 |
+
GOOGLE_CSE_CX — Programmable Search Engine id (required)
|
| 6 |
+
GOOGLE_CSE_NUM — optional, 1–10 (default 5)
|
| 7 |
+
GOOGLE_CSE_SAFE — optional, e.g. ``off`` or ``active`` (see Google ``cse.list`` reference)
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
from __future__ import annotations
|
| 11 |
+
|
| 12 |
+
import json
|
| 13 |
+
import os
|
| 14 |
+
import urllib.error
|
| 15 |
+
import urllib.parse
|
| 16 |
+
import urllib.request
|
| 17 |
+
from dataclasses import dataclass
|
| 18 |
+
|
| 19 |
+
_CSE_ENDPOINT = "https://www.googleapis.com/customsearch/v1"
|
| 20 |
+
_DEFAULT_UA = "TinyModel-UniversalBrain/1.0 (+https://github.com/HyperlinksSpace/TinyModel)"
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
@dataclass(frozen=True)
class CSEHit:
    """A single web search result: title, link, and snippet text.

    Instances are immutable (``frozen=True``), so a hit can be shared freely
    between the search call and the Markdown formatter without being altered.
    """

    # Display title of the result page.
    title: str
    # URL of the result page.
    link: str
    # Short text excerpt shown for the result; may be an empty string.
    snippet: str
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def read_google_cse_settings() -> tuple[str | None, str | None, int, str | None]:
    """Collect the Google CSE configuration from the process environment.

    Returns:
        A ``(api_key, cx, num, safe)`` tuple:

        * ``api_key`` -- ``GOOGLE_CSE_API_KEY`` stripped, or ``None`` when unset/blank.
        * ``cx`` -- ``GOOGLE_CSE_CX`` stripped, or ``None`` when unset/blank.
        * ``num`` -- ``GOOGLE_CSE_NUM`` parsed as an int and clamped to 1..10;
          5 when the variable is unset, blank, or not an integer.
        * ``safe`` -- ``GOOGLE_CSE_SAFE`` stripped, or ``None`` when unset/blank.
    """

    def _stripped(name: str) -> str:
        # Treat an unset variable and an empty one the same way.
        return (os.environ.get(name) or "").strip()

    api_key = _stripped("GOOGLE_CSE_API_KEY") or None
    engine_id = _stripped("GOOGLE_CSE_CX") or None

    count_text = (os.environ.get("GOOGLE_CSE_NUM") or "5").strip()
    try:
        requested = int(count_text)
    except ValueError:
        # Unparseable value: fall back to the documented default.
        count = 5
    else:
        count = max(1, min(10, requested))

    safe_level = _stripped("GOOGLE_CSE_SAFE") or None
    return api_key, engine_id, count, safe_level
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def google_cse_search(
    query: str,
    *,
    api_key: str,
    cx: str,
    num: int = 5,
    safe: str | None = None,
    timeout_sec: float = 20.0,
) -> list[CSEHit]:
    """Run one Custom Search JSON API request and return the usable hits.

    Args:
        query: Search text; surrounding whitespace is stripped. An empty or
            ``None`` query returns ``[]`` without any network request.
        api_key: Value sent as the ``key`` query parameter.
        cx: Value sent as the ``cx`` query parameter.
        num: Requested result count, clamped to 1..10 before sending.
        safe: Optional value for the ``safe`` query parameter; omitted when falsy.
        timeout_sec: Timeout handed to ``urllib.request.urlopen``.

    Returns:
        One ``CSEHit`` per response item that carries a non-empty ``link``.
        Items without a title get ``"(no title)"``. Returns ``[]`` when the
        response has no ``items`` list.

    Raises:
        RuntimeError: on an HTTP error status, a network failure or timeout,
            a response body that is not valid JSON, or a JSON body that
            contains an ``error`` entry.
    """
    q = (query or "").strip()
    if not q:
        return []
    n = max(1, min(10, num))
    params: dict[str, str] = {"key": api_key, "cx": cx, "q": q, "num": str(n)}
    if safe:
        params["safe"] = safe
    url = f"{_CSE_ENDPOINT}?{urllib.parse.urlencode(params)}"
    req = urllib.request.Request(url, headers={"User-Agent": _DEFAULT_UA})
    try:
        with urllib.request.urlopen(req, timeout=timeout_sec) as resp:
            raw = resp.read().decode("utf-8", errors="replace")
    except urllib.error.HTTPError as e:
        body = e.read().decode("utf-8", errors="replace") if e.fp else ""
        # Default to the (truncated) raw body; refine it only when the body
        # really is a JSON object with a usable "error" entry.
        msg = body[:500] or str(e)
        try:
            payload = json.loads(body)
        except json.JSONDecodeError:
            payload = None
        err = payload.get("error") if isinstance(payload, dict) else None
        if isinstance(err, dict):
            msg = str(err.get("message") or msg)
        elif err:
            msg = str(err)
        raise RuntimeError(f"Google CSE HTTP {e.code}: {msg}") from e
    except OSError as e:
        # URLError is an OSError subclass; catching OSError also covers read
        # timeouts and connection resets raised while reading the body, which
        # are not wrapped in URLError.
        raise RuntimeError(f"Google CSE network error: {e}") from e

    try:
        data = json.loads(raw)
    except json.JSONDecodeError as e:
        # Keep the single-exception contract for non-JSON bodies.
        raise RuntimeError(f"Google CSE returned invalid JSON: {e}") from e
    if isinstance(data, dict) and "error" in data:
        err = data.get("error") or {}
        msg = err.get("message", str(err)) if isinstance(err, dict) else str(err)
        raise RuntimeError(f"Google CSE API error: {msg}")

    items = data.get("items") if isinstance(data, dict) else None
    if not isinstance(items, list):
        return []

    out: list[CSEHit] = []
    for it in items:
        if not isinstance(it, dict):
            continue
        title = str(it.get("title") or "").strip()
        link = str(it.get("link") or "").strip()
        snippet = str(it.get("snippet") or "").strip()
        # A hit without a URL cannot be cited, so it is dropped.
        if link:
            out.append(CSEHit(title=title or "(no title)", link=link, snippet=snippet))
    return out
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
def format_cse_hits_markdown(hits: list[CSEHit], *, for_chat: bool) -> str:
    """Render search hits as a Markdown block.

    Args:
        hits: Results to render, numbered from 1 in the given order.
        for_chat: When true, the block opens with grounding and citation
            instructions; otherwise it opens with a plain results heading.

    Returns:
        The Markdown text with surrounding whitespace stripped, or
        ``"(No web results.)"`` when ``hits`` is empty.
    """
    if not hits:
        return "(No web results.)"

    if for_chat:
        heading = (
            "### Web search snippets (Google Programmable Search)\n"
            "Ground factual claims that depend on current or external information in these excerpts when they "
            "apply. Cite sources as **[Web n]** and include the page URL. If snippets are insufficient, say so."
        )
    else:
        heading = "### Google web search results\n"

    entries = [
        (
            f"**[Web {rank}]** {hit.title}\n"
            f"- **URL:** {hit.link}\n"
            f"- **Snippet:** {hit.snippet}\n"
        )
        for rank, hit in enumerate(hits, 1)
    ]
    return "\n".join([heading, *entries]).strip()
|
scripts/universal_brain_chat.py
CHANGED
|
@@ -53,6 +53,33 @@ DEFAULT_MEMORY_DB = str(_REPO / ".tmp" / "ub_chat_memory.sqlite")
|
|
| 53 |
if str(_scripts) not in sys.path:
|
| 54 |
sys.path.insert(0, str(_scripts))
|
| 55 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
from horizon2_core import ( # noqa: E402
|
| 57 |
DEFAULT_CHAT_SYSTEM,
|
| 58 |
DEFAULT_INSTRUCTION_MODEL,
|
|
@@ -74,6 +101,11 @@ from horizon3_store import ( # noqa: E402
|
|
| 74 |
list_for_scope,
|
| 75 |
put,
|
| 76 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
from nl_controls import parse_control_action # noqa: E402
|
| 78 |
from rag_faq_smoke import _pick_model, hybrid_retrieve, load_chunks # noqa: E402
|
| 79 |
from tinymodel_runtime import TinyModelRuntime # noqa: E402
|
|
@@ -117,7 +149,7 @@ HELP_TEXT = """**How to use**
|
|
| 117 |
- *Turn off the FAQ context*, *Disable RAG snippets*, *Turn FAQ back on* -> toggles whether FAQ excerpts are injected into the chat system context
|
| 118 |
- *Turn off smart routing*, *Go back to normal chat only* -> disables the JSON intent router (slash commands still work)
|
| 119 |
- *Show the brain trace*, *Hide debug trace* -> toggles the optional *Brain trace* footer on replies
|
| 120 |
-
- **Shortcuts:** `/help`, `/status`, `/classify`, `/retrieve`, `/summarize`, `/reformulate`, `/grounded q ||| ctx`, `/remember`, `/session`, `/memories`, `/clear-session`, **`/similarity a ||| b`**, **`/embed` / `/embedding`**, **`/nearest q ||| c1 ||| c2`**.
|
| 121 |
|
| 122 |
**Intents the router understands** (examples, not exact wording):
|
| 123 |
- Ordinary chat / questions
|
|
@@ -161,6 +193,7 @@ GRADIO_INSTRUCTIONS_MARKDOWN = """### About this Space
|
|
| 161 |
| Normal Q&A | Ask any question in plain language. |
|
| 162 |
| **Classifier** (full probability table) | `/classify Stocks rallied after earnings.` or ask naturally to classify a paragraph. |
|
| 163 |
| **FAQ search** (scored chunks) | `/retrieve shipping policy` or βsearch the FAQ for β¦β. |
|
|
|
|
| 164 |
| **Summarize** | `/summarize` + long text, or βsummarize this: β¦β. |
|
| 165 |
| **Rephrase** | `/reformulate` + text, or βrewrite this professionally: β¦β. |
|
| 166 |
| **Answer from facts only** | `/grounded Will you refund? ||| Our policy is 14-day returns.` (question and context separated by `|||`). |
|
|
@@ -182,9 +215,54 @@ GRADIO_INSTRUCTIONS_MARKDOWN = """### About this Space
|
|
| 182 |
|
| 183 |
---
|
| 184 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
### Natural-language routing (no `/` required)
|
| 186 |
|
| 187 |
-
The app can infer intents such as **chat**, **summarize**, **reformulate**, **grounded Q&A**, **FAQ retrieve**, **classify**, **similarity**, **embedding**, **nearest candidate**, **remember / list / clear memory**, and **status**. If the wrong tool runs, repeat with a clearer verb or use the matching **slash command** from the table above.
|
| 188 |
|
| 189 |
---
|
| 190 |
|
|
@@ -220,7 +298,7 @@ On the Space page, open **Use via API** to call the **`chat`** endpoint (same pi
|
|
| 220 |
### Tips
|
| 221 |
|
| 222 |
- **Shared demo**: the default scope may be shared with other visitors; use *Start a new private session* for isolated memory.
|
| 223 |
-
- **Optional Space env**: `HORIZON2_MODEL` can override the generative model id; `HF_TOKEN` (secret) helps with Hub downloads.
|
| 224 |
- **More phrases**: the repo `README` and `/help` list additional natural phrasings for session controls."""
|
| 225 |
|
| 226 |
ROUTER_SYSTEM = """You are an intent router for a desktop AI assistant. The user speaks naturally (any language). Output EXACTLY one JSON object, one line, no markdown fences, no explanation.
|
|
@@ -233,7 +311,8 @@ intent must be one of:
|
|
| 233 |
- summarize β user wants a shorter summary; put source in "text"
|
| 234 |
- reformulate β rewrite/clarify/professional tone; source in "text"
|
| 235 |
- grounded β answer only from given facts; put QUESTION in "question", FACTS in "context" (if user mixes both in one blob, split sensibly)
|
| 236 |
-
- retrieve β search FAQ/knowledge; put search query in "text"
|
|
|
|
| 237 |
- classify β show topic-classifier probabilities; put passage in "text"
|
| 238 |
- similarity β cosine similarity between two texts; put "text_a ||| text_b" in "text"
|
| 239 |
- embedding β embedding vector summary for one passage; put passage in "text"
|
|
@@ -248,6 +327,7 @@ intent must be one of:
|
|
| 248 |
Rules:
|
| 249 |
- Default to "chat" when unsure; copy the entire user message into "text".
|
| 250 |
- Do not invent facts for "grounded": if no clear facts/context, use "chat" instead.
|
|
|
|
| 251 |
- Extract minimal "text" for tool intents (do not repeat system chatter)."""
|
| 252 |
|
| 253 |
VALID_INTENTS = frozenset(
|
|
@@ -257,6 +337,7 @@ VALID_INTENTS = frozenset(
|
|
| 257 |
"reformulate",
|
| 258 |
"grounded",
|
| 259 |
"retrieve",
|
|
|
|
| 260 |
"classify",
|
| 261 |
"similarity",
|
| 262 |
"embedding",
|
|
@@ -277,6 +358,9 @@ _INTENT_ALIASES = {
|
|
| 277 |
"search": "retrieve",
|
| 278 |
"faq": "retrieve",
|
| 279 |
"lookup": "retrieve",
|
|
|
|
|
|
|
|
|
|
| 280 |
"similar": "similarity",
|
| 281 |
"cosine": "similarity",
|
| 282 |
"embed": "embedding",
|
|
@@ -479,12 +563,19 @@ def _format_status(
|
|
| 479 |
scope_key: str,
|
| 480 |
) -> str:
|
| 481 |
rag_n = len(rag_chunks) if rag_chunks else 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 482 |
lines = [
|
| 483 |
"### Status\n",
|
| 484 |
f"- **Generative:** `{meta_mid}`",
|
| 485 |
f"- **Encoder:** {meta_encoder}",
|
| 486 |
f"- **RAG corpus:** {_clip(meta_rag_path or 'β', 80)} Β· **chunks:** {rag_n}",
|
| 487 |
f"- **Memory DB:** `{meta_mem_db or 'off'}` Β· **scope:** `{scope_key}`",
|
|
|
|
| 488 |
]
|
| 489 |
return "\n".join(lines)
|
| 490 |
|
|
@@ -1480,6 +1571,21 @@ def handle_slash(
|
|
| 1480 |
return "Usage: `/classify <text>`"
|
| 1481 |
return _classifier_result_markdown(encoder.classify([rest])[0])
|
| 1482 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1483 |
if cmd == "/retrieve":
|
| 1484 |
if not encoder or not rag_chunks:
|
| 1485 |
return "Retrieve needs encoder + FAQ corpus (default on unless `--lm-only` / `--no-rag` / `--no-encoder`)."
|
|
@@ -1679,6 +1785,9 @@ def parse_args() -> argparse.Namespace:
|
|
| 1679 |
|
| 1680 |
def main() -> None:
|
| 1681 |
args = parse_args()
|
|
|
|
|
|
|
|
|
|
| 1682 |
_ensure_gradio_can_reach_localhost()
|
| 1683 |
try:
|
| 1684 |
import gradio as gr
|
|
@@ -1742,6 +1851,9 @@ def main() -> None:
|
|
| 1742 |
init_schema(mem_conn)
|
| 1743 |
print(f"Memory: scope={args.memory_scope!r} db={mem_path!r}", flush=True)
|
| 1744 |
|
|
|
|
|
|
|
|
|
|
| 1745 |
meta_encoder = encoder_id or "off"
|
| 1746 |
meta_rag = str(rag_path.resolve()) if rag_path else None
|
| 1747 |
meta_mem = mem_path
|
|
@@ -1751,7 +1863,12 @@ def main() -> None:
|
|
| 1751 |
turn_counter = {"n": 0}
|
| 1752 |
initial_ub_session = {
|
| 1753 |
"trace": not args.no_trace
|
| 1754 |
-
and (
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1755 |
"smart_route": not args.no_smart_route,
|
| 1756 |
"rag": rag_chunks is not None,
|
| 1757 |
"scope_key": args.memory_scope,
|
|
@@ -1838,6 +1955,8 @@ def main() -> None:
|
|
| 1838 |
use_smart = bool(ub_session.get("smart_route")) and not args.no_smart_route
|
| 1839 |
|
| 1840 |
chat_line = msg
|
|
|
|
|
|
|
| 1841 |
if use_smart:
|
| 1842 |
try:
|
| 1843 |
route = infer_route(
|
|
@@ -1849,6 +1968,37 @@ def main() -> None:
|
|
| 1849 |
except Exception:
|
| 1850 |
route = {"intent": "chat", "text": msg, "question": "", "context": ""}
|
| 1851 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1852 |
if route["intent"] != "chat":
|
| 1853 |
tool_reply = run_routed_tool(
|
| 1854 |
route,
|
|
@@ -1877,6 +2027,8 @@ def main() -> None:
|
|
| 1877 |
trace: list[str] = []
|
| 1878 |
extras: list[str] = []
|
| 1879 |
_append_reply_style_hints(extras, ub_session)
|
|
|
|
|
|
|
| 1880 |
|
| 1881 |
if encoder:
|
| 1882 |
probs = encoder.classify([chat_line])[0]
|
|
@@ -1903,6 +2055,9 @@ def main() -> None:
|
|
| 1903 |
f"\n\n{rag_block}"
|
| 1904 |
)
|
| 1905 |
|
|
|
|
|
|
|
|
|
|
| 1906 |
if mem_conn:
|
| 1907 |
items = list_for_scope(mem_conn, cur_scope)
|
| 1908 |
if items:
|
|
@@ -1940,6 +2095,7 @@ def main() -> None:
|
|
| 1940 |
encoder is not None
|
| 1941 |
or mem_conn is not None
|
| 1942 |
or effective_rag is not None
|
|
|
|
| 1943 |
)
|
| 1944 |
)
|
| 1945 |
if show_trace_footer and trace:
|
|
@@ -1956,6 +2112,8 @@ def main() -> None:
|
|
| 1956 |
brain_bits.append("RAG")
|
| 1957 |
if mem_conn:
|
| 1958 |
brain_bits.append("memory")
|
|
|
|
|
|
|
| 1959 |
brain_label = "+".join(brain_bits) if brain_bits else "LM only"
|
| 1960 |
|
| 1961 |
_css = """
|
|
|
|
| 53 |
if str(_scripts) not in sys.path:
|
| 54 |
sys.path.insert(0, str(_scripts))
|
| 55 |
|
| 56 |
+
|
| 57 |
+
def _load_dotenv_if_present(root: Path) -> None:
|
| 58 |
+
"""Load ``root / .env`` into ``os.environ`` without overriding existing keys (stdlib only)."""
|
| 59 |
+
p = root / ".env"
|
| 60 |
+
if not p.is_file():
|
| 61 |
+
return
|
| 62 |
+
try:
|
| 63 |
+
text = p.read_text(encoding="utf-8")
|
| 64 |
+
except OSError:
|
| 65 |
+
return
|
| 66 |
+
for line in text.splitlines():
|
| 67 |
+
s = line.strip()
|
| 68 |
+
if not s or s.startswith("#"):
|
| 69 |
+
continue
|
| 70 |
+
if s.startswith("export "):
|
| 71 |
+
s = s[7:].strip()
|
| 72 |
+
if "=" not in s:
|
| 73 |
+
continue
|
| 74 |
+
k, _, v = s.partition("=")
|
| 75 |
+
k, v = k.strip(), v.strip()
|
| 76 |
+
if not k or k in os.environ:
|
| 77 |
+
continue
|
| 78 |
+
if len(v) >= 2 and v[0] == v[-1] and v[0] in "\"'":
|
| 79 |
+
v = v[1:-1]
|
| 80 |
+
os.environ[k] = v
|
| 81 |
+
|
| 82 |
+
|
| 83 |
from horizon2_core import ( # noqa: E402
|
| 84 |
DEFAULT_CHAT_SYSTEM,
|
| 85 |
DEFAULT_INSTRUCTION_MODEL,
|
|
|
|
| 101 |
list_for_scope,
|
| 102 |
put,
|
| 103 |
)
|
| 104 |
+
from google_cse_client import ( # noqa: E402
|
| 105 |
+
format_cse_hits_markdown,
|
| 106 |
+
google_cse_search,
|
| 107 |
+
read_google_cse_settings,
|
| 108 |
+
)
|
| 109 |
from nl_controls import parse_control_action # noqa: E402
|
| 110 |
from rag_faq_smoke import _pick_model, hybrid_retrieve, load_chunks # noqa: E402
|
| 111 |
from tinymodel_runtime import TinyModelRuntime # noqa: E402
|
|
|
|
| 149 |
- *Turn off the FAQ context*, *Disable RAG snippets*, *Turn FAQ back on* -> toggles whether FAQ excerpts are injected into the chat system context
|
| 150 |
- *Turn off smart routing*, *Go back to normal chat only* -> disables the JSON intent router (slash commands still work)
|
| 151 |
- *Show the brain trace*, *Hide debug trace* -> toggles the optional *Brain trace* footer on replies
|
| 152 |
+
- **Shortcuts:** `/help`, `/status`, `/classify`, `/retrieve`, **`/web <query>`** (Google Programmable Search when `GOOGLE_CSE_API_KEY` + `GOOGLE_CSE_CX` are set), `/summarize`, `/reformulate`, `/grounded q ||| ctx`, `/remember`, `/session`, `/memories`, `/clear-session`, **`/similarity a ||| b`**, **`/embed` / `/embedding`**, **`/nearest q ||| c1 ||| c2`**.
|
| 153 |
|
| 154 |
**Intents the router understands** (examples, not exact wording):
|
| 155 |
- Ordinary chat / questions
|
|
|
|
| 193 |
| Normal Q&A | Ask any question in plain language. |
|
| 194 |
| **Classifier** (full probability table) | `/classify Stocks rallied after earnings.` or ask naturally to classify a paragraph. |
|
| 195 |
| **FAQ search** (scored chunks) | `/retrieve shipping policy` or βsearch the FAQ for β¦β. |
|
| 196 |
+
| **Web search** (Google CSE) | `/web latest Python 3.13 release notes` or ask for **live web** / **Google** news (needs `GOOGLE_CSE_API_KEY` + `GOOGLE_CSE_CX`). |
|
| 197 |
| **Summarize** | `/summarize` + long text, or βsummarize this: β¦β. |
|
| 198 |
| **Rephrase** | `/reformulate` + text, or βrewrite this professionally: β¦β. |
|
| 199 |
| **Answer from facts only** | `/grounded Will you refund? ||| Our policy is 14-day returns.` (question and context separated by `|||`). |
|
|
|
|
| 215 |
|
| 216 |
---
|
| 217 |
|
| 218 |
+
### Google web search β Hugging Face Space setup and how to test
|
| 219 |
+
|
| 220 |
+
This Space can call **Google Programmable Search (Custom Search JSON API)** when you configure credentials on the Hub (and redeploy if you added new files).
|
| 221 |
+
|
| 222 |
+
**1) Space settings (Repository β Settings)**
|
| 223 |
+
|
| 224 |
+
| Name | Type | Value |
|
| 225 |
+
| --- | --- | --- |
|
| 226 |
+
| `GOOGLE_CSE_API_KEY` | **Secret** | Google Cloud API key restricted to **Custom Search API** (Application restrictions: **None** is typical for server-side Spaces). |
|
| 227 |
+
| `GOOGLE_CSE_CX` | **Variable** or **Secret** | Search engine ID from [Programmable Search Engine control panel](https://programmablesearchengine.google.com/controlpanel/all) β your engine β **Overview** β **Search engine ID** (the `cx` value). |
|
| 228 |
+
|
| 229 |
+
Optional **Variables**: `GOOGLE_CSE_NUM` (1β10, default 5), `GOOGLE_CSE_SAFE` (e.g. `off` or `active` β see Googleβs `cse.list` docs).
|
| 230 |
+
|
| 231 |
+
**2) Restart**
|
| 232 |
+
|
| 233 |
+
After saving secrets/variables, **Restart this Space** (or trigger a new deployment) so the container picks up env vars.
|
| 234 |
+
|
| 235 |
+
**3) Verify configuration**
|
| 236 |
+
|
| 237 |
+
Type **`/status`** and press **Send**. The line **Google web search (CSE)** should show **on** when both `GOOGLE_CSE_API_KEY` and `GOOGLE_CSE_CX` are set. If it says **off**, the Space process does not see those variables yet.
|
| 238 |
+
|
| 239 |
+
**4) Test the API directly (no router)**
|
| 240 |
+
|
| 241 |
+
- **`/web`** β returns **raw search hits** (titles, URLs, snippets) only. Example: `/web Python 3.13 release date`
|
| 242 |
+
- Same as **`/search_web β¦`**
|
| 243 |
+
|
| 244 |
+
If you see an error about HTTP 403 or βAPI key not validβ, fix the key or enable **Custom Search API** for that GCP project.
|
| 245 |
+
|
| 246 |
+
**5) Test with the AI (smart routing)**
|
| 247 |
+
|
| 248 |
+
- Ensure **smart routing** is on (say *Turn on smart routing* if you turned it off).
|
| 249 |
+
- Ask in plain language for **live web** / **Google** / **todayβs** information, e.g. *Search the web for the latest SpaceX launch summary* or *What does the web say about β¦?*
|
| 250 |
+
- The router uses intent **`web_search`**: the app fetches snippets, injects them into the model context, then the assistant replies **using those sources** (cite **[Web n]** when using a snippet).
|
| 251 |
+
- If the model stays in FAQ-only mode, use **`/web β¦`** first to confirm the API works, then try clearer web phrasing.
|
| 252 |
+
|
| 253 |
+
**6) Brain trace**
|
| 254 |
+
|
| 255 |
+
With **Show the brain trace** on, look for **`web:CSE:N`** (N = number of hits) at the bottom of the assistant message after a web-backed reply.
|
| 256 |
+
|
| 257 |
+
**7) Limits**
|
| 258 |
+
|
| 259 |
+
Google enforces **quotas** and may **restrict new signups** for the legacy Custom Search JSON API β check current Google documentation. This demo does not store your API key in the repo; it only reads **Space env** at runtime.
|
| 260 |
+
|
| 261 |
+
---
|
| 262 |
+
|
| 263 |
### Natural-language routing (no `/` required)
|
| 264 |
|
| 265 |
+
The app can infer intents such as **chat**, **summarize**, **reformulate**, **grounded Q&A**, **FAQ retrieve**, **web_search** (public web via Google CSE when configured), **classify**, **similarity**, **embedding**, **nearest candidate**, **remember / list / clear memory**, and **status**. If the wrong tool runs, repeat with a clearer verb or use the matching **slash command** from the table above.
|
| 266 |
|
| 267 |
---
|
| 268 |
|
|
|
|
| 298 |
### Tips
|
| 299 |
|
| 300 |
- **Shared demo**: the default scope may be shared with other visitors; use *Start a new private session* for isolated memory.
|
| 301 |
+
- **Optional Space env**: `HORIZON2_MODEL` can override the generative model id; `HF_TOKEN` (secret) helps with Hub downloads; **`GOOGLE_CSE_API_KEY`** + **`GOOGLE_CSE_CX`** enable web search (see section **Google web search** above).
|
| 302 |
- **More phrases**: the repo `README` and `/help` list additional natural phrasings for session controls."""
|
| 303 |
|
| 304 |
ROUTER_SYSTEM = """You are an intent router for a desktop AI assistant. The user speaks naturally (any language). Output EXACTLY one JSON object, one line, no markdown fences, no explanation.
|
|
|
|
| 311 |
- summarize β user wants a shorter summary; put source in "text"
|
| 312 |
- reformulate β rewrite/clarify/professional tone; source in "text"
|
| 313 |
- grounded β answer only from given facts; put QUESTION in "question", FACTS in "context" (if user mixes both in one blob, split sensibly)
|
| 314 |
+
- retrieve β search **FAQ / internal knowledge** corpus only; put search query in "text"
|
| 315 |
+
- web_search β user wants **live web** facts (news, current events, URLs); put the **search query** in "text" (not for FAQ-only lookup)
|
| 316 |
- classify β show topic-classifier probabilities; put passage in "text"
|
| 317 |
- similarity β cosine similarity between two texts; put "text_a ||| text_b" in "text"
|
| 318 |
- embedding β embedding vector summary for one passage; put passage in "text"
|
|
|
|
| 327 |
Rules:
|
| 328 |
- Default to "chat" when unsure; copy the entire user message into "text".
|
| 329 |
- Do not invent facts for "grounded": if no clear facts/context, use "chat" instead.
|
| 330 |
+
- Use **retrieve** for bundled FAQ / help-base search; use **web_search** when the user clearly needs the **public web** (today, external site, breaking news, "google this", etc.).
|
| 331 |
- Extract minimal "text" for tool intents (do not repeat system chatter)."""
|
| 332 |
|
| 333 |
VALID_INTENTS = frozenset(
|
|
|
|
| 337 |
"reformulate",
|
| 338 |
"grounded",
|
| 339 |
"retrieve",
|
| 340 |
+
"web_search",
|
| 341 |
"classify",
|
| 342 |
"similarity",
|
| 343 |
"embedding",
|
|
|
|
| 358 |
"search": "retrieve",
|
| 359 |
"faq": "retrieve",
|
| 360 |
"lookup": "retrieve",
|
| 361 |
+
"internet": "web_search",
|
| 362 |
+
"google": "web_search",
|
| 363 |
+
"browse_web": "web_search",
|
| 364 |
"similar": "similarity",
|
| 365 |
"cosine": "similarity",
|
| 366 |
"embed": "embedding",
|
|
|
|
| 563 |
scope_key: str,
|
| 564 |
) -> str:
|
| 565 |
rag_n = len(rag_chunks) if rag_chunks else 0
|
| 566 |
+
g_key, g_cx, _, _ = read_google_cse_settings()
|
| 567 |
+
cse_line = (
|
| 568 |
+
"**on** (`GOOGLE_CSE_API_KEY` + `GOOGLE_CSE_CX`)"
|
| 569 |
+
if g_key and g_cx
|
| 570 |
+
else "**off** (set `GOOGLE_CSE_API_KEY` and `GOOGLE_CSE_CX` for `/web` + routed web search)"
|
| 571 |
+
)
|
| 572 |
lines = [
|
| 573 |
"### Status\n",
|
| 574 |
f"- **Generative:** `{meta_mid}`",
|
| 575 |
f"- **Encoder:** {meta_encoder}",
|
| 576 |
f"- **RAG corpus:** {_clip(meta_rag_path or 'β', 80)} Β· **chunks:** {rag_n}",
|
| 577 |
f"- **Memory DB:** `{meta_mem_db or 'off'}` Β· **scope:** `{scope_key}`",
|
| 578 |
+
f"- **Google web search (CSE):** {cse_line}",
|
| 579 |
]
|
| 580 |
return "\n".join(lines)
|
| 581 |
|
|
|
|
| 1571 |
return "Usage: `/classify <text>`"
|
| 1572 |
return _classifier_result_markdown(encoder.classify([rest])[0])
|
| 1573 |
|
| 1574 |
+
if cmd in ("/web", "/search_web"):
|
| 1575 |
+
g_key, g_cx, g_num, g_safe = read_google_cse_settings()
|
| 1576 |
+
if not g_key or not g_cx:
|
| 1577 |
+
return (
|
| 1578 |
+
"Web search needs **`GOOGLE_CSE_API_KEY`** (secret) and **`GOOGLE_CSE_CX`** (search engine id) "
|
| 1579 |
+
"in Space settings or local `.env`. See `/status`."
|
| 1580 |
+
)
|
| 1581 |
+
if not rest:
|
| 1582 |
+
return "Usage: `/web <search query>`"
|
| 1583 |
+
try:
|
| 1584 |
+
hits = google_cse_search(rest, api_key=g_key, cx=g_cx, num=g_num, safe=g_safe)
|
| 1585 |
+
except Exception as e:
|
| 1586 |
+
return f"### Web search error\n{_clip(str(e), 1200)}"
|
| 1587 |
+
return format_cse_hits_markdown(hits, for_chat=False)
|
| 1588 |
+
|
| 1589 |
if cmd == "/retrieve":
|
| 1590 |
if not encoder or not rag_chunks:
|
| 1591 |
return "Retrieve needs encoder + FAQ corpus (default on unless `--lm-only` / `--no-rag` / `--no-encoder`)."
|
|
|
|
| 1785 |
|
| 1786 |
def main() -> None:
|
| 1787 |
args = parse_args()
|
| 1788 |
+
_load_dotenv_if_present(_REPO)
|
| 1789 |
+
_gk, _gc, _, _ = read_google_cse_settings()
|
| 1790 |
+
cse_on = bool(_gk and _gc)
|
| 1791 |
_ensure_gradio_can_reach_localhost()
|
| 1792 |
try:
|
| 1793 |
import gradio as gr
|
|
|
|
| 1851 |
init_schema(mem_conn)
|
| 1852 |
print(f"Memory: scope={args.memory_scope!r} db={mem_path!r}", flush=True)
|
| 1853 |
|
| 1854 |
+
if cse_on:
|
| 1855 |
+
print("Google CSE web search: configured (`/web` + smart-route `web_search`)", flush=True)
|
| 1856 |
+
|
| 1857 |
meta_encoder = encoder_id or "off"
|
| 1858 |
meta_rag = str(rag_path.resolve()) if rag_path else None
|
| 1859 |
meta_mem = mem_path
|
|
|
|
| 1863 |
turn_counter = {"n": 0}
|
| 1864 |
initial_ub_session = {
|
| 1865 |
"trace": not args.no_trace
|
| 1866 |
+
and (
|
| 1867 |
+
encoder is not None
|
| 1868 |
+
or mem_conn is not None
|
| 1869 |
+
or (rag_chunks is not None)
|
| 1870 |
+
or cse_on
|
| 1871 |
+
),
|
| 1872 |
"smart_route": not args.no_smart_route,
|
| 1873 |
"rag": rag_chunks is not None,
|
| 1874 |
"scope_key": args.memory_scope,
|
|
|
|
| 1955 |
use_smart = bool(ub_session.get("smart_route")) and not args.no_smart_route
|
| 1956 |
|
| 1957 |
chat_line = msg
|
| 1958 |
+
web_block = ""
|
| 1959 |
+
web_trace = ""
|
| 1960 |
if use_smart:
|
| 1961 |
try:
|
| 1962 |
route = infer_route(
|
|
|
|
| 1968 |
except Exception:
|
| 1969 |
route = {"intent": "chat", "text": msg, "question": "", "context": ""}
|
| 1970 |
|
| 1971 |
+
if route["intent"] == "web_search":
|
| 1972 |
+
g_key, g_cx, g_num, g_safe = read_google_cse_settings()
|
| 1973 |
+
q_web = (route["text"] or msg).strip()
|
| 1974 |
+
web_trace = "web:CSE:cfg"
|
| 1975 |
+
if g_key and g_cx and q_web:
|
| 1976 |
+
try:
|
| 1977 |
+
hits = google_cse_search(
|
| 1978 |
+
q_web,
|
| 1979 |
+
api_key=g_key,
|
| 1980 |
+
cx=g_cx,
|
| 1981 |
+
num=g_num,
|
| 1982 |
+
safe=g_safe,
|
| 1983 |
+
)
|
| 1984 |
+
web_block = format_cse_hits_markdown(hits, for_chat=True)
|
| 1985 |
+
web_trace = f"web:CSE:{len(hits)}"
|
| 1986 |
+
except Exception as ex:
|
| 1987 |
+
web_block = (
|
| 1988 |
+
f"(Google web search failed: {_clip(str(ex), 500)})\n\n"
|
| 1989 |
+
"Answer from general knowledge where appropriate; do not invent URLs or page titles."
|
| 1990 |
+
)
|
| 1991 |
+
web_trace = "web:CSE:err"
|
| 1992 |
+
elif not q_web:
|
| 1993 |
+
web_block = "(Empty web search query. Ask again with a concrete search topic.)"
|
| 1994 |
+
web_trace = "web:CSE:empty"
|
| 1995 |
+
else:
|
| 1996 |
+
web_block = (
|
| 1997 |
+
"(Web search is not configured: set **GOOGLE_CSE_API_KEY** and **GOOGLE_CSE_CX** "
|
| 1998 |
+
"in Hugging Face Space secrets/variables or local `.env`. See `/status`.)"
|
| 1999 |
+
)
|
| 2000 |
+
route = {"intent": "chat", "text": msg, "question": "", "context": ""}
|
| 2001 |
+
|
| 2002 |
if route["intent"] != "chat":
|
| 2003 |
tool_reply = run_routed_tool(
|
| 2004 |
route,
|
|
|
|
| 2027 |
trace: list[str] = []
|
| 2028 |
extras: list[str] = []
|
| 2029 |
_append_reply_style_hints(extras, ub_session)
|
| 2030 |
+
if web_trace:
|
| 2031 |
+
trace.append(web_trace)
|
| 2032 |
|
| 2033 |
if encoder:
|
| 2034 |
probs = encoder.classify([chat_line])[0]
|
|
|
|
| 2055 |
f"\n\n{rag_block}"
|
| 2056 |
)
|
| 2057 |
|
| 2058 |
+
if web_block:
|
| 2059 |
+
extras.append(web_block)
|
| 2060 |
+
|
| 2061 |
if mem_conn:
|
| 2062 |
items = list_for_scope(mem_conn, cur_scope)
|
| 2063 |
if items:
|
|
|
|
| 2095 |
encoder is not None
|
| 2096 |
or mem_conn is not None
|
| 2097 |
or effective_rag is not None
|
| 2098 |
+
or bool(web_trace)
|
| 2099 |
)
|
| 2100 |
)
|
| 2101 |
if show_trace_footer and trace:
|
|
|
|
| 2112 |
brain_bits.append("RAG")
|
| 2113 |
if mem_conn:
|
| 2114 |
brain_bits.append("memory")
|
| 2115 |
+
if cse_on:
|
| 2116 |
+
brain_bits.append("Google CSE")
|
| 2117 |
brain_label = "+".join(brain_bits) if brain_bits else "LM only"
|
| 2118 |
|
| 2119 |
_css = """
|