anriltine committed on
Commit
32907e2
·
verified ·
1 Parent(s): b0bf3f7

Deploy TinyModel1Space from GitHub Actions

Browse files
README.md CHANGED
@@ -19,10 +19,14 @@ pinned: false
19
  ### Secrets (recommended)
20
 
21
  - `HF_TOKEN` — read token so Hub model downloads are reliable (same as classic classifier Space).
 
22
 
23
  ### Optional environment variables
24
 
25
  - `HORIZON2_MODEL` — Hugging Face id for the **generative** instruct model (default in code: SmolLM2-360M-Instruct if unset in image).
 
 
 
26
 
27
  ### Note
28
 
 
19
  ### Secrets (recommended)
20
 
21
  - `HF_TOKEN` — read token so Hub model downloads are reliable (same as classic classifier Space).
22
+ - `GOOGLE_CSE_API_KEY` — Google Cloud API key restricted to **Custom Search API** (enables `/web` and smart-route **web search** in chat).
23
 
24
  ### Optional environment variables
25
 
26
  - `HORIZON2_MODEL` — Hugging Face id for the **generative** instruct model (default in code: SmolLM2-360M-Instruct if unset in image).
27
+ - `GOOGLE_CSE_CX` — Programmable Search Engine id (`cx` from the [PSE control panel](https://programmablesearchengine.google.com/controlpanel/all)); required together with `GOOGLE_CSE_API_KEY` for web search.
28
+ - `GOOGLE_CSE_NUM` — results per request, 1–10 (default 5).
29
+ - `GOOGLE_CSE_SAFE` — optional safe-search level for Google `cse.list` (e.g. `off`, `active`).
30
 
31
  ### Note
32
 
scripts/google_cse_client.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Google Programmable Search Engine (Custom Search JSON API) — minimal stdlib client.
2
+
3
+ Env (see also `universal_brain_chat` / Space README):
4
+ GOOGLE_CSE_API_KEY — required
5
+ GOOGLE_CSE_CX — Programmable Search Engine id (required)
6
+ GOOGLE_CSE_NUM — optional, 1–10 (default 5)
7
+ GOOGLE_CSE_SAFE — optional, e.g. ``off`` or ``active`` (see Google ``cse.list`` reference)
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import json
13
+ import os
14
+ import urllib.error
15
+ import urllib.parse
16
+ import urllib.request
17
+ from dataclasses import dataclass
18
+
19
# URL that ``google_cse_search`` appends its query string to.
_CSE_ENDPOINT = "https://www.googleapis.com/customsearch/v1"
# Value sent as the ``User-Agent`` header on every search request.
_DEFAULT_UA = "TinyModel-UniversalBrain/1.0 (+https://github.com/HyperlinksSpace/TinyModel)"
21
+
22
+
23
@dataclass(frozen=True)
class CSEHit:
    """One search result: an immutable (title, link, snippet) record."""

    # Result title; ``google_cse_search`` substitutes "(no title)" when the API gives none.
    title: str
    # Result URL; ``google_cse_search`` only emits hits whose link is non-empty.
    link: str
    # Text excerpt for the result; may be an empty string.
    snippet: str
28
+
29
+
30
def read_google_cse_settings() -> tuple[str | None, str | None, int, str | None]:
    """Read the Google CSE configuration from the process environment.

    Returns:
        ``(api_key, cx, num, safe)`` where ``api_key``, ``cx`` and ``safe`` are
        the stripped values of ``GOOGLE_CSE_API_KEY``, ``GOOGLE_CSE_CX`` and
        ``GOOGLE_CSE_SAFE`` (``None`` when unset or blank), and ``num`` is
        ``GOOGLE_CSE_NUM`` clamped to 1..10 (5 when unset or not an integer).
    """

    def _stripped(name: str) -> str:
        # Unset and empty variables both collapse to "".
        return (os.environ.get(name) or "").strip()

    api_key = _stripped("GOOGLE_CSE_API_KEY") or None
    engine_id = _stripped("GOOGLE_CSE_CX") or None
    safe_level = _stripped("GOOGLE_CSE_SAFE") or None

    # The "5" default applies before stripping, so a whitespace-only value
    # reaches int("") and falls back through the ValueError branch.
    count_text = (os.environ.get("GOOGLE_CSE_NUM") or "5").strip()
    try:
        count = int(count_text)
    except ValueError:
        count = 5
    else:
        count = min(max(count, 1), 10)

    return api_key, engine_id, count, safe_level
41
+
42
+
43
def _cse_error_message(body: str, fallback: str) -> str:
    """Pull a readable message out of an error response body, tolerating any JSON shape."""
    try:
        payload = json.loads(body)
    except ValueError:
        return body[:500] or fallback
    err = payload.get("error") if isinstance(payload, dict) else None
    if isinstance(err, dict):
        message = err.get("message")
        if message:
            return str(message)
    elif err:
        return str(err)
    return body[:500] or fallback


def google_cse_search(
    query: str,
    *,
    api_key: str,
    cx: str,
    num: int = 5,
    safe: str | None = None,
    timeout_sec: float = 20.0,
) -> list[CSEHit]:
    """Run one Custom Search JSON API request and return the hits.

    Args:
        query: Search text; blank or ``None`` returns ``[]`` without a request.
        api_key: Sent as the ``key`` query parameter.
        cx: Sent as the ``cx`` query parameter.
        num: Requested result count, clamped to 1..10.
        safe: Optional value for the ``safe`` query parameter; omitted when falsy.
        timeout_sec: Timeout passed to ``urllib.request.urlopen``.

    Returns:
        One ``CSEHit`` per response item that has a non-empty ``link``. Returns
        ``[]`` when the response has no ``items`` list.

    Raises:
        RuntimeError: On an HTTP error status, a network/timeout failure, a
            response body that is not valid JSON, or a JSON body carrying an
            ``error`` member.
    """
    q = (query or "").strip()
    if not q:
        return []
    n = max(1, min(10, num))
    params: dict[str, str] = {"key": api_key, "cx": cx, "q": q, "num": str(n)}
    if safe:
        params["safe"] = safe
    url = f"{_CSE_ENDPOINT}?{urllib.parse.urlencode(params)}"
    req = urllib.request.Request(url, headers={"User-Agent": _DEFAULT_UA})
    try:
        with urllib.request.urlopen(req, timeout=timeout_sec) as resp:
            raw = resp.read().decode("utf-8", errors="replace")
    except urllib.error.HTTPError as e:
        # HTTPError subclasses URLError, so it has to be handled first.
        body = e.read().decode("utf-8", errors="replace") if e.fp else ""
        raise RuntimeError(f"Google CSE HTTP {e.code}: {_cse_error_message(body, str(e))}") from e
    except OSError as e:
        # URLError is an OSError; so are the timeouts and connection resets that
        # can surface from resp.read() without being wrapped in URLError.
        raise RuntimeError(f"Google CSE network error: {e}") from e

    try:
        data = json.loads(raw)
    except ValueError as e:
        raise RuntimeError(f"Google CSE returned invalid JSON: {e}") from e
    if not isinstance(data, dict):
        return []
    if "error" in data:
        err = data.get("error") or {}
        msg = err.get("message", str(err)) if isinstance(err, dict) else str(err)
        raise RuntimeError(f"Google CSE API error: {msg}")

    items = data.get("items")
    if not isinstance(items, list):
        return []

    out: list[CSEHit] = []
    for it in items:
        if not isinstance(it, dict):
            continue
        title = str(it.get("title") or "").strip()
        link = str(it.get("link") or "").strip()
        snippet = str(it.get("snippet") or "").strip()
        # A hit without a URL cannot be cited, so it is dropped.
        if link:
            out.append(CSEHit(title=title or "(no title)", link=link, snippet=snippet))
    return out
95
+
96
+
97
def format_cse_hits_markdown(hits: list[CSEHit], *, for_chat: bool) -> str:
    """Render search hits as a Markdown block.

    Args:
        hits: Results to render; each contributes a numbered ``[Web n]`` entry
            with its title, URL and snippet.
        for_chat: When true, the block opens with grounding/citation
            instructions intended for the model context; otherwise it opens
            with a plain results heading.

    Returns:
        The Markdown text with surrounding whitespace stripped, or
        ``"(No web results.)"`` when ``hits`` is empty.
    """
    if not hits:
        return "(No web results.)"

    heading = (
        (
            "### Web search snippets (Google Programmable Search)\n"
            "Ground factual claims that depend on current or external information in these excerpts when they "
            "apply. Cite sources as **[Web n]** and include the page URL. If snippets are insufficient, say so."
        )
        if for_chat
        else "### Google web search results\n"
    )
    entries = [
        f"**[Web {position}]** {hit.title}\n"
        f"- **URL:** {hit.link}\n"
        f"- **Snippet:** {hit.snippet}\n"
        for position, hit in enumerate(hits, start=1)
    ]
    return "\n".join([heading, *entries]).strip()
scripts/universal_brain_chat.py CHANGED
@@ -53,6 +53,33 @@ DEFAULT_MEMORY_DB = str(_REPO / ".tmp" / "ub_chat_memory.sqlite")
53
  if str(_scripts) not in sys.path:
54
  sys.path.insert(0, str(_scripts))
55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  from horizon2_core import ( # noqa: E402
57
  DEFAULT_CHAT_SYSTEM,
58
  DEFAULT_INSTRUCTION_MODEL,
@@ -74,6 +101,11 @@ from horizon3_store import ( # noqa: E402
74
  list_for_scope,
75
  put,
76
  )
 
 
 
 
 
77
  from nl_controls import parse_control_action # noqa: E402
78
  from rag_faq_smoke import _pick_model, hybrid_retrieve, load_chunks # noqa: E402
79
  from tinymodel_runtime import TinyModelRuntime # noqa: E402
@@ -117,7 +149,7 @@ HELP_TEXT = """**How to use**
117
  - *Turn off the FAQ context*, *Disable RAG snippets*, *Turn FAQ back on* -> toggles whether FAQ excerpts are injected into the chat system context
118
  - *Turn off smart routing*, *Go back to normal chat only* -> disables the JSON intent router (slash commands still work)
119
  - *Show the brain trace*, *Hide debug trace* -> toggles the optional *Brain trace* footer on replies
120
- - **Shortcuts:** `/help`, `/status`, `/classify`, `/retrieve`, `/summarize`, `/reformulate`, `/grounded q ||| ctx`, `/remember`, `/session`, `/memories`, `/clear-session`, **`/similarity a ||| b`**, **`/embed` / `/embedding`**, **`/nearest q ||| c1 ||| c2`**.
121
 
122
  **Intents the router understands** (examples, not exact wording):
123
  - Ordinary chat / questions
@@ -161,6 +193,7 @@ GRADIO_INSTRUCTIONS_MARKDOWN = """### About this Space
161
  | Normal Q&A | Ask any question in plain language. |
162
  | **Classifier** (full probability table) | `/classify Stocks rallied after earnings.` or ask naturally to classify a paragraph. |
163
  | **FAQ search** (scored chunks) | `/retrieve shipping policy` or β€œsearch the FAQ for …”. |
 
164
  | **Summarize** | `/summarize` + long text, or β€œsummarize this: …”. |
165
  | **Rephrase** | `/reformulate` + text, or β€œrewrite this professionally: …”. |
166
  | **Answer from facts only** | `/grounded Will you refund? ||| Our policy is 14-day returns.` (question and context separated by `|||`). |
@@ -182,9 +215,54 @@ GRADIO_INSTRUCTIONS_MARKDOWN = """### About this Space
182
 
183
  ---
184
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
  ### Natural-language routing (no `/` required)
186
 
187
- The app can infer intents such as **chat**, **summarize**, **reformulate**, **grounded Q&A**, **FAQ retrieve**, **classify**, **similarity**, **embedding**, **nearest candidate**, **remember / list / clear memory**, and **status**. If the wrong tool runs, repeat with a clearer verb or use the matching **slash command** from the table above.
188
 
189
  ---
190
 
@@ -220,7 +298,7 @@ On the Space page, open **Use via API** to call the **`chat`** endpoint (same pi
220
  ### Tips
221
 
222
  - **Shared demo**: the default scope may be shared with other visitors; use *Start a new private session* for isolated memory.
223
- - **Optional Space env**: `HORIZON2_MODEL` can override the generative model id; `HF_TOKEN` (secret) helps with Hub downloads.
224
  - **More phrases**: the repo `README` and `/help` list additional natural phrasings for session controls."""
225
 
226
  ROUTER_SYSTEM = """You are an intent router for a desktop AI assistant. The user speaks naturally (any language). Output EXACTLY one JSON object, one line, no markdown fences, no explanation.
@@ -233,7 +311,8 @@ intent must be one of:
233
  - summarize β€” user wants a shorter summary; put source in "text"
234
  - reformulate β€” rewrite/clarify/professional tone; source in "text"
235
  - grounded β€” answer only from given facts; put QUESTION in "question", FACTS in "context" (if user mixes both in one blob, split sensibly)
236
- - retrieve β€” search FAQ/knowledge; put search query in "text"
 
237
  - classify β€” show topic-classifier probabilities; put passage in "text"
238
  - similarity β€” cosine similarity between two texts; put "text_a ||| text_b" in "text"
239
  - embedding β€” embedding vector summary for one passage; put passage in "text"
@@ -248,6 +327,7 @@ intent must be one of:
248
  Rules:
249
  - Default to "chat" when unsure; copy the entire user message into "text".
250
  - Do not invent facts for "grounded": if no clear facts/context, use "chat" instead.
 
251
  - Extract minimal "text" for tool intents (do not repeat system chatter)."""
252
 
253
  VALID_INTENTS = frozenset(
@@ -257,6 +337,7 @@ VALID_INTENTS = frozenset(
257
  "reformulate",
258
  "grounded",
259
  "retrieve",
 
260
  "classify",
261
  "similarity",
262
  "embedding",
@@ -277,6 +358,9 @@ _INTENT_ALIASES = {
277
  "search": "retrieve",
278
  "faq": "retrieve",
279
  "lookup": "retrieve",
 
 
 
280
  "similar": "similarity",
281
  "cosine": "similarity",
282
  "embed": "embedding",
@@ -479,12 +563,19 @@ def _format_status(
479
  scope_key: str,
480
  ) -> str:
481
  rag_n = len(rag_chunks) if rag_chunks else 0
 
 
 
 
 
 
482
  lines = [
483
  "### Status\n",
484
  f"- **Generative:** `{meta_mid}`",
485
  f"- **Encoder:** {meta_encoder}",
486
  f"- **RAG corpus:** {_clip(meta_rag_path or 'β€”', 80)} Β· **chunks:** {rag_n}",
487
  f"- **Memory DB:** `{meta_mem_db or 'off'}` Β· **scope:** `{scope_key}`",
 
488
  ]
489
  return "\n".join(lines)
490
 
@@ -1480,6 +1571,21 @@ def handle_slash(
1480
  return "Usage: `/classify <text>`"
1481
  return _classifier_result_markdown(encoder.classify([rest])[0])
1482
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1483
  if cmd == "/retrieve":
1484
  if not encoder or not rag_chunks:
1485
  return "Retrieve needs encoder + FAQ corpus (default on unless `--lm-only` / `--no-rag` / `--no-encoder`)."
@@ -1679,6 +1785,9 @@ def parse_args() -> argparse.Namespace:
1679
 
1680
  def main() -> None:
1681
  args = parse_args()
 
 
 
1682
  _ensure_gradio_can_reach_localhost()
1683
  try:
1684
  import gradio as gr
@@ -1742,6 +1851,9 @@ def main() -> None:
1742
  init_schema(mem_conn)
1743
  print(f"Memory: scope={args.memory_scope!r} db={mem_path!r}", flush=True)
1744
 
 
 
 
1745
  meta_encoder = encoder_id or "off"
1746
  meta_rag = str(rag_path.resolve()) if rag_path else None
1747
  meta_mem = mem_path
@@ -1751,7 +1863,12 @@ def main() -> None:
1751
  turn_counter = {"n": 0}
1752
  initial_ub_session = {
1753
  "trace": not args.no_trace
1754
- and (encoder is not None or mem_conn is not None or (rag_chunks is not None)),
 
 
 
 
 
1755
  "smart_route": not args.no_smart_route,
1756
  "rag": rag_chunks is not None,
1757
  "scope_key": args.memory_scope,
@@ -1838,6 +1955,8 @@ def main() -> None:
1838
  use_smart = bool(ub_session.get("smart_route")) and not args.no_smart_route
1839
 
1840
  chat_line = msg
 
 
1841
  if use_smart:
1842
  try:
1843
  route = infer_route(
@@ -1849,6 +1968,37 @@ def main() -> None:
1849
  except Exception:
1850
  route = {"intent": "chat", "text": msg, "question": "", "context": ""}
1851
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1852
  if route["intent"] != "chat":
1853
  tool_reply = run_routed_tool(
1854
  route,
@@ -1877,6 +2027,8 @@ def main() -> None:
1877
  trace: list[str] = []
1878
  extras: list[str] = []
1879
  _append_reply_style_hints(extras, ub_session)
 
 
1880
 
1881
  if encoder:
1882
  probs = encoder.classify([chat_line])[0]
@@ -1903,6 +2055,9 @@ def main() -> None:
1903
  f"\n\n{rag_block}"
1904
  )
1905
 
 
 
 
1906
  if mem_conn:
1907
  items = list_for_scope(mem_conn, cur_scope)
1908
  if items:
@@ -1940,6 +2095,7 @@ def main() -> None:
1940
  encoder is not None
1941
  or mem_conn is not None
1942
  or effective_rag is not None
 
1943
  )
1944
  )
1945
  if show_trace_footer and trace:
@@ -1956,6 +2112,8 @@ def main() -> None:
1956
  brain_bits.append("RAG")
1957
  if mem_conn:
1958
  brain_bits.append("memory")
 
 
1959
  brain_label = "+".join(brain_bits) if brain_bits else "LM only"
1960
 
1961
  _css = """
 
53
  if str(_scripts) not in sys.path:
54
  sys.path.insert(0, str(_scripts))
55
 
56
+
57
def _load_dotenv_if_present(root: Path) -> None:
    """Load ``root / .env`` into ``os.environ`` without overriding existing keys (stdlib only).

    Best-effort: a missing, unreadable or badly encoded file is ignored, as are
    lines that cannot be stored as environment variables.

    Recognised lines are ``KEY=value`` with an optional leading ``export``;
    blank lines and lines starting with ``#`` are skipped. One pair of matching
    single or double quotes around the value is removed.

    Args:
        root: Directory expected to contain the ``.env`` file.
    """
    env_path = root / ".env"
    if not env_path.is_file():
        return
    try:
        # utf-8-sig drops a leading BOM, which would otherwise be glued onto
        # the first key. UnicodeDecodeError is a ValueError, not an OSError,
        # so it has to be listed for a non-UTF-8 file to be skipped quietly.
        text = env_path.read_text(encoding="utf-8-sig")
    except (OSError, UnicodeDecodeError):
        return
    for line in text.splitlines():
        entry = line.strip()
        if not entry or entry.startswith("#"):
            continue
        if entry.startswith("export "):
            entry = entry[len("export "):].strip()
        key, sep, value = entry.partition("=")
        if not sep:
            continue
        key, value = key.strip(), value.strip()
        # Variables already set in the process take precedence over the file.
        if not key or key in os.environ:
            continue
        if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
            value = value[1:-1]
        try:
            os.environ[key] = value
        except ValueError:
            # e.g. an embedded NUL; skip the line instead of aborting startup.
            continue
81
+
82
+
83
  from horizon2_core import ( # noqa: E402
84
  DEFAULT_CHAT_SYSTEM,
85
  DEFAULT_INSTRUCTION_MODEL,
 
101
  list_for_scope,
102
  put,
103
  )
104
+ from google_cse_client import ( # noqa: E402
105
+ format_cse_hits_markdown,
106
+ google_cse_search,
107
+ read_google_cse_settings,
108
+ )
109
  from nl_controls import parse_control_action # noqa: E402
110
  from rag_faq_smoke import _pick_model, hybrid_retrieve, load_chunks # noqa: E402
111
  from tinymodel_runtime import TinyModelRuntime # noqa: E402
 
149
  - *Turn off the FAQ context*, *Disable RAG snippets*, *Turn FAQ back on* -> toggles whether FAQ excerpts are injected into the chat system context
150
  - *Turn off smart routing*, *Go back to normal chat only* -> disables the JSON intent router (slash commands still work)
151
  - *Show the brain trace*, *Hide debug trace* -> toggles the optional *Brain trace* footer on replies
152
+ - **Shortcuts:** `/help`, `/status`, `/classify`, `/retrieve`, **`/web <query>`** (Google Programmable Search when `GOOGLE_CSE_API_KEY` + `GOOGLE_CSE_CX` are set), `/summarize`, `/reformulate`, `/grounded q ||| ctx`, `/remember`, `/session`, `/memories`, `/clear-session`, **`/similarity a ||| b`**, **`/embed` / `/embedding`**, **`/nearest q ||| c1 ||| c2`**.
153
 
154
  **Intents the router understands** (examples, not exact wording):
155
  - Ordinary chat / questions
 
193
  | Normal Q&A | Ask any question in plain language. |
194
  | **Classifier** (full probability table) | `/classify Stocks rallied after earnings.` or ask naturally to classify a paragraph. |
195
  | **FAQ search** (scored chunks) | `/retrieve shipping policy` or β€œsearch the FAQ for …”. |
196
+ | **Web search** (Google CSE) | `/web latest Python 3.13 release notes` or ask for **live web** / **Google** news (needs `GOOGLE_CSE_API_KEY` + `GOOGLE_CSE_CX`). |
197
  | **Summarize** | `/summarize` + long text, or β€œsummarize this: …”. |
198
  | **Rephrase** | `/reformulate` + text, or β€œrewrite this professionally: …”. |
199
  | **Answer from facts only** | `/grounded Will you refund? ||| Our policy is 14-day returns.` (question and context separated by `|||`). |
 
215
 
216
  ---
217
 
218
+ ### Google web search β€” Hugging Face Space setup and how to test
219
+
220
+ This Space can call **Google Programmable Search (Custom Search JSON API)** when you configure credentials on the Hub (and redeploy if you added new files).
221
+
222
+ **1) Space settings (Repository β†’ Settings)**
223
+
224
+ | Name | Type | Value |
225
+ | --- | --- | --- |
226
+ | `GOOGLE_CSE_API_KEY` | **Secret** | Google Cloud API key restricted to **Custom Search API** (Application restrictions: **None** is typical for server-side Spaces). |
227
+ | `GOOGLE_CSE_CX` | **Variable** or **Secret** | Search engine ID from [Programmable Search Engine control panel](https://programmablesearchengine.google.com/controlpanel/all) β†’ your engine β†’ **Overview** β†’ **Search engine ID** (the `cx` value). |
228
+
229
+ Optional **Variables**: `GOOGLE_CSE_NUM` (1–10, default 5), `GOOGLE_CSE_SAFE` (e.g. `off` or `active` β€” see Google’s `cse.list` docs).
230
+
231
+ **2) Restart**
232
+
233
+ After saving secrets/variables, **Restart this Space** (or trigger a new deployment) so the container picks up env vars.
234
+
235
+ **3) Verify configuration**
236
+
237
+ Type **`/status`** and press **Send**. The line **Google web search (CSE)** should show **on** when both `GOOGLE_CSE_API_KEY` and `GOOGLE_CSE_CX` are set. If it says **off**, the Space process does not see those variables yet.
238
+
239
+ **4) Test the API directly (no router)**
240
+
241
+ - **`/web`** β€” returns **raw search hits** (titles, URLs, snippets) only. Example: `/web Python 3.13 release date`
242
+ - Same as **`/search_web …`**
243
+
244
+ If you see an error about HTTP 403 or β€œAPI key not valid”, fix the key or enable **Custom Search API** for that GCP project.
245
+
246
+ **5) Test with the AI (smart routing)**
247
+
248
+ - Ensure **smart routing** is on (say *Turn on smart routing* if you turned it off).
249
+ - Ask in plain language for **live web** / **Google** / **today’s** information, e.g. *Search the web for the latest SpaceX launch summary* or *What does the web say about …?*
250
+ - The router uses intent **`web_search`**: the app fetches snippets, injects them into the model context, then the assistant replies **using those sources** (cite **[Web n]** when using a snippet).
251
+ - If the model stays in FAQ-only mode, use **`/web …`** first to confirm the API works, then try clearer web phrasing.
252
+
253
+ **6) Brain trace**
254
+
255
+ With **Show the brain trace** on, look for **`web:CSE:N`** (N = number of hits) at the bottom of the assistant message after a web-backed reply.
256
+
257
+ **7) Limits**
258
+
259
+ Google enforces **quotas** and may **restrict new signups** for the legacy Custom Search JSON API β€” check current Google documentation. This demo does not store your API key in the repo; it only reads **Space env** at runtime.
260
+
261
+ ---
262
+
263
  ### Natural-language routing (no `/` required)
264
 
265
+ The app can infer intents such as **chat**, **summarize**, **reformulate**, **grounded Q&A**, **FAQ retrieve**, **web_search** (public web via Google CSE when configured), **classify**, **similarity**, **embedding**, **nearest candidate**, **remember / list / clear memory**, and **status**. If the wrong tool runs, repeat with a clearer verb or use the matching **slash command** from the table above.
266
 
267
  ---
268
 
 
298
  ### Tips
299
 
300
  - **Shared demo**: the default scope may be shared with other visitors; use *Start a new private session* for isolated memory.
301
+ - **Optional Space env**: `HORIZON2_MODEL` can override the generative model id; `HF_TOKEN` (secret) helps with Hub downloads; **`GOOGLE_CSE_API_KEY`** + **`GOOGLE_CSE_CX`** enable web search (see section **Google web search** above).
302
  - **More phrases**: the repo `README` and `/help` list additional natural phrasings for session controls."""
303
 
304
  ROUTER_SYSTEM = """You are an intent router for a desktop AI assistant. The user speaks naturally (any language). Output EXACTLY one JSON object, one line, no markdown fences, no explanation.
 
311
  - summarize β€” user wants a shorter summary; put source in "text"
312
  - reformulate β€” rewrite/clarify/professional tone; source in "text"
313
  - grounded β€” answer only from given facts; put QUESTION in "question", FACTS in "context" (if user mixes both in one blob, split sensibly)
314
+ - retrieve β€” search **FAQ / internal knowledge** corpus only; put search query in "text"
315
+ - web_search β€” user wants **live web** facts (news, current events, URLs); put the **search query** in "text" (not for FAQ-only lookup)
316
  - classify β€” show topic-classifier probabilities; put passage in "text"
317
  - similarity β€” cosine similarity between two texts; put "text_a ||| text_b" in "text"
318
  - embedding β€” embedding vector summary for one passage; put passage in "text"
 
327
  Rules:
328
  - Default to "chat" when unsure; copy the entire user message into "text".
329
  - Do not invent facts for "grounded": if no clear facts/context, use "chat" instead.
330
+ - Use **retrieve** for bundled FAQ / help-base search; use **web_search** when the user clearly needs the **public web** (today, external site, breaking news, "google this", etc.).
331
  - Extract minimal "text" for tool intents (do not repeat system chatter)."""
332
 
333
  VALID_INTENTS = frozenset(
 
337
  "reformulate",
338
  "grounded",
339
  "retrieve",
340
+ "web_search",
341
  "classify",
342
  "similarity",
343
  "embedding",
 
358
  "search": "retrieve",
359
  "faq": "retrieve",
360
  "lookup": "retrieve",
361
+ "internet": "web_search",
362
+ "google": "web_search",
363
+ "browse_web": "web_search",
364
  "similar": "similarity",
365
  "cosine": "similarity",
366
  "embed": "embedding",
 
563
  scope_key: str,
564
  ) -> str:
565
  rag_n = len(rag_chunks) if rag_chunks else 0
566
+ g_key, g_cx, _, _ = read_google_cse_settings()
567
+ cse_line = (
568
+ "**on** (`GOOGLE_CSE_API_KEY` + `GOOGLE_CSE_CX`)"
569
+ if g_key and g_cx
570
+ else "**off** (set `GOOGLE_CSE_API_KEY` and `GOOGLE_CSE_CX` for `/web` + routed web search)"
571
+ )
572
  lines = [
573
  "### Status\n",
574
  f"- **Generative:** `{meta_mid}`",
575
  f"- **Encoder:** {meta_encoder}",
576
  f"- **RAG corpus:** {_clip(meta_rag_path or 'β€”', 80)} Β· **chunks:** {rag_n}",
577
  f"- **Memory DB:** `{meta_mem_db or 'off'}` Β· **scope:** `{scope_key}`",
578
+ f"- **Google web search (CSE):** {cse_line}",
579
  ]
580
  return "\n".join(lines)
581
 
 
1571
  return "Usage: `/classify <text>`"
1572
  return _classifier_result_markdown(encoder.classify([rest])[0])
1573
 
1574
+ if cmd in ("/web", "/search_web"):
1575
+ g_key, g_cx, g_num, g_safe = read_google_cse_settings()
1576
+ if not g_key or not g_cx:
1577
+ return (
1578
+ "Web search needs **`GOOGLE_CSE_API_KEY`** (secret) and **`GOOGLE_CSE_CX`** (search engine id) "
1579
+ "in Space settings or local `.env`. See `/status`."
1580
+ )
1581
+ if not rest:
1582
+ return "Usage: `/web <search query>`"
1583
+ try:
1584
+ hits = google_cse_search(rest, api_key=g_key, cx=g_cx, num=g_num, safe=g_safe)
1585
+ except Exception as e:
1586
+ return f"### Web search error\n{_clip(str(e), 1200)}"
1587
+ return format_cse_hits_markdown(hits, for_chat=False)
1588
+
1589
  if cmd == "/retrieve":
1590
  if not encoder or not rag_chunks:
1591
  return "Retrieve needs encoder + FAQ corpus (default on unless `--lm-only` / `--no-rag` / `--no-encoder`)."
 
1785
 
1786
  def main() -> None:
1787
  args = parse_args()
1788
+ _load_dotenv_if_present(_REPO)
1789
+ _gk, _gc, _, _ = read_google_cse_settings()
1790
+ cse_on = bool(_gk and _gc)
1791
  _ensure_gradio_can_reach_localhost()
1792
  try:
1793
  import gradio as gr
 
1851
  init_schema(mem_conn)
1852
  print(f"Memory: scope={args.memory_scope!r} db={mem_path!r}", flush=True)
1853
 
1854
+ if cse_on:
1855
+ print("Google CSE web search: configured (`/web` + smart-route `web_search`)", flush=True)
1856
+
1857
  meta_encoder = encoder_id or "off"
1858
  meta_rag = str(rag_path.resolve()) if rag_path else None
1859
  meta_mem = mem_path
 
1863
  turn_counter = {"n": 0}
1864
  initial_ub_session = {
1865
  "trace": not args.no_trace
1866
+ and (
1867
+ encoder is not None
1868
+ or mem_conn is not None
1869
+ or (rag_chunks is not None)
1870
+ or cse_on
1871
+ ),
1872
  "smart_route": not args.no_smart_route,
1873
  "rag": rag_chunks is not None,
1874
  "scope_key": args.memory_scope,
 
1955
  use_smart = bool(ub_session.get("smart_route")) and not args.no_smart_route
1956
 
1957
  chat_line = msg
1958
+ web_block = ""
1959
+ web_trace = ""
1960
  if use_smart:
1961
  try:
1962
  route = infer_route(
 
1968
  except Exception:
1969
  route = {"intent": "chat", "text": msg, "question": "", "context": ""}
1970
 
1971
+ if route["intent"] == "web_search":
1972
+ g_key, g_cx, g_num, g_safe = read_google_cse_settings()
1973
+ q_web = (route["text"] or msg).strip()
1974
+ web_trace = "web:CSE:cfg"
1975
+ if g_key and g_cx and q_web:
1976
+ try:
1977
+ hits = google_cse_search(
1978
+ q_web,
1979
+ api_key=g_key,
1980
+ cx=g_cx,
1981
+ num=g_num,
1982
+ safe=g_safe,
1983
+ )
1984
+ web_block = format_cse_hits_markdown(hits, for_chat=True)
1985
+ web_trace = f"web:CSE:{len(hits)}"
1986
+ except Exception as ex:
1987
+ web_block = (
1988
+ f"(Google web search failed: {_clip(str(ex), 500)})\n\n"
1989
+ "Answer from general knowledge where appropriate; do not invent URLs or page titles."
1990
+ )
1991
+ web_trace = "web:CSE:err"
1992
+ elif not q_web:
1993
+ web_block = "(Empty web search query. Ask again with a concrete search topic.)"
1994
+ web_trace = "web:CSE:empty"
1995
+ else:
1996
+ web_block = (
1997
+ "(Web search is not configured: set **GOOGLE_CSE_API_KEY** and **GOOGLE_CSE_CX** "
1998
+ "in Hugging Face Space secrets/variables or local `.env`. See `/status`.)"
1999
+ )
2000
+ route = {"intent": "chat", "text": msg, "question": "", "context": ""}
2001
+
2002
  if route["intent"] != "chat":
2003
  tool_reply = run_routed_tool(
2004
  route,
 
2027
  trace: list[str] = []
2028
  extras: list[str] = []
2029
  _append_reply_style_hints(extras, ub_session)
2030
+ if web_trace:
2031
+ trace.append(web_trace)
2032
 
2033
  if encoder:
2034
  probs = encoder.classify([chat_line])[0]
 
2055
  f"\n\n{rag_block}"
2056
  )
2057
 
2058
+ if web_block:
2059
+ extras.append(web_block)
2060
+
2061
  if mem_conn:
2062
  items = list_for_scope(mem_conn, cur_scope)
2063
  if items:
 
2095
  encoder is not None
2096
  or mem_conn is not None
2097
  or effective_rag is not None
2098
+ or bool(web_trace)
2099
  )
2100
  )
2101
  if show_trace_footer and trace:
 
2112
  brain_bits.append("RAG")
2113
  if mem_conn:
2114
  brain_bits.append("memory")
2115
+ if cse_on:
2116
+ brain_bits.append("Google CSE")
2117
  brain_label = "+".join(brain_bits) if brain_bits else "LM only"
2118
 
2119
  _css = """