Add Mermaid flowchart tab, simplify How-it-works, new tools + hard examples
- New tab: Flowchart — a Mermaid diagram showing the plain-LLM path vs.
the tool-calling agent loop, rendered via Mermaid.js from CDN.
- How it works tab trimmed: short piece-table + 7-line pseudocode up
front; the long system-prompt and per-tool schemas moved into a
collapsible accordion.
- Two new tools: get_time (timezones/cities) and get_exchange_rate
(demo-rate currency conversion) with matching JSON schemas.
- Harder arithmetic examples (powers, percentages, square roots,
mixed-unit multi-step) to make plain-LLM failures more obvious.
- Parse tool-call leaks in three shapes now — JSON, Python
[tool(k=v)] (Gemma 3 symptom), and <function=foo {...}> (Llama).
Gemma was emitting [convert_units(from_unit=celsius, ...)] as final
content; the loop now re-enters and runs it.
- Tightened system prompt to forbid Python/JSON tool-call syntax in the
natural-language reply.
- Custom CSS: gradient title card, colour-coded plain vs. agent panels,
highlighted final-answer box.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
|
@@ -15,6 +15,7 @@ needed on the Space. Set HF_TOKEN as a Space secret.
|
|
| 15 |
|
| 16 |
from __future__ import annotations
|
| 17 |
|
|
|
|
| 18 |
import inspect
|
| 19 |
import json
|
| 20 |
import os
|
|
@@ -47,21 +48,26 @@ SYSTEM_PROMPT = """You are a helpful assistant with access to tools.
|
|
| 47 |
Rules you must follow:
|
| 48 |
1. For ANY arithmetic — even simple multiplication like 5 * 7 — call the
|
| 49 |
`calculate` tool. Never compute numbers in your head.
|
| 50 |
-
2. For real-time facts like weather, call the matching
|
| 51 |
-
Do not guess or say you don't know.
|
| 52 |
3. For unit conversions, call `convert_units` — don't approximate.
|
| 53 |
-
4. For
|
|
|
|
| 54 |
call `search_notes`.
|
| 55 |
-
|
| 56 |
-
|
| 57 |
"5 km/day for a week in miles" → first `convert_units`, then
|
| 58 |
`calculate` to multiply by 7.
|
| 59 |
-
|
| 60 |
write the final answer? Only answer once you have ALL the numbers.
|
| 61 |
-
|
| 62 |
France"), answer directly.
|
| 63 |
|
| 64 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
"""
|
| 66 |
|
| 67 |
|
|
@@ -139,6 +145,63 @@ def search_notes(query: str) -> str:
|
|
| 139 |
return json.dumps({"message": f"No results for '{query}'. Try: {', '.join(list(topics.keys())[:5])}"})
|
| 140 |
|
| 141 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
def define_word(word: str) -> str:
|
| 143 |
definitions = {
|
| 144 |
"overfitting": "When a model learns training data too well (including noise) and performs poorly on new data.",
|
|
@@ -221,6 +284,34 @@ TOOL_SCHEMAS = [
|
|
| 221 |
},
|
| 222 |
},
|
| 223 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 224 |
]
|
| 225 |
|
| 226 |
TOOL_FUNCTIONS = {
|
|
@@ -229,6 +320,8 @@ TOOL_FUNCTIONS = {
|
|
| 229 |
"convert_units": convert_units,
|
| 230 |
"search_notes": search_notes,
|
| 231 |
"define_word": define_word,
|
|
|
|
|
|
|
| 232 |
}
|
| 233 |
|
| 234 |
|
|
@@ -281,20 +374,59 @@ def _parse_args(raw):
|
|
| 281 |
return {}
|
| 282 |
|
| 283 |
|
| 284 |
-
# Fallback: some models
|
| 285 |
-
#
|
| 286 |
-
#
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
|
|
|
|
|
|
| 290 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 291 |
|
| 292 |
|
| 293 |
def _extract_leaked_tool_calls(content: str):
|
| 294 |
if not content:
|
| 295 |
return []
|
| 296 |
calls = []
|
| 297 |
-
for m in
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 298 |
name = m.group("name")
|
| 299 |
try:
|
| 300 |
args = json.loads(m.group("args"))
|
|
@@ -302,6 +434,13 @@ def _extract_leaked_tool_calls(content: str):
|
|
| 302 |
continue
|
| 303 |
if name in TOOL_FUNCTIONS:
|
| 304 |
calls.append({"name": name, "arguments": args})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 305 |
return calls
|
| 306 |
|
| 307 |
|
|
@@ -445,78 +584,160 @@ TOOLS_MARKDOWN = "\n\n".join(
|
|
| 445 |
|
| 446 |
|
| 447 |
HOW_IT_WORKS = f"""
|
| 448 |
-
## The
|
| 449 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 450 |
```
|
| 451 |
-
messages = [system prompt, user question]
|
| 452 |
|
| 453 |
-
|
| 454 |
-
|
| 455 |
|
| 456 |
-
if response has NO tool_calls:
|
| 457 |
-
return response as final answer
|
| 458 |
|
| 459 |
-
|
| 460 |
-
|
| 461 |
-
append result to messages
|
| 462 |
|
| 463 |
-
|
|
|
|
| 464 |
```
|
| 465 |
|
| 466 |
-
|
| 467 |
-
tools.
|
| 468 |
|
| 469 |
-
|
|
|
|
| 470 |
|
| 471 |
-
## System prompt
|
| 472 |
|
| 473 |
-
|
| 474 |
-
|
| 475 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 476 |
|
| 477 |
-
---
|
| 478 |
|
| 479 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 480 |
|
| 481 |
-
{
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 482 |
"""
|
| 483 |
|
| 484 |
|
|
|
|
|
|
|
|
|
|
| 485 |
# --- UI ------------------------------------------------------------------
|
| 486 |
|
| 487 |
EXAMPLES = [
|
|
|
|
| 488 |
"What is 4729 times 8314?",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 489 |
"What's the temperature in Gandhinagar in Fahrenheit?",
|
|
|
|
|
|
|
| 490 |
"How much hotter is Delhi than Bangalore right now, in degrees Celsius?",
|
| 491 |
"If I run 5 km every day for a week, how many miles is that total?",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 492 |
"Which week of CS 203 covered Docker?",
|
| 493 |
"What does 'overfitting' mean?",
|
|
|
|
| 494 |
"What is the capital of France?",
|
| 495 |
]
|
| 496 |
|
| 497 |
-
|
| 498 |
-
|
| 499 |
-
|
| 500 |
-
|
| 501 |
-
|
| 502 |
-
|
| 503 |
-
|
| 504 |
-
Watch what happens on questions the plain LLM can't answer from memory —
|
| 505 |
-
exact arithmetic, real-time weather, course-specific facts.
|
| 506 |
-
|
| 507 |
-
Based on the Week 12 lab for CS 203 at IIT Gandhinagar —
|
| 508 |
-
[colab notebook](https://github.com/nipunbatra/stt-ai-teaching/blob/master/lecture-demos/week12/colab-notebooks/01-agents-from-scratch.ipynb).
|
| 509 |
"""
|
| 510 |
|
| 511 |
-
|
| 512 |
-
|
|
|
|
| 513 |
|
| 514 |
with gr.Tabs():
|
| 515 |
with gr.Tab("Demo"):
|
| 516 |
with gr.Row():
|
| 517 |
question = gr.Textbox(
|
| 518 |
label="Ask something",
|
| 519 |
-
placeholder="e.g. How much
|
| 520 |
lines=2,
|
| 521 |
scale=4,
|
| 522 |
)
|
|
@@ -526,25 +747,34 @@ with gr.Blocks(title="Agent 101", theme=gr.themes.Soft()) as demo:
|
|
| 526 |
value=DEFAULT_MODEL,
|
| 527 |
scale=2,
|
| 528 |
)
|
| 529 |
-
go = gr.Button("Run both", variant="primary")
|
| 530 |
|
| 531 |
with gr.Row():
|
| 532 |
-
with gr.Column():
|
| 533 |
gr.Markdown("### 🚫 Without tools\n_Plain LLM — no calculator, no weather, no notes._")
|
| 534 |
out_plain = gr.Markdown()
|
| 535 |
-
with gr.Column():
|
| 536 |
gr.Markdown("### 🛠️ With tools (agent loop)\n_Same model, but it can call functions._")
|
| 537 |
-
out_final = gr.Markdown()
|
| 538 |
with gr.Accordion("Step-by-step trace", open=True):
|
| 539 |
out_trace = gr.Markdown()
|
| 540 |
|
| 541 |
-
gr.Examples(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 542 |
|
| 543 |
go.click(run_both, inputs=[question, model_choice], outputs=[out_plain, out_trace, out_final])
|
| 544 |
question.submit(run_both, inputs=[question, model_choice], outputs=[out_plain, out_trace, out_final])
|
| 545 |
|
|
|
|
|
|
|
|
|
|
| 546 |
with gr.Tab("How it works"):
|
| 547 |
gr.Markdown(HOW_IT_WORKS)
|
|
|
|
|
|
|
| 548 |
|
| 549 |
if __name__ == "__main__":
|
| 550 |
demo.queue().launch()
|
|
|
|
| 15 |
|
| 16 |
from __future__ import annotations
|
| 17 |
|
| 18 |
+
import datetime as _dt
|
| 19 |
import inspect
|
| 20 |
import json
|
| 21 |
import os
|
|
|
|
| 48 |
Rules you must follow:
|
| 49 |
1. For ANY arithmetic — even simple multiplication like 5 * 7 — call the
|
| 50 |
`calculate` tool. Never compute numbers in your head.
|
| 51 |
+
2. For real-time facts like weather or current time, call the matching
|
| 52 |
+
tool (`get_weather`, `get_time`). Do not guess or say you don't know.
|
| 53 |
3. For unit conversions, call `convert_units` — don't approximate.
|
| 54 |
+
4. For currency conversion, call `get_exchange_rate`.
|
| 55 |
+
5. For questions about the CS 203 course ("what week did we cover X?"),
|
| 56 |
call `search_notes`.
|
| 57 |
+
6. For definitions of CS / ML terms, call `define_word`.
|
| 58 |
+
7. Multi-step questions need multiple tool calls. Example:
|
| 59 |
"5 km/day for a week in miles" → first `convert_units`, then
|
| 60 |
`calculate` to multiply by 7.
|
| 61 |
+
8. After every tool result, decide: do I need another tool, or can I
|
| 62 |
write the final answer? Only answer once you have ALL the numbers.
|
| 63 |
+
9. If the question genuinely doesn't need any tool (e.g. "capital of
|
| 64 |
France"), answer directly.
|
| 65 |
|
| 66 |
+
CRITICAL: when you ARE calling a tool, use the provider's structured
|
| 67 |
+
tool_calls interface — do NOT write tool calls as Python-style text like
|
| 68 |
+
`[get_weather(city="Delhi")]` or as JSON in your reply. If you want to
|
| 69 |
+
call a tool, emit a real tool_call; otherwise write a final natural-
|
| 70 |
+
English answer with the numbers spelled out.
|
| 71 |
"""
|
| 72 |
|
| 73 |
|
|
|
|
| 145 |
return json.dumps({"message": f"No results for '{query}'. Try: {', '.join(list(topics.keys())[:5])}"})
|
| 146 |
|
| 147 |
|
| 148 |
+
def get_time(timezone: str = "UTC") -> str:
    """Return the current wall-clock time for a timezone code, city, or UTC offset.

    Args:
        timezone: Case-insensitive abbreviation (e.g. "IST"), one of the known
            city names (e.g. "Tokyo"), or an explicit offset such as
            "UTC+5.5", "UTC+5:30" or "GMT-3". ``None`` is treated as "UTC".

    Returns:
        A JSON string. On success it has the keys ``timezone``,
        ``utc_offset_hours``, ``iso`` and ``day_of_week``; for an
        unrecognised zone it has a single ``error`` key.
    """
    # Fixed offsets in hours from UTC. These are static demo values: no
    # daylight-saving adjustment is applied.
    offsets = {
        "utc": 0, "ist": 5.5, "gmt": 0, "bst": 1,
        "edt": -4, "est": -5, "pdt": -7, "pst": -8,
        "jst": 9, "kst": 9, "cst": 8, "cet": 1, "eet": 2,
        "sgt": 8, "hkt": 8, "aest": 10,
        "gandhinagar": 5.5, "mumbai": 5.5, "delhi": 5.5, "bangalore": 5.5,
        "tokyo": 9, "seoul": 9, "singapore": 8, "london": 0, "paris": 1,
        "new york": -4, "san francisco": -7, "sydney": 10,
    }
    # Tool arguments come from a model, so tolerate null / non-string values
    # instead of crashing on `.lower()`.
    label = "UTC" if timezone is None else str(timezone)
    key = label.lower().strip()
    offset_h = offsets.get(key)
    if offset_h is None:
        # Explicit offsets: "utc+5", "utc+5.5", "utc+5:30", "gmt -3".
        m = re.match(r"^(?:utc|gmt)\s*([+-])\s*(\d+)(?::([0-5]\d)|(\.\d+))?$", key)
        if m:
            sign, whole, minutes, fraction = m.groups()
            magnitude = float(whole + (fraction or ""))
            if minutes:
                magnitude += int(minutes) / 60
            # Reject absurd offsets; very large values would overflow
            # timedelta / datetime arithmetic below.
            if magnitude <= 24:
                offset_h = magnitude if sign == "+" else -magnitude
    if offset_h is None:
        return json.dumps({"error": f"Unknown timezone '{label}'. Try UTC, IST, JST, 'Tokyo', 'New York', etc."})
    # datetime.utcnow() is deprecated; take an aware UTC "now" instead.
    now_utc = _dt.datetime.now(_dt.timezone.utc)
    local = now_utc + _dt.timedelta(hours=offset_h)
    return json.dumps({
        "timezone": label,
        "utc_offset_hours": offset_h,
        "iso": local.strftime("%Y-%m-%d %H:%M:%S"),
        "day_of_week": local.strftime("%A"),
    })
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
def get_exchange_rate(from_currency: str, to_currency: str, amount: float = 1.0) -> str:
    """Convert ``amount`` between two currencies using fixed demo rates.

    Args:
        from_currency: Source currency code, case-insensitive (e.g. "USD").
        to_currency: Target currency code, case-insensitive (e.g. "INR").
        amount: Quantity of the source currency; anything ``float()`` accepts.

    Returns:
        A JSON string. On success it has the keys ``from``, ``to``,
        ``amount``, ``converted`` and ``note``. Otherwise it has an
        ``error`` key, plus ``supported`` when a currency code is unknown.
    """
    # Rates expressed as "1 unit of X in USD".
    usd_rates = {
        "usd": 1.0, "eur": 1.08, "gbp": 1.27, "jpy": 0.0067,
        "inr": 0.012, "cny": 0.14, "aud": 0.66, "cad": 0.73,
        "sgd": 0.74, "chf": 1.13, "krw": 0.00075,
    }
    try:
        amount = float(amount)
    except (TypeError, ValueError):
        return json.dumps({"error": f"amount '{amount}' is not a number"})
    # NaN / infinity would be serialised as non-standard JSON tokens.
    if amount != amount or amount in (float("inf"), float("-inf")):
        return json.dumps({"error": f"amount '{amount}' is not a finite number"})
    f = str(from_currency).lower().strip()
    t = str(to_currency).lower().strip()
    if f not in usd_rates or t not in usd_rates:
        return json.dumps({
            "error": f"Unsupported currency pair {from_currency}->{to_currency}",
            "supported": sorted(usd_rates.keys()),
        })
    # Pivot through USD: source -> USD -> target.
    usd = amount * usd_rates[f]
    converted = usd / usd_rates[t]
    if converted != 0 and abs(converted) < 1:
        # Two decimals would flatten small results to 0.0 (e.g. 1 KRW in
        # USD); keep four significant digits instead.
        shown = float(f"{converted:.4g}")
    else:
        shown = round(converted, 2)
    return json.dumps({
        "from": f.upper(), "to": t.upper(),
        "amount": amount, "converted": shown,
        "note": "rates are approximate demo values, not live market data",
    })
|
| 203 |
+
|
| 204 |
+
|
| 205 |
def define_word(word: str) -> str:
|
| 206 |
definitions = {
|
| 207 |
"overfitting": "When a model learns training data too well (including noise) and performs poorly on new data.",
|
|
|
|
| 284 |
},
|
| 285 |
},
|
| 286 |
},
|
| 287 |
+
{
|
| 288 |
+
"type": "function",
|
| 289 |
+
"function": {
|
| 290 |
+
"name": "get_time",
|
| 291 |
+
"description": "Get the current wall-clock time in a given timezone or city. Accepts UTC, IST, JST, EST, PDT, 'Tokyo', 'New York', etc.",
|
| 292 |
+
"parameters": {
|
| 293 |
+
"type": "object",
|
| 294 |
+
"properties": {"timezone": {"type": "string", "description": "Timezone code or city, e.g. 'IST' or 'Tokyo'"}},
|
| 295 |
+
"required": ["timezone"],
|
| 296 |
+
},
|
| 297 |
+
},
|
| 298 |
+
},
|
| 299 |
+
{
|
| 300 |
+
"type": "function",
|
| 301 |
+
"function": {
|
| 302 |
+
"name": "get_exchange_rate",
|
| 303 |
+
"description": "Convert a currency amount using fixed demo rates. Supports USD, EUR, GBP, JPY, INR, CNY, AUD, CAD, SGD, CHF, KRW.",
|
| 304 |
+
"parameters": {
|
| 305 |
+
"type": "object",
|
| 306 |
+
"properties": {
|
| 307 |
+
"from_currency": {"type": "string", "description": "Source ISO code, e.g. 'USD'"},
|
| 308 |
+
"to_currency": {"type": "string", "description": "Target ISO code, e.g. 'INR'"},
|
| 309 |
+
"amount": {"type": "number", "description": "Amount to convert"},
|
| 310 |
+
},
|
| 311 |
+
"required": ["from_currency", "to_currency", "amount"],
|
| 312 |
+
},
|
| 313 |
+
},
|
| 314 |
+
},
|
| 315 |
]
|
| 316 |
|
| 317 |
TOOL_FUNCTIONS = {
|
|
|
|
| 320 |
"convert_units": convert_units,
|
| 321 |
"search_notes": search_notes,
|
| 322 |
"define_word": define_word,
|
| 323 |
+
"get_time": get_time,
|
| 324 |
+
"get_exchange_rate": get_exchange_rate,
|
| 325 |
}
|
| 326 |
|
| 327 |
|
|
|
|
| 374 |
return {}
|
| 375 |
|
| 376 |
|
| 377 |
+
# Fallback: some models emit tool calls as plain text instead of as
|
| 378 |
+
# structured `tool_calls`. We scrape a few common shapes so the loop
|
| 379 |
+
# doesn't silently give up:
|
| 380 |
+
# - JSON : {"name": "foo", "arguments": {...}}
|
| 381 |
+
# - Python : [foo(k=v, k="v")] (Gemma 3)
|
| 382 |
+
# - Llama : <function=foo {"k": "v"}>
|
| 383 |
+
_JSON_LEAK_RE = re.compile(
|
| 384 |
+
r'\{[^{}]*"name"\s*:\s*"(?P<name>[a-zA-Z_]+)"[^{}]*"arguments"\s*:\s*(?P<args>\{[^{}]*\})[^{}]*\}'
|
| 385 |
)
|
| 386 |
+
_PY_LEAK_RE = re.compile(r"\[?\s*(?P<name>[a-zA-Z_]\w*)\s*\((?P<args>[^)]*)\)\s*\]?")
|
| 387 |
+
_LLAMA_LEAK_RE = re.compile(r"<function=(?P<name>[a-zA-Z_]\w*)\s*(?P<args>\{.*?\})\s*>", re.DOTALL)
|
| 388 |
+
|
| 389 |
+
|
| 390 |
+
def _coerce_scalar(s: str):
    """Turn one textual argument value from a leaked tool call into a Python scalar.

    Args:
        s: Raw text to the right of ``=`` in a ``k=v`` pair.

    Returns:
        The unquoted string if ``s`` is wrapped in matching single or double
        quotes; an ``int`` or finite ``float`` if it parses as a number
        (including scientific notation); otherwise the stripped text.
    """
    s = s.strip()
    if not s:
        return s
    # Need at least two characters, or a lone quote mark would be mistaken
    # for an empty quoted string.
    if len(s) >= 2 and s[0] == s[-1] and s[0] in {'"', "'"}:
        return s[1:-1]
    try:
        return int(s)
    except ValueError:
        pass
    try:
        number = float(s)
    except ValueError:
        return s
    # Leave "nan" / "inf" (and overflowing literals) as text so the value
    # stays JSON-serialisable and words are not mistaken for numbers.
    if number != number or number in (float("inf"), float("-inf")):
        return s
    return number
|
| 402 |
+
|
| 403 |
+
|
| 404 |
+
def _parse_py_args(arg_str: str) -> dict:
    """Build a keyword dict from the loose ``k=v, k="v"`` text of a Python-style leak.

    Chunks without an ``=`` are ignored. Each value is passed through
    ``_coerce_scalar``; a repeated key keeps its last value.
    """
    parsed = {}
    # Each match is one comma-delimited chunk; commas inside a quoted
    # string do not end the chunk.
    for chunk in re.finditer(r'(?:[^,"\']|"[^"]*"|\'[^\']*\')+', arg_str):
        key, sep, raw_value = chunk.group(0).partition("=")
        if sep:
            parsed[key.strip()] = _coerce_scalar(raw_value)
    return parsed
|
| 415 |
|
| 416 |
|
| 417 |
def _extract_leaked_tool_calls(content: str):
|
| 418 |
if not content:
|
| 419 |
return []
|
| 420 |
calls = []
|
| 421 |
+
for m in _JSON_LEAK_RE.finditer(content):
|
| 422 |
+
name = m.group("name")
|
| 423 |
+
try:
|
| 424 |
+
args = json.loads(m.group("args"))
|
| 425 |
+
except Exception:
|
| 426 |
+
continue
|
| 427 |
+
if name in TOOL_FUNCTIONS:
|
| 428 |
+
calls.append({"name": name, "arguments": args})
|
| 429 |
+
for m in _LLAMA_LEAK_RE.finditer(content):
|
| 430 |
name = m.group("name")
|
| 431 |
try:
|
| 432 |
args = json.loads(m.group("args"))
|
|
|
|
| 434 |
continue
|
| 435 |
if name in TOOL_FUNCTIONS:
|
| 436 |
calls.append({"name": name, "arguments": args})
|
| 437 |
+
if not calls:
|
| 438 |
+
for m in _PY_LEAK_RE.finditer(content):
|
| 439 |
+
name = m.group("name")
|
| 440 |
+
if name not in TOOL_FUNCTIONS:
|
| 441 |
+
continue
|
| 442 |
+
args = _parse_py_args(m.group("args"))
|
| 443 |
+
calls.append({"name": name, "arguments": args})
|
| 444 |
return calls
|
| 445 |
|
| 446 |
|
|
|
|
| 584 |
|
| 585 |
|
| 586 |
HOW_IT_WORKS = f"""
|
| 587 |
+
## The three pieces of an agent
|
| 588 |
+
|
| 589 |
+
| Piece | What it does |
|
| 590 |
+
|:--|:--|
|
| 591 |
+
| **LLM** | Decides what to do next |
|
| 592 |
+
| **Tools** | Python functions the LLM can call |
|
| 593 |
+
| **Loop** | Keep asking the LLM until it's done |
|
| 594 |
+
|
| 595 |
+
## Pseudocode
|
| 596 |
+
|
| 597 |
+
```python
|
| 598 |
+
while True:
|
| 599 |
+
response = llm.chat(messages, tools=schemas)
|
| 600 |
+
if not response.tool_calls:
|
| 601 |
+
return response.text # done
|
| 602 |
+
for call in response.tool_calls:
|
| 603 |
+
result = TOOLS[call.name](**call.args)
|
| 604 |
+
messages.append(result) # feed back, loop again
|
| 605 |
```
|
|
|
|
| 606 |
|
| 607 |
+
The LLM never runs code. It just *names* a tool and *your* code runs it.
|
| 608 |
+
"""
|
| 609 |
|
|
|
|
|
|
|
| 610 |
|
| 611 |
+
TOOLS_DETAIL = f"""
|
| 612 |
+
## System prompt
|
|
|
|
| 613 |
|
| 614 |
+
```text
|
| 615 |
+
{SYSTEM_PROMPT}
|
| 616 |
```
|
| 617 |
|
| 618 |
+
## Tools the agent has
|
|
|
|
| 619 |
|
| 620 |
+
{TOOLS_MARKDOWN}
|
| 621 |
+
"""
|
| 622 |
|
|
|
|
| 623 |
|
| 624 |
+
# Mermaid flowchart — rendered inside a raw HTML block with CDN Mermaid.
|
| 625 |
+
FLOWCHART_HTML = """
|
| 626 |
+
<div style="padding: 1rem 0;">
|
| 627 |
+
<h2>Without tools vs. with tools</h2>
|
| 628 |
+
<p>Same model on both sides. The only difference is the loop on the right.</p>
|
| 629 |
+
|
| 630 |
+
<div class="mermaid">
|
| 631 |
+
flowchart TB
|
| 632 |
+
subgraph PLAIN["🚫 Without tools"]
|
| 633 |
+
direction TB
|
| 634 |
+
U1["User question"] --> L1["LLM generates<br/>text answer"]
|
| 635 |
+
L1 --> A1["Final answer<br/>(may hallucinate,<br/>can't use real data)"]
|
| 636 |
+
end
|
| 637 |
+
|
| 638 |
+
subgraph AGENT["🛠️ With tools (agent loop)"]
|
| 639 |
+
direction TB
|
| 640 |
+
U2["User question"] --> L2["LLM sees question<br/>+ tool menu"]
|
| 641 |
+
L2 --> D{"Tool call<br/>needed?"}
|
| 642 |
+
D -- "No" --> F["Final answer<br/>(grounded in<br/>tool results)"]
|
| 643 |
+
D -- "Yes" --> T["Run tool<br/>in your code"]
|
| 644 |
+
T --> R["Append result<br/>to messages"]
|
| 645 |
+
R --> L2
|
| 646 |
+
end
|
| 647 |
+
|
| 648 |
+
classDef plain fill:#fdecea,stroke:#c0392b,color:#2c1810;
|
| 649 |
+
classDef agent fill:#e8f5e9,stroke:#1e8449,color:#0b2e13;
|
| 650 |
+
class PLAIN plain
|
| 651 |
+
class AGENT agent
|
| 652 |
+
</div>
|
| 653 |
+
|
| 654 |
+
<script type="module">
|
| 655 |
+
import mermaid from "https://cdn.jsdelivr.net/npm/mermaid@11/dist/mermaid.esm.min.mjs";
|
| 656 |
+
mermaid.initialize({ startOnLoad: true, theme: "default", securityLevel: "loose" });
|
| 657 |
+
// re-run when Gradio swaps tabs
|
| 658 |
+
const obs = new MutationObserver(() => mermaid.run());
|
| 659 |
+
obs.observe(document.body, { childList: true, subtree: true });
|
| 660 |
+
</script>
|
| 661 |
+
</div>
|
| 662 |
+
"""
|
| 663 |
|
|
|
|
| 664 |
|
| 665 |
+
CUSTOM_CSS = """
|
| 666 |
+
#title-card {
|
| 667 |
+
background: linear-gradient(135deg, #4f46e5 0%, #ec4899 100%);
|
| 668 |
+
color: white;
|
| 669 |
+
padding: 1.25rem 1.5rem;
|
| 670 |
+
border-radius: 16px;
|
| 671 |
+
margin-bottom: 1rem;
|
| 672 |
+
}
|
| 673 |
+
#title-card h1 { color: white; margin: 0 0 .25rem 0; font-size: 1.8rem; }
|
| 674 |
+
#title-card p { color: rgba(255,255,255,.92); margin: 0; font-size: .95rem; }
|
| 675 |
|
| 676 |
+
.panel {
|
| 677 |
+
border-radius: 14px;
|
| 678 |
+
padding: 1rem 1.1rem;
|
| 679 |
+
min-height: 220px;
|
| 680 |
+
}
|
| 681 |
+
.panel-plain { background: #fdecea; border: 1px solid #f5b7b1; }
|
| 682 |
+
.panel-agent { background: #e8f5e9; border: 1px solid #a9dfbf; }
|
| 683 |
+
.panel h3 { margin-top: 0 !important; }
|
| 684 |
+
|
| 685 |
+
.final-box {
|
| 686 |
+
background: white;
|
| 687 |
+
border-left: 4px solid #1e8449;
|
| 688 |
+
padding: .75rem 1rem;
|
| 689 |
+
border-radius: 8px;
|
| 690 |
+
margin-bottom: .75rem;
|
| 691 |
+
}
|
| 692 |
"""
|
| 693 |
|
| 694 |
|
| 695 |
+
# --- UI ------------------------------------------------------------------
|
| 696 |
+
|
| 697 |
+
|
| 698 |
# --- UI ------------------------------------------------------------------
|
| 699 |
|
| 700 |
EXAMPLES = [
|
| 701 |
+
# --- Hard arithmetic — plain LLMs routinely get these wrong ---
|
| 702 |
"What is 4729 times 8314?",
|
| 703 |
+
"Compute (127 ** 3) + (49 ** 4).",
|
| 704 |
+
"What is 31.5% of 128,400?",
|
| 705 |
+
"What is the square root of 987654, to 4 decimals?",
|
| 706 |
+
"Evaluate 2847 * 9183 - 17^5.",
|
| 707 |
+
# --- Single-tool: real-time data ---
|
| 708 |
"What's the temperature in Gandhinagar in Fahrenheit?",
|
| 709 |
+
"What time is it in Tokyo right now?",
|
| 710 |
+
# --- Multi-tool chains ---
|
| 711 |
"How much hotter is Delhi than Bangalore right now, in degrees Celsius?",
|
| 712 |
"If I run 5 km every day for a week, how many miles is that total?",
|
| 713 |
+
"I have 1000 USD. How much is that in INR, and what time is it in Delhi?",
|
| 714 |
+
"If Paris is 18 C and New York is 22 C, what's the average in Fahrenheit?",
|
| 715 |
+
"I earn 2500 EUR/month. After converting to INR, what is my annual salary?",
|
| 716 |
+
# --- Course knowledge base ---
|
| 717 |
"Which week of CS 203 covered Docker?",
|
| 718 |
"What does 'overfitting' mean?",
|
| 719 |
+
# --- No-tool question (sanity check) ---
|
| 720 |
"What is the capital of France?",
|
| 721 |
]
|
| 722 |
|
| 723 |
+
TITLE_CARD = """
|
| 724 |
+
<div id="title-card">
|
| 725 |
+
<h1>🤖 Agent 101 — LLM vs. LLM-with-tools</h1>
|
| 726 |
+
<p>Same model both sides. Left: plain LLM. Right: the same model with a toolkit and an agent loop. Watch what happens on questions the LLM can't answer from memory.</p>
|
| 727 |
+
<p style="margin-top:.4rem;font-size:.85rem;">Built for CS 203 at IIT Gandhinagar · <a href="https://github.com/nipunbatra/stt-ai-teaching/blob/master/lecture-demos/week12/colab-notebooks/01-agents-from-scratch.ipynb" style="color:white;text-decoration:underline;">companion Colab</a></p>
|
| 728 |
+
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 729 |
"""
|
| 730 |
|
| 731 |
+
|
| 732 |
+
with gr.Blocks(title="Agent 101", theme=gr.themes.Soft(), css=CUSTOM_CSS) as demo:
|
| 733 |
+
gr.HTML(TITLE_CARD)
|
| 734 |
|
| 735 |
with gr.Tabs():
|
| 736 |
with gr.Tab("Demo"):
|
| 737 |
with gr.Row():
|
| 738 |
question = gr.Textbox(
|
| 739 |
label="Ask something",
|
| 740 |
+
placeholder="e.g. I have 1000 USD. How much is that in INR, and what time is it in Delhi?",
|
| 741 |
lines=2,
|
| 742 |
scale=4,
|
| 743 |
)
|
|
|
|
| 747 |
value=DEFAULT_MODEL,
|
| 748 |
scale=2,
|
| 749 |
)
|
| 750 |
+
go = gr.Button("Run both", variant="primary", size="lg")
|
| 751 |
|
| 752 |
with gr.Row():
|
| 753 |
+
with gr.Column(elem_classes="panel panel-plain"):
|
| 754 |
gr.Markdown("### 🚫 Without tools\n_Plain LLM — no calculator, no weather, no notes._")
|
| 755 |
out_plain = gr.Markdown()
|
| 756 |
+
with gr.Column(elem_classes="panel panel-agent"):
|
| 757 |
gr.Markdown("### 🛠️ With tools (agent loop)\n_Same model, but it can call functions._")
|
| 758 |
+
out_final = gr.Markdown(elem_classes="final-box")
|
| 759 |
with gr.Accordion("Step-by-step trace", open=True):
|
| 760 |
out_trace = gr.Markdown()
|
| 761 |
|
| 762 |
+
gr.Examples(
|
| 763 |
+
examples=EXAMPLES,
|
| 764 |
+
inputs=question,
|
| 765 |
+
label="Click an example to try it",
|
| 766 |
+
)
|
| 767 |
|
| 768 |
go.click(run_both, inputs=[question, model_choice], outputs=[out_plain, out_trace, out_final])
|
| 769 |
question.submit(run_both, inputs=[question, model_choice], outputs=[out_plain, out_trace, out_final])
|
| 770 |
|
| 771 |
+
with gr.Tab("Flowchart"):
|
| 772 |
+
gr.HTML(FLOWCHART_HTML)
|
| 773 |
+
|
| 774 |
with gr.Tab("How it works"):
|
| 775 |
gr.Markdown(HOW_IT_WORKS)
|
| 776 |
+
with gr.Accordion("System prompt + tool definitions", open=False):
|
| 777 |
+
gr.Markdown(TOOLS_DETAIL)
|
| 778 |
|
| 779 |
if __name__ == "__main__":
|
| 780 |
demo.queue().launch()
|