moazeldegwy commited on
Commit
6cdcdeb
·
1 Parent(s): c504fac

Phase 3: SQLite long-term memory + KnowledgeAgent seam

Browse files

Adds the memory taxonomy from the 2026 agent-memory literature
(working / episodic / semantic / procedural); the three persistent tiers
(semantic / procedural / episodic) are backed by stdlib sqlite3 -
zero new deps. Mirrors Mem0's interface so a future swap is one-line.

memory.py
* LongTermMemory(db_path=None) -- in-memory by default, file for persistence.
* Three tables, all keyed by user_id:
- semantic_facts(fact_type, content, source) -- likes/dislikes/allergies
- procedural_records(plan_summary, verdict, issues_json) -- validator history
- episodic_sessions(session_id, payload_json) -- full snapshot for replay
* Filtered recall by fact_type / substring / limit; forget_fact() to support
user-driven correction.

knowledge.py
* KnowledgeAgent.handle_task(task, memory) returns JSON {answer, citations}.
* Default backing is WebSearchTool with per-kind query biasing toward
authoritative domains (USDA FDC for nutrition, WHO/diabetes.org/EFSA/NICE
for guidelines, MedlinePlus/FDA/NIH for drug interactions).
* Always emits a citations list; if empty, appends an "advisory only" note
so the Validator can flag uncited clinical claims.
* Designed as a SEAM: a future phase swaps WebSearch for a real RAG index
over USDA + WHO/ADA/EFSA PDFs without changing the agent's call sites.

nutritionmas.py
* AGENTS dict now exposes 'KnowledgeAgent' so the Coach can call_agent it
for any "what does the literature say about X" question.
* New initialize_long_term_memory(db_path) -> LongTermMemory; module-level
singleton LONG_TERM_MEMORY for the Gradio app (Phase 7) to wire into
per-user sessions.

tests/test_memory.py (9 tests)
* Round-trip for all three tables, user isolation, filter-by-type and
filter-by-substring, recall ordering+limit, forget_fact.
* KnowledgeAgent: extracts USDA URL into citations; appends advisory note
when no URL is present.

46/46 tests green.

Note: full RAG ingestion of USDA FDC + WHO PDFs is intentionally deferred
- it's a large data-engineering task, and the seam is what unblocks it.

Files changed (4) hide show
  1. knowledge.py +127 -0
  2. memory.py +198 -0
  3. nutritionmas.py +24 -0
  4. tests/test_memory.py +115 -0
knowledge.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """KnowledgeAgent — citation-first retrieval over authoritative sources.
2
+
3
+ Phase 3 wires the *interface* and a default WebSearch-backed implementation.
4
+ Full RAG over USDA FoodData Central and WHO/ADA/EFSA PDFs is intentionally
5
+ left as a follow-up (it requires bulk data ingestion + an embedding store).
6
+ The seam is here so a later phase can drop in a real index without changing
7
+ the agents that call it.
8
+
9
+ The contract is: every query returns a synthesised answer **with at least
10
+ one citation**. The Validator will reject medical recommendations that lack
11
+ a citation, so this agent is the safety story for clinical content.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import json
17
+ from datetime import datetime
18
+ from typing import Any, Dict, List, Optional
19
+
20
+ from logging_setup import get_logger
21
+ from utils import save_to_json
22
+
23
+ _logger = get_logger("agents.knowledge")
24
+
25
+
26
class KnowledgeAgent:
    """Default Knowledge implementation backed by WebSearchTool.

    Every answer is returned as JSON ``{"kind", "answer", "citations"}``; when
    no URL can be extracted from the search result, an explicit advisory note
    is appended so the Validator can flag uncited clinical claims.

    Future drop-in: replace ``self.web`` with a RAG retriever that walks an
    embedded USDA/WHO/ADA/EFSA index and returns citation tuples.
    """

    # Closed set of query kinds; anything else degrades to "general".
    SUPPORTED_KINDS = {"nutrition", "guideline", "drug_interaction", "general"}

    def __init__(self, web_search_tool, llm_instance: Optional[Any] = None) -> None:
        """``web_search_tool`` must expose ``handle_task(query) -> str``."""
        self.web = web_search_tool
        self.llm = llm_instance  # reserved for the RAG-backed variant

    def handle_task(self, task: str, memory: Dict[str, Any]) -> str:  # noqa: ARG002
        """Answer ``task`` and return JSON ``{answer, citations}``.

        ``task`` is a free-text question. Optional structured form:
        ``{"kind": "nutrition" | "guideline" | ...,
           "query": "...",
           "context": "..."}``
        """
        kind, query, context = self._parse_task(task)
        _logger.info("📚 KNOWLEDGE: kind=%s query=%r", kind, query[:80])

        # Bias the query toward citation-rich sources.
        biased_query = self._bias_query(kind, query)
        web_answer = self.web.handle_task(biased_query)

        citations = self._extract_citations(web_answer)
        answer = web_answer  # WebSearch already synthesises; we just append a citation note.
        if not citations:
            answer += (
                "\n\n[Note] This answer comes from a generalist web search; "
                "no authoritative clinical citation was found. Treat as advisory only."
            )

        payload = {"kind": kind, "answer": answer, "citations": citations}
        # Persist the full exchange for offline audit / debugging.
        save_to_json(
            {
                "task": task,
                "kind": kind,
                "query": query,
                "context": context,
                "biased_query": biased_query,
                "answer": answer,
                "citations": citations,
                "timestamp": datetime.now().isoformat(),
            },
            f"knowledge_{datetime.now().isoformat()}.json",
            subdirectory="KnowledgeAgent",
        )
        return json.dumps(payload)

    # ------------------------------------------------------------------
    @staticmethod
    def _parse_task(task: str) -> tuple[str, str, str]:
        """Parse ``task`` into ``(kind, query, context)``.

        Accepts free text (treated as a general query) or a JSON object
        ``{"kind", "query", "context"}``. Fixes vs. the naive version:
        ``query``/``context`` are coerced to ``str`` (a numeric ``query``
        used to crash the ``query[:80]`` log slice in ``handle_task``) and a
        non-string ``kind`` (e.g. an unhashable list, which used to raise
        ``TypeError`` on the set-membership test) falls back to "general".
        """
        try:
            data = json.loads(task)
        except (json.JSONDecodeError, TypeError):
            return "general", task, ""
        if not isinstance(data, dict):
            # Valid JSON but not the structured form (e.g. a bare string).
            return "general", task, ""
        kind = data.get("kind", "general")
        if not isinstance(kind, str) or kind not in KnowledgeAgent.SUPPORTED_KINDS:
            kind = "general"
        return kind, str(data.get("query", "")), str(data.get("context", ""))

    @staticmethod
    def _bias_query(kind: str, query: str) -> str:
        """Steer the search toward authoritative domains per kind."""
        if kind == "nutrition":
            return (
                f"{query} site:fdc.nal.usda.gov OR site:nutritionsource.hsph.harvard.edu "
                "OR site:who.int"
            )
        if kind == "guideline":
            return (
                f"{query} site:who.int OR site:diabetes.org OR site:efsa.europa.eu "
                "OR site:nice.org.uk"
            )
        if kind == "drug_interaction":
            return f"{query} site:medlineplus.gov OR site:fda.gov OR site:nih.gov"
        return query

    @staticmethod
    def _extract_citations(text: str) -> List[str]:
        """Pull URL-looking tokens out of the synthesised answer."""
        import re

        urls = re.findall(r"https?://[^\s)\]]+", text)
        # De-duplicate while preserving order; strip trailing punctuation
        # that the regex cannot distinguish from the URL itself.
        seen = set()
        out: List[str] = []
        for u in urls:
            u_clean = u.rstrip(".,);")
            if u_clean not in seen:
                seen.add(u_clean)
                out.append(u_clean)
        return out


__all__ = ["KnowledgeAgent"]
memory.py ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Long-term memory layer (semantic / procedural / episodic).
2
+
3
+ Phase 3 deliberately uses stdlib ``sqlite3`` rather than Mem0 / Letta /
4
+ sqlite-vec so the demo ships with zero extra dependencies. The interface,
5
+ however, mirrors the modern three-tier taxonomy from the 2026 agent-memory
6
+ literature so a later phase can swap the backend without touching call sites.
7
+
8
+ Tiers
9
+ -----
10
+ * **Working** — kept in the LangGraph state (untouched by this module).
11
+ * **Semantic** — atomic facts about the user (likes, dislikes, hard
12
+ constraints, lab results). Survives across sessions.
13
+ * **Procedural** — verdicts the validator produced. Lets the system learn
14
+ "this user rejected high-carb breakfasts twice" without re-asking.
15
+ * **Episodic** — JSON snapshot of past sessions for replay / audit.
16
+
17
+ Schema is intentionally tiny — three tables, one row per fact / verdict /
18
+ session. Vector search is *not* needed for this demo; SQL ``LIKE`` over
19
+ short text is good enough and adds zero dependencies. Phase 6 evals will
20
+ make the case for upgrading.
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ import json
26
+ import sqlite3
27
+ import threading
28
+ from datetime import datetime
29
+ from typing import Any, Dict, List, Optional
30
+
31
+
32
_SCHEMA = """
CREATE TABLE IF NOT EXISTS semantic_facts (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    user_id TEXT NOT NULL,
    fact_type TEXT NOT NULL,          -- e.g. 'dislike', 'allergy', 'preference'
    content TEXT NOT NULL,
    source TEXT NOT NULL DEFAULT '',  -- e.g. 'user_stated', 'inferred', 'validator'
    created_at TEXT NOT NULL
);

CREATE INDEX IF NOT EXISTS idx_facts_user ON semantic_facts(user_id, fact_type);

CREATE TABLE IF NOT EXISTS procedural_records (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    user_id TEXT NOT NULL,
    plan_summary TEXT NOT NULL,
    verdict TEXT NOT NULL,            -- 'pass' | 'revise' | 'reject'
    issues_json TEXT NOT NULL,        -- JSON list of ValidationIssue
    created_at TEXT NOT NULL
);

CREATE INDEX IF NOT EXISTS idx_proc_user ON procedural_records(user_id, created_at);

CREATE TABLE IF NOT EXISTS episodic_sessions (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    user_id TEXT NOT NULL,
    session_id TEXT NOT NULL,
    payload_json TEXT NOT NULL,       -- JSON snapshot of session state
    created_at TEXT NOT NULL
);

CREATE INDEX IF NOT EXISTS idx_episodic_user ON episodic_sessions(user_id, created_at);
"""


class LongTermMemory:
    """SQLite-backed three-tier long-term memory.

    Pass a file path for persistence across runs, or ``None`` (default) for an
    in-memory database useful in tests / ephemeral demos.
    """

    def __init__(self, db_path: Optional[str] = None) -> None:
        self.db_path = db_path or ":memory:"
        # SQLite connections are not thread-safe by default; one connection per
        # thread is the standard pattern. The demo is single-process so a single
        # connection + lock is enough.
        self.conn = sqlite3.connect(self.db_path, check_same_thread=False)
        self.conn.row_factory = sqlite3.Row  # rows become dict-convertible
        self._lock = threading.Lock()
        self._init_schema()

    def _init_schema(self) -> None:
        """Create the three tables + indexes if they don't already exist."""
        with self._lock:
            self.conn.executescript(_SCHEMA)
            self.conn.commit()

    def close(self) -> None:
        """Close the underlying connection; the instance is unusable after."""
        with self._lock:
            self.conn.close()

    # ------------------------------------------------------------------
    # Semantic facts
    # ------------------------------------------------------------------
    def remember_fact(
        self,
        user_id: str,
        fact_type: str,
        content: str,
        source: str = "user_stated",
    ) -> int:
        """Insert a semantic fact. Returns the row id."""
        # NOTE(review): utcnow() is naive (and deprecated in 3.12+); switching
        # to timezone-aware timestamps would change the stored format — defer.
        now = datetime.utcnow().isoformat()
        with self._lock:
            cur = self.conn.execute(
                "INSERT INTO semantic_facts (user_id, fact_type, content, source, created_at) "
                "VALUES (?, ?, ?, ?, ?)",
                (user_id, fact_type, content, source, now),
            )
            self.conn.commit()
            return int(cur.lastrowid or 0)

    def recall_facts(
        self,
        user_id: str,
        fact_type: Optional[str] = None,
        contains: Optional[str] = None,
        limit: int = 50,
    ) -> List[Dict[str, Any]]:
        """List facts for a user, optionally filtered by type / substring.

        ``contains`` is treated as a *literal* substring: LIKE wildcards
        (``%``, ``_``) and backslashes in it are escaped so user text such as
        "100%" cannot act as a pattern.
        """
        sql = "SELECT * FROM semantic_facts WHERE user_id = ?"
        params: List[Any] = [user_id]
        if fact_type:
            sql += " AND fact_type = ?"
            params.append(fact_type)
        if contains:
            # Escape LIKE metacharacters (backslash first!) so the filter is
            # a plain substring match, as the docstring promises.
            escaped = (
                contains.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
            )
            sql += " AND content LIKE ? ESCAPE '\\'"
            params.append(f"%{escaped}%")
        # Tie-break on id so rows inserted within the same timestamp tick
        # still come back newest-first deterministically.
        sql += " ORDER BY created_at DESC, id DESC LIMIT ?"
        params.append(limit)
        with self._lock:
            cur = self.conn.execute(sql, params)
            return [dict(row) for row in cur.fetchall()]

    def forget_fact(self, fact_id: int) -> None:
        """Delete one semantic fact by id (supports user-driven correction)."""
        with self._lock:
            self.conn.execute("DELETE FROM semantic_facts WHERE id = ?", (fact_id,))
            self.conn.commit()

    # ------------------------------------------------------------------
    # Procedural records (validator history)
    # ------------------------------------------------------------------
    def remember_validation(
        self,
        user_id: str,
        plan_summary: str,
        verdict: str,
        issues: List[Dict[str, Any]],
    ) -> int:
        """Store a validator verdict + its issues list. Returns the row id."""
        now = datetime.utcnow().isoformat()
        with self._lock:
            cur = self.conn.execute(
                "INSERT INTO procedural_records (user_id, plan_summary, verdict, issues_json, created_at) "
                "VALUES (?, ?, ?, ?, ?)",
                (user_id, plan_summary, verdict, json.dumps(issues), now),
            )
            self.conn.commit()
            return int(cur.lastrowid or 0)

    def recall_validations(self, user_id: str, limit: int = 10) -> List[Dict[str, Any]]:
        """Newest-first validator history; ``issues_json`` is decoded to ``issues``."""
        with self._lock:
            cur = self.conn.execute(
                "SELECT * FROM procedural_records WHERE user_id = ? "
                "ORDER BY created_at DESC, id DESC LIMIT ?",
                (user_id, limit),
            )
            return [
                {**dict(row), "issues": json.loads(row["issues_json"])}
                for row in cur.fetchall()
            ]

    # ------------------------------------------------------------------
    # Episodic sessions
    # ------------------------------------------------------------------
    def remember_session(self, user_id: str, session_id: str, payload: Dict[str, Any]) -> int:
        """Store a full session snapshot for replay / audit. Returns the row id."""
        now = datetime.utcnow().isoformat()
        with self._lock:
            cur = self.conn.execute(
                "INSERT INTO episodic_sessions (user_id, session_id, payload_json, created_at) "
                "VALUES (?, ?, ?, ?)",
                # default=str: non-JSON-serialisable values are stringified on
                # purpose so a snapshot never fails to persist.
                (user_id, session_id, json.dumps(payload, default=str), now),
            )
            self.conn.commit()
            return int(cur.lastrowid or 0)

    def recall_sessions(self, user_id: str, limit: int = 5) -> List[Dict[str, Any]]:
        """Newest-first session snapshots; ``payload_json`` is decoded to ``payload``."""
        with self._lock:
            cur = self.conn.execute(
                "SELECT * FROM episodic_sessions WHERE user_id = ? "
                "ORDER BY created_at DESC, id DESC LIMIT ?",
                (user_id, limit),
            )
            return [
                {**dict(row), "payload": json.loads(row["payload_json"])}
                for row in cur.fetchall()
            ]


__all__ = ["LongTermMemory"]
nutritionmas.py CHANGED
@@ -7,7 +7,9 @@ from IPython.display import Markdown, display
7
 
8
  from agents import CoachAgent, MedicalAssessmentAgent, PlannerAgent
9
  from config import set_settings
 
10
  from logging_setup import get_logger, refresh_level
 
11
  from state import initialize_empty_memory
12
  from tools import ComputationTool, QuantitiesFinder, WebSearchTool
13
  from utils import APIPoolManager, create_llm
@@ -187,8 +189,30 @@ def initialize_agents():
187
  PLANNER_LLM, TOOLS["ComputationTool"], TOOLS["WebSearchTool"], TOOLS["QuantitiesFinder"]
188
  ),
189
  "ValidationAgent": ValidationAgent(VALIDATION_LLM),
 
 
 
 
190
  }
191
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
  def setup_workflow():
193
  global APP
194
  if AGENTS is None or TOOLS is None:
 
7
 
8
  from agents import CoachAgent, MedicalAssessmentAgent, PlannerAgent
9
  from config import set_settings
10
+ from knowledge import KnowledgeAgent
11
  from logging_setup import get_logger, refresh_level
12
+ from memory import LongTermMemory
13
  from state import initialize_empty_memory
14
  from tools import ComputationTool, QuantitiesFinder, WebSearchTool
15
  from utils import APIPoolManager, create_llm
 
189
  PLANNER_LLM, TOOLS["ComputationTool"], TOOLS["WebSearchTool"], TOOLS["QuantitiesFinder"]
190
  ),
191
  "ValidationAgent": ValidationAgent(VALIDATION_LLM),
192
+ # KnowledgeAgent is the citation-first retrieval seam; defaults to
193
+ # WebSearch backing. Phase 3+ can swap in a RAG-backed implementation
194
+ # over USDA / WHO / ADA / EFSA without touching Coach call-sites.
195
+ "KnowledgeAgent": KnowledgeAgent(TOOLS["WebSearchTool"]),
196
  }
197
 
198
+
199
# ---------------------------------------------------------------------------
# Long-term memory singleton (Phase 3)
# ---------------------------------------------------------------------------
LONG_TERM_MEMORY: Optional[LongTermMemory] = None


def initialize_long_term_memory(db_path: Optional[str] = None) -> LongTermMemory:
    """Create the SQLite-backed three-tier memory and publish it module-wide.

    A file path gives cross-session persistence; omitting it yields an
    in-memory DB (the default, suitable for tests and ephemeral demos).
    """
    global LONG_TERM_MEMORY
    memory = LongTermMemory(db_path=db_path)
    LONG_TERM_MEMORY = memory
    _logger.info("Long-term memory initialised at %s", db_path or ":memory:")
    return memory
215
+
216
  def setup_workflow():
217
  global APP
218
  if AGENTS is None or TOOLS is None:
tests/test_memory.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Tests for the SQLite-backed three-tier long-term memory."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from memory import LongTermMemory
6
+
7
+
8
def test_round_trip_semantic_facts() -> None:
    """A stored fact comes back with its content and type intact."""
    store = LongTermMemory()
    row_id = store.remember_fact("user1", "dislike", "okra", source="user_stated")
    assert row_id > 0
    recalled = store.recall_facts("user1")
    assert len(recalled) == 1
    first = recalled[0]
    assert first["content"] == "okra"
    assert first["fact_type"] == "dislike"
16
+
17
+
18
def test_filter_facts_by_type_and_substring() -> None:
    """fact_type and contains filters narrow recall correctly."""
    store = LongTermMemory()
    for fact_type, content in (
        ("dislike", "okra"),
        ("dislike", "kale"),
        ("preference", "high-protein"),
    ):
        store.remember_fact("u1", fact_type, content)
    store.remember_fact("u2", "dislike", "okra")  # different user

    dislikes = store.recall_facts("u1", fact_type="dislike")
    assert {f["content"] for f in dislikes} == {"okra", "kale"}

    preferences = store.recall_facts("u1", fact_type="preference")
    assert preferences[0]["content"] == "high-protein"

    matching = store.recall_facts("u1", contains="okra")
    assert len(matching) == 1
33
+
34
+
35
def test_user_isolation() -> None:
    """Facts never leak across user_ids."""
    store = LongTermMemory()
    store.remember_fact("alice", "allergy", "peanut")
    store.remember_fact("bob", "allergy", "shellfish")

    for user, expected in (("alice", "peanut"), ("bob", "shellfish")):
        assert {f["content"] for f in store.recall_facts(user)} == {expected}
42
+
43
+
44
def test_forget_fact() -> None:
    """Deleting a fact by id removes it from recall."""
    store = LongTermMemory()
    row_id = store.remember_fact("u", "dislike", "okra")
    store.forget_fact(row_id)
    assert store.recall_facts("u") == []
49
+
50
+
51
def test_procedural_records_round_trip() -> None:
    """A validator verdict round-trips with its issues decoded from JSON."""
    store = LongTermMemory()
    reported = [{"code": "calorie_deviation", "description": "x", "severity": "medium"}]
    store.remember_validation("u1", "1500 kcal plan", "revise", reported)

    records = store.recall_validations("u1")
    assert len(records) == 1
    record = records[0]
    assert record["verdict"] == "revise"
    assert record["issues"] == reported
60
+
61
+
62
def test_episodic_session_round_trip() -> None:
    """A session snapshot round-trips with payload decoded from JSON."""
    store = LongTermMemory()
    snapshot = {"messages": [{"role": "user", "content": "hi"}], "memory": {"x": 1}}
    store.remember_session("u1", "session-A", snapshot)

    recalled = store.recall_sessions("u1")
    assert len(recalled) == 1
    assert recalled[0]["payload"] == snapshot
    assert recalled[0]["session_id"] == "session-A"
71
+
72
+
73
def test_recall_limit_and_order() -> None:
    """recall_facts honours limit and returns newest entries first."""
    store = LongTermMemory()
    for i in range(15):
        store.remember_fact("u", "note", f"fact-{i}")
    newest = store.recall_facts("u", limit=5)
    assert len(newest) == 5
    assert newest[0]["content"] == "fact-14"  # newest first
81
+
82
+
83
def test_knowledge_agent_returns_citations() -> None:
    """KnowledgeAgent should always return a citations list (possibly empty)."""
    import json

    from knowledge import KnowledgeAgent

    class StubWebSearch:
        def handle_task(self, query: str) -> str:
            return (
                "Calories per 100g of chicken breast: 165 kcal (USDA FDC).\n"
                "Source: https://fdc.nal.usda.gov/food-details/171477/nutrients"
            )

    agent = KnowledgeAgent(StubWebSearch())
    payload = json.loads(
        agent.handle_task('{"kind": "nutrition", "query": "chicken breast"}', memory={})
    )
    assert payload["kind"] == "nutrition"
    assert payload["citations"], "Should extract at least one URL"
    assert any("fdc.nal.usda.gov" in url for url in payload["citations"])
101
+
102
+
103
def test_knowledge_agent_marks_uncited_answers() -> None:
    """When no citations are found, agent appends an advisory note."""
    import json

    from knowledge import KnowledgeAgent

    class NoCitationStub:
        def handle_task(self, query: str) -> str:
            return "Generic answer without any URL."

    agent = KnowledgeAgent(NoCitationStub())
    raw = agent.handle_task("how many calories in an apple?", memory={})
    payload = json.loads(raw)
    assert payload["citations"] == []
    assert "advisory only" in payload["answer"]