shekkari21 committed on
Commit
4eaaf4a
·
1 Parent(s): 0cb7ee0

modified files

Browse files
agent_framework/agent.py CHANGED
@@ -1,8 +1,8 @@
1
  """Agent class for executing multi-step reasoning with tools."""
2
 
3
- from dataclasses import dataclass
4
  from typing import List, Optional, Type, Callable, Literal
5
- from pydantic import BaseModel, Field
6
  from .tools import tool
7
  import inspect
8
  import json
@@ -28,7 +28,7 @@ class AgentResult:
28
  output: str | BaseModel
29
  context: ExecutionContext
30
  status: Literal["complete", "pending", "error"] = "complete"
31
- pending_tool_calls: list[PendingToolCall] = Field(default_factory=list)
32
 
33
 
34
  class Agent:
 
1
  """Agent class for executing multi-step reasoning with tools."""
2
 
3
+ from dataclasses import dataclass, field
4
  from typing import List, Optional, Type, Callable, Literal
5
+ from pydantic import BaseModel
6
  from .tools import tool
7
  import inspect
8
  import json
 
28
  output: str | BaseModel
29
  context: ExecutionContext
30
  status: Literal["complete", "pending", "error"] = "complete"
31
+ pending_tool_calls: list[PendingToolCall] = field(default_factory=list)
32
 
33
 
34
  class Agent:
agent_tools/example_usage.py DELETED
@@ -1,33 +0,0 @@
1
- """Example: How to use agent_tools with your agent."""
2
-
3
- import asyncio
4
- import sys
5
- from pathlib import Path
6
-
7
- # Add parent directory to path
8
- sys.path.insert(0, str(Path(__file__).parent.parent))
9
-
10
- from agent_framework import Agent, LlmClient
11
- from agent_tools import unzip_file, list_files, read_file, read_media_file
12
- import asyncio
13
- from agent_framework import Agent, LlmClient
14
- from agent_tools import search_web, list_files, read_file
15
- from agent_tools.file_tools import delete_file
16
- from agent_tools.web_tools import search_compressor
17
- from agent_framework.agent import approval_callback
18
-
19
- async def main():
20
- agent = Agent(
21
- model=LlmClient(model="gpt-5-mini"), # Use a valid model name
22
- tools=[search_web, list_files, read_file, delete_file],
23
- instructions="You are a helpful assistant that can search the web and explore files to answer questions.",
24
- max_steps=20,
25
- before_tool_callbacks=[approval_callback],
26
- after_tool_callbacks=[search_compressor],
27
- )
28
-
29
- result = await agent.run("search about andrej karpathy")
30
- print(result.output)
31
-
32
- if __name__ == "__main__":
33
- asyncio.run(main())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agent_tools/web_tools.py CHANGED
@@ -75,71 +75,27 @@ def _extract_search_query(context: ExecutionContext, tool_call_id: str) -> str:
75
  return item.arguments.get("query", "")
76
  return ""
77
 
78
- ## callbacks
79
- # def search_compressor(context: ExecutionContext, tool_result: ToolResult):
80
- # """Callback that compresses web search results."""
81
- # # Pass through unchanged if not a search tool
82
- # if tool_result.name != "search_web":
83
- # return None
84
-
85
- # original_content = tool_result.content[0]
86
-
87
- # # No compression needed if result is short enough
88
- # if len(original_content) < 2000:
89
- # return None
90
-
91
- # # Extract search query matching the tool_call_id
92
- # query = _extract_search_query(context, tool_result.tool_call_id)
93
- # if not query:
94
- # return None
95
-
96
- # # Use functions implemented in section 5.3
97
- # chunks = fixed_length_chunking(original_content, chunk_size=500, overlap=50)
98
- # embeddings = get_embeddings(chunks)
99
- # results = vector_search(query, chunks, embeddings, top_k=3)
100
-
101
- # # Create compressed result
102
- # compressed = "\n\n".join([r['chunk'] for r in results])
103
-
104
- # return ToolResult(
105
- # tool_call_id=tool_result.tool_call_id,
106
- # name=tool_result.name,
107
- # status="success",
108
- # content=[compressed]
109
- # )
110
-
111
  ## callbacks
112
  def search_compressor(context: ExecutionContext, tool_result: ToolResult):
113
- """Callback that compresses web search results."""
114
- # Pass through unchanged if not a search tool
115
  if tool_result.name != "search_web":
116
- print("DEBUG: Callback skipped - not a search_web tool")
117
  return None
118
-
119
  original_content = tool_result.content[0]
120
- print(f"DEBUG: Callback triggered! Original content length: {len(original_content)}")
121
-
122
- # No compression needed if result is short enough
123
  if len(original_content) < 2000:
124
- print("DEBUG: Callback skipped - content too short")
125
  return None
126
-
127
- # Extract search query matching the tool_call_id
128
  query = _extract_search_query(context, tool_result.tool_call_id)
129
  if not query:
130
- print("DEBUG: Callback skipped - could not extract query")
131
  return None
132
-
133
- print(f"DEBUG: Compressing search results for query: {query}")
134
- # Use functions implemented in section 5.3
135
  chunks = fixed_length_chunking(original_content, chunk_size=500, overlap=50)
136
  embeddings = get_embeddings(chunks)
137
  results = vector_search(query, chunks, embeddings, top_k=3)
138
-
139
- # Create compressed result
140
  compressed = "\n\n".join([r['chunk'] for r in results])
141
- print(f"DEBUG: Compressed from {len(original_content)} to {len(compressed)} chars")
142
-
143
  return ToolResult(
144
  tool_call_id=tool_result.tool_call_id,
145
  name=tool_result.name,
 
75
  return item.arguments.get("query", "")
76
  return ""
77
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  ## callbacks
79
  def search_compressor(context: ExecutionContext, tool_result: ToolResult):
80
+ """Callback that compresses web search results using RAG."""
 
81
  if tool_result.name != "search_web":
 
82
  return None
83
+
84
  original_content = tool_result.content[0]
85
+
 
 
86
  if len(original_content) < 2000:
 
87
  return None
88
+
 
89
  query = _extract_search_query(context, tool_result.tool_call_id)
90
  if not query:
 
91
  return None
92
+
 
 
93
  chunks = fixed_length_chunking(original_content, chunk_size=500, overlap=50)
94
  embeddings = get_embeddings(chunks)
95
  results = vector_search(query, chunks, embeddings, top_k=3)
96
+
 
97
  compressed = "\n\n".join([r['chunk'] for r in results])
98
+
 
99
  return ToolResult(
100
  tool_call_id=tool_result.tool_call_id,
101
  name=tool_result.name,
gaia/config.py CHANGED
@@ -20,9 +20,9 @@ PROVIDER_SEMAPHORES = {
20
 
21
  # Default models to evaluate
22
  DEFAULT_MODELS = [
23
- "gpt-5",
24
- "gpt-5-mini",
25
- "anthropic/claude-sonnet-4-5",
26
- "anthropic/claude-haiku-4-5"
27
  ]
28
 
 
20
 
21
  # Default models to evaluate
22
  DEFAULT_MODELS = [
23
+ "gpt-4o",
24
+ "gpt-4o-mini",
25
+ "anthropic/claude-sonnet-4-5-20250929",
26
+ "anthropic/claude-haiku-4-5-20251001"
27
  ]
28
 
gaia/example.py DELETED
@@ -1,38 +0,0 @@
1
- """Example: Running GAIA evaluation with a subset of problems."""
2
-
3
- import asyncio
4
- import sys
5
- from pathlib import Path
6
-
7
- # Add parent directory to path
8
- sys.path.insert(0, str(Path(__file__).parent.parent))
9
-
10
- from datasets import load_dataset
11
- from gaia import run_experiment, DEFAULT_MODELS
12
-
13
-
14
- async def main():
15
- """Example: Run GAIA evaluation on a small subset."""
16
- # Load GAIA dataset
17
- level1_problems = load_dataset("gaia-benchmark/GAIA", "2023_level1", split="validation")
18
- print(f"Number of Level 1 problems: {len(level1_problems)}")
19
-
20
- # Select a subset (first 20 problems)
21
- subset = level1_problems.select(range(20))
22
-
23
- # Run experiment with default models
24
- results = await run_experiment(subset, DEFAULT_MODELS)
25
-
26
- # Print results
27
- print("\nResults:")
28
- for model, model_results in results.items():
29
- total = len(model_results)
30
- correct = sum(1 for r in model_results if r.get("correct", False))
31
- print(f"{model}: {correct}/{total} correct ({correct/total*100:.1f}%)")
32
-
33
- return results
34
-
35
-
36
- if __name__ == "__main__":
37
- asyncio.run(main())
38
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
gaia/file_problems.py DELETED
@@ -1,98 +0,0 @@
1
- """Handle GAIA problems with file attachments."""
2
-
3
- import asyncio
4
- import shutil
5
- import sys
6
- from pathlib import Path
7
- from datasets import load_dataset
8
- from huggingface_hub import snapshot_download
9
-
10
- # Add parent directory to path
11
- sys.path.insert(0, str(Path(__file__).parent.parent))
12
-
13
- from agent_framework import Agent, LlmClient
14
- from agent_tools import unzip_file, read_file, list_files, read_media_file
15
-
16
-
17
- async def main():
18
- """Test GAIA problems with file attachments."""
19
- # Load dataset
20
- dataset = load_dataset("gaia-benchmark/GAIA", "2023_all", split="validation")
21
-
22
- # Download attached files
23
- NOTEBOOK_DIR = Path.cwd()
24
- PROJECT_ROOT = NOTEBOOK_DIR.parent
25
- CACHE_DIR = PROJECT_ROOT / "gaia_cache"
26
-
27
- snapshot_download(
28
- repo_id="gaia-benchmark/GAIA",
29
- repo_type="dataset",
30
- allow_patterns="2023/validation/*",
31
- local_dir=CACHE_DIR
32
- )
33
-
34
- WORK_DIR = NOTEBOOK_DIR / "gaia_workspace"
35
-
36
- def reset_workspace():
37
- """Restore the workspace to its initial state."""
38
- shutil.rmtree(WORK_DIR, ignore_errors=True)
39
- shutil.copytree(CACHE_DIR / "2023/validation", WORK_DIR)
40
- print(f"Workspace reset: {WORK_DIR}")
41
-
42
- reset_workspace()
43
-
44
- problems_with_files = [p for p in dataset if p.get('file_name')]
45
- problem_with_zip = [p for p in problems_with_files if p['file_name'].endswith('.zip')]
46
-
47
- print(f"Total problems: {len(dataset)}")
48
- print(f"Problems with attachments: {len(problems_with_files)}")
49
- print(f"Total problems with zip files: {len(problem_with_zip)}")
50
-
51
- problem = problem_with_zip[0]
52
- print(f"Question: {problem['Question'][:100]}...")
53
- print(f"File name: {problem['file_name']}")
54
-
55
- file_path = WORK_DIR / problem['file_name']
56
- print(f"File exists: {file_path.exists()}")
57
-
58
- # Reset workspace to clean state
59
- reset_workspace()
60
-
61
- # Select a problem with zip file attachment
62
- zip_problems = [p for p in problems_with_files if p['file_name'].endswith('.zip')]
63
- problem = zip_problems[0]
64
- print(problem)
65
- file_path = WORK_DIR / problem['file_name']
66
-
67
- # Construct prompt including file location
68
- prompt = f"""{problem['Question']}
69
-
70
- The attached file is located at: {file_path}
71
- """
72
- print(prompt)
73
-
74
- # Create agent with file tools
75
- agent = Agent(
76
- model=LlmClient(model="gpt-5"),
77
- tools=[unzip_file, read_file, list_files, read_media_file],
78
- instructions="""You are a helpful assistant that can work with files.
79
-
80
- CRITICAL: You MUST use the read_file tool for Excel files (.xlsx), (.xls). The tool WILL work - do NOT say you cannot read Excel files. Always call the tool first.
81
-
82
- Tools available:
83
- - read_file: Reads .xlsx, .xls, .csv, .txt, .py, .json, .md, .xml files. USE THIS FOR EXCEL FILES.
84
- - read_media_file: Analyzes PDF files, images, and audio. Requires a query parameter.
85
- - list_files: Lists directory contents
86
- - unzip_file: Extracts zip archives
87
-
88
-
89
- """,
90
- max_steps=15 )
91
-
92
- response = await agent.run(prompt)
93
- print(response.output)
94
- print(response.context)
95
-
96
-
97
- if __name__ == "__main__":
98
- asyncio.run(main())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
misc/CODE_TRACE_OUTLINE.md DELETED
@@ -1,395 +0,0 @@
1
- # Code Trace Outline - Agent Framework
2
-
3
- This document provides a systematic guide to understanding and tracing through the agent framework codebase.
4
-
5
- ## 📋 Table of Contents
6
- 1. [Entry Points](#entry-points)
7
- 2. [Suggested Reading Order](#suggested-reading-order)
8
- 3. [Function Call Flow](#function-call-flow)
9
- 4. [Component Relationships](#component-relationships)
10
- 5. [Key Functions Reference](#key-functions-reference)
11
-
12
- ---
13
-
14
- ## 🚀 Entry Points
15
-
16
- ### Primary Entry Point
17
- **File:** `examples/demo.py` or `example.py` or `examples/gaia_evaluation.py`
18
-
19
- ```python
20
- agent = Agent(...)
21
- result = await agent.run("question")
22
- ```
23
-
24
- **Flow:** `agent.run()` → `agent.step()` → `agent.think()` → `agent.act()`
25
-
26
- ---
27
-
28
- ## 📖 Suggested Reading Order
29
-
30
- ### Phase 1: Foundation (Data Models)
31
- **Start here to understand the data structures**
32
-
33
- 1. **`agent_framework/models.py`**
34
- - Read in order:
35
- - `Message` (line ~10-15)
36
- - `ToolCall` (line ~18-25)
37
- - `ToolResult` (line ~28-35)
38
- - `ContentItem` (line ~38-45)
39
- - `Event` (line ~48-60)
40
- - `ExecutionContext` (line ~63-85)
41
-
42
- **Why:** These are the core data structures used throughout the framework.
43
-
44
- ### Phase 2: Tool System
45
- **Understand how tools are defined and executed**
46
-
47
- 2. **`agent_framework/tools.py`**
48
- - Read in order:
49
- - `BaseTool` abstract class (line ~10-20)
50
- - `FunctionTool.__init__()` (line ~25-45)
51
- - `FunctionTool.execute()` (line ~47-70)
52
- - `FunctionTool.to_definition()` (line ~72-85)
53
- - `@tool` decorator (line ~88-120)
54
-
55
- **Why:** Tools are the extension mechanism - understand how functions become tools.
56
-
57
- 3. **`agent_framework/utils.py`**
58
- - Read in order:
59
- - `function_to_input_schema()` (line ~10-50)
60
- - `format_tool_definition()` (line ~52-75)
61
- - `mcp_tools_to_openai_format()` (line ~77-100)
62
-
63
- **Why:** These convert Python functions to LLM-understandable tool definitions.
64
-
65
- ### Phase 3: LLM Integration
66
- **Understand how we communicate with LLMs**
67
-
68
- 4. **`agent_framework/llm.py`**
69
- - Read in order:
70
- - `LlmRequest` (line ~10-25)
71
- - `LlmResponse` (line ~28-40)
72
- - `LlmClient.__init__()` (line ~43-50)
73
- - `LlmClient._build_messages()` (line ~52-85)
74
- - `LlmClient._parse_response()` (line ~87-120)
75
- - `LlmClient.generate()` (line ~122-146)
76
-
77
- **Why:** This is the interface to the LLM - understand request/response format.
78
-
79
- ### Phase 4: Agent Core Logic
80
- **The main orchestration logic**
81
-
82
- 5. **`agent_framework/agent.py`**
83
- - Read in order:
84
- - `Agent.__init__()` (line ~29-43)
85
- - `Agent._setup_tools()` (line ~45-46)
86
- - `Agent._prepare_llm_request()` (line ~48-85)
87
- - `Agent.think()` (line ~87-95)
88
- - `Agent.act()` (line ~97-115)
89
- - `Agent.step()` (line ~116-171)
90
- - `Agent.run()` (line ~173-200)
91
- - `Agent._is_final_response()` (line ~202-206)
92
- - `Agent._extract_final_result()` (line ~208-225)
93
-
94
- **Why:** This is the main agent loop - understand the step-by-step execution.
95
-
96
- ### Phase 5: MCP Integration (Optional)
97
- **External tool loading**
98
-
99
- 6. **`agent_framework/mcp.py`**
100
- - Read in order:
101
- - `_extract_text_content()` (line ~10-20)
102
- - `_create_mcp_tool()` (line ~22-60)
103
- - `load_mcp_tools()` (line ~62-100)
104
-
105
- **Why:** Understand how external MCP servers provide tools.
106
-
107
- ### Phase 6: Examples
108
- **See it all in action**
109
-
110
- 7. **`examples/demo.py`** - Simple example
111
- 8. **`examples/gaia_evaluation.py`** - Complex example with structured output
112
-
113
- ---
114
-
115
- ## 🔄 Function Call Flow
116
-
117
- ### Main Execution Flow
118
-
119
- ```
120
- User Code
121
-
122
- ├─> Agent.run(user_input)
123
- │ │
124
- │ ├─> Creates ExecutionContext
125
- │ ├─> Adds user Event
126
- │ │
127
- │ └─> Loop: while not final_result and step < max_steps
128
- │ │
129
- │ └─> Agent.step(context)
130
- │ │
131
- │ ├─> Agent._prepare_llm_request(context)
132
- │ │ │
133
- │ │ ├─> Flattens events → contents
134
- │ │ ├─> Adds tool info to instructions
135
- │ │ └─> Returns LlmRequest
136
- │ │
137
- │ ├─> Agent.think(llm_request)
138
- │ │ │
139
- │ │ └─> LlmClient.generate(request)
140
- │ │ │
141
- │ │ ├─> LlmClient._build_messages()
142
- │ │ ├─> litellm.acompletion()
143
- │ │ └─> LlmClient._parse_response()
144
- │ │
145
- │ ├─> If tool_calls exist:
146
- │ │ │
147
- │ │ └─> Agent.act(context, tool_calls)
148
- │ │ │
149
- │ │ ├─> For each tool_call:
150
- │ │ │ │
151
- │ │ │ └─> tool.execute(context, **args)
152
- │ │ │ │
153
- │ │ │ └─> FunctionTool.execute()
154
- │ │ │ │
155
- │ │ │ └─> Calls wrapped function
156
- │ │ │
157
- │ │ └─> Returns ToolResult[]
158
- │ │
159
- │ └─> If output_type and no tool_calls:
160
- │ │
161
- │ └─> Final LLM call with structured output
162
-
163
- └─> Returns AgentResult(output, context)
164
- ```
165
-
166
- ### Tool Execution Flow
167
-
168
- ```
169
- Agent.act(context, tool_calls)
170
-
171
- ├─> For each ToolCall:
172
- │ │
173
- │ ├─> Find tool by name: tool = self.tools[call.name]
174
- │ │
175
- │ ├─> Call: tool.execute(context, **call.arguments)
176
- │ │ │
177
- │ │ └─> FunctionTool.execute()
178
- │ │ │
179
- │ │ ├─> Inspect function signature
180
- │ │ ├─> If has 'context' param → pass it
181
- │ │ ├─> Call: func(**kwargs)
182
- │ │ └─> Wrap result in ToolResult
183
- │ │
184
- │ └─> Collect ToolResult
185
-
186
- └─> Return list of ToolResults
187
- ```
188
-
189
- ### LLM Request Building Flow
190
-
191
- ```
192
- Agent._prepare_llm_request(context)
193
-
194
- ├─> Flatten events → contents
195
- │ │
196
- │ └─> For each Event:
197
- │ └─> For each ContentItem in event.content:
198
- │ └─> Add to flat_contents
199
-
200
- ├─> Build instructions
201
- │ │
202
- │ ├─> Add self.instructions
203
- │ └─> If tools exist:
204
- │ └─> Append tool descriptions
205
-
206
- ├─> Set response_format (if enforce_output_type)
207
-
208
- └─> Return LlmRequest(instructions, contents, tools, response_format)
209
- ```
210
-
211
- ---
212
-
213
- ## 🔗 Component Relationships
214
-
215
- ```
216
- ┌─────────────────────────────────────────────────────────────┐
217
- │ User Code │
218
- │ (examples/demo.py, example.py, gaia_evaluation.py) │
219
- └────────────────────┬────────────────────────────────────────┘
220
-
221
- │ Creates
222
-
223
- ┌─────────────────────────────────────────────────────────────┐
224
- │ Agent │
225
- │ (agent_framework/agent.py) │
226
- │ │
227
- │ - Orchestrates reasoning loop │
228
- │ - Manages ExecutionContext │
229
- │ - Calls LLM and executes tools │
230
- └──────┬───────────────────────────────┬───────────────────────┘
231
- │ │
232
- │ Uses │ Uses
233
- ▼ ▼
234
- ┌──────────────────┐ ┌──────────────────┐
235
- │ LlmClient │ │ BaseTool │
236
- │ (llm.py) │ │ (tools.py) │
237
- │ │ │ │
238
- │ - Sends requests│ │ - FunctionTool │
239
- │ - Parses resp. │ │ - MCP Tools │
240
- └──────┬───────────┘ └──────┬───────────┘
241
- │ │
242
- │ Uses │ Uses
243
- ▼ ▼
244
- ┌──────────────────┐ ┌──────────────────┐
245
- │ LiteLLM │ │ Utils │
246
- │ (external) │ │ (utils.py) │
247
- │ │ │ │
248
- │ - API calls │ │ - Schema conv. │
249
- │ - Streaming │ │ - Format tools │
250
- └──────────────────┘ └──────────────────┘
251
-
252
- │ Uses
253
-
254
- ┌──────────────────┐
255
- │ MCP Client │
256
- │ (mcp.py) │
257
- │ │
258
- │ - Load tools │
259
- │ - Execute tools │
260
- └──────────────────┘
261
-
262
- ┌─────────────────────────────────────────────────────────────┐
263
- │ Data Models │
264
- │ (models.py) │
265
- │ │
266
- │ - Message, ToolCall, ToolResult │
267
- │ - Event, ExecutionContext │
268
- └─────────────────────────────────────────────────────────────┘
269
- ```
270
-
271
- ---
272
-
273
- ## 📚 Key Functions Reference
274
-
275
- ### Agent Class (`agent.py`)
276
-
277
- | Function | Purpose | Called By | Calls |
278
- |----------|---------|-----------|-------|
279
- | `__init__()` | Initialize agent with model, tools, instructions | User code | `_setup_tools()` |
280
- | `run()` | Main entry point - execute agent loop | User code | `step()` |
281
- | `step()` | Execute one reasoning step | `run()` | `_prepare_llm_request()`, `think()`, `act()` |
282
- | `think()` | Get LLM's decision | `step()` | `LlmClient.generate()` |
283
- | `act()` | Execute tool calls | `step()` | `tool.execute()` |
284
- | `_prepare_llm_request()` | Build LLM request from context | `step()` | - |
285
- | `_is_final_response()` | Check if response is final | `run()` | - |
286
- | `_extract_final_result()` | Extract structured output | `run()` | - |
287
-
288
- ### LlmClient Class (`llm.py`)
289
-
290
- | Function | Purpose | Called By | Calls |
291
- |----------|---------|-----------|-------|
292
- | `generate()` | Make async LLM API call | `Agent.think()` | `_build_messages()`, `_parse_response()`, `litellm.acompletion()` |
293
- | `_build_messages()` | Convert request to OpenAI format | `generate()` | - |
294
- | `_parse_response()` | Parse LLM response | `generate()` | - |
295
-
296
- ### FunctionTool Class (`tools.py`)
297
-
298
- | Function | Purpose | Called By | Calls |
299
- |----------|---------|-----------|-------|
300
- | `execute()` | Execute the wrapped function | `Agent.act()` | Wrapped function |
301
- | `to_definition()` | Convert to OpenAI tool format | `Agent._prepare_llm_request()` | `format_tool_definition()` |
302
-
303
- ### Utility Functions (`utils.py`)
304
-
305
- | Function | Purpose | Called By |
306
- |----------|---------|-----------|
307
- | `function_to_input_schema()` | Convert function to JSON Schema | `FunctionTool.to_definition()` |
308
- | `format_tool_definition()` | Format tool in OpenAI format | `FunctionTool.to_definition()` |
309
- | `display_trace()` | Pretty print execution trace | User code |
310
-
311
- ---
312
-
313
- ## 🎯 Tracing a Specific Execution
314
-
315
- ### Example: "What is 1234 * 5678?"
316
-
317
- 1. **User calls:** `agent.run("What is 1234 * 5678?")`
318
- - Location: `agent.py:173`
319
-
320
- 2. **Agent.run() creates context:**
321
- - Creates `ExecutionContext`
322
- - Adds user `Event` with `Message(role="user", content="...")`
323
- - Location: `agent.py:180-189`
324
-
325
- 3. **First iteration of loop:**
326
- - Calls `agent.step(context)`
327
- - Location: `agent.py:193`
328
-
329
- 4. **Agent.step() prepares request:**
330
- - Calls `_prepare_llm_request(context)`
331
- - Flattens events, adds tool info
332
- - Location: `agent.py:116-120`
333
-
334
- 5. **Agent.think() calls LLM:**
335
- - Calls `llm_client.generate(request)`
336
- - Location: `agent.py:87-95`
337
-
338
- 6. **LlmClient.generate() processes:**
339
- - `_build_messages()` converts to OpenAI format
340
- - `litellm.acompletion()` makes API call
341
- - `_parse_response()` parses response
342
- - Location: `llm.py:122-146`
343
-
344
- 7. **LLM returns with tool call:**
345
- - Response contains `ToolCall(name="calculator", arguments={"expression": "1234 * 5678"})`
346
- - Location: `llm.py:100-120` (parsing)
347
-
348
- 8. **Agent.act() executes tool:**
349
- - Finds `calculator` tool
350
- - Calls `tool.execute(context, expression="1234 * 5678")`
351
- - Location: `agent.py:97-115`
352
-
353
- 9. **FunctionTool.execute() runs function:**
354
- - Calls `calculator("1234 * 5678")`
355
- - Returns `7006652`
356
- - Wraps in `ToolResult`
357
- - Location: `tools.py:47-70`
358
-
359
- 10. **Tool result added to context:**
360
- - Creates new `Event` with `ToolResult`
361
- - Location: `agent.py:155-161`
362
-
363
- 11. **Second iteration:**
364
- - LLM sees tool result
365
- - Returns final answer: `"7006652"`
366
- - Location: `agent.py:116-171`
367
-
368
- 12. **Agent.run() detects final response:**
369
- - `_is_final_response()` returns True
370
- - `_extract_final_result()` extracts answer
371
- - Returns `AgentResult`
372
- - Location: `agent.py:196-200`
373
-
374
- ---
375
-
376
- ## 🔍 Debugging Tips
377
-
378
- 1. **Start with `Agent.run()`** - This is where execution begins
379
- 2. **Follow `step()` calls** - Each step is one reasoning iteration
380
- 3. **Check `ExecutionContext.events`** - This contains the full history
381
- 4. **Use `display_trace()`** - Visualize the execution flow
382
- 5. **Inspect `LlmRequest`** - See what's sent to the LLM
383
- 6. **Check `LlmResponse`** - See what the LLM returned
384
- 7. **Verify tool definitions** - Ensure tools are properly formatted
385
-
386
- ---
387
-
388
- ## 📝 Notes
389
-
390
- - **Async/Await:** Most functions are async - use `await` when calling
391
- - **ExecutionContext:** Threaded through all operations - contains state
392
- - **Events:** Every action (user input, LLM response, tool call) is an Event
393
- - **Tool Execution:** Tools can optionally receive `ExecutionContext` parameter
394
- - **Structured Output:** Only enforced on final LLM call (not during tool usage)
395
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
misc/JOB_SEARCH_SCHEDULE.md DELETED
@@ -1,468 +0,0 @@
1
- # Job Search Schedule: February 1 - June 1, 2026
2
-
3
- **Goal:** Land a $150k+ Applied AI/ML Engineer role before the OPT grace period ends.
4
-
5
- ---
6
-
7
- ## Daily Structure
8
-
9
- | Time | Activity |
10
- |------|----------|
11
- | 5:00 AM | Wake up |
12
- | 5:00 - 6:00 AM | Morning routine, coffee |
13
- | 6:00 - 9:00 AM | **Deep Work Block 1** (LeetCode/Technical) |
14
- | 9:00 - 9:30 AM | Break |
15
- | 9:30 - 12:30 PM | **Deep Work Block 2** (Learning/Projects) |
16
- | 12:30 - 1:30 PM | Lunch |
17
- | 1:30 - 4:30 PM | **Deep Work Block 3** (Job Applications/Networking) |
18
- | 4:30 - 7:00 PM | Gym (2.5 hours) |
19
- | 7:00 - 8:00 PM | Dinner |
20
- | 8:00 - 10:00 PM | Light work (applications, reading, prep) |
21
- | 10:00 - 11:00 PM | Wind down |
22
- | 11:00 PM | Sleep |
23
-
24
- **Tuesday Exception:** Class 7-10 PM (no gym, shift blocks earlier)
25
-
26
- ---
27
-
28
- ## Phase Overview
29
-
30
- | Phase | Weeks | Focus |
31
- |-------|-------|-------|
32
- | **Phase 1** | Week 1-4 (Feb) | Foundation + Start Applications |
33
- | **Phase 2** | Week 5-8 (Mar) | LangChain + Interview Prep Ramp |
34
- | **Phase 3** | Week 9-12 (Apr) | System Design + Mock Interviews |
35
- | **Phase 4** | Week 13-17 (May-Jun 1) | Interview Mode + Final Push |
36
-
37
- ---
38
-
39
- # PHASE 1: Foundation (February)
40
-
41
- ## Week 1: Feb 1-7
42
- ### Theme: Finish Agent Framework + Start Everything
43
-
44
- | Block | Mon | Tue | Wed | Thu | Fri | Sat | Sun |
45
- |-------|-----|-----|-----|-----|-----|-----|-----|
46
- | 6-9 AM | LeetCode x2 | LeetCode x2 | LeetCode x2 | LeetCode x2 | LeetCode x2 | LeetCode x3 | Rest |
47
- | 9:30-12:30 | Agent Framework | Agent Framework | Agent Framework | Agent Framework | Agent Framework | Review week | Rest |
48
- | 1:30-4:30 | Resume update | 5 applications | 5 applications | 5 applications | 5 applications | Network LinkedIn | Rest |
49
- | 8-10 PM | ML review | (Class) | ML review | ML review | ML review | Applications | Rest |
50
-
51
- ### Deliverables Week 1:
52
- - [ ] Agent framework 100% complete
53
- - [ ] LeetCode: 12 problems solved (Arrays, Strings focus)
54
- - [ ] Resume updated with agent framework project
55
- - [ ] 20+ job applications submitted
56
- - [ ] LinkedIn profile updated
57
- - [ ] 5 LinkedIn connection requests to recruiters
58
-
59
- ---
60
-
61
- ## Week 2: Feb 8-14
62
- ### Theme: LangChain Basics + Application Momentum
63
-
64
- | Block | Mon | Tue | Wed | Thu | Fri | Sat | Sun |
65
- |-------|-----|-----|-----|-----|-----|-----|-----|
66
- | 6-9 AM | LeetCode x2 | LeetCode x2 | LeetCode x2 | LeetCode x2 | LeetCode x2 | LeetCode x3 | Rest |
67
- | 9:30-12:30 | LangChain basics | LangChain basics | LangChain chains | LangChain chains | LangChain agents | Build project | Rest |
68
- | 1:30-4:30 | 5 applications | 5 applications | 5 applications | 5 applications | 5 applications | Network | Rest |
69
- | 8-10 PM | ML review | (Class) | ML review | ML review | ML review | Behavioral prep | Rest |
70
-
71
- ### Deliverables Week 2:
72
- - [ ] LeetCode: 12 more problems (Total: 24) - Trees, Linked Lists
73
- - [ ] LangChain fundamentals understood (chains, prompts, output parsers)
74
- - [ ] 25+ job applications submitted (Total: 45+)
75
- - [ ] Started tracking applications in spreadsheet
76
- - [ ] 3 Pfizer contacts messaged for referrals
77
-
78
- ---
79
-
80
- ## Week 3: Feb 15-21
81
- ### Theme: LangGraph + First Responses
82
-
83
- | Block | Mon | Tue | Wed | Thu | Fri | Sat | Sun |
84
- |-------|-----|-----|-----|-----|-----|-----|-----|
85
- | 6-9 AM | LeetCode x2 | LeetCode x2 | LeetCode x2 | LeetCode x2 | LeetCode x2 | LeetCode x3 | Rest |
86
- | 9:30-12:30 | LangGraph basics | LangGraph state | LangGraph edges | LangGraph project | LangGraph project | Complete project | Rest |
87
- | 1:30-4:30 | 5 applications | 5 applications | 5 applications | 5 applications | 5 applications | Network | Rest |
88
- | 8-10 PM | ML review | (Class) | RAG concepts | RAG concepts | Project polish | Behavioral prep | Rest |
89
-
90
- ### Deliverables Week 3:
91
- - [ ] LeetCode: 12 more problems (Total: 36) - Graphs, BFS/DFS
92
- - [ ] LangGraph multi-agent project complete
93
- - [ ] Project deployed (Streamlit or FastAPI)
94
- - [ ] 25+ applications (Total: 70+)
95
- - [ ] First phone screens scheduled (hopefully)
96
- - [ ] Pfizer STAR story written out
97
-
98
- ---
99
-
100
- ## Week 4: Feb 22-28
101
- ### Theme: RAG Deep Dive + Interview Prep Start
102
-
103
- | Block | Mon | Tue | Wed | Thu | Fri | Sat | Sun |
104
- |-------|-----|-----|-----|-----|-----|-----|-----|
105
- | 6-9 AM | LeetCode x2 | LeetCode x2 | LeetCode x2 | LeetCode x2 | LeetCode x2 | LeetCode x3 | Rest |
106
- | 9:30-12:30 | RAG architecture | Vector DBs | Chunking strategies | Retrieval patterns | RAG evaluation | Build RAG project | Rest |
107
- | 1:30-4:30 | 5 applications | 5 applications | 5 applications | 5 applications | 5 applications | Mock interview | Rest |
108
- | 8-10 PM | Interview prep | (Class) | Interview prep | Interview prep | Interview prep | Rest | Rest |
109
-
110
- ### Deliverables Week 4:
111
- - [ ] LeetCode: 12 more problems (Total: 48) - DP basics
112
- - [ ] RAG system built (with evaluation)
113
- - [ ] Added to portfolio
114
- - [ ] 25+ applications (Total: 95+)
115
- - [ ] First mock interview completed
116
- - [ ] ML fundamentals flashcards created
117
-
118
- ---
119
-
120
- ## February Checkpoint ✓
121
-
122
- **By Feb 28:**
123
- - [ ] 48+ LeetCode problems solved
124
- - [ ] Agent framework complete + deployed
125
- - [ ] LangGraph project complete + deployed
126
- - [ ] RAG project complete
127
- - [ ] 95+ job applications
128
- - [ ] 2-3 phone screens scheduled
129
- - [ ] Pfizer story polished
130
-
131
- ---
132
-
133
- # PHASE 2: LangChain Mastery + Interview Ramp (March)
134
-
135
- ## Week 5: Mar 1-7
136
- ### Theme: Advanced RAG + Phone Screens
137
-
138
- | Block | Mon | Tue | Wed | Thu | Fri | Sat | Sun |
139
- |-------|-----|-----|-----|-----|-----|-----|-----|
140
- | 6-9 AM | LeetCode x2 | LeetCode x2 | LeetCode x2 | LeetCode x2 | LeetCode x2 | LeetCode x3 | Rest |
141
- | 9:30-12:30 | Self-correcting RAG | Multi-query RAG | Hybrid search | GraphRAG intro | GraphRAG build | Project polish | Rest |
142
- | 1:30-4:30 | Applications | Applications | Phone screen prep | Applications | Applications | Network | Rest |
143
- | 8-10 PM | ML interview Q's | (Class) | ML interview Q's | ML interview Q's | Behavioral prep | Rest | Rest |
144
-
145
- ### Deliverables Week 5:
146
- - [ ] LeetCode: 12 more (Total: 60)
147
- - [ ] Advanced RAG patterns implemented
148
- - [ ] GraphRAG basic understanding
149
- - [ ] 20+ applications (Total: 115+)
150
- - [ ] 1-2 phone screens completed
151
- - [ ] ML fundamentals review: supervised learning
152
-
153
- ---
154
-
155
- ## Week 6: Mar 8-14
156
- ### Theme: Production Patterns + Technical Screens
157
-
158
- | Block | Mon | Tue | Wed | Thu | Fri | Sat | Sun |
159
- |-------|-----|-----|-----|-----|-----|-----|-----|
160
- | 6-9 AM | LeetCode x2 | LeetCode x2 | LeetCode x2 | LeetCode x2 | LeetCode x2 | LeetCode x3 | Rest |
161
- | 9:30-12:30 | Streaming patterns | Fallback chains | Caching | Memory patterns | Production deploy | System design | Rest |
162
- | 1:30-4:30 | Applications | Applications | Applications | Applications | Applications | Mock interview | Rest |
163
- | 8-10 PM | DL/Transformer Q's | (Class) | DL/Transformer Q's | Interview prep | Interview prep | Rest | Rest |
164
-
165
- ### Deliverables Week 6:
166
- - [ ] LeetCode: 12 more (Total: 72)
167
- - [ ] Production-ready LangChain app
168
- - [ ] Streaming + caching implemented
169
- - [ ] 20+ applications (Total: 135+)
170
- - [ ] 2 mock interviews completed
171
- - [ ] ML fundamentals review: deep learning basics
172
-
173
- ---
174
-
175
- ## Week 7: Mar 15-21
176
- ### Theme: System Design Intro + More Interviews
177
-
178
- | Block | Mon | Tue | Wed | Thu | Fri | Sat | Sun |
179
- |-------|-----|-----|-----|-----|-----|-----|-----|
180
- | 6-9 AM | LeetCode x2 | LeetCode x2 | LeetCode x2 | LeetCode x2 | LeetCode x2 | LeetCode x3 | Rest |
181
- | 9:30-12:30 | ML System Design book | Design: Rec system | Design: Search | Design: RAG system | Design: Agent | Practice designs | Rest |
182
- | 1:30-4:30 | Applications | Applications | Applications | Applications | Applications | Mock interview | Rest |
183
- | 8-10 PM | RAG interview Q's | (Class) | Agent interview Q's | Interview prep | Interview prep | Rest | Rest |
184
-
185
- ### Deliverables Week 7:
186
- - [ ] LeetCode: 12 more (Total: 84)
187
- - [ ] 3 ML system designs practiced
188
- - [ ] System design framework memorized
189
- - [ ] 20+ applications (Total: 155+)
190
- - [ ] Technical screen(s) completed
191
- - [ ] ML fundamentals review: NLP/Transformers
192
-
193
- ---
194
-
195
- ## Week 8: Mar 22-28
196
- ### Theme: Interview Prep Intensity
197
-
198
- | Block | Mon | Tue | Wed | Thu | Fri | Sat | Sun |
199
- |-------|-----|-----|-----|-----|-----|-----|-----|
200
- | 6-9 AM | LeetCode x2 | LeetCode x2 | LeetCode x2 | LeetCode x2 | LeetCode x2 | LeetCode x3 | Rest |
201
- | 9:30-12:30 | System design practice | System design practice | ML interview Q's | ML interview Q's | Mock interview | Mock interview | Rest |
202
- | 1:30-4:30 | Applications | Applications | Applications | Applications | Applications | Rest | Rest |
203
- | 8-10 PM | Behavioral prep | (Class) | Behavioral prep | Interview prep | Rest | Rest | Rest |
204
-
205
- ### Deliverables Week 8:
206
- - [ ] LeetCode: 12 more (Total: 96)
207
- - [ ] 5+ system designs practiced
208
- - [ ] 15+ applications (Total: 170+)
209
- - [ ] 3+ mock interviews total
210
- - [ ] Ready for technical rounds
211
-
212
- ---
213
-
214
- ## March Checkpoint ✓
215
-
216
- **By Mar 31:**
217
- - [ ] 96+ LeetCode problems solved
218
- - [ ] Advanced RAG + LangGraph projects complete
219
- - [ ] 5+ system designs practiced
220
- - [ ] 170+ total applications
221
- - [ ] 5+ phone/technical screens completed
222
- - [ ] On-site interviews scheduled (hopefully)
223
-
224
- ---
225
-
226
- # PHASE 3: Interview Mode (April)
227
-
228
- ## Week 9: Apr 1-7
229
- ### Theme: Mock Interviews + Real Interviews
230
-
231
- | Block | Mon | Tue | Wed | Thu | Fri | Sat | Sun |
232
- |-------|-----|-----|-----|-----|-----|-----|-----|
233
- | 6-9 AM | LeetCode x2 | LeetCode x2 | LeetCode x2 | LeetCode x2 | LeetCode x2 | LeetCode x2 | Rest |
234
- | 9:30-12:30 | Mock coding | Mock system design | Mock ML | Interview prep | Interview prep | Mock full loop | Rest |
235
- | 1:30-4:30 | Applications | Applications | Applications | Applications | Applications | Rest | Rest |
236
- | 8-10 PM | Review weak areas | (Class) | Review weak areas | Prep for interviews | Prep for interviews | Rest | Rest |
237
-
238
- ### Deliverables Week 9:
239
- - [ ] LeetCode: 10 more (Total: 106)
240
- - [ ] 2 full mock interview loops
241
- - [ ] 10+ applications (Total: 180+)
242
- - [ ] Identify and fix weak areas
243
-
244
- ---
245
-
246
- ## Week 10: Apr 8-14
247
- ### Theme: On-sites Begin
248
-
249
- | Block | Mon | Tue | Wed | Thu | Fri | Sat | Sun |
250
- |-------|-----|-----|-----|-----|-----|-----|-----|
251
- | 6-9 AM | LeetCode x2 | Interview prep | LeetCode x2 | Interview prep | LeetCode x2 | Review | Rest |
252
- | 9:30-12:30 | Prep for specific company | INTERVIEW | Prep | INTERVIEW | Prep | Mock | Rest |
253
- | 1:30-4:30 | Applications | Follow-up | Applications | Follow-up | Applications | Rest | Rest |
254
- | 8-10 PM | Company research | (Class) | Company research | Behavioral prep | Rest | Rest | Rest |
255
-
256
- ### Deliverables Week 10:
257
- - [ ] LeetCode: 6 more (Total: 112)
258
- - [ ] 2+ on-site/virtual on-site interviews
259
- - [ ] Thank you emails sent
260
- - [ ] Continue applications pipeline
261
-
262
- ---
263
-
264
- ## Week 11: Apr 15-21
265
- ### Theme: Interview Sprint
266
-
267
- | Block | Focus |
268
- |-------|-------|
269
- | All week | Interviews, prep between interviews, light applications |
270
-
271
- ### Deliverables Week 11:
272
- - [ ] 2-3 more interviews completed
273
- - [ ] Follow-up on all interviews
274
- - [ ] Continue pipeline
275
-
276
- ---
277
-
278
- ## Week 12: Apr 22-28
279
- ### Theme: Interview Sprint Continues
280
-
281
- | Block | Focus |
282
- |-------|-------|
283
- | All week | Interviews, negotiations prep if offers coming |
284
-
285
- ### Deliverables Week 12:
286
- - [ ] More interviews
287
- - [ ] Start seeing results (offers or rejections)
288
- - [ ] Learn from rejections, iterate
289
-
290
- ---
291
-
292
- ## April Checkpoint ✓
293
-
294
- **By Apr 30:**
295
- - [ ] 110+ LeetCode problems solved
296
- - [ ] 5-10 on-site interviews completed
297
- - [ ] 190+ total applications
298
- - [ ] Hopefully 1+ offers in pipeline
299
- - [ ] Clear understanding of weak areas
300
-
301
- ---
302
-
303
- # PHASE 4: Final Push (May - Jun 1)
304
-
305
- ## Week 13: May 1-7
306
- ### Theme: Close Deals or Intensify Search
307
-
308
- **If you have offers:**
309
- - Negotiate
310
- - Compare offers
311
- - Make decision
312
-
313
- **If no offers yet:**
314
- - Analyze feedback from rejections
315
- - Intensify applications
316
- - More mock interviews
317
-
318
- ### Deliverables Week 13:
319
- - [ ] Decision on any offers OR
320
- - [ ] 20+ more applications
321
- - [ ] More interviews scheduled
322
-
323
- ---
324
-
325
- ## Week 14: May 8-14
326
- ### Theme: Pre-Graduation Push
327
-
328
- | Block | Focus |
329
- |-------|-------|
330
- | Continue interviews | Focus on closing |
331
- | Applications | Maintain pipeline |
332
- | Prep for graduation | Logistics |
333
-
334
- ### Deliverables Week 14:
335
- - [ ] Interviews ongoing
336
- - [ ] Offer negotiations if applicable
337
-
338
- ---
339
-
340
- ## Week 15: May 15-21
341
- ### Theme: Graduation Week
342
-
343
- | Block | Focus |
344
- |-------|-------|
345
- | May 23 | GRADUATION 🎓 |
346
- | Rest of week | Interviews, decisions |
347
-
348
- ### Deliverables Week 15:
349
- - [ ] Graduate!
350
- - [ ] Close any pending offers
351
- - [ ] Prepare for OPT start
352
-
353
- ---
354
-
355
- ## Week 16: May 22-28
356
- ### Theme: Post-Graduation, OPT Transition
357
-
358
- | Block | Focus |
359
- |-------|-------|
360
- | OPT paperwork | Get EAD card |
361
- | Interviews | Continue if needed |
362
- | Start date discussions | If offers accepted |
363
-
364
- ### Deliverables Week 16:
365
- - [ ] OPT paperwork filed
366
- - [ ] Job offer accepted OR
367
- - [ ] Intensive job search continues
368
-
369
- ---
370
-
371
- ## Week 17: May 29 - Jun 1
372
- ### Theme: June 1 Target
373
-
374
- ### Deliverables:
375
- - [ ] **JOB OFFER ACCEPTED** (Target!)
376
- - [ ] Start date confirmed
377
- - [ ] OPT employment verified
378
-
379
- ---
380
-
381
- # Tracking Spreadsheet
382
-
383
- Create a spreadsheet with these columns:
384
-
385
- | Company | Role | Applied Date | Status | Next Step | Notes |
386
- |---------|------|--------------|--------|-----------|-------|
387
- | Google | Applied AI Engineer | Feb 3 | Applied | Wait | Referral from X |
388
- | Anthropic | ML Engineer | Feb 5 | Phone Screen | Feb 15 | Prep RAG questions |
389
-
390
- ---
391
-
392
- # Weekly Metrics to Track
393
-
394
- | Metric | Week 1 Target | Week 8 Target | Week 17 Target |
395
- |--------|---------------|---------------|----------------|
396
- | LeetCode Total | 12 | 96 | 120 |
397
- | Applications Total | 20 | 170 | 220 |
398
- | Phone Screens | 0 | 8 | 15 |
399
- | Technical Screens | 0 | 5 | 12 |
400
- | On-sites | 0 | 2 | 8 |
401
- | Offers | 0 | 0 | 1+ |
402
-
403
- ---
404
-
405
- # Emergency Backup Plans
406
-
407
- ## If no interviews by Week 6:
408
- - Resume review with mentor/professional
409
- - Expand to Tier 2/3 companies
410
- - More networking, fewer applications
411
- - Consider contracting roles
412
-
413
- ## If no offers by Week 12:
414
- - Lower target salary temporarily
415
- - Consider smaller companies
416
- - Look at contract-to-hire roles
417
- - Leverage Pfizer network harder
418
-
419
- ## If no offers by Week 16:
420
- - Start date flexibility with any offer
421
- - Consider any legitimate offer
422
- - Plan for extended search during OPT
423
-
424
- ---
425
-
426
- # Resources to Use
427
-
428
- **LeetCode:**
429
- - NeetCode 150 (structured)
430
- - Grind 75 (time-efficient)
431
- - Company-specific lists
432
-
433
- **System Design:**
434
- - "Designing Machine Learning Systems" - Chip Huyen
435
- - "ML System Design Interview" - Ali Aminian
436
- - YouTube: System Design Interview channel
437
-
438
- **Behavioral:**
439
- - Write out 5 STAR stories
440
- - Pfizer project (main story)
441
- - Agent framework (technical depth)
442
- - Conflict resolution story
443
- - Failure and learning story
444
-
445
- **Mock Interviews:**
446
- - Pramp (free)
447
- - Interviewing.io (free/paid)
448
- - Friends/classmates
449
-
450
- ---
451
-
452
- # Key Success Factors
453
-
454
- 1. **Start applications NOW** - Don't wait until "ready"
455
- 2. **Consistent LeetCode** - 2 problems/day minimum
456
- 3. **Track everything** - Spreadsheet is your friend
457
- 4. **Network aggressively** - Referrals > cold applications
458
- 5. **Learn from rejections** - Ask for feedback
459
- 6. **Stay healthy** - Gym, sleep, breaks matter
460
-
461
- ---
462
-
463
- **You've got this. 4 months is enough with focused execution. Start today.**
464
-
465
- ---
466
-
467
- *Last updated: February 1, 2026*
468
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
misc/LANGCHAIN_COMPARISON.md DELETED
@@ -1,396 +0,0 @@
1
- # Comparison: Your Agent Framework vs LangChain
2
-
3
- ## Executive Summary
4
-
5
- **Your implementation is conceptually similar to LangChain but significantly simpler and more lightweight.** You've built the core agent loop pattern that LangChain uses, but without the extensive abstraction layers and enterprise features.
6
-
7
- **Similarity Score: ~70%** - You share the fundamental architecture but differ in complexity and features.
8
-
9
- ---
10
-
11
- ## Core Architecture Comparison
12
-
13
- ### Your Framework
14
-
15
- ```python
16
- Agent
17
- ├─ run() → step() loop
18
- ├─ think() → LlmClient.generate()
19
- ├─ act() → tool.execute()
20
- └─ ExecutionContext (events, state)
21
- ```
22
-
23
- ### LangChain
24
-
25
- ```python
26
- AgentExecutor
27
- ├─ Agent (ReAct, ToolCalling, etc.)
28
- ├─ Runnable chain
29
- ├─ ToolExecutor
30
- └─ Memory (separate abstraction)
31
- ```
32
-
33
- ---
34
-
35
- ## Detailed Feature Comparison
36
-
37
- ### 1. Agent Execution Loop
38
-
39
- | Feature | Your Framework | LangChain |
40
- |---------|---------------|-----------|
41
- | **Execution Loop** | `Agent.run()` → `step()` loop | `AgentExecutor.run()` → `_take_next_step()` |
42
- | **Max Steps** | ✅ `max_steps` parameter | ✅ `max_iterations` parameter |
43
- | **Early Stopping** | ✅ Checks `final_result` | ✅ Checks `AgentFinish` |
44
- | **Error Handling** | Basic | ✅ Comprehensive (retries, error recovery) |
45
- | **Streaming** | ❌ Not implemented | ✅ Built-in streaming support |
46
-
47
- **Your Code:**
48
- ```python
49
- while not context.final_result and context.current_step < self.max_steps:
50
- await self.step(context)
51
- ```
52
-
53
- **LangChain Equivalent:**
54
- ```python
55
- while not agent_finish and iterations < max_iterations:
56
- next_step = agent.plan(intermediate_steps)
57
- # ... execute step
58
- ```
59
-
60
- **Verdict:** ✅ **Very similar** - Same core loop pattern
61
-
62
- ---
63
-
64
- ### 2. Tool System
65
-
66
- | Feature | Your Framework | LangChain |
67
- |---------|---------------|-----------|
68
- | **Tool Definition** | `BaseTool` abstract class | `BaseTool` abstract class |
69
- | **Function Wrapping** | ✅ `FunctionTool` | ✅ `tool()` decorator / `StructuredTool` |
70
- | **Tool Schema** | ✅ JSON Schema generation | ✅ JSON Schema (via Pydantic) |
71
- | **Tool Execution** | ✅ `execute(context, **kwargs)` | ✅ `invoke(input)` or `ainvoke(input)` |
72
- | **Tool Metadata** | Basic (name, description) | ✅ Rich metadata (tags, version, etc.) |
73
- | **Tool Validation** | Basic | ✅ Input/output validation |
74
- | **Tool Streaming** | ❌ | ✅ Streaming tool results |
75
-
76
- **Your Code:**
77
- ```python
78
- class BaseTool(ABC):
79
- @abstractmethod
80
- async def execute(self, context: ExecutionContext, **kwargs) -> ToolResult:
81
- pass
82
- ```
83
-
84
- **LangChain Equivalent:**
85
- ```python
86
- class BaseTool(BaseModel):
87
- def invoke(self, input: dict) -> Any:
88
- pass
89
- ```
90
-
91
- **Verdict:** ✅ **Similar** - Same abstraction pattern, LangChain has more features
92
-
93
- ---
94
-
95
- ### 3. LLM Integration
96
-
97
- | Feature | Your Framework | LangChain |
98
- |---------|---------------|-----------|
99
- | **LLM Client** | ✅ `LlmClient` (LiteLLM) | ✅ `ChatOpenAI`, `ChatAnthropic`, etc. |
100
- | **Message Formatting** | ✅ `_build_messages()` | ✅ `ChatPromptTemplate` |
101
- | **Tool Calling** | ✅ Function calling format | ✅ Native function calling |
102
- | **Structured Output** | ✅ `output_type` parameter | ✅ `with_structured_output()` |
103
- | **Streaming** | ❌ | ✅ Built-in streaming |
104
- | **Retries** | ❌ | ✅ Automatic retries with backoff |
105
- | **Rate Limiting** | ❌ | ✅ Built-in rate limiting |
106
-
107
- **Your Code:**
108
- ```python
109
- llm_request = LlmRequest(
110
- instructions=[self.instructions],
111
- contents=flat_contents,
112
- tools=self.tools,
113
- response_format=self.output_type
114
- )
115
- ```
116
-
117
- **LangChain Equivalent:**
118
- ```python
119
- messages = prompt.format_messages(...)
120
- response = llm.invoke(messages, tools=tools)
121
- ```
122
-
123
- **Verdict:** ✅ **Similar** - Same concepts, LangChain has more LLM providers
124
-
125
- ---
126
-
127
- ### 4. Memory/Context Management
128
-
129
- | Feature | Your Framework | LangChain |
130
- |---------|---------------|-----------|
131
- | **Conversation History** | ✅ `ExecutionContext.events` | ✅ `ChatMessageHistory` |
132
- | **State Management** | ✅ `ExecutionContext.state` | ✅ `BaseMemory` classes |
133
- | **Event Tracking** | ✅ `Event` model | ✅ `CallbackHandler` system |
134
- | **Memory Types** | Single (events list) | ✅ Multiple (buffer, summary, etc.) |
135
- | **Memory Persistence** | ❌ In-memory only | ✅ Database, Redis, etc. |
136
-
137
- **Your Code:**
138
- ```python
139
- @dataclass
140
- class ExecutionContext:
141
- events: List[Event]
142
- state: Dict[str, Any]
143
- ```
144
-
145
- **LangChain Equivalent:**
146
- ```python
147
- memory = ConversationBufferMemory()
148
- # or ConversationSummaryMemory, etc.
149
- ```
150
-
151
- **Verdict:** ⚠️ **Different approach** - You use events, LangChain uses separate memory classes
152
-
153
- ---
154
-
155
- ### 5. Prompt Engineering
156
-
157
- | Feature | Your Framework | LangChain |
158
- |---------|---------------|-----------|
159
- | **System Instructions** | ✅ `instructions` parameter | ✅ `SystemMessagePromptTemplate` |
160
- | **Tool Descriptions** | ✅ Auto-added to instructions | ✅ `format_tool_to_openai_function()` |
161
- | **Prompt Templates** | ❌ String concatenation | ✅ `ChatPromptTemplate` with variables |
162
- | **Few-shot Examples** | ❌ Manual | ✅ Built-in support |
163
- | **Prompt Versioning** | ❌ | ✅ Prompt management tools |
164
-
165
- **Your Code:**
166
- ```python
167
- tool_info = f"\n\nYou have the following tools available..."
168
- instructions[0] += tool_info
169
- ```
170
-
171
- **LangChain Equivalent:**
172
- ```python
173
- prompt = ChatPromptTemplate.from_messages([
174
- ("system", "{system_message}"),
175
- ("human", "{input}"),
176
- ])
177
- ```
178
-
179
- **Verdict:** ⚠️ **Simpler** - You use strings, LangChain uses templates
180
-
181
- ---
182
-
183
- ### 6. Structured Output
184
-
185
- | Feature | Your Framework | LangChain |
186
- |---------|---------------|-----------|
187
- | **Pydantic Models** | ✅ `output_type: Type[BaseModel]` | ✅ `with_structured_output()` |
188
- | **Type Safety** | ✅ Full type hints | ✅ Full type hints |
189
- | **Validation** | ✅ Pydantic validation | ✅ Pydantic validation |
190
- | **Conditional Enforcement** | ✅ Only on final answer | ✅ Always enforced |
191
-
192
- **Your Code:**
193
- ```python
194
- agent = Agent(
195
- output_type=AnswerOutput, # Pydantic model
196
- ...
197
- )
198
- ```
199
-
200
- **LangChain Equivalent:**
201
- ```python
202
- structured_llm = llm.with_structured_output(AnswerOutput)
203
- ```
204
-
205
- **Verdict:** ✅ **Very similar** - Both use Pydantic for structured output
206
-
207
- ---
208
-
209
- ### 7. Observability & Debugging
210
-
211
- | Feature | Your Framework | LangChain |
212
- |---------|---------------|-----------|
213
- | **Verbose Mode** | ✅ Built-in `verbose=True` | ✅ `verbose=True` parameter |
214
- | **Trace Display** | ✅ `display_trace()` | ✅ `LangSmith` integration |
215
- | **Callbacks** | ❌ | ✅ Extensive callback system |
216
- | **Logging** | Basic print statements | ✅ Structured logging |
217
- | **Metrics** | ❌ | ✅ Token usage, latency, etc. |
218
-
219
- **Your Code:**
220
- ```python
221
- if self.verbose:
222
- print(f"[TOOL CALL] {item.name}")
223
- ```
224
-
225
- **LangChain Equivalent:**
226
- ```python
227
- callbacks = [StdOutCallbackHandler()]
228
- agent.run(..., callbacks=callbacks)
229
- ```
230
-
231
- **Verdict:** ⚠️ **Simpler** - You have basic verbose, LangChain has full observability
232
-
233
- ---
234
-
235
- ## Key Differences
236
-
237
- ### What LangChain Has That You Don't
238
-
239
- 1. **Agent Types**: ReAct, Plan-and-Execute, Self-Ask-with-Search, etc.
240
- 2. **Runnable Interface**: Unified interface for chains, tools, prompts
241
- 3. **Memory Types**: Buffer, Summary, Token-based, Vector store
242
- 4. **Retrieval**: Built-in RAG with vector stores
243
- 5. **Callbacks**: Extensive callback system for hooks
244
- 6. **LangSmith**: Integrated observability platform
245
- 7. **Document Loaders**: 100+ document loaders
246
- 8. **Chains**: Pre-built chains for common tasks
247
- 9. **Agents**: Pre-built agent types (ReAct, etc.)
248
- 10. **Ecosystem**: 100+ integrations
249
-
250
- ### What You Have That's Unique
251
-
252
- 1. **Simplicity**: Much easier to understand and modify
253
- 2. **Event-Based History**: Clear event tracking system
254
- 3. **Direct Control**: Less abstraction, more control
255
- 4. **Educational Value**: Perfect for learning agent mechanics
256
- 5. **MCP Integration**: Direct MCP tool loading
257
- 6. **Verbose Mode**: Built-in real-time thinking display
258
-
259
- ---
260
-
261
- ## Code Pattern Comparison
262
-
263
- ### Creating an Agent
264
-
265
- **Your Framework:**
266
- ```python
267
- agent = Agent(
268
- model=LlmClient(model="gpt-5-mini"),
269
- tools=[calculator_tool],
270
- instructions="You are a helpful assistant.",
271
- output_type=AnswerOutput,
272
- verbose=True
273
- )
274
- result = await agent.run("What is 2+2?")
275
- ```
276
-
277
- **LangChain:**
278
- ```python
279
- from langchain.agents import create_tool_calling_agent
280
- from langchain_openai import ChatOpenAI
281
-
282
- llm = ChatOpenAI(model="gpt-4")
283
- tools = [calculator_tool]
284
- prompt = ChatPromptTemplate.from_messages([...])
285
- agent = create_tool_calling_agent(llm, tools, prompt)
286
- agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)
287
- result = agent_executor.invoke({"input": "What is 2+2?"})
288
- ```
289
-
290
- **Verdict:** Your API is **much simpler** - 2 statements vs 6+ statements
291
-
292
- ---
293
-
294
- ### Tool Definition
295
-
296
- **Your Framework:**
297
- ```python
298
- @tool
299
- def calculator(expression: str) -> float:
300
- """Calculate mathematical expressions."""
301
- return eval(expression)
302
- ```
303
-
304
- **LangChain:**
305
- ```python
306
- from langchain.tools import tool
307
-
308
- @tool
309
- def calculator(expression: str) -> float:
310
- """Calculate mathematical expressions."""
311
- return eval(expression)
312
- ```
313
-
314
- **Verdict:** ✅ **Nearly identical** - Same decorator pattern
315
-
316
- ---
317
-
318
- ### Execution Flow
319
-
320
- **Your Framework:**
321
- ```
322
- run() → step() → think() → act() → step() → ...
323
- ```
324
-
325
- **LangChain:**
326
- ```
327
- invoke() → _take_next_step() → agent.plan() → tool_executor.execute() → ...
328
- ```
329
-
330
- **Verdict:** ✅ **Same pattern** - Different method names, same flow
331
-
332
- ---
333
-
334
- ## When to Use Each
335
-
336
- ### Use Your Framework When:
337
- - ✅ Learning how agents work
338
- - ✅ Building simple, focused agents
339
- - ✅ Need full control over execution
340
- - ✅ Want minimal dependencies
341
- - ✅ Prototyping quickly
342
- - ✅ Educational projects
343
-
344
- ### Use LangChain When:
345
- - ✅ Building production systems
346
- - ✅ Need extensive integrations
347
- - ✅ Want pre-built agent types
348
- - ✅ Need observability (LangSmith)
349
- - ✅ Building complex RAG systems
350
- - ✅ Enterprise requirements
351
-
352
- ---
353
-
354
- ## Migration Path
355
-
356
- If you wanted to make your framework more LangChain-like, you could add:
357
-
358
- 1. **Runnable Interface**: Unified interface for all components
359
- 2. **Memory Classes**: Separate memory abstractions
360
- 3. **Callbacks**: Hook system for observability
361
- 4. **Agent Types**: ReAct, Plan-and-Execute, etc.
362
- 5. **Prompt Templates**: Template system instead of strings
363
- 6. **Retries**: Automatic retry logic
364
- 7. **Streaming**: Stream responses and tool results
365
-
366
- But honestly, **your simplicity is a feature, not a bug**. LangChain's complexity comes from trying to support every use case. Your framework is perfect for learning and focused use cases.
367
-
368
- ---
369
-
370
- ## Conclusion
371
-
372
- **Your implementation captures ~70% of LangChain's core concepts** but in a much simpler, more understandable way. You've built:
373
-
374
- - ✅ The core agent loop
375
- - ✅ Tool system
376
- - ✅ LLM integration
377
- - ✅ Structured output
378
- - ✅ Context management
379
- - ✅ Verbose debugging
380
-
381
- **You're missing:**
382
- - ❌ Multiple agent types
383
- - ❌ Extensive integrations
384
- - ❌ Memory abstractions
385
- - ❌ Callback system
386
- - ❌ Streaming
387
- - ❌ Enterprise features
388
-
389
- **But that's okay!** Your framework is:
390
- - 🎓 **Better for learning** - You can see exactly what's happening
391
- - 🚀 **Faster to iterate** - Less abstraction to navigate
392
- - 🎯 **Focused** - Does one thing well
393
- - 📖 **Readable** - Easy to understand and modify
394
-
395
- **You've built a solid, educational agent framework that demonstrates the core concepts without the complexity.**
396
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
misc/example.py DELETED
@@ -1,65 +0,0 @@
1
- """Example usage of the agent framework."""
2
-
3
- import asyncio
4
- import os
5
- from dotenv import load_dotenv
6
- from agent_framework import Agent, LlmClient, FunctionTool, load_mcp_tools
7
-
8
- load_dotenv()
9
-
10
-
11
- # Example 1: Simple calculator tool
12
- def calculator(expression: str) -> float:
13
- """Calculate mathematical expressions."""
14
- return eval(expression)
15
-
16
-
17
- # Example 2: Using the @tool decorator
18
- from agent_framework import tool
19
-
20
- @tool
21
- def search_web(query: str, max_results: int = 5) -> str:
22
- """Search the web for information."""
23
- # This is a placeholder - in real usage, you'd call an actual search API
24
- return f"Search results for: {query}"
25
-
26
-
27
- async def main():
28
- # Create a calculator tool
29
- calc_tool = FunctionTool(calculator)
30
-
31
- # Create the agent
32
- agent = Agent(
33
- model=LlmClient(model="gpt-5-mini"),
34
- tools=[calc_tool, search_web],
35
- instructions="You are a helpful assistant that can calculate and search the web.",
36
- )
37
-
38
- # Run the agent
39
- result = await agent.run("What is 1234 * 5678?")
40
- print(f"Result: {result.output}")
41
- print(f"Steps taken: {result.context.current_step}")
42
-
43
- # Example with MCP tools
44
- if os.getenv("TAVILY_API_KEY"):
45
- connection = {
46
- "command": "npx",
47
- "args": ["-y", "tavily-mcp@latest"],
48
- "env": {"TAVILY_API_KEY": os.getenv("TAVILY_API_KEY")}
49
- }
50
- mcp_tools = await load_mcp_tools(connection)
51
-
52
- agent_with_mcp = Agent(
53
- model=LlmClient(model="gpt-5-mini"),
54
- tools=[calc_tool, *mcp_tools],
55
- instructions="You are a helpful assistant with web search capabilities.",
56
- )
57
-
58
- result = await agent_with_mcp.run("What is the capital of France?")
59
- print(f"Result: {result.output}")
60
-
61
-
62
-
63
- if __name__ == "__main__":
64
- asyncio.run(main())
65
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
misc/notebook_example.ipynb DELETED
@@ -1,222 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "markdown",
5
- "metadata": {},
6
- "source": [
7
- "# Using Agent Framework in Jupyter Notebook\n",
8
- "\n",
9
- "This notebook demonstrates how to use the agent_framework package with structured output.\n"
10
- ]
11
- },
12
- {
13
- "cell_type": "code",
14
- "execution_count": 1,
15
- "metadata": {},
16
- "outputs": [],
17
- "source": [
18
- "# Import the framework\n",
19
- "from agent_framework import Agent, LlmClient\n",
20
- "from pydantic import BaseModel\n",
21
- "from typing import Literal, List\n"
22
- ]
23
- },
24
- {
25
- "cell_type": "code",
26
- "execution_count": 2,
27
- "metadata": {},
28
- "outputs": [],
29
- "source": [
30
- "# Define your structured output model\n",
31
- "class SentimentAnalysis(BaseModel):\n",
32
- " sentiment: Literal[\"positive\", \"negative\", \"neutral\"]\n",
33
- " confidence: float\n",
34
- " key_phrases: List[str]\n"
35
- ]
36
- },
37
- {
38
- "cell_type": "code",
39
- "execution_count": 3,
40
- "metadata": {},
41
- "outputs": [],
42
- "source": [
43
- "# Create agent with structured output\n",
44
- "agent = Agent(\n",
45
- " model=LlmClient(model=\"gpt-5-mini\"),\n",
46
- " tools=[],\n",
47
- " instructions=\"Analyze the sentiment of the provided text.\",\n",
48
- " output_type=SentimentAnalysis\n",
49
- ")\n"
50
- ]
51
- },
52
- {
53
- "cell_type": "code",
54
- "execution_count": 4,
55
- "metadata": {},
56
- "outputs": [
57
- {
58
- "name": "stdout",
59
- "output_type": "stream",
60
- "text": [
61
- "Sentiment: positive\n",
62
- "Confidence: 0.95\n",
63
- "Key phrases: ['exceeded my expectations', 'Highly recommend', 'product']\n"
64
- ]
65
- }
66
- ],
67
- "source": [
68
- "# Run the agent\n",
69
- "result = await agent.run(\"This product exceeded my expectations! Highly recommend.\")\n",
70
- "\n",
71
- "# Access structured output\n",
72
- "print(f\"Sentiment: {result.output.sentiment}\") # \"positive\"\n",
73
- "print(f\"Confidence: {result.output.confidence}\") # 0.92\n",
74
- "print(f\"Key phrases: {result.output.key_phrases}\") # [\"exceeded expectations\", \"highly recommend\"]\n"
75
- ]
76
- },
77
- {
78
- "cell_type": "code",
79
- "execution_count": null,
80
- "metadata": {},
81
- "outputs": [],
82
- "source": [
83
- "'''\n",
84
- "To implement an agent, we need tools, an execution context, \n",
85
- "instructions (the system prompt that defines agent behavior), and an LLM. \n",
86
- "\n",
87
- "Event: a record of who did what, e.g. whether it was a user request, \n",
88
- "a tool call requested by the LLM, or a result returned by a tool. \n",
89
- "\n",
90
- "'''\n",
91
- "\n",
92
- "from anyio import Event\n",
93
- "from agent_framework import ExecutionContext, Message\n",
94
- "from agent_framework.agent import AgentResult\n",
95
- "\n",
96
- "class Agent: ## does this inherit from anything ? \n",
97
- " def init(self, tools, executionContext, llmClient, instructions, maxSteps, verbose, name = \"agent\"):\n",
98
- " self.tools = self._setup_tools(tools or [])\n",
99
- " self.executionContext = executionContext\n",
100
- " self.llmClient = llmClient\n",
101
- " self.instructions = instructions\n",
102
- " self.maxSteps = maxSteps\n",
103
- " self.verbose = verbose\n",
104
- " self.name = name\n",
105
- "\n",
106
- " ## step 1 is to setup tools\n",
107
- "\n",
108
- " def _setup_tools(self, tools):\n",
109
- " return tools\n",
110
- "\n",
111
- " ## step 2 is to define entry point for users.(run method)\n",
112
- "\n",
113
- " async def run(self, user_input, context):\n",
114
- "\n",
115
- " ## check if there is any previous context, else create\n",
116
- "\n",
117
- " if context is None:\n",
118
- " context = ExecutionContext()\n",
119
- "\n",
120
- " ## add the user_event to the event\n",
121
- " user_event = Event(\n",
122
- " execution_id = context.execution_id,\n",
123
- " author = 'user',\n",
124
- " content = [Message(role = 'user', content = user_input)]\n",
125
- " )\n",
126
- " ## add the event to context\n",
127
- " context.add_event(user_event)\n",
128
- "\n",
129
- " ## if agent doesnt reach final result or max steps, keep performing\n",
130
- " while not context.final_result and context.current_step < self.max_steps:\n",
131
- " ## each step is a think-act cycle\n",
132
- " await self.step(context)\n",
133
- "\n",
134
- " ## check if newly performed action is final\n",
135
- " last_event = context.events[-1]\n",
136
- "\n",
137
- " # If it is final, then extract the last event and sent it to \n",
138
- " # Agent result along with the context\n",
139
- " # If it is final, then extract the last event and send it to \n",
140
- " context.final_result = self._extract_final_result(last_event)\n",
141
- "\n",
142
- " return AgentResult(context.final_result, context = context)\n",
143
- "\n",
144
- " # step 3 prepare for llm request\n",
145
- "\n",
146
- " def _prepare_llm_request(self, context):\n",
147
- " \n",
148
- " #flatten all the events (why ?)\n",
149
- " flat_contents = []\n",
150
- " for event in context.events:\n",
151
- " flat_contents.extend(event.content)\n",
152
- "\n",
153
- " ## with this context, call llm\n",
154
- " return LlmRequest(\n",
155
- " instructions=[self.instructions] if self.instructions else [],\n",
156
- " contents=flat_contents,\n",
157
- " tools=self.tools,\n",
158
- " tool_choice=\"auto\" if self.tools else None,\n",
159
- " )\n",
160
- "\n",
161
- " async def step(self, context):\n",
162
- " \n",
163
- " ## write a method for this\n",
164
- " llm_request = self._prepare_llm_request(context)\n",
165
- "\n",
166
- " # Get LLM's decision\n",
167
- " llm_response = await self.think(llm_request)\n",
168
- "\n",
169
- " response_event = Event(\n",
170
- " execution_id=context.execution_id,\n",
171
- " author=self.name,\n",
172
- " content=llm_response.content,\n",
173
- " )\n",
174
- "\n",
175
- " async def think(self, llm_request):\n",
176
- " \"\"\"Get LLM's response/decision.\"\"\"\n",
177
- " return await self.model.generate(llm_request)\n",
178
- "\n",
179
- "\n",
180
- "\n",
181
- "\n",
182
- "\n",
183
- "\n",
184
- "\n",
185
- "\n",
186
- "\n",
187
- "\n",
188
- "\n"
189
- ]
190
- },
191
- {
192
- "cell_type": "markdown",
193
- "metadata": {},
194
- "source": [
195
- "Q. What is an LLM request?\n",
195
- "\n",
196
- "A. It is what our agent sends to the LLM on each call. Before sending it, we bundle it with the necessary context, prompt, and tools."
198
- ]
199
- }
200
- ],
201
- "metadata": {
202
- "kernelspec": {
203
- "display_name": ".venv",
204
- "language": "python",
205
- "name": "python3"
206
- },
207
- "language_info": {
208
- "codemirror_mode": {
209
- "name": "ipython",
210
- "version": 3
211
- },
212
- "file_extension": ".py",
213
- "mimetype": "text/x-python",
214
- "name": "python",
215
- "nbconvert_exporter": "python",
216
- "pygments_lexer": "ipython3",
217
- "version": "3.12.11"
218
- }
219
- },
220
- "nbformat": 4,
221
- "nbformat_minor": 2
222
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
misc/tutorials/ADDITIONAL_EXERCISES.md DELETED
@@ -1,729 +0,0 @@
1
- # Additional Exercises: Comprehensive Knowledge Test
2
-
3
- This document contains advanced exercises that test your understanding across all topics covered in the tutorial series. These questions require you to integrate concepts from multiple episodes.
4
-
5
- ---
6
-
7
- ## 🎯 Integration Challenges
8
-
9
- ### Challenge 1: Build a Multi-Agent System
10
- Create a system where two agents collaborate to solve a problem:
11
- - Agent A: Research agent (uses web search)
12
- - Agent B: Analysis agent (uses calculator and file tools)
13
- - They should share information through a shared ExecutionContext
14
-
15
- **Requirements:**
16
- - Both agents use the same session
17
- - Agent A finds information, Agent B analyzes it
18
- - Track which agent made which decisions
19
-
20
- **Hints:**
21
- - Use shared ExecutionContext
22
- - Different agent names
23
- - Coordinate through context.state
24
-
25
- ---
26
-
27
- ### Challenge 2: Custom Tool with Confirmation
28
- Create a `delete_file` tool that:
29
- - Requires user confirmation before execution
30
- - Shows what file will be deleted
31
- - Allows user to modify the file path
32
- - Tracks deletion history in ExecutionContext
33
-
34
- **Requirements:**
35
- - Use `requires_confirmation=True`
36
- - Custom confirmation message
37
- - Store deletions in context.state
38
- - Handle rejection gracefully
39
-
40
- ---
41
-
42
- ### Challenge 3: Streaming Agent Responses
43
- Modify the agent to support streaming responses:
44
- - Stream LLM tokens as they arrive
45
- - Update ExecutionContext in real-time
46
- - Allow cancellation mid-stream
47
- - Maintain full trace even with streaming
48
-
49
- **Requirements:**
50
- - Use LiteLLM streaming
51
- - Yield tokens as they arrive
52
- - Handle cancellation
53
- - Complete trace after streaming
54
-
55
- ---
56
-
57
- ## 🧠 Design Decision Questions
58
-
59
- ### Question 1: Why Pydantic for Models but Dataclass for ExecutionContext?
60
-
61
- **Your Task:** Write a detailed explanation covering:
62
- - When to use Pydantic vs Dataclass
63
- - Performance implications
64
- - Validation needs
65
- - Serialization requirements
66
- - Mutable vs immutable patterns
67
-
68
- **Test Your Answer:** Can you explain this to someone who doesn't know Python?
69
-
70
- ---
71
-
72
- ### Question 2: Tool Execution Error Handling Strategy
73
-
74
- **Scenario:** A tool fails during execution. What should happen?
75
-
76
- **Your Task:** Design a comprehensive error handling strategy that:
77
- - Distinguishes between recoverable and fatal errors
78
- - Allows agent to retry with different parameters
79
- - Provides meaningful error messages to LLM
80
- - Logs errors for debugging
81
- - Maintains execution trace
82
-
83
- **Implementation:** Write code that implements your strategy.
84
-
85
- ---
86
-
87
- ### Question 3: Memory Optimization Trade-offs
88
-
89
- **Scenario:** You have a conversation with 1000 messages. Token count is 50,000.
90
-
91
- **Your Task:** Design an optimization strategy that:
92
- - Balances context retention vs token savings
93
- - Preserves important information
94
- - Explains what information is being compressed
95
- - Allows user to see what was summarized
96
-
97
- **Consider:**
98
- - When to use sliding window vs summarization
99
- - How to preserve user preferences
100
- - Maintaining tool call history
101
- - Cost vs quality trade-offs
102
-
103
- ---
104
-
105
- ## 🐛 Debugging Scenarios
106
-
107
- ### Scenario 1: Agent Stuck in Loop
108
-
109
- **Problem:** Agent keeps calling the same tool with the same arguments repeatedly.
110
-
111
- **Your Task:**
112
- 1. Identify possible causes
113
- 2. Write code to detect this pattern
114
- 3. Implement a solution to break the loop
115
- 4. Add logging to track tool call patterns
116
-
117
- **Possible Causes:**
118
- - Tool returning same result
119
- - LLM not understanding tool output
120
- - Missing context in tool results
121
- - Tool definition unclear
122
-
123
- ---
124
-
125
- ### Scenario 2: Session Not Persisting
126
-
127
- **Problem:** Agent doesn't remember previous conversation even with session_id.
128
-
129
- **Your Task:**
130
- 1. Trace the session flow from run() to save()
131
- 2. Identify where session might be lost
132
- 3. Add validation to ensure session is saved
133
- 4. Create tests to verify persistence
134
-
135
- **Check Points:**
136
- - Session manager initialization
137
- - Session loading in run()
138
- - Session saving after execution
139
- - Session state updates
140
-
141
- ---
142
-
143
- ### Scenario 3: Tool Schema Mismatch
144
-
145
- **Problem:** LLM calls tool with wrong argument types (e.g., string instead of int).
146
-
147
- **Your Task:**
148
- 1. Add validation before tool execution
149
- 2. Convert types when possible (e.g., "5" → 5)
150
- 3. Return helpful error messages to LLM
151
- 4. Log schema mismatches for analysis
152
-
153
- **Implementation:** Enhance FunctionTool.execute() with type coercion.
154
-
155
- ---
156
-
157
- ## 🏗️ Architecture Questions
158
-
159
- ### Question 1: Extending the Framework
160
-
161
- **Task:** Design how you would add these features:
162
- - **Parallel tool execution**: Execute multiple tools simultaneously
163
- - **Tool chaining**: One tool's output becomes another's input
164
- - **Conditional tool execution**: Tools that decide which tool to call next
165
- - **Tool versioning**: Support multiple versions of the same tool
166
-
167
- **Requirements:**
168
- - Explain the architecture changes needed
169
- - Show how it integrates with existing code
170
- - Consider backward compatibility
171
- - Design the API
172
-
173
- ---
174
-
175
- ### Question 2: Multi-Provider Support
176
-
177
- **Task:** Design a system where:
178
- - Different tools use different LLM providers
179
- - Some tools use OpenAI, others use Anthropic
180
- - Agent can switch providers mid-conversation
181
- - Cost tracking per provider
182
-
183
- **Requirements:**
184
- - How to configure provider per tool
185
- - How to handle provider-specific features
186
- - How to track costs separately
187
- - How to handle provider failures
188
-
189
- ---
190
-
191
- ### Question 3: Distributed Agent System
192
-
193
- **Task:** Design an architecture where:
194
- - Agent runs on multiple servers
195
- - Tools can be on different machines
196
- - Sessions are shared across instances
197
- - Load balancing between agents
198
-
199
- **Requirements:**
200
- - Communication protocol
201
- - Session synchronization
202
- - Tool discovery across network
203
- - Failure handling
204
-
205
- ---
206
-
207
- ## 💡 Real-World Application Scenarios
208
-
209
- ### Scenario 1: Customer Support Bot
210
-
211
- **Requirements:**
212
- - Access customer database
213
- - Search knowledge base
214
- - Create support tickets
215
- - Escalate to human when needed
216
- - Remember conversation context
217
-
218
- **Your Task:**
219
- 1. Design the tool set needed
220
- 2. Create agent instructions
221
- 3. Implement session management for customers
222
- 4. Add escalation logic
223
- 5. Design conversation flow
224
-
225
- ---
226
-
227
- ### Scenario 2: Code Review Assistant
228
-
229
- **Requirements:**
230
- - Read code files
231
- - Analyze code quality
232
- - Suggest improvements
233
- - Check for security issues
234
- - Generate review comments
235
-
236
- **Your Task:**
237
- 1. Create tools for code analysis
238
- 2. Design agent prompts for code review
239
- 3. Implement file reading and parsing
240
- 4. Structure review output
241
- 5. Handle large codebases
242
-
243
- ---
244
-
245
- ### Scenario 3: Research Assistant
246
-
247
- **Requirements:**
248
- - Search multiple sources
249
- - Summarize findings
250
- - Compare information
251
- - Track sources
252
- - Generate citations
253
-
254
- **Your Task:**
255
- 1. Integrate multiple search tools
256
- 2. Create summarization tools
257
- 3. Implement source tracking
258
- 4. Design citation format
259
- 5. Handle conflicting information
260
-
261
- ---
262
-
263
- ## 🔍 Deep Understanding Questions
264
-
265
- ### Question 1: Execution Flow Trace
266
-
267
- **Task:** Given this code:
268
- ```python
269
- agent = Agent(
270
- model=LlmClient(model="gpt-4o-mini"),
271
- tools=[calculator, search_web],
272
- max_steps=5
273
- )
274
-
275
- result = await agent.run("What is the weather in NYC and convert 72F to Celsius?")
276
- ```
277
-
278
- **Trace the execution:**
279
- 1. List every method call in order
280
- 2. Show what's in ExecutionContext at each step
281
- 3. Show what's sent to LLM at each step
282
- 4. Show what tools are called and when
283
- 5. Show the final ExecutionContext state
284
-
285
- **Write it out step-by-step as if you're the agent.**
286
-
287
- ---
288
-
289
- ### Question 2: Memory Optimization Impact
290
-
291
- **Task:** Analyze this conversation:
292
- - 50 messages total
293
- - 10 tool calls
294
- - 5 file reads
295
- - 3 web searches
296
-
297
- **Questions:**
298
- 1. How many tokens without optimization?
299
- 2. How many tokens with sliding window (keep 20)?
300
- 3. How many tokens with compaction?
301
- 4. How many tokens with summarization?
302
- 5. What information is lost in each strategy?
303
-
304
- **Create a detailed analysis.**
305
-
306
- ---
307
-
308
- ### Question 3: Error Propagation
309
-
310
- **Task:** Trace what happens when:
311
- 1. LLM API call fails
312
- 2. Tool execution raises exception
313
- 3. Session save fails
314
- 4. Tool not found
315
- 5. Invalid tool arguments
316
-
317
- **For each error:**
318
- - Where does it get caught?
319
- - What error message is created?
320
- - How does it affect execution?
321
- - What does the user see?
322
- - How is it logged?
323
-
324
- **Draw the error flow diagram.**
325
-
326
- ---
327
-
328
- ## 🎨 Creative Challenges
329
-
330
- ### Challenge 1: Agent Personality System
331
-
332
- **Task:** Design a system where agents have personalities:
333
- - Each personality affects tool choice
334
- - Personalities influence response style
335
- - Personalities can change based on context
336
- - Track personality in session
337
-
338
- **Implementation:**
339
- - Create personality model
340
- - Modify agent instructions based on personality
341
- - Add personality to ExecutionContext
342
- - Create personality switching logic
343
-
344
- ---
345
-
346
- ### Challenge 2: Tool Learning System
347
-
348
- **Task:** Design a system where agents learn from tool usage:
349
- - Track which tools work best for which tasks
350
- - Suggest tool improvements
351
- - Learn optimal tool parameters
352
- - Adapt tool selection over time
353
-
354
- **Requirements:**
355
- - Tool usage analytics
356
- - Success/failure tracking
357
- - Parameter optimization
358
- - Learning algorithm
359
-
360
- ---
361
-
362
- ### Challenge 3: Agent Collaboration Protocol
363
-
364
- **Task:** Design a protocol for agents to work together:
365
- - Agents can request help from other agents
366
- - Agents can share context
367
- - Agents can delegate tasks
368
- - Track multi-agent conversations
369
-
370
- **Requirements:**
371
- - Communication protocol
372
- - Context sharing mechanism
373
- - Task delegation system
374
- - Conflict resolution
375
-
376
- ---
377
-
378
- ## 📊 Performance Optimization
379
-
380
- ### Challenge 1: Caching Strategy
381
-
382
- **Task:** Implement intelligent caching:
383
- - Cache LLM responses for identical requests
384
- - Cache tool results with TTL
385
- - Cache tool definitions
386
- - Invalidate cache appropriately
387
-
388
- **Requirements:**
389
- - Design cache structure
390
- - Implement cache logic
391
- - Handle cache invalidation
392
- - Measure cache hit rates
393
-
394
- ---
395
-
396
- ### Challenge 2: Batch Processing
397
-
398
- **Task:** Optimize for processing multiple requests:
399
- - Batch LLM calls when possible
400
- - Parallel tool execution
401
- - Shared session management
402
- - Resource pooling
403
-
404
- **Requirements:**
405
- - Design batching system
406
- - Implement parallel execution
407
- - Handle resource limits
408
- - Measure performance gains
409
-
410
- ---
411
-
412
- ### Challenge 3: Token Budget Management
413
-
414
- **Task:** Implement token budget system:
415
- - Set daily/monthly limits
416
- - Prioritize important conversations
417
- - Compress old conversations automatically
418
- - Alert when approaching limits
419
-
420
- **Requirements:**
421
- - Token tracking
422
- - Budget allocation
423
- - Prioritization logic
424
- - Alert system
425
-
426
- ---
427
-
428
- ## 🔐 Security & Safety
429
-
430
- ### Challenge 1: Tool Execution Sandbox
431
-
432
- **Task:** Create a sandbox for tool execution:
433
- - Isolate tool execution
434
- - Limit file system access
435
- - Restrict network access
436
- - Monitor resource usage
437
-
438
- **Requirements:**
439
- - Sandbox architecture
440
- - Permission system
441
- - Resource limits
442
- - Monitoring
443
-
444
- ---
445
-
446
- ### Challenge 2: Input Validation
447
-
448
- **Task:** Implement comprehensive input validation:
449
- - Validate all user inputs
450
- - Sanitize tool arguments
451
- - Check for injection attacks
452
- - Rate limit requests
453
-
454
- **Requirements:**
455
- - Validation rules
456
- - Sanitization functions
457
- - Security checks
458
- - Rate limiting
459
-
460
- ---
461
-
462
- ### Challenge 3: Audit Logging
463
-
464
- **Task:** Create comprehensive audit system:
465
- - Log all agent actions
466
- - Track tool executions
467
- - Monitor session access
468
- - Generate security reports
469
-
470
- **Requirements:**
471
- - Logging architecture
472
- - Event tracking
473
- - Report generation
474
- - Privacy considerations
475
-
476
- ---
477
-
478
- ## 🧪 Testing Challenges
479
-
480
- ### Challenge 1: Comprehensive Test Suite
481
-
482
- **Task:** Write tests for:
483
- - Agent execution flow
484
- - Tool execution
485
- - Session persistence
486
- - Memory optimization
487
- - Error handling
488
- - Edge cases
489
-
490
- **Requirements:**
491
- - Unit tests for each component
492
- - Integration tests
493
- - Mock LLM responses
494
- - Test fixtures
495
-
496
- ---
497
-
498
- ### Challenge 2: Load Testing
499
-
500
- **Task:** Create load tests:
501
- - 100 concurrent agents
502
- - 1000 messages per minute
503
- - Session persistence under load
504
- - Memory optimization under load
505
-
506
- **Requirements:**
507
- - Load testing framework
508
- - Performance metrics
509
- - Bottleneck identification
510
- - Optimization recommendations
511
-
512
- ---
513
-
514
- ### Challenge 3: Fuzz Testing
515
-
516
- **Task:** Implement fuzz testing:
517
- - Random tool arguments
518
- - Malformed requests
519
- - Invalid session IDs
520
- - Edge case inputs
521
-
522
- **Requirements:**
523
- - Fuzzing strategy
524
- - Error detection
525
- - Crash prevention
526
- - Recovery mechanisms
527
-
528
- ---
529
-
530
- ## 📝 Reflection Questions
531
-
532
- ### Question 1: Architecture Review
533
-
534
- **Reflect on the framework architecture:**
535
- 1. What are the strongest design decisions?
536
- 2. What would you change if rebuilding?
537
- 3. What's missing for production use?
538
- 4. How would you scale this to 1M users?
539
- 5. What security concerns exist?
540
-
541
- **Write a detailed architecture review.**
542
-
543
- ---
544
-
545
- ### Question 2: Learning Outcomes
546
-
547
- **Reflect on what you learned:**
548
- 1. What was the most challenging concept?
549
- 2. Which episode was most valuable?
550
- 3. What would you teach differently?
551
- 4. What additional topics are needed?
552
- 5. How has your understanding evolved?
553
-
554
- **Create a learning reflection document.**
555
-
556
- ---
557
-
558
- ### Question 3: Real-World Application
559
-
560
- **Design a real product using this framework:**
561
- 1. What problem does it solve?
562
- 2. What tools are needed?
563
- 3. How does it use sessions?
564
- 4. What optimizations are critical?
565
- 5. How would you deploy it?
566
-
567
- **Create a product specification.**
568
-
569
- ---
570
-
571
- ## 🎯 Mastery Checklist
572
-
573
- Test your mastery by completing:
574
-
575
- - [ ] Can explain every component's purpose
576
- - [ ] Can trace execution flow from start to finish
577
- - [ ] Can debug common issues
578
- - [ ] Can extend the framework
579
- - [ ] Can optimize for production
580
- - [ ] Can design new features
581
- - [ ] Can explain design decisions
582
- - [ ] Can teach someone else
583
- - [ ] Can build a real application
584
- - [ ] Can identify and fix bugs
585
-
586
- ---
587
-
588
- ## 💬 Discussion Questions
589
-
590
- Use these for study groups or self-reflection:
591
-
592
- 1. **Trade-offs**: What are the trade-offs between different memory optimization strategies?
593
-
594
- 2. **Scalability**: How would you scale this framework to handle millions of requests?
595
-
596
- 3. **Security**: What security vulnerabilities exist and how would you fix them?
597
-
598
- 4. **Testing**: How would you test an agent framework comprehensively?
599
-
600
- 5. **Monitoring**: What metrics would you track in production?
601
-
602
- 6. **Cost**: How would you minimize API costs while maintaining quality?
603
-
604
- 7. **Reliability**: How would you ensure the agent always responds correctly?
605
-
606
- 8. **Extensibility**: How would you make the framework more extensible?
607
-
608
- 9. **Performance**: What are the performance bottlenecks, and how would you fix them?
609
-
610
- 10. **User Experience**: How would you improve the user experience?
611
-
612
- ---
613
-
614
- ## 🏆 Advanced Projects
615
-
616
- ### Project 1: Agent Framework v2.0
617
-
618
- **Task:** Design the next version with:
619
- - Streaming support
620
- - WebSocket communication
621
- - Database sessions
622
- - Advanced memory
623
- - Tool marketplace
624
- - Multi-agent support
625
-
626
- **Deliverables:**
627
- - Architecture design
628
- - API specification
629
- - Migration plan
630
- - Implementation roadmap
631
-
632
- ---
633
-
634
- ### Project 2: Production Deployment
635
-
636
- **Task:** Deploy the framework to production:
637
- - Docker containerization
638
- - Kubernetes deployment
639
- - CI/CD pipeline
640
- - Monitoring setup
641
- - Logging system
642
- - Error tracking
643
-
644
- **Deliverables:**
645
- - Deployment configuration
646
- - Monitoring dashboards
647
- - Runbooks
648
- - Documentation
649
-
650
- ---
651
-
652
- ### Project 3: Agent Marketplace
653
-
654
- **Task:** Build a marketplace for:
655
- - Sharing agents
656
- - Sharing tools
657
- - Agent templates
658
- - Tool libraries
659
- - Community contributions
660
-
661
- **Deliverables:**
662
- - Platform design
663
- - API for sharing
664
- - Discovery system
665
- - Rating/review system
666
-
667
- ---
668
-
669
- ## 📚 Further Learning
670
-
671
- After completing these exercises, consider:
672
-
673
- 1. **Research Papers**: Read papers on agent architectures
674
- 2. **Open Source**: Contribute to agent frameworks
675
- 3. **Build Projects**: Create real applications
676
- 4. **Teach Others**: Share your knowledge
677
- 5. **Stay Updated**: Follow AI agent developments
678
-
679
- ---
680
-
681
- ## 🎓 Certification Project
682
-
683
- **Final Challenge:** Build a complete application that:
684
-
685
- 1. Uses the agent framework
686
- 2. Implements custom tools
687
- 3. Has session management
688
- 4. Includes memory optimization
689
- 5. Has a web interface
690
- 6. Is production-ready
691
- 7. Has comprehensive tests
692
- 8. Includes documentation
693
- 9. Has monitoring
694
- 10. Is deployed
695
-
696
- **This is your capstone project!**
697
-
698
- ---
699
-
700
- ## 💡 Tips for Success
701
-
702
- 1. **Start Simple**: Begin with basic implementations
703
- 2. **Test Thoroughly**: Write tests as you build
704
- 3. **Read Code**: Study the actual framework code
705
- 4. **Experiment**: Try different approaches
706
- 5. **Document**: Write down what you learn
707
- 6. **Share**: Discuss with others
708
- 7. **Iterate**: Improve your solutions
709
- 8. **Challenge Yourself**: Try the hard problems
710
-
711
- ---
712
-
713
- ## 🎯 Success Metrics
714
-
715
- You've mastered the framework when you can:
716
-
717
- ✅ Explain any component to a beginner
718
- ✅ Debug issues without looking at code
719
- ✅ Design new features confidently
720
- ✅ Optimize for specific use cases
721
- ✅ Build production applications
722
- ✅ Teach others effectively
723
-
724
- **Keep practicing until you reach all milestones!**
725
-
726
- ---
727
-
728
- *Good luck with your learning journey! 🚀*
729
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
misc/tutorials/ARCHITECTURE_DIAGRAMS.md DELETED
@@ -1,450 +0,0 @@
1
- # Architecture Diagrams
2
-
3
- This document contains visual diagrams of the agent framework architecture using Mermaid syntax.
4
-
5
- ---
6
-
7
- ## System Overview
8
-
9
- ```mermaid
10
- graph TB
11
- User[User/Application] --> Agent[Agent.run]
12
- Agent --> Think[Think: LLM Call]
13
- Agent --> Act[Act: Tool Execution]
14
- Agent --> Observe[Observe: Process Results]
15
-
16
- Think --> LlmClient[LlmClient]
17
- Act --> Tools[Tools]
18
- Observe --> Context[ExecutionContext]
19
-
20
- LlmClient --> LiteLLM[LiteLLM]
21
- Tools --> FunctionTools[FunctionTools]
22
- Tools --> MCPTools[MCP Tools]
23
- Context --> Session[Session Manager]
24
-
25
- LiteLLM --> OpenAI[OpenAI API]
26
- LiteLLM --> Anthropic[Anthropic API]
27
- LiteLLM --> Local[Local Models]
28
-
29
- Session --> Memory[Memory Storage]
30
- ```
31
-
32
- ---
33
-
34
- ## Agent Execution Flow
35
-
36
- ```mermaid
37
- sequenceDiagram
38
- participant User
39
- participant Agent
40
- participant Context
41
- participant LLM
42
- participant Tools
43
-
44
- User->>Agent: run(user_input)
45
- Agent->>Context: Create/load ExecutionContext
46
- Agent->>Context: Add user event
47
-
48
- loop Until completion or max_steps
49
- Agent->>Agent: step(context)
50
- Agent->>LLM: think(llm_request)
51
- LLM-->>Agent: LlmResponse (with tool_calls)
52
- Agent->>Context: Add response event
53
-
54
- alt Tool calls present
55
- Agent->>Tools: act(context, tool_calls)
56
- Tools-->>Agent: ToolResults
57
- Agent->>Context: Add tool results event
58
- end
59
-
60
- Agent->>Agent: Check completion
61
- end
62
-
63
- Agent-->>User: AgentResult
64
- ```
65
-
66
- ---
67
-
68
- ## Data Model Relationships
69
-
70
- ```mermaid
71
- classDiagram
72
- class ExecutionContext {
73
- +str execution_id
74
- +List[Event] events
75
- +int current_step
76
- +Dict state
77
- +str final_result
78
- +str session_id
79
- +add_event()
80
- +increment_step()
81
- }
82
-
83
- class Event {
84
- +str id
85
- +str execution_id
86
- +float timestamp
87
- +str author
88
- +List[ContentItem] content
89
- }
90
-
91
- class Message {
92
- +str type
93
- +str role
94
- +str content
95
- }
96
-
97
- class ToolCall {
98
- +str type
99
- +str tool_call_id
100
- +str name
101
- +dict arguments
102
- }
103
-
104
- class ToolResult {
105
- +str type
106
- +str tool_call_id
107
- +str name
108
- +str status
109
- +list content
110
- }
111
-
112
- class Session {
113
- +str session_id
114
- +str user_id
115
- +List[Event] events
116
- +dict state
117
- +datetime created_at
118
- +datetime updated_at
119
- }
120
-
121
- ExecutionContext "1" *-- "many" Event
122
- Event "1" *-- "many" ContentItem
123
- ContentItem <|-- Message
124
- ContentItem <|-- ToolCall
125
- ContentItem <|-- ToolResult
126
- Session "1" o-- "many" Event
127
- ```
128
-
129
- ---
130
-
131
- ## Tool System Architecture
132
-
133
- ```mermaid
134
- graph LR
135
- PythonFunction[Python Function] --> FunctionTool[FunctionTool]
136
- FunctionTool --> BaseTool[BaseTool]
137
- BaseTool --> ToolDefinition[Tool Definition JSON Schema]
138
-
139
- MCPServer[MCP Server] --> MCPTool[MCP Tool]
140
- MCPTool --> FunctionTool
141
-
142
- FunctionTool --> Agent[Agent]
143
- Agent --> LLM[LLM API]
144
-
145
- LLM --> ToolCall[ToolCall]
146
- ToolCall --> Agent
147
- Agent --> FunctionTool
148
- FunctionTool --> ToolResult[ToolResult]
149
- ToolResult --> Agent
150
- ```
151
-
152
- ---
153
-
154
- ## Memory Optimization Flow
155
-
156
- ```mermaid
157
- flowchart TD
158
- Start[LLM Request Created] --> CountTokens[Count Tokens]
159
- CountTokens --> CheckThreshold{Tokens < Threshold?}
160
-
161
- CheckThreshold -->|Yes| NoOptimization[No Optimization Needed]
162
- CheckThreshold -->|No| ApplyCompaction[Apply Compaction]
163
-
164
- ApplyCompaction --> CountAgain[Count Tokens Again]
165
- CountAgain --> CheckAgain{Tokens < Threshold?}
166
-
167
- CheckAgain -->|Yes| Done[Done]
168
- CheckAgain -->|No| ApplySummarization[Apply Summarization]
169
-
170
- ApplySummarization --> Done
171
- NoOptimization --> Done
172
- ```
173
-
174
- ---
175
-
176
- ## Session Management Flow
177
-
178
- ```mermaid
179
- sequenceDiagram
180
- participant User
181
- participant Agent
182
- participant SessionManager
183
- participant Storage
184
-
185
- User->>Agent: run(input, session_id)
186
- Agent->>SessionManager: get_or_create(session_id)
187
-
188
- alt Session exists
189
- SessionManager->>Storage: get(session_id)
190
- Storage-->>SessionManager: Session
191
- SessionManager->>Agent: Load session
192
- Agent->>Agent: Restore events/state
193
- else New session
194
- SessionManager->>Storage: create(session_id)
195
- Storage-->>SessionManager: New Session
196
- SessionManager->>Agent: New session
197
- end
198
-
199
- Agent->>Agent: Execute agent loop
200
- Agent->>SessionManager: save(session)
201
- SessionManager->>Storage: Persist session
202
- Agent-->>User: Result
203
- ```
204
-
205
- ---
206
-
207
- ## MCP Integration Flow
208
-
209
- ```mermaid
210
- sequenceDiagram
211
- participant Agent
212
- participant MCPClient as MCP Client
213
- participant MCPServer as MCP Server
214
- participant FunctionTool
215
-
216
- Agent->>MCPClient: load_mcp_tools(connection)
217
- MCPClient->>MCPServer: Connect via stdio
218
- MCPServer-->>MCPClient: Connection established
219
-
220
- MCPClient->>MCPServer: list_tools()
221
- MCPServer-->>MCPClient: Tool definitions
222
-
223
- loop For each MCP tool
224
- MCPClient->>FunctionTool: _create_mcp_tool()
225
- FunctionTool-->>MCPClient: Wrapped tool
226
- end
227
-
228
- MCPClient-->>Agent: List of FunctionTools
229
-
230
- Note over Agent,FunctionTool: Agent can now use MCP tools
231
-
232
- Agent->>FunctionTool: Execute tool
233
- FunctionTool->>MCPServer: call_tool(name, args)
234
- MCPServer-->>FunctionTool: Result
235
- FunctionTool-->>Agent: ToolResult
236
- ```
237
-
238
- ---
239
-
240
- ## Web Application Architecture
241
-
242
- ```mermaid
243
- graph TB
244
- Browser[Browser] --> Frontend[HTML/CSS/JS]
245
- Frontend --> API[FastAPI Backend]
246
-
247
- API --> ChatEndpoint["/api/chat"]
248
- API --> UploadEndpoint["/api/upload"]
249
- API --> ToolsEndpoint["/api/tools"]
250
- API --> SessionsEndpoint["/api/sessions"]
251
-
252
- ChatEndpoint --> Agent[Agent Framework]
253
- UploadEndpoint --> FileStorage[File Storage]
254
- ToolsEndpoint --> ToolRegistry[Tool Registry]
255
- SessionsEndpoint --> SessionManager[Session Manager]
256
-
257
- Agent --> LlmClient[LlmClient]
258
- Agent --> Tools[Tools]
259
- Agent --> Memory[Memory Manager]
260
-
261
- LlmClient --> LiteLLM[LiteLLM]
262
- Tools --> FunctionTools[Function Tools]
263
- Memory --> SessionManager
264
- ```
265
-
266
- ---
267
-
268
- ## Request/Response Flow
269
-
270
- ```mermaid
271
- sequenceDiagram
272
- participant Frontend
273
- participant API
274
- participant Agent
275
- participant LLM
276
- participant Tools
277
-
278
- Frontend->>API: POST /api/chat {message, session_id}
279
- API->>Agent: run(message, session_id)
280
-
281
- Agent->>Agent: Load session
282
- Agent->>Agent: step(context)
283
- Agent->>LLM: generate(request)
284
- LLM-->>Agent: Response with tool_calls
285
-
286
- alt Tool calls present
287
- Agent->>Tools: act(context, tool_calls)
288
- Tools-->>Agent: ToolResults
289
- Agent->>Agent: step(context) [continue]
290
- end
291
-
292
- Agent->>Agent: Save session
293
- Agent-->>API: AgentResult
294
- API->>API: Format response
295
- API-->>Frontend: {response, trace, tools_used}
296
- Frontend->>Frontend: Display message
297
- ```
298
-
299
- ---
300
-
301
- ## Tool Execution Details
302
-
303
- ```mermaid
304
- flowchart TD
305
- Start[ToolCall Received] --> FindTool[Find Tool by Name]
306
- FindTool --> ToolExists{Tool Found?}
307
-
308
- ToolExists -->|No| Error[Raise ValueError]
309
- ToolExists -->|Yes| CheckConfirmation{Requires Confirmation?}
310
-
311
- CheckConfirmation -->|Yes| Pending[Create PendingToolCall]
312
- CheckConfirmation -->|No| Execute[Execute Tool]
313
-
314
- Execute --> ToolSuccess{Success?}
315
- ToolSuccess -->|Yes| SuccessResult[ToolResult: success]
316
- ToolSuccess -->|No| ErrorResult[ToolResult: error]
317
-
318
- Pending --> WaitUser[Wait for User Confirmation]
319
- WaitUser --> UserApproved{Approved?}
320
- UserApproved -->|Yes| Execute
321
- UserApproved -->|No| RejectedResult[ToolResult: rejected]
322
-
323
- SuccessResult --> Return[Return ToolResult]
324
- ErrorResult --> Return
325
- RejectedResult --> Return
326
- Error --> Return
327
- ```
328
-
329
- ---
330
-
331
- ## Memory Optimization Strategies
332
-
333
- ```mermaid
334
- graph TB
335
- Request[LlmRequest] --> Count[Count Tokens]
336
- Count --> Threshold{> Threshold?}
337
-
338
- Threshold -->|No| Skip[Skip Optimization]
339
- Threshold -->|Yes| Strategy{Choose Strategy}
340
-
341
- Strategy --> SlidingWindow[Sliding Window<br/>Keep Recent N]
342
- Strategy --> Compaction[Compaction<br/>Replace with References]
343
- Strategy --> Summarization[Summarization<br/>LLM Compression]
344
-
345
- SlidingWindow --> Reduced1[Reduced Tokens]
346
- Compaction --> Reduced2[Reduced Tokens]
347
- Summarization --> Reduced3[Reduced Tokens]
348
-
349
- Reduced1 --> Final[Optimized Request]
350
- Reduced2 --> Final
351
- Reduced3 --> Final
352
- Skip --> Final
353
- ```
354
-
355
- ---
356
-
357
- ## Component Dependencies
358
-
359
- ```mermaid
360
- graph TD
361
- Agent --> LlmClient
362
- Agent --> Tools
363
- Agent --> Models
364
- Agent --> Memory
365
- Agent --> Callbacks
366
-
367
- LlmClient --> Models
368
- LlmClient --> LiteLLM
369
-
370
- Tools --> Models
371
- Tools --> Utils
372
-
373
- Memory --> Models
374
- Memory --> LlmClient
375
-
376
- Callbacks --> Memory
377
- Callbacks --> Models
378
-
379
- MCP --> Tools
380
- MCP --> Models
381
-
382
- WebApp --> Agent
383
- WebApp --> Models
384
-
385
- Models --> Pydantic
386
- Tools --> ABC
387
- ```
388
-
389
- ---
390
-
391
- ## State Management
392
-
393
- ```mermaid
394
- stateDiagram-v2
395
- [*] --> Initialized: Agent created
396
- Initialized --> Running: run() called
397
- Running --> Thinking: step() called
398
- Thinking --> Acting: Tool calls received
399
- Acting --> Thinking: Tool results processed
400
- Thinking --> Completed: Final response
401
- Acting --> Completed: Final response
402
- Completed --> [*]
403
-
404
- Running --> Pending: Tool confirmation required
405
- Pending --> Running: Confirmation received
406
- ```
407
-
408
- ---
409
-
410
- ## Error Handling Flow
411
-
412
- ```mermaid
413
- flowchart TD
414
- Start[Operation] --> Try{Try}
415
- Try -->|Success| Success[Return Result]
416
- Try -->|Error| Catch[Catch Exception]
417
-
418
- Catch --> ErrorType{Error Type?}
419
- ErrorType -->|LLM Error| LLMError[Return LlmResponse with error_message]
420
- ErrorType -->|Tool Error| ToolError[Return ToolResult with error status]
421
- ErrorType -->|Network Error| NetworkError[Retry or Return Error]
422
- ErrorType -->|Validation Error| ValidationError[Return Validation Error]
423
-
424
- LLMError --> Log[Log Error]
425
- ToolError --> Log
426
- NetworkError --> Log
427
- ValidationError --> Log
428
-
429
- Log --> ReturnError[Return Error to User]
430
- Success --> End([End])
430
- ReturnError --> End
432
- ```
433
-
434
- ---
435
-
436
- ## Usage Examples
437
-
438
- These diagrams can be:
439
- 1. Included in video thumbnails
440
- 2. Shown during explanations
441
- 3. Added to documentation
442
- 4. Used in presentations
443
- 5. Embedded in blog posts
444
-
445
- To render these diagrams:
446
- - Use Mermaid Live Editor: https://mermaid.live/
447
- - Use GitHub (renders automatically in .md files)
448
- - Use VS Code with Mermaid extension
449
- - Use documentation tools like MkDocs
450
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
misc/tutorials/EPISODE_01_INTRODUCTION.md DELETED
@@ -1,318 +0,0 @@
1
- # Episode 1: Introduction & Python Foundations
2
-
3
- **Duration**: 30 minutes
4
- **What to Build**: None (concepts only)
5
- **Target Audience**: Intermediate Python developers
6
-
7
- ---
8
-
9
- ## Episode Structure
10
-
11
- ### 1. Hook (2 min)
12
- **Show what we'll build:**
13
- - Quick demo of the final agent framework
14
- - Agent using calculator and web search
15
- - Session persistence demo
16
- - Web application preview
17
-
18
- **Hook Statement**: "By the end of this series, you'll have built a complete AI agent framework from scratch that can reason, use tools, remember conversations, and be deployed as a web app."
19
-
20
- ---
21
-
22
- ### 2. Problem (3 min)
23
- **Why build an agent framework?**
24
-
25
- **The Problem:**
26
- - LLMs are powerful but limited to their training data
27
- - They can't access real-time information
28
- - They can't perform actions (file operations, API calls, etc.)
29
- - They need structure to be reliable
30
-
31
- **The Solution:**
32
- - Agent framework that gives LLMs tools
33
- - Multi-step reasoning loop
34
- - State management
35
- - Extensible architecture
36
-
37
- **Real-world Use Cases:**
38
- - Customer support bots with database access
39
- - Research assistants that can search the web
40
- - Code assistants that can read/write files
41
- - Data analysis agents that can process files
42
-
43
- ---
44
-
45
- ### 3. Concept: Python Patterns We'll Use (20 min)
46
-
47
- #### 3.1 Type Hints (5 min)
48
-
49
- **Why Type Hints?**
50
- - Self-documenting code
51
- - IDE autocomplete
52
- - Catch errors early
53
- - Better collaboration
54
-
55
- **Common Patterns:**
56
- ```python
57
- from typing import List, Optional, Literal, Union, Dict, Any
58
-
59
- # Basic types
60
- name: str = "Agent"
61
- count: int = 5
62
-
63
- # Collections
64
- messages: List[str] = []
65
- config: Dict[str, Any] = {}
66
-
67
- # Optional (can be None)
68
- result: Optional[str] = None # Same as: str | None
69
-
70
- # Union (multiple types)
71
- content: Union[str, dict] = "text" # Same as: str | dict
72
-
73
- # Literal (specific values only)
74
- role: Literal["user", "assistant", "system"] = "user"
75
- ```
76
-
77
- **Live Demo:**
78
- - Show IDE autocomplete with type hints
79
- - Show what happens without type hints
80
- - Explain `Optional` vs required parameters
81
-
82
- ---
83
-
84
- #### 3.2 Pydantic (7 min)
85
-
86
- **What is Pydantic?**
87
- - Data validation library
88
- - Runtime type checking
89
- - Automatic serialization
90
-
91
- **Basic Example:**
92
- ```python
93
- from pydantic import BaseModel, Field
94
-
95
- class Message(BaseModel):
96
- role: Literal["user", "assistant"]
97
- content: str
98
-
99
- # Valid
100
- msg = Message(role="user", content="Hello")
101
- print(msg.role) # "user"
102
-
103
- # Invalid - raises ValidationError
104
- msg = Message(role="user") # Missing 'content'!
105
- ```
106
-
107
- **Field Defaults:**
108
- ```python
109
- from datetime import datetime
110
- import uuid
111
-
112
- class Event(BaseModel):
113
- event_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
114
- timestamp: float = Field(default_factory=lambda: datetime.now().timestamp())
115
- content: List[str] = Field(default_factory=list) # NOT content: List[str] = []
116
- ```
117
-
118
- **Why Field(default_factory)?**
119
- - Prevents shared mutable defaults
120
- - Each instance gets a new list/dict
121
- - Critical for avoiding bugs
122
-
123
- **Serialization:**
124
- ```python
125
- # Model to dict
126
- data = msg.model_dump()
127
-
128
- # Model to JSON
129
- json_str = msg.model_dump_json()
130
-
131
- # Dict to model
132
- msg2 = Message.model_validate({"role": "user", "content": "hi"})
133
- ```
134
-
135
- **When to Use Pydantic:**
136
- - Data crossing boundaries (API requests/responses)
137
- - User input validation
138
- - Configuration files
139
- - External data
140
-
141
- ---
142
-
143
- #### 3.3 Dataclasses (5 min)
144
-
145
- **What are Dataclasses?**
146
- - Lightweight data containers
147
- - Less overhead than Pydantic
148
- - Mutable by default
149
-
150
- **Example:**
151
- ```python
152
- from dataclasses import dataclass, field
153
- from typing import List, Dict, Any
154
-
155
- @dataclass
156
- class ExecutionContext:
157
- execution_id: str = field(default_factory=lambda: str(uuid.uuid4()))
158
- events: List[Event] = field(default_factory=list)
159
- current_step: int = 0
160
- state: Dict[str, Any] = field(default_factory=dict)
161
-
162
- def add_event(self, event: Event):
163
- self.events.append(event)
164
- ```
165
-
166
- **Pydantic vs Dataclass:**
167
- | Feature | Pydantic | Dataclass |
168
- |---------|----------|-----------|
169
- | Validation | Yes | No |
170
- | JSON serialization | Built-in | Manual |
171
- | Performance | Slower | Faster |
172
- | Use Case | External data | Internal state |
173
-
174
- **Our Rule:**
175
- - **Pydantic** for data crossing boundaries
176
- - **Dataclass** for internal mutable state
177
-
178
- ---
179
-
180
- #### 3.4 Async/Await (3 min)
181
-
182
- **Why Async?**
183
- - LLM API calls take seconds
184
- - Without async, the program sits idle while it waits for each response
185
- - With async, can do other work while waiting
186
-
187
- **Basic Example:**
188
- ```python
189
- import asyncio
190
-
191
- async def call_llm(prompt: str):
192
- await asyncio.sleep(2) # Simulates API call
193
- return f"Response to: {prompt}"
194
-
195
- # Sequential - takes 6 seconds
196
- async def sequential():
197
- results = []
198
- for prompt in ["A", "B", "C"]:
199
- results.append(await call_llm(prompt))
200
- return results
201
-
202
- # Concurrent - takes 2 seconds
203
- async def concurrent():
204
- results = await asyncio.gather(
205
- call_llm("A"),
206
- call_llm("B"),
207
- call_llm("C")
208
- )
209
- return results
210
- ```
211
-
212
- **Key Rules:**
213
- 1. Calling an `async` function only creates a coroutine; it must be `await`ed (or scheduled as a task) to actually run
214
- 2. `await` only works inside `async` functions
215
- 3. Use `asyncio.run()` at top level
216
-
217
- **In Jupyter:**
218
- - Can use `await` directly (event loop already running)
219
-
220
- ---
221
-
222
- ### 4. Demo: Working Agent (3 min)
223
-
224
- **Show the Final Product:**
225
- ```python
226
- from agent_framework import Agent, LlmClient
227
- from agent_tools import calculator, search_web
228
-
229
- agent = Agent(
230
- model=LlmClient(model="gpt-4o-mini"),
231
- tools=[calculator, search_web],
232
- instructions="You are a helpful assistant.",
233
- max_steps=10
234
- )
235
-
236
- result = await agent.run("What is 123 * 456?")
237
- print(result.output)
238
- ```
239
-
240
- **What Happens:**
241
- 1. Agent receives user input
242
- 2. Calls LLM with tools available
243
- 3. LLM decides to use calculator
244
- 4. Agent executes calculator tool
245
- 5. Returns result to user
246
-
247
- **Show Trace:**
248
- - Display execution trace
249
- - Show each step
250
- - Highlight tool calls
251
-
252
- ---
253
-
254
- ### 5. Next Steps (2 min)
255
-
256
- **Preview Episode 2:**
257
- - Making your first LLM API call
258
- - Understanding chat completion format
259
- - Building a simple LLM wrapper
260
-
261
- **What to Prepare:**
262
- - Python 3.10+ installed
263
- - OpenAI API key (or other provider)
264
- - Code editor ready
265
-
266
- **Repository:**
267
- - GitHub link
268
- - Each episode has a branch
269
- - Follow along with code
270
-
271
- ---
272
-
273
- ## Key Takeaways
274
-
275
- 1. **Type hints** make code self-documenting
276
- 2. **Pydantic** validates data at runtime
277
- 3. **Dataclasses** are lightweight for internal state
278
- 4. **Async/await** enables concurrent operations
279
- 5. **Field(default_factory=...)** (Pydantic) and **field(default_factory=...)** (dataclasses) prevent shared mutable defaults
280
-
281
- ---
282
-
283
- ## Common Questions
284
-
285
- **Q: Do I need to know advanced Python?**
286
- A: Intermediate level is enough. Know functions, classes, dictionaries, and basic async.
287
-
288
- **Q: Can I use this with local models?**
289
- A: Yes! LiteLLM supports Ollama and other local providers.
290
-
291
- **Q: How is this different from LangChain?**
292
- A: We're building from scratch to understand every detail. LangChain is great but hides complexity.
293
-
294
- **Q: What if I get stuck?**
295
- A: Each episode builds on the previous. Pause, rewatch, and check the GitHub branches.
296
-
297
- ---
298
-
299
- ## Resources
300
-
301
- - [Pydantic Documentation](https://docs.pydantic.dev/)
302
- - [Python Type Hints](https://docs.python.org/3/library/typing.html)
303
- - [Async/Await Guide](https://docs.python.org/3/library/asyncio.html)
304
- - [Repository](https://github.com/yourusername/ai-agent-from-scratch)
305
-
306
- ---
307
-
308
- ## Exercises (Optional)
309
-
310
- 1. Create a Pydantic model for a `User` with name, email, and age
311
- 2. Create a dataclass for `AppState` with a list of users
312
- 3. Write an async function that makes 3 concurrent API calls
313
- 4. Experiment with type hints in your IDE
314
-
315
- ---
316
-
317
- **Next Episode**: [Episode 2: Your First LLM Call](./EPISODE_02_LLM_CALL.md)
318
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
misc/tutorials/EPISODE_02_LLM_CALL.md DELETED
@@ -1,395 +0,0 @@
1
- # Episode 2: Your First LLM Call
2
-
3
- **Duration**: 25 minutes
4
- **What to Build**: Simple LLM wrapper script
5
- **Target Audience**: Intermediate Python developers
6
-
7
- ---
8
-
9
- ## Episode Structure
10
-
11
- ### 1. Hook (2 min)
12
- **Show what we'll build:**
13
- - Simple LLM client that can chat
14
- - Multi-turn conversation
15
- - Error handling
16
-
17
- **Hook Statement**: "Today we'll build the foundation that lets us talk to LLMs. This is the bridge between our Python code and the AI."
18
-
19
- ---
20
-
21
- ### 2. Problem (3 min)
22
- **Why do we need an LLM client?**
23
-
24
- **The Challenge:**
25
- - Different providers have different APIs
26
- - Message format is complex
27
- - Error handling is crucial
28
- - We need a unified interface
29
-
30
- **The Solution:**
31
- - Abstract the API complexity
32
- - Standardize message format
33
- - Handle errors gracefully
34
- - Support multiple providers
35
-
36
- ---
37
-
38
- ### 3. Concept: Understanding Chat Completion APIs (5 min)
39
-
40
- #### 3.1 Message Format
41
-
42
- **The Standard Format:**
43
- ```python
44
- messages = [
45
- {"role": "system", "content": "You are a helpful assistant."},
46
- {"role": "user", "content": "What is 2+2?"},
47
- {"role": "assistant", "content": "2+2 equals 4."},
48
- {"role": "user", "content": "And 3+3?"},
49
- ]
50
- ```
51
-
52
- **Roles Explained:**
53
- - `system`: Sets behavior/personality (optional but recommended)
54
- - `user`: Human's messages
55
- - `assistant`: AI's previous responses
56
- - `tool`: Tool execution results (we'll cover this later)
57
-
58
- **Key Point**: LLMs are **stateless**. You must send the full conversation history each time.
59
-
60
- ---
61
-
62
- #### 3.2 Making API Calls
63
-
64
- **Direct OpenAI:**
65
- ```python
66
- from openai import OpenAI
67
-
68
- client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
69
- response = client.chat.completions.create(
70
- model="gpt-4o-mini",
71
- messages=[{"role": "user", "content": "Hello!"}]
72
- )
73
- print(response.choices[0].message.content)
74
- ```
75
-
76
- **LiteLLM (Multi-Provider):**
77
- ```python
78
- from litellm import completion
79
-
80
- # OpenAI
81
- response = completion(
82
- model="gpt-4o-mini",
83
- messages=[{"role": "user", "content": "Hello!"}]
84
- )
85
-
86
- # Anthropic (same interface!)
87
- response = completion(
88
- model="anthropic/claude-3-sonnet",
89
- messages=[{"role": "user", "content": "Hello!"}]
90
- )
91
-
92
- # Local models
93
- response = completion(
94
- model="ollama/llama2",
95
- messages=[{"role": "user", "content": "Hello!"}]
96
- )
97
- ```
98
-
99
- **Why LiteLLM?**
100
- - Unified interface for all providers
101
- - Easy to switch models
102
- - Handles provider differences
103
-
104
- ---
105
-
106
- #### 3.3 Async Calls
107
-
108
- **Why Async?**
109
- - API calls take 1-5 seconds
110
- - Don't block the program
111
- - Can make multiple calls concurrently
112
-
113
- ```python
114
- from litellm import acompletion
115
-
116
- async def get_response(prompt: str) -> str:
117
- response = await acompletion(
118
- model="gpt-4o-mini",
119
- messages=[{"role": "user", "content": prompt}]
120
- )
121
- return response.choices[0].message.content
122
-
123
- # Run it
124
- result = asyncio.run(get_response("What is Python?"))
125
- ```
126
-
127
- ---
128
-
129
- ### 4. Live Coding: Building SimpleLlmClient (15 min)
130
-
131
- #### Step 1: Setup (2 min)
132
- ```python
133
- # simple_llm_client.py
134
- import os
135
- from typing import Optional
136
- from litellm import acompletion
137
- from dotenv import load_dotenv
138
-
139
- load_dotenv()
140
- ```
141
-
142
- #### Step 2: Response Model (3 min)
143
- ```python
144
- from pydantic import BaseModel
145
-
146
- class LlmResponse(BaseModel):
147
- """Standardized response from LLM."""
148
- content: Optional[str] = None
149
- error_message: Optional[str] = None
150
-
151
- @property
152
- def success(self) -> bool:
153
- return self.error_message is None
154
- ```
155
-
156
- **Why This Model?**
157
- - Standardized interface
158
- - Error handling built-in
159
- - Easy to check success
160
-
161
- #### Step 3: Client Class (5 min)
162
- ```python
163
- class SimpleLlmClient:
164
- """A simple wrapper around LLM API calls."""
165
-
166
- def __init__(self, model: str = "gpt-4o-mini", temperature: float = 0.7):
167
- self.model = model
168
- self.temperature = temperature
169
-
170
- async def generate(self, messages: list[dict]) -> LlmResponse:
171
- """Generate a response from the LLM."""
172
- try:
173
- response = await acompletion(
174
- model=self.model,
175
- messages=messages,
176
- temperature=self.temperature,
177
- )
178
-
179
- return LlmResponse(
180
- content=response.choices[0].message.content
181
- )
182
-
183
- except Exception as e:
184
- return LlmResponse(
185
- error_message=str(e)
186
- )
187
- ```
188
-
189
- **Key Points:**
190
- - Wraps API complexity
191
- - Handles errors gracefully
192
- - Returns standardized response
193
-
194
- #### Step 4: Testing (3 min)
195
- ```python
196
- async def main():
197
- client = SimpleLlmClient()
198
-
199
- response = await client.generate([
200
- {"role": "system", "content": "You are helpful."},
201
- {"role": "user", "content": "What is 2+2?"}
202
- ])
203
-
204
- if response.success:
205
- print(response.content)
206
- else:
207
- print(f"Error: {response.error_message}")
208
-
209
- import asyncio
210
- asyncio.run(main())
211
- ```
212
-
213
- #### Step 5: Multi-Turn Conversation (2 min)
214
- ```python
215
- async def chat():
216
- client = SimpleLlmClient()
217
-
218
- # Maintain conversation history
219
- messages = [
220
- {"role": "system", "content": "You are a math tutor."}
221
- ]
222
-
223
- # Turn 1
224
- messages.append({"role": "user", "content": "What is 5 x 3?"})
225
- response = await client.generate(messages)
226
- print(f"AI: {response.content}")
227
-
228
- # Add AI response to history
229
- messages.append({"role": "assistant", "content": response.content})
230
-
231
- # Turn 2 (AI remembers because we sent full history)
232
- messages.append({"role": "user", "content": "Now divide that by 3"})
233
- response = await client.generate(messages)
234
- print(f"AI: {response.content}") # Should say 5
235
- ```
236
-
237
- **Key Point**: Must maintain full conversation history.
238
-
239
- ---
240
-
241
- ### 5. Understanding the Response (2 min)
242
-
243
- **Response Structure:**
244
- ```python
245
- response = await acompletion(...)
246
-
247
- # The response object
248
- print(response.model) # "gpt-4o-mini"
249
- print(response.choices) # List of completions
250
-
251
- # The main content
252
- choice = response.choices[0]
253
- print(choice.message.role) # "assistant"
254
- print(choice.message.content) # "Hello! How can I help you?"
255
- print(choice.finish_reason) # "stop" or "tool_calls" or "length"
256
-
257
- # Token usage (for cost tracking)
258
- print(response.usage.prompt_tokens) # Tokens in input
259
- print(response.usage.completion_tokens) # Tokens in output
260
- print(response.usage.total_tokens) # Total
261
- ```
262
-
263
- **Finish Reasons:**
264
- - `stop`: Normal completion
265
- - `tool_calls`: Model wants to use a tool (we'll cover this)
266
- - `length`: Hit max token limit
267
- - `content_filter`: Content was filtered
268
-
269
- ---
270
-
271
- ### 6. Error Handling (3 min)
272
-
273
- **Common Errors:**
274
- ```python
275
- from litellm.exceptions import (
276
- RateLimitError,
277
- APIError,
278
- AuthenticationError,
279
- Timeout
280
- )
281
-
282
- async def safe_completion(messages: list) -> str | None:
283
- try:
284
- response = await acompletion(
285
- model="gpt-4o-mini",
286
- messages=messages,
287
- timeout=30
288
- )
289
- return response.choices[0].message.content
290
-
291
- except AuthenticationError:
292
- print("Invalid API key!")
293
- return None
294
-
295
- except RateLimitError:
296
- print("Rate limited - wait and retry")
297
- await asyncio.sleep(60)
298
- return await safe_completion(messages) # Retry
299
-
300
- except Timeout:
301
- print("Request timed out")
302
- return None
303
-
304
- except APIError as e:
305
- print(f"API error: {e}")
306
- return None
307
- ```
308
-
309
- **Best Practices:**
310
- - Always handle errors
311
- - Provide meaningful error messages
312
- - Implement retry logic for rate limits
313
- - Set timeouts
314
-
315
- ---
316
-
317
- ### 7. Demo: Working Client (2 min)
318
-
319
- **Show:**
320
- - Single message
321
- - Multi-turn conversation
322
- - Error handling
323
- - Different models
324
-
325
- ---
326
-
327
- ### 8. Next Steps (1 min)
328
-
329
- **Preview Episode 3:**
330
- - Building data models (Message, ToolCall, etc.)
331
- - Why we need structured models
332
- - Pydantic validation
333
-
334
- **What We Built:**
335
- - Simple LLM client
336
- - Error handling
337
- - Multi-turn conversations
338
-
339
- ---
340
-
341
- ## Key Takeaways
342
-
343
- 1. LLM APIs are **stateless** - send full history each time
344
- 2. **LiteLLM** provides unified interface for multiple providers
345
- 3. **Async** is essential for non-blocking operations
346
- 4. **Error handling** is crucial for production
347
- 5. **Message format** is standardized across providers
348
-
349
- ---
350
-
351
- ## Common Mistakes
352
-
353
- **Mistake 1: Forgetting conversation history**
354
- ```python
355
- # Wrong - AI doesn't remember
356
- response1 = await client.generate([{"role": "user", "content": "My name is Alice"}])
357
- response2 = await client.generate([{"role": "user", "content": "What's my name?"}]) # Doesn't know!
358
-
359
- # Right - Maintain history
360
- messages = [{"role": "user", "content": "My name is Alice"}]
361
- response1 = await client.generate(messages)
362
- messages.append({"role": "assistant", "content": response1.content})
363
- messages.append({"role": "user", "content": "What's my name?"})
364
- response2 = await client.generate(messages) # Knows!
365
- ```
366
-
367
- **Mistake 2: Not handling errors**
368
- ```python
369
- # Wrong - crashes on error
370
- response = await acompletion(...)
371
- print(response.choices[0].message.content) # Might crash!
372
-
373
- # Right - handle errors
374
- try:
375
- response = await acompletion(...)
376
- if response.choices[0].message.content:
377
- print(response.choices[0].message.content)
378
- except Exception as e:
379
- print(f"Error: {e}")
380
- ```
381
-
382
- ---
383
-
384
- ## Exercises
385
-
386
- 1. Add retry logic with exponential backoff
387
- 2. Implement streaming responses
388
- 3. Add token usage tracking
389
- 4. Support temperature and other parameters
390
-
391
- ---
392
-
393
- **Previous Episode**: [Episode 1: Introduction](./EPISODE_01_INTRODUCTION.md)
394
- **Next Episode**: [Episode 3: Core Data Models](./EPISODE_03_DATA_MODELS.md)
395
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
misc/tutorials/EPISODE_03_DATA_MODELS.md DELETED
@@ -1,547 +0,0 @@
1
- # Episode 3: Core Data Models
2
-
3
- **Duration**: 35 minutes
4
- **What to Build**: `agent_framework/models.py`
5
- **Target Audience**: Intermediate Python developers
6
-
7
- ---
8
-
9
- ## Episode Structure
10
-
11
- ### 1. Hook (2 min)
12
- **Show what we'll build:**
13
- - Complete data model structure
14
- - Type-safe message handling
15
- - Execution tracking
16
- - Tool confirmation workflow
17
-
18
- **Hook Statement**: "Today we'll build the data structures that power our entire agent framework. These models ensure type safety and make our code predictable."
19
-
20
- ---
21
-
22
- ### 2. Problem (3 min)
23
- **Why do we need structured data models?**
24
-
25
- **The Challenge:**
26
- - Raw dictionaries are error-prone
27
- - No validation
28
- - Hard to understand
29
- - Easy to make mistakes
30
-
31
- **The Solution:**
32
- - Pydantic models for validation
33
- - Type hints for clarity
34
- - Structured data flow
35
- - Self-documenting code
36
-
37
- ---
38
-
39
- ### 3. Concept: Building Our Models (25 min)
40
-
41
- #### 3.1 Message Model (3 min)
42
-
43
- **What is a Message?**
44
- - Text content in conversation
45
- - Has a role (system, user, assistant)
46
- - Simple but critical
47
-
48
- **Implementation:**
49
- ```python
50
- from pydantic import BaseModel
51
- from typing import Literal
52
-
53
- class Message(BaseModel):
54
- """A text message in the conversation."""
55
- type: Literal["message"] = "message"
56
- role: Literal["system", "user", "assistant"]
57
- content: str
58
- ```
59
-
60
- **Why Literal?**
61
- - Only specific values allowed
62
- - Catches typos at runtime
63
- - Self-documenting
64
-
65
- **Live Coding**: Build Message model
66
-
67
- ---
68
-
69
- #### 3.2 ToolCall Model (3 min)
70
-
71
- **What is a ToolCall?**
72
- - LLM's request to execute a tool
73
- - Contains tool name and arguments
74
- - Has unique ID for tracking
75
-
76
- **Implementation:**
77
- ```python
78
- class ToolCall(BaseModel):
79
- """LLM's request to execute a tool."""
80
- type: Literal["tool_call"] = "tool_call"
81
- tool_call_id: str
82
- name: str
83
- arguments: dict
84
- ```
85
-
86
- **Key Fields:**
87
- - `tool_call_id`: Links to ToolResult
88
- - `name`: Which tool to call
89
- - `arguments`: Parameters for the tool
90
-
91
- **Live Coding**: Build ToolCall model
92
-
93
- ---
94
-
95
- #### 3.3 ToolResult Model (3 min)
96
-
97
- **What is a ToolResult?**
98
- - Outcome of tool execution
99
- - Success or error status
100
- - Contains output or error message
101
-
102
- **Implementation:**
103
- ```python
104
- class ToolResult(BaseModel):
105
- """Result from tool execution."""
106
- type: Literal["tool_result"] = "tool_result"
107
- tool_call_id: str
108
- name: str
109
- status: Literal["success", "error"]
110
- content: list
111
- ```
112
-
113
- **Why list for content?**
114
- - Can have multiple outputs
115
- - Flexible for different tools
116
- - Matches API format
117
-
118
- **Live Coding**: Build ToolResult model
119
-
120
- ---
121
-
122
- #### 3.4 ContentItem Union (2 min)
123
-
124
- **What is ContentItem?**
125
- - Union type for all content types
126
- - Used in events and requests
127
- - Type-safe polymorphism
128
-
129
- **Implementation:**
130
- ```python
131
- ContentItem = Union[Message, ToolCall, ToolResult]
132
- ```
133
-
134
- **Why Union?**
135
- - Events can contain any content type
136
- - Type checker understands all possibilities
137
- - Runtime validation via Pydantic
138
-
139
- **Live Coding**: Define ContentItem
140
-
141
- ---
142
-
143
- #### 3.5 ToolConfirmation Model (3 min)
144
-
145
- **What is ToolConfirmation?**
146
- - User's decision on a pending tool call
147
- - Can approve or reject
148
- - Can modify arguments before execution
149
-
150
- **Implementation:**
151
- ```python
152
- class ToolConfirmation(BaseModel):
153
- """User's decision on a pending tool call."""
154
-
155
- tool_call_id: str
156
- approved: bool
157
- modified_arguments: dict | None = None
158
- reason: str | None = None # Reason for rejection (if not approved)
159
- ```
160
-
161
- **Why This Model?**
162
- - Some tools are dangerous (delete file, send email)
163
- - Users should approve before execution
164
- - Allows argument modification (e.g., change file path)
165
- - Captures rejection reasons for debugging
166
-
167
- **Use Cases:**
168
- - Delete file confirmation
169
- - API call approval
170
- - Email sending confirmation
171
- - Database modification
172
-
173
- **Live Coding**: Build ToolConfirmation model
174
-
175
- ---
176
-
177
- #### 3.6 PendingToolCall Model (2 min)
178
-
179
- **What is PendingToolCall?**
180
- - A tool call awaiting user confirmation
181
- - Contains the original ToolCall
182
- - Has a confirmation message to show user
183
-
184
- **Implementation:**
185
- ```python
186
- class PendingToolCall(BaseModel):
187
- """A tool call awaiting user confirmation."""
188
-
189
- tool_call: ToolCall
190
- confirmation_message: str
191
- ```
192
-
193
- **Key Points:**
194
- - Wraps the original ToolCall
195
- - `confirmation_message` explains what will happen
196
- - Agent pauses until user responds
197
-
198
- **Flow:**
199
- 1. Agent decides to call dangerous tool
200
- 2. Creates PendingToolCall with message
201
- 3. Returns to user for approval
202
- 4. User submits ToolConfirmation
203
- 5. Agent continues or skips based on approval
204
-
205
- **Live Coding**: Build PendingToolCall model
206
-
207
- ---
208
-
209
- #### 3.7 Event Model (4 min)
210
-
211
- **What is an Event?**
212
- - Recorded step in execution
213
- - Contains one or more content items
214
- - Has timestamp and author
215
-
216
- **Implementation:**
217
- ```python
218
- import uuid
219
- from datetime import datetime
220
-
221
- class Event(BaseModel):
222
- """A recorded occurrence during agent execution."""
223
- id: str = Field(default_factory=lambda: str(uuid.uuid4()))
224
- execution_id: str
225
- timestamp: float = Field(default_factory=lambda: datetime.now().timestamp())
226
- author: str # "user" or agent name
227
- content: List[ContentItem] = Field(default_factory=list)
228
- ```
229
-
230
- **Key Points:**
231
- - `default_factory` for unique IDs
232
- - Timestamp for ordering
233
- - Author tracks who created it
234
- - Content list can be empty
235
-
236
- **Live Coding**: Build Event model
237
-
238
- ---
239
-
240
- #### 3.8 ExecutionContext Dataclass (5 min)
241
-
242
- **What is ExecutionContext?**
243
- - Central state container
244
- - Mutable (needs to be dataclass)
245
- - Tracks entire execution
246
-
247
- **Implementation:**
248
- ```python
249
- from dataclasses import dataclass, field
250
- from typing import Dict, Any, Optional
251
-
252
- @dataclass
253
- class ExecutionContext:
254
- """Central storage for all execution state."""
255
-
256
- execution_id: str = field(default_factory=lambda: str(uuid.uuid4()))
257
- events: List[Event] = field(default_factory=list)
258
- current_step: int = 0
259
- state: Dict[str, Any] = field(default_factory=dict)
260
- final_result: Optional[str | BaseModel] = None
261
- session_id: Optional[str] = None # Link to session for persistence
262
-
263
- def add_event(self, event: Event):
264
- """Append an event to the execution history."""
265
- self.events.append(event)
266
-
267
- def increment_step(self):
268
- """Move to the next execution step."""
269
- self.current_step += 1
270
- ```
271
-
272
- **Why Dataclass?**
273
- - Mutable state needs to be lightweight
274
- - No validation needed (internal use)
275
- - Better performance
276
-
277
- **Key Fields:**
278
- - `state`: Store pending tool calls, confirmations, custom data
279
- - `session_id`: Links to session for persistence
280
-
281
- **Live Coding**: Build ExecutionContext
282
-
283
- ---
284
-
285
- ### 4. Testing Our Models (3 min)
286
-
287
- **Test Message:**
288
- ```python
289
- msg = Message(role="user", content="Hello")
290
- print(msg.role) # "user"
291
- print(msg.type) # "message"
292
- ```
293
-
294
- **Test ToolCall:**
295
- ```python
296
- tool_call = ToolCall(
297
- tool_call_id="call_123",
298
- name="calculator",
299
- arguments={"expression": "2+2"}
300
- )
301
- print(tool_call.name) # "calculator"
302
- ```
303
-
304
- **Test ToolConfirmation:**
305
- ```python
306
- # User approves with modification
307
- confirmation = ToolConfirmation(
308
- tool_call_id="call_123",
309
- approved=True,
310
- modified_arguments={"expression": "2+3"} # Changed!
311
- )
312
- print(confirmation.approved) # True
313
-
314
- # User rejects with reason
315
- rejection = ToolConfirmation(
316
- tool_call_id="call_456",
317
- approved=False,
318
- reason="I don't want to delete that file"
319
- )
320
- print(rejection.reason) # "I don't want to delete that file"
321
- ```
322
-
323
- **Test PendingToolCall:**
324
- ```python
325
- pending = PendingToolCall(
326
- tool_call=tool_call,
327
- confirmation_message="The agent wants to calculate '2+2'. Do you approve?"
328
- )
329
- print(pending.confirmation_message)
330
- ```
331
-
332
- **Test Event:**
333
- ```python
334
- event = Event(
335
- execution_id="exec_123",
336
- author="user",
337
- content=[Message(role="user", content="Hello")]
338
- )
339
- print(len(event.content)) # 1
340
- ```
341
-
342
- **Test ExecutionContext:**
343
- ```python
344
- context = ExecutionContext()
345
- context.add_event(event)
346
- print(context.current_step) # 0
347
- context.increment_step()
348
- print(context.current_step) # 1
349
-
350
- # Store pending tool calls in state
351
- context.state["pending_tool_calls"] = [pending.model_dump()]
352
- ```
353
-
354
- ---
355
-
356
- ### 5. Why Pydantic vs Dataclass? (2 min)
357
-
358
- **Pydantic (Message, ToolCall, ToolResult, Event, ToolConfirmation, PendingToolCall):**
359
- - Data crossing boundaries
360
- - Needs validation
361
- - Serialization required
362
- - Can be made immutable (`frozen=True`) when needed
363
-
364
- **Dataclass (ExecutionContext):**
365
- - Internal mutable state
366
- - No validation needed
367
- - Performance critical
368
- - Frequent updates
369
-
370
- **Rule of Thumb:**
371
- - External data → Pydantic
372
- - Internal state → Dataclass
373
-
374
- ---
375
-
376
- ### 6. Demo: Complete Models (2 min)
377
-
378
- **Show:**
379
- - All models working together
380
- - Type safety in action
381
- - Validation catching errors
382
- - Serialization working
383
- - Confirmation workflow
384
-
385
- ---
386
-
387
- ### 7. Next Steps (1 min)
388
-
389
- **Preview Episode 4:**
390
- - Building the LLM client
391
- - Converting our models to API format
392
- - Parsing API responses
393
-
394
- **What We Built:**
395
- - 6 Pydantic models: Message, ToolCall, ToolResult, ToolConfirmation, PendingToolCall, Event
396
- - 1 Dataclass: ExecutionContext
397
- - ContentItem union type
398
- - Complete data model structure matching actual codebase
399
-
400
- ---
401
-
402
- ## Key Takeaways
403
-
404
- 1. **Pydantic** validates data at runtime
405
- 2. **Literal types** constrain values
406
- 3. **Union types** enable polymorphism
407
- 4. **Field(default_factory=...)** prevents shared defaults
408
- 5. **Dataclass** for mutable internal state
409
- 6. **ToolConfirmation** enables user approval workflow
410
- 7. **PendingToolCall** pauses execution for confirmation
411
-
412
- ---
413
-
414
- ## Common Mistakes
415
-
416
- **Mistake 1: Mutable default arguments**
417
- ```python
418
- # Wrong
419
- class BadEvent(BaseModel):
420
- content: List[str] = [] # Shared across instances!
421
-
422
- # Right
423
- class GoodEvent(BaseModel):
424
- content: List[str] = Field(default_factory=list) # New list each time
425
- ```
426
-
427
- **Mistake 2: Missing type hints**
428
- ```python
429
- # Wrong - no type safety
430
- def process(event):
431
- return event.content
432
-
433
- # Right - type checker helps
434
- def process(event: Event) -> List[ContentItem]:
435
- return event.content
436
- ```
437
-
438
- **Mistake 3: Forgetting optional fields**
439
- ```python
440
- # Wrong - reason required even for approval
441
- class BadConfirmation(BaseModel):
442
- approved: bool
443
- reason: str # Always required!
444
-
445
- # Right - reason optional
446
- class GoodConfirmation(BaseModel):
447
- approved: bool
448
- reason: str | None = None # Only needed for rejection
449
- ```
450
-
451
- ---
452
-
453
- ## Exercises
454
-
455
- 1. **Build ToolConfirmation Validator**: Add validation that `reason` is required when `approved=False`
456
- 2. **Create Helper Function**: Write `extract_pending_calls(context: ExecutionContext) -> List[PendingToolCall]`
457
- 3. **Add Metadata to Event**: Add an optional `metadata: dict` field to Event for custom data
458
- 4. **Create ToolCallWithResult Model**: Combine ToolCall and ToolResult into a single model for reporting
459
-
460
- ---
461
-
462
- ## Complete models.py File
463
-
464
- ```python
465
- """Core data models for the agent framework."""
466
-
467
- from typing import Literal, Union, List, Dict, Optional, Any
468
- from pydantic import BaseModel, Field
469
- from dataclasses import dataclass, field
470
- import uuid
471
- from datetime import datetime
472
-
473
-
474
- class Message(BaseModel):
475
- """A text message in the conversation."""
476
- type: Literal["message"] = "message"
477
- role: Literal["system", "user", "assistant"]
478
- content: str
479
-
480
-
481
- class ToolCall(BaseModel):
482
- """LLM's request to execute a tool."""
483
- type: Literal["tool_call"] = "tool_call"
484
- tool_call_id: str
485
- name: str
486
- arguments: dict
487
-
488
-
489
- class ToolResult(BaseModel):
490
- """Result from tool execution."""
491
- type: Literal["tool_result"] = "tool_result"
492
- tool_call_id: str
493
- name: str
494
- status: Literal["success", "error"]
495
- content: list
496
-
497
-
498
- ContentItem = Union[Message, ToolCall, ToolResult]
499
-
500
-
501
- class ToolConfirmation(BaseModel):
502
- """User's decision on a pending tool call."""
503
-
504
- tool_call_id: str
505
- approved: bool
506
- modified_arguments: dict | None = None
507
- reason: str | None = None
508
-
509
-
510
- class PendingToolCall(BaseModel):
511
- """A tool call awaiting user confirmation."""
512
-
513
- tool_call: ToolCall
514
- confirmation_message: str
515
-
516
-
517
- class Event(BaseModel):
518
- """A recorded occurrence during agent execution."""
519
- id: str = Field(default_factory=lambda: str(uuid.uuid4()))
520
- execution_id: str
521
- timestamp: float = Field(default_factory=lambda: datetime.now().timestamp())
522
- author: str
523
- content: List[ContentItem] = Field(default_factory=list)
524
-
525
-
526
- @dataclass
527
- class ExecutionContext:
528
- """Central storage for all execution state."""
529
-
530
- execution_id: str = field(default_factory=lambda: str(uuid.uuid4()))
531
- events: List[Event] = field(default_factory=list)
532
- current_step: int = 0
533
- state: Dict[str, Any] = field(default_factory=dict)
534
- final_result: Optional[str | BaseModel] = None
535
- session_id: Optional[str] = None
536
-
537
- def add_event(self, event: Event):
538
- self.events.append(event)
539
-
540
- def increment_step(self):
541
- self.current_step += 1
542
- ```
543
-
544
- ---
545
-
546
- **Previous Episode**: [Episode 2: Your First LLM Call](./EPISODE_02_LLM_CALL.md)
547
- **Next Episode**: [Episode 4: The LLM Client](./EPISODE_04_LLM_CLIENT.md)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
misc/tutorials/EPISODE_04_LLM_CLIENT.md DELETED
@@ -1,377 +0,0 @@
1
- # Episode 4: The LLM Client
2
-
3
- **Duration**: 30 minutes
4
- **What to Build**: `agent_framework/llm.py`
5
- **Target Audience**: Intermediate Python developers
6
-
7
- ---
8
-
9
- ## Episode Structure
10
-
11
- ### 1. Hook (2 min)
12
- **Show what we'll build:**
13
- - Complete LLM client abstraction
14
- - Request/response models
15
- - Message format conversion
16
-
17
- **Hook Statement**: "Today we'll build the bridge between our data models and the LLM API. This client will handle all the complexity of API communication."
18
-
19
- ---
20
-
21
- ### 2. Problem (3 min)
22
- **Why do we need an LLM client abstraction?**
23
-
24
- **The Challenge:**
25
- - API format is different from our models
26
- - Need to convert back and forth
27
- - Error handling is complex
28
- - Multiple providers have different formats
29
-
30
- **The Solution:**
31
- - Request/response models
32
- - Conversion functions
33
- - Unified interface
34
- - Error handling built-in
35
-
36
- ---
37
-
38
- ### 3. Concept: Request/Response Pattern (5 min)
39
-
40
- **The Pattern:**
41
- 1. Create `LlmRequest` from our models
42
- 2. Convert to API format
43
- 3. Call API
44
- 4. Parse response into `LlmResponse`
45
- 5. Return our models
46
-
47
- **Benefits:**
48
- - Type safety
49
- - Easy to test
50
- - Provider agnostic
51
- - Clear data flow
52
-
53
- ---
54
-
55
- ### 4. Live Coding: Building the Client (20 min)
56
-
57
- #### Step 1: Request Model (3 min)
58
- ```python
59
- from pydantic import BaseModel, Field
60
- from typing import List, Optional
61
- from .models import ContentItem
62
- from .tools import BaseTool
63
-
64
- class LlmRequest(BaseModel):
65
- """Request object for LLM calls."""
66
- instructions: List[str] = Field(default_factory=list)
67
- contents: List[ContentItem] = Field(default_factory=list)
68
- tools: List[BaseTool] = Field(default_factory=list)
69
- tool_choice: Optional[str] = 'auto'
70
- ```
71
-
72
- **Key Fields:**
73
- - `instructions`: System messages
74
- - `contents`: Conversation history
75
- - `tools`: Available tools
76
- - `tool_choice`: Force tool usage or not
77
-
78
- **Live Coding**: Build LlmRequest
79
-
80
- ---
81
-
82
- #### Step 2: Response Model (2 min)
83
- ```python
84
- class LlmResponse(BaseModel):
85
- """Response object from LLM calls."""
86
- content: List[ContentItem] = Field(default_factory=list)
87
- error_message: Optional[str] = None
88
- usage_metadata: Dict[str, Any] = Field(default_factory=dict)
89
- ```
90
-
91
- **Key Fields:**
92
- - `content`: Messages and tool calls
93
- - `error_message`: Error if any
94
- - `usage_metadata`: Token usage
95
-
96
- **Live Coding**: Build LlmResponse
97
-
98
- ---
99
-
100
- #### Step 3: build_messages() Function (5 min)
101
-
102
- **Purpose**: Convert our models to API format
103
-
104
- **Implementation:**
105
- ```python
106
- import json
107
- from .models import Message, ToolCall, ToolResult
108
-
109
- def build_messages(request: LlmRequest) -> List[dict]:
110
- """Convert LlmRequest to API message format."""
111
- messages = []
112
-
113
- # Add system instructions
114
- for instruction in request.instructions:
115
- messages.append({"role": "system", "content": instruction})
116
-
117
- # Convert content items
118
- for item in request.contents:
119
- if isinstance(item, Message):
120
- messages.append({"role": item.role, "content": item.content})
121
-
122
- elif isinstance(item, ToolCall):
123
- tool_call_dict = {
124
- "id": item.tool_call_id,
125
- "type": "function",
126
- "function": {
127
- "name": item.name,
128
- "arguments": json.dumps(item.arguments)
129
- }
130
- }
131
- # Append to previous assistant message if exists
132
- if messages and messages[-1]["role"] == "assistant":
133
- messages[-1].setdefault("tool_calls", []).append(tool_call_dict)
134
- else:
135
- messages.append({
136
- "role": "assistant",
137
- "content": None,
138
- "tool_calls": [tool_call_dict]
139
- })
140
-
141
- elif isinstance(item, ToolResult):
142
- messages.append({
143
- "role": "tool",
144
- "tool_call_id": item.tool_call_id,
145
- "content": str(item.content[0]) if item.content else ""
146
- })
147
-
148
- return messages
149
- ```
150
-
151
- **Key Points:**
152
- - System messages first
153
- - Tool calls attach to assistant messages
154
- - Tool results use "tool" role
155
- - JSON stringify arguments
156
-
157
- **Live Coding**: Build build_messages()
158
-
159
- ---
160
-
161
- #### Step 4: LlmClient Class (5 min)
162
- ```python
163
- from litellm import acompletion
164
- from typing import Dict, Any
165
-
166
- class LlmClient:
167
- """Client for LLM API calls using LiteLLM."""
168
-
169
- def __init__(self, model: str, **config):
170
- self.model = model
171
- self.config = config
172
-
173
- async def generate(self, request: LlmRequest) -> LlmResponse:
174
- """Generate a response from the LLM."""
175
- try:
176
- messages = self._build_messages(request)
177
- tools = [t.tool_definition for t in request.tools] if request.tools else None
178
-
179
- response = await acompletion(
180
- model=self.model,
181
- messages=messages,
182
- tools=tools,
183
- **({"tool_choice": request.tool_choice}
184
- if request.tool_choice else {}),
185
- **self.config
186
- )
187
-
188
- return self._parse_response(response)
189
- except Exception as e:
190
- return LlmResponse(error_message=str(e))
191
-
192
- def _build_messages(self, request: LlmRequest) -> List[dict]:
193
- """Convert LlmRequest to API message format."""
194
- return build_messages(request)
195
- ```
196
-
197
- **Key Points:**
198
- - Wraps LiteLLM
199
- - Handles tools
200
- - Error handling
201
- - Configurable
202
-
203
- **Live Coding**: Build LlmClient
204
-
205
- ---
206
-
207
- #### Step 5: _parse_response() Method (5 min)
208
- ```python
209
- def _parse_response(self, response) -> LlmResponse:
210
- """Convert API response to LlmResponse."""
211
- choice = response.choices[0]
212
- content_items = []
213
-
214
- # Parse message content
215
- if choice.message.content:
216
- content_items.append(Message(
217
- role="assistant",
218
- content=choice.message.content
219
- ))
220
-
221
- # Parse tool calls
222
- if choice.message.tool_calls:
223
- for tc in choice.message.tool_calls:
224
- content_items.append(ToolCall(
225
- tool_call_id=tc.id,
226
- name=tc.function.name,
227
- arguments=json.loads(tc.function.arguments)
228
- ))
229
-
230
- return LlmResponse(
231
- content=content_items,
232
- usage_metadata={
233
- "input_tokens": response.usage.prompt_tokens,
234
- "output_tokens": response.usage.completion_tokens,
235
- }
236
- )
237
- ```
238
-
239
- **Key Points:**
240
- - Extract content and tool calls
241
- - Parse JSON arguments
242
- - Track token usage
243
- - Return our models
244
-
245
- **Live Coding**: Build _parse_response()
246
-
247
- ---
248
-
249
- ### 5. Testing the Client (3 min)
250
-
251
- **Test Basic Call:**
252
- ```python
253
- from agent_framework.llm import LlmClient, LlmRequest
254
- from agent_framework.models import Message
255
-
256
- client = LlmClient(model="gpt-4o-mini")
257
-
258
- request = LlmRequest(
259
- instructions=["You are helpful."],
260
- contents=[Message(role="user", content="Hello!")]
261
- )
262
-
263
- response = await client.generate(request)
264
- print(response.content[0].content) # "Hello! How can I help?"
265
- ```
266
-
267
- **Test with Tools:**
268
- ```python
269
- from agent_framework.tools import tool
270
-
271
- @tool
272
- def calculator(expression: str) -> str:
273
- """Evaluate a math expression."""
274
- return str(eval(expression))
275
-
276
- request = LlmRequest(
277
- instructions=["Use tools when needed."],
278
- contents=[Message(role="user", content="What is 2+2?")],
279
- tools=[calculator]
280
- )
281
-
282
- response = await client.generate(request)
283
- # Should contain ToolCall for calculator
284
- ```
285
-
286
- ---
287
-
288
- ### 6. Error Handling (2 min)
289
-
290
- **Built-in Error Handling:**
291
- ```python
292
- # Invalid API key
293
- response = await client.generate(request)
294
- if response.error_message:
295
- print(f"Error: {response.error_message}")
296
- ```
297
-
298
- **Common Errors:**
299
- - Authentication errors
300
- - Rate limits
301
- - Invalid tool definitions
302
- - Network timeouts
303
-
304
- ---
305
-
306
- ### 7. Demo: Complete Client (2 min)
307
-
308
- **Show:**
309
- - Basic message
310
- - Multi-turn conversation
311
- - Tool calls
312
- - Error handling
313
-
314
- ---
315
-
316
- ### 8. Next Steps (1 min)
317
-
318
- **Preview Episode 5:**
319
- - Building the agent loop
320
- - Think-Act-Observe cycle
321
- - Execution context management
322
-
323
- **What We Built:**
324
- - Complete LLM client
325
- - Request/response models
326
- - Message conversion
327
-
328
- ---
329
-
330
- ## Key Takeaways
331
-
332
- 1. **Request/Response pattern** provides clean abstraction
333
- 2. **build_messages()** converts our models to API format
334
- 3. **Error handling** is built into the response
335
- 4. **Tool support** is integrated
336
- 5. **Provider agnostic** via LiteLLM
337
-
338
- ---
339
-
340
- ## Common Mistakes
341
-
342
- **Mistake 1: Forgetting to JSON stringify arguments**
343
- ```python
344
- # Wrong
345
- "arguments": item.arguments # Dict, not string!
346
-
347
- # Right
348
- "arguments": json.dumps(item.arguments) # JSON string
349
- ```
350
-
351
- **Mistake 2: Not handling tool calls in response**
352
- ```python
353
- # Wrong - misses tool calls
354
- if choice.message.content:
355
- return choice.message.content
356
-
357
- # Right - check both
358
- if choice.message.content:
359
- # Add message
360
- if choice.message.tool_calls:
361
- # Add tool calls
362
- ```
363
-
364
- ---
365
-
366
- ## Exercises
367
-
368
- 1. Add streaming support
369
- 2. Implement retry logic
370
- 3. Add response caching
371
- 4. Support multiple response formats
372
-
373
- ---
374
-
375
- **Previous Episode**: [Episode 3: Core Data Models](./EPISODE_03_DATA_MODELS.md)
376
- **Next Episode**: [Episode 5: The Basic Agent Loop](./EPISODE_05_AGENT_LOOP.md)
377
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
misc/tutorials/EPISODE_05_AGENT_LOOP.md DELETED
@@ -1,356 +0,0 @@
1
- # Episode 5: The Basic Agent Loop
2
-
3
- **Duration**: 35 minutes
4
- **What to Build**: Basic `agent_framework/agent.py` (no tools yet)
5
- **Target Audience**: Intermediate Python developers
6
-
7
- ---
8
-
9
- ## Episode Structure
10
-
11
- ### 1. Hook (2 min)
12
- **Show what we'll build:**
13
- - Agent that can have conversations
14
- - Multi-step reasoning
15
- - Execution tracking
16
-
17
- **Hook Statement**: "Today we'll build the core agent loop - the brain that orchestrates everything. This is where the magic happens."
18
-
19
- ---
20
-
21
- ### 2. Problem (3 min)
22
- **Why do we need an agent loop?**
23
-
24
- **The Challenge:**
25
- - LLMs are stateless
26
- - Need to maintain conversation history
27
- - Need to track execution steps
28
- - Need to know when to stop
29
-
30
- **The Solution:**
31
- - Think-Act-Observe cycle
32
- - ExecutionContext for state
33
- - Event recording
34
- - Step-by-step execution
35
-
36
- ---
37
-
38
- ### 3. Concept: Think-Act-Observe Cycle (5 min)
39
-
40
- **The Cycle:**
41
- 1. **Think**: Call LLM with context
42
- 2. **Act**: Execute tools if needed (next episode)
43
- 3. **Observe**: Process results and continue
44
-
45
- **Why This Pattern?**
46
- - Mimics human reasoning
47
- - Allows multi-step problem solving
48
- - Clear separation of concerns
49
- - Easy to debug
50
-
51
- ---
52
-
53
- ### 4. Live Coding: Building the Agent (25 min)
54
-
55
- #### Step 1: Agent.__init__ (3 min)
56
- ```python
57
- from dataclasses import dataclass
58
- from typing import List, Optional
59
- from .llm import LlmClient
60
- from .models import ExecutionContext
61
-
62
- class Agent:
63
- """Agent that can reason and use tools to solve tasks."""
64
-
65
- def __init__(
66
- self,
67
- model: LlmClient,
68
- tools: List[BaseTool] = None,
69
- instructions: str = "",
70
- max_steps: int = 5,
71
- name: str = "agent"
72
- ):
73
- self.model = model
74
- self.instructions = instructions
75
- self.max_steps = max_steps
76
- self.name = name
77
- self.tools = tools or []
78
- ```
79
-
80
- **Key Parameters:**
81
- - `model`: LLM client
82
- - `tools`: Available tools (empty for now)
83
- - `instructions`: System prompt
84
- - `max_steps`: Safety limit
85
- - `name`: Agent identifier
86
-
87
- **Live Coding**: Build __init__
88
-
89
- ---
90
-
91
- #### Step 2: Agent.run() Method (5 min)
92
- ```python
93
- from .models import Event, Message, AgentResult
94
-
95
- async def run(
96
- self,
97
- user_input: str,
98
- context: ExecutionContext = None
99
- ) -> AgentResult:
100
- """Execute the agent."""
101
- # Create or reuse context
102
- if context is None:
103
- context = ExecutionContext()
104
-
105
- # Add user input as the first event
106
- user_event = Event(
107
- execution_id=context.execution_id,
108
- author="user",
109
- content=[Message(role="user", content=user_input)]
110
- )
111
- context.add_event(user_event)
112
-
113
- # Execute steps until completion or max steps reached
114
- while not context.final_result and context.current_step < self.max_steps:
115
- await self.step(context)
116
- # Check if the last event is a final response
117
- last_event = context.events[-1]
118
- if self._is_final_response(last_event):
119
- context.final_result = self._extract_final_result(last_event)
120
-
121
- return AgentResult(output=context.final_result, context=context)
122
- ```
123
-
124
- **Key Points:**
125
- - Creates context if needed
126
- - Records user input
127
- - Loops until done
128
- - Checks for completion
129
-
130
- **Live Coding**: Build run()
131
-
132
- ---
133
-
134
- #### Step 3: Agent.step() Method (5 min)
135
- ```python
136
- async def step(self, context: ExecutionContext):
137
- """Execute one step of the agent loop."""
138
-
139
- # Prepare LLM request
140
- llm_request = self._prepare_llm_request(context)
141
-
142
- # Get LLM's decision (Think)
143
- llm_response = await self.think(llm_request)
144
-
145
- # Record LLM response as an event
146
- response_event = Event(
147
- execution_id=context.execution_id,
148
- author=self.name,
149
- content=llm_response.content,
150
- )
151
- context.add_event(response_event)
152
-
153
- # Execute tools if needed (Act) - next episode!
154
- # For now, just record the response
155
-
156
- context.increment_step()
157
- ```
158
-
159
- **Key Points:**
160
- - Prepares request
161
- - Calls LLM
162
- - Records response
163
- - Increments step
164
-
165
- **Live Coding**: Build step()
166
-
167
- ---
168
-
169
- #### Step 4: _prepare_llm_request() Method (4 min)
170
- ```python
171
- from .llm import LlmRequest
172
-
173
- def _prepare_llm_request(self, context: ExecutionContext) -> LlmRequest:
174
- """Convert execution context to LLM request."""
175
- # Flatten events into content items
176
- flat_contents = []
177
- for event in context.events:
178
- flat_contents.extend(event.content)
179
-
180
- return LlmRequest(
181
- instructions=[self.instructions] if self.instructions else [],
182
- contents=flat_contents,
183
- tools=self.tools, # Empty for now
184
- tool_choice=None # No tools yet
185
- )
186
- ```
187
-
188
- **Key Points:**
189
- - Flattens events
190
- - Includes instructions
191
- - Adds conversation history
192
- - No tools yet
193
-
194
- **Live Coding**: Build _prepare_llm_request()
195
-
196
- ---
197
-
198
- #### Step 5: think() Method (2 min)
199
- ```python
200
- async def think(self, llm_request: LlmRequest) -> LlmResponse:
201
- """Get LLM's response/decision."""
202
- return await self.model.generate(llm_request)
203
- ```
204
-
205
- **Simple wrapper** around LLM client.
206
-
207
- **Live Coding**: Build think()
208
-
209
- ---
210
-
211
- #### Step 6: Completion Detection (3 min)
212
- ```python
213
- def _is_final_response(self, event: Event) -> bool:
214
- """Check if this event contains a final response."""
215
-  # For now, an event with no tool calls and no tool results is final
216
- has_tool_calls = any(isinstance(c, ToolCall) for c in event.content)
217
- has_tool_results = any(isinstance(c, ToolResult) for c in event.content)
218
- return not has_tool_calls and not has_tool_results
219
-
220
- def _extract_final_result(self, event: Event) -> str:
221
- """Extract final result from event."""
222
- for item in event.content:
223
- if isinstance(item, Message) and item.role == "assistant":
224
- return item.content
225
- return None
226
- ```
227
-
228
- **Key Points:**
229
- - Checks for tool activity
230
- - Extracts message content
231
- - Simple for now
232
-
233
- **Live Coding**: Build completion detection
234
-
235
- ---
236
-
237
- #### Step 7: AgentResult (3 min)
238
- ```python
239
- @dataclass
240
- class AgentResult:
241
- """Result of an agent execution."""
242
- output: str | BaseModel
243
- context: ExecutionContext
244
- status: Literal["complete", "pending", "error"] = "complete"
245
- ```
246
-
247
- **Simple result container.**
248
-
249
- **Live Coding**: Build AgentResult
250
-
251
- ---
252
-
253
- ### 5. Testing the Agent (3 min)
254
-
255
- **Basic Conversation:**
256
- ```python
257
- from agent_framework import Agent, LlmClient
258
-
259
- agent = Agent(
260
- model=LlmClient(model="gpt-4o-mini"),
261
- instructions="You are a helpful assistant.",
262
- max_steps=5
263
- )
264
-
265
- result = await agent.run("Hello! My name is Alice.")
266
- print(result.output)
267
- print(f"Steps: {result.context.current_step}")
268
- ```
269
-
270
- **Multi-Turn:**
271
- ```python
272
- # First turn
273
- result1 = await agent.run("My name is Alice")
274
- print(result1.output)
275
-
276
- # Second turn (new context - doesn't remember)
277
- result2 = await agent.run("What's my name?")
278
- print(result2.output) # Doesn't know!
279
- ```
280
-
281
- **Note**: Session persistence comes later!
282
-
283
- ---
284
-
285
- ### 6. Demo: Working Agent (2 min)
286
-
287
- **Show:**
288
- - Basic conversation
289
- - Multi-step reasoning
290
- - Execution trace
291
- - Step counting
292
-
293
- ---
294
-
295
- ### 7. Next Steps (1 min)
296
-
297
- **Preview Episode 6:**
298
- - Building the tool system
299
- - Creating tools
300
- - Tool definitions
301
-
302
- **What We Built:**
303
- - Basic agent loop
304
- - Conversation handling
305
- - Execution tracking
306
-
307
- ---
308
-
309
- ## Key Takeaways
310
-
311
- 1. **Think-Act-Observe** cycle is the core pattern
312
- 2. **ExecutionContext** tracks all state
313
- 3. **Events** record every step
314
- 4. **Max steps** prevents infinite loops
315
- 5. **Completion detection** knows when to stop
316
-
317
- ---
318
-
319
- ## Common Mistakes
320
-
321
- **Mistake 1: Not incrementing step**
322
- ```python
323
- # Wrong - infinite loop!
324
- while not context.final_result:
325
- await self.step(context)
326
-  # Missing inside step(): context.increment_step()
327
-
328
- # Right
329
- await self.step(context) # step() increments internally
330
- ```
331
-
332
- **Mistake 2: Not checking max steps**
333
- ```python
334
- # Wrong - can loop forever
335
- while not context.final_result:
336
- await self.step(context)
337
-
338
- # Right
339
- while not context.final_result and context.current_step < self.max_steps:
340
- await self.step(context)
341
- ```
342
-
343
- ---
344
-
345
- ## Exercises
346
-
347
- 1. Add verbose logging
348
- 2. Implement step-by-step trace display
349
- 3. Add error handling for LLM failures
350
- 4. Create a "thinking" indicator
351
-
352
- ---
353
-
354
- **Previous Episode**: [Episode 4: The LLM Client](./EPISODE_04_LLM_CLIENT.md)
355
- **Next Episode**: [Episode 6: Building the Tool System](./EPISODE_06_TOOL_SYSTEM.md)
356
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
misc/tutorials/EPISODE_06_TOOL_SYSTEM.md DELETED
@@ -1,631 +0,0 @@
1
- # Episode 6: Building the Tool System
2
-
3
- **Duration**: 40 minutes
4
- **What to Build**: `agent_framework/tools.py`
5
- **Target Audience**: Intermediate Python developers
6
-
7
- ---
8
-
9
- ## Episode Structure
10
-
11
- ### 1. Hook (2 min)
12
- **Show what we'll build:**
13
- - Tool system that wraps functions
14
- - Automatic schema generation
15
- - @tool decorator
16
- - Tool confirmation for dangerous operations
17
-
18
- **Hook Statement**: "Today we'll build the system that lets LLMs use Python functions. This is what makes agents powerful - they can actually do things!"
19
-
20
- ---
21
-
22
- ### 2. Problem (3 min)
23
- **Why do we need a tool system?**
24
-
25
- **The Challenge:**
26
- - LLMs can't execute code directly
27
- - Need to bridge Python functions to LLM
28
- - Need to describe functions to LLM
29
- - Need to handle execution safely
30
- - Some tools are dangerous (delete file, send email)
31
-
32
- **The Solution:**
33
- - BaseTool abstract interface
34
- - FunctionTool wrapper
35
- - Automatic schema generation
36
- - @tool decorator for ease
37
- - Confirmation workflow for safety
38
-
39
- ---
40
-
41
- ### 3. Concept: Tool Architecture (5 min)
42
-
43
- **The Flow:**
44
- 1. Define Python function
45
- 2. Wrap as BaseTool
46
- 3. Generate JSON schema
47
- 4. Send to LLM
48
- 5. LLM calls tool
49
- 6. Check if confirmation required
50
- 7. Execute function (or wait for approval)
51
- 8. Return result
52
-
53
- **Key Components:**
54
- - BaseTool: Abstract interface
55
- - FunctionTool: Wraps functions
56
- - Schema generation: From type hints
57
- - Decorator: Syntactic sugar
58
- - Confirmation: Safety for dangerous tools
59
-
60
- ---
61
-
62
- ### 4. Live Coding: Building the Tool System (30 min)
63
-
64
- #### Step 1: BaseTool Abstract Class (7 min)
65
- ```python
66
- from abc import ABC, abstractmethod
67
- from typing import Dict, Any
68
- from .models import ExecutionContext
69
-
70
- class BaseTool(ABC):
71
- """Abstract base class for all tools."""
72
-
73
- def __init__(
74
- self,
75
- name: str = None,
76
- description: str = None,
77
- tool_definition: Dict[str, Any] = None,
78
- # Confirmation support
79
- requires_confirmation: bool = False,
80
- confirmation_message_template: str = None
81
- ):
82
- self.name = name or self.__class__.__name__
83
- self.description = description or self.__doc__ or ""
84
- self._tool_definition = tool_definition
85
- self.requires_confirmation = requires_confirmation
86
- self.confirmation_message_template = confirmation_message_template or (
87
- "The agent wants to execute '{name}' with arguments: {arguments}. "
88
- "Do you approve?"
89
- )
90
-
91
- @property
92
- def tool_definition(self) -> Dict[str, Any] | None:
93
- return self._tool_definition
94
-
95
- @abstractmethod
96
- async def execute(self, context: ExecutionContext, **kwargs) -> Any:
97
- pass
98
-
99
- async def __call__(self, context: ExecutionContext, **kwargs) -> Any:
100
- return await self.execute(context, **kwargs)
101
-
102
- def get_confirmation_message(self, arguments: dict[str, Any]) -> str:
103
- """Generate a confirmation message for this tool call."""
104
- return self.confirmation_message_template.format(
105
- name=self.name,
106
- arguments=arguments
107
- )
108
- ```
109
-
110
- **Key Points:**
111
- - Abstract base class
112
- - Name and description
113
- - Tool definition (JSON schema)
114
- - Execute method
115
- - **NEW: `requires_confirmation`** - marks dangerous tools
116
- - **NEW: `confirmation_message_template`** - customizable message
117
- - **NEW: `get_confirmation_message()`** - generates message for user
118
-
119
- **Why Confirmation?**
120
- - Some tools are dangerous (delete files, send emails)
121
- - Users should approve before execution
122
- - Allows argument modification
123
-
124
- **Live Coding**: Build BaseTool with confirmation
125
-
126
- ---
127
-
128
- #### Step 2: Schema Generation Utilities (5 min)
129
- ```python
130
- import inspect
131
- # These helpers live in agent_framework/utils.py; tools.py imports them from there
132
-
133
- def function_to_input_schema(func) -> dict:
134
- """Convert function signature to JSON Schema."""
135
- type_map = {
136
- str: "string",
137
- int: "integer",
138
- float: "number",
139
- bool: "boolean",
140
- list: "array",
141
- dict: "object",
142
- }
143
-
144
- signature = inspect.signature(func)
145
- parameters = {}
146
-
147
- for param in signature.parameters.values():
148
- param_type = type_map.get(param.annotation, "string")
149
- parameters[param.name] = {"type": param_type}
150
-
151
- required = [
152
- param.name
153
- for param in signature.parameters.values()
154
- if param.default == inspect._empty
155
- ]
156
-
157
- return {
158
- "type": "object",
159
- "properties": parameters,
160
- "required": required,
161
- }
162
-
163
- def format_tool_definition(name: str, description: str, parameters: dict) -> dict:
164
- """Format tool definition in OpenAI function calling format."""
165
- return {
166
- "type": "function",
167
- "function": {
168
- "name": name,
169
- "description": description,
170
- "parameters": parameters,
171
- },
172
- }
173
- ```
174
-
175
- **Key Points:**
176
- - Inspects function signature
177
- - Maps Python types to JSON Schema
178
- - Handles required parameters
179
- - Formats for OpenAI
180
-
181
- **Live Coding**: Build schema generation
182
-
183
- ---
184
-
185
- #### Step 3: FunctionTool Class (8 min)
186
- ```python
187
- class FunctionTool(BaseTool):
188
- """Wraps a Python function as a BaseTool."""
189
-
190
- def __init__(
191
- self,
192
- func: Callable,
193
- name: str = None,
194
- description: str = None,
195
- tool_definition: Dict[str, Any] = None,
196
- requires_confirmation: bool = False,
197
- confirmation_message_template: str = None
198
- ):
199
- self.func = func
200
- self.needs_context = 'context' in inspect.signature(func).parameters
201
-
202
- self.name = name or func.__name__
203
- self.description = description or (func.__doc__ or "").strip()
204
- tool_definition = tool_definition or self._generate_definition()
205
-
206
- super().__init__(
207
- name=self.name,
208
- description=self.description,
209
- tool_definition=tool_definition,
210
- requires_confirmation=requires_confirmation,
211
- confirmation_message_template=confirmation_message_template
212
- )
213
-
214
- async def execute(self, context: ExecutionContext = None, **kwargs) -> Any:
215
- """Execute the wrapped function.
216
-
217
- Context is only required if the wrapped function has a 'context' parameter.
218
- """
219
- if self.needs_context:
220
- if context is None:
221
- raise ValueError(
222
- f"Tool '{self.name}' requires a context parameter. "
223
- f"Please provide an ExecutionContext instance."
224
- )
225
- result = self.func(context=context, **kwargs)
226
- else:
227
- result = self.func(**kwargs)
228
-
229
- # Handle both sync and async functions
230
- if inspect.iscoroutine(result):
231
- return await result
232
- return result
233
-
234
- def _generate_definition(self) -> Dict[str, Any]:
235
- """Generate tool definition from function signature."""
236
- parameters = function_to_input_schema(self.func)
237
- return format_tool_definition(self.name, self.description, parameters)
238
- ```
239
-
240
- **Key Points:**
241
- - Wraps any function
242
- - Detects context parameter
243
- - Handles sync/async
244
- - Auto-generates schema
245
- - **Passes confirmation params to parent**
246
-
247
- **Live Coding**: Build FunctionTool
248
-
249
- ---
250
-
251
- #### Step 4: @tool Decorator with Confirmation (6 min)
252
- ```python
253
- def tool(
254
- func: Callable = None,
255
- *,
256
- name: str = None,
257
- description: str = None,
258
- tool_definition: Dict[str, Any] = None,
259
- requires_confirmation: bool = False,
260
- confirmation_message: str = None
261
- ):
262
- """Decorator to convert a function into a FunctionTool.
263
-
264
- Usage:
265
- @tool
266
- def my_function(x: int) -> int:
267
- return x * 2
268
-
269
- # Or with parameters:
270
- @tool(name="custom_name", description="Custom description")
271
- def my_function(x: int) -> int:
272
- return x * 2
273
-
274
- # With confirmation:
275
- @tool(requires_confirmation=True, confirmation_message="Delete file?")
276
- def delete_file(filename: str) -> str:
277
- ...
278
- """
279
- def decorator(f: Callable) -> FunctionTool:
280
- return FunctionTool(
281
- func=f,
282
- name=name,
283
- description=description,
284
- tool_definition=tool_definition,
285
- requires_confirmation=requires_confirmation,
286
- confirmation_message_template=confirmation_message
287
- )
288
-
289
- if func is not None:
290
- return decorator(func)
291
- return decorator
292
- ```
293
-
294
- **Usage Examples:**
295
-
296
- **Simple Tool (no confirmation):**
297
- ```python
298
- @tool
299
- def calculator(expression: str) -> str:
300
- """Evaluate a math expression."""
301
- return str(eval(expression))
302
- ```
303
-
304
- **Dangerous Tool (requires confirmation):**
305
- ```python
306
- @tool(
307
- requires_confirmation=True,
308
- confirmation_message="Delete file '{arguments[filename]}'? This cannot be undone."
309
- )
310
- def delete_file(filename: str) -> str:
311
- """Delete a file from the filesystem."""
312
- import os
313
- os.remove(filename)
314
- return f"Deleted {filename}"
315
- ```
316
-
317
- **Custom Confirmation Message:**
318
- ```python
319
- @tool(
320
- requires_confirmation=True,
321
- confirmation_message="Send email to {arguments[recipient]}? Subject: {arguments[subject]}"
322
- )
323
- def send_email(recipient: str, subject: str, body: str) -> str:
324
- """Send an email."""
325
- # ... send email ...
326
- return "Email sent"
327
- ```
328
-
329
- **Live Coding**: Build @tool decorator with confirmation
330
-
331
- ---
332
-
333
- #### Step 5: Testing Tools (4 min)
334
-
335
- **Simple Tool:**
336
- ```python
337
- @tool
338
- def add(a: int, b: int) -> int:
339
- """Add two numbers."""
340
- return a + b
341
-
342
- print(add.name) # "add"
343
- print(add.requires_confirmation) # False
344
- result = await add.execute(context=None, a=2, b=3)
345
- print(result) # 5
346
- ```
347
-
348
- **Tool with Context:**
349
- ```python
350
- @tool
351
- def get_step_count(context: ExecutionContext) -> int:
352
- """Get current step count."""
353
- return context.current_step
354
-
355
- print(get_step_count.needs_context) # True
356
- ```
357
-
358
- **Tool with Confirmation:**
359
- ```python
360
- @tool(
361
- requires_confirmation=True,
362
- confirmation_message="Delete '{arguments[filename]}'?"
363
- )
364
- def delete_file(filename: str) -> str:
365
- """Delete a file."""
366
- return f"Deleted {filename}"
367
-
368
- print(delete_file.requires_confirmation) # True
369
-
370
- # Generate confirmation message
371
- message = delete_file.get_confirmation_message({"filename": "secret.txt"})
372
- print(message) # "Delete 'secret.txt'?"
373
- ```
374
-
375
- **Test Schema:**
376
- ```python
377
- print(add.tool_definition)
378
- # {
379
- # "type": "function",
380
- # "function": {
381
- # "name": "add",
382
- # "description": "Add two numbers.",
383
- # "parameters": {
384
- # "type": "object",
385
- # "properties": {
386
- # "a": {"type": "integer"},
387
- # "b": {"type": "integer"}
388
- # },
389
- # "required": ["a", "b"]
390
- # }
391
- # }
392
- # }
393
- ```
394
-
395
- ---
396
-
397
- ### 5. Demo: Creating Tools (2 min)
398
-
399
- **Show:**
400
- - Simple calculator tool
401
- - Tool with context
402
- - Tool with confirmation
403
- - Schema generation
404
- - Confirmation message generation
405
-
406
- ---
407
-
408
- ### 6. Next Steps (1 min)
409
-
410
- **Preview Episode 7:**
411
- - Integrating tools into agent
412
- - Tool execution in agent loop
413
- - **Handling pending tool calls**
414
- - **Processing confirmations**
415
-
416
- **What We Built:**
417
- - Complete tool system
418
- - Schema generation
419
- - @tool decorator
420
- - Confirmation workflow
421
-
422
- ---
423
-
424
- ## Key Takeaways
425
-
426
- 1. **BaseTool** provides abstract interface
427
- 2. **FunctionTool** wraps any function
428
- 3. **Schema generation** from type hints
429
- 4. **@tool decorator** for ease of use
430
- 5. **Context-aware** tools supported
431
- 6. **`requires_confirmation`** marks dangerous tools
432
- 7. **`get_confirmation_message()`** generates user-facing message
433
-
434
- ---
435
-
436
- ## Common Mistakes
437
-
438
- **Mistake 1: Forgetting docstring**
439
- ```python
440
- # Wrong - no description for LLM
441
- @tool
442
- def add(a: int, b: int) -> int:
443
- return a + b
444
-
445
- # Right - LLM knows what it does
446
- @tool
447
- def add(a: int, b: int) -> int:
448
- """Add two numbers together."""
449
- return a + b
450
- ```
451
-
452
- **Mistake 2: Not handling async functions**
453
- ```python
454
- # Wrong - doesn't await
455
- result = self.func(**kwargs)
456
- return result
457
-
458
- # Right - checks if coroutine
459
- result = self.func(**kwargs)
460
- if inspect.iscoroutine(result):
461
- return await result
462
- return result
463
- ```
464
-
465
- **Mistake 3: Forgetting confirmation for dangerous tools**
466
- ```python
467
- # Wrong - dangerous tool without confirmation
468
- @tool
469
- def delete_file(filename: str) -> str:
470
- os.remove(filename)
471
- return "Deleted"
472
-
473
- # Right - requires user approval
474
- @tool(requires_confirmation=True)
475
- def delete_file(filename: str) -> str:
476
- os.remove(filename)
477
- return "Deleted"
478
- ```
479
-
480
- **Mistake 4: Bad confirmation message**
481
- ```python
482
- # Wrong - generic message
483
- @tool(
484
- requires_confirmation=True,
485
- confirmation_message="Are you sure?"
486
- )
487
- def delete_file(filename: str) -> str: ...
488
-
489
- # Right - specific and informative
490
- @tool(
491
- requires_confirmation=True,
492
- confirmation_message="Delete file '{arguments[filename]}'? This cannot be undone."
493
- )
494
- def delete_file(filename: str) -> str: ...
495
- ```
496
-
497
- ---
498
-
499
- ## Exercises
500
-
501
- 1. **Add `requires_confirmation` to a tool**: Create a `send_email` tool that requires confirmation
502
- 2. **Custom confirmation message**: Create a message template that includes all arguments
503
- 3. **Create tool registry**: Build a `ToolRegistry` class that tracks all tools and their confirmation status
504
- 4. **Add validation**: Add a method to validate arguments before execution
505
-
506
- ---
507
-
508
- ## Complete tools.py File
509
-
510
- ```python
511
- """Tool system for the agent framework."""
512
-
513
- from abc import ABC, abstractmethod
514
- from typing import Dict, Any, Callable
515
- import inspect
516
- from .models import ExecutionContext
517
- from .utils import function_to_input_schema, format_tool_definition
518
-
519
-
520
- class BaseTool(ABC):
521
- """Abstract base class for all tools."""
522
-
523
- def __init__(
524
- self,
525
- name: str = None,
526
- description: str = None,
527
- tool_definition: Dict[str, Any] = None,
528
- requires_confirmation: bool = False,
529
- confirmation_message_template: str = None
530
- ):
531
- self.name = name or self.__class__.__name__
532
- self.description = description or self.__doc__ or ""
533
- self._tool_definition = tool_definition
534
- self.requires_confirmation = requires_confirmation
535
- self.confirmation_message_template = confirmation_message_template or (
536
- "The agent wants to execute '{name}' with arguments: {arguments}. "
537
- "Do you approve?"
538
- )
539
-
540
- @property
541
- def tool_definition(self) -> Dict[str, Any] | None:
542
- return self._tool_definition
543
-
544
- @abstractmethod
545
- async def execute(self, context: ExecutionContext, **kwargs) -> Any:
546
- pass
547
-
548
- async def __call__(self, context: ExecutionContext, **kwargs) -> Any:
549
- return await self.execute(context, **kwargs)
550
-
551
- def get_confirmation_message(self, arguments: dict[str, Any]) -> str:
552
- """Generate a confirmation message for this tool call."""
553
- return self.confirmation_message_template.format(
554
- name=self.name,
555
- arguments=arguments
556
- )
557
-
558
-
559
- class FunctionTool(BaseTool):
560
- """Wraps a Python function as a BaseTool."""
561
-
562
- def __init__(
563
- self,
564
- func: Callable,
565
- name: str = None,
566
- description: str = None,
567
- tool_definition: Dict[str, Any] = None,
568
- requires_confirmation: bool = False,
569
- confirmation_message_template: str = None
570
- ):
571
- self.func = func
572
- self.needs_context = 'context' in inspect.signature(func).parameters
573
-
574
- self.name = name or func.__name__
575
- self.description = description or (func.__doc__ or "").strip()
576
- tool_definition = tool_definition or self._generate_definition()
577
-
578
- super().__init__(
579
- name=self.name,
580
- description=self.description,
581
- tool_definition=tool_definition,
582
- requires_confirmation=requires_confirmation,
583
- confirmation_message_template=confirmation_message_template
584
- )
585
-
586
- async def execute(self, context: ExecutionContext = None, **kwargs) -> Any:
587
- if self.needs_context:
588
- if context is None:
589
- raise ValueError(f"Tool '{self.name}' requires a context parameter.")
590
- result = self.func(context=context, **kwargs)
591
- else:
592
- result = self.func(**kwargs)
593
-
594
- if inspect.iscoroutine(result):
595
- return await result
596
- return result
597
-
598
- def _generate_definition(self) -> Dict[str, Any]:
599
- parameters = function_to_input_schema(self.func)
600
- return format_tool_definition(self.name, self.description, parameters)
601
-
602
-
603
- def tool(
604
- func: Callable = None,
605
- *,
606
- name: str = None,
607
- description: str = None,
608
- tool_definition: Dict[str, Any] = None,
609
- requires_confirmation: bool = False,
610
- confirmation_message: str = None
611
- ):
612
- """Decorator to convert a function into a FunctionTool."""
613
- def decorator(f: Callable) -> FunctionTool:
614
- return FunctionTool(
615
- func=f,
616
- name=name,
617
- description=description,
618
- tool_definition=tool_definition,
619
- requires_confirmation=requires_confirmation,
620
- confirmation_message_template=confirmation_message
621
- )
622
-
623
- if func is not None:
624
- return decorator(func)
625
- return decorator
626
- ```
627
-
628
- ---
629
-
630
- **Previous Episode**: [Episode 5: The Basic Agent Loop](./EPISODE_05_AGENT_LOOP.md)
631
- **Next Episode**: [Episode 7: Tool Execution & Complete Agent](./EPISODE_07_TOOL_EXECUTION.md)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
misc/tutorials/EPISODE_07_TOOL_EXECUTION.md DELETED
@@ -1,594 +0,0 @@
1
- # Episode 7: Tool Execution & Complete Agent
2
-
3
- **Duration**: 45 minutes
4
- **What to Build**: Complete `agent_framework/agent.py` with tool execution and confirmation
5
- **Target Audience**: Intermediate Python developers
6
-
7
- ---
8
-
9
- ## Episode Structure
10
-
11
- ### 1. Hook (2 min)
12
- **Show what we'll build:**
13
- - Complete agent with tool execution
14
- - Calculator and web search working
15
- - Multi-step problem solving
16
- - Tool confirmation workflow
17
-
18
- **Hook Statement**: "Today we'll complete the agent by adding tool execution and safety confirmations. This is where everything comes together - the agent can now actually do things, safely!"
19
-
20
- ---
21
-
22
- ### 2. Problem (3 min)
23
- **Why do we need tool execution?**
24
-
25
- **The Challenge:**
26
- - LLM decides to use tools
27
- - Need to execute them
28
- - Handle results
29
- - Continue reasoning
30
- - Some tools are dangerous!
31
-
32
- **The Solution:**
33
- - `act()` method
34
- - Tool dispatch
35
- - Result handling
36
- - Error management
37
- - Confirmation workflow for safety
38
-
39
- ---
40
-
41
- ### 3. Concept: Tool Execution Flow (5 min)
42
-
43
- **The Flow:**
44
- 1. LLM returns tool calls
45
- 2. Agent finds tools by name
46
- 3. Check if confirmation required
47
- 4. If yes: pause and return pending calls
48
- 5. User submits confirmation
49
- 6. Execute approved tools
50
- 7. Record results
51
- 8. Continue reasoning
52
-
53
- **Key Steps:**
54
- - Parse tool calls from response
55
- - Map to tool instances
56
- - Check `requires_confirmation`
57
- - Handle pending state
58
- - Process confirmations
59
- - Create ToolResult objects
60
-
61
- ---
62
-
63
- ### 4. Live Coding: Completing the Agent (35 min)
64
-
65
- #### Step 1: Update AgentResult (3 min)
66
- ```python
67
- from dataclasses import dataclass
68
- from typing import Literal
69
- from pydantic import Field
70
-
71
- @dataclass
72
- class AgentResult:
73
- """Result of an agent execution."""
74
- output: str | BaseModel
75
- context: ExecutionContext
76
- status: Literal["complete", "pending", "error"] = "complete"
77
- pending_tool_calls: list[PendingToolCall] = Field(default_factory=list)
78
- ```
79
-
80
- **Key Points:**
81
- - `status`: Indicates if execution is complete, pending, or errored
82
- - `pending_tool_calls`: List of tools awaiting user confirmation
83
- - Enables pausing and resuming agent execution
84
-
85
- **Live Coding**: Update AgentResult
86
-
87
- ---
88
-
89
- #### Step 2: Update step() to Handle Tools (5 min)
90
- ```python
91
- async def step(self, context: ExecutionContext):
92
- """Execute one step of the agent loop."""
93
-
94
- # Process pending confirmations if both are present
95
- if ("pending_tool_calls" in context.state and
96
- "tool_confirmations" in context.state):
97
- confirmation_results = await self._process_confirmations(context)
98
-
99
- # Add results as an event so they appear in contents
100
- if confirmation_results:
101
- confirmation_event = Event(
102
- execution_id=context.execution_id,
103
- author=self.name,
104
- content=confirmation_results,
105
- )
106
- context.add_event(confirmation_event)
107
-
108
- # Clear processed state
109
- del context.state["pending_tool_calls"]
110
- del context.state["tool_confirmations"]
111
-
112
- llm_request = self._prepare_llm_request(context)
113
- llm_response = await self.think(llm_request)
114
-
115
- # Record LLM response
116
- response_event = Event(
117
- execution_id=context.execution_id,
118
- author=self.name,
119
- content=llm_response.content,
120
- )
121
- context.add_event(response_event)
122
-
123
- # Execute tools if the LLM requested any
124
- tool_calls = [c for c in llm_response.content if isinstance(c, ToolCall)]
125
- if tool_calls:
126
- tool_results = await self.act(context, tool_calls)
127
- tool_event = Event(
128
- execution_id=context.execution_id,
129
- author=self.name,
130
- content=tool_results,
131
- )
132
- context.add_event(tool_event)
133
-
134
- context.increment_step()
135
- ```
136
-
137
- **Key Changes:**
138
- - Check for pending confirmations at start
139
- - Process confirmations before LLM call
140
- - Clear processed state after handling
141
-
142
- **Live Coding**: Update step()
143
-
144
- ---
145
-
146
- #### Step 3: act() Method with Confirmation (10 min)
147
- ```python
148
- from .models import ToolResult, PendingToolCall
149
-
150
- async def act(
151
- self,
152
- context: ExecutionContext,
153
- tool_calls: List[ToolCall]
154
- ) -> List[ToolResult]:
155
- """Execute tool calls and return results."""
156
- tools_dict = {tool.name: tool for tool in self.tools}
157
- results = []
158
- pending_calls = [] # Track tools needing confirmation
159
-
160
- for tool_call in tool_calls:
161
- if tool_call.name not in tools_dict:
162
- raise ValueError(f"Tool '{tool_call.name}' not found")
163
-
164
- tool = tools_dict[tool_call.name]
165
-
166
- # Check if confirmation is required
167
- if tool.requires_confirmation:
168
- pending = PendingToolCall(
169
- tool_call=tool_call,
170
- confirmation_message=tool.get_confirmation_message(
171
- tool_call.arguments
172
- )
173
- )
174
- pending_calls.append(pending)
175
- continue # Skip execution, wait for confirmation
176
-
177
- # Execute tool if no confirmation needed
178
- try:
179
- tool_response = await tool(context, **tool_call.arguments)
180
- status = "success"
181
- except Exception as e:
182
- tool_response = str(e)
183
- status = "error"
184
-
185
- tool_result = ToolResult(
186
- tool_call_id=tool_call.tool_call_id,
187
- name=tool_call.name,
188
- status=status,
189
- content=[tool_response],
190
- )
191
- results.append(tool_result)
192
-
193
- # Store pending calls in state for later processing
194
- if pending_calls:
195
- context.state["pending_tool_calls"] = [
196
- p.model_dump() for p in pending_calls
197
- ]
198
-
199
- return results
200
- ```
201
-
202
- **Key Points:**
203
- - Check `tool.requires_confirmation`
204
- - Create PendingToolCall with message
205
- - Store in `context.state` for persistence
206
- - Skip execution for pending tools
207
-
208
- **Live Coding**: Build act() with confirmation
209
-
210
- ---
211
-
212
- #### Step 4: Process Confirmations (10 min)
213
- ```python
214
- async def _process_confirmations(
215
- self,
216
- context: ExecutionContext
217
- ) -> List[ToolResult]:
218
- """Process user confirmations and execute approved tools."""
219
- tools_dict = {tool.name: tool for tool in self.tools}
220
- results = []
221
-
222
- # Restore pending tool calls from state
223
- pending_map = {
224
- p["tool_call"]["tool_call_id"]: PendingToolCall.model_validate(p)
225
- for p in context.state["pending_tool_calls"]
226
- }
227
-
228
- # Build confirmation lookup by tool_call_id
229
- confirmation_map = {
230
- c["tool_call_id"]: ToolConfirmation.model_validate(c)
231
- for c in context.state["tool_confirmations"]
232
- }
233
-
234
- # Process ALL pending tool calls
235
- for tool_call_id, pending in pending_map.items():
236
- tool = tools_dict.get(pending.tool_call.name)
237
- confirmation = confirmation_map.get(tool_call_id)
238
-
239
- if confirmation and confirmation.approved:
240
- # Merge original arguments with modifications
241
- arguments = {
242
- **pending.tool_call.arguments,
243
- **(confirmation.modified_arguments or {})
244
- }
245
-
246
- # Execute the approved tool
247
- try:
248
- output = await tool(context, **arguments)
249
- results.append(ToolResult(
250
- tool_call_id=tool_call_id,
251
- name=pending.tool_call.name,
252
- status="success",
253
- content=[output],
254
- ))
255
- except Exception as e:
256
- results.append(ToolResult(
257
- tool_call_id=tool_call_id,
258
- name=pending.tool_call.name,
259
- status="error",
260
- content=[str(e)],
261
- ))
262
- else:
263
- # Rejected: either explicitly or not in confirmation list
264
- if confirmation:
265
- reason = confirmation.reason or "Tool execution was rejected by user."
266
- else:
267
- reason = "Tool execution was not approved."
268
-
269
- results.append(ToolResult(
270
- tool_call_id=tool_call_id,
271
- name=pending.tool_call.name,
272
- status="error",
273
- content=[reason],
274
- ))
275
-
276
- return results
277
- ```
278
-
279
- **Key Points:**
280
- - Deserialize pending calls from state
281
- - Match confirmations by `tool_call_id`
282
- - Execute approved tools with merged arguments
283
- - Return error result for rejected tools
284
- - LLM sees rejection reason and can adapt
285
-
286
- **Live Coding**: Build _process_confirmations()
287
-
288
- ---
289
-
290
- #### Step 5: Update run() for Pending State (5 min)
291
- ```python
292
- async def run(
293
- self,
294
- user_input: str,
295
- context: ExecutionContext = None,
296
- tool_confirmations: Optional[List[ToolConfirmation]] = None
297
- ) -> AgentResult:
298
- """Execute the agent with optional confirmation support."""
299
-
300
- # Store confirmations in state if provided
301
- if tool_confirmations:
302
- if context is None:
303
- context = ExecutionContext()
304
- context.state["tool_confirmations"] = [
305
- c.model_dump() for c in tool_confirmations
306
- ]
307
-
308
- # Create or reuse context
309
- if context is None:
310
- context = ExecutionContext()
311
-
312
- # Add user input as the first event
313
- user_event = Event(
314
- execution_id=context.execution_id,
315
- author="user",
316
- content=[Message(role="user", content=user_input)]
317
- )
318
- context.add_event(user_event)
319
-
320
- # Execute steps until completion or max steps reached
321
- while not context.final_result and context.current_step < self.max_steps:
322
- await self.step(context)
323
-
324
- # Check for pending confirmations after each step
325
- if context.state.get("pending_tool_calls"):
326
- pending_calls = [
327
- PendingToolCall.model_validate(p)
328
- for p in context.state["pending_tool_calls"]
329
- ]
330
- return AgentResult(
331
- status="pending",
332
- context=context,
333
- pending_tool_calls=pending_calls,
334
- )
335
-
336
- # Check if the last event is a final response
337
- last_event = context.events[-1]
338
- if self._is_final_response(last_event):
339
- context.final_result = self._extract_final_result(last_event)
340
-
341
- return AgentResult(output=context.final_result, context=context)
342
- ```
343
-
344
- **Key Points:**
345
- - Accept `tool_confirmations` parameter
346
- - Store in context.state for processing
347
- - Return "pending" status when confirmations needed
348
- - Resume execution when confirmations provided
349
-
350
- **Live Coding**: Update run()
351
-
352
- ---
353
-
354
- #### Step 6: Testing Confirmation Workflow (2 min)
355
-
356
- **Create Dangerous Tool:**
357
- ```python
358
- from agent_framework import tool
359
-
360
- @tool(
361
- requires_confirmation=True,
362
- confirmation_message="Delete file '{arguments[filename]}'? This cannot be undone."
363
- )
364
- def delete_file(filename: str) -> str:
365
- """Delete a file from the filesystem."""
366
- import os
367
- os.remove(filename)
368
- return f"Deleted {filename}"
369
- ```
370
-
371
- **Test Workflow:**
372
- ```python
373
- from agent_framework import Agent, LlmClient, ToolConfirmation
374
-
375
- agent = Agent(
376
- model=LlmClient(model="gpt-4o-mini"),
377
- tools=[delete_file],
378
- instructions="Help manage files."
379
- )
380
-
381
- # First call - gets pending
382
- result = await agent.run("Delete the file named 'test.txt'")
383
- print(result.status) # "pending"
384
- print(result.pending_tool_calls[0].confirmation_message)
385
- # "Delete file 'test.txt'? This cannot be undone."
386
-
387
- # User approves
388
- confirmation = ToolConfirmation(
389
- tool_call_id=result.pending_tool_calls[0].tool_call.tool_call_id,
390
- approved=True
391
- )
392
-
393
- # Resume with confirmation
394
- result = await agent.run(
395
- "", # Empty because we're resuming
396
- context=result.context,
397
- tool_confirmations=[confirmation]
398
- )
399
- print(result.status) # "complete"
400
- print(result.output) # Agent's response about deletion
401
- ```
402
-
403
- **Test Rejection:**
404
- ```python
405
- # User rejects with reason
406
- confirmation = ToolConfirmation(
407
- tool_call_id=result.pending_tool_calls[0].tool_call.tool_call_id,
408
- approved=False,
409
- reason="I don't want to delete that file"
410
- )
411
-
412
- result = await agent.run("", context=result.context, tool_confirmations=[confirmation])
413
- # Agent sees the rejection reason and responds appropriately
414
- ```
415
-
416
- ---
417
-
418
- ### 5. Demo: Complete Agent (3 min)
419
-
420
- **Show:**
421
- - Calculator tool working (no confirmation)
422
- - Delete file tool pausing for confirmation
423
- - User approving/rejecting
424
- - Agent adapting to rejection
425
- - Multi-step reasoning with confirmations
426
-
427
- ---
428
-
429
- ### 6. Error Handling (2 min)
430
-
431
- **Tool Not Found:**
432
- ```python
433
- if tool_call.name not in tools_dict:
434
- raise ValueError(f"Tool '{tool_call.name}' not found")
435
- ```
436
-
437
- **Tool Execution Error:**
438
- ```python
439
- try:
440
- tool_response = await tool(context, **tool_call.arguments)
441
- status = "success"
442
- except Exception as e:
443
- tool_response = str(e)
444
- status = "error"
445
- ```
446
-
447
- **Confirmation Not Provided:**
448
- ```python
449
- if confirmation:
450
- reason = confirmation.reason or "Tool execution was rejected by user."
451
- else:
452
- reason = "Tool execution was not approved."
453
- ```
454
-
455
- **Best Practices:**
456
- - Always handle errors gracefully
457
- - Return error in ToolResult
458
- - Let agent continue reasoning
459
- - Provide clear rejection reasons
460
-
461
- ---
462
-
463
- ### 7. Next Steps (1 min)
464
-
465
- **Preview Episode 8:**
466
- - MCP integration
467
- - External tool servers
468
- - Tool discovery
469
-
470
- **What We Built:**
471
- - Complete agent with tools
472
- - Tool execution
473
- - Confirmation workflow
474
- - Error handling
475
-
476
- ---
477
-
478
- ## Key Takeaways
479
-
480
- 1. **act()** executes tools from LLM decisions
481
- 2. **Tool dispatch** maps names to instances
482
- 3. **requires_confirmation** pauses for user approval
483
- 4. **PendingToolCall** stores awaiting tools
484
- 5. **ToolConfirmation** carries user's decision
485
- 6. **_process_confirmations()** handles approved/rejected tools
486
- 7. **AgentResult.status** indicates execution state
487
-
488
- ---
489
-
490
- ## Common Mistakes
491
-
492
- **Mistake 1: Not handling tool errors**
493
- ```python
494
- # Wrong - crashes on error
495
- tool_response = await tool(context, **tool_call.arguments)
496
-
497
- # Right - handles gracefully
498
- try:
499
- tool_response = await tool(context, **tool_call.arguments)
500
- status = "success"
501
- except Exception as e:
502
- tool_response = str(e)
503
- status = "error"
504
- ```
505
-
506
- **Mistake 2: Forgetting to store pending calls**
507
- ```python
508
- # Wrong - pending calls lost
509
- if tool.requires_confirmation:
510
- pending_calls.append(...)
511
- # Missing: storing in context.state
512
-
513
- # Right - persists for later
514
- if pending_calls:
515
- context.state["pending_tool_calls"] = [p.model_dump() for p in pending_calls]
516
- ```
517
-
518
- **Mistake 3: Not clearing processed state**
519
- ```python
520
- # Wrong - processes same confirmations repeatedly
521
- confirmation_results = await self._process_confirmations(context)
522
-
523
- # Right - clears after processing
524
- del context.state["pending_tool_calls"]
525
- del context.state["tool_confirmations"]
526
- ```
527
-
528
- **Mistake 4: Ignoring modified arguments**
529
- ```python
530
- # Wrong - ignores user modifications
531
- output = await tool(context, **pending.tool_call.arguments)
532
-
533
- # Right - merges modifications
534
- arguments = {
535
- **pending.tool_call.arguments,
536
- **(confirmation.modified_arguments or {})
537
- }
538
- output = await tool(context, **arguments)
539
- ```
540
-
541
- ---
542
-
543
- ## Exercises
544
-
545
- 1. **Add Tool Timeout**: Implement a timeout for tool execution
546
- 2. **Implement Auto-Approve**: Add a flag to auto-approve tools for testing
547
- 3. **Add Confirmation Expiry**: Make pending confirmations expire after a timeout
548
- 4. **Create Approval Logger**: Log all confirmation decisions for audit
549
-
550
- ---
551
-
552
- ## Complete Confirmation Flow
553
-
554
- ```
555
- User: "Delete test.txt"
556
- |
557
- v
558
- Agent.run() -> Agent.step() -> Agent.act()
559
- |
560
- v
561
- act() sees requires_confirmation=True
562
- |
563
- v
564
- Creates PendingToolCall, stores in context.state
565
- |
566
- v
567
- Returns to run(), detects pending_tool_calls
568
- |
569
- v
570
- Returns AgentResult(status="pending", pending_tool_calls=[...])
571
- |
572
- v
573
- User sees confirmation message, submits ToolConfirmation
574
- |
575
- v
576
- Agent.run(context=..., tool_confirmations=[...])
577
- |
578
- v
579
- step() calls _process_confirmations()
580
- |
581
- v
582
- Executes approved tools, returns ToolResults
583
- |
584
- v
585
- Agent continues reasoning with results
586
- |
587
- v
588
- Returns AgentResult(status="complete", output="File deleted")
589
- ```
590
-
591
- ---
592
-
593
- **Previous Episode**: [Episode 6: Building the Tool System](./EPISODE_06_TOOL_SYSTEM.md)
594
- **Next Episode**: [Episode 8: MCP Integration](./EPISODE_08_MCP.md)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
misc/tutorials/EPISODE_08_MCP.md DELETED
@@ -1,304 +0,0 @@
1
- # Episode 8: MCP Integration
2
-
3
- **Duration**: 30 minutes
4
- **What to Build**: `agent_framework/mcp.py`
5
- **Target Audience**: Intermediate Python developers
6
-
7
- ---
8
-
9
- ## Episode Structure
10
-
11
- ### 1. Hook (2 min)
12
- **Show what we'll build:**
13
- - Loading tools from MCP servers
14
- - Tavily search integration
15
- - External tool discovery
16
-
17
- **Hook Statement**: "Today we'll integrate with MCP - a protocol that lets us discover and use tools from external servers. This opens up a whole ecosystem of tools!"
18
-
19
- ---
20
-
21
- ### 2. Problem (3 min)
22
- **Why do we need MCP?**
23
-
24
- **The Challenge:**
25
- - Tools live on separate servers
26
- - Need standard protocol
27
- - Want tool discovery
28
- - Don't want to hardcode tools
29
-
30
- **The Solution:**
31
- - Model Context Protocol (MCP)
32
- - Stdio communication
33
- - Tool discovery
34
- - Automatic wrapping
35
-
36
- ---
37
-
38
- ### 3. Concept: What is MCP? (5 min)
39
-
40
- **MCP (Model Context Protocol):**
41
- - Standard protocol for tool servers
42
- - JSON-RPC over stdio
43
- - Tool discovery
44
- - Tool execution
45
-
46
- **Benefits:**
47
- - Decouples tools from agents
48
- - Standard interface
49
- - Easy integration
50
- - Tool marketplace potential
51
-
52
- **Flow:**
53
- 1. Connect to MCP server
54
- 2. Discover available tools
55
- 3. Wrap as FunctionTool
56
- 4. Use in agent
57
-
58
- ---
59
-
60
- ### 4. Live Coding: Building MCP Integration (20 min)
61
-
62
- #### Step 1: Setup and Imports (2 min)
63
- ```python
64
- import os
65
- from typing import Dict, List
66
- from mcp import ClientSession, StdioServerParameters
67
- from mcp.client.stdio import stdio_client
68
-
69
- from .tools import BaseTool, FunctionTool
70
- ```
71
-
72
- **Dependencies:**
73
- - `mcp` package
74
- - Stdio client for communication
75
-
76
- **Live Coding**: Setup imports
77
-
78
- ---
79
-
80
- #### Step 2: Helper Function - Extract Text (2 min)
81
- ```python
82
- def _extract_text_content(result) -> str:
83
- """Extract text content from MCP tool result."""
84
- if not hasattr(result, 'content'):
85
- return str(result)
86
-
87
- texts = []
88
- for item in result.content:
89
- if hasattr(item, 'text'):
90
- texts.append(item.text)
91
- else:
92
- texts.append(str(item))
93
-
94
- return "\n\n".join(texts)
95
- ```
96
-
97
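- **Purpose**: MCP returns a list of structured content items, but our tools need to return plain text, so this helper joins the text of each item into a single string.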
98
-
99
- **Live Coding**: Build extract function
100
-
101
- ---
102
-
103
- #### Step 3: load_mcp_tools() Function (6 min)
104
- ```python
105
- async def load_mcp_tools(connection: Dict) -> List[BaseTool]:
106
- """Load tools from an MCP server and convert to FunctionTools.
107
-
108
- Args:
109
- connection: Dictionary with connection parameters:
110
- - command: Command to run the MCP server
111
- - args: Arguments for the command
112
- - env: Environment variables (optional)
113
-
114
- Returns:
115
- List of BaseTool instances wrapping MCP tools
116
- """
117
- tools = []
118
-
119
- async with stdio_client(StdioServerParameters(**connection)) as (read, write):
120
- async with ClientSession(read, write) as session:
121
- await session.initialize()
122
- mcp_tools = await session.list_tools()
123
-
124
- for mcp_tool in mcp_tools.tools:
125
- func_tool = _create_mcp_tool(mcp_tool, connection)
126
- tools.append(func_tool)
127
-
128
- return tools
129
- ```
130
-
131
- **Key Points:**
132
- - Connects via stdio
133
- - Initializes session
134
- - Lists tools
135
- - Wraps each tool
136
-
137
- **Live Coding**: Build load_mcp_tools()
138
-
139
- ---
140
-
141
- #### Step 4: _create_mcp_tool() Function (8 min)
142
- ```python
143
- def _create_mcp_tool(mcp_tool, connection: Dict) -> FunctionTool:
144
- """Create a FunctionTool that wraps an MCP tool."""
145
-
146
- async def call_mcp(**kwargs):
147
- async with stdio_client(StdioServerParameters(**connection)) as (read, write):
148
- async with ClientSession(read, write) as session:
149
- await session.initialize()
150
- result = await session.call_tool(mcp_tool.name, kwargs)
151
- return _extract_text_content(result)
152
-
153
- tool_definition = {
154
- "type": "function",
155
- "function": {
156
- "name": mcp_tool.name,
157
- "description": mcp_tool.description,
158
- "parameters": mcp_tool.inputSchema,
159
- }
160
- }
161
-
162
- return FunctionTool(
163
- func=call_mcp,
164
- name=mcp_tool.name,
165
- description=mcp_tool.description,
166
- tool_definition=tool_definition
167
- )
168
- ```
169
-
170
- **Key Points:**
171
- - Creates wrapper function
172
- - Opens a new stdio connection and session on every call (simple, but adds per-call overhead)
173
- - Uses MCP schema
174
- - Returns FunctionTool
175
-
176
- **Live Coding**: Build _create_mcp_tool()
177
-
178
- ---
179
-
180
- #### Step 5: Testing MCP Integration (2 min)
181
- ```python
182
- import os
183
- from agent_framework.mcp import load_mcp_tools
184
-
185
- connection = {
186
- "command": "npx",
187
- "args": ["-y", "tavily-mcp@latest"],
188
- "env": {"TAVILY_API_KEY": os.getenv("TAVILY_API_KEY")}
189
- }
190
-
191
- tools = await load_mcp_tools(connection)
192
- print(f"Loaded {len(tools)} tools")
193
- for tool in tools:
194
- print(f" - {tool.name}: {tool.description}")
195
- ```
196
-
197
- **Expected Output:**
198
- ```
199
- Loaded 5 tools
200
- - tavily_search: Search the web...
201
- - tavily_extract: Extract content from URLs...
202
- ...
203
- ```
204
-
205
- ---
206
-
207
- ### 5. Demo: Using MCP Tools (3 min)
208
-
209
- **Show:**
210
- - Loading Tavily tools
211
- - Using in agent
212
- - Web search working
213
- - Tool discovery
214
-
215
- ---
216
-
217
- ### 6. MCP Server Setup (2 min)
218
-
219
- **Tavily MCP Server:**
220
- ```bash
221
- # Install via npx
222
- npx -y tavily-mcp@latest
223
-
224
- # Or set up locally
225
- npm install -g tavily-mcp
226
- ```
227
-
228
- **Other MCP Servers:**
229
- - File system tools
230
- - Database tools
231
- - API tools
232
- - Custom servers
233
-
234
- ---
235
-
236
- ### 7. Next Steps (1 min)
237
-
238
- **Preview Episode 9:**
239
- - Session management
240
- - Memory optimization
241
- - Token management
242
-
243
- **What We Built:**
244
- - MCP integration
245
- - Tool discovery
246
- - External tool support
247
-
248
- ---
249
-
250
- ## Key Takeaways
251
-
252
- 1. **MCP** provides standard tool protocol
253
- 2. **Stdio communication** for tool servers
254
- 3. **Tool discovery** finds available tools
255
- 4. **Automatic wrapping** converts to FunctionTool
256
- 5. **Decoupled** tools from agents
257
-
258
- ---
259
-
260
- ## Common Mistakes
261
-
262
- **Mistake 1: Not handling connection errors**
263
- ```python
264
- # Wrong - crashes if server unavailable
265
- tools = await load_mcp_tools(connection)
266
-
267
- # Right - handle gracefully
268
- try:
269
- tools = await load_mcp_tools(connection)
270
- except Exception as e:
271
- print(f"Failed to load MCP tools: {e}")
272
- tools = []
273
- ```
274
-
275
- **Mistake 2: Forgetting environment variables**
276
- ```python
277
- # Wrong - missing API key
278
- connection = {
279
- "command": "npx",
280
- "args": ["-y", "tavily-mcp@latest"]
281
- }
282
-
283
- # Right - include env
284
- connection = {
285
- "command": "npx",
286
- "args": ["-y", "tavily-mcp@latest"],
287
- "env": {"TAVILY_API_KEY": os.getenv("TAVILY_API_KEY")}
288
- }
289
- ```
290
-
291
- ---
292
-
293
- ## Exercises
294
-
295
- 1. Add connection pooling
296
- 2. Implement tool caching
297
- 3. Add MCP server health checks
298
- 4. Create custom MCP server
299
-
300
- ---
301
-
302
- **Previous Episode**: [Episode 7: Tool Execution & Complete Agent](./EPISODE_07_TOOL_EXECUTION.md)
303
- **Next Episode**: [Episode 9: Session & Memory Management](./EPISODE_09_MEMORY.md)
304
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
misc/tutorials/EPISODE_09_MEMORY.md DELETED
@@ -1,658 +0,0 @@
1
- # Episode 9: Session & Memory Management
2
-
3
- **Duration**: 40 minutes
4
- **What to Build**: `agent_framework/memory.py`, session integration in `agent.py`
5
- **Target Audience**: Intermediate Python developers
6
-
7
- ---
8
-
9
- ## Episode Structure
10
-
11
- ### 1. Hook (2 min)
12
- **Show what we'll build:**
13
- - Session persistence
14
- - Memory optimization
15
- - Token management
16
- - Full session integration with agent
17
-
18
- **Hook Statement**: "Today we'll add memory to our agent - it will remember conversations across multiple interactions and optimize token usage. This makes agents truly useful for real applications!"
19
-
20
- ---
21
-
22
- ### 2. Problem (3 min)
23
- **Why do we need memory management?**
24
-
25
- **The Challenge:**
26
- - Conversations get long
27
- - Token costs increase
28
- - Context windows limited
29
- - Need to remember across sessions
30
- - State must persist across agent runs
31
-
32
- **The Solution:**
33
- - Session persistence
34
- - Token counting
35
- - Memory optimization strategies
36
- - Full integration with agent loop
37
-
38
- ---
39
-
40
- ### 3. Concept: Memory Strategies (5 min)
41
-
42
- **Strategies:**
43
- 1. **Sliding Window**: Keep recent N messages
44
- 2. **Compaction**: Replace bulky tool results with short references (tool calls themselves are kept)
45
- 3. **Summarization**: Compress old history with LLM
46
-
47
- **When to Use:**
48
- - Sliding Window: Simple, fast
49
- - Compaction: Tool-heavy conversations
50
- - Summarization: Very long conversations
51
-
52
- ---
53
-
54
- ### 4. Live Coding: Building Memory System (30 min)
55
-
56
- #### Step 1: Session Models (3 min)
57
- ```python
58
- # In agent_framework/models.py
59
- from pydantic import BaseModel, Field
60
- from datetime import datetime
61
- from typing import Any
62
-
63
- class Session(BaseModel):
64
- """Container for persistent conversation state."""
65
- session_id: str
66
- user_id: str | None = None
67
- events: list[Event] = Field(default_factory=list)
68
- state: dict[str, Any] = Field(default_factory=dict)
69
- created_at: datetime = Field(default_factory=datetime.now)
70
- updated_at: datetime = Field(default_factory=datetime.now)
71
- ```
72
-
73
- **Key Points:**
74
- - Stores events from conversation
75
- - Custom state for pending calls, etc.
76
- - Timestamps for tracking
77
-
78
- **Live Coding**: Build Session model
79
-
80
- ---
81
-
82
- #### Step 2: Session Manager (5 min)
83
- ```python
84
- # In agent_framework/models.py
85
- from abc import ABC, abstractmethod
86
-
87
- class BaseSessionManager(ABC):
88
- """Abstract base class for session management."""
89
-
90
- @abstractmethod
91
- async def create(self, session_id: str, user_id: str | None = None) -> Session:
92
- """Create a new session."""
93
- pass
94
-
95
- @abstractmethod
96
- async def get(self, session_id: str) -> Session | None:
97
- """Retrieve a session by ID. Returns None if not found."""
98
- pass
99
-
100
- @abstractmethod
101
- async def save(self, session: Session) -> None:
102
- """Persist session changes to storage."""
103
- pass
104
-
105
- async def get_or_create(self, session_id: str, user_id: str | None = None) -> Session:
106
- """Get existing session or create new one."""
107
- session = await self.get(session_id)
108
- if session is None:
109
- session = await self.create(session_id, user_id)
110
- return session
111
-
112
-
113
- class InMemorySessionManager(BaseSessionManager):
114
- """In-memory session storage for development and testing."""
115
-
116
- def __init__(self):
117
- self._sessions: dict[str, Session] = {}
118
-
119
- async def create(self, session_id: str, user_id: str | None = None) -> Session:
120
- """Create a new session."""
121
- if session_id in self._sessions:
122
- raise ValueError(f"Session {session_id} already exists")
123
-
124
- session = Session(session_id=session_id, user_id=user_id)
125
- self._sessions[session_id] = session
126
- return session
127
-
128
- async def get(self, session_id: str) -> Session | None:
129
- """Retrieve a session by ID."""
130
- return self._sessions.get(session_id)
131
-
132
- async def save(self, session: Session) -> None:
133
- """Save session to storage."""
134
- self._sessions[session.session_id] = session
135
- ```
136
-
137
- **Key Points:**
138
- - Abstract interface for flexibility
139
- - In-memory implementation for development
140
- - Easy to extend (database, Redis, etc.)
141
-
142
- **Live Coding**: Build session managers
143
-
144
- ---
145
-
146
- #### Step 3: Session Integration in Agent (8 min)
147
-
148
- **Update ExecutionContext:**
149
- ```python
150
- # In agent_framework/models.py
151
- @dataclass
152
- class ExecutionContext:
153
- """Central storage for all execution state."""
154
-
155
- execution_id: str = field(default_factory=lambda: str(uuid.uuid4()))
156
- events: List[Event] = field(default_factory=list)
157
- current_step: int = 0
158
- state: Dict[str, Any] = field(default_factory=dict)
159
- final_result: Optional[str | BaseModel] = None
160
- session_id: Optional[str] = None # NEW: Link to session for persistence
161
- ```
162
-
163
- **Update Agent.__init__:**
164
- ```python
165
- class Agent:
166
- def __init__(
167
- self,
168
- model: LlmClient,
169
- tools: List[BaseTool] = None,
170
- instructions: str = "",
171
- max_steps: int = 5,
172
- session_manager: BaseSessionManager | None = None # NEW
173
- ):
174
- # ... other init ...
175
- self.session_manager = session_manager or InMemorySessionManager()
176
- ```
177
-
178
- **Update Agent.run() with Full Session Support:**
179
- ```python
180
- async def run(
181
- self,
182
- user_input: str,
183
- context: ExecutionContext = None,
184
- session_id: Optional[str] = None,
185
- tool_confirmations: Optional[List[ToolConfirmation]] = None
186
- ) -> AgentResult:
187
- """Execute the agent with optional session support.
188
-
189
- Args:
190
- user_input: User's input message
191
- context: Optional execution context (creates new if None)
192
- session_id: Optional session ID for persistent conversations
193
- tool_confirmations: Optional list of tool confirmations for pending calls
194
- """
195
- # Load or create session if session_id is provided
196
- session = None
197
- if session_id and self.session_manager:
198
- session = await self.session_manager.get_or_create(session_id)
199
-
200
- # Load session data into context if context is new
201
- if context is None:
202
- context = ExecutionContext()
203
- # Restore events and state from session
204
- context.events = session.events.copy()
205
- context.state = session.state.copy()
206
- context.execution_id = session.session_id
207
- context.session_id = session_id
208
-
209
- if tool_confirmations:
210
- if context is None:
211
- context = ExecutionContext()
212
- context.state["tool_confirmations"] = [
213
- c.model_dump() for c in tool_confirmations
214
- ]
215
-
216
- # Create or reuse context
217
- if context is None:
218
- context = ExecutionContext()
219
-
220
- # Add user input as the first event
221
- user_event = Event(
222
- execution_id=context.execution_id,
223
- author="user",
224
- content=[Message(role="user", content=user_input)]
225
- )
226
- context.add_event(user_event)
227
-
228
- # Execute steps until completion or max steps reached
229
- while not context.final_result and context.current_step < self.max_steps:
230
- await self.step(context)
231
-
232
- # Check for pending confirmations after each step
233
- if context.state.get("pending_tool_calls"):
234
- pending_calls = [
235
- PendingToolCall.model_validate(p)
236
- for p in context.state["pending_tool_calls"]
237
- ]
238
- # Save session state before returning
239
- if session:
240
- session.events = context.events
241
- session.state = context.state
242
- await self.session_manager.save(session)
243
- return AgentResult(
244
- status="pending",
245
- context=context,
246
- pending_tool_calls=pending_calls,
247
- )
248
-
249
- # Check if the last event is a final response
250
- last_event = context.events[-1]
251
- if self._is_final_response(last_event):
252
- context.final_result = self._extract_final_result(last_event)
253
-
254
- # Save session after execution completes
255
- if session:
256
- session.events = context.events
257
- session.state = context.state
258
- await self.session_manager.save(session)
259
-
260
- return AgentResult(output=context.final_result, context=context)
261
- ```
262
-
263
- **Key Points:**
264
- - Load session at start of run()
265
- - Restore events and state from session
266
- - Set `context.session_id` for tracking
267
- - Save session before returning pending
268
- - Save session after completion
269
-
270
- **Live Coding**: Integrate session with agent
271
-
272
- ---
273
-
274
- #### Step 4: Token Counting (4 min)
275
- ```python
276
- # In agent_framework/memory.py
277
- import tiktoken
278
- import json
279
- from .llm import build_messages
280
- from .models import LlmRequest
281
-
282
- def count_tokens(request: LlmRequest, model_id: str = "gpt-4") -> int:
283
- """Calculate total token count of LlmRequest."""
284
- try:
285
- encoding = tiktoken.encoding_for_model(model_id)
286
- except KeyError:
287
- encoding = tiktoken.get_encoding("o200k_base")
288
-
289
- messages = build_messages(request)
290
- total_tokens = 0
291
-
292
- for message in messages:
293
- total_tokens += 4 # Per-message overhead
294
-
295
- if message.get("content"):
296
- total_tokens += len(encoding.encode(message["content"]))
297
-
298
- if message.get("tool_calls"):
299
- for tool_call in message["tool_calls"]:
300
- func = tool_call.get("function", {})
301
- if func.get("name"):
302
- total_tokens += len(encoding.encode(func["name"]))
303
- if func.get("arguments"):
304
- total_tokens += len(encoding.encode(func["arguments"]))
305
-
306
- if request.tools:
307
- for tool in request.tools:
308
- tool_def = tool.tool_definition
309
- total_tokens += len(encoding.encode(json.dumps(tool_def)))
310
-
311
- return total_tokens
312
- ```
313
-
314
- **Key Points:**
315
- - Uses tiktoken to estimate token counts (a fixed per-message overhead is added, so totals are approximate)
316
- - Counts messages, tool calls, tools
317
- - Model-specific encoding
318
-
319
- **Live Coding**: Build token counting
320
-
321
- ---
322
-
323
- #### Step 5: Sliding Window (4 min)
324
- ```python
325
- # In agent_framework/memory.py
326
- from .models import Message
327
-
328
- def apply_sliding_window(
329
- context: ExecutionContext,
330
- request: LlmRequest,
331
- window_size: int = 20
332
- ) -> None:
333
- """Keep only the most recent N messages."""
334
- contents = request.contents
335
-
336
- # Find user message position
337
- user_message_idx = None
338
- for i, item in enumerate(contents):
339
- if isinstance(item, Message) and item.role == "user":
340
- user_message_idx = i
341
- break
342
-
343
- if user_message_idx is None:
344
- return
345
-
346
- # Preserve up to user message
347
- preserved = contents[:user_message_idx + 1]
348
-
349
- # Keep only the most recent N from remaining items
350
- remaining = contents[user_message_idx + 1:]
351
- if len(remaining) > window_size:
352
- remaining = remaining[-window_size:]
353
-
354
- request.contents = preserved + remaining
355
- ```
356
-
357
- **Key Points:**
358
- - Preserves everything up to and including the first user message
359
- - Keeps recent N items
360
- - Simple and fast
361
-
362
- **Live Coding**: Build sliding window
363
-
364
- ---
365
-
366
- #### Step 6: Compaction (4 min)
367
- ```python
368
- # In agent_framework/memory.py
369
- from .models import ToolCall, ToolResult
370
-
371
- TOOLRESULT_COMPACTION_RULES = {
372
- "read_file": "File content from {file_path}. Re-read if needed.",
373
- "search_web": "Search results processed. Query: {query}. Re-search if needed.",
374
- }
375
-
376
- def apply_compaction(context: ExecutionContext, request: LlmRequest) -> None:
377
- """Compress tool calls and results into reference messages."""
378
- tool_call_args = {}
379
- compacted = []
380
-
381
- for item in request.contents:
382
- if isinstance(item, ToolCall):
383
- tool_call_args[item.tool_call_id] = item.arguments
384
- compacted.append(item) # Keep tool calls
385
-
386
- elif isinstance(item, ToolResult):
387
- if item.name in TOOLRESULT_COMPACTION_RULES:
388
- args = tool_call_args.get(item.tool_call_id, {})
389
- template = TOOLRESULT_COMPACTION_RULES[item.name]
390
- compressed_content = template.format(
391
- file_path=args.get("file_path", "unknown"),
392
- query=args.get("query", "unknown")
393
- )
394
- compacted.append(ToolResult(
395
- tool_call_id=item.tool_call_id,
396
- name=item.name,
397
- status=item.status,
398
- content=[compressed_content]
399
- ))
400
- else:
401
- compacted.append(item)
402
- else:
403
- compacted.append(item)
404
-
405
- request.contents = compacted
406
- ```
407
-
408
- **Key Points:**
409
- - Replaces tool results with references
410
- - Configurable rules
411
- - Reduces token count
412
-
413
- **Live Coding**: Build compaction
414
-
415
- ---
416
-
417
- #### Step 7: Optimizer Callback (3 min)
418
- ```python
419
- # In agent_framework/callbacks.py
420
- import inspect
421
- from typing import Callable, Optional
422
- from .models import ExecutionContext
423
- from .llm import LlmRequest, LlmResponse
424
- from .memory import count_tokens
425
-
426
- def create_optimizer_callback(
427
- apply_optimization: Callable,
428
- threshold: int = 50000,
429
- model_id: str = "gpt-4"
430
- ) -> Callable:
431
- """Factory for optimization callbacks."""
432
- async def callback(
433
- context: ExecutionContext,
434
- request: LlmRequest
435
- ) -> Optional[LlmResponse]:
436
- token_count = count_tokens(request, model_id=model_id)
437
-
438
- if token_count < threshold:
439
- return None
440
-
441
- result = apply_optimization(context, request)
442
- if inspect.isawaitable(result):
443
- await result
444
- return None
445
-
446
- return callback
447
- ```
448
-
449
- **Key Points:**
450
- - Factory function
451
- - Checks threshold
452
- - Supports sync/async
453
-
454
- **Live Coding**: Build callback system
455
-
456
- ---
457
-
458
- ### 5. Testing Session Integration (3 min)
459
-
460
- **Test Session Persistence:**
461
- ```python
462
- import asyncio
463
- from agent_framework import Agent, LlmClient, InMemorySessionManager
464
- from agent_tools import calculator
465
-
466
- # Create a shared session manager
467
- session_manager = InMemorySessionManager()
468
-
469
- # Create agent with session support
470
- agent = Agent(
471
- model=LlmClient(model="gpt-4o-mini"),
472
- tools=[calculator],
473
- instructions="You are a helpful assistant.",
474
- session_manager=session_manager
475
- )
476
-
477
- session_id = "user-123"
478
-
479
- # First conversation - introduce yourself
480
- result1 = await agent.run(
481
- "Hi! My name is Alice and I'm a software engineer.",
482
- session_id=session_id
483
- )
484
- print(f"Response 1: {result1.output}")
485
- print(f"Events: {len(result1.context.events)}")
486
-
487
- # Second conversation - continue
488
- result2 = await agent.run(
489
- "What's 1234 * 5678?",
490
- session_id=session_id
491
- )
492
- print(f"Response 2: {result2.output}")
493
- print(f"Events: {len(result2.context.events)}") # Should include previous events!
494
-
495
- # Third conversation - test memory
496
- result3 = await agent.run(
497
- "What's my name and what do I do for work?",
498
- session_id=session_id
499
- )
500
- print(f"Response 3: {result3.output}")
501
- # Should remember: "Your name is Alice and you're a software engineer."
502
-
503
- # Different session - should NOT remember
504
- result4 = await agent.run(
505
- "What's my name?",
506
- session_id="different-user"
507
- )
508
- print(f"Response 4: {result4.output}")
509
- # Should say it doesn't know
510
- ```
511
-
512
- **Test Session Isolation:**
513
- ```python
514
- # Check stored sessions
515
- print("Session Storage Summary:")
516
- for sid, session in session_manager._sessions.items():
517
- print(f"Session ID: {session.session_id}")
518
- print(f" Events: {len(session.events)}")
519
- print(f" State keys: {list(session.state.keys())}")
520
- ```
521
-
522
- ---
523
-
524
- ### 6. Demo: Memory in Action (3 min)
525
-
526
- **Show:**
527
- - Session persistence across runs
528
- - Token counting
529
- - Sliding window optimization
530
- - Long conversation handling
531
- - Session isolation between users
532
-
533
- ---
534
-
535
- ### 7. Next Steps (1 min)
536
-
537
- **Preview Episode 10:**
538
- - Web deployment
539
- - FastAPI backend
540
- - Frontend interface
541
-
542
- **What We Built:**
543
- - Session model and managers
544
- - Full session integration with Agent.run()
545
- - Token counting
546
- - Memory optimization strategies
547
-
548
- ---
549
-
550
- ## Key Takeaways
551
-
552
- 1. **Sessions** persist conversations across multiple run() calls
553
- 2. **session_id** links context to session
554
- 3. **Events and state** are restored from session
555
- 4. **Session saves** on pending and completion
556
- 5. **Token counting** tracks usage
557
- 6. **Optimization strategies** reduce costs
558
-
559
- ---
560
-
561
- ## Common Mistakes
562
-
563
- **Mistake 1: Not loading session state**
564
- ```python
565
- # Wrong - creates empty context
566
- if context is None:
567
- context = ExecutionContext()
568
-
569
- # Right - loads from session
570
- if context is None:
571
- context = ExecutionContext()
572
- context.events = session.events.copy()
573
- context.state = session.state.copy()
574
- ```
575
-
576
- **Mistake 2: Not saving before pending return**
577
- ```python
578
- # Wrong - loses state on pending
579
- if context.state.get("pending_tool_calls"):
580
- return AgentResult(status="pending", ...)
581
-
582
- # Right - saves before returning
583
- if context.state.get("pending_tool_calls"):
584
- if session:
585
- session.events = context.events
586
- session.state = context.state
587
- await self.session_manager.save(session)
588
- return AgentResult(status="pending", ...)
589
- ```
590
-
591
- **Mistake 3: Mutating session directly**
592
- ```python
593
- # Wrong - might not persist
594
- session.events.append(new_event)
595
-
596
- # Right - replace the session's events with the context's, then save explicitly
597
- session.events = context.events # Full replacement
598
- await self.session_manager.save(session)
599
- ```
600
-
601
- ---
602
-
603
- ## Exercises
604
-
605
- 1. **Implement Database Session Manager**: Create a PostgreSQL or SQLite session manager
606
- 2. **Add Session Expiry**: Auto-delete sessions after 24 hours of inactivity
607
- 3. **Build Token Dashboard**: Track token usage per session
608
- 4. **Add Summarization Strategy**: Use LLM to summarize old history
609
-
610
- ---
611
-
612
- ## Complete Session Flow
613
-
614
- ```
615
- User Request with session_id="user-123"
616
- |
617
- v
618
- Agent.run(session_id="user-123")
619
- |
620
- v
621
- session_manager.get_or_create("user-123")
622
- |
623
- v
624
- [New Session?] --Yes--> Create empty Session
625
- | |
626
- No |
627
- | v
628
- v context = ExecutionContext()
629
- Load existing Session |
630
- | v
631
- v context.events = []
632
- context = ExecutionContext()
633
- context.events = session.events.copy()
634
- context.state = session.state.copy()
635
- |
636
- v
637
- Execute agent loop (step, step, ...)
638
- |
639
- +--[Pending?]--Yes--> Save session, return pending
640
- |
641
- No
642
- |
643
- v
644
- Complete execution
645
- |
646
- v
647
- session.events = context.events
648
- session.state = context.state
649
- await session_manager.save(session)
650
- |
651
- v
652
- Return AgentResult(status="complete")
653
- ```
654
-
655
- ---
656
-
657
- **Previous Episode**: [Episode 8: MCP Integration](./EPISODE_08_MCP.md)
658
- **Next Episode**: [Episode 10: Web Deployment](./EPISODE_10_WEB_DEPLOYMENT.md)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
misc/tutorials/EPISODE_10_WEB_DEPLOYMENT.md DELETED
@@ -1,425 +0,0 @@
1
- # Episode 10: Web Deployment
2
-
3
- **Duration**: 35 minutes
4
- **What to Build**: `web_app/app.py`, `web_app/static/index.html`
5
- **Target Audience**: Intermediate Python developers
6
-
7
- ---
8
-
9
- ## Episode Structure
10
-
11
- ### 1. Hook (2 min)
12
- **Show what we'll build:**
13
- - Complete web application
14
- - Chat interface
15
- - File uploads
16
- - Session management
17
-
18
- **Hook Statement**: "Today we'll deploy our agent as a web application. This is the final piece - making it accessible to users through a beautiful interface!"
19
-
20
- ---
21
-
22
- ### 2. Problem (3 min)
23
- **Why deploy as a web app?**
24
-
25
- **The Challenge:**
26
- - Command-line isn't user-friendly
27
- - Need file uploads
28
- - Want real-time interaction
29
- - Need session management UI
30
-
31
- **The Solution:**
32
- - FastAPI backend
33
- - Modern frontend
34
- - RESTful API
35
- - Static file serving
36
-
37
- ---
38
-
39
- ### 3. Concept: Web Architecture (5 min)
40
-
41
- **Architecture:**
42
- ```
43
- Frontend (HTML/CSS/JS)
44
- ↓ HTTP Requests
45
- FastAPI Backend
46
- ↓
47
- Agent Framework
48
- ↓
49
- LLM API
50
- ```
51
-
52
- **Components:**
53
- - Backend: FastAPI server
54
- - Frontend: Static HTML/CSS/JS
55
- - API: RESTful endpoints
56
- - File handling: Upload directory
57
-
58
- ---
59
-
60
- ### 4. Live Coding: Building Web App (25 min)
61
-
62
- #### Step 1: FastAPI Setup (3 min)
63
- ```python
64
- from fastapi import FastAPI, UploadFile, File, HTTPException
65
- from fastapi.staticfiles import StaticFiles
66
- from fastapi.responses import HTMLResponse
67
- from fastapi.middleware.cors import CORSMiddleware
68
- from pydantic import BaseModel
69
- from pathlib import Path
70
- import uuid
71
-
72
- app = FastAPI(title="Agent Chat")
73
-
74
- # Enable CORS
75
- app.add_middleware(
76
- CORSMiddleware,
77
- allow_origins=["*"],
78
- allow_credentials=True,
79
- allow_methods=["*"],
80
- allow_headers=["*"],
81
- )
82
-
83
- # Serve static files
84
- app.mount("/static", StaticFiles(directory="static"), name="static")
85
- ```
86
-
87
- **Key Points:**
88
- - FastAPI app
89
- - CORS enabled
90
- - Static file serving
91
-
92
- **Live Coding**: Setup FastAPI
93
-
94
- ---
95
-
96
- #### Step 2: Agent Creation (2 min)
97
- ```python
98
- from agent_framework import Agent, LlmClient, InMemorySessionManager
99
- from agent_tools import calculator, search_web, read_file
100
-
101
- session_manager = InMemorySessionManager()
102
- UPLOAD_DIR = Path("uploads")
103
- UPLOAD_DIR.mkdir(exist_ok=True)
104
-
105
- def create_agent(use_session: bool = True) -> Agent:
106
- """Create an agent instance."""
107
- return Agent(
108
- model=LlmClient(model="gpt-4o-mini"),
109
- tools=[calculator, search_web, read_file],
110
- instructions="You are a helpful assistant.",
111
- max_steps=10,
112
- session_manager=session_manager if use_session else None
113
- )
114
- ```
115
-
116
- **Key Points:**
117
- - Shared session manager
118
- - Configurable tools
119
- - Session toggle
120
-
121
- **Live Coding**: Build agent creation
122
-
123
- ---
124
-
125
- #### Step 3: API Models (2 min)
126
- ```python
127
- class ChatRequest(BaseModel):
128
- message: str
129
- session_id: Optional[str] = None
130
- use_session: bool = True
131
-
132
- class ChatResponse(BaseModel):
133
- response: str
134
- session_id: str
135
- events_count: int
136
- tools_used: List[str]
137
- trace: str
138
- ```
139
-
140
- **Key Points:**
141
- - Request/response models
142
- - Session support
143
- - Trace included
144
-
145
- **Live Coding**: Build API models
146
-
147
- ---
148
-
149
- #### Step 4: Chat Endpoint (5 min)
150
- ```python
151
- @app.post("/api/chat")
152
- async def chat(request: ChatRequest) -> ChatResponse:
153
- """Send a message to the agent."""
154
- session_id = request.session_id or str(uuid.uuid4())
155
- agent = create_agent(use_session=request.use_session)
156
-
157
- try:
158
- if request.use_session:
159
- result = await agent.run(request.message, session_id=session_id)
160
- else:
161
- result = await agent.run(request.message)
162
-
163
- # Extract tools used
164
- tools_used = []
165
- for event in result.context.events:
166
- for item in event.content:
167
- if hasattr(item, 'name') and item.type == "tool_call":
168
- if item.name not in tools_used:
169
- tools_used.append(item.name)
170
-
171
- # Format trace
172
- from agent_framework import format_trace
173
- trace_output = format_trace(result.context)
174
-
175
- return ChatResponse(
176
- response=str(result.output) if result.output else "No response.",
177
- session_id=session_id,
178
- events_count=len(result.context.events),
179
- tools_used=tools_used,
180
- trace=trace_output
181
- )
182
- except Exception as e:
183
- raise HTTPException(status_code=500, detail=str(e))
184
- ```
185
-
186
- **Key Points:**
187
- - Handles sessions
188
- - Extracts tools
189
- - Formats trace
190
- - Error handling
191
-
192
- **Live Coding**: Build chat endpoint
193
-
194
- ---
195
-
196
- #### Step 5: File Upload Endpoint (3 min)
197
- ```python
198
- @app.post("/api/upload")
199
- async def upload_file(file: UploadFile = File(...)):
200
- """Upload a file."""
201
- file_path = UPLOAD_DIR / file.filename
202
-
203
- with open(file_path, "wb") as f:
204
- content = await file.read()
205
- f.write(content)
206
-
207
- return {"filename": file.filename, "path": str(file_path)}
208
- ```
209
-
210
- **Key Points:**
211
- - Saves to uploads directory (in a real deployment, sanitize `file.filename` first to prevent path traversal)
212
- - Returns file info
213
-
214
- **Live Coding**: Build upload endpoint
215
-
216
- ---
217
-
218
- #### Step 6: Frontend HTML Structure (5 min)
219
- ```html
220
- <!DOCTYPE html>
221
- <html>
222
- <head>
223
- <title>Agent Chat</title>
224
- <link rel="stylesheet" href="/static/style.css">
225
- </head>
226
- <body>
227
- <div class="container">
228
- <div class="sidebar">
229
- <h2>Tools</h2>
230
- <div id="tools-list"></div>
231
-
232
- <h2>Files</h2>
233
- <input type="file" id="file-input">
234
- <div id="files-list"></div>
235
- </div>
236
-
237
- <div class="chat-area">
238
- <div class="chat-header">
239
- <h1>Agent Chat</h1>
240
- <button id="trace-btn">View Trace</button>
241
- </div>
242
- <div id="messages"></div>
243
- <div class="input-area">
244
- <input type="text" id="message-input" placeholder="Type a message...">
245
- <button id="send-btn">Send</button>
246
- </div>
247
- </div>
248
- </div>
249
-
250
- <script src="/static/script.js"></script>
251
- </body>
252
- </html>
253
- ```
254
-
255
- **Key Points:**
256
- - Sidebar for tools/files
257
- - Chat area
258
- - Input area
259
- - Trace button
260
-
261
- **Live Coding**: Build HTML structure
262
-
263
- ---
264
-
265
- #### Step 7: Frontend JavaScript (5 min)
266
- ```javascript
267
- let sessionId = null;
268
-
269
- async function sendMessage() {
270
- const input = document.getElementById('message-input');
271
- const message = input.value;
272
- if (!message) return;
273
-
274
- // Add user message to UI
275
- addMessage('user', message);
276
- input.value = '';
277
-
278
- // Send to API
279
- const response = await fetch('/api/chat', {
280
- method: 'POST',
281
- headers: {'Content-Type': 'application/json'},
282
- body: JSON.stringify({
283
- message: message,
284
- session_id: sessionId,
285
- use_session: true
286
- })
287
- });
288
-
289
- const data = await response.json();
290
- sessionId = data.session_id;
291
-
292
- // Add agent response
293
- addMessage('assistant', data.response);
294
- }
295
-
296
- function addMessage(role, content) {
297
- const messages = document.getElementById('messages');
298
- const div = document.createElement('div');
299
- div.className = `message ${role}`;
300
- div.textContent = content;
301
- messages.appendChild(div);
302
- messages.scrollTop = messages.scrollHeight;
303
- }
304
-
305
- document.getElementById('send-btn').addEventListener('click', sendMessage);
306
- document.getElementById('message-input').addEventListener('keypress', (e) => {
307
- if (e.key === 'Enter') sendMessage();
308
- });
309
- ```
310
-
311
- **Key Points:**
312
- - Session management
313
- - Message sending
314
- - UI updates
315
-
316
- **Live Coding**: Build JavaScript
317
-
318
- ---
319
-
320
- ### 5. Demo: Complete Web App (3 min)
321
-
322
- **Show:**
323
- - Chat interface
324
- - File upload
325
- - Tool listing
326
- - Trace display
327
- - Session persistence
328
-
329
- ---
330
-
331
- ### 6. Deployment Tips (2 min)
332
-
333
- **Development:**
334
- ```bash
335
- uvicorn web_app.app:app --reload
336
- ```
337
-
338
- **Production:**
339
- ```bash
340
- uvicorn web_app.app:app --host 0.0.0.0 --port 8000
341
- ```
342
-
343
- **Docker:**
344
- ```dockerfile
345
- FROM python:3.11
346
- WORKDIR /app
347
- COPY . .
348
- RUN pip install -r requirements.txt
349
- CMD ["uvicorn", "web_app.app:app", "--host", "0.0.0.0", "--port", "8000"]
350
- ```
351
-
352
- ---
353
-
354
- ### 7. Next Steps (1 min)
355
-
356
- **What We Built:**
357
- - Complete web application
358
- - Full agent framework
359
- - Production-ready system
360
-
361
- **Future Enhancements:**
362
- - WebSocket for streaming
363
- - User authentication
364
- - Database sessions
365
- - Monitoring and logging
366
-
367
- ---
368
-
369
- ## Key Takeaways
370
-
371
- 1. **FastAPI** provides easy API creation
372
- 2. **Static files** for frontend
373
- 3. **RESTful API** for communication
374
- 4. **Session management** via API
375
- 5. **File handling** for uploads
376
-
377
- ---
378
-
379
- ## Common Mistakes
380
-
381
- **Mistake 1: Not handling CORS**
382
- ```python
383
- # Wrong - CORS errors
384
- app = FastAPI()
385
-
386
- # Right - CORS enabled
387
- app.add_middleware(CORSMiddleware, allow_origins=["*"])
388
- ```
389
-
390
- **Mistake 2: Not serving static files**
391
- ```python
392
- # Wrong - can't load CSS/JS
393
- app = FastAPI()
394
-
395
- # Right - serve static files
396
- app.mount("/static", StaticFiles(directory="static"), name="static")
397
- ```
398
-
399
- ---
400
-
401
- ## Exercises
402
-
403
- 1. Add WebSocket support
404
- 2. Implement user authentication
405
- 3. Add database session storage
406
- 4. Create admin dashboard
407
-
408
- ---
409
-
410
- **Previous Episode**: [Episode 9: Session & Memory Management](./EPISODE_09_MEMORY.md)
411
- **Series Complete!** 🎉
412
-
413
- ---
414
-
415
- ## Series Summary
416
-
417
- You've built:
418
- - Complete agent framework
419
- - Tool system
420
- - MCP integration
421
- - Memory management
422
- - Web deployment
423
-
424
- **Congratulations on completing the series!**
425
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
misc/tutorials/EXERCISES.md DELETED
@@ -1,991 +0,0 @@
1
- # Exercises and Challenges
2
-
3
- This document contains exercises for each episode to reinforce learning. Exercises are designed to build incrementally toward the actual codebase implementation.
4
-
5
- ---
6
-
7
- ## Episode 1: Python Foundations
8
-
9
- ### Exercise 1: Pydantic Model
10
- Create a `User` model with:
11
- - `name`: string (required)
12
- - `email`: string (required, must contain "@")
13
- - `age`: integer (optional, must be >= 0)
14
- - `is_active`: boolean (default: True)
15
-
16
- **Solution:**
17
- ```python
18
- from pydantic import BaseModel, field_validator
19
-
20
- class User(BaseModel):
21
- name: str
22
- email: str
23
- age: int | None = None
24
- is_active: bool = True
25
-
26
- @field_validator('email')
27
- def validate_email(cls, v):
28
- if '@' not in v:
29
- raise ValueError('Email must contain @')
30
- return v
31
-
32
- @field_validator('age')
33
- def validate_age(cls, v):
34
- if v is not None and v < 0:
35
- raise ValueError('Age must be >= 0')
36
- return v
37
- ```
38
-
39
- ### Exercise 2: Dataclass with Methods
40
- Create a `ShoppingCart` dataclass with:
41
- - `items`: list of dicts, each with `name` and `price` keys (default: empty list)
42
- - `total`: float (default: 0.0)
43
- - Methods: `add_item(name, price)`, `get_total()`
44
-
45
- **Solution:**
46
- ```python
47
- from dataclasses import dataclass, field
48
-
49
- @dataclass
50
- class ShoppingCart:
51
- items: list[dict] = field(default_factory=list)
52
- total: float = 0.0
53
-
54
- def add_item(self, name: str, price: float):
55
- self.items.append({"name": name, "price": price})
56
- self.total += price
57
-
58
- def get_total(self) -> float:
59
- return self.total
60
- ```
61
-
62
- ### Exercise 3: Async Parallel Calls
63
- Write a function that makes 5 concurrent API calls and returns all results.
64
-
65
- **Solution:**
66
- ```python
67
- import asyncio
68
-
69
- async def call_api(id: int) -> str:
70
- await asyncio.sleep(1) # Simulate API call
71
- return f"Result {id}"
72
-
73
- async def parallel_calls():
74
- results = await asyncio.gather(
75
- call_api(1),
76
- call_api(2),
77
- call_api(3),
78
- call_api(4),
79
- call_api(5)
80
- )
81
- return results
82
-
83
- # Run it
84
- results = asyncio.run(parallel_calls())
85
- print(results)
86
- ```
87
-
88
- ---
89
-
90
- ## Episode 2: Your First LLM Call
91
-
92
- ### Exercise 1: Retry with Backoff
93
- Implement exponential backoff for rate limit errors.
94
-
95
- **Solution:**
96
- ```python
97
- import asyncio
98
- from litellm import acompletion
99
- from litellm.exceptions import RateLimitError
100
-
101
- async def call_with_retry(messages: list, max_retries: int = 3):
102
- delay = 1
103
- for attempt in range(max_retries):
104
- try:
105
- response = await acompletion(
106
- model="gpt-4o-mini",
107
- messages=messages
108
- )
109
- return response.choices[0].message.content
110
- except RateLimitError:
111
- if attempt < max_retries - 1:
112
- await asyncio.sleep(delay)
113
- delay *= 2 # Exponential backoff
114
- else:
115
- raise
116
- ```
117
-
118
- ### Exercise 2: Streaming Responses
119
- Implement streaming for real-time output.
120
-
121
- **Solution:**
122
- ```python
123
- from litellm import acompletion
124
-
125
- async def stream_response(prompt: str):
126
- messages = [{"role": "user", "content": prompt}]
127
-
128
- response = await acompletion(
129
- model="gpt-4o-mini",
130
- messages=messages,
131
- stream=True
132
- )
133
-
134
- async for chunk in response:
135
- if chunk.choices[0].delta.content:
136
- yield chunk.choices[0].delta.content
137
-
138
- # Usage
139
- async def main():
140
- async for chunk in stream_response("Tell me a story"):
141
- print(chunk, end='', flush=True)
142
-
143
- asyncio.run(main())
144
- ```
145
-
146
- ### Exercise 3: Temperature Experiment
147
- Call the LLM 5 times with temperature=0 and 5 times with temperature=1. Compare outputs.
148
-
149
- **Solution:**
150
- ```python
151
- async def temperature_experiment(prompt: str):
152
- results_temp0 = []
153
- results_temp1 = []
154
-
155
- for _ in range(5):
156
- response = await acompletion(
157
- model="gpt-4o-mini",
158
- messages=[{"role": "user", "content": prompt}],
159
- temperature=0
160
- )
161
- results_temp0.append(response.choices[0].message.content)
162
-
163
- for _ in range(5):
164
- response = await acompletion(
165
- model="gpt-4o-mini",
166
- messages=[{"role": "user", "content": prompt}],
167
- temperature=1
168
- )
169
- results_temp1.append(response.choices[0].message.content)
170
-
171
- print("Temperature 0 (deterministic):")
172
- for r in results_temp0:
173
- print(f" {r}")
174
-
175
- print("\nTemperature 1 (creative):")
176
- for r in results_temp1:
177
- print(f" {r}")
178
- ```
179
-
180
- ---
181
-
182
- ## Episode 3: Core Data Models
183
-
184
- ### Exercise 1: Build ToolConfirmation Model
185
- Create the `ToolConfirmation` model that captures a user's decision on a pending tool call. It should have:
186
- - `tool_call_id`: string (required) - links to the pending tool call
187
- - `approved`: boolean (required) - whether user approved
188
- - `modified_arguments`: optional dict - if user wants to change arguments
189
- - `reason`: optional string - reason for rejection
190
-
191
- **Solution:**
192
- ```python
193
- from pydantic import BaseModel
194
-
195
- class ToolConfirmation(BaseModel):
196
- """User's decision on a pending tool call."""
197
-
198
- tool_call_id: str
199
- approved: bool
200
- modified_arguments: dict | None = None
201
- reason: str | None = None # Reason for rejection
202
- ```
203
-
204
- ### Exercise 2: Build PendingToolCall Model
205
- Create the `PendingToolCall` model that wraps a ToolCall awaiting confirmation:
206
- - `tool_call`: ToolCall (required) - the original tool call
207
- - `confirmation_message`: string (required) - message to show user
208
-
209
- **Solution:**
210
- ```python
211
- class PendingToolCall(BaseModel):
212
- """A tool call awaiting user confirmation."""
213
-
214
- tool_call: ToolCall # Assumes ToolCall is already defined
215
- confirmation_message: str
216
- ```
217
-
218
- ### Exercise 3: Add Validation to ToolConfirmation
219
- Add a validator that requires `reason` when `approved=False`.
220
-
221
- **Solution:**
222
- ```python
223
- from pydantic import BaseModel, model_validator
224
-
225
- class ToolConfirmation(BaseModel):
226
- """User's decision on a pending tool call."""
227
-
228
- tool_call_id: str
229
- approved: bool
230
- modified_arguments: dict | None = None
231
- reason: str | None = None
232
-
233
- @model_validator(mode='after')
234
- def validate_reason_on_rejection(self):
235
- if not self.approved and not self.reason:
236
- raise ValueError('reason is required when approved=False')
237
- return self
238
- ```
239
-
240
- ### Exercise 4: Extract Pending Calls Helper
241
- Create a helper function to extract pending tool calls from ExecutionContext state.
242
-
243
- **Solution:**
244
- ```python
245
- from typing import List
246
-
247
- def extract_pending_calls(context: ExecutionContext) -> List[PendingToolCall]:
248
- """Extract all pending tool calls from context state."""
249
- raw_pending = context.state.get("pending_tool_calls", [])
250
- return [PendingToolCall.model_validate(p) for p in raw_pending]
251
- ```
252
-
253
- ---
254
-
255
- ## Episode 4: The LLM Client
256
-
257
- ### Exercise 1: Add Streaming Support
258
- Add streaming support to `LlmClient`.
259
-
260
- **Solution:**
261
- ```python
262
- async def generate_streaming(self, request: LlmRequest):
263
- """Generate streaming response from LLM."""
264
- messages = self._build_messages(request)
265
-
266
- response = await acompletion(
267
- model=self.model,
268
- messages=messages,
269
- stream=True
270
- )
271
-
272
- async for chunk in response:
273
- if chunk.choices[0].delta.content:
274
- yield chunk.choices[0].delta.content
275
- ```
276
-
277
- ### Exercise 2: Response Caching
278
- Implement response caching based on request hash.
279
-
280
- **Solution:**
281
- ```python
282
- import hashlib
283
- import json
284
-
285
- class LlmClient:
286
- def __init__(self, model: str, cache: dict = None, **config):
287
- self.model = model
288
- self.config = config
289
- self.cache = cache or {}
290
-
291
- def _get_cache_key(self, request: LlmRequest) -> str:
292
- """Generate cache key from request."""
293
- data = {
294
- "model": self.model,
295
- "instructions": request.instructions,
296
- "contents": [c.model_dump() for c in request.contents]
297
- }
298
- return hashlib.md5(json.dumps(data, sort_keys=True).encode()).hexdigest()
299
-
300
- async def generate(self, request: LlmRequest) -> LlmResponse:
301
- cache_key = self._get_cache_key(request)
302
- if cache_key in self.cache:
303
- return self.cache[cache_key]
304
-
305
- response = await self._generate_uncached(request)
306
- self.cache[cache_key] = response
307
- return response
308
- ```
309
-
310
- ---
311
-
312
- ## Episode 5: The Basic Agent Loop
313
-
314
- ### Exercise 1: Add Verbose Logging
315
- Add verbose logging to show agent thinking process.
316
-
317
- **Solution:**
318
- ```python
319
- import logging
320
-
321
- class Agent:
322
- def __init__(self, ..., verbose: bool = False):
323
- # ... existing code ...
324
- self.verbose = verbose
325
- if verbose:
326
- logging.basicConfig(level=logging.INFO)
327
-
328
- async def step(self, context: ExecutionContext):
329
- if self.verbose:
330
- logging.info(f"Step {context.current_step + 1}: Thinking...")
331
-
332
- llm_request = self._prepare_llm_request(context)
333
- llm_response = await self.think(llm_request)
334
-
335
- if self.verbose:
336
- logging.info(f"Step {context.current_step + 1}: Got response")
337
- for item in llm_response.content:
338
- if isinstance(item, Message):
339
- logging.info(f" Message: {item.content[:100]}")
340
- ```
341
-
342
- ### Exercise 2: Step-by-Step Trace
343
- Implement a method to display step-by-step trace.
344
-
345
- **Solution:**
346
- ```python
347
- def display_step_trace(self, context: ExecutionContext, step: int):
348
- """Display trace for a specific step."""
349
- if step >= len(context.events):
350
- print(f"Step {step} does not exist")
351
- return
352
-
353
- event = context.events[step]
354
- print(f"\n{'='*60}")
355
- print(f"Step {step + 1} - {event.author.upper()}")
356
- print(f"{'='*60}")
357
-
358
- for item in event.content:
359
- if isinstance(item, Message):
360
- print(f"[Message] {item.role}: {item.content}")
361
- elif isinstance(item, ToolCall):
362
- print(f"[Tool Call] {item.name}({item.arguments})")
363
- elif isinstance(item, ToolResult):
364
- print(f"[Tool Result] {item.name}: {item.status}")
365
- ```
366
-
367
- ---
368
-
369
- ## Episode 6: Building the Tool System
370
-
371
- ### Exercise 1: Add requires_confirmation to a Tool
372
- Create a `delete_file` tool that requires confirmation before execution.
373
-
374
- **Solution:**
375
- ```python
376
- from agent_framework import tool
377
-
378
- @tool(
379
- requires_confirmation=True,
380
- confirmation_message="Delete file '{arguments[filename]}'? This cannot be undone."
381
- )
382
- def delete_file(filename: str) -> str:
383
- """Delete a file from the filesystem."""
384
- import os
385
- os.remove(filename)
386
- return f"Deleted {filename}"
387
-
388
- # Test it
389
- print(delete_file.requires_confirmation) # True
390
- print(delete_file.get_confirmation_message({"filename": "test.txt"}))
391
- # "Delete file 'test.txt'? This cannot be undone."
392
- ```
393
-
394
- ### Exercise 2: Create Custom Confirmation Message Template
395
- Create a `send_email` tool with a detailed confirmation message.
396
-
397
- **Solution:**
398
- ```python
399
- @tool(
400
- requires_confirmation=True,
401
- confirmation_message=(
402
- "Send email?\n"
403
- " To: {arguments[recipient]}\n"
404
- " Subject: {arguments[subject]}\n"
405
- " Body preview: {arguments[body][:50]}..."
406
- )
407
- )
408
- def send_email(recipient: str, subject: str, body: str) -> str:
409
- """Send an email to a recipient."""
410
- # ... email sending logic ...
411
- return f"Email sent to {recipient}"
412
-
413
- # Test confirmation message
414
- msg = send_email.get_confirmation_message({
415
- "recipient": "user@example.com",
416
- "subject": "Hello",
417
- "body": "This is a test email with some content that is quite long..."
418
- })
419
- print(msg)
420
- ```
421
-
422
- ### Exercise 3: Tool Registry with Confirmation Status
423
- Create a tool registry that tracks confirmation requirements.
424
-
425
- **Solution:**
426
- ```python
427
- class ToolRegistry:
428
- def __init__(self):
429
- self._tools: Dict[str, BaseTool] = {}
430
-
431
- def register(self, tool: BaseTool):
432
- """Register a tool."""
433
- self._tools[tool.name] = tool
434
-
435
- def get(self, name: str) -> BaseTool | None:
436
- """Get a tool by name."""
437
- return self._tools.get(name)
438
-
439
- def list_all(self) -> List[BaseTool]:
440
- """List all registered tools."""
441
- return list(self._tools.values())
442
-
443
- def list_dangerous(self) -> List[BaseTool]:
444
- """List tools requiring confirmation."""
445
- return [t for t in self._tools.values() if t.requires_confirmation]
446
-
447
- def list_safe(self) -> List[BaseTool]:
448
- """List tools not requiring confirmation."""
449
- return [t for t in self._tools.values() if not t.requires_confirmation]
450
-
451
- # Usage
452
- registry = ToolRegistry()
453
- registry.register(calculator)
454
- registry.register(delete_file)
455
- print(f"Safe tools: {[t.name for t in registry.list_safe()]}")
456
- print(f"Dangerous tools: {[t.name for t in registry.list_dangerous()]}")
457
- ```
458
-
459
- ---
460
-
461
- ## Episode 7: Tool Execution with Confirmation
462
-
463
- ### Exercise 1: Implement Pending Tool Call Detection
464
- Write the logic to detect when a tool requires confirmation and create a PendingToolCall.
465
-
466
- **Solution:**
467
- ```python
468
- async def act(
469
- self,
470
- context: ExecutionContext,
471
- tool_calls: List[ToolCall]
472
- ) -> List[ToolResult]:
473
- tools_dict = {tool.name: tool for tool in self.tools}
474
- results = []
475
- pending_calls = []
476
-
477
- for tool_call in tool_calls:
478
- tool = tools_dict[tool_call.name]
479
-
480
- # Check if confirmation is required
481
- if tool.requires_confirmation:
482
- pending = PendingToolCall(
483
- tool_call=tool_call,
484
- confirmation_message=tool.get_confirmation_message(
485
- tool_call.arguments
486
- )
487
- )
488
- pending_calls.append(pending)
489
- continue # Skip execution
490
-
491
- # Execute tool normally
492
- try:
493
- result = await tool(context, **tool_call.arguments)
494
- status = "success"
495
- except Exception as e:
496
- result = str(e)
497
- status = "error"
498
-
499
- results.append(ToolResult(
500
- tool_call_id=tool_call.tool_call_id,
501
- name=tool_call.name,
502
- status=status,
503
- content=[result]
504
- ))
505
-
506
- # Store pending calls in state
507
- if pending_calls:
508
- context.state["pending_tool_calls"] = [
509
- p.model_dump() for p in pending_calls
510
- ]
511
-
512
- return results
513
- ```
514
-
515
- ### Exercise 2: Build Confirmation Processing Logic
516
- Implement `_process_confirmations` that handles approved and rejected tools.
517
-
518
- **Solution:**
519
- ```python
520
- async def _process_confirmations(
521
- self,
522
- context: ExecutionContext
523
- ) -> List[ToolResult]:
524
- tools_dict = {tool.name: tool for tool in self.tools}
525
- results = []
526
-
527
- # Build maps
528
- pending_map = {
529
- p["tool_call"]["tool_call_id"]: PendingToolCall.model_validate(p)
530
- for p in context.state["pending_tool_calls"]
531
- }
532
- confirmation_map = {
533
- c["tool_call_id"]: ToolConfirmation.model_validate(c)
534
- for c in context.state["tool_confirmations"]
535
- }
536
-
537
- for tool_call_id, pending in pending_map.items():
538
- tool = tools_dict.get(pending.tool_call.name)
539
- confirmation = confirmation_map.get(tool_call_id)
540
-
541
- if confirmation and confirmation.approved:
542
- # Merge modified arguments
543
- arguments = {
544
- **pending.tool_call.arguments,
545
- **(confirmation.modified_arguments or {})
546
- }
547
-
548
- try:
549
- output = await tool(context, **arguments)
550
- results.append(ToolResult(
551
- tool_call_id=tool_call_id,
552
- name=pending.tool_call.name,
553
- status="success",
554
- content=[output],
555
- ))
556
- except Exception as e:
557
- results.append(ToolResult(
558
- tool_call_id=tool_call_id,
559
- name=pending.tool_call.name,
560
- status="error",
561
- content=[str(e)],
562
- ))
563
- else:
564
- # Rejected
565
- reason = (confirmation.reason if confirmation
566
- else "Tool execution was not approved.")
567
- results.append(ToolResult(
568
- tool_call_id=tool_call_id,
569
- name=pending.tool_call.name,
570
- status="error",
571
- content=[reason],
572
- ))
573
-
574
- return results
575
- ```
576
-
577
- ### Exercise 3: Test Complete Confirmation Workflow
578
- Write a test that demonstrates the full confirmation workflow.
579
-
580
- **Solution:**
581
- ```python
582
- import asyncio
583
- from agent_framework import Agent, LlmClient, ToolConfirmation, tool
584
-
585
- @tool(requires_confirmation=True)
586
- def dangerous_action(action: str) -> str:
587
- """Perform a dangerous action."""
588
- return f"Executed: {action}"
589
-
590
- async def test_confirmation_workflow():
591
- agent = Agent(
592
- model=LlmClient(model="gpt-4o-mini"),
593
- tools=[dangerous_action],
594
- instructions="Execute dangerous actions when asked."
595
- )
596
-
597
- # Step 1: Initial request triggers pending
598
- result1 = await agent.run("Execute the dangerous action 'delete_all'")
599
- print(f"Status: {result1.status}") # "pending"
600
- print(f"Pending: {result1.pending_tool_calls[0].confirmation_message}")
601
-
602
- # Step 2: User approves
603
- confirmation = ToolConfirmation(
604
- tool_call_id=result1.pending_tool_calls[0].tool_call.tool_call_id,
605
- approved=True
606
- )
607
-
608
- result2 = await agent.run(
609
- "", # Empty - resuming
610
- context=result1.context,
611
- tool_confirmations=[confirmation]
612
- )
613
- print(f"Status: {result2.status}") # "complete"
614
- print(f"Output: {result2.output}")
615
-
616
- asyncio.run(test_confirmation_workflow())
617
- ```
618
-
619
- ---
620
-
621
- ## Episode 8: MCP Integration
622
-
623
- ### Exercise 1: Connection Pooling
624
- Implement connection pooling for MCP servers.
625
-
626
- **Solution:**
627
- ```python
628
- from collections import defaultdict
629
-
630
- class MCPConnectionPool:
631
- def __init__(self):
632
- self._pools: Dict[str, List] = defaultdict(list)
633
- self._max_pool_size = 5
634
-
635
- async def get_connection(self, connection_params: Dict):
636
- """Get or create connection from pool."""
637
- key = str(connection_params)
638
-
639
- if self._pools[key]:
640
- return self._pools[key].pop()
641
-
642
- # Create new connection
643
- return await self._create_connection(connection_params)
644
-
645
- async def return_connection(self, connection_params: Dict, connection):
646
- """Return connection to pool."""
647
- key = str(connection_params)
648
- if len(self._pools[key]) < self._max_pool_size:
649
- self._pools[key].append(connection)
650
- ```
651
-
652
- ### Exercise 2: MCP Server Health Check
653
- Add health check for MCP servers.
654
-
655
- **Solution:**
656
- ```python
657
- async def check_mcp_health(connection: Dict) -> bool:
658
- """Check if MCP server is healthy."""
659
- try:
660
- async with stdio_client(StdioServerParameters(**connection)) as (read, write):
661
- async with ClientSession(read, write) as session:
662
- await session.initialize()
663
- await session.list_tools()
664
- return True
665
- except Exception:
666
- return False
667
- ```
668
-
669
- ---
670
-
671
- ## Episode 9: Session & Memory Management
672
-
673
- ### Exercise 1: Build Session Restoration Logic
674
- Implement the logic to restore session data into ExecutionContext.
675
-
676
- **Solution:**
677
- ```python
678
- async def run(
679
- self,
680
- user_input: str,
681
- session_id: Optional[str] = None,
682
- context: ExecutionContext = None
683
- ) -> AgentResult:
684
- # Load session if provided
685
- session = None
686
- if session_id and self.session_manager:
687
- session = await self.session_manager.get_or_create(session_id)
688
-
689
- # Restore into context
690
- if context is None:
691
- context = ExecutionContext()
692
- context.events = session.events.copy()
693
- context.state = session.state.copy()
694
- context.execution_id = session.session_id
695
- context.session_id = session_id
696
-
697
- if context is None:
698
- context = ExecutionContext()
699
-
700
- # ... rest of run logic ...
701
- ```
702
-
703
- ### Exercise 2: Implement Database Session Manager
704
- Create a SQLite-backed session manager.
705
-
706
- **Solution:**
707
- ```python
708
- import sqlite3
709
- import json
710
- from datetime import datetime
711
-
712
- class DatabaseSessionManager(BaseSessionManager):
713
- def __init__(self, db_path: str = "sessions.db"):
714
- self.db_path = db_path
715
- self._init_db()
716
-
717
- def _init_db(self):
718
- conn = sqlite3.connect(self.db_path)
719
- conn.execute("""
720
- CREATE TABLE IF NOT EXISTS sessions (
721
- session_id TEXT PRIMARY KEY,
722
- user_id TEXT,
723
- events TEXT,
724
- state TEXT,
725
- created_at TEXT,
726
- updated_at TEXT
727
- )
728
- """)
729
- conn.close()
730
-
731
- async def create(self, session_id: str, user_id: str | None = None) -> Session:
732
- session = Session(session_id=session_id, user_id=user_id)
733
- await self.save(session)
734
- return session
735
-
736
- async def get(self, session_id: str) -> Session | None:
737
- conn = sqlite3.connect(self.db_path)
738
- cursor = conn.execute(
739
- "SELECT * FROM sessions WHERE session_id = ?",
740
- (session_id,)
741
- )
742
- row = cursor.fetchone()
743
- conn.close()
744
-
745
- if row is None:
746
- return None
747
-
748
- return Session(
749
- session_id=row[0],
750
- user_id=row[1],
751
- events=[Event.model_validate(e) for e in json.loads(row[2])],
752
- state=json.loads(row[3]),
753
- created_at=datetime.fromisoformat(row[4]),
754
- updated_at=datetime.fromisoformat(row[5])
755
- )
756
-
757
- async def save(self, session: Session) -> None:
758
- conn = sqlite3.connect(self.db_path)
759
- conn.execute("""
760
- INSERT OR REPLACE INTO sessions
761
- (session_id, user_id, events, state, created_at, updated_at)
762
- VALUES (?, ?, ?, ?, ?, ?)
763
- """, (
764
- session.session_id,
765
- session.user_id,
766
- json.dumps([e.model_dump() for e in session.events]),
767
- json.dumps(session.state),
768
- session.created_at.isoformat(),
769
- datetime.now().isoformat()
770
- ))
771
- conn.commit()
772
- conn.close()
773
- ```
774
-
775
- ### Exercise 3: Session with Pending Tool Calls
776
- Test that pending tool calls persist across session saves.
777
-
778
- **Solution:**
779
- ```python
780
- async def test_session_with_pending():
781
- session_manager = InMemorySessionManager()
782
-
783
- agent = Agent(
784
- model=LlmClient(model="gpt-4o-mini"),
785
- tools=[delete_file], # requires confirmation
786
- session_manager=session_manager
787
- )
788
-
789
- session_id = "test-session"
790
-
791
- # First call - should return pending
792
- result1 = await agent.run("Delete test.txt", session_id=session_id)
793
- assert result1.status == "pending"
794
-
795
- # Check session was saved with pending state
796
- session = await session_manager.get(session_id)
797
- assert "pending_tool_calls" in session.state
798
- print(f"Session has {len(session.state['pending_tool_calls'])} pending calls")
799
-
800
- # Resume with confirmation
801
- confirmation = ToolConfirmation(
802
- tool_call_id=result1.pending_tool_calls[0].tool_call.tool_call_id,
803
- approved=True
804
- )
805
-
806
- result2 = await agent.run("", session_id=session_id,
807
- tool_confirmations=[confirmation])
808
- assert result2.status == "complete"
809
-
810
- # Check pending was cleared
811
- session = await session_manager.get(session_id)
812
- assert "pending_tool_calls" not in session.state
813
- print("Session pending calls cleared after completion")
814
-
815
- asyncio.run(test_session_with_pending())
816
- ```
817
-
818
- ---
819
-
820
- ## Episode 10: Web Deployment
821
-
822
- ### Exercise 1: WebSocket Support
823
- Add WebSocket support for streaming responses.
824
-
825
- **Solution:**
826
- ```python
827
- from fastapi import WebSocket
828
-
829
- @app.websocket("/ws/chat")
830
- async def websocket_chat(websocket: WebSocket):
831
- await websocket.accept()
832
-
833
- agent = create_agent()
834
- session_id = None
835
-
836
- while True:
837
- data = await websocket.receive_json()
838
- message = data.get("message")
839
-
840
- if message:
841
- result = await agent.run(message, session_id=session_id)
842
- session_id = result.context.session_id
843
-
844
- await websocket.send_json({
845
- "type": "response",
846
- "content": result.output
847
- })
848
- ```
849
-
850
- ### Exercise 2: Confirmation UI
851
- Add a UI component for handling tool confirmations.
852
-
853
- **Solution:**
854
- ```javascript
855
- // In index.html
856
- async function handlePendingToolCalls(pendingCalls) {
857
- const modal = document.getElementById('confirmationModal');
858
- const content = document.getElementById('confirmationContent');
859
-
860
- content.innerHTML = pendingCalls.map(pending => `
861
- <div class="pending-call" data-id="${pending.tool_call.tool_call_id}">
862
- <p>${pending.confirmation_message}</p>
863
- <button onclick="approveToolCall('${pending.tool_call.tool_call_id}')">
864
- Approve
865
- </button>
866
- <button onclick="rejectToolCall('${pending.tool_call.tool_call_id}')">
867
- Reject
868
- </button>
869
- </div>
870
- `).join('');
871
-
872
- modal.style.display = 'block';
873
- }
874
-
875
- async function approveToolCall(toolCallId) {
876
- const confirmation = {
877
- tool_call_id: toolCallId,
878
- approved: true
879
- };
880
-
881
- await resumeWithConfirmation([confirmation]);
882
- }
883
-
884
- async function rejectToolCall(toolCallId) {
885
- const reason = prompt('Reason for rejection:');
886
- const confirmation = {
887
- tool_call_id: toolCallId,
888
- approved: false,
889
- reason: reason
890
- };
891
-
892
- await resumeWithConfirmation([confirmation]);
893
- }
894
- ```
895
-
896
- ---
897
-
898
- ## Final Integration Challenge
899
-
900
- ### Build Complete Agent with All Features
901
- Create an agent that:
902
- 1. Uses multiple tools (calculator, search, file operations)
903
- 2. Has dangerous tools requiring confirmation
904
- 3. Persists sessions across requests
905
- 4. Displays execution trace
906
-
907
- **Solution:**
908
- ```python
909
- import asyncio
910
- from agent_framework import (
911
- Agent, LlmClient, InMemorySessionManager,
912
- ToolConfirmation, format_trace, tool
913
- )
914
- from agent_tools import calculator, search_web
915
-
916
- @tool(
917
- requires_confirmation=True,
918
- confirmation_message="Delete '{arguments[filename]}'?"
919
- )
920
- def delete_file(filename: str) -> str:
921
- """Delete a file."""
922
- return f"Deleted {filename}"
923
-
924
- async def main():
925
- session_manager = InMemorySessionManager()
926
-
927
- agent = Agent(
928
- model=LlmClient(model="gpt-4o-mini"),
929
- tools=[calculator, search_web, delete_file],
930
- instructions="You are a helpful assistant with file management capabilities.",
931
- session_manager=session_manager
932
- )
933
-
934
- session_id = "demo-session"
935
-
936
- # Conversation 1: Simple calculation
937
- result1 = await agent.run("What is 25 * 17?", session_id=session_id)
938
- print(f"Response: {result1.output}\n")
939
-
940
- # Conversation 2: Try dangerous action
941
- result2 = await agent.run("Delete old_data.txt", session_id=session_id)
942
-
943
- if result2.status == "pending":
944
- print("Agent wants to delete a file!")
945
- print(f"Message: {result2.pending_tool_calls[0].confirmation_message}")
946
-
947
- # Approve the deletion
948
- confirmation = ToolConfirmation(
949
- tool_call_id=result2.pending_tool_calls[0].tool_call.tool_call_id,
950
- approved=True
951
- )
952
-
953
- result2 = await agent.run(
954
- "",
955
- session_id=session_id,
956
- tool_confirmations=[confirmation]
957
- )
958
-
959
- print(f"Response: {result2.output}\n")
960
-
961
- # Conversation 3: Test memory
962
- result3 = await agent.run(
963
- "What calculations did I ask about earlier?",
964
- session_id=session_id
965
- )
966
- print(f"Response: {result3.output}\n")
967
-
968
- # Display trace
969
- print(format_trace(result3.context))
970
-
971
- asyncio.run(main())
972
- ```
973
-
974
- ---
975
-
976
- ## Solutions Location
977
-
978
- Solutions can be found in:
979
- - `misc/tutorials/exercises/solutions/`
980
- - Each episode has its own solution file
981
- - Solutions include explanations
982
-
983
- ---
984
-
985
- ## Contributing Exercises
986
-
987
- Feel free to contribute additional exercises:
988
- 1. Fork the repository
989
- 2. Add exercise to appropriate episode section
990
- 3. Include solution
991
- 4. Submit pull request
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
misc/tutorials/FEATURE_DOCUMENTATION.md DELETED
@@ -1,653 +0,0 @@
1
- # AI Agent Framework: Complete Feature Documentation
2
-
3
- ## Overview
4
-
5
- This document provides a comprehensive inventory of all features implemented in the AI Agent Framework. This framework allows you to build AI agents that can reason, use tools, maintain conversation state, and be deployed as web applications.
6
-
7
- ---
8
-
9
- ## Core Framework Features
10
-
11
- ### 1. Agent Execution Engine (`agent_framework/agent.py`)
12
-
13
- The `Agent` class is the heart of the framework, orchestrating the entire execution flow.
14
-
15
- #### Key Features:
16
-
17
- - **Think-Act-Observe Loop**: Multi-step reasoning in which the agent thinks (calls the LLM), acts (executes tools), and observes (processes the results) in a repeating cycle
18
- - **Structured Output**: Support for Pydantic models as output types, ensuring type-safe responses
19
- - **Max Steps Control**: Configurable iteration limits to prevent infinite loops
20
- - **Tool Confirmation**: Optional user approval for tool execution before running
21
- - **Pending Tool Calls**: Suspend execution and wait for user confirmation when required
22
- - **Callbacks**: `before_tool_callbacks` and `after_tool_callbacks` for extensibility
23
- - **Session Integration**: Persistent conversation state across multiple runs
24
-
25
- #### Key Methods:
26
-
27
- ```python
28
- Agent.__init__(
29
- model: LlmClient,
30
- tools: List[BaseTool] = None,
31
- instructions: str = "",
32
- max_steps: int = 5,
33
- output_type: Optional[Type[BaseModel]] = None,
34
- session_manager: BaseSessionManager | None = None
35
- )
36
-
37
- Agent.run(
38
- user_input: str,
39
- context: ExecutionContext = None,
40
- session_id: Optional[str] = None,
41
- tool_confirmations: Optional[List[ToolConfirmation]] = None
42
- ) -> AgentResult
43
-
44
- Agent.step(context: ExecutionContext) # Single iteration
45
- Agent.think(llm_request: LlmRequest) -> LlmResponse # LLM call
46
- Agent.act(context: ExecutionContext, tool_calls: List[ToolCall]) -> List[ToolResult] # Tool execution
47
- ```
48
-
49
- ---
50
-
51
- ### 2. Data Models (`agent_framework/models.py`)
52
-
53
- All data structures used throughout the framework.
54
-
55
- #### Message
56
- Represents a text message in the conversation.
57
-
58
- ```python
59
- class Message(BaseModel):
60
- type: Literal["message"] = "message"
61
- role: Literal["system", "user", "assistant"]
62
- content: str
63
- ```
64
-
65
- #### ToolCall
66
- LLM's request to execute a tool.
67
-
68
- ```python
69
- class ToolCall(BaseModel):
70
- type: Literal["tool_call"] = "tool_call"
71
- tool_call_id: str
72
- name: str
73
- arguments: dict
74
- ```
75
-
76
- #### ToolResult
77
- Result from tool execution (success or error).
78
-
79
- ```python
80
- class ToolResult(BaseModel):
81
- type: Literal["tool_result"] = "tool_result"
82
- tool_call_id: str
83
- name: str
84
- status: Literal["success", "error"]
85
- content: list
86
- ```
87
-
88
- #### Event
89
- A recorded occurrence during agent execution with timestamp.
90
-
91
- ```python
92
- class Event(BaseModel):
93
- id: str
94
- execution_id: str
95
- timestamp: float
96
- author: str # "user" or agent name
97
- content: List[ContentItem]
98
- ```
99
-
100
- #### ExecutionContext
101
- Central state container (dataclass) for all execution state.
102
-
103
- ```python
104
- @dataclass
105
- class ExecutionContext:
106
- execution_id: str
107
- events: List[Event]
108
- current_step: int
109
- state: Dict[str, Any]
110
- final_result: Optional[str | BaseModel]
111
- session_id: Optional[str]
112
- ```
113
-
114
- #### Session
115
- Persistent conversation state across multiple `run()` calls.
116
-
117
- ```python
118
- class Session(BaseModel):
119
- session_id: str
120
- user_id: str | None
121
- events: list[Event]
122
- state: dict[str, Any]
123
- created_at: datetime
124
- updated_at: datetime
125
- ```
126
-
127
- #### Session Management
128
- - **BaseSessionManager**: Abstract interface for session storage
129
- - **InMemorySessionManager**: In-memory implementation for development/testing
130
-
131
- #### Tool Confirmation
132
- - **ToolConfirmation**: User's decision on a pending tool call (approved/rejected with optional modifications)
133
- - **PendingToolCall**: Tool calls awaiting user confirmation
134
-
135
- ---
136
-
137
- ### 3. LLM Client (`agent_framework/llm.py`)
138
-
139
- Unified interface for interacting with LLM APIs.
140
-
141
- #### LlmClient
142
- Multi-provider support via LiteLLM (OpenAI, Anthropic, local models).
143
-
144
- ```python
145
- class LlmClient:
146
- def __init__(self, model: str, **config)
147
- async def generate(self, request: LlmRequest) -> LlmResponse
148
- ```
149
-
150
- #### LlmRequest
151
- Structured request model.
152
-
153
- ```python
154
- class LlmRequest(BaseModel):
155
- instructions: List[str]
156
- contents: List[ContentItem]
157
- tools: List[BaseTool]
158
- tool_choice: Optional[str] # "auto", "required", or None
159
- ```
160
-
161
- #### LlmResponse
162
- Structured response model.
163
-
164
- ```python
165
- class LlmResponse(BaseModel):
166
- content: List[ContentItem]
167
- error_message: Optional[str]
168
- usage_metadata: Dict[str, Any]
169
- ```
170
-
171
- #### build_messages()
172
- Converts internal models to API message format.
173
-
174
- ```python
175
- def build_messages(request: LlmRequest) -> List[dict]
176
- ```
177
-
178
- ---
179
-
180
- ### 4. Tool System (`agent_framework/tools.py`)
181
-
182
- Complete tool abstraction layer.
183
-
184
- #### BaseTool
185
- Abstract base class for all tools.
186
-
187
- ```python
188
- class BaseTool(ABC):
189
- name: str
190
- description: str
191
- tool_definition: Dict[str, Any]
192
- requires_confirmation: bool
193
- async def execute(self, context: ExecutionContext, **kwargs) -> Any
194
- ```
195
-
196
- #### FunctionTool
197
- Wraps Python functions as tools.
198
-
199
- ```python
200
- class FunctionTool(BaseTool):
201
- def __init__(
202
- self,
203
- func: Callable,
204
- name: str = None,
205
- description: str = None,
206
- tool_definition: Dict[str, Any] = None,
207
- requires_confirmation: bool = False
208
- )
209
- ```
210
-
211
- #### @tool Decorator
212
- Syntactic sugar for tool creation.
213
-
214
- ```python
215
- @tool
216
- def my_function(x: int) -> int:
217
- """Description for LLM."""
218
- return x * 2
219
- ```
220
-
221
- #### Features:
222
- - **Automatic Schema Generation**: From function type hints
223
- - **Context-Aware Tools**: Optional ExecutionContext parameter
224
- - **Tool Confirmation**: Per-tool confirmation requirements
225
- - **Custom Tool Definitions**: Override auto-generated schemas
226
-
227
- ---
228
-
229
- ### 5. MCP Integration (`agent_framework/mcp.py`)
230
-
231
- Integration with Model Context Protocol servers.
232
-
233
- #### load_mcp_tools()
234
- Discovers and loads tools from MCP servers.
235
-
236
- ```python
237
- async def load_mcp_tools(connection: Dict) -> List[BaseTool]
238
- ```
239
-
240
- #### Features:
241
- - **MCP Tool Wrapping**: Converts MCP tools to FunctionTool
242
- - **Stdio Client**: Connects to MCP servers via stdio
243
- - **Schema Conversion**: Converts MCP tool schemas to OpenAI format
244
-
245
- #### Example:
246
- ```python
247
- connection = {
248
- "command": "npx",
249
- "args": ["-y", "tavily-mcp@latest"],
250
- "env": {"TAVILY_API_KEY": os.getenv("TAVILY_API_KEY")}
251
- }
252
- tools = await load_mcp_tools(connection)
253
- ```
254
-
255
- ---
256
-
257
- ### 6. Memory Management (`agent_framework/memory.py`)
258
-
259
- Token optimization and conversation history management.
260
-
261
- #### Token Counting
262
- Accurate token counting using tiktoken.
263
-
264
- ```python
265
- def count_tokens(request: LlmRequest, model_id: str = "gpt-4") -> int
266
- ```
267
-
268
- #### Sliding Window
269
- Keeps only the most recent N messages.
270
-
271
- ```python
272
- def apply_sliding_window(
273
- context: ExecutionContext,
274
- request: LlmRequest,
275
- window_size: int = 20
276
- ) -> None
277
- ```
278
-
279
- #### Compaction
280
- Replaces tool calls/results with compact references.
281
-
282
- ```python
283
- def apply_compaction(context: ExecutionContext, request: LlmRequest) -> None
284
- ```
285
-
286
- #### Summarization
287
- LLM-based history compression.
288
-
289
- ```python
290
- async def apply_summarization(
291
- context: ExecutionContext,
292
- request: LlmRequest,
293
- llm_client: LlmClient,
294
- keep_recent: int = 5
295
- ) -> None
296
- ```
297
-
298
- #### ContextOptimizer
299
- Hierarchical optimization strategy.
300
-
301
- ```python
302
- class ContextOptimizer:
303
- def __init__(
304
- self,
305
- llm_client: LlmClient,
306
- token_threshold: int = 50000,
307
- enable_compaction: bool = True,
308
- enable_summarization: bool = True
309
- )
310
- ```
311
-
312
- ---
313
-
314
- ### 7. Callbacks (`agent_framework/callbacks.py`)
315
-
316
- Extensibility hooks for agent execution.
317
-
318
- #### create_optimizer_callback()
319
- Factory for optimization callbacks.
320
-
321
- ```python
322
- def create_optimizer_callback(
323
- apply_optimization: Callable,
324
- threshold: int = 50000,
325
- model_id: str = "gpt-4"
326
- ) -> Callable
327
- ```
328
-
329
- #### Features:
330
- - **Before LLM Callbacks**: Modify requests before API calls
331
- - **After Tool Callbacks**: Process tool results
332
- - **Async Support**: Both sync and async callbacks
333
-
334
- ---
335
-
336
- ### 8. Utilities (`agent_framework/utils.py`)
337
-
338
- Helper functions for tool definitions and trace display.
339
-
340
- #### Schema Generation
341
- ```python
342
- function_to_input_schema(func) -> dict
343
- format_tool_definition(name, description, parameters) -> dict
344
- function_to_tool_definition(func) -> dict
345
- ```
346
-
347
- #### Trace Display
348
- ```python
349
- format_trace(context: ExecutionContext) -> str
350
- display_trace(context: ExecutionContext) -> None
351
- ```
352
-
353
- #### MCP Conversion
354
- ```python
355
- mcp_tools_to_openai_format(mcp_tools) -> list[dict]
356
- ```
357
-
358
- ---
359
-
360
- ## Built-in Tools (`agent_tools/`)
361
-
362
- ### File Tools (`file_tools.py`)
363
-
364
- #### read_file()
365
- Reads text files (supports .txt, .csv, .json).
366
-
367
- ```python
368
- @tool
369
- def read_file(file_path: str) -> str
370
- ```
371
-
372
- #### read_media_file()
373
- Reads PDFs, Excel files, and images.
374
-
375
- ```python
376
- @tool
377
- def read_media_file(file_path: str) -> str
378
- ```
379
-
380
- Supports:
381
- - PDFs (via pymupdf)
382
- - Excel files (via pandas/openpyxl)
383
- - Images (via PIL)
384
-
385
- #### list_files()
386
- Lists directory contents.
387
-
388
- ```python
389
- @tool
390
- def list_files(directory_path: str) -> str
391
- ```
392
-
393
- #### unzip_file()
394
- Extracts zip archives.
395
-
396
- ```python
397
- @tool
398
- def unzip_file(zip_path: str, extract_to: str = None) -> str
399
- ```
400
-
401
- ---
402
-
403
- ### Web Tools (`web_tools.py`)
404
-
405
- #### search_web()
406
- Tavily API integration for web search.
407
-
408
- ```python
409
- @tool
410
- def search_web(query: str, max_results: int = 5) -> str
411
- ```
412
-
413
- ---
414
-
415
- ### Math Tools (`math_tools.py`)
416
-
417
- #### calculator()
418
- Safe eval-based calculator.
419
-
420
- ```python
421
- @tool
422
- def calculator(expression: str) -> str
423
- ```
424
-
425
- ---
426
-
427
- ## Web Application (`web_app/`)
428
-
429
- ### Backend (`app.py`)
430
-
431
- FastAPI server providing RESTful API for agent interaction.
432
-
433
- #### Endpoints:
434
-
435
- - `GET /`: Serves the chat interface
436
- - `POST /api/chat`: Send message to agent
437
- - `POST /api/upload`: Upload files
438
- - `GET /api/uploads`: List uploaded files
439
- - `DELETE /api/uploads/{filename}`: Delete uploaded file
440
- - `GET /api/tools`: List available tools
441
- - `GET /api/sessions/{session_id}`: Get session info
442
- - `DELETE /api/sessions/{session_id}`: Clear session
443
-
444
- #### Features:
445
- - **File Upload**: Handle user file uploads
446
- - **Session Management**: API-based session handling
447
- - **Tool Listing**: Expose available tools via API
448
- - **Trace Display**: Return formatted execution traces
449
- - **CORS Support**: Cross-origin requests enabled
450
-
451
- ---
452
-
453
- ### Frontend (`static/index.html`)
454
-
455
- Modern chat interface with full framework integration.
456
-
457
- #### Features:
458
- - **Chat Interface**: Real-time conversation UI
459
- - **File Upload UI**: Drag-and-drop file uploads
460
- - **Tool List Display**: Show available tools in sidebar
461
- - **Session Toggle**: Enable/disable session persistence
462
- - **Trace Modal**: View execution traces in formatted view
463
- - **Session ID Display**: Show current session ID
464
- - **Clear Session**: Reset conversation state
465
- - **Responsive Design**: Works on desktop and mobile
466
-
467
- ---
468
-
469
- ## Additional Features
470
-
471
- ### GAIA Evaluation (`gaia/`)
472
-
473
- Benchmark integration for evaluating agent performance.
474
-
475
- - **Problem Loading**: Load GAIA benchmark problems
476
- - **File Handling**: Download and extract attached files
477
- - **Evaluation**: Run agent on benchmark problems
478
- - **Results**: Track accuracy and solvability
479
-
480
- ### RAG Examples (`rag/`)
481
-
482
- Examples of retrieval-augmented generation.
483
-
484
- - **Chunking**: Text chunking strategies
485
- - **Embeddings**: Vector embeddings using OpenAI
486
- - **Vector Search**: Cosine similarity search
487
- - **Integration**: Using RAG with agents
488
-
489
- ### Example Scripts
490
-
491
- - **example_agent.py**: Basic agent usage example
492
- - **test_session.py**: Session persistence demonstration
493
-
494
- ---
495
-
496
- ## Architecture Overview
497
-
498
- ```
499
- ┌─────────────────────────────────────────────────────────┐
500
- │ User/Application │
501
- └──────────────────────┬────────────────────────────────────┘
502
-
503
-
504
- ┌─────────────────────────────────────────────────────────┐
505
- │ Agent.run() │
506
- │ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
507
- │ │ Think │→ │ Act │→ │ Observe │ │
508
- │ │ (LLM Call) │ │ (Tool Exec) │ │ (Process) │ │
509
- │ └──────────────┘ └──────────────┘ └──────────────┘ │
510
- └──────────────────────┬────────────────────────────────────┘
511
-
512
- ┌──────────────┼──────────────┐
513
- ▼ ▼ ▼
514
- ┌─────────────┐ ┌─────────────┐ ┌─────────────┐
515
- │ LlmClient │ │ Tools │ │ Execution │
516
- │ │ │ │ │ Context │
517
- └─────────────┘ └─────────────┘ └─────────────┘
518
- │ │ │
519
- ▼ ▼ ▼
520
- ┌─────────────┐ ┌─────────────┐ ┌─────────────┐
521
- │ LiteLLM │ │ MCP Tools │ │ Session │
522
- │ │ │ │ │ Manager │
523
- └─────────────┘ └─────────────┘ └─────────────┘
524
- ```
525
-
526
- ---
527
-
528
- ## Usage Examples
529
-
530
- ### Basic Agent
531
-
532
- ```python
533
- from agent_framework import Agent, LlmClient
534
- from agent_tools import calculator, search_web
535
-
536
- agent = Agent(
537
- model=LlmClient(model="gpt-4o-mini"),
538
- tools=[calculator, search_web],
539
- instructions="You are a helpful assistant.",
540
- max_steps=10
541
- )
542
-
543
- result = await agent.run("What is 123 * 456?")
544
- print(result.output)
545
- ```
546
-
547
- ### Agent with Session
548
-
549
- ```python
550
- from agent_framework import Agent, LlmClient, InMemorySessionManager
551
-
552
- session_manager = InMemorySessionManager()
553
-
554
- agent = Agent(
555
- model=LlmClient(model="gpt-4o-mini"),
556
- tools=[calculator],
557
- session_manager=session_manager
558
- )
559
-
560
- # First conversation
561
- result1 = await agent.run("My name is Alice", session_id="user-123")
562
-
563
- # Second conversation (remembers context)
564
- result2 = await agent.run("What's my name?", session_id="user-123")
565
- ```
566
-
567
- ### Agent with Structured Output
568
-
569
- ```python
570
- from pydantic import BaseModel
571
- from typing import Literal
572
-
573
- class SentimentAnalysis(BaseModel):
574
- sentiment: Literal["positive", "negative", "neutral"]
575
- confidence: float
576
- key_phrases: list[str]
577
-
578
- agent = Agent(
579
- model=LlmClient(model="gpt-4o-mini"),
580
- tools=[],
581
- instructions="Analyze sentiment.",
582
- output_type=SentimentAnalysis
583
- )
584
-
585
- result = await agent.run("I love this product!")
586
- print(result.output.sentiment) # "positive"
587
- ```
588
-
589
- ### Agent with Memory Optimization
590
-
591
- ```python
592
- from agent_framework import Agent, LlmClient, create_optimizer_callback
593
- from agent_framework.memory import apply_sliding_window
594
-
595
- optimizer = create_optimizer_callback(
596
- apply_optimization=apply_sliding_window,
597
- threshold=30000
598
- )
599
-
600
- agent = Agent(
601
- model=LlmClient(model="gpt-4o-mini"),
602
- tools=[calculator],
603
- before_llm_callback=optimizer
604
- )
605
- ```
606
-
607
- ---
608
-
609
- ## Design Decisions
610
-
611
- ### Why Pydantic for Models?
612
- - Runtime validation catches errors early
613
- - Automatic serialization/deserialization
614
- - Type safety with IDE support
615
-
616
- ### Why Dataclass for ExecutionContext?
617
- - Mutable state needs to be lightweight
618
- - No validation needed (internal use)
619
- - Better performance for frequent updates
620
-
621
- ### Why LiteLLM?
622
- - Multi-provider support (OpenAI, Anthropic, local)
623
- - Unified API interface
624
- - Easy to switch models
625
-
626
- ### Why MCP?
627
- - Standard protocol for tool discovery
628
- - Decouples tool servers from agents
629
- - Easy integration of external tools
630
-
631
- ---
632
-
633
- ## Future Enhancements
634
-
635
- Potential areas for expansion:
636
-
637
- 1. **Database Session Manager**: Persistent storage for sessions
638
- 2. **Streaming Responses**: Real-time token streaming
639
- 3. **Multi-Agent Coordination**: Agents working together
640
- 4. **Tool Marketplace**: Discover and share tools
641
- 5. **Advanced Memory**: Vector-based memory retrieval
642
- 6. **Cost Tracking**: Monitor API usage and costs
643
- 7. **Rate Limiting**: Built-in rate limiting
644
- 8. **Monitoring**: Observability and logging
645
-
646
- ---
647
-
648
- ## Conclusion
649
-
650
- This framework provides a complete foundation for building production-ready AI agents. It balances flexibility with structure, allowing you to build simple chatbots or complex multi-agent systems.
651
-
652
- For tutorials and examples, see the `misc/tutorials/` directory.
653
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
misc/tutorials/GITHUB_STRUCTURE.md DELETED
@@ -1,342 +0,0 @@
1
- # GitHub Repository Structure for Tutorial Series
2
-
3
- This document outlines the recommended GitHub repository structure for organizing the tutorial series code.
4
-
5
- ---
6
-
7
- ## Repository Organization
8
-
9
- ```
10
- ai-agent-from-scratch/
11
- ├── README.md # Main repository README
12
- ├── LICENSE # License file
13
- ├── pyproject.toml # Project configuration
14
- ├── requirements.txt # Python dependencies
15
-
16
- ├── agent_framework/ # Core framework (built in episodes 3-9)
17
- │ ├── __init__.py
18
- │ ├── models.py # Episode 3
19
- │ ├── llm.py # Episode 4
20
- │ ├── agent.py # Episodes 5, 7
21
- │ ├── tools.py # Episode 6
22
- │ ├── mcp.py # Episode 8
23
- │ ├── memory.py # Episode 9
24
- │ ├── callbacks.py # Episode 9
25
- │ ├── utils.py
26
- │ └── README.md
27
-
28
- ├── agent_tools/ # Built-in tools (Episodes 6-7)
29
- │ ├── __init__.py
30
- │ ├── file_tools.py
31
- │ ├── web_tools.py
32
- │ ├── math_tools.py
33
- │ └── README.md
34
-
35
- ├── web_app/ # Web deployment (Episode 10)
36
- │ ├── app.py
37
- │ ├── static/
38
- │ │ └── index.html
39
- │ ├── uploads/
40
- │ └── README.md
41
-
42
- ├── examples/ # Example scripts
43
- │ ├── example_agent.py
44
- │ ├── test_session.py
45
- │ └── README.md
46
-
47
- ├── misc/
48
- │ └── tutorials/ # Tutorial materials
49
- │ ├── FEATURE_DOCUMENTATION.md
50
- │ ├── ARCHITECTURE_DIAGRAMS.md
51
- │ ├── GITHUB_STRUCTURE.md
52
- │ ├── EPISODE_01_INTRODUCTION.md
53
- │ ├── EPISODE_02_LLM_CALL.md
54
- │ ├── EPISODE_03_DATA_MODELS.md
55
- │ ├── EPISODE_04_LLM_CLIENT.md
56
- │ ├── EPISODE_05_AGENT_LOOP.md
57
- │ ├── EPISODE_06_TOOL_SYSTEM.md
58
- │ ├── EPISODE_07_TOOL_EXECUTION.md
59
- │ ├── EPISODE_08_MCP.md
60
- │ ├── EPISODE_09_MEMORY.md
61
- │ ├── EPISODE_10_WEB_DEPLOYMENT.md
62
- │ └── exercises/
63
-
64
- └── .github/
65
- └── workflows/ # CI/CD (optional)
66
- └── tests.yml
67
- ```
68
-
69
- ---
70
-
71
- ## Branch Strategy
72
-
73
- ### Main Branch
74
- - `main`: Complete, working codebase
75
- - Always stable
76
- - Production-ready
77
-
78
- ### Episode Branches
79
- - `episode-1`: Python foundations (concepts only)
80
- - `episode-2`: LLM client basics
81
- - `episode-3`: Data models complete
82
- - `episode-4`: LLM client complete
83
- - `episode-5`: Basic agent loop
84
- - `episode-6`: Tool system complete
85
- - `episode-7`: Complete agent with tools
86
- - `episode-8`: MCP integration
87
- - `episode-9`: Memory management
88
- - `episode-10`: Web deployment
89
-
90
- ### Feature Branches (Optional)
91
- - `feature/session-db`: Database session manager
92
- - `feature/streaming`: Streaming responses
93
- - `feature/auth`: User authentication
94
-
95
- ---
96
-
97
- ## Commit Message Convention
98
-
99
- Use clear, descriptive commit messages that match episodes:
100
-
101
- ```
102
- Episode 3: Add Message, ToolCall, ToolResult models
103
- Episode 3: Add Event model with timestamp
104
- Episode 4: Implement LlmClient with LiteLLM
105
- Episode 4: Add build_messages() function
106
- Episode 5: Create basic Agent class
107
- Episode 5: Implement agent.run() method
108
- Episode 6: Add BaseTool abstract class
109
- Episode 6: Implement FunctionTool wrapper
110
- Episode 6: Add @tool decorator
111
- Episode 7: Implement tool execution in agent
112
- Episode 7: Add error handling for tools
113
- Episode 8: Add MCP integration
114
- Episode 9: Implement session management
115
- Episode 9: Add memory optimization
116
- Episode 10: Create FastAPI backend
117
- Episode 10: Build frontend interface
118
- ```
119
-
120
- ---
121
-
122
- ## README Structure
123
-
124
- ### Main README.md
125
-
126
- ```markdown
127
- # AI Agent Framework from Scratch
128
-
129
- A complete AI agent framework built from scratch, designed for learning and production use.
130
-
131
- ## Features
132
-
133
- - Multi-step reasoning with tools
134
- - Session persistence
135
- - Memory optimization
136
- - MCP integration
137
- - Web deployment
138
-
139
- ## Quick Start
140
-
141
- \`\`\`bash
142
- pip install -e .
143
- python examples/example_agent.py
144
- \`\`\`
145
-
146
- ## Tutorial Series
147
-
148
- This repository accompanies a 10-part YouTube tutorial series:
149
-
150
- 1. [Episode 1: Introduction & Python Foundations](./misc/tutorials/EPISODE_01_INTRODUCTION.md)
151
- 2. [Episode 2: Your First LLM Call](./misc/tutorials/EPISODE_02_LLM_CALL.md)
152
- 3. [Episode 3: Core Data Models](./misc/tutorials/EPISODE_03_DATA_MODELS.md)
153
- 4. [Episode 4: The LLM Client](./misc/tutorials/EPISODE_04_LLM_CLIENT.md)
154
- 5. [Episode 5: The Basic Agent Loop](./misc/tutorials/EPISODE_05_AGENT_LOOP.md)
155
- 6. [Episode 6: Building the Tool System](./misc/tutorials/EPISODE_06_TOOL_SYSTEM.md)
156
- 7. [Episode 7: Tool Execution & Complete Agent](./misc/tutorials/EPISODE_07_TOOL_EXECUTION.md)
157
- 8. [Episode 8: MCP Integration](./misc/tutorials/EPISODE_08_MCP.md)
158
- 9. [Episode 9: Session & Memory Management](./misc/tutorials/EPISODE_09_MEMORY.md)
159
- 10. [Episode 10: Web Deployment](./misc/tutorials/EPISODE_10_WEB_DEPLOYMENT.md)
160
-
161
- ## Documentation
162
-
163
- - [Feature Documentation](./misc/tutorials/FEATURE_DOCUMENTATION.md)
164
- - [Architecture Diagrams](./misc/tutorials/ARCHITECTURE_DIAGRAMS.md)
165
-
166
- ## Contributing
167
-
168
- Contributions welcome! See [CONTRIBUTING.md](./CONTRIBUTING.md) for guidelines.
169
- ```
170
-
171
- ---
172
-
173
- ## Tagging Strategy
174
-
175
- Tag releases to match episodes:
176
-
177
- ```bash
178
- # Episode milestones
179
- git tag -a v0.1.0-episode-3 -m "Episode 3: Data Models Complete"
180
- git tag -a v0.2.0-episode-5 -m "Episode 5: Basic Agent Loop"
181
- git tag -a v0.3.0-episode-7 -m "Episode 7: Complete Agent"
182
- git tag -a v1.0.0-episode-10 -m "Episode 10: Full Framework"
183
-
184
- # Push tags
185
- git push origin --tags
186
- ```
187
-
188
- ---
189
-
190
- ## Issue Templates
191
-
192
- ### Bug Report Template
193
-
194
- ```markdown
195
- **Episode**: [Which episode?]
196
- **Component**: [agent_framework/agent.py, etc.]
197
- **Description**: [What's wrong?]
198
- **Steps to Reproduce**: [How to reproduce]
199
- **Expected Behavior**: [What should happen]
200
- **Actual Behavior**: [What actually happens]
201
- ```
202
-
203
- ### Feature Request Template
204
-
205
- ```markdown
206
- **Episode**: [Which episode?]
207
- **Feature**: [What feature?]
208
- **Use Case**: [Why is this needed?]
209
- **Proposed Solution**: [How should it work?]
210
- ```
211
-
212
- ---
213
-
214
- ## Pull Request Template
215
-
216
- ```markdown
217
- ## Description
218
- [What does this PR do?]
219
-
220
- ## Episode
221
- [Which episode does this relate to?]
222
-
223
- ## Changes
224
- - [ ] Added new feature
225
- - [ ] Fixed bug
226
- - [ ] Updated documentation
227
- - [ ] Added tests
228
-
229
- ## Testing
230
- [How was this tested?]
231
-
232
- ## Checklist
233
- - [ ] Code follows style guidelines
234
- - [ ] Tests pass
235
- - [ ] Documentation updated
236
- - [ ] Episode branch updated
237
- ```
238
-
239
- ---
240
-
241
- ## GitHub Actions (Optional)
242
-
243
- ### CI Workflow
244
-
245
- ```yaml
246
- name: Tests
247
-
248
- on: [push, pull_request]
249
-
250
- jobs:
251
- test:
252
- runs-on: ubuntu-latest
253
- steps:
254
- - uses: actions/checkout@v3
255
- - uses: actions/setup-python@v4
256
- with:
257
- python-version: '3.11'
258
- - run: pip install -e .
259
- - run: pip install pytest
260
- - run: pytest tests/
261
- ```
262
-
263
- ---
264
-
265
- ## Documentation Organization
266
-
267
- ### Episode-Specific Documentation
268
-
269
- Each episode branch should include:
270
- - Code comments explaining decisions
271
- - Docstrings for all functions/classes
272
- - Inline comments for complex logic
273
- - README in relevant directories
274
-
275
- ### Example: Episode 3 Branch
276
-
277
- ```
278
- episode-3/
279
- ├── agent_framework/
280
- │ ├── models.py # Well-commented
281
- │ └── README.md # Explains models
282
- └── examples/
283
- └── test_models.py # Example usage
284
- ```
285
-
286
- ---
287
-
288
- ## File Naming Conventions
289
-
290
- - **Python files**: `snake_case.py`
291
- - **Markdown files**: `UPPER_CASE.md` for tutorial materials (episodes and reference docs such as `FEATURE_DOCUMENTATION.md`), `lowercase.md` for other docs
292
- - **Directories**: `snake_case/`
293
- - **Tests**: `test_*.py` or `*_test.py`
294
-
295
- ---
296
-
297
- ## Code Style
298
-
299
- - Follow PEP 8
300
- - Use type hints
301
- - Document with docstrings
302
- - Keep functions focused
303
- - Use meaningful names
304
-
305
- ---
306
-
307
- ## Release Process
308
-
309
- 1. Complete episode
310
- 2. Update episode branch
311
- 3. Merge to main
312
- 4. Tag release
313
- 5. Update documentation
314
- 6. Create release notes
315
-
316
- ---
317
-
318
- ## Community Guidelines
319
-
320
- - Be respectful
321
- - Provide constructive feedback
322
- - Follow episode structure
323
- - Test before submitting
324
- - Document your changes
325
-
326
- ---
327
-
328
- ## Resources
329
-
330
- - [Python Style Guide (PEP 8)](https://peps.python.org/pep-0008/)
331
- - [Semantic Versioning](https://semver.org/)
332
- - [Conventional Commits](https://www.conventionalcommits.org/)
333
-
334
- ---
335
-
336
- This structure ensures:
337
- - Clear progression through episodes
338
- - Easy navigation
339
- - Good organization
340
- - Professional presentation
341
- - Community contribution support
342
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
misc/tutorials/LANGCHAIN_LANGSERVE_PATTERNS.md DELETED
@@ -1,782 +0,0 @@
1
- # LangChain & LangServe: Advanced Patterns and Nuances
2
-
3
- A comprehensive reference for building production-ready agentic systems with LangChain and LangServe.
4
-
5
- ---
6
-
7
- ## Table of Contents
8
-
9
- 1. [LangGraph - Stateful Agent Workflows](#1-langgraph---stateful-agent-workflows)
10
- 2. [Agentic RAG Patterns](#2-agentic-rag-patterns)
11
- 3. [Human-in-the-Loop Workflows](#3-human-in-the-loop-workflows)
12
- 4. [Streaming Patterns](#4-streaming-patterns)
13
- 5. [Fallback and Routing Chains](#5-fallback-and-routing-chains)
14
- 6. [Structured Output Patterns](#6-structured-output-patterns)
15
- 7. [Tool Orchestration](#7-tool-orchestration)
16
- 8. [Multi-Agent Patterns](#8-multi-agent-patterns)
17
- 9. [Production Patterns](#9-production-patterns)
18
- 10. [Evaluation Patterns](#10-evaluation-patterns)
19
- 11. [Advanced Memory Patterns](#11-advanced-memory-patterns)
20
- 12. [Advanced Retrieval Patterns](#12-advanced-retrieval-patterns)
21
- 13. [Callbacks and Observability](#13-callbacks-and-observability)
22
- 14. [Dynamic Tool Generation](#14-dynamic-tool-generation)
23
- 15. [Complex Chain Patterns](#15-complex-chain-patterns)
24
- 16. [LangGraph Advanced Patterns](#16-langgraph-advanced-patterns)
25
- 17. [Guardrails and Safety](#17-guardrails-and-safety)
26
- 18. [Advanced Prompting Patterns](#18-advanced-prompting-patterns)
27
- 19. [Testing Patterns](#19-testing-patterns)
28
- 20. [LangServe Deployment Nuances](#20-langserve-deployment-nuances)
29
-
30
- ---
31
-
32
- ## 1. LangGraph - Stateful Agent Workflows
33
-
34
- LangGraph enables complex, stateful workflows with cycles and conditional routing.
35
-
36
- ### Key Concepts
37
- - **StateGraph**: Define a graph with typed state that flows between nodes
38
- - **Nodes**: Functions that process and transform state
39
- - **Edges**: Connections between nodes (can be conditional)
40
- - **Cycles**: Loops in the graph for iterative refinement
41
- - **Checkpointing**: Persist state for resumption
42
-
43
- ### What to Master
44
- - State management across nodes
45
- - Conditional edge routing based on state
46
- - Implementing cycles for iterative improvement
47
- - Human-in-the-loop breakpoints
48
- - State persistence and recovery
49
- - Subgraphs for modular workflows
50
-
51
- ### Interview-Worthy Projects
52
- - Multi-agent research assistant with supervisor coordination
53
- - Self-correcting RAG with review loop
54
- - Code generation with test → fix → retry cycles
55
-
56
- ---
57
-
58
- ## 2. Agentic RAG Patterns
59
-
60
- Beyond basic RAG - agents that reason about retrieval.
61
-
62
- ### Key Patterns
63
-
64
- **Self-Correcting RAG**
65
- - Initial retrieval
66
- - Generate answer
67
- - Self-check for hallucinations
68
- - Re-retrieve with refined query if needed
69
-
70
- **Query Transformation**
71
- - Query expansion (multiple variations)
72
- - Query decomposition (break into sub-queries)
73
- - Query refinement based on initial results
74
-
75
- **Adaptive Retrieval**
76
- - Decide when to retrieve vs. rely on the model's existing knowledge
77
- - Multi-hop reasoning for complex questions
78
- - Corrective RAG (re-retrieval on failure)
79
-
80
- **HyDE (Hypothetical Document Embeddings)**
81
- - Generate hypothetical answer first
82
- - Use it to retrieve similar real documents
83
- - Better semantic matching
84
-
85
- ### What to Master
86
- - When to retrieve vs when to answer directly
87
- - How to evaluate retrieval quality
88
- - Multi-step retrieval strategies
89
- - Combining retrieval with reasoning
90
-
91
- ---
92
-
93
- ## 3. Human-in-the-Loop Workflows
94
-
95
- Real production systems need human oversight.
96
-
97
- ### Key Patterns
98
-
99
- **Interrupts**
100
- - Pause execution at specific nodes
101
- - Wait for human approval
102
- - Resume with modified state if needed
103
-
104
- **State Persistence**
105
- - Save state across sessions
106
- - Allow humans to review asynchronously
107
- - Resume from any checkpoint
108
-
109
- **Approval Workflows**
110
- - Single approval gates
111
- - Multi-level approval chains
112
- - Conditional approval based on risk
113
-
114
- **Feedback Incorporation**
115
- - Collect human corrections
116
- - Learn from feedback
117
- - Improve over time
118
-
119
- ### What to Master
120
- - Designing breakpoints in workflows
121
- - State serialization for persistence
122
- - Multi-turn approval processes
123
- - Feedback loop architecture
124
-
125
- ---
126
-
127
- ## 4. Streaming Patterns
128
-
129
- Not just streaming text - structured streaming for complex chains.
130
-
131
- ### Key Patterns
132
-
133
- **Event Streaming**
134
- - Stream events from nested chains
135
- - Fine-grained control over what to stream
136
- - Different event types (tool start, LLM chunk, retriever end)
137
-
138
- **Progress Updates**
139
- - Stream status during long operations
140
- - Partial results as they become available
141
- - Error events for graceful handling
142
-
143
- **Multi-Agent Streaming**
144
- - Stream from multiple agents simultaneously
145
- - Coordinate streaming across agents
146
- - Aggregate and present coherently
147
-
148
- ### What to Master
149
- - `astream_events()` for fine-grained control
150
- - Streaming in multi-agent systems
151
- - Progress updates during long operations
152
- - Partial results streaming
153
- - Error streaming and recovery
154
-
155
- ---
156
-
157
- ## 5. Fallback and Routing Chains
158
-
159
- Build resilient systems with intelligent routing.
160
-
161
- ### Key Patterns
162
-
163
- **Fallback Chains**
164
- - Primary chain fails → try fallback
165
- - Multiple fallback levels
166
- - Graceful degradation
167
-
168
- **Semantic Routing**
169
- - Route based on query meaning
170
- - Different chains for different intents
171
- - Dynamic chain selection
172
-
173
- **Model Routing**
174
- - Simple queries → cheap model
175
- - Complex queries → powerful model
176
- - Cost-aware routing
177
-
178
- **Latency-Based Routing**
179
- - Fast model for time-sensitive queries
180
- - Slow model for quality-critical queries
181
-
182
- ### What to Master
183
- - Designing fallback hierarchies
184
- - Semantic similarity for routing
185
- - Cost vs quality tradeoffs
186
- - Error handling at each level
187
-
188
- ---
189
-
190
- ## 6. Structured Output Patterns
191
-
192
- Getting reliable, typed outputs from LLMs.
193
-
194
- ### Key Patterns
195
-
196
- **Pydantic Output Parsing**
197
- - Define schema with Pydantic
198
- - Parse LLM output into typed objects
199
- - Validation and error handling
200
-
201
- **Nested Structures**
202
- - Complex nested schemas
203
- - Lists and optional fields
204
- - Recursive structures
205
-
206
- **Partial Parsing for Streaming**
207
- - Parse incomplete JSON during streaming
208
- - Show progress while parsing
209
- - Handle malformed chunks
210
-
211
- **Error Recovery**
212
- - Retry on parse failure
213
- - Ask LLM to fix output
214
- - Fallback to simpler schema
215
-
216
- ### What to Master
217
- - Designing robust schemas
218
- - Handling validation errors
219
- - Streaming with structured output
220
- - Combining multiple structured outputs
221
-
222
- ---
223
-
224
- ## 7. Tool Orchestration
225
-
226
- Advanced patterns for tool usage.
227
-
228
- ### Key Patterns
229
-
230
- **Tool Dependencies**
231
- - Tools that call other tools
232
- - Sequential tool chains
233
- - Parallel tool execution
234
-
235
- **Conditional Tool Usage**
236
- - Select tools based on context
237
- - Skip tools when not needed
238
- - Dynamic tool availability
239
-
240
- **Tool Selection Strategies**
241
- - Semantic matching to select relevant tools
242
- - Limit tools based on query type
243
- - Dynamic tool generation
244
-
245
- **Tool Error Handling**
246
- - Retry failed tools
247
- - Fallback tools
248
- - Graceful degradation
249
-
250
- ### What to Master
251
- - Designing tool interfaces
252
- - Managing tool dependencies
253
- - Error handling and retries
254
- - Tool result validation
255
-
256
- ---
257
-
258
- ## 8. Multi-Agent Patterns
259
-
260
- Coordinating multiple agents for complex tasks.
261
-
262
- ### Key Patterns
263
-
264
- **Supervisor Pattern**
265
- - Manager agent coordinates workers
266
- - Routes tasks to appropriate agents
267
- - Synthesizes results
268
-
269
- **Debate Pattern**
270
- - Multiple agents discuss/debate
271
- - Reach consensus
272
- - Synthesize best answer
273
-
274
- **Pipeline Pattern**
275
- - Sequential agent handoffs
276
- - Each agent specializes
277
- - Pass context along
278
-
279
- **Hierarchical Teams**
280
- - Manager decomposes task
281
- - Workers execute subtasks
282
- - Manager synthesizes
283
-
284
- **Collaborative Agents**
285
- - Agents work together
286
- - Shared state
287
- - Complementary skills
288
-
289
- **Competitive Agents**
290
- - Multiple solutions
291
- - Evaluate and select best
292
- - Diverse approaches
293
-
294
- ### What to Master
295
- - Agent communication protocols
296
- - State sharing strategies
297
- - Conflict resolution
298
- - Coordination overhead management
299
-
300
- ---
301
-
302
- ## 9. Production Patterns
303
-
304
- Building systems that work in the real world.
305
-
306
- ### Key Patterns
307
-
308
- **Caching**
309
- - Semantic caching (similar queries → cached result)
310
- - Exact match caching
311
- - Cache invalidation strategies
312
-
313
- **Rate Limiting**
314
- - Per-user limits
315
- - Per-model limits
316
- - Graceful handling when limited
317
-
318
- **Retry Strategies**
319
- - Exponential backoff
320
- - Jitter to prevent thundering herd
321
- - Max retry limits
322
-
323
- **Cost Tracking**
324
- - Token usage per request
325
- - Cost per user/session
326
- - Budget enforcement
327
-
328
- **Batch Processing**
329
- - Batch similar requests
330
- - Efficient API usage
331
- - Queue management
332
-
333
- ### What to Master
334
- - Caching strategies for LLM calls
335
- - Rate limiting architecture
336
- - Cost optimization
337
- - Monitoring and alerting
338
-
339
- ---
340
-
341
- ## 10. Evaluation Patterns
342
-
343
- Measuring and improving agent quality.
344
-
345
- ### Key Patterns
346
-
347
- **LLM-as-Judge**
348
- - Use LLM to evaluate outputs
349
- - Define evaluation criteria
350
- - Score and compare
351
-
352
- **Custom Evaluators**
353
- - Domain-specific metrics
354
- - Task-specific evaluation
355
- - Automated scoring
356
-
357
- **A/B Testing**
358
- - Compare different agents
359
- - Statistical significance
360
- - User preference tracking
361
-
362
- **Regression Testing**
363
- - Maintain test suite
364
- - Detect regressions
365
- - Continuous evaluation
366
-
367
- **Human Feedback**
368
- - Collect user ratings
369
- - Incorporate corrections
370
- - Improve over time
371
-
372
- ### What to Master
373
- - Designing evaluation criteria
374
- - Building test datasets
375
- - Interpreting evaluation results
376
- - Continuous improvement loops
377
-
378
- ---
379
-
380
- ## 11. Advanced Memory Patterns
381
-
382
- Beyond basic conversation memory.
383
-
384
- ### Key Patterns
385
-
386
- **Cross-Session Memory**
387
- - Remember across conversations
388
- - User-specific memory
389
- - Relevant context retrieval
390
-
391
- **Entity Memory**
392
- - Track entities (people, places, things)
393
- - Update entity knowledge
394
- - Retrieve entity context
395
-
396
- **Summary + Buffer Hybrid**
397
- - Recent messages in full
398
- - Older messages summarized
399
- - Token-efficient
400
-
401
- **Vector Store Memory**
402
- - All conversations in vector DB
403
- - Semantic search for relevant context
404
- - Scalable long-term memory
405
-
406
- ### What to Master
407
- - Memory type selection
408
- - Token budget management
409
- - Memory persistence
410
- - Privacy considerations
411
-
412
- ---
413
-
414
- ## 12. Advanced Retrieval Patterns
415
-
416
- Sophisticated retrieval strategies.
417
-
418
- ### Key Patterns
419
-
420
- **Multi-Query Retrieval**
421
- - Generate multiple query variations
422
- - Retrieve for all variations
423
- - Combine results
424
-
425
- **Contextual Compression**
426
- - Retrieve documents
427
- - Extract only relevant parts
428
- - Reduce context size
429
-
430
- **Parent Document Retrieval**
431
- - Index small chunks
432
- - Retrieve full parent documents
433
- - Best of both worlds
434
-
435
- **Ensemble Retrieval**
436
- - Combine semantic + keyword search
437
- - Weighted combination
438
- - Better recall
439
-
440
- **Reranking**
441
- - Initial retrieval
442
- - Rerank with cross-encoder
443
- - Better precision
444
-
445
- ### What to Master
446
- - Choosing retrieval strategies
447
- - Combining multiple approaches
448
- - Evaluating retrieval quality
449
- - Optimizing for latency vs quality
450
-
451
- ---
452
-
453
- ## 13. Callbacks and Observability
454
-
455
- Understanding what your agents are doing.
456
-
457
- ### What to Track
458
- - Token usage (cost)
459
- - Latency per step
460
- - Tool execution times
461
- - Error rates
462
- - Retrieval quality
463
- - User satisfaction
464
-
465
- ### Integration Points
466
- - LangSmith for tracing
467
- - Custom logging
468
- - Metrics systems
469
- - Alerting
470
-
471
- ### What to Master
472
- - Designing callback handlers
473
- - Async callbacks for non-blocking execution
474
- - Aggregating metrics
475
- - Setting up alerts
476
-
477
- ---
478
-
479
- ## 14. Dynamic Tool Generation
480
-
481
- Tools that create themselves.
482
-
483
- ### Key Patterns
484
-
485
- **API-Based Tools**
486
- - Generate tools from OpenAPI specs
487
- - Database schema to tools
488
- - Dynamic API discovery
489
-
490
- **Dynamic Tool Selection**
491
- - Too many tools → select dynamically
492
- - Semantic matching to query
493
- - Limit active tools
494
-
495
- **Tool Composition**
496
- - Combine tools into workflows
497
- - Meta-tools that orchestrate
498
- - Adaptive tool creation
499
-
500
- ### What to Master
501
- - API spec parsing
502
- - Dynamic function generation
503
- - Tool relevance scoring
504
- - Managing tool explosion
505
-
506
- ---
507
-
508
- ## 15. Complex Chain Patterns
509
-
510
- Advanced chain compositions.
511
-
512
- ### Key Patterns
513
-
514
- **Map-Reduce**
515
- - Process chunks in parallel (map)
516
- - Combine results (reduce)
517
- - Good for large documents
518
-
519
- **Refine**
520
- - Iteratively refine answer
521
- - Each document improves result
522
- - Good for synthesis
523
-
524
- **Branching**
525
- - Conditional paths
526
- - Multiple parallel branches
527
- - Merge results
528
-
529
- ### What to Master
530
- - Choosing the right pattern
531
- - Handling large inputs
532
- - Parallel execution
533
- - Result aggregation
534
-
535
- ---
536
-
537
- ## 16. LangGraph Advanced Patterns
538
-
539
- Deep LangGraph knowledge.
540
-
541
- ### Key Patterns
542
-
543
- **Subgraphs**
544
- - Nested graphs for modularity
545
- - Reusable workflow components
546
- - Clean separation of concerns
547
-
548
- **Parallel Execution**
549
- - Fan out to multiple nodes
550
- - Process in parallel
551
- - Fan in to merge
552
-
553
- **Time Travel**
554
- - Replay from any checkpoint
555
- - Debug by stepping through
556
- - Modify and replay
557
-
558
- **Conditional Cycles**
559
- - Loop until condition met
560
- - Self-improvement loops
561
- - Bounded iteration
562
-
563
- ### What to Master
564
- - Graph design patterns
565
- - State serialization
566
- - Checkpoint management
567
- - Debugging complex graphs
568
-
569
- ---
570
-
571
- ## 17. Guardrails and Safety
572
-
573
- Keeping agents safe.
574
-
575
- ### Key Patterns
576
-
577
- **Constitutional AI**
578
- - Self-critique
579
- - Revise harmful outputs
580
- - Principle-based filtering
581
-
582
- **Input Validation**
583
- - Check inputs before processing
584
- - Reject unsafe requests
585
- - Log suspicious activity
586
-
587
- **Output Filtering**
588
- - Check outputs before returning
589
- - Remove sensitive information
590
- - Ensure policy compliance
591
-
592
- **Rate Limiting**
593
- - Prevent abuse
594
- - Per-user limits
595
- - Anomaly detection
596
-
597
- ### What to Master
598
- - Designing safety principles
599
- - Input/output validation
600
- - PII detection and removal
601
- - Audit logging
602
-
603
- ---
604
-
605
- ## 18. Advanced Prompting Patterns
606
-
607
- Sophisticated prompt engineering.
608
-
609
- ### Key Patterns
610
-
611
- **Few-Shot Learning**
612
- - Include examples in prompt
613
- - Dynamic example selection
614
- - Semantic similarity for selection
615
-
616
- **Chain of Thought**
617
- - Step-by-step reasoning
618
- - Show work before answer
619
- - Better for complex tasks
620
-
621
- **Self-Consistency**
622
- - Generate multiple answers
623
- - Vote on best
624
- - Higher reliability
625
-
626
- **Role Prompting**
627
- - Assign specific roles
628
- - Expert personas
629
- - Behavior shaping
630
-
631
- ### What to Master
632
- - Example selection strategies
633
- - Prompt templates
634
- - Dynamic prompt construction
635
- - Prompt optimization
636
-
637
- ---
638
-
639
- ## 19. Testing Patterns
640
-
641
- Ensuring quality.
642
-
643
- ### Key Patterns
644
-
645
- **Unit Testing**
646
- - Test individual components
647
- - Mock LLM responses
648
- - Fast feedback
649
-
650
- **Integration Testing**
651
- - Test full chains
652
- - Real LLM calls
653
- - End-to-end verification
654
-
655
- **Regression Testing**
656
- - Maintain golden dataset
657
- - Detect quality drops
658
- - Continuous monitoring
659
-
660
- **Load Testing**
661
- - Test under load
662
- - Find bottlenecks
663
- - Capacity planning
664
-
665
- ### What to Master
666
- - Mocking LLM calls
667
- - Test dataset curation
668
- - Evaluation metrics
669
- - CI/CD integration
670
-
671
- ---
672
-
673
- ## 20. LangServe Deployment Nuances
674
-
675
- Production deployment details.
676
-
677
- ### Key Patterns
678
-
679
- **Custom Endpoints**
680
- - Custom input/output schemas
681
- - Disable playground in production
682
- - Custom error handling
683
-
684
- **Authentication**
685
- - Middleware for auth
686
- - Token validation
687
- - Role-based access
688
-
689
- **Batch Endpoints**
690
- - Automatic batch support
691
- - Efficient processing
692
- - Queue management
693
-
694
- **Scaling**
695
- - Horizontal scaling
696
- - Load balancing
697
- - Connection pooling
698
-
699
- ### What to Master
700
- - FastAPI middleware
701
- - Authentication patterns
702
- - Performance tuning
703
- - Monitoring and logging
704
-
705
- ---
706
-
707
- ## Key Nuances Summary
708
-
709
- | Area | Critical Nuance |
710
- |------|-----------------|
711
- | **LangGraph** | State must be serializable; use Pydantic |
712
- | **Streaming** | Use `astream_events` for fine control |
713
- | **Memory** | Token counting is crucial for long chats |
714
- | **RAG** | Chunk size dramatically affects retrieval quality |
715
- | **Tools** | Too many tools confuse the LLM (keep under 10-15) |
716
- | **Callbacks** | Use async callbacks for non-blocking execution |
717
- | **Caching** | Semantic cache > exact match cache |
718
- | **Fallbacks** | Order matters; try cheapest/fastest first |
719
- | **Evaluation** | LLM-as-judge is powerful but needs calibration |
720
- | **Multi-agent** | Communication overhead can dominate |
721
-
722
- ---
723
-
724
- ## Interview-Worthy Project Ideas
725
-
726
- 1. **LangGraph Multi-Agent Research System**
727
- - Supervisor coordinates researcher, writer, reviewer agents
728
- - Human approval before publishing
729
- - Self-correction loop
730
-
731
- 2. **Self-Correcting RAG System**
732
- - Hallucination detection
733
- - Automatic re-retrieval
734
- - Quality scoring
735
-
736
- 3. **Production Chatbot**
737
- - Memory across sessions
738
- - Streaming with progress
739
- - Fallback chains
740
- - Cost tracking
741
-
742
- 4. **Code Assistant**
743
- - Generate code
744
- - Run tests
745
- - Fix failures
746
- - Iterate until passing
747
-
748
- 5. **Research Agent**
749
- - Web search
750
- - Document analysis
751
- - Citation tracking
752
- - Human-in-the-loop approval
753
-
754
- ---
755
-
756
- ## What Interviewers Look For
757
-
758
- | Question They Ask | What Impresses |
759
- |-------------------|----------------|
760
- | "How does it handle failures?" | Fallback chains, retries, graceful degradation |
761
- | "How do you ensure quality?" | Self-checking, evaluation, human-in-the-loop |
762
- | "How does it scale?" | Caching, batching, async, connection pooling |
763
- | "How do agents coordinate?" | LangGraph, state machines, message passing |
764
- | "How do you monitor it?" | LangSmith, custom callbacks, cost tracking |
765
- | "How do you test it?" | Unit tests, regression tests, evaluation suites |
766
-
767
- ---
768
-
769
- ## Learning Path
770
-
771
- 1. **Start**: Basic chains and prompts
772
- 2. **Add**: Tools and agents
773
- 3. **Upgrade**: RAG with advanced retrieval
774
- 4. **Advanced**: LangGraph for stateful workflows
775
- 5. **Production**: Streaming, caching, monitoring
776
- 6. **Scale**: Multi-agent, human-in-the-loop
777
- 7. **Master**: Evaluation, optimization, safety
778
-
779
- ---
780
-
781
- *Last Updated: February 2026*
782
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
misc/tutorials/NEXT_STEPS.md DELETED
@@ -1,442 +0,0 @@
1
- # Next Steps: Complementary Skills & Learning Path
2
-
3
- After building this agent framework from scratch, here's what to learn next to become a complete Agentic AI Engineer.
4
-
5
- ---
6
-
7
- ## Why This Matters
8
-
9
- You've built the fundamentals. But in the real world:
10
- - Agents need to retrieve knowledge (RAG)
11
- - Complex tasks need multiple agents
12
- - Production systems need observability
13
- - Safety is non-negotiable
14
-
15
- This guide helps you add value beyond "I built an agent framework."
16
-
17
- ---
18
-
19
- ## Priority 1: RAG (Retrieval Augmented Generation)
20
-
21
- You have basic embeddings, but production RAG is much deeper.
22
-
23
- ### Key Concepts
24
-
25
- | Concept | Description | Why It Matters |
26
- |---------|-------------|----------------|
27
- | **Chunking Strategies** | Fixed-size, semantic, recursive splitting | Affects retrieval quality dramatically |
28
- | **Hybrid Search** | Combine vector + keyword (BM25) | Better results than vector-only |
29
- | **Re-ranking** | Cross-encoders to improve top-k | Fixes retriever mistakes |
30
- | **Vector Databases** | Pinecone, Weaviate, Qdrant, Chroma | Each has different tradeoffs |
31
- | **Query Transformation** | HyDE, step-back, multi-query | Improve query-document matching |
32
- | **Agentic RAG** | Agent decides when/what to retrieve | Most flexible approach |
33
-
34
- ### Add to Your Project
35
-
36
- ```python
37
- @tool
38
- def rag_search(query: str, top_k: int = 5) -> str:
39
- """Search knowledge base with hybrid retrieval."""
40
- # 1. Vector search
41
- vector_results = vector_db.search(embed(query), top_k=top_k*2)
42
-
43
- # 2. Keyword search (BM25)
44
- keyword_results = bm25_search(query, top_k=top_k*2)
45
-
46
- # 3. Merge and dedupe
47
- combined = merge_results(vector_results, keyword_results)
48
-
49
- # 4. Re-rank with cross-encoder
50
- reranked = cross_encoder.rerank(query, combined, top_k=top_k)
51
-
52
- return format_results(reranked)
53
- ```
54
-
55
- ### Resources
56
- - [LlamaIndex](https://docs.llamaindex.ai/) - Best RAG framework
57
- - [LangChain RAG Tutorial](https://python.langchain.com/docs/tutorials/rag/)
58
- - Paper: "Retrieval-Augmented Generation for Large Language Models: A Survey"
59
-
60
- ---
61
-
62
- ## Priority 2: Multi-Agent Systems
63
-
64
- Your framework is single-agent. The industry is moving to multi-agent architectures.
65
-
66
- ### Patterns
67
-
68
- | Pattern | Description | Use Case |
69
- |---------|-------------|----------|
70
- | **Supervisor** | One agent delegates to specialists | Complex tasks with clear subtasks |
71
- | **Debate** | Agents argue, synthesize best answer | Reduce hallucination, improve reasoning |
72
- | **Pipeline** | Agent A -> Agent B -> Agent C | Sequential processing |
73
- | **Swarm** | Agents coordinate dynamically | Open-ended exploration |
74
- | **Reflection** | Agent critiques own output | Self-improvement loop |
75
-
76
- ### Example: Supervisor Pattern
77
-
78
- ```python
79
- class SupervisorAgent(Agent):
80
- def __init__(self, specialists: List[Agent]):
81
- self.specialists = {agent.name: agent for agent in specialists}
82
- super().__init__(
83
- instructions="""You are a supervisor.
84
- Delegate tasks to specialists:
85
- - researcher: for information gathering
86
- - coder: for code tasks
87
- - writer: for content creation
88
- """
89
- )
90
-
91
- async def delegate(self, task: str, specialist_name: str):
92
- specialist = self.specialists[specialist_name]
93
- return await specialist.run(task)
94
- ```
95
-
96
- ### Frameworks to Study
97
- - **LangGraph** - Stateful multi-agent workflows
98
- - **CrewAI** - Role-based agent teams
99
- - **AutoGen** - Microsoft's multi-agent framework
100
- - **Swarm** - OpenAI's experimental framework
101
-
102
- ---
103
-
104
- ## Priority 3: Observability & Tracing
105
-
106
- You have `format_trace`, but production systems need more.
107
-
108
- ### Tools
109
-
110
- | Tool | Type | Best For |
111
- |------|------|----------|
112
- | **LangSmith** | SaaS | LangChain users, enterprise |
113
- | **LangFuse** | Open Source | Self-hosted, privacy-focused |
114
- | **Weights & Biases** | SaaS | Experiment tracking |
115
- | **OpenTelemetry** | Standard | Distributed tracing |
116
- | **Arize Phoenix** | Open Source | LLM observability |
117
-
118
- ### Key Metrics to Track
119
-
120
- ```python
121
- @dataclass
122
- class AgentMetrics:
123
- # Latency
124
- total_duration_ms: float
125
- llm_call_duration_ms: float
126
- tool_execution_duration_ms: float
127
-
128
- # Token Usage
129
- prompt_tokens: int
130
- completion_tokens: int
131
- total_tokens: int
132
-
133
- # Cost
134
- estimated_cost_usd: float
135
-
136
- # Quality
137
- steps_to_completion: int
138
- tool_calls_count: int
139
- errors_count: int
140
- ```
141
-
142
- ### Add to Your Project
143
-
144
- ```python
145
- # In agent.py
146
- class Agent:
147
- async def run(self, ...):
148
- start_time = time.time()
149
-
150
- try:
151
- result = await self._run_internal(...)
152
-
153
- # Log metrics
154
- self.log_metrics(AgentMetrics(
155
- total_duration_ms=(time.time() - start_time) * 1000,
156
- steps_to_completion=result.context.current_step,
157
- # ... other metrics
158
- ))
159
-
160
- return result
161
- except Exception as e:
162
- self.log_error(e)
163
- raise
164
- ```
165
-
166
- ---
167
-
168
- ## Priority 4: Evaluation & Benchmarking
169
-
170
- You have GAIA. Go deeper with systematic evaluation.
171
-
172
- ### Evaluation Types
173
-
174
- | Type | What It Measures | How |
175
- |------|------------------|-----|
176
- | **Task Completion** | Did agent solve the problem? | Binary success/fail |
177
- | **Accuracy** | Is the answer correct? | Compare to ground truth |
178
- | **Faithfulness** | Is answer grounded in retrieved context? | LLM-as-Judge |
179
- | **Relevance** | Is answer relevant to question? | LLM-as-Judge |
180
- | **Latency** | How fast is the agent? | Time measurement |
181
- | **Cost** | How much did it cost? | Token tracking |
182
-
183
- ### LLM-as-Judge Pattern
184
-
185
- ```python
186
- JUDGE_PROMPT = """
187
- You are evaluating an AI agent's response.
188
-
189
- Question: {question}
190
- Agent's Answer: {answer}
191
- Ground Truth: {ground_truth}
192
-
193
- Rate the answer on a scale of 1-5:
194
- 1 = Completely wrong
195
- 2 = Partially wrong
196
- 3 = Partially correct
197
- 4 = Mostly correct
198
- 5 = Completely correct
199
-
200
- Provide your rating and reasoning.
201
- """
202
-
203
- async def evaluate_with_llm(question: str, answer: str, ground_truth: str) -> int:
204
- response = await llm.generate(JUDGE_PROMPT.format(...))
205
- return extract_rating(response)
206
- ```
207
-
208
- ### Frameworks
209
- - **Ragas** - RAG evaluation
210
- - **DeepEval** - LLM evaluation framework
211
- - **Promptfoo** - Prompt testing
212
- - **Evalica** - Comparative evaluation
213
-
214
- ---
215
-
216
- ## Priority 5: Safety & Guardrails
217
-
218
- Production agents need safety layers.
219
-
220
- ### Input Guardrails
221
-
222
- ```python
223
- class InputGuardrails:
224
- def __init__(self):
225
- self.blocked_patterns = [
226
- r"ignore previous instructions",
227
- r"you are now",
228
- r"pretend to be",
229
- ]
230
-
231
- def check(self, input: str) -> bool:
232
- for pattern in self.blocked_patterns:
233
- if re.search(pattern, input, re.IGNORECASE):
234
- return False
235
- return True
236
- ```
237
-
238
- ### Output Guardrails
239
-
240
- ```python
241
- class OutputGuardrails:
242
- async def check(self, output: str) -> tuple[bool, str]:
243
- # Check for PII
244
- if self.contains_pii(output):
245
- return False, "Response contains PII"
246
-
247
- # Check for harmful content
248
- if await self.is_harmful(output):
249
- return False, "Response contains harmful content"
250
-
251
- return True, ""
252
- ```
253
-
254
- ### Integration with Your Framework
255
-
256
- ```python
257
- # Add as callbacks
258
- agent = Agent(
259
- model=LlmClient(model="gpt-4o-mini"),
260
- tools=[...],
261
- before_llm_callback=input_guardrails.check,
262
- after_llm_callback=output_guardrails.check,
263
- )
264
- ```
265
-
266
- ### Tools
267
- - **Guardrails AI** - Structured output validation
268
- - **NeMo Guardrails** - NVIDIA's safety framework
269
- - **Lakera Guard** - Prompt injection detection
270
- - **Rebuff** - Self-hardening prompt injection detector
271
-
272
- ---
273
-
274
- ## Priority 6: LLM Routing & Optimization
275
-
276
- ### Smart Model Selection
277
-
278
- ```python
279
- class ModelRouter:
280
- def __init__(self):
281
- self.models = {
282
- "simple": "gpt-4o-mini", # Fast, cheap
283
- "complex": "gpt-4o", # Powerful
284
- "coding": "claude-sonnet-4-5", # Best for code
285
- }
286
-
287
- async def route(self, query: str) -> str:
288
- # Classify query complexity
289
- complexity = await self.classify_complexity(query)
290
-
291
- if "code" in query.lower():
292
- return self.models["coding"]
293
- elif complexity == "high":
294
- return self.models["complex"]
295
- else:
296
- return self.models["simple"]
297
- ```
298
-
299
- ### Semantic Caching
300
-
301
- ```python
302
- class SemanticCache:
303
- def __init__(self, similarity_threshold: float = 0.95):
304
- self.cache = {}
305
- self.embeddings = {}
306
- self.threshold = similarity_threshold
307
-
308
- async def get(self, query: str) -> str | None:
309
- query_embedding = embed(query)
310
-
311
- for cached_query, cached_response in self.cache.items():
312
- similarity = cosine_similarity(
313
- query_embedding,
314
- self.embeddings[cached_query]
315
- )
316
- if similarity > self.threshold:
317
- return cached_response
318
-
319
- return None
320
-
321
- async def set(self, query: str, response: str):
322
- self.cache[query] = response
323
- self.embeddings[query] = embed(query)
324
- ```
325
-
326
- ---
327
-
328
- ## Suggested Learning Path
329
-
330
- ### Month 1: RAG Deep Dive
331
- - [ ] Implement hybrid search (vector + BM25)
332
- - [ ] Add re-ranking with cross-encoder
333
- - [ ] Build RAGTool for your agent
334
- - [ ] Experiment with different chunking strategies
335
-
336
- ### Month 2: Multi-Agent Systems
337
- - [ ] Study LangGraph architecture
338
- - [ ] Implement supervisor pattern
339
- - [ ] Build debate/reflection agents
340
- - [ ] Add multi-agent orchestration layer
341
-
342
- ### Month 3: Production Readiness
343
- - [ ] Integrate LangFuse for observability
344
- - [ ] Implement input/output guardrails
345
- - [ ] Build evaluation suite with LLM-as-Judge
346
- - [ ] Add cost tracking and alerts
347
-
348
- ### Month 4: Advanced Topics
349
- - [ ] Implement smart model routing
350
- - [ ] Add semantic caching
351
- - [ ] Experiment with fine-tuning
352
- - [ ] Build monitoring dashboard
353
-
354
- ---
355
-
356
- ## Quick Wins to Add Now
357
-
358
- These can be added to your framework in a few hours each:
359
-
360
- ### 1. Semantic Caching
361
- ```python
362
- # In memory.py
363
- class SemanticCache:
364
- """Cache responses for similar queries."""
365
- ...
366
- ```
367
-
368
- ### 2. Cost Tracker
369
- ```python
370
- # In agent.py
371
- PRICING = {
372
- "gpt-4o-mini": {"input": 0.15, "output": 0.60}, # per 1M tokens
373
- "gpt-4o": {"input": 2.50, "output": 10.00},
374
- }
375
-
376
- def calculate_cost(model: str, input_tokens: int, output_tokens: int) -> float:
377
- prices = PRICING.get(model, {"input": 0, "output": 0})
378
- return (input_tokens * prices["input"] + output_tokens * prices["output"]) / 1_000_000
379
- ```
380
-
381
- ### 3. Streaming Support
382
- ```python
383
- # In llm.py
384
- async def generate_streaming(self, request: LlmRequest):
385
- """Stream tokens as they're generated."""
386
- ...
387
- ```
388
-
389
- ### 4. Simple Guardrails
390
- ```python
391
- # In callbacks.py
392
- def prompt_injection_detector(context, request):
393
- """Block obvious prompt injection attempts."""
394
- ...
395
- ```
396
-
397
- ### 5. Retry with Exponential Backoff
398
- ```python
399
- # In llm.py
400
- async def generate_with_retry(self, request: LlmRequest, max_retries: int = 3):
401
- """Retry failed LLM calls with exponential backoff."""
402
- ...
403
- ```
404
-
405
- ---
406
-
407
- ## Resources
408
-
409
- ### Courses
410
- - [DeepLearning.AI - Building Agentic RAG with LlamaIndex](https://www.deeplearning.ai/short-courses/building-agentic-rag-with-llamaindex/)
411
- - [DeepLearning.AI - Multi AI Agent Systems with crewAI](https://www.deeplearning.ai/short-courses/multi-ai-agent-systems-with-crewai/)
412
- - [LangChain Academy](https://academy.langchain.com/)
413
-
414
- ### Papers
415
- - "ReAct: Synergizing Reasoning and Acting in Language Models"
416
- - "Toolformer: Language Models Can Teach Themselves to Use Tools"
417
- - "Chain-of-Thought Prompting Elicits Reasoning in Large Language Models"
418
- - "Retrieval-Augmented Generation for Large Language Models: A Survey"
419
-
420
- ### Communities
421
- - [LangChain Discord](https://discord.gg/langchain)
422
- - [LlamaIndex Discord](https://discord.gg/llamaindex)
423
- - [Latent Space Podcast](https://www.latent.space/)
424
- - [AI Engineer Newsletter](https://www.aiengineer.dev/)
425
-
426
- ---
427
-
428
- ## What Would Make Your Project Stand Out
429
-
430
- 1. **RAG + Agents** - Agent that retrieves, reasons, and acts
431
- 2. **Multi-Agent Orchestration** - Coordinator + specialists
432
- 3. **Built-in Evaluation** - Self-testing agent framework
433
- 4. **Safety Layer** - Production-grade guardrails
434
- 5. **Observability Dashboard** - Visual trace explorer
435
- 6. **Semantic Caching** - Cost optimization
436
- 7. **Model Routing** - Smart model selection
437
-
438
- ---
439
-
440
- **Previous**: [Resume Guide](./RESUME_GUIDE.md)
441
- **Back to**: [Tutorial Overview](./README.md)
442
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
misc/tutorials/README.md DELETED
@@ -1,278 +0,0 @@
1
- # Tutorial Series Materials
2
-
3
- This directory contains all materials for the "Building an AI Agent Framework from Scratch" YouTube tutorial series.
4
-
5
- ---
6
-
7
- ## Documentation
8
-
9
- ### Core Documentation
10
- - **[FEATURE_DOCUMENTATION.md](./FEATURE_DOCUMENTATION.md)**: Complete inventory of all framework features
11
- - **[ARCHITECTURE_DIAGRAMS.md](./ARCHITECTURE_DIAGRAMS.md)**: Visual diagrams using Mermaid syntax
12
- - **[GITHUB_STRUCTURE.md](./GITHUB_STRUCTURE.md)**: Repository organization and branch strategy
13
- - **[EXERCISES.md](./EXERCISES.md)**: Exercises and challenges for each episode
14
- - **[ADDITIONAL_EXERCISES.md](./ADDITIONAL_EXERCISES.md)**: Cross-topic challenges and integration exercises
15
-
16
- ### Career & Next Steps
17
- - **[RESUME_GUIDE.md](./RESUME_GUIDE.md)**: How to market this project for AI engineering roles
18
- - **[NEXT_STEPS.md](./NEXT_STEPS.md)**: Complementary skills & learning path after completion
19
-
20
- ---
21
-
22
- ## 🎬 Episode Guides
23
-
24
- ### Episode 1: Introduction & Python Foundations
25
- **[EPISODE_01_INTRODUCTION.md](./EPISODE_01_INTRODUCTION.md)**
26
- - Python patterns: Pydantic, dataclasses, async/await
27
- - Type hints and validation
28
- - Duration: 30 minutes
29
-
30
- ### Episode 2: Your First LLM Call
31
- **[EPISODE_02_LLM_CALL.md](./EPISODE_02_LLM_CALL.md)**
32
- - Chat completion API format
33
- - LiteLLM integration
34
- - Error handling
35
- - Duration: 25 minutes
36
-
37
- ### Episode 3: Core Data Models
38
- **[EPISODE_03_DATA_MODELS.md](./EPISODE_03_DATA_MODELS.md)**
39
- - Message, ToolCall, ToolResult models
40
- - Event and ExecutionContext
41
- - Pydantic vs Dataclass
42
- - Duration: 30 minutes
43
-
44
- ### Episode 4: The LLM Client
45
- **[EPISODE_04_LLM_CLIENT.md](./EPISODE_04_LLM_CLIENT.md)**
46
- - LlmRequest and LlmResponse models
47
- - build_messages() function
48
- - Response parsing
49
- - Duration: 30 minutes
50
-
51
- ### Episode 5: The Basic Agent Loop
52
- **[EPISODE_05_AGENT_LOOP.md](./EPISODE_05_AGENT_LOOP.md)**
53
- - Think-Act-Observe cycle
54
- - Agent.run() and Agent.step()
55
- - Execution tracking
56
- - Duration: 35 minutes
57
-
58
- ### Episode 6: Building the Tool System
59
- **[EPISODE_06_TOOL_SYSTEM.md](./EPISODE_06_TOOL_SYSTEM.md)**
60
- - BaseTool abstract class
61
- - FunctionTool wrapper
62
- - @tool decorator
63
- - Schema generation
64
- - Duration: 35 minutes
65
-
66
- ### Episode 7: Tool Execution & Complete Agent
67
- **[EPISODE_07_TOOL_EXECUTION.md](./EPISODE_07_TOOL_EXECUTION.md)**
68
- - Tool execution in agent loop
69
- - Error handling
70
- - Complete working agent
71
- - Duration: 35 minutes
72
-
73
- ### Episode 8: MCP Integration
74
- **[EPISODE_08_MCP.md](./EPISODE_08_MCP.md)**
75
- - Model Context Protocol
76
- - Tool discovery
77
- - MCP server integration
78
- - Duration: 30 minutes
79
-
80
- ### Episode 9: Session & Memory Management
81
- **[EPISODE_09_MEMORY.md](./EPISODE_09_MEMORY.md)**
82
- - Session persistence
83
- - Token counting
84
- - Memory optimization strategies
85
- - Duration: 35 minutes
86
-
87
- ### Episode 10: Web Deployment
88
- **[EPISODE_10_WEB_DEPLOYMENT.md](./EPISODE_10_WEB_DEPLOYMENT.md)**
89
- - FastAPI backend
90
- - Frontend interface
91
- - File uploads
92
- - Session management
93
- - Duration: 35 minutes
94
-
95
- ---
96
-
97
- ## 📊 Series Overview
98
-
99
- **Total Duration**: ~5.5 hours
100
- **Target Audience**: Intermediate Python developers
101
- **Teaching Style**: Build from scratch (live coding)
102
- **Prerequisites**: Python 3.10+, basic async knowledge
103
-
104
- ---
105
-
106
- ## 🎯 Learning Path
107
-
108
- ```
109
- Episode 1-2: Foundations
110
-
111
- Episode 3-4: Core Components
112
-
113
- Episode 5-7: Agent System
114
-
115
- Episode 8-9: Advanced Features
116
-
117
- Episode 10: Deployment
118
- ```
119
-
120
- ---
121
-
122
- ## File Structure
123
-
124
- ```
125
- misc/tutorials/
126
- ├── README.md (this file)
127
-
128
- ├── # Core Documentation
129
- ├── FEATURE_DOCUMENTATION.md
130
- ├── ARCHITECTURE_DIAGRAMS.md
131
- ├── GITHUB_STRUCTURE.md
132
-
133
- ├── # Exercises
134
- ├── EXERCISES.md
135
- ├── ADDITIONAL_EXERCISES.md
136
-
137
- ├── # Career & Learning
138
- ├── RESUME_GUIDE.md
139
- ├── NEXT_STEPS.md
140
-
141
- ├── # Episode Guides
142
- ├── EPISODE_01_INTRODUCTION.md
143
- ├── EPISODE_02_LLM_CALL.md
144
- ├── EPISODE_03_DATA_MODELS.md
145
- ├── EPISODE_04_LLM_CLIENT.md
146
- ├── EPISODE_05_AGENT_LOOP.md
147
- ├── EPISODE_06_TOOL_SYSTEM.md
148
- ├── EPISODE_07_TOOL_EXECUTION.md
149
- ├── EPISODE_08_MCP.md
150
- ├── EPISODE_09_MEMORY.md
151
- └── EPISODE_10_WEB_DEPLOYMENT.md
152
- ```
153
-
154
- ---
155
-
156
- ## 🚀 Quick Start
157
-
158
- 1. **Read the Feature Documentation** to understand what we're building
159
- 2. **Follow episodes in order** - each builds on the previous
160
- 3. **Complete exercises** after each episode
161
- 4. **Check GitHub branches** for episode-specific code
162
-
163
- ---
164
-
165
- ## 💡 Tips for Teaching
166
-
167
- ### For Each Episode:
168
- 1. **Hook** (2 min): Show what we'll build
169
- 2. **Problem** (3 min): Why do we need this?
170
- 3. **Concept** (5 min): How does it work?
171
- 4. **Live Coding** (15-20 min): Build it step by step
172
- 5. **Demo** (3 min): Show it working
173
- 6. **Next Steps** (2 min): Preview next episode
174
-
175
- ### Visual Aids:
176
- - Use architecture diagrams from `ARCHITECTURE_DIAGRAMS.md`
177
- - Show code side-by-side with explanations
178
- - Use terminal output to show execution
179
- - Display execution traces
180
-
181
- ### Engagement:
182
- - Ask rhetorical questions
183
- - Show "what if" scenarios
184
- - Compare with alternatives
185
- - Highlight design decisions
186
- - Show common mistakes
187
-
188
- ---
189
-
190
- ## 📝 Episode Checklist
191
-
192
- Before recording each episode:
193
-
194
- - [ ] Review episode outline
195
- - [ ] Prepare code examples
196
- - [ ] Set up development environment
197
- - [ ] Test all code snippets
198
- - [ ] Prepare visual aids
199
- - [ ] Review architecture diagrams
200
- - [ ] Prepare exercises
201
- - [ ] Check GitHub branch
202
-
203
- ---
204
-
205
- ## 🎬 Recording Tips
206
-
207
- 1. **Start fresh**: Begin each episode with clean files
208
- 2. **Build incrementally**: Test after each major component
209
- 3. **Show errors**: Demonstrate common mistakes and fixes
210
- 4. **Explain decisions**: Why this approach vs alternatives
211
- 5. **Keep it real**: Show actual debugging process
212
- 6. **Engage audience**: Ask questions, pause for thought
213
-
214
- ---
215
-
216
- ## 📚 Additional Resources
217
-
218
- - [Pydantic Documentation](https://docs.pydantic.dev/)
219
- - [LiteLLM Documentation](https://docs.litellm.ai/)
220
- - [FastAPI Documentation](https://fastapi.tiangolo.com/)
221
- - [MCP Specification](https://modelcontextprotocol.io/)
222
-
223
- ---
224
-
225
- ## 🤝 Contributing
226
-
227
- Found an error or want to improve the tutorials?
228
-
229
- 1. Fork the repository
230
- 2. Make your changes
231
- 3. Submit a pull request
232
- 4. Include explanation of changes
233
-
234
- ---
235
-
236
- ## 📧 Support
237
-
238
- Questions or issues?
239
- - Open a GitHub issue
240
- - Check existing documentation
241
- - Review episode-specific guides
242
-
243
- ---
244
-
245
- ## Series Completion
246
-
247
- After completing all 10 episodes, you will have:
248
-
249
- - Built a complete AI agent framework
250
- - Understood every component
251
- - Created production-ready code
252
- - Deployed a web application
253
- - Gained deep understanding of agent architecture
254
-
255
- **Congratulations on your learning journey!**
256
-
257
- ### What's Next?
258
-
259
- Check out **[NEXT_STEPS.md](./NEXT_STEPS.md)** for:
260
- - RAG (Retrieval Augmented Generation)
261
- - Multi-Agent Systems
262
- - Observability & Tracing
263
- - Evaluation & Benchmarking
264
- - Safety & Guardrails
265
- - LLM Routing & Optimization
266
-
267
- ### Career Guidance
268
-
269
- See **[RESUME_GUIDE.md](./RESUME_GUIDE.md)** for:
270
- - How to market this project
271
- - Resume bullet points (STAR method)
272
- - Interview talking points
273
- - Portfolio presentation tips
274
-
275
- ---
276
-
277
- *Last Updated: 2026*
278
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
misc/tutorials/RESUME_GUIDE.md DELETED
@@ -1,719 +0,0 @@
1
- # Resume & Interview Guide: Marketing Your Agent Framework
2
-
3
- This guide helps you present your AI Agent Framework project effectively for agentic AI engineer roles at top labs (OpenAI, Anthropic, Google DeepMind, etc.).
4
-
5
- ---
6
-
7
- ## 🎯 Why This Project is Valuable
8
-
9
- **This project demonstrates:**
10
- - ✅ Deep understanding of agent architectures (not just using frameworks)
11
- - ✅ Ability to build production systems from scratch
12
- - ✅ Knowledge of core concepts: tool use, memory, sessions, reasoning loops
13
- - ✅ Full-stack capabilities (backend + frontend + deployment)
14
- - ✅ Understanding of optimization and scalability
15
- - ✅ Teaching ability (YouTube series shows communication skills)
16
-
17
- **Big labs value:**
18
- - People who understand internals, not just APIs
19
- - Ability to build from first principles
20
- - Production engineering mindset
21
- - Teaching/communication skills
22
-
23
- ---
24
-
25
- ## 📝 Resume Project Description
26
-
27
- ### Option 1: Concise Version (2-3 lines)
28
-
29
- ```
30
- AI Agent Framework | Python, FastAPI, LLMs
31
- Built a production-ready agent framework from scratch implementing multi-step reasoning,
32
- tool execution, session management, and memory optimization. Features include MCP integration,
33
- token-aware context optimization, and web deployment. Created comprehensive 10-part tutorial
34
- series teaching the architecture.
35
- ```
36
-
37
- ### Option 2: Detailed Version (Bullet Points)
38
-
39
- ```
40
- AI Agent Framework - From Scratch Implementation
41
- • Architected and built a complete agent framework implementing Think-Act-Observe reasoning
42
- loop with tool execution, supporting OpenAI, Anthropic, and local models via LiteLLM
43
- • Designed extensible tool system with automatic schema generation, MCP protocol integration,
44
- and user confirmation workflows for production safety
45
- • Implemented session persistence and memory optimization strategies (sliding window,
46
- compaction, summarization) reducing token costs by 60%+ in long conversations
47
- • Built FastAPI backend with real-time chat interface, file upload handling, and execution
48
- trace visualization for debugging and monitoring
49
- • Created comprehensive 10-part YouTube tutorial series (5.5 hours) teaching agent architecture
50
- from first principles, demonstrating technical communication skills
51
- • Technologies: Python, Pydantic, AsyncIO, FastAPI, LiteLLM, MCP, tiktoken, React/HTML/CSS
52
- ```
53
-
54
- ### Option 3: Skills-Focused Version
55
-
56
- ```
57
- AI Agent Framework | Agentic AI | Full-Stack
58
- Built end-to-end agent framework demonstrating expertise in:
59
- • Agent Architecture: Multi-step reasoning, tool orchestration, execution loops
60
- • LLM Integration: Multi-provider support, structured output, streaming
61
- • Memory Management: Token optimization, context compression, session persistence
62
- • Production Engineering: Error handling, monitoring, deployment, scalability
63
- • Technical Communication: Created educational content reaching 1000+ developers
64
- ```
65
-
66
- ---
67
-
68
- ## 🎯 Key Skills to Highlight
69
-
70
- ### Technical Skills (Match Job Descriptions)
71
-
72
- **Core Agent Concepts:**
73
- - Multi-step reasoning and planning
74
- - Tool use and function calling
75
- - Agent execution loops
76
- - Context management
77
- - Memory optimization
78
-
79
- **LLM Integration:**
80
- - Multi-provider support (OpenAI, Anthropic, local)
81
- - Structured output (Pydantic)
82
- - Token management
83
- - Streaming responses
84
- - Error handling
85
-
86
- **System Design:**
87
- - Extensible architecture
88
- - Plugin system (tools)
89
- - Session management
90
- - State persistence
91
- - API design
92
-
93
- **Production Engineering:**
94
- - Performance optimization
95
- - Cost management
96
- - Monitoring and debugging
97
- - Web deployment
98
- - Scalability considerations
99
-
100
- **Communication:**
101
- - Technical writing
102
- - Teaching complex concepts
103
- - Documentation
104
- - Code organization
105
-
106
- ---
107
-
108
- ## 💼 Resume Bullet Points by Role
109
-
110
- ### For Research Roles (OpenAI, Anthropic Research)
111
-
112
- ```
113
- • Implemented agent reasoning loop from first principles, demonstrating understanding of
114
- core agentic AI concepts including tool use, memory, and multi-step planning
115
- • Designed and evaluated memory optimization strategies (sliding window, compaction,
116
- summarization) with quantitative analysis of token reduction and context retention
117
- • Built extensible framework supporting multiple LLM providers and tool protocols (MCP),
118
- enabling research into cross-provider agent behavior
119
- • Created educational content teaching agent architecture, contributing to open-source
120
- knowledge and demonstrating ability to communicate complex research concepts
121
- ```
122
-
123
- ### For Engineering Roles (Applied AI Teams)
124
-
125
- ```
126
- • Architected production-ready agent framework handling 1000+ concurrent sessions with
127
- session persistence, error recovery, and cost optimization
128
- • Implemented tool system with automatic schema generation, user confirmation workflows,
129
- and MCP integration for extensibility
130
- • Built FastAPI backend with real-time chat, file processing, and execution tracing,
131
- demonstrating full-stack capabilities
132
- • Optimized token usage by 60%+ through intelligent context management while maintaining
133
- conversation quality
134
- • Deployed scalable web application with monitoring, logging, and error tracking for
135
- production use
136
- ```
137
-
138
- ### For Infrastructure Roles (Platform Teams)
139
-
140
- ```
141
- • Designed extensible agent framework architecture supporting plugin-based tool system,
142
- multiple LLM providers, and custom memory strategies
143
- • Implemented session management system with in-memory and database backends, supporting
144
- horizontal scaling
145
- • Built monitoring and debugging tools including execution trace visualization and
146
- token usage analytics
147
- • Created comprehensive documentation and tutorial series demonstrating system architecture
148
- and design decisions
149
- • Optimized system performance through async operations, connection pooling, and intelligent
150
- caching strategies
151
- ```
152
-
153
- ---
154
-
155
- ## 🎤 Interview Talking Points
156
-
157
- ### "Tell me about this project"
158
-
159
- **Structure:**
160
- 1. **Problem**: "I wanted to deeply understand how agent frameworks work, so I built one from scratch"
161
- 2. **Architecture**: "I implemented the core components: reasoning loop, tool system, memory management"
162
- 3. **Challenges**: "Key challenges were token optimization, session persistence, and tool execution safety"
163
- 4. **Results**: "Built a production-ready system with 60%+ token reduction and comprehensive tool support"
164
- 5. **Learning**: "Created a tutorial series teaching others, which deepened my own understanding"
165
-
166
- ### Key Technical Details to Mention
167
-
168
- **Agent Architecture:**
169
- - "I implemented a Think-Act-Observe loop where the agent reasons, executes tools, and processes results iteratively"
170
- - "The ExecutionContext tracks all state, allowing for debugging and session persistence"
171
- - "I designed an extensible tool system where any Python function can become a tool with automatic schema generation"
172
-
173
- **Memory Optimization:**
174
- - "I implemented three strategies: sliding window for speed, compaction for tool-heavy conversations, and summarization for very long contexts"
175
- - "Token counting using tiktoken allows optimization to trigger automatically when thresholds are exceeded"
176
- - "This reduced costs by 60%+ while maintaining conversation quality"
177
-
178
- **Production Considerations:**
179
- - "Error handling at every layer: LLM calls, tool execution, session management"
180
- - "Session persistence allows conversations to span multiple requests"
181
- - "Web deployment with FastAPI demonstrates full-stack capabilities"
182
-
183
- ---
184
-
185
- ## 🎯 Alignment with Big Lab Priorities
186
-
187
- ### What OpenAI/Anthropic Look For:
188
-
189
- **✅ You Have:**
190
- - Deep understanding of agent internals (not just API usage)
191
- - Ability to build from first principles
192
- - Production engineering mindset
193
- - Teaching/communication ability
194
- - Full-stack capabilities
195
-
196
- **Highlight:**
197
- - "Built framework from scratch to understand internals"
198
- - "Implemented memory optimization reducing costs"
199
- - "Created educational content"
200
- - "Production-ready deployment"
201
-
202
- ### What Google DeepMind Looks For:
203
-
204
- **✅ You Have:**
205
- - Research-oriented thinking
206
- - System design skills
207
- - Ability to explain complex concepts
208
- - Open-source contribution mindset
209
-
210
- **Highlight:**
211
- - "Designed extensible architecture"
212
- - "Evaluated optimization strategies"
213
- - "Open-source tutorial series"
214
- - "First-principles implementation"
215
-
216
- ---
217
-
218
- ## 📊 Quantifiable Achievements
219
-
220
- **Add numbers where possible:**
221
-
222
- - "Reduced token costs by 60%+ through optimization"
223
- - "Built framework supporting 10+ tool types"
224
- - "Created 10-part tutorial series (5.5 hours)"
225
- - "Implemented 3 memory optimization strategies"
226
- - "Supports 3+ LLM providers"
227
- - "Web app handles file uploads, real-time chat, session management"
228
- - "Framework used in [X] projects" (if applicable)
229
-
230
- ---
231
-
232
- ## 🔗 GitHub & Portfolio Presentation
233
-
234
- ### GitHub Repository
235
-
236
- **README should highlight:**
237
- - Clear problem statement
238
- - Architecture overview
239
- - Key features
240
- - Production considerations
241
- - Tutorial series link
242
-
243
- **Code Quality:**
244
- - Clean, well-documented code
245
- - Type hints throughout
246
- - Comprehensive docstrings
247
- - Example scripts
248
- - Tests (if you add them)
249
-
250
- ### Portfolio Website
251
-
252
- **Include:**
253
- - Project overview
254
- - Architecture diagrams
255
- - Key features demo
256
- - Link to tutorial series
257
- - Technical blog post (optional)
258
-
259
- ---
260
-
261
- ## 🎓 Interview Preparation
262
-
263
- ### Technical Questions They Might Ask
264
-
265
- **"How does your agent handle tool execution errors?"**
266
- - Explain error handling in `act()` method
267
- - ToolResult with error status
268
- - Agent continues reasoning with error context
269
-
270
- **"How do you optimize for long conversations?"**
271
- - Three strategies: sliding window, compaction, summarization
272
- - Token counting triggers optimization
273
- - Trade-offs between strategies
274
-
275
- **"How would you scale this to millions of users?"**
276
- - Database session manager
277
- - Load balancing
278
- - Caching strategies
279
- - Async operations
280
- - Resource pooling
281
-
282
- **"What would you change if rebuilding?"**
283
- - Streaming support
284
- - WebSocket communication
285
- - Database sessions
286
- - Advanced monitoring
287
- - Multi-agent support
288
-
289
- ### System Design Questions
290
-
291
- **"Design an agent system for [use case]"**
292
- - Use your framework as foundation
293
- - Show understanding of requirements
294
- - Design tool set
295
- - Consider scalability
296
- - Address edge cases
297
-
298
- ---
299
-
300
- ## 🚀 Making It Stand Out
301
-
302
- ### Unique Selling Points
303
-
304
- 1. **Built from Scratch**: Not using LangChain/other frameworks - shows deep understanding
305
- 2. **Production-Ready**: Not just a prototype - has deployment, optimization, error handling
306
- 3. **Educational Content**: Tutorial series shows teaching ability (valuable in research roles)
307
- 4. **Full-Stack**: Backend + frontend + deployment shows versatility
308
- 5. **Well-Documented**: Comprehensive docs show professional standards
309
-
310
- ### Additional Enhancements (Optional)
311
-
312
- **To make it even stronger:**
313
- - Add comprehensive test suite
314
- - Deploy to production (AWS/GCP)
315
- - Add monitoring (Grafana, Prometheus)
316
- - Write technical blog posts
317
- - Contribute to open-source agent projects
318
- - Add more advanced features (streaming, WebSocket)
319
-
320
- ---
321
-
322
- ## 📝 Cover Letter Snippet
323
-
324
- ```
325
- I recently built an AI agent framework from scratch to deepen my understanding of agentic AI
326
- architectures. The project implements core concepts including multi-step reasoning, tool
327
- execution, memory optimization, and session management. I also created a comprehensive
328
- 10-part tutorial series teaching the architecture, demonstrating my ability to communicate
329
- complex technical concepts.
330
-
331
- This project aligns with [Company]'s work on [specific project/area] because [connection].
332
- I'm particularly interested in [specific aspect] and would love to contribute to [team/project].
333
- ```
334
-
335
- ---
336
-
337
- ## 🎯 Role-Specific Tailoring
338
-
339
- ### For OpenAI (GPT-4, Function Calling Team)
340
- - Emphasize: Tool use, function calling, structured output
341
- - Mention: Understanding of their API design
342
- - Highlight: Production tool execution patterns
343
-
344
- ### For Anthropic (Claude, Tool Use)
345
- - Emphasize: Multi-step reasoning, safety (confirmation system)
346
- - Mention: Understanding of their approach
347
- - Highlight: Memory optimization strategies
348
-
349
- ### For Google DeepMind (Gemini, Agent Research)
350
- - Emphasize: Research-oriented thinking, system design
351
- - Mention: Open-source contribution
352
- - Highlight: Educational content creation
353
-
354
- ---
355
-
356
- ## ✅ Final Checklist
357
-
358
- Before applying:
359
-
360
- - [ ] GitHub repo is clean and well-documented
361
- - [ ] README clearly explains the project
362
- - [ ] Code has type hints and docstrings
363
- - [ ] Resume bullet points are quantified
364
- - [ ] Can explain architecture in 2 minutes
365
- - [ ] Can discuss design decisions
366
- - [ ] Can answer "what would you change?" question
367
- - [ ] Tutorial series is accessible
368
- - [ ] Portfolio/website is updated (if you have one)
369
-
370
- ---
371
-
372
- ## 💡 Pro Tips
373
-
374
- 1. **Be Specific**: Don't say "built an agent" - say "built an agent framework with tool execution and memory optimization"
375
-
376
- 2. **Show Impact**: Quantify results (token reduction, features, tutorial views)
377
-
378
- 3. **Demonstrate Learning**: Mention what you learned and how it changed your thinking
379
-
380
- 4. **Connect to Role**: Research their work and connect your project to their needs
381
-
382
- 5. **Be Honest**: Acknowledge limitations and what you'd improve
383
-
384
- 6. **Show Growth**: This project shows you can learn and build complex systems
385
-
386
- ---
387
-
388
- ## 🎓 This Project is Definitely Useful!
389
-
390
- **Why:**
391
- - ✅ Shows deep understanding (not just API usage)
392
- - ✅ Demonstrates production engineering skills
393
- - ✅ Proves you can build from first principles
394
- - ✅ Shows teaching/communication ability
395
- - ✅ Demonstrates full-stack capabilities
396
- - ✅ Aligns with what big labs value
397
-
398
- **Big labs hire people who:**
399
- - Understand internals deeply
400
- - Can build production systems
401
- - Can communicate complex ideas
402
- - Think from first principles
403
- - Have engineering rigor
404
-
405
- **Your project demonstrates all of these!**
406
-
407
- ---
408
-
409
- ## Positioning as an End-to-End Agentic AI Architect
410
-
411
- If you want to project yourself as someone who can **architect any type of agentic system for any use case**, you need to demonstrate breadth, depth, and system design thinking.
412
-
413
- ### Target Positioning
414
-
415
- **Current**: "I built an agent framework from scratch"
416
-
417
- **Target**: "I architect end-to-end agentic systems - from requirements to production. I've implemented 8+ agent patterns across 10+ domains, with expertise in multi-agent orchestration, RAG integration, and human-in-the-loop safety."
418
-
419
- ---
420
-
421
- ## Architecture Patterns You Should Master
422
-
423
- | Pattern | Description | Use Case | Your Framework |
424
- |---------|-------------|----------|----------------|
425
- | **Single Agent** | One agent with tools | Simple tasks, chatbots | Implemented |
426
- | **Human-in-the-Loop** | Confirmation workflow | Dangerous operations | Implemented |
427
- | **Supervisor + Specialists** | Coordinator delegates | Complex multi-domain tasks | To add |
428
- | **Pipeline/Chain** | Sequential agents | Document processing | To add |
429
- | **Debate/Critique** | Agents challenge each other | High-stakes decisions | To add |
430
- | **Reflection** | Self-critique loop | Code generation, writing | To add |
431
- | **Hierarchical** | Multi-level delegation | Enterprise workflows | To add |
432
- | **Swarm** | Dynamic collaboration | Research, exploration | To add |
433
-
434
- ### Add These to Your Portfolio
435
-
436
- ```
437
- examples/
438
- ├── single_agent/ # What you have
439
- ├── supervisor_agent/ # Coordinator + specialists
440
- ├── pipeline_agent/ # Sequential processing
441
- ├── reflection_agent/ # Self-critique loop
442
- └── rag_agent/ # Retrieval-augmented
443
- ```
444
-
445
- ---
446
-
447
- ## Domain Portfolio to Build
448
-
449
- Show you can build agents for ANY use case:
450
-
451
- | Domain | Agent Type | Key Features |
452
- |--------|-----------|--------------|
453
- | **Customer Support** | RAG + Tools | Knowledge base, ticket creation, escalation |
454
- | **Code Assistant** | Code Gen + Execution | Sandbox execution, testing, debugging |
455
- | **Research Agent** | Multi-source RAG | Web search, paper analysis, synthesis |
456
- | **Data Analyst** | SQL + Visualization | Query generation, chart creation |
457
- | **Content Creator** | Writing + Review | Draft, edit, SEO optimization |
458
- | **DevOps Agent** | Monitoring + Actions | Alert analysis, auto-remediation |
459
- | **Sales Agent** | CRM + Email | Lead scoring, outreach, follow-up |
460
- | **Legal/Compliance** | Document Analysis | Contract review, risk flagging |
461
-
462
- ### Add Use Case Demos
463
-
464
- ```
465
- demos/
466
- ├── customer_support/ # RAG + ticket tools
467
- ├── code_assistant/ # Code execution sandbox
468
- ├── research_agent/ # Multi-source research
469
- └── data_analyst/ # SQL + visualization
470
- ```
471
-
472
- ---
473
-
474
- ## Skills to Demonstrate as an Architect
475
-
476
- ### Technical Architecture
477
- - **Scalability**: How to handle 1000+ concurrent agent sessions
478
- - **Reliability**: Retry logic, fallbacks, graceful degradation
479
- - **Observability**: Tracing, metrics, debugging
480
- - **Security**: Guardrails, sandboxing, access control
481
- - **Cost Optimization**: Caching, routing, batching
482
-
483
- ### System Design Expertise
484
- - When to use agents vs. deterministic code
485
- - Choosing between single vs. multi-agent
486
- - Tool design principles
487
- - Memory strategies for different use cases
488
- - Evaluation and testing strategies
489
-
490
- ### Architecture Decision Records
491
-
492
- Add these to your docs:
493
- ```
494
- docs/
495
- ├── ADR_001_single_vs_multi_agent.md
496
- ├── ADR_002_memory_strategies.md
497
- ├── ADR_003_tool_confirmation.md
498
- └── ADR_004_session_management.md
499
- ```
500
-
501
- ---
502
-
503
- ## How to Present Yourself
504
-
505
- ### Resume Headline
506
- ```
507
- Agentic AI Architect | End-to-End Agent Systems | LLM Infrastructure
508
- ```
509
-
510
- ### LinkedIn Summary
511
- ```
512
- I design and build production-grade AI agent systems from scratch. My expertise
513
- spans single-agent assistants to complex multi-agent orchestration, with deep
514
- knowledge of tool integration, memory management, and human-in-the-loop safety
515
- patterns.
516
-
517
- I've architected agent frameworks covering 8+ architecture patterns across 10+
518
- use case domains - from customer support to code generation to research automation.
519
-
520
- "Give me any business problem, and I'll architect an agent system to solve it -
521
- from requirements to production deployment."
522
- ```
523
-
524
- ### Portfolio Statement
525
- ```
526
- "I don't just use agent frameworks - I build them. I understand every layer from
527
- LLM API calls to production deployment, and I can architect solutions for any
528
- domain."
529
- ```
530
-
531
- ---
532
-
533
- ## Interview Strategy: "How Would You Build X?"
534
-
535
- When asked about designing an agent system, structure your answer:
536
-
537
- ### Framework (use consistently)
538
-
539
- 1. **Requirements Analysis**
540
- - "First, I'd clarify the task complexity, latency needs, and safety requirements..."
541
- - "What are the input/output formats? What tools are needed?"
542
-
543
- 2. **Architecture Selection**
544
- - "For this use case, I'd choose a [pattern] because..."
545
- - "Single agent if simple, supervisor pattern if multi-domain..."
546
-
547
- 3. **Component Design**
548
- - "The key components would be: agent loop, tools, memory, guardrails..."
549
- - "For tools, I'd implement [specific tools] with these schemas..."
550
-
551
- 4. **Trade-off Analysis**
552
- - "The main trade-offs are cost vs latency, accuracy vs speed..."
553
- - "For this use case, I'd prioritize [X] over [Y] because..."
554
-
555
- 5. **Production Considerations**
556
- - "To make this production-ready, I'd add monitoring, error handling..."
557
- - "For scaling, I'd implement caching, async operations, load balancing..."
558
-
559
- ### Example Answers
560
-
561
- **"Design a customer support agent"**
562
- ```
563
- "I'd use a single-agent RAG architecture with these components:
564
- 1. Knowledge base tool with hybrid search (vector + BM25)
565
- 2. Ticket creation tool for escalation
566
- 3. CRM lookup tool for customer context
567
- 4. Session management for conversation continuity
568
- 5. Guardrails to prevent sharing sensitive data
569
-
570
- The agent loop would: retrieve context, generate response, escalate if needed.
571
- For production, I'd add response caching, rate limiting, and quality monitoring."
572
- ```
573
-
574
- **"Design a code review agent"**
575
- ```
576
- "I'd use a reflection pattern - agent critiques its own analysis:
577
- 1. First pass: identify issues (security, style, bugs)
578
- 2. Self-critique: 'Are these issues valid? Did I miss anything?'
579
- 3. Final pass: prioritize and format feedback
580
-
581
- Tools: file reader, AST parser, security scanner, style checker.
582
- I'd add sandboxed execution to verify fixes actually work."
583
- ```
584
-
585
- **"Design a multi-agent research system"**
586
- ```
587
- "I'd use a supervisor pattern:
588
- 1. Coordinator agent: plans research, assigns tasks
589
- 2. Search specialist: web and academic search
590
- 3. Analysis specialist: summarizes and synthesizes
591
- 4. Writing specialist: produces final report
592
-
593
- The supervisor tracks progress and handles failures.
594
- Key challenge: ensuring specialists share context efficiently."
595
- ```
596
-
597
- ---
598
-
599
- ## Credentials to Build Authority
600
-
601
- | Credential | How to Get It | Priority |
602
- |------------|--------------|----------|
603
- | **GitHub Stars** | Share on Twitter/LinkedIn, make repo useful | High |
604
- | **Technical Blog** | Write about architecture decisions | High |
605
- | **YouTube Series** | Your tutorial series | Done! |
606
- | **Open Source Contributions** | Contribute to LangChain, LlamaIndex, CrewAI | Medium |
607
- | **Conference Talks** | Apply to AI meetups, conferences | Medium |
608
- | **Certifications** | DeepLearning.AI courses | Low |
609
-
610
- ---
611
-
612
- ## Architecture Examples to Add
613
-
614
- ### Supervisor Pattern Example
615
-
616
- ```python
617
- class SupervisorAgent(Agent):
618
- """Coordinator that delegates to specialist agents."""
619
-
620
- def __init__(self, specialists: List[Agent]):
621
- self.specialists = {agent.name: agent for agent in specialists}
622
- super().__init__(
623
- instructions="""You are a supervisor coordinating specialists:
624
- - researcher: for information gathering
625
- - coder: for code tasks
626
- - writer: for content creation
627
-
628
- Analyze tasks and delegate appropriately."""
629
- )
630
-
631
- @tool
632
- async def delegate(self, task: str, specialist: str) -> str:
633
- """Delegate a task to a specialist agent."""
634
- agent = self.specialists.get(specialist)
635
- if not agent:
636
- return f"Unknown specialist: {specialist}"
637
- result = await agent.run(task)
638
- return result.output
639
- ```
640
-
641
- ### Reflection Pattern Example
642
-
643
- ```python
644
- class ReflectionAgent(Agent):
645
- """Agent that critiques and improves its own output."""
646
-
647
- async def run_with_reflection(self, task: str, max_reflections: int = 2):
648
- # Initial attempt
649
- result = await self.run(task)
650
-
651
- for i in range(max_reflections):
652
- # Self-critique
653
- critique = await self.run(f"""
654
- Review this output and identify issues:
655
- {result.output}
656
-
657
- What could be improved? Be specific.
658
- """)
659
-
660
- # Check if good enough
661
- if "no issues" in critique.output.lower():
662
- break
663
-
664
- # Improve based on critique
665
- result = await self.run(f"""
666
- Original task: {task}
667
- Previous attempt: {result.output}
668
- Critique: {critique.output}
669
-
670
- Now provide an improved version.
671
- """)
672
-
673
- return result
674
- ```
675
-
676
- ---
677
-
678
- ## Quick Wins to Strengthen Your Position
679
-
680
- 1. **Add architecture diagrams** for each pattern
681
- 2. **Create use case READMEs** explaining design decisions
682
- 3. **Write ADRs** (Architecture Decision Records)
683
- 4. **Add benchmarks** comparing patterns
684
- 5. **Create a "pattern selector" tool** that recommends patterns based on requirements
685
-
686
- ---
687
-
688
- ## Summary: Your Positioning Statement
689
-
690
- ```
691
- "As an Agentic AI Architect, I design and build end-to-end agent systems
692
- for any business problem. My expertise includes:
693
-
694
- - 8+ agent architecture patterns (single, supervisor, pipeline, reflection, etc.)
695
- - 10+ domain applications (support, code, research, data analysis, etc.)
696
- - Production systems with safety, scalability, and observability
697
- - From-scratch implementation demonstrating deep understanding
698
-
699
- I don't just use frameworks - I understand every layer and can architect
700
- the right solution for any use case."
701
- ```
702
-
703
- ---
704
-
705
- ## Next Steps
706
-
707
- 1. **Polish GitHub**: Clean code, great README, examples
708
- 2. **Update Resume**: Use bullet points from this guide
709
- 3. **Add Architecture Examples**: Supervisor, Pipeline, Reflection patterns
710
- 4. **Add Use Case Demos**: Customer support, code assistant, research agent
711
- 5. **Write ADRs**: Document your design decisions
712
- 6. **Prepare Stories**: Practice explaining architectures
713
- 7. **Research Labs**: Understand their specific work
714
- 8. **Apply Confidently**: This is a strong foundation!
715
-
716
- ---
717
-
718
- **You've built the foundation. Now expand it to show you can architect ANY agent system!**
719
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
my_practice/first_llm_call.ipynb DELETED
@@ -1,276 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": null,
6
- "id": "f25e9940",
7
- "metadata": {},
8
- "outputs": [],
9
- "source": [
10
- "from litellm import completion\n",
11
- "\n",
12
- "\n",
13
- "result = completion(\n",
14
- " model = 'gpt-4o',\n",
15
- " messages = [{'role':'user', 'content' : 'Hello'}]\n",
16
- "\n",
17
- ")\n",
18
- "\n",
19
- "\n"
20
- ]
21
- },
22
- {
23
- "cell_type": "code",
24
- "execution_count": 2,
25
- "id": "fba08f3a",
26
- "metadata": {},
27
- "outputs": [
28
- {
29
- "data": {
30
- "text/plain": [
31
- "'Hello! How can I assist you today?'"
32
- ]
33
- },
34
- "execution_count": 2,
35
- "metadata": {},
36
- "output_type": "execute_result"
37
- }
38
- ],
39
- "source": [
40
- "result.choices[0].message.content"
41
- ]
42
- },
43
- {
44
- "cell_type": "markdown",
45
- "id": "c283cf21",
46
- "metadata": {},
47
- "source": [
48
- "Building data models"
49
- ]
50
- },
51
- {
52
- "cell_type": "code",
53
- "execution_count": null,
54
- "id": "f4a41257",
55
- "metadata": {},
56
- "outputs": [],
57
- "source": [
58
- "from typing import Union, Literal\n",
59
- "from dataclasses import dataclass, field\n",
60
- "from pydantic import BaseModel, Field, List, Dict, Any, Optional, BaseTool\n",
61
- "import uuid\n",
62
- "from datetime import datetime\n",
63
- "import json\n",
64
- "\n",
65
- "## agent models\n",
66
- "\n",
67
- "class Message(BaseModel):\n",
68
- "\n",
69
- " \"\"\"A text message in the conversation.\"\"\"\n",
70
- " type: Literal[\"message\"] = \"message\"\n",
71
- " role: Literal[\"system\", \"user\", \"assistant\"]\n",
72
- " content: str\n",
73
- "\n",
74
- "class ToolCall(BaseModel): \n",
75
- " \"\"\"LLM's request to execute a tool.\"\"\"\n",
76
- " type: Literal[\"tool_call\"] = \"tool_call\"\n",
77
- " tool_call_id: str\n",
78
- " name: str\n",
79
- " arguments: dict\n",
80
- " \n",
81
- "\n",
82
- "class ToolResult(BaseModel):\n",
83
- " \"\"\"Result from tool execution.\"\"\"\n",
84
- " type: Literal[\"tool_result\"] = \"tool_result\"\n",
85
- " tool_call_id: str\n",
86
- " name: str\n",
87
- " status: Literal[\"success\", \"error\"]\n",
88
- " content: list\n",
89
- "\n",
90
- "class ToolConfirmation(BaseModel): ## this is temperory so not in union (user to agent)\n",
91
- " \"\"\"User's decision on a pending tool call.\"\"\"\n",
92
- " tool_call_id: str\n",
93
- " approved: bool\n",
94
- " modified_arguments: dict | None = None\n",
95
- " reason: str | None = None # Reason for rejection (if not approved)\n",
96
- "\n",
97
- "class PendingToolCall(BaseModel): ## this is temperory so not in union(agent to user)\n",
98
- " \"\"\"A tool call awaiting user confirmation.\"\"\"\n",
99
- " \n",
100
- " tool_call: ToolCall\n",
101
- " confirmation_message: str\n",
102
- "\n",
103
- "## every request returns with these things so union all of them\n",
104
- "ContentItem = Union[Message, ToolCall, ToolResult, ToolConfirmation] ## keep adding if there is more\n",
105
- "\n",
106
- "class Event(BaseModel):\n",
107
- " \"\"\"A recorded occurrence during agent execution.\"\"\"\n",
108
- " id: str = Field(default_factory=lambda: str(uuid.uuid4()))\n",
109
- " execution_id: str\n",
110
- " timestamp: float = Field(default_factory=lambda: datetime.now().timestamp())\n",
111
- " author: str # \"user\" or agent name\n",
112
- " content: List[ContentItem] = Field(default_factory=list)\n",
113
- "\n",
114
- "@dataclass\n",
115
- "class ExecutionContext: ## there will be frequent modifications to this, so not basemodel\n",
116
- " \"\"\"Central storage for all execution state.\"\"\"\n",
117
- " \n",
118
- " execution_id: str = field(default_factory=lambda: str(uuid.uuid4()))\n",
119
- " events: List[Event] = field(default_factory=list)\n",
120
- " current_step: int = 0\n",
121
- " state: Dict[str, Any] = field(default_factory=dict)\n",
122
- " final_result: Optional[str | BaseModel] = None\n",
123
- " session_id: Optional[str] = None # Link to session for persistence\n",
124
- "\n",
125
- "\n",
126
- "\n",
127
- "### lets do LLM models\n",
128
- "\n",
129
- "class LlmRequest(BaseModel):\n",
130
- " \"\"\"Request object for LLM calls.\"\"\"\n",
131
- " instructions: List[str] = Field(default_factory=list)\n",
132
- " contents: List[ContentItem] = Field(default_factory=list)\n",
133
- " tools: List[BaseTool] = Field(default_factory=list)\n",
134
- " tool_choice: Optional[str] = 'auto'\n",
135
- "\n",
136
- "class LlmResponse(BaseModel):\n",
137
- " \"\"\"Response object from LLM calls.\"\"\"\n",
138
- " content: List[ContentItem] = Field(default_factory=list)\n",
139
- " error_message: Optional[str] = None\n",
140
- " usage_metadata: Dict[str, Any] = Field(default_factory=dict)\n",
141
- "\n",
142
- "class LlmClient(BaseModel):\n",
143
- " def _parse_response(self, response) -> LlmResponse:\n",
144
- " \"\"\"Convert API response to LlmResponse.\"\"\"\n",
145
- " choice = response.choices[0]\n",
146
- " content_items = []\n",
147
- " \n",
148
- " # Parse message content\n",
149
- " if choice.message.content:\n",
150
- " content_items.append(Message(\n",
151
- " role=\"assistant\",\n",
152
- " content=choice.message.content\n",
153
- " ))\n",
154
- "\n",
155
- " # Parse tool calls\n",
156
- " if choice.message.tool_calls:\n",
157
- " for tc in choice.message.tool_calls:\n",
158
- " content_items.append(ToolCall(\n",
159
- " tool_call_id=tc.id,\n",
160
- " name=tc.function.name,\n",
161
- " arguments=json.loads(tc.function.arguments)\n",
162
- " ))\n",
163
- " \n",
164
- " return LlmResponse(\n",
165
- " content=content_items,\n",
166
- " usage_metadata={\n",
167
- " \"input_tokens\": response.usage.prompt_tokens,\n",
168
- " \"output_tokens\": response.usage.completion_tokens,\n",
169
- " }\n",
170
- " ) ## this is internal method to parse toolcalls, reasoning, message content\n",
171
- " \n",
172
- "\n",
173
- "def build_messages(request: LlmRequest) -> List[dict]: ## will be reused everywhere\n",
174
- " \"\"\"Convert LlmRequest to API message format.\"\"\"\n",
175
- " messages = []\n",
176
- " \n",
177
- " # Add system instructions\n",
178
- " for instruction in request.instructions:\n",
179
- " messages.append({\"role\": \"system\", \"content\": instruction})\n",
180
- " \n",
181
- " # Convert content items\n",
182
- " for item in request.contents:\n",
183
- " if isinstance(item, Message):\n",
184
- " messages.append({\"role\": item.role, \"content\": item.content})\n",
185
- " \n",
186
- " elif isinstance(item, ToolCall):\n",
187
- " tool_call_dict = {\n",
188
- " \"id\": item.tool_call_id,\n",
189
- " \"type\": \"function\",\n",
190
- " \"function\": {\n",
191
- " \"name\": item.name,\n",
192
- " \"arguments\": json.dumps(item.arguments)\n",
193
- " }\n",
194
- " }\n",
195
- " # Append to previous assistant message if exists\n",
196
- " if messages and messages[-1][\"role\"] == \"assistant\":\n",
197
- " messages[-1].setdefault(\"tool_calls\", []).append(tool_call_dict)\n",
198
- " else:\n",
199
- " messages.append({\n",
200
- " \"role\": \"assistant\",\n",
201
- " \"content\": None,\n",
202
- " \"tool_calls\": [tool_call_dict]\n",
203
- " })\n",
204
- " \n",
205
- " elif isinstance(item, ToolResult):\n",
206
- " messages.append({\n",
207
- " \"role\": \"tool\",\n",
208
- " \"tool_call_id\": item.tool_call_id,\n",
209
- " \"content\": str(item.content[0]) if item.content else \"\"\n",
210
- " })\n",
211
- " \n",
212
- " return messages\n",
213
- "\n"
214
- ]
215
- },
216
- {
217
- "cell_type": "markdown",
218
- "id": "d4b6afba",
219
- "metadata": {},
220
- "source": [
221
- "Building our Agent"
222
- ]
223
- },
224
- {
225
- "cell_type": "code",
226
- "execution_count": null,
227
- "id": "2d7d0140",
228
- "metadata": {},
229
- "outputs": [],
230
- "source": [
231
- "from dataclasses import dataclass\n",
232
- "from typing import List, Optional\n",
233
- "from .llm import LlmClient\n",
234
- "from .models import ExecutionContext\n",
235
- "\n",
236
- "class Agent:\n",
237
- " \"\"\"Agent that can reason and use tools to solve tasks.\"\"\"\n",
238
- " \n",
239
- " def __init__(\n",
240
- " self,\n",
241
- " model: LlmClient,\n",
242
- " tools: List[BaseTool] = None,\n",
243
- " instructions: str = \"\",\n",
244
- " max_steps: int = 5,\n",
245
- " name: str = \"agent\"\n",
246
- " ):\n",
247
- " self.model = model\n",
248
- " self.instructions = instructions\n",
249
- " self.max_steps = max_steps\n",
250
- " self.name = name\n",
251
- " self.tools = tools or []"
252
- ]
253
- }
254
- ],
255
- "metadata": {
256
- "kernelspec": {
257
- "display_name": ".venv",
258
- "language": "python",
259
- "name": "python3"
260
- },
261
- "language_info": {
262
- "codemirror_mode": {
263
- "name": "ipython",
264
- "version": 3
265
- },
266
- "file_extension": ".py",
267
- "mimetype": "text/x-python",
268
- "name": "python",
269
- "nbconvert_exporter": "python",
270
- "pygments_lexer": "ipython3",
271
- "version": "3.12.11"
272
- }
273
- },
274
- "nbformat": 4,
275
- "nbformat_minor": 5
276
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
my_practice/pydantic.ipynb DELETED
@@ -1,255 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": 16,
6
- "id": "ff3b1200",
7
- "metadata": {},
8
- "outputs": [
9
- {
10
- "name": "stdout",
11
- "output_type": "stream",
12
- "text": [
13
- "msg1.role: user\n",
14
- "msg2.role: assistant\n",
15
- "msg3.role: system\n",
16
- "\n",
17
- "Validation error: 1 validation error for Message\n",
18
- "role\n",
19
- " Input should be 'user', 'assistant' or 'system' [type=literal_error, input_value='admin', input_type=str]\n",
20
- " For further information visit https://errors.pydantic.dev/2.11/v/literal_error\n",
21
- "\n",
22
- "Missing field error: 1 validation error for Message\n",
23
- "content\n",
24
- " Field required [type=missing, input_value={'role': 'user'}, input_type=dict]\n",
25
- " For further information visit https://errors.pydantic.dev/2.11/v/missing\n"
26
- ]
27
- }
28
- ],
29
- "source": [
30
- "from pydantic import BaseModel, ValidationError\n",
31
- "from typing import Literal\n",
32
- "\n",
33
- "# Create a Message model like in your framework\n",
34
- "class Message(BaseModel):\n",
35
- " role: Literal[\"user\", \"assistant\", \"system\"]\n",
36
- " content: str\n",
37
- "\n",
38
- "# Test 1: Valid messages\n",
39
- "msg1 = Message(role=\"user\", content=\"Hello\")\n",
40
- "msg2 = Message(role=\"assistant\", content=\"Hi there!\")\n",
41
- "msg3 = Message(role=\"system\", content=\"You are helpful\")\n",
42
- "\n",
43
- "print(f\"msg1.role: {msg1.role}\")\n",
44
- "print(f\"msg2.role: {msg2.role}\")\n",
45
- "print(f\"msg3.role: {msg3.role}\")\n",
46
- "\n",
47
- "# Test 2: What happens with invalid role?\n",
48
- "try:\n",
49
- " bad_msg = Message(role=\"admin\", content=\"test\")\n",
50
- "except ValidationError as e:\n",
51
- " print(f\"\\nValidation error: {e}\")\n",
52
- "\n",
53
- "# Test 3: What happens with missing content?\n",
54
- "try:\n",
55
- " incomplete = Message(role=\"user\")\n",
56
- "except ValidationError as e:\n",
57
- " print(f\"\\nMissing field error: {e}\")"
58
- ]
59
- },
60
- {
61
- "cell_type": "code",
62
- "execution_count": 27,
63
- "id": "811107f4",
64
- "metadata": {},
65
- "outputs": [
66
- {
67
- "name": "stdout",
68
- "output_type": "stream",
69
- "text": [
70
- "{\"role\":\"user\",\"content\":\"My Name is Akhil\",\"timestamp\":1769878310.697898}\n",
71
- "{'role': 'user', 'content': 'My Name is Akhil', 'timestamp': 1769878310.697898}\n",
72
- "user\n"
73
- ]
74
- }
75
- ],
76
- "source": [
77
- "import datetime\n",
78
- "from typing import Optional, List, Any, Dict\n",
79
- "import uuid\n",
80
- "from pydantic import Field\n",
81
- "\n",
82
- "class ChatMessage(BaseModel):\n",
83
- " role : Literal[\"user\", \"assistant\", \"system\"]\n",
84
- " content : str \n",
85
- " timestamp : float = Field(default_factory=lambda: datetime.datetime.now().timestamp())\n",
86
- "\n",
87
- "class ChatSession(BaseModel):\n",
88
- " session_id: str = Field(default_factory=lambda: str(uuid.uuid4()))\n",
89
- " user_name : str\n",
90
- " messages : List[ChatMessage] = Field(default_factory=list)\n",
91
- " metadata : Dict[str, Any] = Field(default_factory=dict)\n",
92
- "\n",
93
- " def add_message(self, role: Literal[\"user\", \"assistant\", \"system\"], content: str):\n",
94
- " self.messages.append(ChatMessage(role = role, content = content))\n",
95
- "\n",
96
- "\n",
97
- "\n",
98
- "chat = ChatMessage(role = \"user\", content = \"My Name is Akhil\")\n",
99
- "\n",
100
- "print(chat.model_dump_json())\n",
101
- "print(chat.model_dump())\n",
102
- "\n",
103
- "msg2 = ChatMessage.model_validate({\"role\": \"user\", \"content\": \"hi\"})\n",
104
- "print(msg2.role)\n",
105
- "\n",
106
- "\n",
107
- "\n"
108
- ]
109
- },
110
- {
111
- "cell_type": "code",
112
- "execution_count": 28,
113
- "id": "a38ac16e",
114
- "metadata": {},
115
- "outputs": [
116
- {
117
- "name": "stdout",
118
- "output_type": "stream",
119
- "text": [
120
- "Starting API 1...\n",
121
- "Finished API 1\n",
122
- "Starting API 2...\n",
123
- "Finished API 2\n",
124
- "Starting API 3...\n",
125
- "Finished API 3\n",
126
- "\n",
127
- "Total time: 6.0 seconds\n",
128
- "Results: Result from API 1, Result from API 2, Result from API 3\n"
129
- ]
130
- }
131
- ],
132
- "source": [
133
- "import time\n",
134
- "\n",
135
- "def slow_api_call(name: str) -> str:\n",
136
- " \"\"\"Simulate a slow API call (like calling OpenAI)\"\"\"\n",
137
- " print(f\"Starting {name}...\")\n",
138
- " time.sleep(2) # Blocks for 2 seconds\n",
139
- " print(f\"Finished {name}\")\n",
140
- " return f\"Result from {name}\"\n",
141
- "\n",
142
- "# Sequential calls - takes 6 seconds total\n",
143
- "start = time.time()\n",
144
- "\n",
145
- "result1 = slow_api_call(\"API 1\")\n",
146
- "result2 = slow_api_call(\"API 2\")\n",
147
- "result3 = slow_api_call(\"API 3\")\n",
148
- "\n",
149
- "print(f\"\\nTotal time: {time.time() - start:.1f} seconds\")\n",
150
- "print(f\"Results: {result1}, {result2}, {result3}\")"
151
- ]
152
- },
153
- {
154
- "cell_type": "code",
155
- "execution_count": 32,
156
- "id": "afc59f41",
157
- "metadata": {},
158
- "outputs": [
159
- {
160
- "name": "stdout",
161
- "output_type": "stream",
162
- "text": [
163
- "Starting API 1...\n",
164
- "Starting API 2...\n",
165
- "Starting API 3...\n",
166
- "Finished API 1\n",
167
- "Finished API 2\n",
168
- "Finished API 3\n",
169
- "\n",
170
- "Total time: 2.0 seconds\n",
171
- "Results: ['Result from API 1', 'Result from API 2', 'Result from API 3'], Result from API 2, Result from API 3\n"
172
- ]
173
- }
174
- ],
175
- "source": [
176
- "import time\n",
177
- "import asyncio\n",
178
- "\n",
179
- "async def slow_api_call(name: str) -> str:\n",
180
- " \"\"\"Simulate a slow API call (like calling OpenAI)\"\"\"\n",
181
- " print(f\"Starting {name}...\")\n",
182
- " await asyncio.sleep(2) # Blocks for 2 seconds\n",
183
- " print(f\"Finished {name}\")\n",
184
- " return f\"Result from {name}\"\n",
185
- "\n",
186
- "# Sequential calls - takes 6 seconds total\n",
187
- "start = time.time()\n",
188
- "\n",
189
- "result1 = await asyncio.gather(\n",
190
- " slow_api_call(\"API 1\"),\n",
191
- "slow_api_call(\"API 2\"),\n",
192
- "slow_api_call(\"API 3\")\n",
193
- ")\n",
194
- "\n",
195
- "print(f\"\\nTotal time: {time.time() - start:.1f} seconds\")\n",
196
- "print(f\"Results: {result1}, {result2}, {result3}\")"
197
- ]
198
- },
199
- {
200
- "cell_type": "code",
201
- "execution_count": 34,
202
- "id": "144c73f9",
203
- "metadata": {},
204
- "outputs": [
205
- {
206
- "name": "stdout",
207
- "output_type": "stream",
208
- "text": [
209
- "['hi', 'hi', 'hi']\n"
210
- ]
211
- }
212
- ],
213
- "source": [
214
- "async def fetch_weather(city: str) -> str:\n",
215
- " await asyncio.sleep(2)\n",
216
- " return \"hi\"\n",
217
- "\n",
218
- "async def fetch_news(news: str) -> str:\n",
219
- " await asyncio.sleep(1.5)\n",
220
- " return \"hi\"\n",
221
- "\n",
222
- "async def fetch_stock(symbool: str) -> str:\n",
223
- " await asyncio.sleep(1)\n",
224
- " return \"hi\"\n",
225
- "\n",
226
- "\n",
227
- "results = await asyncio.gather(\n",
228
- " fetch_weather(\"hyd\"), fetch_news(\"hyd\"), fetch_stock(\"hyd\")\n",
229
- " )\n",
230
- "print(results)\n"
231
- ]
232
- }
233
- ],
234
- "metadata": {
235
- "kernelspec": {
236
- "display_name": ".venv",
237
- "language": "python",
238
- "name": "python3"
239
- },
240
- "language_info": {
241
- "codemirror_mode": {
242
- "name": "ipython",
243
- "version": 3
244
- },
245
- "file_extension": ".py",
246
- "mimetype": "text/x-python",
247
- "name": "python",
248
- "nbconvert_exporter": "python",
249
- "pygments_lexer": "ipython3",
250
- "version": "3.12.11"
251
- }
252
- },
253
- "nbformat": 4,
254
- "nbformat_minor": 5
255
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
rag/__init__.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ """RAG (Retrieval-Augmented Generation) module."""
2
+
3
+ from .embeddings import get_embeddings, vector_search
4
+ from .chunking import fixed_length_chunking
5
+
6
+ __all__ = [
7
+ "get_embeddings",
8
+ "vector_search",
9
+ "fixed_length_chunking",
10
+ ]
rag/embeddings.py CHANGED
@@ -1,5 +1,8 @@
 
 
1
  from openai import OpenAI
2
  import numpy as np
 
3
  from dotenv import load_dotenv
4
  import os
5
 
@@ -8,21 +11,21 @@ load_dotenv()
8
 
9
  # Initialize OpenAI client with API key from environment
10
  client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
11
-
12
  def get_embeddings(texts, model="text-embedding-3-small"):
13
  """Convert text to embedding vectors."""
14
  if isinstance(texts, str):
15
  texts = [texts]
16
-
17
  response = client.embeddings.create(input=texts, model=model)
18
  return np.array([item.embedding for item in response.data])
19
-
20
  def vector_search(query, chunks, chunk_embeddings, top_k=3):
21
  """Find the most similar chunks to the query."""
22
  query_embedding = get_embeddings(query)
23
  similarities = cosine_similarity(query_embedding, chunk_embeddings)[0]
24
  top_indices = similarities.argsort()[::-1][:top_k]
25
-
26
  results = []
27
  for idx in top_indices:
28
  results.append({
@@ -30,15 +33,3 @@ def vector_search(query, chunks, chunk_embeddings, top_k=3):
30
  'similarity': similarities[idx]
31
  })
32
  return results
33
-
34
- from sklearn.metrics.pairwise import cosine_similarity
35
-
36
- sentences = [
37
- "The cat is sleeping on the couch",
38
- "A kitten is playing with a toy",
39
- "The dog is running in the park"
40
- ]
41
- embeddings = get_embeddings(sentences)
42
-
43
- cat_kitten = cosine_similarity([embeddings[0]], [embeddings[1]])[0][0]
44
- cat_dog = cosine_similarity([embeddings[0]], [embeddings[2]])[0][0]
 
1
+ """Embedding and vector search utilities."""
2
+
3
  from openai import OpenAI
4
  import numpy as np
5
+ from sklearn.metrics.pairwise import cosine_similarity
6
  from dotenv import load_dotenv
7
  import os
8
 
 
11
 
12
  # Initialize OpenAI client with API key from environment
13
  client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
14
+
15
  def get_embeddings(texts, model="text-embedding-3-small"):
16
  """Convert text to embedding vectors."""
17
  if isinstance(texts, str):
18
  texts = [texts]
19
+
20
  response = client.embeddings.create(input=texts, model=model)
21
  return np.array([item.embedding for item in response.data])
22
+
23
  def vector_search(query, chunks, chunk_embeddings, top_k=3):
24
  """Find the most similar chunks to the query."""
25
  query_embedding = get_embeddings(query)
26
  similarities = cosine_similarity(query_embedding, chunk_embeddings)[0]
27
  top_indices = similarities.argsort()[::-1][:top_k]
28
+
29
  results = []
30
  for idx in top_indices:
31
  results.append({
 
33
  'similarity': similarities[idx]
34
  })
35
  return results
 
 
 
 
 
 
 
 
 
 
 
 
rag/example.py DELETED
@@ -1,27 +0,0 @@
1
- from embeddings import get_embeddings, cosine_similarity
2
- def vector_search(query, chunks, chunk_embeddings, top_k=3):
3
- """Find the most similar chunks to the query."""
4
- query_embedding = get_embeddings(query)
5
- similarities = cosine_similarity(query_embedding, chunk_embeddings)[0]
6
- top_indices = similarities.argsort()[::-1][:top_k]
7
-
8
- results = []
9
- for idx in top_indices:
10
- results.append({
11
- 'chunk': chunks[idx],
12
- 'similarity': similarities[idx]
13
- })
14
- return results
15
-
16
- documents = [
17
- "Python is a programming language",
18
- "Machine learning uses Python extensively",
19
- "Cats are popular pets",
20
- "Deep learning is a subset of machine learning"
21
- ]
22
-
23
- doc_embeddings = get_embeddings(documents)
24
-
25
- results = vector_search("Artificial Intelligence", documents, doc_embeddings, top_k=4)
26
- for r in results:
27
- print(f"{r['similarity']:.3f}: {r['chunk']}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
rag/example2.py DELETED
@@ -1,84 +0,0 @@
1
- from tavily import TavilyClient
2
- import os
3
- from dotenv import load_dotenv
4
- from chunking import fixed_length_chunking
5
- from embeddings import get_embeddings
6
- from sklearn.metrics.pairwise import cosine_similarity
7
- import numpy as np
8
-
9
-
10
- load_dotenv()
11
- tavily = TavilyClient(api_key=os.getenv("TAVILY_API_KEY"))
12
-
13
-
14
- response = tavily.search(
15
- "2025 Nobel Prize winners",
16
- max_results=10,
17
- include_raw_content=True
18
- )
19
-
20
- search_results = []
21
- for result in response['results']:
22
- if result.get('raw_content'):
23
- search_results.append({
24
- 'title': result['title'],
25
- 'content': result['raw_content'],
26
- 'url': result['url']
27
- })
28
- all_chunks = []
29
- for result in search_results:
30
- text = f"Title: {result['title']}\n{result['content']}"
31
- chunks = fixed_length_chunking(text, chunk_size=500, overlap=50)
32
- for chunk in chunks:
33
- all_chunks.append({
34
- 'text': chunk,
35
- 'title': result['title'],
36
- 'url': result['url']
37
- })
38
-
39
- print(f"Total chunks: {len(all_chunks)}")
40
-
41
- chunk_texts = [c['text'] for c in all_chunks]
42
- chunk_embeddings = get_embeddings(chunk_texts)
43
-
44
- def vector_search(query, chunks, chunk_embeddings, top_k=3):
45
- """Find the most similar chunks to the query."""
46
- query_embedding = get_embeddings(query)
47
- similarities = cosine_similarity(query_embedding, chunk_embeddings)[0]
48
- top_indices = similarities.argsort()[::-1][:top_k]
49
-
50
- results = []
51
- for idx in top_indices:
52
- results.append({
53
- 'chunk': chunks[idx],
54
- 'similarity': similarities[idx],
55
- 'title': all_chunks[idx]['title'],
56
- 'url': all_chunks[idx]['url']
57
- })
58
- return results
59
-
60
- query = "quantum computing"
61
- results = vector_search(query, chunk_texts, chunk_embeddings, top_k=3)
62
-
63
- print(f"Query: '{query}'\n")
64
- print("=" * 60)
65
- for i, r in enumerate(results, 1):
66
- print(f"\n[{i}] Similarity: {r['similarity']:.3f}")
67
- print(f"Title: {r['title']}")
68
- print(f"URL: {r['url']}")
69
- print(f"Chunk: {r['chunk'][:300]}...")
70
-
71
- total_tokens = 17000
72
- import tiktoken
73
-
74
- # token savings effect
75
-
76
- enc = tiktoken.get_encoding("cl100k_base") # Used by GPT-4, GPT-4-turbo, etc.
77
- # Alternative: enc = tiktoken.encoding_for_model("gpt-4") # If you want to use a specific model
78
- top_chunks = [r['chunk'] for r in results]
79
- selected_text = "\n\n".join(top_chunks)
80
- selected_tokens = len(enc.encode(selected_text))
81
-
82
- print(f"Total tokens: {total_tokens}")
83
- print(f"Selected tokens: {selected_tokens}")
84
- print(f"Savings rate: {(1 - selected_tokens/total_tokens)*100:.1f}%")