shekkari21 commited on
Commit
ec96f6b
·
1 Parent(s): 6a0d993

started AI agents from scratch

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .env.example +0 -1
  2. chapter_02_llm/01_llm_chat.py +0 -53
  3. chapter_02_llm/02_conversation_management.py +0 -53
  4. chapter_02_llm/03_structured_output.py +0 -23
  5. chapter_02_llm/04_asynchronous_llm_call.py +0 -31
  6. chapter_02_llm/05_potato_problem.py +0 -181
  7. chapter_03_tool_use/ch3_01_calculator_tool.py +0 -123
  8. chapter_03_tool_use/ch3_02_tavily_search_tool.py +0 -16
  9. chapter_03_tool_use/ch3_03_wikipedia_tool.py +0 -23
  10. chapter_03_tool_use/ch3_04_tool_definition.py +0 -72
  11. chapter_03_tool_use/ch3_05_tools_exercise.py +0 -113
  12. chapter_03_tool_use/ch3_06_tool_abstraction.py +0 -126
  13. chapter_03_tool_use/ch3_07_tool_decorator.py +0 -45
  14. chapter_03_tool_use/ch3_08_mcp_tavily_custom.py +0 -37
  15. chapter_04_basic_agent/01_/bsolve_kipchoge_problem.py +0 -30
  16. chapter_04_basic_agent/02_agent_structured_output.py +0 -33
  17. chapter_04_basic_agent/03_human_in_the_loop.py +0 -76
  18. chapter_06_memory/01_session_agent.py +0 -56
  19. chapter_06_memory/02_core_memory_strategy.py +0 -35
  20. chapter_06_memory/03_core_memory_update.py +0 -32
  21. chapter_06_memory/04_sliding_window.py +0 -43
  22. chapter_06_memory/05_summarization.py +0 -64
  23. chapter_06_memory/06_conversation_search.py +0 -59
  24. chapter_06_memory/07_task_long_term.py +0 -76
  25. chapter_06_memory/08_user_long_term.py +0 -98
  26. my_code.ipynb +926 -0
  27. pyproject.toml +4 -0
  28. scratch_agents/agents/__init__.py +0 -0
  29. scratch_agents/agents/execution_context_ch4.py +0 -27
  30. scratch_agents/agents/execution_context_ch6.py +0 -34
  31. scratch_agents/agents/tool_calling_agent_ch4_base.py +0 -116
  32. scratch_agents/agents/tool_calling_agent_ch4_callback.py +0 -203
  33. scratch_agents/agents/tool_calling_agent_ch4_structured_output.py +0 -146
  34. scratch_agents/agents/tool_calling_agent_ch6.py +0 -226
  35. scratch_agents/memory/base_memory_strategy.py +0 -13
  36. scratch_agents/memory/core_memory_strategy.py +0 -21
  37. scratch_agents/memory/sliding_window_strategy.py +0 -26
  38. scratch_agents/memory/summarization_strategy.py +0 -77
  39. scratch_agents/models/__init__.py +0 -0
  40. scratch_agents/models/base_llm.py +0 -12
  41. scratch_agents/models/llm_request.py +0 -19
  42. scratch_agents/models/llm_response.py +0 -10
  43. scratch_agents/models/openai.py +0 -174
  44. scratch_agents/sessions/base_cross_session_manager.py +0 -297
  45. scratch_agents/sessions/base_session_manager.py +0 -28
  46. scratch_agents/sessions/in_memory_session_manager.py +0 -30
  47. scratch_agents/sessions/session.py +0 -23
  48. scratch_agents/sessions/task_cross_session_manager.py +0 -194
  49. scratch_agents/sessions/user_cross_session_manager.py +0 -185
  50. scratch_agents/tools/__init__.py +0 -5
.env.example DELETED
@@ -1 +0,0 @@
1
- OPENAI_API_KEY=asdf
 
 
chapter_02_llm/01_llm_chat.py DELETED
@@ -1,53 +0,0 @@
1
- # Listing 2.1
2
- from openai import OpenAI
3
- from dotenv import load_dotenv
4
-
5
- load_dotenv()
6
-
7
- client = OpenAI()
8
-
9
- # Listing 2.2
10
- response = client.chat.completions.create(
11
- model="gpt-5-mini",
12
- messages=[
13
- {"role": "developer", "content": "You are a helpful assistant."},
14
- {"role": "user", "content": [{ "type": "text", "text": "Who's there?" }]}
15
- ]
16
- )
17
- print(response.choices[0].message.content)
18
-
19
- # Listing 2.3
20
- response = client.chat.completions.create(
21
- model="o4-mini",
22
- messages=[
23
- {"role": "developer", "content": "You are a helpful assistant."},
24
- {"role": "user", "content": "Who's there?"}
25
- ]
26
- )
27
- print(response.choices[0].message.content)
28
- print(f"Input tokens: {response.usage.prompt_tokens}")
29
- print(f"Output tokens: {response.usage.completion_tokens}")
30
- print(f"Reasoning tokens: {response.usage.completion_tokens_details.reasoning_tokens}")
31
-
32
- # Listing 2.4
33
- response = client.chat.completions.create(
34
- model="gpt-4o-mini",
35
- messages=[
36
- {"role": "developer", "content": "You are a helpful assistant."},
37
- {"role": "user", "content": "Who's there?"}
38
- ],
39
- stream=True,
40
- temperature=0.1,
41
- max_completion_tokens=200,
42
- logprobs=True
43
- )
44
- for chunk in response:
45
- print(chunk.choices[0].delta.content, end="", flush=True)
46
-
47
- # Listing 2.5
48
- response = client.responses.create(
49
- model="gpt-5-mini",
50
- input="Where is the capital of South Korea?",
51
- instructions="You are a helpful assistant."
52
- )
53
- print(response.output_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
chapter_02_llm/02_conversation_management.py DELETED
@@ -1,53 +0,0 @@
1
- from openai import OpenAI
2
- from dotenv import load_dotenv
3
-
4
- load_dotenv()
5
-
6
- client = OpenAI()
7
-
8
- # Listing 2.6
9
- response_1 = client.chat.completions.create(
10
- model='gpt-5-mini',
11
- messages=[{"role": "user", "content": "My name is Jungjun"}],
12
- )
13
- print(response_1.choices[0].message.content)
14
-
15
- response_2 = client.chat.completions.create(
16
- model='gpt-5-mini',
17
- messages=[{"role": "user", "content": "What is my name?"}],
18
- )
19
- print(response_2.choices[0].message.content)
20
-
21
- # Listing 2.7
22
- messages = []
23
- messages.append({"role": "user", "content": "My name is Jungjun"})
24
-
25
- response_3 = client.chat.completions.create(
26
- model='gpt-5-mini',
27
- messages=messages,
28
- )
29
- print(response_3.choices[0].message.content)
30
-
31
- messages.append({"role": "assistant", "content": response_3.choices[0].message.content})
32
- messages.append({"role": "user", "content": "What is my name?"})
33
-
34
- response_4 = client.chat.completions.create(
35
- model='gpt-5-mini',
36
- messages=messages,
37
- )
38
- print(response_4.choices[0].message.content)
39
-
40
- # Listing 2.8
41
- response = client.responses.create(
42
- model="gpt-5-mini",
43
- input="My name is Jungjun",
44
- )
45
- print(response.output_text)
46
-
47
- second_response = client.responses.create(
48
- model="gpt-5-mini",
49
- previous_response_id=response.id,
50
- input=[{"role": "user", "content": "What is my name?"}],
51
- )
52
- print(second_response.output_text)
53
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
chapter_02_llm/03_structured_output.py DELETED
@@ -1,23 +0,0 @@
1
- from pydantic import BaseModel
2
- from openai import OpenAI
3
- from dotenv import load_dotenv
4
-
5
- load_dotenv()
6
-
7
- client = OpenAI()
8
-
9
- # Listing 2.10
10
- class User(BaseModel):
11
- name: str
12
- email: str
13
-
14
- response = client.beta.chat.completions.parse(
15
- model='gpt-5-mini',
16
- messages=[{"role": "user", "content": """My name is John Smith,
17
- my phone number is (555) 123-4567,
18
- and my email is john.smith@example.com"""}],
19
- response_format=User,
20
- )
21
-
22
- print(type(response.choices[0].message.parsed))
23
- print(response.choices[0].message.parsed)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
chapter_02_llm/04_asynchronous_llm_call.py DELETED
@@ -1,31 +0,0 @@
1
- import asyncio
2
- from openai import AsyncOpenAI
3
- from dotenv import load_dotenv
4
-
5
- load_dotenv()
6
-
7
- client = AsyncOpenAI()
8
-
9
- # Listing 2.11
10
- async def get_answer(prompt): #A
11
- response = await client.chat.completions.create( #B
12
- model="gpt-5-mini",
13
- messages=[{"role": "user", "content": prompt}]
14
- )
15
- return response.choices[0].message.content
16
-
17
- async def main():
18
- prompts = [
19
- "Hello!",
20
- "What's 2 + 2?",
21
- "Tell me a short joke about cats."
22
- ]
23
-
24
- tasks = [get_answer(p) for p in prompts]
25
-
26
- results = await asyncio.gather(*tasks)
27
-
28
- for r in results:
29
- print(r)
30
-
31
- asyncio.run(main())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
chapter_02_llm/05_potato_problem.py DELETED
@@ -1,181 +0,0 @@
1
- from openai import AsyncOpenAI
2
- from dotenv import load_dotenv
3
- from pydantic import BaseModel
4
-
5
- load_dotenv()
6
-
7
- client = AsyncOpenAI()
8
-
9
- import asyncio
10
- import time
11
-
12
- class PotatoSolution(BaseModel):
13
- thought_process: str
14
- final_answer: str
15
-
16
- SYS_PROMPT = """You are a general AI assistant.
17
- I will ask you a question. Report your thoughts in "thought_process" and finish your answer in "final_answer".
18
- YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
19
- If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
20
- If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
21
- If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
22
- """
23
-
24
- FAMILY_REUNION_PROBLEM = """
25
- My family reunion is this week, and I was assigned the mashed potatoes to bring. The attendees include my married mother and father, my twin brother and his family, my aunt and her family, my grandma and her brother, her brother's daughter, and his daughter's family. All the adults but me have been married, and no one is divorced or remarried, but my grandpa and my grandma's sister-in-law passed away last year. All living spouses are attending. My brother has two children that are still kids, my aunt has one six-year-old, and my grandma's brother's daughter has three kids under 12. I figure each adult will eat about 1.5 potatoes of mashed potatoes and each kid will eat about 1/2 a potato of mashed potatoes, except my second cousins don't eat carbs. How many potatoes do I need in total? Just give the number.
26
- """
27
-
28
- EXPECTED_ANSWER = "18"
29
-
30
- async def get_llm_answer(
31
- client: AsyncOpenAI,
32
- model_name: str,
33
- prompt: str,
34
- result_format: type[BaseModel] | None = None
35
- ) -> tuple[str, str]:
36
- try:
37
- api_call_params = {
38
- "model": model_name,
39
- "messages": [
40
- {"role": "system", "content": SYS_PROMPT},
41
- {"role": "user", "content": prompt}
42
- ]
43
- }
44
-
45
- if model_name not in ["gpt-5", "gpt-5-mini"]:
46
- api_call_params["temperature"] = 0.5
47
-
48
- response = await client.beta.chat.completions.parse(
49
- **api_call_params,
50
- response_format=result_format
51
- )
52
- parsed_object = response.choices[0].message.parsed
53
- return parsed_object.thought_process, parsed_object.final_answer
54
-
55
- except Exception as e:
56
- print(f"Error during LLM API call for model {model_name}: {e}")
57
- return "", ""
58
-
59
-
60
- async def run_problem_test(
61
- local_client: AsyncOpenAI,
62
- model_name: str,
63
- prompt_name: str,
64
- prompt_content: str,
65
- num_tests: int,
66
- expected_answer: str
67
- ) -> tuple[int, float]:
68
- """
69
- Asynchronously runs the math problem test N times for the specified prompt
70
- and returns the number of successful answers and total execution time.
71
- """
72
- print(f"\n--- Testing '{prompt_name}' prompt strategy ({num_tests} repetitions) ---")
73
-
74
- tasks = [
75
- get_llm_answer(local_client, model_name, prompt_content, result_format=PotatoSolution)
76
- for _ in range(num_tests)
77
- ]
78
-
79
- llm_responses = await asyncio.gather(*tasks)
80
-
81
- correct_answers = 0
82
- for i, (_, final_answer) in enumerate(llm_responses):
83
- if final_answer == expected_answer:
84
- correct_answers += 1
85
-
86
- print(f"'{prompt_name}' test completed: {correct_answers}/{num_tests} correct (Success rate: {correct_answers/num_tests*100:.2f}%)")
87
- return correct_answers
88
-
89
- async def test_model_with_all_strategies(model_name: str, number_of_runs: int):
90
- """
91
- Test a single model with all prompt strategies in parallel.
92
- Returns results for the model.
93
- """
94
- print(f"\n======================================================================")
95
- print(f"Testing Model: {model_name}")
96
- print(f"======================================================================")
97
-
98
- # Define all prompts
99
- prompts = {
100
- "Baseline (Zero-shot)": FAMILY_REUNION_PROBLEM,
101
-
102
- "Few-shot": f"""
103
- Here's an example of how to solve a similar family calculation problem:
104
- <example>
105
- Question: "I'm hosting a birthday party. Attendees include me, my parents, my sister and her husband, and my uncle with his two teenage children. Each adult will eat 2 slices of pizza and each child will eat 1 slice. How many pizza slices do I need?"
106
- Answer: 14
107
- </example>
108
- Now solve this problem:
109
-
110
- {FAMILY_REUNION_PROBLEM}
111
- """,
112
-
113
- "Role-based": f"""
114
- You are a family event planning specialist with expertise in calculating food quantities for family gatherings. You excel at parsing complex family relationships and determining accurate serving quantities based on different demographics and dietary preferences.
115
- Using your expertise, please solve this problem:
116
-
117
-
118
- {FAMILY_REUNION_PROBLEM}
119
- """,
120
-
121
- "Chain-of-Thought (Guided)": f"""
122
- {FAMILY_REUNION_PROBLEM}
123
-
124
- Let's solve this step by step:
125
- 1. First, identify all family members attending:
126
- - List each person and their relationship to you
127
- - Account for spouses of married individuals
128
- - Note any deceased family members who won't be attending
129
- 2. Categorize attendees by age group:
130
- - Count total adults
131
- - Count total children
132
- - Note any special dietary restrictions
133
- 3. Apply consumption rules:
134
- - Calculate potatoes needed for adults
135
- - Calculate potatoes needed for children
136
- - Adjust for any dietary restrictions
137
- 4. Sum the total number of potatoes needed Please work through each step carefully.
138
-
139
- """,
140
-
141
- "Simple Chain-of-Thought": f"""
142
- {FAMILY_REUNION_PROBLEM}
143
-
144
- Think step by step and give the answer.
145
- """
146
- }
147
-
148
- # Run all strategies in parallel
149
- tasks = [
150
- run_problem_test(client, model_name, prompt_name, prompt_content, number_of_runs, EXPECTED_ANSWER)
151
- for prompt_name, prompt_content in prompts.items()
152
- ]
153
-
154
- results = await asyncio.gather(*tasks)
155
- return results
156
-
157
- async def main():
158
- """
159
- Tests the family reunion problem using various prompt engineering techniques
160
- across multiple LLM models.
161
- """
162
- models_to_test = ["gpt-4.1", "gpt-4.1-mini", "gpt-5", "gpt-5-mini"]
163
- number_of_runs = 10
164
-
165
- print(f"Starting family reunion problem test ({number_of_runs} runs per prompt per model)")
166
- print(f"Problem: Calculate how many bags of potatoes needed for family reunion")
167
- print(f"Expected answer: '{EXPECTED_ANSWER}'")
168
-
169
- overall_start = time.time()
170
-
171
- # Test each model sequentially (could also parallelize this)
172
- for model_name in models_to_test:
173
- await test_model_with_all_strategies(model_name, number_of_runs)
174
-
175
- overall_end = time.time()
176
- print(f"\n======================================================================")
177
- print(f"All tests completed in {overall_end - overall_start:.2f} seconds")
178
-
179
-
180
- if __name__ == "__main__":
181
- asyncio.run(main())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
chapter_03_tool_use/ch3_01_calculator_tool.py DELETED
@@ -1,123 +0,0 @@
1
- import json
2
- from openai import OpenAI
3
- from dotenv import load_dotenv
4
-
5
- load_dotenv()
6
-
7
- client = OpenAI()
8
-
9
- # Listing 3.1
10
- calculator_tool_definition = {
11
- "type": "function",
12
- "function": {
13
- "name": "calculator",
14
- "description": "Perform basic arithmetic operations between two numbers.",
15
- "parameters": {
16
- "type": "object",
17
- "properties": {
18
- "operator": {
19
- "type": "string",
20
- "description": "Arithmetic operation to perform",
21
- "enum": ["add", "subtract", "multiply", "divide"]
22
- },
23
- "first_number": {
24
- "type": "number",
25
- "description": "First number for the calculation"
26
- },
27
- "second_number": {
28
- "type": "number",
29
- "description": "Second number for the calculation"
30
- }
31
- },
32
- "required": ["operator", "first_number", "second_number"],
33
- }
34
- }
35
- }
36
-
37
- # Listing 3.2
38
- def calculator(operator: str, first_number: float, second_number: float) -> float:
39
- if operator == 'add':
40
- return first_number + second_number
41
- elif operator == 'subtract':
42
- return first_number - second_number
43
- elif operator == 'multiply':
44
- return first_number * second_number
45
- elif operator == 'divide':
46
- if second_number == 0:
47
- raise ValueError("Cannot divide by zero")
48
- return first_number / second_number
49
- else:
50
- raise ValueError(f"Unsupported operator: {operator}")
51
-
52
- if __name__ == "__main__":
53
- # Listing 3.3
54
- tools = [calculator_tool_definition]
55
-
56
- response_without_tool = client.chat.completions.create(
57
- model='gpt-5-mini',
58
- messages=[{"role": "user", "content": "What is the capital of South Korea?"}],
59
- tools=tools
60
- )
61
- print(response_without_tool.choices[0].message.content) # The capital of South Korea is Seoul.
62
- print(response_without_tool.choices[0].message.tool_calls) # None
63
-
64
- response_with_tool = client.chat.completions.create(
65
- model='gpt-5-mini',
66
- messages=[{"role": "user", "content": "What is 1234 x 5678?"}],
67
- tools=tools
68
- )
69
- print(response_with_tool.choices[0].message.content) # None
70
- print(response_with_tool.choices[0].message.tool_calls)
71
- # [ChatCompletionMessageFunctionToolCall(id='call_viaOEiQJ5VEB9YvKl95qlDjM', function=Function(arguments='{"operator":"multiply","first_number":1234,"second_number":5678}', name='calculator'), type='function')]
72
-
73
- # Listing 3.4
74
- ai_message = response_with_tool.choices[0].message
75
-
76
- if ai_message.tool_calls:
77
- for tool_call in ai_message.tool_calls:
78
- function_name = tool_call.function.name
79
- function_args = json.loads(tool_call.function.arguments)
80
-
81
- if function_name == "calculator":
82
- result = calculator(**function_args)
83
- print("calculator result:", result)
84
-
85
- # Listing 3.5
86
- messages = []
87
- messages.append({"role": "user", "content": "What is 1234 x 5678?"})
88
-
89
- response_with_tool = client.chat.completions.create(
90
- model='gpt-5-mini',
91
- messages=messages,
92
- tools=tools
93
- )
94
-
95
- ai_message = response_with_tool.choices[0].message
96
-
97
- messages.append({
98
- "role": "assistant",
99
- "content": ai_message.content,
100
- "tool_calls": ai_message.tool_calls
101
- })
102
-
103
- if ai_message.tool_calls:
104
- for tool_call in ai_message.tool_calls:
105
- function_name = tool_call.function.name
106
- function_args = json.loads(tool_call.function.arguments)
107
-
108
- if function_name == "calculator":
109
- result = calculator(**function_args)
110
-
111
- messages.append({
112
- "role": "tool",
113
- "tool_call_id": tool_call.id,
114
- "content": str(result)
115
- })
116
-
117
- final_response = client.chat.completions.create(
118
- model='gpt-5-mini',
119
- messages=messages
120
- )
121
- print("Messages:", messages)
122
- print("Final Answer:", final_response.choices[0].message.content)
123
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
chapter_03_tool_use/ch3_02_tavily_search_tool.py DELETED
@@ -1,16 +0,0 @@
1
- # Listing 3.13
2
- import os
3
- from tavily import TavilyClient
4
- from dotenv import load_dotenv
5
-
6
- load_dotenv()
7
-
8
- tavily_client = TavilyClient(os.getenv("TAVILY_API_KEY"))
9
-
10
- def search_web(query: str) -> str:
11
- """Search the web for the given query."""
12
- response = tavily_client.search(query, max_results=2, chunks_per_source=2)
13
- return response.get("results")
14
-
15
- # Listing 3.14
16
- print(search_web("Kipchoge's marathon world record"))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
chapter_03_tool_use/ch3_03_wikipedia_tool.py DELETED
@@ -1,23 +0,0 @@
1
- import wikipedia
2
-
3
- # Listing 3.16
4
- def search_wikipedia(query:str) -> list[str]:
5
- """Search Wikipedia for a query and return titles of wikipedia pages"""
6
- search_results = wikipedia.search(query)
7
- return search_results
8
-
9
- def get_wikipedia_page(title:str) -> str:
10
- """Get a wikipedia page by title"""
11
- page = wikipedia.page(title, auto_suggest=False)
12
- return page.content
13
-
14
-
15
- if __name__ == "__main__":
16
- # Listing 3.15
17
- search_results = wikipedia.search("moon")
18
- print("search_results:")
19
- print(search_results)
20
-
21
- page = wikipedia.page("Moon", auto_suggest=False)
22
- print("page content:")
23
- print(page.content[:100])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
chapter_03_tool_use/ch3_04_tool_definition.py DELETED
@@ -1,72 +0,0 @@
1
- # Listing 3.17
2
- import inspect
3
- from ch3_02_tavily_search_tool import search_web
4
-
5
- def example_tool(input_1:str, input_2:int=1):
6
- """docstring for example_tool"""
7
- return
8
-
9
- print(f"function name: {example_tool.__name__}")
10
- print(f"function docstring: {example_tool.__doc__}")
11
- print(f"function signature: {inspect.signature(example_tool)}")
12
-
13
- # Listing 3.18
14
- def function_to_input_schema(func) -> dict:
15
- type_map = {
16
- str: "string",
17
- int: "integer",
18
- float: "number",
19
- bool: "boolean",
20
- list: "array",
21
- dict: "object",
22
- type(None): "null",
23
- }
24
-
25
- try:
26
- signature = inspect.signature(func)
27
- except ValueError as e:
28
- raise ValueError(
29
- f"Failed to get signature for function {func.__name__}: {str(e)}"
30
- )
31
-
32
- parameters = {}
33
- for param in signature.parameters.values():
34
- try:
35
- param_type = type_map.get(param.annotation, "string")
36
- except KeyError as e:
37
- raise KeyError(
38
- f"Unknown type annotation {param.annotation} for parameter {param.name}: {str(e)}"
39
- )
40
- parameters[param.name] = {"type": param_type}
41
-
42
- required = [
43
- param.name
44
- for param in signature.parameters.values()
45
- if param.default == inspect._empty
46
- ]
47
-
48
- return {
49
- "type": "object",
50
- "properties": parameters,
51
- "required": required,
52
- }
53
-
54
- # Listing 3.19
55
- def format_tool_definition(name: str, description: str, parameters: dict) -> dict:
56
- return {
57
- "type": "function",
58
- "function": {
59
- "name": name,
60
- "description": description,
61
- "parameters": parameters,
62
- },
63
- }
64
-
65
- def function_to_tool_definition(func) -> dict:
66
- return format_tool_definition(
67
- func.__name__,
68
- func.__doc__ or "",
69
- function_to_input_schema(func)
70
- )
71
-
72
- print(function_to_input_schema(search_web))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
chapter_03_tool_use/ch3_05_tools_exercise.py DELETED
@@ -1,113 +0,0 @@
1
- import json
2
- from openai import OpenAI
3
- from dotenv import load_dotenv
4
- from ch3_01_calculator_tool import calculator
5
- from ch3_02_tavily_search_tool import search_web
6
- from ch3_03_wikipedia_tool import search_wikipedia, get_wikipedia_page
7
- from ch3_04_tool_definition import function_to_tool_definition
8
-
9
- load_dotenv()
10
-
11
- client = OpenAI()
12
-
13
- system_prompt = "You are a helpful assistant. calculator has only 4 operations: add, subtract, multiply, divide"
14
-
15
- tools = [calculator, search_web, search_wikipedia, get_wikipedia_page]
16
- tool_box = {tool.__name__: tool for tool in tools}
17
- tool_definitions = [function_to_tool_definition(tool) for tool in tools]
18
-
19
- # Listing 3.20
20
- def tool_execution(tool_box, tool_call):
21
- function_name = tool_call.function.name
22
- function_args = json.loads(tool_call.function.arguments)
23
-
24
- tool_result = tool_box[function_name](**function_args)
25
- return tool_result
26
-
27
- # Listing 3.21
28
- def run_step(system_prompt, question):
29
- messages = [
30
- {"role": "system", "content": system_prompt},
31
- {"role": "user", "content": question}
32
- ]
33
-
34
- while True:
35
- response = client.chat.completions.create(
36
- model="gpt-5-mini",
37
- messages=messages,
38
- tools=tool_definitions
39
- )
40
-
41
- assistant_message = response.choices[0].message
42
-
43
- if assistant_message.tool_calls:
44
- messages.append(assistant_message)
45
- for tool_call in assistant_message.tool_calls:
46
- tool_result = tool_execution(tool_box, tool_call)
47
- messages.append({
48
- "role": "tool",
49
- "content": str(tool_result),
50
- "tool_call_id": tool_call.id
51
- })
52
- else:
53
- return assistant_message.content
54
-
55
- # Listing 3.22
56
- def step_1_search_kipchoge():
57
- question = """I need to find Eliud Kipchoge's record-making marathon pace.
58
- Please search for information about his world record marathon time and
59
- calculate his pace per kilometer.
60
-
61
- FINAL ANSWER should be in the format: "X.XX minutes per km"."""
62
-
63
- result = run_step(system_prompt, question)
64
- return result
65
-
66
- kipchoge_result = step_1_search_kipchoge()
67
- print(f"Step 1 Complete - Kipchoge pace: {kipchoge_result}")
68
-
69
- # Listing 3.23
70
- def step_2_search_moon_distance():
71
- question = """I need to find the minimum perigee value (closest approach
72
- distance) between Earth and Moon from the Wikipedia page for the Moon.
73
- Please search for this information.
74
-
75
- FINAL ANSWER should be in the format: "X km"."""
76
-
77
- result = run_step(system_prompt, question)
78
- return result
79
-
80
- moon_result = step_2_search_moon_distance()
81
- print(f"Step 2 Complete - Moon distance: {moon_result}")
82
-
83
- # Listing 3.24
84
- def step_3_calculate(kipchoge_pace, moon_distance):
85
- question = f"""Given the following information:
86
- - Kipchoge's pace: {kipchoge_pace}
87
- - Moon distance: {moon_distance}
88
-
89
- Please calculate how many hours it would take Kipchoge to run this distance
90
- at his record pace. Make sure to handle unit conversions properly.
91
-
92
- FINAL ANSWER should be in the format: "X hours"."""
93
-
94
- result = run_step(system_prompt, question)
95
- return result
96
-
97
- time_result = step_3_calculate(kipchoge_result, moon_result)
98
- print(f"Step 3 Complete - Time needed: {time_result}")
99
-
100
- # Listing 3.25
101
- def step_4_final_answer(total_hours):
102
- question = f"""Given that the total time is {total_hours}, I need to round
103
- this to the nearest 1000 hours and express the answer in thousand hours.
104
-
105
- The original question asks for the result rounded to the nearest 1000 hours.
106
-
107
- FINAL ANSWER should be just the number (in thousand hours)."""
108
-
109
- result = run_step(system_prompt, question)
110
- return result
111
-
112
- final_result = step_4_final_answer(time_result)
113
- print(f"Step 4 Complete - Final answer: {final_result}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
chapter_03_tool_use/ch3_06_tool_abstraction.py DELETED
@@ -1,126 +0,0 @@
1
- from abc import ABC, abstractmethod
2
- from typing import Any, Dict, Optional, Union, Type, Callable
3
- import asyncio
4
- import inspect
5
- import json
6
- from ch3_04_tool_definition import format_tool_definition, function_to_input_schema
7
-
8
- # Listing 3.27
9
- class BaseTool(ABC):
10
-
11
- def __init__(
12
- self,
13
- name: str = None,
14
- description: str = None,
15
- tool_definition: Union[Dict[str, Any], str] = None,
16
- pydantic_input_model: Type = None
17
- ):
18
- self.name = name or self.__class__.__name__
19
- self.description = description or self.__doc__ or ""
20
- self.pydantic_input_model = pydantic_input_model
21
-
22
- if isinstance(tool_definition, str):
23
- self._tool_definition = json.loads(tool_definition)
24
- elif tool_definition is not None:
25
- self._tool_definition = tool_definition
26
- else:
27
- self._tool_definition = None # Generate later
28
-
29
- # Listing 3.28
30
- @property
31
- def tool_definition(self) -> Dict[str, Any]:
32
- if self._tool_definition is None: #A
33
- self._tool_definition = self._generate_definition() #A
34
- return self._tool_definition
35
-
36
- def _generate_definition(self) -> Dict[str, Any]:
37
- if self.pydantic_input_model:
38
- try:
39
- from pydantic import BaseModel
40
- if issubclass(self.pydantic_input_model, BaseModel):
41
- parameters = self.pydantic_input_model.model_json_schema()
42
- return format_tool_definition(
43
- self.name, self.description, parameters
44
- )
45
- except ImportError:
46
- pass
47
- # Subclasses should override this method or provide tool_definition
48
- raise NotImplementedError(
49
- f"{self.__class__.__name__} must either provide a tool_definition, "
50
- f"pydantic_input_model, or override _generate_definition()"
51
- )
52
-
53
- # Listing 3.29
54
- class FunctionTool(BaseTool):
55
-
56
- def __init__(
57
- self,
58
- func: Callable,
59
- name: str = None,
60
- description: str = None,
61
- tool_definition: Union[Dict[str, Any], str] = None
62
- ):
63
- self.func = func
64
- self.pydantic_input_model = self._detect_pydantic_model(func) #A
65
-
66
- name = name or func.__name__ #B
67
- description = description or (func.__doc__ or "").strip() #B
68
-
69
- super().__init__(
70
- name=name,
71
- description=description,
72
- tool_definition=tool_definition,
73
- pydantic_input_model=self.pydantic_input_model
74
- )
75
-
76
- # Listing 3.30
77
- async def execute(self, **kwargs) -> Any:
78
- if self.pydantic_input_model:
79
- args = (self.pydantic_input_model.model_validate(kwargs),)
80
- call_kwargs = {}
81
- else:
82
- args = ()
83
- call_kwargs = kwargs
84
-
85
- if inspect.iscoroutinefunction(self.func):
86
- return await self.func(*args, **call_kwargs)
87
- else:
88
- loop = asyncio.get_event_loop()
89
- return await loop.run_in_executor(
90
- None, lambda: self.func(*args, **call_kwargs)
91
- )
92
-
93
- # Listing 3.31
94
- def _generate_definition(self) -> Dict[str, Any]:
95
- if self.pydantic_input_model:
96
- return super()._generate_definition()
97
-
98
- parameters = function_to_input_schema(self.func)
99
- return format_tool_definition(self.name, self.description, parameters)
100
-
101
- # Listing 3.32
102
- def _detect_pydantic_model(self, func: Callable) -> Optional[Type]:
103
- try:
104
- from pydantic import BaseModel
105
- sig = inspect.signature(func)
106
- params = list(sig.parameters.values())
107
-
108
- if len(params) == 1 and params[0].annotation != inspect._empty:
109
- param_type = params[0].annotation
110
- if isinstance(param_type, type) and issubclass(param_type, BaseModel):
111
- return param_type
112
- except ImportError:
113
- pass
114
- return None
115
-
116
- if __name__ == "__main__":
117
- def search_web(query: str) -> str:
118
- """Search for information on the web"""
119
- # Actual search logic
120
- return f"Search results: {query}"
121
-
122
- search_tool = FunctionTool(search_web)
123
-
124
- print(type(search_tool))
125
- print(search_tool.description)
126
- print(search_tool.tool_definition)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
chapter_03_tool_use/ch3_07_tool_decorator.py DELETED
@@ -1,45 +0,0 @@
1
- from typing import Callable, Union, Dict, Any
2
- from ch3_06_tool_abstraction import FunctionTool
3
-
4
- def tool(
5
- func: Callable = None,
6
- *,
7
- name: str = None,
8
- description: str = None,
9
- tool_definition: Union[Dict[str, Any], str] = None
10
- ) -> Union[Callable, FunctionTool]:
11
-
12
- def decorator(f: Callable) -> FunctionTool:
13
- return FunctionTool(
14
- func=f,
15
- name=name,
16
- description=description,
17
- tool_definition=tool_definition
18
- )
19
-
20
- # Handle both @tool and @tool() usage
21
- if func is not None:
22
- return decorator(func)
23
- return decorator
24
-
25
- if __name__ == "__main__":
26
- def search_web(query: str) -> str:
27
- """Search for information on the web"""
28
- return f"{query}_result"
29
-
30
- search_tool_v1 = FunctionTool(search_web)
31
-
32
- @tool
33
- def search_web(query: str) -> str:
34
- """Search for information on the web"""
35
- return f"{query}_result"
36
-
37
- @tool(name="internet_search",
38
- description="Query the internet for latest information")
39
- def search_web_custom(query: str) -> str:
40
- """Search for information on the web"""
41
- return f"{query}_result"
42
-
43
- print(search_tool_v1.tool_definition)
44
- print(search_web.tool_definition)
45
- print(search_web_custom.tool_definition)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
chapter_03_tool_use/ch3_08_mcp_tavily_custom.py DELETED
@@ -1,37 +0,0 @@
1
- import os
2
- from tavily import TavilyClient
3
- from dotenv import load_dotenv
4
- from mcp.server.fastmcp import FastMCP
5
-
6
- load_dotenv()
7
-
8
- tavily_client = TavilyClient(os.getenv("TAVILY_API_KEY"))
9
-
10
- mcp = FastMCP("tavily-search")
11
-
12
- @mcp.tool()
13
- def search_web(query: str, max_results: int = 5) -> str:
14
- """
15
- Search the web using Tavily API.
16
-
17
- Args:
18
- query: Search query string
19
- max_results: Maximum number of results to return (default: 5)
20
-
21
- Returns:
22
- Search results as formatted string
23
- """
24
- try:
25
- response = tavily_client.search(
26
- query,
27
- max_results=max_results,
28
- chunks_per_source=2
29
- )
30
-
31
- return "\n".join(response.get("results"))
32
-
33
- except Exception as e:
34
- return f"Error searching web: {str(e)}"
35
-
36
- if __name__ == "__main__":
37
- mcp.run(transport='stdio')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
chapter_04_basic_agent/01_/bsolve_kipchoge_problem.py DELETED
@@ -1,30 +0,0 @@
1
- import asyncio
2
- from dotenv import load_dotenv
3
- load_dotenv()
4
-
5
- from scratch_agents.tools import calculator, search_web, search_wikipedia, get_wikipedia_page
6
- from scratch_agents.models.openai import OpenAILlm
7
- from scratch_agents.agents.tool_calling_agent_ch4_base import ToolCallingAgent
8
-
9
- gaia_system_prompt = """
10
- You are a general AI assistant.
11
- I will ask you a question.
12
- YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
13
- If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
14
- If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
15
- If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
16
- """
17
-
18
- kipchoge_problem = """
19
- If Eliud Kipchoge could maintain his record-making marathon pace indefinitely, how many thousand hours would it take him to run the distance between the Earth and the Moon at its closest approach? Please use the minimum perigee value on the Wikipedia page for the Moon when carrying out your calculation. Round your result to the nearest integer.
20
- """
21
-
22
- async def main():
23
- tools = [search_web, calculator, search_wikipedia, get_wikipedia_page]
24
- model = OpenAILlm(model="gpt-5")
25
- agent = ToolCallingAgent(model=model, tools=tools, instruction=gaia_system_prompt, max_steps=20)
26
- result, context = await agent.run(kipchoge_problem, return_context=True)
27
- print(result)
28
-
29
- if __name__ == "__main__":
30
- asyncio.run(main())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
chapter_04_basic_agent/02_agent_structured_output.py DELETED
@@ -1,33 +0,0 @@
1
- import asyncio
2
- from typing import Optional, Literal, List
3
- from pydantic import BaseModel, Field
4
- from dotenv import load_dotenv
5
- from scratch_agents.agents.tool_calling_agent_ch4_structured_output import ToolCallingAgent
6
- from scratch_agents.models.openai import OpenAILlm
7
-
8
- load_dotenv()
9
-
10
-
11
- async def main():
12
- # Initialize LLM (ensure OPENAI_API_KEY is set in your environment)
13
- llm = OpenAILlm(model="gpt-5-mini")
14
-
15
- class SentimentAnalysis(BaseModel):
16
- sentiment: Literal["positive", "negative", "neutral"]
17
- confidence: float
18
- key_phrases: List[str]
19
-
20
- agent = ToolCallingAgent(
21
- name="sentiment_analyzer",
22
- model=llm,
23
- tools=[], # Could include tools for data retrieval
24
- instructions="Analyze the sentiment of the provided text.",
25
- output_type=SentimentAnalysis
26
- )
27
-
28
- result = await agent.run("This product exceeded my expectations! Highly recommend.")
29
- # result is now a SentimentAnalysis instance with validated fields
30
- print(f"Sentiment: {result.sentiment} (confidence: {result.confidence})")
31
-
32
- if __name__ == "__main__":
33
- asyncio.run(main())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
chapter_04_basic_agent/03_human_in_the_loop.py DELETED
@@ -1,76 +0,0 @@
1
- import asyncio
2
- from scratch_agents.tools import search_web, calculator, search_wikipedia, get_wikipedia_page
3
- from scratch_agents.models.openai import OpenAILlm
4
- from scratch_agents.agents.tool_calling_agent_ch4_callback import ToolCallingAgent
5
- from dotenv import load_dotenv
6
-
7
- load_dotenv()
8
-
9
- gaia_system_prompt = """
10
- You are a general AI assistant.
11
- I will ask you a question.
12
- YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
13
- If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
14
- If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
15
- If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
16
- """
17
-
18
- kipchoge_problem = """
19
- If Eliud Kipchoge could maintain his record-making marathon pace indefinitely, how many thousand hours would it take him to run the distance between the Earth and the Moon at its closest approach? Please use the minimum perigee value on the Wikipedia page for the Moon when carrying out your calculation. Round your result to the nearest integer.
20
- """
21
-
22
- def basic_approval_callback(context, tool_call):
23
- tool_name = tool_call.name
24
- print(f"\n🔧 Tool Execution Request")
25
- print(f"Tool: {tool_call.name}")
26
- print(f"Arguments: {tool_call.arguments}")
27
-
28
- response = input("Execute this tool? (y/n): ").lower().strip()
29
-
30
- if response == 'y':
31
- print("✅ Approved. Executing...\n")
32
- return None
33
- else:
34
- print("❌ Denied. Skipping execution.\n")
35
- return f"User denied execution of {tool_name}"
36
-
37
-
38
- def session_aware_approval_callback(context, tool_call):
39
- tool_name = tool_call.name
40
- # Check if tool is already marked as safe in this session
41
- safe_tools = context.state.get('safe_tools', [])
42
- if tool_name in safe_tools:
43
- print(f"✓ Auto-executing {tool_name} (marked as safe)")
44
- return None
45
-
46
- response = input("Execute this tool? (y to run once, ya to allow for session, n to skip): ").lower().strip()
47
-
48
- if response == 'y':
49
- print("✅ Approved. Executing...\n")
50
- return None
51
- elif response == 'ya':
52
- if 'safe_tools' not in context.state:
53
- context.state['safe_tools'] = []
54
- context.state['safe_tools'].append(tool_name)
55
- print(f"✅ {tool_name} marked as safe for this session. Executing...\n")
56
- return None
57
- else:
58
- print("❌ Denied. Skipping execution.\n")
59
- return f"User denied execution of {tool_name}"
60
-
61
-
62
- async def main():
63
- tools = [search_web, calculator, search_wikipedia, get_wikipedia_page]
64
- model = OpenAILlm(model="gpt-5-mini")
65
- agent = ToolCallingAgent(
66
- name="callback_agent",
67
- model=model,
68
- tools=tools,
69
- instructions=gaia_system_prompt,
70
- before_tool_callbacks=[basic_approval_callback]
71
- )
72
- result = await agent.run(kipchoge_problem)
73
- print(result)
74
-
75
- if __name__ == "__main__":
76
- asyncio.run(main())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
chapter_06_memory/01_session_agent.py DELETED
@@ -1,56 +0,0 @@
1
- import asyncio
2
- from scratch_agents.models.openai import OpenAILlm
3
- from scratch_agents.agents.tool_calling_agent_ch6 import ToolCallingAgent
4
- from scratch_agents.tools import calculator, search_web
5
- from scratch_agents.sessions.in_memory_session_manager import InMemorySessionManager
6
- from dotenv import load_dotenv
7
- import os
8
-
9
- load_dotenv()
10
-
11
-
12
- async def main():
13
- """Demonstrate session memory functionality"""
14
- user_id = "test_123"
15
- # Initialize components
16
- model = OpenAILlm(model='gpt-5-mini')
17
- tools = [calculator, search_web]
18
-
19
- # Create agent with session manager
20
- agent = ToolCallingAgent(
21
- name="session_assistant",
22
- model=model,
23
- instructions="You are a helpful assistant that remembers our conversations.",
24
- tools=tools,
25
- session_manager=InMemorySessionManager()
26
- )
27
-
28
- # First interaction - session 1
29
- print("=== First Interaction (Session 1) ===")
30
- answer1 = await agent.run(
31
- "My name is Alice and I'm working on Project Alpha. What's 123 * 456?",
32
- session_id="session_1",
33
- user_id=user_id
34
- )
35
- print(f"Assistant: {answer1}\n")
36
-
37
- # Second interaction - continue session 1
38
- print("=== Second Interaction (Session 1) ===")
39
- answer2 = await agent.run(
40
- "What project am I working on and what was the result of the multiplication I asked about?",
41
- session_id="session_1",
42
- user_id=user_id
43
- )
44
- print(f"Assistant: {answer2}\n")
45
-
46
- # New session - session 2
47
- print("=== New Session (Session 2) ===")
48
- answer3 = await agent.run(
49
- "Do you remember my name?",
50
- session_id="session_2",
51
- user_id=user_id
52
- )
53
- print(f"Assistant: {answer3}\n")
54
-
55
- if __name__ == "__main__":
56
- asyncio.run(main())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
chapter_06_memory/02_core_memory_strategy.py DELETED
@@ -1,35 +0,0 @@
1
- import asyncio
2
- from scratch_agents.agents.tool_calling_agent_ch6 import ToolCallingAgent
3
- from scratch_agents.models.openai import OpenAILlm
4
- from scratch_agents.sessions.in_memory_session_manager import InMemorySessionManager
5
- from scratch_agents.memory.core_memory_strategy import CoreMemoryStrategy
6
- from dotenv import load_dotenv
7
- import os
8
-
9
- load_dotenv()
10
-
11
-
12
- async def test_core_memory_loading():
13
- user_id = "test_123"
14
- session_id = "test_session"
15
- session_manager = InMemorySessionManager()
16
- session = session_manager.get_or_create_session(session_id, user_id)
17
- session.core_memory["user"] = "User's name is Alice"
18
-
19
- agent = ToolCallingAgent(
20
- name="memory_agent",
21
- model=OpenAILlm(model="gpt-5-mini"),
22
- instructions="You are a helpful assistant",
23
- session_manager=session_manager,
24
- before_llm_callbacks=[CoreMemoryStrategy()]
25
- )
26
-
27
- response = await agent.run(
28
- "What's my name?",
29
- session_id=session_id,
30
- user_id=user_id
31
- )
32
-
33
- print(response)
34
-
35
- asyncio.run(test_core_memory_loading())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
chapter_06_memory/03_core_memory_update.py DELETED
@@ -1,32 +0,0 @@
1
- from scratch_agents.agents.tool_calling_agent_ch6 import ToolCallingAgent
2
- from scratch_agents.models.openai import OpenAILlm
3
- from scratch_agents.sessions.in_memory_session_manager import InMemorySessionManager
4
- from scratch_agents.tools.core_memory_upsert import core_memory_upsert
5
- import asyncio
6
- from dotenv import load_dotenv
7
- import os
8
-
9
- load_dotenv()
10
-
11
- user_id = "test_123"
12
- session_id = "test_session"
13
-
14
- async def test_automatic_memory_update():
15
- agent = ToolCallingAgent(
16
- name="learning_agent",
17
- model=OpenAILlm(model="gpt-5-mini"),
18
- instructions="Remember important user info with core_memory_upsert",
19
- tools=[core_memory_upsert],
20
- session_manager=InMemorySessionManager(),
21
- )
22
-
23
- await agent.run(
24
- "Hi! My name is Alice and I work as a data scientist.",
25
- session_id=session_id,
26
- user_id=user_id
27
- )
28
-
29
- session = agent.session_manager.get_session(session_id)
30
- print(session.core_memory['user'])
31
-
32
- asyncio.run(test_automatic_memory_update())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
chapter_06_memory/04_sliding_window.py DELETED
@@ -1,43 +0,0 @@
1
- import asyncio
2
- from scratch_agents.agents.tool_calling_agent_ch6 import ToolCallingAgent
3
- from scratch_agents.models.openai import OpenAILlm
4
- from scratch_agents.sessions.in_memory_session_manager import InMemorySessionManager
5
- from scratch_agents.memory.sliding_window_strategy import SlidingWindowStrategy
6
- from scratch_agents.types.contents import Message
7
- from scratch_agents.types.events import Event
8
- from dotenv import load_dotenv
9
- import os
10
-
11
- load_dotenv()
12
-
13
- user_id = "test_123"
14
- session_id = "test_session"
15
-
16
- async def test_sliding_window():
17
-
18
- session_manager = InMemorySessionManager()
19
- session = session_manager.create_session(session_id, user_id)
20
-
21
- session.events.append(Event(
22
- execution_id="exec1",
23
- author="user",
24
- content=[Message(role="user", content="My name is Alice"),
25
- Message(role="user", content="I live in Korea")]
26
- ))
27
-
28
- agent = ToolCallingAgent(
29
- name="window_agent",
30
- model=OpenAILlm(model="gpt-5-mini"),
31
- instructions="You are a helpful assistant",
32
- session_manager=session_manager,
33
- before_llm_callbacks=[SlidingWindowStrategy(max_messages=2)]
34
- )
35
-
36
- response = await agent.run(
37
- "What's my name?",
38
- session_id=session_id,
39
- user_id=user_id
40
- )
41
- print(response)
42
-
43
- asyncio.run(test_sliding_window())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
chapter_06_memory/05_summarization.py DELETED
@@ -1,64 +0,0 @@
1
- from scratch_agents.agents.tool_calling_agent_ch6 import ToolCallingAgent
2
- from scratch_agents.models.openai import OpenAILlm
3
- from scratch_agents.sessions.in_memory_session_manager import InMemorySessionManager
4
- from scratch_agents.memory.summarization_strategy import SummarizationStrategy
5
- from scratch_agents.types.contents import Message
6
- from scratch_agents.types.events import Event
7
- import asyncio
8
- from dotenv import load_dotenv
9
- import os
10
-
11
- load_dotenv()
12
-
13
- user_id = "test_123"
14
- session_id = "test_session"
15
-
16
- async def test_summarization_strategy():
17
- """Demonstrate summarization strategy in action"""
18
-
19
- model = OpenAILlm(model="gpt-5-mini")
20
- session_manager = InMemorySessionManager()
21
- session = session_manager.create_session(session_id, user_id)
22
-
23
- messages = [
24
- Message(role="user", content="Hi, I'm Bob"),
25
- Message(role="assistant", content="Nice to meet you, Bob!"),
26
- Message(role="user", content="I work as a teacher"),
27
- Message(role="assistant", content="Wow! What subject?"),
28
- Message(role="user", content="I teach math"),
29
- Message(role="assistant", content="Math is important!"),
30
- Message(role="user", content="I have 30 students"),
31
- Message(role="assistant", content="That's a good class size"),
32
- ]
33
-
34
- for msg in messages:
35
- event = Event(
36
- execution_id="test_exec",
37
- author="test",
38
- content=[msg]
39
- )
40
- session.events.append(event)
41
-
42
- agent = ToolCallingAgent(
43
- name="summary_agent",
44
- model=model,
45
- instructions="You are a helpful assistant",
46
- session_manager=session_manager,
47
- before_llm_callbacks=[
48
- SummarizationStrategy(model=model, trigger_count=8, keep_recent=2)
49
- ]
50
- )
51
-
52
- response = await agent.run(
53
- "What subject do I teach?",
54
- session_id=session_id,
55
- user_id=user_id
56
- )
57
-
58
- if "conversation_summary" in session.state:
59
- print(f"Summary: {session.state['conversation_summary']}")
60
- print(f"Summary Index: {session.state['last_summarized_index']}")
61
-
62
- print(f"\nAgent response: {response}")
63
-
64
- asyncio.run(test_summarization_strategy())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
chapter_06_memory/06_conversation_search.py DELETED
@@ -1,59 +0,0 @@
1
- from scratch_agents.agents.tool_calling_agent_ch6 import ToolCallingAgent
2
- from scratch_agents.models.openai import OpenAILlm
3
- from scratch_agents.sessions.in_memory_session_manager import InMemorySessionManager
4
- from scratch_agents.memory.sliding_window_strategy import SlidingWindowStrategy
5
- from scratch_agents.tools.conversation_search import conversation_search
6
- from scratch_agents.types.contents import Message
7
- from scratch_agents.types.events import Event
8
- import asyncio
9
- from dotenv import load_dotenv
10
- import os
11
-
12
- load_dotenv()
13
-
14
- user_id = "test_123"
15
- session_id = "test_session"
16
-
17
- async def test_search_with_sliding_window():
18
- """Demonstrate search recovering information lost to sliding window"""
19
-
20
- model = OpenAILlm(model="gpt-5-mini")
21
- session_manager = InMemorySessionManager()
22
- session = session_manager.create_session(session_id, user_id)
23
-
24
- conversation_history = [
25
- ("user", "My golden retriever puppy is named Max."),
26
- ("assistant", "Max is a lovely name for a golden retriever!"),
27
- ("user", "He loves playing fetch in the park."),
28
- ("assistant", "That's wonderful! Golden retrievers are great at fetch."),
29
- ]
30
-
31
- for role, content in conversation_history:
32
- event = Event(
33
- execution_id="pre_loaded",
34
- author=role,
35
- content=[Message(role=role, content=content)]
36
- )
37
- session.events.append(event)
38
-
39
- agent = ToolCallingAgent(
40
- name="search_agent",
41
- model=model,
42
- instructions="""You are a helpful assistant. When asked about
43
- information from earlier in our conversation, use the
44
- conversation_search tool to find it.""",
45
- tools=[conversation_search],
46
- session_manager=session_manager,
47
- before_llm_callbacks=[
48
- SlidingWindowStrategy(max_messages=2)
49
- ]
50
- )
51
-
52
- response = await agent.run(
53
- "What was my puppy's name?",
54
- session_id=session_id,
55
- user_id=user_id
56
- )
57
- print(f"Agent: {response}\n")
58
-
59
- asyncio.run(test_search_with_sliding_window())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
chapter_06_memory/07_task_long_term.py DELETED
@@ -1,76 +0,0 @@
1
- from scratch_agents.agents.execution_context_ch6 import ExecutionContext
2
- from scratch_agents.agents.tool_calling_agent_ch6 import ToolCallingAgent
3
- from scratch_agents.models.openai import OpenAILlm
4
- from scratch_agents.sessions.task_cross_session_manager import TaskCrossSessionManager
5
- from scratch_agents.sessions.in_memory_session_manager import InMemorySessionManager
6
- from scratch_agents.tools.base_tool import BaseTool
7
- from scratch_agents.models.llm_request import LlmRequest
8
- from scratch_agents.tools.search_web import search_web
9
- import asyncio
10
- from dotenv import load_dotenv
11
- import os
12
-
13
- load_dotenv()
14
-
15
- user_id = "test_123"
16
-
17
- async def long_term_memory_save_callback(context:ExecutionContext):
18
- cross_session_manager = context.cross_session_manager
19
- session = context.session
20
- execution_id = context.execution_id
21
-
22
- await cross_session_manager.process_session(session=session, execution_id=execution_id)
23
-
24
- class MemorySearchTool(BaseTool):
25
- async def execute(self, context, **kwargs):
26
- return None
27
-
28
- async def process_llm_request(self, request: LlmRequest, context: ExecutionContext):
29
- user_input = context.user_input
30
- user_id = context.session.user_id
31
- results = await context.cross_session_manager.search(user_input, user_id)
32
- if results:
33
- request.add_instructions(f"Use the following task memory to answer the user's question: {results}")
34
-
35
-
36
- async def test_long_term_memory_save():
37
- """Test long-term memory saving with a meaningful conversation"""
38
-
39
- session_manager = InMemorySessionManager()
40
- model = OpenAILlm(model="gpt-5-mini")
41
- cross_session_manager = TaskCrossSessionManager(model=model)
42
-
43
- memory_search_tool = MemorySearchTool()
44
-
45
- agent = ToolCallingAgent(
46
- name="memory_agent",
47
- model=model,
48
- instructions="You are a helpful assistant. Have a natural conversation and learn about the user's task. IMPORTANT: When the user asks about a specific term or technology, use the search results to provide a comprehensive answer. Do NOT ask for clarification if you find relevant search results. Only ask for clarification if search returns no results or the query is truly impossible to understand. If multiple meanings exist, provide information about the most common or relevant one based on the search results.",
49
- tools=[search_web, memory_search_tool],
50
- session_manager=session_manager,
51
- cross_session_manager=cross_session_manager,
52
- after_run_callbacks=[long_term_memory_save_callback]
53
- )
54
-
55
- print("=== Testing Long-term Memory Save ===\n")
56
-
57
- test_conversations = [
58
- "What is Mem0?",
59
- "How does mem0 work?"
60
- ]
61
-
62
- for i, message in enumerate(test_conversations, 1):
63
- print(f"User: {message}")
64
- session_id = f"test_session_{i}"
65
-
66
- response = await agent.run(
67
- message,
68
- session_id=session_id,
69
- user_id=user_id
70
- )
71
- print(response)
72
- # print(cross_session_manager.collection.peek())
73
-
74
- if __name__ == "__main__":
75
- asyncio.run(test_long_term_memory_save())
76
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
chapter_06_memory/08_user_long_term.py DELETED
@@ -1,98 +0,0 @@
1
- from scratch_agents.agents.execution_context_ch6 import ExecutionContext
2
- from scratch_agents.agents.tool_calling_agent_ch6 import ToolCallingAgent
3
- from scratch_agents.models.openai import OpenAILlm
4
- from scratch_agents.sessions.user_cross_session_manager import UserCrossSessionManager
5
- from scratch_agents.sessions.in_memory_session_manager import InMemorySessionManager
6
- from scratch_agents.tools.base_tool import BaseTool
7
- from scratch_agents.models.llm_request import LlmRequest
8
- import asyncio
9
- from dotenv import load_dotenv
10
- import os
11
-
12
- load_dotenv()
13
-
14
- user_id = "test_user_123"
15
-
16
- async def user_memory_save_callback(context: ExecutionContext):
17
- """Callback to save user memories after each interaction"""
18
- cross_session_manager = context.cross_session_manager
19
- session = context.session
20
- execution_id = context.execution_id
21
-
22
- await cross_session_manager.process_session(session=session, execution_id=execution_id)
23
-
24
- class UserMemorySearchTool(BaseTool):
25
- """Tool to search and retrieve user memories"""
26
- async def execute(self, context, **kwargs):
27
- return None
28
-
29
- async def process_llm_request(self, request: LlmRequest, context: ExecutionContext):
30
- user_id = context.session.user_id
31
- # Get all existing memories for the user
32
- all_memories = await context.cross_session_manager.find_existing([], user_id)
33
- if all_memories:
34
- memory_contents = [mem['content'] for mem in all_memories]
35
- memory_text = "\n".join(f"- {content}" for content in memory_contents)
36
- request.add_instructions(f"You have the following memories about this user:\n{memory_text}\n\nUse these memories to personalize your responses.")
37
-
38
-
39
- async def test_user_long_term_memory():
40
- """Test user long-term memory with location updates"""
41
-
42
- session_manager = InMemorySessionManager()
43
- model = OpenAILlm(model="gpt-4o-mini")
44
- cross_session_manager = UserCrossSessionManager(model=model)
45
-
46
- memory_search_tool = UserMemorySearchTool()
47
-
48
- agent = ToolCallingAgent(
49
- name="user_memory_agent",
50
- model=model,
51
- instructions="You are a helpful assistant that remembers information about the user. Have natural conversations and acknowledge what you know about the user when relevant.",
52
- tools=[memory_search_tool],
53
- session_manager=session_manager,
54
- cross_session_manager=cross_session_manager,
55
- after_run_callbacks=[user_memory_save_callback]
56
- )
57
-
58
- print("=== Testing User Long-term Memory ===\n")
59
-
60
- # Test conversation about location changes
61
- test_conversations = [
62
- "Hi! I'm living in New York City. I love the energy here!",
63
- "Actually, I just moved to Los Angeles last month. The weather is so much better here.",
64
- "What do you remember about where I live?"
65
- ]
66
-
67
- for i, message in enumerate(test_conversations, 1):
68
- print(f"\n--- Conversation {i} ---")
69
- print(f"User: {message}")
70
- session_id = f"user_session_{i}"
71
-
72
- response = await agent.run(
73
- message,
74
- session_id=session_id,
75
- user_id=user_id
76
- )
77
- print(f"Assistant: {response}")
78
-
79
- # Show current memories in the database with timestamps
80
- print("\n=> Current User Memories:")
81
- memories = await cross_session_manager.find_existing([], user_id)
82
- if memories:
83
- for mem in memories:
84
- created = mem.get('created_at', 'Unknown')[:19] if mem.get('created_at') != 'Unknown' else 'Unknown'
85
- updated = mem.get('updated_at', 'Unknown')[:19] if mem.get('updated_at') != 'Unknown' else 'Unknown'
86
- print(f" - {mem['content']}")
87
- if created != updated:
88
- print(f" (Created: {created}, Updated: {updated})")
89
- else:
90
- print(f" (Created: {created})")
91
- else:
92
- print(" (No memories yet)")
93
-
94
- # Small delay to see the progression
95
- await asyncio.sleep(1)
96
-
97
- if __name__ == "__main__":
98
- asyncio.run(test_user_long_term_memory())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
my_code.ipynb ADDED
@@ -0,0 +1,926 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "id": "bd396f3a",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "data": {
11
+ "text/plain": [
12
+ "True"
13
+ ]
14
+ },
15
+ "execution_count": 1,
16
+ "metadata": {},
17
+ "output_type": "execute_result"
18
+ }
19
+ ],
20
+ "source": [
21
+ "from dotenv import load_dotenv, find_dotenv\n",
22
+ "\n",
23
+ "load_dotenv(find_dotenv())\n",
24
+ "\n"
25
+ ]
26
+ },
27
+ {
28
+ "cell_type": "code",
29
+ "execution_count": null,
30
+ "id": "bdc55e33",
31
+ "metadata": {},
32
+ "outputs": [
33
+ {
34
+ "name": "stdout",
35
+ "output_type": "stream",
36
+ "text": [
37
+ "ChatCompletionMessage(content='The capital of India is New Delhi.', refusal=None, role='assistant', annotations=[], audio=None, function_call=None, tool_calls=None)\n",
38
+ "The capital of India is New Delhi.\n"
39
+ ]
40
+ }
41
+ ],
42
+ "source": [
43
+ "from openai import OpenAI\n",
44
+ "client = OpenAI()\n",
45
+ "\n",
46
+ "response = client.chat.completions.create(\n",
47
+ " model = 'gpt-5-mini',\n",
48
+ " messages = [\n",
49
+ " {'role': 'system', 'content' : 'You are a helpful assistant !'},\n",
50
+ " {'role': 'user', 'content': 'What is the capital of India ?'}\n",
51
+ " ]\n",
52
+ ")\n",
53
+ "\n",
54
+ "print(response.choices[0].message)\n",
55
+ "print(response.choices[0].message.content)\n"
56
+ ]
57
+ },
58
+ {
59
+ "cell_type": "code",
60
+ "execution_count": 5,
61
+ "id": "396e8826",
62
+ "metadata": {},
63
+ "outputs": [
64
+ {
65
+ "name": "stdout",
66
+ "output_type": "stream",
67
+ "text": [
68
+ "Hello! How can I help you today?\n"
69
+ ]
70
+ }
71
+ ],
72
+ "source": [
73
+ "## with this we can unify all providers\n",
74
+ "\n",
75
+ "from litellm import completion\n",
76
+ "response = completion(\n",
77
+ " model = 'gpt-5-mini',\n",
78
+ " messages = [{'role' : 'user', 'content' : 'Hello !' }]\n",
79
+ ")\n",
80
+ "\n",
81
+ "print(response.choices[0].message.content)"
82
+ ]
83
+ },
84
+ {
85
+ "cell_type": "code",
86
+ "execution_count": null,
87
+ "id": "cb505eb4",
88
+ "metadata": {},
89
+ "outputs": [
90
+ {
91
+ "name": "stdout",
92
+ "output_type": "stream",
93
+ "text": [
94
+ "Nice to meet you, Akhil — how can I help you today?\n",
95
+ "I don't know — I don't have access to personal details unless you tell me. What would you like me to call you in this chat? (I can use that name for this conversation, but I can't remember it across separate sessions unless you set it in your app/profile.)\n"
96
+ ]
97
+ }
98
+ ],
99
+ "source": [
100
+ "from litellm import completion\n",
101
+ "\n",
102
+ "response1 = completion(\n",
103
+ " model = 'gpt-5-mini',\n",
104
+ " messages = [{'role' : 'user', 'content':'My name is Akhil'}]\n",
105
+ ")\n",
106
+ "\n",
107
+ "response2 = completion(\n",
108
+ " model = 'gpt-5-mini',\n",
109
+ " messages = [{'role' : 'user', 'content':'what\\'s my name'}]\n",
110
+ ")\n",
111
+ "\n",
112
+ "print(response1.choices[0].message.content)\n",
113
+ "print(response2.choices[0].message.content)\n",
114
+ "\n",
115
+ "### This proves that each LLM call is independent. Our Model doesn't have memory"
116
+ ]
117
+ },
118
+ {
119
+ "cell_type": "code",
120
+ "execution_count": 8,
121
+ "id": "cd3ade31",
122
+ "metadata": {},
123
+ "outputs": [
124
+ {
125
+ "name": "stdout",
126
+ "output_type": "stream",
127
+ "text": [
128
+ "Great — love the ambition, Akhil. If you want to be “the future of AI,” I can help you get there. How would you like me to help right now? (Pick one: roadmap, project ideas, resume/LinkedIn copy, interview prep, or a 12‑month actionable plan.)\n",
129
+ "\n",
130
+ "Below are a few immediately useful things you can use or ask me to expand.\n",
131
+ "\n",
132
+ "Quick elevator pitch / LinkedIn headline\n",
133
+ "- Headline: Akhil — Building safe, scalable AI that augments human creativity and solves real-world problems\n",
134
+ "- 1‑line pitch: “I build trustworthy AI systems that turn complex data into products people love — with a focus on safety, scalability, and real-world impact.”\n",
135
+ "\n",
136
+ "High‑level skills to prioritize\n",
137
+ "- Foundations: probability, linear algebra, optimization\n",
138
+ "- Core ML: supervised learning, neural networks, transfer learning, transformers\n",
139
+ "- Systems & infra: PyTorch/TensorFlow, Docker, Kubernetes, model serving, MLOps\n",
140
+ "- Specialized: LLMs, RL, generative models, multimodal models (vision+language)\n",
141
+ "- Soft skills: product sense, communication, writing and presenting research\n",
142
+ "- Ethics & safety: alignment concepts, bias mitigation, robust evaluation\n",
143
+ "\n",
144
+ "3 concrete projects (increasing complexity)\n",
145
+ "1. End‑to‑end ML app: simple image classifier with deployment (Flask/FastAPI + Docker + test pipeline)\n",
146
+ "2. LLM product prototype: retrieval-augmented chatbot for a specific domain (docs → vector DB → RAG)\n",
147
+ "3. Research/engineering hybrid: fine-tune or distill a model for efficiency and publish a blog post + code on GitHub\n",
148
+ "\n",
149
+ "Practical 12‑month roadmap (high level)\n",
150
+ "- Months 0–2: Fill gaps — math refresher, PyTorch, small projects, GitHub portfolio\n",
151
+ "- Months 3–5: Build and deploy 2 production prototypes (one LLM-based), publish writeups\n",
152
+ "- Months 6–9: Contribute to OSS or collaborate on a research project; attend conferences/meetups\n",
153
+ "- Months 10–12: Target internships/roles, refine portfolio, prepare interviews, publish a substantial case study or replication\n",
154
+ "\n",
155
+ "Quick resources\n",
156
+ "- Fast theory/math: “Mathematics for Machine Learning” + 3Blue1Brown playlists\n",
157
+ "- Practical ML: Deep Learning Book (selected chapters), PyTorch docs, Hugging Face course\n",
158
+ "- MLOps/RAG: LangChain/HF tutorials, Vector DB docs (Pinecone/Weaviate)\n",
159
+ "\n",
160
+ "If you want, I can:\n",
161
+ "- Create a personalized 6‑ or 12‑month plan based on your background and time availability\n",
162
+ "- Draft a LinkedIn summary, resume bullets, or a cover letter\n",
163
+ "- Design a project roadmap with milestones and tech stack\n",
164
+ "Tell me which and give me your experience level (student / early-career / senior / founder) and how many hours per week you can commit.\n",
165
+ "Short answer: you’re Akhil — the person who just told me “I am going to be the Future of AI.” Beyond that, only you can fully answer “who am I,” but I can help you shape a clear, useful version of that identity for career, confidence, and action.\n",
166
+ "\n",
167
+ "Pick one of these and I’ll build it for you:\n",
168
+ "- A crisp personal identity/mission statement (1–2 lines)\n",
169
+ "- A short LinkedIn “About” summary\n",
170
+ "- A 12‑month plan to become a leader in AI\n",
171
+ "- A set of interview/resume bullets matched to your level\n",
172
+ "\n",
173
+ "If you want to explore it yourself first, answer 5 quick prompts (one sentence each):\n",
174
+ "1. What technical skills do you already have (languages, frameworks, papers/projects)?\n",
175
+ "2. What do you enjoy doing most in AI (research, building products, deploying models, safety/ethics)?\n",
176
+ "3. What impact do you want to have (industry, research, social good, startups)?\n",
177
+ "4. What are your top 2 strengths and top 1 weakness you want to fix?\n",
178
+ "5. How many hours/week can you commit to learning or working toward this goal?\n",
179
+ "\n",
180
+ "Or, if you want an immediate example identity statement based on your earlier claim:\n",
181
+ "- “I’m Akhil — an aspiring AI leader building safe, scalable systems that augment human creativity. My mission is to bridge cutting‑edge research and real‑world impact.”\n",
182
+ "\n",
183
+ "Tell me which option you want or answer the 5 prompts and I’ll draft something tailored.\n"
184
+ ]
185
+ }
186
+ ],
187
+ "source": [
188
+ "### Managing conversation history\n",
189
+ "\n",
190
+ "\n",
191
+ "from litellm import completion\n",
192
+ "\n",
193
+ "## Maintain a messages object\n",
194
+ "messages = []\n",
195
+ "\n",
196
+ "## append your message/conversation\n",
197
+ "messages.append({'role':'user', 'content':'My name is Akhil and I am going to be the Future of AI'})\n",
198
+ "response3 = completion(model = 'gpt-5-mini', messages = messages)\n",
199
+ "\n",
200
+ "print(response3.choices[0].message.content)\n",
201
+ "\n",
202
+ "## append the message from assistant\n",
203
+ "messages.append({'role':'assistant', 'content':response3.choices[0].message.content})\n",
204
+ "\n",
205
+ "## write a new message\n",
206
+ "messages.append({'role':'user', 'content':'who am i'})\n",
207
+ "response4 = completion(model = 'gpt-5-mini', messages = messages)\n",
208
+ "\n",
209
+ "print(response4.choices[0].message.content)\n",
210
+ "\n",
211
+ "\n"
212
+ ]
213
+ },
214
+ {
215
+ "cell_type": "code",
216
+ "execution_count": null,
217
+ "id": "e0868cf6",
218
+ "metadata": {},
219
+ "outputs": [
220
+ {
221
+ "name": "stdout",
222
+ "output_type": "stream",
223
+ "text": [
224
+ "{\"name\":\"Akhil\",\"email\":\"akhil.masters21@gmail.com\",\"phone\":\"9550303420\"}\n"
225
+ ]
226
+ }
227
+ ],
228
+ "source": [
229
+ "### Structured output\n",
230
+ "\n",
231
+ "from pydantic import BaseModel\n",
232
+ "from litellm import completion\n",
233
+ "\n",
234
+ "class ExtractedInfo(BaseModel):\n",
235
+ " name : str\n",
236
+ " email : str\n",
237
+ " phone : str | None = None\n",
238
+ "\n",
239
+ "response = completion(\n",
240
+ " model=\"gpt-5-mini\",\n",
241
+ " messages=[{\n",
242
+ " \"role\": \"user\", \n",
243
+ " \"content\": \"My name is Akhil, my email is akhil.masters21@gmail.com, and my phone is 9550303420.\"\n",
244
+ " }],\n",
245
+ " response_format=ExtractedInfo\n",
246
+ ")\n",
247
+ "\n",
248
+ "print(response.choices[0].message.content)"
249
+ ]
250
+ },
251
+ {
252
+ "cell_type": "code",
253
+ "execution_count": null,
254
+ "id": "03d48814",
255
+ "metadata": {},
256
+ "outputs": [
257
+ {
258
+ "name": "stdout",
259
+ "output_type": "stream",
260
+ "text": [
261
+ "Q: What is 2 + 2?\n",
262
+ "A: 2 + 2 = 4.\n",
263
+ "\n",
264
+ "Q: What is the capital of Japan?\n",
265
+ "A: The capital of Japan is Tokyo.\n",
266
+ "\n",
267
+ "Q: Who wrote Romeo and Juliet?\n",
268
+ "A: Romeo and Juliet was written by William Shakespeare. It was likely written and first performed in the mid-1590s (published in 1597).\n",
269
+ "\n"
270
+ ]
271
+ }
272
+ ],
273
+ "source": [
274
+ "### Asynchronus calls\n",
275
+ "\n",
276
+ "import asyncio\n",
277
+ "from litellm import acompletion\n",
278
+ "async def get_response(prompt: str) -> str:\n",
279
+ " response = await acompletion(\n",
280
+ " model = 'gpt-5-mini',\n",
281
+ " messages=[{\"role\": \"user\", \"content\": prompt}]\n",
282
+ " )\n",
283
+ " return response.choices[0].message.content\n",
284
+ " \n",
285
+ "prompts = [\n",
286
+ " \"What is 2 + 2?\",\n",
287
+ " \"What is the capital of Japan?\",\n",
288
+ " \"Who wrote Romeo and Juliet?\"\n",
289
+ "]\n",
290
+ "\n",
291
+ "### here \n",
292
+ "## tasks = [get_response(What is 2 + 2?), get_response(What is the capital of Japan?)] \n",
293
+ "## doesnt run the function, it just creates a coroutine object. Thats the difference in async.\n",
294
+ "## functions are called in gather step\n",
295
+ "\n",
296
+ "tasks = [get_response(p) for p in prompts]\n",
297
+ "results = await asyncio.gather(*tasks)\n",
298
+ "\n",
299
+ "for prompt, result in zip(prompts, results):\n",
300
+ " print(f\"Q: {prompt}\")\n",
301
+ " print(f\"A: {result}\\n\")\n"
302
+ ]
303
+ },
304
+ {
305
+ "cell_type": "code",
306
+ "execution_count": 14,
307
+ "id": "3333de1d",
308
+ "metadata": {},
309
+ "outputs": [
310
+ {
311
+ "name": "stdout",
312
+ "output_type": "stream",
313
+ "text": [
314
+ "Q: What is 0 + 0?\n",
315
+ "A: 0\n",
316
+ "\n",
317
+ "Because adding zero to zero yields zero.\n",
318
+ "\n",
319
+ "Q: What is 1 + 1?\n",
320
+ "A: 1 + 1 = 2.\n",
321
+ "\n",
322
+ "Q: What is 2 + 2?\n",
323
+ "A: 2 + 2 = 4.\n",
324
+ "\n",
325
+ "Q: What is 3 + 3?\n",
326
+ "A: 3 + 3 = 6.\n",
327
+ "\n",
328
+ "Q: What is 4 + 4?\n",
329
+ "A: 8\n",
330
+ "\n",
331
+ "Q: What is 5 + 5?\n",
332
+ "A: 10\n",
333
+ "\n",
334
+ "Q: What is 6 + 6?\n",
335
+ "A: 12\n",
336
+ "\n",
337
+ "Q: What is 7 + 7?\n",
338
+ "A: 14\n",
339
+ "\n",
340
+ "Q: What is 8 + 8?\n",
341
+ "A: 16\n",
342
+ "\n",
343
+ "Q: What is 9 + 9?\n",
344
+ "A: 18\n",
345
+ "\n",
346
+ "Q: What is 10 + 10?\n",
347
+ "A: 10 + 10 = 20\n",
348
+ "\n",
349
+ "Q: What is 11 + 11?\n",
350
+ "A: 22\n",
351
+ "\n",
352
+ "Q: What is 12 + 12?\n",
353
+ "A: 24\n",
354
+ "\n",
355
+ "Q: What is 13 + 13?\n",
356
+ "A: 26\n",
357
+ "\n",
358
+ "Q: What is 14 + 14?\n",
359
+ "A: 28\n",
360
+ "\n",
361
+ "Q: What is 15 + 15?\n",
362
+ "A: 30\n",
363
+ "\n",
364
+ "Q: What is 16 + 16?\n",
365
+ "A: 32\n",
366
+ "\n",
367
+ "Q: What is 17 + 17?\n",
368
+ "A: 34\n",
369
+ "\n",
370
+ "Q: What is 18 + 18?\n",
371
+ "A: 36\n",
372
+ "\n",
373
+ "Q: What is 19 + 19?\n",
374
+ "A: 38\n",
375
+ "\n"
376
+ ]
377
+ }
378
+ ],
379
+ "source": [
380
+ "### rate limiting queries\n",
381
+ "semaphore = asyncio.Semaphore(10)\n",
382
+ "\n",
383
+ "async def call_llm(prompt : str) -> str:\n",
384
+ " async with semaphore:\n",
385
+ " response = await acompletion(\n",
386
+ " model=\"gpt-5-mini\",\n",
387
+ " messages=[{\"role\": \"user\", \"content\": prompt}],\n",
388
+ " num_retries=3 # Automatic retry with exponential backoff\n",
389
+ " )\n",
390
+ " return response.choices[0].message.content\n",
391
+ "prompts = [f\"What is {i} + {i}?\" for i in range(20)]\n",
392
+ "tasks = [call_llm(p) for p in prompts]\n",
393
+ "results = await asyncio.gather(*tasks, return_exceptions=True)\n",
394
+ "\n",
395
+ "\n",
396
+ "for prompt, result in zip(prompts, results):\n",
397
+ " print(f\"Q: {prompt}\")\n",
398
+ " print(f\"A: {result}\\n\")\n"
399
+ ]
400
+ },
401
+ {
402
+ "cell_type": "code",
403
+ "execution_count": 16,
404
+ "id": "1caef766",
405
+ "metadata": {},
406
+ "outputs": [
407
+ {
408
+ "name": "stderr",
409
+ "output_type": "stream",
410
+ "text": [
411
+ "Generating test split: 100%|██████████| 93/93 [00:00<00:00, 1653.78 examples/s]\n",
412
+ "Generating validation split: 100%|██████████| 53/53 [00:00<00:00, 32022.20 examples/s]"
413
+ ]
414
+ },
415
+ {
416
+ "name": "stdout",
417
+ "output_type": "stream",
418
+ "text": [
419
+ "Number of Level 1 problems: 53\n"
420
+ ]
421
+ },
422
+ {
423
+ "name": "stderr",
424
+ "output_type": "stream",
425
+ "text": [
426
+ "\n"
427
+ ]
428
+ }
429
+ ],
430
+ "source": [
431
+ "## loading the GAIA dataset\n",
432
+ "\n",
433
+ "from datasets import load_dataset\n",
434
+ "level1_problems = load_dataset(\"gaia-benchmark/GAIA\", \"2023_level1\", split=\"validation\")\n",
435
+ "print(f\"Number of Level 1 problems: {len(level1_problems)}\")\n"
436
+ ]
437
+ },
438
+ {
439
+ "cell_type": "code",
440
+ "execution_count": 17,
441
+ "id": "733c211c",
442
+ "metadata": {},
443
+ "outputs": [
444
+ {
445
+ "data": {
446
+ "text/plain": [
447
+ "{'task_id': '8e867cd7-cff9-4e6c-867a-ff5ddc2550be',\n",
448
+ " 'Question': 'How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia.',\n",
449
+ " 'Level': '1',\n",
450
+ " 'Final answer': '3',\n",
451
+ " 'file_name': '',\n",
452
+ " 'file_path': '',\n",
453
+ " 'Annotator Metadata': {'Steps': '1. I did a search for Mercedes Sosa\\n2. I went to the Wikipedia page for her\\n3. I scrolled down to \"Studio albums\"\\n4. I counted the ones between 2000 and 2009',\n",
454
+ " 'Number of steps': '4',\n",
455
+ " 'How long did this take?': '5 minutes',\n",
456
+ " 'Tools': '1. web browser\\n2. google search',\n",
457
+ " 'Number of tools': '2'}}"
458
+ ]
459
+ },
460
+ "execution_count": 17,
461
+ "metadata": {},
462
+ "output_type": "execute_result"
463
+ }
464
+ ],
465
+ "source": [
466
+ "level1_problems[1]"
467
+ ]
468
+ },
469
+ {
470
+ "cell_type": "code",
471
+ "execution_count": 19,
472
+ "id": "3d5bcb22",
473
+ "metadata": {},
474
+ "outputs": [
475
+ {
476
+ "name": "stderr",
477
+ "output_type": "stream",
478
+ "text": [
479
+ "100%|██████████| 40/40 [02:23<00:00, 3.58s/it]\n"
480
+ ]
481
+ }
482
+ ],
483
+ "source": [
484
+ "## defining a respose for gaia\n",
485
+ "from pydantic import BaseModel\n",
486
+ "from tqdm.asyncio import tqdm\n",
487
+ "gaia_prompt = \"\"\"You are a general AI assistant. I will ask you a question.\n",
488
+ "First, determine if you can solve this problem with your current capabilities and set \"is_solvable\" accordingly.\n",
489
+ "If you can solve it, set \"is_solvable\" to true and provide your answer in \"final_answer\".\n",
490
+ "If you cannot solve it, set \"is_solvable\" to false and explain why in \"unsolvable_reason\".\n",
491
+ "Your final answer should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.\n",
492
+ "If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.\n",
493
+ "If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.\n",
494
+ "If you are asked for a comma separated list, apply the above rules depending on whether the element is a number or a string.\"\"\"\n",
495
+ "\n",
496
+ "class GaiaOutput(BaseModel):\n",
497
+ " is_solvable: bool\n",
498
+ " unsolvable_reason: str = \"\"\n",
499
+ " final_answer: str = \"\"\n",
500
+ "\n",
501
+ "PROVIDER_SEMAPHORES = {'openai': asyncio.Semaphore(30), 'anthropic': asyncio.Semaphore(10)}\n",
502
+ "\n",
503
+ "def get_provider(model: str) -> str:\n",
504
+ " return \"anthropic\" if model.startswith(\"anthropic/\") else \"openai\"\n",
505
+ "\n",
506
+ "\n",
507
+ "async def solve_problem(model: str, question: str) -> GaiaOutput:\n",
508
+ " provider = get_provider(model)\n",
509
+ " async with PROVIDER_SEMAPHORES[provider]:\n",
510
+ " response = await acompletion(\n",
511
+ " model = model,\n",
512
+ " messages=[\n",
513
+ " {\"role\": \"system\", \"content\": gaia_prompt},\n",
514
+ " {\"role\": \"user\", \"content\": question},\n",
515
+ " ],\n",
516
+ " response_format=GaiaOutput,\n",
517
+ " num_retries=2,\n",
518
+ " )\n",
519
+ " finish_reason = response.choices[0].finish_reason\n",
520
+ " content = response.choices[0].message.content\n",
521
+ " if finish_reason == \"refusal\" or content is None:\n",
522
+ " return GaiaOutput(\n",
523
+ " is_solvable=False,\n",
524
+ " unsolvable_reason=f\"Model refused to answer (finish_reason: {finish_reason})\",\n",
525
+ " final_answer=\"\"\n",
526
+ " )\n",
527
+ " return GaiaOutput.model_validate_json(content)\n",
528
+ "\n",
529
+ "def is_correct(prediction: str | None, answer: str) -> bool:\n",
530
+ " \"\"\"Check exact match between prediction and answer (case-insensitive).\"\"\"\n",
531
+ " if prediction is None:\n",
532
+ " return False\n",
533
+ " return prediction.strip().lower() == answer.strip().lower()\n",
534
+ "\n",
535
+ "async def evaluate_gaia_single(problem: dict, model: str) -> dict:\n",
536
+ " \"\"\"Evaluate a single problem-model pair and return result.\"\"\"\n",
537
+ " try:\n",
538
+ " output = await solve_problem(model, problem[\"Question\"])\n",
539
+ " return {\n",
540
+ " \"task_id\": problem[\"task_id\"],\n",
541
+ " \"model\": model,\n",
542
+ " \"correct\": is_correct(output.final_answer, problem[\"Final answer\"]),\n",
543
+ " \"is_solvable\": output.is_solvable,\n",
544
+ " \"prediction\": output.final_answer,\n",
545
+ " \"answer\": problem[\"Final answer\"],\n",
546
+ " \"unsolvable_reason\": output.unsolvable_reason,\n",
547
+ " }\n",
548
+ " except Exception as e:\n",
549
+ " return {\n",
550
+ " \"task_id\": problem[\"task_id\"],\n",
551
+ " \"model\": model,\n",
552
+ " \"correct\": False,\n",
553
+ " \"is_solvable\": None,\n",
554
+ " \"prediction\": None,\n",
555
+ " \"answer\": problem[\"Final answer\"],\n",
556
+ " \"error\": str(e),\n",
557
+ " }\n",
558
+ "\n",
559
+ "async def run_experiment(\n",
560
+ " problems: list[dict],\n",
561
+ " models: list[str],\n",
562
+ ") -> dict[str, list]:\n",
563
+ " \"\"\"Evaluate all models on all problems.\"\"\"\n",
564
+ " tasks = [\n",
565
+ " evaluate_gaia_single(problem, model)\n",
566
+ " for problem in problems\n",
567
+ " for model in models\n",
568
+ " ]\n",
569
+ " \n",
570
+ " all_results = await tqdm.gather(*tasks)\n",
571
+ " \n",
572
+ " # Group results by model\n",
573
+ " results = {model: [] for model in models}\n",
574
+ " for result in all_results:\n",
575
+ " results[result[\"model\"]].append(result)\n",
576
+ " \n",
577
+ " return results\n",
578
+ "\n",
579
+ "MODELS = [\n",
580
+ " \"gpt-5\",\n",
581
+ " \"gpt-5-mini\"\n",
582
+ "]\n",
583
+ " \n",
584
+ "subset = level1_problems.select(range(20))\n",
585
+ "results = await run_experiment(subset, MODELS)"
586
+ ]
587
+ },
588
+ {
589
+ "cell_type": "code",
590
+ "execution_count": 20,
591
+ "id": "04f60efa",
592
+ "metadata": {},
593
+ "outputs": [
594
+ {
595
+ "data": {
596
+ "text/plain": [
597
+ "{'gpt-5': [{'task_id': 'e1fc63a2-da7a-432f-be78-7c4a95598703',\n",
598
+ " 'model': 'gpt-5',\n",
599
+ " 'correct': True,\n",
600
+ " 'is_solvable': True,\n",
601
+ " 'prediction': '17',\n",
602
+ " 'answer': '17',\n",
603
+ " 'unsolvable_reason': ''},\n",
604
+ " {'task_id': '8e867cd7-cff9-4e6c-867a-ff5ddc2550be',\n",
605
+ " 'model': 'gpt-5',\n",
606
+ " 'correct': False,\n",
607
+ " 'is_solvable': True,\n",
608
+ " 'prediction': '4',\n",
609
+ " 'answer': '3',\n",
610
+ " 'unsolvable_reason': ''},\n",
611
+ " {'task_id': 'ec09fa32-d03f-4bf8-84b0-1f16922c3ae4',\n",
612
+ " 'model': 'gpt-5',\n",
613
+ " 'correct': True,\n",
614
+ " 'is_solvable': True,\n",
615
+ " 'prediction': '3',\n",
616
+ " 'answer': '3',\n",
617
+ " 'unsolvable_reason': ''},\n",
618
+ " {'task_id': '5d0080cb-90d7-4712-bc33-848150e917d3',\n",
619
+ " 'model': 'gpt-5',\n",
620
+ " 'correct': False,\n",
621
+ " 'is_solvable': False,\n",
622
+ " 'prediction': '',\n",
623
+ " 'answer': '0.1777',\n",
624
+ " 'unsolvable_reason': 'I don’t have access to the specific paper text or its figures and can’t browse to retrieve the exact calculated volume.'},\n",
625
+ " {'task_id': 'a1e91b78-d3d8-4675-bb8d-62741b4b68a6',\n",
626
+ " 'model': 'gpt-5',\n",
627
+ " 'correct': False,\n",
628
+ " 'is_solvable': False,\n",
629
+ " 'prediction': '',\n",
630
+ " 'answer': '3',\n",
631
+ " 'unsolvable_reason': 'I can’t access or watch the linked video to determine the number.'},\n",
632
+ " {'task_id': '46719c30-f4c3-4cad-be07-d5cb21eee6bb',\n",
633
+ " 'model': 'gpt-5',\n",
634
+ " 'correct': False,\n",
635
+ " 'is_solvable': False,\n",
636
+ " 'prediction': '',\n",
637
+ " 'answer': 'Mapping Human Oriented Information to Software Agents for Online Systems Usage',\n",
638
+ " 'unsolvable_reason': 'I need to look up the 2015 paper’s author list and their publication histories, which I cannot access without web browsing or additional details.'},\n",
639
+ " {'task_id': '4b6bb5f7-f634-410e-815d-e673ab7f8632',\n",
640
+ " 'model': 'gpt-5',\n",
641
+ " 'correct': True,\n",
642
+ " 'is_solvable': True,\n",
643
+ " 'prediction': 'THE CASTLE',\n",
644
+ " 'answer': 'THE CASTLE',\n",
645
+ " 'unsolvable_reason': ''},\n",
646
+ " {'task_id': 'cffe0e32-c9a6-4c52-9877-78ceb4aaa9fb',\n",
647
+ " 'model': 'gpt-5',\n",
648
+ " 'correct': False,\n",
649
+ " 'is_solvable': False,\n",
650
+ " 'prediction': '',\n",
651
+ " 'answer': 'Fred',\n",
652
+ " 'unsolvable_reason': 'The referenced document with employee profiles and gift assignments is not provided, so the giver who failed to give a gift cannot be determined.'},\n",
653
+ " {'task_id': '2d83110e-a098-4ebb-9987-066c06fa42d0',\n",
654
+ " 'model': 'gpt-5',\n",
655
+ " 'correct': True,\n",
656
+ " 'is_solvable': True,\n",
657
+ " 'prediction': 'right',\n",
658
+ " 'answer': 'Right',\n",
659
+ " 'unsolvable_reason': ''},\n",
660
+ " {'task_id': '5cfb274c-0207-4aa7-9575-6ac0bd95d9b2',\n",
661
+ " 'model': 'gpt-5',\n",
662
+ " 'correct': False,\n",
663
+ " 'is_solvable': False,\n",
664
+ " 'prediction': '',\n",
665
+ " 'answer': 'No',\n",
666
+ " 'unsolvable_reason': 'Missing the spreadsheet/layout of green plots, so I cannot determine if a non-backtracking loop exists.'},\n",
667
+ " {'task_id': '27d5d136-8563-469e-92bf-fd103c28b57c',\n",
668
+ " 'model': 'gpt-5',\n",
669
+ " 'correct': True,\n",
670
+ " 'is_solvable': True,\n",
671
+ " 'prediction': '(¬A → B) ↔ (A ∨ ¬B)',\n",
672
+ " 'answer': '(¬A → B) ↔ (A ∨ ¬B)',\n",
673
+ " 'unsolvable_reason': ''},\n",
674
+ " {'task_id': 'dc28cf18-6431-458b-83ef-64b3ce566c10',\n",
675
+ " 'model': 'gpt-5',\n",
676
+ " 'correct': True,\n",
677
+ " 'is_solvable': True,\n",
678
+ " 'prediction': '2',\n",
679
+ " 'answer': '2',\n",
680
+ " 'unsolvable_reason': ''},\n",
681
+ " {'task_id': 'b816bfce-3d80-4913-a07d-69b752ce6377',\n",
682
+ " 'model': 'gpt-5',\n",
683
+ " 'correct': False,\n",
684
+ " 'is_solvable': True,\n",
685
+ " 'prediction': 'cute',\n",
686
+ " 'answer': 'fluffy',\n",
687
+ " 'unsolvable_reason': ''},\n",
688
+ " {'task_id': '72e110e7-464c-453c-a309-90a95aed6538',\n",
689
+ " 'model': 'gpt-5',\n",
690
+ " 'correct': False,\n",
691
+ " 'is_solvable': False,\n",
692
+ " 'prediction': '',\n",
693
+ " 'answer': 'Guatemala',\n",
694
+ " 'unsolvable_reason': 'I don’t have browsing access to verify the 2020 BASE DDC 633 page and its flags.'},\n",
695
+ " {'task_id': '42576abe-0deb-4869-8c63-225c2d75a95a',\n",
696
+ " 'model': 'gpt-5',\n",
697
+ " 'correct': True,\n",
698
+ " 'is_solvable': True,\n",
699
+ " 'prediction': 'Maktay Mato Apple',\n",
700
+ " 'answer': 'Maktay mato apple',\n",
701
+ " 'unsolvable_reason': ''},\n",
702
+ " {'task_id': 'b415aba4-4b68-4fc6-9b89-2c812e55a3e1',\n",
703
+ " 'model': 'gpt-5',\n",
704
+ " 'correct': False,\n",
705
+ " 'is_solvable': False,\n",
706
+ " 'prediction': '',\n",
707
+ " 'answer': 'diamond',\n",
708
+ " 'unsolvable_reason': 'I don’t have browsing tools to look up the specific 2012 Scientific Reports conference proceedings article and identify the nano-compound without external access.'},\n",
709
+ " {'task_id': 'cca530fc-4052-43b2-b130-b30968d8aa44',\n",
710
+ " 'model': 'gpt-5',\n",
711
+ " 'correct': False,\n",
712
+ " 'is_solvable': False,\n",
713
+ " 'prediction': '',\n",
714
+ " 'answer': 'Rd5',\n",
715
+ " 'unsolvable_reason': 'Cannot view the chessboard image'},\n",
716
+ " {'task_id': '935e2cff-ae78-4218-b3f5-115589b19dae',\n",
717
+ " 'model': 'gpt-5',\n",
718
+ " 'correct': True,\n",
719
+ " 'is_solvable': True,\n",
720
+ " 'prediction': 'research',\n",
721
+ " 'answer': 'research',\n",
722
+ " 'unsolvable_reason': ''},\n",
723
+ " {'task_id': '4fc2f1ae-8625-45b5-ab34-ad4433bc21f8',\n",
724
+ " 'model': 'gpt-5',\n",
725
+ " 'correct': True,\n",
726
+ " 'is_solvable': True,\n",
727
+ " 'prediction': 'FunkMonk',\n",
728
+ " 'answer': 'FunkMonk',\n",
729
+ " 'unsolvable_reason': ''},\n",
730
+ " {'task_id': '5188369a-3bbe-43d8-8b94-11558f909a08',\n",
731
+ " 'model': 'gpt-5',\n",
732
+ " 'correct': False,\n",
733
+ " 'is_solvable': False,\n",
734
+ " 'prediction': '',\n",
735
+ " 'answer': 'Annie Levin',\n",
736
+ " 'unsolvable_reason': 'I need to look up Merriam-Webster’s Word of the Day page for June 27, 2022 to see the quoted writer, but I don’t have browsing access.'}],\n",
737
+ " 'gpt-5-mini': [{'task_id': 'e1fc63a2-da7a-432f-be78-7c4a95598703',\n",
738
+ " 'model': 'gpt-5-mini',\n",
739
+ " 'correct': False,\n",
740
+ " 'is_solvable': False,\n",
741
+ " 'prediction': '',\n",
742
+ " 'answer': '17',\n",
743
+ " 'unsolvable_reason': 'I cannot access external websites such as Wikipedia to retrieve the exact minimum perigee value required for the calculation.'},\n",
744
+ " {'task_id': '8e867cd7-cff9-4e6c-867a-ff5ddc2550be',\n",
745
+ " 'model': 'gpt-5-mini',\n",
746
+ " 'correct': False,\n",
747
+ " 'is_solvable': False,\n",
748
+ " 'prediction': '',\n",
749
+ " 'answer': '3',\n",
750
+ " 'unsolvable_reason': \"I cannot access the 2022 English Wikipedia from here to verify Mercedes Sosa's discography and reliably count studio albums released between 2000 and 2009.\"},\n",
751
+ " {'task_id': 'ec09fa32-d03f-4bf8-84b0-1f16922c3ae4',\n",
752
+ " 'model': 'gpt-5-mini',\n",
753
+ " 'correct': True,\n",
754
+ " 'is_solvable': True,\n",
755
+ " 'prediction': '3',\n",
756
+ " 'answer': '3',\n",
757
+ " 'unsolvable_reason': ''},\n",
758
+ " {'task_id': '5d0080cb-90d7-4712-bc33-848150e917d3',\n",
759
+ " 'model': 'gpt-5-mini',\n",
760
+ " 'correct': False,\n",
761
+ " 'is_solvable': False,\n",
762
+ " 'prediction': '',\n",
763
+ " 'answer': '0.1777',\n",
764
+ " 'unsolvable_reason': \"I cannot access external documents or the internet and do not have the paper's calculated fish bag volume memorized.\"},\n",
765
+ " {'task_id': 'a1e91b78-d3d8-4675-bb8d-62741b4b68a6',\n",
766
+ " 'model': 'gpt-5-mini',\n",
767
+ " 'correct': False,\n",
768
+ " 'is_solvable': False,\n",
769
+ " 'prediction': '',\n",
770
+ " 'answer': '3',\n",
771
+ " 'unsolvable_reason': 'I cannot access or view external video content (YouTube) to count bird species on screen.'},\n",
772
+ " {'task_id': '46719c30-f4c3-4cad-be07-d5cb21eee6bb',\n",
773
+ " 'model': 'gpt-5-mini',\n",
774
+ " 'correct': False,\n",
775
+ " 'is_solvable': False,\n",
776
+ " 'prediction': '',\n",
777
+ " 'answer': 'Mapping Human Oriented Information to Software Agents for Online Systems Usage',\n",
778
+ " 'unsolvable_reason': \"I cannot access external databases or the internet to look up the 2015 paper's authors and their publication histories, and I do not have that specific bibliographic information memorized.\"},\n",
779
+ " {'task_id': '4b6bb5f7-f634-410e-815d-e673ab7f8632',\n",
780
+ " 'model': 'gpt-5-mini',\n",
781
+ " 'correct': False,\n",
782
+ " 'is_solvable': False,\n",
783
+ " 'prediction': '',\n",
784
+ " 'answer': 'THE CASTLE',\n",
785
+ " 'unsolvable_reason': 'I cannot reliably recall the exact wording of the first scene heading from the official script and I cannot access external resources to check the script to provide the precise, verbatim setting.'},\n",
786
+ " {'task_id': 'cffe0e32-c9a6-4c52-9877-78ceb4aaa9fb',\n",
787
+ " 'model': 'gpt-5-mini',\n",
788
+ " 'correct': False,\n",
789
+ " 'is_solvable': False,\n",
790
+ " 'prediction': '',\n",
791
+ " 'answer': 'Fred',\n",
792
+ " 'unsolvable_reason': 'Insufficient information: the document with the employees, their likes, and assignment/gift details was not provided.'},\n",
793
+ " {'task_id': '2d83110e-a098-4ebb-9987-066c06fa42d0',\n",
794
+ " 'model': 'gpt-5-mini',\n",
795
+ " 'correct': True,\n",
796
+ " 'is_solvable': True,\n",
797
+ " 'prediction': 'right',\n",
798
+ " 'answer': 'Right',\n",
799
+ " 'unsolvable_reason': ''},\n",
800
+ " {'task_id': '5cfb274c-0207-4aa7-9575-6ac0bd95d9b2',\n",
801
+ " 'model': 'gpt-5-mini',\n",
802
+ " 'correct': False,\n",
803
+ " 'is_solvable': False,\n",
804
+ " 'prediction': '',\n",
805
+ " 'answer': 'No',\n",
806
+ " 'unsolvable_reason': 'I cannot access the attached spreadsheet or any images. Paste the grid (use G for Earl plots and . for others) or give coordinates so I can analyze the path.'},\n",
807
+ " {'task_id': '27d5d136-8563-469e-92bf-fd103c28b57c',\n",
808
+ " 'model': 'gpt-5-mini',\n",
809
+ " 'correct': True,\n",
810
+ " 'is_solvable': True,\n",
811
+ " 'prediction': '(¬A → B) ↔ (A ∨ ¬B)',\n",
812
+ " 'answer': '(¬A → B) ↔ (A ∨ ¬B)',\n",
813
+ " 'unsolvable_reason': ''},\n",
814
+ " {'task_id': 'dc28cf18-6431-458b-83ef-64b3ce566c10',\n",
815
+ " 'model': 'gpt-5-mini',\n",
816
+ " 'correct': True,\n",
817
+ " 'is_solvable': True,\n",
818
+ " 'prediction': '2',\n",
819
+ " 'answer': '2',\n",
820
+ " 'unsolvable_reason': ''},\n",
821
+ " {'task_id': 'b816bfce-3d80-4913-a07d-69b752ce6377',\n",
822
+ " 'model': 'gpt-5-mini',\n",
823
+ " 'correct': False,\n",
824
+ " 'is_solvable': False,\n",
825
+ " 'prediction': '',\n",
826
+ " 'answer': 'fluffy',\n",
827
+ " 'unsolvable_reason': \"I cannot access external sources to read Emily Midkiff's June 2014 article in Fafnir and so cannot determine the quoted word.\"},\n",
828
+ " {'task_id': '72e110e7-464c-453c-a309-90a95aed6538',\n",
829
+ " 'model': 'gpt-5-mini',\n",
830
+ " 'correct': False,\n",
831
+ " 'is_solvable': False,\n",
832
+ " 'prediction': '',\n",
833
+ " 'answer': 'Guatemala',\n",
834
+ " 'unsolvable_reason': 'I cannot browse the Bielefeld University Library BASE site or view its 2020 content to inspect the article flags. Determining which country’s flag was unique requires accessing that specific webpage or an archived snapshot, which I cannot do.'},\n",
835
+ " {'task_id': '42576abe-0deb-4869-8c63-225c2d75a95a',\n",
836
+ " 'model': 'gpt-5-mini',\n",
837
+ " 'correct': True,\n",
838
+ " 'is_solvable': True,\n",
839
+ " 'prediction': 'Maktay Mato Apple',\n",
840
+ " 'answer': 'Maktay mato apple',\n",
841
+ " 'unsolvable_reason': ''},\n",
842
+ " {'task_id': 'b415aba4-4b68-4fc6-9b89-2c812e55a3e1',\n",
843
+ " 'model': 'gpt-5-mini',\n",
844
+ " 'correct': False,\n",
845
+ " 'is_solvable': False,\n",
846
+ " 'prediction': '',\n",
847
+ " 'answer': 'diamond',\n",
848
+ " 'unsolvable_reason': 'I cannot access external web resources or the specific 2012 Scientific Reports conference proceedings to identify that article and its studied compound.'},\n",
849
+ " {'task_id': 'cca530fc-4052-43b2-b130-b30968d8aa44',\n",
850
+ " 'model': 'gpt-5-mini',\n",
851
+ " 'correct': False,\n",
852
+ " 'is_solvable': False,\n",
853
+ " 'prediction': 'image not available',\n",
854
+ " 'answer': 'Rd5',\n",
855
+ " 'unsolvable_reason': 'Image not provided or inaccessible; cannot determine board position and legal winning move'},\n",
856
+ " {'task_id': '935e2cff-ae78-4218-b3f5-115589b19dae',\n",
857
+ " 'model': 'gpt-5-mini',\n",
858
+ " 'correct': False,\n",
859
+ " 'is_solvable': False,\n",
860
+ " 'prediction': '',\n",
861
+ " 'answer': 'research',\n",
862
+ " 'unsolvable_reason': \"I cannot access or view the specific Wikipedia public logs for the Legume page from 2022; determining what 'R' stood for requires looking at those logs or contemporaneous Wikipedia discussion, which I cannot browse from here.\"},\n",
863
+ " {'task_id': '4fc2f1ae-8625-45b5-ab34-ad4433bc21f8',\n",
864
+ " 'model': 'gpt-5-mini',\n",
865
+ " 'correct': False,\n",
866
+ " 'is_solvable': False,\n",
867
+ " 'prediction': '',\n",
868
+ " 'answer': 'FunkMonk',\n",
869
+ " 'unsolvable_reason': 'I cannot access Wikipedia or external web sources to check which dinosaur article was promoted in November 2016 and who nominated it.'},\n",
870
+ " {'task_id': '5188369a-3bbe-43d8-8b94-11558f909a08',\n",
871
+ " 'model': 'gpt-5-mini',\n",
872
+ " 'correct': False,\n",
873
+ " 'is_solvable': False,\n",
874
+ " 'prediction': '',\n",
875
+ " 'answer': 'Annie Levin',\n",
876
+ " 'unsolvable_reason': 'I cannot access the Merriam-Webster Word of the Day archive or the web to verify the quoted writer for June 27 2022.'}]}"
877
+ ]
878
+ },
879
+ "execution_count": 20,
880
+ "metadata": {},
881
+ "output_type": "execute_result"
882
+ }
883
+ ],
884
+ "source": [
885
+ "results"
886
+ ]
887
+ },
888
+ {
889
+ "cell_type": "markdown",
890
+ "id": "99926f44",
891
+ "metadata": {},
892
+ "source": [
893
+ "## Tool Usage"
894
+ ]
895
+ },
896
+ {
897
+ "cell_type": "code",
898
+ "execution_count": null,
899
+ "id": "ba50100c",
900
+ "metadata": {},
901
+ "outputs": [],
902
+ "source": []
903
+ }
904
+ ],
905
+ "metadata": {
906
+ "kernelspec": {
907
+ "display_name": ".venv",
908
+ "language": "python",
909
+ "name": "python3"
910
+ },
911
+ "language_info": {
912
+ "codemirror_mode": {
913
+ "name": "ipython",
914
+ "version": 3
915
+ },
916
+ "file_extension": ".py",
917
+ "mimetype": "text/x-python",
918
+ "name": "python",
919
+ "nbconvert_exporter": "python",
920
+ "pygments_lexer": "ipython3",
921
+ "version": "3.12.11"
922
+ }
923
+ },
924
+ "nbformat": 4,
925
+ "nbformat_minor": 5
926
+ }
pyproject.toml CHANGED
@@ -6,12 +6,16 @@ readme = "README.md"
6
  requires-python = ">=3.11"
7
  dependencies = [
8
  "chromadb>=1.0.20",
 
9
  "fastmcp>=2.11.3",
 
 
10
  "mcp>=1.13.1",
11
  "openai>=1.101.0",
12
  "pydantic>=2.11.7",
13
  "python-dotenv>=1.1.1",
14
  "tavily-python>=0.7.11",
 
15
  "wikipedia>=1.4.0",
16
  ]
17
 
 
6
  requires-python = ">=3.11"
7
  dependencies = [
8
  "chromadb>=1.0.20",
9
+ "datasets>=4.5.0",
10
  "fastmcp>=2.11.3",
11
+ "ipykernel>=7.1.0",
12
+ "litellm>=1.81.3",
13
  "mcp>=1.13.1",
14
  "openai>=1.101.0",
15
  "pydantic>=2.11.7",
16
  "python-dotenv>=1.1.1",
17
  "tavily-python>=0.7.11",
18
+ "tqdm>=4.67.1",
19
  "wikipedia>=1.4.0",
20
  ]
21
 
scratch_agents/agents/__init__.py DELETED
File without changes
scratch_agents/agents/execution_context_ch4.py DELETED
@@ -1,27 +0,0 @@
1
- from dataclasses import dataclass, field
2
- from typing import List, Dict, Any, Optional
3
- from ..types.events import Event
4
- from ..types.contents import Message
5
- from pydantic import BaseModel
6
- import uuid
7
-
8
- @dataclass
9
- class ExecutionContext:
10
- """Manages the execution state of an agent throughout its lifecycle."""
11
-
12
- execution_id: str = field(default_factory=lambda: str(uuid.uuid4()))
13
-
14
- events: List[Event] = field(default_factory=list)
15
- user_input: Optional[Message] = None
16
- current_step: int = 0
17
-
18
- state: Dict[str, Any] = field(default_factory=dict)
19
-
20
- final_result: str | BaseModel = None
21
-
22
- def add_event(self, event: Event):
23
- """Add an event to the history"""
24
- self.events.append(event)
25
-
26
- def increment_step(self):
27
- self.current_step += 1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scratch_agents/agents/execution_context_ch6.py DELETED
@@ -1,34 +0,0 @@
1
- from dataclasses import dataclass
2
- from ..sessions.session import Session
3
- from ..sessions.in_memory_session_manager import InMemorySessionManager
4
- from ..sessions.base_session_manager import BaseSessionManager
5
- from dataclasses import field
6
- import uuid
7
- from pydantic import BaseModel
8
- from typing import List, Dict, Any
9
- from ..types.events import Event
10
- from ..sessions.base_cross_session_manager import BaseCrossSessionManager
11
-
12
- @dataclass
13
- class ExecutionContext:
14
- session: Session
15
- session_manager: BaseSessionManager
16
- cross_session_manager: BaseCrossSessionManager
17
- execution_id: str = field(default_factory=lambda: str(uuid.uuid4()))
18
- current_step: int = 0
19
- max_steps: int = 10
20
- user_input: str = ""
21
- final_result: str | BaseModel = ""
22
-
23
- def add_event(self, event: Event) -> None:
24
- self.session_manager.add_event(self.session, event)
25
- @property
26
- def events(self) -> List[Event]:
27
- return self.session.events
28
-
29
- @property
30
- def state(self) -> Dict[str, Any]:
31
- return self.session.state
32
-
33
- def increment_step(self) -> None:
34
- self.current_step += 1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scratch_agents/agents/tool_calling_agent_ch4_base.py DELETED
@@ -1,116 +0,0 @@
1
- from typing import List, Dict, Any, Optional
2
- from ..models.base_llm import BaseLlm
3
- from ..models.llm_request import LlmRequest
4
- from ..models.llm_response import LlmResponse
5
- from ..types.contents import Message, ToolCall
6
- from ..types.events import Event
7
- from .execution_context_ch4 import ExecutionContext
8
- from ..tools.base_tool import BaseTool
9
- from ..types.contents import ToolResult
10
- from typing import Type
11
- from pydantic import BaseModel
12
-
13
- class ToolCallingAgent:
14
- def __init__(self, name: str, model: BaseLlm, tools: List[BaseTool], instructions: str, max_steps: int = 10, output_type: Optional[Type[BaseModel]] = None):
15
- self.name = name
16
- self.model = model
17
- self.max_steps = max_steps
18
- self.instructions = instructions
19
- self.tools = self._setup_tools(tools)
20
-
21
- def _setup_tools(self, tools: List[BaseTool]):
22
- return {tool.name: tool for tool in tools}
23
-
24
- async def think(self, context: ExecutionContext, llm_request: LlmRequest):
25
- llm_response = await self.model.generate(llm_request)
26
- return llm_response
27
-
28
- async def act(self, context: ExecutionContext, tool_calls: List[ToolCall]):
29
- tool_results = []
30
- for tool_call in tool_calls:
31
- tool_name = tool_call.name
32
- tool_input = tool_call.arguments
33
- print(f" → Calling {tool_name} with {tool_input}")
34
- try:
35
- result_output = await self.tools[tool_name](**tool_input)
36
- tool_results.append(
37
- ToolResult(
38
- tool_call_id=tool_call.tool_call_id,
39
- name=tool_call.name,
40
- status="success",
41
- content=str(result_output),
42
- )
43
- )
44
- except Exception as e:
45
- tool_results.append(
46
- ToolResult(
47
- tool_call_id=tool_call.tool_call_id,
48
- name=tool_call.name,
49
- status="error",
50
- content=str(e),
51
- )
52
- )
53
- return tool_results
54
-
55
- async def step(self, context: ExecutionContext):
56
- print(f"[Step {context.current_step + 1}]")
57
- llm_request = self._prepare_llm_request(context)
58
- llm_response = await self.think(context, llm_request)
59
- response_event = Event(
60
- execution_id=context.execution_id,
61
- author=self.name,
62
- **llm_response.model_dump(),
63
- )
64
- context.add_event(response_event)
65
-
66
- if tool_calls := response_event.get_tool_calls():
67
- tool_results = await self.act(context, tool_calls)
68
- tool_results_event = Event(
69
- execution_id=context.execution_id,
70
- author=self.name,
71
- content=tool_results,
72
- )
73
- context.add_event(tool_results_event)
74
-
75
- context.increment_step()
76
-
77
- async def run(self, user_input: str):
78
- context = ExecutionContext(
79
- user_input=user_input,
80
- )
81
- user_input_event = Event(
82
- execution_id=context.execution_id,
83
- author="user",
84
- content=[
85
- Message(
86
- role="user",
87
- content=user_input,
88
- )
89
- ],
90
- )
91
- context.add_event(user_input_event)
92
-
93
- while not context.final_result and context.current_step < self.max_steps:
94
- await self.step(context)
95
-
96
- last_event = context.events[-1]
97
- if last_event.is_final_response():
98
- context.final_result = self._extract_final_result(last_event)
99
-
100
- return context.final_result
101
-
102
- def _prepare_llm_request(self, context: ExecutionContext):
103
- flat_contents = []
104
- for event in context.events:
105
- flat_contents.extend(event.content)
106
- return LlmRequest(
107
- instructions=[self.instructions] if self.instructions else [],
108
- contents=flat_contents,
109
- tools_dict=self.tools,
110
- tool_choice="auto" if self.tools else None
111
- )
112
-
113
- def _extract_final_result(self, event: Event):
114
- for item in event.content:
115
- if isinstance(item, Message) and item.role == "assistant":
116
- return item.content
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scratch_agents/agents/tool_calling_agent_ch4_callback.py DELETED
@@ -1,203 +0,0 @@
1
- from typing import List, Dict, Any, Optional
2
- from ..models.base_llm import BaseLlm
3
- from ..models.llm_request import LlmRequest
4
- from ..models.llm_response import LlmResponse
5
- from ..types.contents import Message, ToolCall
6
- from ..types.events import Event
7
- from .execution_context_ch4 import ExecutionContext
8
- from ..tools.base_tool import BaseTool
9
- from ..types.contents import ToolResult
10
- from typing import Type
11
- from pydantic import BaseModel
12
- from ..tools.decorator import tool
13
- import inspect
14
-
15
- class ToolCallingAgent:
16
- def __init__(self, name: str, model: BaseLlm,
17
- tools: List[BaseTool],
18
- instructions: str,
19
- max_steps: int = 10,
20
- output_type: Optional[Type[BaseModel]] = None,
21
- before_llm_callbacks = None,
22
- after_llm_callbacks = None,
23
- before_tool_callbacks = None,
24
- after_tool_callbacks = None,
25
- after_run_callbacks = None):
26
- self.name = name
27
- self.model = model
28
- self.max_steps = max_steps
29
- self.instructions = instructions
30
- self.output_type: Optional[Type[BaseModel]] = output_type
31
- self.output_tool: Optional[str] = None
32
- self.tools = self._setup_tools(tools)
33
- self.before_llm_callbacks = before_llm_callbacks or []
34
- self.after_llm_callbacks = after_llm_callbacks or []
35
- self.before_tool_callbacks = before_tool_callbacks or []
36
- self.after_tool_callbacks = after_tool_callbacks or []
37
- self.after_run_callbacks = after_run_callbacks or []
38
-
39
-
40
- def _setup_tools(self, tools: List[BaseTool]):
41
- if self.output_type is not None:
42
- @tool(name="final_answer", description="Return the final structured answer matching the required schema.")
43
- def final_answer(output: self.output_type) -> self.output_type:
44
- return output
45
- tools.append(final_answer)
46
- self.output_tool = final_answer.name
47
- return {t.name: t for t in tools}
48
-
49
- async def think(self, context: ExecutionContext, llm_request: LlmRequest):
50
- for callback in self.before_llm_callbacks:
51
- result = callback(context, llm_request)
52
- if inspect.isawaitable(result):
53
- result = await result
54
- if result is not None:
55
- return result
56
-
57
- llm_response = await self.model.generate(llm_request)
58
-
59
- for callback in self.after_llm_callbacks:
60
- result = callback(context, llm_response)
61
- if inspect.isawaitable(result):
62
- result = await result
63
- if result is not None:
64
- return result
65
-
66
- return llm_response
67
-
68
-
69
- async def act(self, context: ExecutionContext, tool_calls: List[ToolCall]):
70
- tool_results = []
71
- for tool_call in tool_calls:
72
- tool_name = tool_call.name
73
- tool_input = tool_call.arguments
74
- print(f" → Calling {tool_name} with {tool_input}")
75
-
76
- # Step 1: before_tool_callbacks - can skip tool execution
77
- tool_response = None
78
- for callback in self.before_tool_callbacks:
79
- result = callback(context, tool_call)
80
- if inspect.isawaitable(result):
81
- result = await result
82
- if result is not None:
83
- tool_response = result
84
- break
85
-
86
- # Step 2: Execute tool if no callback provided result
87
- status = "success"
88
- if tool_response is None:
89
- try:
90
- tool_response = await self.tools[tool_name](**tool_input)
91
- except Exception as e:
92
- tool_response = str(e)
93
- status = "error"
94
-
95
- # Step 3: after_tool_callbacks - only after actual tool execution
96
- for callback in self.after_tool_callbacks:
97
- result = callback(context, tool_response)
98
- if inspect.isawaitable(result):
99
- result = await result
100
- if result is not None:
101
- tool_response = result
102
- break
103
-
104
- # Step 4: Wrap in ToolResult at the end
105
- if tool_response is not None:
106
- tool_result = ToolResult(
107
- tool_call_id=tool_call.tool_call_id,
108
- name=tool_call.name,
109
- status=status,
110
- content=str(tool_response),
111
- )
112
- tool_results.append(tool_result)
113
-
114
- return tool_results
115
-
116
- async def step(self, context: ExecutionContext):
117
- print(f"[Step {context.current_step + 1}]")
118
- llm_request = self._prepare_llm_request(context)
119
- llm_response = await self.think(context, llm_request)
120
- if llm_response.error_message:
121
- raise RuntimeError(f"LLM error: {llm_response.error_message}")
122
- response_event = Event(
123
- execution_id=context.execution_id,
124
- author=self.name,
125
- required_output_tool=self.output_tool or None,
126
- **llm_response.model_dump(),
127
- )
128
- context.add_event(response_event)
129
-
130
- if tool_calls := response_event.get_tool_calls():
131
- tool_results = await self.act(context, tool_calls)
132
- tool_results_event = Event(
133
- execution_id=context.execution_id,
134
- author=self.name,
135
- required_output_tool=self.output_tool or None,
136
- content=tool_results,
137
- )
138
- context.add_event(tool_results_event)
139
-
140
- context.increment_step()
141
-
142
- async def run(self, user_input: str):
143
- context = ExecutionContext(
144
- user_input=user_input,
145
- )
146
- user_input_event = Event(
147
- execution_id=context.execution_id,
148
- author="user",
149
- content=[
150
- Message(
151
- role="user",
152
- content=user_input,
153
- )
154
- ],
155
- )
156
- context.add_event(user_input_event)
157
-
158
- while not context.final_result and context.current_step < self.max_steps:
159
- await self.step(context)
160
-
161
- last_event = context.events[-1]
162
- if last_event.is_final_response():
163
- context.final_result = self._extract_final_result(last_event)
164
-
165
- for callback in self.after_run_callbacks:
166
- result = callback(context)
167
- if inspect.isawaitable(result):
168
- await result
169
-
170
- return context.final_result
171
-
172
- def _prepare_llm_request(self, context: ExecutionContext):
173
- flat_contents = []
174
- for event in context.events:
175
- flat_contents.extend(event.content)
176
-
177
- if self.output_tool:
178
- tool_choice = "required"
179
- elif self.tools:
180
- tool_choice = "auto"
181
- else:
182
- tool_choice = None
183
-
184
- return LlmRequest(
185
- instructions=[self.instructions] if self.instructions else [],
186
- contents=flat_contents,
187
- tools_dict=self.tools,
188
- tool_choice=tool_choice
189
- )
190
-
191
- def _extract_final_result(self, event: Event):
192
- if event.required_output_tool:
193
- for item in event.content:
194
- if (
195
- isinstance(item, ToolResult)
196
- and item.status == "success"
197
- and item.name == event.required_output_tool
198
- and item.content
199
- ):
200
- return item.content[0]
201
- for item in event.content:
202
- if isinstance(item, Message) and item.role == "assistant":
203
- return item.content
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scratch_agents/agents/tool_calling_agent_ch4_structured_output.py DELETED
@@ -1,146 +0,0 @@
1
- from typing import List, Dict, Any, Optional
2
- from ..models.base_llm import BaseLlm
3
- from ..models.llm_request import LlmRequest
4
- from ..models.llm_response import LlmResponse
5
- from ..types.contents import Message, ToolCall
6
- from ..types.events import Event
7
- from .execution_context_ch4 import ExecutionContext
8
- from ..tools.base_tool import BaseTool
9
- from ..types.contents import ToolResult
10
- from typing import Type
11
- from pydantic import BaseModel
12
- from ..tools.decorator import tool
13
-
14
- class ToolCallingAgent:
15
- def __init__(self, name: str, model: BaseLlm, tools: List[BaseTool], instructions: str, max_steps: int = 10, output_type: Optional[Type[BaseModel]] = None):
16
- self.name = name
17
- self.model = model
18
- self.max_steps = max_steps
19
- self.instructions = instructions
20
- self.output_type: Optional[Type[BaseModel]] = output_type
21
- self.output_tool: Optional[str] = None
22
- self.tools = self._setup_tools(tools)
23
-
24
- def _setup_tools(self, tools: List[BaseTool]):
25
- if self.output_type is not None:
26
- @tool(name="final_answer", description="Return the final structured answer matching the required schema.")
27
- def final_answer(output: self.output_type) -> self.output_type:
28
- return output
29
- tools.append(final_answer)
30
- self.output_tool = final_answer.name
31
- return {t.name: t for t in tools}
32
-
33
- async def think(self, context: ExecutionContext, llm_request: LlmRequest):
34
- llm_response = await self.model.generate(llm_request)
35
- return llm_response
36
-
37
- async def act(self, context: ExecutionContext, tool_calls: List[ToolCall]):
38
- tool_results = []
39
- for tool_call in tool_calls:
40
- tool_name = tool_call.name
41
- tool_input = tool_call.arguments
42
- print(f" → Calling {tool_name} with {tool_input}")
43
- try:
44
- result_output = await self.tools[tool_name](**tool_input)
45
- tool_results.append(
46
- ToolResult(
47
- tool_call_id=tool_call.tool_call_id,
48
- name=tool_call.name,
49
- status="success",
50
- content=str(result_output),
51
- )
52
- )
53
- except Exception as e:
54
- tool_results.append(
55
- ToolResult(
56
- tool_call_id=tool_call.tool_call_id,
57
- name=tool_call.name,
58
- status="error",
59
- content=str(e),
60
- )
61
- )
62
- return tool_results
63
-
64
- async def step(self, context: ExecutionContext):
65
- print(f"[Step {context.current_step + 1}]")
66
- llm_request = self._prepare_llm_request(context)
67
- llm_response = await self.think(context, llm_request)
68
- if llm_response.error_message:
69
- raise RuntimeError(f"LLM error: {llm_response.error_message}")
70
- response_event = Event(
71
- execution_id=context.execution_id,
72
- author=self.name,
73
- required_output_tool=self.output_tool or None,
74
- **llm_response.model_dump(),
75
- )
76
- context.add_event(response_event)
77
-
78
- if tool_calls := response_event.get_tool_calls():
79
- tool_results = await self.act(context, tool_calls)
80
- tool_results_event = Event(
81
- execution_id=context.execution_id,
82
- author=self.name,
83
- required_output_tool=self.output_tool or None,
84
- content=tool_results,
85
- )
86
- context.add_event(tool_results_event)
87
-
88
- context.increment_step()
89
-
90
- async def run(self, user_input: str):
91
- context = ExecutionContext(
92
- user_input=user_input,
93
- )
94
- user_input_event = Event(
95
- execution_id=context.execution_id,
96
- author="user",
97
- content=[
98
- Message(
99
- role="user",
100
- content=user_input,
101
- )
102
- ],
103
- )
104
- context.add_event(user_input_event)
105
-
106
- while not context.final_result and context.current_step < self.max_steps:
107
- await self.step(context)
108
-
109
- last_event = context.events[-1]
110
- if last_event.is_final_response():
111
- context.final_result = self._extract_final_result(last_event)
112
-
113
- return context.final_result
114
-
115
- def _prepare_llm_request(self, context: ExecutionContext):
116
- flat_contents = []
117
- for event in context.events:
118
- flat_contents.extend(event.content)
119
-
120
- if self.output_tool:
121
- tool_choice = "required"
122
- elif self.tools:
123
- tool_choice = "auto"
124
- else:
125
- tool_choice = None
126
-
127
- return LlmRequest(
128
- instructions=[self.instructions] if self.instructions else [],
129
- contents=flat_contents,
130
- tools_dict=self.tools,
131
- tool_choice=tool_choice
132
- )
133
-
134
- def _extract_final_result(self, event: Event):
135
- if event.required_output_tool:
136
- for item in event.content:
137
- if (
138
- isinstance(item, ToolResult)
139
- and item.status == "success"
140
- and item.name == event.required_output_tool
141
- and item.content
142
- ):
143
- return item.content[0]
144
- for item in event.content:
145
- if isinstance(item, Message) and item.role == "assistant":
146
- return item.content
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scratch_agents/agents/tool_calling_agent_ch6.py DELETED
@@ -1,226 +0,0 @@
1
- from typing import List, Dict, Any, Optional
2
- from ..models.base_llm import BaseLlm
3
- from ..models.llm_request import LlmRequest
4
- from ..models.llm_response import LlmResponse
5
- from ..types.contents import Message, ToolCall
6
- from ..types.events import Event
7
- from .execution_context_ch6 import ExecutionContext
8
- from ..tools.base_tool import BaseTool
9
- from ..types.contents import ToolResult
10
- from typing import Type
11
- from pydantic import BaseModel
12
- from ..tools.decorator import tool
13
- import inspect
14
- from ..sessions.base_session_manager import BaseSessionManager
15
- from ..sessions.in_memory_session_manager import InMemorySessionManager
16
- from ..sessions.base_cross_session_manager import BaseCrossSessionManager
17
-
18
- class ToolCallingAgent:
19
- def __init__(self, name: str, model: BaseLlm,
20
- tools: List[BaseTool] = [],
21
- instructions: str = "",
22
- max_steps: int = 10,
23
- output_type: Optional[Type[BaseModel]] = None,
24
- before_llm_callbacks = None,
25
- after_llm_callbacks = None,
26
- before_tool_callbacks = None,
27
- after_tool_callbacks = None,
28
- after_run_callbacks = None,
29
- session_manager: BaseSessionManager = None,
30
- cross_session_manager: BaseCrossSessionManager = None):
31
- self.name = name
32
- self.model = model
33
- self.max_steps = max_steps
34
- self.instructions = instructions
35
- self.output_type: Optional[Type[BaseModel]] = output_type
36
- self.output_tool: Optional[str] = None
37
- self.tools = self._setup_tools(tools)
38
- self.before_llm_callbacks = before_llm_callbacks or []
39
- self.after_llm_callbacks = after_llm_callbacks or []
40
- self.before_tool_callbacks = before_tool_callbacks or []
41
- self.after_tool_callbacks = after_tool_callbacks or []
42
- self.after_run_callbacks = after_run_callbacks or []
43
- self.session_manager = session_manager or InMemorySessionManager()
44
- self.cross_session_manager = cross_session_manager
45
-
46
- def _setup_tools(self, tools: List[BaseTool]):
47
- if self.output_type is not None:
48
- @tool(name="final_answer", description="Return the final structured answer matching the required schema.")
49
- def final_answer(output: self.output_type) -> self.output_type:
50
- return output
51
- tools.append(final_answer)
52
- self.output_tool = final_answer.name
53
- return {t.name: t for t in tools}
54
-
55
- async def think(self, context: ExecutionContext, llm_request: LlmRequest):
56
- for callback in self.before_llm_callbacks:
57
- result = callback(context, llm_request)
58
- if inspect.isawaitable(result):
59
- result = await result
60
- if result is not None:
61
- return result
62
-
63
- llm_response = await self.model.generate(llm_request)
64
-
65
- for callback in self.after_llm_callbacks:
66
- result = callback(context, llm_response)
67
- if inspect.isawaitable(result):
68
- result = await result
69
- if result is not None:
70
- return result
71
-
72
- return llm_response
73
-
74
- async def _execute_tool(self, context: ExecutionContext, tool_name: str, tool_input: dict) -> Any:
75
- """Execute a tool with context injection if needed"""
76
- tool = self.tools[tool_name]
77
-
78
- # All tools now handle context properly in their execute method
79
- return await tool.execute(context, **tool_input)
80
-
81
- async def act(self, context: ExecutionContext, tool_calls: List[ToolCall]):
82
- tool_results = []
83
- for tool_call in tool_calls:
84
- tool_name = tool_call.name
85
- tool_input = tool_call.arguments
86
- print(f" → Calling {tool_name} with {tool_input}")
87
-
88
- # Step 1: before_tool_callbacks - can skip tool execution
89
- tool_response = None
90
- for callback in self.before_tool_callbacks:
91
- result = callback(context, tool_call)
92
- if inspect.isawaitable(result):
93
- result = await result
94
- if result is not None:
95
- tool_response = result
96
- break
97
-
98
- # Step 2: Execute tool if no callback provided result
99
- status = "success"
100
- if tool_response is None:
101
- try:
102
- tool_response = await self._execute_tool(context, tool_name, tool_input)
103
- except Exception as e:
104
- tool_response = str(e)
105
- status = "error"
106
-
107
- # Step 3: after_tool_callbacks - only after actual tool execution
108
- for callback in self.after_tool_callbacks:
109
- result = callback(context, tool_response)
110
- if inspect.isawaitable(result):
111
- result = await result
112
- if result is not None:
113
- tool_response = result
114
- break
115
-
116
- # Step 4: Wrap in ToolResult at the end
117
- if tool_response is not None:
118
- tool_result = ToolResult(
119
- tool_call_id=tool_call.tool_call_id,
120
- name=tool_call.name,
121
- status=status,
122
- content=str(tool_response),
123
- )
124
- tool_results.append(tool_result)
125
-
126
- return tool_results
127
-
128
- async def step(self, context: ExecutionContext):
129
- print(f"[Step {context.current_step + 1}]")
130
- llm_request = await self._prepare_llm_request(context)
131
- llm_response = await self.think(context, llm_request)
132
- if llm_response.error_message:
133
- raise RuntimeError(f"LLM error: {llm_response.error_message}")
134
- response_event = Event(
135
- execution_id=context.execution_id,
136
- author=self.name,
137
- required_output_tool=self.output_tool or None,
138
- **llm_response.model_dump(),
139
- )
140
- context.add_event(response_event)
141
-
142
- if tool_calls := response_event.get_tool_calls():
143
- tool_results = await self.act(context, tool_calls)
144
- tool_results_event = Event(
145
- execution_id=context.execution_id,
146
- author=self.name,
147
- required_output_tool=self.output_tool or None,
148
- content=tool_results,
149
- )
150
- context.add_event(tool_results_event)
151
-
152
- context.increment_step()
153
-
154
- async def run(self, user_input: str,
155
- user_id: str = None,
156
- session_id: str = None):
157
- session = self.session_manager.get_or_create_session(session_id, user_id)
158
- context = ExecutionContext(
159
- user_input=user_input,
160
- session=session,
161
- session_manager=self.session_manager,
162
- cross_session_manager=self.cross_session_manager,
163
- )
164
-
165
- user_input_event = Event(
166
- execution_id=context.execution_id,
167
- author="user",
168
- content=[
169
- Message(
170
- role="user",
171
- content=user_input,
172
- )
173
- ],
174
- )
175
- context.add_event(user_input_event)
176
-
177
- while not context.final_result and context.current_step < self.max_steps:
178
- await self.step(context)
179
-
180
- last_event = context.events[-1]
181
- if last_event.is_final_response():
182
- context.final_result = self._extract_final_result(last_event)
183
-
184
- for callback in self.after_run_callbacks:
185
- result = callback(context)
186
- if inspect.isawaitable(result):
187
- await result
188
-
189
- return context.final_result
190
-
191
- async def _prepare_llm_request(self, context: ExecutionContext):
192
- flat_contents = []
193
- for event in context.events:
194
- flat_contents.extend(event.content)
195
-
196
- llm_request = LlmRequest(
197
- instructions=[self.instructions] if self.instructions else [],
198
- contents=flat_contents,
199
- tools_dict={tool.name:tool for tool in self.tools.values() if tool.tool_definition},
200
- )
201
-
202
- for tool in self.tools.values():
203
- await tool.process_llm_request(llm_request, context)
204
-
205
- if self.output_tool:
206
- llm_request.tool_choice = "required"
207
- elif llm_request.tools_dict:
208
- llm_request.tool_choice = "auto"
209
- else:
210
- llm_request.tool_choice = None
211
-
212
- return llm_request
213
-
214
- def _extract_final_result(self, event: Event):
215
- if event.required_output_tool:
216
- for item in event.content:
217
- if (
218
- isinstance(item, ToolResult)
219
- and item.status == "success"
220
- and item.name == event.required_output_tool
221
- and item.content
222
- ):
223
- return item.content[0]
224
- for item in event.content:
225
- if isinstance(item, Message) and item.role == "assistant":
226
- return item.content
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scratch_agents/memory/base_memory_strategy.py DELETED
@@ -1,13 +0,0 @@
1
- from abc import ABC, abstractmethod
2
-
3
- class MemoryStrategy(ABC):
4
- """Base class for memory management strategies"""
5
-
6
- @abstractmethod
7
- async def apply(self, context, llm_request): #A
8
- """Apply memory management strategy to the request"""
9
- pass
10
-
11
- async def __call__(self, context, llm_request): #B
12
- """Make strategy callable as a before_llm_callback"""
13
- return await self.apply(context, llm_request)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scratch_agents/memory/core_memory_strategy.py DELETED
@@ -1,21 +0,0 @@
1
- from .base_memory_strategy import MemoryStrategy
2
-
3
-
4
- class CoreMemoryStrategy(MemoryStrategy):
5
- """Automatically add core memory to LLM context"""
6
-
7
- async def apply(self, context, llm_request):
8
- """Add core memory as instructions if it exists"""
9
- core_memory = context.session.core_memory
10
-
11
- memory_parts = []
12
- if core_memory.get("agent"):
13
- memory_parts.append(f"[Your Persona]\n{core_memory['agent']}")
14
- if core_memory.get("user"):
15
- memory_parts.append(f"[User Info]\n{core_memory['user']}")
16
-
17
- if memory_parts:
18
- memory_text = "\n\n".join(memory_parts)
19
- llm_request.add_instructions([memory_text])
20
-
21
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scratch_agents/memory/sliding_window_strategy.py DELETED
@@ -1,26 +0,0 @@
1
- from .base_memory_strategy import MemoryStrategy
2
- from ..models.llm_request import LlmRequest
3
- from ..agents.execution_context_ch6 import ExecutionContext
4
-
5
-
6
- class SlidingWindowStrategy(MemoryStrategy):
7
- """Keep only the most recent N messages in context"""
8
-
9
- def __init__(self, max_messages: int = 20):
10
- self.max_messages = max_messages
11
-
12
- async def apply(self, context: ExecutionContext, llm_request: LlmRequest):
13
- """Apply sliding window to conversation history"""
14
- contents = llm_request.contents
15
-
16
- if len(contents) <= self.max_messages:
17
- return None
18
-
19
- # Keep only recent messages
20
- recent_contents = contents[-self.max_messages:]
21
- llm_request.contents = recent_contents
22
-
23
- print(f"Trimmed messages")
24
- print(f"from {len(contents)} to {self.max_messages}")
25
-
26
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scratch_agents/memory/summarization_strategy.py DELETED
@@ -1,77 +0,0 @@
1
from .base_memory_strategy import MemoryStrategy
from ..models.llm_request import LlmRequest
from ..types.contents import Message

class SummarizationStrategy(MemoryStrategy):
    """Summarize old messages to preserve information while reducing tokens"""

    def __init__(self, model, trigger_count: int = 10, keep_recent: int = 3):
        """Args:
            model: LLM used to produce summaries (must expose async generate()).
            trigger_count: Summarize once this many new messages accumulate.
            keep_recent: Number of trailing items left verbatim after compression.
        """
        self.model = model
        self.trigger_count = trigger_count  # threshold of new messages before summarizing
        self.keep_recent = keep_recent  # items kept verbatim at the tail

    async def _generate_summary(self, messages_text: str):
        """Ask the model for a concise (<=200-word) summary of messages_text."""
        request = LlmRequest(
            instructions=[
                "Summarize the following conversation concisely.",
                "Preserve key facts, decisions, and important context.",
                "Keep the summary under 200 words."
            ],
            contents=[Message(role="user", content=messages_text)]
        )

        response = await self.model.generate(request)

        # Return the first assistant text item from the response content.
        for item in response.content:
            if isinstance(item, Message) and item.role == "assistant":
                return item.content

        return "Summary generation failed"  # fallback: no assistant text came back

    async def apply(self, context, llm_request):
        """Apply summarization when new messages since last summary exceed threshold"""
        contents = llm_request.contents

        # Only Message items count toward the trigger; tool calls/results are ignored.
        messages_only = [item for item in contents if isinstance(item, Message)]
        last_summarized = context.state.get("last_summarized_index", 0)

        total_messages = len(messages_only)
        new_messages_count = total_messages - last_summarized  # unsummarized backlog

        if new_messages_count < self.trigger_count:
            return None

        # NOTE(review): to_summarize is sliced from messages_only but to_keep
        # from the full contents list, so when tool items are present the two
        # windows can overlap or drop tool items — confirm this is intended.
        summarize_until = total_messages - self.keep_recent
        to_summarize = messages_only[last_summarized:summarize_until]
        to_keep = contents[-self.keep_recent:] if len(contents) >= self.keep_recent else contents

        if not to_summarize:
            return None

        existing_summary = context.state.get("conversation_summary")

        # Fold any previous summary into the prompt so context accumulates
        # across successive compressions.
        summary_input = []
        if existing_summary:
            summary_input.append(f"Previous Summary:\n{existing_summary}\n")

        summary_input.append("New Messages to Summarize:\n")
        for msg in to_summarize:
            summary_input.append(f"{msg.role}: {msg.content}")

        messages_text = "\n".join(summary_input)

        new_summary = await self._generate_summary(messages_text)

        # Persist progress so the next apply() only summarizes fresh messages.
        context.state["conversation_summary"] = new_summary
        context.state["last_summarized_index"] = summarize_until

        if new_summary:
            summary_instruction = f"[Previous Conversation Summary]\n{new_summary}"
            llm_request.add_instructions([summary_instruction])

        llm_request.contents = to_keep  # drop everything the summary now covers

        print(f"Compressed {len(to_summarize)} messages")
        print(f"Keeping {len(to_keep)} recent items")

        return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scratch_agents/models/__init__.py DELETED
File without changes
scratch_agents/models/base_llm.py DELETED
@@ -1,12 +0,0 @@
1
from abc import abstractmethod
from pydantic import BaseModel
from .llm_request import LlmRequest


class BaseLlm(BaseModel):
    """Common interface that every concrete LLM backend implements."""

    # Provider-specific model identifier (e.g. an OpenAI model name).
    model: str

    @abstractmethod
    async def generate(self, request: LlmRequest):
        """Produce a response for the given request; subclasses must override."""
        ...
 
 
 
 
 
 
 
 
 
 
 
 
 
scratch_agents/models/llm_request.py DELETED
@@ -1,19 +0,0 @@
1
from typing import Any, Dict, List, Optional
from pydantic import BaseModel, Field
from ..types.contents import ContentItem


class LlmRequest(BaseModel):
    """Request object for LLM calls"""
    instructions: List[str] = Field(default_factory=list)
    contents: List[ContentItem] = Field(default_factory=list)
    tools_dict: Dict[str, Any] = Field(default_factory=dict)
    tool_choice: Optional[str] = None

    def add_instructions(self, instructions: List[str] | str):
        """Append a single instruction or a batch of them to the request."""
        new_items = [instructions] if isinstance(instructions, str) else list(instructions)
        self.instructions.extend(new_items)
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scratch_agents/models/llm_response.py DELETED
@@ -1,10 +0,0 @@
1
from typing import Any, Dict, List, Optional
from pydantic import BaseModel, Field
from ..types.contents import ContentItem


class LlmResponse(BaseModel):
    """Response object from LLM calls"""
    # Ordered content items produced by the model (Message and/or ToolCall).
    content: List[ContentItem] = Field(default_factory=list)
    # Set when the provider call failed; content is then left empty.
    error_message: Optional[str] = None
    # Token accounting, e.g. input_tokens / output_tokens / total_tokens.
    usage_metadata: Dict[str, Any] = Field(default_factory=dict)
 
 
 
 
 
 
 
 
 
 
 
scratch_agents/models/openai.py DELETED
@@ -1,174 +0,0 @@
1
from openai import AsyncOpenAI
from .base_llm import BaseLlm
from .llm_request import LlmRequest
from .llm_response import LlmResponse
from ..types.contents import Message, ToolCall, ToolResult
import json
from pydantic import Field, BaseModel
from typing import Dict, Any, List

class OpenAILlm(BaseLlm):
    """OpenAI LLM implementation"""

    # Extra keyword args (temperature, max_tokens, ...) forwarded on every call.
    llm_config: dict = Field(default_factory=dict)

    def __init__(self, model, **kwargs):
        """Store the model name and provider parameters; defer client creation."""
        super().__init__(model=model)
        self.llm_config = kwargs
        self._client = None

    @property
    def openai_client(self):
        # Lazily create the client; AsyncOpenAI reads OPENAI_API_KEY from the env.
        if self._client is None:
            self._client = AsyncOpenAI()
        return self._client

    async def generate(self, request: LlmRequest) -> LlmResponse:
        """Generate a response using OpenAI API

        Returns:
            LlmResponse whose content mixes Message and ToolCall items; on any
            failure a response carrying only error_message is returned instead
            of raising.
        """
        try:
            # Build messages for OpenAI API
            messages, model_params = self._build_llm_input(request, self.llm_config)

            # Convert tools_dict to tools array for OpenAI
            tools = None
            if request.tools_dict:
                tools = [tool.tool_definition for tool in request.tools_dict.values()]
            # Call OpenAI API
            call_kwargs = {}
            if request.tool_choice is not None:
                call_kwargs["tool_choice"] = request.tool_choice
            response = await self.openai_client.chat.completions.create(
                model=self.model,
                messages=messages,
                tools=tools,
                **call_kwargs,
                **model_params
            )

            # Extract message from response
            choice = response.choices[0]
            content_items = []

            # Handle text content
            if choice.message.content:
                content_items.append(Message(role="assistant", content=choice.message.content))

            # Handle tool calls
            if choice.message.tool_calls:
                for tool_call in choice.message.tool_calls:
                    content_items.append(ToolCall(
                        tool_call_id=tool_call.id,
                        name=tool_call.function.name,
                        arguments=json.loads(tool_call.function.arguments)
                    ))

            # Extract usage metadata
            usage_metadata = {
                "input_tokens": response.usage.prompt_tokens,
                "output_tokens": response.usage.completion_tokens,
                "total_tokens": response.usage.total_tokens
            }

            return LlmResponse(
                content=content_items,
                usage_metadata=usage_metadata
            )
        except Exception as e:
            # Surface the failure to the caller rather than raising.
            return LlmResponse(
                error_message=str(e)
            )

    def _build_llm_input(self, request: LlmRequest, model_config: dict):
        """Build messages and parameters for OpenAI API

        Converts the framework's content items into chat-completions format.
        Assistant text and the tool calls it issued are grouped into a single
        assistant message, because the API requires tool_calls to ride on the
        assistant message that precedes the role="tool" results.
        """
        messages = []

        # Add instructions as system messages
        for instruction in request.instructions:
            messages.append({"role": "system", "content": instruction})

        # Add conversation history
        # Group assistant messages and their tool calls together
        pending_assistant_content = None
        pending_tool_calls = []

        def flush_assistant_message():
            """Flush any pending assistant message with its tool calls"""
            # Closure only reads the pending_* variables; the caller resets them.
            if pending_assistant_content is not None or pending_tool_calls:
                msg = {"role": "assistant"}
                if pending_assistant_content is not None:
                    msg["content"] = pending_assistant_content
                else:
                    msg["content"] = None  # tool-call-only assistant turn
                if pending_tool_calls:
                    msg["tool_calls"] = pending_tool_calls
                messages.append(msg)
                return True
            return False

        for item in request.contents:
            if isinstance(item, Message):
                if item.role == "assistant":
                    # Accumulate assistant content
                    pending_assistant_content = item.content
                else:
                    # Non-assistant message, flush any pending assistant message
                    flush_assistant_message()
                    pending_assistant_content = None
                    pending_tool_calls = []
                    messages.append({"role": item.role, "content": item.content})

            elif isinstance(item, ToolCall):
                # Accumulate tool calls with the assistant message
                pending_tool_calls.append({
                    "id": item.tool_call_id,
                    "type": "function",
                    "function": {
                        "name": item.name,
                        "arguments": json.dumps(item.arguments)
                    }
                })

            elif isinstance(item, ToolResult):
                # Tool result means we need to flush any pending assistant message
                flush_assistant_message()
                pending_assistant_content = None
                pending_tool_calls = []

                messages.append({
                    "role": "tool",
                    "tool_call_id": item.tool_call_id,
                    "content": str(item.content) if item.content else ""
                })

        # Flush any remaining assistant message
        flush_assistant_message()

        # Extract model parameters
        model_params = {**self.llm_config}

        return messages, model_params

    async def generate_structured(self, messages: List[Dict[str, Any]], response_format: BaseModel):
        """Generate structured output using OpenAI's response_format

        Returns:
            The parsed pydantic object on success, or {"error": ...} on
            failure — callers must check the type before attribute access.
        """
        try:
            response = await self.openai_client.chat.completions.parse(
                model=self.model,
                messages=messages,
                response_format=response_format,
                **self.llm_config
            )

            return response.choices[0].message.parsed
        except Exception as e:
            return {"error": str(e)}

    async def embed(self, model, texts: List[str]) -> List[List[float]]:
        """Get embeddings using OpenAI API

        NOTE(review): on failure this returns {"error": ...}, which
        contradicts the List[List[float]] annotation — callers should
        handle both shapes.
        """
        try:
            response = await self.openai_client.embeddings.create(
                model=model,
                input=texts
            )
            return [embedding.embedding for embedding in response.data]
        except Exception as e:
            return {"error": str(e)}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scratch_agents/sessions/base_cross_session_manager.py DELETED
@@ -1,297 +0,0 @@
1
- """Base class for cross-session memory management."""
2
-
3
- from abc import ABC, abstractmethod
4
- from typing import List, Dict, Optional, Any
5
- import chromadb
6
- from chromadb.utils import embedding_functions
7
- from chromadb.config import Settings
8
- from datetime import datetime
9
- import logging
10
- import os
11
- import uuid
12
- from .session import Session
13
-
14
- logger = logging.getLogger(__name__)
15
-
16
-
17
- class BaseCrossSessionManager(ABC):
18
- """Abstract base class for cross-session memory management."""
19
-
20
- def __init__(
21
- self,
22
- model,
23
- collection_name: str,
24
- persist_directory: str = "./cross_session_db",
25
- embedding_model: str = "text-embedding-3-small"
26
- ):
27
- """Initialize the base cross-session manager.
28
-
29
- Args:
30
- model: LLM model for memory processing
31
- collection_name: Name of the ChromaDB collection
32
- persist_directory: Directory to persist ChromaDB data
33
- embedding_model: Optional custom embedding model
34
- """
35
- self.model = model
36
- self.collection_name = collection_name
37
- self.persist_directory = persist_directory
38
- self.embedding_model = embedding_model
39
-
40
- self.client = chromadb.PersistentClient(
41
- path=persist_directory,
42
- )
43
- embedding_function = embedding_functions.OpenAIEmbeddingFunction(
44
- api_key=os.getenv("OPENAI_API_KEY"),
45
- model_name=self.embedding_model
46
- )
47
-
48
- # Get or create collection
49
- try:
50
- self.collection = self.client.get_or_create_collection(
51
- name=collection_name,
52
- metadata={"hnsw:space": "cosine"},
53
- embedding_function=embedding_function
54
- )
55
- logger.info(f"Using existing collection: {collection_name}")
56
- except Exception:
57
- logger.error(f"Error getting or creating collection: {collection_name}")
58
- raise
59
-
60
- @abstractmethod
61
- async def extract_memories(
62
- self,
63
- events: List[Dict[str, Any]],
64
- ) -> List[str]:
65
- """Extract memories from session events.
66
-
67
- Args:
68
- events: List of session events
69
- user_id: User identifier
70
-
71
- Returns:
72
- List of extracted memory strings
73
- """
74
- pass
75
-
76
- async def process_session(
77
- self,
78
- session: Session,
79
- execution_id: str
80
- ) -> None:
81
- """Process a completed session and extract/merge memories.
82
-
83
- Args:
84
- session: Session data containing events
85
- execution_id: Unique execution identifier
86
- """
87
- try:
88
- user_id = session.user_id
89
- events = session.events
90
-
91
- events = [event for event in events if event.execution_id == execution_id]
92
-
93
- memories = await self.extract_memories(events)
94
-
95
- if memories:
96
- existing = await self.find_existing(memories, user_id)
97
- actions = await self.decide_actions(memories, existing, user_id)
98
- await self.execute_memory_actions(actions)
99
- else:
100
- logger.info(f"No memories extracted for user {user_id}")
101
-
102
- except Exception as e:
103
- logger.error(f"Error processing session: {e}")
104
-
105
- async def find_existing(
106
- self,
107
- memories: List[str],
108
- user_id: str
109
- ) -> List[Dict[str, Any]]:
110
- """Find existing memories.
111
-
112
- Args:
113
- memories: List of new memory strings to merge
114
- user_id: User identifier
115
-
116
- Returns:
117
- List of existing memories with metadata
118
- """
119
- existing_memories = []
120
- for memory in memories:
121
- existing = await self.search(memory, user_id)
122
- if existing:
123
- existing_memories.append(existing)
124
- return existing_memories
125
-
126
- @abstractmethod
127
- async def decide_actions(
128
- self,
129
- memories: List[str],
130
- existing: List[Dict[str, Any]],
131
- user_id: str
132
- ) -> List[Dict[str, Any]]:
133
- """Decide actions for new memories."""
134
- pass
135
-
136
- async def execute_memory_actions(
137
- self,
138
- actions: List[Dict[str, Any]]
139
- ) -> None:
140
- """Execute memory actions."""
141
- for action in actions:
142
- if action["action"] == "ADD":
143
- metadata = action.get("metadata", {})
144
- await self.add(action["memory"], action["user_id"], action.get("embedding"), metadata)
145
- elif action["action"] == "UPDATE":
146
- metadata = action.get("metadata", {})
147
- await self.update(action["memory_id"], action["memory"], action.get("embedding"), metadata)
148
- elif action["action"] == "DELETE":
149
- await self.delete(action["memory_id"])
150
- elif action["action"] == "NOOP":
151
- pass
152
-
153
- async def search(
154
- self,
155
- query: str,
156
- user_id: str,
157
- limit: int = 5
158
- ) -> List[Dict[str, Any]]:
159
- """Search for relevant memories.
160
-
161
- Args:
162
- query: Search query
163
- user_id: User identifier
164
- limit: Maximum number of results
165
-
166
- Returns:
167
- List of relevant memories with metadata
168
- """
169
- try:
170
- # Filter by user_id in metadata
171
- where = {"user_id": user_id}
172
-
173
- results = self.collection.query(
174
- query_texts=[query],
175
- n_results=limit,
176
- where=where
177
- )
178
-
179
- memories = []
180
- if results["documents"] and results["documents"][0]:
181
- for i, doc in enumerate(results["documents"][0]):
182
- memory = {
183
- "id": results["ids"][0][i] if results["ids"] and results["ids"][0] else None,
184
- "content": doc,
185
- "metadata": results["metadatas"][0][i] if results["metadatas"] else {},
186
- "distance": results["distances"][0][i] if results["distances"] else 0
187
- }
188
- memories.append(memory)
189
-
190
- return memories
191
-
192
- except Exception as e:
193
- logger.error(f"Error searching memories: {e}")
194
- return []
195
-
196
- async def add(
197
- self,
198
- memory: str,
199
- user_id: str,
200
- embedding: Optional[List[float]] = None,
201
- additional_metadata: Optional[Dict[str, Any]] = None,
202
- ) -> str:
203
- """Add a new memory.
204
-
205
- Args:
206
- memory: Memory content (as string for ChromaDB)
207
- user_id: User identifier
208
- embedding: Optional embedding vector
209
- additional_metadata: Additional metadata to store
210
-
211
- Returns:
212
- Memory ID
213
- """
214
- memory_id = f"{uuid.uuid4()}"
215
-
216
- final_metadata = {
217
- "user_id": user_id,
218
- "created_at": datetime.now().isoformat(),
219
- "updated_at": datetime.now().isoformat()
220
- }
221
-
222
- # Add any additional metadata (like the original structured data)
223
- if additional_metadata:
224
- final_metadata.update(additional_metadata)
225
-
226
- if embedding:
227
- self.collection.upsert(
228
- documents=[memory],
229
- ids=[memory_id],
230
- embeddings=[embedding],
231
- metadatas=[final_metadata]
232
- )
233
- else:
234
- self.collection.add(
235
- documents=[memory],
236
- ids=[memory_id],
237
- metadatas=[final_metadata]
238
- )
239
-
240
- return memory_id
241
-
242
- async def update(
243
- self,
244
- memory_id: str,
245
- memory: str,
246
- embedding: Optional[List[float]] = None,
247
- additional_metadata: Optional[Dict[str, Any]] = None,
248
- ) -> None:
249
- """Update an existing memory.
250
-
251
- Args:
252
- memory_id: ID of memory to update
253
- memory: New memory content (as string for ChromaDB)
254
- embedding: Optional embedding of the memory
255
- additional_metadata: Additional metadata to update
256
- """
257
- if not memory_id:
258
- logger.error("Cannot update memory: memory_id is None")
259
- return
260
-
261
- # Get existing metadata
262
- existing = self.collection.get(ids=[memory_id])
263
- if existing["metadatas"] and existing["metadatas"][0]:
264
- final_metadata = existing["metadatas"][0].copy()
265
- final_metadata["updated_at"] = datetime.now().isoformat()
266
- else:
267
- final_metadata = {}
268
- final_metadata["updated_at"] = datetime.now().isoformat()
269
-
270
- # Update with any additional metadata
271
- if additional_metadata:
272
- final_metadata.update(additional_metadata)
273
-
274
- if embedding:
275
- self.collection.upsert(
276
- ids=[memory_id],
277
- documents=[memory],
278
- embeddings=[embedding],
279
- metadatas=[final_metadata]
280
- )
281
- else:
282
- self.collection.upsert(
283
- ids=[memory_id],
284
- documents=[memory],
285
- metadatas=[final_metadata]
286
- )
287
-
288
- async def delete(
289
- self,
290
- memory_id: str
291
- ) -> None:
292
- """Delete a memory.
293
-
294
- Args:
295
- memory_id: ID of memory to delete
296
- """
297
- self.collection.delete(ids=[memory_id])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scratch_agents/sessions/base_session_manager.py DELETED
@@ -1,28 +0,0 @@
1
from abc import ABC, abstractmethod
from typing import Optional, Dict, List
from scratch_agents.types.events import Event
from scratch_agents.sessions.session import Session


class BaseSessionManager(ABC):
    """Abstract base class for session management"""

    @abstractmethod
    def create_session(self, session_id: Optional[str] = None, user_id: Optional[str] = None) -> Session:
        """Create a new session; implementations may generate an id when None."""
        pass

    @abstractmethod
    def get_session(self, session_id: str) -> Optional[Session]:
        """Load a session from storage"""
        pass

    @abstractmethod
    def get_or_create_session(self, session_id: str, user_id: Optional[str] = None) -> Session:
        """Get an existing session or create a new one"""
        pass

    @abstractmethod
    def add_event(self, session: Session, event: Event) -> None:
        """Add an event to the session"""
        pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scratch_agents/sessions/in_memory_session_manager.py DELETED
@@ -1,30 +0,0 @@
1
import uuid
from datetime import datetime
from typing import Optional

from .base_session_manager import BaseSessionManager
from .session import Session
from scratch_agents.types.events import Event

class InMemorySessionManager(BaseSessionManager):
    """Session manager that keeps all sessions in a process-local dict."""

    def __init__(self):
        # session_id -> Session; data lives only as long as this process.
        self.sessions = {}

    def create_session(self, session_id: Optional[str] = None, user_id: str = None) -> Session:
        """Create a new session.

        Making session_id optional matches BaseSessionManager's contract;
        a uuid4 id is generated when none is supplied.

        Raises:
            ValueError: If a session with this id already exists.
        """
        if session_id is None:
            session_id = str(uuid.uuid4())
        if session_id in self.sessions:
            raise ValueError(f"Session with id {session_id} already exists")
        self.sessions[session_id] = Session(session_id=session_id, user_id=user_id)
        return self.sessions[session_id]

    def get_session(self, session_id: str) -> Session:
        """Return the stored session.

        NOTE(review): raises ValueError for unknown ids, while the base class
        annotates Optional[Session] — kept as-is since callers may rely on it.
        """
        if session_id not in self.sessions:
            raise ValueError(f"Session with id {session_id} does not exist")
        return self.sessions[session_id]

    def get_or_create_session(self, session_id: str, user_id: str = None) -> Session:
        """Return the existing session, creating it first if needed."""
        if session_id not in self.sessions:
            return self.create_session(session_id, user_id)
        return self.sessions[session_id]

    def add_event(self, session: Session, event: Event) -> None:
        """Append an event and refresh the session's last-updated timestamp."""
        session.events.append(event)
        session.last_updated_at = datetime.now()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scratch_agents/sessions/session.py DELETED
@@ -1,23 +0,0 @@
1
import uuid
from pydantic import BaseModel, Field
from typing import List, Dict, Any
from datetime import datetime
from ..types.contents import ContentItem

class Session(BaseModel):
    """Container for short-term memory during a conversation session"""
    user_id: str
    session_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    events: List[ContentItem] = Field(default_factory=list)
    state: Dict[str, Any] = Field(default_factory=dict)
    last_updated_at: datetime = Field(default_factory=datetime.now)

    @property
    def core_memory(self) -> Dict[str, str]:
        """Core-memory dict, lazily seeded with default persona/human entries."""
        defaults = {
            "persona": "You are a helpful AI assistant",
            "human": ""
        }
        return self.state.setdefault("core_memory", defaults)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scratch_agents/sessions/task_cross_session_manager.py DELETED
@@ -1,194 +0,0 @@
1
- """Task-specific cross-session memory management."""
2
-
3
- from typing import List, Dict, Any, Optional, Literal
4
- import logging
5
- from pydantic import BaseModel, Field
6
- import json
7
-
8
- from .base_cross_session_manager import BaseCrossSessionManager
9
- from ..types.events import Event
10
- from ..types.contents import Message, ToolCall, ToolResult
11
-
12
- logger = logging.getLogger(__name__)
13
-
14
- MEMORY_EXTRACT_PROMPT = """
15
- You are a Task Memory Extractor specializing in tracking agent actions and problem-solving attempts.
16
- Extract ONLY information about what the agent ACTUALLY DID in this conversation.
17
-
18
- Focus on:
19
- 1. **Problem Identification**: What issue or challenge was the agent trying to address?
20
- 2. **Actions Taken**: What specific actions did the agent perform? (tools used, searches made, code written, etc.)
21
- 3. **Key Discoveries**: What important facts or information did the agent discover during the process?
22
- 4. **Success Status**: Was the task completed successfully?
23
-
24
- DO NOT extract:
25
- - Personal user information (name, preferences, etc.)
26
- - General conversation or greetings
27
- - User opinions or feelings
28
- - Future plans or what should be done
29
-
30
- Format each task as a structured memory with:
31
- - problem: Clear description of what the agent was asked to do or investigate
32
- - actions_taken: Specific actions the agent performed (not what it should do)
33
- - key_discoveries: Important information discovered during the task
34
- - success: true/false indicating if the task was completed
35
-
36
- Examples of GOOD task memories:
37
- {
38
- "problem": "User asked about React component not rendering",
39
- "actions_taken": "Examined useEffect hook, identified missing dependency in array, added state variable to dependency array",
40
- "key_discoveries": "useEffect was missing 'count' state variable in dependency array causing stale closure",
41
- "success": true
42
- }
43
-
44
- {
45
- "problem": "User reported database connection timeouts in production",
46
- "actions_taken": "Checked connection pool configuration, analyzed production logs, increased pool size from 10 to 50, implemented retry logic with exponential backoff",
47
- "key_discoveries": "Production load peaked at 45 concurrent connections, default pool size was only 10",
48
- "success": true
49
- }
50
-
51
- {
52
- "problem": "User asked 'What is Mem0 and how does it work?'",
53
- "actions_taken": "Performed multiple web searches with different query variations to find information about Mem0",
54
- "key_discoveries": "Found that Mem0 is an open-source memory layer for LLM applications, has a GitHub repo (mem0ai/mem0), provides hybrid data storage and intelligent retrieval",
55
- "success": false
56
- }
57
- """
58
-
59
- MEMORY_ACTION_PROMPT = """
60
- You are a Task Memory Action Decider specializing in tracking agent actions and problem-solving attempts.
61
- You are given a list of new task memories and a list of existing task memories.
62
- You need to decide whether to ADD, UPDATE, DELETE, or NOOP the new task memories.
63
-
64
- Format your response as a list of actions with:
65
- - action: ADD, UPDATE, DELETE, or NOOP
66
- - memory_id: The id of the memory to update or delete
67
-
68
- Action:
69
- - ADD: Add the new task memory if it describes a different problem or significantly different approach
70
- - UPDATE: Update the existing task memory if it's the same problem but with better/more complete actions or discoveries
71
- - DELETE: Delete the existing task memory if it's outdated or no longer relevant
72
- - NOOP: Do not add if it's essentially the same problem with similar actions and discoveries
73
-
74
- """
75
-
76
-
77
- class TaskMemory(BaseModel):
78
- """Structured task memory."""
79
- problem: str = Field(description="The problem or task the agent was asked to address")
80
- actions_taken: str = Field(description="The specific actions the agent performed")
81
- success: bool = Field(description="Whether the task was completed successfully")
82
- key_discoveries: Optional[str] = Field(default=None, description="Important information discovered during the task")
83
-
84
- class MemoryAction(BaseModel):
85
- """Memory action."""
86
- action: Literal["ADD", "UPDATE", "DELETE", "NOOP"] = Field(description="The action to take with the memory")
87
- memory_id: Optional[str] = Field(description="The id of the memory to update or delete")
88
-
89
-
90
- class TaskCrossSessionManager(BaseCrossSessionManager):
91
- """Manage task-specific memories across sessions."""
92
-
93
- def __init__(self, model,
94
- collection_name="task_memories",
95
- persist_directory="./cross_session_db",
96
- ):
97
- """Initialize task cross-session manager.
98
-
99
- Args:
100
- model: LLM model for memory extraction
101
- collection_name: Name of the ChromaDB collection
102
- persist_directory: Directory to persist ChromaDB data
103
- """
104
- super().__init__(model, collection_name, persist_directory)
105
-
106
- async def extract_memories(self, events: List[Event]):
107
- conversation_parts = []
108
-
109
- for event in events:
110
- for item in event.content:
111
- if isinstance(item, Message):
112
- conversation_parts.append(f"{item.role}: {item.content}")
113
- elif isinstance(item, ToolCall):
114
- conversation_parts.append(f"{item.tool_call_id}: {item.name}")
115
- elif isinstance(item, ToolResult):
116
- conversation_parts.append(f"{item.tool_call_id}: {item.name} {item.content}")
117
-
118
- conversation = "\n".join(conversation_parts)
119
-
120
- user_prompt = f"""Conversation:
121
- {conversation}
122
- """
123
- messages = [
124
- {"role": "system", "content": MEMORY_EXTRACT_PROMPT},
125
- {"role": "user", "content": user_prompt}
126
- ]
127
-
128
- try:
129
- response = await self.model.generate_structured(messages, TaskMemory)
130
- task_memory = TaskMemory.model_validate(response)
131
- return [task_memory.model_dump()]
132
-
133
- except Exception as e:
134
- logger.error(f"Error extracting task memories: {e}")
135
- return []
136
-
137
- async def find_existing(self, memories: List[Dict], user_id: str) -> List[Dict[str, Any]]:
138
- existing_memories = []
139
- for memory in memories:
140
- query = memory["problem"]
141
- results = await self.search(query, user_id)
142
- if results:
143
- existing_memories.append(results[0])
144
- return existing_memories
145
-
146
- async def decide_actions(self, new_memory: List[Dict], existing: List[Dict[str, Any]], user_id: str) -> List[Dict[str, Any]]:
147
- system_prompt = MEMORY_ACTION_PROMPT
148
- user_prompt = f"""
149
- Existing memory: {existing}
150
- New memory: {new_memory}
151
- """
152
- messages = [
153
- {"role": "system", "content": system_prompt},
154
- {"role": "user", "content": user_prompt}
155
- ]
156
- action = await self.model.generate_structured(messages, MemoryAction)
157
- result = []
158
- if action.action == "UPDATE":
159
-
160
- memory_id = action.memory_id
161
- if not memory_id:
162
- logger.error("Cannot update memory: no memory_id available")
163
- return []
164
- embeddings = await self.model.embed(self.embedding_model, [new_memory[0]["problem"]])
165
- # Convert dict to string for ChromaDB document field
166
- memory_str = json.dumps(new_memory[0], ensure_ascii=False)
167
- result.append({
168
- "action": "UPDATE",
169
- "memory_id": memory_id,
170
- "memory": memory_str,
171
- "embedding": embeddings[0],
172
- "metadata": new_memory[0] # Store original dict in metadata
173
- })
174
- elif action.action == "ADD":
175
- embeddings = await self.model.embed(self.embedding_model, [new_memory[0]["problem"]])
176
- # Convert dict to string for ChromaDB document field
177
- memory_str = json.dumps(new_memory[0], ensure_ascii=False)
178
- result.append({
179
- "action": "ADD",
180
- "memory": memory_str,
181
- "user_id": user_id,
182
- "embedding": embeddings[0],
183
- "metadata": new_memory[0] # Store original dict in metadata
184
- })
185
- elif action.action == "DELETE":
186
- result.append({
187
- "action": "DELETE",
188
- "memory_id": action.memory_id
189
- })
190
- elif action.action == "NOOP":
191
- result.append({
192
- "action": "NOOP"
193
- })
194
- return result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scratch_agents/sessions/user_cross_session_manager.py DELETED
@@ -1,185 +0,0 @@
1
- import json
2
- import chromadb
3
- from chromadb.utils import embedding_functions
4
- from typing import List, Optional, Literal, Dict, Any
5
- from enum import Enum
6
- import uuid
7
- from datetime import datetime
8
- import os
9
- from pydantic import BaseModel, Field
10
- import logging
11
-
12
- from .session import Session
13
- from .base_cross_session_manager import BaseCrossSessionManager
14
- from ..types.contents import Message
15
- from ..types.events import Event
16
- from ..models.llm_request import LlmRequest
17
-
18
- logger = logging.getLogger(__name__)
19
-
20
- MEMORY_EXTRACT_PROMPT = """
21
- You are a User Memory Extractor specializing in accurately storing ONLY facts about the USER from their messages.
22
-
23
- CRITICAL RULES:
24
- 1. ONLY extract factual information that the user explicitly states about themselves
25
- 2. NEVER extract questions the user asks
26
- 3. NEVER extract hypothetical scenarios or wishes
27
- 4. NEVER create memories from assistant responses
28
- 5. If the user is only asking questions, return an empty list
29
-
30
- Types of Information to Remember:
31
-
32
- 1. **Personal Identity & Details**: Names, relationships, family information, important dates
33
- 2. **Professional Information**: Current job title, company name, work responsibilities, career goals, past work experience
34
- 3. **Personal Preferences**: Likes, dislikes, preferences in food, activities, entertainment, brands
35
- 4. **Goals & Plans**: Future intentions, upcoming events, trips, personal objectives
36
- 5. **Health & Wellness**: Dietary restrictions, fitness routines, health conditions
37
- 6. **Lifestyle & Activities**: Hobbies, regular activities, service preferences
38
- 7. **Location & Living Situation**: Where they live, recent moves, living arrangements
39
- """
40
-
41
- MEMORY_ACTION_PROMPT = """
42
- You are a User Memory Action Decider specializing in accurately managing user facts and preferences.
43
-
44
- CRITICAL RULES FOR CONFLICTING INFORMATION:
45
- 1. When new information CONTRADICTS or UPDATES existing information, you MUST use UPDATE action
46
- 2. Location changes: If user moves from Place A to Place B, UPDATE the existing location memory
47
- 3. Status changes: If user changes jobs, relationships, or any status, UPDATE the relevant memory
48
- 4. Preference changes: If user's preferences change, UPDATE the existing preference
49
- 5. Look for semantic conflicts, not just exact text matches
50
-
51
- Examples of when to UPDATE:
52
- - Existing: "User works at Company A" + New: "User works at Company B" → UPDATE existing memory
53
- - Existing: "User likes coffee" + New: "User doesn't like coffee anymore" → UPDATE existing memory
54
-
55
- Format your response as a list of actions with:
56
- - action: ADD, UPDATE, DELETE, or NOOP
57
- - memory_id: The id of the memory to update or delete (required for UPDATE/DELETE)
58
- - content: The content of the memory to add or update (required for ADD/UPDATE)
59
-
60
- Actions:
61
- - ADD: Add new information that doesn't conflict with existing memories
62
- - UPDATE: Replace existing memory when there's conflicting or updated information
63
- - DELETE: Remove outdated or incorrect memory (use sparingly)
64
- - NOOP: Skip if the information is already stored or not relevant
65
- """
66
-
67
- class MemoryAction(BaseModel):
68
- """Structured output for memory action decision"""
69
- action: Literal["ADD", "UPDATE", "DELETE", "NOOP"] = Field(
70
- description="The action to take with the memory"
71
- )
72
- memory_id: Optional[str] = Field(
73
- description="The id of the memory to update or delete"
74
- )
75
- content: Optional[str] = Field(
76
- description="The content of the memory to add or update"
77
- )
78
-
79
- class MemoryActions(BaseModel):
80
- """A list of memory actions"""
81
- actions: List[MemoryAction] = Field(
82
- description="A list of memory actions"
83
- )
84
-
85
- class MemoryFacts(BaseModel):
86
- """A list of facts about the user"""
87
- facts: List[str] = Field(
88
- description="A list of facts about the user"
89
- )
90
-
91
- class UserCrossSessionManager(BaseCrossSessionManager):
92
- """Manage memories across sessions using ChromaDB"""
93
-
94
- def __init__(self, model, collection_name="user_memory", persist_directory="./cross_session_db", embedding_model="text-embedding-3-small"):
95
- # Initialize base class first
96
- super().__init__(model, collection_name, persist_directory, embedding_model)
97
-
98
- async def extract_memories(self, events: List[Any]) -> List[str]:
99
- """Extract important information from execution events using LLM"""
100
-
101
- conversation_parts = []
102
- for event in events:
103
- for item in event.content:
104
- if hasattr(item, 'role') and hasattr(item, 'content'):
105
- if item.role == 'user':
106
- conversation_parts.append(f"User: {item.content}")
107
-
108
- conversation = "\n".join(conversation_parts)
109
-
110
- if not conversation.strip():
111
- return []
112
-
113
- user_prompt = f"""Conversation:
114
- {conversation}
115
- """
116
-
117
- messages = [
118
- {"role": "system", "content": MEMORY_EXTRACT_PROMPT},
119
- {"role": "user", "content": user_prompt}
120
- ]
121
-
122
- response = await self.model.generate_structured(
123
- messages,
124
- MemoryFacts
125
- )
126
- logger.debug(f"Extracted facts: {response}")
127
- try:
128
- return response.facts
129
- except Exception as e:
130
- logger.error(f"Error extracting facts: {e}")
131
- return []
132
-
133
- async def find_existing(
134
- self,
135
- memories: List[str],
136
- user_id: str
137
- ) -> List[Dict[str, Any]]:
138
- """Find existing memories.
139
-
140
- Args:
141
- user_id: User identifier
142
-
143
- Returns:
144
- List of existing memories with metadata including timestamps
145
- """
146
- existing_memories = []
147
- results = self.collection.get(
148
- where={"user_id": user_id},
149
- include=["documents", "metadatas"]
150
- )
151
- if results["documents"]:
152
- for i, doc in enumerate(results["documents"]):
153
- metadata = results["metadatas"][i] if results["metadatas"] else {}
154
- existing_memories.append({
155
- "id": results["ids"][i],
156
- "content": doc,
157
- "metadata": metadata,
158
- "created_at": metadata.get("created_at", "Unknown"),
159
- "updated_at": metadata.get("updated_at", "Unknown")
160
- })
161
- return existing_memories
162
-
163
- async def decide_actions(self, new_memories: List[str], existing: List[Dict[str, Any]], user_id: str) -> List[Dict[str, Any]]:
164
- """Decide actions for new memories."""
165
- system_prompt = MEMORY_ACTION_PROMPT
166
-
167
- user_prompt = f"""
168
- Existing memory: {existing}
169
- New memory: {new_memories}
170
- """
171
- messages = [
172
- {"role": "system", "content": system_prompt},
173
- {"role": "user", "content": user_prompt}
174
- ]
175
- actions = await self.model.generate_structured(messages, MemoryActions)
176
- result = []
177
- for action in actions.actions:
178
- action_dict = action.model_dump()
179
- if action_dict["action"] == "ADD":
180
- action_dict["user_id"] = user_id
181
- action_dict["memory"] = action_dict.pop("content", None)
182
- elif action_dict["action"] == "UPDATE":
183
- action_dict["memory"] = action_dict.pop("content", None)
184
- result.append(action_dict)
185
- return result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scratch_agents/tools/__init__.py DELETED
@@ -1,5 +0,0 @@
1
- from .search_web import search_web
2
- from .wikipedia import search_wikipedia, get_wikipedia_page
3
- from .calculator import calculator
4
-
5
- __all__ = ["search_web", "search_wikipedia", "get_wikipedia_page", "calculator"]