shekkari21 commited on
Commit
ec96f6b
·
1 Parent(s): 6a0d993

started AI agents from scratch

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .env.example +0 -1
  2. chapter_02_llm/01_llm_chat.py +0 -53
  3. chapter_02_llm/02_conversation_management.py +0 -53
  4. chapter_02_llm/03_structured_output.py +0 -23
  5. chapter_02_llm/04_asynchronous_llm_call.py +0 -31
  6. chapter_02_llm/05_potato_problem.py +0 -181
  7. chapter_03_tool_use/ch3_01_calculator_tool.py +0 -123
  8. chapter_03_tool_use/ch3_02_tavily_search_tool.py +0 -16
  9. chapter_03_tool_use/ch3_03_wikipedia_tool.py +0 -23
  10. chapter_03_tool_use/ch3_04_tool_definition.py +0 -72
  11. chapter_03_tool_use/ch3_05_tools_exercise.py +0 -113
  12. chapter_03_tool_use/ch3_06_tool_abstraction.py +0 -126
  13. chapter_03_tool_use/ch3_07_tool_decorator.py +0 -45
  14. chapter_03_tool_use/ch3_08_mcp_tavily_custom.py +0 -37
  15. chapter_04_basic_agent/01_/bsolve_kipchoge_problem.py +0 -30
  16. chapter_04_basic_agent/02_agent_structured_output.py +0 -33
  17. chapter_04_basic_agent/03_human_in_the_loop.py +0 -76
  18. chapter_06_memory/01_session_agent.py +0 -56
  19. chapter_06_memory/02_core_memory_strategy.py +0 -35
  20. chapter_06_memory/03_core_memory_update.py +0 -32
  21. chapter_06_memory/04_sliding_window.py +0 -43
  22. chapter_06_memory/05_summarization.py +0 -64
  23. chapter_06_memory/06_conversation_search.py +0 -59
  24. chapter_06_memory/07_task_long_term.py +0 -76
  25. chapter_06_memory/08_user_long_term.py +0 -98
  26. my_code.ipynb +926 -0
  27. pyproject.toml +4 -0
  28. scratch_agents/agents/__init__.py +0 -0
  29. scratch_agents/agents/execution_context_ch4.py +0 -27
  30. scratch_agents/agents/execution_context_ch6.py +0 -34
  31. scratch_agents/agents/tool_calling_agent_ch4_base.py +0 -116
  32. scratch_agents/agents/tool_calling_agent_ch4_callback.py +0 -203
  33. scratch_agents/agents/tool_calling_agent_ch4_structured_output.py +0 -146
  34. scratch_agents/agents/tool_calling_agent_ch6.py +0 -226
  35. scratch_agents/memory/base_memory_strategy.py +0 -13
  36. scratch_agents/memory/core_memory_strategy.py +0 -21
  37. scratch_agents/memory/sliding_window_strategy.py +0 -26
  38. scratch_agents/memory/summarization_strategy.py +0 -77
  39. scratch_agents/models/__init__.py +0 -0
  40. scratch_agents/models/base_llm.py +0 -12
  41. scratch_agents/models/llm_request.py +0 -19
  42. scratch_agents/models/llm_response.py +0 -10
  43. scratch_agents/models/openai.py +0 -174
  44. scratch_agents/sessions/base_cross_session_manager.py +0 -297
  45. scratch_agents/sessions/base_session_manager.py +0 -28
  46. scratch_agents/sessions/in_memory_session_manager.py +0 -30
  47. scratch_agents/sessions/session.py +0 -23
  48. scratch_agents/sessions/task_cross_session_manager.py +0 -194
  49. scratch_agents/sessions/user_cross_session_manager.py +0 -185
  50. scratch_agents/tools/__init__.py +0 -5
.env.example DELETED
@@ -1 +0,0 @@
1
- OPENAI_API_KEY=asdf
 
 
chapter_02_llm/01_llm_chat.py DELETED
@@ -1,53 +0,0 @@
1
- # Listing 2.1
2
- from openai import OpenAI
3
- from dotenv import load_dotenv
4
-
5
- load_dotenv()
6
-
7
- client = OpenAI()
8
-
9
- # Listing 2.2
10
- response = client.chat.completions.create(
11
- model="gpt-5-mini",
12
- messages=[
13
- {"role": "developer", "content": "You are a helpful assistant."},
14
- {"role": "user", "content": [{ "type": "text", "text": "Who's there?" }]}
15
- ]
16
- )
17
- print(response.choices[0].message.content)
18
-
19
- # Listing 2.3
20
- response = client.chat.completions.create(
21
- model="o4-mini",
22
- messages=[
23
- {"role": "developer", "content": "You are a helpful assistant."},
24
- {"role": "user", "content": "Who's there?"}
25
- ]
26
- )
27
- print(response.choices[0].message.content)
28
- print(f"Input tokens: {response.usage.prompt_tokens}")
29
- print(f"Output tokens: {response.usage.completion_tokens}")
30
- print(f"Reasoning tokens: {response.usage.completion_tokens_details.reasoning_tokens}")
31
-
32
- # Listing 2.4
33
- response = client.chat.completions.create(
34
- model="gpt-4o-mini",
35
- messages=[
36
- {"role": "developer", "content": "You are a helpful assistant."},
37
- {"role": "user", "content": "Who's there?"}
38
- ],
39
- stream=True,
40
- temperature=0.1,
41
- max_completion_tokens=200,
42
- logprobs=True
43
- )
44
- for chunk in response:
45
- print(chunk.choices[0].delta.content, end="", flush=True)
46
-
47
- # Listing 2.5
48
- response = client.responses.create(
49
- model="gpt-5-mini",
50
- input="Where is the capital of South Korea?",
51
- instructions="You are a helpful assistant."
52
- )
53
- print(response.output_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
chapter_02_llm/02_conversation_management.py DELETED
@@ -1,53 +0,0 @@
1
- from openai import OpenAI
2
- from dotenv import load_dotenv
3
-
4
- load_dotenv()
5
-
6
- client = OpenAI()
7
-
8
- # Listing 2.6
9
- response_1 = client.chat.completions.create(
10
- model='gpt-5-mini',
11
- messages=[{"role": "user", "content": "My name is Jungjun"}],
12
- )
13
- print(response_1.choices[0].message.content)
14
-
15
- response_2 = client.chat.completions.create(
16
- model='gpt-5-mini',
17
- messages=[{"role": "user", "content": "What is my name?"}],
18
- )
19
- print(response_2.choices[0].message.content)
20
-
21
- # Listing 2.7
22
- messages = []
23
- messages.append({"role": "user", "content": "My name is Jungjun"})
24
-
25
- response_3 = client.chat.completions.create(
26
- model='gpt-5-mini',
27
- messages=messages,
28
- )
29
- print(response_3.choices[0].message.content)
30
-
31
- messages.append({"role": "assistant", "content": response_3.choices[0].message.content})
32
- messages.append({"role": "user", "content": "What is my name?"})
33
-
34
- response_4 = client.chat.completions.create(
35
- model='gpt-5-mini',
36
- messages=messages,
37
- )
38
- print(response_4.choices[0].message.content)
39
-
40
- # Listing 2.8
41
- response = client.responses.create(
42
- model="gpt-5-mini",
43
- input="My name is Jungjun",
44
- )
45
- print(response.output_text)
46
-
47
- second_response = client.responses.create(
48
- model="gpt-5-mini",
49
- previous_response_id=response.id,
50
- input=[{"role": "user", "content": "What is my name?"}],
51
- )
52
- print(second_response.output_text)
53
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
chapter_02_llm/03_structured_output.py DELETED
@@ -1,23 +0,0 @@
1
- from pydantic import BaseModel
2
- from openai import OpenAI
3
- from dotenv import load_dotenv
4
-
5
- load_dotenv()
6
-
7
- client = OpenAI()
8
-
9
- # Listing 2.10
10
- class User(BaseModel):
11
- name: str
12
- email: str
13
-
14
- response = client.beta.chat.completions.parse(
15
- model='gpt-5-mini',
16
- messages=[{"role": "user", "content": """My name is John Smith,
17
- my phone number is (555) 123-4567,
18
- and my email is john.smith@example.com"""}],
19
- response_format=User,
20
- )
21
-
22
- print(type(response.choices[0].message.parsed))
23
- print(response.choices[0].message.parsed)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
chapter_02_llm/04_asynchronous_llm_call.py DELETED
@@ -1,31 +0,0 @@
1
- import asyncio
2
- from openai import AsyncOpenAI
3
- from dotenv import load_dotenv
4
-
5
- load_dotenv()
6
-
7
- client = AsyncOpenAI()
8
-
9
- # Listing 2.11
10
- async def get_answer(prompt): #A
11
- response = await client.chat.completions.create( #B
12
- model="gpt-5-mini",
13
- messages=[{"role": "user", "content": prompt}]
14
- )
15
- return response.choices[0].message.content
16
-
17
- async def main():
18
- prompts = [
19
- "Hello!",
20
- "What's 2 + 2?",
21
- "Tell me a short joke about cats."
22
- ]
23
-
24
- tasks = [get_answer(p) for p in prompts]
25
-
26
- results = await asyncio.gather(*tasks)
27
-
28
- for r in results:
29
- print(r)
30
-
31
- asyncio.run(main())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
chapter_02_llm/05_potato_problem.py DELETED
@@ -1,181 +0,0 @@
1
- from openai import AsyncOpenAI
2
- from dotenv import load_dotenv
3
- from pydantic import BaseModel
4
-
5
- load_dotenv()
6
-
7
- client = AsyncOpenAI()
8
-
9
- import asyncio
10
- import time
11
-
12
- class PotatoSolution(BaseModel):
13
- thought_process: str
14
- final_answer: str
15
-
16
- SYS_PROMPT = """You are a general AI assistant.
17
- I will ask you a question. Report your thoughts in "thought_process" and finish your answer in "final_answer".
18
- YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
19
- If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
20
- If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
21
- If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
22
- """
23
-
24
- FAMILY_REUNION_PROBLEM = """
25
- My family reunion is this week, and I was assigned the mashed potatoes to bring. The attendees include my married mother and father, my twin brother and his family, my aunt and her family, my grandma and her brother, her brother's daughter, and his daughter's family. All the adults but me have been married, and no one is divorced or remarried, but my grandpa and my grandma's sister-in-law passed away last year. All living spouses are attending. My brother has two children that are still kids, my aunt has one six-year-old, and my grandma's brother's daughter has three kids under 12. I figure each adult will eat about 1.5 potatoes of mashed potatoes and each kid will eat about 1/2 a potato of mashed potatoes, except my second cousins don't eat carbs. How many potatoes do I need in total? Just give the number.
26
- """
27
-
28
- EXPECTED_ANSWER = "18"
29
-
30
- async def get_llm_answer(
31
- client: AsyncOpenAI,
32
- model_name: str,
33
- prompt: str,
34
- result_format: type[BaseModel] | None = None
35
- ) -> tuple[str, str]:
36
- try:
37
- api_call_params = {
38
- "model": model_name,
39
- "messages": [
40
- {"role": "system", "content": SYS_PROMPT},
41
- {"role": "user", "content": prompt}
42
- ]
43
- }
44
-
45
- if model_name not in ["gpt-5", "gpt-5-mini"]:
46
- api_call_params["temperature"] = 0.5
47
-
48
- response = await client.beta.chat.completions.parse(
49
- **api_call_params,
50
- response_format=result_format
51
- )
52
- parsed_object = response.choices[0].message.parsed
53
- return parsed_object.thought_process, parsed_object.final_answer
54
-
55
- except Exception as e:
56
- print(f"Error during LLM API call for model {model_name}: {e}")
57
- return "", ""
58
-
59
-
60
- async def run_problem_test(
61
- local_client: AsyncOpenAI,
62
- model_name: str,
63
- prompt_name: str,
64
- prompt_content: str,
65
- num_tests: int,
66
- expected_answer: str
67
- ) -> tuple[int, float]:
68
- """
69
- Asynchronously runs the math problem test N times for the specified prompt
70
- and returns the number of successful answers and total execution time.
71
- """
72
- print(f"\n--- Testing '{prompt_name}' prompt strategy ({num_tests} repetitions) ---")
73
-
74
- tasks = [
75
- get_llm_answer(local_client, model_name, prompt_content, result_format=PotatoSolution)
76
- for _ in range(num_tests)
77
- ]
78
-
79
- llm_responses = await asyncio.gather(*tasks)
80
-
81
- correct_answers = 0
82
- for i, (_, final_answer) in enumerate(llm_responses):
83
- if final_answer == expected_answer:
84
- correct_answers += 1
85
-
86
- print(f"'{prompt_name}' test completed: {correct_answers}/{num_tests} correct (Success rate: {correct_answers/num_tests*100:.2f}%)")
87
- return correct_answers
88
-
89
- async def test_model_with_all_strategies(model_name: str, number_of_runs: int):
90
- """
91
- Test a single model with all prompt strategies in parallel.
92
- Returns results for the model.
93
- """
94
- print(f"\n======================================================================")
95
- print(f"Testing Model: {model_name}")
96
- print(f"======================================================================")
97
-
98
- # Define all prompts
99
- prompts = {
100
- "Baseline (Zero-shot)": FAMILY_REUNION_PROBLEM,
101
-
102
- "Few-shot": f"""
103
- Here's an example of how to solve a similar family calculation problem:
104
- <example>
105
- Question: "I'm hosting a birthday party. Attendees include me, my parents, my sister and her husband, and my uncle with his two teenage children. Each adult will eat 2 slices of pizza and each child will eat 1 slice. How many pizza slices do I need?"
106
- Answer: 14
107
- </example>
108
- Now solve this problem:
109
-
110
- {FAMILY_REUNION_PROBLEM}
111
- """,
112
-
113
- "Role-based": f"""
114
- You are a family event planning specialist with expertise in calculating food quantities for family gatherings. You excel at parsing complex family relationships and determining accurate serving quantities based on different demographics and dietary preferences.
115
- Using your expertise, please solve this problem:
116
-
117
-
118
- {FAMILY_REUNION_PROBLEM}
119
- """,
120
-
121
- "Chain-of-Thought (Guided)": f"""
122
- {FAMILY_REUNION_PROBLEM}
123
-
124
- Let's solve this step by step:
125
- 1. First, identify all family members attending:
126
- - List each person and their relationship to you
127
- - Account for spouses of married individuals
128
- - Note any deceased family members who won't be attending
129
- 2. Categorize attendees by age group:
130
- - Count total adults
131
- - Count total children
132
- - Note any special dietary restrictions
133
- 3. Apply consumption rules:
134
- - Calculate potatoes needed for adults
135
- - Calculate potatoes needed for children
136
- - Adjust for any dietary restrictions
137
- 4. Sum the total number of potatoes needed Please work through each step carefully.
138
-
139
- """,
140
-
141
- "Simple Chain-of-Thought": f"""
142
- {FAMILY_REUNION_PROBLEM}
143
-
144
- Think step by step and give the answer.
145
- """
146
- }
147
-
148
- # Run all strategies in parallel
149
- tasks = [
150
- run_problem_test(client, model_name, prompt_name, prompt_content, number_of_runs, EXPECTED_ANSWER)
151
- for prompt_name, prompt_content in prompts.items()
152
- ]
153
-
154
- results = await asyncio.gather(*tasks)
155
- return results
156
-
157
- async def main():
158
- """
159
- Tests the family reunion problem using various prompt engineering techniques
160
- across multiple LLM models.
161
- """
162
- models_to_test = ["gpt-4.1", "gpt-4.1-mini", "gpt-5", "gpt-5-mini"]
163
- number_of_runs = 10
164
-
165
- print(f"Starting family reunion problem test ({number_of_runs} runs per prompt per model)")
166
- print(f"Problem: Calculate how many bags of potatoes needed for family reunion")
167
- print(f"Expected answer: '{EXPECTED_ANSWER}'")
168
-
169
- overall_start = time.time()
170
-
171
- # Test each model sequentially (could also parallelize this)
172
- for model_name in models_to_test:
173
- await test_model_with_all_strategies(model_name, number_of_runs)
174
-
175
- overall_end = time.time()
176
- print(f"\n======================================================================")
177
- print(f"All tests completed in {overall_end - overall_start:.2f} seconds")
178
-
179
-
180
- if __name__ == "__main__":
181
- asyncio.run(main())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
chapter_03_tool_use/ch3_01_calculator_tool.py DELETED
@@ -1,123 +0,0 @@
1
- import json
2
- from openai import OpenAI
3
- from dotenv import load_dotenv
4
-
5
- load_dotenv()
6
-
7
- client = OpenAI()
8
-
9
- # Listing 3.1
10
- calculator_tool_definition = {
11
- "type": "function",
12
- "function": {
13
- "name": "calculator",
14
- "description": "Perform basic arithmetic operations between two numbers.",
15
- "parameters": {
16
- "type": "object",
17
- "properties": {
18
- "operator": {
19
- "type": "string",
20
- "description": "Arithmetic operation to perform",
21
- "enum": ["add", "subtract", "multiply", "divide"]
22
- },
23
- "first_number": {
24
- "type": "number",
25
- "description": "First number for the calculation"
26
- },
27
- "second_number": {
28
- "type": "number",
29
- "description": "Second number for the calculation"
30
- }
31
- },
32
- "required": ["operator", "first_number", "second_number"],
33
- }
34
- }
35
- }
36
-
37
- # Listing 3.2
38
- def calculator(operator: str, first_number: float, second_number: float) -> float:
39
- if operator == 'add':
40
- return first_number + second_number
41
- elif operator == 'subtract':
42
- return first_number - second_number
43
- elif operator == 'multiply':
44
- return first_number * second_number
45
- elif operator == 'divide':
46
- if second_number == 0:
47
- raise ValueError("Cannot divide by zero")
48
- return first_number / second_number
49
- else:
50
- raise ValueError(f"Unsupported operator: {operator}")
51
-
52
- if __name__ == "__main__":
53
- # Listing 3.3
54
- tools = [calculator_tool_definition]
55
-
56
- response_without_tool = client.chat.completions.create(
57
- model='gpt-5-mini',
58
- messages=[{"role": "user", "content": "What is the capital of South Korea?"}],
59
- tools=tools
60
- )
61
- print(response_without_tool.choices[0].message.content) # The capital of South Korea is Seoul.
62
- print(response_without_tool.choices[0].message.tool_calls) # None
63
-
64
- response_with_tool = client.chat.completions.create(
65
- model='gpt-5-mini',
66
- messages=[{"role": "user", "content": "What is 1234 x 5678?"}],
67
- tools=tools
68
- )
69
- print(response_with_tool.choices[0].message.content) # None
70
- print(response_with_tool.choices[0].message.tool_calls)
71
- # [ChatCompletionMessageFunctionToolCall(id='call_viaOEiQJ5VEB9YvKl95qlDjM', function=Function(arguments='{"operator":"multiply","first_number":1234,"second_number":5678}', name='calculator'), type='function')]
72
-
73
- # Listing 3.4
74
- ai_message = response_with_tool.choices[0].message
75
-
76
- if ai_message.tool_calls:
77
- for tool_call in ai_message.tool_calls:
78
- function_name = tool_call.function.name
79
- function_args = json.loads(tool_call.function.arguments)
80
-
81
- if function_name == "calculator":
82
- result = calculator(**function_args)
83
- print("calculator result:", result)
84
-
85
- # Listing 3.5
86
- messages = []
87
- messages.append({"role": "user", "content": "What is 1234 x 5678?"})
88
-
89
- response_with_tool = client.chat.completions.create(
90
- model='gpt-5-mini',
91
- messages=messages,
92
- tools=tools
93
- )
94
-
95
- ai_message = response_with_tool.choices[0].message
96
-
97
- messages.append({
98
- "role": "assistant",
99
- "content": ai_message.content,
100
- "tool_calls": ai_message.tool_calls
101
- })
102
-
103
- if ai_message.tool_calls:
104
- for tool_call in ai_message.tool_calls:
105
- function_name = tool_call.function.name
106
- function_args = json.loads(tool_call.function.arguments)
107
-
108
- if function_name == "calculator":
109
- result = calculator(**function_args)
110
-
111
- messages.append({
112
- "role": "tool",
113
- "tool_call_id": tool_call.id,
114
- "content": str(result)
115
- })
116
-
117
- final_response = client.chat.completions.create(
118
- model='gpt-5-mini',
119
- messages=messages
120
- )
121
- print("Messages:", messages)
122
- print("Final Answer:", final_response.choices[0].message.content)
123
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
chapter_03_tool_use/ch3_02_tavily_search_tool.py DELETED
@@ -1,16 +0,0 @@
1
- # Listing 3.13
2
- import os
3
- from tavily import TavilyClient
4
- from dotenv import load_dotenv
5
-
6
- load_dotenv()
7
-
8
- tavily_client = TavilyClient(os.getenv("TAVILY_API_KEY"))
9
-
10
- def search_web(query: str) -> str:
11
- """Search the web for the given query."""
12
- response = tavily_client.search(query, max_results=2, chunks_per_source=2)
13
- return response.get("results")
14
-
15
- # Listing 3.14
16
- print(search_web("Kipchoge's marathon world record"))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
chapter_03_tool_use/ch3_03_wikipedia_tool.py DELETED
@@ -1,23 +0,0 @@
1
- import wikipedia
2
-
3
- # Listing 3.16
4
- def search_wikipedia(query:str) -> list[str]:
5
- """Search Wikipedia for a query and return titles of wikipedia pages"""
6
- search_results = wikipedia.search(query)
7
- return search_results
8
-
9
- def get_wikipedia_page(title:str) -> str:
10
- """Get a wikipedia page by title"""
11
- page = wikipedia.page(title, auto_suggest=False)
12
- return page.content
13
-
14
-
15
- if __name__ == "__main__":
16
- # Listing 3.15
17
- search_results = wikipedia.search("moon")
18
- print("search_results:")
19
- print(search_results)
20
-
21
- page = wikipedia.page("Moon", auto_suggest=False)
22
- print("page content:")
23
- print(page.content[:100])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
chapter_03_tool_use/ch3_04_tool_definition.py DELETED
@@ -1,72 +0,0 @@
1
- # Listing 3.17
2
- import inspect
3
- from ch3_02_tavily_search_tool import search_web
4
-
5
- def example_tool(input_1:str, input_2:int=1):
6
- """docstring for example_tool"""
7
- return
8
-
9
- print(f"function name: {example_tool.__name__}")
10
- print(f"function docstring: {example_tool.__doc__}")
11
- print(f"function signature: {inspect.signature(example_tool)}")
12
-
13
- # Listing 3.18
14
- def function_to_input_schema(func) -> dict:
15
- type_map = {
16
- str: "string",
17
- int: "integer",
18
- float: "number",
19
- bool: "boolean",
20
- list: "array",
21
- dict: "object",
22
- type(None): "null",
23
- }
24
-
25
- try:
26
- signature = inspect.signature(func)
27
- except ValueError as e:
28
- raise ValueError(
29
- f"Failed to get signature for function {func.__name__}: {str(e)}"
30
- )
31
-
32
- parameters = {}
33
- for param in signature.parameters.values():
34
- try:
35
- param_type = type_map.get(param.annotation, "string")
36
- except KeyError as e:
37
- raise KeyError(
38
- f"Unknown type annotation {param.annotation} for parameter {param.name}: {str(e)}"
39
- )
40
- parameters[param.name] = {"type": param_type}
41
-
42
- required = [
43
- param.name
44
- for param in signature.parameters.values()
45
- if param.default == inspect._empty
46
- ]
47
-
48
- return {
49
- "type": "object",
50
- "properties": parameters,
51
- "required": required,
52
- }
53
-
54
- # Listing 3.19
55
- def format_tool_definition(name: str, description: str, parameters: dict) -> dict:
56
- return {
57
- "type": "function",
58
- "function": {
59
- "name": name,
60
- "description": description,
61
- "parameters": parameters,
62
- },
63
- }
64
-
65
- def function_to_tool_definition(func) -> dict:
66
- return format_tool_definition(
67
- func.__name__,
68
- func.__doc__ or "",
69
- function_to_input_schema(func)
70
- )
71
-
72
- print(function_to_input_schema(search_web))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
chapter_03_tool_use/ch3_05_tools_exercise.py DELETED
@@ -1,113 +0,0 @@
1
- import json
2
- from openai import OpenAI
3
- from dotenv import load_dotenv
4
- from ch3_01_calculator_tool import calculator
5
- from ch3_02_tavily_search_tool import search_web
6
- from ch3_03_wikipedia_tool import search_wikipedia, get_wikipedia_page
7
- from ch3_04_tool_definition import function_to_tool_definition
8
-
9
- load_dotenv()
10
-
11
- client = OpenAI()
12
-
13
- system_prompt = "You are a helpful assistant. calculator has only 4 operations: add, subtract, multiply, divide"
14
-
15
- tools = [calculator, search_web, search_wikipedia, get_wikipedia_page]
16
- tool_box = {tool.__name__: tool for tool in tools}
17
- tool_definitions = [function_to_tool_definition(tool) for tool in tools]
18
-
19
- # Listing 3.20
20
- def tool_execution(tool_box, tool_call):
21
- function_name = tool_call.function.name
22
- function_args = json.loads(tool_call.function.arguments)
23
-
24
- tool_result = tool_box[function_name](**function_args)
25
- return tool_result
26
-
27
- # Listing 3.21
28
- def run_step(system_prompt, question):
29
- messages = [
30
- {"role": "system", "content": system_prompt},
31
- {"role": "user", "content": question}
32
- ]
33
-
34
- while True:
35
- response = client.chat.completions.create(
36
- model="gpt-5-mini",
37
- messages=messages,
38
- tools=tool_definitions
39
- )
40
-
41
- assistant_message = response.choices[0].message
42
-
43
- if assistant_message.tool_calls:
44
- messages.append(assistant_message)
45
- for tool_call in assistant_message.tool_calls:
46
- tool_result = tool_execution(tool_box, tool_call)
47
- messages.append({
48
- "role": "tool",
49
- "content": str(tool_result),
50
- "tool_call_id": tool_call.id
51
- })
52
- else:
53
- return assistant_message.content
54
-
55
- # Listing 3.22
56
- def step_1_search_kipchoge():
57
- question = """I need to find Eliud Kipchoge's record-making marathon pace.
58
- Please search for information about his world record marathon time and
59
- calculate his pace per kilometer.
60
-
61
- FINAL ANSWER should be in the format: "X.XX minutes per km"."""
62
-
63
- result = run_step(system_prompt, question)
64
- return result
65
-
66
- kipchoge_result = step_1_search_kipchoge()
67
- print(f"Step 1 Complete - Kipchoge pace: {kipchoge_result}")
68
-
69
- # Listing 3.23
70
- def step_2_search_moon_distance():
71
- question = """I need to find the minimum perigee value (closest approach
72
- distance) between Earth and Moon from the Wikipedia page for the Moon.
73
- Please search for this information.
74
-
75
- FINAL ANSWER should be in the format: "X km"."""
76
-
77
- result = run_step(system_prompt, question)
78
- return result
79
-
80
- moon_result = step_2_search_moon_distance()
81
- print(f"Step 2 Complete - Moon distance: {moon_result}")
82
-
83
- # Listing 3.24
84
- def step_3_calculate(kipchoge_pace, moon_distance):
85
- question = f"""Given the following information:
86
- - Kipchoge's pace: {kipchoge_pace}
87
- - Moon distance: {moon_distance}
88
-
89
- Please calculate how many hours it would take Kipchoge to run this distance
90
- at his record pace. Make sure to handle unit conversions properly.
91
-
92
- FINAL ANSWER should be in the format: "X hours"."""
93
-
94
- result = run_step(system_prompt, question)
95
- return result
96
-
97
- time_result = step_3_calculate(kipchoge_result, moon_result)
98
- print(f"Step 3 Complete - Time needed: {time_result}")
99
-
100
- # Listing 3.25
101
- def step_4_final_answer(total_hours):
102
- question = f"""Given that the total time is {total_hours}, I need to round
103
- this to the nearest 1000 hours and express the answer in thousand hours.
104
-
105
- The original question asks for the result rounded to the nearest 1000 hours.
106
-
107
- FINAL ANSWER should be just the number (in thousand hours)."""
108
-
109
- result = run_step(system_prompt, question)
110
- return result
111
-
112
- final_result = step_4_final_answer(time_result)
113
- print(f"Step 4 Complete - Final answer: {final_result}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
chapter_03_tool_use/ch3_06_tool_abstraction.py DELETED
@@ -1,126 +0,0 @@
1
- from abc import ABC, abstractmethod
2
- from typing import Any, Dict, Optional, Union, Type, Callable
3
- import asyncio
4
- import inspect
5
- import json
6
- from ch3_04_tool_definition import format_tool_definition, function_to_input_schema
7
-
8
- # Listing 3.27
9
- class BaseTool(ABC):
10
-
11
- def __init__(
12
- self,
13
- name: str = None,
14
- description: str = None,
15
- tool_definition: Union[Dict[str, Any], str] = None,
16
- pydantic_input_model: Type = None
17
- ):
18
- self.name = name or self.__class__.__name__
19
- self.description = description or self.__doc__ or ""
20
- self.pydantic_input_model = pydantic_input_model
21
-
22
- if isinstance(tool_definition, str):
23
- self._tool_definition = json.loads(tool_definition)
24
- elif tool_definition is not None:
25
- self._tool_definition = tool_definition
26
- else:
27
- self._tool_definition = None # Generate later
28
-
29
- # Listing 3.28
30
- @property
31
- def tool_definition(self) -> Dict[str, Any]:
32
- if self._tool_definition is None: #A
33
- self._tool_definition = self._generate_definition() #A
34
- return self._tool_definition
35
-
36
- def _generate_definition(self) -> Dict[str, Any]:
37
- if self.pydantic_input_model:
38
- try:
39
- from pydantic import BaseModel
40
- if issubclass(self.pydantic_input_model, BaseModel):
41
- parameters = self.pydantic_input_model.model_json_schema()
42
- return format_tool_definition(
43
- self.name, self.description, parameters
44
- )
45
- except ImportError:
46
- pass
47
- # Subclasses should override this method or provide tool_definition
48
- raise NotImplementedError(
49
- f"{self.__class__.__name__} must either provide a tool_definition, "
50
- f"pydantic_input_model, or override _generate_definition()"
51
- )
52
-
53
- # Listing 3.29
54
- class FunctionTool(BaseTool):
55
-
56
- def __init__(
57
- self,
58
- func: Callable,
59
- name: str = None,
60
- description: str = None,
61
- tool_definition: Union[Dict[str, Any], str] = None
62
- ):
63
- self.func = func
64
- self.pydantic_input_model = self._detect_pydantic_model(func) #A
65
-
66
- name = name or func.__name__ #B
67
- description = description or (func.__doc__ or "").strip() #B
68
-
69
- super().__init__(
70
- name=name,
71
- description=description,
72
- tool_definition=tool_definition,
73
- pydantic_input_model=self.pydantic_input_model
74
- )
75
-
76
- # Listing 3.30
77
- async def execute(self, **kwargs) -> Any:
78
- if self.pydantic_input_model:
79
- args = (self.pydantic_input_model.model_validate(kwargs),)
80
- call_kwargs = {}
81
- else:
82
- args = ()
83
- call_kwargs = kwargs
84
-
85
- if inspect.iscoroutinefunction(self.func):
86
- return await self.func(*args, **call_kwargs)
87
- else:
88
- loop = asyncio.get_event_loop()
89
- return await loop.run_in_executor(
90
- None, lambda: self.func(*args, **call_kwargs)
91
- )
92
-
93
- # Listing 3.31
94
- def _generate_definition(self) -> Dict[str, Any]:
95
- if self.pydantic_input_model:
96
- return super()._generate_definition()
97
-
98
- parameters = function_to_input_schema(self.func)
99
- return format_tool_definition(self.name, self.description, parameters)
100
-
101
- # Listing 3.32
102
- def _detect_pydantic_model(self, func: Callable) -> Optional[Type]:
103
- try:
104
- from pydantic import BaseModel
105
- sig = inspect.signature(func)
106
- params = list(sig.parameters.values())
107
-
108
- if len(params) == 1 and params[0].annotation != inspect._empty:
109
- param_type = params[0].annotation
110
- if isinstance(param_type, type) and issubclass(param_type, BaseModel):
111
- return param_type
112
- except ImportError:
113
- pass
114
- return None
115
-
116
- if __name__ == "__main__":
117
- def search_web(query: str) -> str:
118
- """Search for information on the web"""
119
- # Actual search logic
120
- return f"Search results: {query}"
121
-
122
- search_tool = FunctionTool(search_web)
123
-
124
- print(type(search_tool))
125
- print(search_tool.description)
126
- print(search_tool.tool_definition)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
chapter_03_tool_use/ch3_07_tool_decorator.py DELETED
@@ -1,45 +0,0 @@
1
- from typing import Callable, Union, Dict, Any
2
- from ch3_06_tool_abstraction import FunctionTool
3
-
4
- def tool(
5
- func: Callable = None,
6
- *,
7
- name: str = None,
8
- description: str = None,
9
- tool_definition: Union[Dict[str, Any], str] = None
10
- ) -> Union[Callable, FunctionTool]:
11
-
12
- def decorator(f: Callable) -> FunctionTool:
13
- return FunctionTool(
14
- func=f,
15
- name=name,
16
- description=description,
17
- tool_definition=tool_definition
18
- )
19
-
20
- # Handle both @tool and @tool() usage
21
- if func is not None:
22
- return decorator(func)
23
- return decorator
24
-
25
- if __name__ == "__main__":
26
- def search_web(query: str) -> str:
27
- """Search for information on the web"""
28
- return f"{query}_result"
29
-
30
- search_tool_v1 = FunctionTool(search_web)
31
-
32
- @tool
33
- def search_web(query: str) -> str:
34
- """Search for information on the web"""
35
- return f"{query}_result"
36
-
37
- @tool(name="internet_search",
38
- description="Query the internet for latest information")
39
- def search_web_custom(query: str) -> str:
40
- """Search for information on the web"""
41
- return f"{query}_result"
42
-
43
- print(search_tool_v1.tool_definition)
44
- print(search_web.tool_definition)
45
- print(search_web_custom.tool_definition)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
chapter_03_tool_use/ch3_08_mcp_tavily_custom.py DELETED
@@ -1,37 +0,0 @@
1
- import os
2
- from tavily import TavilyClient
3
- from dotenv import load_dotenv
4
- from mcp.server.fastmcp import FastMCP
5
-
6
- load_dotenv()
7
-
8
- tavily_client = TavilyClient(os.getenv("TAVILY_API_KEY"))
9
-
10
- mcp = FastMCP("tavily-search")
11
-
12
- @mcp.tool()
13
- def search_web(query: str, max_results: int = 5) -> str:
14
- """
15
- Search the web using Tavily API.
16
-
17
- Args:
18
- query: Search query string
19
- max_results: Maximum number of results to return (default: 5)
20
-
21
- Returns:
22
- Search results as formatted string
23
- """
24
- try:
25
- response = tavily_client.search(
26
- query,
27
- max_results=max_results,
28
- chunks_per_source=2
29
- )
30
-
31
- return "\n".join(response.get("results"))
32
-
33
- except Exception as e:
34
- return f"Error searching web: {str(e)}"
35
-
36
- if __name__ == "__main__":
37
- mcp.run(transport='stdio')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
chapter_04_basic_agent/01_/bsolve_kipchoge_problem.py DELETED
@@ -1,30 +0,0 @@
1
- import asyncio
2
- from dotenv import load_dotenv
3
- load_dotenv()
4
-
5
- from scratch_agents.tools import calculator, search_web, search_wikipedia, get_wikipedia_page
6
- from scratch_agents.models.openai import OpenAILlm
7
- from scratch_agents.agents.tool_calling_agent_ch4_base import ToolCallingAgent
8
-
9
- gaia_system_prompt = """
10
- You are a general AI assistant.
11
- I will ask you a question.
12
- YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
13
- If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
14
- If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
15
- If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
16
- """
17
-
18
- kipchoge_problem = """
19
- If Eliud Kipchoge could maintain his record-making marathon pace indefinitely, how many thousand hours would it take him to run the distance between the Earth and the Moon at its closest approach? Please use the minimum perigee value on the Wikipedia page for the Moon when carrying out your calculation. Round your result to the nearest integer.
20
- """
21
-
22
- async def main():
23
- tools = [search_web, calculator, search_wikipedia, get_wikipedia_page]
24
- model = OpenAILlm(model="gpt-5")
25
- agent = ToolCallingAgent(model=model, tools=tools, instruction=gaia_system_prompt, max_steps=20)
26
- result, context = await agent.run(kipchoge_problem, return_context=True)
27
- print(result)
28
-
29
- if __name__ == "__main__":
30
- asyncio.run(main())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
chapter_04_basic_agent/02_agent_structured_output.py DELETED
@@ -1,33 +0,0 @@
1
- import asyncio
2
- from typing import Optional, Literal, List
3
- from pydantic import BaseModel, Field
4
- from dotenv import load_dotenv
5
- from scratch_agents.agents.tool_calling_agent_ch4_structured_output import ToolCallingAgent
6
- from scratch_agents.models.openai import OpenAILlm
7
-
8
- load_dotenv()
9
-
10
-
11
- async def main():
12
- # Initialize LLM (ensure OPENAI_API_KEY is set in your environment)
13
- llm = OpenAILlm(model="gpt-5-mini")
14
-
15
- class SentimentAnalysis(BaseModel):
16
- sentiment: Literal["positive", "negative", "neutral"]
17
- confidence: float
18
- key_phrases: List[str]
19
-
20
- agent = ToolCallingAgent(
21
- name="sentiment_analyzer",
22
- model=llm,
23
- tools=[], # Could include tools for data retrieval
24
- instructions="Analyze the sentiment of the provided text.",
25
- output_type=SentimentAnalysis
26
- )
27
-
28
- result = await agent.run("This product exceeded my expectations! Highly recommend.")
29
- # result is now a SentimentAnalysis instance with validated fields
30
- print(f"Sentiment: {result.sentiment} (confidence: {result.confidence})")
31
-
32
- if __name__ == "__main__":
33
- asyncio.run(main())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
chapter_04_basic_agent/03_human_in_the_loop.py DELETED
@@ -1,76 +0,0 @@
1
- import asyncio
2
- from scratch_agents.tools import search_web, calculator, search_wikipedia, get_wikipedia_page
3
- from scratch_agents.models.openai import OpenAILlm
4
- from scratch_agents.agents.tool_calling_agent_ch4_callback import ToolCallingAgent
5
- from dotenv import load_dotenv
6
-
7
- load_dotenv()
8
-
9
- gaia_system_prompt = """
10
- You are a general AI assistant.
11
- I will ask you a question.
12
- YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
13
- If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
14
- If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
15
- If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
16
- """
17
-
18
- kipchoge_problem = """
19
- If Eliud Kipchoge could maintain his record-making marathon pace indefinitely, how many thousand hours would it take him to run the distance between the Earth and the Moon at its closest approach? Please use the minimum perigee value on the Wikipedia page for the Moon when carrying out your calculation. Round your result to the nearest integer.
20
- """
21
-
22
- def basic_approval_callback(context, tool_call):
23
- tool_name = tool_call.name
24
- print(f"\n🔧 Tool Execution Request")
25
- print(f"Tool: {tool_call.name}")
26
- print(f"Arguments: {tool_call.arguments}")
27
-
28
- response = input("Execute this tool? (y/n): ").lower().strip()
29
-
30
- if response == 'y':
31
- print("✅ Approved. Executing...\n")
32
- return None
33
- else:
34
- print("❌ Denied. Skipping execution.\n")
35
- return f"User denied execution of {tool_name}"
36
-
37
-
38
- def session_aware_approval_callback(context, tool_call):
39
- tool_name = tool_call.name
40
- # Check if tool is already marked as safe in this session
41
- safe_tools = context.state.get('safe_tools', [])
42
- if tool_name in safe_tools:
43
- print(f"✓ Auto-executing {tool_name} (marked as safe)")
44
- return None
45
-
46
- response = input("Execute this tool? (y to run once, ya to allow for session, n to skip): ").lower().strip()
47
-
48
- if response == 'y':
49
- print("✅ Approved. Executing...\n")
50
- return None
51
- elif response == 'ya':
52
- if 'safe_tools' not in context.state:
53
- context.state['safe_tools'] = []
54
- context.state['safe_tools'].append(tool_name)
55
- print(f"✅ {tool_name} marked as safe for this session. Executing...\n")
56
- return None
57
- else:
58
- print("❌ Denied. Skipping execution.\n")
59
- return f"User denied execution of {tool_name}"
60
-
61
-
62
- async def main():
63
- tools = [search_web, calculator, search_wikipedia, get_wikipedia_page]
64
- model = OpenAILlm(model="gpt-5-mini")
65
- agent = ToolCallingAgent(
66
- name="callback_agent",
67
- model=model,
68
- tools=tools,
69
- instructions=gaia_system_prompt,
70
- before_tool_callbacks=[basic_approval_callback]
71
- )
72
- result = await agent.run(kipchoge_problem)
73
- print(result)
74
-
75
- if __name__ == "__main__":
76
- asyncio.run(main())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
chapter_06_memory/01_session_agent.py DELETED
@@ -1,56 +0,0 @@
1
- import asyncio
2
- from scratch_agents.models.openai import OpenAILlm
3
- from scratch_agents.agents.tool_calling_agent_ch6 import ToolCallingAgent
4
- from scratch_agents.tools import calculator, search_web
5
- from scratch_agents.sessions.in_memory_session_manager import InMemorySessionManager
6
- from dotenv import load_dotenv
7
- import os
8
-
9
- load_dotenv()
10
-
11
-
12
- async def main():
13
- """Demonstrate session memory functionality"""
14
- user_id = "test_123"
15
- # Initialize components
16
- model = OpenAILlm(model='gpt-5-mini')
17
- tools = [calculator, search_web]
18
-
19
- # Create agent with session manager
20
- agent = ToolCallingAgent(
21
- name="session_assistant",
22
- model=model,
23
- instructions="You are a helpful assistant that remembers our conversations.",
24
- tools=tools,
25
- session_manager=InMemorySessionManager()
26
- )
27
-
28
- # First interaction - session 1
29
- print("=== First Interaction (Session 1) ===")
30
- answer1 = await agent.run(
31
- "My name is Alice and I'm working on Project Alpha. What's 123 * 456?",
32
- session_id="session_1",
33
- user_id=user_id
34
- )
35
- print(f"Assistant: {answer1}\n")
36
-
37
- # Second interaction - continue session 1
38
- print("=== Second Interaction (Session 1) ===")
39
- answer2 = await agent.run(
40
- "What project am I working on and what was the result of the multiplication I asked about?",
41
- session_id="session_1",
42
- user_id=user_id
43
- )
44
- print(f"Assistant: {answer2}\n")
45
-
46
- # New session - session 2
47
- print("=== New Session (Session 2) ===")
48
- answer3 = await agent.run(
49
- "Do you remember my name?",
50
- session_id="session_2",
51
- user_id=user_id
52
- )
53
- print(f"Assistant: {answer3}\n")
54
-
55
- if __name__ == "__main__":
56
- asyncio.run(main())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
chapter_06_memory/02_core_memory_strategy.py DELETED
@@ -1,35 +0,0 @@
1
- import asyncio
2
- from scratch_agents.agents.tool_calling_agent_ch6 import ToolCallingAgent
3
- from scratch_agents.models.openai import OpenAILlm
4
- from scratch_agents.sessions.in_memory_session_manager import InMemorySessionManager
5
- from scratch_agents.memory.core_memory_strategy import CoreMemoryStrategy
6
- from dotenv import load_dotenv
7
- import os
8
-
9
- load_dotenv()
10
-
11
-
12
- async def test_core_memory_loading():
13
- user_id = "test_123"
14
- session_id = "test_session"
15
- session_manager = InMemorySessionManager()
16
- session = session_manager.get_or_create_session(session_id, user_id)
17
- session.core_memory["user"] = "User's name is Alice"
18
-
19
- agent = ToolCallingAgent(
20
- name="memory_agent",
21
- model=OpenAILlm(model="gpt-5-mini"),
22
- instructions="You are a helpful assistant",
23
- session_manager=session_manager,
24
- before_llm_callbacks=[CoreMemoryStrategy()]
25
- )
26
-
27
- response = await agent.run(
28
- "What's my name?",
29
- session_id=session_id,
30
- user_id=user_id
31
- )
32
-
33
- print(response)
34
-
35
- asyncio.run(test_core_memory_loading())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
chapter_06_memory/03_core_memory_update.py DELETED
@@ -1,32 +0,0 @@
1
- from scratch_agents.agents.tool_calling_agent_ch6 import ToolCallingAgent
2
- from scratch_agents.models.openai import OpenAILlm
3
- from scratch_agents.sessions.in_memory_session_manager import InMemorySessionManager
4
- from scratch_agents.tools.core_memory_upsert import core_memory_upsert
5
- import asyncio
6
- from dotenv import load_dotenv
7
- import os
8
-
9
- load_dotenv()
10
-
11
- user_id = "test_123"
12
- session_id = "test_session"
13
-
14
- async def test_automatic_memory_update():
15
- agent = ToolCallingAgent(
16
- name="learning_agent",
17
- model=OpenAILlm(model="gpt-5-mini"),
18
- instructions="Remember important user info with core_memory_upsert",
19
- tools=[core_memory_upsert],
20
- session_manager=InMemorySessionManager(),
21
- )
22
-
23
- await agent.run(
24
- "Hi! My name is Alice and I work as a data scientist.",
25
- session_id=session_id,
26
- user_id=user_id
27
- )
28
-
29
- session = agent.session_manager.get_session(session_id)
30
- print(session.core_memory['user'])
31
-
32
- asyncio.run(test_automatic_memory_update())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
chapter_06_memory/04_sliding_window.py DELETED
@@ -1,43 +0,0 @@
1
- import asyncio
2
- from scratch_agents.agents.tool_calling_agent_ch6 import ToolCallingAgent
3
- from scratch_agents.models.openai import OpenAILlm
4
- from scratch_agents.sessions.in_memory_session_manager import InMemorySessionManager
5
- from scratch_agents.memory.sliding_window_strategy import SlidingWindowStrategy
6
- from scratch_agents.types.contents import Message
7
- from scratch_agents.types.events import Event
8
- from dotenv import load_dotenv
9
- import os
10
-
11
- load_dotenv()
12
-
13
- user_id = "test_123"
14
- session_id = "test_session"
15
-
16
- async def test_sliding_window():
17
-
18
- session_manager = InMemorySessionManager()
19
- session = session_manager.create_session(session_id, user_id)
20
-
21
- session.events.append(Event(
22
- execution_id="exec1",
23
- author="user",
24
- content=[Message(role="user", content="My name is Alice"),
25
- Message(role="user", content="I live in Korea")]
26
- ))
27
-
28
- agent = ToolCallingAgent(
29
- name="window_agent",
30
- model=OpenAILlm(model="gpt-5-mini"),
31
- instructions="You are a helpful assistant",
32
- session_manager=session_manager,
33
- before_llm_callbacks=[SlidingWindowStrategy(max_messages=2)]
34
- )
35
-
36
- response = await agent.run(
37
- "What's my name?",
38
- session_id=session_id,
39
- user_id=user_id
40
- )
41
- print(response)
42
-
43
- asyncio.run(test_sliding_window())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
chapter_06_memory/05_summarization.py DELETED
@@ -1,64 +0,0 @@
1
- from scratch_agents.agents.tool_calling_agent_ch6 import ToolCallingAgent
2
- from scratch_agents.models.openai import OpenAILlm
3
- from scratch_agents.sessions.in_memory_session_manager import InMemorySessionManager
4
- from scratch_agents.memory.summarization_strategy import SummarizationStrategy
5
- from scratch_agents.types.contents import Message
6
- from scratch_agents.types.events import Event
7
- import asyncio
8
- from dotenv import load_dotenv
9
- import os
10
-
11
- load_dotenv()
12
-
13
- user_id = "test_123"
14
- session_id = "test_session"
15
-
16
- async def test_summarization_strategy():
17
- """Demonstrate summarization strategy in action"""
18
-
19
- model = OpenAILlm(model="gpt-5-mini")
20
- session_manager = InMemorySessionManager()
21
- session = session_manager.create_session(session_id, user_id)
22
-
23
- messages = [
24
- Message(role="user", content="Hi, I'm Bob"),
25
- Message(role="assistant", content="Nice to meet you, Bob!"),
26
- Message(role="user", content="I work as a teacher"),
27
- Message(role="assistant", content="Wow! What subject?"),
28
- Message(role="user", content="I teach math"),
29
- Message(role="assistant", content="Math is important!"),
30
- Message(role="user", content="I have 30 students"),
31
- Message(role="assistant", content="That's a good class size"),
32
- ]
33
-
34
- for msg in messages:
35
- event = Event(
36
- execution_id="test_exec",
37
- author="test",
38
- content=[msg]
39
- )
40
- session.events.append(event)
41
-
42
- agent = ToolCallingAgent(
43
- name="summary_agent",
44
- model=model,
45
- instructions="You are a helpful assistant",
46
- session_manager=session_manager,
47
- before_llm_callbacks=[
48
- SummarizationStrategy(model=model, trigger_count=8, keep_recent=2)
49
- ]
50
- )
51
-
52
- response = await agent.run(
53
- "What subject do I teach?",
54
- session_id=session_id,
55
- user_id=user_id
56
- )
57
-
58
- if "conversation_summary" in session.state:
59
- print(f"Summary: {session.state['conversation_summary']}")
60
- print(f"Summary Index: {session.state['last_summarized_index']}")
61
-
62
- print(f"\nAgent response: {response}")
63
-
64
- asyncio.run(test_summarization_strategy())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
chapter_06_memory/06_conversation_search.py DELETED
@@ -1,59 +0,0 @@
1
- from scratch_agents.agents.tool_calling_agent_ch6 import ToolCallingAgent
2
- from scratch_agents.models.openai import OpenAILlm
3
- from scratch_agents.sessions.in_memory_session_manager import InMemorySessionManager
4
- from scratch_agents.memory.sliding_window_strategy import SlidingWindowStrategy
5
- from scratch_agents.tools.conversation_search import conversation_search
6
- from scratch_agents.types.contents import Message
7
- from scratch_agents.types.events import Event
8
- import asyncio
9
- from dotenv import load_dotenv
10
- import os
11
-
12
- load_dotenv()
13
-
14
- user_id = "test_123"
15
- session_id = "test_session"
16
-
17
- async def test_search_with_sliding_window():
18
- """Demonstrate search recovering information lost to sliding window"""
19
-
20
- model = OpenAILlm(model="gpt-5-mini")
21
- session_manager = InMemorySessionManager()
22
- session = session_manager.create_session(session_id, user_id)
23
-
24
- conversation_history = [
25
- ("user", "My golden retriever puppy is named Max."),
26
- ("assistant", "Max is a lovely name for a golden retriever!"),
27
- ("user", "He loves playing fetch in the park."),
28
- ("assistant", "That's wonderful! Golden retrievers are great at fetch."),
29
- ]
30
-
31
- for role, content in conversation_history:
32
- event = Event(
33
- execution_id="pre_loaded",
34
- author=role,
35
- content=[Message(role=role, content=content)]
36
- )
37
- session.events.append(event)
38
-
39
- agent = ToolCallingAgent(
40
- name="search_agent",
41
- model=model,
42
- instructions="""You are a helpful assistant. When asked about
43
- information from earlier in our conversation, use the
44
- conversation_search tool to find it.""",
45
- tools=[conversation_search],
46
- session_manager=session_manager,
47
- before_llm_callbacks=[
48
- SlidingWindowStrategy(max_messages=2)
49
- ]
50
- )
51
-
52
- response = await agent.run(
53
- "What was my puppy's name?",
54
- session_id=session_id,
55
- user_id=user_id
56
- )
57
- print(f"Agent: {response}\n")
58
-
59
- asyncio.run(test_search_with_sliding_window())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
chapter_06_memory/07_task_long_term.py DELETED
@@ -1,76 +0,0 @@
1
- from scratch_agents.agents.execution_context_ch6 import ExecutionContext
2
- from scratch_agents.agents.tool_calling_agent_ch6 import ToolCallingAgent
3
- from scratch_agents.models.openai import OpenAILlm
4
- from scratch_agents.sessions.task_cross_session_manager import TaskCrossSessionManager
5
- from scratch_agents.sessions.in_memory_session_manager import InMemorySessionManager
6
- from scratch_agents.tools.base_tool import BaseTool
7
- from scratch_agents.models.llm_request import LlmRequest
8
- from scratch_agents.tools.search_web import search_web
9
- import asyncio
10
- from dotenv import load_dotenv
11
- import os
12
-
13
- load_dotenv()
14
-
15
- user_id = "test_123"
16
-
17
- async def long_term_memory_save_callback(context:ExecutionContext):
18
- cross_session_manager = context.cross_session_manager
19
- session = context.session
20
- execution_id = context.execution_id
21
-
22
- await cross_session_manager.process_session(session=session, execution_id=execution_id)
23
-
24
- class MemorySearchTool(BaseTool):
25
- async def execute(self, context, **kwargs):
26
- return None
27
-
28
- async def process_llm_request(self, request: LlmRequest, context: ExecutionContext):
29
- user_input = context.user_input
30
- user_id = context.session.user_id
31
- results = await context.cross_session_manager.search(user_input, user_id)
32
- if results:
33
- request.add_instructions(f"Use the following task memory to answer the user's question: {results}")
34
-
35
-
36
- async def test_long_term_memory_save():
37
- """Test long-term memory saving with a meaningful conversation"""
38
-
39
- session_manager = InMemorySessionManager()
40
- model = OpenAILlm(model="gpt-5-mini")
41
- cross_session_manager = TaskCrossSessionManager(model=model)
42
-
43
- memory_search_tool = MemorySearchTool()
44
-
45
- agent = ToolCallingAgent(
46
- name="memory_agent",
47
- model=model,
48
- instructions="You are a helpful assistant. Have a natural conversation and learn about the user's task. IMPORTANT: When the user asks about a specific term or technology, use the search results to provide a comprehensive answer. Do NOT ask for clarification if you find relevant search results. Only ask for clarification if search returns no results or the query is truly impossible to understand. If multiple meanings exist, provide information about the most common or relevant one based on the search results.",
49
- tools=[search_web, memory_search_tool],
50
- session_manager=session_manager,
51
- cross_session_manager=cross_session_manager,
52
- after_run_callbacks=[long_term_memory_save_callback]
53
- )
54
-
55
- print("=== Testing Long-term Memory Save ===\n")
56
-
57
- test_conversations = [
58
- "What is Mem0?",
59
- "How does mem0 work?"
60
- ]
61
-
62
- for i, message in enumerate(test_conversations, 1):
63
- print(f"User: {message}")
64
- session_id = f"test_session_{i}"
65
-
66
- response = await agent.run(
67
- message,
68
- session_id=session_id,
69
- user_id=user_id
70
- )
71
- print(response)
72
- # print(cross_session_manager.collection.peek())
73
-
74
- if __name__ == "__main__":
75
- asyncio.run(test_long_term_memory_save())
76
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
chapter_06_memory/08_user_long_term.py DELETED
@@ -1,98 +0,0 @@
1
- from scratch_agents.agents.execution_context_ch6 import ExecutionContext
2
- from scratch_agents.agents.tool_calling_agent_ch6 import ToolCallingAgent
3
- from scratch_agents.models.openai import OpenAILlm
4
- from scratch_agents.sessions.user_cross_session_manager import UserCrossSessionManager
5
- from scratch_agents.sessions.in_memory_session_manager import InMemorySessionManager
6
- from scratch_agents.tools.base_tool import BaseTool
7
- from scratch_agents.models.llm_request import LlmRequest
8
- import asyncio
9
- from dotenv import load_dotenv
10
- import os
11
-
12
- load_dotenv()
13
-
14
- user_id = "test_user_123"
15
-
16
- async def user_memory_save_callback(context: ExecutionContext):
17
- """Callback to save user memories after each interaction"""
18
- cross_session_manager = context.cross_session_manager
19
- session = context.session
20
- execution_id = context.execution_id
21
-
22
- await cross_session_manager.process_session(session=session, execution_id=execution_id)
23
-
24
- class UserMemorySearchTool(BaseTool):
25
- """Tool to search and retrieve user memories"""
26
- async def execute(self, context, **kwargs):
27
- return None
28
-
29
- async def process_llm_request(self, request: LlmRequest, context: ExecutionContext):
30
- user_id = context.session.user_id
31
- # Get all existing memories for the user
32
- all_memories = await context.cross_session_manager.find_existing([], user_id)
33
- if all_memories:
34
- memory_contents = [mem['content'] for mem in all_memories]
35
- memory_text = "\n".join(f"- {content}" for content in memory_contents)
36
- request.add_instructions(f"You have the following memories about this user:\n{memory_text}\n\nUse these memories to personalize your responses.")
37
-
38
-
39
- async def test_user_long_term_memory():
40
- """Test user long-term memory with location updates"""
41
-
42
- session_manager = InMemorySessionManager()
43
- model = OpenAILlm(model="gpt-4o-mini")
44
- cross_session_manager = UserCrossSessionManager(model=model)
45
-
46
- memory_search_tool = UserMemorySearchTool()
47
-
48
- agent = ToolCallingAgent(
49
- name="user_memory_agent",
50
- model=model,
51
- instructions="You are a helpful assistant that remembers information about the user. Have natural conversations and acknowledge what you know about the user when relevant.",
52
- tools=[memory_search_tool],
53
- session_manager=session_manager,
54
- cross_session_manager=cross_session_manager,
55
- after_run_callbacks=[user_memory_save_callback]
56
- )
57
-
58
- print("=== Testing User Long-term Memory ===\n")
59
-
60
- # Test conversation about location changes
61
- test_conversations = [
62
- "Hi! I'm living in New York City. I love the energy here!",
63
- "Actually, I just moved to Los Angeles last month. The weather is so much better here.",
64
- "What do you remember about where I live?"
65
- ]
66
-
67
- for i, message in enumerate(test_conversations, 1):
68
- print(f"\n--- Conversation {i} ---")
69
- print(f"User: {message}")
70
- session_id = f"user_session_{i}"
71
-
72
- response = await agent.run(
73
- message,
74
- session_id=session_id,
75
- user_id=user_id
76
- )
77
- print(f"Assistant: {response}")
78
-
79
- # Show current memories in the database with timestamps
80
- print("\n=> Current User Memories:")
81
- memories = await cross_session_manager.find_existing([], user_id)
82
- if memories:
83
- for mem in memories:
84
- created = mem.get('created_at', 'Unknown')[:19] if mem.get('created_at') != 'Unknown' else 'Unknown'
85
- updated = mem.get('updated_at', 'Unknown')[:19] if mem.get('updated_at') != 'Unknown' else 'Unknown'
86
- print(f" - {mem['content']}")
87
- if created != updated:
88
- print(f" (Created: {created}, Updated: {updated})")
89
- else:
90
- print(f" (Created: {created})")
91
- else:
92
- print(" (No memories yet)")
93
-
94
- # Small delay to see the progression
95
- await asyncio.sleep(1)
96
-
97
- if __name__ == "__main__":
98
- asyncio.run(test_user_long_term_memory())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
my_code.ipynb ADDED
@@ -0,0 +1,926 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "id": "bd396f3a",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "data": {
11
+ "text/plain": [
12
+ "True"
13
+ ]
14
+ },
15
+ "execution_count": 1,
16
+ "metadata": {},
17
+ "output_type": "execute_result"
18
+ }
19
+ ],
20
+ "source": [
21
+ "from dotenv import load_dotenv, find_dotenv\n",
22
+ "\n",
23
+ "load_dotenv(find_dotenv())\n",
24
+ "\n"
25
+ ]
26
+ },
27
+ {
28
+ "cell_type": "code",
29
+ "execution_count": null,
30
+ "id": "bdc55e33",
31
+ "metadata": {},
32
+ "outputs": [
33
+ {
34
+ "name": "stdout",
35
+ "output_type": "stream",
36
+ "text": [
37
+ "ChatCompletionMessage(content='The capital of India is New Delhi.', refusal=None, role='assistant', annotations=[], audio=None, function_call=None, tool_calls=None)\n",
38
+ "The capital of India is New Delhi.\n"
39
+ ]
40
+ }
41
+ ],
42
+ "source": [
43
+ "from openai import OpenAI\n",
44
+ "client = OpenAI()\n",
45
+ "\n",
46
+ "response = client.chat.completions.create(\n",
47
+ " model = 'gpt-5-mini',\n",
48
+ " messages = [\n",
49
+ " {'role': 'system', 'content' : 'You are a helpful assistant !'},\n",
50
+ " {'role': 'user', 'content': 'What is the capital of India ?'}\n",
51
+ " ]\n",
52
+ ")\n",
53
+ "\n",
54
+ "print(response.choices[0].message)\n",
55
+ "print(response.choices[0].message.content)\n"
56
+ ]
57
+ },
58
+ {
59
+ "cell_type": "code",
60
+ "execution_count": 5,
61
+ "id": "396e8826",
62
+ "metadata": {},
63
+ "outputs": [
64
+ {
65
+ "name": "stdout",
66
+ "output_type": "stream",
67
+ "text": [
68
+ "Hello! How can I help you today?\n"
69
+ ]
70
+ }
71
+ ],
72
+ "source": [
73
+ "## with this we can unify all providers\n",
74
+ "\n",
75
+ "from litellm import completion\n",
76
+ "response = completion(\n",
77
+ " model = 'gpt-5-mini',\n",
78
+ " messages = [{'role' : 'user', 'content' : 'Hello !' }]\n",
79
+ ")\n",
80
+ "\n",
81
+ "print(response.choices[0].message.content)"
82
+ ]
83
+ },
84
+ {
85
+ "cell_type": "code",
86
+ "execution_count": null,
87
+ "id": "cb505eb4",
88
+ "metadata": {},
89
+ "outputs": [
90
+ {
91
+ "name": "stdout",
92
+ "output_type": "stream",
93
+ "text": [
94
+ "Nice to meet you, Akhil — how can I help you today?\n",
95
+ "I don't know — I don't have access to personal details unless you tell me. What would you like me to call you in this chat? (I can use that name for this conversation, but I can't remember it across separate sessions unless you set it in your app/profile.)\n"
96
+ ]
97
+ }
98
+ ],
99
+ "source": [
100
+ "from litellm import completion\n",
101
+ "\n",
102
+ "response1 = completion(\n",
103
+ " model = 'gpt-5-mini',\n",
104
+ " messages = [{'role' : 'user', 'content':'My name is Akhil'}]\n",
105
+ ")\n",
106
+ "\n",
107
+ "response2 = completion(\n",
108
+ " model = 'gpt-5-mini',\n",
109
+ " messages = [{'role' : 'user', 'content':'what\\'s my name'}]\n",
110
+ ")\n",
111
+ "\n",
112
+ "print(response1.choices[0].message.content)\n",
113
+ "print(response2.choices[0].message.content)\n",
114
+ "\n",
115
+ "### This proves that each LLM call is independent. Our Model doesn't have memory"
116
+ ]
117
+ },
118
+ {
119
+ "cell_type": "code",
120
+ "execution_count": 8,
121
+ "id": "cd3ade31",
122
+ "metadata": {},
123
+ "outputs": [
124
+ {
125
+ "name": "stdout",
126
+ "output_type": "stream",
127
+ "text": [
128
+ "Great — love the ambition, Akhil. If you want to be “the future of AI,” I can help you get there. How would you like me to help right now? (Pick one: roadmap, project ideas, resume/LinkedIn copy, interview prep, or a 12‑month actionable plan.)\n",
129
+ "\n",
130
+ "Below are a few immediately useful things you can use or ask me to expand.\n",
131
+ "\n",
132
+ "Quick elevator pitch / LinkedIn headline\n",
133
+ "- Headline: Akhil — Building safe, scalable AI that augments human creativity and solves real-world problems\n",
134
+ "- 1‑line pitch: “I build trustworthy AI systems that turn complex data into products people love — with a focus on safety, scalability, and real-world impact.”\n",
135
+ "\n",
136
+ "High‑level skills to prioritize\n",
137
+ "- Foundations: probability, linear algebra, optimization\n",
138
+ "- Core ML: supervised learning, neural networks, transfer learning, transformers\n",
139
+ "- Systems & infra: PyTorch/TensorFlow, Docker, Kubernetes, model serving, MLOps\n",
140
+ "- Specialized: LLMs, RL, generative models, multimodal models (vision+language)\n",
141
+ "- Soft skills: product sense, communication, writing and presenting research\n",
142
+ "- Ethics & safety: alignment concepts, bias mitigation, robust evaluation\n",
143
+ "\n",
144
+ "3 concrete projects (increasing complexity)\n",
145
+ "1. End‑to‑end ML app: simple image classifier with deployment (Flask/FastAPI + Docker + test pipeline)\n",
146
+ "2. LLM product prototype: retrieval-augmented chatbot for a specific domain (docs → vector DB → RAG)\n",
147
+ "3. Research/engineering hybrid: fine-tune or distill a model for efficiency and publish a blog post + code on GitHub\n",
148
+ "\n",
149
+ "Practical 12‑month roadmap (high level)\n",
150
+ "- Months 0–2: Fill gaps — math refresher, PyTorch, small projects, GitHub portfolio\n",
151
+ "- Months 3–5: Build and deploy 2 production prototypes (one LLM-based), publish writeups\n",
152
+ "- Months 6–9: Contribute to OSS or collaborate on a research project; attend conferences/meetups\n",
153
+ "- Months 10–12: Target internships/roles, refine portfolio, prepare interviews, publish a substantial case study or replication\n",
154
+ "\n",
155
+ "Quick resources\n",
156
+ "- Fast theory/math: “Mathematics for Machine Learning” + 3Blue1Brown playlists\n",
157
+ "- Practical ML: Deep Learning Book (selected chapters), PyTorch docs, Hugging Face course\n",
158
+ "- MLOps/RAG: LangChain/HF tutorials, Vector DB docs (Pinecone/Weaviate)\n",
159
+ "\n",
160
+ "If you want, I can:\n",
161
+ "- Create a personalized 6‑ or 12‑month plan based on your background and time availability\n",
162
+ "- Draft a LinkedIn summary, resume bullets, or a cover letter\n",
163
+ "- Design a project roadmap with milestones and tech stack\n",
164
+ "Tell me which and give me your experience level (student / early-career / senior / founder) and how many hours per week you can commit.\n",
165
+ "Short answer: you’re Akhil — the person who just told me “I am going to be the Future of AI.” Beyond that, only you can fully answer “who am I,” but I can help you shape a clear, useful version of that identity for career, confidence, and action.\n",
166
+ "\n",
167
+ "Pick one of these and I’ll build it for you:\n",
168
+ "- A crisp personal identity/mission statement (1–2 lines)\n",
169
+ "- A short LinkedIn “About” summary\n",
170
+ "- A 12‑month plan to become a leader in AI\n",
171
+ "- A set of interview/resume bullets matched to your level\n",
172
+ "\n",
173
+ "If you want to explore it yourself first, answer 5 quick prompts (one sentence each):\n",
174
+ "1. What technical skills do you already have (languages, frameworks, papers/projects)?\n",
175
+ "2. What do you enjoy doing most in AI (research, building products, deploying models, safety/ethics)?\n",
176
+ "3. What impact do you want to have (industry, research, social good, startups)?\n",
177
+ "4. What are your top 2 strengths and top 1 weakness you want to fix?\n",
178
+ "5. How many hours/week can you commit to learning or working toward this goal?\n",
179
+ "\n",
180
+ "Or, if you want an immediate example identity statement based on your earlier claim:\n",
181
+ "- “I’m Akhil — an aspiring AI leader building safe, scalable systems that augment human creativity. My mission is to bridge cutting‑edge research and real‑world impact.”\n",
182
+ "\n",
183
+ "Tell me which option you want or answer the 5 prompts and I’ll draft something tailored.\n"
184
+ ]
185
+ }
186
+ ],
187
+ "source": [
188
+ "### Managing conversation history\n",
189
+ "\n",
190
+ "\n",
191
+ "from litellm import completion\n",
192
+ "\n",
193
+ "## Maintain a messages object\n",
194
+ "messages = []\n",
195
+ "\n",
196
+ "## append your message/conversation\n",
197
+ "messages.append({'role':'user', 'content':'My name is Akhil and I am going to be the Future of AI'})\n",
198
+ "response3 = completion(model = 'gpt-5-mini', messages = messages)\n",
199
+ "\n",
200
+ "print(response3.choices[0].message.content)\n",
201
+ "\n",
202
+ "## append the message from assistant\n",
203
+ "messages.append({'role':'assistant', 'content':response3.choices[0].message.content})\n",
204
+ "\n",
205
+ "## write a new message\n",
206
+ "messages.append({'role':'user', 'content':'who am i'})\n",
207
+ "response4 = completion(model = 'gpt-5-mini', messages = messages)\n",
208
+ "\n",
209
+ "print(response4.choices[0].message.content)\n",
210
+ "\n",
211
+ "\n"
212
+ ]
213
+ },
214
+ {
215
+ "cell_type": "code",
216
+ "execution_count": null,
217
+ "id": "e0868cf6",
218
+ "metadata": {},
219
+ "outputs": [
220
+ {
221
+ "name": "stdout",
222
+ "output_type": "stream",
223
+ "text": [
224
+ "{\"name\":\"Akhil\",\"email\":\"akhil.masters21@gmail.com\",\"phone\":\"9550303420\"}\n"
225
+ ]
226
+ }
227
+ ],
228
+ "source": [
229
+ "### Structured output\n",
230
+ "\n",
231
+ "from pydantic import BaseModel\n",
232
+ "from litellm import completion\n",
233
+ "\n",
234
+ "class ExtractedInfo(BaseModel):\n",
235
+ " name : str\n",
236
+ " email : str\n",
237
+ " phone : str | None = None\n",
238
+ "\n",
239
+ "response = completion(\n",
240
+ " model=\"gpt-5-mini\",\n",
241
+ " messages=[{\n",
242
+ " \"role\": \"user\", \n",
243
+ " \"content\": \"My name is Akhil, my email is akhil.masters21@gmail.com, and my phone is 9550303420.\"\n",
244
+ " }],\n",
245
+ " response_format=ExtractedInfo\n",
246
+ ")\n",
247
+ "\n",
248
+ "print(response.choices[0].message.content)"
249
+ ]
250
+ },
251
+ {
252
+ "cell_type": "code",
253
+ "execution_count": null,
254
+ "id": "03d48814",
255
+ "metadata": {},
256
+ "outputs": [
257
+ {
258
+ "name": "stdout",
259
+ "output_type": "stream",
260
+ "text": [
261
+ "Q: What is 2 + 2?\n",
262
+ "A: 2 + 2 = 4.\n",
263
+ "\n",
264
+ "Q: What is the capital of Japan?\n",
265
+ "A: The capital of Japan is Tokyo.\n",
266
+ "\n",
267
+ "Q: Who wrote Romeo and Juliet?\n",
268
+ "A: Romeo and Juliet was written by William Shakespeare. It was likely written and first performed in the mid-1590s (published in 1597).\n",
269
+ "\n"
270
+ ]
271
+ }
272
+ ],
273
+ "source": [
274
+ "### Asynchronus calls\n",
275
+ "\n",
276
+ "import asyncio\n",
277
+ "from litellm import acompletion\n",
278
+ "async def get_response(prompt: str) -> str:\n",
279
+ " response = await acompletion(\n",
280
+ " model = 'gpt-5-mini',\n",
281
+ " messages=[{\"role\": \"user\", \"content\": prompt}]\n",
282
+ " )\n",
283
+ " return response.choices[0].message.content\n",
284
+ " \n",
285
+ "prompts = [\n",
286
+ " \"What is 2 + 2?\",\n",
287
+ " \"What is the capital of Japan?\",\n",
288
+ " \"Who wrote Romeo and Juliet?\"\n",
289
+ "]\n",
290
+ "\n",
291
+ "### here \n",
292
+ "## tasks = [get_response(What is 2 + 2?), get_response(What is the capital of Japan?)] \n",
293
+ "## doesnt run the function, it just creates a coroutine object. Thats the difference in async.\n",
294
+ "## functions are called in gather step\n",
295
+ "\n",
296
+ "tasks = [get_response(p) for p in prompts]\n",
297
+ "results = await asyncio.gather(*tasks)\n",
298
+ "\n",
299
+ "for prompt, result in zip(prompts, results):\n",
300
+ " print(f\"Q: {prompt}\")\n",
301
+ " print(f\"A: {result}\\n\")\n"
302
+ ]
303
+ },
304
+ {
305
+ "cell_type": "code",
306
+ "execution_count": 14,
307
+ "id": "3333de1d",
308
+ "metadata": {},
309
+ "outputs": [
310
+ {
311
+ "name": "stdout",
312
+ "output_type": "stream",
313
+ "text": [
314
+ "Q: What is 0 + 0?\n",
315
+ "A: 0\n",
316
+ "\n",
317
+ "Because adding zero to zero yields zero.\n",
318
+ "\n",
319
+ "Q: What is 1 + 1?\n",
320
+ "A: 1 + 1 = 2.\n",
321
+ "\n",
322
+ "Q: What is 2 + 2?\n",
323
+ "A: 2 + 2 = 4.\n",
324
+ "\n",
325
+ "Q: What is 3 + 3?\n",
326
+ "A: 3 + 3 = 6.\n",
327
+ "\n",
328
+ "Q: What is 4 + 4?\n",
329
+ "A: 8\n",
330
+ "\n",
331
+ "Q: What is 5 + 5?\n",
332
+ "A: 10\n",
333
+ "\n",
334
+ "Q: What is 6 + 6?\n",
335
+ "A: 12\n",
336
+ "\n",
337
+ "Q: What is 7 + 7?\n",
338
+ "A: 14\n",
339
+ "\n",
340
+ "Q: What is 8 + 8?\n",
341
+ "A: 16\n",
342
+ "\n",
343
+ "Q: What is 9 + 9?\n",
344
+ "A: 18\n",
345
+ "\n",
346
+ "Q: What is 10 + 10?\n",
347
+ "A: 10 + 10 = 20\n",
348
+ "\n",
349
+ "Q: What is 11 + 11?\n",
350
+ "A: 22\n",
351
+ "\n",
352
+ "Q: What is 12 + 12?\n",
353
+ "A: 24\n",
354
+ "\n",
355
+ "Q: What is 13 + 13?\n",
356
+ "A: 26\n",
357
+ "\n",
358
+ "Q: What is 14 + 14?\n",
359
+ "A: 28\n",
360
+ "\n",
361
+ "Q: What is 15 + 15?\n",
362
+ "A: 30\n",
363
+ "\n",
364
+ "Q: What is 16 + 16?\n",
365
+ "A: 32\n",
366
+ "\n",
367
+ "Q: What is 17 + 17?\n",
368
+ "A: 34\n",
369
+ "\n",
370
+ "Q: What is 18 + 18?\n",
371
+ "A: 36\n",
372
+ "\n",
373
+ "Q: What is 19 + 19?\n",
374
+ "A: 38\n",
375
+ "\n"
376
+ ]
377
+ }
378
+ ],
379
+ "source": [
380
+ "### rate limiting queries\n",
381
+ "semaphore = asyncio.Semaphore(10)\n",
382
+ "\n",
383
+ "async def call_llm(prompt : str) -> str:\n",
384
+ " async with semaphore:\n",
385
+ " response = await acompletion(\n",
386
+ " model=\"gpt-5-mini\",\n",
387
+ " messages=[{\"role\": \"user\", \"content\": prompt}],\n",
388
+ " num_retries=3 # Automatic retry with exponential backoff\n",
389
+ " )\n",
390
+ " return response.choices[0].message.content\n",
391
+ "prompts = [f\"What is {i} + {i}?\" for i in range(20)]\n",
392
+ "tasks = [call_llm(p) for p in prompts]\n",
393
+ "results = await asyncio.gather(*tasks, return_exceptions=True)\n",
394
+ "\n",
395
+ "\n",
396
+ "for prompt, result in zip(prompts, results):\n",
397
+ " print(f\"Q: {prompt}\")\n",
398
+ " print(f\"A: {result}\\n\")\n"
399
+ ]
400
+ },
401
+ {
402
+ "cell_type": "code",
403
+ "execution_count": 16,
404
+ "id": "1caef766",
405
+ "metadata": {},
406
+ "outputs": [
407
+ {
408
+ "name": "stderr",
409
+ "output_type": "stream",
410
+ "text": [
411
+ "Generating test split: 100%|██████████| 93/93 [00:00<00:00, 1653.78 examples/s]\n",
412
+ "Generating validation split: 100%|██████████| 53/53 [00:00<00:00, 32022.20 examples/s]"
413
+ ]
414
+ },
415
+ {
416
+ "name": "stdout",
417
+ "output_type": "stream",
418
+ "text": [
419
+ "Number of Level 1 problems: 53\n"
420
+ ]
421
+ },
422
+ {
423
+ "name": "stderr",
424
+ "output_type": "stream",
425
+ "text": [
426
+ "\n"
427
+ ]
428
+ }
429
+ ],
430
+ "source": [
431
+ "## loading the GAIA dataset\n",
432
+ "\n",
433
+ "from datasets import load_dataset\n",
434
+ "level1_problems = load_dataset(\"gaia-benchmark/GAIA\", \"2023_level1\", split=\"validation\")\n",
435
+ "print(f\"Number of Level 1 problems: {len(level1_problems)}\")\n"
436
+ ]
437
+ },
438
+ {
439
+ "cell_type": "code",
440
+ "execution_count": 17,
441
+ "id": "733c211c",
442
+ "metadata": {},
443
+ "outputs": [
444
+ {
445
+ "data": {
446
+ "text/plain": [
447
+ "{'task_id': '8e867cd7-cff9-4e6c-867a-ff5ddc2550be',\n",
448
+ " 'Question': 'How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia.',\n",
449
+ " 'Level': '1',\n",
450
+ " 'Final answer': '3',\n",
451
+ " 'file_name': '',\n",
452
+ " 'file_path': '',\n",
453
+ " 'Annotator Metadata': {'Steps': '1. I did a search for Mercedes Sosa\\n2. I went to the Wikipedia page for her\\n3. I scrolled down to \"Studio albums\"\\n4. I counted the ones between 2000 and 2009',\n",
454
+ " 'Number of steps': '4',\n",
455
+ " 'How long did this take?': '5 minutes',\n",
456
+ " 'Tools': '1. web browser\\n2. google search',\n",
457
+ " 'Number of tools': '2'}}"
458
+ ]
459
+ },
460
+ "execution_count": 17,
461
+ "metadata": {},
462
+ "output_type": "execute_result"
463
+ }
464
+ ],
465
+ "source": [
466
+ "level1_problems[1]"
467
+ ]
468
+ },
469
+ {
470
+ "cell_type": "code",
471
+ "execution_count": 19,
472
+ "id": "3d5bcb22",
473
+ "metadata": {},
474
+ "outputs": [
475
+ {
476
+ "name": "stderr",
477
+ "output_type": "stream",
478
+ "text": [
479
+ "100%|██████████| 40/40 [02:23<00:00, 3.58s/it]\n"
480
+ ]
481
+ }
482
+ ],
483
+ "source": [
484
+ "## defining a respose for gaia\n",
485
+ "from pydantic import BaseModel\n",
486
+ "from tqdm.asyncio import tqdm\n",
487
+ "gaia_prompt = \"\"\"You are a general AI assistant. I will ask you a question.\n",
488
+ "First, determine if you can solve this problem with your current capabilities and set \"is_solvable\" accordingly.\n",
489
+ "If you can solve it, set \"is_solvable\" to true and provide your answer in \"final_answer\".\n",
490
+ "If you cannot solve it, set \"is_solvable\" to false and explain why in \"unsolvable_reason\".\n",
491
+ "Your final answer should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.\n",
492
+ "If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.\n",
493
+ "If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.\n",
494
+ "If you are asked for a comma separated list, apply the above rules depending on whether the element is a number or a string.\"\"\"\n",
495
+ "\n",
496
+ "class GaiaOutput(BaseModel):\n",
497
+ " is_solvable: bool\n",
498
+ " unsolvable_reason: str = \"\"\n",
499
+ " final_answer: str = \"\"\n",
500
+ "\n",
501
+ "PROVIDER_SEMAPHORES = {'openai': asyncio.Semaphore(30), 'anthropic': asyncio.Semaphore(10)}\n",
502
+ "\n",
503
+ "def get_provider(model: str) -> str:\n",
504
+ " return \"anthropic\" if model.startswith(\"anthropic/\") else \"openai\"\n",
505
+ "\n",
506
+ "\n",
507
+ "async def solve_problem(model: str, question: str) -> GaiaOutput:\n",
508
+ " provider = get_provider(model)\n",
509
+ " async with PROVIDER_SEMAPHORES[provider]:\n",
510
+ " response = await acompletion(\n",
511
+ " model = model,\n",
512
+ " messages=[\n",
513
+ " {\"role\": \"system\", \"content\": gaia_prompt},\n",
514
+ " {\"role\": \"user\", \"content\": question},\n",
515
+ " ],\n",
516
+ " response_format=GaiaOutput,\n",
517
+ " num_retries=2,\n",
518
+ " )\n",
519
+ " finish_reason = response.choices[0].finish_reason\n",
520
+ " content = response.choices[0].message.content\n",
521
+ " if finish_reason == \"refusal\" or content is None:\n",
522
+ " return GaiaOutput(\n",
523
+ " is_solvable=False,\n",
524
+ " unsolvable_reason=f\"Model refused to answer (finish_reason: {finish_reason})\",\n",
525
+ " final_answer=\"\"\n",
526
+ " )\n",
527
+ " return GaiaOutput.model_validate_json(content)\n",
528
+ "\n",
529
+ "def is_correct(prediction: str | None, answer: str) -> bool:\n",
530
+ " \"\"\"Check exact match between prediction and answer (case-insensitive).\"\"\"\n",
531
+ " if prediction is None:\n",
532
+ " return False\n",
533
+ " return prediction.strip().lower() == answer.strip().lower()\n",
534
+ "\n",
535
+ "async def evaluate_gaia_single(problem: dict, model: str) -> dict:\n",
536
+ " \"\"\"Evaluate a single problem-model pair and return result.\"\"\"\n",
537
+ " try:\n",
538
+ " output = await solve_problem(model, problem[\"Question\"])\n",
539
+ " return {\n",
540
+ " \"task_id\": problem[\"task_id\"],\n",
541
+ " \"model\": model,\n",
542
+ " \"correct\": is_correct(output.final_answer, problem[\"Final answer\"]),\n",
543
+ " \"is_solvable\": output.is_solvable,\n",
544
+ " \"prediction\": output.final_answer,\n",
545
+ " \"answer\": problem[\"Final answer\"],\n",
546
+ " \"unsolvable_reason\": output.unsolvable_reason,\n",
547
+ " }\n",
548
+ " except Exception as e:\n",
549
+ " return {\n",
550
+ " \"task_id\": problem[\"task_id\"],\n",
551
+ " \"model\": model,\n",
552
+ " \"correct\": False,\n",
553
+ " \"is_solvable\": None,\n",
554
+ " \"prediction\": None,\n",
555
+ " \"answer\": problem[\"Final answer\"],\n",
556
+ " \"error\": str(e),\n",
557
+ " }\n",
558
+ "\n",
559
+ "async def run_experiment(\n",
560
+ " problems: list[dict],\n",
561
+ " models: list[str],\n",
562
+ ") -> dict[str, list]:\n",
563
+ " \"\"\"Evaluate all models on all problems.\"\"\"\n",
564
+ " tasks = [\n",
565
+ " evaluate_gaia_single(problem, model)\n",
566
+ " for problem in problems\n",
567
+ " for model in models\n",
568
+ " ]\n",
569
+ " \n",
570
+ " all_results = await tqdm.gather(*tasks)\n",
571
+ " \n",
572
+ " # Group results by model\n",
573
+ " results = {model: [] for model in models}\n",
574
+ " for result in all_results:\n",
575
+ " results[result[\"model\"]].append(result)\n",
576
+ " \n",
577
+ " return results\n",
578
+ "\n",
579
+ "MODELS = [\n",
580
+ " \"gpt-5\",\n",
581
+ " \"gpt-5-mini\"\n",
582
+ "]\n",
583
+ " \n",
584
+ "subset = level1_problems.select(range(20))\n",
585
+ "results = await run_experiment(subset, MODELS)"
586
+ ]
587
+ },
588
+ {
589
+ "cell_type": "code",
590
+ "execution_count": 20,
591
+ "id": "04f60efa",
592
+ "metadata": {},
593
+ "outputs": [
594
+ {
595
+ "data": {
596
+ "text/plain": [
597
+ "{'gpt-5': [{'task_id': 'e1fc63a2-da7a-432f-be78-7c4a95598703',\n",
598
+ " 'model': 'gpt-5',\n",
599
+ " 'correct': True,\n",
600
+ " 'is_solvable': True,\n",
601
+ " 'prediction': '17',\n",
602
+ " 'answer': '17',\n",
603
+ " 'unsolvable_reason': ''},\n",
604
+ " {'task_id': '8e867cd7-cff9-4e6c-867a-ff5ddc2550be',\n",
605
+ " 'model': 'gpt-5',\n",
606
+ " 'correct': False,\n",
607
+ " 'is_solvable': True,\n",
608
+ " 'prediction': '4',\n",
609
+ " 'answer': '3',\n",
610
+ " 'unsolvable_reason': ''},\n",
611
+ " {'task_id': 'ec09fa32-d03f-4bf8-84b0-1f16922c3ae4',\n",
612
+ " 'model': 'gpt-5',\n",
613
+ " 'correct': True,\n",
614
+ " 'is_solvable': True,\n",
615
+ " 'prediction': '3',\n",
616
+ " 'answer': '3',\n",
617
+ " 'unsolvable_reason': ''},\n",
618
+ " {'task_id': '5d0080cb-90d7-4712-bc33-848150e917d3',\n",
619
+ " 'model': 'gpt-5',\n",
620
+ " 'correct': False,\n",
621
+ " 'is_solvable': False,\n",
622
+ " 'prediction': '',\n",
623
+ " 'answer': '0.1777',\n",
624
+ " 'unsolvable_reason': 'I don’t have access to the specific paper text or its figures and can’t browse to retrieve the exact calculated volume.'},\n",
625
+ " {'task_id': 'a1e91b78-d3d8-4675-bb8d-62741b4b68a6',\n",
626
+ " 'model': 'gpt-5',\n",
627
+ " 'correct': False,\n",
628
+ " 'is_solvable': False,\n",
629
+ " 'prediction': '',\n",
630
+ " 'answer': '3',\n",
631
+ " 'unsolvable_reason': 'I can’t access or watch the linked video to determine the number.'},\n",
632
+ " {'task_id': '46719c30-f4c3-4cad-be07-d5cb21eee6bb',\n",
633
+ " 'model': 'gpt-5',\n",
634
+ " 'correct': False,\n",
635
+ " 'is_solvable': False,\n",
636
+ " 'prediction': '',\n",
637
+ " 'answer': 'Mapping Human Oriented Information to Software Agents for Online Systems Usage',\n",
638
+ " 'unsolvable_reason': 'I need to look up the 2015 paper’s author list and their publication histories, which I cannot access without web browsing or additional details.'},\n",
639
+ " {'task_id': '4b6bb5f7-f634-410e-815d-e673ab7f8632',\n",
640
+ " 'model': 'gpt-5',\n",
641
+ " 'correct': True,\n",
642
+ " 'is_solvable': True,\n",
643
+ " 'prediction': 'THE CASTLE',\n",
644
+ " 'answer': 'THE CASTLE',\n",
645
+ " 'unsolvable_reason': ''},\n",
646
+ " {'task_id': 'cffe0e32-c9a6-4c52-9877-78ceb4aaa9fb',\n",
647
+ " 'model': 'gpt-5',\n",
648
+ " 'correct': False,\n",
649
+ " 'is_solvable': False,\n",
650
+ " 'prediction': '',\n",
651
+ " 'answer': 'Fred',\n",
652
+ " 'unsolvable_reason': 'The referenced document with employee profiles and gift assignments is not provided, so the giver who failed to give a gift cannot be determined.'},\n",
653
+ " {'task_id': '2d83110e-a098-4ebb-9987-066c06fa42d0',\n",
654
+ " 'model': 'gpt-5',\n",
655
+ " 'correct': True,\n",
656
+ " 'is_solvable': True,\n",
657
+ " 'prediction': 'right',\n",
658
+ " 'answer': 'Right',\n",
659
+ " 'unsolvable_reason': ''},\n",
660
+ " {'task_id': '5cfb274c-0207-4aa7-9575-6ac0bd95d9b2',\n",
661
+ " 'model': 'gpt-5',\n",
662
+ " 'correct': False,\n",
663
+ " 'is_solvable': False,\n",
664
+ " 'prediction': '',\n",
665
+ " 'answer': 'No',\n",
666
+ " 'unsolvable_reason': 'Missing the spreadsheet/layout of green plots, so I cannot determine if a non-backtracking loop exists.'},\n",
667
+ " {'task_id': '27d5d136-8563-469e-92bf-fd103c28b57c',\n",
668
+ " 'model': 'gpt-5',\n",
669
+ " 'correct': True,\n",
670
+ " 'is_solvable': True,\n",
671
+ " 'prediction': '(¬A → B) ↔ (A ∨ ¬B)',\n",
672
+ " 'answer': '(¬A → B) ↔ (A ∨ ¬B)',\n",
673
+ " 'unsolvable_reason': ''},\n",
674
+ " {'task_id': 'dc28cf18-6431-458b-83ef-64b3ce566c10',\n",
675
+ " 'model': 'gpt-5',\n",
676
+ " 'correct': True,\n",
677
+ " 'is_solvable': True,\n",
678
+ " 'prediction': '2',\n",
679
+ " 'answer': '2',\n",
680
+ " 'unsolvable_reason': ''},\n",
681
+ " {'task_id': 'b816bfce-3d80-4913-a07d-69b752ce6377',\n",
682
+ " 'model': 'gpt-5',\n",
683
+ " 'correct': False,\n",
684
+ " 'is_solvable': True,\n",
685
+ " 'prediction': 'cute',\n",
686
+ " 'answer': 'fluffy',\n",
687
+ " 'unsolvable_reason': ''},\n",
688
+ " {'task_id': '72e110e7-464c-453c-a309-90a95aed6538',\n",
689
+ " 'model': 'gpt-5',\n",
690
+ " 'correct': False,\n",
691
+ " 'is_solvable': False,\n",
692
+ " 'prediction': '',\n",
693
+ " 'answer': 'Guatemala',\n",
694
+ " 'unsolvable_reason': 'I don’t have browsing access to verify the 2020 BASE DDC 633 page and its flags.'},\n",
695
+ " {'task_id': '42576abe-0deb-4869-8c63-225c2d75a95a',\n",
696
+ " 'model': 'gpt-5',\n",
697
+ " 'correct': True,\n",
698
+ " 'is_solvable': True,\n",
699
+ " 'prediction': 'Maktay Mato Apple',\n",
700
+ " 'answer': 'Maktay mato apple',\n",
701
+ " 'unsolvable_reason': ''},\n",
702
+ " {'task_id': 'b415aba4-4b68-4fc6-9b89-2c812e55a3e1',\n",
703
+ " 'model': 'gpt-5',\n",
704
+ " 'correct': False,\n",
705
+ " 'is_solvable': False,\n",
706
+ " 'prediction': '',\n",
707
+ " 'answer': 'diamond',\n",
708
+ " 'unsolvable_reason': 'I don’t have browsing tools to look up the specific 2012 Scientific Reports conference proceedings article and identify the nano-compound without external access.'},\n",
709
+ " {'task_id': 'cca530fc-4052-43b2-b130-b30968d8aa44',\n",
710
+ " 'model': 'gpt-5',\n",
711
+ " 'correct': False,\n",
712
+ " 'is_solvable': False,\n",
713
+ " 'prediction': '',\n",
714
+ " 'answer': 'Rd5',\n",
715
+ " 'unsolvable_reason': 'Cannot view the chessboard image'},\n",
716
+ " {'task_id': '935e2cff-ae78-4218-b3f5-115589b19dae',\n",
717
+ " 'model': 'gpt-5',\n",
718
+ " 'correct': True,\n",
719
+ " 'is_solvable': True,\n",
720
+ " 'prediction': 'research',\n",
721
+ " 'answer': 'research',\n",
722
+ " 'unsolvable_reason': ''},\n",
723
+ " {'task_id': '4fc2f1ae-8625-45b5-ab34-ad4433bc21f8',\n",
724
+ " 'model': 'gpt-5',\n",
725
+ " 'correct': True,\n",
726
+ " 'is_solvable': True,\n",
727
+ " 'prediction': 'FunkMonk',\n",
728
+ " 'answer': 'FunkMonk',\n",
729
+ " 'unsolvable_reason': ''},\n",
730
+ " {'task_id': '5188369a-3bbe-43d8-8b94-11558f909a08',\n",
731
+ " 'model': 'gpt-5',\n",
732
+ " 'correct': False,\n",
733
+ " 'is_solvable': False,\n",
734
+ " 'prediction': '',\n",
735
+ " 'answer': 'Annie Levin',\n",
736
+ " 'unsolvable_reason': 'I need to look up Merriam-Webster’s Word of the Day page for June 27, 2022 to see the quoted writer, but I don’t have browsing access.'}],\n",
737
+ " 'gpt-5-mini': [{'task_id': 'e1fc63a2-da7a-432f-be78-7c4a95598703',\n",
738
+ " 'model': 'gpt-5-mini',\n",
739
+ " 'correct': False,\n",
740
+ " 'is_solvable': False,\n",
741
+ " 'prediction': '',\n",
742
+ " 'answer': '17',\n",
743
+ " 'unsolvable_reason': 'I cannot access external websites such as Wikipedia to retrieve the exact minimum perigee value required for the calculation.'},\n",
744
+ " {'task_id': '8e867cd7-cff9-4e6c-867a-ff5ddc2550be',\n",
745
+ " 'model': 'gpt-5-mini',\n",
746
+ " 'correct': False,\n",
747
+ " 'is_solvable': False,\n",
748
+ " 'prediction': '',\n",
749
+ " 'answer': '3',\n",
750
+ " 'unsolvable_reason': \"I cannot access the 2022 English Wikipedia from here to verify Mercedes Sosa's discography and reliably count studio albums released between 2000 and 2009.\"},\n",
751
+ " {'task_id': 'ec09fa32-d03f-4bf8-84b0-1f16922c3ae4',\n",
752
+ " 'model': 'gpt-5-mini',\n",
753
+ " 'correct': True,\n",
754
+ " 'is_solvable': True,\n",
755
+ " 'prediction': '3',\n",
756
+ " 'answer': '3',\n",
757
+ " 'unsolvable_reason': ''},\n",
758
+ " {'task_id': '5d0080cb-90d7-4712-bc33-848150e917d3',\n",
759
+ " 'model': 'gpt-5-mini',\n",
760
+ " 'correct': False,\n",
761
+ " 'is_solvable': False,\n",
762
+ " 'prediction': '',\n",
763
+ " 'answer': '0.1777',\n",
764
+ " 'unsolvable_reason': \"I cannot access external documents or the internet and do not have the paper's calculated fish bag volume memorized.\"},\n",
765
+ " {'task_id': 'a1e91b78-d3d8-4675-bb8d-62741b4b68a6',\n",
766
+ " 'model': 'gpt-5-mini',\n",
767
+ " 'correct': False,\n",
768
+ " 'is_solvable': False,\n",
769
+ " 'prediction': '',\n",
770
+ " 'answer': '3',\n",
771
+ " 'unsolvable_reason': 'I cannot access or view external video content (YouTube) to count bird species on screen.'},\n",
772
+ " {'task_id': '46719c30-f4c3-4cad-be07-d5cb21eee6bb',\n",
773
+ " 'model': 'gpt-5-mini',\n",
774
+ " 'correct': False,\n",
775
+ " 'is_solvable': False,\n",
776
+ " 'prediction': '',\n",
777
+ " 'answer': 'Mapping Human Oriented Information to Software Agents for Online Systems Usage',\n",
778
+ " 'unsolvable_reason': \"I cannot access external databases or the internet to look up the 2015 paper's authors and their publication histories, and I do not have that specific bibliographic information memorized.\"},\n",
779
+ " {'task_id': '4b6bb5f7-f634-410e-815d-e673ab7f8632',\n",
780
+ " 'model': 'gpt-5-mini',\n",
781
+ " 'correct': False,\n",
782
+ " 'is_solvable': False,\n",
783
+ " 'prediction': '',\n",
784
+ " 'answer': 'THE CASTLE',\n",
785
+ " 'unsolvable_reason': 'I cannot reliably recall the exact wording of the first scene heading from the official script and I cannot access external resources to check the script to provide the precise, verbatim setting.'},\n",
786
+ " {'task_id': 'cffe0e32-c9a6-4c52-9877-78ceb4aaa9fb',\n",
787
+ " 'model': 'gpt-5-mini',\n",
788
+ " 'correct': False,\n",
789
+ " 'is_solvable': False,\n",
790
+ " 'prediction': '',\n",
791
+ " 'answer': 'Fred',\n",
792
+ " 'unsolvable_reason': 'Insufficient information: the document with the employees, their likes, and assignment/gift details was not provided.'},\n",
793
+ " {'task_id': '2d83110e-a098-4ebb-9987-066c06fa42d0',\n",
794
+ " 'model': 'gpt-5-mini',\n",
795
+ " 'correct': True,\n",
796
+ " 'is_solvable': True,\n",
797
+ " 'prediction': 'right',\n",
798
+ " 'answer': 'Right',\n",
799
+ " 'unsolvable_reason': ''},\n",
800
+ " {'task_id': '5cfb274c-0207-4aa7-9575-6ac0bd95d9b2',\n",
801
+ " 'model': 'gpt-5-mini',\n",
802
+ " 'correct': False,\n",
803
+ " 'is_solvable': False,\n",
804
+ " 'prediction': '',\n",
805
+ " 'answer': 'No',\n",
806
+ " 'unsolvable_reason': 'I cannot access the attached spreadsheet or any images. Paste the grid (use G for Earl plots and . for others) or give coordinates so I can analyze the path.'},\n",
807
+ " {'task_id': '27d5d136-8563-469e-92bf-fd103c28b57c',\n",
808
+ " 'model': 'gpt-5-mini',\n",
809
+ " 'correct': True,\n",
810
+ " 'is_solvable': True,\n",
811
+ " 'prediction': '(¬A → B) ↔ (A ∨ ¬B)',\n",
812
+ " 'answer': '(¬A → B) ↔ (A ∨ ¬B)',\n",
813
+ " 'unsolvable_reason': ''},\n",
814
+ " {'task_id': 'dc28cf18-6431-458b-83ef-64b3ce566c10',\n",
815
+ " 'model': 'gpt-5-mini',\n",
816
+ " 'correct': True,\n",
817
+ " 'is_solvable': True,\n",
818
+ " 'prediction': '2',\n",
819
+ " 'answer': '2',\n",
820
+ " 'unsolvable_reason': ''},\n",
821
+ " {'task_id': 'b816bfce-3d80-4913-a07d-69b752ce6377',\n",
822
+ " 'model': 'gpt-5-mini',\n",
823
+ " 'correct': False,\n",
824
+ " 'is_solvable': False,\n",
825
+ " 'prediction': '',\n",
826
+ " 'answer': 'fluffy',\n",
827
+ " 'unsolvable_reason': \"I cannot access external sources to read Emily Midkiff's June 2014 article in Fafnir and so cannot determine the quoted word.\"},\n",
828
+ " {'task_id': '72e110e7-464c-453c-a309-90a95aed6538',\n",
829
+ " 'model': 'gpt-5-mini',\n",
830
+ " 'correct': False,\n",
831
+ " 'is_solvable': False,\n",
832
+ " 'prediction': '',\n",
833
+ " 'answer': 'Guatemala',\n",
834
+ " 'unsolvable_reason': 'I cannot browse the Bielefeld University Library BASE site or view its 2020 content to inspect the article flags. Determining which country’s flag was unique requires accessing that specific webpage or an archived snapshot, which I cannot do.'},\n",
835
+ " {'task_id': '42576abe-0deb-4869-8c63-225c2d75a95a',\n",
836
+ " 'model': 'gpt-5-mini',\n",
837
+ " 'correct': True,\n",
838
+ " 'is_solvable': True,\n",
839
+ " 'prediction': 'Maktay Mato Apple',\n",
840
+ " 'answer': 'Maktay mato apple',\n",
841
+ " 'unsolvable_reason': ''},\n",
842
+ " {'task_id': 'b415aba4-4b68-4fc6-9b89-2c812e55a3e1',\n",
843
+ " 'model': 'gpt-5-mini',\n",
844
+ " 'correct': False,\n",
845
+ " 'is_solvable': False,\n",
846
+ " 'prediction': '',\n",
847
+ " 'answer': 'diamond',\n",
848
+ " 'unsolvable_reason': 'I cannot access external web resources or the specific 2012 Scientific Reports conference proceedings to identify that article and its studied compound.'},\n",
849
+ " {'task_id': 'cca530fc-4052-43b2-b130-b30968d8aa44',\n",
850
+ " 'model': 'gpt-5-mini',\n",
851
+ " 'correct': False,\n",
852
+ " 'is_solvable': False,\n",
853
+ " 'prediction': 'image not available',\n",
854
+ " 'answer': 'Rd5',\n",
855
+ " 'unsolvable_reason': 'Image not provided or inaccessible; cannot determine board position and legal winning move'},\n",
856
+ " {'task_id': '935e2cff-ae78-4218-b3f5-115589b19dae',\n",
857
+ " 'model': 'gpt-5-mini',\n",
858
+ " 'correct': False,\n",
859
+ " 'is_solvable': False,\n",
860
+ " 'prediction': '',\n",
861
+ " 'answer': 'research',\n",
862
+ " 'unsolvable_reason': \"I cannot access or view the specific Wikipedia public logs for the Legume page from 2022; determining what 'R' stood for requires looking at those logs or contemporaneous Wikipedia discussion, which I cannot browse from here.\"},\n",
863
+ " {'task_id': '4fc2f1ae-8625-45b5-ab34-ad4433bc21f8',\n",
864
+ " 'model': 'gpt-5-mini',\n",
865
+ " 'correct': False,\n",
866
+ " 'is_solvable': False,\n",
867
+ " 'prediction': '',\n",
868
+ " 'answer': 'FunkMonk',\n",
869
+ " 'unsolvable_reason': 'I cannot access Wikipedia or external web sources to check which dinosaur article was promoted in November 2016 and who nominated it.'},\n",
870
+ " {'task_id': '5188369a-3bbe-43d8-8b94-11558f909a08',\n",
871
+ " 'model': 'gpt-5-mini',\n",
872
+ " 'correct': False,\n",
873
+ " 'is_solvable': False,\n",
874
+ " 'prediction': '',\n",
875
+ " 'answer': 'Annie Levin',\n",
876
+ " 'unsolvable_reason': 'I cannot access the Merriam-Webster Word of the Day archive or the web to verify the quoted writer for June 27 2022.'}]}"
877
+ ]
878
+ },
879
+ "execution_count": 20,
880
+ "metadata": {},
881
+ "output_type": "execute_result"
882
+ }
883
+ ],
884
+ "source": [
885
+ "results"
886
+ ]
887
+ },
888
+ {
889
+ "cell_type": "markdown",
890
+ "id": "99926f44",
891
+ "metadata": {},
892
+ "source": [
893
+ "## Tool Usage"
894
+ ]
895
+ },
896
+ {
897
+ "cell_type": "code",
898
+ "execution_count": null,
899
+ "id": "ba50100c",
900
+ "metadata": {},
901
+ "outputs": [],
902
+ "source": []
903
+ }
904
+ ],
905
+ "metadata": {
906
+ "kernelspec": {
907
+ "display_name": ".venv",
908
+ "language": "python",
909
+ "name": "python3"
910
+ },
911
+ "language_info": {
912
+ "codemirror_mode": {
913
+ "name": "ipython",
914
+ "version": 3
915
+ },
916
+ "file_extension": ".py",
917
+ "mimetype": "text/x-python",
918
+ "name": "python",
919
+ "nbconvert_exporter": "python",
920
+ "pygments_lexer": "ipython3",
921
+ "version": "3.12.11"
922
+ }
923
+ },
924
+ "nbformat": 4,
925
+ "nbformat_minor": 5
926
+ }
pyproject.toml CHANGED
@@ -6,12 +6,16 @@ readme = "README.md"
6
  requires-python = ">=3.11"
7
  dependencies = [
8
  "chromadb>=1.0.20",
 
9
  "fastmcp>=2.11.3",
 
 
10
  "mcp>=1.13.1",
11
  "openai>=1.101.0",
12
  "pydantic>=2.11.7",
13
  "python-dotenv>=1.1.1",
14
  "tavily-python>=0.7.11",
 
15
  "wikipedia>=1.4.0",
16
  ]
17
 
 
6
  requires-python = ">=3.11"
7
  dependencies = [
8
  "chromadb>=1.0.20",
9
+ "datasets>=4.5.0",
10
  "fastmcp>=2.11.3",
11
+ "ipykernel>=7.1.0",
12
+ "litellm>=1.81.3",
13
  "mcp>=1.13.1",
14
  "openai>=1.101.0",
15
  "pydantic>=2.11.7",
16
  "python-dotenv>=1.1.1",
17
  "tavily-python>=0.7.11",
18
+ "tqdm>=4.67.1",
19
  "wikipedia>=1.4.0",
20
  ]
21
 
scratch_agents/agents/__init__.py DELETED
File without changes
scratch_agents/agents/execution_context_ch4.py DELETED
@@ -1,27 +0,0 @@
1
- from dataclasses import dataclass, field
2
- from typing import List, Dict, Any, Optional
3
- from ..types.events import Event
4
- from ..types.contents import Message
5
- from pydantic import BaseModel
6
- import uuid
7
-
8
- @dataclass
9
- class ExecutionContext:
10
- """Manages the execution state of an agent throughout its lifecycle."""
11
-
12
- execution_id: str = field(default_factory=lambda: str(uuid.uuid4()))
13
-
14
- events: List[Event] = field(default_factory=list)
15
- user_input: Optional[Message] = None
16
- current_step: int = 0
17
-
18
- state: Dict[str, Any] = field(default_factory=dict)
19
-
20
- final_result: str | BaseModel = None
21
-
22
- def add_event(self, event: Event):
23
- """Add an event to the history"""
24
- self.events.append(event)
25
-
26
- def increment_step(self):
27
- self.current_step += 1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scratch_agents/agents/execution_context_ch6.py DELETED
@@ -1,34 +0,0 @@
1
- from dataclasses import dataclass
2
- from ..sessions.session import Session
3
- from ..sessions.in_memory_session_manager import InMemorySessionManager
4
- from ..sessions.base_session_manager import BaseSessionManager
5
- from dataclasses import field
6
- import uuid
7
- from pydantic import BaseModel
8
- from typing import List, Dict, Any
9
- from ..types.events import Event
10
- from ..sessions.base_cross_session_manager import BaseCrossSessionManager
11
-
12
- @dataclass
13
- class ExecutionContext:
14
- session: Session
15
- session_manager: BaseSessionManager
16
- cross_session_manager: BaseCrossSessionManager
17
- execution_id: str = field(default_factory=lambda: str(uuid.uuid4()))
18
- current_step: int = 0
19
- max_steps: int = 10
20
- user_input: str = ""
21
- final_result: str | BaseModel = ""
22
-
23
- def add_event(self, event: Event) -> None:
24
- self.session_manager.add_event(self.session, event)
25
- @property
26
- def events(self) -> List[Event]:
27
- return self.session.events
28
-
29
- @property
30
- def state(self) -> Dict[str, Any]:
31
- return self.session.state
32
-
33
- def increment_step(self) -> None:
34
- self.current_step += 1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scratch_agents/agents/tool_calling_agent_ch4_base.py DELETED
@@ -1,116 +0,0 @@
1
- from typing import List, Dict, Any, Optional
2
- from ..models.base_llm import BaseLlm
3
- from ..models.llm_request import LlmRequest
4
- from ..models.llm_response import LlmResponse
5
- from ..types.contents import Message, ToolCall
6
- from ..types.events import Event
7
- from .execution_context_ch4 import ExecutionContext
8
- from ..tools.base_tool import BaseTool
9
- from ..types.contents import ToolResult
10
- from typing import Type
11
- from pydantic import BaseModel
12
-
13
- class ToolCallingAgent:
14
- def __init__(self, name: str, model: BaseLlm, tools: List[BaseTool], instructions: str, max_steps: int = 10, output_type: Optional[Type[BaseModel]] = None):
15
- self.name = name
16
- self.model = model
17
- self.max_steps = max_steps
18
- self.instructions = instructions
19
- self.tools = self._setup_tools(tools)
20
-
21
- def _setup_tools(self, tools: List[BaseTool]):
22
- return {tool.name: tool for tool in tools}
23
-
24
- async def think(self, context: ExecutionContext, llm_request: LlmRequest):
25
- llm_response = await self.model.generate(llm_request)
26
- return llm_response
27
-
28
- async def act(self, context: ExecutionContext, tool_calls: List[ToolCall]):
29
- tool_results = []
30
- for tool_call in tool_calls:
31
- tool_name = tool_call.name
32
- tool_input = tool_call.arguments
33
- print(f" → Calling {tool_name} with {tool_input}")
34
- try:
35
- result_output = await self.tools[tool_name](**tool_input)
36
- tool_results.append(
37
- ToolResult(
38
- tool_call_id=tool_call.tool_call_id,
39
- name=tool_call.name,
40
- status="success",
41
- content=str(result_output),
42
- )
43
- )
44
- except Exception as e:
45
- tool_results.append(
46
- ToolResult(
47
- tool_call_id=tool_call.tool_call_id,
48
- name=tool_call.name,
49
- status="error",
50
- content=str(e),
51
- )
52
- )
53
- return tool_results
54
-
55
- async def step(self, context: ExecutionContext):
56
- print(f"[Step {context.current_step + 1}]")
57
- llm_request = self._prepare_llm_request(context)
58
- llm_response = await self.think(context, llm_request)
59
- response_event = Event(
60
- execution_id=context.execution_id,
61
- author=self.name,
62
- **llm_response.model_dump(),
63
- )
64
- context.add_event(response_event)
65
-
66
- if tool_calls := response_event.get_tool_calls():
67
- tool_results = await self.act(context, tool_calls)
68
- tool_results_event = Event(
69
- execution_id=context.execution_id,
70
- author=self.name,
71
- content=tool_results,
72
- )
73
- context.add_event(tool_results_event)
74
-
75
- context.increment_step()
76
-
77
- async def run(self, user_input: str):
78
- context = ExecutionContext(
79
- user_input=user_input,
80
- )
81
- user_input_event = Event(
82
- execution_id=context.execution_id,
83
- author="user",
84
- content=[
85
- Message(
86
- role="user",
87
- content=user_input,
88
- )
89
- ],
90
- )
91
- context.add_event(user_input_event)
92
-
93
- while not context.final_result and context.current_step < self.max_steps:
94
- await self.step(context)
95
-
96
- last_event = context.events[-1]
97
- if last_event.is_final_response():
98
- context.final_result = self._extract_final_result(last_event)
99
-
100
- return context.final_result
101
-
102
- def _prepare_llm_request(self, context: ExecutionContext):
103
- flat_contents = []
104
- for event in context.events:
105
- flat_contents.extend(event.content)
106
- return LlmRequest(
107
- instructions=[self.instructions] if self.instructions else [],
108
- contents=flat_contents,
109
- tools_dict=self.tools,
110
- tool_choice="auto" if self.tools else None
111
- )
112
-
113
- def _extract_final_result(self, event: Event):
114
- for item in event.content:
115
- if isinstance(item, Message) and item.role == "assistant":
116
- return item.content
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scratch_agents/agents/tool_calling_agent_ch4_callback.py DELETED
@@ -1,203 +0,0 @@
1
- from typing import List, Dict, Any, Optional
2
- from ..models.base_llm import BaseLlm
3
- from ..models.llm_request import LlmRequest
4
- from ..models.llm_response import LlmResponse
5
- from ..types.contents import Message, ToolCall
6
- from ..types.events import Event
7
- from .execution_context_ch4 import ExecutionContext
8
- from ..tools.base_tool import BaseTool
9
- from ..types.contents import ToolResult
10
- from typing import Type
11
- from pydantic import BaseModel
12
- from ..tools.decorator import tool
13
- import inspect
14
-
15
- class ToolCallingAgent:
16
- def __init__(self, name: str, model: BaseLlm,
17
- tools: List[BaseTool],
18
- instructions: str,
19
- max_steps: int = 10,
20
- output_type: Optional[Type[BaseModel]] = None,
21
- before_llm_callbacks = None,
22
- after_llm_callbacks = None,
23
- before_tool_callbacks = None,
24
- after_tool_callbacks = None,
25
- after_run_callbacks = None):
26
- self.name = name
27
- self.model = model
28
- self.max_steps = max_steps
29
- self.instructions = instructions
30
- self.output_type: Optional[Type[BaseModel]] = output_type
31
- self.output_tool: Optional[str] = None
32
- self.tools = self._setup_tools(tools)
33
- self.before_llm_callbacks = before_llm_callbacks or []
34
- self.after_llm_callbacks = after_llm_callbacks or []
35
- self.before_tool_callbacks = before_tool_callbacks or []
36
- self.after_tool_callbacks = after_tool_callbacks or []
37
- self.after_run_callbacks = after_run_callbacks or []
38
-
39
-
40
- def _setup_tools(self, tools: List[BaseTool]):
41
- if self.output_type is not None:
42
- @tool(name="final_answer", description="Return the final structured answer matching the required schema.")
43
- def final_answer(output: self.output_type) -> self.output_type:
44
- return output
45
- tools.append(final_answer)
46
- self.output_tool = final_answer.name
47
- return {t.name: t for t in tools}
48
-
49
- async def think(self, context: ExecutionContext, llm_request: LlmRequest):
50
- for callback in self.before_llm_callbacks:
51
- result = callback(context, llm_request)
52
- if inspect.isawaitable(result):
53
- result = await result
54
- if result is not None:
55
- return result
56
-
57
- llm_response = await self.model.generate(llm_request)
58
-
59
- for callback in self.after_llm_callbacks:
60
- result = callback(context, llm_response)
61
- if inspect.isawaitable(result):
62
- result = await result
63
- if result is not None:
64
- return result
65
-
66
- return llm_response
67
-
68
-
69
- async def act(self, context: ExecutionContext, tool_calls: List[ToolCall]):
70
- tool_results = []
71
- for tool_call in tool_calls:
72
- tool_name = tool_call.name
73
- tool_input = tool_call.arguments
74
- print(f" → Calling {tool_name} with {tool_input}")
75
-
76
- # Step 1: before_tool_callbacks - can skip tool execution
77
- tool_response = None
78
- for callback in self.before_tool_callbacks:
79
- result = callback(context, tool_call)
80
- if inspect.isawaitable(result):
81
- result = await result
82
- if result is not None:
83
- tool_response = result
84
- break
85
-
86
- # Step 2: Execute tool if no callback provided result
87
- status = "success"
88
- if tool_response is None:
89
- try:
90
- tool_response = await self.tools[tool_name](**tool_input)
91
- except Exception as e:
92
- tool_response = str(e)
93
- status = "error"
94
-
95
- # Step 3: after_tool_callbacks - only after actual tool execution
96
- for callback in self.after_tool_callbacks:
97
- result = callback(context, tool_response)
98
- if inspect.isawaitable(result):
99
- result = await result
100
- if result is not None:
101
- tool_response = result
102
- break
103
-
104
- # Step 4: Wrap in ToolResult at the end
105
- if tool_response is not None:
106
- tool_result = ToolResult(
107
- tool_call_id=tool_call.tool_call_id,
108
- name=tool_call.name,
109
- status=status,
110
- content=str(tool_response),
111
- )
112
- tool_results.append(tool_result)
113
-
114
- return tool_results
115
-
116
- async def step(self, context: ExecutionContext):
117
- print(f"[Step {context.current_step + 1}]")
118
- llm_request = self._prepare_llm_request(context)
119
- llm_response = await self.think(context, llm_request)
120
- if llm_response.error_message:
121
- raise RuntimeError(f"LLM error: {llm_response.error_message}")
122
- response_event = Event(
123
- execution_id=context.execution_id,
124
- author=self.name,
125
- required_output_tool=self.output_tool or None,
126
- **llm_response.model_dump(),
127
- )
128
- context.add_event(response_event)
129
-
130
- if tool_calls := response_event.get_tool_calls():
131
- tool_results = await self.act(context, tool_calls)
132
- tool_results_event = Event(
133
- execution_id=context.execution_id,
134
- author=self.name,
135
- required_output_tool=self.output_tool or None,
136
- content=tool_results,
137
- )
138
- context.add_event(tool_results_event)
139
-
140
- context.increment_step()
141
-
142
- async def run(self, user_input: str):
143
- context = ExecutionContext(
144
- user_input=user_input,
145
- )
146
- user_input_event = Event(
147
- execution_id=context.execution_id,
148
- author="user",
149
- content=[
150
- Message(
151
- role="user",
152
- content=user_input,
153
- )
154
- ],
155
- )
156
- context.add_event(user_input_event)
157
-
158
- while not context.final_result and context.current_step < self.max_steps:
159
- await self.step(context)
160
-
161
- last_event = context.events[-1]
162
- if last_event.is_final_response():
163
- context.final_result = self._extract_final_result(last_event)
164
-
165
- for callback in self.after_run_callbacks:
166
- result = callback(context)
167
- if inspect.isawaitable(result):
168
- await result
169
-
170
- return context.final_result
171
-
172
- def _prepare_llm_request(self, context: ExecutionContext):
173
- flat_contents = []
174
- for event in context.events:
175
- flat_contents.extend(event.content)
176
-
177
- if self.output_tool:
178
- tool_choice = "required"
179
- elif self.tools:
180
- tool_choice = "auto"
181
- else:
182
- tool_choice = None
183
-
184
- return LlmRequest(
185
- instructions=[self.instructions] if self.instructions else [],
186
- contents=flat_contents,
187
- tools_dict=self.tools,
188
- tool_choice=tool_choice
189
- )
190
-
191
- def _extract_final_result(self, event: Event):
192
- if event.required_output_tool:
193
- for item in event.content:
194
- if (
195
- isinstance(item, ToolResult)
196
- and item.status == "success"
197
- and item.name == event.required_output_tool
198
- and item.content
199
- ):
200
- return item.content[0]
201
- for item in event.content:
202
- if isinstance(item, Message) and item.role == "assistant":
203
- return item.content
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scratch_agents/agents/tool_calling_agent_ch4_structured_output.py DELETED
@@ -1,146 +0,0 @@
1
- from typing import List, Dict, Any, Optional
2
- from ..models.base_llm import BaseLlm
3
- from ..models.llm_request import LlmRequest
4
- from ..models.llm_response import LlmResponse
5
- from ..types.contents import Message, ToolCall
6
- from ..types.events import Event
7
- from .execution_context_ch4 import ExecutionContext
8
- from ..tools.base_tool import BaseTool
9
- from ..types.contents import ToolResult
10
- from typing import Type
11
- from pydantic import BaseModel
12
- from ..tools.decorator import tool
13
-
14
- class ToolCallingAgent:
15
- def __init__(self, name: str, model: BaseLlm, tools: List[BaseTool], instructions: str, max_steps: int = 10, output_type: Optional[Type[BaseModel]] = None):
16
- self.name = name
17
- self.model = model
18
- self.max_steps = max_steps
19
- self.instructions = instructions
20
- self.output_type: Optional[Type[BaseModel]] = output_type
21
- self.output_tool: Optional[str] = None
22
- self.tools = self._setup_tools(tools)
23
-
24
- def _setup_tools(self, tools: List[BaseTool]):
25
- if self.output_type is not None:
26
- @tool(name="final_answer", description="Return the final structured answer matching the required schema.")
27
- def final_answer(output: self.output_type) -> self.output_type:
28
- return output
29
- tools.append(final_answer)
30
- self.output_tool = final_answer.name
31
- return {t.name: t for t in tools}
32
-
33
- async def think(self, context: ExecutionContext, llm_request: LlmRequest):
34
- llm_response = await self.model.generate(llm_request)
35
- return llm_response
36
-
37
- async def act(self, context: ExecutionContext, tool_calls: List[ToolCall]):
38
- tool_results = []
39
- for tool_call in tool_calls:
40
- tool_name = tool_call.name
41
- tool_input = tool_call.arguments
42
- print(f" → Calling {tool_name} with {tool_input}")
43
- try:
44
- result_output = await self.tools[tool_name](**tool_input)
45
- tool_results.append(
46
- ToolResult(
47
- tool_call_id=tool_call.tool_call_id,
48
- name=tool_call.name,
49
- status="success",
50
- content=str(result_output),
51
- )
52
- )
53
- except Exception as e:
54
- tool_results.append(
55
- ToolResult(
56
- tool_call_id=tool_call.tool_call_id,
57
- name=tool_call.name,
58
- status="error",
59
- content=str(e),
60
- )
61
- )
62
- return tool_results
63
-
64
- async def step(self, context: ExecutionContext):
65
- print(f"[Step {context.current_step + 1}]")
66
- llm_request = self._prepare_llm_request(context)
67
- llm_response = await self.think(context, llm_request)
68
- if llm_response.error_message:
69
- raise RuntimeError(f"LLM error: {llm_response.error_message}")
70
- response_event = Event(
71
- execution_id=context.execution_id,
72
- author=self.name,
73
- required_output_tool=self.output_tool or None,
74
- **llm_response.model_dump(),
75
- )
76
- context.add_event(response_event)
77
-
78
- if tool_calls := response_event.get_tool_calls():
79
- tool_results = await self.act(context, tool_calls)
80
- tool_results_event = Event(
81
- execution_id=context.execution_id,
82
- author=self.name,
83
- required_output_tool=self.output_tool or None,
84
- content=tool_results,
85
- )
86
- context.add_event(tool_results_event)
87
-
88
- context.increment_step()
89
-
90
- async def run(self, user_input: str):
91
- context = ExecutionContext(
92
- user_input=user_input,
93
- )
94
- user_input_event = Event(
95
- execution_id=context.execution_id,
96
- author="user",
97
- content=[
98
- Message(
99
- role="user",
100
- content=user_input,
101
- )
102
- ],
103
- )
104
- context.add_event(user_input_event)
105
-
106
- while not context.final_result and context.current_step < self.max_steps:
107
- await self.step(context)
108
-
109
- last_event = context.events[-1]
110
- if last_event.is_final_response():
111
- context.final_result = self._extract_final_result(last_event)
112
-
113
- return context.final_result
114
-
115
- def _prepare_llm_request(self, context: ExecutionContext):
116
- flat_contents = []
117
- for event in context.events:
118
- flat_contents.extend(event.content)
119
-
120
- if self.output_tool:
121
- tool_choice = "required"
122
- elif self.tools:
123
- tool_choice = "auto"
124
- else:
125
- tool_choice = None
126
-
127
- return LlmRequest(
128
- instructions=[self.instructions] if self.instructions else [],
129
- contents=flat_contents,
130
- tools_dict=self.tools,
131
- tool_choice=tool_choice
132
- )
133
-
134
- def _extract_final_result(self, event: Event):
135
- if event.required_output_tool:
136
- for item in event.content:
137
- if (
138
- isinstance(item, ToolResult)
139
- and item.status == "success"
140
- and item.name == event.required_output_tool
141
- and item.content
142
- ):
143
- return item.content[0]
144
- for item in event.content:
145
- if isinstance(item, Message) and item.role == "assistant":
146
- return item.content
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scratch_agents/agents/tool_calling_agent_ch6.py DELETED
@@ -1,226 +0,0 @@
1
- from typing import List, Dict, Any, Optional
2
- from ..models.base_llm import BaseLlm
3
- from ..models.llm_request import LlmRequest
4
- from ..models.llm_response import LlmResponse
5
- from ..types.contents import Message, ToolCall
6
- from ..types.events import Event
7
- from .execution_context_ch6 import ExecutionContext
8
- from ..tools.base_tool import BaseTool
9
- from ..types.contents import ToolResult
10
- from typing import Type
11
- from pydantic import BaseModel
12
- from ..tools.decorator import tool
13
- import inspect
14
- from ..sessions.base_session_manager import BaseSessionManager
15
- from ..sessions.in_memory_session_manager import InMemorySessionManager
16
- from ..sessions.base_cross_session_manager import BaseCrossSessionManager
17
-
18
- class ToolCallingAgent:
19
- def __init__(self, name: str, model: BaseLlm,
20
- tools: List[BaseTool] = [],
21
- instructions: str = "",
22
- max_steps: int = 10,
23
- output_type: Optional[Type[BaseModel]] = None,
24
- before_llm_callbacks = None,
25
- after_llm_callbacks = None,
26
- before_tool_callbacks = None,
27
- after_tool_callbacks = None,
28
- after_run_callbacks = None,
29
- session_manager: BaseSessionManager = None,
30
- cross_session_manager: BaseCrossSessionManager = None):
31
- self.name = name
32
- self.model = model
33
- self.max_steps = max_steps
34
- self.instructions = instructions
35
- self.output_type: Optional[Type[BaseModel]] = output_type
36
- self.output_tool: Optional[str] = None
37
- self.tools = self._setup_tools(tools)
38
- self.before_llm_callbacks = before_llm_callbacks or []
39
- self.after_llm_callbacks = after_llm_callbacks or []
40
- self.before_tool_callbacks = before_tool_callbacks or []
41
- self.after_tool_callbacks = after_tool_callbacks or []
42
- self.after_run_callbacks = after_run_callbacks or []
43
- self.session_manager = session_manager or InMemorySessionManager()
44
- self.cross_session_manager = cross_session_manager
45
-
46
- def _setup_tools(self, tools: List[BaseTool]):
47
- if self.output_type is not None:
48
- @tool(name="final_answer", description="Return the final structured answer matching the required schema.")
49
- def final_answer(output: self.output_type) -> self.output_type:
50
- return output
51
- tools.append(final_answer)
52
- self.output_tool = final_answer.name
53
- return {t.name: t for t in tools}
54
-
55
- async def think(self, context: ExecutionContext, llm_request: LlmRequest):
56
- for callback in self.before_llm_callbacks:
57
- result = callback(context, llm_request)
58
- if inspect.isawaitable(result):
59
- result = await result
60
- if result is not None:
61
- return result
62
-
63
- llm_response = await self.model.generate(llm_request)
64
-
65
- for callback in self.after_llm_callbacks:
66
- result = callback(context, llm_response)
67
- if inspect.isawaitable(result):
68
- result = await result
69
- if result is not None:
70
- return result
71
-
72
- return llm_response
73
-
74
- async def _execute_tool(self, context: ExecutionContext, tool_name: str, tool_input: dict) -> Any:
75
- """Execute a tool with context injection if needed"""
76
- tool = self.tools[tool_name]
77
-
78
- # All tools now handle context properly in their execute method
79
- return await tool.execute(context, **tool_input)
80
-
81
- async def act(self, context: ExecutionContext, tool_calls: List[ToolCall]):
82
- tool_results = []
83
- for tool_call in tool_calls:
84
- tool_name = tool_call.name
85
- tool_input = tool_call.arguments
86
- print(f" → Calling {tool_name} with {tool_input}")
87
-
88
- # Step 1: before_tool_callbacks - can skip tool execution
89
- tool_response = None
90
- for callback in self.before_tool_callbacks:
91
- result = callback(context, tool_call)
92
- if inspect.isawaitable(result):
93
- result = await result
94
- if result is not None:
95
- tool_response = result
96
- break
97
-
98
- # Step 2: Execute tool if no callback provided result
99
- status = "success"
100
- if tool_response is None:
101
- try:
102
- tool_response = await self._execute_tool(context, tool_name, tool_input)
103
- except Exception as e:
104
- tool_response = str(e)
105
- status = "error"
106
-
107
- # Step 3: after_tool_callbacks - only after actual tool execution
108
- for callback in self.after_tool_callbacks:
109
- result = callback(context, tool_response)
110
- if inspect.isawaitable(result):
111
- result = await result
112
- if result is not None:
113
- tool_response = result
114
- break
115
-
116
- # Step 4: Wrap in ToolResult at the end
117
- if tool_response is not None:
118
- tool_result = ToolResult(
119
- tool_call_id=tool_call.tool_call_id,
120
- name=tool_call.name,
121
- status=status,
122
- content=str(tool_response),
123
- )
124
- tool_results.append(tool_result)
125
-
126
- return tool_results
127
-
128
- async def step(self, context: ExecutionContext):
129
- print(f"[Step {context.current_step + 1}]")
130
- llm_request = await self._prepare_llm_request(context)
131
- llm_response = await self.think(context, llm_request)
132
- if llm_response.error_message:
133
- raise RuntimeError(f"LLM error: {llm_response.error_message}")
134
- response_event = Event(
135
- execution_id=context.execution_id,
136
- author=self.name,
137
- required_output_tool=self.output_tool or None,
138
- **llm_response.model_dump(),
139
- )
140
- context.add_event(response_event)
141
-
142
- if tool_calls := response_event.get_tool_calls():
143
- tool_results = await self.act(context, tool_calls)
144
- tool_results_event = Event(
145
- execution_id=context.execution_id,
146
- author=self.name,
147
- required_output_tool=self.output_tool or None,
148
- content=tool_results,
149
- )
150
- context.add_event(tool_results_event)
151
-
152
- context.increment_step()
153
-
154
- async def run(self, user_input: str,
155
- user_id: str = None,
156
- session_id: str = None):
157
- session = self.session_manager.get_or_create_session(session_id, user_id)
158
- context = ExecutionContext(
159
- user_input=user_input,
160
- session=session,
161
- session_manager=self.session_manager,
162
- cross_session_manager=self.cross_session_manager,
163
- )
164
-
165
- user_input_event = Event(
166
- execution_id=context.execution_id,
167
- author="user",
168
- content=[
169
- Message(
170
- role="user",
171
- content=user_input,
172
- )
173
- ],
174
- )
175
- context.add_event(user_input_event)
176
-
177
- while not context.final_result and context.current_step < self.max_steps:
178
- await self.step(context)
179
-
180
- last_event = context.events[-1]
181
- if last_event.is_final_response():
182
- context.final_result = self._extract_final_result(last_event)
183
-
184
- for callback in self.after_run_callbacks:
185
- result = callback(context)
186
- if inspect.isawaitable(result):
187
- await result
188
-
189
- return context.final_result
190
-
191
- async def _prepare_llm_request(self, context: ExecutionContext):
192
- flat_contents = []
193
- for event in context.events:
194
- flat_contents.extend(event.content)
195
-
196
- llm_request = LlmRequest(
197
- instructions=[self.instructions] if self.instructions else [],
198
- contents=flat_contents,
199
- tools_dict={tool.name:tool for tool in self.tools.values() if tool.tool_definition},
200
- )
201
-
202
- for tool in self.tools.values():
203
- await tool.process_llm_request(llm_request, context)
204
-
205
- if self.output_tool:
206
- llm_request.tool_choice = "required"
207
- elif llm_request.tools_dict:
208
- llm_request.tool_choice = "auto"
209
- else:
210
- llm_request.tool_choice = None
211
-
212
- return llm_request
213
-
214
- def _extract_final_result(self, event: Event):
215
- if event.required_output_tool:
216
- for item in event.content:
217
- if (
218
- isinstance(item, ToolResult)
219
- and item.status == "success"
220
- and item.name == event.required_output_tool
221
- and item.content
222
- ):
223
- return item.content[0]
224
- for item in event.content:
225
- if isinstance(item, Message) and item.role == "assistant":
226
- return item.content
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scratch_agents/memory/base_memory_strategy.py DELETED
@@ -1,13 +0,0 @@
1
- from abc import ABC, abstractmethod
2
-
3
- class MemoryStrategy(ABC):
4
- """Base class for memory management strategies"""
5
-
6
- @abstractmethod
7
- async def apply(self, context, llm_request): #A
8
- """Apply memory management strategy to the request"""
9
- pass
10
-
11
- async def __call__(self, context, llm_request): #B
12
- """Make strategy callable as a before_llm_callback"""
13
- return await self.apply(context, llm_request)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scratch_agents/memory/core_memory_strategy.py DELETED
@@ -1,21 +0,0 @@
1
- from .base_memory_strategy import MemoryStrategy
2
-
3
-
4
- class CoreMemoryStrategy(MemoryStrategy):
5
- """Automatically add core memory to LLM context"""
6
-
7
- async def apply(self, context, llm_request):
8
- """Add core memory as instructions if it exists"""
9
- core_memory = context.session.core_memory
10
-
11
- memory_parts = []
12
- if core_memory.get("agent"):
13
- memory_parts.append(f"[Your Persona]\n{core_memory['agent']}")
14
- if core_memory.get("user"):
15
- memory_parts.append(f"[User Info]\n{core_memory['user']}")
16
-
17
- if memory_parts:
18
- memory_text = "\n\n".join(memory_parts)
19
- llm_request.add_instructions([memory_text])
20
-
21
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scratch_agents/memory/sliding_window_strategy.py DELETED
@@ -1,26 +0,0 @@
1
- from .base_memory_strategy import MemoryStrategy
2
- from ..models.llm_request import LlmRequest
3
- from ..agents.execution_context_ch6 import ExecutionContext
4
-
5
-
6
- class SlidingWindowStrategy(MemoryStrategy):
7
- """Keep only the most recent N messages in context"""
8
-
9
- def __init__(self, max_messages: int = 20):
10
- self.max_messages = max_messages
11
-
12
- async def apply(self, context: ExecutionContext, llm_request: LlmRequest):
13
- """Apply sliding window to conversation history"""
14
- contents = llm_request.contents
15
-
16
- if len(contents) <= self.max_messages:
17
- return None
18
-
19
- # Keep only recent messages
20
- recent_contents = contents[-self.max_messages:]
21
- llm_request.contents = recent_contents
22
-
23
- print(f"Trimmed messages")
24
- print(f"from {len(contents)} to {self.max_messages}")
25
-
26
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scratch_agents/memory/summarization_strategy.py DELETED
@@ -1,77 +0,0 @@
1
from .base_memory_strategy import MemoryStrategy
from ..models.llm_request import LlmRequest
from ..types.contents import Message

class SummarizationStrategy(MemoryStrategy):
    """Summarize old messages to preserve information while reducing tokens"""

    def __init__(self, model, trigger_count: int = 10, keep_recent: int = 3):
        """Args:
            model: LLM used to produce summaries (must expose async generate()).
            trigger_count: Summarize once this many new messages accumulate.
            keep_recent: Number of trailing items left verbatim after compression.
        """
        self.model = model
        self.trigger_count = trigger_count  # threshold of new messages before summarizing
        self.keep_recent = keep_recent  # items kept verbatim at the tail

    async def _generate_summary(self, messages_text: str):
        """Ask the model for a concise (<=200-word) summary of messages_text."""
        request = LlmRequest(
            instructions=[
                "Summarize the following conversation concisely.",
                "Preserve key facts, decisions, and important context.",
                "Keep the summary under 200 words."
            ],
            contents=[Message(role="user", content=messages_text)]
        )

        response = await self.model.generate(request)

        # Return the first assistant text item from the response content.
        for item in response.content:
            if isinstance(item, Message) and item.role == "assistant":
                return item.content

        return "Summary generation failed"  # fallback: no assistant text came back

    async def apply(self, context, llm_request):
        """Apply summarization when new messages since last summary exceed threshold"""
        contents = llm_request.contents

        # Only Message items count toward the trigger; tool calls/results are ignored.
        messages_only = [item for item in contents if isinstance(item, Message)]
        last_summarized = context.state.get("last_summarized_index", 0)

        total_messages = len(messages_only)
        new_messages_count = total_messages - last_summarized  # unsummarized backlog

        if new_messages_count < self.trigger_count:
            return None

        # NOTE(review): to_summarize is sliced from messages_only but to_keep
        # from the full contents list, so when tool items are present the two
        # windows can overlap or drop tool items — confirm this is intended.
        summarize_until = total_messages - self.keep_recent
        to_summarize = messages_only[last_summarized:summarize_until]
        to_keep = contents[-self.keep_recent:] if len(contents) >= self.keep_recent else contents

        if not to_summarize:
            return None

        existing_summary = context.state.get("conversation_summary")

        # Fold any previous summary into the prompt so context accumulates
        # across successive compressions.
        summary_input = []
        if existing_summary:
            summary_input.append(f"Previous Summary:\n{existing_summary}\n")

        summary_input.append("New Messages to Summarize:\n")
        for msg in to_summarize:
            summary_input.append(f"{msg.role}: {msg.content}")

        messages_text = "\n".join(summary_input)

        new_summary = await self._generate_summary(messages_text)

        # Persist progress so the next apply() only summarizes fresh messages.
        context.state["conversation_summary"] = new_summary
        context.state["last_summarized_index"] = summarize_until

        if new_summary:
            summary_instruction = f"[Previous Conversation Summary]\n{new_summary}"
            llm_request.add_instructions([summary_instruction])

        llm_request.contents = to_keep  # drop everything the summary now covers

        print(f"Compressed {len(to_summarize)} messages")
        print(f"Keeping {len(to_keep)} recent items")

        return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scratch_agents/models/__init__.py DELETED
File without changes
scratch_agents/models/base_llm.py DELETED
@@ -1,12 +0,0 @@
1
from abc import abstractmethod
from pydantic import BaseModel
from .llm_request import LlmRequest


class BaseLlm(BaseModel):
    """Common interface that every concrete LLM backend implements."""

    # Provider-specific model identifier (e.g. an OpenAI model name).
    model: str

    @abstractmethod
    async def generate(self, request: LlmRequest):
        """Produce a response for the given request; subclasses must override."""
        ...
 
 
 
 
 
 
 
 
 
 
 
 
 
scratch_agents/models/llm_request.py DELETED
@@ -1,19 +0,0 @@
1
from typing import Any, Dict, List, Optional
from pydantic import BaseModel, Field
from ..types.contents import ContentItem


class LlmRequest(BaseModel):
    """Request object for LLM calls"""
    instructions: List[str] = Field(default_factory=list)
    contents: List[ContentItem] = Field(default_factory=list)
    tools_dict: Dict[str, Any] = Field(default_factory=dict)
    tool_choice: Optional[str] = None

    def add_instructions(self, instructions: List[str] | str):
        """Append a single instruction or a batch of them to the request."""
        new_items = [instructions] if isinstance(instructions, str) else list(instructions)
        self.instructions.extend(new_items)
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scratch_agents/models/llm_response.py DELETED
@@ -1,10 +0,0 @@
1
from typing import Any, Dict, List, Optional
from pydantic import BaseModel, Field
from ..types.contents import ContentItem


class LlmResponse(BaseModel):
    """Response object from LLM calls"""
    # Ordered content items produced by the model (Message and/or ToolCall).
    content: List[ContentItem] = Field(default_factory=list)
    # Set when the provider call failed; content is then left empty.
    error_message: Optional[str] = None
    # Token accounting, e.g. input_tokens / output_tokens / total_tokens.
    usage_metadata: Dict[str, Any] = Field(default_factory=dict)
 
 
 
 
 
 
 
 
 
 
 
scratch_agents/models/openai.py DELETED
@@ -1,174 +0,0 @@
1
from openai import AsyncOpenAI
from .base_llm import BaseLlm
from .llm_request import LlmRequest
from .llm_response import LlmResponse
from ..types.contents import Message, ToolCall, ToolResult
import json
from pydantic import Field, BaseModel
from typing import Dict, Any, List

class OpenAILlm(BaseLlm):
    """OpenAI LLM implementation"""

    # Extra keyword args (temperature, max_tokens, ...) forwarded on every call.
    llm_config: dict = Field(default_factory=dict)

    def __init__(self, model, **kwargs):
        """Store the model name and provider parameters; defer client creation."""
        super().__init__(model=model)
        self.llm_config = kwargs
        self._client = None

    @property
    def openai_client(self):
        # Lazily create the client; AsyncOpenAI reads OPENAI_API_KEY from the env.
        if self._client is None:
            self._client = AsyncOpenAI()
        return self._client

    async def generate(self, request: LlmRequest) -> LlmResponse:
        """Generate a response using OpenAI API

        Returns:
            LlmResponse whose content mixes Message and ToolCall items; on any
            failure a response carrying only error_message is returned instead
            of raising.
        """
        try:
            # Build messages for OpenAI API
            messages, model_params = self._build_llm_input(request, self.llm_config)

            # Convert tools_dict to tools array for OpenAI
            tools = None
            if request.tools_dict:
                tools = [tool.tool_definition for tool in request.tools_dict.values()]
            # Call OpenAI API
            call_kwargs = {}
            if request.tool_choice is not None:
                call_kwargs["tool_choice"] = request.tool_choice
            response = await self.openai_client.chat.completions.create(
                model=self.model,
                messages=messages,
                tools=tools,
                **call_kwargs,
                **model_params
            )

            # Extract message from response
            choice = response.choices[0]
            content_items = []

            # Handle text content
            if choice.message.content:
                content_items.append(Message(role="assistant", content=choice.message.content))

            # Handle tool calls
            if choice.message.tool_calls:
                for tool_call in choice.message.tool_calls:
                    content_items.append(ToolCall(
                        tool_call_id=tool_call.id,
                        name=tool_call.function.name,
                        arguments=json.loads(tool_call.function.arguments)
                    ))

            # Extract usage metadata
            usage_metadata = {
                "input_tokens": response.usage.prompt_tokens,
                "output_tokens": response.usage.completion_tokens,
                "total_tokens": response.usage.total_tokens
            }

            return LlmResponse(
                content=content_items,
                usage_metadata=usage_metadata
            )
        except Exception as e:
            # Surface the failure to the caller rather than raising.
            return LlmResponse(
                error_message=str(e)
            )

    def _build_llm_input(self, request: LlmRequest, model_config: dict):
        """Build messages and parameters for OpenAI API

        Converts the framework's content items into chat-completions format.
        Assistant text and the tool calls it issued are grouped into a single
        assistant message, because the API requires tool_calls to ride on the
        assistant message that precedes the role="tool" results.
        """
        messages = []

        # Add instructions as system messages
        for instruction in request.instructions:
            messages.append({"role": "system", "content": instruction})

        # Add conversation history
        # Group assistant messages and their tool calls together
        pending_assistant_content = None
        pending_tool_calls = []

        def flush_assistant_message():
            """Flush any pending assistant message with its tool calls"""
            # Closure only reads the pending_* variables; the caller resets them.
            if pending_assistant_content is not None or pending_tool_calls:
                msg = {"role": "assistant"}
                if pending_assistant_content is not None:
                    msg["content"] = pending_assistant_content
                else:
                    msg["content"] = None  # tool-call-only assistant turn
                if pending_tool_calls:
                    msg["tool_calls"] = pending_tool_calls
                messages.append(msg)
                return True
            return False

        for item in request.contents:
            if isinstance(item, Message):
                if item.role == "assistant":
                    # Accumulate assistant content
                    pending_assistant_content = item.content
                else:
                    # Non-assistant message, flush any pending assistant message
                    flush_assistant_message()
                    pending_assistant_content = None
                    pending_tool_calls = []
                    messages.append({"role": item.role, "content": item.content})

            elif isinstance(item, ToolCall):
                # Accumulate tool calls with the assistant message
                pending_tool_calls.append({
                    "id": item.tool_call_id,
                    "type": "function",
                    "function": {
                        "name": item.name,
                        "arguments": json.dumps(item.arguments)
                    }
                })

            elif isinstance(item, ToolResult):
                # Tool result means we need to flush any pending assistant message
                flush_assistant_message()
                pending_assistant_content = None
                pending_tool_calls = []

                messages.append({
                    "role": "tool",
                    "tool_call_id": item.tool_call_id,
                    "content": str(item.content) if item.content else ""
                })

        # Flush any remaining assistant message
        flush_assistant_message()

        # Extract model parameters
        model_params = {**self.llm_config}

        return messages, model_params

    async def generate_structured(self, messages: List[Dict[str, Any]], response_format: BaseModel):
        """Generate structured output using OpenAI's response_format

        Returns:
            The parsed pydantic object on success, or {"error": ...} on
            failure — callers must check the type before attribute access.
        """
        try:
            response = await self.openai_client.chat.completions.parse(
                model=self.model,
                messages=messages,
                response_format=response_format,
                **self.llm_config
            )

            return response.choices[0].message.parsed
        except Exception as e:
            return {"error": str(e)}

    async def embed(self, model, texts: List[str]) -> List[List[float]]:
        """Get embeddings using OpenAI API

        NOTE(review): on failure this returns {"error": ...}, which
        contradicts the List[List[float]] annotation — callers should
        handle both shapes.
        """
        try:
            response = await self.openai_client.embeddings.create(
                model=model,
                input=texts
            )
            return [embedding.embedding for embedding in response.data]
        except Exception as e:
            return {"error": str(e)}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scratch_agents/sessions/base_cross_session_manager.py DELETED
@@ -1,297 +0,0 @@
1
- """Base class for cross-session memory management."""
2
-
3
- from abc import ABC, abstractmethod
4
- from typing import List, Dict, Optional, Any
5
- import chromadb
6
- from chromadb.utils import embedding_functions
7
- from chromadb.config import Settings
8
- from datetime import datetime
9
- import logging
10
- import os
11
- import uuid
12
- from .session import Session
13
-
14
- logger = logging.getLogger(__name__)
15
-
16
-
17
- class BaseCrossSessionManager(ABC):
18
- """Abstract base class for cross-session memory management."""
19
-
20
- def __init__(
21
- self,
22
- model,
23
- collection_name: str,
24
- persist_directory: str = "./cross_session_db",
25
- embedding_model: str = "text-embedding-3-small"
26
- ):
27
- """Initialize the base cross-session manager.
28
-
29
- Args:
30
- model: LLM model for memory processing
31
- collection_name: Name of the ChromaDB collection
32
- persist_directory: Directory to persist ChromaDB data
33
- embedding_model: Optional custom embedding model
34
- """
35
- self.model = model
36
- self.collection_name = collection_name
37
- self.persist_directory = persist_directory
38
- self.embedding_model = embedding_model
39
-
40
- self.client = chromadb.PersistentClient(
41
- path=persist_directory,
42
- )
43
- embedding_function = embedding_functions.OpenAIEmbeddingFunction(
44
- api_key=os.getenv("OPENAI_API_KEY"),
45
- model_name=self.embedding_model
46
- )
47
-
48
- # Get or create collection
49
- try:
50
- self.collection = self.client.get_or_create_collection(
51
- name=collection_name,
52
- metadata={"hnsw:space": "cosine"},
53
- embedding_function=embedding_function
54
- )
55
- logger.info(f"Using existing collection: {collection_name}")
56
- except Exception:
57
- logger.error(f"Error getting or creating collection: {collection_name}")
58
- raise
59
-
60
- @abstractmethod
61
- async def extract_memories(
62
- self,
63
- events: List[Dict[str, Any]],
64
- ) -> List[str]:
65
- """Extract memories from session events.
66
-
67
- Args:
68
- events: List of session events
69
- user_id: User identifier
70
-
71
- Returns:
72
- List of extracted memory strings
73
- """
74
- pass
75
-
76
- async def process_session(
77
- self,
78
- session: Session,
79
- execution_id: str
80
- ) -> None:
81
- """Process a completed session and extract/merge memories.
82
-
83
- Args:
84
- session: Session data containing events
85
- execution_id: Unique execution identifier
86
- """
87
- try:
88
- user_id = session.user_id
89
- events = session.events
90
-
91
- events = [event for event in events if event.execution_id == execution_id]
92
-
93
- memories = await self.extract_memories(events)
94
-
95
- if memories:
96
- existing = await self.find_existing(memories, user_id)
97
- actions = await self.decide_actions(memories, existing, user_id)
98
- await self.execute_memory_actions(actions)
99
- else:
100
- logger.info(f"No memories extracted for user {user_id}")
101
-
102
- except Exception as e:
103
- logger.error(f"Error processing session: {e}")
104
-
105
- async def find_existing(
106
- self,
107
- memories: List[str],
108
- user_id: str
109
- ) -> List[Dict[str, Any]]:
110
- """Find existing memories.
111
-
112
- Args:
113
- memories: List of new memory strings to merge
114
- user_id: User identifier
115
-
116
- Returns:
117
- List of existing memories with metadata
118
- """
119
- existing_memories = []
120
- for memory in memories:
121
- existing = await self.search(memory, user_id)
122
- if existing:
123
- existing_memories.append(existing)
124
- return existing_memories
125
-
126
- @abstractmethod
127
- async def decide_actions(
128
- self,
129
- memories: List[str],
130
- existing: List[Dict[str, Any]],
131
- user_id: str
132
- ) -> List[Dict[str, Any]]:
133
- """Decide actions for new memories."""
134
- pass
135
-
136
- async def execute_memory_actions(
137
- self,
138
- actions: List[Dict[str, Any]]
139
- ) -> None:
140
- """Execute memory actions."""
141
- for action in actions:
142
- if action["action"] == "ADD":
143
- metadata = action.get("metadata", {})
144
- await self.add(action["memory"], action["user_id"], action.get("embedding"), metadata)
145
- elif action["action"] == "UPDATE":
146
- metadata = action.get("metadata", {})
147
- await self.update(action["memory_id"], action["memory"], action.get("embedding"), metadata)
148
- elif action["action"] == "DELETE":
149
- await self.delete(action["memory_id"])
150
- elif action["action"] == "NOOP":
151
- pass
152
-
153
- async def search(
154
- self,
155
- query: str,
156
- user_id: str,
157
- limit: int = 5
158
- ) -> List[Dict[str, Any]]:
159
- """Search for relevant memories.
160
-
161
- Args:
162
- query: Search query
163
- user_id: User identifier
164
- limit: Maximum number of results
165
-
166
- Returns:
167
- List of relevant memories with metadata
168
- """
169
- try:
170
- # Filter by user_id in metadata
171
- where = {"user_id": user_id}
172
-
173
- results = self.collection.query(
174
- query_texts=[query],
175
- n_results=limit,
176
- where=where
177
- )
178
-
179
- memories = []
180
- if results["documents"] and results["documents"][0]:
181
- for i, doc in enumerate(results["documents"][0]):
182
- memory = {
183
- "id": results["ids"][0][i] if results["ids"] and results["ids"][0] else None,
184
- "content": doc,
185
- "metadata": results["metadatas"][0][i] if results["metadatas"] else {},
186
- "distance": results["distances"][0][i] if results["distances"] else 0
187
- }
188
- memories.append(memory)
189
-
190
- return memories
191
-
192
- except Exception as e:
193
- logger.error(f"Error searching memories: {e}")
194
- return []
195
-
196
- async def add(
197
- self,
198
- memory: str,
199
- user_id: str,
200
- embedding: Optional[List[float]] = None,
201
- additional_metadata: Optional[Dict[str, Any]] = None,
202
- ) -> str:
203
- """Add a new memory.
204
-
205
- Args:
206
- memory: Memory content (as string for ChromaDB)
207
- user_id: User identifier
208
- embedding: Optional embedding vector
209
- additional_metadata: Additional metadata to store
210
-
211
- Returns:
212
- Memory ID
213
- """
214
- memory_id = f"{uuid.uuid4()}"
215
-
216
- final_metadata = {
217
- "user_id": user_id,
218
- "created_at": datetime.now().isoformat(),
219
- "updated_at": datetime.now().isoformat()
220
- }
221
-
222
- # Add any additional metadata (like the original structured data)
223
- if additional_metadata:
224
- final_metadata.update(additional_metadata)
225
-
226
- if embedding:
227
- self.collection.upsert(
228
- documents=[memory],
229
- ids=[memory_id],
230
- embeddings=[embedding],
231
- metadatas=[final_metadata]
232
- )
233
- else:
234
- self.collection.add(
235
- documents=[memory],
236
- ids=[memory_id],
237
- metadatas=[final_metadata]
238
- )
239
-
240
- return memory_id
241
-
242
- async def update(
243
- self,
244
- memory_id: str,
245
- memory: str,
246
- embedding: Optional[List[float]] = None,
247
- additional_metadata: Optional[Dict[str, Any]] = None,
248
- ) -> None:
249
- """Update an existing memory.
250
-
251
- Args:
252
- memory_id: ID of memory to update
253
- memory: New memory content (as string for ChromaDB)
254
- embedding: Optional embedding of the memory
255
- additional_metadata: Additional metadata to update
256
- """
257
- if not memory_id:
258
- logger.error("Cannot update memory: memory_id is None")
259
- return
260
-
261
- # Get existing metadata
262
- existing = self.collection.get(ids=[memory_id])
263
- if existing["metadatas"] and existing["metadatas"][0]:
264
- final_metadata = existing["metadatas"][0].copy()
265
- final_metadata["updated_at"] = datetime.now().isoformat()
266
- else:
267
- final_metadata = {}
268
- final_metadata["updated_at"] = datetime.now().isoformat()
269
-
270
- # Update with any additional metadata
271
- if additional_metadata:
272
- final_metadata.update(additional_metadata)
273
-
274
- if embedding:
275
- self.collection.upsert(
276
- ids=[memory_id],
277
- documents=[memory],
278
- embeddings=[embedding],
279
- metadatas=[final_metadata]
280
- )
281
- else:
282
- self.collection.upsert(
283
- ids=[memory_id],
284
- documents=[memory],
285
- metadatas=[final_metadata]
286
- )
287
-
288
- async def delete(
289
- self,
290
- memory_id: str
291
- ) -> None:
292
- """Delete a memory.
293
-
294
- Args:
295
- memory_id: ID of memory to delete
296
- """
297
- self.collection.delete(ids=[memory_id])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scratch_agents/sessions/base_session_manager.py DELETED
@@ -1,28 +0,0 @@
1
from abc import ABC, abstractmethod
from typing import Optional, Dict, List
from scratch_agents.types.events import Event
from scratch_agents.sessions.session import Session


class BaseSessionManager(ABC):
    """Abstract base class for session management"""

    @abstractmethod
    def create_session(self, session_id: Optional[str] = None, user_id: Optional[str] = None) -> Session:
        """Create a new session; implementations may generate an id when None."""
        pass

    @abstractmethod
    def get_session(self, session_id: str) -> Optional[Session]:
        """Load a session from storage"""
        pass

    @abstractmethod
    def get_or_create_session(self, session_id: str, user_id: Optional[str] = None) -> Session:
        """Get an existing session or create a new one"""
        pass

    @abstractmethod
    def add_event(self, session: Session, event: Event) -> None:
        """Add an event to the session"""
        pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scratch_agents/sessions/in_memory_session_manager.py DELETED
@@ -1,30 +0,0 @@
1
import uuid
from datetime import datetime
from typing import Optional

from .base_session_manager import BaseSessionManager
from .session import Session
from scratch_agents.types.events import Event

class InMemorySessionManager(BaseSessionManager):
    """Session manager that keeps all sessions in a process-local dict."""

    def __init__(self):
        # session_id -> Session; data lives only as long as this process.
        self.sessions = {}

    def create_session(self, session_id: Optional[str] = None, user_id: str = None) -> Session:
        """Create a new session.

        Making session_id optional matches BaseSessionManager's contract;
        a uuid4 id is generated when none is supplied.

        Raises:
            ValueError: If a session with this id already exists.
        """
        if session_id is None:
            session_id = str(uuid.uuid4())
        if session_id in self.sessions:
            raise ValueError(f"Session with id {session_id} already exists")
        self.sessions[session_id] = Session(session_id=session_id, user_id=user_id)
        return self.sessions[session_id]

    def get_session(self, session_id: str) -> Session:
        """Return the stored session.

        NOTE(review): raises ValueError for unknown ids, while the base class
        annotates Optional[Session] — kept as-is since callers may rely on it.
        """
        if session_id not in self.sessions:
            raise ValueError(f"Session with id {session_id} does not exist")
        return self.sessions[session_id]

    def get_or_create_session(self, session_id: str, user_id: str = None) -> Session:
        """Return the existing session, creating it first if needed."""
        if session_id not in self.sessions:
            return self.create_session(session_id, user_id)
        return self.sessions[session_id]

    def add_event(self, session: Session, event: Event) -> None:
        """Append an event and refresh the session's last-updated timestamp."""
        session.events.append(event)
        session.last_updated_at = datetime.now()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scratch_agents/sessions/session.py DELETED
@@ -1,23 +0,0 @@
1
import uuid
from pydantic import BaseModel, Field
from typing import List, Dict, Any
from datetime import datetime
from ..types.contents import ContentItem

class Session(BaseModel):
    """Container for short-term memory during a conversation session"""
    user_id: str
    session_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    events: List[ContentItem] = Field(default_factory=list)
    state: Dict[str, Any] = Field(default_factory=dict)
    last_updated_at: datetime = Field(default_factory=datetime.now)

    @property
    def core_memory(self) -> Dict[str, str]:
        """Core-memory dict, lazily seeded with default persona/human entries."""
        defaults = {
            "persona": "You are a helpful AI assistant",
            "human": ""
        }
        return self.state.setdefault("core_memory", defaults)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scratch_agents/sessions/task_cross_session_manager.py DELETED
@@ -1,194 +0,0 @@
1
- """Task-specific cross-session memory management."""
2
-
3
- from typing import List, Dict, Any, Optional, Literal
4
- import logging
5
- from pydantic import BaseModel, Field
6
- import json
7
-
8
- from .base_cross_session_manager import BaseCrossSessionManager
9
- from ..types.events import Event
10
- from ..types.contents import Message, ToolCall, ToolResult
11
-
12
- logger = logging.getLogger(__name__)
13
-
14
- MEMORY_EXTRACT_PROMPT = """
15
- You are a Task Memory Extractor specializing in tracking agent actions and problem-solving attempts.
16
- Extract ONLY information about what the agent ACTUALLY DID in this conversation.
17
-
18
- Focus on:
19
- 1. **Problem Identification**: What issue or challenge was the agent trying to address?
20
- 2. **Actions Taken**: What specific actions did the agent perform? (tools used, searches made, code written, etc.)
21
- 3. **Key Discoveries**: What important facts or information did the agent discover during the process?
22
- 4. **Success Status**: Was the task completed successfully?
23
-
24
- DO NOT extract:
25
- - Personal user information (name, preferences, etc.)
26
- - General conversation or greetings
27
- - User opinions or feelings
28
- - Future plans or what should be done
29
-
30
- Format each task as a structured memory with:
31
- - problem: Clear description of what the agent was asked to do or investigate
32
- - actions_taken: Specific actions the agent performed (not what it should do)
33
- - key_discoveries: Important information discovered during the task
34
- - success: true/false indicating if the task was completed
35
-
36
- Examples of GOOD task memories:
37
- {
38
- "problem": "User asked about React component not rendering",
39
- "actions_taken": "Examined useEffect hook, identified missing dependency in array, added state variable to dependency array",
40
- "key_discoveries": "useEffect was missing 'count' state variable in dependency array causing stale closure",
41
- "success": true
42
- }
43
-
44
- {
45
- "problem": "User reported database connection timeouts in production",
46
- "actions_taken": "Checked connection pool configuration, analyzed production logs, increased pool size from 10 to 50, implemented retry logic with exponential backoff",
47
- "key_discoveries": "Production load peaked at 45 concurrent connections, default pool size was only 10",
48
- "success": true
49
- }
50
-
51
- {
52
- "problem": "User asked 'What is Mem0 and how does it work?'",
53
- "actions_taken": "Performed multiple web searches with different query variations to find information about Mem0",
54
- "key_discoveries": "Found that Mem0 is an open-source memory layer for LLM applications, has a GitHub repo (mem0ai/mem0), provides hybrid data storage and intelligent retrieval",
55
- "success": false
56
- }
57
- """
58
-
59
- MEMORY_ACTION_PROMPT = """
60
- You are a Task Memory Action Decider specializing in tracking agent actions and problem-solving attempts.
61
- You are given a list of new task memories and a list of existing task memories.
62
- You need to decide whether to ADD, UPDATE, DELETE, or NOOP the new task memories.
63
-
64
- Format your response as a list of actions with:
65
- - action: ADD, UPDATE, DELETE, or NOOP
66
- - memory_id: The id of the memory to update or delete
67
-
68
- Action:
69
- - ADD: Add the new task memory if it describes a different problem or significantly different approach
70
- - UPDATE: Update the existing task memory if it's the same problem but with better/more complete actions or discoveries
71
- - DELETE: Delete the existing task memory if it's outdated or no longer relevant
72
- - NOOP: Do not add if it's essentially the same problem with similar actions and discoveries
73
-
74
- """
75
-
76
-
77
- class TaskMemory(BaseModel):
78
- """Structured task memory."""
79
- problem: str = Field(description="The problem or task the agent was asked to address")
80
- actions_taken: str = Field(description="The specific actions the agent performed")
81
- success: bool = Field(description="Whether the task was completed successfully")
82
- key_discoveries: Optional[str] = Field(default=None, description="Important information discovered during the task")
83
-
84
- class MemoryAction(BaseModel):
85
- """Memory action."""
86
- action: Literal["ADD", "UPDATE", "DELETE", "NOOP"] = Field(description="The action to take with the memory")
87
- memory_id: Optional[str] = Field(description="The id of the memory to update or delete")
88
-
89
-
90
- class TaskCrossSessionManager(BaseCrossSessionManager):
91
- """Manage task-specific memories across sessions."""
92
-
93
- def __init__(self, model,
94
- collection_name="task_memories",
95
- persist_directory="./cross_session_db",
96
- ):
97
- """Initialize task cross-session manager.
98
-
99
- Args:
100
- model: LLM model for memory extraction
101
- collection_name: Name of the ChromaDB collection
102
- persist_directory: Directory to persist ChromaDB data
103
- """
104
- super().__init__(model, collection_name, persist_directory)
105
-
106
- async def extract_memories(self, events: List[Event]):
107
- conversation_parts = []
108
-
109
- for event in events:
110
- for item in event.content:
111
- if isinstance(item, Message):
112
- conversation_parts.append(f"{item.role}: {item.content}")
113
- elif isinstance(item, ToolCall):
114
- conversation_parts.append(f"{item.tool_call_id}: {item.name}")
115
- elif isinstance(item, ToolResult):
116
- conversation_parts.append(f"{item.tool_call_id}: {item.name} {item.content}")
117
-
118
- conversation = "\n".join(conversation_parts)
119
-
120
- user_prompt = f"""Conversation:
121
- {conversation}
122
- """
123
- messages = [
124
- {"role": "system", "content": MEMORY_EXTRACT_PROMPT},
125
- {"role": "user", "content": user_prompt}
126
- ]
127
-
128
- try:
129
- response = await self.model.generate_structured(messages, TaskMemory)
130
- task_memory = TaskMemory.model_validate(response)
131
- return [task_memory.model_dump()]
132
-
133
- except Exception as e:
134
- logger.error(f"Error extracting task memories: {e}")
135
- return []
136
-
137
- async def find_existing(self, memories: List[Dict], user_id: str) -> List[Dict[str, Any]]:
138
- existing_memories = []
139
- for memory in memories:
140
- query = memory["problem"]
141
- results = await self.search(query, user_id)
142
- if results:
143
- existing_memories.append(results[0])
144
- return existing_memories
145
-
146
- async def decide_actions(self, new_memory: List[Dict], existing: List[Dict[str, Any]], user_id: str) -> List[Dict[str, Any]]:
147
- system_prompt = MEMORY_ACTION_PROMPT
148
- user_prompt = f"""
149
- Existing memory: {existing}
150
- New memory: {new_memory}
151
- """
152
- messages = [
153
- {"role": "system", "content": system_prompt},
154
- {"role": "user", "content": user_prompt}
155
- ]
156
- action = await self.model.generate_structured(messages, MemoryAction)
157
- result = []
158
- if action.action == "UPDATE":
159
-
160
- memory_id = action.memory_id
161
- if not memory_id:
162
- logger.error("Cannot update memory: no memory_id available")
163
- return []
164
- embeddings = await self.model.embed(self.embedding_model, [new_memory[0]["problem"]])
165
- # Convert dict to string for ChromaDB document field
166
- memory_str = json.dumps(new_memory[0], ensure_ascii=False)
167
- result.append({
168
- "action": "UPDATE",
169
- "memory_id": memory_id,
170
- "memory": memory_str,
171
- "embedding": embeddings[0],
172
- "metadata": new_memory[0] # Store original dict in metadata
173
- })
174
- elif action.action == "ADD":
175
- embeddings = await self.model.embed(self.embedding_model, [new_memory[0]["problem"]])
176
- # Convert dict to string for ChromaDB document field
177
- memory_str = json.dumps(new_memory[0], ensure_ascii=False)
178
- result.append({
179
- "action": "ADD",
180
- "memory": memory_str,
181
- "user_id": user_id,
182
- "embedding": embeddings[0],
183
- "metadata": new_memory[0] # Store original dict in metadata
184
- })
185
- elif action.action == "DELETE":
186
- result.append({
187
- "action": "DELETE",
188
- "memory_id": action.memory_id
189
- })
190
- elif action.action == "NOOP":
191
- result.append({
192
- "action": "NOOP"
193
- })
194
- return result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scratch_agents/sessions/user_cross_session_manager.py DELETED
@@ -1,185 +0,0 @@
1
- import json
2
- import chromadb
3
- from chromadb.utils import embedding_functions
4
- from typing import List, Optional, Literal, Dict, Any
5
- from enum import Enum
6
- import uuid
7
- from datetime import datetime
8
- import os
9
- from pydantic import BaseModel, Field
10
- import logging
11
-
12
- from .session import Session
13
- from .base_cross_session_manager import BaseCrossSessionManager
14
- from ..types.contents import Message
15
- from ..types.events import Event
16
- from ..models.llm_request import LlmRequest
17
-
18
- logger = logging.getLogger(__name__)
19
-
20
- MEMORY_EXTRACT_PROMPT = """
21
- You are a User Memory Extractor specializing in accurately storing ONLY facts about the USER from their messages.
22
-
23
- CRITICAL RULES:
24
- 1. ONLY extract factual information that the user explicitly states about themselves
25
- 2. NEVER extract questions the user asks
26
- 3. NEVER extract hypothetical scenarios or wishes
27
- 4. NEVER create memories from assistant responses
28
- 5. If the user is only asking questions, return an empty list
29
-
30
- Types of Information to Remember:
31
-
32
- 1. **Personal Identity & Details**: Names, relationships, family information, important dates
33
- 2. **Professional Information**: Current job title, company name, work responsibilities, career goals, past work experience
34
- 3. **Personal Preferences**: Likes, dislikes, preferences in food, activities, entertainment, brands
35
- 4. **Goals & Plans**: Future intentions, upcoming events, trips, personal objectives
36
- 5. **Health & Wellness**: Dietary restrictions, fitness routines, health conditions
37
- 6. **Lifestyle & Activities**: Hobbies, regular activities, service preferences
38
- 7. **Location & Living Situation**: Where they live, recent moves, living arrangements
39
- """
40
-
41
- MEMORY_ACTION_PROMPT = """
42
- You are a User Memory Action Decider specializing in accurately managing user facts and preferences.
43
-
44
- CRITICAL RULES FOR CONFLICTING INFORMATION:
45
- 1. When new information CONTRADICTS or UPDATES existing information, you MUST use UPDATE action
46
- 2. Location changes: If user moves from Place A to Place B, UPDATE the existing location memory
47
- 3. Status changes: If user changes jobs, relationships, or any status, UPDATE the relevant memory
48
- 4. Preference changes: If user's preferences change, UPDATE the existing preference
49
- 5. Look for semantic conflicts, not just exact text matches
50
-
51
- Examples of when to UPDATE:
52
- - Existing: "User works at Company A" + New: "User works at Company B" → UPDATE existing memory
53
- - Existing: "User likes coffee" + New: "User doesn't like coffee anymore" → UPDATE existing memory
54
-
55
- Format your response as a list of actions with:
56
- - action: ADD, UPDATE, DELETE, or NOOP
57
- - memory_id: The id of the memory to update or delete (required for UPDATE/DELETE)
58
- - content: The content of the memory to add or update (required for ADD/UPDATE)
59
-
60
- Actions:
61
- - ADD: Add new information that doesn't conflict with existing memories
62
- - UPDATE: Replace existing memory when there's conflicting or updated information
63
- - DELETE: Remove outdated or incorrect memory (use sparingly)
64
- - NOOP: Skip if the information is already stored or not relevant
65
- """
66
-
67
- class MemoryAction(BaseModel):
68
- """Structured output for memory action decision"""
69
- action: Literal["ADD", "UPDATE", "DELETE", "NOOP"] = Field(
70
- description="The action to take with the memory"
71
- )
72
- memory_id: Optional[str] = Field(
73
- description="The id of the memory to update or delete"
74
- )
75
- content: Optional[str] = Field(
76
- description="The content of the memory to add or update"
77
- )
78
-
79
- class MemoryActions(BaseModel):
80
- """A list of memory actions"""
81
- actions: List[MemoryAction] = Field(
82
- description="A list of memory actions"
83
- )
84
-
85
- class MemoryFacts(BaseModel):
86
- """A list of facts about the user"""
87
- facts: List[str] = Field(
88
- description="A list of facts about the user"
89
- )
90
-
91
- class UserCrossSessionManager(BaseCrossSessionManager):
92
- """Manage memories across sessions using ChromaDB"""
93
-
94
- def __init__(self, model, collection_name="user_memory", persist_directory="./cross_session_db", embedding_model="text-embedding-3-small"):
95
- # Initialize base class first
96
- super().__init__(model, collection_name, persist_directory, embedding_model)
97
-
98
- async def extract_memories(self, events: List[Any]) -> List[str]:
99
- """Extract important information from execution events using LLM"""
100
-
101
- conversation_parts = []
102
- for event in events:
103
- for item in event.content:
104
- if hasattr(item, 'role') and hasattr(item, 'content'):
105
- if item.role == 'user':
106
- conversation_parts.append(f"User: {item.content}")
107
-
108
- conversation = "\n".join(conversation_parts)
109
-
110
- if not conversation.strip():
111
- return []
112
-
113
- user_prompt = f"""Conversation:
114
- {conversation}
115
- """
116
-
117
- messages = [
118
- {"role": "system", "content": MEMORY_EXTRACT_PROMPT},
119
- {"role": "user", "content": user_prompt}
120
- ]
121
-
122
- response = await self.model.generate_structured(
123
- messages,
124
- MemoryFacts
125
- )
126
- logger.debug(f"Extracted facts: {response}")
127
- try:
128
- return response.facts
129
- except Exception as e:
130
- logger.error(f"Error extracting facts: {e}")
131
- return []
132
-
133
- async def find_existing(
134
- self,
135
- memories: List[str],
136
- user_id: str
137
- ) -> List[Dict[str, Any]]:
138
- """Find existing memories.
139
-
140
- Args:
141
- user_id: User identifier
142
-
143
- Returns:
144
- List of existing memories with metadata including timestamps
145
- """
146
- existing_memories = []
147
- results = self.collection.get(
148
- where={"user_id": user_id},
149
- include=["documents", "metadatas"]
150
- )
151
- if results["documents"]:
152
- for i, doc in enumerate(results["documents"]):
153
- metadata = results["metadatas"][i] if results["metadatas"] else {}
154
- existing_memories.append({
155
- "id": results["ids"][i],
156
- "content": doc,
157
- "metadata": metadata,
158
- "created_at": metadata.get("created_at", "Unknown"),
159
- "updated_at": metadata.get("updated_at", "Unknown")
160
- })
161
- return existing_memories
162
-
163
- async def decide_actions(self, new_memories: List[str], existing: List[Dict[str, Any]], user_id: str) -> List[Dict[str, Any]]:
164
- """Decide actions for new memories."""
165
- system_prompt = MEMORY_ACTION_PROMPT
166
-
167
- user_prompt = f"""
168
- Existing memory: {existing}
169
- New memory: {new_memories}
170
- """
171
- messages = [
172
- {"role": "system", "content": system_prompt},
173
- {"role": "user", "content": user_prompt}
174
- ]
175
- actions = await self.model.generate_structured(messages, MemoryActions)
176
- result = []
177
- for action in actions.actions:
178
- action_dict = action.model_dump()
179
- if action_dict["action"] == "ADD":
180
- action_dict["user_id"] = user_id
181
- action_dict["memory"] = action_dict.pop("content", None)
182
- elif action_dict["action"] == "UPDATE":
183
- action_dict["memory"] = action_dict.pop("content", None)
184
- result.append(action_dict)
185
- return result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scratch_agents/tools/__init__.py DELETED
@@ -1,5 +0,0 @@
1
- from .search_web import search_web
2
- from .wikipedia import search_wikipedia, get_wikipedia_page
3
- from .calculator import calculator
4
-
5
- __all__ = ["search_web", "search_wikipedia", "get_wikipedia_page", "calculator"]