Spaces:

shekkari21
/

agent-from-scratch

Sleeping

App Files Files Community

shekkari21 committed on Jan 30

Commit

378dbdf

1 Parent(s): 3354010

rearranged code and codebase

Browse files

Files changed (10) hide show

agent_framework/agent.py +129 -173
agent_framework/llm.py +15 -44
agent_framework/tools.py +1 -1
examples/demo.py +29 -45
examples/gaia_evaluation.py +36 -128
CODE_TRACE_OUTLINE.md → misc/CODE_TRACE_OUTLINE.md +0 -0
LANGCHAIN_COMPARISON.md → misc/LANGCHAIN_COMPARISON.md +0 -0
example.py → misc/example.py +0 -0
notebook_example.ipynb → misc/notebook_example.ipynb +123 -0
tavily_mcp_server.py → misc/tavily_mcp_server.py +0 -0

agent_framework/agent.py CHANGED Viewed

@@ -2,9 +2,12 @@
 from dataclasses import dataclass
 from typing import List, Optional, Type
 from pydantic import BaseModel
 import json
 from .models import (
     ExecutionContext,
     Event,
@@ -31,23 +34,125 @@ class Agent:
         model: LlmClient,
         tools: List[BaseTool] = None,
         instructions: str = "",
-        max_steps: int = 10,
-        name: str = "agent",
-        output_type: Optional[Type[BaseModel]] = None,
-        verbose: bool = False,
     ):
         self.model = model
         self.instructions = instructions
         self.max_steps = max_steps
-        self.name = name
         self.output_type = output_type
-        self.verbose = verbose
         self.tools = self._setup_tools(tools or [])
     def _setup_tools(self, tools: List[BaseTool]) -> List[BaseTool]:
         return tools
-    def _prepare_llm_request(self, context: ExecutionContext, enforce_output_type: bool = False) -> LlmRequest:
         """Convert execution context to LLM request.
         Args:
@@ -59,51 +164,44 @@ class Agent:
         flat_contents = []
         for event in context.events:
             flat_contents.extend(event.content)
-        # Only enforce structured output if explicitly requested (for final answer)
-        # This allows tool calls to happen first
-        response_format = self.output_type if (enforce_output_type and self.output_type) else None
         return LlmRequest(
             instructions=[self.instructions] if self.instructions else [],
             contents=flat_contents,
             tools=self.tools,
-            tool_choice="auto" if self.tools else None,
-            response_format=response_format,
         )
     async def think(self, llm_request: LlmRequest) -> LlmResponse:
         """Get LLM's response/decision."""
-        return await self.model.generate(llm_request)
     async def act(
-        self,
-        context: ExecutionContext,
-        tool_calls: List[ToolCall]
-    ) -> List[ToolResult]:
-        """Execute tool calls and return results."""
         tools_dict = {tool.name: tool for tool in self.tools}
         results = []
         for tool_call in tool_calls:
             if tool_call.name not in tools_dict:
-                results.append(ToolResult(
-                    tool_call_id=tool_call.tool_call_id,
-                    name=tool_call.name,
-                    status="error",
-                    content=[f"Tool '{tool_call.name}' not found"],
-                ))
-                continue
             tool = tools_dict[tool_call.name]
             try:
-                output = await tool.execute(context, **tool_call.arguments)
                 results.append(ToolResult(
                     tool_call_id=tool_call.tool_call_id,
                     name=tool_call.name,
                     status="success",
-                    content=[str(output)],
                 ))
             except Exception as e:
                 results.append(ToolResult(
@@ -114,150 +212,8 @@ class Agent:
                 ))
         return results
-    async def step(self, context: ExecutionContext):
-        """Execute one step of the agent loop."""
-        if self.verbose:
-            print(f"\n{'='*60}")
-            print(f"Step {context.current_step + 1} - Agent Thinking...")
-            print(f"{'='*60}")
-        # Check if we should enforce structured output
-        # Only enforce if: we have output_type AND the last event had tool results (meaning tools were used)
-        # This allows tool calls to happen first, then we enforce format for final answer
-        should_enforce_output = False
-        if self.output_type and len(context.events) > 0:
-            last_event = context.events[-1]
-            # If last event had tool results, we might be ready for final structured answer
-            has_tool_results = any(isinstance(item, ToolResult) for item in last_event.content)
-            if has_tool_results:
-                # Check if the event before that had tool calls
-                if len(context.events) >= 2:
-                    prev_event = context.events[-2]
-                    had_tool_calls = any(isinstance(item, ToolCall) for item in prev_event.content)
-                    # If we had tool calls and got results, next response should be final
-                    should_enforce_output = had_tool_calls
-        # Prepare LLM request - don't enforce output type to allow tool calls
-        llm_request = self._prepare_llm_request(context, enforce_output_type=should_enforce_output)
-        if self.verbose:
-            print(f"[SENDING] Request to LLM...")
-            if should_enforce_output:
-                print(f"         (Enforcing structured output format)")
-        # Get LLM's decision
-        llm_response = await self.think(llm_request)
-        # Record LLM response as an event
-        response_event = Event(
-            execution_id=context.execution_id,
-            author=self.name,
-            content=llm_response.content,
-        )
-        context.add_event(response_event)
-        # Show what the LLM responded with
-        if self.verbose:
-            for item in llm_response.content:
-                if isinstance(item, Message):
-                    print(f"\n[AGENT RESPONSE]")
-                    print(f"   {item.content[:200]}{'...' if len(item.content) > 200 else ''}")
-                elif isinstance(item, ToolCall):
-                    print(f"\n[TOOL CALL] {item.name}")
-                    print(f"   Arguments: {item.arguments}")
-        # Execute tools if the LLM requested any
-        tool_calls = [c for c in llm_response.content if isinstance(c, ToolCall)]
-        if tool_calls:
-            if self.verbose:
-                print(f"\n[EXECUTING] {len(tool_calls)} tool(s)...")
-            tool_results = await self.act(context, tool_calls)
-            tool_event = Event(
-                execution_id=context.execution_id,
-                author=self.name,
-                content=tool_results,
-            )
-            context.add_event(tool_event)
-            if self.verbose:
-                for result in tool_results:
-                    status_marker = "[SUCCESS]" if result.status == "success" else "[ERROR]"
-                    print(f"   {status_marker} {result.name}: {result.status}")
-                    if result.content and len(result.content) > 0:
-                        result_preview = str(result.content[0])[:150]
-                        if len(str(result.content[0])) > 150:
-                            result_preview += "..."
-                        print(f"      Result: {result_preview}")
-        elif self.output_type and not should_enforce_output:
-            # No tool calls but we didn't enforce output type - make one more call to get structured output
-            if self.verbose:
-                print(f"\n[NO TOOLS] Requesting structured output...")
-            final_request = self._prepare_llm_request(context, enforce_output_type=True)
-            final_response = await self.think(final_request)
-            # Replace the last event with the structured response
-            if context.events:
-                context.events[-1] = Event(
-                    execution_id=context.execution_id,
-                    author=self.name,
-                    content=final_response.content,
-                )
-        context.increment_step()
-        if self.verbose:
-            print(f"[COMPLETED] Step {context.current_step}\n")
-    async def run(
-        self,
-        user_input: str,
-        context: ExecutionContext = None
-    ) -> AgentResult:
-        """Run the agent with user input."""
-        # Create or reuse context
-        if context is None:
-            context = ExecutionContext()
-        # Add user input as the first event
-        user_event = Event(
-            execution_id=context.execution_id,
-            author="user",
-            content=[Message(role="user", content=user_input)]
-        )
-        context.add_event(user_event)
-        # Execute steps until completion or max steps reached
-        while not context.final_result and context.current_step < self.max_steps:
-            await self.step(context)
-            # Check if the last event is a final response
-            last_event = context.events[-1]
-            if self._is_final_response(last_event):
-                context.final_result = self._extract_final_result(last_event)
-        return AgentResult(output=context.final_result, context=context)
-    def _is_final_response(self, event: Event) -> bool:
-        """Check if this event contains a final response."""
-        has_tool_calls = any(isinstance(c, ToolCall) for c in event.content)
-        has_tool_results = any(isinstance(c, ToolResult) for c in event.content)
-        return not has_tool_calls and not has_tool_results
-    def _extract_final_result(self, event: Event):
-        """Extract the final result from an event."""
-        for item in event.content:
-            if isinstance(item, Message) and item.role == "assistant":
-                content = item.content
-                # If output_type is specified, parse as structured output
-                if self.output_type:
-                    try:
-                        content_json = json.loads(content)
-                        return self.output_type.model_validate(content_json)
-                    except (json.JSONDecodeError, ValueError):
-                        # If parsing fails, return as string
-                        return content
-                return content
-        return None

 from dataclasses import dataclass
 from typing import List, Optional, Type
+from xxlimited import Str
 from pydantic import BaseModel
+from .tools import tool
 import json
+from pydantic_core.core_schema import str_schema
 from .models import (
     ExecutionContext,
     Event,
         model: LlmClient,
         tools: List[BaseTool] = None,
         instructions: str = "",
+        max_steps: int = 5,
+        name: str = "agent",
+        output_type: Optional[Type[BaseModel]] = None
     ):
         self.model = model
         self.instructions = instructions
         self.max_steps = max_steps
+        self.name = name
         self.output_type = output_type
+        self.output_tool_name = None
         self.tools = self._setup_tools(tools or [])
     def _setup_tools(self, tools: List[BaseTool]) -> List[BaseTool]:
+        if self.output_type is not None:
+            @tool(
+                name="final_answer",
+                description="Return the final structured answer matching the required schema."
+            )
+            def final_answer(output: self.output_type) -> self.output_type:
+                return output
+            tools = list(tools)  # Create a copy to avoid modifying the original
+            tools.append(final_answer)
+            self.output_tool_name = "final_answer"
         return tools
+    async def run(
+        self,
+        user_input: str,
+        context: ExecutionContext = None
+    ) -> str:
+        """Run the agent with user input."""
+        # Create or reuse context
+        if context is None:
+            context = ExecutionContext()
+        # Add user input as the first event
+        user_event = Event(
+            execution_id=context.execution_id,
+            author="user",
+            content=[Message(role="user", content=user_input)]
+        )
+        context.add_event(user_event)
+        # Execute steps until completion or max steps reached
+        while not context.final_result and context.current_step < self.max_steps:
+            await self.step(context)
+            # Check if the last event is a final response
+            last_event = context.events[-1]
+            if self._is_final_response(last_event):
+                context.final_result = self._extract_final_result(last_event)
+        return AgentResult(output=context.final_result, context=context)
+    def _is_final_response(self, event: Event) -> bool:
+        """Check if this event contains a final response."""
+        if self.output_tool_name:
+            # For structured output: check if final_answer tool succeeded
+            for item in event.content:
+                if (isinstance(item, ToolResult)
+                    and item.name == self.output_tool_name
+                    and item.status == "success"):
+                    return True
+            return False
+        has_tool_calls = any(isinstance(c, ToolCall) for c in event.content)
+        has_tool_results = any(isinstance(c, ToolResult) for c in event.content)
+        return not has_tool_calls and not has_tool_results
+    def _extract_final_result(self, event: Event) -> str:
+        if self.output_tool_name:
+            # Extract structured output from final_answer tool result
+            for item in event.content:
+                if (isinstance(item, ToolResult)
+                    and item.name == self.output_tool_name
+                    and item.status == "success"
+                    and item.content):
+                    return item.content[0]
+        for item in event.content:
+            if isinstance(item, Message) and item.role == "assistant":
+                return item.content
+        return None
+    async def step(self, context: ExecutionContext):
+        """Execute one step of the agent loop."""
+        llm_request = self._prepare_llm_request(context)
+        # Get LLM's decision
+        llm_response = await self.think(llm_request)
+        # Record LLM response as an event
+        response_event = Event(
+            execution_id=context.execution_id,
+            author=self.name,
+            content=llm_response.content,
+        )
+        context.add_event(response_event)
+        # Execute tools if the LLM requested any
+        tool_calls = [c for c in llm_response.content if isinstance(c, ToolCall)]
+        if tool_calls:
+            tool_results = await self.act(context, tool_calls)
+            tool_event = Event(
+                execution_id=context.execution_id,
+                author=self.name,
+                content=tool_results,
+            )
+            context.add_event(tool_event)
+        context.increment_step()
+    def _prepare_llm_request(self, context: ExecutionContext) -> LlmRequest:
         """Convert execution context to LLM request.
         Args:
         flat_contents = []
         for event in context.events:
             flat_contents.extend(event.content)
+        # Determine tool choice strategy
+        if self.output_tool_name:
+            tool_choice = "required"  # Force tool usage for structured output
+        elif self.tools:
+            tool_choice = "auto"
+        else:
+            tool_choice = None
         return LlmRequest(
             instructions=[self.instructions] if self.instructions else [],
             contents=flat_contents,
             tools=self.tools,
+            tool_choice = tool_choice
         )
     async def think(self, llm_request: LlmRequest) -> LlmResponse:
         """Get LLM's response/decision."""
+        return await self.model.generate(llm_request)
     async def act(
+    self,
+    context: ExecutionContext,
+    tool_calls: List[ToolCall]
+) -> List[ToolResult]:
         tools_dict = {tool.name: tool for tool in self.tools}
         results = []
         for tool_call in tool_calls:
             if tool_call.name not in tools_dict:
+                raise ValueError(f"Tool '{tool_call.name}' not found")
             tool = tools_dict[tool_call.name]
             try:
+                output = await tool(context, **tool_call.arguments)
                 results.append(ToolResult(
                     tool_call_id=tool_call.tool_call_id,
                     name=tool_call.name,
                     status="success",
+                    content=[output],
                 ))
             except Exception as e:
                 results.append(ToolResult(
                 ))
         return results

agent_framework/llm.py CHANGED Viewed

@@ -10,13 +10,10 @@ from .models import Message, ToolCall, ToolResult, ContentItem
 class LlmRequest(BaseModel):
     """Request object for LLM calls."""
-    model_config = ConfigDict(arbitrary_types_allowed=True)
     instructions: List[str] = Field(default_factory=list)
     contents: List[ContentItem] = Field(default_factory=list)
     tools: List[Any] = Field(default_factory=list)
-    tool_choice: Optional[str] = None
-    response_format: Optional[Any] = None  # For structured output (Pydantic models)
 class LlmResponse(BaseModel):
@@ -38,25 +35,17 @@ class LlmClient:
         try:
             messages = self._build_messages(request)
             tools = [t.tool_definition for t in request.tools] if request.tools else None
-            completion_kwargs = {
-                "model": self.model,
-                "messages": messages,
-            }
-            if tools:
-                completion_kwargs["tools"] = tools
-                if request.tool_choice:
-                    completion_kwargs["tool_choice"] = request.tool_choice
-            if request.response_format:
-                completion_kwargs["response_format"] = request.response_format
-            completion_kwargs.update(self.config)
-            response = await acompletion(**completion_kwargs)
-            return self._parse_response(response, request.response_format)
         except Exception as e:
             return LlmResponse(error_message=str(e))
@@ -99,35 +88,17 @@ class LlmClient:
         return messages
-    def _parse_response(self, response, response_format=None) -> LlmResponse:
         """Convert API response to LlmResponse."""
         choice = response.choices[0]
         content_items = []
-        # Handle structured output (Pydantic models)
-        if response_format and choice.message.content:
-            try:
-                # Parse JSON and validate against Pydantic model
-                import json
-                content_json = json.loads(choice.message.content)
-                structured_output = response_format.model_validate(content_json)
-                # Store as string representation for now, will be parsed in Agent
-                content_items.append(Message(
-                    role="assistant",
-                    content=choice.message.content
-                ))
-            except Exception:
-                # Fallback to regular content if parsing fails
-                content_items.append(Message(
-                    role="assistant",
-                    content=choice.message.content
-                ))
-        elif choice.message.content:
             content_items.append(Message(
                 role="assistant",
                 content=choice.message.content
             ))
         if choice.message.tool_calls:
             for tc in choice.message.tool_calls:
                 content_items.append(ToolCall(
@@ -142,4 +113,4 @@ class LlmClient:
                 "input_tokens": response.usage.prompt_tokens,
                 "output_tokens": response.usage.completion_tokens,
             }
-        )

 class LlmRequest(BaseModel):
     """Request object for LLM calls."""
     instructions: List[str] = Field(default_factory=list)
     contents: List[ContentItem] = Field(default_factory=list)
     tools: List[Any] = Field(default_factory=list)
+    tool_choice: Optional[str] = 'auto'
 class LlmResponse(BaseModel):
         try:
             messages = self._build_messages(request)
             tools = [t.tool_definition for t in request.tools] if request.tools else None
+            response = await acompletion(
+                model=self.model,
+                messages=messages,
+                tools=tools,
+                **({"tool_choice": request.tool_choice}
+                   if request.tool_choice else {}),
+                **self.config
+            )
+            return self._parse_response(response)
         except Exception as e:
             return LlmResponse(error_message=str(e))
         return messages
+    def _parse_response(self, response) -> LlmResponse:
         """Convert API response to LlmResponse."""
         choice = response.choices[0]
         content_items = []
+        if choice.message.content:
             content_items.append(Message(
                 role="assistant",
                 content=choice.message.content
             ))
         if choice.message.tool_calls:
             for tc in choice.message.tool_calls:
                 content_items.append(ToolCall(
                 "input_tokens": response.usage.prompt_tokens,
                 "output_tokens": response.usage.completion_tokens,
             }
+        )

agent_framework/tools.py CHANGED Viewed

@@ -112,4 +112,4 @@ def tool(
     if func is not None:
         return decorator(func)
-    return decorator

     if func is not None:
         return decorator(func)
+    return decorator

examples/demo.py CHANGED Viewed

@@ -1,55 +1,39 @@
-"""Demo script showing agent usage with structured output."""
-import asyncio
-import os
-import sys
 from pathlib import Path
-from pydantic import BaseModel, Field
 # Add parent directory to path so we can import agent_framework
 sys.path.insert(0, str(Path(__file__).parent.parent))
-from agent_framework import Agent, LlmClient, display_trace
-from dotenv import load_dotenv
-load_dotenv()
-# Define output structure
-class AnswerOutput(BaseModel):
-    """Structured output for the answer."""
-    final_answer: str = Field(description="The final answer to the question")
 async def main():
-    # Create agent with structured output and verbose mode enabled
-    agent = Agent(
-        model=LlmClient(model="gpt-5-mini"),
-        tools=[],
-        instructions="You are a helpful assistant that answers questions accurately.",
-        output_type=AnswerOutput,
-        verbose=True,  # Enable verbose mode to see thinking process
-    )
-    print("Starting agent execution...")
-    print("=" * 60)
-    result = await agent.run(
-        "If Eliud Kipchoge could maintain his marathon pace, "
-        "how many thousand hours to reach the Moon?"
     )
-    print("\n" + "=" * 60)
-    print("FINAL RESULTS")
-    print("=" * 60)
-    print(f"Answer: {result.output.final_answer}")
-    print(f"Steps taken: {result.context.current_step}")
-    print("=" * 60)
-    # Optionally show full trace
-    print("\nFull Execution Trace:")
-    display_trace(result.context)
 if __name__ == "__main__":
     asyncio.run(main())

 from pathlib import Path
+import sys
+import asyncio
 # Add parent directory to path so we can import agent_framework
 sys.path.insert(0, str(Path(__file__).parent.parent))
+from agent_framework.llm import LlmClient, LlmRequest, Message
 async def main():
+    # Create client
+    client = LlmClient(model="gpt-5-mini")
+    # Build request
+    request = LlmRequest(
+        instructions=["You are a helpful assistant."],
+        contents=[Message(role="user", content="What is 2 + 2?")],
+        tool_choice = None
     )
+    # Generate response
+    response = await client.generate(request)
+    # Check for errors first!
+    if response.error_message:
+        print(f"Error: {response.error_message}")
+        return
+    # Response contains the answer
+    if not response.content:
+        print("No content in response")
+        return
+    for item in response.content:
+        if isinstance(item, Message):
+            print(item.content)  # "4"
+        else:
+            print(f"Got {type(item).__name__}: {item}")
 if __name__ == "__main__":
     asyncio.run(main())

examples/gaia_evaluation.py CHANGED Viewed

@@ -4,129 +4,18 @@ import asyncio
 import os
 import sys
 from pathlib import Path
-from typing import List
-from pydantic import BaseModel, Field
 # Add parent directory to path so we can import agent_framework
 sys.path.insert(0, str(Path(__file__).parent.parent))
-from agent_framework import Agent, LlmClient, AgentResult, load_mcp_tools, display_trace
-# GAIA output model
-class GaiaOutput(BaseModel):
-    """Structured output for GAIA benchmark responses."""
-    is_solvable: bool = Field(description="Whether the problem can be solved with available tools")
-    unsolvable_reason: str = Field(default="", description="Reason if problem is unsolvable")
-    final_answer: str = Field(description="The final answer to the problem")
-# GAIA system prompt
-gaia_prompt = """
-You are a general AI assistant. I will ask you a question.
-First, determine if you can solve this problem with your current capabilities and set "is_solvable" accordingly.
-If you can solve it, set "is_solvable" to true and provide your answer in "final_answer".
-If you cannot solve it, set "is_solvable" to false and explain why in "unsolvable_reason".
-Your final answer should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
-If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
-If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
-If you are asked for a comma separated list, apply the above rules depending on whether the element is a number or a string.
-"""
-def create_gaia_agent(model: str, tools: List) -> Agent:
-    """Create an agent configured for GAIA benchmark evaluation.
-    Args:
-        model: LLM model name (e.g., "gpt-5", "gpt-5-mini")
-        tools: List of tools to provide to the agent
-    Returns:
-        Configured Agent instance
-    """
-    return Agent(
-        model=LlmClient(model=model),
-        tools=tools,
-        instructions=gaia_prompt,
-        output_type=GaiaOutput,
-        max_steps=15,
-    )
-# Semaphore for rate limiting
-SEMAPHORE = asyncio.Semaphore(3)
-async def solve_problem(agent: Agent, question: str) -> AgentResult:
-    """Solve a single GAIA problem with rate limiting.
-    Args:
-        agent: Configured agent instance
-        question: Problem question to solve
-    Returns:
-        AgentResult with structured output
-    """
-    async with SEMAPHORE:
-        return await agent.run(question)
-async def run_experiment(
-    problems: List[dict],
-    models: List[str],
-    tools: List = None,
-) -> dict:
-    """Run GAIA evaluation experiment across multiple models.
-    Args:
-        problems: List of problem dictionaries with 'Question' and 'Final answer' keys
-        models: List of model names to evaluate
-        tools: List of tools to provide to agents
-    Returns:
-        Dictionary mapping model names to lists of results
-    """
-    tools = tools or []
-    results = {model: [] for model in models}
-    tasks = []
-    for problem in problems:
-        for model in models:
-            agent = create_gaia_agent(model, tools)
-            task = solve_problem(agent, problem.get("Question", problem.get("question", "")))
-            tasks.append((model, problem, task))
-    # Execute all tasks
-    task_results = await asyncio.gather(*[task for _, _, task in tasks], return_exceptions=True)
-    # Organize results
-    for (model, problem, _), result in zip(tasks, task_results):
-        if isinstance(result, Exception):
-            results[model].append({
-                "task_id": problem.get("task_id", problem.get("id", "")),
-                "model": model,
-                "error": str(result),
-            })
-        else:
-            output = result.output if isinstance(result.output, GaiaOutput) else None
-            results[model].append({
-                "task_id": problem.get("task_id", problem.get("id", "")),
-                "model": model,
-                "is_solvable": output.is_solvable if output else None,
-                "final_answer": output.final_answer if output else None,
-                "unsolvable_reason": output.unsolvable_reason if output else None,
-                "correct": (
-                    output.final_answer.strip().lower() == problem.get("Final answer", "").strip().lower()
-                    if output and "Final answer" in problem
-                    else None
-                ),
-                "steps": result.context.current_step,
-            })
-    return results
 async def main():
     """Example usage of GAIA evaluation."""
@@ -139,22 +28,41 @@ async def main():
     mcp_tools = await load_mcp_tools(tavily_connection)
-    # Create agent
-    agent = create_gaia_agent("gpt-5-mini", mcp_tools)
     # Solve a problem
     result = await agent.run(
-        "If Eliud Kipchoge could maintain his marathon pace, "
-        "how many thousand hours to reach the Moon?"
     )
-    if isinstance(result.output, GaiaOutput):
-        print(f"Answer: {result.output.final_answer}")
-        print(f"Solvable: {result.output.is_solvable}")
-        print(f"Steps: {result.context.current_step}")
-    else:
-        print(f"Answer: {result.output}")
-        print(f"Steps: {result.context.current_step}")
     # Display execution trace
     display_trace(result.context)

 import os
 import sys
 from pathlib import Path
 # Add parent directory to path so we can import agent_framework
 sys.path.insert(0, str(Path(__file__).parent.parent))
+from agent_framework import Agent, LlmClient, load_mcp_tools, display_trace, tool
+# Calculator tool
+@tool
+def calculator(expression: str) -> float:
+    """Calculate mathematical expressions. Supports basic math operations like +, -, *, /, **, etc."""
+    return eval(expression)
 async def main():
     """Example usage of GAIA evaluation."""
     mcp_tools = await load_mcp_tools(tavily_connection)
+    # Combine all tools: calculator (already wrapped by @tool decorator) + MCP tools
+    all_tools = [calculator] + mcp_tools
+    # Show available tools
+    print(f"\n{'='*60}")
+    print(f"Available Tools: {len(all_tools)}")
+    print(f"{'='*60}")
+    for i, tool_obj in enumerate(all_tools, 1):
+        print(f"{i}. {tool_obj.name}")
+        if hasattr(tool_obj, 'description'):
+            desc = tool_obj.description[:80] + "..." if len(tool_obj.description) > 80 else tool_obj.description
+            print(f"   {desc}")
+    print(f"{'='*60}\n")
+    # Create agent with instructions to use web search
+    agent = Agent(
+        model=LlmClient(model="gpt-5-mini"),
+        tools=all_tools,
+        instructions="""You are a helpful assistant. You have access to tools.
+Do NOT rely solely on your training data. Use the tools when necessary to present accurate information.
+Instead of assumptions, use websearch for the questions you don't know exact answer to
+""",
+        max_steps=10,
+    )
     # Solve a problem
     result = await agent.run(
+        'If A is usain bolt\'s world record in 100 meters, B is usain bolt\'s fastest time in 200 meters, what is A x B ?'
     )
+    print(f"\n{'='*60}")
+    print(f"Final Answer: {result.output}")
+    print(f"Steps: {result.context.current_step}")
+    print(f"{'='*60}\n")
     # Display execution trace
     display_trace(result.context)

CODE_TRACE_OUTLINE.md → misc/CODE_TRACE_OUTLINE.md RENAMED Viewed

File without changes

LANGCHAIN_COMPARISON.md → misc/LANGCHAIN_COMPARISON.md RENAMED Viewed

File without changes

example.py → misc/example.py RENAMED Viewed

File without changes

notebook_example.ipynb → misc/notebook_example.ipynb RENAMED Viewed

@@ -73,6 +73,129 @@
         "print(f\"Confidence: {result.output.confidence}\")    # 0.92\n",
         "print(f\"Key phrases: {result.output.key_phrases}\")  # [\"exceeded expectations\", \"highly recommend\"]\n"
       ]
     }
   ],
   "metadata": {

         "print(f\"Confidence: {result.output.confidence}\")    # 0.92\n",
         "print(f\"Key phrases: {result.output.key_phrases}\")  # [\"exceeded expectations\", \"highly recommend\"]\n"
       ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "'''\n",
+        "To implement an agent, we need tools, an execution context,\n",
+        "instructions (the system prompt that defines the agent's behavior), and an LLM.\n",
+        "\n",
+        "Event: a record of who did what, e.g. whether it was a user request,\n",
+        "a tool call requested by the LLM, or a result returned by a tool.\n",
+        "\n",
+        "'''\n",
+        "\n",
+        "from anyio import Event\n",
+        "from agent_framework import ExecutionContext, Message\n",
+        "from agent_framework.agent import AgentResult\n",
+        "\n",
+        "class Agent: ## does this inherit from anything ? \n",
+        "    def init(self, tools, executionContext, llmClient, instructions, maxSteps, verbose, name = \"agent\"):\n",
+        "        self.tools = self._setup_tools(tools or [])\n",
+        "        self.executionContext = executionContext\n",
+        "        self.llmClient = llmClient\n",
+        "        self.instructions = instructions\n",
+        "        self.maxSteps = maxSteps\n",
+        "        self.verbose = verbose\n",
+        "        self.name = name\n",
+        "\n",
+        "    ## step 1 is to setup tools\n",
+        "\n",
+        "    def _setup_tools(self, tools):\n",
+        "        return tools\n",
+        "\n",
+        "    ## step 2 is to define entry point for users.(run method)\n",
+        "\n",
+        "    async def run(self, user_input, context):\n",
+        "\n",
+        "        ## check if there is any previous context, else create\n",
+        "\n",
+        "        if context is None:\n",
+        "            context = ExecutionContext()\n",
+        "\n",
+        "        ## create an event that records the user's input\n",
+        "        user_event = Event(\n",
+        "            execution_id = context.execution_id,\n",
+        "            author = 'user',\n",
+        "            content = [Message(role = 'user', content = user_input)]\n",
+        "        )\n",
+        "        ## add the event to context\n",
+        "        context.add_event(user_event)\n",
+        "\n",
+        "        ## keep stepping until the agent reaches a final result or hits max steps\n",
+        "        while not context.final_result and context.current_step < self.max_steps:\n",
+        "            ## each step is a think-act cycle\n",
+        "            await self.step(context)\n",
+        "\n",
+        "            ## check if newly performed action is final\n",
+        "            last_event = context.events[-1]\n",
+        "\n",
+        "            # If it is final, extract the final result from the last event so it\n",
+        "            # can be returned in the AgentResult along with the context\n",
+        "            if self._is_final_response(last_event):\n",
+        "                context.final_result = self._extract_final_result(last_event)\n",
+        "\n",
+        "        return AgentResult(context.final_result, context = context)\n",
+        "\n",
+        "    # step 3 prepare for llm request\n",
+        "\n",
+        "    def _prepare_llm_request(self, context):\n",
+        "        \n",
+        "        #flatten all the events (why ?)\n",
+        "        flat_contents = []\n",
+        "        for event in context.events:\n",
+        "            flat_contents.extend(event.content)\n",
+        "\n",
+        "        ## with this context, call llm\n",
+        "        return LlmRequest(\n",
+        "        instructions=[self.instructions] if self.instructions else [],\n",
+        "        contents=flat_contents,\n",
+        "        tools=self.tools,\n",
+        "        tool_choice=\"auto\" if self.tools else None,\n",
+        "    )\n",
+        "\n",
+        "    async def step(self, context):\n",
+        "        \n",
+        "        ## write a method for this\n",
+        "        llm_request = self._prepare_llm_request(context)\n",
+        "\n",
+        "        # Get LLM's decision\n",
+        "        llm_response = await self.think(llm_request)\n",
+        "\n",
+        "        response_event = Event(\n",
+        "            execution_id=context.execution_id,\n",
+        "            author=self.name,\n",
+        "            content=llm_response.content,\n",
+        "        )\n",
+        "\n",
+        "    async def think(self, llm_request):\n",
+        "        \"\"\"Get LLM's response/decision.\"\"\"\n",
+        "        return await self.model.generate(llm_request)\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Q. What is an LLM request?\n",
+        "\n",
+        "A. It is what our agent sends to the LLM. Before sending it, we bundle it with the necessary context, prompt, and tools."
+      ]
     }
   ],
   "metadata": {

tavily_mcp_server.py → misc/tavily_mcp_server.py RENAMED Viewed

File without changes