shekkari21 committed on
Commit
378dbdf
·
1 Parent(s): 3354010

rearranged code and codebase

Browse files
agent_framework/agent.py CHANGED
@@ -2,9 +2,12 @@
2
 
3
  from dataclasses import dataclass
4
  from typing import List, Optional, Type
 
5
  from pydantic import BaseModel
 
6
  import json
7
 
 
8
  from .models import (
9
  ExecutionContext,
10
  Event,
@@ -31,23 +34,125 @@ class Agent:
31
  model: LlmClient,
32
  tools: List[BaseTool] = None,
33
  instructions: str = "",
34
- max_steps: int = 10,
35
- name: str = "agent",
36
- output_type: Optional[Type[BaseModel]] = None,
37
- verbose: bool = False,
38
  ):
39
  self.model = model
40
  self.instructions = instructions
41
  self.max_steps = max_steps
42
- self.name = name
43
  self.output_type = output_type
44
- self.verbose = verbose
45
  self.tools = self._setup_tools(tools or [])
46
 
47
  def _setup_tools(self, tools: List[BaseTool]) -> List[BaseTool]:
 
 
 
 
 
 
 
 
 
 
 
 
48
  return tools
49
 
50
- def _prepare_llm_request(self, context: ExecutionContext, enforce_output_type: bool = False) -> LlmRequest:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  """Convert execution context to LLM request.
52
 
53
  Args:
@@ -59,51 +164,44 @@ class Agent:
59
  flat_contents = []
60
  for event in context.events:
61
  flat_contents.extend(event.content)
62
-
63
- # Only enforce structured output if explicitly requested (for final answer)
64
- # This allows tool calls to happen first
65
- response_format = self.output_type if (enforce_output_type and self.output_type) else None
66
-
 
 
 
67
  return LlmRequest(
68
  instructions=[self.instructions] if self.instructions else [],
69
  contents=flat_contents,
70
  tools=self.tools,
71
- tool_choice="auto" if self.tools else None,
72
- response_format=response_format,
73
  )
74
-
75
  async def think(self, llm_request: LlmRequest) -> LlmResponse:
76
  """Get LLM's response/decision."""
77
- return await self.model.generate(llm_request)
78
-
79
  async def act(
80
- self,
81
- context: ExecutionContext,
82
- tool_calls: List[ToolCall]
83
- ) -> List[ToolResult]:
84
- """Execute tool calls and return results."""
85
  tools_dict = {tool.name: tool for tool in self.tools}
86
  results = []
87
 
88
  for tool_call in tool_calls:
89
  if tool_call.name not in tools_dict:
90
- results.append(ToolResult(
91
- tool_call_id=tool_call.tool_call_id,
92
- name=tool_call.name,
93
- status="error",
94
- content=[f"Tool '{tool_call.name}' not found"],
95
- ))
96
- continue
97
 
98
  tool = tools_dict[tool_call.name]
99
 
100
  try:
101
- output = await tool.execute(context, **tool_call.arguments)
102
  results.append(ToolResult(
103
  tool_call_id=tool_call.tool_call_id,
104
  name=tool_call.name,
105
  status="success",
106
- content=[str(output)],
107
  ))
108
  except Exception as e:
109
  results.append(ToolResult(
@@ -114,150 +212,8 @@ class Agent:
114
  ))
115
 
116
  return results
117
-
118
- async def step(self, context: ExecutionContext):
119
- """Execute one step of the agent loop."""
120
- if self.verbose:
121
- print(f"\n{'='*60}")
122
- print(f"Step {context.current_step + 1} - Agent Thinking...")
123
- print(f"{'='*60}")
124
-
125
- # Check if we should enforce structured output
126
- # Only enforce if: we have output_type AND the last event had tool results (meaning tools were used)
127
- # This allows tool calls to happen first, then we enforce format for final answer
128
- should_enforce_output = False
129
- if self.output_type and len(context.events) > 0:
130
- last_event = context.events[-1]
131
- # If last event had tool results, we might be ready for final structured answer
132
- has_tool_results = any(isinstance(item, ToolResult) for item in last_event.content)
133
- if has_tool_results:
134
- # Check if the event before that had tool calls
135
- if len(context.events) >= 2:
136
- prev_event = context.events[-2]
137
- had_tool_calls = any(isinstance(item, ToolCall) for item in prev_event.content)
138
- # If we had tool calls and got results, next response should be final
139
- should_enforce_output = had_tool_calls
140
-
141
- # Prepare LLM request - don't enforce output type to allow tool calls
142
- llm_request = self._prepare_llm_request(context, enforce_output_type=should_enforce_output)
143
-
144
- if self.verbose:
145
- print(f"[SENDING] Request to LLM...")
146
- if should_enforce_output:
147
- print(f" (Enforcing structured output format)")
148
-
149
- # Get LLM's decision
150
- llm_response = await self.think(llm_request)
151
-
152
- # Record LLM response as an event
153
- response_event = Event(
154
- execution_id=context.execution_id,
155
- author=self.name,
156
- content=llm_response.content,
157
- )
158
- context.add_event(response_event)
159
-
160
- # Show what the LLM responded with
161
- if self.verbose:
162
- for item in llm_response.content:
163
- if isinstance(item, Message):
164
- print(f"\n[AGENT RESPONSE]")
165
- print(f" {item.content[:200]}{'...' if len(item.content) > 200 else ''}")
166
- elif isinstance(item, ToolCall):
167
- print(f"\n[TOOL CALL] {item.name}")
168
- print(f" Arguments: {item.arguments}")
169
-
170
- # Execute tools if the LLM requested any
171
- tool_calls = [c for c in llm_response.content if isinstance(c, ToolCall)]
172
- if tool_calls:
173
- if self.verbose:
174
- print(f"\n[EXECUTING] {len(tool_calls)} tool(s)...")
175
- tool_results = await self.act(context, tool_calls)
176
- tool_event = Event(
177
- execution_id=context.execution_id,
178
- author=self.name,
179
- content=tool_results,
180
- )
181
- context.add_event(tool_event)
182
-
183
- if self.verbose:
184
- for result in tool_results:
185
- status_marker = "[SUCCESS]" if result.status == "success" else "[ERROR]"
186
- print(f" {status_marker} {result.name}: {result.status}")
187
- if result.content and len(result.content) > 0:
188
- result_preview = str(result.content[0])[:150]
189
- if len(str(result.content[0])) > 150:
190
- result_preview += "..."
191
- print(f" Result: {result_preview}")
192
- elif self.output_type and not should_enforce_output:
193
- # No tool calls but we didn't enforce output type - make one more call to get structured output
194
- if self.verbose:
195
- print(f"\n[NO TOOLS] Requesting structured output...")
196
- final_request = self._prepare_llm_request(context, enforce_output_type=True)
197
- final_response = await self.think(final_request)
198
-
199
- # Replace the last event with the structured response
200
- if context.events:
201
- context.events[-1] = Event(
202
- execution_id=context.execution_id,
203
- author=self.name,
204
- content=final_response.content,
205
- )
206
 
207
- context.increment_step()
208
 
209
- if self.verbose:
210
- print(f"[COMPLETED] Step {context.current_step}\n")
211
 
212
- async def run(
213
- self,
214
- user_input: str,
215
- context: ExecutionContext = None
216
- ) -> AgentResult:
217
- """Run the agent with user input."""
218
- # Create or reuse context
219
- if context is None:
220
- context = ExecutionContext()
221
 
222
- # Add user input as the first event
223
- user_event = Event(
224
- execution_id=context.execution_id,
225
- author="user",
226
- content=[Message(role="user", content=user_input)]
227
- )
228
- context.add_event(user_event)
229
-
230
- # Execute steps until completion or max steps reached
231
- while not context.final_result and context.current_step < self.max_steps:
232
- await self.step(context)
233
-
234
- # Check if the last event is a final response
235
- last_event = context.events[-1]
236
- if self._is_final_response(last_event):
237
- context.final_result = self._extract_final_result(last_event)
238
-
239
- return AgentResult(output=context.final_result, context=context)
240
 
241
- def _is_final_response(self, event: Event) -> bool:
242
- """Check if this event contains a final response."""
243
- has_tool_calls = any(isinstance(c, ToolCall) for c in event.content)
244
- has_tool_results = any(isinstance(c, ToolResult) for c in event.content)
245
- return not has_tool_calls and not has_tool_results
246
-
247
- def _extract_final_result(self, event: Event):
248
- """Extract the final result from an event."""
249
- for item in event.content:
250
- if isinstance(item, Message) and item.role == "assistant":
251
- content = item.content
252
-
253
- # If output_type is specified, parse as structured output
254
- if self.output_type:
255
- try:
256
- content_json = json.loads(content)
257
- return self.output_type.model_validate(content_json)
258
- except (json.JSONDecodeError, ValueError):
259
- # If parsing fails, return as string
260
- return content
261
-
262
- return content
263
- return None
 
2
 
3
  from dataclasses import dataclass
4
  from typing import List, Optional, Type
5
+ from xxlimited import Str
6
  from pydantic import BaseModel
7
+ from .tools import tool
8
  import json
9
 
10
+ from pydantic_core.core_schema import str_schema
11
  from .models import (
12
  ExecutionContext,
13
  Event,
 
34
  model: LlmClient,
35
  tools: List[BaseTool] = None,
36
  instructions: str = "",
37
+ max_steps: int = 5,
38
+ name: str = "agent",
39
+ output_type: Optional[Type[BaseModel]] = None
40
+
41
  ):
42
  self.model = model
43
  self.instructions = instructions
44
  self.max_steps = max_steps
45
+ self.name = name
46
  self.output_type = output_type
47
+ self.output_tool_name = None
48
  self.tools = self._setup_tools(tools or [])
49
 
50
  def _setup_tools(self, tools: List[BaseTool]) -> List[BaseTool]:
51
+ if self.output_type is not None:
52
+ @tool(
53
+ name="final_answer",
54
+ description="Return the final structured answer matching the required schema."
55
+ )
56
+ def final_answer(output: self.output_type) -> self.output_type:
57
+ return output
58
+
59
+ tools = list(tools) # Create a copy to avoid modifying the original
60
+ tools.append(final_answer)
61
+ self.output_tool_name = "final_answer"
62
+
63
  return tools
64
 
65
+ async def run(
66
+ self,
67
+ user_input: str,
68
+ context: ExecutionContext = None
69
+ ) -> str:
70
+ """Run the agent with user input."""
71
+ # Create or reuse context
72
+ if context is None:
73
+ context = ExecutionContext()
74
+
75
+ # Add user input as the first event
76
+ user_event = Event(
77
+ execution_id=context.execution_id,
78
+ author="user",
79
+ content=[Message(role="user", content=user_input)]
80
+ )
81
+ context.add_event(user_event)
82
+
83
+ # Execute steps until completion or max steps reached
84
+ while not context.final_result and context.current_step < self.max_steps:
85
+ await self.step(context)
86
+
87
+ # Check if the last event is a final response
88
+ last_event = context.events[-1]
89
+ if self._is_final_response(last_event):
90
+ context.final_result = self._extract_final_result(last_event)
91
+
92
+ return AgentResult(output=context.final_result, context=context)
93
+
94
+
95
+ def _is_final_response(self, event: Event) -> bool:
96
+ """Check if this event contains a final response."""
97
+ if self.output_tool_name:
98
+ # For structured output: check if final_answer tool succeeded
99
+ for item in event.content:
100
+ if (isinstance(item, ToolResult)
101
+ and item.name == self.output_tool_name
102
+ and item.status == "success"):
103
+ return True
104
+ return False
105
+ has_tool_calls = any(isinstance(c, ToolCall) for c in event.content)
106
+ has_tool_results = any(isinstance(c, ToolResult) for c in event.content)
107
+ return not has_tool_calls and not has_tool_results
108
+
109
+ def _extract_final_result(self, event: Event) -> str:
110
+ if self.output_tool_name:
111
+ # Extract structured output from final_answer tool result
112
+ for item in event.content:
113
+ if (isinstance(item, ToolResult)
114
+ and item.name == self.output_tool_name
115
+ and item.status == "success"
116
+ and item.content):
117
+ return item.content[0]
118
+ for item in event.content:
119
+ if isinstance(item, Message) and item.role == "assistant":
120
+ return item.content
121
+ return None
122
+
123
+ async def step(self, context: ExecutionContext):
124
+ """Execute one step of the agent loop."""
125
+
126
+ llm_request = self._prepare_llm_request(context)
127
+
128
+ # Get LLM's decision
129
+ llm_response = await self.think(llm_request)
130
+
131
+
132
+ # Record LLM response as an event
133
+ response_event = Event(
134
+ execution_id=context.execution_id,
135
+ author=self.name,
136
+ content=llm_response.content,
137
+ )
138
+ context.add_event(response_event)
139
+
140
+
141
+ # Execute tools if the LLM requested any
142
+ tool_calls = [c for c in llm_response.content if isinstance(c, ToolCall)]
143
+ if tool_calls:
144
+ tool_results = await self.act(context, tool_calls)
145
+ tool_event = Event(
146
+ execution_id=context.execution_id,
147
+ author=self.name,
148
+ content=tool_results,
149
+ )
150
+ context.add_event(tool_event)
151
+
152
+
153
+ context.increment_step()
154
+
155
+ def _prepare_llm_request(self, context: ExecutionContext) -> LlmRequest:
156
  """Convert execution context to LLM request.
157
 
158
  Args:
 
164
  flat_contents = []
165
  for event in context.events:
166
  flat_contents.extend(event.content)
167
+ # Determine tool choice strategy
168
+ if self.output_tool_name:
169
+ tool_choice = "required" # Force tool usage for structured output
170
+ elif self.tools:
171
+ tool_choice = "auto"
172
+ else:
173
+ tool_choice = None
174
+
175
  return LlmRequest(
176
  instructions=[self.instructions] if self.instructions else [],
177
  contents=flat_contents,
178
  tools=self.tools,
179
+ tool_choice = tool_choice
 
180
  )
 
181
  async def think(self, llm_request: LlmRequest) -> LlmResponse:
182
  """Get LLM's response/decision."""
183
+ return await self.model.generate(llm_request)
 
184
  async def act(
185
+ self,
186
+ context: ExecutionContext,
187
+ tool_calls: List[ToolCall]
188
+ ) -> List[ToolResult]:
 
189
  tools_dict = {tool.name: tool for tool in self.tools}
190
  results = []
191
 
192
  for tool_call in tool_calls:
193
  if tool_call.name not in tools_dict:
194
+ raise ValueError(f"Tool '{tool_call.name}' not found")
 
 
 
 
 
 
195
 
196
  tool = tools_dict[tool_call.name]
197
 
198
  try:
199
+ output = await tool(context, **tool_call.arguments)
200
  results.append(ToolResult(
201
  tool_call_id=tool_call.tool_call_id,
202
  name=tool_call.name,
203
  status="success",
204
+ content=[output],
205
  ))
206
  except Exception as e:
207
  results.append(ToolResult(
 
212
  ))
213
 
214
  return results
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
215
 
 
216
 
 
 
217
 
 
 
 
 
 
 
 
 
 
218
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
219
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agent_framework/llm.py CHANGED
@@ -10,13 +10,10 @@ from .models import Message, ToolCall, ToolResult, ContentItem
10
 
11
  class LlmRequest(BaseModel):
12
  """Request object for LLM calls."""
13
- model_config = ConfigDict(arbitrary_types_allowed=True)
14
-
15
  instructions: List[str] = Field(default_factory=list)
16
  contents: List[ContentItem] = Field(default_factory=list)
17
  tools: List[Any] = Field(default_factory=list)
18
- tool_choice: Optional[str] = None
19
- response_format: Optional[Any] = None # For structured output (Pydantic models)
20
 
21
 
22
  class LlmResponse(BaseModel):
@@ -38,25 +35,17 @@ class LlmClient:
38
  try:
39
  messages = self._build_messages(request)
40
  tools = [t.tool_definition for t in request.tools] if request.tools else None
 
 
 
 
 
 
 
 
 
41
 
42
- completion_kwargs = {
43
- "model": self.model,
44
- "messages": messages,
45
- }
46
-
47
- if tools:
48
- completion_kwargs["tools"] = tools
49
- if request.tool_choice:
50
- completion_kwargs["tool_choice"] = request.tool_choice
51
-
52
- if request.response_format:
53
- completion_kwargs["response_format"] = request.response_format
54
-
55
- completion_kwargs.update(self.config)
56
-
57
- response = await acompletion(**completion_kwargs)
58
-
59
- return self._parse_response(response, request.response_format)
60
  except Exception as e:
61
  return LlmResponse(error_message=str(e))
62
 
@@ -99,35 +88,17 @@ class LlmClient:
99
 
100
  return messages
101
 
102
- def _parse_response(self, response, response_format=None) -> LlmResponse:
103
  """Convert API response to LlmResponse."""
104
  choice = response.choices[0]
105
  content_items = []
106
 
107
- # Handle structured output (Pydantic models)
108
- if response_format and choice.message.content:
109
- try:
110
- # Parse JSON and validate against Pydantic model
111
- import json
112
- content_json = json.loads(choice.message.content)
113
- structured_output = response_format.model_validate(content_json)
114
- # Store as string representation for now, will be parsed in Agent
115
- content_items.append(Message(
116
- role="assistant",
117
- content=choice.message.content
118
- ))
119
- except Exception:
120
- # Fallback to regular content if parsing fails
121
- content_items.append(Message(
122
- role="assistant",
123
- content=choice.message.content
124
- ))
125
- elif choice.message.content:
126
  content_items.append(Message(
127
  role="assistant",
128
  content=choice.message.content
129
  ))
130
-
131
  if choice.message.tool_calls:
132
  for tc in choice.message.tool_calls:
133
  content_items.append(ToolCall(
@@ -142,4 +113,4 @@ class LlmClient:
142
  "input_tokens": response.usage.prompt_tokens,
143
  "output_tokens": response.usage.completion_tokens,
144
  }
145
- )
 
10
 
11
  class LlmRequest(BaseModel):
12
  """Request object for LLM calls."""
 
 
13
  instructions: List[str] = Field(default_factory=list)
14
  contents: List[ContentItem] = Field(default_factory=list)
15
  tools: List[Any] = Field(default_factory=list)
16
+ tool_choice: Optional[str] = 'auto'
 
17
 
18
 
19
  class LlmResponse(BaseModel):
 
35
  try:
36
  messages = self._build_messages(request)
37
  tools = [t.tool_definition for t in request.tools] if request.tools else None
38
+
39
+ response = await acompletion(
40
+ model=self.model,
41
+ messages=messages,
42
+ tools=tools,
43
+ **({"tool_choice": request.tool_choice}
44
+ if request.tool_choice else {}),
45
+ **self.config
46
+ )
47
 
48
+ return self._parse_response(response)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  except Exception as e:
50
  return LlmResponse(error_message=str(e))
51
 
 
88
 
89
  return messages
90
 
91
+ def _parse_response(self, response) -> LlmResponse:
92
  """Convert API response to LlmResponse."""
93
  choice = response.choices[0]
94
  content_items = []
95
 
96
+ if choice.message.content:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
  content_items.append(Message(
98
  role="assistant",
99
  content=choice.message.content
100
  ))
101
+
102
  if choice.message.tool_calls:
103
  for tc in choice.message.tool_calls:
104
  content_items.append(ToolCall(
 
113
  "input_tokens": response.usage.prompt_tokens,
114
  "output_tokens": response.usage.completion_tokens,
115
  }
116
+ )
agent_framework/tools.py CHANGED
@@ -112,4 +112,4 @@ def tool(
112
 
113
  if func is not None:
114
  return decorator(func)
115
- return decorator
 
112
 
113
  if func is not None:
114
  return decorator(func)
115
+ return decorator
examples/demo.py CHANGED
@@ -1,55 +1,39 @@
1
- """Demo script showing agent usage with structured output."""
2
-
3
- import asyncio
4
- import os
5
- import sys
6
  from pathlib import Path
7
- from pydantic import BaseModel, Field
 
 
8
 
9
  # Add parent directory to path so we can import agent_framework
10
  sys.path.insert(0, str(Path(__file__).parent.parent))
11
-
12
- from agent_framework import Agent, LlmClient, display_trace
13
- from dotenv import load_dotenv
14
-
15
- load_dotenv()
16
-
17
-
18
- # Define output structure
19
- class AnswerOutput(BaseModel):
20
- """Structured output for the answer."""
21
- final_answer: str = Field(description="The final answer to the question")
22
-
23
-
24
  async def main():
25
- # Create agent with structured output and verbose mode enabled
26
- agent = Agent(
27
- model=LlmClient(model="gpt-5-mini"),
28
- tools=[],
29
- instructions="You are a helpful assistant that answers questions accurately.",
30
- output_type=AnswerOutput,
31
- verbose=True, # Enable verbose mode to see thinking process
32
- )
33
 
34
- print("Starting agent execution...")
35
- print("=" * 60)
36
-
37
- result = await agent.run(
38
- "If Eliud Kipchoge could maintain his marathon pace, "
39
- "how many thousand hours to reach the Moon?"
40
  )
 
 
 
 
 
 
 
41
 
42
- print("\n" + "=" * 60)
43
- print("FINAL RESULTS")
44
- print("=" * 60)
45
- print(f"Answer: {result.output.final_answer}")
46
- print(f"Steps taken: {result.context.current_step}")
47
- print("=" * 60)
48
-
49
- # Optionally show full trace
50
- print("\nFull Execution Trace:")
51
- display_trace(result.context)
52
-
53
-
54
  if __name__ == "__main__":
55
  asyncio.run(main())
 
 
 
 
 
 
1
  from pathlib import Path
2
+ import sys
3
+ import asyncio
4
+
5
 
6
  # Add parent directory to path so we can import agent_framework
7
  sys.path.insert(0, str(Path(__file__).parent.parent))
8
+ from agent_framework.llm import LlmClient, LlmRequest, Message
9
+
 
 
 
 
 
 
 
 
 
 
 
10
  async def main():
11
+ # Create client
12
+ client = LlmClient(model="gpt-5-mini")
 
 
 
 
 
 
13
 
14
+ # Build request
15
+ request = LlmRequest(
16
+ instructions=["You are a helpful assistant."],
17
+ contents=[Message(role="user", content="What is 2 + 2?")],
18
+ tool_choice = None
 
19
  )
20
+
21
+ # Generate response
22
+ response = await client.generate(request)
23
+ # Check for errors first!
24
+ if response.error_message:
25
+ print(f"Error: {response.error_message}")
26
+ return
27
 
28
+ # Response contains the answer
29
+ if not response.content:
30
+ print("No content in response")
31
+ return
32
+
33
+ for item in response.content:
34
+ if isinstance(item, Message):
35
+ print(item.content) # "4"
36
+ else:
37
+ print(f"Got {type(item).__name__}: {item}")
 
 
38
  if __name__ == "__main__":
39
  asyncio.run(main())
examples/gaia_evaluation.py CHANGED
@@ -4,129 +4,18 @@ import asyncio
4
  import os
5
  import sys
6
  from pathlib import Path
7
- from typing import List
8
- from pydantic import BaseModel, Field
9
 
10
  # Add parent directory to path so we can import agent_framework
11
  sys.path.insert(0, str(Path(__file__).parent.parent))
12
 
13
- from agent_framework import Agent, LlmClient, AgentResult, load_mcp_tools, display_trace
14
 
15
 
16
- # GAIA output model
17
- class GaiaOutput(BaseModel):
18
- """Structured output for GAIA benchmark responses."""
19
- is_solvable: bool = Field(description="Whether the problem can be solved with available tools")
20
- unsolvable_reason: str = Field(default="", description="Reason if problem is unsolvable")
21
- final_answer: str = Field(description="The final answer to the problem")
22
-
23
-
24
- # GAIA system prompt
25
- gaia_prompt = """
26
-
27
- You are a general AI assistant. I will ask you a question.
28
- First, determine if you can solve this problem with your current capabilities and set "is_solvable" accordingly.
29
- If you can solve it, set "is_solvable" to true and provide your answer in "final_answer".
30
- If you cannot solve it, set "is_solvable" to false and explain why in "unsolvable_reason".
31
- Your final answer should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
32
- If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
33
- If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
34
- If you are asked for a comma separated list, apply the above rules depending on whether the element is a number or a string.
35
-
36
- """
37
-
38
-
39
- def create_gaia_agent(model: str, tools: List) -> Agent:
40
- """Create an agent configured for GAIA benchmark evaluation.
41
-
42
- Args:
43
- model: LLM model name (e.g., "gpt-5", "gpt-5-mini")
44
- tools: List of tools to provide to the agent
45
-
46
- Returns:
47
- Configured Agent instance
48
- """
49
- return Agent(
50
- model=LlmClient(model=model),
51
- tools=tools,
52
- instructions=gaia_prompt,
53
- output_type=GaiaOutput,
54
- max_steps=15,
55
- )
56
-
57
-
58
- # Semaphore for rate limiting
59
- SEMAPHORE = asyncio.Semaphore(3)
60
-
61
-
62
- async def solve_problem(agent: Agent, question: str) -> AgentResult:
63
- """Solve a single GAIA problem with rate limiting.
64
-
65
- Args:
66
- agent: Configured agent instance
67
- question: Problem question to solve
68
-
69
- Returns:
70
- AgentResult with structured output
71
- """
72
- async with SEMAPHORE:
73
- return await agent.run(question)
74
-
75
-
76
- async def run_experiment(
77
- problems: List[dict],
78
- models: List[str],
79
- tools: List = None,
80
- ) -> dict:
81
- """Run GAIA evaluation experiment across multiple models.
82
-
83
- Args:
84
- problems: List of problem dictionaries with 'Question' and 'Final answer' keys
85
- models: List of model names to evaluate
86
- tools: List of tools to provide to agents
87
-
88
- Returns:
89
- Dictionary mapping model names to lists of results
90
- """
91
- tools = tools or []
92
- results = {model: [] for model in models}
93
-
94
- tasks = []
95
- for problem in problems:
96
- for model in models:
97
- agent = create_gaia_agent(model, tools)
98
- task = solve_problem(agent, problem.get("Question", problem.get("question", "")))
99
- tasks.append((model, problem, task))
100
-
101
- # Execute all tasks
102
- task_results = await asyncio.gather(*[task for _, _, task in tasks], return_exceptions=True)
103
-
104
- # Organize results
105
- for (model, problem, _), result in zip(tasks, task_results):
106
- if isinstance(result, Exception):
107
- results[model].append({
108
- "task_id": problem.get("task_id", problem.get("id", "")),
109
- "model": model,
110
- "error": str(result),
111
- })
112
- else:
113
- output = result.output if isinstance(result.output, GaiaOutput) else None
114
- results[model].append({
115
- "task_id": problem.get("task_id", problem.get("id", "")),
116
- "model": model,
117
- "is_solvable": output.is_solvable if output else None,
118
- "final_answer": output.final_answer if output else None,
119
- "unsolvable_reason": output.unsolvable_reason if output else None,
120
- "correct": (
121
- output.final_answer.strip().lower() == problem.get("Final answer", "").strip().lower()
122
- if output and "Final answer" in problem
123
- else None
124
- ),
125
- "steps": result.context.current_step,
126
- })
127
-
128
- return results
129
-
130
 
131
  async def main():
132
  """Example usage of GAIA evaluation."""
@@ -139,22 +28,41 @@ async def main():
139
 
140
  mcp_tools = await load_mcp_tools(tavily_connection)
141
 
142
- # Create agent
143
- agent = create_gaia_agent("gpt-5-mini", mcp_tools)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
 
145
  # Solve a problem
146
  result = await agent.run(
147
- "If Eliud Kipchoge could maintain his marathon pace, "
148
- "how many thousand hours to reach the Moon?"
149
  )
150
 
151
- if isinstance(result.output, GaiaOutput):
152
- print(f"Answer: {result.output.final_answer}")
153
- print(f"Solvable: {result.output.is_solvable}")
154
- print(f"Steps: {result.context.current_step}")
155
- else:
156
- print(f"Answer: {result.output}")
157
- print(f"Steps: {result.context.current_step}")
158
 
159
  # Display execution trace
160
  display_trace(result.context)
 
4
  import os
5
  import sys
6
  from pathlib import Path
 
 
7
 
8
  # Add parent directory to path so we can import agent_framework
9
  sys.path.insert(0, str(Path(__file__).parent.parent))
10
 
11
+ from agent_framework import Agent, LlmClient, load_mcp_tools, display_trace, tool
12
 
13
 
14
+ # Calculator tool
15
+ @tool
16
+ def calculator(expression: str) -> float:
17
+ """Calculate mathematical expressions. Supports basic math operations like +, -, *, /, **, etc."""
18
+ return eval(expression)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
  async def main():
21
  """Example usage of GAIA evaluation."""
 
28
 
29
  mcp_tools = await load_mcp_tools(tavily_connection)
30
 
31
+ # Combine all tools: calculator (already wrapped by @tool decorator) + MCP tools
32
+ all_tools = [calculator] + mcp_tools
33
+
34
+ # Show available tools
35
+ print(f"\n{'='*60}")
36
+ print(f"Available Tools: {len(all_tools)}")
37
+ print(f"{'='*60}")
38
+ for i, tool_obj in enumerate(all_tools, 1):
39
+ print(f"{i}. {tool_obj.name}")
40
+ if hasattr(tool_obj, 'description'):
41
+ desc = tool_obj.description[:80] + "..." if len(tool_obj.description) > 80 else tool_obj.description
42
+ print(f" {desc}")
43
+ print(f"{'='*60}\n")
44
+
45
+ # Create agent with instructions to use web search
46
+ agent = Agent(
47
+ model=LlmClient(model="gpt-5-mini"),
48
+ tools=all_tools,
49
+ instructions="""You are a helpful assistant. You have access to tools.
50
+
51
+ Do NOT rely solely on your training data. Use the tools when necessary to present accurate information.
52
+ Instead of assumptions, use websearch for the questions you don't know exact answer to
53
+ """,
54
+ max_steps=10,
55
+ )
56
 
57
  # Solve a problem
58
  result = await agent.run(
59
+ 'If A is usain bolt\'s world record in 100 meters, B is usain bolt\'s fastest time in 200 meters, what is A x B ?'
 
60
  )
61
 
62
+ print(f"\n{'='*60}")
63
+ print(f"Final Answer: {result.output}")
64
+ print(f"Steps: {result.context.current_step}")
65
+ print(f"{'='*60}\n")
 
 
 
66
 
67
  # Display execution trace
68
  display_trace(result.context)
CODE_TRACE_OUTLINE.md → misc/CODE_TRACE_OUTLINE.md RENAMED
File without changes
LANGCHAIN_COMPARISON.md → misc/LANGCHAIN_COMPARISON.md RENAMED
File without changes
example.py → misc/example.py RENAMED
File without changes
notebook_example.ipynb → misc/notebook_example.ipynb RENAMED
@@ -73,6 +73,129 @@
73
  "print(f\"Confidence: {result.output.confidence}\") # 0.92\n",
74
  "print(f\"Key phrases: {result.output.key_phrases}\") # [\"exceeded expectations\", \"highly recommend\"]\n"
75
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  }
77
  ],
78
  "metadata": {
 
73
  "print(f\"Confidence: {result.output.confidence}\") # 0.92\n",
74
  "print(f\"Key phrases: {result.output.key_phrases}\") # [\"exceeded expectations\", \"highly recommend\"]\n"
75
  ]
76
+ },
77
+ {
78
+ "cell_type": "code",
79
+ "execution_count": null,
80
+ "metadata": {},
81
+ "outputs": [],
82
+ "source": [
83
+ "'''\n",
84
+ "To implement an agent, we need tools, an execution context, \n",
85
+ "instructions (the system prompt that defines agent behavior), and an LLM. \n",
86
+ "\n",
87
+ "Event: a record of who did what, e.g. whether it was a user request, \n",
88
+ "an LLM-requested tool call, or a result coming back from a tool, etc. \n",
89
+ "\n",
90
+ "'''\n",
91
+ "\n",
92
+ "from anyio import Event\n",
93
+ "from agent_framework import ExecutionContext, Message\n",
94
+ "from agent_framework.agent import AgentResult\n",
95
+ "\n",
96
+ "class Agent: ## does this inherit from anything ? \n",
97
+ " def init(self, tools, executionContext, llmClient, instructions, maxSteps, verbose, name = \"agent\"):\n",
98
+ " self.tools = self._setup_tools(tools or [])\n",
99
+ " self.executionContext = executionContext\n",
100
+ " self.llmClient = llmClient\n",
101
+ " self.instructions = instructions\n",
102
+ " self.maxSteps = maxSteps\n",
103
+ " self.verbose = verbose\n",
104
+ " self.name = name\n",
105
+ "\n",
106
+ " ## step 1 is to setup tools\n",
107
+ "\n",
108
+ " def _setup_tools(self, tools):\n",
109
+ " return tools\n",
110
+ "\n",
111
+ " ## step 2 is to define entry point for users.(run method)\n",
112
+ "\n",
113
+ " async def run(self, user_input, context):\n",
114
+ "\n",
115
+ " ## check if there is any previous context, else create\n",
116
+ "\n",
117
+ " if context is None:\n",
118
+ " context = ExecutionContext()\n",
119
+ "\n",
120
+ " ## add the user_event to the event\n",
121
+ " user_event = Event(\n",
122
+ " execution_id = context.execution_id,\n",
123
+ " author = 'user',\n",
124
+ " content = [Message(role = 'user', content = user_input)]\n",
125
+ " )\n",
126
+ " ## add the event to context\n",
127
+ " context.add_event(user_event)\n",
128
+ "\n",
129
+ " ## if agent doesnt reach final result or max steps, keep performing\n",
130
+ " while not context.final_result and context.current_step < self.max_steps:\n",
131
+ " ## each step is a think-act cycle\n",
132
+ " await self.step(context)\n",
133
+ "\n",
134
+ " ## check if newly performed action is final\n",
135
+ " last_event = context.events[-1]\n",
136
+ "\n",
137
+ " # If it is final, then extract the last event and send it to \n",
138
+ " # Agent result along with the context\n",
139
+ " if self._is_final_response(last_event):\n",
140
+ " context.final_result = self._extract_final_result(last_event)\n",
141
+ "\n",
142
+ " return AgentResult(context.final_result, context = context)\n",
143
+ "\n",
144
+ " # step 3 prepare for llm request\n",
145
+ "\n",
146
+ " def _prepare_llm_request(self, context):\n",
147
+ " \n",
148
+ " #flatten all the events (why ?)\n",
149
+ " flat_contents = []\n",
150
+ " for event in context.events:\n",
151
+ " flat_contents.extend(event.content)\n",
152
+ "\n",
153
+ " ## with this context, call llm\n",
154
+ " return LlmRequest(\n",
155
+ " instructions=[self.instructions] if self.instructions else [],\n",
156
+ " contents=flat_contents,\n",
157
+ " tools=self.tools,\n",
158
+ " tool_choice=\"auto\" if self.tools else None,\n",
159
+ " )\n",
160
+ "\n",
161
+ " async def step(self, context):\n",
162
+ " \n",
163
+ " ## write a method for this\n",
164
+ " llm_request = self._prepare_llm_request(context)\n",
165
+ "\n",
166
+ " # Get LLM's decision\n",
167
+ " llm_response = await self.think(llm_request)\n",
168
+ "\n",
169
+ " response_event = Event(\n",
170
+ " execution_id=context.execution_id,\n",
171
+ " author=self.name,\n",
172
+ " content=llm_response.content,\n",
173
+ " )\n",
174
+ "\n",
175
+ " async def think(self, llm_request):\n",
176
+ " \"\"\"Get LLM's response/decision.\"\"\"\n",
177
+ " return await self.model.generate(llm_request)\n",
178
+ "\n",
179
+ "\n",
180
+ "\n",
181
+ "\n",
182
+ "\n",
183
+ "\n",
184
+ "\n",
185
+ "\n",
186
+ "\n",
187
+ "\n",
188
+ "\n"
189
+ ]
190
+ },
191
+ {
192
+ "cell_type": "markdown",
193
+ "metadata": {},
194
+ "source": [
195
+ "Q. What is an LLM request?\n",
196
+ "\n",
197
+ "A. It is what our agent sends to the LLM. Before sending it, we bundle it with the necessary context, prompt, and tools."
198
+ ]
199
  }
200
  ],
201
  "metadata": {
tavily_mcp_server.py → misc/tavily_mcp_server.py RENAMED
File without changes