yoshizen committed on
Commit
162ee47
·
verified ·
1 Parent(s): 6c40844

Upload 4 files

Browse files
Files changed (4) hide show
  1. agent.py +757 -0
  2. enhanced_agent.py +411 -0
  3. memory_system.py +462 -0
  4. reasoning_system.py +668 -0
agent.py ADDED
@@ -0,0 +1,757 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ GAIA-Ready AI Agent using smolagents framework
3
+
4
+ This agent is designed to meet the requirements of the Hugging Face Agents Course
5
+ and perform well on the GAIA benchmark. It implements the Think-Act-Observe workflow
6
+ and includes tools for web search, calculation, image analysis, and code execution.
7
+ """
8
+
9
+ import os
10
+ import json
11
+ import base64
12
+ import requests
13
+ from typing import List, Dict, Any, Optional, Union, Callable
14
+ import re
15
+ import time
16
+ from datetime import datetime
17
+ import traceback
18
+
19
+ # Install required packages if not already installed
20
+ try:
21
+ from smolagents import Agent, InferenceClientModel, Tool
22
+ from smolagents.memory import Memory
23
+ except ImportError:
24
+ import subprocess
25
+ subprocess.check_call(["pip", "install", "smolagents"])
26
+ from smolagents import Agent, InferenceClientModel, Tool
27
+ from smolagents.memory import Memory
28
+
29
+ try:
30
+ import numpy as np
31
+ import matplotlib.pyplot as plt
32
+ from PIL import Image
33
+ import io
34
+ except ImportError:
35
+ import subprocess
36
+ subprocess.check_call(["pip", "install", "numpy", "matplotlib", "pillow"])
37
+ import numpy as np
38
+ import matplotlib.pyplot as plt
39
+ from PIL import Image
40
+ import io
41
+
42
+ try:
43
+ import requests
44
+ from bs4 import BeautifulSoup
45
+ except ImportError:
46
+ import subprocess
47
+ subprocess.check_call(["pip", "install", "requests", "beautifulsoup4"])
48
+ import requests
49
+ from bs4 import BeautifulSoup
50
+
51
+
52
class MemoryManager:
    """
    Custom memory manager for the agent that maintains short-term, long-term,
    and working memory.

    Short-term and long-term memory are bounded FIFO lists of dict items;
    working memory is a free-form key/value scratchpad. Items are expected to
    carry their text under the "content" key (other keys are ignored here).
    """

    def __init__(self):
        self.short_term_memory = []  # Current conversation context
        self.long_term_memory = []   # Key facts and results
        self.working_memory = {}     # Temporary storage for complex tasks
        self.max_short_term_items = 10
        self.max_long_term_items = 50

    @staticmethod
    def _tokenize(text: Any) -> set:
        """Return the set of lowercase word tokens found in *text*.

        The value is coerced with ``str()`` first so that non-string content
        (numbers, structured model responses, ...) does not raise.
        """
        return set(re.findall(r"\w+", str(text).lower()))

    def add_to_short_term(self, item: Dict[str, Any]) -> None:
        """Add an item to short-term memory, evicting the oldest when full."""
        self.short_term_memory.append(item)
        if len(self.short_term_memory) > self.max_short_term_items:
            self.short_term_memory.pop(0)

    def add_to_long_term(self, item: Dict[str, Any]) -> None:
        """Add an item to long-term memory, evicting the oldest when full."""
        self.long_term_memory.append(item)
        if len(self.long_term_memory) > self.max_long_term_items:
            self.long_term_memory.pop(0)

    def store_in_working_memory(self, key: str, value: Any) -> None:
        """Store a value in working memory under the specified key."""
        self.working_memory[key] = value

    def get_from_working_memory(self, key: str) -> Optional[Any]:
        """Return the working-memory value for *key*, or None if absent."""
        return self.working_memory.get(key)

    def clear_working_memory(self) -> None:
        """Discard everything held in working memory."""
        self.working_memory = {}

    def get_relevant_memories(self, query: str) -> List[Dict[str, Any]]:
        """
        Retrieve memories that share at least one word with the query.

        Matching is done on whole, lowercase word tokens with punctuation
        ignored, so "france?" matches "France." while "hi" no longer matches
        "This". Long-term memories are listed before short-term ones.

        Args:
            query: Free-text query to match against stored memories

        Returns:
            Matching memory items (empty when the query has no words)
        """
        query_keywords = self._tokenize(query)
        if not query_keywords:
            return []

        relevant_memories = []
        # Long-term first, then short-term, preserving insertion order.
        for store in (self.long_term_memory, self.short_term_memory):
            for memory in store:
                if query_keywords & self._tokenize(memory.get("content", "")):
                    relevant_memories.append(memory)

        return relevant_memories

    def get_memory_summary(self) -> str:
        """Return a text summary of the five most recent short/long-term
        items plus the whole working memory."""
        short_term_summary = "\n".join(
            f"- {m.get('content', '')}" for m in self.short_term_memory[-5:]
        )
        long_term_summary = "\n".join(
            f"- {m.get('content', '')}" for m in self.long_term_memory[-5:]
        )
        working_memory_summary = "\n".join(
            f"- {k}: {v}" for k, v in self.working_memory.items()
        )

        return f"""
        MEMORY SUMMARY:
        --------------
        Recent Short-Term Memory:
        {short_term_summary}

        Important Long-Term Memory:
        {long_term_summary}

        Working Memory:
        {working_memory_summary}
        """
128
+
129
+
130
+ # Tool implementations
131
+
132
def web_search_function(query: str) -> str:
    """
    Search the web for information using a search API

    Args:
        query: The search query

    Returns:
        Up to five formatted results as a string, or an "Error ..." string
        when the request fails (this function never raises)
    """
    try:
        # Using a public search API (replace with your preferred API).
        # The query is passed via `params` so that spaces, '&', '#', etc.
        # are URL-encoded instead of corrupting the request URL, and a
        # timeout keeps an unresponsive server from hanging the agent.
        response = requests.get(
            "https://ddg-api.herokuapp.com/search",
            params={"query": query},
            timeout=10,
        )

        if response.status_code != 200:
            return f"Error: Search request failed with status code {response.status_code}"

        # NOTE(review): assumes the API returns a JSON list of dicts with
        # 'title' / 'snippet' / 'link' keys - confirm against the service.
        results = response.json()
        formatted_results = []

        for position, result in enumerate(results[:5], start=1):  # Top 5 only
            title = result.get('title', 'No title')
            snippet = result.get('snippet', 'No snippet')
            link = result.get('link', 'No link')
            formatted_results.append(f"{position}. {title}\n {snippet}\n URL: {link}\n")

        return "Search Results:\n" + "\n".join(formatted_results)
    except Exception as e:
        # Tool contract: report failures as text so the agent can react.
        return f"Error performing web search: {str(e)}"
162
+
163
+
164
def web_page_content_function(url: str) -> str:
    """
    Fetch and extract the visible text content of a web page

    Args:
        url: The URL of the web page to fetch

    Returns:
        Extracted text (truncated to 4000 characters) as a string, or an
        "Error ..." string when the fetch fails (this function never raises)
    """
    max_chars = 4000  # Keep the result small enough for the model context

    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        # A timeout keeps a stalled server from blocking the agent forever.
        response = requests.get(url, headers=headers, timeout=15)

        if response.status_code != 200:
            return f"Error: Failed to fetch web page with status code {response.status_code}"

        soup = BeautifulSoup(response.text, 'html.parser')

        # Remove script and style elements so only human-visible text remains
        for element in soup(["script", "style"]):
            element.extract()

        text = soup.get_text()

        # Clean up text: strip each line, break on runs of two spaces (which
        # usually separate unrelated inline fragments) and drop blanks.
        # Splitting on a single space would put every word on its own line.
        lines = (line.strip() for line in text.splitlines())
        chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
        text = '\n'.join(chunk for chunk in chunks if chunk)

        # Limit length to avoid overwhelming the model
        if len(text) > max_chars:
            text = text[:max_chars] + "...\n[Content truncated due to length]"

        return f"Content from {url}:\n\n{text}"
    except Exception as e:
        # Tool contract: report failures as text so the agent can react.
        return f"Error fetching web page content: {str(e)}"
204
+
205
+
206
def _safe_arith_eval(expression: str):
    """Evaluate a pure arithmetic expression by walking its AST.

    Only numeric literals, parentheses, unary +/- and the binary operators
    + - * / // ** are accepted; anything else raises ValueError. Exponents
    are capped so inputs like ``9**9**9**9`` fail fast instead of hanging.
    """
    import ast
    import operator

    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Pow: operator.pow,
    }
    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}
    max_exponent = 10000

    def evaluate(node):
        if isinstance(node, ast.Expression):
            return evaluate(node.body)
        if isinstance(node, ast.Constant) and type(node.value) in (int, float):
            return node.value
        if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            left = evaluate(node.left)
            right = evaluate(node.right)
            if isinstance(node.op, ast.Pow) and abs(right) > max_exponent:
                raise ValueError(f"exponent too large (limit {max_exponent})")
            return binary_ops[type(node.op)](left, right)
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](evaluate(node.operand))
        raise ValueError("unsupported expression")

    # strip(): unlike eval(), ast.parse rejects leading whitespace.
    return evaluate(ast.parse(expression.strip(), mode="eval"))


def calculator_function(expression: str) -> str:
    """
    Evaluate a mathematical expression

    Characters other than digits, ``+ - * / ( ) . ^`` and spaces are dropped
    before evaluation, and ``^`` is treated as exponentiation.

    Args:
        expression: The mathematical expression to evaluate

    Returns:
        "Expression: ...\\nResult: ..." on success, or an
        "Error calculating result: ..." string (this function never raises)
    """
    try:
        # Remove any characters that aren't digits, operators, or parentheses
        clean_expr = re.sub(r'[^0-9+\-*/().^ ]', '', expression)

        # Replace ^ with ** for exponentiation
        clean_expr = clean_expr.replace('^', '**')

        # Evaluate through a restricted AST walker rather than eval(): the
        # character filter alone still allowed unbounded power towers.
        result = _safe_arith_eval(clean_expr)

        return f"Expression: {expression}\nResult: {result}"
    except Exception as e:
        return f"Error calculating result: {str(e)}"
230
+
231
+
232
def python_executor_function(code: str) -> str:
    """
    Execute Python code and return whatever it printed to stdout

    SECURITY NOTE: the code runs via exec() in this process with no
    sandboxing. Only use this with trusted input or inside an isolated
    environment.

    Args:
        code: The Python code to execute

    Returns:
        The captured stdout prefixed with a success banner, or an
        "Error executing Python code: ..." string including the traceback
        (this function never raises)
    """
    from contextlib import redirect_stdout
    from io import StringIO

    try:
        # Names made available to the executed code
        exec_globals = {
            "np": np,
            "plt": plt,
            "requests": requests,
            "BeautifulSoup": BeautifulSoup,
            "Image": Image,
            "io": io,
            "json": json,
            "base64": base64,
            "re": re,
            "time": time,
            "datetime": datetime
        }

        captured_output = StringIO()
        # redirect_stdout restores sys.stdout even when the code raises;
        # a manual swap would leave stdout redirected after any failure.
        with redirect_stdout(captured_output):
            exec(code, exec_globals)

        output = captured_output.getvalue()
        return f"Code executed successfully:\n\n{output}"
    except Exception as e:
        return f"Error executing Python code: {str(e)}\n{traceback.format_exc()}"
275
+
276
+
277
def image_analyzer_function(image_url: str) -> str:
    """
    Fetch an image and report basic facts about it

    This is a placeholder: it only downloads the image and reports its size.
    No vision model is invoked, so the image content is not described.

    Args:
        image_url: URL of the image to analyze

    Returns:
        A short report as a string, or an "Error ..." string when the
        download fails (this function never raises)
    """
    try:
        # A timeout keeps a stalled server from blocking the agent forever.
        response = requests.get(image_url, timeout=15)

        if response.status_code != 200:
            return f"Error: Failed to fetch image with status code {response.status_code}"

        # In a real implementation, you would use a vision model here.
        # For now, we return a placeholder response.
        return f"""
        Image Analysis:
        - Successfully retrieved image from {image_url}
        - Image size: {len(response.content)} bytes

        [Note: In a production environment, this would use a vision model to analyze the image content]

        To properly analyze this image, please describe what you see in the image.
        """
    except Exception as e:
        # Tool contract: report failures as text so the agent can react.
        return f"Error analyzing image: {str(e)}"
310
+
311
+
312
def text_processor_function(text: str, operation: str) -> str:
    """
    Process and analyze text with simple, dependency-free heuristics

    Args:
        text: The text to process
        operation: The operation to perform (summarize, analyze_sentiment, extract_keywords)

    Returns:
        Processed text as a string, or an "Error ..." string for an unknown
        operation or a processing failure (this function never raises)
    """
    import re
    from collections import Counter

    try:
        if operation == "summarize":
            # Simple extractive summarization: first, middle and last sentence
            sentences = text.split('. ')
            if len(sentences) <= 3:
                return f"Summary: {text}"

            summary = f"{sentences[0]}. {sentences[len(sentences)//2]}. {sentences[-1]}"
            return f"Summary: {summary}"

        if operation == "analyze_sentiment":
            # Very simple lexicon-based sentiment analysis
            positive_words = {'good', 'great', 'excellent', 'positive', 'happy', 'love', 'like'}
            negative_words = {'bad', 'poor', 'negative', 'unhappy', 'hate', 'dislike'}

            # Compare whole words: substring matching would count "dislike"
            # as "like" and "unhappy" as "happy".
            words_in_text = set(re.findall(r"\w+", text.lower()))
            positive_count = len(positive_words & words_in_text)
            negative_count = len(negative_words & words_in_text)

            if positive_count > negative_count:
                sentiment = "positive"
            elif negative_count > positive_count:
                sentiment = "negative"
            else:
                sentiment = "neutral"

            return f"Sentiment Analysis: {sentiment} (positive words: {positive_count}, negative words: {negative_count})"

        if operation == "extract_keywords":
            # Remove punctuation and convert to lowercase
            text_clean = re.sub(r'[^\w\s]', '', text.lower())

            # Drop common stop words and very short tokens
            stop_words = {'the', 'a', 'an', 'and', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by'}
            words = [word for word in text_clean.split() if word not in stop_words and len(word) > 2]

            # Top 10 most frequent words, most frequent first
            keywords = [word for word, _ in Counter(words).most_common(10)]

            return f"Keywords: {', '.join(keywords)}"

        return f"Error: Unknown operation '{operation}'. Supported operations: summarize, analyze_sentiment, extract_keywords"
    except Exception as e:
        return f"Error processing text: {str(e)}"
375
+
376
+
377
def file_manager_function(operation: str, filename: str, content: Optional[str] = None) -> str:
    """
    Save and load text data from files

    Files are read and written as UTF-8 so results do not depend on the
    platform's default encoding.

    Args:
        operation: The operation to perform (save, load)
        filename: The name of the file
        content: The content to save (required for the save operation)

    Returns:
        Result of the operation as a string, or an "Error ..." string on
        failure (this function never raises)
    """
    try:
        if operation == "save":
            if content is None:
                return "Error: Content is required for save operation"

            with open(filename, 'w', encoding='utf-8') as f:
                f.write(content)

            return f"Successfully saved content to {filename}"

        if operation == "load":
            # Open directly and handle the missing-file case, rather than
            # checking existence first (the file could vanish in between).
            try:
                with open(filename, 'r', encoding='utf-8') as f:
                    loaded = f.read()
            except FileNotFoundError:
                return f"Error: File {filename} does not exist"

            return f"Content of {filename}:\n\n{loaded}"

        return f"Error: Unknown operation '{operation}'. Supported operations: save, load"
    except Exception as e:
        return f"Error managing file: {str(e)}"
411
+
412
+
413
class GAIAAgent:
    """
    AI Agent designed to perform well on the GAIA benchmark
    Implements the Think-Act-Observe workflow

    NOTE(review): `Agent`, `Tool(name=..., description=..., function=...)`
    and `agent.chat(...)` are used exactly as the original code used them;
    confirm they match the installed smolagents version.
    """

    # Hosted model used when no local model is requested or it fails to load
    REMOTE_MODEL_ID = "mistralai/Mixtral-8x7B-Instruct-v0.1"

    def __init__(self, api_key=None, use_local_model=False):
        """
        Build the model, tools and underlying agent

        Args:
            api_key: Hugging Face API key; falls back to the HF_API_KEY
                environment variable when not given
            use_local_model: Try a local Ollama model first, falling back to
                the Hugging Face Inference API if it cannot be initialized
        """
        self.memory_manager = MemoryManager()

        # Initialize the LLM model
        self.model = None
        if use_local_model:
            # Use Ollama for local model
            try:
                from smolagents import LiteLLMModel
                self.model = LiteLLMModel(
                    model_id="ollama_chat/qwen2:7b",
                    api_base="http://127.0.0.1:11434",
                    num_ctx=8192,
                )
            except Exception as e:
                print(f"Error initializing local model: {str(e)}")
                print("Falling back to Hugging Face Inference API")
        if self.model is None:
            self.model = self._build_remote_model(api_key)

        # Define tools: (name, description, implementation)
        tool_specs = [
            ("web_search", "Search the web for information", web_search_function),
            ("web_page_content", "Fetch and extract content from a web page", web_page_content_function),
            ("calculator", "Perform mathematical calculations", calculator_function),
            ("image_analyzer", "Analyze image content", image_analyzer_function),
            ("python_executor", "Execute Python code", python_executor_function),
            ("text_processor", "Process and analyze text", text_processor_function),
            ("file_manager", "Save and load data from files", file_manager_function),
        ]
        self.tools = [
            Tool(name=name, description=description, function=function)
            for name, description, function in tool_specs
        ]

        # System prompt
        self.system_prompt = """
        You are an advanced AI assistant designed to solve complex tasks from the GAIA benchmark.
        You have access to various tools that can help you solve these tasks.

        Always follow the Think-Act-Observe workflow:
        1. Think: Carefully analyze the task and plan your approach
        2. Act: Use appropriate tools to gather information or perform actions
        3. Observe: Analyze the results of your actions and adjust your approach if needed

        For complex tasks, break them down into smaller steps.
        Always verify your answers before submitting them.

        When using tools:
        - web_search: Use to find information online
        - web_page_content: Use to extract content from specific web pages
        - calculator: Use for mathematical calculations
        - image_analyzer: Use to analyze image content
        - python_executor: Use to run Python code for complex operations
        - text_processor: Use to process and analyze text (summarize, analyze_sentiment, extract_keywords)
        - file_manager: Use to save and load data from files (save, load)

        Be thorough, methodical, and precise in your reasoning.
        """

        # Initialize the agent
        self.agent = Agent(
            model=self.model,
            tools=self.tools,
            system_prompt=self.system_prompt
        )

    @classmethod
    def _build_remote_model(cls, api_key=None):
        """Create the Hugging Face Inference API model."""
        return InferenceClientModel(
            model_id=cls.REMOTE_MODEL_ID,
            api_key=api_key or os.environ.get("HF_API_KEY", "")
        )

    def _remember(self, kind, content):
        """Append a timestamped entry of the given type to short-term memory."""
        self.memory_manager.add_to_short_term({
            "type": kind,
            "content": content,
            "timestamp": datetime.now().isoformat()
        })

    def think(self, query):
        """
        Analyze the task and plan an approach

        Args:
            query: The user's query or task

        Returns:
            Dictionary with "analysis" and "plan" keys (both currently hold
            the model's full response)
        """
        # Retrieve relevant memories
        relevant_memories = self.memory_manager.get_relevant_memories(query)

        # Construct a thinking prompt
        thinking_prompt = f"""
        TASK: {query}

        RELEVANT MEMORIES:
        {relevant_memories if relevant_memories else "No relevant memories found."}

        Please analyze this task and create a plan:
        1. What is this task asking for?
        2. What information do I need to solve it?
        3. What tools would be most helpful?
        4. What steps should I take to solve it?

        Provide your analysis and plan.
        """

        # Use the agent to generate a plan
        response = self.agent.chat(thinking_prompt)

        # Store the thinking in memory
        self._remember("thinking", response)

        # The response is not parsed further; it serves as both fields.
        return {
            "analysis": response,
            "plan": response
        }

    def act(self, plan, query):
        """
        Execute actions based on the plan

        Args:
            plan: The plan generated by the think step (reads plan['plan'])
            query: The original query

        Returns:
            The agent's response to the action prompt
        """
        # Ask the agent which tool to use based on the plan
        tool_selection_prompt = f"""
        TASK: {query}

        MY PLAN:
        {plan['plan']}

        Based on this plan, which tool should I use first and with what parameters?
        Respond in the following format:
        TOOL: [tool name]
        PARAMETERS: [parameters for the tool]
        REASONING: [why this tool is appropriate]
        """

        tool_selection = self.agent.chat(tool_selection_prompt)
        self._remember("tool_selection", tool_selection)

        # The tool selection is not parsed here; it is handed back to the
        # agent, which is relied on to perform the tool call itself.
        action_prompt = f"""
        TASK: {query}

        MY PLAN:
        {plan['plan']}

        TOOL SELECTION:
        {tool_selection}

        Please execute the appropriate tool to help solve this task.
        """

        action_result = self.agent.chat(action_prompt)
        self._remember("action_result", action_result)

        return action_result

    def observe(self, action_result, plan, query):
        """
        Analyze the results of actions and determine next steps

        Args:
            action_result: Results from the act step
            plan: The original plan (reads plan['plan'])
            query: The original query

        Returns:
            Dictionary with "observation" (the model's analysis) and
            "continue" (True when more work appears to be needed)
        """
        observation_prompt = f"""
        TASK: {query}

        MY PLAN:
        {plan['plan']}

        ACTION RESULT:
        {action_result}

        Please analyze these results:
        1. What did I learn from this action?
        2. Does this fully answer the original task?
        3. If not, what should I do next?
        4. If yes, what is the final answer?

        Provide your analysis and next steps or final answer.
        """

        observation = self.agent.chat(observation_prompt)
        self._remember("observation", observation)

        # Heuristic: the model mentioning further steps means "keep going".
        observation_text = str(observation).lower()
        continue_execution = (
            "next steps" in observation_text or "next tool" in observation_text
        )

        if not continue_execution:
            # Looks like a final answer: keep it in long-term memory
            self.memory_manager.add_to_long_term({
                "type": "final_answer",
                "query": query,
                "content": observation,
                "timestamp": datetime.now().isoformat()
            })

        return {
            "observation": observation,
            "continue": continue_execution
        }

    def solve(self, query, max_iterations=5):
        """
        Solve a task using the Think-Act-Observe workflow

        Args:
            query: The user's query or task
            max_iterations: Maximum number of iterations to prevent infinite loops

        Returns:
            Final answer to the query; when the iteration budget runs out,
            a message wrapping the last observation
        """
        # Store the query in memory
        self._remember("query", query)

        # Keep the untouched task separate from the per-iteration prompt so
        # follow-up prompts do not nest "Original task:" inside each other.
        original_query = query
        current_query = query
        progress_notes = []
        final_answer = None

        for iteration in range(max_iterations):
            print(f"Iteration {iteration + 1}/{max_iterations}")

            # Think
            print("Thinking...")
            plan = self.think(current_query)

            # Act
            print("Acting...")
            action_result = self.act(plan, current_query)

            # Observe
            print("Observing...")
            observation = self.observe(action_result, plan, current_query)

            # Check if we have a final answer
            if not observation["continue"]:
                final_answer = observation["observation"]
                break

            # Carry all observations so far into the next iteration's prompt
            progress_notes.append(str(observation["observation"]))
            progress_text = "\n\n".join(progress_notes)
            current_query = f"""
            Original task: {original_query}

            Progress so far:
            {progress_text}

            Please continue solving this task.
            """

        # If we reached max iterations without a final answer
        if final_answer is None:
            # progress_notes is empty only when no iteration ran at all
            # (max_iterations <= 0); avoid touching an unbound observation.
            best_effort = progress_notes[-1] if progress_notes else "No observations were produced."
            final_answer = f"""
            I've spent {max_iterations} iterations trying to solve this task.
            Here's my best answer based on what I've learned:

            {best_effort}

            Note: This answer may be incomplete as I reached the maximum number of iterations.
            """

        return final_answer
743
+
744
+
745
+ # Example usage
746
if __name__ == "__main__":
    # Demo: build an agent backed by the hosted model and run it on one
    # GAIA-style question that needs a lookup plus a calculation.
    agent = GAIAAgent(use_local_model=False)

    query = (
        "What is the capital of France and what is its population? "
        "Also, calculate 15% of this population."
    )

    # Run the full Think-Act-Observe loop, then show the result
    answer = agent.solve(query)

    print("\nFinal Answer:")
    print(answer)
enhanced_agent.py ADDED
@@ -0,0 +1,411 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Enhanced GAIA-Ready AI Agent with integrated memory and reasoning systems
3
+
4
+ This is the main integration file that combines the agent, memory system,
5
+ and reasoning system into a complete solution for the Hugging Face Agents Course.
6
+ """
7
+
8
+ import os
9
+ import sys
10
+ import json
11
+ import traceback
12
+ from typing import List, Dict, Any, Optional, Union
13
+ from datetime import datetime
14
+
15
+ # Import the memory and reasoning systems
16
+ try:
17
+ from memory_system import EnhancedMemoryManager
18
+ from reasoning_system import ReasoningSystem
19
+ except ImportError:
20
+ print("Error: Could not import memory_system or reasoning_system modules.")
21
+ print("Make sure memory_system.py and reasoning_system.py are in the same directory.")
22
+ sys.exit(1)
23
+
24
+ # Import smolagents
25
+ try:
26
+ from smolagents import Agent, InferenceClientModel, Tool, LiteLLMModel
27
+ except ImportError:
28
+ import subprocess
29
+ subprocess.check_call(["pip", "install", "smolagents"])
30
+ from smolagents import Agent, InferenceClientModel, Tool
31
+ try:
32
+ from smolagents import LiteLLMModel
33
+ except ImportError:
34
+ print("Warning: LiteLLMModel not available, will use InferenceClientModel only.")
35
+
36
+ # Import tool implementations
37
+ from agent import (
38
+ web_search_function,
39
+ web_page_content_function,
40
+ calculator_function,
41
+ python_executor_function,
42
+ image_analyzer_function,
43
+ text_processor_function,
44
+ file_manager_function
45
+ )
46
+
47
+
48
+ class EnhancedGAIAAgent:
49
+ """
50
+ Enhanced AI Agent designed to perform well on the GAIA benchmark
51
+ Integrates memory and reasoning systems with the Think-Act-Observe workflow
52
+ """
53
+ def __init__(self, api_key=None, use_local_model=False, use_semantic_memory=True):
54
+ """
55
+ Initialize the enhanced GAIA agent
56
+
57
+ Args:
58
+ api_key: API key for Hugging Face Inference API
59
+ use_local_model: Whether to use a local model via Ollama
60
+ use_semantic_memory: Whether to use semantic search for memory retrieval
61
+ """
62
+ # Initialize the memory system
63
+ self.memory_manager = EnhancedMemoryManager(use_semantic_search=use_semantic_memory)
64
+
65
+ # Initialize the LLM model
66
+ if use_local_model:
67
+ # Use Ollama for local model
68
+ try:
69
+ self.model = LiteLLMModel(
70
+ model_id="ollama_chat/qwen2:7b",
71
+ api_base="http://127.0.0.1:11434",
72
+ num_ctx=8192,
73
+ )
74
+ print("Using local Ollama model: qwen2:7b")
75
+ except Exception as e:
76
+ print(f"Error initializing local model: {str(e)}")
77
+ print("Falling back to Hugging Face Inference API")
78
+ self.model = InferenceClientModel(
79
+ model_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
80
+ api_key=api_key or os.environ.get("HF_API_KEY", "")
81
+ )
82
+ print("Using Hugging Face Inference API model: Mixtral-8x7B")
83
+ else:
84
+ # Use Hugging Face Inference API
85
+ self.model = InferenceClientModel(
86
+ model_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
87
+ api_key=api_key or os.environ.get("HF_API_KEY", "")
88
+ )
89
+ print("Using Hugging Face Inference API model: Mixtral-8x7B")
90
+
91
+ # Define tools
92
+ self.tools = [
93
+ Tool(
94
+ name="web_search",
95
+ description="Search the web for information",
96
+ function=web_search_function
97
+ ),
98
+ Tool(
99
+ name="web_page_content",
100
+ description="Fetch and extract content from a web page",
101
+ function=web_page_content_function
102
+ ),
103
+ Tool(
104
+ name="calculator",
105
+ description="Perform mathematical calculations",
106
+ function=calculator_function
107
+ ),
108
+ Tool(
109
+ name="image_analyzer",
110
+ description="Analyze image content",
111
+ function=image_analyzer_function
112
+ ),
113
+ Tool(
114
+ name="python_executor",
115
+ description="Execute Python code",
116
+ function=python_executor_function
117
+ ),
118
+ Tool(
119
+ name="text_processor",
120
+ description="Process and analyze text",
121
+ function=text_processor_function
122
+ ),
123
+ Tool(
124
+ name="file_manager",
125
+ description="Save and load data from files",
126
+ function=file_manager_function
127
+ )
128
+ ]
129
+
130
+ # Enhanced system prompt for GAIA benchmark
131
+ self.system_prompt = """
132
+ You are an advanced AI assistant designed to solve complex tasks from the GAIA benchmark.
133
+ You have access to various tools that can help you solve these tasks.
134
+
135
+ Always follow the Think-Act-Observe workflow:
136
+ 1. Think: Carefully analyze the task and plan your approach
137
+ - Break down complex tasks into smaller steps
138
+ - Consider what information you need and how to get it
139
+ - Plan your approach before taking action
140
+
141
+ 2. Act: Use appropriate tools to gather information or perform actions
142
+ - web_search: Search the web for information
143
+ - web_page_content: Extract content from specific web pages
144
+ - calculator: Perform mathematical calculations
145
+ - image_analyzer: Analyze image content
146
+ - python_executor: Run Python code for complex operations
147
+ - text_processor: Process and analyze text (summarize, analyze_sentiment, extract_keywords)
148
+ - file_manager: Save and load data from files (save, load)
149
+
150
+ 3. Observe: Analyze the results of your actions and adjust your approach
151
+ - Verify if the information answers the original question
152
+ - Identify any gaps or inconsistencies
153
+ - Determine if additional actions are needed
154
+
155
+ For complex tasks:
156
+ - Break them down into smaller, manageable steps
157
+ - Keep track of your progress and intermediate results
158
+ - Verify each step before moving to the next
159
+ - Always double-check your final answer
160
+
161
+ When reasoning:
162
+ - Be thorough and methodical
163
+ - Consider multiple perspectives
164
+ - Explain your thought process clearly
165
+ - Cite sources when providing factual information
166
+
167
+ Remember that the GAIA benchmark tests your ability to:
168
+ - Reason effectively about complex problems
169
+ - Understand and process multimodal information
170
+ - Navigate the web to find information
171
+ - Use tools appropriately to solve tasks
172
+
173
+ Always verify your answers before submitting them.
174
+ """
175
+
176
+ # Initialize the base agent
177
+ self.base_agent = Agent(
178
+ model=self.model,
179
+ tools=self.tools,
180
+ system_prompt=self.system_prompt
181
+ )
182
+
183
+ # Initialize the reasoning system
184
+ self.reasoning_system = ReasoningSystem(self.base_agent, self.memory_manager)
185
+
186
+ # Error handling and recovery settings
187
+ self.max_retries = 3
188
+ self.error_log = []
189
+
190
def solve(self, query: str, max_iterations: int = 5, verbose: bool = True) -> Dict[str, Any]:
    """
    Solve a task using the enhanced Think-Act-Observe workflow

    The query is handed to the reasoning system. If that raises, the error is
    printed and appended to ``self.error_log``, and the base agent is asked
    for a best-effort answer. If that also raises, a plain apology is returned.

    Args:
        query: The user's query or task
        max_iterations: Maximum number of iterations
        verbose: Whether to print detailed progress

    Returns:
        Dictionary containing the final answer and metadata
    """
    rule = "=" * 50

    def banner(message: str) -> None:
        # Three-line framed progress message; callers guard it with `verbose`.
        print(f"\n{rule}")
        print(message)
        print(f"{rule}\n")

    def elapsed() -> float:
        # Seconds spent since solve() was entered.
        return (datetime.now() - started_at).total_seconds()

    started_at = datetime.now()

    if verbose:
        banner(f"Starting to solve: {query}")

    try:
        # Execute the reasoning cycle
        answer = self.reasoning_system.execute_reasoning_cycle(query, max_iterations)
        duration = elapsed()

        if verbose:
            banner(f"Task completed in {duration:.2f} seconds")

        return {
            "query": query,
            "answer": answer,
            "execution_time": duration,
            # NOTE: this is the iteration cap that was passed in, not a count
            # of iterations actually performed.
            "iterations": max_iterations,
            # Memory summary is included for debugging
            "memory_summary": self.memory_manager.get_memory_summary(),
            "success": True,
            "error": None
        }
    except Exception as e:
        failure = str(e)
        trace = traceback.format_exc()
        print(f"Error solving task: {failure}\n{trace}")

        # Record the error
        self.error_log.append({
            "timestamp": datetime.now().isoformat(),
            "query": query,
            "error": failure,
            "traceback": trace
        })

        # Try to recover and provide a partial answer
        try:
            recovery_prompt = f"""
I encountered an error while trying to solve this task: {query}

The error was: {failure}

Based on what I know so far, please provide the best possible answer or explanation.
If you can't provide a complete answer, explain what you do know and what information is missing.
"""
            fallback_answer = self.base_agent.chat(recovery_prompt)
            duration = elapsed()

            if verbose:
                banner(f"Task completed with recovery in {duration:.2f} seconds")

            return {
                "query": query,
                "answer": fallback_answer,
                "execution_time": duration,
                "iterations": 0,
                "success": False,
                "error": failure,
                "recovery": True
            }
        except Exception as recovery_error:
            # If recovery fails, return a basic error message
            return {
                "query": query,
                "answer": f"I'm sorry, I encountered an error while solving this task and couldn't recover: {failure}",
                "execution_time": elapsed(),
                "iterations": 0,
                "success": False,
                "error": failure,
                "recovery_error": str(recovery_error),
                "recovery": False
            }
285
+
286
def batch_solve(self, queries: List[str], max_iterations: int = 5, verbose: bool = True) -> List[Dict[str, Any]]:
    """
    Solve multiple tasks in batch

    Each query is passed to ``self.solve`` in order, and working memory is
    cleared after every task.

    Args:
        queries: List of user queries or tasks
        max_iterations: Maximum number of iterations per query
        verbose: Whether to print detailed progress

    Returns:
        List of results for each query
    """
    rule = "=" * 50
    collected = []

    for position, task in enumerate(queries, start=1):
        if verbose:
            print(f"\n{rule}")
            print(f"Processing task {position}/{len(queries)}: {task}")
            print(f"{rule}\n")

        collected.append(self.solve(task, max_iterations, verbose))

        # Clear working memory between tasks
        self.memory_manager.clear_working_memory()

    return collected
313
+
314
def save_results(self, results: Union[Dict[str, Any], List[Dict[str, Any]]], filename: str = "gaia_results.json") -> None:
    """
    Save results to a file

    The results are written as JSON with a two-space indent. Any failure is
    reported on stdout rather than raised.

    Args:
        results: Results from solve() or batch_solve()
        filename: Name of the file to save results to
    """
    try:
        with open(filename, 'w') as handle:
            json.dump(results, handle, indent=2)

        print(f"Results saved to {filename}")
    except Exception as exc:
        # Best-effort persistence: report and carry on.
        print(f"Error saving results: {str(exc)}")
329
+
330
def load_results(self, filename: str = "gaia_results.json") -> Union[Dict[str, Any], List[Dict[str, Any]]]:
    """
    Load results from a file

    Args:
        filename: Name of the file to load results from

    Returns:
        Loaded results, or an empty list when the file cannot be read or parsed
    """
    try:
        with open(filename, 'r') as source:
            loaded = json.load(source)

        print(f"Results loaded from {filename}")
        return loaded
    except Exception as exc:
        # Any failure (missing file, invalid JSON, ...) is reported, not raised.
        print(f"Error loading results: {str(exc)}")
        return []
349
+
350
def evaluate_performance(self, results: List[Dict[str, Any]]) -> Dict[str, Any]:
    """
    Evaluate performance metrics from batch results

    A result counts as successful when its "success" entry is truthy, as
    recovered when it is not successful but its "recovery" entry is truthy,
    and as failed otherwise.

    Args:
        results: Results from batch_solve()

    Returns:
        Dictionary of performance metrics
    """
    if not results:
        return {"error": "No results to evaluate"}

    total = len(results)

    succeeded = 0
    recovered = 0
    for entry in results:
        if entry.get("success", False):
            succeeded += 1
        elif entry.get("recovery", False):
            recovered += 1
    failed = total - succeeded - recovered

    # Results without a recorded time contribute 0 to the average.
    mean_time = sum(entry.get("execution_time", 0) for entry in results) / total

    # `total` is at least 1 here, so the divisions below are safe.
    return {
        "total_queries": total,
        "successful_queries": succeeded,
        "recovery_queries": recovered,
        "failed_queries": failed,
        "success_rate": succeeded / total,
        "recovery_rate": recovered / total,
        "failure_rate": failed / total,
        "avg_execution_time": mean_time
    }
380
+
381
+
382
# Example usage: runs one sample query when the module is executed as a script
if __name__ == "__main__":
    # Build the agent
    demo_agent = EnhancedGAIAAgent(use_local_model=False, use_semantic_memory=True)

    # GAIA-style sample tasks
    demo_queries = [
        "What is the capital of France and what is its population? Also, calculate 15% of this population.",
        "Who was the first person to walk on the moon? What year did this happen?",
        "Explain the concept of photosynthesis in simple terms."
    ]

    # Only the first sample task is solved by default
    print("\nSolving single query...")
    outcome = demo_agent.solve(demo_queries[0])
    print("\nFinal Answer:")
    print(outcome["answer"])

    # Uncomment to solve batch queries
    # print("\nSolving batch queries...")
    # batch_results = demo_agent.batch_solve(demo_queries)
    #
    # # Save results
    # demo_agent.save_results(batch_results)
    #
    # # Evaluate performance
    # performance = demo_agent.evaluate_performance(batch_results)
    # print("\nPerformance Metrics:")
    # for key, value in performance.items():
    #     print(f"{key}: {value}")
memory_system.py ADDED
@@ -0,0 +1,462 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Enhanced Memory System for GAIA-Ready AI Agent
3
+
4
+ This module provides an advanced memory system for the AI agent,
5
+ including short-term, long-term, and working memory components,
6
+ as well as semantic retrieval capabilities.
7
+ """
8
+
9
+ import os
10
+ import json
11
+ from typing import List, Dict, Any, Optional, Union
12
+ from datetime import datetime
13
+ import re
14
+ import numpy as np
15
+ from collections import defaultdict
16
+
17
+ try:
18
+ from sentence_transformers import SentenceTransformer
19
+ except ImportError:
20
+ import subprocess
21
+ subprocess.check_call(["pip", "install", "sentence-transformers"])
22
+ from sentence_transformers import SentenceTransformer
23
+
24
+
25
class EnhancedMemoryManager:
    """
    Advanced memory manager for the agent that maintains short-term, long-term,
    and working memory with semantic retrieval capabilities.

    Short-term and long-term memories are lists of dicts; every item must have
    a "content" entry, and "timestamp" / "type" are filled in when missing.
    Both lists are written to ``memory_file`` as JSON after every addition.
    Working memory is a plain dict that is never persisted.

    When semantic search is enabled, ``memory_embeddings`` holds
    ``(embedding, index, mem_type)`` tuples where ``index`` is the item's
    current position in the list named by ``mem_type`` ("short_term" or
    "long_term"). Every method that reorders or trims a list keeps these
    indices in step with it.
    """

    def __init__(self, use_semantic_search=True):
        """
        Create the manager and load any memories saved in ``memory_file``.

        Args:
            use_semantic_search: Whether to embed memories for similarity
                search. Switched off automatically if the embedding model
                cannot be created.
        """
        self.short_term_memory = []  # Current conversation context
        self.long_term_memory = []   # Key facts and results
        self.working_memory = {}     # Temporary storage for complex tasks
        self.max_short_term_items = 15
        self.max_long_term_items = 100
        self.use_semantic_search = use_semantic_search
        # Always defined, even when the embedding model fails to load
        self.memory_embeddings = []

        # Initialize semantic search if enabled
        if self.use_semantic_search:
            try:
                self.embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
            except Exception as e:
                print(f"Warning: Could not initialize semantic search: {str(e)}")
                self.use_semantic_search = False

        # Memory persistence
        self.memory_file = "agent_memory.json"
        self.load_memories()

    @staticmethod
    def _prepare_item(item: Dict[str, Any]) -> None:
        """Validate a memory item in place and fill in its default fields."""
        if "content" not in item:
            raise ValueError("Memory item must have 'content' field")
        item.setdefault("timestamp", datetime.now().isoformat())
        item.setdefault("type", "general")

    @staticmethod
    def _age_in_seconds(memory: Dict[str, Any], now: datetime) -> Optional[float]:
        """Return the memory's age in seconds, or None if its timestamp is unusable."""
        try:
            timestamp = datetime.fromisoformat(memory.get("timestamp", "2000-01-01T00:00:00"))
            return (now - timestamp).total_seconds()
        except (TypeError, ValueError):
            # Non-string / malformed timestamp, or naive vs. aware mismatch
            return None

    def _append_embedding(self, item: Dict[str, Any], index: int, mem_type: str) -> None:
        """Embed one item's content and record it; failures only print a warning."""
        try:
            embedding = self.embedding_model.encode(item.get("content", ""))
            self.memory_embeddings.append((embedding, index, mem_type))
        except Exception as e:
            print(f"Warning: Could not create embedding for memory item: {str(e)}")

    def _rebuild_embeddings(self) -> None:
        """Recompute every embedding from the current short- and long-term lists."""
        self.memory_embeddings = []
        stores = (("short_term", self.short_term_memory), ("long_term", self.long_term_memory))
        for mem_type, memories in stores:
            for index, memory in enumerate(memories):
                self._append_embedding(memory, index, mem_type)

    def add_to_short_term(self, item: Dict[str, Any]) -> None:
        """
        Add an item to short-term memory, maintaining size limit

        The oldest item is dropped once the list exceeds
        ``max_short_term_items``.

        Raises:
            ValueError: If the item has no "content" entry.
        """
        self._prepare_item(item)
        self.short_term_memory.append(item)

        # Update semantic embeddings if enabled
        if self.use_semantic_search:
            self._append_embedding(item, len(self.short_term_memory) - 1, "short_term")

        # Maintain size limit
        if len(self.short_term_memory) > self.max_short_term_items:
            self.short_term_memory.pop(0)
            if self.use_semantic_search:
                # Drop the evicted item's embedding and shift the remaining
                # short-term indices down by one.
                self.memory_embeddings = [
                    (emb, idx - 1 if mem_type == "short_term" else idx, mem_type)
                    for emb, idx, mem_type in self.memory_embeddings
                    if not (mem_type == "short_term" and idx == 0)
                ]

        # Save memories periodically
        self.save_memories()

    def add_to_long_term(self, item: Dict[str, Any]) -> None:
        """
        Add an important item to long-term memory, maintaining size limit

        The list is kept sorted by "importance" (descending) and the least
        important item is dropped once it exceeds ``max_long_term_items``.
        Embedding indices are remapped to the new order so that semantic
        search keeps pointing at the right memories.

        Raises:
            ValueError: If the item has no "content" entry.
        """
        self._prepare_item(item)

        # Add importance score if not present
        if "importance" not in item:
            # Calculate importance based on content length and type
            content_length = len(item.get("content", ""))
            type_importance = {
                "final_answer": 0.9,
                "key_fact": 0.8,
                "reasoning": 0.7,
                "general": 0.5
            }
            item["importance"] = min(1.0, (content_length / 1000) * type_importance.get(item["type"], 0.5))

        self.long_term_memory.append(item)

        # Update semantic embeddings if enabled
        if self.use_semantic_search:
            self._append_embedding(item, len(self.long_term_memory) - 1, "long_term")

        # Sort by importance (descending) via an index permutation so the
        # old -> new position of every item is known. sorted() is stable,
        # so the resulting order matches list.sort() with the same key.
        order = sorted(
            range(len(self.long_term_memory)),
            key=lambda i: self.long_term_memory[i].get("importance", 0),
            reverse=True,
        )

        # Maintain size limit: drop the least important memory
        if len(order) > self.max_long_term_items:
            order.pop()

        self.long_term_memory[:] = [self.long_term_memory[i] for i in order]

        if self.use_semantic_search:
            new_position = {old: new for new, old in enumerate(order)}
            # Re-point long-term embeddings at their items' new positions and
            # discard the embedding of an evicted item.
            self.memory_embeddings = [
                (emb, new_position[idx] if mem_type == "long_term" else idx, mem_type)
                for emb, idx, mem_type in self.memory_embeddings
                if mem_type != "long_term" or idx in new_position
            ]

        # Save memories periodically
        self.save_memories()

    def store_in_working_memory(self, key: str, value: Any) -> None:
        """Store a value in working memory under the specified key"""
        # Working memory is not persisted between sessions
        self.working_memory[key] = value

    def get_from_working_memory(self, key: str) -> Optional[Any]:
        """Retrieve a value from working memory by key (None when absent)"""
        return self.working_memory.get(key)

    def clear_working_memory(self) -> None:
        """Clear the working memory"""
        self.working_memory = {}

    def get_relevant_memories(self, query: str, max_results: int = 10) -> List[Dict[str, Any]]:
        """
        Retrieve memories relevant to the current query

        Uses cosine similarity over the stored embeddings when semantic search
        is enabled, and falls back to keyword matching when it is disabled or
        fails.

        Args:
            query: The query to find relevant memories for
            max_results: Maximum number of results to return

        Returns:
            List of copies of the relevant memory items, each with an added
            "relevance_score" entry
        """
        if not self.use_semantic_search:
            return self._keyword_search(query, max_results)

        try:
            query_embedding = self.embedding_model.encode(query)
            query_norm = np.linalg.norm(query_embedding)

            # Calculate cosine similarity with all memory embeddings
            similarities = []
            for embedding, idx, mem_type in self.memory_embeddings:
                scale = query_norm * np.linalg.norm(embedding)
                # A zero-length vector has no direction; score it 0 rather than NaN
                similarity = float(np.dot(query_embedding, embedding) / scale) if scale else 0.0
                similarities.append((similarity, idx, mem_type))

            # Sort by similarity (descending)
            similarities.sort(reverse=True)

            # Get top results
            relevant_memories = []
            for similarity, idx, mem_type in similarities[:max_results]:
                store = self.short_term_memory if mem_type == "short_term" else self.long_term_memory
                memory_with_score = store[idx].copy()
                memory_with_score["relevance_score"] = similarity
                relevant_memories.append(memory_with_score)

            return relevant_memories
        except Exception as e:
            print(f"Warning: Semantic search failed: {str(e)}. Falling back to keyword search.")
            return self._keyword_search(query, max_results)

    def _keyword_search(self, query: str, max_results: int = 10) -> List[Dict[str, Any]]:
        """
        Fallback keyword-based search for relevant memories

        A memory's score is the number of query words found in its content,
        multiplied by a per-type boost and a recency factor that decays from
        1.0 to 0.5 over 24 hours (0.5 when the timestamp is unusable).
        Memories scoring 0 are left out.

        Args:
            query: The query to find relevant memories for
            max_results: Maximum number of results to return

        Returns:
            List of relevant memory items
        """
        query_keywords = set(re.findall(r'\b\w+\b', query.lower()))
        type_boost = {
            "final_answer": 2.0,
            "key_fact": 1.5,
            "reasoning": 1.2,
            "general": 1.0
        }
        now = datetime.now()

        def score_memory(memory):
            content_words = set(re.findall(r'\b\w+\b', memory.get("content", "").lower()))

            # Count matching keywords
            matches = len(query_keywords.intersection(content_words))

            # Calculate recency (assuming ISO format timestamps)
            age = self._age_in_seconds(memory, now)
            if age is None:
                recency_factor = 0.5
            else:
                hours_ago = age / 3600
                recency_factor = max(0.5, 1.0 - (hours_ago / 24))  # Decay over 24 hours

            return matches * type_boost.get(memory.get("type", "general"), 1.0) * recency_factor

        # Long-term memories are scored first, so the stable sort below keeps
        # them ahead of equally scored short-term memories.
        scored_memories = []
        for memory in self.long_term_memory + self.short_term_memory:
            score = score_memory(memory)
            if score > 0:
                memory_with_score = memory.copy()
                memory_with_score["relevance_score"] = score
                scored_memories.append((score, memory_with_score))

        # Sort by score (descending) and take top results
        scored_memories.sort(reverse=True, key=lambda x: x[0])
        return [memory for _, memory in scored_memories[:max_results]]

    def get_memory_summary(self) -> str:
        """
        Get a summary of the current memory state for the agent

        Covers the five most recent short-term memories, the five most
        important long-term memories, and all of working memory.
        """
        # Get most recent short-term memories
        recent_short_term = self.short_term_memory[-5:] if self.short_term_memory else []
        short_term_summary = "\n".join([f"- [{m.get('type', 'general')}] {m.get('content', '')[:100]}..."
                                        for m in recent_short_term])

        # Get most important long-term memories
        important_long_term = sorted(self.long_term_memory,
                                     key=lambda x: x.get("importance", 0),
                                     reverse=True)[:5] if self.long_term_memory else []
        long_term_summary = "\n".join([f"- [{m.get('type', 'general')}] {m.get('content', '')[:100]}..."
                                       for m in important_long_term])

        # Summarize working memory (only long string values are truncated)
        working_memory_summary = "\n".join([f"- {k}: {str(v)[:50]}..." if isinstance(v, str) and len(str(v)) > 50
                                            else f"- {k}: {v}" for k, v in self.working_memory.items()])

        return f"""
MEMORY SUMMARY:
--------------
Recent Short-Term Memory:
{short_term_summary if short_term_summary else "No recent short-term memories."}

Important Long-Term Memory:
{long_term_summary if long_term_summary else "No important long-term memories."}

Working Memory:
{working_memory_summary if working_memory_summary else "Working memory is empty."}
"""

    def save_memories(self) -> None:
        """Save memories to disk for persistence (failures only print a warning)"""
        try:
            # Only save short-term and long-term memories (not working memory)
            memories = {
                "short_term": self.short_term_memory,
                "long_term": self.long_term_memory,
                "last_updated": datetime.now().isoformat()
            }

            with open(self.memory_file, 'w') as f:
                json.dump(memories, f, indent=2)
        except Exception as e:
            print(f"Warning: Could not save memories: {str(e)}")

    def load_memories(self) -> None:
        """Load memories from disk if available (failures only print a warning)"""
        try:
            if os.path.exists(self.memory_file):
                with open(self.memory_file, 'r') as f:
                    memories = json.load(f)

                self.short_term_memory = memories.get("short_term", [])
                self.long_term_memory = memories.get("long_term", [])

                # Rebuild embeddings if semantic search is enabled
                if self.use_semantic_search:
                    self._rebuild_embeddings()

                print(f"Loaded {len(self.short_term_memory)} short-term and {len(self.long_term_memory)} long-term memories.")
        except Exception as e:
            print(f"Warning: Could not load memories: {str(e)}")

    def forget_old_memories(self, days_threshold: int = 30) -> None:
        """
        Remove memories older than the specified threshold

        Long-term memories get a longer lifetime: their threshold is scaled by
        ``1 + importance``. Memories whose timestamp cannot be interpreted are
        kept.

        Args:
            days_threshold: Age threshold in days
        """
        try:
            now = datetime.now()
            threshold = days_threshold * 24 * 60 * 60  # Convert to seconds

            # Filter short-term memories
            new_short_term = []
            for memory in self.short_term_memory:
                age = self._age_in_seconds(memory, now)
                if age is None or age < threshold:
                    new_short_term.append(memory)

            # Filter long-term memories
            new_long_term = []
            for memory in self.long_term_memory:
                age = self._age_in_seconds(memory, now)
                try:
                    # More important memories have a higher threshold
                    adjusted_threshold = threshold * (1 + memory.get("importance", 0.5))
                    expired = age is not None and age >= adjusted_threshold
                except TypeError:
                    # Non-numeric importance: keep the memory
                    expired = False
                if not expired:
                    new_long_term.append(memory)

            # Update memories
            removed_short_term = len(self.short_term_memory) - len(new_short_term)
            removed_long_term = len(self.long_term_memory) - len(new_long_term)

            self.short_term_memory = new_short_term
            self.long_term_memory = new_long_term

            # Rebuild embeddings if semantic search is enabled
            if self.use_semantic_search:
                self._rebuild_embeddings()

            # Save updated memories
            self.save_memories()

            print(f"Forgot {removed_short_term} short-term and {removed_long_term} long-term memories older than {days_threshold} days.")
        except Exception as e:
            print(f"Warning: Could not forget old memories: {str(e)}")
432
+
433
+
434
# Example usage: small smoke run when the module is executed as a script
if __name__ == "__main__":
    # Create the manager with semantic search requested
    manager = EnhancedMemoryManager(use_semantic_search=True)

    # Seed one short-term and one long-term memory
    manager.add_to_short_term({
        "type": "query",
        "content": "What is the capital of France?",
        "timestamp": datetime.now().isoformat()
    })

    manager.add_to_long_term({
        "type": "key_fact",
        "content": "Paris is the capital of France with a population of about 2.2 million people.",
        "timestamp": datetime.now().isoformat()
    })

    manager.store_in_working_memory("current_task", "Finding information about France")

    # Retrieve and show what the manager considers relevant
    hits = manager.get_relevant_memories("What is the population of Paris?")
    print("\nRelevant memories for 'What is the population of Paris?':")
    for hit in hits:
        print(f"- Score: {hit.get('relevance_score', 0):.2f}, Content: {hit.get('content', '')}")

    # Show the overall memory state
    print("\nMemory Summary:")
    print(manager.get_memory_summary())
reasoning_system.py ADDED
@@ -0,0 +1,668 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Reasoning System for GAIA-Ready AI Agent
3
+
4
+ This module provides advanced reasoning capabilities for the AI agent,
5
+ implementing the ReAct approach (Reasoning + Acting) and supporting
6
+ the Think-Act-Observe workflow.
7
+ """
8
+
9
+ import os
10
+ import json
11
+ from typing import List, Dict, Any, Optional, Union, Tuple
12
+ from datetime import datetime
13
+ import traceback
14
+ import re
15
+
16
+ try:
17
+ from smolagents import Agent, InferenceClientModel, Tool
18
+ except ImportError:
19
+ import subprocess
20
+ subprocess.check_call(["pip", "install", "smolagents"])
21
+ from smolagents import Agent, InferenceClientModel, Tool
22
+
23
+
24
+ class ReasoningSystem:
25
+ """
26
+ Advanced reasoning system implementing the ReAct approach
27
+ and supporting the Think-Act-Observe workflow.
28
+ """
29
+ def __init__(self, agent, memory_manager):
30
+ self.agent = agent
31
+ self.memory_manager = memory_manager
32
+ self.max_reasoning_depth = 5
33
+ self.reasoning_templates = self._load_reasoning_templates()
34
+
35
+ def _load_reasoning_templates(self) -> Dict[str, str]:
36
+ """Load reasoning templates for different stages of the workflow"""
37
+ return {
38
+ "think": """
39
+ # Task Analysis and Planning
40
+
41
+ ## Task
42
+ {query}
43
+
44
+ ## Relevant Context
45
+ {context}
46
+
47
+ ## Analysis
48
+ Let me analyze this task step by step:
49
+ 1. What is being asked?
50
+ 2. What information do I need?
51
+ 3. What challenges might I encounter?
52
+
53
+ ## Plan
54
+ Based on my analysis, here's my plan:
55
+ 1. [First step]
56
+ 2. [Second step]
57
+ 3. [Third step]
58
+ ...
59
+
60
+ ## Tools Needed
61
+ To accomplish this task, I'll need:
62
+ - [Tool 1]: For [purpose]
63
+ - [Tool 2]: For [purpose]
64
+ ...
65
+
66
+ ## Expected Outcome
67
+ If successful, I expect to:
68
+ [Description of expected outcome]
69
+ """,
70
+ "act": """
71
+ # Action Execution
72
+
73
+ ## Current Task
74
+ {query}
75
+
76
+ ## Current Plan
77
+ {plan}
78
+
79
+ ## Previous Results
80
+ {previous_results}
81
+
82
+ ## Next Action
83
+ Based on my plan and previous results, I'll now:
84
+ 1. Use the [tool name] tool
85
+ 2. With parameters: [parameters]
86
+ 3. Purpose: [why this action is needed]
87
+
88
+ ## Execution
89
+ [Detailed description of how I'll execute this action]
90
+ """,
91
+ "observe": """
92
+ # Result Analysis
93
+
94
+ ## Current Task
95
+ {query}
96
+
97
+ ## Action Taken
98
+ {action}
99
+
100
+ ## Results Obtained
101
+ {results}
102
+
103
+ ## Analysis
104
+ Let me analyze these results:
105
+ 1. What did I learn?
106
+ 2. Does this answer the original question?
107
+ 3. Are there any inconsistencies or gaps?
108
+
109
+ ## Next Steps
110
+ Based on my analysis:
111
+ - [Next step recommendation]
112
+ - [Alternative approach if needed]
113
+
114
+ ## Progress Assessment
115
+ Task completion status: [percentage]%
116
+ [Explanation of current progress]
117
+ """
118
+ }
119
+
120
def think(self, query: str) -> Dict[str, Any]:
    """
    Analyze the task and plan an approach (Think phase)

    Relevant memories are rendered into the "think" reasoning template and
    the agent is asked for an analysis and a plan. Memory lookup is treated
    as best-effort: if it fails, or an entry is malformed, the prompt is
    still built and the agent is still consulted.

    Args:
        query: The user's query or task

    Returns:
        Dictionary with the keys "raw_response", "analysis", "plan",
        "tools_needed" and "expected_outcome". When the agent call fails,
        a generic fallback plan is returned instead of raising.
    """
    def describe(memory: Dict[str, Any]) -> str:
        # Entries may carry None or non-numeric values; normalise them so
        # the ".2f" format and ".upper()" below cannot raise.
        memory_type = str(memory.get("type") or "general")
        content = memory.get("content", "")
        try:
            relevance = float(memory.get("relevance_score") or 0)
        except (TypeError, ValueError):
            relevance = 0.0
        return f"- [{memory_type.upper()}] (Relevance: {relevance:.2f}): {content}"

    # Retrieve relevant memories; a broken memory backend must not prevent
    # the agent from thinking about the task at all.
    try:
        relevant_memories = self.memory_manager.get_relevant_memories(query)
        context = "\n".join(describe(memory) for memory in relevant_memories or [])
    except Exception as e:
        print(f"Warning: could not retrieve relevant memories: {str(e)}")
        context = ""

    if not context:
        context = "No relevant prior knowledge found."

    # Apply the thinking template
    thinking_prompt = self.reasoning_templates["think"].format(
        query=query,
        context=context
    )

    # Use the agent to generate a plan
    try:
        response = self.agent.chat(thinking_prompt)

        # Store the thinking in memory
        self.memory_manager.add_to_short_term({
            "type": "thinking",
            "content": response,
            "timestamp": datetime.now().isoformat()
        })

        # Parse the response to extract structured information
        return {
            "raw_response": response,
            "analysis": self._extract_section(response, "Analysis"),
            "plan": self._extract_section(response, "Plan"),
            "tools_needed": self._extract_section(response, "Tools Needed"),
            "expected_outcome": self._extract_section(response, "Expected Outcome")
        }
    except Exception as e:
        error_msg = f"Error during thinking phase: {str(e)}\n{traceback.format_exc()}"
        print(error_msg)

        # Store the error in memory
        self.memory_manager.add_to_short_term({
            "type": "error",
            "content": error_msg,
            "timestamp": datetime.now().isoformat()
        })

        # Return a basic plan despite the error
        return {
            "raw_response": "Error occurred during thinking phase.",
            "analysis": "Could not analyze the task due to an error.",
            "plan": "1. Try a simpler approach\n2. Break down the task into smaller steps",
            "tools_needed": "web_search: To find basic information",
            "expected_outcome": "Partial answer to the query"
        }
196
+
197
def act(self, plan: Dict[str, Any], query: str, previous_results: str = "") -> Dict[str, Any]:
    """
    Execute actions based on the plan (Act phase)

    The agent is asked which tool to use next. If its answer cannot be
    parsed, it is asked once more with a stricter format. If that also
    fails, the "web_search" tool is run on the raw query as a fallback.

    ``self.agent.tools`` may be an iterable of tool objects or a
    name-to-tool mapping. A tool is invoked through its ``function``
    attribute when it has a callable one, otherwise the tool object itself
    is called.

    Args:
        plan: The plan generated by the think step
        query: The original query
        previous_results: Results from previous actions

    Returns:
        Dictionary with the keys "tool", "parameters", "result", "success"
        and "error"; fallback web searches additionally set "fallback".
        Failures are reported through the dictionary, not raised.
    """
    def remember(kind: str, content: str) -> None:
        # All short-term entries written by this phase share one shape.
        self.memory_manager.add_to_short_term({
            "type": kind,
            "content": content,
            "timestamp": datetime.now().isoformat()
        })

    def outcome(tool, parameters, result, success, error, **extra) -> Dict[str, Any]:
        # Single place that defines the result contract of this method.
        return {
            "tool": tool,
            "parameters": parameters,
            "result": result,
            "success": success,
            "error": error,
            **extra
        }

    def find_tool(name: str):
        # Look a tool up by name in either a mapping or a plain iterable.
        registry = self.agent.tools
        if isinstance(registry, dict):
            if name in registry:
                return registry[name]
            registry = registry.values()
        for candidate in registry:
            if getattr(candidate, "name", None) == name:
                return candidate
        return None

    def run_tool(tool, parameters):
        # Dict parameters are passed as keyword arguments, anything else
        # as a single positional argument.
        runner = getattr(tool, "function", None)
        if not callable(runner):
            runner = tool
        if isinstance(parameters, dict):
            return runner(**parameters)
        return runner(parameters)

    # Apply the action template
    action_prompt = self.reasoning_templates["act"].format(
        query=query,
        plan=plan.get("plan", "No plan available."),
        previous_results=previous_results if previous_results else "No previous results."
    )

    try:
        # Use the agent to determine the next action
        action_response = self.agent.chat(action_prompt)
        remember("action_planning", action_response)

        # Parse the action response to extract tool and parameters
        tool_info = self._extract_tool_info(action_response)

        if not tool_info:
            # If no tool was identified, try a more direct approach
            direct_prompt = f"""
            Based on the task "{query}" and the plan:
            {plan.get('plan', 'No plan available.')}

            Which specific tool should I use next and with what parameters?
            Respond in this format:
            TOOL: [tool name]
            PARAMETERS: [parameter1=value1, parameter2=value2, ...]
            """
            tool_info = self._extract_tool_info(self.agent.chat(direct_prompt))

        if not tool_info:
            error_msg = "Could not determine which tool to use."
            print(error_msg)
            remember("error", error_msg)

            # Default to web search as a fallback
            web_search_tool = find_tool("web_search")
            if web_search_tool is None:
                return outcome(
                    "none",
                    "none",
                    "Could not determine which tool to use and web_search fallback not available.",
                    False,
                    "No tool selected"
                )

            try:
                result = run_tool(web_search_tool, query)
            except Exception as e:
                error_msg = f"Error in fallback web search: {str(e)}"
                print(error_msg)
                remember("error", error_msg)
                return outcome("web_search", query, error_msg, False, str(e), fallback=True)

            # Record the fallback result like any other tool result so that
            # later phases can see what was actually executed.
            remember("action_result", f"Tool: web_search\nParameters: {query}\nResult: {result}")
            return outcome("web_search", query, result, True, None, fallback=True)

        tool_name = tool_info["tool"]
        tool_params = tool_info["parameters"]

        # Find the matching tool
        matching_tool = find_tool(tool_name)
        if matching_tool is None:
            error_msg = f"Tool '{tool_name}' not found."
            print(error_msg)
            remember("error", error_msg)
            return outcome(tool_name, tool_params, f"Error: Tool '{tool_name}' not found.", False, "Tool not found")

        # Execute the tool
        try:
            result = run_tool(matching_tool, tool_params)
        except Exception as e:
            error_msg = f"Error executing tool {tool_name}: {str(e)}\n{traceback.format_exc()}"
            print(error_msg)
            remember("error", error_msg)
            return outcome(tool_name, tool_params, f"Error: {str(e)}", False, str(e))

        # Store the successful action result in memory
        remember("action_result", f"Tool: {tool_name}\nParameters: {tool_params}\nResult: {result}")
        return outcome(tool_name, tool_params, result, True, None)
    except Exception as e:
        error_msg = f"Error during action phase: {str(e)}\n{traceback.format_exc()}"
        print(error_msg)
        remember("error", error_msg)
        return outcome("none", "none", f"Error during action planning: {str(e)}", False, str(e))
378
+
379
def observe(self, action_result: Dict[str, Any], plan: Dict[str, Any], query: str) -> Dict[str, Any]:
    """
    Analyze the results of actions and determine next steps (Observe phase)

    The agent is asked to assess the latest action result. Its reply is
    scanned for completion phrases to decide whether the reasoning loop
    should stop. A phrase that is directly negated ("not fully answered",
    "isn't 100%") does not count as completion. This is a text heuristic,
    not a guarantee.

    Args:
        action_result: Results from the act step
        plan: The original plan (currently not used by this phase)
        query: The original query

    Returns:
        Dictionary with the keys "raw_response", "analysis", "next_steps",
        "progress" and "continue". "continue" is False only when the reply
        signals completion; on errors it defaults to True.
    """
    import re  # function-scope import keeps this method self-contained

    # Regex fragments; "100%" must not be the tail of a larger number.
    completion_patterns = (
        r"task complete", r"question answered", r"fully answered",
        r"(?<!\d)100%", r"task is complete", r"fully resolved"
    )
    # A negation word followed by at most two more words, within the same
    # sentence, directly before the completion phrase.
    negated_before = re.compile(
        r"(?:\bnot|n[’']t|\bnever)[^\w.!?\n]+(?:\w+[^\w.!?\n]+){0,2}\Z"
    )

    def reports_completion(text: str) -> bool:
        lowered = text.lower()
        for pattern in completion_patterns:
            for hit in re.finditer(pattern, lowered):
                lead_in = lowered[max(0, hit.start() - 60):hit.start()]
                if not negated_before.search(lead_in):
                    return True
        return False

    # Apply the observation template
    observation_prompt = self.reasoning_templates["observe"].format(
        query=query,
        action=f"Tool: {action_result.get('tool', 'none')}\nParameters: {action_result.get('parameters', 'none')}",
        results=action_result.get('result', 'No results.')
    )

    try:
        # Use the agent to analyze the results
        observation_response = self.agent.chat(observation_prompt)

        # Store the observation in memory
        self.memory_manager.add_to_short_term({
            "type": "observation",
            "content": observation_response,
            "timestamp": datetime.now().isoformat()
        })

        # Determine if we need to continue with more actions
        continue_execution = not reports_completion(observation_response)

        if not continue_execution:
            # Store the final answer in long-term memory
            self.memory_manager.add_to_long_term({
                "type": "final_answer",
                "query": query,
                "content": observation_response,
                "timestamp": datetime.now().isoformat(),
                "importance": 0.8  # High importance for final answers
            })

        # Parse the observation to extract structured information
        return {
            "raw_response": observation_response,
            "analysis": self._extract_section(observation_response, "Analysis"),
            "next_steps": self._extract_section(observation_response, "Next Steps"),
            "progress": self._extract_section(observation_response, "Progress Assessment"),
            "continue": continue_execution
        }
    except Exception as e:
        error_msg = f"Error during observation phase: {str(e)}\n{traceback.format_exc()}"
        print(error_msg)

        # Store the error in memory
        self.memory_manager.add_to_short_term({
            "type": "error",
            "content": error_msg,
            "timestamp": datetime.now().isoformat()
        })

        # Default observation with continuation
        return {
            "raw_response": f"Error occurred during observation phase: {str(e)}",
            "analysis": "Could not analyze the results due to an error.",
            "next_steps": "Try a different approach or tool.",
            "progress": "Unknown due to error.",
            "continue": True  # Continue by default on error
        }
462
+
463
+ def _extract_section(self, text: str, section_name: str) -> str:
464
+ """Extract a section from the response text"""
465
+ pattern = rf"(?:^|\n)(?:#+\s*{re.escape(section_name)}:?|\*\*{re.escape(section_name)}:?\*\*|{re.escape(section_name)}:?)\s*(.*?)(?:\n(?:#+\s*|$)|\Z)"
466
+ match = re.search(pattern, text, re.DOTALL | re.IGNORECASE)
467
+
468
+ if match:
469
+ content = match.group(1).strip()
470
+ return content
471
+
472
+ # Try a more lenient approach if the first one fails
473
+ pattern = rf"{re.escape(section_name)}:?\s*(.*?)(?:\n\n|\n[A-Z]|\Z)"
474
+ match = re.search(pattern, text, re.DOTALL | re.IGNORECASE)
475
+
476
+ if match:
477
+ content = match.group(1).strip()
478
+ return content
479
+
480
+ return f"No {section_name.lower()} found."
481
+
482
+ def _extract_tool_info(self, text: str) -> Optional[Dict[str, Any]]:
483
+ """Extract tool name and parameters from the response text"""
484
+ # Try to find tool name
485
+ tool_pattern = r"(?:TOOL|Tool|tool):\s*(\w+)"
486
+ tool_match = re.search(tool_pattern, text)
487
+
488
+ if not tool_match:
489
+ return None
490
+
491
+ tool_name = tool_match.group(1).strip()
492
+
493
+ # Try to find parameters
494
+ params_pattern = r"(?:PARAMETERS|Parameters|parameters):\s*(.*?)(?:\n\n|\n[A-Z]|\Z)"
495
+ params_match = re.search(params_pattern, text, re.DOTALL)
496
+
497
+ if params_match:
498
+ params_text = params_match.group(1).strip()
499
+
500
+ # Check if parameters are in key=value format
501
+ if "=" in params_text:
502
+ # Parse as dictionary
503
+ params_dict = {}
504
+ param_pairs = re.findall(r"(\w+)\s*=\s*([^,\n]+)", params_text)
505
+
506
+ for key, value in param_pairs:
507
+ params_dict[key.strip()] = value.strip()
508
+
509
+ return {
510
+ "tool": tool_name,
511
+ "parameters": params_dict
512
+ }
513
+ else:
514
+ # Treat as a single string parameter
515
+ return {
516
+ "tool": tool_name,
517
+ "parameters": params_text
518
+ }
519
+ else:
520
+ # No parameters found, use empty dict
521
+ return {
522
+ "tool": tool_name,
523
+ "parameters": {}
524
+ }
525
+
526
def execute_reasoning_cycle(self, query: str, max_iterations: int = 5) -> str:
    """
    Execute a complete Think-Act-Observe reasoning cycle

    Each iteration runs think, act and observe. When the observation says
    no further work is needed, the agent is asked for a final answer. When
    the iteration budget runs out first, a partial answer built from the
    last observation is returned. Both kinds of answer are written to
    long-term memory under the original query.

    Args:
        query: The user's query or task
        max_iterations: Maximum number of iterations; zero or less skips
            the loop and yields the partial-answer message

    Returns:
        Final answer to the query
    """
    # Store the query in memory
    self.memory_manager.add_to_short_term({
        "type": "query",
        "content": query,
        "timestamp": datetime.now().isoformat()
    })

    # The task as the user phrased it. The per-iteration prompt is rebuilt
    # from this so progress notes do not nest inside each other.
    original_query = query
    final_answer = None
    all_results = []
    # Pre-bound so the partial-answer path works even if the loop never runs.
    observation: Dict[str, Any] = {}

    for iteration in range(max_iterations):
        print(f"Iteration {iteration + 1}/{max_iterations}")

        # Think
        print("Thinking...")
        plan = self.think(query)

        # Act
        print("Acting...")
        # Tool results are not guaranteed to be strings.
        previous_results = "\n".join(str(r.get("result", "")) for r in all_results)
        action_result = self.act(plan, query, previous_results)
        all_results.append(action_result)

        # Observe
        print("Observing...")
        observation = self.observe(action_result, plan, query)

        # Check if we have a final answer
        if not observation["continue"]:
            actions_taken = ', '.join(
                f"{r.get('tool', 'unknown')}({r.get('parameters', '')})" for r in all_results
            )

            # Generate final answer
            final_answer_prompt = f"""
            TASK: {original_query}

            REASONING PROCESS:
            {plan.get('raw_response', 'No thinking process available.')}

            ACTIONS TAKEN:
            {actions_taken}

            RESULTS:
            {previous_results}
            {action_result.get('result', '')}

            OBSERVATION:
            {observation.get('raw_response', 'No observation available.')}

            Based on all the above, provide a comprehensive final answer to the original task.
            """
            final_answer = self.agent.chat(final_answer_prompt)

            # Store the final answer in long-term memory
            self.memory_manager.add_to_long_term({
                "type": "final_answer",
                "query": original_query,
                "content": final_answer,
                "timestamp": datetime.now().isoformat(),
                "importance": 0.9  # Very high importance
            })

            break

        # Update the query with the observation for the next iteration
        query = f"""
        Original task: {original_query}

        Progress so far:
        {observation.get('raw_response', 'No observation available.')}

        Please continue solving this task.
        """

    # If we reached max iterations without a final answer
    if final_answer is None:
        final_answer = f"""
        I've spent {max_iterations} iterations trying to solve this task.
        Here's my best answer based on what I've learned:

        {observation.get('raw_response', 'No final observation available.')}

        Note: This answer may be incomplete as I reached the maximum number of iterations.
        """

        # Store the partial answer in long-term memory
        self.memory_manager.add_to_long_term({
            "type": "partial_answer",
            "query": original_query,
            "content": final_answer,
            "timestamp": datetime.now().isoformat(),
            "importance": 0.6  # Medium importance for partial answers
        })

    return final_answer
633
+
634
+
635
+ # Example usage
636
if __name__ == "__main__":
    # Self-contained smoke test: everything ReasoningSystem talks to is
    # mocked locally, so running this file needs no third-party package
    # and no model access.

    class MockTool:
        """Stand-in tool exposing the attributes the act phase reads."""

        def __init__(self, name, description, function):
            self.name = name
            self.description = description
            self.function = function

    # Mock agent for testing
    class MockAgent:
        def __init__(self):
            self.tools = [
                MockTool(name="web_search", description="Search the web", function=lambda x: f"Search results for: {x}"),
                # NOTE(review): eval is tolerable only because this is a local
                # mock; never evaluate model- or user-supplied text this way.
                MockTool(name="calculator", description="Calculate", function=lambda x: f"Result: {eval(x)}")
            ]

        def chat(self, message):
            return f"Response to: {message[:50]}..."

    # Mock memory manager
    class MockMemoryManager:
        def add_to_short_term(self, item):
            print(f"Added to short-term: {item['type']}")

        def add_to_long_term(self, item):
            print(f"Added to long-term: {item['type']}")

        def get_relevant_memories(self, query):
            return []

    # Test the reasoning system
    agent = MockAgent()
    memory_manager = MockMemoryManager()
    reasoning = ReasoningSystem(agent, memory_manager)

    result = reasoning.execute_reasoning_cycle("What is 2+2?")
    print(f"\nFinal result: {result}")