datdevsteve committed on
Commit
83a50ac
·
verified ·
1 Parent(s): 47f3b3f

Upload gaia_agent.py

Browse files
Files changed (1) hide show
  1. gaia_agent.py +161 -0
gaia_agent.py ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ from langchain.agents import create_agent
4
+ from langchain.tools import tool
5
+ from dotenv import load_dotenv
6
+ from langchain_community.document_loaders import ArxivLoader, WikipediaLoader
7
+ from ddgs import DDGS
8
+ from bs4 import BeautifulSoup
9
+
10
# Load environment variables from a local .env file, if one exists.
load_dotenv()

# --- Agent Setup ---
# These two keys are read but not used anywhere in the visible part of the file.
openai_key = os.getenv("OPENAI_API_KEY")
googleai_key = os.getenv("GOOGLE_API_KEY")

# Use OpenRouter via LangChain's ChatOpenAI
openrouter_key = os.getenv("OPENROUTER_API_KEY")
if not openrouter_key:
    # Fail fast at import time: nothing below can work without this key.
    raise RuntimeError("Set OPENROUTER_API_KEY in your .env (OpenRouter API key)")

# NOTE: this import still executes when the module is imported; placing it
# after the key check only means a missing key is reported before a missing
# langchain_openai package would be.
from langchain_openai import ChatOpenAI

# Chat model shared by every agent built in this module; requests go to
# OpenRouter's OpenAI-compatible endpoint.
model = ChatOpenAI(
    api_key=openrouter_key,
    base_url="https://openrouter.ai/api/v1",
    model="gpt-4o-mini",
    max_completion_tokens=10000,
)
31
+
32
+ # --- Tools Definition ---
33
@tool
def multiply(a: int, b: int) -> int:
    """Multiply two integers and return the product.

    Args:
        a: The first factor.
        b: The second factor.
    """
    return a * b
36
+
37
@tool
def add(a: int, b: int) -> int:
    """Add two integers and return the sum.

    Args:
        a: The first addend.
        b: The second addend.
    """
    return a + b
40
+
41
@tool
def subtract(a: int, b: int) -> int:
    """Subtract b from a and return the difference.

    Args:
        a: The value to subtract from.
        b: The value to subtract.
    """
    return a - b
44
+
45
@tool
def divide(a: int, b: int) -> float:
    """Divide a by b and return the quotient as a float.

    Args:
        a: The dividend.
        b: The divisor; must not be zero.

    Raises:
        ValueError: If b is zero.
    """
    if b == 0:
        raise ValueError("Cannot divide by zero.")
    return a / b
50
+
51
@tool
def modulus(a: int, b: int) -> int:
    """Return the remainder of a divided by b.

    Args:
        a: The dividend.
        b: The divisor; must not be zero.

    Raises:
        ValueError: If b is zero.
    """
    # Same zero-divisor handling as `divide`, rather than letting a bare
    # ZeroDivisionError escape from the `%` operator.
    if b == 0:
        raise ValueError("Cannot take modulus by zero.")
    return a % b
54
+
55
@tool
def wiki_search(query: str) -> str:
    """Search Wikipedia for a query and return maximum 2 results."""
    # Render each loaded page as a <Document> block carrying its source and
    # (optional) page metadata, then join the blocks with a visible divider.
    rendered = []
    for page in WikipediaLoader(query=query, load_max_docs=2).load():
        source = page.metadata["source"]
        page_no = page.metadata.get("page", "")
        rendered.append(
            f'<Document source="{source}" page="{page_no}"/>\n{page.page_content}\n</Document>'
        )
    return "\n\n---\n\n".join(rendered)
66
+
67
@tool
def web_search(query: str) -> str:
    """Search DDGS for a query and return maximum 3 results."""
    hits = DDGS().text(query, max_results=3)
    # One "Title/Content" entry per hit, separated by a visible divider.
    return "\n\n---\n\n".join(
        f'Title:{hit["title"]}\nContent:{hit["body"]}\n--\n' for hit in hits
    )
78
+
79
@tool
def arxiv_search(query: str) -> str:
    """Search arXiv for a query and return maximum 3 results."""
    rendered = []
    for paper in ArxivLoader(query=query, load_max_docs=3).load():
        source = paper.metadata["source"]
        page_no = paper.metadata.get("page", "")
        # Only the first 1000 characters of each paper are returned.
        excerpt = paper.page_content[:1000]
        rendered.append(
            f'<Document source="{source}" page="{page_no}"/>\n{excerpt}\n</Document>'
        )
    return "\n\n---\n\n".join(rendered)
90
+
91
@tool
def image_search(query: str) -> str:
    """Searches DDGS for an image query and returns maximum 10 image results"""
    # Pass the cap explicitly so the "maximum 10" promise in the description
    # holds regardless of the library's default.
    search_images = DDGS().images(query=query, max_results=10)
    # One "Image Title / Image URL" entry per hit, separated by a divider.
    return "\n\n---\n\n".join(
        f'Image Title:{image["title"]}\nImage URL: {image["url"]}'
        for image in search_images
    )
102
+
103
@tool
def fetch_url_content(url: str) -> str:
    """Fetch and return the text content from a webpage URL."""
    try:
        page = requests.get(url, timeout=10)
        page.raise_for_status()
        parsed = BeautifulSoup(page.text, 'html.parser')
        # Drop script and style elements so only visible text is extracted.
        for tag in parsed(["script", "style"]):
            tag.decompose()
        body = parsed.get_text(separator='\n', strip=True)
        # Cap the result at 2000 characters and mark truncation with "...".
        suffix = "..." if len(body) > 2000 else ""
        return body[:2000] + suffix
    except Exception as exc:
        # Best-effort tool: report the failure as text instead of raising.
        return f"Error fetching URL: {exc}"
116
+
117
# Every tool made available to the agent: calculator helpers first, then
# the search tools, then the page fetcher.
tools = [
    multiply,
    add,
    subtract,
    divide,
    modulus,
    wiki_search,
    web_search,
    arxiv_search,
    image_search,
    fetch_url_content,
]
123
+
124
+ # System prompt
125
# System prompt given to the agent. The numbered list names every tool the
# agent is equipped with, including the URL fetcher, and the trailing section
# fixes the format of the "tools used" footer.
sys_prompt = """You are a helpful agent, please provide clear and concise answers to asked questions.
Keep your word limit for answers as minimum as you can. You are equipped with the following tools:
1. [multiply], [add], [subtract], [divide], [modulus] - basic calculator operations.
2. [wiki_search] - search Wikipedia and return up to 2 documents as text.
3. [web_search] - perform a web search and return up to 3 documents as text.
4. [arxiv_search] - search arXiv and return up to 3 documents as text.
5. [image_search] - Searches the internet for an image query and returns maximum 10 image results
6. [fetch_url_content] - fetch a webpage URL and return up to the first 2000 characters of its text.

Under any circumstances, if you fail to provide the accurate answer expected by the user, you may say the same to the user and provide a similar answer which is approximately the closest. Disregard spelling mistakes and provide answer with results retreived from the correct spelling.

For every tool you use, append a single line at the end of your response exactly in this format:
[TOOLS USED: (tool_name)]
When no tools are used, append:
[TOOLS USED WERE NONE]
"""
140
+
141
class GAIAAgent:
    """Callable wrapper that answers a question with the module's agent.

    The agent is built from the module-level ``model``, ``tools`` and
    ``sys_prompt``. Calling the instance sends one user message and returns
    the content of the last message in the result as plain text.
    """

    def __init__(self):
        # Let construction errors propagate to the caller unchanged.
        self.agent = create_agent(model, tools=tools, system_prompt=sys_prompt)

    def __call__(self, question: str) -> str:
        """Run the agent on ``question`` and return its final answer text."""
        result = self.agent.invoke({"messages": [{"role": "user", "content": question}]})
        return self._content_to_text(result["messages"][-1].content)

    @staticmethod
    def _content_to_text(content) -> str:
        """Flatten message content (a string or a list of blocks) to text.

        Strings are returned as-is. For a list, every plain string and every
        dict block with a string ``"text"`` entry is concatenated in order.
        An empty list yields an empty string. Anything else falls back to
        ``str(content)``.
        """
        if isinstance(content, str):
            return content
        if isinstance(content, list):
            parts = []
            for block in content:
                if isinstance(block, str):
                    parts.append(block)
                elif isinstance(block, dict) and isinstance(block.get("text"), str):
                    parts.append(block["text"])
            # A non-empty list with no recognisable text falls through to
            # the str() fallback below.
            if parts or not content:
                return "".join(parts)
        return str(content)