Tingusto committed on
Commit
2f65b93
·
1 Parent(s): 6eb1e80

Build agent & graph

Browse files
Files changed (1) hide show
  1. agent.py +125 -160
agent.py CHANGED
@@ -1,27 +1,90 @@
1
  import os
2
- from typing import Dict, List, Optional
3
  from dotenv import load_dotenv
 
 
 
4
  from langchain_groq import ChatGroq
5
- from langchain_core.messages import SystemMessage, HumanMessage
6
  from langchain_community.document_loaders import WikipediaLoader
7
  from langchain_community.document_loaders import ArxivLoader
8
- import json
 
9
  import requests
10
  from bs4 import BeautifulSoup
11
  import urllib.parse
12
- import pandas as pd
13
- import re
14
 
15
  load_dotenv()
16
 
17
- class BasicAgent:
18
- def __init__(self):
19
- self.llm = ChatGroq(
20
- model="meta-llama/llama-4-maverick-17b-128e-instruct",
21
- temperature=0.1
22
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
- self.system_prompt = """You are a highly accurate question-answering assistant. Your task is to provide precise, direct answers to questions.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
  Key Rules:
27
  1. Answer Format:
@@ -55,152 +118,54 @@ Key Rules:
55
  - For scientific terms: Use the standard scientific notation
56
  - For geographical locations: Use official names without abbreviations
57
  - For audio/video questions: Focus on the specific detail requested"""
58
-
59
- # Initialize tools
60
- self.tools = [
61
- self.wiki_search,
62
- self.web_search,
63
- self.arxiv_search
64
- ]
65
-
66
- def wiki_search(self, query: str) -> str:
67
- """Search Wikipedia for information."""
68
- try:
69
- search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
70
- return "\n".join([doc.page_content for doc in search_docs])
71
- except Exception as e:
72
- return f"Error searching Wikipedia: {str(e)}"
73
-
74
- def web_search(self, query: str) -> str:
75
- """Search the web using DuckDuckGo."""
76
- try:
77
- encoded_query = urllib.parse.quote(query)
78
- url = f"https://html.duckduckgo.com/html/?q={encoded_query}"
79
-
80
- headers = {
81
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
82
- }
83
-
84
- response = requests.get(url, headers=headers)
85
- response.raise_for_status()
86
-
87
- soup = BeautifulSoup(response.text, 'html.parser')
88
-
89
- results = []
90
- for result in soup.find_all('div', class_='result__body'):
91
- title = result.find('h2', class_='result__title')
92
- snippet = result.find('a', class_='result__snippet')
93
-
94
- if title and snippet:
95
- results.append(f"Title: {title.get_text()}\nSnippet: {snippet.get_text()}")
96
-
97
- if len(results) >= 3:
98
- break
99
-
100
- return "\n\n".join(results) if results else "No results found"
101
-
102
- except Exception as e:
103
- return f"Error searching web: {str(e)}"
104
-
105
- def arxiv_search(self, query: str) -> str:
106
- """Search Arxiv for scientific papers."""
107
- try:
108
- search_docs = ArxivLoader(query=query, load_max_docs=2).load()
109
- return "\n".join([doc.page_content[:1000] for doc in search_docs])
110
- except Exception as e:
111
- return f"Error searching Arxiv: {str(e)}"
112
-
113
- def process_file(self, file_name: str, question: str) -> str:
114
- """Process different types of files based on extension."""
115
- try:
116
- if not file_name:
117
- return "No file provided"
118
-
119
- file_ext = file_name.split('.')[-1].lower()
120
-
121
- if file_ext == 'xlsx':
122
- df = pd.read_excel(file_name)
123
- return f"Excel file loaded with {len(df)} rows"
124
-
125
- elif file_ext == 'mp3':
126
- return "Audio file detected - requires speech processing"
127
-
128
- elif file_ext == 'png':
129
- return "Image file detected - requires image processing"
130
-
131
- elif file_ext == 'py':
132
- with open(file_name, 'r') as f:
133
- code = f.read()
134
- return f"Python code loaded: {len(code)} characters"
135
-
136
- else:
137
- return f"Unsupported file type: {file_ext}"
138
-
139
- except Exception as e:
140
- return f"Error processing file: {str(e)}"
141
 
142
- def __call__(self, question: str, file_name: str = None) -> str:
143
- try:
144
- if question.startswith('.'):
145
- question = question[::-1]
146
-
147
- file_info = ""
148
- if file_name:
149
- file_info = self.process_file(file_name, question)
150
-
151
- analysis_prompt = f"""Analyze this question and determine its type and required format:
152
- Question: {question}
153
- File Info: {file_info}
154
- Provide a JSON response with:
155
- 1. question_type: (number/text/list/date/name/multiple_choice/file_processing)
156
- 2. required_format: (specific format requirements)
157
- 3. key_terms: (important terms to search for)
158
- 4. file_processing_needed: (true/false)"""
159
-
160
- analysis_messages = [
161
- SystemMessage(content="You are a question analyzer. Provide a JSON response."),
162
- HumanMessage(content=analysis_prompt)
163
- ]
164
-
165
- analysis = self.llm.invoke(analysis_messages)
166
- try:
167
- analysis_data = json.loads(analysis.content)
168
- except:
169
- analysis_data = {
170
- "question_type": "text",
171
- "required_format": "direct",
172
- "key_terms": question,
173
- "file_processing_needed": bool(file_name)
174
- }
175
-
176
- messages = [
177
- SystemMessage(content=self.system_prompt),
178
- HumanMessage(content=f"""Question Type: {analysis_data['question_type']}
179
- Required Format: {analysis_data['required_format']}
180
- Key Terms: {analysis_data['key_terms']}
181
- File Processing: {analysis_data.get('file_processing_needed', False)}
182
-
183
- Question: {question}""")
184
- ]
185
-
186
- response = self.llm.invoke(messages)
187
-
188
- answer = response.content.strip()
189
-
190
- if answer.lower().startswith("final answer:"):
191
- answer = answer[len("final answer:"):].strip()
192
-
193
- if analysis_data['question_type'] == 'number':
194
- answer = ''.join(c for c in answer if c.isdigit() or c in '.-')
195
- elif analysis_data['question_type'] == 'list':
196
- answer = ','.join(item.strip() for item in answer.split(','))
197
- elif analysis_data['question_type'] == 'country_code':
198
- answer = answer[:3].upper()
199
- elif analysis_data['question_type'] == 'chess_move':
200
- answer = re.sub(r'[^a-h1-8x+=#]', '', answer)
201
-
202
- return answer
203
-
204
- except Exception as e:
205
- print(f"Error in agent response: {e}")
206
- return f"Error processing question: {str(e)}"
 
1
  import os
 
2
  from dotenv import load_dotenv
3
+ from langgraph.graph import START, StateGraph, MessagesState
4
+ from langgraph.prebuilt import tools_condition
5
+ from langgraph.prebuilt import ToolNode
6
  from langchain_groq import ChatGroq
 
7
  from langchain_community.document_loaders import WikipediaLoader
8
  from langchain_community.document_loaders import ArxivLoader
9
+ from langchain_core.messages import SystemMessage, HumanMessage
10
+ from langchain_core.tools import tool
11
  import requests
12
  from bs4 import BeautifulSoup
13
  import urllib.parse
 
 
14
 
15
  load_dotenv()
16
 
17
# NOTE: @tool exposes the docstring to the model as the tool description,
# so maintainer notes are kept in comments and the docstring stays short.
#
# Returns plain text in every branch (matching the `-> str` annotation):
# the formatted documents, "No results found", or an error description.
@tool
def wiki_search(query: str) -> str:
    """Search Wikipedia for information.

    Args:
        query: The search query."""
    try:
        search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
        if not search_docs:
            # Same wording web_search uses, so the model never receives an
            # empty tool message.
            return "No results found"

        # .get() keeps one document with incomplete metadata from turning
        # the whole search into an error.
        return "\n\n---\n\n".join(
            f'<Document source="{doc.metadata.get("source", "")}" '
            f'page="{doc.metadata.get("page", "")}">\n'
            f"{doc.page_content}\n</Document>"
            for doc in search_docs
        )
    except Exception as e:
        # Best-effort tool: report the failure as text so the agent can
        # react to it instead of the whole graph run aborting.
        return f"Error searching Wikipedia: {str(e)}"
33
+
34
# NOTE: @tool exposes the docstring to the model as the tool description,
# so maintainer notes are kept in comments and the docstring stays short.
#
# Scrapes the DuckDuckGo HTML endpoint and returns up to three
# "Title/Snippet" entries as plain text (matching the `-> str` annotation),
# "No results found" when nothing parses, or an error description.
@tool
def web_search(query: str) -> str:
    """Search the web using DuckDuckGo.

    Args:
        query: The search query."""
    try:
        encoded_query = urllib.parse.quote(query)
        url = f"https://html.duckduckgo.com/html/?q={encoded_query}"

        # Browser-like User-Agent sent with the scrape request.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }

        # requests never times out by default; bound the wait so a stalled
        # connection cannot hang the agent.
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')

        results = []
        for result in soup.find_all('div', class_='result__body'):
            title = result.find('h2', class_='result__title')
            snippet = result.find('a', class_='result__snippet')

            # Skip entries that lack either part.
            if title and snippet:
                results.append(f"Title: {title.get_text()}\nSnippet: {snippet.get_text()}")

            # Three hits are enough context for the model.
            if len(results) >= 3:
                break

        return "\n\n".join(results) if results else "No results found"

    except Exception as e:
        # Best-effort tool: network/parse failures are reported as text
        # (this now includes request timeouts).
        return f"Error searching web: {str(e)}"
68
+
69
# NOTE: @tool exposes the docstring to the model as the tool description,
# so maintainer notes are kept in comments and the docstring stays short.
#
# Returns plain text in every branch (matching the `-> str` annotation):
# the formatted documents (content truncated to 1000 characters each),
# "No results found", or an error description.
@tool
def arxiv_search(query: str) -> str:
    """Search Arxiv for scientific papers.

    Args:
        query: The search query."""
    try:
        search_docs = ArxivLoader(query=query, load_max_docs=2).load()
        if not search_docs:
            # Same wording web_search uses, so the model never receives an
            # empty tool message.
            return "No results found"

        def _source(doc) -> str:
            # NOTE(review): ArxivLoader's documented default metadata is
            # Published/Title/Authors/Summary with no "source" key, so
            # indexing ["source"] would raise KeyError; fall back to other
            # identifying fields instead. Confirm against the installed
            # langchain_community version.
            meta = doc.metadata
            return meta.get("source") or meta.get("entry_id") or meta.get("Title", "")

        return "\n\n---\n\n".join(
            f'<Document source="{_source(doc)}" '
            f'page="{doc.metadata.get("page", "")}">\n'
            f"{doc.page_content[:1000]}\n</Document>"
            for doc in search_docs
        )
    except Exception as e:
        # Best-effort tool: report the failure as text so the agent can
        # react to it instead of the whole graph run aborting.
        return f"Error searching Arxiv: {str(e)}"
85
+
86
+ # System prompt
87
+ system_prompt = """You are a highly accurate question-answering assistant. Your task is to provide precise, direct answers to questions.
88
 
89
  Key Rules:
90
  1. Answer Format:
 
118
  - For scientific terms: Use the standard scientific notation
119
  - For geographical locations: Use official names without abbreviations
120
  - For audio/video questions: Focus on the specific detail requested"""
121
+
122
# Wrap the prompt text in a SystemMessage object.
sys_msg = SystemMessage(content=system_prompt)

# The search tools handed to both the LLM binding and the ToolNode in
# build_graph().
tools = [wiki_search, web_search, arxiv_search]
131
+
132
def build_graph():
    """Build and compile the agent graph.

    The graph has two nodes: "assistant", which calls the Groq chat model
    with the search tools bound, and "tools", which executes whatever tool
    calls the model requested. Control starts at "assistant";
    ``tools_condition`` decides where to go after each model reply, and
    "tools" always hands back to "assistant".

    Returns:
        The compiled graph; invoke it with ``{"messages": [...]}``.
    """
    # Initialize Groq LLM
    llm = ChatGroq(
        model="meta-llama/llama-4-maverick-17b-128e-instruct",
        temperature=0.1,
    )

    # Bind tools to LLM
    llm_with_tools = llm.bind_tools(tools)

    def assistant(state: MessagesState):
        """Assistant node: run the model on the conversation so far."""
        messages = list(state["messages"])
        # The system prompt was built at module level but never sent to the
        # model. Prepend it unless the caller already supplied one, so the
        # prompt is neither missing nor duplicated.
        if not messages or not isinstance(messages[0], SystemMessage):
            messages = [sys_msg] + messages
        return {"messages": [llm_with_tools.invoke(messages)]}

    # Build graph
    builder = StateGraph(MessagesState)
    builder.add_node("assistant", assistant)
    builder.add_node("tools", ToolNode(tools))
    builder.add_edge(START, "assistant")
    builder.add_conditional_edges(
        "assistant",
        tools_condition,
    )
    # After tools run, return to the model so it can read their output.
    builder.add_edge("tools", "assistant")

    # Compile graph
    return builder.compile()
161
+
162
+ # Test
163
if __name__ == "__main__":
    # Manual smoke test: send one sample question through the graph and
    # print every message it produced.
    question = "When was a picture of St. Thomas Aquinas first added to the Wikipedia page on the Principle of double effect?"
    graph = build_graph()
    result = graph.invoke({"messages": [HumanMessage(content=question)]})
    for message in result["messages"]:
        message.pretty_print()