AlaaWO commited on
Commit
9ac16fa
·
verified ·
1 Parent(s): 81917a3

Upload agent.py

Browse files

A LangGraph ReAct agent built on a Hugging Face Inference Endpoint serving a Llama-Instruct model

Files changed (1) hide show
  1. agent.py +375 -0
agent.py ADDED
@@ -0,0 +1,375 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """langgraph ReAct LLAMA instruct agent"""
2
+ from dotenv import load_dotenv
3
+ import os
4
+ from typing import TypedDict, List, Dict, Any, Optional
5
+ from langchain_tavily import TavilySearch
6
+ from langchain_core.tools import tool
7
+ import requests
8
+ from urllib.parse import urlparse
9
+ from langgraph.graph import START, StateGraph, MessagesState
10
+ from langgraph.prebuilt import tools_condition,ToolNode
11
+ from langchain_core.messages import SystemMessage, HumanMessage
12
+ from langchain.schema import HumanMessage, SystemMessage
13
+ import json
14
+ from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
15
+ from langchain.agents import initialize_agent
16
+ from langchain.agents.agent_types import AgentType
17
+ from IPython.display import Image, display
18
+ import pandas as pd
19
+ from langchain_community.document_loaders import WikipediaLoader
20
+ from langchain_community.document_loaders import ArxivLoader
21
+ import sympy
22
+ from sympy import sympify
23
+
24
+ load_dotenv()
25
+
26
+ @tool
27
+ def arvix_search(query: str) -> str:
28
+ """
29
+ Search Arxiv for a query and return up to 3 results.
30
+
31
+ Args:
32
+ query: The search query.
33
+
34
+ Returns:
35
+ A string with formatted Arxiv search results (truncated to 1000 chars each).
36
+ """
37
+ search_docs = ArxivLoader(query=query, load_max_docs=3).load()
38
+ formatted_search_docs = "\n\n---\n\n".join(
39
+ [
40
+ f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
41
+ for doc in search_docs
42
+ ]
43
+ )
44
+ return formatted_search_docs
45
+
46
@tool
def wiki_search(query: str) -> str:
    """
    Search Wikipedia for a query and return up to 2 formatted results.

    Args:
        query: The search query.

    Returns:
        A string with formatted Wikipedia search results.
    """
    docs = WikipediaLoader(query=query, load_max_docs=2).load()
    # Render each hit as a pseudo-XML <Document> element so the LLM can tell
    # the results apart.
    rendered = []
    for doc in docs:
        meta = doc.metadata
        rendered.append(
            f'<Document source="{meta["source"]}" page="{meta.get("page", "")}"/>\n{doc.page_content}\n</Document>'
        )
    return "\n\n---\n\n".join(rendered)
65
+
66
@tool
def analyze_excel_file(input_str: str) -> str:
    """
    Analyze an Excel file using pandas and answer a question about it.

    Args:
        input_str: JSON string with fields:
            - file_path: Path to the Excel file
            - query: A question about the file contents (optional)

    Returns:
        A summary of the file contents or an error message.
    """
    try:
        # Parse JSON input (json/pandas are already imported at module level).
        payload = json.loads(input_str)
        file_path = payload.get("file_path")
        query = payload.get("query")

        if not file_path:
            return "Error: 'file_path' is required."

        # Open the workbook and enumerate its sheets.
        workbook = pd.ExcelFile(file_path)
        sheets = workbook.sheet_names

        # Summarize only the first sheet by default.
        frame = pd.read_excel(workbook, sheet_name=sheets[0])

        parts = [
            f"Excel file loaded with sheets: {', '.join(sheets)}.\n\n",
            f"First sheet '{sheets[0]}' loaded with {len(frame)} rows and {len(frame.columns)} columns.\n",
            f"Columns: {', '.join(frame.columns)}\n\n",
            "Summary statistics:\n",
            str(frame.describe(include='all')),
        ]
        if query:
            parts.append(f"\n\nQuery: {query} (No advanced query handling implemented yet.)")
        return "".join(parts)

    except json.JSONDecodeError:
        return "Error: Input must be a valid JSON string with 'file_path' and optional 'query'."
    except Exception as e:
        return f"Error analyzing Excel file: {str(e)}"
114
+
115
@tool
def web_search(query: str) -> str:
    """
    Perform a web search using Tavily and return the result.
    """
    try:
        client = TavilySearch()
        response = client.invoke(query)
    except Exception as e:
        return f"Error using TavilySearch: {str(e)}"

    # Tavily normally answers with {"results": [...]}; anything else is
    # surfaced verbatim so the failure mode is visible to the agent.
    if not (isinstance(response, dict) and "results" in response):
        return f"Error: Unexpected Tavily response format: {response}"

    hits = response["results"]
    return "\n\n---\n\n".join(
        f"{hit['title']}\n{hit['url']}\n{hit['content']}" for hit in hits
    )
133
+
134
@tool
def analyze_csv_file(input_str: str) -> str:
    """
    Analyze a CSV file using pandas and answer a question about it.

    Args:
        input_str: JSON string with fields:
            - file_path: Path to the CSV file
            - query: A question about the file contents

    Returns:
        A basic analysis of the file or an error message
    """
    try:
        # Parse the JSON string
        data = json.loads(input_str)
        file_path = data.get("file_path")
        query = data.get("query")

        if not file_path:
            return "Error: 'file_path' is required."

        # Read the CSV
        df = pd.read_csv(file_path)

        # Basic metadata
        result = f"CSV file loaded with {len(df)} rows and {len(df.columns)} columns.\n"
        result += f"Columns: {', '.join(df.columns)}\n\n"
        result += "Summary statistics:\n"
        # `datetime_is_numeric` was deprecated in pandas 1.5 and removed in
        # pandas 2.0, where passing it raises a TypeError that the broad
        # handler below would swallow into an "Error analyzing CSV file"
        # string on every call.  Dropping it also makes this tool consistent
        # with analyze_excel_file, which never used the flag.
        result += str(df.describe(include='all'))

        # Optionally handle a query (not implemented in detail here)
        if query:
            result += f"\n\nQuery: {query} (No logic implemented yet to answer it.)"

        return result

    except json.JSONDecodeError:
        return "Error: Input must be a valid JSON string with 'file_path' and optional 'query'."
    except Exception as e:
        return f"Error analyzing CSV file: {str(e)}"
176
+
177
+
178
@tool
def download_file_from_url(input_str: str) -> str:
    """
    Downloads a file from a URL and saves it in the 'saved_files' directory.

    Args:
        input_str (str): A JSON string with keys:
            - "url": the URL to download from (required)
            - "filename": optional filename to save as

    Returns:
        A message indicating success and file path, or an error message.
    """
    try:
        # Parse the input string
        data = json.loads(input_str)
        url = data.get("url")
        filename = data.get("filename", None)

        if not url:
            return "Error: 'url' is required in the input JSON."

        # Create directory if not exists
        new_dir = os.path.join(os.getcwd(), "saved_files")
        os.makedirs(new_dir, exist_ok=True)

        # Generate filename if not provided
        if not filename:
            path = urlparse(url).path
            filename = os.path.basename(path) or f"downloaded_{os.urandom(4).hex()}"
        else:
            # The filename comes from untrusted tool input: strip any
            # directory components so a value like "../../etc/passwd" cannot
            # escape the saved_files directory.
            filename = os.path.basename(filename)

        filepath = os.path.join(new_dir, filename)

        # Stream the download; the timeout prevents the agent from hanging
        # forever on an unresponsive host (the original call had none, and
        # requests waits indefinitely by default).
        response = requests.get(url, stream=True, timeout=30)
        response.raise_for_status()

        # Save the file, skipping keep-alive chunks (iter_content may yield
        # empty byte strings).
        with open(filepath, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    f.write(chunk)

        return f"File downloaded to {filepath}. You can now process this file."

    except json.JSONDecodeError:
        return "Error: Invalid JSON input. Expected format: {\"url\": \"...\", \"filename\": \"optional_name\"}"
    except Exception as e:
        return f"Error: {str(e)}"
226
+
227
@tool
def find_file_for_question(input_str: str) -> str:
    """
    Constructs a multimodal question prompt for the agent to answer.

    Args:
        input_str (str): JSON string with keys:
            - task_id: ID of the file
            - question: The actual question
            - file_name: (optional) file name, if image is involved

    Returns:
        A full natural language prompt that includes the file URL if needed.
    """
    try:
        payload = json.loads(input_str)
    except json.JSONDecodeError:
        return "Error: Invalid input. Provide JSON with 'task_id', 'question', and optional 'file_name'."

    try:
        task_id = payload.get("task_id")
        question = payload.get("question")
        file_name = payload.get("file_name")

        if not task_id or not question:
            return "Error: Missing 'task_id' or 'question' in input."

        prompt = question
        if file_name:
            # Attach the scoring server's download URL for the task's file.
            file_url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
            prompt += f"\n\nImage file to consider: {file_url}"

        return prompt
    except Exception as e:
        return f"Error: {str(e)}"
262
+
263
@tool
def calculate_math_expression(expr: str) -> str:
    """
    Evaluate a symbolic math expression (e.g., algebraic, numeric, or arithmetic).

    Use this tool if the input is a math expression like '2 + 3*sqrt(4)', 'sin(pi/2)', or '3 ** 2'.

    Input:
        A raw string expression. Example: '2 + 3 * sqrt(4)'

    Returns:
        A float result as a string if successful,
        otherwise a string with the error message.
    """
    # NOTE(review): sympify parses input with eval-like semantics; it is not
    # safe on fully untrusted strings — confirm this is acceptable here.
    try:
        parsed = sympify(expr)
        # Most sympy objects expose evalf(); fall back to plain str() for
        # anything that cannot be evaluated further.
        return str(parsed.evalf()) if hasattr(parsed, "evalf") else str(parsed)
    except Exception as e:
        return f"Error: {str(e)}"
286
+
287
class AgentState(TypedDict):
    """State dict carried through the agent graph."""
    # The running message history.  The original annotation was `str`, but
    # every caller in this file supplies a *list* of LangChain messages
    # (see the `__main__` block), so annotate it as a list.
    # NOTE(review): without an `Annotated[..., add_messages]` reducer,
    # LangGraph overwrites this field on each node update instead of
    # appending; use MessagesState if accumulation is intended — confirm.
    messages: List[Any]
    attachments: Dict[str, Any]    # Attachments (e.g., images, files) related to the question
    context: List[Dict]            # Retrieved context (e.g., search results, documents)
    reasoning: List[str]           # Step-by-step reasoning traces
    partial_answer: Optional[str]  # Intermediate answer (if multi-step)
    final_answer: Optional[str]    # Final answer to return
    tools_used: List[str]          # Track which tools were called (for debugging)
295
+
296
# Tool belt handed to the agent/graph below; list order has no runtime
# significance — the LLM selects tools by name and description.
tools = [
    find_file_for_question,
    analyze_excel_file,
    analyze_csv_file,
    web_search,
    arvix_search,
    wiki_search,
    download_file_from_url,
    calculate_math_expression]
305
+
306
+
307
# Build graph function
def build_graph():
    """Build and compile the LangGraph ReAct agent graph.

    Wires a ChatHuggingFace model (Llama-4 Instruct served via a Hugging Face
    inference endpoint) together with the module-level ``tools`` into the
    standard assistant -> tools -> assistant loop.

    Returns:
        The compiled graph; invoke it with ``{"messages": [HumanMessage(...)]}``.
    """
    llm = HuggingFaceEndpoint(
        repo_id="meta-llama/Llama-4-Scout-17B-16E-Instruct",
        temperature=0,
        provider="novita",
    )
    chat_model = ChatHuggingFace(llm=llm)

    # Bind the tools directly to the chat model so it emits structured tool
    # calls.  The previous version routed each turn through
    # `initialize_agent(...).invoke(...)`, whose return value is a plain
    # dict: placing that dict in "messages" breaks `tools_condition`, which
    # inspects the last *message* object for a `tool_calls` attribute — and
    # the legacy AgentExecutor runs its own internal ReAct loop, duplicating
    # the tool loop this graph already implements.
    llm_with_tools = chat_model.bind_tools(tools)

    system_prompt = """
You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER].
YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
"""
    sys_msg = SystemMessage(content=system_prompt)

    def assistant(state: MessagesState):
        # One LLM step: prepend the system prompt and let the model either
        # answer directly or request a tool call.
        return {"messages": [llm_with_tools.invoke([sys_msg] + state["messages"])]}

    # MessagesState ships the `add_messages` reducer, so each node's returned
    # messages are appended to the running history rather than replacing it.
    builder = StateGraph(MessagesState)

    # Define nodes: these do the work
    builder.add_node("assistant", assistant)
    builder.add_node("tools", ToolNode(tools))

    # Define edges: these determine how the control flow moves
    builder.add_edge(START, "assistant")
    builder.add_conditional_edges(
        "assistant",
        # If the latest message requires a tool, route to tools;
        # otherwise, provide a direct response.
        tools_condition,
    )
    builder.add_edge("tools", "assistant")

    return builder.compile()
358
+
359
+
360
+ if __name__ == "__main__":
361
+ #test the agent with a sample question
362
+ question = "what was the first university in the world?"
363
+ messages = [HumanMessage(content=question)]
364
+ output = build_graph().invoke({"messages": messages})
365
+ #print out the response
366
+ for entry in output["messages"]:
367
+ for msg in entry["input"]:
368
+ if isinstance(msg, HumanMessage):
369
+ print("🧑 Human:", msg.content)
370
+ elif isinstance(msg, SystemMessage):
371
+ print("⚙️ System:", msg.content)
372
+ print("🤖 Output:", entry["output"])
373
+ print("-" * 50)
374
+
375
+