carolinacon committed on
Commit
778116a
·
1 Parent(s): 2e8bb22

Added logic for audio, images, excel and python code

Browse files
app.py CHANGED
@@ -2,8 +2,10 @@ import os
2
  import gradio as gr
3
  import requests
4
  import pandas as pd
 
5
 
6
- from core.agent import GaiaAgent
 
7
 
8
  # (Keep Constants as is)
9
  # --- Constants ---
@@ -15,12 +17,46 @@ class BasicAgent:
15
  agent: GaiaAgent
16
  def __init__(self):
17
  self.agent = GaiaAgent()
18
- def __call__(self, question: str) -> str:
19
  print(f"Agent received question (first 50 chars): {question[:50]}...")
20
- answer = self.agent.__call__(question)
 
 
 
21
  print(f"Agent returning fixed answer: {answer}")
22
  return answer
23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  def run_and_submit_all( profile: gr.OAuthProfile | None):
25
  """
26
  Fetches all questions, runs the BasicAgent on them, submits all answers,
@@ -78,11 +114,14 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
78
  for item in questions_data:
79
  task_id = item.get("task_id")
80
  question_text = item.get("question")
 
 
 
81
  if not task_id or question_text is None:
82
  print(f"Skipping item with missing task_id or question: {item}")
83
  continue
84
  try:
85
- submitted_answer = agent(question_text)
86
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
87
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
88
  except Exception as e:
@@ -98,7 +137,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
98
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
99
  print(status_update)
100
 
101
- # 5. Submit
102
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
103
  try:
104
  response = requests.post(submit_url, json=submission_data, timeout=60)
 
2
  import gradio as gr
3
  import requests
4
  import pandas as pd
5
+ from pathlib import Path
6
 
7
+
8
+ from core.agent import GaiaAgent, Attachment
9
 
10
  # (Keep Constants as is)
11
  # --- Constants ---
 
17
  agent: GaiaAgent
18
  def __init__(self):
19
  self.agent = GaiaAgent()
20
def __call__(self, question: str, attached_content: bytes | None = None, attached_file_name: str | None = None) -> str:
    """Run the wrapped GaiaAgent on a question, optionally with an attached file.

    :param question: the question to answer
    :param attached_content: raw bytes of an optional attached file
    :param attached_file_name: name of the attached file (used to derive its MIME type)
    :return: the agent's answer as a string

    The attachment parameters default to None so existing call sites that pass
    only the question (or question + bytes) keep working instead of raising
    TypeError on the previously-required positional arguments.
    """
    print(f"Agent received question (first 50 chars): {question[:50]}...")
    attachment = None
    # Build an Attachment only when both pieces are present: Attachment
    # derives its MIME type from the file name, so content without a name
    # cannot be wrapped (the original crashed in mimetypes.guess_type(None)).
    if attached_content and attached_file_name:
        attachment = Attachment(attached_content, attached_file_name)
    answer = self.agent.__call__(question, attachment)
    print(f"Agent returning fixed answer: {answer}")
    return answer
28
 
29
+
30
def get_question_attached_file(task_id, file_name) -> bytes | None:
    """Download a task's attachment, cache it under attachments/<task_id>/, and return its bytes.

    :param task_id: id of the task whose attachment is fetched
    :param file_name: name the attachment is saved under
    :return: the raw file content, or None when the download fails
    """
    api_url = DEFAULT_API_URL
    attachment_url = f"{api_url}/files/{task_id}"

    print(f"Fetching attachment from: {attachment_url}")

    try:
        response = requests.get(attachment_url, timeout=15)
        response.raise_for_status()

        print(f"Retrieved {file_name} attachment from: {attachment_url}")

        # Save to disk.  Join components with Path instead of the original
        # f"attachments\{task_id}\{file_name}": literal backslashes are not
        # path separators on POSIX, so that produced one odd flat file name.
        file_path = Path("attachments") / str(task_id) / file_name
        content = response.content

        # Create parent directories if they don't exist
        file_path.parent.mkdir(parents=True, exist_ok=True)

        # Write the file
        file_path.write_bytes(content)

        return content

    except Exception as e:
        print(f"An unexpected error occurred fetching attachment for taskid{task_id}: {e}")
        # Explicit None so callers can distinguish a failed download.
        return None
56
+
57
+
58
+
59
+
60
  def run_and_submit_all( profile: gr.OAuthProfile | None):
61
  """
62
  Fetches all questions, runs the BasicAgent on them, submits all answers,
 
114
  for item in questions_data:
115
  task_id = item.get("task_id")
116
  question_text = item.get("question")
117
+ attached_file_name = item.get("file_name")
118
+ if attached_file_name:
119
+ file_content = get_question_attached_file(task_id, attached_file_name)
120
  if not task_id or question_text is None:
121
  print(f"Skipping item with missing task_id or question: {item}")
122
  continue
123
  try:
124
+ submitted_answer = agent(question_text, file_content)
125
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
126
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
127
  except Exception as e:
 
137
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
138
  print(status_update)
139
 
140
+ #5. Submit
141
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
142
  try:
143
  response = requests.post(submit_url, json=submission_data, timeout=60)
config/prompts.yaml CHANGED
@@ -3,8 +3,9 @@ prompts:
3
  content: |
4
  You are a general AI assistant tasked with answering complex questions.
5
 
6
-
7
  Make sure you think step by step in order to answer the given question.
 
 
8
 
9
  Here is a summary of the steps you took so far:
10
  <summary>
@@ -61,8 +62,10 @@ prompts:
61
  If the value of chunked_last_tool_call is true, this means that the last tool execution returns a result formed from the concatenation
62
  of multiple chunks.
63
  Current value of the chunked_last_tool_call is {{chunked_last_tool_call}}
 
 
64
  type: base_system
65
- variables: ["summary"]
66
  version: 1.0
67
  description: "Core system prompt for all interactions"
68
  final_answer_processor:
@@ -99,4 +102,12 @@ prompts:
99
  type: question_refinement
100
  variables: ["question"]
101
  version: 1.0
102
- description: "Prompt for evaluating a question"
 
 
 
 
 
 
 
 
 
3
  content: |
4
  You are a general AI assistant tasked with answering complex questions.
5
 
 
6
  Make sure you think step by step in order to answer the given question.
7
+
8
+ {{attachment}}
9
 
10
  Here is a summary of the steps you took so far:
11
  <summary>
 
62
  If the value of chunked_last_tool_call is true, this means that the last tool execution returns a result formed from the concatenation
63
  of multiple chunks.
64
  Current value of the chunked_last_tool_call is {{chunked_last_tool_call}}
65
+
66
+ If you generate python code make sure you print the value of the variable you are interested in.
67
  type: base_system
68
+ variables: ["summary", "chunked_last_tool_call", "attachment"]
69
  version: 1.0
70
  description: "Core system prompt for all interactions"
71
  final_answer_processor:
 
102
  type: question_refinement
103
  variables: ["question"]
104
  version: 1.0
105
+ description: "Prompt for evaluating a question"
106
+ audio_evaluation:
107
+ content: |
108
+ You are an audio analysis assistant. Answer questions based on the provided audio. Be precise and factual.
109
+ If you're asked about facts not present in the given audio, say so.
110
+ type: tool
111
+ variables: []
112
+ version: 1.0
113
+ description: "Prompt for audio tool"
core/agent.py CHANGED
@@ -1,9 +1,11 @@
 
 
1
  from langchain_core.messages import HumanMessage
2
  from langgraph.graph.state import CompiledStateGraph
3
 
4
- from core.edges import file_condition
5
  from core.state import State
6
- from nodes.nodes import assistant, optimize_memory, response_processing, orchestrator
7
  from tools.tavily_tools import llm_tools
8
 
9
  from langgraph.graph import START, StateGraph, END
@@ -19,19 +21,15 @@ class GaiaAgent:
19
  builder = StateGraph(State)
20
 
21
  # Define nodes: these do the work
22
- builder.add_node("orchestrator", orchestrator)
23
  builder.add_node("assistant", assistant)
24
  builder.add_node("tools", ToolNode(llm_tools))
25
  builder.add_node("optimize_memory", optimize_memory)
26
  builder.add_node("response_processing", response_processing)
27
 
28
  # Define edges: these determine how the control flow moves
29
- builder.add_edge(START, "orchestrator")
30
- builder.add_conditional_edges("orchestrator",
31
- # If the question involves a file processing -> file_condition routes to the END state
32
- # If the question does not involve a file processing -> tools_condition routes to
33
- # assistant
34
- file_condition)
35
 
36
  builder.add_conditional_edges(
37
  "assistant",
@@ -46,27 +44,25 @@ class GaiaAgent:
46
  builder.add_edge("response_processing", END)
47
  self.react_graph = builder.compile()
48
 
49
- def __call__(self, question: str) -> str:
50
- messages = [HumanMessage(content=question)]
51
- messages = self.react_graph.invoke({"messages": messages})
52
- # for m in messages['messages']:
53
- # m.pretty_print()
 
 
 
54
 
55
  answer = messages['messages'][-1].content
56
  return answer
57
 
58
- def __streamed_call__(self, question: str) -> str:
59
- # Test the web agent
60
- inputs = {
61
- "messages": [
62
- HumanMessage(
63
- content=question
64
- )
65
- ]
66
- }
67
 
68
  # Stream the web agent's response
69
- for s in self.react_graph.stream(inputs, stream_mode="values"):
70
  message = s["messages"][-1]
71
  if isinstance(message, tuple):
72
  print(message)
 
1
+ from typing import Optional
2
+
3
  from langchain_core.messages import HumanMessage
4
  from langgraph.graph.state import CompiledStateGraph
5
 
6
+ from core.messages import Attachment
7
  from core.state import State
8
+ from nodes.nodes import assistant, optimize_memory, response_processing, pre_processor
9
  from tools.tavily_tools import llm_tools
10
 
11
  from langgraph.graph import START, StateGraph, END
 
21
  builder = StateGraph(State)
22
 
23
  # Define nodes: these do the work
24
+ builder.add_node("pre_processor", pre_processor)
25
  builder.add_node("assistant", assistant)
26
  builder.add_node("tools", ToolNode(llm_tools))
27
  builder.add_node("optimize_memory", optimize_memory)
28
  builder.add_node("response_processing", response_processing)
29
 
30
  # Define edges: these determine how the control flow moves
31
+ builder.add_edge(START, "pre_processor")
32
+ builder.add_edge("pre_processor", "assistant")
 
 
 
 
33
 
34
  builder.add_conditional_edges(
35
  "assistant",
 
44
  builder.add_edge("response_processing", END)
45
  self.react_graph = builder.compile()
46
 
47
def __call__(self, question: str, attachment: Optional[Attachment] = None) -> str:
    """Run the compiled graph on the question and return the final answer text.

    :param question: the user question, wrapped as the first HumanMessage
    :param attachment: optional attachment; only its file reference is placed
        in the graph state — downstream nodes re-load the bytes themselves
    :return: the content of the last message produced by the graph
    """
    initial_state = {"messages": [HumanMessage(content=question)]}
    if attachment:
        initial_state["file_reference"] = attachment.file_path

    messages = self.react_graph.invoke(initial_state)
    # for m in messages['messages']:
    #     m.pretty_print()

    # The graph's terminal node leaves the answer as the last message.
    answer = messages['messages'][-1].content
    return answer
58
 
59
+ def __streamed_call__(self, question: str, attachment: Optional[Attachment] = None) -> str:
60
+ initial_state = {"messages": [HumanMessage(content=question)]}
61
+ if attachment:
62
+ initial_state["file_reference"] = attachment.file_path
 
 
 
 
 
63
 
64
  # Stream the web agent's response
65
+ for s in self.react_graph.stream(initial_state, stream_mode="values"):
66
  message = s["messages"][-1]
67
  if isinstance(message, tuple):
68
  print(message)
core/edges.py DELETED
@@ -1,10 +0,0 @@
1
- from core.state import State
2
- from typing import Literal
3
- from langgraph.graph import END
4
-
5
-
6
- def file_condition(state: State) -> Literal["assistant", END]:
7
- has_attachment = state.get("attachment", "")
8
- if has_attachment == "true":
9
- return END
10
- return "assistant"
 
 
 
 
 
 
 
 
 
 
 
core/messages.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Literal, Optional
2
+ import mimetypes
3
+ import base64
4
+ from pathlib import Path
5
+
6
+
7
+ def get_content_type(mime_type: str) -> Optional[Literal["image", "audio", "video", "document"]]:
8
+ """Extracts content type from MIME type string."""
9
+ if not mime_type:
10
+ return None
11
+
12
+ # Split into type/subtype (e.g., "image/png" → "image")
13
+ main_type = mime_type.split('/')[0].lower()
14
+
15
+ # Map to LangChain content types
16
+ if main_type in ["image", "audio", "file", "text"]:
17
+ return main_type
18
+ elif mime_type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
19
+ return "file"
20
+ raise Exception(f"Cannot extract type from mime_type {mime_type}")
21
+
22
+
23
+ class Attachment:
24
+ content: bytes
25
+ mime_type: str
26
+ file_path: str
27
+ type: str
28
+
29
+ def __init__(self, content: bytes, file_path: str):
30
+ self.content = content
31
+ self.file_path = file_path
32
+ self.mime_type = mimetypes.guess_type(file_path)[0]
33
+ self.type = get_content_type(self.mime_type)
34
+
35
+ def get_encoded_content_b64(self) -> str:
36
+ return base64.b64encode(self.content).decode("utf-8")
37
+
38
+
39
class AttachmentHandler:
    """Converts attachments into the content-part dicts sent to chat models."""

    def __init__(self, supported_types: list):
        # Categories this handler accepts, e.g. ["image", "audio", "file", "text"].
        self.supported_types = supported_types

    def get_attachment_representation(self, attachment: Attachment) -> dict:
        """Build a message content part for the attachment based on its category."""
        if attachment.type not in self.supported_types:
            raise Exception(f"Invalid attachment type{attachment.type}")

        if attachment.type == "image":
            # Images are inlined as a base64 data URL.
            return {"type": "image_url",
                    "image_url": {"url": f"data:{attachment.mime_type};base64," + attachment.get_encoded_content_b64()}}

        if attachment.type == "audio":
            # NOTE(review): audio is embedded as base64 inside a plain "text"
            # part here — looks like a placeholder; confirm the target model
            # actually accepts this shape (get_representation uses input_audio).
            return {"type": "text",
                    "text": attachment.get_encoded_content_b64()}
        if attachment.type == "text":
            return {"type": attachment.type, "data": attachment.content, "mime_type": attachment.mime_type}

        # The only remaining supported category at this point is "file".
        return {"type": attachment.type, "source": "base64", "data": attachment.get_encoded_content_b64(),
                "mime_type": attachment.mime_type}

    def get_representation(self, type: str, content: bytes, format: str, mime_type) -> dict:
        """Build a content part directly from raw bytes (audio and image only)."""
        base64_content = base64.b64encode(content).decode("utf-8")
        if type not in self.supported_types:
            raise Exception(f"Invalid attachment type{type}")
        if type == "audio":
            return {"type": "input_audio",
                    "input_audio": {"data": base64_content, "format": format}}
        if type == "image":
            return {"type": "image_url",
                    "image_url": {"url": f"data:{mime_type};base64," + base64_content}}

        raise Exception(f"Cannot extract a representation for type {type}")

    def fetch_file_from_reference(self, file_reference: str) -> bytes:
        """Fetches file bytes from a reference (e.g., S3, local path, URL)."""

        # It's a local file path
        file = Path(file_reference)
        if file_reference.startswith("/") or file_reference.startswith("./") or file.exists():
            return file.read_bytes()

        # Example 3: It's an ID in your database (pseudocode)
        else:
            # file_bytes = database.lookup_file_bytes(file_reference)
            # return file_bytes
            raise ValueError(
                f"Could not resolve file reference: {file_reference}. Implement 'fetch_file_from_reference' for your "
                f"storage system.")


# Module-level singleton used throughout the code base.
supported_types = ["image", "audio", "file", "text"]
attachmentHandler = AttachmentHandler(supported_types)
core/state.py CHANGED
@@ -4,5 +4,6 @@ from langgraph.graph import MessagesState
4
  class State(MessagesState):
5
  summary: str
6
  question: str
7
- attachment: str
8
  chunked_last_tool_call: bool
 
 
 
4
  class State(MessagesState):
5
  summary: str
6
  question: str
 
7
  chunked_last_tool_call: bool
8
+ attachment: str
9
+ file_reference: str # Attachment file reference: a path, URL, or unique ID
nodes/nodes.py CHANGED
@@ -1,33 +1,49 @@
1
- from langchain_core.messages import SystemMessage, HumanMessage, AIMessage, RemoveMessage, ToolMessage
 
 
 
 
2
  from langchain_openai import ChatOpenAI
3
 
 
4
  from core.state import State
5
- import time
6
-
7
  from nodes.chunking_node import OversizedContentHandler
 
 
 
8
  from tools.tavily_tools import llm_tools
9
  from utils.prompt_manager import prompt_mgmt
10
 
11
  model = ChatOpenAI(model="gpt-4.1")
12
  response_processing_model = ChatOpenAI(model="gpt-4.1-mini")
13
-
 
 
14
  model = model.bind_tools(llm_tools, parallel_tool_calls=False)
15
 
16
 
17
  # Node
18
- def orchestrator(state: State):
19
  # Get original question if it exists
20
  question = state.get("question", "")
21
  if not question:
22
  question = state["messages"][0].content
23
 
24
- message = prompt_mgmt.render_template("question_evaluation", {"question": question})
 
 
 
 
 
 
 
 
25
 
26
  # Add prompt to our history
27
  messages = [HumanMessage(content=message)]
28
  response = response_processing_model.invoke(messages)
29
  if response.content == "YES":
30
- return {"question": question, "attachment": "true", "messages": [response]}
31
  return {"question": question}
32
 
33
 
@@ -40,9 +56,14 @@ def assistant(state: State):
40
  # Get original question if it exists
41
  question = state.get("question", "")
42
  if not question:
43
- question = state["messages"][0].content
44
-
45
- prompt_params = {"summary": summary, "chunked_last_tool_call": state.get("chunked_last_tool_call", False)}
 
 
 
 
 
46
  sys_msg = SystemMessage(content=prompt_mgmt.render_template("base_system", prompt_params))
47
  try:
48
  response = model.invoke([sys_msg] + state["messages"])
@@ -83,7 +104,6 @@ def optimize_memory(state: State):
83
  messages = state["messages"][:-2] + [HumanMessage(content=summary_message)]
84
  response = model.invoke(messages)
85
 
86
- print("&&&" * 50, state["messages"][-1].type)
87
  # Delete all but the 2 most recent messages and the first one
88
  remaining_messages = [RemoveMessage(id=m.id) for m in state["messages"][:-2]]
89
 
 
1
+ import mimetypes
2
+ import pathlib
3
+ import time
4
+
5
+ from langchain_core.messages import SystemMessage, HumanMessage, AIMessage, RemoveMessage
6
  from langchain_openai import ChatOpenAI
7
 
8
+ from core.messages import attachmentHandler
9
  from core.state import State
 
 
10
  from nodes.chunking_node import OversizedContentHandler
11
+ from tools.audio_tool import query_audio
12
+ from tools.excel_tool import query_excel_file
13
+ from tools.python_executor import execute_python_code
14
  from tools.tavily_tools import llm_tools
15
  from utils.prompt_manager import prompt_mgmt
16
 
17
  model = ChatOpenAI(model="gpt-4.1")
18
  response_processing_model = ChatOpenAI(model="gpt-4.1-mini")
19
+ llm_tools.append(query_audio)
20
+ llm_tools.append(query_excel_file)
21
+ llm_tools.append(execute_python_code)
22
  model = model.bind_tools(llm_tools, parallel_tool_calls=False)
23
 
24
 
25
  # Node
26
+ def pre_processor(state: State):
27
  # Get original question if it exists
28
  question = state.get("question", "")
29
  if not question:
30
  question = state["messages"][0].content
31
 
32
+ file_reference = state.get("file_reference", "")
33
+ extension = pathlib.Path(file_reference).suffix
34
+ if extension == "png":
35
+ content_bytes = attachmentHandler.fetch_file_from_reference(file_reference)
36
+ mime_type = mimetypes.guess_type(file_reference)[0]
37
+ state["messages"][0].content = [{"type": "text", "text": question},
38
+ attachmentHandler.get_representation("image", content_bytes, "png", mime_type)]
39
+
40
+ message = prompt_mgmt.render_template("question_evaluation", {"question": question[0]})
41
 
42
  # Add prompt to our history
43
  messages = [HumanMessage(content=message)]
44
  response = response_processing_model.invoke(messages)
45
  if response.content == "YES":
46
+ return {"question": question, "attachment": "true"}
47
  return {"question": question}
48
 
49
 
 
56
  # Get original question if it exists
57
  question = state.get("question", "")
58
  if not question:
59
+ question = state["messages"][0].content[0]
60
+
61
+ attachment = ""
62
+ file_reference = state.get("file_reference", "")
63
+ if file_reference:
64
+ attachment = f" you have access to the file with the following reference {file_reference}"
65
+ prompt_params = {"summary": summary, "chunked_last_tool_call": state.get("chunked_last_tool_call", False),
66
+ "attachment": attachment}
67
  sys_msg = SystemMessage(content=prompt_mgmt.render_template("base_system", prompt_params))
68
  try:
69
  response = model.invoke([sys_msg] + state["messages"])
 
104
  messages = state["messages"][:-2] + [HumanMessage(content=summary_message)]
105
  response = model.invoke(messages)
106
 
 
107
  # Delete all but the 2 most recent messages and the first one
108
  remaining_messages = [RemoveMessage(id=m.id) for m in state["messages"][:-2]]
109
 
requirements.txt CHANGED
@@ -5,4 +5,7 @@ langchain_core
5
  langgraph
6
  langchain-tavily
7
  langchain-community
8
- faiss-cpu
 
 
 
 
5
  langgraph
6
  langchain-tavily
7
  langchain-community
8
+ faiss-cpu
9
+ langchain-experimental
10
+ openpyxl
11
+ tabulate
tools/audio_tool.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.tools import tool
2
+ from langchain_core.messages import SystemMessage, HumanMessage
3
+ from langchain_openai import ChatOpenAI
4
+
5
+ from core.messages import attachmentHandler
6
+ from utils.prompt_manager import prompt_mgmt
7
+
8
+ audio_model = ChatOpenAI(model="gpt-4o-audio-preview")
9
+
10
+
11
@tool
def query_audio(question: str, file_reference: str) -> str:
    """
    Tool to answer questions based on the audio file identified by the provided file reference
    :param question: Question to be answered
    :param file_reference: file reference
    :return: the answer to the given question
    """
    sys_msg = SystemMessage(content=prompt_mgmt.render_template("audio_evaluation", []))
    content_bytes = attachmentHandler.fetch_file_from_reference(file_reference)

    # Text part first, then the inline base64 mp3 payload.
    content = [{"type": "text", "text": question}, attachmentHandler.get_representation("audio", content_bytes, "mp3", None)]

    message = [HumanMessage(content=content)]
    try:
        response = audio_model.invoke(([sys_msg] + message))
        # Return the answer text rather than the whole message object, so the
        # value matches the declared -> str contract of the tool.
        return response.content
    except Exception as e:
        print("Exception while invoking audio tool")
        # Surface the failure to the calling agent instead of implicitly
        # returning None (the original fell off the end of the function).
        return f"Error while invoking audio tool: {e}"
tools/excel_tool.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from langchain.tools import tool
3
+ from langchain_experimental.agents import create_pandas_dataframe_agent
4
+ from langchain_openai import ChatOpenAI
5
+
6
+ from core.messages import attachmentHandler
7
+
8
+ llm = ChatOpenAI(model="gpt-4.1")
9
+
10
+
11
@tool
def query_excel_file(question: str, file_reference: str) -> str:
    """
    Analyze the incoming excel file (xls/xlsx) and answer the question based on this analysis
    :param question: the question concerning the data in the given excel file
    :param file_reference: reference of the excel file (resolved via attachmentHandler, e.g. a local path)
    :return: the answer to the question
    """
    # Load Excel file: resolve the reference to raw bytes, then let pandas
    # parse the workbook (first sheet by default).
    content_bytes = attachmentHandler.fetch_file_from_reference(file_reference)
    df = pd.read_excel(content_bytes)
    # Create agent that runs generated pandas code over the dataframe.
    pandas_agent = create_pandas_dataframe_agent(llm, df, verbose=True, allow_dangerous_code=True)
    response = pandas_agent.run(question)
    return response
tools/python_executor.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import subprocess
2
+ import sys
3
+ from typing import Optional
4
+
5
+ from langchain.tools import tool
6
+ from langchain_experimental.tools import PythonREPLTool
7
+
8
+
9
@tool
def execute_python_code(intent: str, code: Optional[str] = None, file_reference: Optional[str] = None) -> str:
    """
    Executes the provided python code snippet or python file identified by its reference and returns the outcome of
    the execution
    :param intent: this parameter should be set to either code_snippet or file_execution depending on
    the intent of the user
    :param code: if the intent is code_snippet, this parameter should be populated with the
    python code snippet to be executed
    :param file_reference: if the intent is file_execution, this parameter should
    be populated with the reference of the file to be executed
    :return: the outcome of the python code execution
    """
    if intent == "code_snippet":
        if not code:
            raise Exception("Invalid arguments. Tool intent is code_snippet but no value provided for code argument")
        python_tool = PythonREPLTool()
        return python_tool.run(code, verbose=True)

    if intent == "file_execution":
        # Mirror the code_snippet validation: without this, a missing
        # reference was handed to the subprocess helper as None.
        if not file_reference:
            raise Exception(
                "Invalid arguments. Tool intent is file_execution but no value provided for file_reference argument")
        return subprocess_python_exec(file_reference)
    raise Exception("Invalid arguments. Invalid value for intent parameter")
31
+
32
+
33
def subprocess_python_exec(file_reference: str) -> str:
    """Execute Python code in a subprocess for better isolation.

    Runs the referenced script with the current interpreter, capped at 60
    seconds, and reports stdout on success or an "Error: ..." string on any
    failure.
    """
    try:
        # A child process keeps a misbehaving script from taking down the agent.
        completed = subprocess.run(
            [sys.executable, file_reference],
            capture_output=True,
            text=True,
            timeout=60,
        )
    except subprocess.TimeoutExpired:
        return "Error: Code execution timed out"
    except Exception as e:
        return f"Error: {str(e)}"

    if completed.returncode != 0:
        return f"Error: {completed.stderr}"
    return completed.stdout if completed.stdout else "Code executed successfully"
utils/prompt_manager.py CHANGED
@@ -1,11 +1,12 @@
1
- from enum import Enum
2
  from dataclasses import dataclass, field
3
- from typing import Any, Dict, List, Optional
4
- from jinja2 import Environment, BaseLoader
 
 
5
  import tiktoken
6
  import yaml
7
- from pathlib import Path
8
- import os
9
 
10
 
11
  class PromptType(Enum):
@@ -13,6 +14,7 @@ class PromptType(Enum):
13
  ANSWER_REFINEMENT = "answer_refinement"
14
  MEMORY_OPTIMIZATION = "memory_optimization"
15
  QUESTION_REFINEMENT = "question_refinement"
 
16
 
17
 
18
  @dataclass
 
1
+ import os
2
  from dataclasses import dataclass, field
3
+ from enum import Enum
4
+ from pathlib import Path
5
+ from typing import Any, Dict, List
6
+
7
  import tiktoken
8
  import yaml
9
+ from jinja2 import Environment, BaseLoader
 
10
 
11
 
12
  class PromptType(Enum):
 
14
  ANSWER_REFINEMENT = "answer_refinement"
15
  MEMORY_OPTIMIZATION = "memory_optimization"
16
  QUESTION_REFINEMENT = "question_refinement"
17
+ TOOL = "tool"
18
 
19
 
20
  @dataclass