giulia-fontanella commited on
Commit
7a40d3a
·
unverified ·
1 Parent(s): 16262d0

test agent

Browse files

Signed-off-by: giulia fontanella <giulia.fontanella@secomind.com>

app.py CHANGED
@@ -1,32 +1,34 @@
 
1
  import os
 
2
  import gradio as gr
3
- import requests
4
- import inspect
5
  import pandas as pd
6
- from agent import BasicAgent
7
- from langchain_huggingface import HuggingFaceEndpoint
8
  from langchain_openai import ChatOpenAI
9
 
 
10
 
11
  # --- Constants ---
12
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
13
  HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
14
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
15
 
16
- REPO_ID = "meta-llama/Llama-3.1-8B-Instruct"
17
  PROVIDER_TYPE = "openai" # "openai" or "huggingface"
18
 
19
 
20
- def run_and_submit_all( profile: gr.OAuthProfile | None):
21
- """
22
- Fetches all questions, runs the BasicAgent on them, submits all answers,
 
23
  and displays the results.
24
  """
25
  # --- Determine HF Space Runtime URL and Repo URL ---
26
- space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
27
 
28
  if profile:
29
- username= f"{profile.username}"
30
  print(f"User logged in: {username}")
31
  else:
32
  print("User not logged in.")
@@ -36,27 +38,28 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
36
  questions_url = f"{api_url}/questions"
37
  submit_url = f"{api_url}/submit"
38
 
39
- # 1. Instantiate Agent
40
  try:
41
  if PROVIDER_TYPE == "huggingface":
42
  llm = HuggingFaceEndpoint(
43
- repo_id=REPO_ID,
44
  huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
45
  )
46
  chat = ChatHuggingFace(llm=llm, verbose=True)
47
  elif PROVIDER_TYPE == "openai":
48
- chat = ChatOpenAI(model="gpt-4o")
49
  else:
50
  print(f"Provider {PROVIDER_TYPE} not supported.")
51
  return f"Provider {PROVIDER_TYPE} not supported", None
52
 
53
- agent = BasicAgent(chat)
54
 
55
  except Exception as e:
56
  print(f"Error instantiating agent: {e}")
57
  return f"Error initializing agent: {e}", None
58
-
59
- # In the case of an app running as a hugging Face space, this link points toward your codebase
 
60
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
61
  print(agent_code)
62
 
@@ -67,16 +70,16 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
67
  response.raise_for_status()
68
  questions_data = response.json()
69
  if not questions_data:
70
- print("Fetched questions list is empty.")
71
- return "Fetched questions list is empty or invalid format.", None
72
  print(f"Fetched {len(questions_data)} questions.")
73
  except requests.exceptions.RequestException as e:
74
  print(f"Error fetching questions: {e}")
75
  return f"Error fetching questions: {e}", None
76
  except requests.exceptions.JSONDecodeError as e:
77
- print(f"Error decoding JSON response from questions endpoint: {e}")
78
- print(f"Response text: {response.text[:500]}")
79
- return f"Error decoding server response for questions: {e}", None
80
  except Exception as e:
81
  print(f"An unexpected error occurred fetching questions: {e}")
82
  return f"An unexpected error occurred fetching questions: {e}", None
@@ -89,10 +92,10 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
89
  task_id = item.get("task_id")
90
  question_text = item.get("question")
91
  file_name = item.get("file_name")
92
- if file_name!='':
93
  files_url = f"{api_url}/files/{task_id}"
94
  file = requests.get(files_url, timeout=15)
95
- with open(file_name, 'wb') as f:
96
  f.write(file.content)
97
  print(f"Downloaded {files_url}.")
98
  if not task_id or question_text is None:
@@ -100,18 +103,36 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
100
  continue
101
  try:
102
  submitted_answer = agent(question_text, file_name)
103
- answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
104
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
 
 
 
 
 
 
 
 
105
  except Exception as e:
106
- print(f"Error running agent on task {task_id}: {e}")
107
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
 
 
 
 
 
108
 
109
  if not answers_payload:
110
  print("Agent did not produce any answers to submit.")
111
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
112
 
113
- # 4. Prepare Submission
114
- submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
 
 
 
 
115
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
116
  print(status_update)
117
 
@@ -180,20 +201,19 @@ with gr.Blocks() as demo:
180
 
181
  run_button = gr.Button("Run Evaluation & Submit All Answers")
182
 
183
- status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
 
 
184
  # Removed max_rows=10 from DataFrame constructor
185
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
186
 
187
- run_button.click(
188
- fn=run_and_submit_all,
189
- outputs=[status_output, results_table]
190
- )
191
 
192
  if __name__ == "__main__":
193
- print("\n" + "-"*30 + " App Starting " + "-"*30)
194
  # Check for SPACE_HOST and SPACE_ID at startup for information
195
  space_host_startup = os.getenv("SPACE_HOST")
196
- space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
197
 
198
  if space_host_startup:
199
  print(f"✅ SPACE_HOST found: {space_host_startup}")
@@ -201,14 +221,18 @@ if __name__ == "__main__":
201
  else:
202
  print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
203
 
204
- if space_id_startup: # Print repo URLs if SPACE_ID is found
205
  print(f"✅ SPACE_ID found: {space_id_startup}")
206
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
207
- print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
 
 
208
  else:
209
- print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
 
 
210
 
211
- print("-"*(60 + len(" App Starting ")) + "\n")
212
 
213
- print("Launching Gradio Interface for Basic Agent Evaluation...")
214
- demo.launch(debug=True, share=False)
 
1
+ import inspect
2
  import os
3
+
4
  import gradio as gr
 
 
5
  import pandas as pd
6
+ import requests
7
+ from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
8
  from langchain_openai import ChatOpenAI
9
 
10
+ from src.agent import SmartAgent
11
 
12
  # --- Constants ---
13
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
14
  HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
15
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
16
 
17
+ MODEL = "gpt-4o" # "gpt-4o", "meta-llama/Llama-3.1-8B-Instruct", ...
18
  PROVIDER_TYPE = "openai" # "openai" or "huggingface"
19
 
20
 
21
+ def run_and_submit_all(profile: gr.OAuthProfile | None):
22
+ """Run the agent and submit the results.
23
+
24
+ Fetches all questions, runs the SmartAgent on them, submits all answers,
25
  and displays the results.
26
  """
27
  # --- Determine HF Space Runtime URL and Repo URL ---
28
+ space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
29
 
30
  if profile:
31
+ username = f"{profile.username}"
32
  print(f"User logged in: {username}")
33
  else:
34
  print("User not logged in.")
 
38
  questions_url = f"{api_url}/questions"
39
  submit_url = f"{api_url}/submit"
40
 
41
+ # 1. Instantiate Agent
42
  try:
43
  if PROVIDER_TYPE == "huggingface":
44
  llm = HuggingFaceEndpoint(
45
+ repo_id=MODEL,
46
  huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
47
  )
48
  chat = ChatHuggingFace(llm=llm, verbose=True)
49
  elif PROVIDER_TYPE == "openai":
50
+ chat = ChatOpenAI(model=MODEL, temperature=0.2)
51
  else:
52
  print(f"Provider {PROVIDER_TYPE} not supported.")
53
  return f"Provider {PROVIDER_TYPE} not supported", None
54
 
55
+ agent = SmartAgent(chat)
56
 
57
  except Exception as e:
58
  print(f"Error instantiating agent: {e}")
59
  return f"Error initializing agent: {e}", None
60
+
61
+ # In the case of an app running as a hugging Face space,
62
+ # this link points toward your codebase
63
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
64
  print(agent_code)
65
 
 
70
  response.raise_for_status()
71
  questions_data = response.json()
72
  if not questions_data:
73
+ print("Fetched questions list is empty.")
74
+ return "Fetched questions list is empty or invalid format.", None
75
  print(f"Fetched {len(questions_data)} questions.")
76
  except requests.exceptions.RequestException as e:
77
  print(f"Error fetching questions: {e}")
78
  return f"Error fetching questions: {e}", None
79
  except requests.exceptions.JSONDecodeError as e:
80
+ print(f"Error decoding JSON response from questions endpoint: {e}")
81
+ print(f"Response text: {response.text[:500]}")
82
+ return f"Error decoding server response for questions: {e}", None
83
  except Exception as e:
84
  print(f"An unexpected error occurred fetching questions: {e}")
85
  return f"An unexpected error occurred fetching questions: {e}", None
 
92
  task_id = item.get("task_id")
93
  question_text = item.get("question")
94
  file_name = item.get("file_name")
95
+ if file_name != "":
96
  files_url = f"{api_url}/files/{task_id}"
97
  file = requests.get(files_url, timeout=15)
98
+ with open(file_name, "wb") as f:
99
  f.write(file.content)
100
  print(f"Downloaded {files_url}.")
101
  if not task_id or question_text is None:
 
103
  continue
104
  try:
105
  submitted_answer = agent(question_text, file_name)
106
+ answers_payload.append(
107
+ {"task_id": task_id, "submitted_answer": submitted_answer}
108
+ )
109
+ results_log.append(
110
+ {
111
+ "Task ID": task_id,
112
+ "Question": question_text,
113
+ "Submitted Answer": submitted_answer,
114
+ }
115
+ )
116
  except Exception as e:
117
+ print(f"Error running agent on task {task_id}: {e}")
118
+ results_log.append(
119
+ {
120
+ "Task ID": task_id,
121
+ "Question": question_text,
122
+ "Submitted Answer": f"AGENT ERROR: {e}",
123
+ }
124
+ )
125
 
126
  if not answers_payload:
127
  print("Agent did not produce any answers to submit.")
128
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
129
 
130
+ # 4. Prepare Submission
131
+ submission_data = {
132
+ "username": username.strip(),
133
+ "agent_code": agent_code,
134
+ "answers": answers_payload,
135
+ }
136
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
137
  print(status_update)
138
 
 
201
 
202
  run_button = gr.Button("Run Evaluation & Submit All Answers")
203
 
204
+ status_output = gr.Textbox(
205
+ label="Run Status / Submission Result", lines=5, interactive=False
206
+ )
207
  # Removed max_rows=10 from DataFrame constructor
208
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
209
 
210
+ run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
 
 
 
211
 
212
  if __name__ == "__main__":
213
+ print("\n" + "-" * 30 + " App Starting " + "-" * 30)
214
  # Check for SPACE_HOST and SPACE_ID at startup for information
215
  space_host_startup = os.getenv("SPACE_HOST")
216
+ space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
217
 
218
  if space_host_startup:
219
  print(f"✅ SPACE_HOST found: {space_host_startup}")
 
221
  else:
222
  print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
223
 
224
+ if space_id_startup: # Print repo URLs if SPACE_ID is found
225
  print(f"✅ SPACE_ID found: {space_id_startup}")
226
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
227
+ print(
228
+ f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main"
229
+ )
230
  else:
231
+ print(
232
+ "ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined."
233
+ )
234
 
235
+ print("-" * (60 + len(" App Starting ")) + "\n")
236
 
237
+ print("Launching Gradio Interface for Agent Evaluation...")
238
+ demo.launch(debug=True, share=False)
notebooks/test.ipynb ADDED
@@ -0,0 +1,203 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "id": "abf90ca5",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "import os\n",
11
+ "import requests\n",
12
+ "from langchain_openai import ChatOpenAI\n",
13
+ "from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace"
14
+ ]
15
+ },
16
+ {
17
+ "cell_type": "code",
18
+ "execution_count": null,
19
+ "id": "b4299f37",
20
+ "metadata": {},
21
+ "outputs": [],
22
+ "source": [
23
+ "import sys\n",
24
+ "\n",
25
+ "sys.path.append(os.path.abspath(\"../src\"))"
26
+ ]
27
+ },
28
+ {
29
+ "cell_type": "code",
30
+ "execution_count": null,
31
+ "id": "73b38064",
32
+ "metadata": {},
33
+ "outputs": [],
34
+ "source": [
35
+ "from agent import SmartAgent"
36
+ ]
37
+ },
38
+ {
39
+ "cell_type": "code",
40
+ "execution_count": null,
41
+ "id": "0f925adb",
42
+ "metadata": {},
43
+ "outputs": [],
44
+ "source": [
45
+ "# --- Constants ---\n",
46
+ "DEFAULT_API_URL = \"https://agents-course-unit4-scoring.hf.space\"\n",
47
+ "HUGGINGFACEHUB_API_TOKEN = os.getenv(\"HUGGINGFACEHUB_API_TOKEN\")\n",
48
+ "OPENAI_API_KEY = os.getenv(\"OPENAI_API_KEY\")\n",
49
+ "\n",
50
+ "REPO_ID = \"meta-llama/Llama-3.1-8B-Instruct\"\n",
51
+ "PROVIDER_TYPE = \"openai\" # \"openai\" or \"huggingface\""
52
+ ]
53
+ },
54
+ {
55
+ "cell_type": "code",
56
+ "execution_count": null,
57
+ "id": "541ebb1b",
58
+ "metadata": {},
59
+ "outputs": [],
60
+ "source": [
61
+ "TAVILY_API_KEY = os.getenv(\"TAVILY_API_KEY\")"
62
+ ]
63
+ },
64
+ {
65
+ "cell_type": "code",
66
+ "execution_count": null,
67
+ "id": "320e99b7",
68
+ "metadata": {},
69
+ "outputs": [],
70
+ "source": [
71
+ "api_url = DEFAULT_API_URL\n",
72
+ "questions_url = f\"{api_url}/questions\"\n",
73
+ "submit_url = f\"{api_url}/submit\""
74
+ ]
75
+ },
76
+ {
77
+ "cell_type": "code",
78
+ "execution_count": null,
79
+ "id": "f31b88db",
80
+ "metadata": {},
81
+ "outputs": [],
82
+ "source": [
83
+ "# 1. Instantiate Agent\n",
84
+ "try:\n",
85
+ " if PROVIDER_TYPE == \"huggingface\":\n",
86
+ " llm = HuggingFaceEndpoint(\n",
87
+ " repo_id=REPO_ID,\n",
88
+ " huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,\n",
89
+ " )\n",
90
+ " chat = ChatHuggingFace(llm=llm, verbose=True)\n",
91
+ " elif PROVIDER_TYPE == \"openai\":\n",
92
+ " chat = ChatOpenAI(model=\"gpt-4o\", temperature=0.2)\n",
93
+ " else:\n",
94
+ " print(f\"Provider {PROVIDER_TYPE} not supported.\")\n",
95
+ "\n",
96
+ " agent = SmartAgent(chat)\n",
97
+ "\n",
98
+ "except Exception as e:\n",
99
+ " print(f\"Error instantiating agent: {e}\")"
100
+ ]
101
+ },
102
+ {
103
+ "cell_type": "code",
104
+ "execution_count": null,
105
+ "id": "b4d18d12",
106
+ "metadata": {},
107
+ "outputs": [],
108
+ "source": [
109
+ "# 2. Fetch Questions\n",
110
+ "print(f\"Fetching questions from: {questions_url}\")\n",
111
+ "try:\n",
112
+ " response = requests.get(questions_url, timeout=15)\n",
113
+ " response.raise_for_status()\n",
114
+ " questions_data = response.json()\n",
115
+ " if not questions_data:\n",
116
+ " print(\"Fetched questions list is empty.\")\n",
117
+ " print(f\"Fetched {len(questions_data)} questions.\")\n",
118
+ "except requests.exceptions.RequestException as e:\n",
119
+ " print(f\"Error fetching questions: {e}\")\n",
120
+ "except requests.exceptions.JSONDecodeError as e:\n",
121
+ " print(f\"Error decoding JSON response from questions endpoint: {e}\")\n",
122
+ " print(f\"Response text: {response.text[:500]}\")\n",
123
+ "except Exception as e:\n",
124
+ " print(f\"An unexpected error occurred fetching questions: {e}\")"
125
+ ]
126
+ },
127
+ {
128
+ "cell_type": "code",
129
+ "execution_count": null,
130
+ "id": "9627e327",
131
+ "metadata": {},
132
+ "outputs": [],
133
+ "source": [
134
+ "# 3. Run your Agent\n",
135
+ "results_log = []\n",
136
+ "answers_payload = []\n",
137
+ "\n",
138
+ "item = questions_data[0]\n",
139
+ "print(f\"Running agent on question: {item}\")\n",
140
+ "\n",
141
+ "task_id = item.get(\"task_id\")\n",
142
+ "question_text = item.get(\"question\")\n",
143
+ "file_name = item.get(\"file_name\")\n",
144
+ "if file_name != \"\":\n",
145
+ " files_url = f\"{api_url}/files/{task_id}\"\n",
146
+ " file = requests.get(files_url, timeout=15)\n",
147
+ " with open(file_name, \"wb\") as f:\n",
148
+ " f.write(file.content)\n",
149
+ " print(f\"Downloaded {files_url}.\")\n",
150
+ "if not task_id or question_text is None:\n",
151
+ " print(f\"Skipping item with missing task_id or question: {item}\")\n",
152
+ "try:\n",
153
+ " submitted_answer = agent(question_text, file_name)\n",
154
+ " answers_payload.append({\"task_id\": task_id, \"submitted_answer\": submitted_answer})\n",
155
+ " results_log.append(\n",
156
+ " {\n",
157
+ " \"Task ID\": task_id,\n",
158
+ " \"Question\": question_text,\n",
159
+ " \"Submitted Answer\": submitted_answer,\n",
160
+ " }\n",
161
+ " )\n",
162
+ "except Exception as e:\n",
163
+ " print(f\"Error running agent on task {task_id}: {e}\")\n",
164
+ " results_log.append(\n",
165
+ " {\n",
166
+ " \"Task ID\": task_id,\n",
167
+ " \"Question\": question_text,\n",
168
+ " \"Submitted Answer\": f\"AGENT ERROR: {e}\",\n",
169
+ " }\n",
170
+ " )"
171
+ ]
172
+ },
173
+ {
174
+ "cell_type": "code",
175
+ "execution_count": null,
176
+ "id": "699cba0f",
177
+ "metadata": {},
178
+ "outputs": [],
179
+ "source": []
180
+ }
181
+ ],
182
+ "metadata": {
183
+ "kernelspec": {
184
+ "display_name": ".venv",
185
+ "language": "python",
186
+ "name": "python3"
187
+ },
188
+ "language_info": {
189
+ "codemirror_mode": {
190
+ "name": "ipython",
191
+ "version": 3
192
+ },
193
+ "file_extension": ".py",
194
+ "mimetype": "text/x-python",
195
+ "name": "python",
196
+ "nbconvert_exporter": "python",
197
+ "pygments_lexer": "ipython3",
198
+ "version": "3.10.12"
199
+ }
200
+ },
201
+ "nbformat": 4,
202
+ "nbformat_minor": 5
203
+ }
requirements.txt CHANGED
@@ -11,3 +11,7 @@ wikipedia
11
  arxiv
12
  pymupdf
13
  feedparser
 
 
 
 
 
11
  arxiv
12
  pymupdf
13
  feedparser
14
+ ffmpeg-python
15
+ yt_dlp
16
+ openpyxl
17
+ openai-whisper
src/__init__.py ADDED
File without changes
agent.py → src/agent.py RENAMED
@@ -1,35 +1,65 @@
1
  import os
2
- from typing import TypedDict, Annotated, Optional
3
- from langgraph.graph.message import add_messages
 
4
  from langchain_core.messages import AnyMessage, HumanMessage, SystemMessage
5
  from langchain_openai import ChatOpenAI
6
- from langchain.tools import tool
7
- from langgraph.prebuilt import ToolNode
8
  from langgraph.graph import START, StateGraph
9
- from langgraph.prebuilt import tools_condition
10
- from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
11
- from langfuse.callback import CallbackHandler
12
- from tools import ExtractTextFromImage, DescribeImage, TranscribeAudio, read_excel, read_python, wiki_search, web_search, arxiv_search
 
 
 
 
 
 
 
 
 
 
 
13
 
14
 
15
  class AgentState(TypedDict):
 
 
16
  messages: Annotated[list[AnyMessage], add_messages]
17
 
18
 
19
- class BasicAgent():
20
  def __init__(self, chat):
 
21
  self.multimodal_model = ChatOpenAI(model="gpt-4o")
22
-
23
- extract_text_from_image = tool(ExtractTextFromImage(self.multimodal_model).__call__)
24
- describe_image = tool(DescribeImage(self.multimodal_model).__call__)
25
- transcribe_audio = tool(TranscribeAudio(self.multimodal_model).__call__)
26
 
27
- self.tools = [extract_text_from_image, describe_image, transcribe_audio, read_excel, read_python, wiki_search, web_search, arxiv_search]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  self.chat_with_tools = chat.bind_tools(self.tools)
29
  self._initialize_graph()
30
  self._initialize_telemetry()
31
 
32
  def _initialize_graph(self):
 
33
  builder = StateGraph(AgentState)
34
 
35
  # Define nodes
@@ -38,7 +68,7 @@ class BasicAgent():
38
 
39
  # Define edges
40
  builder.add_edge(START, "assistant")
41
- builder.add_conditional_edges("assistant",tools_condition)
42
  builder.add_edge("tools", "assistant")
43
 
44
  # Compile the graph
@@ -46,41 +76,65 @@ class BasicAgent():
46
  print("Agent initialized.")
47
 
48
  def _initialize_telemetry(self):
 
49
  LANGFUSE_PUBLIC_KEY = os.getenv("LANGFUSE_PUBLIC_KEY")
50
  LANGFUSE_SECRET_KEY = os.getenv("LANGFUSE_SECRET_KEY")
51
  LANGFUSE_HOST = "https://cloud.langfuse.com"
52
 
53
- self.langfuse_handler = CallbackHandler(
54
  public_key=LANGFUSE_PUBLIC_KEY,
55
  secret_key=LANGFUSE_SECRET_KEY,
56
- host=LANGFUSE_HOST
57
  )
 
 
 
58
  print("Telemetry initialized.")
59
 
60
- def __call__(self, question: str, file_name : str) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
- sys_msg = SystemMessage(content=f"""
63
- You are a general AI assistant. I will ask you a question. Reason step by step and search for the information you need using available tools.
64
- Finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER].
65
- YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
66
- When providing the final answer, ONLY give [YOUR FINAL ANSWER]. Do not add anything else, no additional motivation or explanation, and do not return 'FINAL ANSWER:'.
67
- """)
68
-
69
  print(f"Agent received question: {question}.")
70
-
71
- if file_name is not None and file_name!='':
72
  print(f"Provided file: {file_name}.")
73
- messages=[sys_msg] + [HumanMessage(content=f"{question}. The file you have access to is {file_name}.")]
 
 
 
 
74
  else:
75
- messages=[sys_msg] + [HumanMessage(content=question)]
76
-
77
- response = self.agent.invoke({"messages":messages}, config={"callbacks": [self.langfuse_handler]})
78
- answer = response['messages'][-1].content
 
 
79
  print(f"Agent returning answer: {answer}")
80
  return answer
81
 
82
  def assistant(self, state: AgentState):
 
83
  response = self.chat_with_tools.invoke(state["messages"])
84
  return {
85
- "messages": state["messages"] + [response],
86
  }
 
1
  import os
2
+ from typing import Annotated, TypedDict
3
+
4
+ from langchain.tools import tool
5
  from langchain_core.messages import AnyMessage, HumanMessage, SystemMessage
6
  from langchain_openai import ChatOpenAI
7
+ from langfuse import Langfuse
8
+ from langfuse.langchain import CallbackHandler
9
  from langgraph.graph import START, StateGraph
10
+ from langgraph.graph.message import add_messages
11
+ from langgraph.prebuilt import ToolNode, tools_condition
12
+
13
+ from .tools import (
14
+ DescribeImage,
15
+ ExtractTextFromImage,
16
+ arxiv_search,
17
+ download_youtube_video,
18
+ extract_audio_from_video,
19
+ read_excel,
20
+ read_python,
21
+ transcribe_audio,
22
+ web_search,
23
+ wiki_search,
24
+ )
25
 
26
 
27
  class AgentState(TypedDict):
28
+ """Class representing the state for agent graph."""
29
+
30
  messages: Annotated[list[AnyMessage], add_messages]
31
 
32
 
33
+ class SmartAgent:
34
  def __init__(self, chat):
35
+ """Initialize agent, multimodal model and tools."""
36
  self.multimodal_model = ChatOpenAI(model="gpt-4o")
 
 
 
 
37
 
38
+ extract_text_from_image = tool(
39
+ ExtractTextFromImage(self.multimodal_model).__call_extract_text_from_image__
40
+ )
41
+ describe_image = tool(
42
+ DescribeImage(self.multimodal_model).__call_describe_image__
43
+ )
44
+
45
+ self.tools = [
46
+ extract_text_from_image,
47
+ describe_image,
48
+ transcribe_audio,
49
+ read_excel,
50
+ read_python,
51
+ wiki_search,
52
+ web_search,
53
+ arxiv_search,
54
+ download_youtube_video,
55
+ extract_audio_from_video,
56
+ ]
57
  self.chat_with_tools = chat.bind_tools(self.tools)
58
  self._initialize_graph()
59
  self._initialize_telemetry()
60
 
61
  def _initialize_graph(self):
62
+ """Initialize and compile the agent graph."""
63
  builder = StateGraph(AgentState)
64
 
65
  # Define nodes
 
68
 
69
  # Define edges
70
  builder.add_edge(START, "assistant")
71
+ builder.add_conditional_edges("assistant", tools_condition)
72
  builder.add_edge("tools", "assistant")
73
 
74
  # Compile the graph
 
76
  print("Agent initialized.")
77
 
78
  def _initialize_telemetry(self):
79
+ """Initialize langfuse telemetry using CallbackHandler."""
80
  LANGFUSE_PUBLIC_KEY = os.getenv("LANGFUSE_PUBLIC_KEY")
81
  LANGFUSE_SECRET_KEY = os.getenv("LANGFUSE_SECRET_KEY")
82
  LANGFUSE_HOST = "https://cloud.langfuse.com"
83
 
84
+ langfuse = Langfuse(
85
  public_key=LANGFUSE_PUBLIC_KEY,
86
  secret_key=LANGFUSE_SECRET_KEY,
87
+ host=LANGFUSE_HOST, # or your custom host if applicable
88
  )
89
+
90
+ # Create a Langchain callback handler using the initialized client
91
+ self.langfuse_handler = CallbackHandler()
92
  print("Telemetry initialized.")
93
 
94
+ def __call__(self, question: str, file_name: str) -> str:
95
+ """Call the agent, passing system prompt and eventual file name."""
96
+ sys_msg = SystemMessage(
97
+ content="""You are a general AI assistant. You will be asked a factual question.
98
+
99
+ 1. Reason step by step and search for the information using available tools if needed.
100
+ 2. Finish your response with this exact format:
101
+ FINAL ANSWER: [YOUR FINAL ANSWER]
102
+
103
+ IMPORTANT RULES for [YOUR FINAL ANSWER]:
104
+ - If the answer is a number, provide only the number, with no commas, units, or symbols, do not write it as a string.
105
+ - If the answer is a string, provide only the core noun phrase with no articles or abbreviations.
106
+ - If the answer is a list, return a comma-separated list applying the above rules per item.
107
+ - DO NOT include any other text before or after the final answer.
108
+ - DO NOT explain or justify the answer after it is given.
109
+ - DO NOT repeat the question.
110
+ - DO NOT include the words 'FINAL ANSWER: '.
111
+
112
+ Strictly follow these formatting rules.
113
+ """
114
+ )
115
 
 
 
 
 
 
 
 
116
  print(f"Agent received question: {question}.")
117
+
118
+ if file_name is not None and file_name != "":
119
  print(f"Provided file: {file_name}.")
120
+ messages = [sys_msg] + [
121
+ HumanMessage(
122
+ content=f"{question}. The file you have access to is {file_name}."
123
+ )
124
+ ]
125
  else:
126
+ messages = [sys_msg] + [HumanMessage(content=question)]
127
+
128
+ response = self.agent.invoke(
129
+ {"messages": messages}, config={"callbacks": [self.langfuse_handler]}
130
+ )
131
+ answer = response["messages"][-1].content
132
  print(f"Agent returning answer: {answer}")
133
  return answer
134
 
135
  def assistant(self, state: AgentState):
136
+ """Assistant node which calls the model initialized with tools."""
137
  response = self.chat_with_tools.invoke(state["messages"])
138
  return {
139
+ "messages": state["messages"] + [response],
140
  }
tools.py → src/tools.py RENAMED
@@ -1,17 +1,18 @@
1
  import base64
 
 
2
  import pandas as pd
3
- from langchain_core.messages import HumanMessage
 
4
  from langchain.tools import tool
5
- from langchain_community.tools.tavily_search import TavilySearchResults
6
- from langchain_community.document_loaders import WikipediaLoader, ArxivLoader
7
- # import yt_dlp
8
- # import ffmpeg
9
 
10
 
11
  @tool
12
  def read_excel(file_path: str) -> str:
13
- """
14
- Extract readable text from an Excel file (.xlsx or .xls).
15
 
16
  Args:
17
  file_path: Path to the Excel file.
@@ -23,9 +24,15 @@ def read_excel(file_path: str) -> str:
23
  df_dict = pd.read_excel(file_path, sheet_name=None) # Read all sheets
24
  result = []
25
  for sheet_name, sheet_df in df_dict.items():
26
- sheet_text = sheet_df.to_string(index=False)
27
- result.append(f"Sheet: {sheet_name}\n{sheet_text}")
28
- return "\n\n".join(result)
 
 
 
 
 
 
29
 
30
  except Exception as e:
31
  return f"Error reading Excel file: {str(e)}"
@@ -33,8 +40,7 @@ def read_excel(file_path: str) -> str:
33
 
34
  @tool
35
  def read_python(file_path: str) -> str:
36
- """
37
- Extract source code from a Python (.py) file.
38
 
39
  Args:
40
  file_path: Path to the Python file.
@@ -48,29 +54,31 @@ def read_python(file_path: str) -> str:
48
  except Exception as e:
49
  return f"Error reading Python file: {str(e)}"
50
 
51
-
52
  class ExtractTextFromImage:
 
 
53
  def __init__(self, multimodal_model):
 
54
  self.multimodal_model = multimodal_model
55
 
56
- def __call__(self, img_path: str) -> str:
57
- """
58
- Extract text from an image file.
59
-
60
  Args:
61
  img_path: A string representing the path to an image (e.g., PNG, JPEG).
62
-
63
  Returns:
64
- A single string containing the concatenated text extracted from the image.
65
  """
66
  all_text = ""
67
  try:
68
  # Read image and encode as base64
69
  with open(img_path, "rb") as image_file:
70
  image_bytes = image_file.read()
71
-
72
  image_base64 = base64.b64encode(image_bytes).decode("utf-8")
73
-
74
  # Prepare the prompt including the base64 image data
75
  message = [
76
  HumanMessage(
@@ -91,13 +99,13 @@ class ExtractTextFromImage:
91
  ]
92
  )
93
  ]
94
-
95
  # Call the vision-capable model
96
  response = self.multimodal_model.invoke(message)
97
-
98
  # Append extracted text
99
  all_text += response.content + "\n\n"
100
-
101
  return all_text.strip()
102
  except Exception as e:
103
  error_msg = f"Error extracting text: {str(e)}"
@@ -106,21 +114,24 @@ class ExtractTextFromImage:
106
 
107
 
108
  class DescribeImage:
 
 
109
  def __init__(self, multimodal_model):
 
110
  self.multimodal_model = multimodal_model
111
 
112
- def __call__(self, img_path: str, query: str) -> str:
113
- """
114
- Generate a detailed description of an image.
115
- This function reads a image from an url, encodes it, and sends it to a
116
- vision-capable language model to obtain a comprehensive, natural language
117
  description of the image's content, including its objects, actions, and context,
118
  following a specific query.
119
-
120
  Args:
121
  img_path: A string representing the path to an image (e.g., PNG, JPEG).
122
  query: Information to extract from the image.
123
-
124
  Returns:
125
  A single string containing a detailed description of the image.
126
  """
@@ -128,9 +139,9 @@ class DescribeImage:
128
  # Read image and encode as base64
129
  with open(img_path, "rb") as image_file:
130
  image_bytes = image_file.read()
131
-
132
  image_base64 = base64.b64encode(image_bytes).decode("utf-8")
133
-
134
  # Prepare message payload
135
  message = [
136
  HumanMessage(
@@ -138,7 +149,8 @@ class DescribeImage:
138
  {
139
  "type": "text",
140
  "text": (
141
- f"Describe this image in rich detail. Include objects, people, setting, background elements, and any inferred actions or context. Avoid technical jargon. In particular, extract the following information: {query}" ),
 
142
  },
143
  {
144
  "type": "image_url",
@@ -151,151 +163,137 @@ class DescribeImage:
151
  ]
152
  response = self.multimodal_model.invoke(message)
153
  return response.content.strip()
154
-
155
  except Exception as e:
156
  error_msg = f"Error describing image: {str(e)}"
157
  print(error_msg)
158
  return ""
159
 
160
-
161
- class TranscribeAudio:
162
- def __init__(self, multimodal_model):
163
- self.multimodal_model = multimodal_model
164
 
165
- def __call__(self, audio_path: str, query:str) -> str:
166
- """
167
- Transcribe an MP3 file.
168
 
169
- Args:
170
- audio_path: Path to the MP3 audio file.
171
 
172
- Returns:
173
- Transcribed text as a string.
174
- """
175
- try:
176
- with open(audio_path, "rb") as audio_file:
177
- audio_bytes = audio_file.read()
178
 
179
- audio_data = AudioFile(
180
- mime_type="audio/mpeg", # MP3 MIME type
181
- data=audio_bytes
182
- )
183
 
184
- message = [
185
- HumanMessage(
186
- content=[
187
- {
188
- "type": "text",
189
- "text": (
190
- "Transcribe the speech from this audio file. "
191
- "Return only the transcribed text, with no extra commentary."
192
- ),
193
- },
194
- {
195
- "type": "audio",
196
- "audio": audio_data,
197
- },
198
- ]
199
- )
200
- ]
201
 
202
- response = self.audio_llm.invoke(message)
203
- return response.content.strip()
204
 
205
- except Exception as e:
206
- error_msg = f"Error transcribing audio: {str(e)}"
207
- print(error_msg)
208
- return ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209
 
210
 
211
- # @tool
212
- # def download_youtube_video(youtube_url: str, output_path: str) -> str:
213
- # """
214
- # Download a YouTube video as an MP4 file.
215
-
216
- # Args:
217
- # youtube_url: The YouTube video URL.
218
- # output_path: Desired output path for the downloaded MP4 file.
219
-
220
- # Returns:
221
- # Path to the saved video file.
222
- # """
223
- # ydl_opts = {
224
- # 'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best',
225
- # 'outtmpl': output_path,
226
- # 'merge_output_format': 'mp4',
227
- # 'quiet': True,
228
- # }
229
- # with yt_dlp.YoutubeDL(ydl_opts) as ydl:
230
- # ydl.download([youtube_url])
231
- # return output_path
232
-
233
-
234
- # @tool
235
- # def extract_audio_from_video(video_path: str, audio_output: str) -> str:
236
- # """
237
- # Extracts audio from an MP4 video file and saves it as MP3.
238
-
239
- # Args:
240
- # video_path: Path to the input MP4 video file.
241
- # audio_output: Path for the output MP3 file.
242
-
243
- # Returns:
244
- # Path to the audio file.
245
- # """
246
- # try:
247
- # (
248
- # ffmpeg
249
- # .input(video_path)
250
- # .output(audio_output, format='mp3', acodec='libmp3lame', t=60) # limit to 60 sec
251
- # .overwrite_output()
252
- # .run(quiet=True)
253
- # )
254
- # return audio_output
255
- # except ffmpeg.Error as e:
256
- # raise RuntimeError(f"FFmpeg error: {e.stderr.decode()}") from e
257
-
258
-
259
  @tool
260
  def wiki_search(query: str) -> str:
261
  """Search Wikipedia for a query and return maximum 2 results.
262
-
263
  Args:
264
- query: The search query."""
 
265
  search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
266
  formatted_search_docs = "\n\n---\n\n".join(
267
  [
268
  f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
269
  for doc in search_docs
270
- ])
 
271
  return {"wiki_results": formatted_search_docs}
272
 
273
 
274
  @tool
275
  def web_search(query: str) -> str:
276
  """Search Tavily for a query and return maximum 3 results.
277
-
278
  Args:
279
- query: The search query."""
 
280
  search_docs = TavilySearchResults(max_results=3).invoke(query)
281
  formatted_search_docs = "\n\n---\n\n".join(
282
  [
283
- f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
284
  for doc in search_docs
285
- ])
 
286
  return {"web_results": formatted_search_docs}
287
 
288
 
289
  @tool
290
  def arxiv_search(query: str) -> str:
291
- """Search Arxiv for a query and return maximum 3 result.
292
-
293
  Args:
294
- query: The search query."""
295
- search_docs = ArxivLoader(query=query, load_max_docs=3).load()
 
296
  formatted_search_docs = "\n\n---\n\n".join(
297
  [
298
- f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
 
 
 
 
 
 
 
299
  for doc in search_docs
300
- ])
 
301
  return {"arvix_results": formatted_search_docs}
 
1
  import base64
2
+
3
+ import ffmpeg
4
  import pandas as pd
5
+ import whisper
6
+ import yt_dlp
7
  from langchain.tools import tool
8
+ from langchain.tools.tavily_search import TavilySearchResults
9
+ from langchain_community.document_loaders import ArxivLoader, WikipediaLoader
10
+ from langchain_core.messages import HumanMessage
 
11
 
12
 
13
@tool
def read_excel(file_path: str) -> str:
    """Extract readable text from an Excel file (.xlsx or .xls).

    Args:
        file_path: Path to the Excel file.

    Returns:
        One labelled JSON dump per sheet ("Sheet: <name>" followed by the
        records), or an error message if the file cannot be read.
    """
    try:
        # sheet_name=None loads every sheet as {sheet name: DataFrame}.
        sheets = pd.read_excel(file_path, sheet_name=None)
        # Label each sheet's JSON-serialized records and concatenate.
        chunks = [
            f"Sheet: {name}\n{frame.to_json(orient='records', lines=False)}\n\n"
            for name, frame in sheets.items()
        ]
        return "".join(chunks)

    except Exception as e:
        return f"Error reading Excel file: {str(e)}"
 
40
 
41
@tool
def read_python(file_path: str) -> str:
    """Extract source code from a Python (.py) file.

    Args:
        file_path: Path to the Python file.

    Returns:
        The file's source code as a string, or an error message if the
        file cannot be read.
    """
    try:
        # NOTE(review): the body was elided in this view; this reproduces the
        # visible contract (read the file, return its text, error string on
        # failure) -- confirm against the original implementation.
        with open(file_path, "r", encoding="utf-8") as source_file:
            return source_file.read()

    except Exception as e:
        return f"Error reading Python file: {str(e)}"
56
 
57
+
58
class ExtractTextFromImage:
    """Callable tool that extracts (OCRs) text from an image via a
    vision-capable chat model.

    BUG FIX: the handler was previously named ``__call_extract_text_from_image__``,
    which is not a real dunder, so instances were not callable. It is now
    ``__call__``; the old name is kept as a backward-compatible alias.
    """

    def __init__(self, multimodal_model):
        """Store the vision-capable (multimodal) chat model to query."""
        self.multimodal_model = multimodal_model

    def __call__(self, img_path: str) -> str:
        """Extract text from an image file.

        Args:
            img_path: A string representing the path to an image (e.g., PNG, JPEG).

        Returns:
            A single string containing the concatenated text extracted from
            the image, or "" when extraction fails.
        """
        all_text = ""
        try:
            # Read image and encode as base64 so it can be inlined in the prompt.
            with open(img_path, "rb") as image_file:
                image_bytes = image_file.read()

            image_base64 = base64.b64encode(image_bytes).decode("utf-8")

            # Prepare the prompt including the base64 image data.
            # NOTE(review): the exact prompt text and image payload were elided
            # in this view; confirm the wording and data-URL shape against the
            # original implementation.
            message = [
                HumanMessage(
                    content=[
                        {
                            "type": "text",
                            "text": (
                                "Extract all of the text visible in this image. "
                                "Return only the extracted text, with no extra commentary."
                            ),
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/png;base64,{image_base64}"
                            },
                        },
                    ]
                )
            ]

            # Call the vision-capable model.
            response = self.multimodal_model.invoke(message)

            # Append extracted text.
            all_text += response.content + "\n\n"

            return all_text.strip()
        except Exception as e:
            error_msg = f"Error extracting text: {str(e)}"
            print(error_msg)
            return ""

    # Backward-compatible alias for the previous non-standard method name.
    __call_extract_text_from_image__ = __call__
 
114
 
115
 
116
class DescribeImage:
    """Callable tool that produces a rich description of an image via a
    vision-capable chat model.

    BUG FIX: the handler was previously named ``__call_describe_image__``,
    which is not a real dunder, so instances were not callable. It is now
    ``__call__``; the old name is kept as a backward-compatible alias.
    """

    def __init__(self, multimodal_model):
        """Store the vision-capable (multimodal) chat model to query."""
        self.multimodal_model = multimodal_model

    def __call__(self, img_path: str, query: str) -> str:
        """Generate a detailed description of an image.

        This function reads an image from a path, encodes it, and sends it to
        a vision-capable language model to obtain a comprehensive, natural
        language description of the image's content, including its objects,
        actions, and context, following a specific query.

        Args:
            img_path: A string representing the path to an image (e.g., PNG, JPEG).
            query: Information to extract from the image.

        Returns:
            A single string containing a detailed description of the image,
            or "" when description fails.
        """
        try:
            # Read image and encode as base64 so it can be inlined in the prompt.
            with open(img_path, "rb") as image_file:
                image_bytes = image_file.read()

            image_base64 = base64.b64encode(image_bytes).decode("utf-8")

            # Prepare message payload: instruction text plus the inlined image.
            # NOTE(review): the image_url portion was elided in this view;
            # confirm the data-URL shape matches the original implementation.
            message = [
                HumanMessage(
                    content=[
                        {
                            "type": "text",
                            "text": (
                                f"Describe this image in rich detail. Include objects, people, setting, background elements, and any inferred actions or context. Avoid technical jargon. In particular, extract the following information: {query}"
                            ),
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/png;base64,{image_base64}"
                            },
                        },
                    ]
                )
            ]
            response = self.multimodal_model.invoke(message)
            return response.content.strip()

        except Exception as e:
            error_msg = f"Error describing image: {str(e)}"
            print(error_msg)
            return ""

    # Backward-compatible alias for the previous non-standard method name.
    __call_describe_image__ = __call__
171
 
 
 
 
 
172
 
173
@tool
def transcribe_audio(audio_path: str) -> str:
    """Transcribe an MP3 file.

    Args:
        audio_path: Path to the MP3 audio file.

    Returns:
        Transcribed text as a string, or "" if transcription fails.
    """
    try:
        # Whisper model size trades accuracy for speed/memory:
        # "tiny" < "small" < "medium" < "large".
        model = whisper.load_model("small")
        result = model.transcribe(audio_path)
        # BUG FIX: transcribe() returns a dict; the declared contract is the
        # transcribed text, which lives under its "text" key.
        return result["text"]

    except Exception as e:
        error_msg = f"Error transcribing audio: {str(e)}"
        print(error_msg)
        return ""
 
 
 
 
 
 
 
 
 
 
 
 
 
193
 
 
 
194
 
195
@tool
def download_youtube_video(youtube_url: str, output_path: str) -> str:
    """Download a YouTube video as an MP4 file.

    Args:
        youtube_url: The YouTube video URL.
        output_path: Desired output path for the downloaded MP4 file.

    Returns:
        Path to the saved video file.
    """
    # Prefer an MP4 video stream paired with M4A audio, fall back to the
    # best single MP4 (or any best format), and remux into one MP4 file.
    download_options = {
        "format": "bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best",
        "outtmpl": output_path,
        "merge_output_format": "mp4",
        "quiet": True,
    }
    with yt_dlp.YoutubeDL(download_options) as downloader:
        downloader.download([youtube_url])
    return output_path
215
+
216
+
217
@tool
def extract_audio_from_video(video_path: str, audio_output: str) -> str:
    """Extracts audio from an MP4 video file and saves it as MP3.

    Args:
        video_path: Path to the input MP4 video file.
        audio_output: Path for the output MP3 file.

    Returns:
        Path to the audio file, or "" if extraction fails.
    """
    try:
        (
            ffmpeg.input(video_path)
            .output(
                audio_output, format="mp3", acodec="libmp3lame", t=60
            )  # limit to 60 sec
            .overwrite_output()
            .run(quiet=True)
        )
        return audio_output
    except Exception as e:
        # BUG FIX: the previous message said "Error transcribing audio"
        # (copy-paste from transcribe_audio); this tool extracts audio.
        error_msg = f"Error extracting audio: {str(e)}"
        print(error_msg)
        return ""
242
 
243
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
244
@tool
def wiki_search(query: str) -> dict:
    """Search Wikipedia for a query and return maximum 2 results.

    Args:
        query: The search query.

    Returns:
        A dict with a single "wiki_results" key holding the formatted
        documents as one string.
    """
    # BUG FIX: the annotation said `-> str`, but the function returns a dict.
    search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
    formatted_search_docs = "\n\n---\n\n".join(
        [
            f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
            for doc in search_docs
        ]
    )
    return {"wiki_results": formatted_search_docs}
259
 
260
 
261
@tool
def web_search(query: str) -> dict:
    """Search Tavily for a query and return maximum 3 results.

    Args:
        query: The search query.

    Returns:
        A dict with a single "web_results" key holding the formatted
        documents as one string.
    """
    # BUG FIX: the annotation said `-> str`, but the function returns a dict.
    search_docs = TavilySearchResults(max_results=3).invoke(query)
    formatted_search_docs = "\n\n---\n\n".join(
        [
            f'<Document source="{doc["url"]}" title="{doc["title"]}" score="{doc.get("score", "")}">\n{doc["content"]}\n</Document>'
            for doc in search_docs
        ]
    )
    return {"web_results": formatted_search_docs}
276
 
277
 
278
@tool
def arxiv_search(query: str) -> dict:
    """Search Arxiv for a query and return maximum 2 results.

    Args:
        query: The search query.

    Returns:
        A dict with a single "arvix_results" key holding the formatted
        documents as one string.
    """
    # BUG FIX: the annotation said `-> str`, but the function returns a dict.
    search_docs = ArxivLoader(query=query, load_max_docs=2).load()
    formatted_search_docs = "\n\n---\n\n".join(
        [
            (
                f'<Document title="{doc.metadata.get("Title", "")}" '
                f'published="{doc.metadata.get("Published", "")}" '
                f'authors="{doc.metadata.get("Authors", "")}">\n'
                f'Summary: {doc.metadata.get("Summary", "")}\n\n'
                f"{doc.page_content}\n"
                f"</Document>"
            )
            for doc in search_docs
        ]
    )
    # NOTE(review): "arvix_results" is a typo for "arxiv_results", but it is
    # part of the tool's output contract -- keep until callers are updated.
    return {"arvix_results": formatted_search_docs}