Princekumar committed on
Commit
1d256d0
·
1 Parent(s): 81917a3

Agent files

Browse files
Files changed (6) hide show
  1. app.py +68 -27
  2. helpers.py +101 -0
  3. llm.py +37 -0
  4. prompts.py +3 -0
  5. requirements.txt +7 -1
  6. tools.py +515 -0
app.py CHANGED
@@ -3,32 +3,46 @@ import gradio as gr
3
  import requests
4
  import inspect
5
  import pandas as pd
 
 
 
 
 
 
 
 
6
 
7
  # (Keep Constants as is)
8
  # --- Constants ---
9
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
 
 
11
  # --- Basic Agent Definition ---
12
  # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
13
  class BasicAgent:
14
  def __init__(self):
15
  print("BasicAgent initialized.")
 
 
 
 
16
  def __call__(self, question: str) -> str:
17
  print(f"Agent received question (first 50 chars): {question[:50]}...")
18
- fixed_answer = "This is a default answer."
19
  print(f"Agent returning fixed answer: {fixed_answer}")
20
  return fixed_answer
21
 
22
- def run_and_submit_all( profile: gr.OAuthProfile | None):
 
23
  """
24
  Fetches all questions, runs the BasicAgent on them, submits all answers,
25
  and displays the results.
26
  """
27
  # --- Determine HF Space Runtime URL and Repo URL ---
28
- space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
29
 
30
  if profile:
31
- username= f"{profile.username}"
32
  print(f"User logged in: {username}")
33
  else:
34
  print("User not logged in.")
@@ -37,6 +51,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
37
  api_url = DEFAULT_API_URL
38
  questions_url = f"{api_url}/questions"
39
  submit_url = f"{api_url}/submit"
 
40
 
41
  # 1. Instantiate Agent ( modify this part to create your agent)
42
  try:
@@ -55,16 +70,16 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
55
  response.raise_for_status()
56
  questions_data = response.json()
57
  if not questions_data:
58
- print("Fetched questions list is empty.")
59
- return "Fetched questions list is empty or invalid format.", None
60
  print(f"Fetched {len(questions_data)} questions.")
61
  except requests.exceptions.RequestException as e:
62
  print(f"Error fetching questions: {e}")
63
  return f"Error fetching questions: {e}", None
64
  except requests.exceptions.JSONDecodeError as e:
65
- print(f"Error decoding JSON response from questions endpoint: {e}")
66
- print(f"Response text: {response.text[:500]}")
67
- return f"Error decoding server response for questions: {e}", None
68
  except Exception as e:
69
  print(f"An unexpected error occurred fetching questions: {e}")
70
  return f"An unexpected error occurred fetching questions: {e}", None
@@ -76,23 +91,46 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
76
  for item in questions_data:
77
  task_id = item.get("task_id")
78
  question_text = item.get("question")
 
 
 
 
 
79
  if not task_id or question_text is None:
80
  print(f"Skipping item with missing task_id or question: {item}")
81
  continue
82
  try:
83
  submitted_answer = agent(question_text)
84
- answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
85
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
 
 
 
 
 
 
 
 
86
  except Exception as e:
87
- print(f"Error running agent on task {task_id}: {e}")
88
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
 
 
 
 
 
89
 
90
  if not answers_payload:
91
  print("Agent did not produce any answers to submit.")
92
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
93
 
94
- # 4. Prepare Submission
95
- submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
 
 
 
 
96
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
97
  print(status_update)
98
 
@@ -162,20 +200,19 @@ with gr.Blocks() as demo:
162
 
163
  run_button = gr.Button("Run Evaluation & Submit All Answers")
164
 
165
- status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
 
 
166
  # Removed max_rows=10 from DataFrame constructor
167
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
168
 
169
- run_button.click(
170
- fn=run_and_submit_all,
171
- outputs=[status_output, results_table]
172
- )
173
 
174
  if __name__ == "__main__":
175
- print("\n" + "-"*30 + " App Starting " + "-"*30)
176
  # Check for SPACE_HOST and SPACE_ID at startup for information
177
  space_host_startup = os.getenv("SPACE_HOST")
178
- space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
179
 
180
  if space_host_startup:
181
  print(f"✅ SPACE_HOST found: {space_host_startup}")
@@ -183,14 +220,18 @@ if __name__ == "__main__":
183
  else:
184
  print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
185
 
186
- if space_id_startup: # Print repo URLs if SPACE_ID is found
187
  print(f"✅ SPACE_ID found: {space_id_startup}")
188
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
189
- print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
 
 
190
  else:
191
- print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
 
 
192
 
193
- print("-"*(60 + len(" App Starting ")) + "\n")
194
 
195
  print("Launching Gradio Interface for Basic Agent Evaluation...")
196
- demo.launch(debug=True, share=False)
 
3
  import requests
4
  import inspect
5
  import pandas as pd
6
+ from smolagents import CodeAgent
7
+ from helpers import download_file_from_url
8
+ from llm import model
9
+ from prompts import SYSTEM_PROMPT
10
+ from tools import agent_tools
11
+ from dotenv import load_dotenv
12
+
13
+ load_dotenv()
14
 
15
  # (Keep Constants as is)
16
  # --- Constants ---
17
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
18
 
19
+
20
  # --- Basic Agent Definition ---
21
  # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
22
class BasicAgent:
    """Thin wrapper around a smolagents CodeAgent with a custom system prompt."""

    def __init__(self):
        print("BasicAgent initialized.")
        # Build the agent directly on self; planning_interval=3 makes the
        # agent re-plan every third step.
        self.agent = CodeAgent(model=model, tools=agent_tools, planning_interval=3)
        # Prepend the GAIA-style answer-format instructions to the agent's
        # default system prompt rather than replacing it.
        self.agent.system_prompt = SYSTEM_PROMPT + "\n" + self.agent.system_prompt

    def __call__(self, question: str) -> str:
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        fixed_answer = self.agent.run(question)
        print(f"Agent returning fixed answer: {fixed_answer}")
        return fixed_answer
34
 
35
+
36
+ def run_and_submit_all(profile: gr.OAuthProfile | None):
37
  """
38
  Fetches all questions, runs the BasicAgent on them, submits all answers,
39
  and displays the results.
40
  """
41
  # --- Determine HF Space Runtime URL and Repo URL ---
42
+ space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
43
 
44
  if profile:
45
+ username = f"{profile.username}"
46
  print(f"User logged in: {username}")
47
  else:
48
  print("User not logged in.")
 
51
  api_url = DEFAULT_API_URL
52
  questions_url = f"{api_url}/questions"
53
  submit_url = f"{api_url}/submit"
54
+ file_download_url = f"{api_url}/files"
55
 
56
  # 1. Instantiate Agent ( modify this part to create your agent)
57
  try:
 
70
  response.raise_for_status()
71
  questions_data = response.json()
72
  if not questions_data:
73
+ print("Fetched questions list is empty.")
74
+ return "Fetched questions list is empty or invalid format.", None
75
  print(f"Fetched {len(questions_data)} questions.")
76
  except requests.exceptions.RequestException as e:
77
  print(f"Error fetching questions: {e}")
78
  return f"Error fetching questions: {e}", None
79
  except requests.exceptions.JSONDecodeError as e:
80
+ print(f"Error decoding JSON response from questions endpoint: {e}")
81
+ print(f"Response text: {response.text[:500]}")
82
+ return f"Error decoding server response for questions: {e}", None
83
  except Exception as e:
84
  print(f"An unexpected error occurred fetching questions: {e}")
85
  return f"An unexpected error occurred fetching questions: {e}", None
 
91
  for item in questions_data:
92
  task_id = item.get("task_id")
93
  question_text = item.get("question")
94
+ file_name = item.get("file_name")
95
+ if file_name:
96
+ file_url = f"{file_download_url}/{task_id}"
97
+ file_path = download_file_from_url(file_url, file_name)
98
+ question_text = f"{question_text} (File: {file_path})"
99
  if not task_id or question_text is None:
100
  print(f"Skipping item with missing task_id or question: {item}")
101
  continue
102
  try:
103
  submitted_answer = agent(question_text)
104
+ answers_payload.append(
105
+ {"task_id": task_id, "submitted_answer": submitted_answer}
106
+ )
107
+ results_log.append(
108
+ {
109
+ "Task ID": task_id,
110
+ "Question": question_text,
111
+ "Submitted Answer": submitted_answer,
112
+ }
113
+ )
114
  except Exception as e:
115
+ print(f"Error running agent on task {task_id}: {e}")
116
+ results_log.append(
117
+ {
118
+ "Task ID": task_id,
119
+ "Question": question_text,
120
+ "Submitted Answer": f"AGENT ERROR: {e}",
121
+ }
122
+ )
123
 
124
  if not answers_payload:
125
  print("Agent did not produce any answers to submit.")
126
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
127
 
128
+ # 4. Prepare Submission
129
+ submission_data = {
130
+ "username": username.strip(),
131
+ "agent_code": agent_code,
132
+ "answers": answers_payload,
133
+ }
134
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
135
  print(status_update)
136
 
 
200
 
201
  run_button = gr.Button("Run Evaluation & Submit All Answers")
202
 
203
+ status_output = gr.Textbox(
204
+ label="Run Status / Submission Result", lines=5, interactive=False
205
+ )
206
  # Removed max_rows=10 from DataFrame constructor
207
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
208
 
209
+ run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
 
 
 
210
 
211
  if __name__ == "__main__":
212
+ print("\n" + "-" * 30 + " App Starting " + "-" * 30)
213
  # Check for SPACE_HOST and SPACE_ID at startup for information
214
  space_host_startup = os.getenv("SPACE_HOST")
215
+ space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
216
 
217
  if space_host_startup:
218
  print(f"✅ SPACE_HOST found: {space_host_startup}")
 
220
  else:
221
  print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
222
 
223
+ if space_id_startup: # Print repo URLs if SPACE_ID is found
224
  print(f"✅ SPACE_ID found: {space_id_startup}")
225
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
226
+ print(
227
+ f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main"
228
+ )
229
  else:
230
+ print(
231
+ "ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined."
232
+ )
233
 
234
+ print("-" * (60 + len(" App Starting ")) + "\n")
235
 
236
  print("Launching Gradio Interface for Basic Agent Evaluation...")
237
+ demo.launch(debug=True, share=False)
helpers.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import os
3
+ from litellm import completion, create_file
4
+ import requests
5
+ from dotenv import load_dotenv
6
+
7
+ load_dotenv()
8
+
9
+ DEFAULT_MODEL = os.getenv("GEMINI_MODEL")
10
+
11
+
12
def analyze_file_with_gemini(file_path: str, file_name: str) -> str:
    """Upload a local image/audio file to Gemini (via LiteLLM) and return its analysis.

    Args:
        file_path: Path to a .png/.jpg/.jpeg/.mp3 file on disk.
        file_name: Display name forwarded to the model alongside the upload.

    Returns:
        The model's textual report, or an error string on failure.
    """
    # 1. Read file and encode in base64
    try:
        with open(file_path, "rb") as f:
            content = f.read()
        mime_type = _get_mime_type(file_path)
        base64_data = base64.b64encode(content).decode("utf-8")
    except Exception as e:
        return f"Error reading file: {e}"

    file = create_file(
        file=base64_data,
        purpose="user_data",
        extra_body={"custom_llm_provider": "gemini"},
        api_key=os.getenv("GEMINI_API_KEY"),
    )
    # 2. Construct Gemini-style multimodal input
    prompt = (
        f"Analyze the following {mime_type} file and provide a detailed report. "
        "The file is encoded in base64 format. "
        "Please include any relevant information or insights."
    )

    try:
        response = completion(
            model=DEFAULT_MODEL,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {
                            "type": "file",
                            "file": {
                                "file_id": file.id,
                                "filename": file_name,
                                # BUG FIX: the format was hard-coded to
                                # "audio/wav" even for images; derive it from
                                # the detected mime type instead (matches
                                # GeminiFileAnalyzerTool in tools.py).
                                "format": mime_type.split("/")[-1],
                            },
                        },
                    ],
                },
            ],
        )
        # BUG FIX: return the message text, not the Message object, so the
        # declared `-> str` contract actually holds for callers.
        return response.choices[0].message.content
    except Exception as e:
        return f"Error from Gemini: {e}"
59
+
60
+
61
+ def _get_mime_type(file_path: str) -> str:
62
+ if file_path.endswith(".png"):
63
+ return "image/png"
64
+ elif file_path.endswith(".jpg") or file_path.endswith(".jpeg"):
65
+ return "image/jpeg"
66
+ elif file_path.endswith(".mp3"):
67
+ return "audio/mpeg"
68
+ else:
69
+ raise ValueError(
70
+ "Unsupported file type: only .png, .jpg, .jpeg, .mp3 are supported"
71
+ )
72
+
73
+
74
def download_file_from_url(
    url: str, file_name: str | None = None, save_dir: str = "./downloads"
) -> str:
    """Download a file from a public URL and save it locally.

    Args:
        url: The direct URL to the file (must not be a blob: URL).
        file_name: Optional explicit name for the saved file. BUG FIX: app.py
            calls ``download_file_from_url(file_url, file_name)``; previously
            the second positional parameter was ``save_dir``, so the original
            file name was silently used as a *directory* and the file was
            saved under the task-id taken from the URL.
        save_dir: Directory to save the downloaded file (default: ./downloads).

    Returns:
        Full path to the downloaded file.

    Raises:
        RuntimeError: If the request or the local write fails.
    """
    try:
        os.makedirs(save_dir, exist_ok=True)

        # Prefer the explicit name; otherwise derive one from the URL.
        local_filename = file_name or url.split("/")[-1] or "downloaded_file"
        file_path = os.path.join(save_dir, local_filename)

        # Perform streaming download so large files are not held in memory.
        with requests.get(url, stream=True) as r:
            r.raise_for_status()
            with open(file_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)

        return file_path
    except Exception as e:
        raise RuntimeError(f"Failed to download file from {url}: {e}")
llm.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # llm.py
2
+ import os
3
+ import litellm
4
+ from smolagents import LiteLLMModel
5
+ from dotenv import load_dotenv
6
+
7
+ load_dotenv()
8
+ # Set default model
9
+ DEFAULT_MODEL = os.getenv("GEMINI_MODEL")
10
+
11
+
12
def chat_with_llm(messages, model=DEFAULT_MODEL):
    """Send a chat transcript to the configured LLM and return its reply text.

    messages: list of {"role": "user"/"system"/"assistant", "content": "..."}
    model: model string (e.g., "gemini-pro" or "gpt-3.5-turbo")
    """
    try:
        api_key = os.getenv("GEMINI_API_KEY")
        response = litellm.completion(model=model, messages=messages, api_key=api_key)
        first_choice = response["choices"][0]
        return first_choice["message"]["content"]
    except Exception as e:
        # Errors are reported inline instead of raised so callers always get a string.
        return f"[LLM Error] {e}"
24
+
25
+
26
def ask_llm(prompt: str, model=DEFAULT_MODEL):
    """Single-turn convenience wrapper around chat_with_llm."""
    user_message = {"role": "user", "content": prompt}
    return chat_with_llm([user_message], model=model)
31
+
32
+
33
# Shared smolagents model instance (used by the CodeAgent in app.py).
# NOTE(review): assumes GEMINI_MODEL and GEMINI_API_KEY are set in the
# environment (load_dotenv runs above) — confirm in deployment.
model = LiteLLMModel(
    model_id=DEFAULT_MODEL,
    api_key=os.getenv("GEMINI_API_KEY"),
    max_tokens=8192,
)
prompts.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
# GAIA-style answer-format instructions, prepended to the CodeAgent's default
# system prompt by BasicAgent in app.py. The scoring server matches answers
# exactly, hence the strict "FINAL ANSWER: ..." formatting rules.
SYSTEM_PROMPT = """
You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
"""
requirements.txt CHANGED
@@ -1,2 +1,8 @@
1
  gradio
2
- requests
 
 
 
 
 
 
 
1
  gradio
2
+ requests
3
+ smolagents
4
+ smolagents[litellm]
5
+ pytesseract
6
+ pillow
7
+ pytube
8
+ python-dotenv
tools.py ADDED
@@ -0,0 +1,515 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import os
3
+ from smolagents import Tool
4
+ import math
5
+ import datetime
6
+ from PIL import Image
7
+ import pandas as pd
8
+ import litellm
9
+ from prompts import SYSTEM_PROMPT
10
+ from pytube import YouTube
11
+ from PIL import Image
12
+ import pytesseract
13
+ from smolagents import DuckDuckGoSearchTool
14
+ from dotenv import load_dotenv
15
+
16
+ load_dotenv()
17
+
18
+
19
class GeminiFileAnalyzerTool(Tool):
    """Analyze an image or audio file by uploading it to Gemini through LiteLLM."""

    # smolagents tool metadata, read by the framework to describe the tool.
    name = "gemini_file_analyzer"
    description = "Analyze an image or audio file using Gemini via LiteLLM. Supports jpg, png, and mp3."
    inputs = {
        "file_path": {"type": "string", "description": "Path to image/audio file"},
        "file_name": {
            "type": "string",
            "description": "Name of the file (e.g., photo.jpg, audio.mp3)",
        },
    }
    output_type = "string"

    def forward(self, file_path: str, file_name: str):
        """Read, upload, and analyze the file; return the model's report.

        Every failure (read, upload, completion) is returned as an error
        string rather than raised, so the agent loop keeps running.
        """
        # Step 1: read raw bytes and base64-encode them for the upload.
        try:
            with open(file_path, "rb") as f:
                content = f.read()
            mime_type = self._get_mime_type(file_path)
            base64_data = base64.b64encode(content).decode("utf-8")
        except Exception as e:
            return f"Error reading file: {e}"

        # Step 2: register the payload with the Gemini provider; `file.id`
        # is referenced in the chat message below.
        try:
            file = litellm.create_file(
                file=base64_data,
                purpose="user_data",
                extra_body={"custom_llm_provider": "gemini"},
                api_key=os.getenv("GEMINI_API_KEY"),
            )
        except Exception as e:
            return f"Error uploading file: {e}"

        prompt = (
            f"Analyze the following {mime_type} file and provide a detailed report. "
            "The file is encoded in base64 format. "
            "Please include any relevant information or insights."
        )

        # Step 3: ask the model about the uploaded file.
        try:
            response = litellm.completion(
                model=os.getenv("GEMINI_MODEL", "gemini-pro-vision"),
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": prompt},
                            {
                                "type": "file",
                                "file": {
                                    "file_id": file.id,
                                    "filename": file_name,
                                    "format": mime_type.split("/")[-1],  # e.g., "mp3"
                                },
                            },
                        ],
                    },
                ],
            )
            return response["choices"][0]["message"]["content"]
        except Exception as e:
            return f"Error from Gemini: {e}"

    def _get_mime_type(self, file_path: str) -> str:
        """Map a supported file extension to its MIME type, else raise ValueError."""
        if file_path.endswith(".png"):
            return "image/png"
        elif file_path.endswith(".jpg") or file_path.endswith(".jpeg"):
            return "image/jpeg"
        elif file_path.endswith(".mp3"):
            return "audio/mpeg"
        else:
            raise ValueError(
                "Unsupported file type: only .png, .jpg, .jpeg, .mp3 are supported"
            )
91
+
92
+
93
class ImageTextExtractorTool(Tool):
    """OCR tool: pull readable text out of an image file with Tesseract."""

    name = "image_text_extractor"
    description = "Extract text from an image using OCR."
    inputs = {
        "image_path": {
            "type": "string",
            "description": "Path to the image file (jpg, png, etc.)",
        }
    }
    output_type = "string"

    def forward(self, image_path: str):
        """Run Tesseract over the image; return stripped text or an error string."""
        try:
            picture = Image.open(image_path)
            extracted = pytesseract.image_to_string(picture)
            # Fall back to a friendly message when OCR finds nothing.
            return extracted.strip() or "No text found in image."
        except Exception as e:
            return f"Error extracting text: {e}"
111
+
112
+
113
class TableInspectorTool(Tool):
    """Load a tabular file and summarize it for the agent in Markdown."""

    name = "table_inspector"
    description = "Load a CSV or Excel file and return table info and summary stats in Markdown format."
    inputs = {
        "file_path": {
            "type": "string",
            "description": "Path to CSV or Excel file (.csv, .xls, .xlsx)",
        }
    }
    output_type = "string"

    def forward(self, file_path: str):
        """Return shape, column list, and describe() stats as Markdown.

        Any failure is returned as an error string so the agent keeps running.
        """
        try:
            if file_path.endswith(".csv"):
                df = pd.read_csv(file_path)
            elif file_path.endswith(".xls") or file_path.endswith(".xlsx"):
                df = pd.read_excel(file_path)
            else:
                return "Unsupported file type. Only CSV and Excel (.xls/.xlsx) are supported."

            # Get basic info.
            n_rows, n_cols = df.shape
            headers = list(df.columns)
            # BUG FIX: `datetime_is_numeric=True` was removed in pandas 2.0
            # (datetime columns are now treated as numeric by default), so
            # passing it raised TypeError and this tool always returned an
            # error string under pandas >= 2.
            summary = df.describe(include="all").fillna("").astype(str)

            # Markdown output.
            md = f"### File loaded: **{file_path}**\n"
            md += f"- Rows: **{n_rows}**\n"
            md += f"- Columns: **{n_cols}**\n"
            md += f"- Column Headers:\n"
            for col in headers:
                md += f"  - `{col}`\n"

            md += "\n### Summary Statistics (markdown table):\n\n"
            # NOTE(review): to_markdown() needs the optional `tabulate`
            # package, which is not in requirements.txt — confirm it is
            # installed, otherwise this raises and is caught below.
            md += summary.to_markdown()

            return md

        except Exception as e:
            return f"Error loading file: {str(e)}"
157
+
158
+
159
class YouTubeVideoAnalyzerTool(Tool):
    """Fetch YouTube metadata with pytube and have the LLM analyze the video."""

    name = "youtube_video_analyzer"
    description = "Given a YouTube URL, extracts metadata and comments, then analyzes it for summary, highlights, and visuals."
    inputs = {
        "url": {"type": "string", "description": "Full YouTube video URL"},
        "user_prompt": {
            "type": "string",
            "description": "What you want to analyze from the video content",
        },
    }
    output_type = "string"

    def forward(self, url: str, user_prompt: str):
        """Return the model's markdown analysis, or an error string on failure."""
        try:
            yt = YouTube(url)
            title = yt.title
            description = yt.description
            # BUG FIX: pytube's YouTube object has no `comments` attribute,
            # so `yt.comments` raised AttributeError and the whole analysis
            # collapsed into the generic error return below. Use getattr so
            # missing comments degrade gracefully.
            raw_comments = getattr(yt, "comments", None)
            comments = raw_comments[:5] if raw_comments else []

            comment_text = (
                "\n".join([f"- {c}" for c in comments])
                if comments
                else "No comments found."
            )

            system_prompt = f"""You are an AI video analyzer. A user wants to analyze the following YouTube video.

### Title
{title}

### Description
{description or 'No description.'}

### Top Comments
{comment_text}

### User Request
{user_prompt}

### Instructions:
- Identify the main topic of the video.
- List any unique characteristics or production traits.
- Mention key highlights or scenes if they are implied.
- Give an overall summary based on description and social sentiment.

Respond in structured markdown.
"""

            response = litellm.completion(
                api_key=os.getenv("GEMINI_API_KEY"),
                model=os.getenv("GEMINI_MODEL"),
                messages=[
                    {"role": "system", "content": SYSTEM_PROMPT},
                    {"role": "user", "content": system_prompt},
                ],
            )

            return response["choices"][0]["message"]["content"]

        except Exception as e:
            return f"Error analyzing video: {e}"
220
+
221
+
222
+ # --- Math Tools ---
223
# --- Math Tools ---
class CalculatorTool(Tool):
    """Safely evaluate a pure-arithmetic expression string."""

    name = "calculator"
    description = (
        "Evaluate a basic mathematical expression (supports +, -, *, /, **, %, etc.)."
    )
    inputs = {
        "expression": {
            "type": "string",
            "description": "A mathematical expression to evaluate",
        }
    }
    output_type = "number"

    def forward(self, expression: str):
        """Evaluate *expression* and return its numeric value.

        Only numeric literals and +, -, *, /, **, %, unary +/- are allowed;
        anything else raises ValueError. SECURITY FIX: the original
        whitelisted AST node types but still ran `eval` on the compiled
        tree; this version interprets the AST directly, so no Python code
        object is ever executed.
        """
        import ast
        import operator

        binary_ops = {
            ast.Add: operator.add,
            ast.Sub: operator.sub,
            ast.Mult: operator.mul,
            ast.Div: operator.truediv,
            ast.Pow: operator.pow,
            ast.Mod: operator.mod,
        }
        unary_ops = {ast.USub: operator.neg, ast.UAdd: operator.pos}

        def _eval(node):
            # Recursive interpreter over the small whitelisted grammar.
            if isinstance(node, ast.Expression):
                return _eval(node.body)
            if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
                return node.value
            if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
                return binary_ops[type(node.op)](_eval(node.left), _eval(node.right))
            if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
                return unary_ops[type(node.op)](_eval(node.operand))
            raise ValueError(f"Unsafe or unsupported expression: {expression}")

        return _eval(ast.parse(expression, mode="eval"))
261
+
262
+
263
+ # Optionally, separate basic operations could be defined (e.g., add, subtract).
264
# Optionally, separate basic operations could be defined (e.g., add, subtract).
class AddTool(Tool):
    """Elementary addition exposed as an agent tool."""

    name = "add"
    description = "Add two numbers together."
    inputs = {
        "a": {"type": "number", "description": "First number"},
        "b": {"type": "number", "description": "Second number"},
    }
    output_type = "number"

    def forward(self, a: float, b: float):
        """Return the sum of *a* and *b*."""
        total = a + b
        return total
275
+
276
+
277
class MultiplyTool(Tool):
    """Elementary multiplication exposed as an agent tool."""

    name = "multiply"
    description = "Multiply two numbers."
    inputs = {
        "a": {"type": "number", "description": "First number"},
        "b": {"type": "number", "description": "Second number"},
    }
    output_type = "number"

    def forward(self, a: float, b: float):
        """Return the product of *a* and *b*."""
        product = a * b
        return product
288
+
289
+
290
+ # --- Date/Time Tools ---
291
# --- Date/Time Tools ---
class DayOfWeekTool(Tool):
    """Tell which weekday a calendar date falls on."""

    name = "day_of_week"
    description = "Return the day of week for a given date (YYYY-MM-DD)."
    inputs = {"date": {"type": "string", "description": "Date in format YYYY-MM-DD"}}
    output_type = "string"

    def forward(self, date: str):
        """Return the weekday name (e.g. 'Monday') for *date*."""
        year, month, day = (int(part) for part in date.split("-"))
        return datetime.date(year, month, day).strftime("%A")
301
+
302
+
303
class AddDaysTool(Tool):
    """Shift a calendar date forward (or backward) by a number of days."""

    name = "add_days"
    description = "Add a number of days to a date (YYYY-MM-DD)."
    inputs = {
        "date": {"type": "string", "description": "Start date (YYYY-MM-DD)"},
        "days": {"type": "integer", "description": "Number of days to add"},
    }
    output_type = "string"

    def forward(self, date: str, days: int):
        """Return the ISO date *days* days after *date* (negative shifts back)."""
        year, month, day = (int(part) for part in date.split("-"))
        shifted = datetime.date(year, month, day) + datetime.timedelta(days=days)
        return shifted.isoformat()
316
+
317
+
318
class DateDiffTool(Tool):
    """Absolute day-count between two calendar dates."""

    name = "date_diff"
    description = "Compute difference in days between two dates (YYYY-MM-DD)."
    inputs = {
        "start_date": {"type": "string", "description": "First date (YYYY-MM-DD)"},
        "end_date": {"type": "string", "description": "Second date (YYYY-MM-DD)"},
    }
    output_type = "integer"

    def forward(self, start_date: str, end_date: str):
        """Return abs(days between the two dates); order does not matter."""
        # Locals renamed to avoid the original's shadowing of `d1`
        # (first an int day, then reused as a date object).
        sy, sm, sd = (int(part) for part in start_date.split("-"))
        ey, em, ed = (int(part) for part in end_date.split("-"))
        start = datetime.date(sy, sm, sd)
        end = datetime.date(ey, em, ed)
        return abs((end - start).days)
333
+
334
+
335
+ # --- Unit Conversion Tools ---
336
# --- Unit Conversion Tools ---
class TempConvertTool(Tool):
    """Two-way Celsius/Fahrenheit conversion."""

    name = "convert_temperature"
    description = "Convert temperature between Celsius and Fahrenheit."
    inputs = {
        "value": {"type": "number", "description": "Temperature value to convert"},
        "from_unit": {"type": "string", "description": "Unit of input ('C' or 'F')"},
    }
    output_type = "number"

    def forward(self, value: float, from_unit: str):
        """Convert *value* to the other scale, based on *from_unit*."""
        scale = from_unit.strip().upper()
        if scale == "C":
            return value * 9 / 5 + 32  # Celsius -> Fahrenheit
        if scale == "F":
            return (value - 32) * 5 / 9  # Fahrenheit -> Celsius
        raise ValueError("Unit must be 'C' or 'F'.")
355
+
356
+
357
class LengthConvertTool(Tool):
    """Length conversion between km, mi, m, and ft via a meters pivot."""

    name = "convert_length"
    description = "Convert length between kilometers, miles, meters, and feet."
    inputs = {
        "value": {"type": "number", "description": "Length value to convert"},
        "from_unit": {
            "type": "string",
            "description": "Original unit ('km','mi','m','ft')",
        },
        "to_unit": {
            "type": "string",
            "description": "Target unit ('km','mi','m','ft')",
        },
    }
    output_type = "number"

    def forward(self, value: float, from_unit: str, to_unit: str):
        """Normalize to meters, then scale into the target unit."""
        # Multipliers that take one unit of each kind to meters.
        to_meters = {"km": 1000, "m": 1, "mi": 1609.34, "ft": 0.3048}
        src = from_unit.lower()
        dst = to_unit.lower()
        if src not in to_meters:
            raise ValueError("Unsupported from_unit")
        meters = value if src == "m" else value * to_meters[src]
        if dst not in to_meters:
            raise ValueError("Unsupported to_unit")
        if dst == "m":
            return meters
        return meters / to_meters[dst]
397
+
398
+
399
+ # --- Text Tools ---
400
# --- Text Tools ---
class WordCountTool(Tool):
    """Count whitespace-separated words in a string."""

    name = "word_count"
    description = "Count the number of words in a text string."
    inputs = {"text": {"type": "string", "description": "Input text"}}
    output_type = "integer"

    def forward(self, text: str):
        """Return how many whitespace-delimited tokens *text* contains."""
        words = text.split()
        return len(words)
408
+
409
+
410
class FindTextTool(Tool):
    """Count how often a substring occurs in a text."""

    name = "find_text"
    description = "Find occurrences of a substring in a text; returns count."
    inputs = {
        "text": {"type": "string", "description": "Text to search in"},
        "query": {"type": "string", "description": "Substring to search for"},
    }
    output_type = "integer"

    def forward(self, text: str, query: str):
        """Return the number of non-overlapping occurrences of *query* in *text*."""
        occurrences = text.count(query)
        return occurrences
421
+
422
+
423
+ # --- List/Sequence Tools ---
424
# --- List/Sequence Tools ---
class SortListTool(Tool):
    """Sort a homogeneous list of numbers or strings."""

    name = "sort_list"
    description = "Sort a list of items (numbers or strings)."
    inputs = {"items": {"type": "array", "description": "List of items to sort"}}
    output_type = "array"

    def forward(self, items):
        """Return a new ascending-sorted list; the input is left untouched."""
        ordered = list(items)
        ordered.sort()
        return ordered
432
+
433
+
434
class UniqueListTool(Tool):
    """Deduplicate a list while keeping the first occurrence of each item."""

    name = "unique_list"
    description = "Return a list with duplicate items removed (preserving order)."
    inputs = {"items": {"type": "array", "description": "List of items"}}
    output_type = "array"

    def forward(self, items):
        """Return the items with duplicates dropped, order preserved."""
        # Membership uses ==, not hashing, so unhashable items keep working
        # (same O(n^2) semantics as the original).
        unique_items = []
        for item in items:
            if item not in unique_items:
                unique_items.append(item)
        return unique_items
446
+
447
+
448
+ # --- File I/O Tools ---
449
# --- File I/O Tools ---
class ReadFileTool(Tool):
    """Read a whole text file into a string."""

    name = "read_file"
    description = "Read and return the contents of a text file."
    inputs = {"file_path": {"type": "string", "description": "Path to a text file"}}
    output_type = "string"

    def forward(self, file_path: str):
        """Return the file's text, or an error message if it does not exist."""
        try:
            with open(file_path, "r") as handle:
                contents = handle.read()
        except FileNotFoundError:
            # Only a missing file is reported inline; other I/O errors propagate.
            return f"Error: File not found: {file_path}"
        return contents
461
+
462
+
463
class WriteFileTool(Tool):
    """Write (overwrite) a text file with the given content."""

    name = "write_file"
    description = "Write a string to a text file (overwrites if exists)."
    inputs = {
        "file_path": {"type": "string", "description": "Path to write the file"},
        "content": {"type": "string", "description": "Content to write"},
    }
    output_type = "string"

    def forward(self, file_path: str, content: str):
        """Overwrite *file_path* with *content* and confirm."""
        with open(file_path, "w") as handle:
            handle.write(content)
        return f"Wrote to {file_path}"
476
+
477
+
478
+ # --- Image Tool (stub) ---
479
# --- Image Tool (stub) ---
class ImageInfoTool(Tool):
    """Report an image's pixel dimensions and color mode."""

    name = "image_info"
    description = "Load an image and report basic info (size and mode)."
    inputs = {"image_path": {"type": "string", "description": "Path to an image file"}}
    output_type = "string"

    def forward(self, image_path: str):
        """Open the image and summarize its size/mode; errors become strings."""
        try:
            picture = Image.open(image_path)
            return f"Image {image_path}: size={picture.size}, mode={picture.mode}"
        except Exception as e:
            return f"Error loading image: {e}"
491
+
492
+
493
+ # List of all available tools
494
# List of all available tools
# Instantiated once at import time; app.py hands this list to the CodeAgent.
agent_tools = [
    GeminiFileAnalyzerTool(),
    ImageTextExtractorTool(),
    TableInspectorTool(),
    YouTubeVideoAnalyzerTool(),
    CalculatorTool(),
    AddTool(),
    MultiplyTool(),
    DayOfWeekTool(),
    AddDaysTool(),
    DateDiffTool(),
    TempConvertTool(),
    LengthConvertTool(),
    WordCountTool(),
    FindTextTool(),
    SortListTool(),
    UniqueListTool(),
    ReadFileTool(),
    WriteFileTool(),
    ImageInfoTool(),
    DuckDuckGoSearchTool(),
]