mdicio committed on
Commit
1aa70af
·
1 Parent(s): 263a013
Files changed (7) hide show
  1. .gitignore +9 -0
  2. agent.py +16 -11
  3. app.py +3 -0
  4. app_template.py +53 -28
  5. realreq.txt +12 -0
  6. requirements.txt +11 -23
  7. tools.py +18 -24
.gitignore ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ .env
2
+ ragdata/
3
+ chroma_store
4
+ .python-version
5
+ downloads/
6
+ .python_version
7
+ *.jsonl
8
+ *__pycache__/
9
+ *.log
agent.py CHANGED
@@ -1,5 +1,7 @@
1
  import os
 
2
  from dotenv import load_dotenv
 
3
  load_dotenv()
4
 
5
  # Import models from SmolaAgents
@@ -20,7 +22,7 @@ from tools import (
20
  TranscribeAudioTool,
21
  VisitWebpageTool,
22
  WikipediaSearchTool,
23
- image_question_answering
24
  )
25
 
26
  # Import utility functions
@@ -69,11 +71,13 @@ class BoomBot:
69
  )
70
  elif self.provider == "anthropic":
71
  model_id = "anthropic/claude-3-5-haiku-latest"
72
- return LiteLLMModel(model_id=model_id,
73
- temperature=0.6,
74
- max_tokens=8192,
75
- api_key=os.getenv("ANTHROPIC_API_KEY"))
76
-
 
 
77
  elif self.provider == "deepinfra":
78
  deepinfra_model = "Qwen/Qwen3-235B-A22B"
79
  return OpenAIServerModel(
@@ -277,7 +281,7 @@ class BoomBot:
277
  )
278
 
279
  # Run the agent with the given question
280
- result = self.agent.generate_response(question)
281
 
282
  # Extract the final answer from the result
283
  final_answer = extract_final_answer(result)
@@ -286,7 +290,8 @@ class BoomBot:
286
 
287
 
288
  # Example of how to use this code (commented out)
289
- # if __name__ == "__main__":
290
- # agent = BasicAgent()
291
- # response = agent("What is the current population of Tokyo?", "population_query", True)
292
- # print(f"Response: {response}")
 
 
1
  import os
2
+
3
  from dotenv import load_dotenv
4
+
5
  load_dotenv()
6
 
7
  # Import models from SmolaAgents
 
22
  TranscribeAudioTool,
23
  VisitWebpageTool,
24
  WikipediaSearchTool,
25
+ image_question_answering,
26
  )
27
 
28
  # Import utility functions
 
71
  )
72
  elif self.provider == "anthropic":
73
  model_id = "anthropic/claude-3-5-haiku-latest"
74
+ return LiteLLMModel(
75
+ model_id=model_id,
76
+ temperature=0.6,
77
+ max_tokens=8192,
78
+ api_key=os.getenv("ANTHROPIC_API_KEY"),
79
+ )
80
+
81
  elif self.provider == "deepinfra":
82
  deepinfra_model = "Qwen/Qwen3-235B-A22B"
83
  return OpenAIServerModel(
 
281
  )
282
 
283
  # Run the agent with the given question
284
+ result = self.agent.run(question)
285
 
286
  # Extract the final answer from the result
287
  final_answer = extract_final_answer(result)
 
290
 
291
 
292
  # Example of how to use this code (commented out)
293
+ if __name__ == "__main__":
294
+ agent = BoomBot(provider="gemma")
295
+ question = "In the year 2020, where were koi fish found in the watershed with the id 02040203? Give only the name of the pond, lake, or stream where the fish were found, and not the name of the city or county."
296
+ response = agent.run(question=question, task_id="1", to_download=False)
297
+ print(f"Response: {response}")
app.py CHANGED
@@ -12,7 +12,10 @@ from agent import BoomBot
12
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
13
 
14
  from dotenv import load_dotenv
 
15
  load_dotenv()
 
 
16
  # --- Basic Agent Definition --
17
  class BasicAgent:
18
  def __init__(self):
 
12
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
13
 
14
  from dotenv import load_dotenv
15
+
16
  load_dotenv()
17
+
18
+
19
  # --- Basic Agent Definition --
20
  class BasicAgent:
21
  def __init__(self):
app_template.py CHANGED
@@ -1,34 +1,38 @@
 
1
  import os
 
2
  import gradio as gr
3
- import requests
4
- import inspect
5
  import pandas as pd
 
6
 
7
  # (Keep Constants as is)
8
  # --- Constants ---
9
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
 
 
11
  # --- Basic Agent Definition ---
12
  # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
13
  class BasicAgent:
14
  def __init__(self):
15
  print("BasicAgent initialized.")
 
16
  def __call__(self, question: str) -> str:
17
  print(f"Agent received question (first 50 chars): {question[:50]}...")
18
  fixed_answer = "This is a default answer."
19
  print(f"Agent returning fixed answer: {fixed_answer}")
20
  return fixed_answer
21
 
22
- def run_and_submit_all( profile: gr.OAuthProfile | None):
 
23
  """
24
  Fetches all questions, runs the BasicAgent on them, submits all answers,
25
  and displays the results.
26
  """
27
  # --- Determine HF Space Runtime URL and Repo URL ---
28
- space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
29
 
30
  if profile:
31
- username= f"{profile.username}"
32
  print(f"User logged in: {username}")
33
  else:
34
  print("User not logged in.")
@@ -55,16 +59,16 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
55
  response.raise_for_status()
56
  questions_data = response.json()
57
  if not questions_data:
58
- print("Fetched questions list is empty.")
59
- return "Fetched questions list is empty or invalid format.", None
60
  print(f"Fetched {len(questions_data)} questions.")
61
  except requests.exceptions.RequestException as e:
62
  print(f"Error fetching questions: {e}")
63
  return f"Error fetching questions: {e}", None
64
  except requests.exceptions.JSONDecodeError as e:
65
- print(f"Error decoding JSON response from questions endpoint: {e}")
66
- print(f"Response text: {response.text[:500]}")
67
- return f"Error decoding server response for questions: {e}", None
68
  except Exception as e:
69
  print(f"An unexpected error occurred fetching questions: {e}")
70
  return f"An unexpected error occurred fetching questions: {e}", None
@@ -81,18 +85,36 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
81
  continue
82
  try:
83
  submitted_answer = agent(question_text)
84
- answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
85
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
 
 
 
 
 
 
 
 
86
  except Exception as e:
87
- print(f"Error running agent on task {task_id}: {e}")
88
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
 
 
 
 
 
89
 
90
  if not answers_payload:
91
  print("Agent did not produce any answers to submit.")
92
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
93
 
94
- # 4. Prepare Submission
95
- submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
 
 
 
 
96
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
97
  print(status_update)
98
 
@@ -162,20 +184,19 @@ with gr.Blocks() as demo:
162
 
163
  run_button = gr.Button("Run Evaluation & Submit All Answers")
164
 
165
- status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
 
 
166
  # Removed max_rows=10 from DataFrame constructor
167
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
168
 
169
- run_button.click(
170
- fn=run_and_submit_all,
171
- outputs=[status_output, results_table]
172
- )
173
 
174
  if __name__ == "__main__":
175
- print("\n" + "-"*30 + " App Starting " + "-"*30)
176
  # Check for SPACE_HOST and SPACE_ID at startup for information
177
  space_host_startup = os.getenv("SPACE_HOST")
178
- space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
179
 
180
  if space_host_startup:
181
  print(f"✅ SPACE_HOST found: {space_host_startup}")
@@ -183,14 +204,18 @@ if __name__ == "__main__":
183
  else:
184
  print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
185
 
186
- if space_id_startup: # Print repo URLs if SPACE_ID is found
187
  print(f"✅ SPACE_ID found: {space_id_startup}")
188
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
189
- print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
 
 
190
  else:
191
- print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
 
 
192
 
193
- print("-"*(60 + len(" App Starting ")) + "\n")
194
 
195
  print("Launching Gradio Interface for Basic Agent Evaluation...")
196
- demo.launch(debug=True, share=False)
 
1
+ import inspect
2
  import os
3
+
4
  import gradio as gr
 
 
5
  import pandas as pd
6
+ import requests
7
 
8
  # (Keep Constants as is)
9
  # --- Constants ---
10
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
 
12
+
13
  # --- Basic Agent Definition ---
14
  # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
15
  class BasicAgent:
16
  def __init__(self):
17
  print("BasicAgent initialized.")
18
+
19
  def __call__(self, question: str) -> str:
20
  print(f"Agent received question (first 50 chars): {question[:50]}...")
21
  fixed_answer = "This is a default answer."
22
  print(f"Agent returning fixed answer: {fixed_answer}")
23
  return fixed_answer
24
 
25
+
26
+ def run_and_submit_all(profile: gr.OAuthProfile | None):
27
  """
28
  Fetches all questions, runs the BasicAgent on them, submits all answers,
29
  and displays the results.
30
  """
31
  # --- Determine HF Space Runtime URL and Repo URL ---
32
+ space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
33
 
34
  if profile:
35
+ username = f"{profile.username}"
36
  print(f"User logged in: {username}")
37
  else:
38
  print("User not logged in.")
 
59
  response.raise_for_status()
60
  questions_data = response.json()
61
  if not questions_data:
62
+ print("Fetched questions list is empty.")
63
+ return "Fetched questions list is empty or invalid format.", None
64
  print(f"Fetched {len(questions_data)} questions.")
65
  except requests.exceptions.RequestException as e:
66
  print(f"Error fetching questions: {e}")
67
  return f"Error fetching questions: {e}", None
68
  except requests.exceptions.JSONDecodeError as e:
69
+ print(f"Error decoding JSON response from questions endpoint: {e}")
70
+ print(f"Response text: {response.text[:500]}")
71
+ return f"Error decoding server response for questions: {e}", None
72
  except Exception as e:
73
  print(f"An unexpected error occurred fetching questions: {e}")
74
  return f"An unexpected error occurred fetching questions: {e}", None
 
85
  continue
86
  try:
87
  submitted_answer = agent(question_text)
88
+ answers_payload.append(
89
+ {"task_id": task_id, "submitted_answer": submitted_answer}
90
+ )
91
+ results_log.append(
92
+ {
93
+ "Task ID": task_id,
94
+ "Question": question_text,
95
+ "Submitted Answer": submitted_answer,
96
+ }
97
+ )
98
  except Exception as e:
99
+ print(f"Error running agent on task {task_id}: {e}")
100
+ results_log.append(
101
+ {
102
+ "Task ID": task_id,
103
+ "Question": question_text,
104
+ "Submitted Answer": f"AGENT ERROR: {e}",
105
+ }
106
+ )
107
 
108
  if not answers_payload:
109
  print("Agent did not produce any answers to submit.")
110
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
111
 
112
+ # 4. Prepare Submission
113
+ submission_data = {
114
+ "username": username.strip(),
115
+ "agent_code": agent_code,
116
+ "answers": answers_payload,
117
+ }
118
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
119
  print(status_update)
120
 
 
184
 
185
  run_button = gr.Button("Run Evaluation & Submit All Answers")
186
 
187
+ status_output = gr.Textbox(
188
+ label="Run Status / Submission Result", lines=5, interactive=False
189
+ )
190
  # Removed max_rows=10 from DataFrame constructor
191
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
192
 
193
+ run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
 
 
 
194
 
195
  if __name__ == "__main__":
196
+ print("\n" + "-" * 30 + " App Starting " + "-" * 30)
197
  # Check for SPACE_HOST and SPACE_ID at startup for information
198
  space_host_startup = os.getenv("SPACE_HOST")
199
+ space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
200
 
201
  if space_host_startup:
202
  print(f"✅ SPACE_HOST found: {space_host_startup}")
 
204
  else:
205
  print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
206
 
207
+ if space_id_startup: # Print repo URLs if SPACE_ID is found
208
  print(f"✅ SPACE_ID found: {space_id_startup}")
209
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
210
+ print(
211
+ f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main"
212
+ )
213
  else:
214
+ print(
215
+ "ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined."
216
+ )
217
 
218
+ print("-" * (60 + len(" App Starting ")) + "\n")
219
 
220
  print("Launching Gradio Interface for Basic Agent Evaluation...")
221
+ demo.launch(debug=True, share=False)
realreq.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ dotenv
2
+ smolagents
3
+ ollama
4
+ chromadb
5
+ pymupdf
6
+ pandas
7
+ bs4
8
+ duckduckgo-search
9
+ langchain_community
10
+ markdownify
11
+ smolagents[litellm]
12
+ smolagents[openai]
requirements.txt CHANGED
@@ -1,25 +1,13 @@
1
- beautifulsoup4
2
- chromadb
3
- duckduckgo_search
4
  gradio
5
- huggingface_hub
6
- langchain
7
- langchain-chroma
8
- langchain-community
9
- langchain-core
10
- langchain-groq
11
- langchain-huggingface
12
- langchain-google-genai
13
- langchain-tavily
14
- langgraph
15
- markdownify
16
- pandas
17
- protobuf==3.20.*
18
- PyMuPDF
19
- python-dotenv
20
- requests
21
- sentence-transformers
22
  smolagents
23
- smolagents[openai]
24
- smolagents[toolkit]
25
- ollama
 
 
 
 
 
 
 
 
 
 
 
1
  gradio
2
+ dotenv
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  smolagents
4
+ ollama
5
+ chromadb
6
+ pymupdf
7
+ pandas
8
+ bs4
9
+ duckduckgo-search
10
+ langchain_community
11
+ markdownify
12
+ smolagents[litellm]
13
+ smolagents[openai]
tools.py CHANGED
@@ -7,10 +7,7 @@ import time
7
  import traceback
8
  from pathlib import Path
9
  from typing import Dict, List
10
- from urllib.parse import urlparse
11
- from pathlib import Path
12
- from ollama import chat
13
- from PIL import Image
14
 
15
  import chromadb
16
  import chromadb.utils.embedding_functions as embedding_functions
@@ -18,6 +15,7 @@ import fitz # PyMuPDF
18
  import pandas as pd
19
  import requests
20
  from bs4 import BeautifulSoup
 
21
  from duckduckgo_search import DDGS
22
  from duckduckgo_search.exceptions import (
23
  ConversationLimitException,
@@ -25,6 +23,7 @@ from duckduckgo_search.exceptions import (
25
  RatelimitException,
26
  TimeoutException,
27
  )
 
28
  from langchain_community.document_loaders import (
29
  BSHTMLLoader,
30
  JSONLoader,
@@ -32,21 +31,16 @@ from langchain_community.document_loaders import (
32
  TextLoader,
33
  UnstructuredFileLoader,
34
  )
35
- from langchain.text_splitter import RecursiveCharacterTextSplitter
36
  from langchain_community.tools import BraveSearch
37
  from markdownify import markdownify
 
 
38
  from smolagents import Tool, tool
39
  from smolagents.utils import truncate_content
40
 
41
- from typing import Dict, List
42
-
43
- import requests
44
- from bs4 import BeautifulSoup
45
- from urllib.parse import quote_plus
46
-
47
- from dotenv import load_dotenv
48
  load_dotenv()
49
 
 
50
  class ReadFileContentTool(Tool):
51
  name = "read_file_content"
52
  description = """Reads local files in various formats (text, CSV, Excel, PDF, HTML, etc.) and returns their content as readable text. Automatically detects and processes the appropriate file format."""
@@ -295,7 +289,7 @@ class BraveWebSearchTool(Tool):
295
  output_type = "string"
296
 
297
  # api_key = os.getenv("BRAVE_SEARCH_API_KEY")
298
- api_key=None
299
  count = 3
300
  char_limit = 4000 # Adjust based on LLM context window
301
  tool = BraveSearch.from_api_key(api_key=api_key, search_kwargs={"count": count})
@@ -491,9 +485,6 @@ class DuckDuckGoSearchTool(Tool):
491
 
492
  def forward(self, query: str) -> str:
493
  self._configure()
494
- print(
495
- f"EXECUTING TOOL: duckduckgo_search(query='{query}', top_results={top_results})"
496
- )
497
 
498
  top_results = 5
499
 
@@ -551,6 +542,7 @@ class DuckDuckGoSearchTool(Tool):
551
 
552
  return f"❌ Failed to retrieve results after {max_retries} retries."
553
 
 
554
  huggingface_ef = embedding_functions.HuggingFaceEmbeddingFunction(
555
  model_name="sentence-transformers/all-mpnet-base-v2"
556
  )
@@ -565,6 +557,7 @@ SUPPORTED_EXTENSIONS = [
565
  ".htm",
566
  ]
567
 
 
568
  class AddDocumentToVectorStoreTool(Tool):
569
  name = "add_document_to_vector_store"
570
  description = "Processes a document and adds it to the vector database for semantic search. Automatically chunks files and creates text embeddings to enable powerful content retrieval."
@@ -632,6 +625,7 @@ class AddDocumentToVectorStoreTool(Tool):
632
  traceback.print_exc()
633
  return f"Error: {e}"
634
 
 
635
  class QueryVectorStoreTool(Tool):
636
  name = "query_downloaded_documents"
637
  description = "Performs semantic searches across your downloaded documents. Use detailed queries to find specific information, concepts, or answers from your collected resources."
@@ -640,16 +634,11 @@ class QueryVectorStoreTool(Tool):
640
  "query": {
641
  "type": "string",
642
  "description": "The search query. Ensure this is constructed intelligently so to retrieve the most relevant outputs.",
643
- },
644
- "top_k": {
645
- "type": "integer",
646
- "description": "Number of top results to retrieve. Usually between 3 and 30",
647
- "nullable": True,
648
- },
649
  }
650
  output_type = "string"
651
 
652
- def forward(self, query: str, top_k: int = 5) -> str:
653
  collection_name = "vectorstore"
654
 
655
  if k < 3:
@@ -668,7 +657,7 @@ class QueryVectorStoreTool(Tool):
668
 
669
  results = collection.query(
670
  query_texts=[query],
671
- n_results=top_k,
672
  )
673
 
674
  formatted = []
@@ -686,6 +675,7 @@ class QueryVectorStoreTool(Tool):
686
  traceback.print_exc()
687
  return f"Error querying vector store: {e}"
688
 
 
689
  @tool
690
  def image_question_answering(image_path: str, prompt: str) -> str:
691
  """
@@ -722,6 +712,7 @@ def image_question_answering(image_path: str, prompt: str) -> str:
722
 
723
  return response.message.content.strip()
724
 
 
725
  class VisitWebpageTool(Tool):
726
  name = "visit_webpage"
727
  description = "Loads a webpage from a URL and converts its content to markdown format. Use this to browse websites, extract information, or identify downloadable resources from a specific web address."
@@ -956,6 +947,7 @@ class VisitWebpageTool(Tool):
956
 
957
  return content
958
 
 
959
  class ArxivSearchTool(Tool):
960
  name = "arxiv_search"
961
  description = """Searches arXiv for academic papers and returns structured information including titles, authors, publication dates, abstracts, and download links."""
@@ -1013,6 +1005,7 @@ class ArxivSearchTool(Tool):
1013
 
1014
  return "\n".join(output_lines).strip()
1015
 
 
1016
  def fetch_and_parse_arxiv(url: str) -> List[Dict[str, str]]:
1017
  """
1018
  Fetches the given arXiv advanced‐search URL, parses the HTML,
@@ -1075,6 +1068,7 @@ def fetch_and_parse_arxiv(url: str) -> List[Dict[str, str]]:
1075
 
1076
  return results
1077
 
 
1078
  def build_arxiv_url(
1079
  query: str, from_date: str = None, to_date: str = None, size: int = 50
1080
  ) -> str:
 
7
  import traceback
8
  from pathlib import Path
9
  from typing import Dict, List
10
+ from urllib.parse import quote_plus, urlparse
 
 
 
11
 
12
  import chromadb
13
  import chromadb.utils.embedding_functions as embedding_functions
 
15
  import pandas as pd
16
  import requests
17
  from bs4 import BeautifulSoup
18
+ from dotenv import load_dotenv
19
  from duckduckgo_search import DDGS
20
  from duckduckgo_search.exceptions import (
21
  ConversationLimitException,
 
23
  RatelimitException,
24
  TimeoutException,
25
  )
26
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
27
  from langchain_community.document_loaders import (
28
  BSHTMLLoader,
29
  JSONLoader,
 
31
  TextLoader,
32
  UnstructuredFileLoader,
33
  )
 
34
  from langchain_community.tools import BraveSearch
35
  from markdownify import markdownify
36
+ from ollama import chat
37
+ from PIL import Image
38
  from smolagents import Tool, tool
39
  from smolagents.utils import truncate_content
40
 
 
 
 
 
 
 
 
41
  load_dotenv()
42
 
43
+
44
  class ReadFileContentTool(Tool):
45
  name = "read_file_content"
46
  description = """Reads local files in various formats (text, CSV, Excel, PDF, HTML, etc.) and returns their content as readable text. Automatically detects and processes the appropriate file format."""
 
289
  output_type = "string"
290
 
291
  # api_key = os.getenv("BRAVE_SEARCH_API_KEY")
292
+ api_key = None
293
  count = 3
294
  char_limit = 4000 # Adjust based on LLM context window
295
  tool = BraveSearch.from_api_key(api_key=api_key, search_kwargs={"count": count})
 
485
 
486
  def forward(self, query: str) -> str:
487
  self._configure()
 
 
 
488
 
489
  top_results = 5
490
 
 
542
 
543
  return f"❌ Failed to retrieve results after {max_retries} retries."
544
 
545
+
546
  huggingface_ef = embedding_functions.HuggingFaceEmbeddingFunction(
547
  model_name="sentence-transformers/all-mpnet-base-v2"
548
  )
 
557
  ".htm",
558
  ]
559
 
560
+
561
  class AddDocumentToVectorStoreTool(Tool):
562
  name = "add_document_to_vector_store"
563
  description = "Processes a document and adds it to the vector database for semantic search. Automatically chunks files and creates text embeddings to enable powerful content retrieval."
 
625
  traceback.print_exc()
626
  return f"Error: {e}"
627
 
628
+
629
  class QueryVectorStoreTool(Tool):
630
  name = "query_downloaded_documents"
631
  description = "Performs semantic searches across your downloaded documents. Use detailed queries to find specific information, concepts, or answers from your collected resources."
 
634
  "query": {
635
  "type": "string",
636
  "description": "The search query. Ensure this is constructed intelligently so to retrieve the most relevant outputs.",
637
+ }
 
 
 
 
 
638
  }
639
  output_type = "string"
640
 
641
+ def forward(self, query: str) -> str:
642
  collection_name = "vectorstore"
643
 
644
  if k < 3:
 
657
 
658
  results = collection.query(
659
  query_texts=[query],
660
+ n_results=k,
661
  )
662
 
663
  formatted = []
 
675
  traceback.print_exc()
676
  return f"Error querying vector store: {e}"
677
 
678
+
679
  @tool
680
  def image_question_answering(image_path: str, prompt: str) -> str:
681
  """
 
712
 
713
  return response.message.content.strip()
714
 
715
+
716
  class VisitWebpageTool(Tool):
717
  name = "visit_webpage"
718
  description = "Loads a webpage from a URL and converts its content to markdown format. Use this to browse websites, extract information, or identify downloadable resources from a specific web address."
 
947
 
948
  return content
949
 
950
+
951
  class ArxivSearchTool(Tool):
952
  name = "arxiv_search"
953
  description = """Searches arXiv for academic papers and returns structured information including titles, authors, publication dates, abstracts, and download links."""
 
1005
 
1006
  return "\n".join(output_lines).strip()
1007
 
1008
+
1009
  def fetch_and_parse_arxiv(url: str) -> List[Dict[str, str]]:
1010
  """
1011
  Fetches the given arXiv advanced‐search URL, parses the HTML,
 
1068
 
1069
  return results
1070
 
1071
+
1072
  def build_arxiv_url(
1073
  query: str, from_date: str = None, to_date: str = None, size: int = 50
1074
  ) -> str: