nikhmr1235 committed on
Commit
fa27c77
·
verified ·
1 Parent(s): deffd2a

Remove the web downloader tool (add a file download tool to the local tools instead)

Browse files
Files changed (1) hide show
  1. helper.py +64 -66
helper.py CHANGED
@@ -95,84 +95,82 @@ def get_travily_api_search_tool(tavily_api_key: str) -> Tool:
95
 
96
  import requests
97
  from langchain.tools import Tool
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
 
99
'''
# Your web_downloader tool function (disabled — kept for reference)
def download_url_content(url: str) -> str:
    """Downloads the content from a given URL as a string."""
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        # Text files (e.g. Python source) come back via response.text.
        # Binary payloads such as audio would need response.content instead,
        # or saving to a temp file / streaming directly to the consumer.
        return response.text
    except requests.exceptions.RequestException as e:
        return f"Error downloading content from {url}: {e}"

# Get your web_downloader tool
web_downloader_tool = Tool(
    name="web_downloader",
    description="""
    Downloads the content of a given URL as a string.
    Useful for accessing information directly from web pages or online files.
    Input should be a single, valid URL (e.g., 'https://www.example.com').
    """,
    func=download_url_content,
)
'''
124
 
125
- from bs4 import BeautifulSoup
126
- import requests
127
- from langchain.tools import Tool
128
 
129
def download_limited_content(url: str, max_chars: int = 10000) -> str:  # Limit to ~2500 tokens
    """Download text content from *url*, truncated to at most *max_chars* characters.

    Streams the response in chunks so large pages never need to be held in
    memory beyond the character limit, strips <script>/<style> elements and
    HTML markup with BeautifulSoup, and appends a truncation notice when the
    limit was reached.

    Args:
        url: The URL to fetch; expected to be text-based content.
        max_chars: Maximum number of decoded characters to keep (~2500 tokens).

    Returns:
        The cleaned (and possibly truncated) text, or an error message string
        if the download or processing fails.
    """
    import codecs  # local import: incremental UTF-8 decoding of the chunk stream

    try:
        # BUGFIX: an incremental decoder carries partial multi-byte UTF-8
        # sequences across chunk boundaries; the previous per-chunk
        # chunk.decode('utf-8', errors='ignore') silently dropped characters
        # whenever a code point straddled an 8192-byte boundary.
        decoder = codecs.getincrementaldecoder("utf-8")("ignore")
        content_chunks = []
        total_chars = 0
        with requests.get(url, stream=True, timeout=10) as response:
            response.raise_for_status()
            for chunk in response.iter_content(chunk_size=8192):
                decoded_chunk = decoder.decode(chunk)
                if total_chars + len(decoded_chunk) > max_chars:
                    content_chunks.append(decoded_chunk[:max_chars - total_chars])
                    total_chars = max_chars
                    break
                content_chunks.append(decoded_chunk)
                total_chars += len(decoded_chunk)

        raw_text = "".join(content_chunks)

        # Strip markup so the agent sees readable text rather than raw HTML.
        soup = BeautifulSoup(raw_text, "html.parser")
        for script_or_style in soup(["script", "style"]):
            script_or_style.extract()
        clean_text = soup.get_text(separator="\n", strip=True)

        if total_chars >= max_chars:
            return clean_text + "\n\n[Content truncated due to size limit.]"
        return clean_text

    except requests.exceptions.RequestException as e:
        return f"Error downloading content from {url}: {e}"
    except Exception as e:
        return f"Error processing content from {url}: {e}"
161
 
162
# Agent-facing wrapper around download_limited_content.
text_downloader_limited_tool = Tool(
    name="text_downloader_limited_tool",
    func=download_limited_content,
    description="""
    Downloads text content from a URL, automatically truncating it to save tokens.
    Useful when you need information from a web page but want to avoid
    exceeding token limits by downloading excessively large content.
    Input should be a single, valid URL.
    NOTE: use this tool only for text-based-content URLs (e.g., articles, documentation, python code file).
    The content will be truncated to approximately 10,000 characters (~2500 tokens).
    If the content is larger, it will be cut off with a note indicating truncation.
    """,
)
175
 
 
176
  import speech_recognition as sr
177
  from pydub import AudioSegment
178
  import os
 
95
 
96
  import requests
97
  from langchain.tools import Tool
98
+ import os
99
+
100
def download_file_from_url(url: str, local_filename: str = None,
                           timeout: float = 30.0) -> str | None:
    """
    Downloads a file from a given URL and saves it locally.

    This function acts as a tool for an AI agent to retrieve files from the web.

    Args:
        url (str): The complete URL of the file to download.
        local_filename (str, optional): The desired name for the locally saved file.
                                        If None, the filename is derived from the
                                        URL path (query string / fragment dropped).
        timeout (float, optional): Seconds to wait for the server before giving up.
                                   Defaults to 30. (Without a timeout, a stalled
                                   server would block the agent indefinitely.)

    Returns:
        str: The absolute local path of the downloaded file if successful.
        None: If the download fails due to a request error or file I/O issue.
    """
    from urllib.parse import urlparse  # local import: robust filename derivation

    # If no local filename is provided, infer it from the URL path.
    if local_filename is None:
        # urlparse separates the path from query string and fragment for us —
        # more robust than manual '/'- and '?'-splitting.
        # NOTE: a Content-Disposition header, if present, is still ignored.
        local_filename = os.path.basename(urlparse(url).path)
        if not local_filename:  # Handle cases like 'http://example.com/'
            local_filename = "downloaded_file"
        # basename() above also prevents a crafted URL path from writing
        # outside the working directory (path traversal).

    print(f"Attempting to download from: {url} to {local_filename}")

    try:
        # Stream the body so large files are never held wholly in memory;
        # the timeout covers both connect and read stalls.
        response = requests.get(url, stream=True, timeout=timeout)
        response.raise_for_status()  # Raise an HTTPError for bad responses (4xx or 5xx)

        # Open the local file in binary write mode and copy in fixed-size chunks.
        with open(local_filename, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)

        print(f"File downloaded successfully to: {os.path.abspath(local_filename)}")
        return os.path.abspath(local_filename)  # Absolute path for clarity
    except requests.exceptions.RequestException as e:
        # Network issues, invalid URLs, timeouts, and HTTP error statuses land here.
        print(f"Error downloading file from {url}: {e}")
        return None
    except IOError as e:
        # Disk-level failures while saving (permissions, disk full, ...).
        print(f"Error saving file to {local_filename}: {e}")
        return None
151
 
152
# Agent-facing tool wrapping download_file_from_url.
# NOTE: a LangChain Tool passes its func a SINGLE string input, so the
# description deliberately documents one URL input rather than echoing the
# function's two-parameter docstring (the previous Args/Returns copy-paste
# would mislead the agent into passing multiple arguments).
file_download_tool = Tool(
    name="file_download_tool",
    description="""
    Downloads a file from a given URL and saves it locally, returning the
    absolute path of the saved file (or None if the download failed).
    Useful for retrieving files (audio, code, data, ...) that other tools
    then read from local disk.
    Input should be a single, valid URL (e.g., 'https://example.com/file.mp3').
    The local filename is inferred automatically from the URL.
    """,
    func=download_file_from_url,
)
172
 
173
+
174
  import speech_recognition as sr
175
  from pydub import AudioSegment
176
  import os