Spaces:
Runtime error
Runtime error
Remove the web_downloader tool (add a file_download tool to the local tools instead)
Browse files
helper.py
CHANGED
|
@@ -95,84 +95,82 @@ def get_travily_api_search_tool(tavily_api_key: str) -> Tool:
|
|
| 95 |
|
| 96 |
import requests
|
| 97 |
from langchain.tools import Tool
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
|
| 99 |
-
'''
|
| 100 |
-
# Your web_downloader tool function
|
| 101 |
-
def download_url_content(url: str) -> str:
|
| 102 |
-
"""Downloads the content from a given URL as a string."""
|
| 103 |
try:
|
| 104 |
-
|
| 105 |
-
response.
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
#
|
| 109 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
except requests.exceptions.RequestException as e:
|
| 111 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
|
| 113 |
# Get your web_downloader tool
|
| 114 |
-
|
| 115 |
-
name="
|
| 116 |
description="""
|
| 117 |
-
Downloads
|
| 118 |
-
Useful for accessing information directly from web pages or online files.
|
| 119 |
-
Input should be a single, valid URL (e.g., 'https://www.example.com').
|
| 120 |
-
""",
|
| 121 |
-
func=download_url_content,
|
| 122 |
-
)
|
| 123 |
-
'''
|
| 124 |
|
| 125 |
-
from
|
| 126 |
-
import requests
|
| 127 |
-
from langchain.tools import Tool
|
| 128 |
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
content_chunks = []
|
| 135 |
-
total_chars = 0
|
| 136 |
-
for chunk in response.iter_content(chunk_size=8192): # Iterate in chunks
|
| 137 |
-
decoded_chunk = chunk.decode('utf-8', errors='ignore') # Decode as it comes
|
| 138 |
-
if total_chars + len(decoded_chunk) > max_chars:
|
| 139 |
-
content_chunks.append(decoded_chunk[:max_chars - total_chars])
|
| 140 |
-
total_chars = max_chars
|
| 141 |
-
break
|
| 142 |
-
content_chunks.append(decoded_chunk)
|
| 143 |
-
total_chars += len(decoded_chunk)
|
| 144 |
-
|
| 145 |
-
raw_text = "".join(content_chunks)
|
| 146 |
-
|
| 147 |
-
# Optional: use BeautifulSoup to strip HTML tags from the truncated raw text
|
| 148 |
-
soup = BeautifulSoup(raw_text, 'html.parser')
|
| 149 |
-
for script_or_style in soup(["script", "style"]):
|
| 150 |
-
script_or_style.extract()
|
| 151 |
-
clean_text = soup.get_text(separator="\n", strip=True)
|
| 152 |
-
|
| 153 |
-
if total_chars >= max_chars:
|
| 154 |
-
return clean_text + "\n\n[Content truncated due to size limit.]"
|
| 155 |
-
return clean_text
|
| 156 |
-
|
| 157 |
-
except requests.exceptions.RequestException as e:
|
| 158 |
-
return f"Error downloading content from {url}: {e}"
|
| 159 |
-
except Exception as e:
|
| 160 |
-
return f"Error processing content from {url}: {e}"
|
| 161 |
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
Downloads text content from a URL, automatically truncating it to save tokens.
|
| 166 |
-
Useful when you need information from a web page but want to avoid
|
| 167 |
-
exceeding token limits by downloading excessively large content.
|
| 168 |
-
Input should be a single, valid URL.
|
| 169 |
-
NOTE: use this tool only for text-based-content URLs (e.g., articles, documentation, python code file).
|
| 170 |
-
The content will be truncated to approximately 10,000 characters (~2500 tokens).
|
| 171 |
-
If the content is larger, it will be cut off with a note indicating truncation.
|
| 172 |
""",
|
| 173 |
-
func=
|
| 174 |
)
|
| 175 |
|
|
|
|
| 176 |
import speech_recognition as sr
|
| 177 |
from pydub import AudioSegment
|
| 178 |
import os
|
|
|
|
| 95 |
|
| 96 |
import requests
|
| 97 |
from langchain.tools import Tool
|
| 98 |
+
import os
|
| 99 |
+
|
| 100 |
+
def download_file_from_url(url: str, local_filename: str = None) -> str | None:
    """
    Download a file from a given URL and save it locally.

    Acts as a tool for an AI agent to retrieve files from the web.

    Args:
        url (str): The complete URL of the file to download.
        local_filename (str, optional): Desired name for the locally saved
            file. If None, the filename is inferred from the last path
            segment of the URL.

    Returns:
        str: The absolute local path of the downloaded file if successful.
        None: If the download fails due to a request error or file I/O issue.
    """
    if local_filename is None:
        # Simple inference from the last URL path segment.
        # NOTE(review): a Content-Disposition header, when the server sends
        # one, would be a more robust source for the filename — not handled here.
        local_filename = url.split('/')[-1]
        if not local_filename:  # Handle cases like 'http://example.com/'
            local_filename = "downloaded_file"
        # Basic sanitization: drop query parameters ('f.pdf?token=x' -> 'f.pdf').
        if '?' in local_filename:
            local_filename = local_filename.split('?')[0]

    print(f"Attempting to download from: {url} to {local_filename}")
    try:
        # Stream the body so large files are not held fully in memory.
        # timeout prevents the agent from hanging forever on a dead or
        # unresponsive server (the original call had no timeout).
        response = requests.get(url, stream=True, timeout=30)
        response.raise_for_status()  # Raise HTTPError for 4xx/5xx responses

        # Write the payload to disk in chunks to handle large files efficiently.
        with open(local_filename, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)

        print(f"File downloaded successfully to: {os.path.abspath(local_filename)}")
        return os.path.abspath(local_filename)  # Absolute path for clarity
    except requests.exceptions.RequestException as e:
        # Network issues, invalid URL, timeout, or HTTP error status.
        print(f"Error downloading file from {url}: {e}")
        return None
    except IOError as e:
        # Local file-system failure while saving.
        print(f"Error saving file to {local_filename}: {e}")
        return None
|
| 151 |
|
| 152 |
# File-download tool exposed to the agent (replaces the old web_downloader).
# The description deliberately documents only the single URL input: a
# LangChain Tool passes one string to `func`, so the optional
# `local_filename` parameter is not reachable by the agent and listing it
# (as the raw docstring did) would mislead tool selection.
file_download_tool = Tool(
    name="file_download_tool",
    description="""
    Downloads a file from a given URL and saves it locally.
    Useful when a file (document, image, audio, data file, ...) must be
    fetched from the web for further local processing.
    Input should be a single, valid URL.
    Returns the absolute local path of the saved file, or None if the
    download fails.
    """,
    func=download_file_from_url,
)
|
| 172 |
|
| 173 |
+
|
| 174 |
import speech_recognition as sr
|
| 175 |
from pydub import AudioSegment
|
| 176 |
import os
|