nikhmr1235 committed on
Commit
fa27c77
·
verified ·
1 Parent(s): deffd2a

Remove the web downloader tool (add a file download tool to the local tools instead)

Browse files
Files changed (1) hide show
  1. helper.py +64 -66
helper.py CHANGED
@@ -95,84 +95,82 @@ def get_travily_api_search_tool(tavily_api_key: str) -> Tool:
95
 
96
  import requests
97
  from langchain.tools import Tool
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
 
99
'''
# Your web_downloader tool function (disabled — kept for reference)
def download_url_content(url: str) -> str:
    """Downloads the content from a given URL as a string."""
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        # Text files (e.g. Python source) come back via response.text.
        # Binary payloads such as audio would need response.content instead,
        # or saving to a temp file / streaming directly to the consumer.
        return response.text
    except requests.exceptions.RequestException as e:
        return f"Error downloading content from {url}: {e}"

# Get your web_downloader tool
web_downloader_tool = Tool(
    name="web_downloader",
    description="""
    Downloads the content of a given URL as a string.
    Useful for accessing information directly from web pages or online files.
    Input should be a single, valid URL (e.g., 'https://www.example.com').
    """,
    func=download_url_content,
)
'''
124
 
125
- from bs4 import BeautifulSoup
126
- import requests
127
- from langchain.tools import Tool
128
 
129
def download_limited_content(url: str, max_chars: int = 10000) -> str:  # Limit to ~2500 tokens
    """Download text content from *url*, truncated to at most *max_chars* characters.

    Streams the response in chunks so large pages never need to be held in
    memory beyond the character limit, strips <script>/<style> elements and
    HTML markup with BeautifulSoup, and appends a truncation notice when the
    limit was reached.

    Args:
        url: The URL to fetch; expected to be text-based content.
        max_chars: Maximum number of decoded characters to keep (~2500 tokens).

    Returns:
        The cleaned (and possibly truncated) text, or an error message string
        if the download or processing fails.
    """
    import codecs  # local import: incremental UTF-8 decoding of the chunk stream

    try:
        # BUGFIX: an incremental decoder carries partial multi-byte UTF-8
        # sequences across chunk boundaries; the previous per-chunk
        # chunk.decode('utf-8', errors='ignore') silently dropped characters
        # whenever a code point straddled an 8192-byte boundary.
        decoder = codecs.getincrementaldecoder("utf-8")("ignore")
        content_chunks = []
        total_chars = 0
        with requests.get(url, stream=True, timeout=10) as response:
            response.raise_for_status()
            for chunk in response.iter_content(chunk_size=8192):
                decoded_chunk = decoder.decode(chunk)
                if total_chars + len(decoded_chunk) > max_chars:
                    content_chunks.append(decoded_chunk[:max_chars - total_chars])
                    total_chars = max_chars
                    break
                content_chunks.append(decoded_chunk)
                total_chars += len(decoded_chunk)

        raw_text = "".join(content_chunks)

        # Strip markup so the agent sees readable text rather than raw HTML.
        soup = BeautifulSoup(raw_text, "html.parser")
        for script_or_style in soup(["script", "style"]):
            script_or_style.extract()
        clean_text = soup.get_text(separator="\n", strip=True)

        if total_chars >= max_chars:
            return clean_text + "\n\n[Content truncated due to size limit.]"
        return clean_text

    except requests.exceptions.RequestException as e:
        return f"Error downloading content from {url}: {e}"
    except Exception as e:
        return f"Error processing content from {url}: {e}"
161
 
162
# Agent-facing wrapper around download_limited_content.
text_downloader_limited_tool = Tool(
    name="text_downloader_limited_tool",
    func=download_limited_content,
    description="""
    Downloads text content from a URL, automatically truncating it to save tokens.
    Useful when you need information from a web page but want to avoid
    exceeding token limits by downloading excessively large content.
    Input should be a single, valid URL.
    NOTE: use this tool only for text-based-content URLs (e.g., articles, documentation, python code file).
    The content will be truncated to approximately 10,000 characters (~2500 tokens).
    If the content is larger, it will be cut off with a note indicating truncation.
    """,
)
175
 
 
176
  import speech_recognition as sr
177
  from pydub import AudioSegment
178
  import os
 
95
 
96
  import requests
97
  from langchain.tools import Tool
98
+ import os
99
+
100
def download_file_from_url(url: str, local_filename: str = None,
                           timeout: float = 30.0) -> str | None:
    """
    Downloads a file from a given URL and saves it locally.

    This function acts as a tool for an AI agent to retrieve files from the web.

    Args:
        url (str): The complete URL of the file to download.
        local_filename (str, optional): The desired name for the locally saved file.
                                        If None, the filename is derived from the
                                        URL path (query string / fragment dropped).
        timeout (float, optional): Seconds to wait for the server before giving up.
                                   Defaults to 30. (Without a timeout, a stalled
                                   server would block the agent indefinitely.)

    Returns:
        str: The absolute local path of the downloaded file if successful.
        None: If the download fails due to a request error or file I/O issue.
    """
    from urllib.parse import urlparse  # local import: robust filename derivation

    # If no local filename is provided, infer it from the URL path.
    if local_filename is None:
        # urlparse separates the path from query string and fragment for us —
        # more robust than manual '/'- and '?'-splitting.
        # NOTE: a Content-Disposition header, if present, is still ignored.
        local_filename = os.path.basename(urlparse(url).path)
        if not local_filename:  # Handle cases like 'http://example.com/'
            local_filename = "downloaded_file"
        # basename() above also prevents a crafted URL path from writing
        # outside the working directory (path traversal).

    print(f"Attempting to download from: {url} to {local_filename}")

    try:
        # Stream the body so large files are never held wholly in memory;
        # the timeout covers both connect and read stalls.
        response = requests.get(url, stream=True, timeout=timeout)
        response.raise_for_status()  # Raise an HTTPError for bad responses (4xx or 5xx)

        # Open the local file in binary write mode and copy in fixed-size chunks.
        with open(local_filename, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)

        print(f"File downloaded successfully to: {os.path.abspath(local_filename)}")
        return os.path.abspath(local_filename)  # Absolute path for clarity
    except requests.exceptions.RequestException as e:
        # Network issues, invalid URLs, timeouts, and HTTP error statuses land here.
        print(f"Error downloading file from {url}: {e}")
        return None
    except IOError as e:
        # Disk-level failures while saving (permissions, disk full, ...).
        print(f"Error saving file to {local_filename}: {e}")
        return None
151
 
152
# Agent-facing tool wrapping download_file_from_url.
# NOTE: a LangChain Tool passes its func a SINGLE string input, so the
# description deliberately documents one URL input rather than echoing the
# function's two-parameter docstring (the previous Args/Returns copy-paste
# would mislead the agent into passing multiple arguments).
file_download_tool = Tool(
    name="file_download_tool",
    description="""
    Downloads a file from a given URL and saves it locally, returning the
    absolute path of the saved file (or None if the download failed).
    Useful for retrieving files (audio, code, data, ...) that other tools
    then read from local disk.
    Input should be a single, valid URL (e.g., 'https://example.com/file.mp3').
    The local filename is inferred automatically from the URL.
    """,
    func=download_file_from_url,
)
172
 
173
+
174
  import speech_recognition as sr
175
  from pydub import AudioSegment
176
  import os