Spaces:
Sleeping
Sleeping
| import os | |
| import requests | |
| from application.utils.logger import get_logger | |
| from langchain_core.tools import tool | |
| logger = get_logger() | |
def download_pdf(filename: str, url: str, save_path: str = "reports", overwrite: bool = False):
    """
    Download a PDF from ``url`` and save it locally as ``save_path/filename``.

    Use this tool AFTER get_sustainability_report_pdf has returned a valid PDF link or if user provides the PDF link.

    The body is streamed to a temporary ``.part`` file and moved into place
    only after the transfer completes, so an interrupted download never
    leaves a truncated file that a later call would mistake for a finished one.

    Args:
        filename (str): The name to save the PDF as (must end with .pdf).
            Only the final path component is used; directory parts are dropped.
        url (str): The direct URL to the PDF file.
        save_path (str): The directory to save the PDF into (default: "reports").
        overwrite (bool): Whether to overwrite the file if it already exists.

    Returns:
        str | None: The path to the saved (or already existing) file if
        successful, otherwise None. Errors are logged, never raised.
    """
    try:
        # The filename may come from a model or a user: keep only the last
        # path component so the file cannot be written outside save_path.
        safe_name = os.path.basename(filename)
        if safe_name != filename:
            logger.warning(f"Ignoring directory components in filename: {filename}")

        if not safe_name.lower().endswith(".pdf"):
            logger.warning(f"Filename does not end with .pdf: {filename}")
            return None

        os.makedirs(save_path, exist_ok=True)
        full_path = os.path.join(save_path, safe_name)

        if os.path.exists(full_path) and not overwrite:
            logger.info(f"File already exists, skipping download: {full_path}")
            return full_path

        logger.info(f"Starting download from {url}")
        partial_path = full_path + ".part"
        try:
            # The context manager releases the streamed connection even when
            # the status check or the transfer fails part-way through.
            with requests.get(url, stream=True, timeout=20) as response:
                response.raise_for_status()
                with open(partial_path, "wb") as file:
                    for chunk in response.iter_content(chunk_size=8192):
                        if chunk:
                            file.write(chunk)
            # Publish the finished file in a single step.
            os.replace(partial_path, full_path)
        finally:
            # Best-effort cleanup of a leftover partial file. After a
            # successful os.replace the file is gone and this is a no-op.
            try:
                os.remove(partial_path)
            except OSError:
                pass

        logger.info(f"Successfully downloaded to: {full_path}")
        return full_path
    except requests.exceptions.Timeout:
        logger.error(f"Timeout while downloading {url}")
    except requests.exceptions.HTTPError as http_err:
        logger.error(f"HTTP error while downloading {url}: {http_err}")
    except requests.exceptions.RequestException as req_err:
        logger.error(f"Request error while downloading {url}: {req_err}")
    except Exception as e:
        # Tool boundary: report failure through the None return value
        # instead of propagating the exception to the caller.
        logger.error(f"Unexpected error: {e}")
    return None