Spaces:
Sleeping
Sleeping
File size: 3,464 Bytes
4fc93b8 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 | import logging
import httpx
from app.core.config import get_settings
from app.utils.exceptions import (
FileDownloadError,
InvalidURLError,
DownloadTimeoutError,
FileSizeError,
)
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
async def download_file(file_url: str) -> bytes:
    """
    Asynchronously download a file from the given URL.

    The response body is streamed and the size limit is enforced while
    reading, so an oversized file is rejected as soon as the limit is
    crossed instead of after the whole body has been buffered.

    Args:
        file_url: The URL of the file to download

    Returns:
        The file contents as bytes

    Raises:
        InvalidURLError: If the URL is invalid
        DownloadTimeoutError: If download times out
        FileSizeError: If file size exceeds limit
        FileDownloadError: If the server does not answer with status 200
            (the status code is attached), or if the download fails for
            other reasons
    """
    settings = get_settings()
    max_size = settings.MAX_FILE_SIZE
    logger.info(f"Starting file download from: {file_url}")

    try:
        async with httpx.AsyncClient(timeout=settings.DOWNLOAD_TIMEOUT) as client:
            # Stream so that headers can be inspected before any body bytes
            # are pulled into memory.
            async with client.stream(
                "GET", file_url, follow_redirects=True
            ) as response:
                # Check for HTTP errors
                if response.status_code != 200:
                    logger.error(
                        f"Failed to download file. Status code: {response.status_code}"
                    )
                    raise FileDownloadError(
                        f"Failed to download file. Server returned status code {response.status_code}",
                        status_code=response.status_code,
                    )

                # Check content length header. The header is advisory only:
                # a missing or malformed value is ignored and the streamed
                # byte count below remains the authoritative check.
                content_length = response.headers.get("content-length")
                try:
                    declared_size = int(content_length) if content_length else None
                except ValueError:
                    declared_size = None
                if declared_size is not None and declared_size > max_size:
                    logger.error(
                        f"File size exceeds maximum allowed size: {content_length} bytes"
                    )
                    raise FileSizeError(
                        f"File size exceeds maximum allowed size of {max_size} bytes"
                    )

                # Check actual content size while reading, aborting as soon
                # as the limit is exceeded.
                buffer = bytearray()
                async for chunk in response.aiter_bytes():
                    buffer.extend(chunk)
                    if len(buffer) > max_size:
                        logger.error(
                            f"Downloaded file exceeds maximum size: {len(buffer)} bytes"
                        )
                        raise FileSizeError(
                            f"File size exceeds maximum allowed size of {max_size} bytes"
                        )

        file_bytes = bytes(buffer)
        logger.info(
            f"File download completed successfully. Size: {len(file_bytes)} bytes"
        )
        return file_bytes

    except (FileDownloadError, FileSizeError, InvalidURLError, DownloadTimeoutError):
        # Re-raise custom exceptions untouched. FileDownloadError is listed
        # so the non-200 error keeps its message and status_code instead of
        # being rewritten by the generic handler at the bottom.
        raise
    except httpx.InvalidURL as e:
        logger.error(f"Invalid URL provided: {file_url} - {str(e)}")
        raise InvalidURLError("Invalid URL format") from e
    except httpx.TimeoutException as e:
        logger.error(f"Download timeout for URL: {file_url}")
        raise DownloadTimeoutError(
            "File download timed out. Please try again with a faster source."
        ) from e
    except httpx.RequestError as e:
        logger.error(f"Failed to download file from {file_url}: {str(e)}")
        raise FileDownloadError(
            "Failed to download file from the provided URL. Please check the URL and try again."
        ) from e
    except Exception as e:
        logger.error(f"Unexpected error during file download: {str(e)}")
        raise FileDownloadError(
            "An unexpected error occurred during file download."
        ) from e
|