Spaces:
Runtime error
Runtime error
| import boto3 | |
| import os | |
| from pathlib import Path | |
| from dotenv import load_dotenv, find_dotenv | |
# Locate a ".env" file and load its variables into the process environment
# at import time, before any S3 client reads them via os.getenv.
load_dotenv(dotenv_path=find_dotenv(filename=".env"))
class S3Handler:
    """Convenience wrapper around a boto3 S3 client bound to a single bucket.

    Offers recursive folder upload and download between the local filesystem
    and a key prefix ("folder") in the bucket.
    """

    def __init__(self, bucket_name):
        """Create the underlying S3 client.

        Credentials and region are read from the ``AWS_ACCESS_KEY_ID``,
        ``AWS_SECRET_ACCESS_KEY`` and ``AWS_REGION`` environment variables;
        a variable that is unset is passed to boto3 as ``None``.

        Args:
            bucket_name (str): Bucket used by every upload/download call.
        """
        self.bucket_name = bucket_name
        self.s3 = boto3.client(
            "s3",
            aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
            aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
            region_name=os.getenv("AWS_REGION"),
        )

    def upload_folder(self, source_folder, dest_folder, filenames=None):
        """Upload specified files or all files from a local folder to an S3 folder.

        Object keys are always built with forward slashes, regardless of the
        local OS path separator, and trailing slashes on ``dest_folder`` are
        ignored so keys never contain an empty ``//`` segment.

        Args:
            source_folder (str): Local source folder path.
            dest_folder (str): Destination folder path in S3. An empty value
                uploads directly under the bucket root.
            filenames (list): Filenames to upload (relative to source_folder).
                If None, uploads every file found recursively. An empty list
                uploads nothing.
        """
        source_folder = Path(source_folder)
        key_prefix = str(dest_folder).rstrip("/")

        if filenames is not None:
            # Explicit selection: keep missing entries so they get reported.
            candidates = [source_folder / name for name in filenames]
        else:
            # Recursive walk: directories are containers, not upload targets.
            candidates = [p for p in source_folder.rglob("*") if p.is_file()]

        for file_path in candidates:
            if not file_path.is_file():
                print(f"File not found: {file_path}")
                continue
            relative_key = file_path.relative_to(source_folder).as_posix()
            s3_path = f"{key_prefix}/{relative_key}" if key_prefix else relative_key
            self.s3.upload_file(str(file_path), self.bucket_name, s3_path)
            print(f"Uploaded: {file_path} to {s3_path}")

    def download_folder(self, s3_folder, dest_folder):
        """Download all files from an S3 folder to a local folder.

        Only keys strictly below ``s3_folder`` are fetched: the listing prefix
        is terminated with ``/`` so that a sibling such as ``<s3_folder>2/``
        is not picked up. Keys ending in ``/`` (folder placeholders) are
        skipped, as are keys whose relative path would resolve outside
        ``dest_folder``.

        Args:
            s3_folder (str): Source folder in S3. An empty value lists the
                whole bucket.
            dest_folder (str): Local destination folder path.
        """
        dest_folder = Path(dest_folder).resolve()
        stripped = s3_folder.rstrip("/")
        prefix = f"{stripped}/" if stripped else ""

        paginator = self.s3.get_paginator("list_objects_v2")
        for page in paginator.paginate(Bucket=self.bucket_name, Prefix=prefix):
            # Pages without matching objects carry no "Contents" entry.
            for obj in page.get("Contents", []):
                s3_path = obj["Key"]
                # Skip folder itself if returned by S3
                if s3_path.endswith("/"):
                    continue

                # Compute relative path and local destination
                relative_path = Path(s3_path[len(prefix):].lstrip("/"))
                local_path = dest_folder / relative_path

                # Refuse keys (e.g. containing "..") that escape dest_folder.
                if dest_folder not in local_path.resolve().parents:
                    print(f"Skipped unsafe key: {s3_path}")
                    continue

                # Create necessary local directories
                local_path.parent.mkdir(parents=True, exist_ok=True)

                # Download file
                self.s3.download_file(self.bucket_name, s3_path, str(local_path))
                print(f"Downloaded: {s3_path} to {local_path}")
# Example: push a local folder to S3 and pull it back when run as a script
if __name__ == "__main__":
    bucket = "deep-bucket-s3"
    folder = "checkpoints_test"

    # Bind the handler to the target bucket
    s3_handler = S3Handler(bucket_name=bucket)

    # No filename list is passed, so every file under the folder is uploaded
    s3_handler.upload_folder(folder, folder)

    # Fetch the same S3 folder back into the local folder of the same name
    s3_handler.download_folder(folder, folder)