Spaces:
Sleeping
Sleeping
| import boto3 | |
| from botocore.exceptions import ClientError, NoCredentialsError | |
| import os | |
def upload_folder_to_s3(local_dir, prefix=''):
    """Recursively upload every file under *local_dir* to the S3 bucket
    named by the AWS_BUCKET_NAME environment variable.

    Subdirectories are mirrored as zero-byte placeholder objects so the
    folder structure is visible in S3 browsers.

    Args:
        local_dir: Path of the local directory tree to upload.
        prefix: Optional key prefix prepended to every uploaded object.

    Raises:
        botocore.exceptions.ClientError: if any S3 API call fails.
    """
    s3_bucket = os.getenv("AWS_BUCKET_NAME")
    s3_client = boto3.client('s3')

    def _s3_key(relative_path):
        # S3 keys always use '/', regardless of the local OS separator
        # (os.path.join would produce backslash keys on Windows).
        rel = relative_path.replace(os.sep, '/')
        return f"{prefix.rstrip('/')}/{rel}" if prefix else rel

    for root, dirnames, filenames in os.walk(local_dir):
        for dirname in dirnames:
            dir_path = os.path.join(root, dirname)
            relative_path = os.path.relpath(dir_path, local_dir)
            # put_object on an existing key simply overwrites it; it never
            # raises 404, so no "already exists" handling is needed (the
            # original 404 check was unreachable dead code).
            s3_client.put_object(Bucket=s3_bucket, Key=_s3_key(relative_path))
        for filename in filenames:
            file_path = os.path.join(root, filename)
            key = _s3_key(os.path.relpath(file_path, local_dir))
            s3_client.upload_file(file_path, s3_bucket, key)
            print(f"Uploaded: {file_path} -> s3://{s3_bucket}/{key}")
def check_file_exists_in_s3(file_path):
    """Return True if *file_path* exists as an object key in the bucket
    named by AWS_BUCKET_NAME, False if S3 reports 404 (not found).

    Any other S3 client error is re-raised to the caller.
    """
    client = boto3.client('s3')
    bucket = os.getenv("AWS_BUCKET_NAME")
    try:
        # HEAD request: cheap existence probe, no object body transferred.
        client.head_object(Bucket=bucket, Key=file_path)
    except ClientError as e:
        if e.response['Error']['Code'] == '404':
            return False
        raise e
    return True
def download_files_from_s3(local_folder, file_path_list):
    """Download the S3 objects whose keys appear in *file_path_list* from
    the AWS_BUCKET_NAME bucket into *local_folder*, recreating each key's
    directory structure locally.

    Per-object download failures are reported and skipped; listing/credential
    failures are reported and abort the run. Nothing is raised.

    Args:
        local_folder: Local destination directory (created as needed).
        file_path_list: Iterable of exact S3 keys to download.
    """
    s3 = boto3.client('s3')
    bucket_name = os.getenv("AWS_BUCKET_NAME")
    folder_prefix = ''
    # Hoist membership testing to a set: O(1) per listed key instead of
    # scanning the list for every object in the bucket.
    wanted_keys = set(file_path_list)
    try:
        # List objects in the S3 bucket (paginated; buckets can exceed 1000 keys)
        paginator = s3.get_paginator('list_objects_v2')
        page_iterator = paginator.paginate(Bucket=bucket_name, Prefix=folder_prefix)
        for page in page_iterator:
            for obj in page.get('Contents', []):
                key = obj['Key']
                if key not in wanted_keys:
                    continue
                local_path = os.path.join(local_folder, key)
                parent_dir = os.path.dirname(local_path)
                # Guard the empty-dirname case: os.makedirs('') raises
                # FileNotFoundError even with exist_ok=True, which would
                # abort every remaining download via the outer handler.
                if parent_dir:
                    os.makedirs(parent_dir, exist_ok=True)
                try:
                    print(f"Downloading: {key} -> {local_path}")
                    s3.download_file(bucket_name, key, local_path)
                    print(f"Downloaded: {local_path}")
                except Exception as e:
                    # Best-effort: report this object and keep going.
                    print(f"Error downloading {key}: {e}")
    except NoCredentialsError:
        print("No AWS credentials found.")
    except Exception as e:
        print(f"An error occurred: {e}")
def download_folder_from_s3(local_folder, aws_folder_prefix):
    """Download every object under *aws_folder_prefix* from the
    AWS_BUCKET_NAME bucket into *local_folder*, stripping the prefix and
    recreating the remaining directory structure locally.

    Zero-byte objects (S3 "directory" placeholders) are skipped. Known
    per-object errors are reported; unexpected ones are reported and abort
    the run via the outer handler.

    Args:
        local_folder: Local destination directory (created as needed).
        aws_folder_prefix: Key prefix identifying the S3 "folder".

    Raises:
        ValueError: if AWS_BUCKET_NAME is not set.
    """
    s3 = boto3.client('s3')
    bucket_name = os.getenv("AWS_BUCKET_NAME")
    if not bucket_name:
        raise ValueError("AWS_BUCKET_NAME environment variable is not set")
    try:
        # Create the local folder if it doesn't exist
        os.makedirs(local_folder, exist_ok=True)
        # List objects in the S3 bucket (paginated)
        paginator = s3.get_paginator('list_objects_v2')
        page_iterator = paginator.paginate(Bucket=bucket_name, Prefix=aws_folder_prefix)
        for page in page_iterator:
            for obj in page.get('Contents', []):
                key = obj['Key']
                # Zero-byte keys are folder placeholders, not real files.
                if obj['Size'] == 0:
                    continue
                local_path = os.path.join(local_folder, os.path.relpath(key, aws_folder_prefix))
                parent_dir = os.path.dirname(local_path)
                # Guard: os.makedirs('') raises FileNotFoundError even with
                # exist_ok=True when the key has no directory component.
                if parent_dir:
                    os.makedirs(parent_dir, exist_ok=True)
                try:
                    print(f"Downloading: {key} -> {local_path}")
                    s3.download_file(bucket_name, key, local_path)
                    print(f"Downloaded: {local_path}")
                except ClientError as e:
                    error_code = e.response['Error']['Code']
                    if error_code == 'AccessDenied':
                        print(f"Permission denied when trying to download {key}: {e}")
                    elif error_code == 'NoSuchKey':
                        print(f"The object {key} does not exist in the bucket.")
                    else:
                        # Unexpected client error: report it, then re-raise so
                        # the outer handler stops the run. (The original also
                        # tested error_code == "" — an S3 error code is never
                        # empty, so that branch was dead and is removed.)
                        print(f"An error occurred while downloading {key}: {e}")
                        raise e
    except Exception as e:
        print(f"An unexpected error occurred : {e}")
def delete_s3_folder(folder_path):
    """Delete every object under *folder_path* in the AWS_BUCKET_NAME bucket.

    Deletions are batched per listing page: DeleteObjects accepts at most
    1000 keys per request, and list_objects_v2 returns at most 1000 keys
    per page, so one delete call per page always respects the API limit.
    (The original accumulated all keys into a single call, which fails for
    prefixes containing more than 1000 objects.)

    Args:
        folder_path: Key prefix identifying the S3 "folder" to delete.
    """
    bucket_name = os.getenv("AWS_BUCKET_NAME")
    s3_client = boto3.client('s3')
    try:
        # List objects in the S3 bucket
        paginator = s3_client.get_paginator('list_objects_v2')
        page_iterator = paginator.paginate(Bucket=bucket_name, Prefix=folder_path)
        total_deleted = 0
        for page in page_iterator:
            batch = []
            for obj in page.get('Contents', []):
                print(f"Deleting: {obj['Key']}")
                batch.append({'Key': obj['Key']})
            if batch:
                s3_client.delete_objects(Bucket=bucket_name, Delete={'Objects': batch})
                total_deleted += len(batch)
        if total_deleted > 0:
            print(f"Deleted {total_deleted} objects in folder '{folder_path}'")
        else:
            print(f"No objects found in folder '{folder_path}'")
    except ClientError as e:
        print(f"An error occurred: {e}")
def list_s3_objects(prefix=''):
    """Print key, size, last-modified timestamp, ETag, and file extension
    for every object under *prefix* in the AWS_BUCKET_NAME bucket.

    Listing errors are caught and reported; nothing is raised or returned.
    """
    bucket = os.getenv("AWS_BUCKET_NAME")
    client = boto3.client('s3')
    try:
        # Paginate so buckets with more than 1000 objects are fully listed.
        pages = client.get_paginator('list_objects_v2').paginate(
            Bucket=bucket, Prefix=prefix
        )
        for page in pages:
            for entry in page.get('Contents', []):
                key = entry['Key']
                print(f"Key: {key}")
                print(f"Size: {entry['Size']} bytes")
                print(f"Last Modified: {entry['LastModified']}")
                print(f"ETag: {entry['ETag']}")
                print(f"File Extension: {os.path.splitext(key)[-1]}")
                print("---")
    except ClientError as e:
        print(f"An error occurred: {e}")