Code / s3_migration.py
iasjkk's picture
Update s3_migration.py
8f66156 verified
import os
import boto3
from multiprocessing import Pool, cpu_count
# --- Settings -------------------------------------------------------------
# Directory on the local machine whose files are uploaded.
LOCAL_FOLDER = 'PRT'
# Destination S3 bucket (placeholder; replace with the real bucket name).
BUCKET_NAME = 'your-s3-bucket-name'
# AWS region of the bucket, e.g. 'us-east-1' (placeholder).
REGION = 'your-region'

# Module-level S3 client used by upload_file(); it is created once, at import.
s3_client = boto3.client('s3', region_name=REGION)
def upload_file(file_path):
    """
    Upload a single local file to the configured S3 bucket.

    The object key is the file's path relative to LOCAL_FOLDER, with the
    local path separator converted to "/" so the key is the same no matter
    which operating system runs the upload.

    Failures are reported on stdout rather than raised, so one bad file does
    not abort a batch driven through ``Pool.map``.

    :param file_path: Path of the file to upload
    :return: True if the upload succeeded, False if it raised an error
    """
    # os.path.relpath returns OS-native separators (backslashes on Windows);
    # S3 keys use "/" as the delimiter, so normalise before uploading.
    s3_key = os.path.relpath(file_path, LOCAL_FOLDER).replace(os.sep, "/")
    try:
        print(f"Uploading {file_path} to s3://{BUCKET_NAME}/{s3_key}")
        s3_client.upload_file(file_path, BUCKET_NAME, s3_key)
    except Exception as e:
        # Deliberately broad: this is the per-file boundary of a batch job.
        print(f"Error uploading {file_path}: {e}")
        return False
    print(f"Upload of {file_path} completed successfully.")
    return True
def main():
    """
    Upload every regular file directly inside LOCAL_FOLDER to S3 in parallel.

    Only the top level of LOCAL_FOLDER is scanned; subdirectories are skipped,
    not recursed into. Each file is handed to ``upload_file`` in a worker
    process. If the folder contains no files the function returns without
    creating a pool.
    """
    # Collect the regular files (not directories) at the top of LOCAL_FOLDER.
    candidates = (
        os.path.join(LOCAL_FOLDER, name) for name in os.listdir(LOCAL_FOLDER)
    )
    files_to_upload = [path for path in candidates if os.path.isfile(path)]

    # Pool(processes=0) raises ValueError, so an empty folder has to
    # short-circuit before the pool size is computed.
    if not files_to_upload:
        print(f"No files found in {LOCAL_FOLDER}; nothing to upload.")
        return

    # Never start more workers than there are files to hand out.
    num_processes = min(cpu_count(), len(files_to_upload))

    # Use multiprocessing Pool to upload files in parallel
    with Pool(processes=num_processes) as pool:
        pool.map(upload_file, files_to_upload)


if __name__ == "__main__":
    main()
import boto3
import multiprocessing
import os
def upload_file_to_s3(file_path, s3_bucket, s3_key):
    """
    Upload one local file to an S3 bucket.

    A new boto3 S3 client is created on every call.

    :param file_path: Path of the local file to send
    :param s3_bucket: Name of the destination bucket
    :param s3_key: Object key to store the file under
    """
    client = boto3.client('s3')
    client.upload_file(Filename=file_path, Bucket=s3_bucket, Key=s3_key)
def _upload_chunk(files, s3_bucket, s3_folder):
    """Upload each path in *files* to *s3_bucket* under the *s3_folder* prefix.

    Defined at module level (not nested) because multiprocessing must pickle
    the task function to send it to a worker, and local functions cannot be
    pickled.
    """
    for file in files:
        # os.path.join uses the OS separator; S3 keys use "/" as delimiter.
        s3_key = os.path.join(s3_folder, os.path.basename(file)).replace(os.sep, "/")
        upload_file_to_s3(file, s3_bucket, s3_key)


def migrate_files_to_s3(folder_path, s3_bucket, s3_folder, num_processes):
    """
    Upload the files directly inside *folder_path* to S3 using a process pool.

    The files are split into roughly equal chunks, one per worker, and each
    chunk is uploaded sequentially inside its worker. Subdirectories of
    *folder_path* are skipped. Every object key is
    ``<s3_folder>/<file name>``.

    :param folder_path: Local directory whose files are uploaded
    :param s3_bucket: Name of the destination bucket
    :param s3_folder: Key prefix ("folder") to place the files under
    :param num_processes: Upper bound on the number of worker processes
    :raises Exception: the first error raised by an upload in any worker is
        re-raised here once all workers have finished
    """
    paths = (os.path.join(folder_path, name) for name in os.listdir(folder_path))
    # Directories cannot be uploaded as objects, so keep regular files only.
    files = [path for path in paths if os.path.isfile(path)]
    if not files:
        return

    # At least one worker, and never more workers than files.
    workers = max(1, min(num_processes, len(files)))
    # Ceiling division: the chunk size is always >= 1 (a zero step would make
    # range() raise) and at most `workers` chunks are produced.
    chunk_size = -(-len(files) // workers)
    chunks = [files[i:i + chunk_size] for i in range(0, len(files), chunk_size)]

    with multiprocessing.Pool(processes=workers) as pool:
        pending = [
            pool.apply_async(_upload_chunk, args=(chunk, s3_bucket, s3_folder))
            for chunk in chunks
        ]
        pool.close()
        pool.join()
        # apply_async stores worker exceptions on the result object; get()
        # re-raises them so failed uploads are not silently dropped.
        for result in pending:
            result.get()
if __name__ == '__main__':
    # Script entry point: the migration settings are hard-coded here.
    folder_path = 'PRT'
    s3_bucket = 'MPT-2'
    s3_folder = 'Ortus'
    # Size of the worker pool; tune to suit the machine and the workload.
    num_processes = 4
    migrate_files_to_s3(
        folder_path=folder_path,
        s3_bucket=s3_bucket,
        s3_folder=s3_folder,
        num_processes=num_processes,
    )
import boto3
# Print every object key in the bucket that ends with "/".
# NOTE(review): presumably these are the "folder" placeholder objects; confirm
# that is what this listing is meant to find.
s3 = boto3.client('s3')

# A single list_objects_v2 call returns at most 1000 keys, so walk every page
# instead of reading only the first response.
paginator = s3.get_paginator('list_objects_v2')
for page in paginator.paginate(Bucket='your-bucket-name', Prefix=''):
    # 'Contents' is absent from a page when no keys match, so default to an
    # empty list rather than raising KeyError on an empty bucket.
    for obj in page.get('Contents', []):
        if obj['Key'].endswith('/'):
            print(obj['Key'])