Update s3_migration.py
Browse files- s3_migration.py +47 -0
s3_migration.py
CHANGED
|
@@ -40,3 +40,50 @@ def main():
|
|
| 40 |
|
| 41 |
if __name__ == "__main__":
|
| 42 |
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
|
| 41 |
if __name__ == "__main__":
|
| 42 |
main()
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
import boto3
|
| 60 |
+
import multiprocessing
|
| 61 |
+
import os
|
| 62 |
+
|
| 63 |
+
def upload_file_to_s3(file_path, s3_bucket, s3_key):
    """Upload a single local file to S3.

    A fresh boto3 client is created per call on purpose: this function is
    invoked inside multiprocessing worker processes, and boto3 clients are
    not safely shareable across process boundaries.

    Args:
        file_path: Path of the local file to upload.
        s3_bucket: Name of the destination S3 bucket.
        s3_key: Object key to store the file under.
    """
    client = boto3.client('s3')
    client.upload_file(file_path, s3_bucket, s3_key)
|
| 66 |
+
|
| 67 |
+
def migrate_files_to_s3(folder_path, s3_bucket, s3_folder, num_processes):
    """Upload every regular file in *folder_path* to S3 in parallel.

    Each file ``folder_path/name`` is uploaded to
    ``s3://s3_bucket/s3_folder/name`` using a pool of worker processes.

    Args:
        folder_path: Local directory whose files are migrated (not recursive).
        s3_bucket: Destination S3 bucket name.
        s3_folder: Key prefix ("folder") inside the bucket.
        num_processes: Number of parallel upload processes.

    Raises:
        Whatever boto3/OS error any individual upload raises — failures are
        propagated instead of being silently dropped.
    """
    # Only regular files: os.listdir also returns subdirectories, which
    # would make upload_file fail.
    files = [
        os.path.join(folder_path, name)
        for name in os.listdir(folder_path)
        if os.path.isfile(os.path.join(folder_path, name))
    ]
    if not files:
        # Nothing to migrate; also avoids creating a pointless pool.
        return

    # Precompute (file_path, bucket, key) argument tuples for the workers.
    tasks = [
        (path, s3_bucket, os.path.join(s3_folder, os.path.basename(path)))
        for path in files
    ]

    # Use the module-level upload_file_to_s3 as the worker: a nested
    # function cannot be pickled by multiprocessing, which made the
    # previous apply_async(worker, ...) approach fail (spawn) or silently
    # swallow the error (fork, since the AsyncResults were never .get()-ed).
    # starmap also handles chunking itself, so there is no division by a
    # zero chunk size when len(files) < num_processes, and it re-raises
    # any worker exception in the parent.
    with multiprocessing.Pool(processes=num_processes) as pool:
        pool.starmap(upload_file_to_s3, tasks)
|
| 82 |
+
|
| 83 |
+
if __name__ == '__main__':
    # Migration run configuration: local source folder, destination bucket,
    # and key prefix. Passed as keyword arguments for readability.
    migrate_files_to_s3(
        folder_path='PRT',
        s3_bucket='MPT-2',
        s3_folder='Ortus',
        num_processes=4,  # Adjust the number of processes as needed
    )
|