chirmy
/

FungiCLEF2024

PyTorch

Model card Files Files and versions

xet

Community

chirmy committed on May 25, 2024

Commit

863fd3a

verified ·

1 Parent(s): 39fc4a0

Upload 07_data_augmentation.py

Browse files

Files changed (1) hide show

07_data_augmentation.py +171 -0

07_data_augmentation.py ADDED Viewed

	@@ -0,0 +1,171 @@

+# import os
+# import pandas as pd
+# from PIL import Image, ImageOps
+# import numpy as np
+# from tqdm import tqdm
+# from multiprocessing import Pool, cpu_count
+# # 读取CSV文件
+# csv_path = '/data/cjm/FungiCLEF2024/Dataset/06_new_train_valmetadata.csv'
+# data = pd.read_csv(csv_path)
+# # 设置根目录
+# root_dir = '/data/cjm/FungiCLEF2024/Dataset/DF20_21_300'
+# # 过滤poisonous为1的数据
+# poisonous_data = data[data['poisonous'] == 1]
+# # 创建保存增强数据的DataFrame，并包含原始数据
+# new_data = data.copy()
+# # 定义数据增强函数
+# def augment_image(args):
+#     row, root_dir = args
+#     image_path = row['image_path']
+#     full_path = os.path.join(root_dir, image_path)
+#     augmented_rows = []
+#     if os.path.exists(full_path):
+#         image = Image.open(full_path)
+#         w, h = image.size
+#         # 定义旋转和翻转操作
+#         transformations = {
+#             'r90': image.rotate(90, expand=True),
+#             'r180': image.rotate(180, expand=True),
+#             'r270': image.rotate(270, expand=True),
+#             'fh': ImageOps.mirror(image),
+#             'fv': ImageOps.flip(image),
+#         }
+#         for suffix, img in transformations.items():
+#             # 裁剪图片以去除旋转后的黑边
+#             if suffix in ['r90', 'r270']:
+#                 img = img.crop((0, 0, h, w))
+#             new_image_path = os.path.splitext(image_path)[0] + f'_{suffix}.JPG'
+#             new_full_path = os.path.join(root_dir, new_image_path)
+#             img.save(new_full_path)
+#             new_row = row.copy()
+#             new_row['image_path'] = new_image_path
+#             augmented_rows.append(new_row)
+#     return augmented_rows
+# # 准备多进程处理
+# num_processes = cpu_count()
+# pool = Pool(processes=num_processes)
+# # 使用tqdm显示进度
+# augmented_data = []
+# for augmented_rows in tqdm(pool.imap_unordered(augment_image, [(row, root_dir) for _, row in poisonous_data.iterrows()]), total=len(poisonous_data)):
+#     augmented_data.extend(augmented_rows)
+# # 关闭进程池
+# pool.close()
+# pool.join()
+# # 将增强后的数据添加到new_data中
+# new_data = new_data.append(augmented_data, ignore_index=True)
+# # 将数据保存到新的CSV文件中
+# new_csv_path = '/data/cjm/FungiCLEF2024/Dataset/07_new_train_valmetadata.csv'
+# new_data.to_csv(new_csv_path, index=False)
+import os
+import pandas as pd
+from PIL import Image, ImageOps
+import numpy as np
+from tqdm import tqdm
+from multiprocessing import Pool, cpu_count
+import random
+# 读取CSV文件
+csv_path = '/data/cjm/FungiCLEF2024/Dataset/06_new_train_valmetadata.csv'
+data = pd.read_csv(csv_path)
+# 设置根目录
+root_dir = '/data/cjm/FungiCLEF2024/Dataset/DF20_21_300'
+# 过滤poisonous为1的数据
+poisonous_data = data[data['poisonous'] == 1]
+# 创建保存增强数据的DataFrame，并包含原始数据
+new_data = data.copy()
+# 定义数据增强函数
+def augment_image(args):
+    row, root_dir = args
+    image_path = row['image_path']
+    full_path = os.path.join(root_dir, image_path)
+    augmented_rows = []
+    if os.path.exists(full_path):
+        image = Image.open(full_path)
+        w, h = image.size
+        # 定义旋转和翻转操作
+        transformations = {
+            'r90': image.rotate(90, expand=True),
+            'r180': image.rotate(180, expand=True),
+            'r270': image.rotate(270, expand=True),
+            'fh': ImageOps.mirror(image),
+            'fv': ImageOps.flip(image),
+        }
+        # 添加随机裁剪操作
+        for i in range(4):
+            rand = random.uniform(0.7, 0.8)
+            new_w = int(w * rand)
+            new_h = int(h * rand)
+            left = random.randint(0, w - new_w)
+            top = random.randint(0, h - new_h)
+            right = left + new_w
+            bottom = top + new_h
+            cropped_image = image.crop((left, top, right, bottom))
+            # cropped_image = cropped_image.resize((w, h))  # 调整回原始尺寸
+            new_image_path = os.path.splitext(image_path)[0] + f'_crop{rand}.JPG'
+            new_full_path = os.path.join(root_dir, new_image_path)
+            cropped_image.save(new_full_path)
+            new_row = row.copy()
+            new_row['image_path'] = new_image_path
+            augmented_rows.append(new_row)
+        for suffix, img in transformations.items():
+            # 裁剪图片以去除旋转后的黑边
+            if suffix in ['r90', 'r270']:
+                img = img.crop((0, 0, h, w))
+            new_image_path = os.path.splitext(image_path)[0] + f'_{suffix}.JPG'
+            new_full_path = os.path.join(root_dir, new_image_path)
+            img.save(new_full_path)
+            new_row = row.copy()
+            new_row['image_path'] = new_image_path
+            augmented_rows.append(new_row)
+    return augmented_rows
+# 准备多进程处理
+num_processes = cpu_count()
+pool = Pool(processes=num_processes)
+# 使用tqdm显示进度
+augmented_data = []
+for augmented_rows in tqdm(pool.imap_unordered(augment_image, [(row, root_dir) for _, row in poisonous_data.iterrows()]), total=len(poisonous_data)):
+    augmented_data.extend(augmented_rows)
+# 关闭进程池
+pool.close()
+pool.join()
+# 将增强后的数据添加到new_data中
+new_data = new_data.append(augmented_data, ignore_index=True)
+# 将数据保存到新的CSV文件中
+new_csv_path = '/data/cjm/FungiCLEF2024/Dataset/07_new_train_valmetadata.csv'
+new_data.to_csv(new_csv_path, index=False)