| # import os | |
| # import pandas as pd | |
| # from PIL import Image, ImageOps | |
| # import numpy as np | |
| # from tqdm import tqdm | |
| # from multiprocessing import Pool, cpu_count | |
| # # 读取CSV文件 | |
| # csv_path = '/data/cjm/FungiCLEF2024/Dataset/06_new_train_valmetadata.csv' | |
| # data = pd.read_csv(csv_path) | |
| # # 设置根目录 | |
| # root_dir = '/data/cjm/FungiCLEF2024/Dataset/DF20_21_300' | |
| # # 过滤poisonous为1的数据 | |
| # poisonous_data = data[data['poisonous'] == 1] | |
| # # 创建保存增强数据的DataFrame,并包含原始数据 | |
| # new_data = data.copy() | |
| # # 定义数据增强函数 | |
| # def augment_image(args): | |
| # row, root_dir = args | |
| # image_path = row['image_path'] | |
| # full_path = os.path.join(root_dir, image_path) | |
| # augmented_rows = [] | |
| # if os.path.exists(full_path): | |
| # image = Image.open(full_path) | |
| # w, h = image.size | |
| # # 定义旋转和翻转操作 | |
| # transformations = { | |
| # 'r90': image.rotate(90, expand=True), | |
| # 'r180': image.rotate(180, expand=True), | |
| # 'r270': image.rotate(270, expand=True), | |
| # 'fh': ImageOps.mirror(image), | |
| # 'fv': ImageOps.flip(image), | |
| # } | |
| # for suffix, img in transformations.items(): | |
| # # 裁剪图片以去除旋转后的黑边 | |
| # if suffix in ['r90', 'r270']: | |
| # img = img.crop((0, 0, h, w)) | |
| # new_image_path = os.path.splitext(image_path)[0] + f'_{suffix}.JPG' | |
| # new_full_path = os.path.join(root_dir, new_image_path) | |
| # img.save(new_full_path) | |
| # new_row = row.copy() | |
| # new_row['image_path'] = new_image_path | |
| # augmented_rows.append(new_row) | |
| # return augmented_rows | |
| # # 准备多进程处理 | |
| # num_processes = cpu_count() | |
| # pool = Pool(processes=num_processes) | |
| # # 使用tqdm显示进度 | |
| # augmented_data = [] | |
| # for augmented_rows in tqdm(pool.imap_unordered(augment_image, [(row, root_dir) for _, row in poisonous_data.iterrows()]), total=len(poisonous_data)): | |
| # augmented_data.extend(augmented_rows) | |
| # # 关闭进程池 | |
| # pool.close() | |
| # pool.join() | |
| # # 将增强后的数据添加到new_data中 | |
| # new_data = new_data.append(augmented_data, ignore_index=True) | |
| # # 将数据保存到新的CSV文件中 | |
| # new_csv_path = '/data/cjm/FungiCLEF2024/Dataset/07_new_train_valmetadata.csv' | |
| # new_data.to_csv(new_csv_path, index=False) | |
| import os | |
| import pandas as pd | |
| from PIL import Image, ImageOps | |
| import numpy as np | |
| from tqdm import tqdm | |
| from multiprocessing import Pool, cpu_count | |
| import random | |
# Input locations: the metadata table, and the directory against which the
# table's 'image_path' values are resolved.
csv_path = '/data/cjm/FungiCLEF2024/Dataset/06_new_train_valmetadata.csv'
root_dir = '/data/cjm/FungiCLEF2024/Dataset/DF20_21_300'

# Load the metadata table.
data = pd.read_csv(csv_path)

# The output table starts as a full copy of the original rows; rows for the
# augmented images are added to it later.
new_data = data.copy()

# Only rows whose 'poisonous' column equals 1 are selected for augmentation.
poisonous_data = data.loc[data['poisonous'].eq(1)]
# Data augmentation worker: handles one metadata row per call.
def augment_image(args):
    """Write augmented copies of one image and return their metadata rows.

    For an existing source image this saves, next to the original file:

    * four random crops covering 70-80% of each side, named
      ``<stem>_crop<scale>.JPG`` where ``<scale>`` is the random factor used;
    * three rotations (``_r90``, ``_r180``, ``_r270``) and two flips
      (``_fh`` horizontal mirror, ``_fv`` vertical flip).

    Args:
        args: A ``(row, root_dir)`` pair. ``row`` is a metadata record with
            an ``'image_path'`` entry that is joined onto ``root_dir``. Both
            values travel in one tuple because the function is dispatched
            through ``Pool.imap_unordered``, which passes a single argument.

    Returns:
        A list holding one copy of ``row`` per image written, each with
        ``'image_path'`` replaced by the new file's relative path (crops
        first, then rotations/flips). The list is empty when the source
        image does not exist.
    """
    row, root_dir = args
    image_path = row['image_path']
    full_path = os.path.join(root_dir, image_path)
    augmented_rows = []

    if not os.path.exists(full_path):
        # Best effort: rows whose image is missing are skipped silently.
        return augmented_rows

    stem = os.path.splitext(image_path)[0]

    def save_variant(img, suffix):
        # Persist one augmented image and record a metadata row for it.
        new_image_path = f'{stem}_{suffix}.JPG'
        img.save(os.path.join(root_dir, new_image_path))
        new_row = row.copy()
        new_row['image_path'] = new_image_path
        augmented_rows.append(new_row)

    # The context manager closes the underlying file once all variants are
    # written; otherwise every processed image leaks a file handle in the
    # long-lived worker process.
    with Image.open(full_path) as image:
        w, h = image.size

        # Random crops: same scale factor on both axes, random position.
        for _ in range(4):
            rand = random.uniform(0.7, 0.8)
            new_w = int(w * rand)
            new_h = int(h * rand)
            left = random.randint(0, w - new_w)
            top = random.randint(0, h - new_h)
            cropped_image = image.crop((left, top, left + new_w, top + new_h))
            save_variant(cropped_image, f'crop{rand}')

        # Rotations and flips. With expand=True a 90/270 degree rotation
        # already yields an exactly (h, w) sized image with no border, so no
        # follow-up crop is required.
        transformations = {
            'r90': image.rotate(90, expand=True),
            'r180': image.rotate(180, expand=True),
            'r270': image.rotate(270, expand=True),
            'fh': ImageOps.mirror(image),
            'fv': ImageOps.flip(image),
        }
        for suffix, img in transformations.items():
            save_variant(img, suffix)

    return augmented_rows
# Run the augmentation in parallel and write the extended metadata table.
#
# The __main__ guard is required by multiprocessing: under the spawn and
# forkserver start methods every worker re-imports this module, and without
# the guard each worker would try to create its own pool.
if __name__ == '__main__':
    num_processes = cpu_count()

    # One (row, root_dir) task per poisonous row; built once so the same list
    # provides the progress-bar total.
    tasks = [(row, root_dir) for _, row in poisonous_data.iterrows()]

    augmented_data = []
    # The context manager tears the pool down even if a worker raises.
    with Pool(processes=num_processes) as pool:
        # tqdm shows progress; results arrive in completion order.
        for augmented_rows in tqdm(pool.imap_unordered(augment_image, tasks), total=len(tasks)):
            augmented_data.extend(augmented_rows)
        # Normal path: let the workers finish and exit cleanly.
        pool.close()
        pool.join()

    # Add the augmented rows to the output table. DataFrame.append was
    # removed in pandas 2.0; pd.concat is the supported replacement.
    if augmented_data:
        new_data = pd.concat([new_data, pd.DataFrame(augmented_data)], ignore_index=True)

    # Save the combined table to a new CSV file.
    new_csv_path = '/data/cjm/FungiCLEF2024/Dataset/07_new_train_valmetadata.csv'
    new_data.to_csv(new_csv_path, index=False)