# Hosting-page residue captured with the script: "Spaces: Sleeping" (not part of the code).
import os

from datasets import load_dataset
# 1. Load the dataset, shuffle it with a fixed seed so the export is
#    reproducible, and keep only a small subset of it.
dataset = load_dataset("Hemg/AI-Generated-vs-Real-Images-Datasets", split="train")
dataset = dataset.shuffle(seed=42)

# Never request more rows than the split actually holds: select() rejects
# out-of-range indices, so clamp the subset size to the dataset length.
subset_size = min(3000, len(dataset))
small_subset = dataset.select(range(subset_size))

# 2. Split ratios. Whatever remains after train and val becomes the test set.
train_float, val_float = 0.8, 0.1

# Exclusive end indices of the train and val slices inside the subset
# (2400 and 2700 when the full 3000-image subset is available).
train_end = int(subset_size * train_float)
val_end = train_end + int(subset_size * val_float)
# 3. Create the folder layout: <base_dir>/<split>/<category>/.
base_dir = "data"
splits = ['train', 'val', 'test']
categories = ['ai', 'real']

for split_name in splits:
    for category_name in categories:
        # exist_ok=True keeps reruns from failing on folders that already exist.
        os.makedirs(os.path.join(base_dir, split_name, category_name), exist_ok=True)
# 4. Walk the subset and save every image into <base_dir>/<split>/<category>/.

# Resolve which label id means "real" from the dataset's own class names
# rather than trusting a hard-coded convention, so the 'ai' and 'real'
# folders cannot end up silently swapped.
# NOTE(review): assumes the 'label' column is a ClassLabel whose names
# mention "real" for the real class -- confirm against the dataset card.
label_names = getattr(small_subset.features.get("label"), "names", None) or []
real_label_ids = {
    idx for idx, name in enumerate(label_names) if "real" in name.lower()
}
if len(real_label_ids) != 1:
    # Names missing or ambiguous: keep the original assumption
    # (0 = Real, 1 = Fake/AI).
    real_label_ids = {0}

# Image modes the PNG writer accepts; anything else (e.g. CMYK) is converted
# to RGB first so one odd image does not abort the whole export.
# NOTE(review): assumes item['image'] is a PIL image (it is saved via .save()).
png_modes = {"1", "L", "LA", "I", "I;16", "P", "RGB", "RGBA"}

for i, item in enumerate(small_subset):
    image = item['image']
    label = item['label']

    # Pick the split from the position in the (already shuffled) subset.
    if i < train_end:
        current_split = 'train'
    elif i < val_end:
        current_split = 'val'
    else:
        current_split = 'test'

    # Pick the category folder from the resolved label mapping.
    category = "real" if label in real_label_ids else "ai"

    if image.mode not in png_modes:
        image = image.convert("RGB")

    # Save the image; the running index keeps file names unique.
    file_path = f"{base_dir}/{current_split}/{category}/img_{i}.png"
    image.save(file_path)
# Report the real split sizes, derived from the split boundaries instead of
# hard-coded numbers that drift whenever the subset size or ratios change.
train_count = train_end
val_count = val_end - train_end
test_count = subset_size - val_end

# Hebrew: "Finished! The structure was created in folder '<base_dir>':"
print(f"הסתיים! המבנה נוצר בתיקיית '{base_dir}':")
print(f"- Train: {train_count} images")
print(f"- Val: {val_count} images")
print(f"- Test: {test_count} images")