# Hosting-page metadata captured with this file: Space status "Sleeping"; file size 1,677 bytes; id 239017e.
import os
import shutil
import random
from tqdm import tqdm
def split_dataset(train_dir, val_dir, split_ratio=0.1,
                  class_names=("real", "fake")):
    """Move a reproducible random fraction of each class into a validation tree.

    For every class, the regular files directly inside ``train_dir/<class>``
    are listed, shuffled with a fixed seed, and the first
    ``int(count * split_ratio)`` of them are moved (not copied) to
    ``val_dir/<class>``. Because files are moved, calling the function again
    takes a further fraction of whatever is still left in ``train_dir``.

    Args:
        train_dir: Directory holding one sub-folder per class.
        val_dir: Directory that receives the held-out files. A class
            sub-folder is created only when that class exists in
            ``train_dir``.
        split_ratio: Fraction of each class to move, from 0 to 1 inclusive.
            The resulting count is rounded down.
        class_names: Names of the class sub-folders to process.

    Raises:
        ValueError: If ``split_ratio`` is outside ``[0, 1]``.
    """
    # A negative ratio would turn the slice below into "everything except the
    # last few files", silently moving almost the whole class.
    if not 0 <= split_ratio <= 1:
        raise ValueError(
            f"split_ratio must be between 0 and 1, got {split_ratio!r}"
        )

    # Private generator: fixed seed for reproducibility without reseeding the
    # process-wide ``random`` state that other code may rely on.
    rng = random.Random(42)

    for class_name in class_names:
        src_folder = os.path.join(train_dir, class_name)
        dest_folder = os.path.join(val_dir, class_name)

        # Check the source first so a skipped class does not leave an empty
        # folder behind in the validation tree.
        if not os.path.isdir(src_folder):
            print(f"Warning: {src_folder} not found. Skipping {class_name}.")
            continue
        os.makedirs(dest_folder, exist_ok=True)

        # os.listdir returns names in arbitrary order, so sort before the
        # seeded shuffle; otherwise the same seed can pick different files.
        files = sorted(
            f for f in os.listdir(src_folder)
            if os.path.isfile(os.path.join(src_folder, f))
        )

        # Number of files to hold out, rounded down.
        split_index = int(len(files) * split_ratio)
        print(f"Class '{class_name}': Found {len(files)} training images.")
        print(f"Class '{class_name}': Splicing {split_index} images to the validation set...")

        rng.shuffle(files)
        val_files = files[:split_index]

        # Move the selected files into the validation directory.
        for file in tqdm(val_files, desc=f"Migrating {class_name} images"):
            src_path = os.path.join(src_folder, file)
            dest_path = os.path.join(dest_folder, file)
            shutil.move(src_path, dest_path)
        print(f"Class '{class_name}': Split operation permanently completed.\n")
if __name__ == "__main__":
    # Script entry point: hold out a tenth of the processed training images
    # as the validation set, then report that the split has finished.
    train_root = "dataset/processed_train"
    val_root = "dataset/processed_val"
    split_dataset(train_root, val_root, split_ratio=0.1)

    for banner_line in (
        "=== SYNCHRONIZATION COMPLETE ===",
        "DataLoader dependencies securely satisfied. Ready for pure model training.",
    ):
        print(banner_line)
|