Veritas-AI / split_val.py
Aditya-Jadhav150
Initial commit: Veritas-AI Production Build
239017e
import os
import shutil
import random
from tqdm import tqdm
def split_dataset(train_dir, val_dir, split_ratio=0.1):
    """Move a random fraction of each class's training files into a validation set.

    For each of the ``real`` and ``fake`` class folders under *train_dir*, a
    ``split_ratio`` share of the regular files (rounded down) is moved -- not
    copied -- into the same-named folder under *val_dir*. The destination
    class folder is always created, even when the source folder is missing.

    The move is destructive: calling the function again carves a further
    ``split_ratio`` share out of whatever is still left in *train_dir*.

    Args:
        train_dir: Directory containing the ``real`` and ``fake`` class folders.
        val_dir: Directory that receives the validation files.
        split_ratio: Fraction of each class to move, between 0 and 1 inclusive.

    Raises:
        ValueError: If *split_ratio* is outside ``[0, 1]``.
    """
    # A negative ratio would yield a negative slice index and silently move
    # almost every file; a ratio above 1 would report a wrong count.
    if not 0 <= split_ratio <= 1:
        raise ValueError(
            f"split_ratio must be between 0 and 1, got {split_ratio!r}"
        )

    # A private generator gives the same seeded sequence without reseeding the
    # global ``random`` state that the caller may also depend on.
    rng = random.Random(42)

    for class_name in ('real', 'fake'):
        src_folder = os.path.join(train_dir, class_name)
        dest_folder = os.path.join(val_dir, class_name)
        os.makedirs(dest_folder, exist_ok=True)

        if not os.path.exists(src_folder):
            print(f"Warning: {src_folder} not found. Skipping {class_name}.")
            continue

        # os.listdir() returns entries in arbitrary, filesystem-dependent
        # order, so sort first: otherwise the fixed seed would not select the
        # same files on every machine.
        files = sorted(
            f for f in os.listdir(src_folder)
            if os.path.isfile(os.path.join(src_folder, f))
        )

        # Number of files to move, rounded down.
        split_index = int(len(files) * split_ratio)
        print(f"Class '{class_name}': Found {len(files)} training images.")
        print(f"Class '{class_name}': Splicing {split_index} images to the validation set...")

        # Seeded shuffle so the selection is unbiased yet repeatable.
        rng.shuffle(files)
        val_files = files[:split_index]

        # Move the selected files into the validation directory; skip the
        # progress bar entirely when there is nothing to move.
        if val_files:
            for file in tqdm(val_files, desc=f"Migrating {class_name} images"):
                src_path = os.path.join(src_folder, file)
                dest_path = os.path.join(dest_folder, file)
                shutil.move(src_path, dest_path)

        print(f"Class '{class_name}': Split operation permanently completed.\n")
if __name__ == "__main__":
    # Script entry point: move a 0.1 share of each class from the processed
    # training directory into the validation directory, then report success.
    split_dataset(
        "dataset/processed_train",
        "dataset/processed_val",
        split_ratio=0.1,
    )
    for banner_line in (
        "=== SYNCHRONIZATION COMPLETE ===",
        "DataLoader dependencies securely satisfied. Ready for pure model training.",
    ):
        print(banner_line)