Update code.txt
Browse files
code.txt
CHANGED
|
@@ -1,56 +1,59 @@
|
|
| 1 |
import os
|
|
|
|
| 2 |
import shutil
|
| 3 |
-
from concurrent.futures import ThreadPoolExecutor
|
| 4 |
-
|
| 5 |
-
#
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
os.
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
#
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
with ThreadPoolExecutor(max_workers=num_threads) as executor:
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
|
| 42 |
if __name__ == "__main__":
|
| 43 |
-
|
| 44 |
-
image_folder = "/path/to/your/image_folder"
|
| 45 |
-
destination_folder = "/path/to/your/destination_folder"
|
| 46 |
-
|
| 47 |
-
# Your labels dictionary (image_name: label)
|
| 48 |
-
labels = {
|
| 49 |
-
"image1.jpg": "good",
|
| 50 |
-
"image2.jpg": "bad",
|
| 51 |
-
"image3.jpg": "good",
|
| 52 |
-
# Add the rest of your image labels here (1M entries)
|
| 53 |
-
}
|
| 54 |
-
|
| 55 |
-
# Organize images using 100 threads
|
| 56 |
-
organize_images(image_folder, labels, destination_folder, num_threads=100)
|
|
|
|
| 1 |
import os
|
| 2 |
+
import random
|
| 3 |
import shutil
|
| 4 |
+
from concurrent.futures import ThreadPoolExecutor
|
| 5 |
+
|
| 6 |
+
# Root of the dataset plus the two split directories derived from it.
dataset_folder = 'path/to/dataset'
train_folder, val_folder = (
    os.path.join(dataset_folder, split) for split in ('train', 'validation')
)

# The validation root must exist before anything is moved into it.
os.makedirs(val_folder, exist_ok=True)

# Each sub-directory directly under the train folder is treated as a label.
label_folders = list(
    filter(
        lambda name: os.path.isdir(os.path.join(train_folder, name)),
        os.listdir(train_folder),
    )
)
|
| 16 |
+
|
| 17 |
+
# Function to move images from a specific label folder
|
| 18 |
+
def process_label_folder(label_folder, num_threads, val_fraction=0.2):
    """Move a random share of one label's training files into validation.

    Files are taken from ``train_folder/label_folder`` and moved to
    ``val_folder/label_folder`` (both roots are module-level globals), using
    a thread pool so the moves overlap.

    Args:
        label_folder: Name of the label sub-directory inside the train folder.
        num_threads: Maximum number of worker threads for the move pool.
        val_fraction: Share of the label's files to move, between 0 and 1.
            Defaults to 0.2, the split the script has always used.

    Returns:
        The number of files that were actually moved.

    Raises:
        ValueError: If ``val_fraction`` is outside ``[0, 1]``, or if
            ``num_threads`` is not accepted by ``ThreadPoolExecutor``.
    """
    if not 0 <= val_fraction <= 1:
        raise ValueError(
            f"val_fraction must be between 0 and 1, got {val_fraction!r}"
        )

    train_label_folder = os.path.join(train_folder, label_folder)
    val_label_folder = os.path.join(val_folder, label_folder)

    # Create corresponding validation label folder
    os.makedirs(val_label_folder, exist_ok=True)

    # Only regular files are candidates: a nested directory must not be
    # sampled and moved as if it were an image. Sorting removes the
    # arbitrary directory order so a seeded RNG yields a reproducible split.
    with os.scandir(train_label_folder) as entries:
        all_images = sorted(entry.name for entry in entries if entry.is_file())

    val_size = int(len(all_images) * val_fraction)
    val_images = random.sample(all_images, val_size)

    def move_image(image):
        """Move one file from the train label folder to the validation one."""
        src = os.path.join(train_label_folder, image)
        dest = os.path.join(val_label_folder, image)
        shutil.move(src, dest)

    # Keep a handle on every future: a bare executor.map() whose results are
    # never read would drop any exception raised by a move.
    failures = []
    with ThreadPoolExecutor(max_workers=num_threads) as executor:
        pending = {
            executor.submit(move_image, image): image for image in val_images
        }
        for future, image in pending.items():
            error = future.exception()  # blocks until this move has finished
            if error is not None:
                failures.append((image, error))

    moved = val_size - len(failures)
    print(f"Moved {moved} images from {label_folder} to validation folder.")
    for image, error in failures:
        print(f"Failed to move {image}: {error}")
    return moved
|
| 46 |
+
|
| 47 |
+
# Main function to get user input for number of threads and process folders
|
| 48 |
+
def main():
    """Ask for a thread count, then build the validation split per label.

    Prompts on standard input until a positive integer is entered, then
    calls ``process_label_folder`` once for every entry of the module-level
    ``label_folders`` list with that thread count.
    """
    # Re-prompt instead of crashing: int() rejects non-numeric text, and
    # ThreadPoolExecutor rejects a max_workers value of zero or less.
    while True:
        answer = input("Enter the number of threads to use: ")
        try:
            num_threads = int(answer)
        except ValueError:
            print("Please enter a whole number.")
            continue
        if num_threads > 0:
            break
        print("The number of threads must be at least 1.")

    # Process each label folder using the input number of threads
    for label_folder in label_folders:
        process_label_folder(label_folder, num_threads)

    print("Validation dataset created.")


if __name__ == "__main__":
    main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|