## Check that the class names in the txt file match the label directories

### Rename directories for consistency

In [1]:
import os

def rename_directories(dataset_directory):
    """Replace spaces with underscores in the names of immediate subdirectories.

    Only entries directly inside ``dataset_directory`` that are directories
    are considered; regular files and nested levels are left untouched.

    Args:
        dataset_directory: Path of the folder whose subdirectories are renamed.

    Returns:
        None. One line is printed per directory that is renamed or skipped.
    """
    for dirname in os.listdir(dataset_directory):
        current_dir_path = os.path.join(dataset_directory, dirname)
        if not os.path.isdir(current_dir_path):
            continue

        new_dirname = dirname.replace(' ', '_')
        if new_dirname == dirname:
            # Nothing to normalise for this directory.
            continue

        new_dir_path = os.path.join(dataset_directory, new_dirname)
        if os.path.exists(new_dir_path):
            # os.rename onto an existing path either raises or replaces an
            # empty directory depending on the platform; never clobber or
            # crash, just report the collision and move on.
            print(f"Skipped '{dirname}': '{new_dirname}' already exists")
            continue

        os.rename(current_dir_path, new_dir_path)
        print(f"Renamed '{dirname}' to '{new_dirname}'")
# Root folder that holds one subdirectory per class.
dataset_directory = r'C:\Users\Kiyo\Desktop\DL\Project\image_data\initial_data'
# Normalise the class directory names (spaces -> underscores) in place on disk.
rename_directories(dataset_directory)

print("Directory renaming completed.")


Renamed 'all purpose flour' to 'all_purpose_flour'
Renamed 'basmati rice' to 'basmati_rice'
Renamed 'bell pepper' to 'bell_pepper'
Renamed 'black pepper' to 'black_pepper'
Renamed 'bread crumbs' to 'bread_crumbs'
Renamed 'bread flour' to 'bread_flour'
Renamed 'brown rice' to 'brown_rice'
Renamed 'brownie mix' to 'brownie_mix'
Renamed 'chocolate chips' to 'chocolate_chips'
Renamed 'chocolate syrup' to 'chocolate_syrup'
Renamed 'cocoa powder' to 'cocoa_powder'
Renamed 'kidney beans' to 'kidney_beans'
Renamed 'oreo cookies' to 'oreo_cookies'
Renamed 'red chilies' to 'red_chilies'
Renamed 'sweet potato' to 'sweet_potato'
Renamed 'vanilla ice cream' to 'vanilla_ice_cream'
Directory renaming completed.


In [2]:

# dataset directory and class names file
dataset_dir = r'C:\Users\Kiyo\Desktop\DL\Project\image_data\initial_data'
class_names_file = r'C:\Users\Kiyo\Desktop\DL\Project\image_data\Final_classes.txt'

# Load class names from file, applying the same space -> underscore
# normalisation that is used for the directory names. Blank lines are skipped:
# otherwise they would become an empty class name and be reported as a
# missing directory.
with open(class_names_file, 'r') as f:
    class_names = [line.strip().replace(' ', '_') for line in f if line.strip()]

# Get a list of actual directory names in the dataset
actual_dirs = [d for d in os.listdir(dataset_dir) if os.path.isdir(os.path.join(dataset_dir, d))]

# Check for discrepancies in both directions
missing_dirs = set(class_names) - set(actual_dirs)  # listed in the file, absent on disk
extra_dirs = set(actual_dirs) - set(class_names)    # present on disk, not listed in the file

if missing_dirs:
    print(f"Missing directories for classes in 'Final_classes.txt': {missing_dirs}")
else:
    print("All classes in 'Final_classes.txt' have corresponding directories in the dataset.")

if extra_dirs:
    print(f"Extra directories in the dataset that are not listed in 'Final_classes.txt': {extra_dirs}")
else:
    print("No extra directories in the dataset that are not listed in 'Final_classes.txt'.")


All classes in 'Final_classes.txt' have corresponding directories in the dataset.
No extra directories in the dataset that are not listed in 'Final_classes.txt'.


## Removing files other than jpg

In [17]:

def remove_non_jpg_images(dataset_dir):
    """Delete every file under ``dataset_dir`` whose name does not end in '.jpg'.

    The whole directory tree is walked and the extension check is
    case-insensitive.

    Args:
        dataset_dir: Root folder to clean.

    Returns:
        List of the full paths of the files that were deleted.
    """
    deleted_paths = []
    for folder, _subfolders, filenames in os.walk(dataset_dir):
        for name in filenames:
            if name.lower().endswith('.jpg'):
                # JPG files are kept as they are.
                continue
            target = os.path.join(folder, name)
            os.remove(target)
            deleted_paths.append(target)
    return deleted_paths


# Root folder that holds one subdirectory per class.
dataset_dir = r'C:\Users\Kiyo\Desktop\DL\Project\image_data\initial_data'
# Destructive step: files are deleted from disk, the returned list is only a report.
removed_files = remove_non_jpg_images(dataset_dir)

# Report what was deleted, one path per line.
if removed_files:
    print(f"Removed {len(removed_files)} non-JPG files:")
    for file in removed_files:
        print(file)
else:
    print("No non-JPG files found in the dataset.")


No non-JPG files found in the dataset.


In [20]:
def count_images(dataset_dir):
    """Count the '.jpg' entries in each immediate subdirectory of ``dataset_dir``.

    The extension check is case-insensitive; entries of ``dataset_dir`` that
    are not directories are ignored.

    Args:
        dataset_dir: Root folder containing one subdirectory per class.

    Returns:
        Dict mapping each subdirectory name to its number of '.jpg' entries.
    """
    counts_by_class = {}
    for entry in os.listdir(dataset_dir):
        folder = os.path.join(dataset_dir, entry)
        if not os.path.isdir(folder):
            continue
        jpg_names = [name for name in os.listdir(folder) if name.lower().endswith('.jpg')]
        counts_by_class[entry] = len(jpg_names)
    return counts_by_class

# Count the .jpg images per class directory of the dataset.
dataset_dir = r'C:\Users\Kiyo\Desktop\DL\Project\image_data\initial_data'
image_counts = count_images(dataset_dir)

# Print out the counts
for class_name, count in image_counts.items():
    print(f"{class_name}: {count} images")

# Optional: total image count
total_images = sum(image_counts.values())
print(f"\nTotal images in dataset: {total_images}")

all_purpose_flour: 50 images
almonds: 50 images
apple: 50 images
apricot: 50 images
asparagus: 50 images
avocado: 50 images
bacon: 50 images
banana: 50 images
barley: 50 images
basil: 50 images
basmati_rice: 50 images
beans: 50 images
beef: 50 images
beets: 50 images
bell_pepper: 50 images
berries: 50 images
biscuits: 50 images
blackberries: 50 images
black_pepper: 50 images
blueberries: 50 images
bread: 50 images
bread_crumbs: 50 images
bread_flour: 50 images
broccoli: 50 images
brownie_mix: 50 images
brown_rice: 50 images
butter: 50 images
cabbage: 50 images
cake: 50 images
cardamom: 50 images
carrot: 50 images
cashews: 50 images
cauliflower: 50 images
celery: 50 images
cereal: 50 images
cheese: 50 images
cherries: 50 images
chicken: 50 images
chickpeas: 50 images
chocolate: 50 images
chocolate_chips: 50 images
chocolate_syrup: 50 images
cilantro: 50 images
cinnamon: 50 images
clove: 50 images
cocoa_powder: 50 images
coconut: 50 images
cookies: 50 images
corn: 50 images
cucumber: 50 

- Remove surplus images from any class that has more than 50 images

In [5]:
import random

def remove_images_if_more_than(dataset_dir, max_images=50):
    """Randomly delete '.jpg' files so no class directory holds more than ``max_images``.

    Each immediate subdirectory of ``dataset_dir`` is treated as one class.
    The files to delete are picked with ``random.sample``; a status line is
    printed for every class directory.

    Args:
        dataset_dir: Root folder containing one subdirectory per class.
        max_images: Largest number of '.jpg' files allowed per class.

    Returns:
        None.
    """
    for class_name in os.listdir(dataset_dir):
        folder = os.path.join(dataset_dir, class_name)
        if not os.path.isdir(folder):
            continue

        jpg_names = [name for name in os.listdir(folder) if name.lower().endswith('.jpg')]
        surplus = len(jpg_names) - max_images
        if surplus <= 0:
            print(f"Class '{class_name}' meets the maximum image requirement.")
            continue

        # Pick exactly as many files as the class is over the limit.
        images_to_remove = random.sample(jpg_names, surplus)
        for name in images_to_remove:
            os.remove(os.path.join(folder, name))
        print(f"Removed {len(images_to_remove)} images from '{class_name}' to meet the maximum of {max_images} images.")

# Specify the path to your dataset directory
dataset_dir = r'C:\Users\Kiyo\Desktop\DL\Project\image_data\initial_data'
# Destructive step: surplus images are deleted from disk so each class keeps at most 50.
remove_images_if_more_than(dataset_dir, 50)

print("Image reduction completed.")

Class 'all_purpose_flour' meets the maximum image requirement.
Class 'almonds' meets the maximum image requirement.
Class 'apple' meets the maximum image requirement.
Class 'apricot' meets the maximum image requirement.
Class 'asparagus' meets the maximum image requirement.
Class 'avocado' meets the maximum image requirement.
Class 'bacon' meets the maximum image requirement.
Class 'banana' meets the maximum image requirement.
Class 'barley' meets the maximum image requirement.
Class 'basil' meets the maximum image requirement.
Class 'basmati_rice' meets the maximum image requirement.
Class 'beans' meets the maximum image requirement.
Class 'beef' meets the maximum image requirement.
Class 'beets' meets the maximum image requirement.
Class 'bell_pepper' meets the maximum image requirement.
Class 'berries' meets the maximum image requirement.
Class 'biscuits' meets the maximum image requirement.
Class 'blackberries' meets the maximum image requirement.
Class 'black_pepper' meets the max

- Augment so that every class contains 50 images

In [10]:
# %pip installs into the environment of the running kernel; the version is
# pinned to the release this notebook was run with.
%pip install imgaug==0.4.0


Collecting imgaug
  Downloading imgaug-0.4.0-py2.py3-none-any.whl.metadata (1.8 kB)
Downloading imgaug-0.4.0-py2.py3-none-any.whl (948 kB)
   ---------------------------------------- 0.0/948.0 kB ? eta -:--:--
   ---------------------------------------- 10.2/948.0 kB ? eta -:--:--
   - ------------------------------------- 30.7/948.0 kB 660.6 kB/s eta 0:00:02
   ------ --------------------------------- 153.6/948.0 kB 1.5 MB/s eta 0:00:01
   ------------------------ --------------- 583.7/948.0 kB 4.6 MB/s eta 0:00:01
   ---------------------------------------- 948.0/948.0 kB 5.0 MB/s eta 0:00:00
Installing collected packages: imgaug
Successfully installed imgaug-0.4.0


DEPRECATION: celery 4.2.0 has a non-standard dependency specifier pytz>dev. pip 24.1 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of celery or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063


In [19]:
import os
import random
from PIL import Image
import imgaug as ia
import imgaug.augmenters as iaa
import numpy as np

# Seed imgaug for the augmentation parameters, and the stdlib RNG as well
# because the source images to augment are chosen with random.sample.
ia.seed(1)
random.seed(1)

def augment_image(image_path, save_dir, augmentation, img_count):
    """Write one augmented copy of an image into ``save_dir``.

    Args:
        image_path: Path of the source image.
        save_dir: Folder in which the augmented image is saved.
        augmentation: Callable invoked as ``augmentation(image=array)`` that
            returns the augmented array.
        img_count: Number embedded in the output file name.

    Returns:
        None. The result is saved as ``<source stem>_aug_<img_count>.jpg``.
    """
    source = Image.open(image_path)
    # Work on an RGB version of the image; other modes are converted first.
    rgb_image = source if source.mode == 'RGB' else source.convert('RGB')

    augmented_pixels = augmentation(image=np.array(rgb_image))

    stem, _extension = os.path.splitext(os.path.basename(image_path))
    destination = os.path.join(save_dir, f"{stem}_aug_{img_count}.jpg")
    Image.fromarray(augmented_pixels).save(destination)


def _build_augmentation_pipeline():
    """Assemble the imgaug pipeline applied to every generated image."""
    return iaa.Sequential([
        iaa.Fliplr(0.5),  # Horizontally flip 50% of the images
        iaa.Crop(percent=(0, 0.1)),  # Perform random crops

        # Apply affine transformations
        iaa.Affine(
            scale={"x": (0.8, 1.2), "y": (0.8, 1.2)},
            translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)},
            rotate=(-25, 25),
            shear=(-8, 8)
        ),

        # Adjust brightness and contrast
        iaa.Multiply((0.8, 1.2)),  # Change brightness (80-120% of original value)
        iaa.LinearContrast((0.75, 1.5)),  # Strengthen or weaken the contrast in each image.

        # Apply color temperature changes
        iaa.Sequential([
            iaa.ChangeColorTemperature((1100, 10000)),  # Simulate different color temperatures
            iaa.WithChannels(0, iaa.Add((10, 100)))  # Add to channel 0 (red) for warmth
        ], random_order=True)  # Apply these changes in a random order
    ])


def augment_class_images(class_dir, target_count=50):
    """Generate augmented copies until ``class_dir`` holds ``target_count`` images.

    Source images are the '.jpg' files present when the function is called
    (case-insensitive check). They are drawn with ``random.sample`` in
    batches and each draw is passed to ``augment_image``, which writes the
    new file into the same directory.

    Args:
        class_dir: Folder containing the images of a single class.
        target_count: Number of images the folder should contain afterwards.

    Returns:
        None. Nothing is done when the folder already has at least
        ``target_count`` images; a message is printed and nothing is done
        when it has no '.jpg' images at all.
    """
    images = [file for file in os.listdir(class_dir) if file.lower().endswith('.jpg')]
    current_count = len(images)

    if current_count >= target_count:
        return
    if not images:
        # Without any source image the loop below could never make progress
        # and would spin forever, so bail out explicitly.
        print(f"No .jpg images found in '{class_dir}'; skipping augmentation.")
        return

    # Built only once augmentation is known to be needed.
    augmentation = _build_augmentation_pipeline()

    while current_count < target_count:
        # Never draw more than is still missing, nor more than there are sources.
        batch_size = min(len(images), target_count - current_count)
        for img_file in random.sample(images, batch_size):
            augment_image(os.path.join(class_dir, img_file), class_dir, augmentation, current_count)
            current_count += 1

def augment_dataset_if_needed(dataset_dir, target_count=50):
    """Run ``augment_class_images`` on every class directory of the dataset.

    Each immediate subdirectory of ``dataset_dir`` is treated as one class;
    other entries are ignored. A completion line is printed per class.

    Args:
        dataset_dir: Root folder containing one subdirectory per class.
        target_count: Image count forwarded to ``augment_class_images``.

    Returns:
        None.
    """
    for class_name in os.listdir(dataset_dir):
        class_dir = os.path.join(dataset_dir, class_name)
        if not os.path.isdir(class_dir):
            continue
        augment_class_images(class_dir, target_count)
        print(f"Completed augmentation for class '{class_name}'.")

# Specify the path to your dataset directory
dataset_dir = r'C:\Users\Kiyo\Desktop\DL\Project\image_data\initial_data'
# Writes augmented .jpg files into the class directories, aiming for 50 images per class.
augment_dataset_if_needed(dataset_dir, 50)

print("Dataset augmentation completed.")


Completed augmentation for class 'all_purpose_flour'.
Completed augmentation for class 'almonds'.
Completed augmentation for class 'apple'.
Completed augmentation for class 'apricot'.
Completed augmentation for class 'asparagus'.
Completed augmentation for class 'avocado'.
Completed augmentation for class 'bacon'.
Completed augmentation for class 'banana'.
Completed augmentation for class 'barley'.
Completed augmentation for class 'basil'.
Completed augmentation for class 'basmati_rice'.
Completed augmentation for class 'beans'.
Completed augmentation for class 'beef'.
Completed augmentation for class 'beets'.
Completed augmentation for class 'bell_pepper'.
Completed augmentation for class 'berries'.
Completed augmentation for class 'biscuits'.
Completed augmentation for class 'blackberries'.
Completed augmentation for class 'black_pepper'.
Completed augmentation for class 'blueberries'.
Completed augmentation for class 'bread'.
Completed augmentation for class 'bread_crumbs'.
Complete