# Object Detection

In [1]:
import torch

# Report which accelerator (if any) PyTorch can see before any GPU work starts.
gpu_status = (
    f"GPU available: {torch.cuda.get_device_name(0)}"
    if torch.cuda.is_available()
    else "GPU not available."
)
print(gpu_status)


GPU available: NVIDIA H100 PCIe


In [2]:
from zipfile import ZipFile

# Archive holding the dataset, and the folder it gets unpacked into.
zip_file_path = '/user/bhanucha/final_data.zip'
extraction_directory = '/user/bhanucha/input'

# Unpack every member of the archive; the context manager closes it afterwards.
with ZipFile(zip_file_path) as archive:
    archive.extractall(path=extraction_directory)

print("Extraction completed.")


Extraction completed.


In [3]:
import xml.etree.ElementTree as ET
import os
from PIL import Image

# Load class names and create a mapping to IDs
def load_class_names(class_file_path):
    """Read a class-list file and map every class name to its index.

    The file is expected to contain one class name per line. Whitespace
    around each name is removed and blank lines are ignored, so stray
    spaces or empty lines never turn into bogus classes (an empty file
    yields an empty mapping).

    Args:
        class_file_path: Path of the text file listing the class names.

    Returns:
        dict mapping class name -> zero-based position among the
        non-blank lines of the file.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(class_file_path, 'r') as file:
        stripped_lines = (line.strip() for line in file)
        class_names = [name for name in stripped_lines if name]
    return {name: i for i, name in enumerate(class_names)}

def convert_voc_to_yolo(voc_xml_file, class_mapping, img_width, img_height):
    """Convert the boxes of one Pascal VOC annotation into YOLO label lines.

    Every ``<object>`` element becomes one line of the form
    ``"<class_id> <x_center> <y_center> <width> <height>"`` where the four
    box values are divided by the image width/height.

    Args:
        voc_xml_file: Path (or file object) of the VOC XML annotation.
        class_mapping: dict mapping class name -> integer class id.
        img_width: Width of the annotated image in pixels.
        img_height: Height of the annotated image in pixels.

    Returns:
        list of label strings, one per ``<object>``; empty if there are none.

    Raises:
        KeyError: If an object's class name is not in ``class_mapping``.
        ValueError: If a coordinate is not numeric.
    """
    root = ET.parse(voc_xml_file).getroot()
    yolo_format = []

    for member in root.findall('object'):
        classname = member.find('name').text
        class_id = class_mapping[classname]

        bndbox = member.find('bndbox')
        # float() accepts both "12" and "12.0"/"12.5"; some annotation tools
        # write fractional pixel coordinates, which int() rejects.
        xmin, ymin, xmax, ymax = (
            float(bndbox.find(tag).text) for tag in ('xmin', 'ymin', 'xmax', 'ymax')
        )

        # Clip the box to the image so the normalized values stay in [0, 1].
        xmin = min(max(xmin, 0.0), img_width)
        xmax = min(max(xmax, 0.0), img_width)
        ymin = min(max(ymin, 0.0), img_height)
        ymax = min(max(ymax, 0.0), img_height)

        x_center = ((xmin + xmax) / 2) / img_width
        y_center = ((ymin + ymax) / 2) / img_height
        width = (xmax - xmin) / img_width
        height = (ymax - ymin) / img_height

        yolo_format.append(f"{class_id} {x_center} {y_center} {width} {height}")

    return yolo_format

def process_dataset(dataset_directory, class_file_path):
    """Write a YOLO ``.txt`` label file next to every VOC ``.xml`` annotation.

    ``dataset_directory`` is scanned one level deep: each sub-directory is
    searched for ``.xml`` files, and each annotation is paired with the
    ``.jpg`` of the same base name in the same folder. The image is opened
    only to read its pixel size, which is needed to normalize the boxes.

    Annotations whose image is missing are reported and skipped, so a single
    stray XML file does not abort the conversion of the rest of the dataset.

    Args:
        dataset_directory: Folder containing one sub-directory per group of
            images/annotations.
        class_file_path: Path of the class-list file passed to
            ``load_class_names``.

    Returns:
        None. The label files are written as a side effect.
    """
    class_mapping = load_class_names(class_file_path)

    for class_dir in os.listdir(dataset_directory):
        class_path = os.path.join(dataset_directory, class_dir)
        if not os.path.isdir(class_path):
            continue

        for file in os.listdir(class_path):
            if not file.endswith('.xml'):
                continue

            img_file = os.path.splitext(file)[0] + '.jpg'
            img_path = os.path.join(class_path, img_file)
            xml_path = os.path.join(class_path, file)

            # Without the image its size is unknown, so the boxes cannot be
            # normalized; skip this annotation instead of crashing mid-run.
            if not os.path.isfile(img_path):
                print(f"Skipping {xml_path}: image {img_path} not found")
                continue

            # Use PIL to get image dimensions
            with Image.open(img_path) as img:
                img_width, img_height = img.size

            yolo_annotations = convert_voc_to_yolo(xml_path, class_mapping, img_width, img_height)
            yolo_annotation_text = "\n".join(yolo_annotations)

            # Save YOLO annotations to a .txt file beside the XML
            txt_filename = os.path.splitext(xml_path)[0] + '.txt'
            with open(txt_filename, 'w') as f:
                f.write(yolo_annotation_text)

# Locations of the annotated dataset and of the class list used for the
# VOC -> YOLO conversion.
dataset_directory = '/user/bhanucha/input/initial_data_annotated'
class_file_path = '/user/bhanucha/Final_classes.txt'

# Generate the YOLO label files for the whole dataset.
process_dataset(
    dataset_directory=dataset_directory,
    class_file_path=class_file_path,
)






In [5]:
pip install scikit-learn

Defaulting to user installation because normal site-packages is not writeable
Collecting scikit-learn
 Downloading scikit_learn-1.4.1.post1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.2 MB)
[K |████████████████████████████████| 12.2 MB 3.7 MB/s eta 0:00:01
Collecting joblib>=1.2.0
 Downloading joblib-1.3.2-py3-none-any.whl (302 kB)
[K |████████████████████████████████| 302 kB 170.2 MB/s eta 0:00:01
[?25hInstalling collected packages: joblib, scikit-learn
Successfully installed joblib-1.3.2 scikit-learn-1.4.1.post1
You should consider upgrading via the '/cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/Compiler/gcccore/11.2.0/python/3.9.6/bin/python -m pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.


In [1]:
import os
import random
from sklearn.model_selection import train_test_split

# Define your dataset directory and output files
dataset_dir = '/user/bhanucha/input/initial_data_annotated'
output_train = '/user/bhanucha/train.txt'
output_val = '/user/bhanucha/valid.txt'
output_test = '/user/bhanucha/test.txt'

# Specify the split ratios
train_ratio = 0.8
val_ratio = 0.1
test_ratio = 0.1  # Ensures train + val + test = 1.0
if abs(train_ratio + val_ratio + test_ratio - 1.0) > 1e-9:
    raise ValueError("train_ratio + val_ratio + test_ratio must equal 1.0")

# Collect all image file paths.
# os.walk yields entries in filesystem order, which can differ between
# machines and runs; sorting first makes the seeded split below reproducible.
image_paths = sorted(
    os.path.join(root, file)
    for root, dirs, files in os.walk(dataset_dir)
    for file in files
    if file.endswith('.jpg')
)
if not image_paths:
    raise FileNotFoundError(f"No .jpg images found under {dataset_dir}")

# Split the data: carve off the test set first, then divide the remainder so
# that validation ends up with val_ratio of the whole dataset.
train_val_paths, test_paths = train_test_split(image_paths, test_size=test_ratio, random_state=42)
train_paths, val_paths = train_test_split(train_val_paths, test_size=val_ratio/(train_ratio+val_ratio), random_state=42)

# Function to write paths to a file
def write_paths(file_paths, output_file):
    """Write each path in ``file_paths`` to ``output_file``, one per line.

    The file is opened in write mode, so any previous content is replaced.
    """
    with open(output_file, 'w') as handle:
        handle.writelines(entry + '\n' for entry in file_paths)

# Persist each split to its own list file.
for split_paths, split_file in (
    (train_paths, output_train),
    (val_paths, output_val),
    (test_paths, output_test),
):
    write_paths(split_paths, split_file)

# Summarize how many images ended up in each split.
print(
    f"Training images: {len(train_paths)}",
    f"Validation images: {len(val_paths)}",
    f"Test images: {len(test_paths)}",
    sep="\n",
)


Training images: 3988
Validation images: 499
Test images: 499


In [2]:
# Fetch the YOLOv7 repository and make the clone the working directory, so
# files inside it (e.g. requirements.txt) can be referenced by relative path.
# NOTE(review): the clone is unpinned; the training log in this notebook reports
# revision v0.1-128-ga207844 - consider checking out that commit for reproducibility.
!git clone https://github.com/WongKinYiu/yolov7.git
%cd yolov7


Cloning into 'yolov7'...
remote: Enumerating objects: 1197, done.[K
remote: Total 1197 (delta 0), reused 0 (delta 0), pack-reused 1197[K
Receiving objects: 100% (1197/1197), 74.23 MiB | 69.03 MiB/s, done.
Resolving deltas: 100% (519/519), done.
/user/bhanucha/yolov7


In [3]:
!pip install -r requirements.txt


Defaulting to user installation because normal site-packages is not writeable
Collecting matplotlib>=3.2.2
 Downloading matplotlib-3.8.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (11.6 MB)
[K |████████████████████████████████| 11.6 MB 3.5 MB/s eta 0:00:01
Collecting opencv-python>=4.1.1
 Downloading opencv_python-4.9.0.80-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (62.2 MB)
[K |████████████████████████████████| 62.2 MB 169.7 MB/s eta 0:00:01
Collecting tqdm>=4.41.0
 Downloading tqdm-4.66.2-py3-none-any.whl (78 kB)
[K |████████████████████████████████| 78 kB 3.5 MB/s s eta 0:00:01
Collecting seaborn>=0.11.0
 Downloading seaborn-0.13.2-py3-none-any.whl (294 kB)
[K |████████████████████████████████| 294 kB 165.8 MB/s eta 0:00:01
Collecting thop
 Downloading thop-0.1.1.post2209072238-py3-none-any.whl (15 kB)
Collecting cycler>=0.10
 Downloading cycler-0.12.1-py3-none-any.whl (8.3 kB)
Collecting fonttools>=4.22.0
 Downloading fonttools-4.50.0-cp39-cp39-manyl

Installing collected packages: kiwisolver, fonttools, cycler, contourpy, matplotlib, tqdm, thop, seaborn, opencv-python
Successfully installed contourpy-1.2.1 cycler-0.12.1 fonttools-4.50.0 kiwisolver-1.4.5 matplotlib-3.8.4 opencv-python-4.9.0.80 seaborn-0.13.2 thop-0.1.1.post2209072238 tqdm-4.66.2
You should consider upgrading via the '/cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/Compiler/gcccore/11.2.0/python/3.9.6/bin/python3.9 -m pip install --upgrade pip' command.[0m


In [4]:
# Dataset description passed to YOLOv7's train.py / test.py through --data:
# the three split list files, the number of classes (nc) and the class names.
# NOTE(review): the order of `names` presumably has to match the class order in
# Final_classes.txt that was used to generate the label files - confirm.
yaml_content = """
train: /user/bhanucha/train.txt
val: /user/bhanucha/valid.txt
test: /user/bhanucha/test.txt

nc: 100
names: [
 'all_purpose_flour', 'almonds', 'apple', 'apricot', 'asparagus', 'avocado', 'bacon', 'banana', 'barley', 'basil',
 'basmati_rice', 'beans', 'beef', 'beets', 'bell_pepper', 'berries', 'biscuits', 'blackberries', 'black_pepper',
 'blueberries', 'bread', 'bread_crumbs', 'bread_flour', 'broccoli', 'brownie_mix', 'brown_rice', 'butter', 'cabbage',
 'cake', 'cardamom', 'carrot', 'cashews', 'cauliflower', 'celery', 'cereal', 'cheese', 'cherries', 'chicken',
 'chickpeas', 'chocolate', 'chocolate_chips', 'chocolate_syrup', 'cilantro', 'cinnamon', 'clove', 'cocoa_powder',
 'coconut', 'cookies', 'corn', 'cucumber', 'dates', 'eggplant', 'eggs', 'fish', 'garlic', 'ginger', 'grapes', 'honey',
 'jalapeno', 'kidney_beans', 'lemon', 'mango', 'marshmallows', 'milk', 'mint', 'muffins', 'mushroom', 'noodles',
 'nuts', 'oats', 'okra', 'olive', 'onion', 'orange', 'oreo_cookies', 'pasta', 'pear', 'pepper', 'pineapple',
 'pistachios', 'pork', 'potato', 'pumpkin', 'radishes', 'raisins', 'red_chilies', 'rice', 'rosemary', 'salmon', 'salt',
 'shrimp', 'spinach', 'strawberries', 'sugar', 'sweet_potato', 'tomato', 'vanilla_ice_cream', 'walnuts', 'watermelon',
 'yogurt'
]
"""

# strip() drops the blank first/last lines introduced by the triple-quoted literal.
with open('/user/bhanucha/ingredients.yaml', 'w') as file:
 file.write(yaml_content.strip())


In [5]:
# Make sure the working directory is the YOLOv7 checkout: the training command
# below refers to train.py and cfg/training/yolov7.yaml by relative path.
%cd /user/bhanucha/yolov7


/user/bhanucha/yolov7


In [7]:
# Train YOLOv7 on the ingredients dataset: 50 epochs, batch size 64, 640x640
# inputs, GPU 0, starting from the 'yolov7.pt' weights.
# NOTE(review): the YOLOv7 README appears to recommend yolov7_training.pt with
# data/hyp.scratch.custom.yaml for transfer learning on custom data - confirm that
# 'yolov7.pt' with the default data/hyp.scratch.p5.yaml (see the logged Namespace) is intended.
!python train.py --batch-size 64 --img 640 640 --data /user/bhanucha/ingredients.yaml --cfg cfg/training/yolov7.yaml --weights 'yolov7.pt' --device 0 --epochs 50 --workers 4

YOLOR 🚀 v0.1-128-ga207844 torch 1.13.1 CUDA:0 (NVIDIA H100 PCIe, 81230.375MB)

Namespace(weights='yolov7.pt', cfg='cfg/training/yolov7.yaml', data='/user/bhanucha/ingredients.yaml', hyp='data/hyp.scratch.p5.yaml', epochs=50, batch_size=64, img_size=[640, 640], rect=False, resume=False, nosave=False, notest=False, noautoanchor=False, evolve=False, bucket='', cache_images=False, image_weights=False, device='0', multi_scale=False, single_cls=False, adam=False, sync_bn=False, local_rank=-1, workers=4, project='runs/train', entity=None, name='exp', exist_ok=False, quad=False, linear_lr=False, label_smoothing=0.0, upload_dataset=False, bbox_interval=-1, save_period=-1, artifact_alias='latest', freeze=[0], v5_metric=False, world_size=1, global_rank=-1, save_dir='runs/train/exp2', total_batch_size=64)
[34m[1mtensorboard: [0mStart with 'tensorboard --logdir runs/train', view at http://localhost:6006/
2024-04-04 11:10:34.277310: I tensorflow/core/platform/cpu_feature_guard.cc:193] This Tensor

 92 -1 1 590336 models.common.Conv [256, 256, 3, 2] 
 93 [-1, -3, 51] 1 0 models.common.Concat [1] 
 94 -1 1 525312 models.common.Conv [1024, 512, 1, 1] 
 95 -2 1 525312 models.common.Conv [1024, 512, 1, 1] 
 96 -1 1 1180160 models.common.Conv [512, 256, 3, 1] 
 97 -1 1 590336 models.common.Conv [256, 256, 3, 1] 
 98 -1 1 590336 models.common.Conv [256, 256, 3, 1] 
 99 -1 1 590336 models.common.Conv [256, 256, 3, 1] 
100[-1, -2, -3, -4, -5, -6] 1 0 models.common.Concat [1] 
101 -1 1 1049600 models.common.Conv [2048, 512, 1, 1] 
102 75 1 328704 models.common.RepConv [128, 256, 3, 1] 
103 88 1 1312768 models.common.RepConv [256, 512, 3, 1] 
104 101 1 5246976 models.common.RepConv [512, 1024, 3, 1] 
105 [102, 103, 104] 1 568162 models.yolo.IDetect [100, [[12, 16, 19, 36, 40, 28], [36, 75, 76, 55, 72, 146], [142, 110, 192, 243, 459, 401]], [256, 512, 1024]]
 return _VF.meshgrid(tensors, **kwargs) # type: ignore[attr-defined]
Model Summary: 415 layers, 37730562 parameters, 37730562 gradient

 16/49 43.7G 0.02668 0.009865 0.0499 0.08644 55 640
 Class Images Labels P R mAP@.5
 all 499 367 0.6 0.183 0.175 0.143

 Epoch gpu_mem box obj cls total labels img_size
 17/49 43.7G 0.02685 0.00988 0.04829 0.08502 44 640
 Class Images Labels P R mAP@.5
 all 499 367 0.196 0.266 0.138 0.107

 Epoch gpu_mem box obj cls total labels img_size
 18/49 43.7G 0.02572 0.00947 0.04623 0.08143 57 640
 Class Images Labels P R mAP@.5
 all 499 367 0.348 0.299 0.226 0.181

 Epoch gpu_mem box obj cls total labels img_size
 19/49 43.7G 0.02658 0.009464 0.04458 0.08062 50 640
 Class Images Labels P R mAP@.5
 all 499 367 0.262 0.315 0.21 0.172

 Epoch gpu_mem box obj cls total labels img_size
 20/49 43.7G 0.02621 0.009319 0.04289 0.07842 46 640
 Class Images Labels P R mAP@.5
 all 499 367 0.326 0.375 0.26 0.215

 Epoch gpu_mem box obj cls total labels img_size
 21/49 43.7G 0.02617 0.009057 0.04091 0.07614 35 640
 Class Images Labels P R mAP@.5
 all 499 367 0.522 0.25 0.238 0.187

 Epoch gpu_mem box obj cl

 Class Images Labels P R mAP@.5
 all 499 367 0.478 0.497 0.513 0.464

 Epoch gpu_mem box obj cls total labels img_size
 41/49 43.7G 0.01893 0.007398 0.02043 0.04676 40 640
 Class Images Labels P R mAP@.5
 all 499 367 0.469 0.543 0.521 0.476

 Epoch gpu_mem box obj cls total labels img_size
 42/49 43.7G 0.01868 0.007207 0.01998 0.04588 39 640
 Class Images Labels P R mAP@.5
 all 499 367 0.626 0.46 0.541 0.493

 Epoch gpu_mem box obj cls total labels img_size
 43/49 43.7G 0.01885 0.007177 0.01992 0.04595 48 640
 Class Images Labels P R mAP@.5
 all 499 367 0.462 0.544 0.542 0.494

 Epoch gpu_mem box obj cls total labels img_size
 44/49 43.7G 0.0187 0.007042 0.01925 0.04499 55 640
 Class Images Labels P R mAP@.5
 all 499 367 0.463 0.518 0.513 0.468

 Epoch gpu_mem box obj cls total labels img_size
 45/49 43.7G 0.01834 0.007062 0.01923 0.04463 33 640
 Class Images Labels P R mAP@.5
 all 499 367 0.446 0.509 0.537 0.486

 Epoch gpu_mem box obj cls total labels img_size
 46/49 43.7G 0.01818 0.

In [12]:
# Evaluate the best checkpoint of training run exp2 on the test split
# (--task test) at 640 px with batch size 64.
# NOTE(review): in YOLOv7's test.py, --iou-thres is presumably the NMS IoU threshold,
# not the IoU used to match predictions for mAP@.5 - confirm before comparing runs.
!python test.py --weights /user/bhanucha/yolov7/runs/train/exp2/weights/best.pt --data /user/bhanucha/ingredients.yaml --img 640 --iou-thres 0.5 --batch-size 64 --task test


Namespace(weights=['/user/bhanucha/yolov7/runs/train/exp2/weights/best.pt'], data='/user/bhanucha/ingredients.yaml', batch_size=64, img_size=640, conf_thres=0.001, iou_thres=0.5, task='test', device='', single_cls=False, augment=False, verbose=False, save_txt=False, save_hybrid=False, save_conf=False, save_json=False, project='runs/test', name='exp', exist_ok=False, no_trace=False, v5_metric=False)
YOLOR 🚀 v0.1-128-ga207844 torch 1.13.1 CUDA:0 (NVIDIA H100 PCIe, 81230.375MB)

Fusing layers... 
RepConv.fuse_repvgg_block
RepConv.fuse_repvgg_block
RepConv.fuse_repvgg_block
IDetect.fuse
 return _VF.meshgrid(tensors, **kwargs) # type: ignore[attr-defined]
Model Summary: 314 layers, 37015778 parameters, 6194944 gradients, 104.9 GFLOPS
 Convert model to Traced-model... 
 traced_script_module saved! 
 model is traced! 

[34m[1mtest: [0mScanning '/user/bhanucha/test.cache' images and labels... 499 found, 0 mis[0m
 Class Images Labels P R mAP@.5
 all 499 346 0.501 0.546 0.532 0.483
Speed: 1.

In [11]:
# Evaluate the same exp2 checkpoint on the test split with --iou-thres 0.25 and
# the default batch size (32 according to the logged Namespace).
# NOTE(review): this cell's execution count (11) is lower than the preceding
# cell's (12), so the two evaluations were run out of notebook order.
!python test.py --weights /user/bhanucha/yolov7/runs/train/exp2/weights/best.pt --data /user/bhanucha/ingredients.yaml --img 640 --iou-thres 0.25 --task test


Namespace(weights=['/user/bhanucha/yolov7/runs/train/exp2/weights/best.pt'], data='/user/bhanucha/ingredients.yaml', batch_size=32, img_size=640, conf_thres=0.001, iou_thres=0.25, task='test', device='', single_cls=False, augment=False, verbose=False, save_txt=False, save_hybrid=False, save_conf=False, save_json=False, project='runs/test', name='exp', exist_ok=False, no_trace=False, v5_metric=False)
YOLOR 🚀 v0.1-128-ga207844 torch 1.13.1 CUDA:0 (NVIDIA H100 PCIe, 81230.375MB)

Fusing layers... 
RepConv.fuse_repvgg_block
RepConv.fuse_repvgg_block
RepConv.fuse_repvgg_block
IDetect.fuse
 return _VF.meshgrid(tensors, **kwargs) # type: ignore[attr-defined]
Model Summary: 314 layers, 37015778 parameters, 6194944 gradients, 104.9 GFLOPS
 Convert model to Traced-model... 
 traced_script_module saved! 
 model is traced! 

[34m[1mtest: [0mScanning '/user/bhanucha/test.cache' images and labels... 499 found, 0 mis[0m
 Class Images Labels P R mAP@.5
 all 499 346 0.54 0.515 0.53 0.481
Speed: 2.5