# rough_work / coco_format.py
# jkushwaha's picture
# Create coco_format.py
# 1512fcc verified
from glob import glob
import pandas as pd
import json
import os
import shutil
def jsons_to_dataframe(
    json_dir,
    min_image_id=1700,
    image_dest_dir="/content/drive/MyDrive/final/circuit/val/",
):
    """Flatten numbered JSON annotation files into one row per bounding box.

    Every ``<number>.json`` file in ``json_dir`` whose number is at least
    ``min_image_id`` is read, and the matching ``<number>.png`` is copied
    into ``image_dest_dir``. The image is looked up in the directory obtained
    by replacing ``'annots'`` with ``'images'`` in ``json_dir``.

    Each JSON file is read as ``data['size']['width']``,
    ``data['size']['height']`` and ``data['object']``, where every object
    carries ``name`` and ``bndbox`` (``xmin``, ``ymin``, ``xmax``, ``ymax``).

    Args:
        json_dir: Directory holding the JSON annotation files.
        min_image_id: Smallest numeric file stem to include. The default
            keeps the original hard-coded threshold.
        image_dest_dir: Directory the selected images are copied into; it is
            created on demand. The default keeps the original hard-coded
            location.

    Returns:
        A ``pandas.DataFrame`` with columns ``filename``, ``image_id``,
        ``width``, ``height``, ``category_name`` and ``bbox``. ``bbox`` is a
        dict with keys ``xmin``, ``ymin``, ``width`` and ``height``. Rows are
        ordered by ascending image id. Files without objects contribute no
        rows.
    """
    columns = ['filename', 'image_id', 'width', 'height', 'category_name', 'bbox']
    image_dir = json_dir.replace('annots', 'images')

    # Select the files first. The extension is checked *before* parsing the
    # stem so that unrelated directory entries cannot raise ValueError.
    selected = []
    for filename in os.listdir(json_dir):
        if not filename.endswith('.json'):
            continue
        try:
            image_id = int(filename.split('.')[0])
        except ValueError:
            # A JSON file that is not named after an image number.
            continue
        if image_id >= min_image_id:
            selected.append((image_id, filename))

    records = []
    # os.listdir order is arbitrary; sorting makes the output reproducible.
    for image_id, filename in sorted(selected):
        # Swap only the trailing extension, not every '.json' in the name.
        image_filename = filename[:-len('.json')] + '.png'

        os.makedirs(image_dest_dir, exist_ok=True)
        shutil.copy2(os.path.join(image_dir, image_filename), image_dest_dir)

        with open(os.path.join(json_dir, filename), 'r', encoding='utf-8') as f:
            data = json.load(f)

        width_value = int(data['size']['width'])
        height_value = int(data['size']['height'])

        # Tolerate a missing key or a single object stored as a bare dict
        # rather than a list.
        objects = data.get('object') or []
        if isinstance(objects, dict):
            objects = [objects]

        for obj in objects:
            box = obj['bndbox']
            # Coordinates may be stored as float-like strings; truncate to int.
            xmin = int(float(box['xmin']))
            ymin = int(float(box['ymin']))
            xmax = int(float(box['xmax']))
            ymax = int(float(box['ymax']))

            records.append({
                'filename': image_filename,
                'image_id': image_id,
                'width': width_value,
                'height': height_value,
                'category_name': obj['name'],
                # Corner + size form.
                'bbox': {
                    "xmin": xmin,
                    "ymin": ymin,
                    "width": xmax - xmin,
                    "height": ymax - ymin,
                },
            })

    # Passing `columns` keeps the schema stable even when nothing matched.
    return pd.DataFrame(records, columns=columns)
# COCO category table. Ids are 1-based and follow the order in which the
# names are listed below.
categories = [
    {'id': category_id, 'name': category_name}
    for category_id, category_name in enumerate(
        (
            'Active_IC',
            'capacitor',
            'connector',
            'crystal',
            'diode',
            'gnd',
            'inductor',
            'led',
            'misc',
            'nmos',
            'npn',
            'pmos',
            'pnp',
            'pwr',
            'pwr_connector',
            'resistor',
            'switch',
        ),
        start=1,
    )
]
def dataframe_to_coco_format(df):
    """Convert a per-bounding-box DataFrame into a COCO-style dictionary.

    Args:
        df: DataFrame with one row per annotation and the columns
            ``image_id``, ``filename``, ``width``, ``height``,
            ``category_name`` and ``bbox``. ``bbox`` must be a mapping with
            keys ``xmin``, ``ymin``, ``width`` and ``height``.

    Returns:
        A dict with the keys ``info``, ``licenses``, ``categories``,
        ``images`` and ``annotations``. Images are renumbered 1..N in order
        of first appearance in ``df``; annotations are numbered 1..M in row
        order. ``categories`` holds copies of the module-level table, so
        editing the result does not alter that table.

    Raises:
        IndexError: If a row's ``category_name`` is not present in the
            module-level ``categories`` table. The exception type matches
            what the previous list-indexing lookup raised.
    """
    # Build the name -> id lookup once instead of scanning per row.
    # setdefault keeps the first entry if a name is ever listed twice.
    category_id_by_name = {}
    for cat in categories:
        category_id_by_name.setdefault(cat['name'], cat['id'])

    coco_format = {
        "info": {
            "description": "COCO format dataset",
            "version": "1.0",
            "year": 2024,
            "contributor": "Anonymous",
            "date_created": "2024/06/30"
        },
        "licenses": [],
        # Copy so the output does not alias the shared module-level list.
        "categories": [dict(cat) for cat in categories],
        "images": [],
        "annotations": []
    }
    images = coco_format['images']
    annotations = coco_format['annotations']

    # Maps the source image id to the sequential id used in the output.
    image_id_map = {}

    for _, row in df.iterrows():
        image_id = row['image_id']
        bbox = row['bbox']

        # Register each image the first time one of its rows is seen.
        if image_id not in image_id_map:
            image_id_map[image_id] = len(images) + 1  # ids start at 1
            images.append({
                'id': image_id_map[image_id],
                'file_name': row['filename'],
                'width': row['width'],
                'height': row['height']
            })

        category_name = row['category_name']
        try:
            category_id = category_id_by_name[category_name]
        except (KeyError, TypeError):
            raise IndexError(
                f"Unknown category name {category_name!r} "
                f"for image {row['filename']!r}"
            ) from None

        annotations.append({
            'id': len(annotations) + 1,  # ids start at 1
            'image_id': image_id_map[image_id],
            'category_id': category_id,
            'bbox': [bbox['xmin'], bbox['ymin'], bbox['width'], bbox['height']],
            'area': bbox['width'] * bbox['height'],
            'iscrowd': 0  # Assuming no crowds in the dataset
        })

    return coco_format
# Example usage:
# # Directory containing the per-image JSON annotation files
# json_directory = '/content/drive/MyDrive/final/full_bboxcnn_data/annots'
# df = jsons_to_dataframe(json_directory)
# # Convert the DataFrame returned by `jsons_to_dataframe` to COCO format
# coco_data = dataframe_to_coco_format(df)
# # Save the COCO-format JSON to a file
# output_json_file = '/content/drive/MyDrive/final/circuit/val/val_coco_format.json'
# with open(output_json_file, 'w') as f:
#     json.dump(coco_data, f)