"""Convert per-image JSON bounding-box annotations into a COCO-format dataset."""

from glob import glob
import json
import os
import shutil

import pandas as pd

# Destination used for copied images when the caller does not override it.
DEFAULT_OUTPUT_DIR = "/content/drive/MyDrive/final/circuit/val/"

# Column order of the DataFrame produced by `jsons_to_dataframe`.
_DATAFRAME_COLUMNS = [
    'filename',
    'image_id',
    'width',
    'height',
    'category_name',
    'bbox',
]


def _parse_bbox(bndbox):
    """Turn a corner-style ``bndbox`` mapping into an xmin/ymin/width/height dict."""
    # Coordinates go through float() first so values such as "12.0" parse.
    xmin = int(float(bndbox['xmin']))
    ymin = int(float(bndbox['ymin']))
    xmax = int(float(bndbox['xmax']))
    ymax = int(float(bndbox['ymax']))
    return {
        "xmin": xmin,
        "ymin": ymin,
        "width": xmax - xmin,
        "height": ymax - ymin,
    }


def jsons_to_dataframe(json_dir, min_image_id=1700, output_dir=DEFAULT_OUTPUT_DIR):
    """Load JSON annotation files into a DataFrame with one row per object.

    Only files named ``<integer>.json`` whose integer is at least
    ``min_image_id`` are processed. For each of them the matching
    ``<integer>.png`` is copied from the sibling images directory (``json_dir``
    with ``'annots'`` replaced by ``'images'``) into ``output_dir``.

    Args:
        json_dir: Directory containing the JSON annotation files.
        min_image_id: Smallest numeric file stem to include.
        output_dir: Directory the matching images are copied to; it is created
            if missing. Pass ``None`` to skip copying.

    Returns:
        A DataFrame with columns ``filename``, ``image_id``, ``width``,
        ``height``, ``category_name`` and ``bbox`` (a dict with ``xmin``,
        ``ymin``, ``width``, ``height``).

    Raises:
        FileNotFoundError: If a selected annotation has no matching image and
            copying is enabled.
        KeyError: If a selected JSON file lacks the ``size`` or ``bndbox``
            fields read here.
    """
    image_dir = json_dir.replace('annots', 'images')
    rows = []

    # Sorted so row order (and the COCO ids derived from it later) does not
    # depend on the arbitrary order returned by os.listdir.
    for filename in sorted(os.listdir(json_dir)):
        if not filename.endswith('.json'):
            continue
        try:
            image_id = int(filename.split('.')[0])
        except ValueError:
            # Not an "<integer>.json" annotation file; ignore it.
            continue
        if image_id < min_image_id:
            continue

        image_filename = filename[:-len('.json')] + '.png'
        if output_dir is not None:
            os.makedirs(output_dir, exist_ok=True)
            shutil.copy2(os.path.join(image_dir, image_filename), output_dir)

        with open(os.path.join(json_dir, filename), 'r') as f:
            data = json.load(f)

        width_value = int(data['size']['width'])
        height_value = int(data['size']['height'])

        # NOTE(review): the schema looks like a converted Pascal VOC file, where
        # a single object may be stored as a dict rather than a list and an
        # empty image may omit the key entirely -- confirm against the data.
        objects = data.get('object') or []
        if isinstance(objects, dict):
            objects = [objects]

        for obj in objects:
            rows.append({
                'filename': image_filename,
                'image_id': image_id,
                'width': width_value,
                'height': height_value,
                'category_name': obj['name'],
                'bbox': _parse_bbox(obj['bndbox']),
            })

    # Explicit columns keep the schema stable even when nothing matched.
    return pd.DataFrame(rows, columns=_DATAFRAME_COLUMNS)


categories = [
    {'id': 1, 'name': 'Active_IC'},
    {'id': 2, 'name': 'capacitor'},
    {'id': 3, 'name': 'connector'},
    {'id': 4, 'name': 'crystal'},
    {'id': 5, 'name': 'diode'},
    {'id': 6, 'name': 'gnd'},
    {'id': 7, 'name': 'inductor'},
    {'id': 8, 'name': 'led'},
    {'id': 9, 'name': 'misc'},
    {'id': 10, 'name': 'nmos'},
    {'id': 11, 'name': 'npn'},
    {'id': 12, 'name': 'pmos'},
    {'id': 13, 'name': 'pnp'},
    {'id': 14, 'name': 'pwr'},
    {'id': 15, 'name': 'pwr_connector'},
    {'id': 16, 'name': 'resistor'},
    {'id': 17, 'name': 'switch'},
]


def dataframe_to_coco_format(df):
    """Convert a DataFrame from `jsons_to_dataframe` into a COCO-style dict.

    Images receive consecutive ids starting at 1 in order of first appearance
    of their ``image_id``; annotations receive consecutive ids starting at 1 in
    row order. Category ids come from the module-level ``categories`` list.

    Args:
        df: DataFrame with columns ``filename``, ``image_id``, ``width``,
            ``height``, ``category_name`` and ``bbox`` (dict with ``xmin``,
            ``ymin``, ``width``, ``height``).

    Returns:
        A dict with the keys ``info``, ``licenses``, ``categories``, ``images``
        and ``annotations``.

    Raises:
        IndexError: If a row's ``category_name`` is not in ``categories``.
    """
    coco_format = {
        "info": {
            "description": "COCO format dataset",
            "version": "1.0",
            "year": 2024,
            "contributor": "Anonymous",
            "date_created": "2024/06/30"
        },
        "licenses": [],
        "categories": categories,
        "images": [],
        "annotations": []
    }

    # Build the name -> id table once instead of scanning the list per row.
    # setdefault keeps the first entry for a duplicated name.
    category_ids = {}
    for cat in categories:
        category_ids.setdefault(cat['name'], cat['id'])

    # Maps the source image_id to the 1-based id used in the COCO output.
    image_id_map = {}

    for row in df.itertuples(index=False):
        if row.image_id not in image_id_map:
            image_id_map[row.image_id] = len(coco_format['images']) + 1
            coco_format['images'].append({
                'id': image_id_map[row.image_id],
                'file_name': row.filename,
                'width': row.width,
                'height': row.height
            })

        if row.category_name not in category_ids:
            # IndexError is kept so existing handlers still catch this case.
            raise IndexError(f"unknown category name: {row.category_name!r}")

        bbox = row.bbox
        coco_format['annotations'].append({
            'id': len(coco_format['annotations']) + 1,
            'image_id': image_id_map[row.image_id],
            'category_id': category_ids[row.category_name],
            'bbox': [bbox['xmin'], bbox['ymin'], bbox['width'], bbox['height']],
            'area': bbox['width'] * bbox['height'],
            'iscrowd': 0  # Assuming no crowds in the dataset
        })

    return coco_format


# Example usage:
#
# # Replace with the directory containing your JSON files.
# json_directory = '/content/drive/MyDrive/final/full_bboxcnn_data/annots'
# df = jsons_to_dataframe(json_directory)
#
# # Convert the DataFrame to COCO format.
# coco_data = dataframe_to_coco_format(df)
#
# # Save the COCO-format JSON to a file.
# output_json_file = '/content/drive/MyDrive/final/circuit/val/val_coco_format.json'
# with open(output_json_file, 'w') as f:
#     json.dump(coco_data, f)