File size: 5,422 Bytes

1512fcc

from glob import glob
import pandas as pd
import json
import os
import shutil

def jsons_to_dataframe(json_dir, min_image_id=1700,
                       copy_dest="/content/drive/MyDrive/final/circuit/val/"):
    """Collect per-object annotations from numbered JSON files into a DataFrame.

    Every ``<number>.json`` file in ``json_dir`` whose number is at least
    ``min_image_id`` is read, and the matching ``<number>.png`` image is
    copied to ``copy_dest``. Each entry of the file's ``'object'`` list
    becomes one row of the result.

    Args:
        json_dir: Directory containing the annotation JSON files. The image
            directory is derived from it by replacing ``'annots'`` with
            ``'images'`` in the path.
        min_image_id: Smallest numeric file stem to include (default 1700).
        copy_dest: Destination handed to ``shutil.copy2`` for each selected
            image. Pass ``None`` to skip copying images altogether.

    Returns:
        pandas.DataFrame with the columns ``filename``, ``image_id``,
        ``width``, ``height``, ``category_name`` and ``bbox``; ``bbox`` is a
        dict with the keys ``xmin``, ``ymin``, ``width`` and ``height``.

    Raises:
        OSError: If a selected image cannot be copied or a JSON file cannot
            be read.
        KeyError: If a JSON file lacks one of the fields read here.
    """
    # Loop-invariant: images live in the sibling path with 'annots' -> 'images'.
    image_dir = json_dir.replace('annots', 'images')

    columns = {
        'filename': [],
        'image_id': [],
        'width': [],
        'height': [],
        'category_name': [],
        'bbox': [],
    }

    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order (and anything derived from it) reproducible between runs.
    for filename in sorted(os.listdir(json_dir)):
        # Check the extension *before* parsing the numeric ID so that stray
        # files (e.g. '.DS_Store', 'notes.txt') are ignored, not fatal.
        if not filename.endswith('.json'):
            continue
        try:
            image_id = int(filename.split('.')[0])
        except ValueError:
            # A JSON file without a numeric stem has no image ID; skip it.
            continue
        if image_id < min_image_id:
            continue

        image_filename = filename.replace('.json', '.png')
        if copy_dest is not None:
            shutil.copy2(os.path.join(image_dir, image_filename), copy_dest)

        # Load JSON data from file
        with open(os.path.join(json_dir, filename), 'r') as f:
            data = json.load(f)

        width_value = int(data['size']['width'])
        height_value = int(data['size']['height'])

        # One output row per annotated object
        for obj in data['object']:
            box = obj['bndbox']
            # Coordinates may be stored as float-like strings ("10.6"), so go
            # through float() before truncating to int.
            xmin = int(float(box['xmin']))
            ymin = int(float(box['ymin']))
            xmax = int(float(box['xmax']))
            ymax = int(float(box['ymax']))

            columns['filename'].append(image_filename)
            columns['image_id'].append(image_id)
            columns['width'].append(width_value)
            columns['height'].append(height_value)
            columns['category_name'].append(obj['name'])
            # Store the box as corner + size rather than two corners.
            columns['bbox'].append({
                "xmin": xmin,
                "ymin": ymin,
                "width": xmax - xmin,
                "height": ymax - ymin
            })

    return pd.DataFrame(columns)


# Object classes of the dataset as {'id', 'name'} records. IDs are assigned
# from 1 upward in the order the names are listed below.
categories = [
    {'id': class_id, 'name': class_name}
    for class_id, class_name in enumerate(
        (
            'Active_IC',
            'capacitor',
            'connector',
            'crystal',
            'diode',
            'gnd',
            'inductor',
            'led',
            'misc',
            'nmos',
            'npn',
            'pmos',
            'pnp',
            'pwr',
            'pwr_connector',
            'resistor',
            'switch',
        ),
        start=1,
    )
]

def dataframe_to_coco_format(df, category_list=None):
    """Convert a per-object annotation DataFrame into a COCO-style dictionary.

    Args:
        df: DataFrame with one row per annotated object and the columns
            ``image_id``, ``filename``, ``width``, ``height``,
            ``category_name`` and ``bbox`` (a dict with the keys ``xmin``,
            ``ymin``, ``width`` and ``height``).
        category_list: Optional list of ``{'id': ..., 'name': ...}`` dicts
            used to resolve category names. When omitted, the module-level
            ``categories`` list is used.

    Returns:
        A dict with the keys ``info``, ``licenses``, ``categories``,
        ``images`` and ``annotations``. Images and annotations are numbered
        from 1 in order of first appearance in ``df``; the original
        ``image_id`` values are only used to group rows by image.

    Raises:
        IndexError: If a row's ``category_name`` is not in the category list.
            (The exception type matches the previous behaviour; the message
            now names the offending category.)
    """
    source_categories = categories if category_list is None else category_list

    # Build the name -> ID lookup once instead of scanning the list per row.
    # setdefault keeps the first entry when a name is listed twice.
    category_ids = {}
    for cat in source_categories:
        category_ids.setdefault(cat['name'], cat['id'])

    # Initialize COCO format dictionary
    coco_format = {
        "info": {
            "description": "COCO format dataset",
            "version": "1.0",
            "year": 2024,
            "contributor": "Anonymous",
            "date_created": "2024/06/30"
        },
        "licenses": [],
        # Copy the entries so that editing the result cannot alter the shared
        # category table.
        "categories": [dict(cat) for cat in source_categories],
        "images": [],
        "annotations": []
    }
    images = coco_format['images']
    annotations = coco_format['annotations']

    # Maps the DataFrame's image_id to the sequential ID used in the output
    image_id_map = {}

    for _, row in df.iterrows():
        image_id = row['image_id']
        category_name = row['category_name']
        bbox = row['bbox']

        # Register the image the first time one of its rows is seen
        if image_id not in image_id_map:
            image_id_map[image_id] = len(images) + 1  # image IDs start at 1
            images.append({
                'id': image_id_map[image_id],
                'file_name': row['filename'],
                'width': row['width'],
                'height': row['height']
            })

        try:
            category_id = category_ids[category_name]
        except KeyError:
            raise IndexError(
                f"unknown category name {category_name!r} "
                f"for image_id {image_id!r}"
            ) from None

        annotations.append({
            'id': len(annotations) + 1,  # annotation IDs start at 1
            'image_id': image_id_map[image_id],
            'category_id': category_id,
            'bbox': [bbox['xmin'], bbox['ymin'], bbox['width'], bbox['height']],
            'area': bbox['width'] * bbox['height'],
            'iscrowd': 0  # Assuming no crowds in the dataset
        })

    return coco_format



# Example usage:
# json_directory = '/content/drive/MyDrive/final/full_bboxcnn_data/annots'  # Replace with the directory containing your JSON files
# df = jsons_to_dataframe(json_directory)
# # `df` is the pandas DataFrame returned by `jsons_to_dataframe` above.

# # Convert DataFrame to COCO format
# coco_data = dataframe_to_coco_format(df)

# # Save COCO format JSON to a file
# output_json_file = '/content/drive/MyDrive/final/circuit/val/val_coco_format.json'
# with open(output_json_file, 'w') as f:
#     json.dump(coco_data, f)