# Spaces: Sleeping
# File size: 3,228 Bytes
# b0a9355
import torch
import torchvision.transforms as T
from torchvision.models.detection import maskrcnn_resnet50_fpn
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import uuid
import os
import cv2
import json
# Output locations, relative to the current working directory.
# save_input_image() writes the original input images here.
input_images_dir = 'data/input_images/'
# save_objects_and_metadata() writes per-object crops and the metadata JSON here.
segmented_objects_dir = 'data/segmented_objects/'
# Create both directories at import time; exist_ok=True makes this a no-op
# when they already exist.
os.makedirs(input_images_dir, exist_ok=True)
os.makedirs(segmented_objects_dir, exist_ok=True)
#Loading the model
def load_model():
    """Create the pretrained Mask R-CNN (ResNet-50 FPN) in evaluation mode.

    The network is loaded with pretrained weights and used for inference
    only, so it is switched to evaluation mode to deactivate dropout layers
    and other training-specific behaviour.

    Returns:
        The ``maskrcnn_resnet50_fpn`` model instance after ``eval()``.
    """
    # NOTE(review): newer torchvision releases prefer ``weights=`` over
    # ``pretrained=`` -- confirm against the installed version before changing.
    net = maskrcnn_resnet50_fpn(pretrained=True)
    # Alternative kept from the original (using a different backbone):
    # net = maskrcnn_resnet50_fpn(pretrained=False, pretrained_backbone=False, backbone_name='resnext50_32x4d')
    net.eval()
    return net
# Instantiate the model once at import time; this runs load_model() as a
# module-level side effect.
model = load_model()  # model initialization
def transform_image(image):
    """Turn a PIL image into a batched tensor for the Mask R-CNN model.

    torchvision's detection models expect inputs scaled to the 0-1 range and
    apply their own mean/std normalization internally. The extra
    ``Normalize((0.5, ...), (0.5, ...))`` step used previously pushed pixel
    values into [-1, 1] before that internal normalization, so it has been
    removed.

    Args:
        image: A PIL image. Non-RGB modes (grayscale, palette, RGBA, ...) are
            converted to RGB because the model requires three channels.

    Returns:
        A float tensor of shape ``[1, 3, 256, 256]`` with values in [0, 1].
    """
    if image.mode != "RGB":
        image = image.convert("RGB")

    transform = T.Compose([
        # Fixed 256x256 size is kept so the output shape seen by callers is
        # unchanged (note that it does not preserve the aspect ratio).
        T.Resize((256, 256)),
        # PIL image -> float tensor in [0, 1], laid out as [C, H, W].
        T.ToTensor(),
    ])
    # Add the batch dimension: [C, H, W] -> [1, C, H, W].
    return transform(image).unsqueeze(0)
# Example: test the image transformation
# image_path = r"D:\multiobject.jpeg"  # Replace with the path to your image
# image_tensor = transform_image(Image.open(image_path))  # transform_image needs a PIL image, not a path string
def run_inference(model, image_tensor):
    """Call ``model`` on ``image_tensor`` with gradient tracking disabled.

    Args:
        model: Callable (here, the detection network) applied to the input.
        image_tensor: Input handed to ``model`` unchanged.

    Returns:
        Whatever ``model(image_tensor)`` returns.
    """
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        return model(image_tensor)
def extract_object(image, mask):
    """Return a copy of ``image`` with everything outside ``mask`` zeroed.

    The mask is resized (nearest neighbour) to the image size and multiplied
    into every channel. Unlike the earlier per-channel loop over exactly
    three channels, this also works for single-channel (2-D) images and
    applies the mask to the alpha channel of RGBA images instead of leaving
    it all-zero.

    Args:
        image: PIL image (or anything ``np.array`` turns into an ``H x W`` or
            ``H x W x C`` array).
        mask: 2-D array marking the object's pixels.
            NOTE(review): assumes values are 0/1 (or 0.0-1.0) with non-zero
            meaning "object" -- confirm against the caller.

    Returns:
        A PIL image with the same size and dtype as the input array.
    """
    img_np = np.array(image)
    mask_np = np.asarray(mask)
    if mask_np.dtype == np.bool_:
        # cv2.resize does not accept boolean arrays.
        mask_np = mask_np.astype(np.uint8)

    height, width = img_np.shape[:2]
    # cv2 takes the target size as (width, height); INTER_NEAREST keeps the
    # mask values unchanged instead of blending them at the edges.
    mask_resized = cv2.resize(mask_np, (width, height), interpolation=cv2.INTER_NEAREST)

    if img_np.ndim == 3:
        # Broadcast the 2-D mask across all channels.
        mask_resized = mask_resized[:, :, np.newaxis]

    # Cast back so the result keeps the image's dtype even for float masks.
    object_img = (img_np * mask_resized).astype(img_np.dtype)
    return Image.fromarray(object_img)
# def extract_object(image, mask):
# object_img = Image.fromarray((np.array(image) * mask[:, :, None]).astype(np.uint8))
# return object_img
# Save the input image
def save_input_image(image, master_id):
    """Write ``image`` into ``input_images_dir`` as ``<master_id>.png``.

    Args:
        image: Object exposing ``save(path)`` (presumably a PIL image).
        master_id: Identifier used as the file name stem.

    Returns:
        The path the image was saved to.
    """
    filename = f'{master_id}.png'
    destination = os.path.join(input_images_dir, filename)
    image.save(destination)
    return destination
# Save the extracted objects and their metadata
def save_objects_and_metadata(extracted_objects, master_id):
    """Save each extracted object as a PNG and write a JSON metadata index.

    Every object gets a fresh UUID4 as its id and is stored as
    ``<object_id>.png`` inside ``segmented_objects_dir``. The collected
    records are then dumped to ``<master_id>_metadata.json`` in the same
    directory.

    Args:
        extracted_objects: Iterable of objects exposing ``save(path)``
            (presumably PIL images).
        master_id: Identifier of the source image the objects came from.

    Returns:
        A list of dicts with the keys ``object_id``, ``master_id`` and
        ``object_image_path``, one per saved object.
    """
    records = []
    for obj_img in extracted_objects:
        object_id = str(uuid.uuid4())
        target_path = os.path.join(segmented_objects_dir, f'{object_id}.png')
        obj_img.save(target_path)
        records.append({
            'object_id': object_id,
            'master_id': master_id,
            'object_image_path': target_path,
        })

    # One metadata file per master image, written after all objects are saved.
    metadata_path = os.path.join(segmented_objects_dir, f'{master_id}_metadata.json')
    with open(metadata_path, 'w') as handle:
        json.dump(records, handle, indent=4)
    return records