---
license: apache-2.0
tags:
- image-segmentation
- image-matting
- background-removal
- computer-vision
- custom-architecture
library_name: transformers
pipeline_tag: image-segmentation
---
# MODNet – Fast Image Matting for Background Removal
This repository provides a Hugging Face–compatible version of **MODNet (Matting Objective Decomposition Network)** for fast and high-quality foreground matting and background removal.
The model is designed to produce **pixel-perfect alpha mattes**, handling fine details such as hair, fabric edges, and soft shadows, while remaining efficient enough for real-time CPU inference.
---
## 🔹 Model Details
- **Architecture**: MODNet (custom architecture)
- **Task**: Image matting / background removal
- **Framework**: PyTorch
- **Backbone**: MobileNetV2
- **Input**: RGB image tensor `(B, 3, H, W)`
- **Output**: `(semantic, detail, matte)` predictions
## How to Load the Model
```python
from transformers import AutoModel

# MODNet ships custom modeling code, so remote code execution must be enabled.
model = AutoModel.from_pretrained(
    "boopathiraj/MODNet",
    trust_remote_code=True,
)
model.eval()  # inference mode: disables dropout / batch-norm updates
```
## Example Inference
```python
import torch
import cv2
import numpy as np
from PIL import Image
from torchvision import transforms
# Preprocess: letterbox the image into a ref_size x ref_size square,
# normalize to [-1, 1], and return a batched tensor plus the original size.
def preprocess(image_path, ref_size=512):
    """Load an image, letterbox it to ref_size x ref_size, and tensorize it.

    Returns:
        (tensor, (orig_w, orig_h)): a (1, 3, ref_size, ref_size) float tensor
        normalized with mean/std 0.5, and the original image size.
    """
    img = Image.open(image_path).convert("RGB")
    orig_w, orig_h = img.size

    # Scale so the longer side becomes ref_size (aspect ratio preserved).
    ratio = ref_size / max(orig_h, orig_w)
    scaled_w, scaled_h = int(orig_w * ratio), int(orig_h * ratio)
    scaled = img.resize((scaled_w, scaled_h), Image.BILINEAR)

    # Paste centered onto a black ref_size x ref_size canvas (letterbox padding).
    canvas = Image.new("RGB", (ref_size, ref_size), (0, 0, 0))
    canvas.paste(scaled, ((ref_size - scaled_w) // 2, (ref_size - scaled_h) // 2))

    to_tensor = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ])
    return to_tensor(canvas).unsqueeze(0), (orig_w, orig_h)
# Run inference
inp, original_size = preprocess("/content/portrait.jpg")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
inp = inp.to(device)  # move the input tensor to the same device as the model

with torch.no_grad():
    semantic, detail, matte = model(inp, True)

matte = matte[0, 0].cpu().numpy()

# BUGFIX: the model output matches the 512x512 *padded* input, so the
# letterbox padding must be cropped away before resizing back to the
# original resolution — otherwise the matte is distorted and keeps black
# borders from the padding.
w, h = original_size
ref_size = 512  # must match the ref_size used in preprocess()
scale = ref_size / max(h, w)
new_w, new_h = int(w * scale), int(h * scale)
x0 = (ref_size - new_w) // 2
y0 = (ref_size - new_h) // 2
matte = matte[y0:y0 + new_h, x0:x0 + new_w]

# cv2.resize takes (width, height), which is exactly original_size's order.
matte = cv2.resize(matte, original_size)

# Save alpha matte as an 8-bit grayscale PNG.
matte = (matte * 255).astype(np.uint8)
Image.fromarray(matte).save("alpha_matte.png")
```
## Visualization
```python
import numpy as np
from PIL import Image
def combined_display(image, matte):
    """Return a side-by-side PIL image: original | predicted foreground | alpha."""
    # Display resolution: 800 px wide for the 3-panel strip; height keeps aspect.
    w, h = image.width, image.height
    rw, rh = 800, int(h * 800 / (3 * w))

    # Normalize the input image to a 3-channel array.
    img = np.asarray(image)
    if img.ndim == 2:
        img = img[:, :, None]
    if img.shape[2] == 1:
        img = np.repeat(img, 3, axis=2)
    elif img.shape[2] == 4:
        img = img[:, :, 0:3]

    # Alpha in [0, 1], broadcast to 3 channels.
    alpha = np.repeat(np.asarray(matte)[:, :, None], 3, axis=2) / 255

    # Composite the predicted foreground over a white background.
    foreground = img * alpha + np.full(img.shape, 255) * (1 - alpha)

    # Stitch the three panels horizontally and downscale for display.
    strip = np.concatenate((img, foreground, alpha * 255), axis=1)
    return Image.fromarray(np.uint8(strip)).resize((rw, rh))
import os  # BUGFIX: os was used below but never imported

# BUGFIX: these were referenced but never defined — set them to your paths.
input_folder = "input"    # directory of source images
output_folder = "output"  # directory of predicted alpha mattes (PNG)

# Visualize every image next to its predicted foreground and alpha matte.
# NOTE: `display` is the IPython/Jupyter helper; run this in a notebook.
for image_name in os.listdir(input_folder):
    # Mattes share the image's base name; splitext handles dots in filenames
    # correctly, unlike split('.')[0].
    matte_name = os.path.splitext(image_name)[0] + '.png'
    image = Image.open(os.path.join(input_folder, image_name))
    matte = Image.open(os.path.join(output_folder, matte_name))
    display(combined_display(image, matte))
    print(image_name, '\n')
```
---