whitney0507
/

unet-model

computer-vision

Model card Files Files and versions

unet-model / handler.py

whitney0507's picture

Update handler.py

80d6002 verified 10 months ago

history blame contribute delete

4.21 kB

	import torch
	import torch.nn as nn
	from torchvision import transforms
	from PIL import Image
	from huggingface_hub import hf_hub_download
	import io
	import base64
	import numpy as np

	# --- Basic UNet Components ---
	class DoubleConv(nn.Module):
	    """Two stacked 3x3 convolutions, each followed by an in-place ReLU.

	    Both convolutions use ``padding=1``, so only the channel dimension
	    changes (``in_channels`` -> ``out_channels``); height and width are
	    preserved.
	    """

	    def __init__(self, in_channels, out_channels):
	        super().__init__()
	        # Layer order is conv, relu, conv, relu. The Sequential indices
	        # (0 and 2 for the convolutions) become state-dict key prefixes,
	        # so the order must stay stable for saved checkpoints to load.
	        stages = []
	        width = in_channels
	        for _ in range(2):
	            stages.append(
	                nn.Conv2d(width, out_channels, kernel_size=3, padding=1)
	            )
	            stages.append(nn.ReLU(inplace=True))
	            width = out_channels
	        self.double_conv = nn.Sequential(*stages)

	    def forward(self, x):
	        """Apply both conv+ReLU stages to ``x`` and return the result."""
	        features = self.double_conv(x)
	        return features

	class Down(nn.Module):
	    """Encoder stage: 2x2 max-pooling followed by a ``DoubleConv``.

	    The pooling halves height and width; the ``DoubleConv`` then maps
	    ``in_channels`` to ``out_channels``.
	    """

	    def __init__(self, in_channels, out_channels):
	        super().__init__()
	        # Index 0 is the pool and index 1 the conv pair; these indices are
	        # part of the state-dict keys (``maxpool_conv.1.double_conv...``).
	        pool = nn.MaxPool2d(2)
	        convs = DoubleConv(in_channels, out_channels)
	        self.maxpool_conv = nn.Sequential(pool, convs)

	    def forward(self, x):
	        """Downsample ``x`` and return the convolved feature map."""
	        downsampled = self.maxpool_conv(x)
	        return downsampled

	class Up(nn.Module):
	    """Decoder stage: upsample, align with the skip tensor, concat, convolve.

	    Args:
	        in_channels: Channel count of the concatenated tensor
	            (skip connection + upsampled input) fed to the ``DoubleConv``.
	        out_channels: Channel count produced by this stage.
	        bilinear: If true, upsample with parameter-free bilinear
	            interpolation (channel count unchanged). Otherwise use a
	            learned transposed convolution that also halves the channels.
	    """

	    def __init__(self, in_channels, out_channels, bilinear=True):
	        super().__init__()
	        if bilinear:
	            self.up = nn.Upsample(
	                scale_factor=2, mode='bilinear', align_corners=True
	            )
	        else:
	            # In the non-bilinear configuration the incoming decoder tensor
	            # has ``in_channels`` channels (e.g. 1024 into ``up1``). The
	            # transposed conv must accept that width and halve it so that,
	            # after concatenation with the skip tensor, the result has
	            # ``in_channels`` channels again. The previous
	            # ``in_channels // 2 -> in_channels // 2`` layer raised a
	            # channel-mismatch error on the first forward pass.
	            self.up = nn.ConvTranspose2d(
	                in_channels, in_channels // 2, kernel_size=2, stride=2
	            )
	        self.conv = DoubleConv(in_channels, out_channels)

	    def forward(self, x1, x2):
	        """Upsample ``x1``, pad it to ``x2``'s size, and fuse the two.

	        Args:
	            x1: Feature map from the previous (coarser) decoder stage.
	            x2: Skip-connection feature map from the encoder.

	        Returns:
	            The convolved concatenation of ``x2`` and the upsampled ``x1``.
	        """
	        x1 = self.up(x1)
	        # Odd input sizes make the upsampled map smaller than the skip map;
	        # pad the difference as evenly as possible on both sides.
	        diff_y = x2.shape[2] - x1.shape[2]
	        diff_x = x2.shape[3] - x1.shape[3]
	        # F.pad order for the last two dims is (left, right, top, bottom).
	        padding = [
	            diff_x // 2, diff_x - diff_x // 2,
	            diff_y // 2, diff_y - diff_y // 2,
	        ]
	        x1 = nn.functional.pad(x1, padding)
	        # Skip tensor first, then the upsampled tensor, along channels.
	        x = torch.cat([x2, x1], dim=1)
	        return self.conv(x)

	class OutConv(nn.Module):
	    """Final 1x1 convolution projecting features to per-class logits."""

	    def __init__(self, in_channels, out_channels):
	        super().__init__()
	        # A 1x1 kernel mixes channels only; spatial size is unchanged.
	        projection = nn.Conv2d(in_channels, out_channels, kernel_size=1)
	        self.conv = projection

	    def forward(self, x):
	        """Return the channel-projected version of ``x``."""
	        projected = self.conv(x)
	        return projected

	# --- Full UNet ---
	class UNet(nn.Module):
	    """U-Net with four down stages, four up stages and a sigmoid output.

	    Args:
	        n_channels: Number of channels in the input image.
	        n_classes: Number of output channels.
	        bilinear: Forwarded to every ``Up`` stage; when true the
	            bottleneck and decoder widths are halved.
	    """

	    def __init__(self, n_channels=3, n_classes=1, bilinear=True):
	        super().__init__()
	        self.n_channels = n_channels
	        self.n_classes = n_classes
	        self.bilinear = bilinear

	        # Bilinear upsampling does not change channel counts, so the
	        # layers feeding it are built at half width.
	        if bilinear:
	            factor = 2
	        else:
	            factor = 1

	        # Encoder. Attribute names and creation order are kept stable
	        # because they define the checkpoint's state-dict keys.
	        self.inc = DoubleConv(n_channels, 64)
	        self.down1 = Down(64, 128)
	        self.down2 = Down(128, 256)
	        self.down3 = Down(256, 512)
	        self.down4 = Down(512, 1024 // factor)

	        # Decoder.
	        self.up1 = Up(1024, 512 // factor, bilinear)
	        self.up2 = Up(512, 256 // factor, bilinear)
	        self.up3 = Up(256, 128 // factor, bilinear)
	        self.up4 = Up(128, 64, bilinear)

	        self.outc = OutConv(64, n_classes)

	    def forward(self, x):
	        """Run the encoder/decoder and return sigmoid-activated outputs."""
	        # Collect encoder outputs; every entry except the last serves as
	        # a skip connection for the decoder.
	        skips = [self.inc(x)]
	        for encoder_stage in (self.down1, self.down2, self.down3, self.down4):
	            skips.append(encoder_stage(skips[-1]))

	        # Start from the bottleneck and consume skips deepest-first.
	        decoded = skips.pop()
	        for decoder_stage in (self.up1, self.up2, self.up3, self.up4):
	            decoded = decoder_stage(decoded, skips.pop())

	        return torch.sigmoid(self.outc(decoded))

	# --- EndpointHandler for Hugging Face Inference Endpoint ---
	class EndpointHandler:
	    """Custom inference handler producing a binary segmentation mask.

	    The handler loads ``UNet_Model.pth`` into a default ``UNet`` on CPU and,
	    per request, returns a base64-encoded PNG mask of size 256x256.
	    """

	    def __init__(self, path=""):
	        """Load the model weights and build the preprocessing pipeline.

	        Args:
	            path: Directory expected to contain the model repository
	                files. If ``UNet_Model.pth`` exists there it is used
	                directly; otherwise the file is downloaded from the Hub,
	                as before.
	        """
	        # Local import keeps this block self-contained without touching
	        # the file-level import list.
	        import os

	        weights_name = "UNet_Model.pth"
	        local_weights = os.path.join(path, weights_name) if path else ""
	        if local_weights and os.path.isfile(local_weights):
	            # Avoid a network round-trip when the weights ship with the
	            # repository directory handed to the handler.
	            model_path = local_weights
	        else:
	            model_path = hf_hub_download(
	                repo_id="whitney0507/unet-model", filename=weights_name
	            )

	        self.model = UNet()
	        # NOTE(security): torch.load unpickles the file. Only load
	        # checkpoints from a trusted source; consider weights_only=True
	        # if the deployed torch version supports it.
	        state_dict = torch.load(model_path, map_location=torch.device("cpu"))
	        self.model.load_state_dict(state_dict)
	        self.model.eval()
	        # The network is always fed a fixed 256x256 tensor scaled by ToTensor.
	        self.transform = transforms.Compose([
	            transforms.Resize((256, 256)),
	            transforms.ToTensor(),
	        ])

	    def __call__(self, data):
	        """Segment one image.

	        Args:
	            data: Request payload. ``data["inputs"]`` is either a
	                base64-encoded image (str or bytes) or an already decoded
	                ``PIL.Image.Image``.

	        Returns:
	            ``{"prediction": <base64 PNG>}`` where the PNG is a 256x256
	            single-channel mask with values 0 or 255.
	        """
	        payload = data["inputs"]
	        if isinstance(payload, Image.Image):
	            # Presumably what the serving layer passes for image content
	            # types -- TODO confirm against the deployment.
	            image = payload.convert("RGB")
	        else:
	            image_bytes = base64.b64decode(payload)
	            image = Image.open(io.BytesIO(image_bytes)).convert("RGB")

	        input_tensor = self.transform(image).unsqueeze(0)

	        with torch.no_grad():
	            output = self.model(input_tensor)
	            # Drop the batch and channel axes and threshold the sigmoid
	            # output at 0.5 to get a 2-D boolean mask.
	            mask = (output[0, 0] > 0.5).cpu().numpy()

	        # Encode as an 8-bit grayscale PNG (0 = background, 255 = mask).
	        result_img = Image.fromarray(mask.astype(np.uint8) * 255)
	        buffer = io.BytesIO()
	        result_img.save(buffer, format="PNG")
	        encoded_output = base64.b64encode(buffer.getvalue()).decode("utf-8")
	        return {"prediction": encoded_output}