Spaces:
Sleeping
Sleeping
File size: 5,490 Bytes
from transformers import SegformerFeatureExtractor, SegformerForSemanticSegmentation
from PIL import Image, ImageDraw
import numpy as np
from torch import nn
import gradio as gr
import os
import torch
import time
# The pre-processor and the segmentation network are both loaded from the
# same pretrained checkpoint.
_CHECKPOINT = "nvidia/segformer-b5-finetuned-cityscapes-1024-1024"
feature_extractor = SegformerFeatureExtractor.from_pretrained(_CHECKPOINT)
model = SegformerForSemanticSegmentation.from_pretrained(_CHECKPOINT)

# Run on the GPU when torch can see one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f"Is CUDA available: {torch.cuda.is_available()} --> {device=}")
if device == 'cuda':
    print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")

# Move the model weights to the selected device once, at import time.
model.to(device)
# https://github.com/NielsRogge/Transformers-Tutorials/blob/master/SegFormer/Segformer_inference_notebook.ipynb
def cityscapes_palette():
    """Return the Cityscapes colour table as a fresh list of 19 ``[r, g, b]`` lists.

    Entry ``i`` is the colour of the class at position ``i`` of
    ``cityscapes_classes()``. A new list of new lists is built on every call,
    so callers may mutate the result freely.
    """
    rgb_triples = (
        (128, 64, 128),   # road
        (244, 35, 232),   # sidewalk
        (70, 70, 70),     # building
        (102, 102, 156),  # wall
        (190, 153, 153),  # fence
        (153, 153, 153),  # pole
        (250, 170, 30),   # traffic light
        (220, 220, 0),    # traffic sign
        (107, 142, 35),   # vegetation
        (152, 251, 152),  # terrain
        (70, 130, 180),   # sky
        (220, 20, 60),    # person
        (255, 0, 0),      # rider
        (0, 0, 142),      # car
        (0, 0, 70),       # truck
        (0, 60, 100),     # bus
        (0, 80, 100),     # train
        (0, 0, 230),      # motorcycle
        (119, 11, 32),    # bicycle
    )
    return [list(rgb) for rgb in rgb_triples]
def cityscapes_classes():
    """Return the 19 Cityscapes class names as a fresh list of strings.

    The position of a name in the list is its label index (for example,
    ``'traffic light'`` is at index 6).
    """
    # Comma-separated so that multi-word names such as 'traffic light'
    # survive the split intact.
    names = (
        'road,sidewalk,building,wall,fence,pole,'
        'traffic light,traffic sign,vegetation,terrain,sky,'
        'person,rider,car,truck,bus,train,motorcycle,'
        'bicycle'
    )
    return names.split(',')
def annotation(image: "Image.Image", color_seg: np.ndarray):
    """Outline every grid square of ``image`` that contains highlighted pixels.

    The 1024x1024 picture is split into a 4x4 grid of 256-pixel squares. For
    each square the values of ``color_seg`` (all three channels) are summed;
    when that sum exceeds the threshold, the square's origin is printed and a
    white rectangle is drawn around it on ``image``.

    Args:
        image: PIL image of size (1024, 1024). It is modified in place.
        color_seg: Array of shape (1024, 1024, 3) holding the colour mask;
            pixels outside the mask are expected to be zero.

    Raises:
        ValueError: If ``image`` or ``color_seg`` does not have the expected
            dimensions.
    """
    expected_size = (1024, 1024)
    # Explicit checks instead of `assert`, which is stripped under `python -O`.
    if image.size != expected_size:
        raise ValueError(f"image must be {expected_size}, got {image.size}")
    if color_seg.shape != expected_size + (3,):
        raise ValueError(
            f"color_seg must have shape {expected_size + (3,)}, got {color_seg.shape}"
        )

    blocks = 4  # 4x4 sub grid
    step_size = 256  # sub square edge size
    threshold = 100000  # minimum summed mask intensity for a square to count

    draw = ImageDraw.Draw(image)

    # Collapse the colour channels once (1024 x 1024); this does not depend on
    # the square being inspected, so it is computed outside the loop.
    reduced_seg = color_seg.sum(axis=2)

    offsets = range(0, blocks * step_size, step_size)
    # Same visiting order as before: x is the outer coordinate, y the inner.
    for x in offsets:
        for y in offsets:
            # Rows are indexed by y and columns by x.
            sub_square_seg = reduced_seg[y:y + step_size, x:x + step_size]
            if sub_square_seg.sum() > threshold:
                print("light found at square ", x, y)
                draw.rectangle(
                    [(x, y), (x + step_size, y + step_size)],
                    outline="white",
                    width=3,
                )
def call(image):  # nparray
    """Highlight traffic lights in ``image`` and return the annotated picture.

    The input is resized to 1024x1024 and segmented with the module-level
    SegFormer ``model``. Pixels predicted as 'traffic light' are blended into
    the picture as a colour mask, and ``annotation`` outlines every 256-pixel
    grid square that contains such pixels. Timing and shape information is
    printed along the way.

    Args:
        image: Input picture as a numpy array (as delivered by the Gradio
            "image" input).

    Returns:
        A 1024x1024 RGB ``PIL.Image.Image`` with the mask and the outlines.

    Raises:
        ValueError: If ``image`` is ``None`` (nothing was submitted).
    """
    if image is None:
        raise ValueError("no image provided")

    start = time.perf_counter()

    # Force three channels so RGBA / greyscale inputs do not break the
    # 3-channel blend further down.
    resized = Image.fromarray(image).convert("RGB").resize((1024, 1024))
    resized_image = np.array(resized)
    print(f"{np.array(resized_image).shape=}")  # 1024, 1024, 3
    print(f"*processing time: {(time.perf_counter() - start):.2f} s")

    inputs = feature_extractor(images=resized_image, return_tensors="pt").to(device)
    print(f"**processing time: {(time.perf_counter() - start):.2f} s")

    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        outputs = model(**inputs)
    logits = outputs.logits.cpu()
    # shape (batch_size, num_labels, height/4, width/4) -> expected 1, 19, 256, 256
    print(f"{logits.shape=}")
    print(f"***processing time: {(time.perf_counter() - start):.2f} s")

    # First, rescale logits to the resized image size (height, width)
    interpolated_logits = nn.functional.interpolate(
        logits,
        size=[1024, 1024],
        mode='bilinear',
        align_corners=False)
    print(f"{interpolated_logits.shape=}, {logits.shape=}")  # 1, 19, 1024, 1024

    # Second, apply argmax on the class dimension
    seg = interpolated_logits.argmax(dim=1)[0]
    print(f"{seg.shape=}")

    color_seg = np.zeros((seg.shape[0], seg.shape[1], 3), dtype=np.uint8)  # height, width, 3
    print(f"{color_seg.shape=}")

    # Only the traffic-light class is painted; every other pixel stays black.
    light_label = cityscapes_classes().index('traffic light')
    color_seg[seg.numpy() == light_label, :] = cityscapes_palette()[light_label]

    # Convert to BGR
    # NOTE(review): the result is later shown as an RGB PIL image, so this
    # flip swaps the red and blue channels of the mask colour. Kept as is to
    # preserve the current look; confirm whether it is intended.
    color_seg = color_seg[..., ::-1]
    print(f"{color_seg.shape=}")
    print(f"****processing time: {(time.perf_counter() - start):.2f} s")

    # Show image + mask
    img = resized_image * 0.5 + color_seg * 0.5
    img = img.astype(np.uint8)
    out_im_file = Image.fromarray(img)

    # Draws the white outlines on out_im_file in place.
    annotation(out_im_file, color_seg)
    print(f"--> processing time: {(time.perf_counter() - start):.2f} s")
    return out_im_file
# original_image = Image.open("./examples/1.jpg")
# print(f"{np.array(original_image).shape=}") # eg 729, 1000, 3
# out = call(original_image)
# out.save("out2.jpeg")
# Texts shown at the top of the demo page.
title = "Traffic Light Detector"
description = "Experiment traffic light detection to evaluate the value of captcha security controls"

# Image-in / image-out Gradio UI around `call`; the six sample pictures are
# looked up in the `examples` folder next to this file.
iface = gr.Interface(
    fn=call,
    inputs="image",
    outputs="image",
    title=title,
    description=description,
    examples=[
        os.path.join(os.path.dirname(__file__), f"examples/{n}.jpg")
        for n in range(1, 7)
    ],
    thumbnail="thumbnail.webp",
)
iface.launch()