Spaces:
Sleeping
Sleeping
option to pick any model
Browse files
README.md
CHANGED
|
@@ -11,7 +11,7 @@ license: mit
|
|
| 11 |
short_description: BiRefNet lite Background Removal on CPU
|
| 12 |
---
|
| 13 |
|
| 14 |
-
Runs `ibrhr/BiRefNet-lite-openvino-xeon-w2145
|
| 15 |
|
| 16 |
The Space accepts one uploaded image and returns:
|
| 17 |
|
|
@@ -19,3 +19,18 @@ The Space accepts one uploaded image and returns:
|
|
| 19 |
- a transparent PNG with the background removed
|
| 20 |
- processing time broken down by preprocessing, inference, and postprocessing
|
| 21 |
- runtime specs for the model variant, device, tensor shapes, and image size
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
short_description: BiRefNet lite Background Removal on CPU
|
| 12 |
---
|
| 13 |
|
| 14 |
+
Runs `ibrhr/BiRefNet-lite-openvino-xeon-w2145` with selectable OpenVINO model variants on CPU.
|
| 15 |
|
| 16 |
The Space accepts one uploaded image and returns:
|
| 17 |
|
|
|
|
| 19 |
- a transparent PNG with the background removed
|
| 20 |
- processing time broken down by preprocessing, inference, and postprocessing
|
| 21 |
- runtime specs for the model variant, device, tensor shapes, and image size
|
| 22 |
+
|
| 23 |
+
The model picker includes all 8 OpenVINO model variants from the model repo:
|
| 24 |
+
|
| 25 |
+
| Variant | Resolution | Benchmark |
|
| 26 |
+
|---|---:|---:|
|
| 27 |
+
| INT8 NNCF | 1024x1024 | 1272 ms / 0.79 FPS |
|
| 28 |
+
| INT8 NNCF | 512x512 | 332 ms / 3.01 FPS |
|
| 29 |
+
| FP16 | 1024x1024 | 1419 ms / 0.70 FPS |
|
| 30 |
+
| FP16 | 512x512 | 366 ms / 2.73 FPS |
|
| 31 |
+
| FP32 | 1024x1024 | 1441 ms / 0.69 FPS |
|
| 32 |
+
| FP32 | 512x512 | 366 ms / 2.73 FPS |
|
| 33 |
+
| INT8 weight-only | 1024x1024 | 1440 ms / 0.69 FPS |
|
| 34 |
+
| INT8 weight-only | 512x512 | 366 ms / 2.73 FPS |
|
| 35 |
+
|
| 36 |
+
Set the `MODEL_VARIANT` environment variable to change the default selection (it defaults to `fp32_1024x1024`). Valid values are `int8_1024x1024`, `int8_512x512`, `fp16_1024x1024`, `fp16_512x512`, `fp32_1024x1024`, `fp32_512x512`, `int8wo_1024x1024`, and `int8wo_512x512`.
|
app.py
CHANGED
|
@@ -10,54 +10,163 @@ from openvino import Core
|
|
| 10 |
from PIL import Image, ImageOps
|
| 11 |
|
| 12 |
MODEL_REPO_ID = os.getenv("MODEL_REPO_ID", "ibrhr/BiRefNet-lite-openvino-xeon-w2145")
|
| 13 |
-
MODEL_XML = os.getenv("MODEL_XML", "openvino_fp32/birefnet_lite_1024x1024.xml")
|
| 14 |
-
MODEL_BIN = MODEL_XML.replace(".xml", ".bin")
|
| 15 |
-
MODEL_VARIANT = "birefnet_lite_1024x1024"
|
| 16 |
-
MODEL_SIZE = 1024
|
| 17 |
DEVICE = os.getenv("OPENVINO_DEVICE", "CPU")
|
|
|
|
| 18 |
|
| 19 |
IMAGENET_MEAN = np.array([0.485, 0.456, 0.406], dtype=np.float32)
|
| 20 |
IMAGENET_STD = np.array([0.229, 0.224, 0.225], dtype=np.float32)
|
| 21 |
|
| 22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
@dataclass(frozen=True)
|
| 24 |
class Runtime:
|
| 25 |
compiled_model: object
|
| 26 |
input_node: object
|
| 27 |
output_node: object
|
|
|
|
| 28 |
model_path: str
|
| 29 |
load_seconds: float
|
| 30 |
device: str
|
| 31 |
|
| 32 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
def _resampling(name: str) -> int:
|
| 34 |
return getattr(Image.Resampling, name)
|
| 35 |
|
| 36 |
|
| 37 |
-
@lru_cache(maxsize=
|
| 38 |
-
def get_runtime() -> Runtime:
|
|
|
|
| 39 |
started = time.perf_counter()
|
| 40 |
-
model_path = hf_hub_download(repo_id=MODEL_REPO_ID, filename=
|
| 41 |
-
weights_path = hf_hub_download(repo_id=MODEL_REPO_ID, filename=
|
| 42 |
|
| 43 |
core = Core()
|
| 44 |
model = core.read_model(model=model_path, weights=weights_path)
|
| 45 |
-
model.reshape({model.input(0): [1, 3,
|
| 46 |
compiled_model = core.compile_model(model, DEVICE)
|
| 47 |
|
| 48 |
return Runtime(
|
| 49 |
compiled_model=compiled_model,
|
| 50 |
input_node=compiled_model.input(0),
|
| 51 |
output_node=compiled_model.output(0),
|
|
|
|
| 52 |
model_path=model_path,
|
| 53 |
load_seconds=time.perf_counter() - started,
|
| 54 |
device=DEVICE,
|
| 55 |
)
|
| 56 |
|
| 57 |
|
| 58 |
-
def preprocess(image: Image.Image) -> np.ndarray:
|
| 59 |
rgb_image = ImageOps.exif_transpose(image).convert("RGB")
|
| 60 |
-
resized = rgb_image.resize((
|
| 61 |
array = np.asarray(resized, dtype=np.float32) / 255.0
|
| 62 |
array = (array - IMAGENET_MEAN) / IMAGENET_STD
|
| 63 |
array = np.transpose(array, (2, 0, 1))[None, ...]
|
|
@@ -80,16 +189,17 @@ def postprocess_mask(output: np.ndarray, size: tuple[int, int]) -> Image.Image:
|
|
| 80 |
return mask_image.resize(size, _resampling("LANCZOS"))
|
| 81 |
|
| 82 |
|
| 83 |
-
def remove_background(image: Image.Image):
|
| 84 |
if image is None:
|
| 85 |
raise gr.Error("Upload an image first.")
|
| 86 |
|
| 87 |
total_started = time.perf_counter()
|
| 88 |
-
|
|
|
|
| 89 |
original = ImageOps.exif_transpose(image).convert("RGB")
|
| 90 |
|
| 91 |
preprocess_started = time.perf_counter()
|
| 92 |
-
tensor = preprocess(original)
|
| 93 |
preprocess_seconds = time.perf_counter() - preprocess_started
|
| 94 |
|
| 95 |
inference_started = time.perf_counter()
|
|
@@ -112,10 +222,15 @@ def remove_background(image: Image.Image):
|
|
| 112 |
|
| 113 |
specs = {
|
| 114 |
"model": MODEL_REPO_ID,
|
| 115 |
-
"variant":
|
|
|
|
|
|
|
| 116 |
"device": runtime.device,
|
| 117 |
-
"precision":
|
| 118 |
-
"model_input_size": f"{
|
|
|
|
|
|
|
|
|
|
| 119 |
"uploaded_image_size": f"{original.width}x{original.height}",
|
| 120 |
"input_tensor_shape": list(tensor.shape),
|
| 121 |
"output_tensor_shape": list(np.asarray(output).shape),
|
|
@@ -132,6 +247,12 @@ with gr.Blocks(title="BiRefNet OpenVINO") as demo:
|
|
| 132 |
gr.Markdown("# BiRefNet OpenVINO")
|
| 133 |
with gr.Row():
|
| 134 |
input_image = gr.Image(label="Image", type="pil")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 135 |
run_button = gr.Button("Run", variant="primary")
|
| 136 |
with gr.Row():
|
| 137 |
mask_output = gr.Image(label="Mask", type="pil")
|
|
@@ -142,12 +263,12 @@ with gr.Blocks(title="BiRefNet OpenVINO") as demo:
|
|
| 142 |
|
| 143 |
run_button.click(
|
| 144 |
fn=remove_background,
|
| 145 |
-
inputs=input_image,
|
| 146 |
outputs=[mask_output, cutout_output, timing_output, specs_output],
|
| 147 |
)
|
| 148 |
input_image.upload(
|
| 149 |
fn=remove_background,
|
| 150 |
-
inputs=input_image,
|
| 151 |
outputs=[mask_output, cutout_output, timing_output, specs_output],
|
| 152 |
)
|
| 153 |
|
|
|
|
| 10 |
from PIL import Image, ImageOps
|
| 11 |
|
| 12 |
# Hugging Face Hub repository the model files are downloaded from; can be
# overridden with the MODEL_REPO_ID environment variable.
MODEL_REPO_ID = os.getenv("MODEL_REPO_ID", "ibrhr/BiRefNet-lite-openvino-xeon-w2145")
# Device name passed to OpenVINO's compile_model; can be overridden with the
# OPENVINO_DEVICE environment variable.
DEVICE = os.getenv("OPENVINO_DEVICE", "CPU")
# Variant key used when the caller does not pick one; can be overridden with
# the MODEL_VARIANT environment variable.
DEFAULT_MODEL_VARIANT_KEY = os.getenv("MODEL_VARIANT", "fp32_1024x1024")

# Per-channel mean and standard deviation applied to the [0, 1]-scaled RGB
# array in preprocess(). NOTE(review): presumably the standard ImageNet
# statistics, as the names suggest - confirm against the model's training setup.
IMAGENET_MEAN = np.array([0.485, 0.456, 0.406], dtype=np.float32)
IMAGENET_STD = np.array([0.229, 0.224, 0.225], dtype=np.float32)
|
| 18 |
|
| 19 |
|
| 20 |
+
@dataclass(frozen=True)
class ModelVariant:
    """Immutable description of one selectable OpenVINO export of the model.

    ``xml`` is the repository-relative path of the model description; the
    matching weights path is derived from it by :attr:`bin`.
    """

    key: str  # stable identifier used for lookups and as the dropdown value
    label: str  # human-readable text shown in the model picker
    xml: str  # repository-relative path of the OpenVINO ``.xml`` file
    precision: str  # precision name reported in the runtime specs
    input_size: int  # side length of the square model input
    benchmark_ms: float  # benchmark latency reported in the specs (milliseconds)
    benchmark_fps: float  # benchmark throughput reported in the specs
    notes: str  # free-form description reported in the specs

    @property
    def bin(self) -> str:
        """Return the weights path that sits next to :attr:`xml`.

        Only the trailing ``.xml`` extension is swapped for ``.bin``. A plain
        ``str.replace`` would also rewrite any other ``.xml`` occurrence in
        the path (for example a directory named ``exports.xml``). If ``xml``
        does not end in ``.xml``, ``.bin`` is appended rather than returning
        the model path itself as the weights path.
        """
        suffix = ".xml"
        stem = self.xml[: -len(suffix)] if self.xml.endswith(suffix) else self.xml
        return f"{stem}.bin"
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
# Catalogue of every selectable model export, in the order shown in the picker.
# Each row is:
#   (key, label, xml, precision, input_size, benchmark_ms, benchmark_fps, notes)
MODEL_VARIANTS = tuple(
    ModelVariant(
        key=row_key,
        label=row_label,
        xml=row_xml,
        precision=row_precision,
        input_size=row_size,
        benchmark_ms=row_ms,
        benchmark_fps=row_fps,
        notes=row_notes,
    )
    for (
        row_key,
        row_label,
        row_xml,
        row_precision,
        row_size,
        row_ms,
        row_fps,
        row_notes,
    ) in (
        (
            "int8_1024x1024",
            "INT8 NNCF - 1024x1024 - 1272 ms / 0.79 FPS",
            "openvino_int8/birefnet_lite_1024x1024_int8.xml",
            "INT8 NNCF",
            1024,
            1272.2,
            0.79,
            "Best benchmarked full-quality option on the target CPU.",
        ),
        (
            "int8_512x512",
            "INT8 NNCF - 512x512 - 332 ms / 3.01 FPS",
            "openvino_int8/birefnet_lite_512x512_int8.xml",
            "INT8 NNCF",
            512,
            332.32,
            3.01,
            "Fastest benchmarked option, with lower input resolution.",
        ),
        (
            "fp16_1024x1024",
            "FP16 - 1024x1024 - 1419 ms / 0.70 FPS",
            "openvino_fp16/birefnet_lite_1024x1024_fp16.xml",
            "FP16",
            1024,
            1419.0,
            0.70,
            "Smaller weights than FP32 at full input resolution.",
        ),
        (
            "fp16_512x512",
            "FP16 - 512x512 - 366 ms / 2.73 FPS",
            "openvino_fp16/birefnet_lite_512x512_fp16.xml",
            "FP16",
            512,
            365.97,
            2.73,
            "Smaller weights than FP32 at lower input resolution.",
        ),
        (
            "fp32_1024x1024",
            "FP32 - 1024x1024 - 1441 ms / 0.69 FPS",
            "openvino_fp32/birefnet_lite_1024x1024.xml",
            "FP32",
            1024,
            1440.9,
            0.69,
            "Original default and reference OpenVINO precision.",
        ),
        (
            "fp32_512x512",
            "FP32 - 512x512 - 366 ms / 2.73 FPS",
            "openvino_fp32/birefnet_lite_512x512.xml",
            "FP32",
            512,
            366.46,
            2.73,
            "Reference OpenVINO precision at lower input resolution.",
        ),
        (
            "int8wo_1024x1024",
            "INT8 weight-only - 1024x1024 - 1440 ms / 0.69 FPS",
            "openvino_int8wo/birefnet_lite_1024x1024_int8wo.xml",
            "INT8 weight-only",
            1024,
            1439.53,
            0.69,
            "Alternative weight-only quantized full-resolution model.",
        ),
        (
            "int8wo_512x512",
            "INT8 weight-only - 512x512 - 366 ms / 2.73 FPS",
            "openvino_int8wo/birefnet_lite_512x512_int8wo.xml",
            "INT8 weight-only",
            512,
            365.75,
            2.73,
            "Alternative weight-only quantized lower-resolution model.",
        ),
    )
)
# Key -> variant index used for lookups by variant key.
MODEL_VARIANTS_BY_KEY = {entry.key: entry for entry in MODEL_VARIANTS}
|
| 119 |
+
|
| 120 |
+
|
| 121 |
@dataclass(frozen=True)
class Runtime:
    """Immutable bundle describing one loaded model, as assembled by ``get_runtime``."""

    compiled_model: object  # object returned by Core.compile_model
    input_node: object  # compiled_model.input(0)
    output_node: object  # compiled_model.output(0)
    variant: ModelVariant  # variant this runtime was built for
    model_path: str  # path returned by hf_hub_download for the .xml file
    load_seconds: float  # elapsed time covering download, read, reshape and compile
    device: str  # device name the model was compiled for
|
| 130 |
|
| 131 |
|
| 132 |
+
def get_model_variant(variant_key: str | None) -> ModelVariant:
    """Resolve a variant key to its ``ModelVariant`` entry.

    A falsy ``variant_key`` (``None`` or an empty string) selects
    ``DEFAULT_MODEL_VARIANT_KEY``.

    Raises:
        gr.Error: if the resolved key is not in ``MODEL_VARIANTS_BY_KEY``;
            the message lists every valid key.
    """
    selected = variant_key if variant_key else DEFAULT_MODEL_VARIANT_KEY

    match = MODEL_VARIANTS_BY_KEY.get(selected)
    if match is not None:
        return match

    known = ", ".join(MODEL_VARIANTS_BY_KEY)
    raise gr.Error(f"Unknown model variant '{selected}'. Valid variants: {known}")
|
| 138 |
+
|
| 139 |
+
|
| 140 |
def _resampling(name: str) -> int:
    """Look up a resampling filter by attribute name on ``Image.Resampling``."""
    filters = Image.Resampling
    return getattr(filters, name)
|
| 142 |
|
| 143 |
|
| 144 |
+
@lru_cache(maxsize=len(MODEL_VARIANTS))
def get_runtime(variant_key: str) -> Runtime:
    """Download, read and compile the requested model variant.

    Results are memoised by ``lru_cache`` keyed on ``variant_key``, with room
    for one entry per catalogue variant.

    Raises:
        gr.Error: propagated from ``get_model_variant`` for an unknown key.
    """
    selected = get_model_variant(variant_key)
    started = time.perf_counter()

    # Fetch the model description first, then its weights.
    xml_file, bin_file = (
        hf_hub_download(repo_id=MODEL_REPO_ID, filename=remote_name)
        for remote_name in (selected.xml, selected.bin)
    )

    core = Core()
    network = core.read_model(model=xml_file, weights=bin_file)
    # Fix the input to a static [1, 3, size, size] shape for this variant.
    side = selected.input_size
    network.reshape({network.input(0): [1, 3, side, side]})
    compiled = core.compile_model(network, DEVICE)

    first_input = compiled.input(0)
    first_output = compiled.output(0)
    # Stop the clock only after the nodes are resolved, so the reported load
    # time covers the same work as before.
    elapsed = time.perf_counter() - started

    return Runtime(
        compiled_model=compiled,
        input_node=first_input,
        output_node=first_output,
        variant=selected,
        model_path=xml_file,
        load_seconds=elapsed,
        device=DEVICE,
    )
|
| 165 |
|
| 166 |
|
| 167 |
+
def preprocess(image: Image.Image, model_size: int) -> np.ndarray:
|
| 168 |
rgb_image = ImageOps.exif_transpose(image).convert("RGB")
|
| 169 |
+
resized = rgb_image.resize((model_size, model_size), _resampling("BICUBIC"))
|
| 170 |
array = np.asarray(resized, dtype=np.float32) / 255.0
|
| 171 |
array = (array - IMAGENET_MEAN) / IMAGENET_STD
|
| 172 |
array = np.transpose(array, (2, 0, 1))[None, ...]
|
|
|
|
| 189 |
return mask_image.resize(size, _resampling("LANCZOS"))
|
| 190 |
|
| 191 |
|
| 192 |
+
def remove_background(image: Image.Image, model_variant_key: str):
|
| 193 |
if image is None:
|
| 194 |
raise gr.Error("Upload an image first.")
|
| 195 |
|
| 196 |
total_started = time.perf_counter()
|
| 197 |
+
variant = get_model_variant(model_variant_key)
|
| 198 |
+
runtime = get_runtime(variant.key)
|
| 199 |
original = ImageOps.exif_transpose(image).convert("RGB")
|
| 200 |
|
| 201 |
preprocess_started = time.perf_counter()
|
| 202 |
+
tensor = preprocess(original, variant.input_size)
|
| 203 |
preprocess_seconds = time.perf_counter() - preprocess_started
|
| 204 |
|
| 205 |
inference_started = time.perf_counter()
|
|
|
|
| 222 |
|
| 223 |
specs = {
|
| 224 |
"model": MODEL_REPO_ID,
|
| 225 |
+
"variant": variant.key,
|
| 226 |
+
"variant_label": variant.label,
|
| 227 |
+
"model_xml": variant.xml,
|
| 228 |
"device": runtime.device,
|
| 229 |
+
"precision": variant.precision,
|
| 230 |
+
"model_input_size": f"{variant.input_size}x{variant.input_size}",
|
| 231 |
+
"benchmark_ms": variant.benchmark_ms,
|
| 232 |
+
"benchmark_fps": variant.benchmark_fps,
|
| 233 |
+
"variant_notes": variant.notes,
|
| 234 |
"uploaded_image_size": f"{original.width}x{original.height}",
|
| 235 |
"input_tensor_shape": list(tensor.shape),
|
| 236 |
"output_tensor_shape": list(np.asarray(output).shape),
|
|
|
|
| 247 |
gr.Markdown("# BiRefNet OpenVINO")
|
| 248 |
with gr.Row():
|
| 249 |
input_image = gr.Image(label="Image", type="pil")
|
| 250 |
+
model_dropdown = gr.Dropdown(
|
| 251 |
+
label="Model variant",
|
| 252 |
+
choices=[(variant.label, variant.key) for variant in MODEL_VARIANTS],
|
| 253 |
+
value=get_model_variant(DEFAULT_MODEL_VARIANT_KEY).key,
|
| 254 |
+
interactive=True,
|
| 255 |
+
)
|
| 256 |
run_button = gr.Button("Run", variant="primary")
|
| 257 |
with gr.Row():
|
| 258 |
mask_output = gr.Image(label="Mask", type="pil")
|
|
|
|
| 263 |
|
| 264 |
run_button.click(
|
| 265 |
fn=remove_background,
|
| 266 |
+
inputs=[input_image, model_dropdown],
|
| 267 |
outputs=[mask_output, cutout_output, timing_output, specs_output],
|
| 268 |
)
|
| 269 |
input_image.upload(
|
| 270 |
fn=remove_background,
|
| 271 |
+
inputs=[input_image, model_dropdown],
|
| 272 |
outputs=[mask_output, cutout_output, timing_output, specs_output],
|
| 273 |
)
|
| 274 |
|