ibrhrsw committed on
Commit
ea84176
·
1 Parent(s): e7a3343

option to pick any model

Browse files
Files changed (2) hide show
  1. README.md +16 -1
  2. app.py +140 -19
README.md CHANGED
@@ -11,7 +11,7 @@ license: mit
11
  short_description: BiRefNet lite Background Removal on CPU
12
  ---
13
 
14
- Runs `ibrhr/BiRefNet-lite-openvino-xeon-w2145)` with the `fp32_1024x1024` OpenVINO model on CPU.
15
 
16
  The Space accepts one uploaded image and returns:
17
 
@@ -19,3 +19,18 @@ The Space accepts one uploaded image and returns:
19
  - a transparent PNG with the background removed
20
  - processing time broken down by preprocessing, inference, and postprocessing
21
  - runtime specs for the model variant, device, tensor shapes, and image size
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  short_description: BiRefNet lite Background Removal on CPU
12
  ---
13
 
14
+ Runs `ibrhr/BiRefNet-lite-openvino-xeon-w2145` with selectable OpenVINO model variants on CPU.
15
 
16
  The Space accepts one uploaded image and returns:
17
 
 
19
  - a transparent PNG with the background removed
20
  - processing time broken down by preprocessing, inference, and postprocessing
21
  - runtime specs for the model variant, device, tensor shapes, and image size
22
+
23
+ The model picker includes all 8 OpenVINO model variants from the model repo:
24
+
25
+ | Variant | Resolution | Benchmark |
26
+ |---|---:|---:|
27
+ | INT8 NNCF | 1024x1024 | 1272 ms / 0.79 FPS |
28
+ | INT8 NNCF | 512x512 | 332 ms / 3.01 FPS |
29
+ | FP16 | 1024x1024 | 1419 ms / 0.70 FPS |
30
+ | FP16 | 512x512 | 366 ms / 2.73 FPS |
31
+ | FP32 | 1024x1024 | 1441 ms / 0.69 FPS |
32
+ | FP32 | 512x512 | 366 ms / 2.73 FPS |
33
+ | INT8 weight-only | 1024x1024 | 1440 ms / 0.69 FPS |
34
+ | INT8 weight-only | 512x512 | 366 ms / 2.73 FPS |
35
+
36
+ Set `MODEL_VARIANT` to change the default selection. Valid values are `int8_1024x1024`, `int8_512x512`, `fp16_1024x1024`, `fp16_512x512`, `fp32_1024x1024`, `fp32_512x512`, `int8wo_1024x1024`, and `int8wo_512x512`.
app.py CHANGED
@@ -10,54 +10,163 @@ from openvino import Core
10
  from PIL import Image, ImageOps
11
 
12
  MODEL_REPO_ID = os.getenv("MODEL_REPO_ID", "ibrhr/BiRefNet-lite-openvino-xeon-w2145")
13
- MODEL_XML = os.getenv("MODEL_XML", "openvino_fp32/birefnet_lite_1024x1024.xml")
14
- MODEL_BIN = MODEL_XML.replace(".xml", ".bin")
15
- MODEL_VARIANT = "birefnet_lite_1024x1024"
16
- MODEL_SIZE = 1024
17
  DEVICE = os.getenv("OPENVINO_DEVICE", "CPU")
 
18
 
19
  IMAGENET_MEAN = np.array([0.485, 0.456, 0.406], dtype=np.float32)
20
  IMAGENET_STD = np.array([0.229, 0.224, 0.225], dtype=np.float32)
21
 
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  @dataclass(frozen=True)
24
  class Runtime:
25
  compiled_model: object
26
  input_node: object
27
  output_node: object
 
28
  model_path: str
29
  load_seconds: float
30
  device: str
31
 
32
 
 
 
 
 
 
 
 
 
33
  def _resampling(name: str) -> int:
34
  return getattr(Image.Resampling, name)
35
 
36
 
37
- @lru_cache(maxsize=1)
38
- def get_runtime() -> Runtime:
 
39
  started = time.perf_counter()
40
- model_path = hf_hub_download(repo_id=MODEL_REPO_ID, filename=MODEL_XML)
41
- weights_path = hf_hub_download(repo_id=MODEL_REPO_ID, filename=MODEL_BIN)
42
 
43
  core = Core()
44
  model = core.read_model(model=model_path, weights=weights_path)
45
- model.reshape({model.input(0): [1, 3, MODEL_SIZE, MODEL_SIZE]})
46
  compiled_model = core.compile_model(model, DEVICE)
47
 
48
  return Runtime(
49
  compiled_model=compiled_model,
50
  input_node=compiled_model.input(0),
51
  output_node=compiled_model.output(0),
 
52
  model_path=model_path,
53
  load_seconds=time.perf_counter() - started,
54
  device=DEVICE,
55
  )
56
 
57
 
58
- def preprocess(image: Image.Image) -> np.ndarray:
59
  rgb_image = ImageOps.exif_transpose(image).convert("RGB")
60
- resized = rgb_image.resize((MODEL_SIZE, MODEL_SIZE), _resampling("BICUBIC"))
61
  array = np.asarray(resized, dtype=np.float32) / 255.0
62
  array = (array - IMAGENET_MEAN) / IMAGENET_STD
63
  array = np.transpose(array, (2, 0, 1))[None, ...]
@@ -80,16 +189,17 @@ def postprocess_mask(output: np.ndarray, size: tuple[int, int]) -> Image.Image:
80
  return mask_image.resize(size, _resampling("LANCZOS"))
81
 
82
 
83
- def remove_background(image: Image.Image):
84
  if image is None:
85
  raise gr.Error("Upload an image first.")
86
 
87
  total_started = time.perf_counter()
88
- runtime = get_runtime()
 
89
  original = ImageOps.exif_transpose(image).convert("RGB")
90
 
91
  preprocess_started = time.perf_counter()
92
- tensor = preprocess(original)
93
  preprocess_seconds = time.perf_counter() - preprocess_started
94
 
95
  inference_started = time.perf_counter()
@@ -112,10 +222,15 @@ def remove_background(image: Image.Image):
112
 
113
  specs = {
114
  "model": MODEL_REPO_ID,
115
- "variant": MODEL_VARIANT,
 
 
116
  "device": runtime.device,
117
- "precision": "FP32",
118
- "model_input_size": f"{MODEL_SIZE}x{MODEL_SIZE}",
 
 
 
119
  "uploaded_image_size": f"{original.width}x{original.height}",
120
  "input_tensor_shape": list(tensor.shape),
121
  "output_tensor_shape": list(np.asarray(output).shape),
@@ -132,6 +247,12 @@ with gr.Blocks(title="BiRefNet OpenVINO") as demo:
132
  gr.Markdown("# BiRefNet OpenVINO")
133
  with gr.Row():
134
  input_image = gr.Image(label="Image", type="pil")
 
 
 
 
 
 
135
  run_button = gr.Button("Run", variant="primary")
136
  with gr.Row():
137
  mask_output = gr.Image(label="Mask", type="pil")
@@ -142,12 +263,12 @@ with gr.Blocks(title="BiRefNet OpenVINO") as demo:
142
 
143
  run_button.click(
144
  fn=remove_background,
145
- inputs=input_image,
146
  outputs=[mask_output, cutout_output, timing_output, specs_output],
147
  )
148
  input_image.upload(
149
  fn=remove_background,
150
- inputs=input_image,
151
  outputs=[mask_output, cutout_output, timing_output, specs_output],
152
  )
153
 
 
10
  from PIL import Image, ImageOps
11
 
12
  MODEL_REPO_ID = os.getenv("MODEL_REPO_ID", "ibrhr/BiRefNet-lite-openvino-xeon-w2145")
 
 
 
 
13
  DEVICE = os.getenv("OPENVINO_DEVICE", "CPU")
14
+ DEFAULT_MODEL_VARIANT_KEY = os.getenv("MODEL_VARIANT", "fp32_1024x1024")
15
 
16
  IMAGENET_MEAN = np.array([0.485, 0.456, 0.406], dtype=np.float32)
17
  IMAGENET_STD = np.array([0.229, 0.224, 0.225], dtype=np.float32)
18
 
19
 
20
@dataclass(frozen=True)
class ModelVariant:
    """Immutable description of one selectable OpenVINO model export.

    Constructing a variant performs no I/O; it only records where the model
    files live in the repository and the metadata reported alongside results.
    """

    key: str  # identifier used to look the variant up, e.g. "fp32_1024x1024"
    label: str  # human-readable text describing the variant
    xml: str  # repo-relative path of the OpenVINO IR ``.xml`` file
    precision: str  # precision name, e.g. "FP32" or "INT8 NNCF"
    input_size: int  # side length of the square model input, in pixels
    benchmark_ms: float  # recorded benchmark latency in milliseconds
    benchmark_fps: float  # recorded benchmark throughput in frames per second
    notes: str  # free-form remark about the variant

    @property
    def bin(self) -> str:
        """Return the weights path that accompanies ``xml``.

        Only a trailing ``.xml`` extension is swapped for ``.bin``. A plain
        ``str.replace`` would also rewrite ``.xml`` occurring earlier in the
        path (for example in a directory name) and point at a file that does
        not exist. A path that does not end in ``.xml`` is returned unchanged.
        """
        suffix = ".xml"
        if self.xml.endswith(suffix):
            return f"{self.xml[: -len(suffix)]}.bin"
        return self.xml
34
+
35
+
36
+ MODEL_VARIANTS = (
37
+ ModelVariant(
38
+ key="int8_1024x1024",
39
+ label="INT8 NNCF - 1024x1024 - 1272 ms / 0.79 FPS",
40
+ xml="openvino_int8/birefnet_lite_1024x1024_int8.xml",
41
+ precision="INT8 NNCF",
42
+ input_size=1024,
43
+ benchmark_ms=1272.2,
44
+ benchmark_fps=0.79,
45
+ notes="Best benchmarked full-quality option on the target CPU.",
46
+ ),
47
+ ModelVariant(
48
+ key="int8_512x512",
49
+ label="INT8 NNCF - 512x512 - 332 ms / 3.01 FPS",
50
+ xml="openvino_int8/birefnet_lite_512x512_int8.xml",
51
+ precision="INT8 NNCF",
52
+ input_size=512,
53
+ benchmark_ms=332.32,
54
+ benchmark_fps=3.01,
55
+ notes="Fastest benchmarked option, with lower input resolution.",
56
+ ),
57
+ ModelVariant(
58
+ key="fp16_1024x1024",
59
+ label="FP16 - 1024x1024 - 1419 ms / 0.70 FPS",
60
+ xml="openvino_fp16/birefnet_lite_1024x1024_fp16.xml",
61
+ precision="FP16",
62
+ input_size=1024,
63
+ benchmark_ms=1419.0,
64
+ benchmark_fps=0.70,
65
+ notes="Smaller weights than FP32 at full input resolution.",
66
+ ),
67
+ ModelVariant(
68
+ key="fp16_512x512",
69
+ label="FP16 - 512x512 - 366 ms / 2.73 FPS",
70
+ xml="openvino_fp16/birefnet_lite_512x512_fp16.xml",
71
+ precision="FP16",
72
+ input_size=512,
73
+ benchmark_ms=365.97,
74
+ benchmark_fps=2.73,
75
+ notes="Smaller weights than FP32 at lower input resolution.",
76
+ ),
77
+ ModelVariant(
78
+ key="fp32_1024x1024",
79
+ label="FP32 - 1024x1024 - 1441 ms / 0.69 FPS",
80
+ xml="openvino_fp32/birefnet_lite_1024x1024.xml",
81
+ precision="FP32",
82
+ input_size=1024,
83
+ benchmark_ms=1440.9,
84
+ benchmark_fps=0.69,
85
+ notes="Original default and reference OpenVINO precision.",
86
+ ),
87
+ ModelVariant(
88
+ key="fp32_512x512",
89
+ label="FP32 - 512x512 - 366 ms / 2.73 FPS",
90
+ xml="openvino_fp32/birefnet_lite_512x512.xml",
91
+ precision="FP32",
92
+ input_size=512,
93
+ benchmark_ms=366.46,
94
+ benchmark_fps=2.73,
95
+ notes="Reference OpenVINO precision at lower input resolution.",
96
+ ),
97
+ ModelVariant(
98
+ key="int8wo_1024x1024",
99
+ label="INT8 weight-only - 1024x1024 - 1440 ms / 0.69 FPS",
100
+ xml="openvino_int8wo/birefnet_lite_1024x1024_int8wo.xml",
101
+ precision="INT8 weight-only",
102
+ input_size=1024,
103
+ benchmark_ms=1439.53,
104
+ benchmark_fps=0.69,
105
+ notes="Alternative weight-only quantized full-resolution model.",
106
+ ),
107
+ ModelVariant(
108
+ key="int8wo_512x512",
109
+ label="INT8 weight-only - 512x512 - 366 ms / 2.73 FPS",
110
+ xml="openvino_int8wo/birefnet_lite_512x512_int8wo.xml",
111
+ precision="INT8 weight-only",
112
+ input_size=512,
113
+ benchmark_ms=365.75,
114
+ benchmark_fps=2.73,
115
+ notes="Alternative weight-only quantized lower-resolution model.",
116
+ ),
117
+ )
118
+ MODEL_VARIANTS_BY_KEY = {variant.key: variant for variant in MODEL_VARIANTS}
119
+
120
+
121
  @dataclass(frozen=True)
122
  class Runtime:
123
  compiled_model: object
124
  input_node: object
125
  output_node: object
126
+ variant: ModelVariant
127
  model_path: str
128
  load_seconds: float
129
  device: str
130
 
131
 
132
def get_model_variant(variant_key: str | None) -> ModelVariant:
    """Resolve a variant key to its ``ModelVariant`` entry.

    Args:
        variant_key: Key of the requested variant. A falsy value (``None`` or
            an empty string) selects ``DEFAULT_MODEL_VARIANT_KEY`` instead.

    Returns:
        The matching entry from ``MODEL_VARIANTS_BY_KEY``.

    Raises:
        gr.Error: If the resolved key is not a registered variant; the
            message lists every valid key.
    """
    requested = variant_key if variant_key else DEFAULT_MODEL_VARIANT_KEY
    selected = MODEL_VARIANTS_BY_KEY.get(requested)
    if selected is not None:
        return selected

    # Unknown key: tell the user which keys would have been accepted.
    known_keys = ", ".join(MODEL_VARIANTS_BY_KEY.keys())
    raise gr.Error(f"Unknown model variant '{requested}'. Valid variants: {known_keys}")
138
+
139
+
140
  def _resampling(name: str) -> int:
141
  return getattr(Image.Resampling, name)
142
 
143
 
144
@lru_cache(maxsize=len(MODEL_VARIANTS))
def get_runtime(variant_key: str) -> Runtime:
    """Download, compile, and cache the runtime for one model variant.

    The cache holds as many entries as there are registered variants, so each
    variant is compiled at most once while it remains cached.

    Args:
        variant_key: Key of the variant to load; resolved through
            ``get_model_variant``.

    Returns:
        A ``Runtime`` holding the compiled model, its first input and output
        nodes, the resolved variant, the local ``.xml`` path, the time spent
        loading, and the target device.
    """
    variant = get_model_variant(variant_key)
    load_started = time.perf_counter()

    # Fetch the IR description first, then its weights, from the model repo.
    xml_path, bin_path = (
        hf_hub_download(repo_id=MODEL_REPO_ID, filename=repo_file)
        for repo_file in (variant.xml, variant.bin)
    )

    side = variant.input_size
    core = Core()
    network = core.read_model(model=xml_path, weights=bin_path)
    # Pin the first input to a single square 3-channel image of the variant's size.
    network.reshape({network.input(0): [1, 3, side, side]})
    compiled = core.compile_model(network, DEVICE)

    first_input = compiled.input(0)
    first_output = compiled.output(0)
    elapsed = time.perf_counter() - load_started

    return Runtime(
        compiled_model=compiled,
        input_node=first_input,
        output_node=first_output,
        variant=variant,
        model_path=xml_path,
        load_seconds=elapsed,
        device=DEVICE,
    )
165
 
166
 
167
+ def preprocess(image: Image.Image, model_size: int) -> np.ndarray:
168
  rgb_image = ImageOps.exif_transpose(image).convert("RGB")
169
+ resized = rgb_image.resize((model_size, model_size), _resampling("BICUBIC"))
170
  array = np.asarray(resized, dtype=np.float32) / 255.0
171
  array = (array - IMAGENET_MEAN) / IMAGENET_STD
172
  array = np.transpose(array, (2, 0, 1))[None, ...]
 
189
  return mask_image.resize(size, _resampling("LANCZOS"))
190
 
191
 
192
+ def remove_background(image: Image.Image, model_variant_key: str):
193
  if image is None:
194
  raise gr.Error("Upload an image first.")
195
 
196
  total_started = time.perf_counter()
197
+ variant = get_model_variant(model_variant_key)
198
+ runtime = get_runtime(variant.key)
199
  original = ImageOps.exif_transpose(image).convert("RGB")
200
 
201
  preprocess_started = time.perf_counter()
202
+ tensor = preprocess(original, variant.input_size)
203
  preprocess_seconds = time.perf_counter() - preprocess_started
204
 
205
  inference_started = time.perf_counter()
 
222
 
223
  specs = {
224
  "model": MODEL_REPO_ID,
225
+ "variant": variant.key,
226
+ "variant_label": variant.label,
227
+ "model_xml": variant.xml,
228
  "device": runtime.device,
229
+ "precision": variant.precision,
230
+ "model_input_size": f"{variant.input_size}x{variant.input_size}",
231
+ "benchmark_ms": variant.benchmark_ms,
232
+ "benchmark_fps": variant.benchmark_fps,
233
+ "variant_notes": variant.notes,
234
  "uploaded_image_size": f"{original.width}x{original.height}",
235
  "input_tensor_shape": list(tensor.shape),
236
  "output_tensor_shape": list(np.asarray(output).shape),
 
247
  gr.Markdown("# BiRefNet OpenVINO")
248
  with gr.Row():
249
  input_image = gr.Image(label="Image", type="pil")
250
+ model_dropdown = gr.Dropdown(
251
+ label="Model variant",
252
+ choices=[(variant.label, variant.key) for variant in MODEL_VARIANTS],
253
+ value=get_model_variant(DEFAULT_MODEL_VARIANT_KEY).key,
254
+ interactive=True,
255
+ )
256
  run_button = gr.Button("Run", variant="primary")
257
  with gr.Row():
258
  mask_output = gr.Image(label="Mask", type="pil")
 
263
 
264
  run_button.click(
265
  fn=remove_background,
266
+ inputs=[input_image, model_dropdown],
267
  outputs=[mask_output, cutout_output, timing_output, specs_output],
268
  )
269
  input_image.upload(
270
  fn=remove_background,
271
+ inputs=[input_image, model_dropdown],
272
  outputs=[mask_output, cutout_output, timing_output, specs_output],
273
  )
274