Spaces:

ibrhr
/

BiRefNet-liteopenvino

Sleeping

App Files Files Community

ibrhrsw commited on 24 days ago

Commit

ea84176

1 Parent(s): e7a3343

option to pick any model

Browse files

Files changed (2) hide show

README.md +16 -1
app.py +140 -19

README.md CHANGED Viewed

@@ -11,7 +11,7 @@ license: mit
 short_description: BiRefNet lite Background Removal on CPU
 ---
-Runs `ibrhr/BiRefNet-lite-openvino-xeon-w2145)` with the `fp32_1024x1024` OpenVINO model on CPU.
 The Space accepts one uploaded image and returns:
@@ -19,3 +19,18 @@ The Space accepts one uploaded image and returns:
 - a transparent PNG with the background removed
 - processing time broken down by preprocessing, inference, and postprocessing
 - runtime specs for the model variant, device, tensor shapes, and image size

 short_description: BiRefNet lite Background Removal on CPU
 ---
+Runs `ibrhr/BiRefNet-lite-openvino-xeon-w2145` with selectable OpenVINO model variants on CPU.
 The Space accepts one uploaded image and returns:
 - a transparent PNG with the background removed
 - processing time broken down by preprocessing, inference, and postprocessing
 - runtime specs for the model variant, device, tensor shapes, and image size
+The model picker includes all 8 OpenVINO files from the model repo:
+| Variant | Resolution | Benchmark |
+|---|---:|---:|
+| INT8 NNCF | 1024x1024 | 1272 ms / 0.79 FPS |
+| INT8 NNCF | 512x512 | 332 ms / 3.01 FPS |
+| FP16 | 1024x1024 | 1419 ms / 0.70 FPS |
+| FP16 | 512x512 | 366 ms / 2.73 FPS |
+| FP32 | 1024x1024 | 1441 ms / 0.69 FPS |
+| FP32 | 512x512 | 366 ms / 2.73 FPS |
+| INT8 weight-only | 1024x1024 | 1440 ms / 0.69 FPS |
+| INT8 weight-only | 512x512 | 366 ms / 2.73 FPS |
+Set `MODEL_VARIANT` to change the default selection. Valid values are `int8_1024x1024`, `int8_512x512`, `fp16_1024x1024`, `fp16_512x512`, `fp32_1024x1024`, `fp32_512x512`, `int8wo_1024x1024`, and `int8wo_512x512`.

app.py CHANGED Viewed

@@ -10,54 +10,163 @@ from openvino import Core
 from PIL import Image, ImageOps
 MODEL_REPO_ID = os.getenv("MODEL_REPO_ID", "ibrhr/BiRefNet-lite-openvino-xeon-w2145")
-MODEL_XML = os.getenv("MODEL_XML", "openvino_fp32/birefnet_lite_1024x1024.xml")
-MODEL_BIN = MODEL_XML.replace(".xml", ".bin")
-MODEL_VARIANT = "birefnet_lite_1024x1024"
-MODEL_SIZE = 1024
 DEVICE = os.getenv("OPENVINO_DEVICE", "CPU")
 IMAGENET_MEAN = np.array([0.485, 0.456, 0.406], dtype=np.float32)
 IMAGENET_STD = np.array([0.229, 0.224, 0.225], dtype=np.float32)
 @dataclass(frozen=True)
 class Runtime:
     compiled_model: object
     input_node: object
     output_node: object
     model_path: str
     load_seconds: float
     device: str
 def _resampling(name: str) -> int:
     return getattr(Image.Resampling, name)
-@lru_cache(maxsize=1)
-def get_runtime() -> Runtime:
     started = time.perf_counter()
-    model_path = hf_hub_download(repo_id=MODEL_REPO_ID, filename=MODEL_XML)
-    weights_path = hf_hub_download(repo_id=MODEL_REPO_ID, filename=MODEL_BIN)
     core = Core()
     model = core.read_model(model=model_path, weights=weights_path)
-    model.reshape({model.input(0): [1, 3, MODEL_SIZE, MODEL_SIZE]})
     compiled_model = core.compile_model(model, DEVICE)
     return Runtime(
         compiled_model=compiled_model,
         input_node=compiled_model.input(0),
         output_node=compiled_model.output(0),
         model_path=model_path,
         load_seconds=time.perf_counter() - started,
         device=DEVICE,
     )
-def preprocess(image: Image.Image) -> np.ndarray:
     rgb_image = ImageOps.exif_transpose(image).convert("RGB")
-    resized = rgb_image.resize((MODEL_SIZE, MODEL_SIZE), _resampling("BICUBIC"))
     array = np.asarray(resized, dtype=np.float32) / 255.0
     array = (array - IMAGENET_MEAN) / IMAGENET_STD
     array = np.transpose(array, (2, 0, 1))[None, ...]
@@ -80,16 +189,17 @@ def postprocess_mask(output: np.ndarray, size: tuple[int, int]) -> Image.Image:
     return mask_image.resize(size, _resampling("LANCZOS"))
-def remove_background(image: Image.Image):
     if image is None:
         raise gr.Error("Upload an image first.")
     total_started = time.perf_counter()
-    runtime = get_runtime()
     original = ImageOps.exif_transpose(image).convert("RGB")
     preprocess_started = time.perf_counter()
-    tensor = preprocess(original)
     preprocess_seconds = time.perf_counter() - preprocess_started
     inference_started = time.perf_counter()
@@ -112,10 +222,15 @@ def remove_background(image: Image.Image):
     specs = {
         "model": MODEL_REPO_ID,
-        "variant": MODEL_VARIANT,
         "device": runtime.device,
-        "precision": "FP32",
-        "model_input_size": f"{MODEL_SIZE}x{MODEL_SIZE}",
         "uploaded_image_size": f"{original.width}x{original.height}",
         "input_tensor_shape": list(tensor.shape),
         "output_tensor_shape": list(np.asarray(output).shape),
@@ -132,6 +247,12 @@ with gr.Blocks(title="BiRefNet OpenVINO") as demo:
     gr.Markdown("# BiRefNet OpenVINO")
     with gr.Row():
         input_image = gr.Image(label="Image", type="pil")
     run_button = gr.Button("Run", variant="primary")
     with gr.Row():
         mask_output = gr.Image(label="Mask", type="pil")
@@ -142,12 +263,12 @@ with gr.Blocks(title="BiRefNet OpenVINO") as demo:
     run_button.click(
         fn=remove_background,
-        inputs=input_image,
         outputs=[mask_output, cutout_output, timing_output, specs_output],
     )
     input_image.upload(
         fn=remove_background,
-        inputs=input_image,
         outputs=[mask_output, cutout_output, timing_output, specs_output],
     )

 from PIL import Image, ImageOps
 MODEL_REPO_ID = os.getenv("MODEL_REPO_ID", "ibrhr/BiRefNet-lite-openvino-xeon-w2145")
 DEVICE = os.getenv("OPENVINO_DEVICE", "CPU")
+DEFAULT_MODEL_VARIANT_KEY = os.getenv("MODEL_VARIANT", "fp32_1024x1024")
 IMAGENET_MEAN = np.array([0.485, 0.456, 0.406], dtype=np.float32)
 IMAGENET_STD = np.array([0.229, 0.224, 0.225], dtype=np.float32)
+@dataclass(frozen=True)
+class ModelVariant:
+    key: str
+    label: str
+    xml: str
+    precision: str
+    input_size: int
+    benchmark_ms: float
+    benchmark_fps: float
+    notes: str
+    @property
+    def bin(self) -> str:
+        return self.xml.replace(".xml", ".bin")
+MODEL_VARIANTS = (
+    ModelVariant(
+        key="int8_1024x1024",
+        label="INT8 NNCF - 1024x1024 - 1272 ms / 0.79 FPS",
+        xml="openvino_int8/birefnet_lite_1024x1024_int8.xml",
+        precision="INT8 NNCF",
+        input_size=1024,
+        benchmark_ms=1272.2,
+        benchmark_fps=0.79,
+        notes="Best benchmarked full-quality option on the target CPU.",
+    ),
+    ModelVariant(
+        key="int8_512x512",
+        label="INT8 NNCF - 512x512 - 332 ms / 3.01 FPS",
+        xml="openvino_int8/birefnet_lite_512x512_int8.xml",
+        precision="INT8 NNCF",
+        input_size=512,
+        benchmark_ms=332.32,
+        benchmark_fps=3.01,
+        notes="Fastest benchmarked option, with lower input resolution.",
+    ),
+    ModelVariant(
+        key="fp16_1024x1024",
+        label="FP16 - 1024x1024 - 1419 ms / 0.70 FPS",
+        xml="openvino_fp16/birefnet_lite_1024x1024_fp16.xml",
+        precision="FP16",
+        input_size=1024,
+        benchmark_ms=1419.0,
+        benchmark_fps=0.70,
+        notes="Smaller weights than FP32 at full input resolution.",
+    ),
+    ModelVariant(
+        key="fp16_512x512",
+        label="FP16 - 512x512 - 366 ms / 2.73 FPS",
+        xml="openvino_fp16/birefnet_lite_512x512_fp16.xml",
+        precision="FP16",
+        input_size=512,
+        benchmark_ms=365.97,
+        benchmark_fps=2.73,
+        notes="Smaller weights than FP32 at lower input resolution.",
+    ),
+    ModelVariant(
+        key="fp32_1024x1024",
+        label="FP32 - 1024x1024 - 1441 ms / 0.69 FPS",
+        xml="openvino_fp32/birefnet_lite_1024x1024.xml",
+        precision="FP32",
+        input_size=1024,
+        benchmark_ms=1440.9,
+        benchmark_fps=0.69,
+        notes="Original default and reference OpenVINO precision.",
+    ),
+    ModelVariant(
+        key="fp32_512x512",
+        label="FP32 - 512x512 - 366 ms / 2.73 FPS",
+        xml="openvino_fp32/birefnet_lite_512x512.xml",
+        precision="FP32",
+        input_size=512,
+        benchmark_ms=366.46,
+        benchmark_fps=2.73,
+        notes="Reference OpenVINO precision at lower input resolution.",
+    ),
+    ModelVariant(
+        key="int8wo_1024x1024",
+        label="INT8 weight-only - 1024x1024 - 1440 ms / 0.69 FPS",
+        xml="openvino_int8wo/birefnet_lite_1024x1024_int8wo.xml",
+        precision="INT8 weight-only",
+        input_size=1024,
+        benchmark_ms=1439.53,
+        benchmark_fps=0.69,
+        notes="Alternative weight-only quantized full-resolution model.",
+    ),
+    ModelVariant(
+        key="int8wo_512x512",
+        label="INT8 weight-only - 512x512 - 366 ms / 2.73 FPS",
+        xml="openvino_int8wo/birefnet_lite_512x512_int8wo.xml",
+        precision="INT8 weight-only",
+        input_size=512,
+        benchmark_ms=365.75,
+        benchmark_fps=2.73,
+        notes="Alternative weight-only quantized lower-resolution model.",
+    ),
+)
+MODEL_VARIANTS_BY_KEY = {variant.key: variant for variant in MODEL_VARIANTS}
 @dataclass(frozen=True)
 class Runtime:
     compiled_model: object
     input_node: object
     output_node: object
+    variant: ModelVariant
     model_path: str
     load_seconds: float
     device: str
+def get_model_variant(variant_key: str | None) -> ModelVariant:
+    key = variant_key or DEFAULT_MODEL_VARIANT_KEY
+    if key not in MODEL_VARIANTS_BY_KEY:
+        valid_keys = ", ".join(MODEL_VARIANTS_BY_KEY)
+        raise gr.Error(f"Unknown model variant '{key}'. Valid variants: {valid_keys}")
+    return MODEL_VARIANTS_BY_KEY[key]
 def _resampling(name: str) -> int:
     return getattr(Image.Resampling, name)
+@lru_cache(maxsize=len(MODEL_VARIANTS))
+def get_runtime(variant_key: str) -> Runtime:
+    variant = get_model_variant(variant_key)
     started = time.perf_counter()
+    model_path = hf_hub_download(repo_id=MODEL_REPO_ID, filename=variant.xml)
+    weights_path = hf_hub_download(repo_id=MODEL_REPO_ID, filename=variant.bin)
     core = Core()
     model = core.read_model(model=model_path, weights=weights_path)
+    model.reshape({model.input(0): [1, 3, variant.input_size, variant.input_size]})
     compiled_model = core.compile_model(model, DEVICE)
     return Runtime(
         compiled_model=compiled_model,
         input_node=compiled_model.input(0),
         output_node=compiled_model.output(0),
+        variant=variant,
         model_path=model_path,
         load_seconds=time.perf_counter() - started,
         device=DEVICE,
     )
+def preprocess(image: Image.Image, model_size: int) -> np.ndarray:
     rgb_image = ImageOps.exif_transpose(image).convert("RGB")
+    resized = rgb_image.resize((model_size, model_size), _resampling("BICUBIC"))
     array = np.asarray(resized, dtype=np.float32) / 255.0
     array = (array - IMAGENET_MEAN) / IMAGENET_STD
     array = np.transpose(array, (2, 0, 1))[None, ...]
     return mask_image.resize(size, _resampling("LANCZOS"))
+def remove_background(image: Image.Image, model_variant_key: str):
     if image is None:
         raise gr.Error("Upload an image first.")
     total_started = time.perf_counter()
+    variant = get_model_variant(model_variant_key)
+    runtime = get_runtime(variant.key)
     original = ImageOps.exif_transpose(image).convert("RGB")
     preprocess_started = time.perf_counter()
+    tensor = preprocess(original, variant.input_size)
     preprocess_seconds = time.perf_counter() - preprocess_started
     inference_started = time.perf_counter()
     specs = {
         "model": MODEL_REPO_ID,
+        "variant": variant.key,
+        "variant_label": variant.label,
+        "model_xml": variant.xml,
         "device": runtime.device,
+        "precision": variant.precision,
+        "model_input_size": f"{variant.input_size}x{variant.input_size}",
+        "benchmark_ms": variant.benchmark_ms,
+        "benchmark_fps": variant.benchmark_fps,
+        "variant_notes": variant.notes,
         "uploaded_image_size": f"{original.width}x{original.height}",
         "input_tensor_shape": list(tensor.shape),
         "output_tensor_shape": list(np.asarray(output).shape),
     gr.Markdown("# BiRefNet OpenVINO")
     with gr.Row():
         input_image = gr.Image(label="Image", type="pil")
+        model_dropdown = gr.Dropdown(
+            label="Model variant",
+            choices=[(variant.label, variant.key) for variant in MODEL_VARIANTS],
+            value=get_model_variant(DEFAULT_MODEL_VARIANT_KEY).key,
+            interactive=True,
+        )
     run_button = gr.Button("Run", variant="primary")
     with gr.Row():
         mask_output = gr.Image(label="Mask", type="pil")
     run_button.click(
         fn=remove_background,
+        inputs=[input_image, model_dropdown],
         outputs=[mask_output, cutout_output, timing_output, specs_output],
     )
     input_image.upload(
         fn=remove_background,
+        inputs=[input_image, model_dropdown],
         outputs=[mask_output, cutout_output, timing_output, specs_output],
     )