Spaces:

ernestchu
/

barcode

Sleeping

App Files Files Community

ernestchu committed on Jan 30

Commit

043bbad

1 Parent(s): 2a6d081

i

Browse files

Files changed (7) hide show

.gitattributes +2 -0
YOLOV8s_Barcode_Detection.pt +3 -0
app.py +112 -146
loading.gif +3 -0
requirements.txt +4 -6
test.jpeg +3 -0
utils.py +92 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.jpeg filter=lfs diff=lfs merge=lfs -text
+*.gif filter=lfs diff=lfs merge=lfs -text

YOLOV8s_Barcode_Detection.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:316ded312281da5d4de06c36c66fdc682bd1c2052689008237baf22eb8e4f5ed
+size 22502634

app.py CHANGED Viewed

@@ -1,153 +1,119 @@
 import gradio as gr
 import numpy as np
-import random
-# import spaces #[uncomment to use ZeroGPU]
-from diffusers import DiffusionPipeline
-import torch
-device = "cuda" if torch.cuda.is_available() else "cpu"
-model_repo_id = "stabilityai/sdxl-turbo"  # Replace to the model you would like to use
-if torch.cuda.is_available():
-    torch_dtype = torch.float16
-else:
-    torch_dtype = torch.float32
-pipe = DiffusionPipeline.from_pretrained(model_repo_id, torch_dtype=torch_dtype)
-pipe = pipe.to(device)
-MAX_SEED = np.iinfo(np.int32).max
-MAX_IMAGE_SIZE = 1024
-# @spaces.GPU #[uncomment to use ZeroGPU]
-def infer(
-    prompt,
-    negative_prompt,
-    seed,
-    randomize_seed,
-    width,
-    height,
-    guidance_scale,
-    num_inference_steps,
-    progress=gr.Progress(track_tqdm=True),
-):
-    if randomize_seed:
-        seed = random.randint(0, MAX_SEED)
-    generator = torch.Generator().manual_seed(seed)
-    image = pipe(
-        prompt=prompt,
-        negative_prompt=negative_prompt,
-        guidance_scale=guidance_scale,
-        num_inference_steps=num_inference_steps,
-        width=width,
-        height=height,
-        generator=generator,
-    ).images[0]
-    return image, seed
-examples = [
-    "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
-    "An astronaut riding a green horse",
-    "A delicious ceviche cheesecake slice",
-]
-css = """
-#col-container {
-    margin: 0 auto;
-    max-width: 640px;
-}
-"""
-with gr.Blocks(css=css) as demo:
-    with gr.Column(elem_id="col-container"):
-        gr.Markdown(" # Text-to-Image Gradio Template")
-        with gr.Row():
-            prompt = gr.Text(
-                label="Prompt",
-                show_label=False,
-                max_lines=1,
-                placeholder="Enter your prompt",
-                container=False,
-            )
-            run_button = gr.Button("Run", scale=0, variant="primary")
-        result = gr.Image(label="Result", show_label=False)
-        with gr.Accordion("Advanced Settings", open=False):
-            negative_prompt = gr.Text(
-                label="Negative prompt",
-                max_lines=1,
-                placeholder="Enter a negative prompt",
-                visible=False,
             )
-            seed = gr.Slider(
-                label="Seed",
-                minimum=0,
-                maximum=MAX_SEED,
-                step=1,
-                value=0,
-            )
-            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
-            with gr.Row():
-                width = gr.Slider(
-                    label="Width",
-                    minimum=256,
-                    maximum=MAX_IMAGE_SIZE,
-                    step=32,
-                    value=1024,  # Replace with defaults that work for your model
-                )
-                height = gr.Slider(
-                    label="Height",
-                    minimum=256,
-                    maximum=MAX_IMAGE_SIZE,
-                    step=32,
-                    value=1024,  # Replace with defaults that work for your model
-                )
-            with gr.Row():
-                guidance_scale = gr.Slider(
-                    label="Guidance scale",
-                    minimum=0.0,
-                    maximum=10.0,
-                    step=0.1,
-                    value=0.0,  # Replace with defaults that work for your model
-                )
-                num_inference_steps = gr.Slider(
-                    label="Number of inference steps",
-                    minimum=1,
-                    maximum=50,
-                    step=1,
-                    value=2,  # Replace with defaults that work for your model
-                )
-        gr.Examples(examples=examples, inputs=[prompt])
-    gr.on(
-        triggers=[run_button.click, prompt.submit],
-        fn=infer,
-        inputs=[
-            prompt,
-            negative_prompt,
-            seed,
-            randomize_seed,
-            width,
-            height,
-            guidance_scale,
-            num_inference_steps,
-        ],
-        outputs=[result, seed],
     )
 if __name__ == "__main__":

+from types import SimpleNamespace
 import gradio as gr
+from PIL import Image
+from ultralytics import YOLO
+from ultralytics.utils.plotting import save_one_box
+import easyocr
+import zxingcpp
 import numpy as np
+from utils import custom_plot
+model = YOLO("YOLOV8s_Barcode_Detection.pt")  # barcode detector weights from this repo; loaded once at import time
+reader = easyocr.Reader(['en'])  # OCR reader used by process_image when a barcode cannot be decoded
+loading_img = "loading.gif"  # image path yielded as a placeholder while the annotated output is drawn
+def process_image(input_img, progress=gr.Progress()):
+    if input_img is None:
+        return None, "No image provided."
+    # Perform object detection on an image
+    result = model(input_img, imgsz=(1280))[0]
+    crops = []
+    for d in result.boxes:
+        crops.append(save_one_box(
+            d.xyxy,
+            result.orig_img.copy(),
+            save=False,
+        ))
+    texts = []
+    for pr, crop in enumerate(crops):
+        progress((pr+1) / len(crops), desc="辨識中")
+        img = Image.fromarray(crop)
+        res = zxingcpp.read_barcodes(img)
+        if not res:
+            # rotate and retry
+            for i in range(1, 8):
+                res = zxingcpp.read_barcodes(img.rotate(i, resample=2))
+                if res:
+                    break
+                res = zxingcpp.read_barcodes(img.rotate(-i, resample=2))
+                if res:
+                    break
+        if not res:
+            # resort to ocr the bottom-left no.
+            full_res = reader.readtext(crop, allowlist='-0123456789')
+            res = sorted(
+                [r for r in full_res if (
+                    r[0][3][0] < (img.width/4) and r[0][3][1] > (img.height/3) and
+                    r[0][0][0] < (img.width/4) and r[0][0][1] > (img.height/2)
+                )],
+                key=lambda x: x[2], reverse=True,
             )
+            if res:
+                pred_text = res[0][1]
+                # sanity check if the no. is not divided into multiple box
+                cur_box = res[0]
+                # if len(cur_box[1]) < 13:
+                other_boxes = [r for r in full_res if r[1] != cur_box[1]]
+                thrs = np.linalg.norm(img.size) / 25
+                while other_boxes:
+                    upper = np.linalg.norm(cur_box[0][1] - np.array([b[0][0] for b in other_boxes]), axis=1)
+                    lower = np.linalg.norm(cur_box[0][2] - np.array([b[0][3] for b in other_boxes]), axis=1)
+                    is_same = (upper < thrs) & (lower < thrs)
+                    rank = sorted(
+                        [(i, dist, s) for (i, dist), s in zip(enumerate(lower + upper), is_same) if s],
+                        key=lambda x: x[1]
+                    )
+                    if rank:
+                        cur_box = other_boxes[rank[0][0]]
+                        other_boxes = [r for r in other_boxes if r[1] != cur_box[1]]
+                        pred_text += cur_box[1]
+                    else:
+                        break
+                if len(pred_text) != 15:
+                    res = []
+            res = [SimpleNamespace(text=pred_text)] if res else []
+        texts.append(res[0].text if res else None)
+    output_text = '\n'.join([t for t in texts if isinstance(t, str)])
+    yield loading_img, output_text
+    results_img = custom_plot(
+        result, font_size=40, pil=True,
+        barcode_texts=texts,
+    )
+    yield results_img, output_text
+# Gradio interface: one row with three columns (input + button + example, output image, results text)
+with gr.Blocks() as demo:
+    gr.Markdown("# Barcode")
+    gr.Markdown("_")
+    with gr.Row():
+        with gr.Column():
+            input_view = gr.Image(type="pil", label="Input Image")  # input to process_image
+            btn = gr.Button("Transform", variant="primary")
+            gr.Examples(examples=["test.jpeg"], inputs=input_view)  # sample image added in this commit
+        with gr.Column():
+            output_view = gr.Image(type="pil", label="Output Image")
+        with gr.Column():
+            text_output = gr.Textbox(label="Results")
+    # Wire up the button: each (image, text) pair yielded by process_image fills the two outputs
+    btn.click(
+        fn=process_image,
+        inputs=input_view,
+        outputs=[output_view, text_output]
     )
 if __name__ == "__main__":

loading.gif ADDED Viewed

Git LFS Details

SHA256: adf2b976d9100497943bc9a40f780604ce95cf9da4d3e6de8e58c6387aeee7e1
Pointer size: 130 Bytes
Size of remote file: 55.5 kB

requirements.txt CHANGED Viewed

@@ -1,6 +1,4 @@
-accelerate
-diffusers
-invisible_watermark
-torch
-transformers
-xformers

+gradio
+ultralytics
+zxing-cpp
+easyocr

test.jpeg ADDED Viewed

Git LFS Details

SHA256: 56f4ae486f6843449cb175c424a88f1fd21e1ddead2222cefc46a1bbb1163a02
Pointer size: 132 Bytes
Size of remote file: 3.55 MB

utils.py ADDED Viewed

	@@ -0,0 +1,92 @@

+from ultralytics.utils.plotting import Annotator, colors
+import numpy as np
+import torch
+from copy import deepcopy
+def custom_plot(
+    self,
+    conf: bool = True,
+    line_width: float | None = None,
+    font_size: float | None = None,
+    font: str = "Arial.ttf",
+    pil: bool = False,
+    img: np.ndarray | None = None,
+    im_gpu: torch.Tensor | None = None,
+    kpt_radius: int = 5,
+    kpt_line: bool = True,
+    labels: bool = True,
+    boxes: bool = True,
+    masks: bool = True,
+    probs: bool = True,
+    show: bool = False,
+    save: bool = False,
+    filename: str | None = None,
+    color_mode: str = "class",
+    txt_color: tuple[int, int, int] = (255, 255, 255),
+    barcode_texts: list[str|None] = None,
+) -> np.ndarray:
+    """Plot detection results on an input BGR image.
+    Args:
+        conf (bool): Whether to plot detection confidence scores.
+        line_width (float | None): Line width of bounding boxes. If None, scaled to image size.
+        font_size (float | None): Font size for text. If None, scaled to image size.
+        font (str): Font to use for text.
+        pil (bool): Whether to return the image as a PIL Image.
+        img (np.ndarray | None): Image to plot on. If None, uses original image.
+        im_gpu (torch.Tensor | None): Normalized image on GPU for faster mask plotting.
+        kpt_radius (int): Radius of drawn keypoints.
+        kpt_line (bool): Whether to draw lines connecting keypoints.
+        labels (bool): Whether to plot labels of bounding boxes.
+        boxes (bool): Whether to plot bounding boxes.
+        masks (bool): Whether to plot masks.
+        probs (bool): Whether to plot classification probabilities.
+        show (bool): Whether to display the annotated image.
+        save (bool): Whether to save the annotated image.
+        filename (str | None): Filename to save image if save is True.
+        color_mode (str): Specify the color mode, e.g., 'instance' or 'class'.
+        txt_color (tuple[int, int, int]): Text color in BGR format for classification output.
+    Returns:
+        (np.ndarray | PIL.Image.Image): Annotated image as a NumPy array (BGR) or PIL image (RGB) if `pil=True`.
+    Examples:
+        >>> results = model("image.jpg")
+        >>> for result in results:
+        >>>     im = result.plot()
+        >>>     im.show()
+    """
+    assert color_mode in {"instance", "class"}, f"Expected color_mode='instance' or 'class', not {color_mode}."
+    if img is None and isinstance(self.orig_img, torch.Tensor):
+        img = (self.orig_img[0].detach().permute(1, 2, 0).contiguous() * 255).byte().cpu().numpy()
+    names = self.names
+    is_obb = self.obb is not None
+    pred_boxes, show_boxes = self.obb if is_obb else self.boxes, boxes
+    pred_masks, show_masks = self.masks, masks
+    pred_probs, show_probs = self.probs, probs
+    annotator = Annotator(
+        deepcopy(self.orig_img if img is None else img),
+        line_width,
+        font_size,
+        font,
+        pil or (pred_probs is not None and show_probs),  # Classify tasks default to pil=True
+        example=names,
+    )
+    # Plot Detect results
+    if pred_boxes is not None and show_boxes:
+        for i, d in enumerate(reversed(pred_boxes)):
+            c, d_conf, id = int(d.cls), float(d.conf) if conf else None, int(d.id.item()) if d.is_track else None
+            name = ("" if id is None else f"id:{id} ") + names[c]
+            if barcode_texts is None:
+                label = (f"{name} {d_conf:.2f}" if conf else name) if labels else None
+            else:
+                label = barcode_texts[len(pred_boxes) - i - 1]
+                # label = f'{len(pred_boxes) - i - 1} {label if label else ""}'
+            box = d.xyxyxyxy.squeeze() if is_obb else d.xyxy.squeeze()
+            annotator.box_label(
+                box,
+                label,
+                color=colors(0 if not label else 6),
+            )
+    return annotator.result(pil)