ernestchu committed on
Commit
043bbad
·
1 Parent(s): 2a6d081
Files changed (7) hide show
  1. .gitattributes +2 -0
  2. YOLOV8s_Barcode_Detection.pt +3 -0
  3. app.py +112 -146
  4. loading.gif +3 -0
  5. requirements.txt +4 -6
  6. test.jpeg +3 -0
  7. utils.py +92 -0
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.jpeg filter=lfs diff=lfs merge=lfs -text
37
+ *.gif filter=lfs diff=lfs merge=lfs -text
YOLOV8s_Barcode_Detection.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:316ded312281da5d4de06c36c66fdc682bd1c2052689008237baf22eb8e4f5ed
3
+ size 22502634
app.py CHANGED
@@ -1,153 +1,119 @@
 
1
  import gradio as gr
 
 
 
 
 
2
  import numpy as np
3
- import random
4
-
5
- # import spaces #[uncomment to use ZeroGPU]
6
- from diffusers import DiffusionPipeline
7
- import torch
8
-
9
- device = "cuda" if torch.cuda.is_available() else "cpu"
10
- model_repo_id = "stabilityai/sdxl-turbo" # Replace to the model you would like to use
11
-
12
- if torch.cuda.is_available():
13
- torch_dtype = torch.float16
14
- else:
15
- torch_dtype = torch.float32
16
-
17
- pipe = DiffusionPipeline.from_pretrained(model_repo_id, torch_dtype=torch_dtype)
18
- pipe = pipe.to(device)
19
-
20
- MAX_SEED = np.iinfo(np.int32).max
21
- MAX_IMAGE_SIZE = 1024
22
-
23
-
24
- # @spaces.GPU #[uncomment to use ZeroGPU]
25
- def infer(
26
- prompt,
27
- negative_prompt,
28
- seed,
29
- randomize_seed,
30
- width,
31
- height,
32
- guidance_scale,
33
- num_inference_steps,
34
- progress=gr.Progress(track_tqdm=True),
35
- ):
36
- if randomize_seed:
37
- seed = random.randint(0, MAX_SEED)
38
-
39
- generator = torch.Generator().manual_seed(seed)
40
-
41
- image = pipe(
42
- prompt=prompt,
43
- negative_prompt=negative_prompt,
44
- guidance_scale=guidance_scale,
45
- num_inference_steps=num_inference_steps,
46
- width=width,
47
- height=height,
48
- generator=generator,
49
- ).images[0]
50
-
51
- return image, seed
52
-
53
-
54
- examples = [
55
- "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
56
- "An astronaut riding a green horse",
57
- "A delicious ceviche cheesecake slice",
58
- ]
59
-
60
- css = """
61
- #col-container {
62
- margin: 0 auto;
63
- max-width: 640px;
64
- }
65
- """
66
-
67
- with gr.Blocks(css=css) as demo:
68
- with gr.Column(elem_id="col-container"):
69
- gr.Markdown(" # Text-to-Image Gradio Template")
70
-
71
- with gr.Row():
72
- prompt = gr.Text(
73
- label="Prompt",
74
- show_label=False,
75
- max_lines=1,
76
- placeholder="Enter your prompt",
77
- container=False,
78
- )
79
-
80
- run_button = gr.Button("Run", scale=0, variant="primary")
81
 
82
- result = gr.Image(label="Result", show_label=False)
83
-
84
- with gr.Accordion("Advanced Settings", open=False):
85
- negative_prompt = gr.Text(
86
- label="Negative prompt",
87
- max_lines=1,
88
- placeholder="Enter a negative prompt",
89
- visible=False,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  )
91
-
92
- seed = gr.Slider(
93
- label="Seed",
94
- minimum=0,
95
- maximum=MAX_SEED,
96
- step=1,
97
- value=0,
98
- )
99
-
100
- randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
101
-
102
- with gr.Row():
103
- width = gr.Slider(
104
- label="Width",
105
- minimum=256,
106
- maximum=MAX_IMAGE_SIZE,
107
- step=32,
108
- value=1024, # Replace with defaults that work for your model
109
- )
110
-
111
- height = gr.Slider(
112
- label="Height",
113
- minimum=256,
114
- maximum=MAX_IMAGE_SIZE,
115
- step=32,
116
- value=1024, # Replace with defaults that work for your model
117
- )
118
-
119
- with gr.Row():
120
- guidance_scale = gr.Slider(
121
- label="Guidance scale",
122
- minimum=0.0,
123
- maximum=10.0,
124
- step=0.1,
125
- value=0.0, # Replace with defaults that work for your model
126
- )
127
-
128
- num_inference_steps = gr.Slider(
129
- label="Number of inference steps",
130
- minimum=1,
131
- maximum=50,
132
- step=1,
133
- value=2, # Replace with defaults that work for your model
134
- )
135
-
136
- gr.Examples(examples=examples, inputs=[prompt])
137
- gr.on(
138
- triggers=[run_button.click, prompt.submit],
139
- fn=infer,
140
- inputs=[
141
- prompt,
142
- negative_prompt,
143
- seed,
144
- randomize_seed,
145
- width,
146
- height,
147
- guidance_scale,
148
- num_inference_steps,
149
- ],
150
- outputs=[result, seed],
151
  )
152
 
153
  if __name__ == "__main__":
 
1
+ from types import SimpleNamespace
2
  import gradio as gr
3
+ from PIL import Image
4
+ from ultralytics import YOLO
5
+ from ultralytics.utils.plotting import save_one_box
6
+ import easyocr
7
+ import zxingcpp
8
  import numpy as np
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
+ from utils import custom_plot
11
+
12
+
13
# Barcode detector weights, loaded once at import time from the checkpoint in the repo.
model = YOLO("YOLOV8s_Barcode_Detection.pt")

# EasyOCR reader (English) used as a fallback when barcode decoding fails.
reader = easyocr.Reader(['en'])

# Placeholder image shown in the output view while the annotated result is rendered.
loading_img = "loading.gif"
16
+
17
def _read_barcode(img):
    """Decode *img* with zxing-cpp, retrying with small rotations of +/-1..7 degrees.

    Returns the (possibly empty) result list of the first attempt that decodes
    something, or an empty list when every attempt fails.
    """
    found = zxingcpp.read_barcodes(img)
    if found:
        return found
    for angle in range(1, 8):
        # Try the counter-clockwise angle first, then its clockwise mirror.
        for signed_angle in (angle, -angle):
            found = zxingcpp.read_barcodes(
                img.rotate(signed_angle, resample=Image.BILINEAR)
            )
            if found:
                return found
    return []


def _ocr_barcode_number(crop, width, height):
    """OCR the number printed at the bottom-left of a barcode crop.

    Args:
        crop: Image array of the cropped barcode, passed to EasyOCR as-is.
        width: Width of the crop in pixels.
        height: Height of the crop in pixels.

    Returns:
        The recognised 15-character string, or None when no candidate is found
        or the assembled text does not have exactly 15 characters.
    """
    detections = reader.readtext(crop, allowlist='-0123456789')

    # Keep only detections whose left edge lies in the bottom-left region,
    # most confident first. Each detection is indexed as (corners, text, conf).
    candidates = sorted(
        (
            det for det in detections
            if det[0][3][0] < (width / 4) and det[0][3][1] > (height / 3)
            and det[0][0][0] < (width / 4) and det[0][0][1] > (height / 2)
        ),
        key=lambda det: det[2],
        reverse=True,
    )
    if not candidates:
        return None

    current = candidates[0]
    text = current[1]

    # The number may be split over several OCR boxes: keep appending the box
    # whose left edge touches the right edge of the current one.
    # Exclude by identity, not by text, so that a *different* fragment that
    # happens to contain the same characters is not discarded.
    remaining = [det for det in detections if det is not current]
    threshold = np.linalg.norm((width, height)) / 25
    while remaining:
        upper = np.linalg.norm(
            np.asarray(current[0][1]) - np.array([det[0][0] for det in remaining]),
            axis=1,
        )
        lower = np.linalg.norm(
            np.asarray(current[0][2]) - np.array([det[0][3] for det in remaining]),
            axis=1,
        )
        adjacent = [
            (upper[i] + lower[i], i)
            for i in range(len(remaining))
            if upper[i] < threshold and lower[i] < threshold
        ]
        if not adjacent:
            break
        _, nearest = min(adjacent)
        current = remaining.pop(nearest)
        text += current[1]

    # Only a complete 15-character number is accepted.
    return text if len(text) == 15 else None


def process_image(input_img, progress=gr.Progress()):
    """Detect barcodes in *input_img* and stream the results to the UI.

    This is a generator: it first yields the loading placeholder together with
    the decoded texts, then the annotated image with the same texts.

    Args:
        input_img: Image to analyse, or None when nothing was provided.
        progress: Gradio progress tracker, updated once per detected barcode.

    Yields:
        Tuples of (output image, newline-joined decoded texts).
    """
    if input_img is None:
        # A generator's return value is never delivered as an output, so the
        # message must be yielded before stopping.
        yield None, "No image provided."
        return

    # Perform object detection on the image.
    result = model(input_img, imgsz=1280)[0]

    crops = [
        save_one_box(box.xyxy, result.orig_img.copy(), save=False)
        for box in result.boxes
    ]

    texts = []
    for done, crop in enumerate(crops, start=1):
        progress(done / len(crops), desc="辨識中")
        img = Image.fromarray(crop)

        decoded = _read_barcode(img)
        if decoded:
            texts.append(decoded[0].text)
        else:
            # Resort to OCR of the bottom-left number; None when that fails too.
            texts.append(_ocr_barcode_number(crop, img.width, img.height))

    output_text = '\n'.join(t for t in texts if isinstance(t, str))
    yield loading_img, output_text

    results_img = custom_plot(
        result, font_size=40, pil=True,
        barcode_texts=texts,
    )
    yield results_img, output_text
93
+
94
+
95
+
96
+ # Defining the Gradio Interface
97
# Gradio interface: input image and button on the left, annotated image and
# decoded texts to the right.
# NOTE(review): nesting below is reconstructed from statement order — confirm
# against the committed file.
with gr.Blocks() as demo:
    gr.Markdown("# Barcode")
    gr.Markdown("_")

    with gr.Row():
        with gr.Column():
            input_view = gr.Image(type="pil", label="Input Image")
            btn = gr.Button("Transform", variant="primary")

            gr.Examples(examples=["test.jpeg"], inputs=input_view)
        with gr.Column():
            output_view = gr.Image(type="pil", label="Output Image")
        with gr.Column():
            text_output = gr.Textbox(label="Results")

    # Clicking the button runs process_image and feeds its yielded
    # (image, text) pairs into the two output components.
    btn.click(
        fn=process_image,
        inputs=input_view,
        outputs=[output_view, text_output],
    )
118
 
119
  if __name__ == "__main__":
loading.gif ADDED

Git LFS Details

  • SHA256: adf2b976d9100497943bc9a40f780604ce95cf9da4d3e6de8e58c6387aeee7e1
  • Pointer size: 130 Bytes
  • Size of remote file: 55.5 kB
requirements.txt CHANGED
@@ -1,6 +1,4 @@
1
- accelerate
2
- diffusers
3
- invisible_watermark
4
- torch
5
- transformers
6
- xformers
 
1
+ gradio
2
+ ultralytics
3
+ zxing-cpp
4
+ easyocr
 
 
test.jpeg ADDED

Git LFS Details

  • SHA256: 56f4ae486f6843449cb175c424a88f1fd21e1ddead2222cefc46a1bbb1163a02
  • Pointer size: 132 Bytes
  • Size of remote file: 3.55 MB
utils.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from ultralytics.utils.plotting import Annotator, colors
2
+ import numpy as np
3
+ import torch
4
+ from copy import deepcopy
5
+
6
+ def custom_plot(
7
+ self,
8
+ conf: bool = True,
9
+ line_width: float | None = None,
10
+ font_size: float | None = None,
11
+ font: str = "Arial.ttf",
12
+ pil: bool = False,
13
+ img: np.ndarray | None = None,
14
+ im_gpu: torch.Tensor | None = None,
15
+ kpt_radius: int = 5,
16
+ kpt_line: bool = True,
17
+ labels: bool = True,
18
+ boxes: bool = True,
19
+ masks: bool = True,
20
+ probs: bool = True,
21
+ show: bool = False,
22
+ save: bool = False,
23
+ filename: str | None = None,
24
+ color_mode: str = "class",
25
+ txt_color: tuple[int, int, int] = (255, 255, 255),
26
+ barcode_texts: list[str|None] = None,
27
+ ) -> np.ndarray:
28
+ """Plot detection results on an input BGR image.
29
+
30
+ Args:
31
+ conf (bool): Whether to plot detection confidence scores.
32
+ line_width (float | None): Line width of bounding boxes. If None, scaled to image size.
33
+ font_size (float | None): Font size for text. If None, scaled to image size.
34
+ font (str): Font to use for text.
35
+ pil (bool): Whether to return the image as a PIL Image.
36
+ img (np.ndarray | None): Image to plot on. If None, uses original image.
37
+ im_gpu (torch.Tensor | None): Normalized image on GPU for faster mask plotting.
38
+ kpt_radius (int): Radius of drawn keypoints.
39
+ kpt_line (bool): Whether to draw lines connecting keypoints.
40
+ labels (bool): Whether to plot labels of bounding boxes.
41
+ boxes (bool): Whether to plot bounding boxes.
42
+ masks (bool): Whether to plot masks.
43
+ probs (bool): Whether to plot classification probabilities.
44
+ show (bool): Whether to display the annotated image.
45
+ save (bool): Whether to save the annotated image.
46
+ filename (str | None): Filename to save image if save is True.
47
+ color_mode (str): Specify the color mode, e.g., 'instance' or 'class'.
48
+ txt_color (tuple[int, int, int]): Text color in BGR format for classification output.
49
+
50
+ Returns:
51
+ (np.ndarray | PIL.Image.Image): Annotated image as a NumPy array (BGR) or PIL image (RGB) if `pil=True`.
52
+
53
+ Examples:
54
+ >>> results = model("image.jpg")
55
+ >>> for result in results:
56
+ >>> im = result.plot()
57
+ >>> im.show()
58
+ """
59
+ assert color_mode in {"instance", "class"}, f"Expected color_mode='instance' or 'class', not {color_mode}."
60
+ if img is None and isinstance(self.orig_img, torch.Tensor):
61
+ img = (self.orig_img[0].detach().permute(1, 2, 0).contiguous() * 255).byte().cpu().numpy()
62
+
63
+ names = self.names
64
+ is_obb = self.obb is not None
65
+ pred_boxes, show_boxes = self.obb if is_obb else self.boxes, boxes
66
+ pred_masks, show_masks = self.masks, masks
67
+ pred_probs, show_probs = self.probs, probs
68
+ annotator = Annotator(
69
+ deepcopy(self.orig_img if img is None else img),
70
+ line_width,
71
+ font_size,
72
+ font,
73
+ pil or (pred_probs is not None and show_probs), # Classify tasks default to pil=True
74
+ example=names,
75
+ )
76
+ # Plot Detect results
77
+ if pred_boxes is not None and show_boxes:
78
+ for i, d in enumerate(reversed(pred_boxes)):
79
+ c, d_conf, id = int(d.cls), float(d.conf) if conf else None, int(d.id.item()) if d.is_track else None
80
+ name = ("" if id is None else f"id:{id} ") + names[c]
81
+ if barcode_texts is None:
82
+ label = (f"{name} {d_conf:.2f}" if conf else name) if labels else None
83
+ else:
84
+ label = barcode_texts[len(pred_boxes) - i - 1]
85
+ # label = f'{len(pred_boxes) - i - 1} {label if label else ""}'
86
+ box = d.xyxyxyxy.squeeze() if is_obb else d.xyxy.squeeze()
87
+ annotator.box_label(
88
+ box,
89
+ label,
90
+ color=colors(0 if not label else 6),
91
+ )
92
+ return annotator.result(pil)