theDavidGuy committed on
Commit
84c3056
·
verified ·
1 Parent(s): 3a081c4

Update ocr_utils.py

Browse files
Files changed (1) hide show
  1. ocr_utils.py +29 -50
ocr_utils.py CHANGED
@@ -1,5 +1,4 @@
1
  import io, re
2
- from typing import List, Dict, Tuple, Any
3
  from PIL import Image, ImageDraw
4
 
5
  try:
@@ -9,16 +8,10 @@ except Exception:
9
  pytesseract = None
10
  TessOutput = None
11
 
12
- # regex for price detection
13
- PRICE_RE = re.compile(r"(\$\s*\d{1,4}(?:\.\d{2})?|\b\d{1,4}\.\d{2}\b)")
14
-
15
  def ocr_image(image_bytes: bytes):
16
  """
17
- Returns (full_text, tokens, size).
18
- - full_text: the whole OCR text as one string
19
- - tokens: list of {'text','conf','box'}
20
- - size: (width, height)
21
- Always returns 3 values, even if OCR fails.
22
  """
23
  try:
24
  img = Image.open(io.BytesIO(image_bytes)).convert("RGB")
@@ -34,60 +27,46 @@ def ocr_image(image_bytes: bytes):
34
  return "", [], img.size
35
 
36
  tokens = []
37
- for i, txt in enumerate(data["text"]):
38
- if not txt:
39
  continue
40
  try:
41
  conf = float(data.get("conf", ["-1"])[i])
42
- except:
43
  conf = -1.0
44
- x, y, w, h = data["left"][i], data["top"][i], data["width"][i], data["height"][i]
45
- tokens.append({"text": txt, "conf": conf, "box": (x, y, w, h)})
 
 
 
 
 
46
 
47
- full = " ".join(t["text"] for t in tokens)
48
- return full, tokens, img.size
49
 
50
 
51
- def guess_price(tokens: List[Dict[str,Any]]):
52
- """Pick the smallest plausible price (typical for tags)."""
53
- best = None
54
  for t in tokens:
55
- m = PRICE_RE.search(t["text"].replace(",", ""))
56
  if m:
57
- raw = m.group(0).replace("$", "").strip()
58
  try:
59
- val = float(raw)
60
- if 0.5 <= val <= 1000:
61
- if best is None or val < best[0]:
62
- best = (val, t["box"])
63
- except:
64
  pass
65
- return best
66
 
67
 
68
- def guess_title(full_text: str) -> str:
69
- """Return a short readable title."""
70
- text = (full_text or "").strip()
71
- if not text:
72
- return ""
73
- words = [w for w in re.findall(r"[A-Za-z][A-Za-z0-9\-]{2,}", text) if not w.isdigit()]
74
- return " ".join(words[:4]) if words else "Item"
75
 
76
 
77
- def annotate_price_box(image_bytes: bytes, box: Tuple[int,int,int,int], label: str = None) -> bytes:
78
- """Draw a red box around the detected price and optional label."""
79
  try:
80
- img = Image.open(io.BytesIO(image_bytes)).convert("RGB")
81
- draw = ImageDraw.Draw(img)
82
- x, y, w, h = box
83
- draw.rectangle([x, y, x + w, y + h], outline=(255, 0, 0), width=3)
84
- if label:
85
- tw = max(50, int(len(label) * 8))
86
- y0 = max(0, y - 22)
87
- draw.rectangle([x, y0, x + tw, y0 + 22], fill=(255, 0, 0))
88
- draw.text((x + 4, y0 + 4), label, fill=(255, 255, 255))
89
- out = io.BytesIO()
90
- img.save(out, format="PNG")
91
- return out.getvalue()
92
- except Exception:
93
- return image_bytes
 
1
  import io, re
 
2
  from PIL import Image, ImageDraw
3
 
4
  try:
 
8
  pytesseract = None
9
  TessOutput = None
10
 
 
 
 
11
  def ocr_image(image_bytes: bytes):
12
  """
13
+ Always return exactly three values: (text, tokens, size)
14
+ even if OCR fails or is missing.
 
 
 
15
  """
16
  try:
17
  img = Image.open(io.BytesIO(image_bytes)).convert("RGB")
 
27
  return "", [], img.size
28
 
29
  tokens = []
30
+ for i, word in enumerate(data["text"]):
31
+ if not word:
32
  continue
33
  try:
34
  conf = float(data.get("conf", ["-1"])[i])
35
+ except Exception:
36
  conf = -1.0
37
+ box = (
38
+ data["left"][i],
39
+ data["top"][i],
40
+ data["width"][i],
41
+ data["height"][i],
42
+ )
43
+ tokens.append({"text": word, "conf": conf, "box": box})
44
 
45
+ full_text = " ".join(t["text"] for t in tokens)
46
+ return full_text, tokens, img.size
47
 
48
 
49
+ def guess_price(tokens):
50
+ """Find first $xx.xx or similar pattern."""
51
+ price_pattern = re.compile(r"\$?\d{1,4}(?:[.,]\d{2})?")
52
  for t in tokens:
53
+ m = price_pattern.search(t["text"])
54
  if m:
55
+ val = m.group(0)
56
  try:
57
+ return float(val.replace("$", "").replace(",", "")), t["box"]
58
+ except Exception:
 
 
 
59
  pass
60
+ return None, None
61
 
62
 
63
+ def guess_title(text: str) -> str:
64
+ """Simplify long OCR text into a short title."""
65
+ words = re.findall(r"[A-Za-z0-9\-]{3,}", text)
66
+ return " ".join(words[:5]) if words else "Item"
 
 
 
67
 
68
 
69
+ def annotate_price_box(image_bytes: bytes, box, label: str = None):
70
+ """Draw red rectangle around detected price."""
71
  try:
72
+ img = Image.open(io.By