theDavidGuy committed on
Commit
9b62fe2
·
verified ·
1 Parent(s): ba9dd6c

Update src/ocr_utils.py

Browse files
Files changed (1) hide show
  1. src/ocr_utils.py +62 -38
src/ocr_utils.py CHANGED
@@ -1,42 +1,66 @@
1
- import pytesseract
 
2
  from PIL import Image, ImageDraw
3
- import re
4
 
5
def ocr_image(image):
    """Return the text Tesseract recognises in ``image``.

    This is a best-effort helper: if ``pytesseract.image_to_string`` raises
    for any reason, an empty string is returned instead of propagating the
    error.
    """
    try:
        extracted = pytesseract.image_to_string(image)
    except Exception:
        # Callers treat "no text" and "OCR failed" the same way.
        return ""
    else:
        return extracted
11
-
12
def guess_price(text):
    """Return the first number in ``text`` that looks like a price.

    A price is one to three digits, optionally preceded by ``$`` and
    optionally followed by a two-digit fraction separated by ``.`` or ``,``.

    Args:
        text: OCR text to scan. ``None`` or an empty string is treated as
            containing no price.

    Returns:
        The matched price as a float, or ``0.0`` when nothing matches.
    """
    if not text:
        return 0.0
    m = re.search(r'\$?\s*(\d{1,3}(?:[.,]\d{2})?)', text)
    if m is None:
        return 0.0
    # The pattern only ever admits a comma as a decimal separator ("12,99"),
    # so normalise it to a dot; deleting it would turn 12,99 into 1299.
    return float(m.group(1).replace(",", "."))
18
-
19
def guess_title(text):
    """Guess a short title from OCR text.

    Args:
        text: OCR text, possibly multi-line. ``None`` is treated as empty.

    Returns:
        The first line whose stripped length is greater than three
        characters, cut to at most 40 characters, or ``"Item"`` when no
        line qualifies.
    """
    # Stop at the first usable line instead of collecting every candidate.
    for line in (text or "").splitlines():
        candidate = line.strip()
        if len(candidate) > 3:
            return candidate[:40]
    return "Item"
26
-
27
def annotate_price_box(image, text):
    """Outline and label every OCR word that starts like a price.

    Args:
        image: Image to run OCR on; drawing happens on the result of
            ``image.convert("RGB")``.
        text: Unused by this function; kept so existing callers keep working.

    Returns:
        The RGB image with a red rectangle around, and a red label above,
        each word matching ``\\$?\\d+`` at its start.
    """
    img = image.convert("RGB")
    draw = ImageDraw.Draw(img)
    boxes = pytesseract.image_to_data(image, output_type=pytesseract.Output.DICT)
    for i, word in enumerate(boxes["text"]):
        if not re.match(r"\$?\d+", word):
            continue
        x, y = boxes["left"][i], boxes["top"][i]
        w, h = boxes["width"][i], boxes["height"][i]
        draw.rectangle([x, y, x + w, y + h], outline="red", width=3)
        # Clamp the label so a word within 15px of the top edge does not get
        # its label drawn at a negative y and clipped off the image.
        draw.text((x, max(y - 15, 0)), word, fill="red")
    return img
 
 
 
 
 
 
1
+ import io, re
2
+ from typing import List, Dict, Tuple, Any
3
  from PIL import Image, ImageDraw
 
4
 
5
+ try:
6
+ import pytesseract
7
+ from pytesseract import Output as TessOutput
8
+ except Exception:
9
+ pytesseract = None
10
+ TessOutput = None
11
+
12
+ PRICE_RE = re.compile(r"(\$\s*\d{1,4}(?:\.\d{2})?|\b\d{1,4}\.\d{2}\b)")
13
+
14
def ocr_image(image_bytes: bytes) -> Tuple[str, List[Dict[str, Any]], Tuple[int, int]]:
    """Run OCR on encoded image bytes and return text, word tokens and size.

    ALWAYS returns ``(full_text, tokens, size)``, even if decoding or OCR
    fails.

    Args:
        image_bytes: Encoded image data to hand to ``Image.open``.

    Returns:
        A ``(full_text, tokens, size)`` tuple where

        * ``full_text`` is the token texts joined with single spaces,
        * ``tokens`` is a list of ``{"text", "conf", "box"}`` dicts with
          ``box`` being ``(left, top, width, height)`` and ``conf`` being
          ``-1.0`` when the confidence is missing or unparsable,
        * ``size`` is ``img.size``, or ``(0, 0)`` when the image could not
          be opened.

        ``full_text`` and ``tokens`` are empty when pytesseract is
        unavailable or OCR raises.
    """
    try:
        img = Image.open(io.BytesIO(image_bytes)).convert("RGB")
    except Exception:
        return "", [], (0, 0)

    if pytesseract is None:
        return "", [], img.size

    try:
        data = pytesseract.image_to_data(img, output_type=TessOutput.DICT)
    except Exception:
        return "", [], img.size

    confs = data.get("conf", [])
    tokens: List[Dict[str, Any]] = []
    for i, raw in enumerate(data.get("text", [])):
        # Whitespace-only cells are not words; skipping them keeps stray
        # blanks out of both the token list and the joined text.
        txt = raw.strip() if isinstance(raw, str) else ""
        if not txt:
            continue
        try:
            conf = float(confs[i])
        except (IndexError, TypeError, ValueError):
            conf = -1.0
        try:
            box = (data["left"][i], data["top"][i], data["width"][i], data["height"][i])
        except (KeyError, IndexError):
            # A token without geometry cannot be used; skip it rather
            # than break the "always return" contract.
            continue
        tokens.append({"text": txt, "conf": conf, "box": box})

    full_text = " ".join(t["text"] for t in tokens)
    return full_text, tokens, img.size
44
+
45
+
46
def guess_price(tokens: List[Dict[str, Any]], *, min_price: float = 0.5, max_price: float = 1000.0):
    """Find the smallest plausible price among OCR tokens.

    Args:
        tokens: Token dicts carrying a ``"text"`` string and a ``"box"``.
            Tokens missing either key are ignored. ``None`` is treated as
            an empty list.
        min_price: Lowest value accepted as a price (inclusive).
        max_price: Highest value accepted as a price (inclusive).

    Returns:
        ``(value, box)`` for the smallest in-range price matched by
        ``PRICE_RE``, or ``(None, None)`` when no token qualifies.
    """
    best = None
    for t in tokens or ():
        text = t.get("text")
        if not isinstance(text, str) or "box" not in t:
            continue
        # Commas are removed before matching, as thousands separators.
        m = PRICE_RE.search(text.replace(",", ""))
        if m is None:
            continue
        # The match is digits with an optional "$", whitespace and ".dd",
        # so float() cannot fail and needs no blanket except.
        val = float(m.group(0).replace("$", ""))
        if min_price <= val <= max_price and (best is None or val < best[0]):
            best = (val, t["box"])
    return best if best is not None else (None, None)
61
+
62
+
63
+ def guess_title(text: str) -> str:
64
+ """Make a short human-readable title from OCR text."""
65
+ words = re.findall(r"[A-Za-z0-9\-]{3,}", text or "")
66
+ ret