File size: 3,040 Bytes
b0cc733
ba9dd6c
b0cc733
 
 
 
 
 
 
 
 
ba9dd6c
 
b0cc733
3a081c4
ba9dd6c
 
 
 
3a081c4
ba9dd6c
3a081c4
 
 
 
 
ba9dd6c
b0cc733
 
3a081c4
ba9dd6c
b0cc733
 
 
 
3a081c4
ba9dd6c
 
 
b0cc733
 
 
84c3056
b0cc733
ba9dd6c
 
3a081c4
84c3056
 
b0cc733
3a081c4
ba9dd6c
 
 
b0cc733
ba9dd6c
b0cc733
ba9dd6c
b0cc733
ba9dd6c
 
 
 
84c3056
b0cc733
ba9dd6c
b0cc733
3a081c4
84c3056
ba9dd6c
 
84c3056
b0cc733
3a081c4
ba9dd6c
 
3a081c4
ba9dd6c
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import io, re
from typing import List, Dict, Tuple, Any
from PIL import Image, ImageDraw

# pytesseract is an optional dependency: when importing it fails for any
# reason, both names are bound to None so the rest of the module can test
# `pytesseract is None` and degrade gracefully instead of failing at import.
try:
    import pytesseract
    from pytesseract import Output as TessOutput
except Exception:
    pytesseract = None
    TessOutput = None

# Matches a price-like substring in one of two forms:
#   - "$", optional whitespace, 1-4 digits, optional ".dd" decimal part
#   - a bare number with 1-4 digits and exactly two decimals (word-bounded)
PRICE_RE = re.compile(r"(\$\s*\d{1,4}(?:\.\d{2})?|\b\d{1,4}\.\d{2}\b)")

def ocr_image(image_bytes: bytes) -> Tuple[str, List[Dict[str, Any]], Tuple[int, int]]:
    """
    Run OCR on an encoded image and return (full_text, tokens, size).

    A triple is ALWAYS returned, even when decoding or OCR fails:
    - full_text: str, the token texts joined by single spaces
    - tokens: list of {'text', 'conf', 'box'} where 'box' is (x, y, w, h)
      and 'conf' is a float (-1.0 when the confidence is missing/unparsable)
    - size: (width, height) of the decoded image, or (0, 0) if the bytes
      could not be opened as an image
    """
    # Load the image safely; undecodable bytes yield the fully-empty triple.
    try:
        img = Image.open(io.BytesIO(image_bytes)).convert("RGB")
    except Exception:
        return "", [], (0, 0)

    # No OCR engine available -> empty text/tokens, but still report the size.
    if pytesseract is None:
        return "", [], img.size

    # Run OCR; any engine error (missing binary, bad language data, ...)
    # degrades to an empty result rather than propagating.
    try:
        data = pytesseract.image_to_data(img, output_type=TessOutput.DICT)
    except Exception:
        return "", [], img.size

    texts = data.get("text") or []
    confidences = data.get("conf") or []

    tokens: List[Dict[str, Any]] = []
    for i, txt in enumerate(texts):
        # Skip empty AND whitespace-only entries: they carry no text and
        # would otherwise add blank tokens and stray spaces to full_text.
        if not txt or not str(txt).strip():
            continue
        try:
            conf = float(confidences[i])
        except (IndexError, TypeError, ValueError):
            conf = -1.0
        # A token without complete geometry cannot be boxed; skip it instead
        # of raising, to honour the "always return a triple" contract.
        try:
            box = (data["left"][i], data["top"][i], data["width"][i], data["height"][i])
        except (KeyError, IndexError, TypeError):
            continue
        tokens.append({"text": txt, "conf": conf, "box": box})

    full_text = " ".join(t["text"] for t in tokens)
    return full_text, tokens, img.size


def guess_price(tokens: List[Dict[str, Any]]):
    """
    Find the lowest plausible price among the OCR tokens.

    Commas are removed from each token's text before matching, and only the
    first price-like match per token is considered. A value counts as
    plausible when it lies between 0.5 and 1000 inclusive.

    Returns (value, box) for the lowest plausible price, or (None, None)
    when no token qualifies.
    """
    lowest = None
    for token in tokens:
        match = PRICE_RE.search(token["text"].replace(",", ""))
        if match is None:
            continue
        digits = match.group(0).replace("$", "").strip()
        try:
            amount = float(digits)
            plausible = 0.5 <= amount <= 1000
            if plausible and (lowest is None or amount < lowest[0]):
                lowest = (amount, token["box"])
        except Exception:
            # Best-effort: a token that cannot be evaluated is ignored.
            pass
    if lowest is None:
        return (None, None)
    return lowest


def guess_title(text: str) -> str:
    """
    Build a short title from OCR text.

    Takes the first five runs of three or more letters, digits or hyphens
    and joins them with spaces. Falls back to "Item" when the text is empty
    (or None) or contains no such run.
    """
    source = text if text else ""
    candidates = re.findall(r"[A-Za-z0-9\-]{3,}", source)
    if not candidates:
        return "Item"
    return " ".join(candidates[:5])


def annotate_price_box(image_bytes: bytes, box: Tuple[int, int, int, int], label: str = None) -> bytes:
    """
    Outline the detected price in red and return the result as PNG bytes.

    box is (x, y, w, h). When label is given, a filled red tag with the label
    in white is drawn just above the box (clamped to the top edge of the
    image). With a falsy box the image is only re-encoded as PNG. If anything
    fails, the original image_bytes are returned unchanged.
    """
    red = (255, 0, 0)
    white = (255, 255, 255)
    tag_height = 22
    try:
        canvas = Image.open(io.BytesIO(image_bytes)).convert("RGB")
        pen = ImageDraw.Draw(canvas)
        if box:
            left, top, width, height = box
            pen.rectangle([left, top, left + width, top + height], outline=red, width=3)
            if label:
                # Tag sits above the box; never start above the image's top edge.
                tag_top = max(0, top - tag_height)
                # Tag width grows with the label length, with a 60px minimum.
                tag_right = left + max(60, len(label) * 8)
                pen.rectangle([left, tag_top, tag_right, tag_top + tag_height], fill=red)
                pen.text((left + 4, tag_top + 4), label, fill=white)
        buffer = io.BytesIO()
        canvas.save(buffer, format="PNG")
        return buffer.getvalue()
    except Exception:
        return image_bytes