Spaces:

theDavidGuy
/

snipebargain

Sleeping

App Files Files Community

snipebargain / ocr_utils.py

theDavidGuy

Update ocr_utils.py

ba9dd6c verified 5 months ago

raw

history blame contribute delete

3.04 kB

	import io, re
	from typing import List, Dict, Tuple, Any
	from PIL import Image, ImageDraw

	try:
	import pytesseract
	from pytesseract import Output as TessOutput
	except Exception:
	pytesseract = None
	TessOutput = None

	# Matches a price written in one of two forms:
	#   1. a dollar sign, optional whitespace, 1-4 digits and optional cents
	#      ("$5", "$ 12.99")
	#   2. a bare number with exactly two decimals ("12.99")
	# The two forms are joined with an unescaped "|" (alternation). An escaped
	# "\|" would demand a literal pipe character in the text, so ordinary
	# prices would never match.
	PRICE_RE = re.compile(r"(\$\s*\d{1,4}(?:\.\d{2})?|\b\d{1,4}\.\d{2}\b)")

	def ocr_image(image_bytes: bytes) -> Tuple[str, List[Dict[str, Any]], Tuple[int, int]]:
	    """Run OCR over an encoded image and collect the recognised tokens.

	    This function ALWAYS returns a ``(full_text, tokens, size)`` triple,
	    even if the image cannot be decoded or OCR fails.

	    Args:
	        image_bytes: Raw encoded image data, opened with ``PIL.Image.open``.

	    Returns:
	        A triple ``(full_text, tokens, size)``:

	        - full_text: the kept token texts joined by single spaces.
	        - tokens: list of dicts with keys ``'text'``, ``'conf'`` (float,
	          ``-1.0`` when no usable confidence is available) and ``'box'``
	          as ``(x, y, w, h)``.
	        - size: ``(width, height)`` of the decoded image, or ``(0, 0)``
	          when the image could not be loaded.
	    """
	    # Load the image safely; any decode failure yields the empty triple.
	    try:
	        img = Image.open(io.BytesIO(image_bytes)).convert("RGB")
	    except Exception:
	        return "", [], (0, 0)

	    # No OCR engine available -> empty result, but still a triple.
	    if pytesseract is None:
	        return "", [], img.size

	    # Run OCR; on any engine error return an empty result for this image.
	    try:
	        data = pytesseract.image_to_data(img, output_type=TessOutput.DICT)
	    except Exception:
	        return "", [], img.size

	    tokens: List[Dict[str, Any]] = []
	    for i, txt in enumerate(data.get("text") or []):
	        # Skip empty AND whitespace-only entries: they carry no text and
	        # would otherwise add blank tokens and stray spaces to full_text.
	        if not txt or not str(txt).strip():
	            continue
	        try:
	            conf = float(data.get("conf", ["-1"])[i])
	        except (IndexError, TypeError, ValueError):
	            conf = -1.0
	        try:
	            box = (data["left"][i], data["top"][i], data["width"][i], data["height"][i])
	        except (KeyError, IndexError):
	            # Malformed row without geometry: drop it rather than raise,
	            # so the "always return a triple" contract holds.
	            continue
	        tokens.append({"text": txt, "conf": conf, "box": box})

	    full_text = " ".join(t["text"] for t in tokens)
	    return full_text, tokens, img.size


	def guess_price(tokens: List[Dict[str, Any]]):
	    """Find the lowest plausible price among the OCR tokens.

	    Each token's text (with commas removed) is searched with PRICE_RE. A
	    match counts as plausible when its numeric value lies between 0.5 and
	    1000 inclusive. The smallest such value wins; on a tie the token seen
	    first is kept.

	    Returns:
	        ``(value, box)`` for the winning token, or ``(None, None)`` when
	        no plausible price was found.
	    """
	    lowest_value = None
	    lowest_box = None
	    for token in tokens:
	        match = PRICE_RE.search(token["text"].replace(",", ""))
	        if not match:
	            continue
	        amount_text = match.group(0).replace("$", "").strip()
	        try:
	            amount = float(amount_text)
	            if amount < 0.5 or amount > 1000:
	                continue
	            if lowest_value is not None and amount >= lowest_value:
	                continue
	            # Read the box first so a failure leaves the current best intact.
	            lowest_box = token["box"]
	            lowest_value = amount
	        except Exception:
	            continue
	    if lowest_value is None:
	        return None, None
	    return lowest_value, lowest_box


	def guess_title(text: str) -> str:
	    """Build a short, human-readable title from OCR text.

	    Takes the first five runs of three or more letters, digits or hyphens
	    and joins them with spaces. Falls back to "Item" when the text is
	    empty, None, or contains no such run.
	    """
	    source = text if text else ""
	    candidates = re.findall(r"[A-Za-z0-9\-]{3,}", source)
	    if not candidates:
	        return "Item"
	    return " ".join(candidates[:5])


	def annotate_price_box(image_bytes: bytes, box: Tuple[int, int, int, int], label: str = None) -> bytes:
	    """Outline the detected price in red and return the image as PNG bytes.

	    When ``box`` is truthy it is read as ``(x, y, w, h)`` and a red
	    rectangle is drawn around it. When ``label`` is also given, a filled
	    red tag with white text is drawn just above the box (clamped to the top
	    edge of the image). If anything fails, the original ``image_bytes`` are
	    returned unchanged.
	    """
	    try:
	        canvas = Image.open(io.BytesIO(image_bytes)).convert("RGB")
	        pen = ImageDraw.Draw(canvas)
	        if box:
	            left, top, box_w, box_h = box
	            pen.rectangle([left, top, left + box_w, top + box_h], outline=(255, 0, 0), width=3)
	            if label:
	                # The tag is 22 px tall; its width is a rough estimate of
	                # 8 px per character, with a 60 px minimum.
	                tag_top = max(0, top - 22)
	                tag_right = left + max(60, len(label) * 8)
	                pen.rectangle([left, tag_top, tag_right, tag_top + 22], fill=(255, 0, 0))
	                pen.text((left + 4, tag_top + 4), label, fill=(255, 255, 255))
	        buffer = io.BytesIO()
	        canvas.save(buffer, format="PNG")
	        return buffer.getvalue()
	    except Exception:
	        return image_bytes