Spaces:
Sleeping
Sleeping
Update ocr_engine.py
Browse files
- ocr_engine.py +8 -4
ocr_engine.py
CHANGED
|
@@ -2,23 +2,27 @@ import cv2
|
|
| 2 |
import pytesseract
|
| 3 |
import re
|
| 4 |
|
|
|
|
|
|
|
|
|
|
| 5 |
def extract_weight_from_image(img_cv):
|
| 6 |
# Resize to enhance detail
|
| 7 |
img = cv2.resize(img_cv, None, fx=2.0, fy=2.0, interpolation=cv2.INTER_CUBIC)
|
| 8 |
|
| 9 |
-
# Convert to grayscale
|
| 10 |
if len(img.shape) == 3:
|
| 11 |
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
|
| 12 |
|
| 13 |
-
#
|
| 14 |
img = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
|
| 15 |
cv2.THRESH_BINARY, 11, 3)
|
| 16 |
|
| 17 |
-
# OCR
|
| 18 |
config = "--psm 6 -c tessedit_char_whitelist=0123456789."
|
|
|
|
| 19 |
text = pytesseract.image_to_string(img, config=config)
|
| 20 |
|
| 21 |
-
# Regex
|
| 22 |
matches = re.findall(r'\d+\.\d+|\d+', text)
|
| 23 |
weight = matches[0] if matches else "0.0"
|
| 24 |
confidence = 85 if matches else 0 # Simulated confidence
|
|
|
|
| 2 |
import pytesseract
|
| 3 |
import re
|
| 4 |
|
| 5 |
+
# Optional: manually set Tesseract path if needed
|
| 6 |
+
# pytesseract.pytesseract.tesseract_cmd = r"/usr/bin/tesseract"
|
| 7 |
+
|
| 8 |
def extract_weight_from_image(img_cv):
|
| 9 |
# Resize to enhance detail
|
| 10 |
img = cv2.resize(img_cv, None, fx=2.0, fy=2.0, interpolation=cv2.INTER_CUBIC)
|
| 11 |
|
| 12 |
+
# Convert to grayscale if needed
|
| 13 |
if len(img.shape) == 3:
|
| 14 |
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
|
| 15 |
|
| 16 |
+
# Adaptive threshold to boost OCR contrast
|
| 17 |
img = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
|
| 18 |
cv2.THRESH_BINARY, 11, 3)
|
| 19 |
|
| 20 |
+
# OCR configuration to focus on digits and decimal points
|
| 21 |
config = "--psm 6 -c tessedit_char_whitelist=0123456789."
|
| 22 |
+
|
| 23 |
text = pytesseract.image_to_string(img, config=config)
|
| 24 |
|
| 25 |
+
# Regex to extract decimal or integer weight
|
| 26 |
matches = re.findall(r'\d+\.\d+|\d+', text)
|
| 27 |
weight = matches[0] if matches else "0.0"
|
| 28 |
confidence = 85 if matches else 0 # Simulated confidence
|