Rammohan0504 committed on
Commit
b272a68
·
verified ·
1 Parent(s): 158e677

Update ocr_engine.py

Browse files
Files changed (1) hide show
  1. ocr_engine.py +8 -4
ocr_engine.py CHANGED
@@ -2,23 +2,27 @@ import cv2
2
  import pytesseract
3
  import re
4
 
 
 
 
5
  def extract_weight_from_image(img_cv):
6
  # Resize to enhance detail
7
  img = cv2.resize(img_cv, None, fx=2.0, fy=2.0, interpolation=cv2.INTER_CUBIC)
8
 
9
- # Convert to grayscale
10
  if len(img.shape) == 3:
11
  img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
12
 
13
- # Apply adaptive threshold
14
  img = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
15
  cv2.THRESH_BINARY, 11, 3)
16
 
17
- # OCR config
18
  config = "--psm 6 -c tessedit_char_whitelist=0123456789."
 
19
  text = pytesseract.image_to_string(img, config=config)
20
 
21
- # Regex match for weight
22
  matches = re.findall(r'\d+\.\d+|\d+', text)
23
  weight = matches[0] if matches else "0.0"
24
  confidence = 85 if matches else 0 # Simulated confidence
 
2
  import pytesseract
3
  import re
4
 
5
+ # Optional: manually set Tesseract path if needed
6
+ # pytesseract.pytesseract.tesseract_cmd = r"/usr/bin/tesseract"
7
+
8
  def extract_weight_from_image(img_cv):
9
  # Resize to enhance detail
10
  img = cv2.resize(img_cv, None, fx=2.0, fy=2.0, interpolation=cv2.INTER_CUBIC)
11
 
12
+ # Convert to grayscale if needed
13
  if len(img.shape) == 3:
14
  img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
15
 
16
+ # Adaptive threshold to boost OCR contrast
17
  img = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
18
  cv2.THRESH_BINARY, 11, 3)
19
 
20
+ # OCR configuration to focus on digits and decimal points
21
  config = "--psm 6 -c tessedit_char_whitelist=0123456789."
22
+
23
  text = pytesseract.image_to_string(img, config=config)
24
 
25
+ # Regex to extract decimal or integer weight
26
  matches = re.findall(r'\d+\.\d+|\d+', text)
27
  weight = matches[0] if matches else "0.0"
28
  confidence = 85 if matches else 0 # Simulated confidence