Rammohan0504 committed on
Commit
deda0a5
·
verified ·
1 Parent(s): 03efb8e

Update ocr_engine.py

Browse files
Files changed (1) hide show
  1. ocr_engine.py +9 -15
ocr_engine.py CHANGED
@@ -1,29 +1,23 @@
1
  import cv2
2
- import pytesseract
3
  import re
4
 
5
- # Optional: manually set Tesseract path if needed
6
- # pytesseract.pytesseract.tesseract_cmd = r"/usr/bin/tesseract"
7
 
def extract_weight_from_image(img_cv):
    """Extract a numeric weight string from an image using Tesseract OCR.

    The image is upscaled 2x, converted to grayscale when it has three
    dimensions, binarized with a mean adaptive threshold, and passed to
    ``pytesseract.image_to_string`` with a digits-and-dot whitelist.

    Args:
        img_cv: Image array accepted by ``cv2.resize`` (presumably BGR or
            grayscale as loaded by OpenCV -- confirm against the caller).

    Returns:
        A ``(weight, confidence, text)`` tuple: ``weight`` is the first
        decimal or integer found in the OCR text as a string (``"0.0"``
        when none is found), ``confidence`` is a fixed placeholder (85 on
        a match, 0 otherwise), and ``text`` is the raw OCR output.
    """
    upscaled = cv2.resize(
        img_cv, None, fx=2.0, fy=2.0, interpolation=cv2.INTER_CUBIC
    )

    # A 3-dimensional array carries a channel axis; collapse it to gray.
    has_channels = len(upscaled.shape) == 3
    gray = cv2.cvtColor(upscaled, cv2.COLOR_BGR2GRAY) if has_channels else upscaled

    # Binarize to boost contrast before OCR.
    binary = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 11, 3
    )

    # Restrict recognition to digits and the decimal point.
    config = "--psm 6 -c tessedit_char_whitelist=0123456789."
    text = pytesseract.image_to_string(binary, config=config)

    # The leftmost decimal-or-integer token is the reported weight.
    first_number = re.search(r'\d+\.\d+|\d+', text)
    if first_number is None:
        return "0.0", 0, text

    # The confidence is simulated, not measured by the OCR engine.
    return first_number.group(0), 85, text
 
 
1
  import cv2
2
+ import easyocr
3
  import re
4
 
5
+ reader = easyocr.Reader(['en'], gpu=False)
 
6
 
def extract_weight_from_image(img_cv):
    """Extract a numeric weight string from an image using EasyOCR.

    The image is upscaled 2x, converted to grayscale when it has three
    dimensions, and passed to the module-level ``reader``. The weight is
    the first decimal or integer found in the detections, and the
    confidence is taken from the detection that actually contained it,
    not from whichever detection happens to come first.

    Args:
        img_cv: Image array accepted by ``cv2.resize`` (presumably BGR or
            grayscale as loaded by OpenCV -- confirm against the caller).
            ``None`` or an empty array (e.g. a failed image load upstream)
            yields the fallback result instead of raising inside OpenCV.

    Returns:
        A ``(weight, confidence, raw_text)`` tuple: ``weight`` is the
        matched number as a string (``"0.0"`` when none is found),
        ``confidence`` is the matching detection's score scaled to an
        integer percentage (0 when no number is found), and ``raw_text``
        is all detected text joined with single spaces.
    """
    # Guard against a missing/empty image so callers get the fallback tuple.
    if img_cv is None or getattr(img_cv, "size", None) == 0:
        return "0.0", 0, ""

    # Resize to help with clarity.
    img = cv2.resize(img_cv, None, fx=2.0, fy=2.0, interpolation=cv2.INTER_CUBIC)

    # Convert to grayscale if needed.
    if len(img.shape) == 3:
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Each result unpacks as (box, text, score).
    results = reader.readtext(img)
    raw_text = " ".join(text for _, text, _ in results)

    # Scan detections in order: the first one containing a number gives
    # the same weight as searching the joined text (a match cannot span
    # the joining space), and also tells us which score belongs to it.
    for _, text, score in results:
        found = re.search(r'\d+\.\d+|\d+', text)
        if found:
            return found.group(0), int(score * 100), raw_text

    # No number anywhere: report zero confidence for the fallback weight.
    return "0.0", 0, raw_text