mohamed12ahmed committed on
Commit
ff90c28
·
verified ·
1 Parent(s): 03c1817

Update ocr_processing.py

Browse files
Files changed (1) hide show
  1. ocr_processing.py +22 -2
ocr_processing.py CHANGED
@@ -32,16 +32,36 @@ def correct_lighting(image):
32
 
33
  # OCR Extraction using EasyOCR
34
  def extract_text(image, langs=['en']):
35
- reader = easyocr.Reader(langs, gpu=False) # GPU=True لو عندك دعم CUDA
36
- results = reader.readtext(image, detail=0)
37
  text = "\n".join(results)
38
  return text
39
 
 
 
 
 
 
 
 
 
 
 
 
40
  # Full pipeline
41
  def process_image(file, langs=['en']):
42
  img = Image.open(file).convert('RGB')
43
  img_cv = np.array(img)
 
 
44
  img_cv = deskew(img_cv)
 
 
45
  img_cv = correct_lighting(img_cv)
 
 
 
 
 
46
  text = extract_text(img_cv, langs)
47
  return text
 
32
 
33
  # OCR Extraction using EasyOCR
34
  def extract_text(image, langs=['en']):
35
+ reader = easyocr.Reader(langs, gpu=False)
36
+ results = reader.readtext(image, detail=0, paragraph=True)
37
  text = "\n".join(results)
38
  return text
39
 
40
+
41
def enhance_for_ocr(image, block_size=15, c=10):
    """Binarize an image with adaptive Gaussian thresholding.

    Args:
        image: NumPy image array. A 2-D (single-channel) array is used as
            is; any other input is converted with ``cv2.COLOR_BGR2GRAY``.
        block_size: Size of the pixel neighbourhood used to compute each
            local threshold. Must be an odd integer >= 3.
        c: Constant subtracted from the weighted local mean.

    Returns:
        The array returned by ``cv2.adaptiveThreshold`` with
        ``cv2.THRESH_BINARY`` and a maximum value of 255.

    Raises:
        ValueError: If ``block_size`` is even or smaller than 3.
    """
    if block_size < 3 or block_size % 2 == 0:
        raise ValueError(
            f"block_size must be an odd integer >= 3, got {block_size!r}"
        )

    if image.ndim == 2:
        # Already single-channel: cvtColor would reject it, so pass through.
        gray = image
    else:
        # NOTE(review): process_image builds its array from a PIL image
        # converted to 'RGB'; if the channel order is still RGB here,
        # cv2.COLOR_RGB2GRAY would give the intended weights - confirm.
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Adaptive threshold
    return cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY, block_size, c,
    )
49
+
50
+
51
  # Full pipeline
52
  def process_image(file, langs=['en']):
53
  img = Image.open(file).convert('RGB')
54
  img_cv = np.array(img)
55
+
56
+ # 1. تصحيح الانحراف
57
  img_cv = deskew(img_cv)
58
+
59
+ # 2. تصحيح الإضاءة
60
  img_cv = correct_lighting(img_cv)
61
+
62
+ # 3. تحسين للنصوص
63
+ img_cv = enhance_for_ocr(img_cv)
64
+
65
+ # 4. استخراج النصوص
66
  text = extract_text(img_cv, langs)
67
  return text