Suhasdev committed on
Commit
1e73641
·
1 Parent(s): e235e41

Fix: Make OCR imports lazy and add version constraints to requirements.txt

Browse files
Files changed (1) hide show
  1. ocr_strategies.py +34 -4
ocr_strategies.py CHANGED
@@ -1,9 +1,25 @@
1
- import easyocr
2
- import pytesseract
3
  from abc import ABC, abstractmethod
4
  from typing import Set
5
- import cv2
6
- import numpy as np
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
  class OCRStrategy(ABC):
9
  """Abstract base class for OCR strategies."""
@@ -14,6 +30,10 @@ class OCRStrategy(ABC):
14
  class EasyOCRStrategy(OCRStrategy):
15
  """Concrete strategy for EasyOCR."""
16
  def __init__(self):
 
 
 
 
17
  # Initialize once to save memory/time
18
  print("Loading EasyOCR Model...")
19
  self.reader = easyocr.Reader(['en'], gpu=False)
@@ -24,6 +44,16 @@ class EasyOCRStrategy(OCRStrategy):
24
 
25
  class TesseractOCRStrategy(OCRStrategy):
26
  """Concrete strategy for Tesseract OCR (Free & Fast)."""
 
 
 
 
 
 
 
 
 
 
27
  def extract_text(self, image_path: str) -> Set[str]:
28
  # Preprocessing for better Tesseract accuracy
29
  img = cv2.imread(image_path)
 
 
 
1
  from abc import ABC, abstractmethod
2
  from typing import Set
3
+
4
+ # Optional imports: attempted at module load; availability flags let missing dependencies raise a clear error on use
5
+ try:
6
+ import easyocr
7
+ EASYOCR_AVAILABLE = True
8
+ except ImportError:
9
+ EASYOCR_AVAILABLE = False
10
+
11
+ try:
12
+ import pytesseract
13
+ PYTESSERACT_AVAILABLE = True
14
+ except ImportError:
15
+ PYTESSERACT_AVAILABLE = False
16
+
17
+ try:
18
+ import cv2
19
+ import numpy as np
20
+ CV2_AVAILABLE = True
21
+ except ImportError:
22
+ CV2_AVAILABLE = False
23
 
24
  class OCRStrategy(ABC):
25
  """Abstract base class for OCR strategies."""
 
30
  class EasyOCRStrategy(OCRStrategy):
31
  """Concrete strategy for EasyOCR."""
32
  def __init__(self):
33
+ if not EASYOCR_AVAILABLE:
34
+ raise ImportError(
35
+ "EasyOCR is not installed. Please install it with: pip install easyocr"
36
+ )
37
  # Initialize once to save memory/time
38
  print("Loading EasyOCR Model...")
39
  self.reader = easyocr.Reader(['en'], gpu=False)
 
44
 
45
  class TesseractOCRStrategy(OCRStrategy):
46
  """Concrete strategy for Tesseract OCR (Free & Fast)."""
47
+ def __init__(self):
48
+ if not PYTESSERACT_AVAILABLE:
49
+ raise ImportError(
50
+ "pytesseract is not installed. Please install it with: pip install pytesseract"
51
+ )
52
+ if not CV2_AVAILABLE:
53
+ raise ImportError(
54
+ "opencv-python is not installed. Please install it with: pip install opencv-python-headless"
55
+ )
56
+
57
  def extract_text(self, image_path: str) -> Set[str]:
58
  # Preprocessing for better Tesseract accuracy
59
  img = cv2.imread(image_path)