zhlajiex committed on
Commit
ed58206
·
1 Parent(s): 1856281

Feat: Support .heic image format for OCR

Browse files
Dockerfile CHANGED
@@ -21,7 +21,7 @@ ENV PATH="$VIRTUAL_ENV/bin:$PATH"
21
 
22
  # Install PaddleOCR and Dependencies
23
  # This might take a while to build/download
24
- RUN pip install --no-cache-dir paddlepaddle paddleocr
25
 
26
  # Set working directory
27
  WORKDIR /app
 
21
 
22
  # Install PaddleOCR and Dependencies
23
  # This might take a while to build/download
24
+ RUN pip install --no-cache-dir paddlepaddle paddleocr pillow pillow-heif
25
 
26
  # Set working directory
27
  WORKDIR /app
backend/services/paddle_ocr.py CHANGED
@@ -1,18 +1,23 @@
1
  import sys
2
  import os
3
  import logging
 
4
 
5
  # Disable heavy logging from Paddle
6
  os.environ['GLOG_minloglevel'] = '3'
7
 
8
  try:
9
  from paddleocr import PaddleOCR
 
 
10
  except ImportError:
11
- print("Error: paddleocr not installed. Please run 'pip install paddleocr paddlepaddle'")
12
  sys.exit(1)
13
 
 
 
 
14
  # Initialize OCR engine
15
- # lang='en' is default. use_angle_cls=True helps with rotated text
16
  ocr = PaddleOCR(use_angle_cls=True, lang='en', show_log=False)
17
 
18
  def process_image(img_path):
@@ -21,16 +26,26 @@ def process_image(img_path):
21
  sys.exit(1)
22
 
23
  try:
24
- result = ocr.ocr(img_path, cls=True)
 
 
 
 
 
 
 
 
 
 
 
 
25
  full_text = []
26
 
27
- # Result structure: [ [ [ [x1,y1],[x2,y2].. ], ("text", conf) ], ... ]
28
  if result and result[0]:
29
  for line in result[0]:
30
  text = line[1][0]
31
  full_text.append(text)
32
 
33
- # Output strictly the text to stdout for Node.js to capture
34
  print("\n".join(full_text))
35
 
36
  except Exception as e:
@@ -42,4 +57,4 @@ if __name__ == "__main__":
42
  print("Usage: python paddle_ocr.py <image_path>")
43
  sys.exit(1)
44
 
45
- process_image(sys.argv[1])
 
1
  import sys
2
  import os
3
  import logging
4
+ import numpy as np
5
 
6
  # Disable heavy logging from Paddle
7
  os.environ['GLOG_minloglevel'] = '3'
8
 
9
  try:
10
  from paddleocr import PaddleOCR
11
+ from PIL import Image
12
+ import pillow_heif
13
  except ImportError:
14
+ print("Error: Missing dependencies. Run 'pip install paddleocr paddlepaddle pillow pillow-heif'")
15
  sys.exit(1)
16
 
17
+ # Register HEIC opener
18
+ pillow_heif.register_heif_opener()
19
+
20
  # Initialize OCR engine
 
21
  ocr = PaddleOCR(use_angle_cls=True, lang='en', show_log=False)
22
 
23
  def process_image(img_path):
 
26
  sys.exit(1)
27
 
28
  try:
29
+ # Check extension for HEIC handling
30
+ ext = os.path.splitext(img_path)[1].lower()
31
+
32
+ img_input = img_path
33
+
34
+ # If HEIC, load with Pillow and convert to numpy array (RGB)
35
+ if ext == '.heic':
36
+ image = Image.open(img_path)
37
+ if image.mode != 'RGB':
38
+ image = image.convert('RGB')
39
+ img_input = np.array(image)
40
+
41
+ result = ocr.ocr(img_input, cls=True)
42
  full_text = []
43
 
 
44
  if result and result[0]:
45
  for line in result[0]:
46
  text = line[1][0]
47
  full_text.append(text)
48
 
 
49
  print("\n".join(full_text))
50
 
51
  except Exception as e:
 
57
  print("Usage: python paddle_ocr.py <image_path>")
58
  sys.exit(1)
59
 
60
+ process_image(sys.argv[1])
backend/utils/fileProcessor.js CHANGED
@@ -11,7 +11,7 @@ exports.processFile = async (filePath) => {
11
  let content = '';
12
 
13
  try {
14
- if (['.png', '.jpg', '.jpeg', '.webp'].includes(ext)) {
15
  // Process image with Local PaddleOCR
16
  console.log(`[PaddleOCR] Initiating local analysis for: ${fileName}...`);
17
 
 
11
  let content = '';
12
 
13
  try {
14
+ if (['.png', '.jpg', '.jpeg', '.webp', '.heic'].includes(ext)) {
15
  // Process image with Local PaddleOCR
16
  console.log(`[PaddleOCR] Initiating local analysis for: ${fileName}...`);
17