Added Tesseract checks and Debian install
Browse files
- create_interest_areas_from_image.py +13 -4
- packages.txt +4 -4
create_interest_areas_from_image.py
CHANGED
|
@@ -3,19 +3,28 @@ import pandas as pd
|
|
| 3 |
import io
|
| 4 |
import csv
|
| 5 |
import os
|
|
|
|
| 6 |
|
| 7 |
if os.environ.get('TESSDATA_PREFIX') is None and os.name == 'nt':
|
| 8 |
-
os.environ['TESSDATA_PREFIX'] = 'C:/Program Files/Tesseract-OCR/tessdata/'
|
| 9 |
tessdata_prefix = 'C:/Program Files/Tesseract-OCR/tessdata/'
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
if os.environ.get('TESSDATA_PREFIX') is None and os.name != 'nt':
|
| 11 |
-
os.environ['TESSDATA_PREFIX'] = '/usr/share/tesseract-ocr/4.00/tessdata'
|
| 12 |
tessdata_prefix = '/usr/share/tesseract-ocr/4.00/tessdata'
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
import pytesseract
|
| 15 |
if os.name == 'nt':
|
| 16 |
-
|
|
|
|
| 17 |
else:
|
| 18 |
-
|
|
|
|
| 19 |
|
| 20 |
def recognize_text(image_path, tesseract_config='--psm 6 -l spa'):
|
| 21 |
"""
|
|
|
|
| 3 |
import io
|
| 4 |
import csv
|
| 5 |
import os
|
| 6 |
+
from pathlib import Path
|
| 7 |
|
| 8 |
if os.environ.get('TESSDATA_PREFIX') is None and os.name == 'nt':
|
|
|
|
| 9 |
tessdata_prefix = 'C:/Program Files/Tesseract-OCR/tessdata/'
|
| 10 |
+
if Path(tessdata_prefix).exists():
|
| 11 |
+
os.environ['TESSDATA_PREFIX'] = 'C:/Program Files/Tesseract-OCR/tessdata/'
|
| 12 |
+
else:
|
| 13 |
+
tessdata_prefix = None
|
| 14 |
if os.environ.get('TESSDATA_PREFIX') is None and os.name != 'nt':
|
|
|
|
| 15 |
tessdata_prefix = '/usr/share/tesseract-ocr/4.00/tessdata'
|
| 16 |
+
if Path(tessdata_prefix).exists():
|
| 17 |
+
os.environ['TESSDATA_PREFIX'] = '/usr/share/tesseract-ocr/4.00/tessdata'
|
| 18 |
+
else:
|
| 19 |
+
tessdata_prefix = None
|
| 20 |
|
| 21 |
import pytesseract
|
| 22 |
if os.name == 'nt':
|
| 23 |
+
if Path(r'c:/Program Files/Tesseract-OCR/tesseract.exe').exists():
|
| 24 |
+
pytesseract.pytesseract.tesseract_cmd = r'c:/Program Files/Tesseract-OCR/tesseract.exe'
|
| 25 |
else:
|
| 26 |
+
if Path(r'/usr/bin/tesseract').exists():
|
| 27 |
+
pytesseract.pytesseract.tesseract_cmd =r'/usr/bin/tesseract'
|
| 28 |
|
| 29 |
def recognize_text(image_path, tesseract_config='--psm 6 -l spa'):
|
| 30 |
"""
|
packages.txt
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
-
build-essential
|
| 2 |
-
curl
|
| 3 |
-
software-properties-common
|
| 4 |
libcairo2-dev
|
| 5 |
tesseract-ocr
|
| 6 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
libcairo2-dev
|
| 2 |
tesseract-ocr
|
| 3 |
+
libtesseract-dev
|
| 4 |
+
tesseract-ocr-eng
|
| 5 |
+
tesseract-ocr-spa
|
| 6 |
+
tesseract-ocr-script-latn
|