| import os
|
| from pathlib import Path
|
|
|
| import numpy as np
|
| import requests
|
|
|
| from docgenie.generation.utils.image import img_read, img_write_to_bytes
|
| from docgenie.utils.ocr import MicrosoftOCR, MicrosoftOCRWord
|
|
|
# Engine identifier; used as the last path segment of the OCR endpoint.
OCR_ENGINE = 'microsoft_di'

# Raw port override from the environment (None when the variable is unset).
OCR_PORT_ENV = os.getenv('DOCGENIE_OCR_PORT')

# Effective port: the override when it is non-empty, otherwise '8000'.
OCR_PORT = OCR_PORT_ENV if OCR_PORT_ENV else '8000'

# Base URL of the OCR service on the local host.
OCR_URL = f'http://localhost:{OCR_PORT}'

# Suffix appended to an image's file name to form its cache file name.
OCR_POSTFIX = '0.MicrosoftOcrService.json'
|
|
|
|
|
def get_ocr_cache_path(image_path: Path, postfix: str) -> Path:
    """Return the cache file path that sits next to *image_path*.

    The cache file lives in the same directory as the image and is named
    ``<image file name>.<postfix>`` (the image's own extension is kept).
    """
    cache_name = f'{image_path.name}.{postfix}'
    return image_path.parent.joinpath(cache_name)
|
|
|
|
|
def _parse_ocr_items(protos) -> list:
    """Convert raw service entries (words or lines) into ``MicrosoftOCRWord`` objects."""
    return [
        MicrosoftOCRWord(
            text=proto['text'],
            confidence=proto['confidence'],
            geo=proto['geo'],
        )
        for proto in protos
    ]


def call_ocr_service_from_image(image: np.ndarray,
                                url: str = OCR_URL,
                                engine: str = OCR_ENGINE,
                                client_caching: bool = True,
                                image_path: Path | None = None) -> MicrosoftOCR:
    """Run OCR on an in-memory image via the OCR service.

    The image is encoded with ``img_write_to_bytes`` and POSTed to
    ``{url}/v1/sync/ocr/{engine}``. Only the first page of the response is
    converted into a ``MicrosoftOCR`` object.

    Args:
        image: Image to send to the service.
        url: Base URL of the OCR service.
        engine: Engine name used as the last path segment of the endpoint.
        client_caching: When true and ``image_path`` is given, load the result
            from the cache file next to the image if it exists, and write the
            fresh result there otherwise.
        image_path: Location of the image on disk; used only to derive the
            cache file path. Without it no caching takes place.

    Returns:
        The OCR result for the first page.

    Raises:
        requests.HTTPError: If the service answers with an error status.
        KeyError, IndexError: If the response lacks the expected fields or
            contains no pages.
    """
    headers = {'accept': 'application/json'}

    # Caching needs a location on disk; an image passed without a path
    # (the default) is simply processed without a cache.
    cache_path = None
    if client_caching and image_path is not None:
        cache_path = get_ocr_cache_path(image_path, OCR_POSTFIX)
        if cache_path.exists():
            return MicrosoftOCR.load_from_file(cache_path)

    encoded_image = img_write_to_bytes(image)
    # NOTE(review): with requests' ``files=`` every dict entry becomes its own
    # multipart part, so 'type' is sent as a second part rather than as the
    # content type of 'image' - confirm this is what the service expects.
    files = {'image': encoded_image, 'type': 'image/png'}
    endpoint = f'{url}/v1/sync/ocr/{engine}'
    # NOTE(review): no timeout is set, so this call can block indefinitely
    # if the service never responds.
    response = requests.post(url=endpoint, headers=headers, files=files)
    response.raise_for_status()

    data = response.json()
    first_page = data['ocr']['pages'][0]
    ocr = MicrosoftOCR(
        angle=first_page['angle'],
        width=first_page['imageWidth'],
        height=first_page['imageHeight'],
        words=_parse_ocr_items(first_page['words']),
        # Lines share the word schema (text / confidence / geo).
        lines=_parse_ocr_items(first_page['lines']),
    )

    if cache_path is not None:
        ocr.save_to_file(cache_path)

    return ocr
|
|
|
|
|
def call_ocr_service_from_file(image_path: Path,
                               url: str = OCR_URL,
                               engine: str = OCR_ENGINE,
                               client_caching: bool = True) -> MicrosoftOCR:
    """Run OCR on an image file, reusing the cached result when allowed.

    Args:
        image_path: Path of the image file to process.
        url: Base URL of the OCR service.
        engine: Engine name forwarded to the service call.
        client_caching: When true, return the result stored in the cache file
            next to the image if that file exists.

    Returns:
        The OCR result, either loaded from the cache file or obtained from
        ``call_ocr_service_from_image``.
    """
    # Check the cache before reading the image so a hit skips the image load.
    cached_file = get_ocr_cache_path(image_path, OCR_POSTFIX) if client_caching else None
    if cached_file is not None and cached_file.exists():
        return MicrosoftOCR.load_from_file(cached_file)

    pixels = img_read(image_path)
    return call_ocr_service_from_image(
        pixels,
        url,
        engine,
        client_caching=client_caching,
        image_path=image_path,
    )
|
|
|
|
|
if __name__ == '__main__':
    # Manual smoke test: run OCR on one sample image (with client-side
    # caching enabled) and print the recognised words.
    sample_image = Path("data/temp/OCR/test-dataset") / "04276b91-eb12-4b47-80a6-666f6d09b6ce_1.jpg"

    result: MicrosoftOCR = call_ocr_service_from_file(sample_image, client_caching=True)

    print(result.words)
|
|
|