File size: 637 Bytes
a72da64
e17aca4
 
 
 
a72da64
 
e17aca4
a72da64
 
e17aca4
 
a72da64
 
e17aca4
a72da64
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import os
# Limit OpenMP and MKL to a single thread each. These assignments run before
# the paddleocr import below — presumably so the native numeric libraries see
# the limits when they are loaded; keep this ordering (TODO confirm).
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["MKL_NUM_THREADS"] = "1"

from paddleocr import PaddleOCR

# Shared OCR engine: built once when the module is imported and reused by
# every caller. CPU only, no angle classifier, recognition batch size of 1.
ocr = PaddleOCR(
    lang="en",
    use_gpu=False,
    use_angle_cls=False,
    rec_batch_num=1,
    show_log=False,
)


def run_ocr(image_path: str):
    """Run OCR on one image and collect the recognised text fragments.

    Args:
        image_path: Image location, passed unchanged to ``ocr.ocr``.

    Returns:
        A list of ``{"text": ..., "confidence": float}`` dicts, one per
        recognised entry, in the order the engine reported them. The list is
        empty when nothing was recognised.
    """
    # The module-level engine is created with use_angle_cls=False, so asking
    # for classification here could never take effect; keep the flags in sync
    # instead of triggering PaddleOCR's "classifier not initialized" warning.
    result = ocr.ocr(image_path, cls=False)

    extracted_text = []
    # PaddleOCR reports a page without any detection as None (the overall
    # result is then e.g. [None]); skip falsy results/pages rather than
    # iterating them, which would raise TypeError.
    for line in result or []:
        if not line:
            continue
        for word_info in line:
            # Each entry is (bounding box, (text, score)); only the
            # recognition part is used.
            recognition = word_info[1]
            extracted_text.append({
                "text": recognition[0],
                "confidence": float(recognition[1]),
            })

    return extracted_text