File size: 1,335 Bytes
affeefc
 
f73e9d3
affeefc
 
 
 
 
4e64270
f73e9d3
4e64270
f73e9d3
 
4e64270
f73e9d3
4e64270
f73e9d3
4e64270
affeefc
 
 
f73e9d3
affeefc
 
 
 
f73e9d3
affeefc
f73e9d3
 
affeefc
f73e9d3
 
4e64270
affeefc
f73e9d3
affeefc
 
 
 
 
 
4e64270
affeefc
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import io
import logging

import numpy as np
from fastapi import FastAPI, UploadFile, File
from paddleocr import PaddleOCR
from PIL import Image

# ASGI application object; the route decorators further down register on it.
app = FastAPI()

# ---------------------------------------------------------
# OCR ENGINE - SHARED INSTANCE, STANDARD CONFIGURATION
# ---------------------------------------------------------
# Created once at import time and reused by every request.
# Apart from the three options below, the library defaults are kept on
# purpose: they were judged more robust for standard invoices than the
# aggressive "High Precision" settings.
#   lang="en"           -> English recognition model
#   use_gpu=False       -> GPU disabled
#   use_angle_cls=True  -> angle classifier stays on to cope with rotated pages
ocr = PaddleOCR(lang="en", use_gpu=False, use_angle_cls=True)

@app.get("/")
def home():
    """Answer GET / with a fixed status payload (simple liveness probe)."""
    payload = {"status": "Standard OCR Ready"}
    return payload

@app.post("/ocr")
async def get_ocr(file: UploadFile = File(...)):
    """Run OCR on an uploaded image and return the recognised text.

    Args:
        file: The uploaded file; it must be an image that Pillow can open.

    Returns:
        ``{"text": <str>}`` where the recognised lines are joined with
        newlines (an empty string when nothing was detected).  On any
        failure the handler does not raise; it returns
        ``{"text": "", "error": <message>}`` so the response shape stays
        stable for existing clients.
    """
    try:
        # 1. Decode the upload.  The context manager releases Pillow's
        #    handle on the image as soon as the pixel data has been copied
        #    into the NumPy array.
        content = await file.read()
        with Image.open(io.BytesIO(content)) as image:
            rgb_array = np.array(image.convert("RGB"))

        # Pillow yields RGB, whereas PaddleOCR's own image loading goes
        # through OpenCV (BGR channel order).  Reverse the channel axis so
        # the engine sees the same layout it gets from its native loaders;
        # ascontiguousarray avoids handing a negative-stride view to
        # OpenCV-backed code.
        img_array = np.ascontiguousarray(rgb_array[:, :, ::-1])

        # 2. Run OCR (standard mode); cls=True enables the rotation check.
        # NOTE(review): this is a synchronous call inside an async handler,
        # so the event loop is busy while it runs.  Off-loading it to a
        # worker thread would first require confirming that the shared
        # engine tolerates concurrent calls.
        result = ocr.ocr(img_array, cls=True)

        # 3. Extract text.  result[0] holds the detections for the single
        #    input image and is empty/None when nothing was found; each
        #    detection's text is read from line[1][0].
        page = result[0] if result else None
        if not page:
            return {"text": ""}
        return {"text": "\n".join(line[1][0] for line in page)}

    except Exception as e:
        # Top-level boundary: keep the "never raise" response contract, but
        # record the traceback so failures are diagnosable server-side.
        logging.getLogger(__name__).exception("OCR request failed")
        return {"text": "", "error": str(e)}