File size: 4,267 Bytes
1f215d0
8f08ada
 
 
 
 
 
769c5ec
8f08ada
 
 
 
5423de8
 
1f215d0
 
 
 
 
 
 
 
 
 
 
769c5ec
5423de8
 
 
769c5ec
 
 
 
8f08ada
1f215d0
 
 
 
 
 
 
 
 
8f08ada
1f215d0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
329024f
 
 
1f215d0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ab9a75d
1f215d0
ab9a75d
1f215d0
 
ab9a75d
 
1f215d0
 
 
 
 
17996c7
1f215d0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ab9a75d
 
1f215d0
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
import logging
import os
import shutil
import uuid

import cv2
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from paddleocr import PaddleOCR
from fastapi import FastAPI, UploadFile, File
from fastapi.responses import FileResponse, JSONResponse
from fastapi.middleware.cors import CORSMiddleware

# Environment variables presumably consumed by PaddleOCR and matplotlib.
# NOTE(review): paddleocr and matplotlib are already imported above this
# point; if either library reads its variable at import time, these
# assignments take effect too late — confirm, and move them ahead of the
# imports if so.
os.environ["PADDLEOCR_HOME"] = "/app/paddleocr_models"
os.environ["MPLCONFIGDIR"] = "/app/tmp/mpl_config"

# Application object; the route decorators below register handlers on it.
app = FastAPI()

# CORS: accept any origin, method and header.
# NOTE(review): wildcard origins are combined with allow_credentials=True;
# the FastAPI CORS documentation advises listing origins explicitly when
# credentials are allowed — confirm this is intended.
app.add_middleware(
    CORSMiddleware,
    allow_origins=['*'],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Shared OCR engine, constructed once at import time with explicit model
# directories under /app/paddleocr_models and reused by the request handlers.
ocr = PaddleOCR(
    det_model_dir='/app/paddleocr_models/det',
    rec_model_dir='/app/paddleocr_models/rec',
    cls_model_dir='/app/paddleocr_models/cls',
    use_angle_cls=True,
    lang='en',
)


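# Alternative PaddleOCR initialization without explicit model directories
# (kept for reference; the active engine is the `ocr` object created above):
# ocr = PaddleOCR(use_angle_cls=True, lang="en", det_db_box_thresh=0.5)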

@app.get("/")
def read_root():
    """Return a fixed greeting as a JSON body."""
    greeting = {"message": "Hello World"}
    return JSONResponse(greeting)


# --- /process_image: OCR an uploaded image and render an annotated copy ---

# Directory that receives uploads and the rendered result image.
_UPLOAD_DIR = "uploads"
# Fixed file name of the annotated image reported back to the client.
_RESULT_NAME = "result.png"

_logger = logging.getLogger(__name__)


def _safe_upload_path(filename):
    """Return a unique path inside the upload directory for an incoming file.

    Only the extension of the client-supplied name is kept, so the name can
    neither point outside the upload directory nor collide with the upload of
    another request.
    """
    ext = os.path.splitext(os.path.basename(filename or ""))[1].lower()
    if not ext[1:].isalnum():
        ext = ""
    return os.path.join(_UPLOAD_DIR, f"{uuid.uuid4().hex}{ext}")


def _parse_ocr_page(ocr_results):
    """Flatten the first page of an OCR result into (text, box, score) tuples.

    Each entry of the page is indexed as ``[box, (text, score)]``. A missing
    or empty first page (nothing detected) yields an empty list instead of
    raising. Scores are converted to plain ``float`` so they serialize to JSON.
    """
    if not ocr_results or not ocr_results[0]:
        return []
    return [
        (line[1][0], np.array(line[0]), float(line[1][1]))
        for line in ocr_results[0]
    ]


def _render_annotated_image(image, detections, output_path):
    """Draw each detection's outline and text over *image* and save the figure."""
    fig, ax = plt.subplots(figsize=(10, 10))
    try:
        ax.imshow(image)
        for text, box, _score in detections:
            # Repeat the first corner so the four-point outline is closed.
            outline = box[[0, 1, 2, 3, 0]]
            ax.plot(outline[:, 0], outline[:, 1], "r-")
            # Escape "$" so OCR text (e.g. prices) is drawn literally rather
            # than being interpreted as matplotlib math text.
            label = text.replace("$", r"\$")
            ax.text(
                box[0][0],
                box[0][1],
                label,
                color="blue",
                fontsize=8,
                fontname="Poppins",
            )
        ax.axis("off")
        fig.tight_layout(pad=2.0)
        fig.savefig(output_path, bbox_inches="tight")
    finally:
        # Always release the figure, even if drawing or saving failed.
        plt.close(fig)


@app.post("/process_image")
def load_image(file: UploadFile = File(...)):
    """Run OCR on an uploaded image and save an annotated copy.

    The upload is written to a uniquely named temporary file in ``uploads/``,
    passed to the module-level ``ocr`` engine, and removed again before the
    function returns (also on failure). The annotated image is written to
    ``uploads/result.png``.

    Args:
        file: The uploaded image.

    Returns:
        A JSON response with ``message``, ``image_path`` (always
        ``"result.png"``), ``extracted_text`` (list of ``[text, score]``
        pairs) and ``text`` (all texts joined by single spaces). If the
        upload cannot be decoded as an image, a 400 response with a
        ``message`` is returned instead.
    """
    os.makedirs(_UPLOAD_DIR, exist_ok=True)
    image_path = _safe_upload_path(file.filename)

    try:
        with open(image_path, "wb") as f:
            shutil.copyfileobj(file.file, f)

        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals an unreadable file by returning None.
            return JSONResponse(
                status_code=400,
                content={"message": "Uploaded file is not a readable image"},
            )
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        detections = _parse_ocr_page(ocr.ocr(image_path))
        for text, box, score in detections:
            _logger.debug(
                "OCR text=%r score=%.2f box=%s", text, score, box.tolist()
            )

        # NOTE: the result name is fixed because clients are told to fetch
        # "result.png"; concurrent requests therefore overwrite each other's
        # annotated image.
        _render_annotated_image(
            image, detections, os.path.join(_UPLOAD_DIR, _RESULT_NAME)
        )
    finally:
        if os.path.exists(image_path):
            os.remove(image_path)

    final_text = [(text, score) for text, _box, score in detections]
    collected_text = " ".join(text for text, _score in final_text).strip()

    return JSONResponse({
        "message": "Image processed successfully",
        "image_path": _RESULT_NAME,
        "extracted_text": final_text,
        "text": collected_text,
    })

@app.get("/get_image")
def get_image(input_path: str):
    """Serve a file stored in the ``uploads`` directory.

    Args:
        input_path: Path of the requested file, relative to ``uploads/``.

    Returns:
        A ``FileResponse`` for the file, or a 404 JSON response with the
        message ``"Image not found"`` when the path does not name a regular
        file inside ``uploads/``.
    """
    uploads_dir = os.path.realpath("uploads")
    try:
        # Resolve ".." segments and symlinks before checking containment so
        # the client-supplied path cannot reach files outside uploads/.
        requested = os.path.realpath(os.path.join(uploads_dir, input_path))
    except ValueError:
        # Raised for paths the OS cannot represent (e.g. embedded NUL bytes).
        requested = ""

    inside_uploads = requested.startswith(uploads_dir + os.sep)
    if not inside_uploads or not os.path.isfile(requested):
        return JSONResponse(
            status_code=404,
            content={"message": "Image not found"},
        )
    return FileResponse(requested)

# NOTE(review): this statement is at module top level, so it runs once when the
# module is imported rather than after a request has been processed — confirm
# the startup message is intended.
print("\n🔹 Processing complete! Annotated image and extracted data saved.")