"""FastAPI service that runs PaddleOCR on an uploaded image.

Endpoints:
    GET  /               -- simple liveness message.
    POST /process_image  -- OCR an uploaded image, save an annotated copy as
                            ``uploads/result.png`` and return the recognized
                            text with confidence scores.
    GET  /get_image      -- download a file from the ``uploads`` directory.
"""
import os
import shutil
import uuid

# These must be assigned BEFORE the third-party imports below: matplotlib
# resolves MPLCONFIGDIR while it is being imported, so setting it afterwards
# has no effect.  PADDLEOCR_HOME is set early for the same reason
# (NOTE(review): confirm the installed paddleocr version honors it).
os.environ["PADDLEOCR_HOME"] = "/app/paddleocr_models"
os.environ["MPLCONFIGDIR"] = "/app/tmp/mpl_config"

import cv2  # noqa: E402
import numpy as np  # noqa: E402
import matplotlib.pyplot as plt  # noqa: E402
import pandas as pd  # noqa: E402
from paddleocr import PaddleOCR  # noqa: E402
from fastapi import FastAPI, UploadFile, File  # noqa: E402
from fastapi.responses import FileResponse, JSONResponse  # noqa: E402
from fastapi.middleware.cors import CORSMiddleware  # noqa: E402

# Directory that holds the temporary uploads and the annotated result image.
UPLOAD_DIR = "uploads"
# File name (inside UPLOAD_DIR) of the annotated image; also returned to the
# client as "image_path".
RESULT_NAME = "result.png"

app = FastAPI()

# NOTE(review): wildcard origins combined with allow_credentials=True is very
# permissive; confirm this is intended outside of development.
app.add_middleware(
    CORSMiddleware,
    allow_origins=['*'],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Single shared OCR engine, loaded from the model directories baked into /app.
ocr = PaddleOCR(
    det_model_dir='/app/paddleocr_models/det',
    rec_model_dir='/app/paddleocr_models/rec',
    cls_model_dir='/app/paddleocr_models/cls',
    use_angle_cls=True,
    lang='en',
)


@app.get("/")
def read_root():
    """Return a static greeting; usable as a liveness check."""
    return JSONResponse({
        "message": "Hello World"
    })


def _render_annotated_image(image, text_and_boxes, output_path):
    """Draw every OCR box and its text over *image* and save to *output_path*.

    Uses explicit figure/axes objects instead of pyplot's implicit "current
    figure", and always closes the figure so a failure cannot leak it.
    """
    fig, ax = plt.subplots(figsize=(10, 10))
    try:
        ax.imshow(image)
        for text, box, _score in text_and_boxes:
            # Repeat the first corner at the end so the outline is closed.
            outline = np.vstack([box, box[:1]])
            ax.plot(outline[:, 0], outline[:, 1], 'r-')
            ax.text(box[0][0], box[0][1], text,
                    color='blue', fontsize=8, fontname='Poppins')
        ax.axis("off")
        fig.tight_layout(pad=2.0)
        fig.savefig(output_path, bbox_inches='tight')
    finally:
        plt.close(fig)


@app.post("/process_image")
def load_image(file: UploadFile = File(...)):
    """Run OCR on the uploaded image and save an annotated copy.

    The upload is stored under a server-generated name, read with OpenCV,
    passed to PaddleOCR, and always deleted afterwards.  The annotated image
    is written to ``uploads/result.png``.

    Returns:
        JSONResponse with keys ``message``, ``image_path`` ("result.png"),
        ``extracted_text`` (list of ``[text, score]`` pairs) and ``text``
        (all recognized texts joined with single spaces).  If the upload
        cannot be decoded as an image, a 400 response with only ``message``.
    """
    os.makedirs(UPLOAD_DIR, exist_ok=True)

    # Never build a path from the client-supplied filename: it may contain
    # "../" segments, and a file literally named "result.png" would be
    # overwritten by the annotated image and then deleted by the cleanup.
    # Only a sanitized extension is kept (readers may sniff the type from it).
    extension = os.path.splitext(os.path.basename(file.filename or ""))[1]
    if not extension[1:].isalnum():
        extension = ""
    image_path = os.path.join(
        UPLOAD_DIR, f"upload_{uuid.uuid4().hex}{extension}"
    )

    try:
        with open(image_path, "wb") as f:
            shutil.copyfileobj(file.file, f)

        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals an unreadable/non-image file with None.
            return JSONResponse(
                {"message": "Uploaded file is not a readable image"},
                status_code=400,
            )
        # OpenCV loads BGR; matplotlib expects RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # OCR Processing
        ocr_results = ocr.ocr(image_path)
        print(ocr_results)

        # Treat a missing/None page (e.g. no text detected) as empty
        # instead of crashing on iteration.
        page = (ocr_results[0] if ocr_results else None) or []
        for block in page:
            print(block)

        # Each entry is [box_points, (text, score)].
        texts = [line[1][0] for line in page]
        boxes = [np.array(line[0]) for line in page]
        # float() guarantees the scores are JSON-serializable.
        scores = [float(line[1][1]) for line in page]
        print(texts)

        text_and_boxes = list(zip(texts, boxes, scores))
        final_text = list(zip(texts, scores))

        # Display all results
        for text, box, score in text_and_boxes:
            print(f"Text: {text}")
            print(f"Bounding Box: {box.tolist()}")
            print(f"Score: {score}")
            print("---")

        # Print Extracted Text
        print("šŸ”¹ Extracted Text from Invoice:")
        for text, score in final_text:
            print(f"{text} (Confidence: {score:.2f})")
        collected_text = " ".join(texts).strip()

        # Create a simple dataframe from all OCR text
        print("\nšŸ”¹ Creating a simple data structure from all OCR text")
        df = pd.DataFrame({'text': texts})
        print(df.head())

        _render_annotated_image(
            image, text_and_boxes, os.path.join(UPLOAD_DIR, RESULT_NAME)
        )
    finally:
        # The raw upload is only a scratch file; remove it even on failure.
        if os.path.exists(image_path):
            os.remove(image_path)

    return JSONResponse({
        "message": "Image processed successfully",
        "image_path": RESULT_NAME,
        "extracted_text": final_text,
        "text": collected_text,
    })


@app.get("/get_image")
def get_image(input_path: str):
    """Serve a file stored directly inside the ``uploads`` directory.

    Args:
        input_path: Bare file name (e.g. ``result.png``).  Anything that
            contains directory components is rejected so the endpoint cannot
            be used to read files outside ``uploads``.

    Returns:
        FileResponse for the file, or a 404 JSONResponse with
        ``{"message": "Image not found"}``.
    """
    file_name = os.path.basename(input_path)
    full_path = os.path.join(UPLOAD_DIR, file_name)
    if file_name != input_path or not os.path.isfile(full_path):
        return JSONResponse({
            "message": "Image not found"
        }, status_code=404)
    return FileResponse(full_path)


# NOTE(review): this runs once when the module is imported, not per request.
print("\nšŸ”¹ Processing complete! Annotated image and extracted data saved.")