PeddleOCR / app.py
harshvisualz's picture
Add application file
17996c7
import os
import shutil
import tempfile

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from fastapi import FastAPI, UploadFile, File
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse, JSONResponse
from paddleocr import PaddleOCR
# Point PaddleOCR and matplotlib at directories under /app.
# NOTE(review): these variables are assigned after the imports at the top of
# the file have already executed; confirm both libraries still pick them up
# (matplotlib presumably resolves its config directory at import time).
os.environ.update(
    {
        "PADDLEOCR_HOME": "/app/paddleocr_models",
        "MPLCONFIGDIR": "/app/tmp/mpl_config",
    }
)

app = FastAPI()

# CORS is fully open: any origin, method and header, with credentials allowed.
# NOTE(review): wildcard origins combined with allow_credentials=True is very
# permissive; confirm this is intended outside of a demo deployment.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# One shared OCR engine for the whole process, loading the detection,
# recognition and angle-classifier models from fixed directories.
# An earlier variant relied on default model locations:
#   PaddleOCR(use_angle_cls=True, lang="en", det_db_box_thresh=0.5)
ocr = PaddleOCR(
    lang="en",
    use_angle_cls=True,
    det_model_dir="/app/paddleocr_models/det",
    rec_model_dir="/app/paddleocr_models/rec",
    cls_model_dir="/app/paddleocr_models/cls",
)
@app.get("/")
def read_root():
    """Return a fixed greeting payload for requests to the root path."""
    payload = {"message": "Hello World"}
    return JSONResponse(payload)
# Directory (relative to the working directory) holding uploads and the result.
_UPLOAD_DIR = "uploads"
# The annotated image is always written under this name; the response reports
# it as "image_path" so clients can fetch it through /get_image.
_RESULT_NAME = "result.png"


def _upload_suffix(filename):
    """Return a safe extension (with leading dot) taken from *filename*.

    Only purely alphanumeric extensions are kept; anything else, including a
    missing filename, yields an empty string.
    """
    ext = os.path.splitext(os.path.basename(filename or ""))[1]
    return ext if ext[1:].isalnum() else ""


def _save_annotated_image(image, detections, out_path):
    """Draw each detection's outline and text on *image* and save to *out_path*.

    *detections* is an iterable of ``(text, box, score)`` tuples where *box*
    is an array of ``(x, y)`` corner points.
    """
    # Use an explicit figure/axes instead of pyplot's implicit "current
    # figure" so the drawing calls cannot land on another figure.
    fig, ax = plt.subplots(figsize=(10, 10))
    try:
        ax.imshow(image)
        for text, box, _score in detections:
            # Repeat the first corner at the end to close the polygon.
            outline = np.vstack([box, box[:1]])
            ax.plot(outline[:, 0], outline[:, 1], "r-")
            ax.text(
                box[0][0],
                box[0][1],
                text,
                color="blue",
                fontsize=8,
                fontname="Poppins",
            )
        ax.axis("off")
        fig.tight_layout(pad=2.0)
        fig.savefig(out_path, bbox_inches="tight")
    finally:
        plt.close(fig)


# Load Image
@app.post("/process_image")
def load_image(file: UploadFile = File(...)):
    """Run OCR on an uploaded image and save an annotated copy.

    The upload is stored under a server-generated name inside ``uploads/``
    (the client-supplied filename only contributes its extension), passed to
    the OCR engine, and always deleted afterwards. The annotated image is
    written to ``uploads/result.png``.

    Args:
        file: The uploaded image.

    Returns:
        A JSON response with ``message``, ``image_path`` (``"result.png"``),
        ``extracted_text`` (a list of ``[text, score]`` pairs) and ``text``
        (all recognized text joined by single spaces). If the upload cannot
        be decoded as an image, a 400 response with only ``message``.
    """
    os.makedirs(_UPLOAD_DIR, exist_ok=True)
    # A generated name prevents path traversal through file.filename and
    # keeps an upload called "result.png" from clobbering the output file.
    tmp = tempfile.NamedTemporaryFile(
        dir=_UPLOAD_DIR,
        suffix=_upload_suffix(file.filename),
        delete=False,
    )
    image_path = tmp.name
    try:
        with tmp:
            shutil.copyfileobj(file.file, tmp)

        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals an unreadable/unsupported file with None.
            return JSONResponse(
                {"message": "Uploaded file is not a readable image"},
                status_code=400,
            )
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        ocr_results = ocr.ocr(image_path)
        # The first page can be None (or missing) when no text is detected.
        page = (ocr_results[0] if ocr_results else None) or []
        detections = [
            (line[1][0], np.array(line[0]), float(line[1][1]))
            for line in page
        ]

        _save_annotated_image(
            image, detections, os.path.join(_UPLOAD_DIR, _RESULT_NAME)
        )
    finally:
        # Remove the upload whether or not processing succeeded.
        if os.path.exists(image_path):
            os.remove(image_path)

    texts = [text for text, _box, _score in detections]
    return JSONResponse({
        "message": "Image processed successfully",
        "image_path": _RESULT_NAME,
        "extracted_text": [
            (text, score) for text, _box, score in detections
        ],
        "text": " ".join(texts).strip(),
    })
@app.get("/get_image")
def get_image(input_path: str):
    """Serve a file from the ``uploads`` directory.

    Args:
        input_path: Path of the requested file, relative to ``uploads/``.

    Returns:
        A ``FileResponse`` for the file, or a 404 JSON response with
        ``{"message": "Image not found"}`` when the path does not name a
        regular file inside ``uploads/``.
    """
    not_found = JSONResponse({"message": "Image not found"}, status_code=404)

    uploads_root = os.path.realpath("uploads")
    try:
        candidate = os.path.realpath(os.path.join(uploads_root, input_path))
    except ValueError:
        # Raised for malformed input such as an embedded null byte.
        return not_found

    # Resolve symlinks and ".." first, then require the result to stay inside
    # the uploads directory so requests like "../app.py" cannot escape it.
    if not candidate.startswith(uploads_root + os.sep):
        return not_found
    if not os.path.isfile(candidate):
        return not_found
    return FileResponse(candidate)
# NOTE(review): if this statement sits at module level it runs once when the
# module is imported, not after a request has been processed; presumably a
# leftover from a script version of this file. Confirm it is still wanted.
print("\n🔹 Processing complete! Annotated image and extracted data saved.")