Spaces:

harshvisualz
/

PeddleOCR

Build error

File size: 4,267 Bytes

import os
import shutil
import tempfile

import cv2
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from paddleocr import PaddleOCR
from fastapi import FastAPI, UploadFile, File
from fastapi.responses import FileResponse, JSONResponse
from fastapi.middleware.cors import CORSMiddleware

# Publish the model and Matplotlib config directories through the process
# environment in a single update.
# NOTE(review): matplotlib.pyplot and paddleocr are already imported by the
# time this runs; a library that reads its variable only at import time would
# not see these values -- confirm they actually take effect.
os.environ.update(
    {
        "PADDLEOCR_HOME": "/app/paddleocr_models",
        "MPLCONFIGDIR": "/app/tmp/mpl_config",
    }
)

# ASGI application object; every route in this module is registered on it.
app = FastAPI()

# CORS options passed to the middleware: any origin, method and header is
# accepted, and credentialed requests are allowed.
# NOTE(review): FastAPI's CORS documentation advises listing origins
# explicitly when allow_credentials is True -- confirm the wildcard is
# intended for this deployment.
_cors_options = dict(
    allow_origins=['*'],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
app.add_middleware(CORSMiddleware, **_cors_options)

# Initialize PaddleOCR once at import time; the request handlers share this
# engine. The detection, recognition and classification models are loaded
# from fixed sub-directories of one base directory.
_MODEL_ROOT = '/app/paddleocr_models'

ocr = PaddleOCR(
    lang='en',
    use_angle_cls=True,
    det_model_dir=f'{_MODEL_ROOT}/det',
    rec_model_dir=f'{_MODEL_ROOT}/rec',
    cls_model_dir=f'{_MODEL_ROOT}/cls',
)

# Earlier configuration without explicit model directories, kept for reference:
# ocr = PaddleOCR(use_angle_cls=True, lang="en", det_db_box_thresh=0.5)

@app.get("/")
def read_root():
    """Return a fixed greeting payload for the root route."""
    greeting = {"message": "Hello World"}
    return JSONResponse(greeting)


# Directory holding transient uploads and the rendered overlay image.
_UPLOAD_DIR = "uploads"
# File name of the annotated overlay, reported back to the client.
_RESULT_NAME = "result.png"


def _upload_suffix(filename):
    """Return a filesystem-safe extension (e.g. ".png") from *filename*, or ""."""
    ext = os.path.splitext(os.path.basename(filename or ""))[1]
    # Only a plain alphanumeric extension is kept; anything else is dropped.
    return ext if ext[1:].isalnum() else ""


def _parse_ocr_page(ocr_results):
    """Turn the first page of an OCR result into (text, box, score) triples.

    Each entry of the page is indexed as ``line[0]`` (bounding-box points) and
    ``line[1]`` (a ``(text, score)`` pair). An absent, ``None`` or empty page
    yields an empty list instead of raising.
    """
    page = ocr_results[0] if ocr_results else None
    if not page:
        return []
    return [
        # float() keeps the score JSON-serialisable even if it is a NumPy scalar.
        (line[1][0], np.array(line[0]), float(line[1][1]))
        for line in page
    ]


def _save_annotated_image(image, detections, output_path):
    """Draw every detection's box and text over *image* and save it as a file."""
    # An explicit figure/axes pair avoids pyplot's global "current figure",
    # which would be shared between requests handled at the same time.
    fig, ax = plt.subplots(figsize=(10, 10))
    try:
        ax.imshow(image)
        for text, box, _score in detections:
            # Repeat the first corner so the four-point outline is closed.
            outline = np.vstack([box[:4], box[:1]])
            ax.plot(outline[:, 0], outline[:, 1], 'r-')
            ax.text(box[0][0], box[0][1], text, color='blue', fontsize=8, fontname='Poppins')
        ax.axis("off")
        fig.tight_layout(pad=2.0)
        fig.savefig(output_path, bbox_inches='tight')
    finally:
        # Always release the figure, even when drawing or saving fails.
        plt.close(fig)


# Process an uploaded image: OCR it and render an annotated overlay.
@app.post("/process_image")
def load_image(file: UploadFile = File(...)):
    """Run OCR on an uploaded image and save an annotated copy.

    Args:
        file: The uploaded image. Its client-supplied name is used only to
            pick a file extension, never as a path.

    Returns:
        A JSON response with ``message``, ``image_path`` (name of the overlay
        to request from ``/get_image``), ``extracted_text`` (``[text, score]``
        pairs) and ``text`` (all recognised text joined by spaces). If the
        upload cannot be decoded as an image, a 400 response with only
        ``message`` is returned.
    """
    os.makedirs(_UPLOAD_DIR, exist_ok=True)

    # Store the upload under a generated name: the client's filename is
    # untrusted (it may contain "../" or collide with the overlay file).
    with tempfile.NamedTemporaryFile(
        dir=_UPLOAD_DIR, suffix=_upload_suffix(file.filename), delete=False
    ) as tmp:
        shutil.copyfileobj(file.file, tmp)
        image_path = tmp.name

    try:
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals an undecodable file by returning None.
            return JSONResponse(
                status_code=400,
                content={"message": "Uploaded file is not a readable image"},
            )
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        detections = _parse_ocr_page(ocr.ocr(image_path))
    finally:
        # The upload is only needed while decoding and running OCR.
        if os.path.exists(image_path):
            os.remove(image_path)

    final_text = [(text, score) for text, _box, score in detections]

    print("🔹 Extracted Text from Invoice:")
    for text, score in final_text:
        print(f"{text} (Confidence: {score:.2f})")

    collected_text = " ".join(text for text, _ in final_text).strip()

    # NOTE: the overlay name is fixed, so each request replaces the previous
    # overlay; kept because clients are told this name in "image_path".
    _save_annotated_image(image, detections, os.path.join(_UPLOAD_DIR, _RESULT_NAME))

    return JSONResponse({
        "message": "Image processed successfully",
        "image_path": _RESULT_NAME,
        "extracted_text": final_text,
        "text": collected_text,
    })

@app.get("/get_image")
def get_image(input_path: str):
    """Serve a file from the ``uploads`` directory.

    Args:
        input_path: Path of the requested file, relative to ``uploads``.

    Returns:
        A ``FileResponse`` for the file, or a JSON body with
        ``"message": "Image not found"`` when the path does not name a regular
        file inside ``uploads``. The not-found response deliberately keeps its
        original status code so existing clients see no change.
    """
    not_found = JSONResponse({
        "message": "Image not found"
    })

    uploads_root = os.path.realpath("uploads")
    try:
        candidate = os.path.realpath(os.path.join(uploads_root, input_path))
    except ValueError:
        # Raised for paths the OS cannot represent (e.g. embedded NUL bytes).
        return not_found

    # Resolve symlinks and ".." first, then require the result to stay inside
    # the uploads directory; otherwise "../" could expose arbitrary files.
    if not candidate.startswith(uploads_root + os.sep):
        return not_found
    # A directory would pass an existence check but cannot be sent as a file.
    if not os.path.isfile(candidate):
        return not_found
    return FileResponse(candidate)

# Runs once when the module is imported, i.e. before any image has been
# processed, so the message reports readiness rather than completed work.
print("\n🔹 OCR service initialised and ready to process images.")