import cv2
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from paddleocr import PaddleOCR

# Initialize PaddleOCR (English model, with the angle classifier enabled).
ocr = PaddleOCR(use_angle_cls=True, lang='en')

# Load the image that will be used as the background of the annotated plot.
image_path = "image.png"  # Replace with your vendor statement
image = cv2.imread(image_path)
if image is None:
    # cv2.imread signals failure by returning None rather than raising, so
    # fail here with a clear message instead of an opaque cvtColor error.
    raise FileNotFoundError(
        f"Could not read image '{image_path}' (missing file or unsupported format)"
    )
# OpenCV loads images as BGR; matplotlib expects RGB.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# OCR Processing (PaddleOCR reads the file from disk itself).
ocr_results = ocr.ocr(image_path)
print(ocr_results)

if not ocr_results or ocr_results[0] is None:
    # Nothing came back for the image: continue with empty results so the
    # CSV and the figure are still produced.
    texts, boxes, scores = [], [], []
else:
    # Only the first result (the single input image) is used.
    page = ocr_results[0]
    # NOTE(review): `page` is indexed by string keys below, so iterating it
    # presumably prints its keys rather than per-line results - confirm.
    for block in page:
        print(block)
    # Parallel lists: recognized strings, their polygons and confidences.
    texts = page['rec_texts']
    boxes = page['dt_polys']
    scores = page['rec_scores']
print(texts)

# Zip them together so each entry is (text, polygon, confidence).
text_and_boxes = list(zip(texts, boxes, scores))

# Display all results while collecting the (text, score) pairs and the bare
# text column in a single pass.
extracted_text = []
all_text = []
for text, box, score in text_and_boxes:
    print(f"Text: {text}")
    print(f"Bounding Box: {box.tolist()}")  # Convert numpy array to regular list
    print(f"Score: {score}")
    print("---")
    extracted_text.append((text, score))
    all_text.append(text)

# Print Extracted Text
print("🔹 Extracted Text from Invoice:")
for text, score in extracted_text:
    print(f"{text} (Confidence: {score:.2f})")

# Create a simple dataframe from all OCR text and persist it as CSV.
print("\n🔹 Creating a simple data structure from all OCR text")
df = pd.DataFrame({'text': all_text})
print(df.head())
df.to_csv("invoice_extracted_text.csv", index=False)

# Display Image with OCR Text Overlay
plt.figure(figsize=(10, 10))
plt.imshow(image)

# Font settings are identical for every label, so build them once.
csfont = {'fontname': 'Poppins'}

for text, box, score in text_and_boxes:
    # Split the polygon into x / y sequences and repeat the first vertex so
    # the outline is drawn closed.
    xs = [point[0] for point in box]
    ys = [point[1] for point in box]
    xs.append(xs[0])
    ys.append(ys[0])

    # Draw bounding box
    plt.plot(xs, ys, 'r-')

    # Add text annotation anchored at the polygon's first vertex.
    plt.text(xs[0], ys[0], text, color='blue', fontsize=8, **csfont)

plt.axis("off")
plt.tight_layout()
# Save before show(): show() may leave an empty figure behind once closed.
plt.savefig("s3.png", bbox_inches='tight')
plt.show()

print("\n🔹 Processing complete! Annotated image and extracted data saved.")