import numpy as np
import random
from paddleocr import PaddleOCR
from difflib import SequenceMatcher

# Module-wide PaddleOCR engine, built once at import time and reused by every
# call below: English language, angle classification requested, GPU disabled.
ocr = PaddleOCR(
    lang='en',
    use_gpu=False,
    use_angle_cls=True,
)

def calculate_similarity(text1, text2):
    """Return the similarity ratio of two strings, ignoring case and outer whitespace.

    Both inputs are lower-cased and stripped before being compared with
    ``difflib.SequenceMatcher``. The result is a float in ``[0, 1]``;
    ``1.0`` means the normalized strings are identical.
    """
    normalized_first = text1.lower().strip()
    normalized_second = text2.lower().strip()
    matcher = SequenceMatcher(None, normalized_first, normalized_second)
    return matcher.ratio()

def _is_detection(item):
    """Return True if *item* looks like one detection: ``[box, (text, score)]``."""
    return (
        isinstance(item, (list, tuple))
        and len(item) == 2
        and isinstance(item[1], (list, tuple))
        and len(item[1]) == 2
        and isinstance(item[1][0], str)
    )


def _extract_text_lines(results):
    """Collect the recognized strings from an ``ocr.ocr()`` result.

    Two layouts are accepted:

    * per-image (newer PaddleOCR 2.x): ``[[det, det, ...], ...]``, where an
      image with no detected text is represented by ``None``;
    * legacy flat: ``[det, det, ...]``.

    Each ``det`` is ``[box, (text, score)]``.
    """
    text_lines = []
    for entry in results or []:
        if not entry:
            # Image with no detections (None or an empty list).
            continue
        if _is_detection(entry):
            # Legacy flat layout: the entry itself is a detection.
            text_lines.append(entry[1][0])
        else:
            # Per-image layout: the entry is the list of detections of one image.
            text_lines.extend(det[1][0] for det in entry if _is_detection(det))
    return text_lines


def paddleocr_with_accuracy(image_path, correct_text):
    """Run OCR on an image and report how closely the text matches a reference.

    The recognized lines are joined with newlines and compared to
    *correct_text* using ``calculate_similarity``. The detected text, the
    accuracy and the pipeline integration score are printed.

    Args:
        image_path: Path of the image file handed to the module-level ``ocr``
            engine.
        correct_text: Ground-truth text the OCR output is scored against.

    Returns:
        The accuracy ratio as a float, or ``None`` if OCR or scoring failed
        (the error is printed rather than raised).
    """
    try:
        # Perform OCR on the image file path
        results = ocr.ocr(image_path, cls=True)

        # Extract recognized text lines; an image without text yields "".
        detected_text = "\n".join(_extract_text_lines(results))

        # Calculate accuracy score as similarity ratio
        accuracy = calculate_similarity(detected_text, correct_text)

        # Simulate pipeline integration score (here same as accuracy)
        pipeline_score = accuracy

        print("OCR Detected Text:\n", detected_text)
        print(f"\nAccuracy: {accuracy:.2%}")
        print(f"Pipeline Integration Score: {pipeline_score:.2%}")

        return accuracy

    except Exception as e:
        # Deliberate best-effort boundary: report the failure instead of
        # crashing the calling script.
        print(f"PaddleOCR Error: {e}")
        return None

# Example usage: runs only when the file is executed as a script.
if __name__ == "__main__":
    # Placeholder inputs -- replace both with your own image path and the
    # exact text that image is expected to contain.
    ground_truth_text = "Enter the exact expected text from the image here."
    image_file = "your_image.jpg"

    paddleocr_with_accuracy(image_file, ground_truth_text)