"""Run PaddleOCR on an image and score the output against a reference text."""

import random
from difflib import SequenceMatcher

import numpy as np
from paddleocr import PaddleOCR

# Initialize PaddleOCR with English language (CPU)
ocr = PaddleOCR(use_angle_cls=True, lang='en', use_gpu=False)


def calculate_similarity(text1, text2):
    """Return the similarity ratio of two strings as a float in [0, 1].

    Both strings are lower-cased and stripped of surrounding whitespace
    before being compared with ``difflib.SequenceMatcher``.
    """
    return SequenceMatcher(
        None, text1.lower().strip(), text2.lower().strip()
    ).ratio()


def _is_text_line(entry):
    """Return True if *entry* looks like one ``[box, (text, score)]`` detection."""
    return (
        isinstance(entry, (list, tuple))
        and len(entry) == 2
        and isinstance(entry[1], (list, tuple))
        and len(entry[1]) == 2
        and isinstance(entry[1][0], str)
    )


def _extract_text_lines(results):
    """Collect the recognized strings from the value returned by ``ocr.ocr``.

    Two result shapes are accepted, because PaddleOCR releases differ
    (NOTE(review): confirm against the installed version):

    * flat:   ``[[box, (text, score)], ...]``
    * nested: ``[[[box, (text, score)], ...], ...]`` -- one list per image,
      where an image without any detected text is reported as ``None``.
    """
    if not results:
        return []

    texts = []
    for entry in results:
        if not entry:
            # Image/page on which nothing was detected.
            continue
        if _is_text_line(entry):
            texts.append(entry[1][0])
        else:
            # Per-image list of detections.
            texts.extend(line[1][0] for line in entry)
    return texts


def paddleocr_with_accuracy(image_path, correct_text):
    """Run OCR on *image_path* and print how closely it matches *correct_text*.

    Prints the detected text, the similarity-based accuracy and the
    "pipeline integration score" (currently identical to the accuracy).
    Any exception raised while running OCR or scoring is caught and
    reported on stdout instead of propagating.

    Args:
        image_path: Image input handed to ``ocr.ocr``.
        correct_text: Ground-truth text expected in the image.

    Returns:
        None.
    """
    try:
        # Perform OCR on the image file path
        results = ocr.ocr(image_path, cls=True)

        # Extract recognized text lines
        detected_text = "\n".join(_extract_text_lines(results))

        # Calculate accuracy score as similarity ratio
        accuracy = calculate_similarity(detected_text, correct_text)

        # Simulate pipeline integration score (here same as accuracy)
        pipeline_score = accuracy

        print("OCR Detected Text:\n", detected_text)
        print(f"\nAccuracy: {accuracy:.2%}")
        print(f"Pipeline Integration Score: {pipeline_score:.2%}")
    except Exception as e:
        # Deliberate best-effort boundary: report the failure, do not crash.
        print(f"PaddleOCR Error: {str(e)}")


# Example usage
if __name__ == "__main__":
    image_file = "your_image.jpg"  # replace with your image path
    ground_truth_text = """Enter the exact expected text from the image here."""
    paddleocr_with_accuracy(image_file, ground_truth_text)