Spaces:

nit454
/

paddle_ocr_testing

Build error

File size: 1,380 Bytes

4fedfa8
 
 
8df9245
4fedfa8
8df9245
4fedfa8
 
8df9245
 
4fedfa8
8df9245
4fedfa8
8df9245
 
4fedfa8
8df9245
4fedfa8
 
 
8df9245
 
4fedfa8
8df9245
 
4fedfa8
8df9245
 
 
4fedfa8
 
8df9245
4fedfa8
8df9245
 
 
 
4fedfa8
8df9245

import numpy as np
import random
from paddleocr import PaddleOCR
from difflib import SequenceMatcher

# Module-level OCR engine: English language model, angle classifier enabled,
# GPU disabled (runs on CPU).
ocr = PaddleOCR(
    lang='en',
    use_angle_cls=True,
    use_gpu=False,
)

def calculate_similarity(text1, text2):
    """Return the similarity ratio of two strings as a float in [0, 1].

    Each input is lower-cased and stripped of leading/trailing whitespace
    before the comparison, so differences in case or surrounding whitespace
    do not lower the score. The ratio is computed by
    ``difflib.SequenceMatcher``; 1.0 means the normalized strings are equal.
    """
    normalized_first, normalized_second = (
        raw.lower().strip() for raw in (text1, text2)
    )
    matcher = SequenceMatcher(a=normalized_first, b=normalized_second)
    return matcher.ratio()

def _is_text_line(entry):
    """Return True if *entry* has the ``[box, (text, score)]`` shape of one OCR line."""
    if not isinstance(entry, (list, tuple)) or len(entry) != 2:
        return False
    payload = entry[1]
    return (
        isinstance(payload, (list, tuple))
        and len(payload) == 2
        and isinstance(payload[0], str)
    )


def _extract_text_lines(results):
    """Flatten an ``ocr.ocr()`` result into a list of recognized text strings."""
    text_lines = []
    for item in results or []:
        if not item:
            # A page on which nothing was detected is reported as None/empty.
            continue
        if _is_text_line(item):
            # Flat layout: the result is directly a list of OCR lines.
            text_lines.append(item[1][0])
        else:
            # Nested layout: one list of OCR lines per input image.
            text_lines.extend(
                line[1][0] for line in item if _is_text_line(line)
            )
    return text_lines


def paddleocr_with_accuracy(image_path, correct_text):
    """Run OCR on an image and print how closely it matches the expected text.

    The recognized lines are joined with newlines and compared against
    ``correct_text`` using ``calculate_similarity``. The detected text, the
    accuracy, and the pipeline integration score (currently identical to the
    accuracy) are printed to stdout. Any exception raised along the way is
    caught and reported as a ``PaddleOCR Error: ...`` line instead of
    propagating.

    Args:
        image_path: Image input passed straight to ``ocr.ocr``.
        correct_text: Ground-truth text the OCR output is scored against.

    Returns:
        None. All output is printed.
    """
    try:
        # Perform OCR on the image file path
        results = ocr.ocr(image_path, cls=True)

        # The result may be nested per image and may contain None for an
        # image without detections, so it is flattened before joining.
        detected_text_lines = _extract_text_lines(results)
        detected_text = "\n".join(detected_text_lines)

        # Calculate accuracy score as similarity ratio
        accuracy = calculate_similarity(detected_text, correct_text)

        # Simulate pipeline integration score (here same as accuracy)
        pipeline_score = accuracy

        print("OCR Detected Text:\n", detected_text)
        print(f"\nAccuracy: {accuracy:.2%}")
        print(f"Pipeline Integration Score: {pipeline_score:.2%}")

    except Exception as e:
        # Deliberate best-effort boundary: report the failure and carry on.
        print(f"PaddleOCR Error: {str(e)}")

# Example usage: score the OCR output for one image against its expected text.
if __name__ == "__main__":
    # Placeholder inputs -- replace with a real image path and its transcription.
    sample_image = "your_image.jpg"
    expected_text = """Enter the exact expected text from the image here."""

    paddleocr_with_accuracy(image_path=sample_image, correct_text=expected_text)