Spaces:
Sleeping
Sleeping
| from paddleocr import PaddleOCR | |
| import cv2 | |
def ocr_with_paddle(img_path):
    """Run PaddleOCR on an image file and return the recognized text.

    The image is loaded with OpenCV, passed to an English PaddleOCR
    pipeline (angle classifier enabled), and the text of every detected
    line is joined with single spaces.

    Args:
        img_path: Path to the image file to read.

    Returns:
        The recognized text as one whitespace-stripped string, or an empty
        string when nothing was detected.

    Raises:
        ValueError: If OpenCV could not read an image from ``img_path``.
    """
    # cv2.imread signals failure by returning None instead of raising, so
    # check it here, before paying for model loading.
    img = cv2.imread(img_path)
    if img is None:
        raise ValueError(f"Could not read image: {img_path!r}")

    ocr = PaddleOCR(lang='en', use_angle_cls=True)
    result = ocr.ocr(img)

    # NOTE(review): assumes the PaddleOCR 2.x output format -- one entry per
    # input image, each entry a list of [bounding_box, (text, confidence)]
    # detections, or None when no text was found. Confirm against the
    # installed PaddleOCR version.
    if not result or not result[0]:
        return ''

    texts = []
    for detection in result[0]:
        try:
            # detection[0] is the bounding box; detection[1] is (text, score).
            text = detection[1][0]
        except (IndexError, TypeError):
            print(f"Skipping invalid word_info: {detection}")
            continue
        texts.append(str(text))

    return ' '.join(texts).strip()
# Example usage: run OCR on a sample invoice image and print the extracted text.
img_path = 'invoice-c56a1861.png'
text = ocr_with_paddle(img_path=img_path)
print(text)