Spaces:
Sleeping
Sleeping
File size: 2,242 Bytes
e84d7bb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
import os
import easyocr
import logging
# Module-level logging setup: every record at INFO level or above is appended
# to logs/ekyc_logs.log, relative to the current working directory.
log_dir = "logs"
logging_str = "[%(asctime)s: %(levelname)s: %(module)s]: %(message)s"

# The log directory must exist before basicConfig opens the log file inside it.
os.makedirs(log_dir, exist_ok=True)

logging.basicConfig(
    filename=os.path.join(log_dir, "ekyc_logs.log"),
    level=logging.INFO,
    format=logging_str,
    filemode="a",  # append, so earlier runs' records are kept
)
# def extract_text(image_path, confidence_threshold=0.8):
# # Initialize EasyOCR reader
# reader = easyocr.Reader(['en'])
# # Read the image and extract text
# result = reader.readtext(image_path)
# # Filter the extracted text based on confidence score
# filtered_texts = {}
# for text in result:
# bounding_box, recognized_text, confidence = text
# if confidence > confidence_threshold:
# filtered_texts[recognized_text] = bounding_box
# return filtered_texts
def extract_text(image_path, confidence_threshold=0.3, languages=None):
    """
    Run EasyOCR on an image and return the confidently recognized text.

    Parameters:
    - image_path: Image to read; passed unchanged to ``reader.readtext``
      (typically a path to the image file).
    - confidence_threshold (float): A detection is kept only when its
      confidence is strictly greater than this value. Default is 0.3.
    - languages (list): Language codes handed to ``easyocr.Reader``.
      Default is ['en'].

    Returns:
    - str: The kept text fragments in OCR order, each followed by '|', with a
      leading '|' as well (e.g. "|NAME|1234|"). A bare "|" is returned when no
      detection passes the threshold, and an empty string when reading the
      image fails.

    Raises:
    - Errors raised while constructing ``easyocr.Reader`` propagate to the
      caller. Errors raised while reading or filtering the image are printed,
      logged with their traceback, and converted into the "" return value.
    """
    logging.info("Text Extraction Started...")

    # Build the default list per call instead of sharing one mutable default
    # object between every invocation.
    if languages is None:
        languages = ['en']

    # Initialize EasyOCR reader. This stays outside the try block on purpose:
    # a reader that cannot be constructed is reported to the caller as-is.
    reader = easyocr.Reader(languages)

    try:
        logging.info("Inside Try-Catch...")
        # Read the image; each detection unpacks as
        # (bounding_box, recognized_text, confidence).
        result = reader.readtext(image_path)
        kept_fragments = [
            recognized_text
            for _, recognized_text, confidence in result
            if confidence > confidence_threshold
        ]
        # The empty first and last items produce the leading and trailing '|'
        # that this function has always emitted (and a lone "|" when nothing
        # was kept).
        return "|".join(["", *kept_fragments, ""])
    except Exception as e:
        print("An error occurred during text extraction:", e)
        # logging.exception records at ERROR level and includes the traceback.
        logging.exception("An error occurred during text extraction: %s", e)
        return ""
# Filter the extracted text based on confidence score
|