|
|
import cv2
|
|
|
import numpy as np
|
|
|
import pytesseract
|
|
|
from PIL import Image
|
|
|
import re
|
|
|
|
|
|
class PrescriptionOCR:
    """Run OCR on a prescription image and pull out medication-like lines.

    The pipeline is: preprocess the image with OpenCV, run Tesseract on the
    result, scan the recognised text line by line for medication keywords,
    and compute a heuristic 0-100 confidence score for the extraction.
    """

    # A number (optionally with a decimal fraction, so "2.5 mg" is not
    # truncated to "5 mg") followed by a unit.  "cap" and "capsule" are both
    # accepted so "2 capsules" is captured whole instead of as "2 caps".
    _DOSAGE_RE = re.compile(
        r'(\d+(?:\.\d+)?\s*(?:mg|ml|tablet|cap(?:sule)?)s?)',
        re.IGNORECASE,
    )

    # A count word or "<n> times" followed by a per-day phrase.
    _FREQUENCY_RE = re.compile(
        r'(?:once|twice|thrice|\d+\s*times)\s*(?:daily|a day|per day)',
        re.IGNORECASE,
    )

    def __init__(self):
        # Lower-case substrings; a line containing any of them is treated as
        # a medication line by extract_medication_info.
        self.medication_keywords = [
            'tablet', 'capsule', 'mg', 'ml', 'injection', 'dose',
            'twice', 'thrice', 'daily', 'weekly', 'monthly'
        ]

    def preprocess_image(self, image):
        """Enhanced image preprocessing for medical prescriptions.

        Converts the image to grayscale, removes speckle noise, binarises it
        with Otsu's threshold and closes small gaps in the strokes.

        Args:
            image: Anything ``np.array`` accepts; presumably a PIL image
                (3-D arrays are treated as RGB channel order).

        Returns:
            The processed single-channel array, or ``np.array(image)``
            unchanged if any preprocessing step raises.
        """
        try:
            img_array = np.array(image)

            # 3-D arrays carry a channel axis and need collapsing to gray;
            # anything else is passed through as already single-channel.
            if len(img_array.shape) == 3:
                gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)
            else:
                gray = img_array

            # 3x3 median filter to suppress salt-and-pepper noise.
            denoised = cv2.medianBlur(gray, 3)

            # The threshold value 0 is ignored: THRESH_OTSU picks it.
            _, thresh = cv2.threshold(
                denoised, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
            )

            # Morphological closing with a small kernel bridges tiny breaks.
            kernel = np.ones((2, 2), np.uint8)
            processed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)

            return processed

        except Exception as e:
            # Best effort: fall back to the raw image so OCR can still run.
            print(f"Image preprocessing error: {e}")
            return np.array(image)

    def extract_medication_info(self, text: str) -> list:
        """Extract medication information from OCR text.

        Each line containing at least one medication keyword (case-insensitive
        substring match) yields one entry.

        Args:
            text: Raw OCR output, lines separated by ``\\n``.

        Returns:
            A list of dicts with keys ``text`` (the stripped line),
            ``dosages`` (list of matched dosage strings), ``frequency`` (the
            first matched frequency phrase or ``'Unknown'``) and
            ``confidence`` (``'High'`` if any dosage matched, else
            ``'Medium'``).
        """
        medications = []

        for line in text.split('\n'):
            line_clean = line.strip()
            # Lower-case once per line rather than once per keyword.
            line_lower = line_clean.lower()

            if not any(keyword in line_lower
                       for keyword in self.medication_keywords):
                continue

            dosages = self._DOSAGE_RE.findall(line_clean)
            frequency = self._FREQUENCY_RE.findall(line_clean)

            medications.append({
                'text': line_clean,
                'dosages': dosages,
                'frequency': frequency[0] if frequency else 'Unknown',
                'confidence': 'High' if dosages else 'Medium'
            })

        return medications

    def process_prescription(self, image) -> dict:
        """Main method to process prescription and extract information.

        Args:
            image: The prescription image, passed to ``preprocess_image``.

        Returns:
            On success, a dict with ``raw_text``, ``medications``,
            ``confidence_score`` and ``medication_count``.  If any step
            raises, a dict with the same keys set to empty/zero values plus
            an ``error`` key holding the exception message.
        """
        try:
            processed_img = self.preprocess_image(image)

            # Tesseract flags: default engine mode, page segmentation mode 6
            # (a single uniform block of text), English language data.
            custom_config = r'--oem 3 --psm 6 -l eng'
            extracted_text = pytesseract.image_to_string(
                processed_img, config=custom_config
            )

            medications = self.extract_medication_info(extracted_text)
            confidence = self.calculate_confidence(extracted_text, medications)

            return {
                'raw_text': extracted_text,
                'medications': medications,
                'confidence_score': confidence,
                'medication_count': len(medications)
            }

        except Exception as e:
            # Keep the same keys as the success result so callers can read
            # 'medication_count' without checking for 'error' first.
            return {
                'raw_text': '',
                'medications': [],
                'confidence_score': 0,
                'medication_count': 0,
                'error': str(e)
            }

    def calculate_confidence(self, text: str, medications: list) -> float:
        """Calculate confidence score for OCR extraction.

        The score is a heuristic: one point per ten characters of text
        (capped at 100), plus 15 per detected medication, minus penalties
        for very short text and for text dominated by unusual symbols.

        Args:
            text: Raw OCR output.
            medications: Entries returned by ``extract_medication_info``.

        Returns:
            A number clamped to the range 0-100; 0 for blank text.
        """
        if not text.strip():
            return 0

        base_score = min(100, len(text) / 10)
        medication_bonus = len(medications) * 15

        error_penalty = 0
        # Fewer than 20 characters is unlikely to be a full prescription.
        if len(text) < 20:
            error_penalty += 20
        # More than 30% characters outside word chars, whitespace, '.' and
        # ',' suggests the OCR produced noise.
        if len(re.findall(r'[^\w\s.,]', text)) > len(text) * 0.3:
            error_penalty += 15

        final_score = base_score + medication_bonus - error_penalty
        return max(0, min(100, final_score))