# File size: 5,622 Bytes
# d28e16a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
"""

OCR Utilities for document processing

"""

import cv2
import numpy as np
from PIL import Image

class OCRProcessor:
    """Handles OCR processing for images including handwriting detection.

    ``pytesseract`` is imported lazily in ``__init__``.  When it cannot be
    imported, the text-extraction methods return an "OCR not available"
    placeholder instead of raising.
    """

    # Heuristic cut-offs used by detect_handwriting().
    EDGE_RATIO_THRESHOLD = 0.05
    MIN_CONTOUR_COUNT = 20

    # Words whose Tesseract confidence is not above this value are dropped
    # by extract_text_with_confidence().
    MIN_WORD_CONFIDENCE = 30

    # Tesseract flags for handwriting: default engine mode (--oem 3) and
    # "assume a single uniform block of text" page segmentation (--psm 6).
    HANDWRITING_CONFIG = r'--oem 3 --psm 6'

    def __init__(self):
        try:
            import pytesseract
            self.pytesseract = pytesseract
        except ImportError:
            self.pytesseract = None
            print("Warning: pytesseract not available")

    @staticmethod
    def _to_gray(image):
        """Return *image* as a grayscale numpy array.

        Arrays with three dimensions are treated as RGB(A) and converted
        with OpenCV; anything else is passed through unchanged.
        """
        pixels = np.array(image)
        # cv2 cannot process boolean arrays (e.g. from 1-bit PIL images),
        # so expand them to the usual 0/255 uint8 representation first.
        if pixels.dtype == np.bool_:
            pixels = pixels.astype(np.uint8) * 255
        if pixels.ndim == 3:
            return cv2.cvtColor(pixels, cv2.COLOR_RGB2GRAY)
        return pixels

    def detect_handwriting(self, image):
        """Detect if image contains handwriting.

        Runs Canny edge detection and counts external contours; an image is
        flagged as handwritten when both the edge-pixel ratio and the
        contour count exceed the class-level thresholds.

        Returns:
            A dict with keys ``is_handwritten`` (bool), ``confidence``
            (float clamped to 0-1), ``edge_ratio`` (float) and
            ``contour_count`` (int).  If processing fails, the same keys are
            returned with falsy values plus an ``error`` message; no
            exception is propagated.
        """
        try:
            gray = self._to_gray(image)
            edges = cv2.Canny(gray, 50, 150)

            # Plain Python floats/bools keep the result JSON-serialisable.
            edge_ratio = float(np.count_nonzero(edges)) / edges.size

            # findContours returns (contours, hierarchy) on OpenCV 4.x but
            # (image, contours, hierarchy) on 3.x; [-2] is the contour list
            # in both layouts.
            contours = cv2.findContours(
                edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
            )[-2]
            contour_count = len(contours)

            is_handwritten = bool(
                edge_ratio > self.EDGE_RATIO_THRESHOLD
                and contour_count > self.MIN_CONTOUR_COUNT
            )

            return {
                'is_handwritten': is_handwritten,
                # Scale the ratio up and clamp so the score stays in 0-1.
                'confidence': min(1.0, edge_ratio * 10),
                'edge_ratio': edge_ratio,
                'contour_count': contour_count
            }
        except Exception as e:
            # Same keys as the success path so callers can index blindly.
            return {
                'is_handwritten': False,
                'confidence': 0,
                'edge_ratio': 0,
                'contour_count': 0,
                'error': str(e)
            }

    def extract_text(self, image, enhance=True):
        """Extract text from image using standard OCR.

        Args:
            image: Image to read; passed to ``pytesseract.image_to_string``.
            enhance: When true, binarise the image with ``_enhance_image``
                before running OCR.

        Returns:
            The recognised text.  Failures are reported in-band: the string
            "OCR not available" when pytesseract is missing, or a string
            starting with "OCR error: " when recognition raises.
        """
        if not self.pytesseract:
            return "OCR not available"

        try:
            if enhance:
                image = self._enhance_image(image)
            return self.pytesseract.image_to_string(image)
        except Exception as e:
            return f"OCR error: {str(e)}"

    def extract_from_handwriting(self, image):
        """Extract text from handwritten image.

        The image is pre-processed with ``_enhance_for_handwriting`` and
        recognised using ``HANDWRITING_CONFIG``.

        Returns:
            The recognised text, "OCR not available" when pytesseract is
            missing, or a string starting with "Handwriting OCR error: "
            when recognition raises.
        """
        if not self.pytesseract:
            return "OCR not available"

        try:
            enhanced = self._enhance_for_handwriting(image)
            return self.pytesseract.image_to_string(
                enhanced, config=self.HANDWRITING_CONFIG
            )
        except Exception as e:
            return f"Handwriting OCR error: {str(e)}"

    def extract_text_with_confidence(self, image):
        """Extract text with confidence scores.

        Only non-blank words whose confidence is above
        ``MIN_WORD_CONFIDENCE`` are kept.

        Returns:
            A dict with ``text`` (kept words joined by single spaces),
            ``confidence`` (mean confidence of the kept words, 0 when none
            were kept) and ``word_count``.  On failure ``text`` holds an
            "Error: ..." message and the other two values are 0.
        """
        if not self.pytesseract:
            return {'text': 'OCR not available', 'confidence': 0, 'word_count': 0}

        try:
            data = self.pytesseract.image_to_data(
                image, output_type=self.pytesseract.Output.DICT
            )

            kept_words = []
            kept_scores = []
            for raw_conf, word in zip(data['conf'], data['text']):
                # Confidence may arrive as an int, a float, or a string such
                # as '96.5' or '-1'; int() alone rejects the float strings.
                try:
                    score = int(float(raw_conf))
                except (TypeError, ValueError, OverflowError):
                    continue
                if score <= self.MIN_WORD_CONFIDENCE:
                    continue
                # Blank tokens carry no text and would inflate word_count.
                if not str(word).strip():
                    continue
                kept_words.append(word)
                kept_scores.append(score)

            if kept_scores:
                avg_confidence = sum(kept_scores) / len(kept_scores)
            else:
                avg_confidence = 0

            return {
                'text': ' '.join(kept_words),
                'confidence': avg_confidence,
                'word_count': len(kept_words)
            }
        except Exception as e:
            return {
                'text': f"Error: {str(e)}",
                'confidence': 0,
                'word_count': 0
            }

    def _enhance_image(self, image):
        """Enhance image for better OCR.

        Converts to grayscale and applies Otsu thresholding.  This is a
        best-effort step: if anything fails, the original image is returned
        unchanged so OCR can still be attempted.
        """
        try:
            gray = self._to_gray(image)
            _, binary = cv2.threshold(
                gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
            )
            return Image.fromarray(binary)
        except Exception:
            return image

    def _enhance_for_handwriting(self, image):
        """Enhance image specifically for handwriting recognition.

        Converts to grayscale, applies Gaussian adaptive thresholding
        (block size 11, constant 2) and then non-local-means denoising.
        This is a best-effort step: if anything fails, the original image is
        returned unchanged.
        """
        try:
            gray = self._to_gray(image)
            binary = cv2.adaptiveThreshold(
                gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
            )
            denoised = cv2.fastNlMeansDenoising(binary)
            return Image.fromarray(denoised)
        except Exception:
            return image