File size: 12,609 Bytes
97960a9
0dc0b49
97960a9
 
 
 
0dc0b49
 
8861d9a
 
 
 
 
793cd17
 
97960a9
0dc0b49
97960a9
 
0dc0b49
 
97960a9
 
0dc0b49
 
 
793cd17
0dc0b49
 
 
 
 
 
 
 
97960a9
0dc0b49
97960a9
0dc0b49
 
 
97960a9
0dc0b49
 
 
 
 
 
 
 
f93fee0
 
0dc0b49
 
 
 
 
 
 
 
 
 
 
793cd17
0dc0b49
 
97960a9
0dc0b49
 
bc84a55
0dc0b49
bc84a55
 
793cd17
0dc0b49
 
793cd17
0dc0b49
 
97960a9
 
bc84a55
0dc0b49
 
 
 
 
bc84a55
0dc0b49
 
bc84a55
0dc0b49
 
 
bc84a55
0d0944b
793cd17
bc84a55
793cd17
 
0dc0b49
 
bc84a55
 
 
 
 
0dc0b49
 
 
bc84a55
0dc0b49
 
8861d9a
0dc0b49
 
 
 
 
 
 
 
bc84a55
0dc0b49
8861d9a
 
 
0dc0b49
8861d9a
 
f93fee0
 
 
8861d9a
bc84a55
 
 
 
4b3e4fe
8861d9a
bc84a55
8861d9a
0dc0b49
bc84a55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
acd1b3a
 
 
 
 
 
 
 
 
 
 
 
 
bc84a55
 
 
 
 
 
f93fee0
bc84a55
acd1b3a
bc84a55
 
 
 
 
 
 
 
f93fee0
8861d9a
bc84a55
 
8861d9a
bc84a55
 
 
 
 
 
 
0dc0b49
bc84a55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0dc0b49
bc84a55
 
 
0dc0b49
 
bc84a55
97960a9
0dc0b49
 
bc84a55
0dc0b49
 
 
bc84a55
 
0dc0b49
 
bc84a55
0dc0b49
 
 
bc84a55
 
0dc0b49
 
bc84a55
0dc0b49
 
 
97960a9
0dc0b49
 
bc84a55
0dc0b49
 
bc84a55
0dc0b49
 
793cd17
0dc0b49
 
793cd17
0dc0b49
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
import cv2
import json
import numpy as np
import matplotlib.pyplot as plt
from ultralytics import YOLO
from paddleocr import PaddleOCR
from huggingface_hub import hf_hub_download
import os
import logging

# Set up logging: INFO-level root config plus a module-level logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Suppress ultralytics verbose output (YOLO logs every inference at INFO otherwise)
logging.getLogger("ultralytics").setLevel(logging.WARNING)

# Load configuration
def load_config(config_path="config.json"):
    """Load the pipeline configuration as a dict.

    Reads *config_path* when it exists locally; otherwise downloads
    ``config.json`` from the ``logasanjeev/indian-id-validator`` Hub repo.
    """
    if os.path.exists(config_path):
        resolved_path = config_path
    else:
        resolved_path = hf_hub_download(
            repo_id="logasanjeev/indian-id-validator", filename="config.json"
        )
    with open(resolved_path, "r") as config_file:
        return json.load(config_file)

# Module-level configuration loaded once at import time.
CONFIG = load_config()

# Initialize PaddleOCR once at import (model load is expensive); English,
# with angle classification so rotated text is still read.
OCR = PaddleOCR(use_angle_cls=True, lang="en", show_log=False)

# Preprocessing functions
def upscale_image(image, scale=2):
    """Enlarge *image* by an integer *scale* factor (bicubic) to improve OCR accuracy."""
    height, width = image.shape[:2]
    new_size = (width * scale, height * scale)
    return cv2.resize(image, new_size, interpolation=cv2.INTER_CUBIC)

def unblur_image(image):
    """Counteract mild blur with a standard 3x3 sharpening convolution."""
    sharpen_kernel = np.array([
        [ 0, -1,  0],
        [-1,  5, -1],
        [ 0, -1,  0],
    ])
    return cv2.filter2D(image, -1, sharpen_kernel)

def denoise_image(image):
    """Suppress noise in a color image via OpenCV's Non-Local Means denoising."""
    return cv2.fastNlMeansDenoisingColored(
        image,
        None,
        h=10,                    # luminance filter strength
        hColor=10,               # color filter strength
        templateWindowSize=7,
        searchWindowSize=21,
    )

def enhance_contrast(image):
    """Boost local contrast by applying CLAHE to the lightness channel in LAB space."""
    lab_image = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)
    lightness, chroma_a, chroma_b = cv2.split(lab_image)
    equalizer = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
    equalized = equalizer.apply(lightness)
    merged = cv2.merge((equalized, chroma_a, chroma_b))
    return cv2.cvtColor(merged, cv2.COLOR_LAB2BGR)

def preprocess_image(image):
    """Run the full cleanup pipeline (upscale, sharpen, denoise, CLAHE).

    Accepts either a file path or a BGR numpy array; raises ValueError
    for anything that cannot be resolved to a valid image.
    """
    if isinstance(image, str):
        image = cv2.imread(image)
    # A failed imread yields None, which also fails the ndarray check.
    if image is None or not isinstance(image, np.ndarray):
        raise ValueError("Invalid image input. Provide a valid file path or numpy array.")
    pipeline = (
        lambda img: upscale_image(img, scale=2),
        unblur_image,
        denoise_image,
        enhance_contrast,
    )
    for step in pipeline:
        image = step(image)
    return image

# Core inference function
def process_id(image_path, model_name=None, save_json=True, output_json="detected_text.json", verbose=False, classify_only=False):
    """
    Process an ID image to classify document type, detect fields, and extract text.

    Args:
        image_path (str): Path to the input image.
        model_name (str, optional): Specific model to use. If None, uses Id_Classifier.
        save_json (bool): Save extracted text to JSON file.
        output_json (str): Path to save JSON output.
        verbose (bool): Display visualizations.
        classify_only (bool): If True, only classify document type and return result.

    Returns:
        dict: Extracted text for each detected field, or {} for unmapped document
        types; for classify_only, {"doc_type": ..., "confidence": ...}.

    Raises:
        ValueError: If the image cannot be loaded or model_name is unknown.
    """
    # Load image up front so a bad path fails fast.
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError(f"Failed to load image: {image_path}")

    def load_model(model_key):
        """Resolve a model weight file (local path or Hub download) and load it."""
        model_path = CONFIG["models"][model_key]["path"]
        if not os.path.exists(model_path):
            model_path = hf_hub_download(repo_id="logasanjeev/indian-id-validator", filename=model_path)
        return YOLO(model_path)

    # Classify document type when no model was explicitly requested.
    if model_name is None:
        classifier = load_model("Id_Classifier")
        results = classifier(image)
        doc_type = results[0].names[results[0].probs.top1]
        confidence = results[0].probs.top1conf.item()
        print(f"Detected document type: {doc_type} with confidence: {confidence:.2f}")
        logger.info(f"Detected document type: {doc_type}, confidence: {confidence:.2f}")
        if classify_only:
            return {"doc_type": doc_type, "confidence": confidence}
        model_name = CONFIG["doc_type_to_model"].get(doc_type, None)
        if model_name is None:
            # Classifier recognized a type we have no field-detection model for.
            logger.warning(f"No detection model mapped for document type: {doc_type}. Returning empty result.")
            if save_json:
                with open(output_json, "w") as f:
                    json.dump({}, f, indent=4)
            return {}

    # Load the field-detection model for the chosen document type.
    if model_name not in CONFIG["models"]:
        raise ValueError(f"Invalid model name: {model_name}")
    model = load_model(model_name)
    class_names = CONFIG["models"][model_name]["classes"]
    logger.info(f"Loaded model: {model_name} with classes: {class_names}")

    # Run detection inference.
    results = model(image_path)
    filtered_boxes = {}
    output_image = results[0].orig_img.copy()
    # Reuse the image loaded above instead of re-reading the same path.
    original_image = image
    h, w, _ = output_image.shape

    # Keep only the highest-confidence box per class.
    for result in results:
        if not result.boxes:
            logger.warning("No boxes detected in the image.")
            continue
        for box in result.boxes:
            try:
                cls = int(box.cls[0].item())
                if cls >= len(class_names):
                    logger.warning(f"Invalid class index {cls} for model {model_name}. Skipping box.")
                    continue
                conf = box.conf[0].item()
                xyxy = box.xyxy[0].tolist()
                class_name = class_names[cls]
                logger.info(f"Detected box for class index: {cls}, class name: {class_name}, confidence: {conf:.2f}, coords: {xyxy}")
                if cls not in filtered_boxes or conf > filtered_boxes[cls]["conf"]:
                    filtered_boxes[cls] = {"conf": conf, "xyxy": xyxy, "class_name": class_name}
            except IndexError as e:
                logger.error(f"Error processing box: {e}, box data: {box}")
                continue

    # Extract text from each detected field and build visualizations.
    detected_text = {}
    processed_images = []
    for cls, data in filtered_boxes.items():
        # Bind class_name before the try so the except handler below can
        # always reference it (previously a failure on the coordinate
        # conversion raised NameError inside the handler).
        class_name = data["class_name"]
        try:
            x_min, y_min, x_max, y_max = map(int, data["xyxy"])
            # Clamp box to image bounds.
            x_min, y_min = max(0, x_min), max(0, y_min)
            x_max, y_max = min(w, x_max), min(h, y_max)
            logger.info(f"Processing class: {class_name} at coordinates: ({x_min}, {y_min}, {x_max}, {y_max})")

            # Crop the field region from the raw image.
            region_img = original_image[y_min:y_max, x_min:x_max]
            if region_img.size == 0:
                logger.warning(f"Empty region for class: {class_name}. Skipping.")
                continue
            region_img = preprocess_image(region_img)
            region_h, region_w = region_img.shape[:2]

            # Paste the cleaned crop onto a near-black canvas the size of the
            # original frame, centered, so OCR sees it in isolation.
            # (np.ones gives pixel value 1, visually black.)
            black_canvas = np.ones((h, w, 3), dtype=np.uint8)
            center_x, center_y = w // 2, h // 2
            top_left_x = max(0, min(w - region_w, center_x - region_w // 2))
            top_left_y = max(0, min(h - region_h, center_y - region_h // 2))
            # Shrink the region if the (upscaled) crop exceeds the canvas.
            region_w = min(region_w, w - top_left_x)
            region_h = min(region_h, h - top_left_y)
            region_img = cv2.resize(region_img, (region_w, region_h))
            black_canvas[top_left_y:top_left_y+region_h, top_left_x:top_left_x+region_w] = region_img

            # Perform OCR on the canvas.
            ocr_result = OCR.ocr(black_canvas, cls=True)
            if ocr_result is None or not ocr_result:
                logger.warning(f"No OCR result for class: {class_name}. Skipping.")
                detected_text[class_name] = "No text detected"
                continue
            extracted_text = []
            for line in ocr_result:
                if line is None:
                    continue
                for word_info in line:
                    # word_info is expected as [box, (text, score)]; guard malformed entries.
                    if word_info is None or len(word_info) < 2 or not word_info[1]:
                        continue
                    extracted_text.append(word_info[1][0])
            extracted_text = " ".join(extracted_text) if extracted_text else "No text detected"
            logger.info(f"Extracted text for {class_name}: {extracted_text}")
            detected_text[class_name] = extracted_text

            # Draw OCR word boxes on the canvas for the verbose view.
            for line in ocr_result:
                if line is None:
                    continue
                for word_info in line:
                    if word_info is None or len(word_info) < 1:
                        continue
                    try:
                        box = word_info[0]
                        x1, y1 = int(box[0][0]), int(box[0][1])
                        x2, y2 = int(box[2][0]), int(box[2][1])
                        cv2.rectangle(black_canvas, (x1, y1), (x2, y2), (0, 255, 0), 5)
                    except (IndexError, TypeError) as e:
                        logger.error(f"Error drawing OCR box for class {class_name}: {e}")
                        continue

            # Keep the annotated canvas for optional display.
            processed_images.append((class_name, black_canvas, extracted_text))

            # Draw the detection box and label on the full-frame output image.
            cv2.rectangle(output_image, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)
            cv2.putText(output_image, class_name, (x_min, y_min - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)
        except Exception as e:
            logger.error(f"Error processing class {class_name}: {e}")
            continue

    # Persist the extracted text.
    if save_json:
        with open(output_json, "w") as f:
            json.dump(detected_text, f, indent=4)

    # Optional visualization of raw image, detections, and per-field OCR canvases.
    if verbose:
        plt.figure(figsize=(10, 10))
        plt.imshow(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB))
        plt.axis("off")
        plt.title("Raw Image")
        plt.show()

        plt.figure(figsize=(10, 10))
        plt.imshow(cv2.cvtColor(output_image, cv2.COLOR_BGR2RGB))
        plt.axis("off")
        plt.title("Output Image with Bounding Boxes")
        plt.show()

        for class_name, cropped_image, text in processed_images:
            plt.figure(figsize=(10, 10))
            plt.imshow(cv2.cvtColor(cropped_image, cv2.COLOR_BGR2RGB))
            plt.axis("off")
            plt.title(f"{class_name} - Extracted: {text}")
            plt.show()

    return detected_text

# Model-specific functions
def aadhaar(image_path, save_json=True, output_json="detected_text.json", verbose=False):
    """Run the full detection/OCR pipeline on an Aadhaar card image."""
    return process_id(
        image_path,
        model_name="Aadhaar",
        save_json=save_json,
        output_json=output_json,
        verbose=verbose,
    )

def pan_card(image_path, save_json=True, output_json="detected_text.json", verbose=False):
    """Run the full detection/OCR pipeline on a PAN card image."""
    return process_id(
        image_path,
        model_name="Pan_Card",
        save_json=save_json,
        output_json=output_json,
        verbose=verbose,
    )

def passport(image_path, save_json=True, output_json="detected_text.json", verbose=False):
    """Run the full detection/OCR pipeline on a passport image."""
    return process_id(
        image_path,
        model_name="Passport",
        save_json=save_json,
        output_json=output_json,
        verbose=verbose,
    )

def voter_id(image_path, save_json=True, output_json="detected_text.json", verbose=False):
    """Run the full detection/OCR pipeline on a voter ID image."""
    return process_id(
        image_path,
        model_name="Voter_Id",
        save_json=save_json,
        output_json=output_json,
        verbose=verbose,
    )

def driving_license(image_path, save_json=True, output_json="detected_text.json", verbose=False):
    """Run the full detection/OCR pipeline on a driving license image."""
    return process_id(
        image_path,
        model_name="Driving_License",
        save_json=save_json,
        output_json=output_json,
        verbose=verbose,
    )

# Command-line interface
# Command-line interface
if __name__ == "__main__":
    import argparse

    cli = argparse.ArgumentParser(
        description="Indian ID Validator: Classify and extract fields from ID images."
    )
    cli.add_argument("image_path", help="Path to the input ID image")
    cli.add_argument(
        "--model",
        default=None,
        choices=["Aadhaar", "Pan_Card", "Passport", "Voter_Id", "Driving_License"],
        help="Specific model to use (default: auto-detect with Id_Classifier)",
    )
    cli.add_argument("--no-save-json", action="store_false", dest="save_json",
                     help="Disable saving to JSON")
    cli.add_argument("--output-json", default="detected_text.json",
                     help="Path to save JSON output")
    cli.add_argument("--verbose", action="store_true", help="Display visualizations")
    cli.add_argument("--classify-only", action="store_true", dest="classify_only",
                     help="Only classify document type")
    args = cli.parse_args()

    extracted = process_id(
        args.image_path,
        model_name=args.model,
        save_json=args.save_json,
        output_json=args.output_json,
        verbose=args.verbose,
        classify_only=args.classify_only,
    )
    print("Extracted Text:")
    print(json.dumps(extracted, indent=4))