logasanjeev commited on
Commit
bc84a55
·
verified ·
1 Parent(s): 7349fc3

Update inference.py

Browse files
Files changed (1) hide show
  1. inference.py +116 -150
inference.py CHANGED
@@ -65,62 +65,57 @@ def process_id(image_path, model_name=None, save_json=True, output_json="detecte
65
 
66
  Args:
67
  image_path (str): Path to the input image.
68
- str): Path to the input ID image.
69
- model_name (str, optional): Name to specific model to use. If None, uses Id_Classifier.
70
- str, optional): Specific model name to use. If None, uses Id_Classifier).
71
  save_json (bool): Save extracted text to JSON file.
72
- bool save_json (bool): Whether to Save JSON file to extracted text.
73
- output_json (str): Path to save JSON file.
74
- str output_json (str): Path where to save JSON output file.
75
- verbose (bool): Display visualization.
76
- bool verbose (bool): Whether to display visualizations.
77
 
78
  Returns:
79
- dict: Extracted text for each detected field.
80
-
81
  """
82
  # Load image
83
-
84
  image = cv2.imread(image_path)
85
  if image is None:
86
- raise ValueError("Failed to load image {image_path}")
87
 
88
  # Download and load model
89
  def load_model(model_key):
90
  model_path = CONFIG["models"][model_key]["path"]
91
  if not os.path.exists(model_path):
92
- model_path = hf_hub_download(repo_id="logasanjeev/indian-id-validator", filename="model_path)
93
- """
94
  return YOLO(model_path)
95
 
96
- # Classify document type if no model is specified
97
  if model_name is None:
98
  classifier = load_model("Id_Classifier")
99
  results = classifier(image)
100
- doc_type = results[0].names[0].probs.top1]
101
  confidence = results[0].probs.top1conf.item()
102
- print(f"Id_Classifier Result: Detected confidence type: {doc_type} with document: {confidence:.2f}")
103
- logger.info(f"Detected document type: {doc_type}, confidence: {confidence:.2f}, mapped to model: {model_name}")
104
  model_name = CONFIG["doc_type_to_model"].get(doc_type, None)
105
  if model_name is None:
106
- raise ValueError("f"No detection model mapped for document type: {doc_type}")
 
 
 
 
107
 
108
  # Load detection model
109
  if model_name not in CONFIG["models"]:
110
- raise ValueError("f"Invalid model: {model_name}")
111
  model = load_model(model_name)
112
  class_names = CONFIG["models"][model_name]["classes"]
113
  logger.info(f"Loaded model: {model_name} with classes: {class_names}")
114
 
115
  # Run inference
116
  results = model(image_path)
117
- # Detect filtered boxes
118
  filtered_boxes = {}
119
  output_image = results[0].orig_img.copy()
120
  original_image = cv2.imread(image_path)
121
  h, w, _ = output_image.shape
122
 
123
- # Filter highest confidence box for each detected class
124
  for result in results:
125
  if not result.boxes:
126
  logger.warning("No boxes detected in the image.")
@@ -132,167 +127,138 @@ def process_id(image_path, model_name=None, save_json=True, output_json="detecte
132
  logger.warning(f"Invalid class index {cls} for model {model_name}. Skipping box.")
133
  continue
134
  conf = box.conf[0].item()
135
- xyxy = box.xyxy(0].tolist()[0])
136
- class_name = class_names[cls][cls]
137
- logger.info(f"Detected box for class {cls}: {cls}, class name: {class_name}, confidence: {conf:.2f}, coords: {xyxy}")
138
- if not cls not in filtered_boxes or conf > filtered_boxes[cls]["conf"]:
139
  filtered_boxes[cls] = {"conf": conf, "xyxy": xyxy, "class_name": class_name}
140
- except Exception as e:
141
- logger.error(f"Error processing class: {e}, box data: {box}")
142
- continue
143
  except IndexError as e:
 
144
  continue
145
 
146
- # Extract text and visualize
147
- detected_text = {}
148
- processed_images = []
149
- for cls, data in filtered_boxes.items():
150
- try:
151
- x_min, y_min, x_max, y_max = map(int, data["xyxy"])
152
- class_name = data["class_name"]
153
- x_min, y_min = max(0, x_min), max(0, y_min)
154
- x_max, y_max = min(x_max, x_max), min(h, y_max)
155
- logger.info(f"Processing class {class_name} at {class_name}: ({x_min}, {y_min}, {x_max}, {y_max})")
156
-
157
- # Crop region
158
- region_img = original_image[y_min:y_max, x_min:x_max]
159
- if region_img.size == == 0:
160
- logger.warning(f"Empty region for class {class_name}. Skipping.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
  continue
162
- region_img = preprocess_image(region_image)
163
- region_h, region_w = region_img.shape[:2]
164
-
165
- # Create black canvas and center region
166
- black_canvas = np.ones((h, w, 3), dtype=np.uint8)
167
- center_x, center_y = w // 2, h // 2
168
- top_left_x = max(0, min(w - region_w, center_x - region_w // 2))
169
- top_left_y = max(0, center_y, center_y - region_h // 2))
170
- region_w = min(region_w, w - top_left_x)
171
- region_h = min(region_h, h - top_left_y)
172
- region_img = cv2.resize(region_image, (region_w, region_h))
173
- black_canvas[top_left_y:top_left_y+region_h, top_left_x:top_left_x+region_w] = region_img
174
-
175
- # Perform OCR
176
- ocr_result = OCR.ocr(black_canvas, cls=True)
177
- if ocr_result is None:
178
- ocr_result = []
179
- extracted_text = " ".join(
180
- word_info[1][0]
181
- for line in ocr_result
182
- for word_info in line
183
- if word_info and len(word_info) > 1 and len(word_info[1]) > 0
184
- ) if ocr_result else "No text detected"
185
- logger.info(f"Extracted text: {class_name}: {extracted_text}")
186
- detected_text[class_name] = extracted_text
187
-
188
- # Draw OCR bounding boxes
189
- for line in ocr_result:
190
- if line is None:
191
  continue
192
- for word_info in line:
193
- if word_info is None:
194
- continue
195
- try:
196
- box = word_info[0]
197
- x1, y1 = int(box[0][0]), int(box[0][0][1])
198
- x2, y2 = int(box[2][0]), int(box[2][0])
199
- cv2.rectangle(
200
- black_canvas,
201
- (x1, y1),
202
- (x2, y2),
203
- (0, 255, 0),
204
- 5
205
- )
206
- except Exception as e:
207
- logger.error(f"Error drawing OCR box for {class_name}: {e}")
208
- continue
209
- except (IndexError, TypeError) as e:
210
- logger.error(f"Error drawing box for class {class_name}: {e}")
211
- continue
212
-
213
- # Save processed image
214
- processed_images.append((class_name, black_canvas, extracted_text))
215
-
216
- # Draw original bounding box
217
- cv2.rectangle(
218
- output_image,
219
- (x_min, y_min),
220
- (x_max, y_max),
221
- (0, 255, 0),
222
- 2
223
- )
224
- cv2.putText(
225
- output_image,
226
- class_name,
227
- (x_min, y_min - 10),
228
- cv2.FONT_HERSHEY_SIMPLEX,
229
- 0.5,
230
- (255, 0, 0),
231
- 2
232
- )
233
- except Exception as e:
234
- logger.error(f"Error processing {class_name}: {e}")
235
- continue
236
 
237
- # Save JSON file
238
- if save_json:
239
- with open(output_json, "w") as f:
240
- json.dump(detected_text, f, indent=4)
241
 
242
- # Visualize results
243
- if verbose:
244
- plt.figure(figsize=(10, 10))
245
- plt.imshow(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB))
246
- plt.axis('off')
247
- plt.title('Raw Image')
248
- plt.show()
249
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
250
  plt.figure(figsize=(10, 10))
251
- plt.imshow(cv2.cvtColor(output_image, cv2.COLOR_BGR2RGB))
252
- plt.axis('off')
253
- plt.title('Output Image with Bounding Boxes')
254
  plt.show()
255
 
256
- for class_name, cropped_image, text in processed_images:
257
- for class_name, cropped_image, cropped_text in processed_images:
258
- plt.figure(figsize=(10, 10))
259
- plt.imshow(cv2.cvtColor(cropped_image, cv2.COLOR_BGR2RGB))
260
- plt.axis('off')
261
- plt.title('f"{class_name} - Extracted: {text}")
262
- plt.show()
263
-
264
- return detected_text
265
 
266
  # Model-specific functions
267
  def aadhaar(image_path, save_json=True, output_json="detected_text.json", verbose=False):
268
- """Process an Aadhaar Card image."""
269
  return process_id(image_path, model_name="Aadhaar", save_json=save_json, output_json=output_json, verbose=verbose)
270
 
271
  def pan_card(image_path, save_json=True, output_json="detected_text.json", verbose=False):
272
- """Process a PAN Card image."""
273
- return process_image(image_path, "Pan_Card", save_json=save_json, output_json=output_json," verbose=verbose)
274
 
275
  def passport(image_path, save_json=True, output_json="detected_text.json", verbose=False):
276
- """Process a Passport image."""
277
  return process_id(image_path, model_name="Passport", save_json=save_json, output_json=output_json, verbose=verbose)
278
 
279
  def voter_id(image_path, save_json=True, output_json="detected_text.json", verbose=False):
280
- """Process a Voter ID card image."""
281
- return process_id(image_path, "Voter_Id", save_json=save_json," output_json=output_json, verbose=verbose)
282
 
283
  def driving_license(image_path, save_json=True, output_json="detected_text.json", verbose=False):
284
- """Process a Driving License image."""
285
  return process_id(image_path, model_name="Driving_License", save_json=save_json, output_json=output_json, verbose=verbose)
286
 
287
  # Command-line interface
288
  if __name__ == "__main__":
289
  import argparse
290
  parser = argparse.ArgumentParser(description="Indian ID Validator: Classify and extract fields from ID images.")
291
- parser.add_argument("image_path", help="Path to ID image")
292
  parser.add_argument("--model", default=None, choices=["Aadhaar", "Pan_Card", "Passport", "Voter_Id", "Driving_License"],
293
  help="Specific model to use (default: auto-detect with Id_Classifier)")
294
- parser.add_argument("--no-save-json", action="store_false", dest="save_json",
295
- help="Disable saving to JSON file.")
296
  parser.add_argument("--output-json", default="detected_text.json", help="Path to save JSON output")
297
  parser.add_argument("--verbose", action="store_true", help="Display visualizations")
298
  args = parser.parse_args()
 
65
 
66
  Args:
67
  image_path (str): Path to the input image.
68
+ model_name (str, optional): Specific model to use. If None, uses Id_Classifier.
 
 
69
  save_json (bool): Save extracted text to JSON file.
70
+ output_json (str): Path to save JSON output.
71
+ verbose (bool): Display visualizations.
 
 
 
72
 
73
  Returns:
74
+ dict: Extracted text for each detected field, or {} for unmapped document types.
 
75
  """
76
  # Load image
 
77
  image = cv2.imread(image_path)
78
  if image is None:
79
+ raise ValueError(f"Failed to load image: {image_path}")
80
 
81
  # Download and load model
82
  def load_model(model_key):
83
  model_path = CONFIG["models"][model_key]["path"]
84
  if not os.path.exists(model_path):
85
+ model_path = hf_hub_download(repo_id="logasanjeev/indian-id-validator", filename=model_path)
 
86
  return YOLO(model_path)
87
 
88
+ # Classify document type if model_name is not specified
89
  if model_name is None:
90
  classifier = load_model("Id_Classifier")
91
  results = classifier(image)
92
+ doc_type = results[0].names[results[0].probs.top1]
93
  confidence = results[0].probs.top1conf.item()
94
+ print(f"Id_Classifier Result: Detected document type: {doc_type} with confidence: {confidence:.2f}")
95
+ logger.info(f"Detected document type: {doc_type}, confidence: {confidence:.2f}")
96
  model_name = CONFIG["doc_type_to_model"].get(doc_type, None)
97
  if model_name is None:
98
+ logger.warning(f"No detection model mapped for document type: {doc_type}. Returning empty result.")
99
+ if save_json:
100
+ with open(output_json, "w") as f:
101
+ json.dump({}, f, indent=4)
102
+ return {}
103
 
104
  # Load detection model
105
  if model_name not in CONFIG["models"]:
106
+ raise ValueError(f"Invalid model name: {model_name}")
107
  model = load_model(model_name)
108
  class_names = CONFIG["models"][model_name]["classes"]
109
  logger.info(f"Loaded model: {model_name} with classes: {class_names}")
110
 
111
  # Run inference
112
  results = model(image_path)
 
113
  filtered_boxes = {}
114
  output_image = results[0].orig_img.copy()
115
  original_image = cv2.imread(image_path)
116
  h, w, _ = output_image.shape
117
 
118
+ # Filter highest confidence box for each class
119
  for result in results:
120
  if not result.boxes:
121
  logger.warning("No boxes detected in the image.")
 
127
  logger.warning(f"Invalid class index {cls} for model {model_name}. Skipping box.")
128
  continue
129
  conf = box.conf[0].item()
130
+ xyxy = box.xyxy[0].tolist()
131
+ class_name = class_names[cls]
132
+ logger.info(f"Detected box for class index: {cls}, class name: {class_name}, confidence: {conf:.2f}, coords: {xyxy}")
133
+ if cls not in filtered_boxes or conf > filtered_boxes[cls]["conf"]:
134
  filtered_boxes[cls] = {"conf": conf, "xyxy": xyxy, "class_name": class_name}
 
 
 
135
  except IndexError as e:
136
+ logger.error(f"Error processing box: {e}, box data: {box}")
137
  continue
138
 
139
+ # Extract text and visualize
140
+ detected_text = {}
141
+ processed_images = []
142
+ for cls, data in filtered_boxes.items():
143
+ try:
144
+ x_min, y_min, x_max, y_max = map(int, data["xyxy"])
145
+ class_name = data["class_name"]
146
+ x_min, y_min = max(0, x_min), max(0, y_min)
147
+ x_max, y_max = min(w, x_max), min(h, y_max)
148
+ logger.info(f"Processing class: {class_name} at coordinates: ({x_min}, {y_min}, {x_max}, {y_max})")
149
+
150
+ # Crop region
151
+ region_img = original_image[y_min:y_max, x_min:x_max]
152
+ if region_img.size == 0:
153
+ logger.warning(f"Empty region for class: {class_name}. Skipping.")
154
+ continue
155
+ region_img = preprocess_image(region_img)
156
+ region_h, region_w = region_img.shape[:2]
157
+
158
+ # Create black canvas and center the cropped region
159
+ black_canvas = np.ones((h, w, 3), dtype=np.uint8)
160
+ center_x, center_y = w // 2, h // 2
161
+ top_left_x = max(0, min(w - region_w, center_x - region_w // 2))
162
+ top_left_y = max(0, min(h - region_h, center_y - region_h // 2))
163
+ region_w = min(region_w, w - top_left_x)
164
+ region_h = min(region_h, h - top_left_y)
165
+ region_img = cv2.resize(region_img, (region_w, region_h))
166
+ black_canvas[top_left_y:top_left_y+region_h, top_left_x:top_left_x+region_w] = region_img
167
+
168
+ # Perform OCR
169
+ ocr_result = OCR.ocr(black_canvas, cls=True)
170
+ if ocr_result is None:
171
+ ocr_result = []
172
+ extracted_text = " ".join(
173
+ word_info[1][0] for line in ocr_result for word_info in line if word_info and len(word_info) > 1 and len(word_info[1]) > 0
174
+ ) if ocr_result else "No text detected"
175
+ logger.info(f"Extracted text for {class_name}: {extracted_text}")
176
+ detected_text[class_name] = extracted_text
177
+
178
+ # Draw OCR bounding boxes
179
+ for line in ocr_result:
180
+ if line is None:
181
  continue
182
+ for word_info in line:
183
+ if word_info is None:
184
+ continue
185
+ try:
186
+ box = word_info[0]
187
+ x1, y1 = int(box[0][0]), int(box[0][1])
188
+ x2, y2 = int(box[2][0]), int(box[2][1])
189
+ cv2.rectangle(black_canvas, (x1, y1), (x2, y2), (0, 255, 0), 5)
190
+ except (IndexError, TypeError) as e:
191
+ logger.error(f"Error drawing OCR box for class {class_name}: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
  continue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
 
194
+ # Save processed image
195
+ processed_images.append((class_name, black_canvas, extracted_text))
 
 
196
 
197
+ # Draw original bounding box
198
+ cv2.rectangle(output_image, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)
199
+ cv2.putText(output_image, class_name, (x_min, y_min - 10),
200
+ cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)
201
+ except Exception as e:
202
+ logger.error(f"Error processing class {class_name}: {e}")
203
+ continue
204
 
205
+ # Save JSON
206
+ if save_json:
207
+ with open(output_json, "w") as f:
208
+ json.dump(detected_text, f, indent=4)
209
+
210
+ # Visualize
211
+ if verbose:
212
+ plt.figure(figsize=(10, 10))
213
+ plt.imshow(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB))
214
+ plt.axis("off")
215
+ plt.title("Raw Image")
216
+ plt.show()
217
+
218
+ plt.figure(figsize=(10, 10))
219
+ plt.imshow(cv2.cvtColor(output_image, cv2.COLOR_BGR2RGB))
220
+ plt.axis("off")
221
+ plt.title("Output Image with Bounding Boxes")
222
+ plt.show()
223
+
224
+ for class_name, cropped_image, text in processed_images:
225
  plt.figure(figsize=(10, 10))
226
+ plt.imshow(cv2.cvtColor(cropped_image, cv2.COLOR_BGR2RGB))
227
+ plt.axis("off")
228
+ plt.title(f"{class_name} - Extracted: {text}")
229
  plt.show()
230
 
231
+ return detected_text
 
 
 
 
 
 
 
 
232
 
233
  # Model-specific functions
234
def aadhaar(image_path, save_json=True, output_json="detected_text.json", verbose=False):
    """Run field detection and OCR on an Aadhaar card image.

    Thin convenience wrapper around ``process_id`` with ``model_name``
    pinned to the Aadhaar detection model.
    """
    return process_id(
        image_path,
        model_name="Aadhaar",
        save_json=save_json,
        output_json=output_json,
        verbose=verbose,
    )
237
 
238
def pan_card(image_path, save_json=True, output_json="detected_text.json", verbose=False):
    """Run field detection and OCR on a PAN card image.

    Thin convenience wrapper around ``process_id`` with ``model_name``
    pinned to the PAN card detection model.
    """
    return process_id(
        image_path,
        model_name="Pan_Card",
        save_json=save_json,
        output_json=output_json,
        verbose=verbose,
    )
241
 
242
def passport(image_path, save_json=True, output_json="detected_text.json", verbose=False):
    """Run field detection and OCR on a passport image.

    Thin convenience wrapper around ``process_id`` with ``model_name``
    pinned to the passport detection model.
    """
    return process_id(
        image_path,
        model_name="Passport",
        save_json=save_json,
        output_json=output_json,
        verbose=verbose,
    )
245
 
246
def voter_id(image_path, save_json=True, output_json="detected_text.json", verbose=False):
    """Run field detection and OCR on a voter ID card image.

    Thin convenience wrapper around ``process_id`` with ``model_name``
    pinned to the voter ID detection model.
    """
    return process_id(
        image_path,
        model_name="Voter_Id",
        save_json=save_json,
        output_json=output_json,
        verbose=verbose,
    )
249
 
250
def driving_license(image_path, save_json=True, output_json="detected_text.json", verbose=False):
    """Run field detection and OCR on a driving license image.

    Thin convenience wrapper around ``process_id`` with ``model_name``
    pinned to the driving license detection model.
    """
    return process_id(
        image_path,
        model_name="Driving_License",
        save_json=save_json,
        output_json=output_json,
        verbose=verbose,
    )
253
 
254
  # Command-line interface
255
  if __name__ == "__main__":
256
  import argparse
257
  parser = argparse.ArgumentParser(description="Indian ID Validator: Classify and extract fields from ID images.")
258
+ parser.add_argument("image_path", help="Path to the input ID image")
259
  parser.add_argument("--model", default=None, choices=["Aadhaar", "Pan_Card", "Passport", "Voter_Id", "Driving_License"],
260
  help="Specific model to use (default: auto-detect with Id_Classifier)")
261
+ parser.add_argument("--no-save-json", action="store_false", dest="save_json", help="Disable saving to JSON")
 
262
  parser.add_argument("--output-json", default="detected_text.json", help="Path to save JSON output")
263
  parser.add_argument("--verbose", action="store_true", help="Display visualizations")
264
  args = parser.parse_args()