Spaces:
Sleeping
Sleeping
import html
import io
import json
import logging
import os
import re
import shutil
import tempfile
import time
import unicodedata
import uuid
from dataclasses import dataclass
from typing import List, Dict, Tuple

import cv2
import gradio as gr
import numpy as np
import torch
from easyocr import Reader
from paddleocr import TextDetection
from rapidfuzz import fuzz
| # ============ CORE VALIDATORS (UNCHANGED) ============ | |
class VerhoeffValidator:
    """Checksum validation for 12-digit Aadhaar numbers using the Verhoeff scheme."""

    # Verhoeff multiplication table: d_table[c][x] combines the running
    # checksum c with the permuted digit x.
    d_table = [[0,1,2,3,4,5,6,7,8,9],[1,2,3,4,0,6,7,8,9,5],[2,3,4,0,1,7,8,9,5,6],[3,4,0,1,2,8,9,5,6,7],[4,0,1,2,3,9,5,6,7,8],[5,9,8,7,6,0,4,3,2,1],[6,5,9,8,7,1,0,4,3,2],[7,6,5,9,8,2,1,0,4,3],[8,7,6,5,9,3,2,1,0,4],[9,8,7,6,5,4,3,2,1,0]]
    # Verhoeff permutation table: the row is chosen by digit position modulo 8
    # (counted from the right-most digit).
    p_table = [[0,1,2,3,4,5,6,7,8,9],[1,5,7,6,2,8,3,0,9,4],[5,8,0,3,7,9,6,1,4,2],[8,9,1,6,0,4,3,5,2,7],[9,4,5,3,1,2,6,8,7,0],[4,2,8,6,5,7,3,9,0,1],[2,7,9,3,8,0,6,4,1,5],[7,0,4,6,9,1,3,2,5,8]]

    @classmethod
    def validate(cls, n: str) -> bool:
        """Return True if *n* is a well-formed number with a valid checksum.

        Args:
            n: Candidate number as a string with no separators.

        Returns:
            True only when *n* is exactly 12 ASCII digits, does not start
            with ``0`` or ``1``, and its Verhoeff checksum is zero.
        """
        # isascii() is required in addition to isdigit(): isdigit() also
        # accepts characters such as superscript digits that int() rejects.
        if not n or len(n) != 12 or not (n.isascii() and n.isdigit()) or n[0] in '01':
            return False
        c = 0
        for i, ch in enumerate(reversed(n)):
            c = cls.d_table[c][cls.p_table[i % 8][int(ch)]]
        return c == 0
class PatternValidator:
    """Regex-based extraction of Aadhaar and PAN numbers from free text."""

    @staticmethod
    def find_aadhaar(t: str) -> List[str]:
        """Return the distinct checksum-valid Aadhaar numbers found in *t*.

        Candidates are 12 digits starting with 2-9, written either as one run
        or as three groups of four separated by single whitespace characters.
        Whitespace is removed from each returned number. Results keep the
        order of first appearance and contain no duplicates (an unspaced
        number is matched by both patterns).
        """
        if not t:
            return []
        patterns = (r'\b[2-9]\d{3}\s?\d{4}\s?\d{4}\b', r'\b[2-9]\d{11}\b')
        found = {}
        for pattern in patterns:
            for raw in re.findall(pattern, t):
                digits = re.sub(r'\s', '', raw)
                if digits not in found and VerhoeffValidator.validate(digits):
                    found[digits] = None
        return list(found)

    @staticmethod
    def find_pan(t: str) -> List[str]:
        """Return the distinct PAN-shaped tokens found in *t*, upper-cased.

        The text is upper-cased before matching, so lower-case input is
        accepted. Results keep the order of first appearance.
        """
        if not t:
            return []
        hits = re.findall(r'\b[A-Z]{3}[PCHFATBLJG][A-Z]\d{4}[A-Z]\b', t.upper())
        # dict.fromkeys de-duplicates while keeping a deterministic order.
        return list(dict.fromkeys(hits))
class TextNormalizer:
    """Clean up OCR output before keyword matching."""

    # Letters that OCR commonly confuses with digits, mapped to the digit.
    OCR_CORRECTIONS = {'O':'0','o':'0','l':'1','I':'1','Z':'2','z':'2','S':'5','G':'6','b':'6','T':'7','B':'8','g':'9','q':'9'}

    @staticmethod
    def normalize(text: str, aggressive: bool = False) -> str:
        """Return *text* NFKC-normalized, stripped of noise, with single spaces.

        Args:
            text: Raw text; ``None`` or empty yields ``""``.
            aggressive: When True, every token of four or more characters made
                up solely of digits and digit look-alike letters has those
                letters replaced per ``OCR_CORRECTIONS``.

        Returns:
            The text with non-whitespace control/format characters removed,
            characters other than word characters, whitespace, Devanagari and
            ``. , / -`` dropped, and whitespace runs collapsed to one space.
        """
        if not text:
            return ""
        text = unicodedata.normalize('NFKC', text)
        # Tabs and newlines are Unicode category "Cc"; keep them here so the
        # whitespace collapse below separates words instead of fusing them.
        text = ''.join(
            ch for ch in text
            if ch.isspace() or unicodedata.category(ch)[0] != 'C'
        )
        if aggressive:
            table = str.maketrans(TextNormalizer.OCR_CORRECTIONS)
            text = re.sub(
                r'\b[0-9OolIZzSGbTBgq]{4,}\b',
                lambda m: m.group(0).translate(table),
                text,
            )
        text = re.sub(r'[^\w\s\u0900-\u097F.,/-]', '', text)
        return re.sub(r'\s+', ' ', text).strip()
| # ============ CONFIGURATION ============ | |
@dataclass
class Config:
    """Tunable settings for the OCR verification pipeline.

    ``languages`` and ``doc_keywords`` default to ``None`` and are filled in
    by ``__post_init__`` so that every instance gets its own fresh list/dict.
    """

    # Minimum rapidfuzz ratio (0-100) for a fuzzy keyword match.
    fuzzy_threshold: int = 80
    # Minimum score the best document type needs to be reported.
    min_keywords: int = 1
    # Images whose longest side exceeds this many pixels are downscaled.
    max_image_dim: int = 2000
    # Language codes handed to the EasyOCR reader; None selects ['en', 'hi'].
    languages: List[str] = None
    # Document type -> keywords that hint at it; None selects the built-in table.
    doc_keywords: Dict[str, List[str]] = None

    def __post_init__(self):
        """Fill in the default language list and keyword table when not supplied."""
        if self.languages is None:
            self.languages = ['en', 'hi']
        if self.doc_keywords is None:
            self.doc_keywords = {
                "Aadhaar": ["uidai","aadhaar","aadhar","government","india","mera","naam","pehchaan","यूआईडीएआई","आधार","भारत","सरकार","जन्म","तिथि"],
                "PAN": ["permanent","account","number","income","tax","incometaxindia","pan","स्थायी","खाता","आयकर","पिता","नाम"],
                "Driving_License": ["driving","licence","motor","vehicles","rto","mcwg","lmv","ड्राइविंग","वाहन","परिवहन","चालविण्याचा","परवाना"],
                "Passport": ["passport","republic","india","ministry","external","affairs","पासपोर्ट","गणराज्य","विदेश","मंत्रालय"],
                "Ration_Card": ["ration","card","food","civil","supplies","apl","bpl","राशन","कार्ड","खाद्य","नागरी","पुरवठा"]
            }
| # ============ MAIN PIPELINE ============ | |
class DocumentOCRVerifier:
    """Classify an identity-document image and check user keywords against its OCR text.

    Pipeline: load image -> resize / deskew / enhance -> detect text regions
    with PaddleOCR (optional) -> recognise text with EasyOCR -> classify by
    keywords and ID-number patterns -> case-insensitive substring match of
    the caller's keywords against the recognised text.
    """

    # Score given to a document type whose ID-number pattern was found; it
    # outranks any keyword count and maps to the fixed 0.95 confidence.
    PATTERN_SCORE = 100

    def __init__(self, config: "Config" = None):
        """Create the OCR engines.

        Args:
            config: Pipeline settings; a default ``Config()`` is used when omitted.
        """
        self.cfg = config or Config()
        # Region detection is best-effort: if the PaddleOCR model cannot be
        # created, verify() runs EasyOCR on the whole image instead.
        try:
            self.detector = TextDetection(model_name="PP-OCRv5_mobile_det")
        except Exception:
            logging.getLogger(__name__).warning(
                "Text detector unavailable; falling back to whole-image OCR",
                exc_info=True,
            )
            self.detector = None
        self.reader = Reader(self.cfg.languages, gpu=torch.cuda.is_available())

    def _preprocess(self, img: np.ndarray) -> np.ndarray:
        """Resize, deskew and enhance *img*, in that order."""
        img = self._resize(img)
        img = self._deskew(img)
        return self._enhance(img)

    def _resize(self, img: np.ndarray) -> np.ndarray:
        """Downscale *img* so its longest side is at most ``cfg.max_image_dim``."""
        h, w = img.shape[:2]
        longest = max(h, w)
        if longest > self.cfg.max_image_dim:
            scale = self.cfg.max_image_dim / longest
            # max(1, ...) keeps a very thin image from getting a zero-sized side.
            new_size = (max(1, int(w * scale)), max(1, int(h * scale)))
            img = cv2.resize(img, new_size, interpolation=cv2.INTER_AREA)
        return img

    def _deskew(self, img: np.ndarray) -> np.ndarray:
        """Rotate *img* by the tilt of its largest dark contour, if above 0.5 degrees."""
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
        contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        if contours:
            rect = cv2.minAreaRect(max(contours, key=cv2.contourArea))
            angle = rect[-1]
            # Fold the reported angle into [-45, 45] so the smaller correction is used.
            if angle < -45:
                angle = 90 + angle
            elif angle > 45:
                angle -= 90
            if abs(angle) > 0.5:
                h, w = img.shape[:2]
                M = cv2.getRotationMatrix2D((w // 2, h // 2), angle, 1.0)
                # White fill for the corners exposed by the rotation.
                img = cv2.warpAffine(img, M, (w, h), borderValue=(255, 255, 255))
        return img

    def _enhance(self, img: np.ndarray) -> np.ndarray:
        """Denoise, boost local contrast (CLAHE on the L channel) and sharpen *img*."""
        denoised = cv2.fastNlMeansDenoisingColored(img, None, 10, 10, 7, 21)
        lab = cv2.cvtColor(denoised, cv2.COLOR_BGR2LAB)
        l, a, b = cv2.split(lab)
        l = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)).apply(l)
        enhanced = cv2.cvtColor(cv2.merge([l, a, b]), cv2.COLOR_LAB2BGR)
        kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
        # Blend 60% sharpened with 40% unsharpened.
        return cv2.addWeighted(cv2.filter2D(enhanced, -1, kernel), 0.6, enhanced, 0.4, 0)

    def _extract_keywords(self, text: str) -> List[str]:
        """Split *text* on whitespace and return the non-empty tokens."""
        if not text:
            return []
        return text.split()

    def _classify(self, text: str) -> Tuple[str, float, List[str]]:
        """Guess the document type of *text*.

        Each configured keyword counts once if it is a substring of the
        normalized text or fuzzily matches a word or a window of up to five
        words. A checksum-valid Aadhaar number or a PAN-shaped token in the
        raw text overrides the keyword count for that type.

        Returns:
            ``(document_type, confidence, matched_keywords)``, or
            ``("UNCLASSIFIED", 0.0, [])`` when the best score is below
            ``cfg.min_keywords`` or no document types are configured.
        """
        lowered = TextNormalizer.normalize(text, aggressive=True).lower()
        words = lowered.split()
        threshold = self.cfg.fuzzy_threshold
        scores = {}
        for doc_type, keywords in self.cfg.doc_keywords.items():
            matched = []
            for kw in keywords:
                needle = kw.lower()
                if needle in lowered:
                    matched.append(kw)
                    continue
                for i, w in enumerate(words):
                    if fuzz.ratio(needle, w) >= threshold:
                        matched.append(kw)
                        break
                    phrase = " ".join(words[i:i + 5])
                    if fuzz.ratio(needle, phrase) >= threshold:
                        matched.append(kw)
                        break
            score = len(matched)
            if doc_type == "Aadhaar" and PatternValidator.find_aadhaar(text):
                score = self.PATTERN_SCORE
            elif doc_type == "PAN" and PatternValidator.find_pan(text):
                score = self.PATTERN_SCORE
            scores[doc_type] = {"score": score, "matched": matched}

        if not scores:
            return "UNCLASSIFIED", 0.0, []
        best_type, best = max(scores.items(), key=lambda item: item[1]["score"])
        if best["score"] < self.cfg.min_keywords:
            return "UNCLASSIFIED", 0.0, []
        if best["score"] == self.PATTERN_SCORE:
            conf = 0.95
        else:
            # max(1, ...) guards against an empty keyword list in a custom config.
            total = max(1, len(self.cfg.doc_keywords[best_type]))
            conf = min(0.90, len(best["matched"]) / total + 0.3)
        return best_type, conf, best["matched"]

    def _read_regions(self, img: np.ndarray) -> List[str]:
        """Return one recognised string per detected text region of *img*.

        Detection runs on the same preprocessed array that is cropped, so the
        polygon coordinates and the crops refer to the same pixels. Returns an
        empty list when there is no detector or detection fails.
        """
        if not self.detector:
            return []
        try:
            results = self.detector.predict(input=img, batch_size=1)
        except Exception:
            logging.getLogger(__name__).warning(
                "Text detection failed; falling back to whole-image OCR",
                exc_info=True,
            )
            return []

        height, width = img.shape[:2]
        texts = []
        for res in results or []:
            for poly in res.get("dt_polys", []):
                x, y, w, h = cv2.boundingRect(np.array(poly, dtype=np.int32))
                # Clip to the image: polygons may extend past the border.
                x0, y0 = max(x, 0), max(y, 0)
                x1, y1 = min(x + w, width), min(y + h, height)
                if x1 <= x0 or y1 <= y0:
                    continue
                found = self.reader.readtext(img[y0:y1, x0:x1], detail=0)
                if found:
                    # Keep every string read from the region, not just the first.
                    texts.append(" ".join(found))
        return texts

    @staticmethod
    def _match_user_keywords(user_keywords, text: str):
        """Return ``(trimmed_keywords, matching_keywords)`` for *text*.

        ``None`` entries are dropped and the rest are stripped of outer
        whitespace. Matching is a case-insensitive substring test. Empty
        keywords never match: an empty string is a substring of any text and
        would otherwise mark every document as verified.
        """
        cleaned = [kw.strip() for kw in (user_keywords or []) if kw is not None]
        haystack = text.lower()
        hits = dict.fromkeys(kw for kw in cleaned if kw and kw.lower() in haystack)
        return cleaned, list(hits)

    def verify(self, image_path: str, user_keywords: List[str]) -> Dict:
        """Run OCR on the image at *image_path* and check *user_keywords* against it.

        Args:
            image_path: Path of the image to read with ``cv2.imread``.
            user_keywords: Keywords or phrases to look for; internal spacing
                is left as given.

        Returns:
            A dict with ``documentType``, ``documentTypeAccuracy``,
            ``ocrKeywords``, ``inputUserKeywords``, ``rawInputUserKeywords``,
            ``exactMatchingKeywords``, ``verificationStatus`` and
            ``imagePath``; or ``{"error": ..., "imagePath": ...}`` when the
            image cannot be read. Status is ``"verified"`` when at least one
            non-empty keyword occurs in the OCR text.
        """
        img = cv2.imread(image_path)
        if img is None:
            return {"error": "Image not found", "imagePath": image_path}
        img = self._preprocess(img)

        # Prefer region-level OCR; fall back to the whole image when the
        # detector is missing, fails, or yields no readable text.
        texts = self._read_regions(img)
        if not texts:
            texts = list(self.reader.readtext(img, detail=0))

        ocr_keywords = [word for t in texts for word in self._extract_keywords(t)]
        all_text = " ".join(texts)

        doc_type, accuracy, _matched = self._classify(all_text)

        # Match against the combined text so multi-word phrases can be found.
        cleaned_keywords, exact_matches = self._match_user_keywords(user_keywords, all_text)
        status = "verified" if exact_matches else "not_verified"
        return {
            "documentType": doc_type,
            "documentTypeAccuracy": round(accuracy, 4),
            "ocrKeywords": ocr_keywords,
            "inputUserKeywords": cleaned_keywords,
            "rawInputUserKeywords": user_keywords,
            "exactMatchingKeywords": exact_matches,
            "verificationStatus": status,
            "imagePath": image_path
        }
| # ============ APP ============ | |
# Single shared verifier, built once at import time so the OCR engines it
# creates in __init__ are reused by every run_ocr() call.
verifier = DocumentOCRVerifier()
def save_upload_to_tmp(uploaded_file) -> str:
    """Make an uploaded file available on disk and return its path.

    Args:
        uploaded_file: One of
            * a ``str`` path to an existing file (returned unchanged),
            * another path-like object,
            * raw ``bytes`` / ``bytearray`` / ``memoryview``,
            * an object whose ``name`` attribute is the path of an existing file,
            * a binary file-like object with ``read()``.

    Returns:
        The input path when it is an existing ``str`` path; otherwise the path
        of a uniquely named copy under ``/tmp/ocr_app``. The copy keeps the
        extension of the upload's ``name`` when there is one, else ``.jpg``.

    Raises:
        TypeError: If *uploaded_file* is none of the supported kinds.
        OSError: If the source cannot be read or the copy cannot be written.
    """
    if isinstance(uploaded_file, str) and os.path.exists(uploaded_file):
        return uploaded_file

    tmp_dir = "/tmp/ocr_app"
    os.makedirs(tmp_dir, exist_ok=True)

    # Preserve the original extension if the upload carries a file name.
    source_name = getattr(uploaded_file, "name", None)
    ext = ".jpg"
    if isinstance(source_name, str) and source_name:
        _, e = os.path.splitext(source_name)
        if e:
            ext = e
    out_path = os.path.join(tmp_dir, f"{int(time.time())}_{uuid.uuid4().hex}{ext}")

    if isinstance(uploaded_file, (bytes, bytearray, memoryview)):
        with open(out_path, "wb") as dst:
            dst.write(uploaded_file)
    elif isinstance(uploaded_file, (str, os.PathLike)):
        shutil.copyfile(uploaded_file, out_path)
    elif isinstance(source_name, str) and os.path.isfile(source_name):
        # Upload wrappers expose the on-disk file via .name; copying from disk
        # avoids depending on the wrapper's current read position.
        shutil.copyfile(source_name, out_path)
    elif hasattr(uploaded_file, "read"):
        with open(out_path, "wb") as dst:
            shutil.copyfileobj(uploaded_file, dst)
    else:
        raise TypeError(
            f"Unsupported upload type: {type(uploaded_file).__name__}"
        )
    return out_path
def display_uploaded_image(image):
    """Echo the uploaded value back so it can be previewed without any processing.

    Returns ``None`` when nothing was uploaded, otherwise *image* itself.
    """
    return None if image is None else image
def _split_keywords(keywords_raw):
    """Split the raw textbox value on commas into trimmed, non-empty keywords.

    Only the ends of each keyword are stripped (spaces, tabs, newlines);
    internal spacing is preserved. Blank tokens are dropped because an empty
    keyword is a substring of any text and cannot verify anything.
    """
    if not keywords_raw:
        return []
    trimmed = (part.strip("\n\r\t ") for part in keywords_raw.split(","))
    return [kw for kw in trimmed if kw]


def _render_result_card(result):
    """Build the HTML summary card for a successful verification *result*.

    Every value taken from *result* is HTML-escaped: the keywords come
    straight from the user's textbox and the card is rendered as raw HTML.
    """
    doc_type = html.escape(str(result.get("documentType", "N/A")))
    doc_accuracy = result.get("documentTypeAccuracy", 0.0)
    input_keywords = result.get("inputUserKeywords", [])
    verification_status = result.get("verificationStatus", "not_verified")

    accuracy_text = f"{doc_accuracy * 100:.2f}%"
    if input_keywords:
        keywords_text = ", ".join(f'"{html.escape(str(kw))}"' for kw in input_keywords)
    else:
        keywords_text = "None provided"

    # Colour-coded status banner.
    if verification_status == "verified":
        status_html = '<span style="color: #16a34a; font-weight: bold; font-size: 22px;">✓ VERIFIED</span>'
        status_bg = "#dcfce7"
        status_border = "#16a34a"
    else:
        status_html = '<span style="color: #dc2626; font-weight: bold; font-size: 22px;">✗ NOT VERIFIED</span>'
        status_bg = "#fee2e2"
        status_border = "#dc2626"

    return f"""
<div style="border: 2px solid #e5e7eb; border-radius: 16px; padding: 28px; background: linear-gradient(135deg, #ffffff 0%, #f9fafb 100%); box-shadow: 0 10px 25px rgba(0,0,0,0.1); font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;">
<div style="display: flex; align-items: center; margin-bottom: 24px; border-bottom: 3px solid #3b82f6; padding-bottom: 16px;">
<span style="font-size: 32px; margin-right: 12px;">📄</span>
<h2 style="margin: 0; color: #1f2937; font-size: 26px; font-weight: 700;">Document Verification Results</h2>
</div>
<div style="display: grid; gap: 16px;">
<div style="background: linear-gradient(135deg, #dbeafe 0%, #eff6ff 100%); padding: 20px; border-radius: 12px; border-left: 5px solid #3b82f6; box-shadow: 0 2px 8px rgba(59,130,246,0.2);">
<div style="color: #1e40af; font-size: 13px; font-weight: 600; text-transform: uppercase; letter-spacing: 0.5px; margin-bottom: 8px;">📋 Document Type</div>
<div style="font-size: 24px; color: #1f2937; font-weight: 700;">{doc_type}</div>
</div>
<div style="background: linear-gradient(135deg, #d1fae5 0%, #ecfdf5 100%); padding: 20px; border-radius: 12px; border-left: 5px solid #10b981; box-shadow: 0 2px 8px rgba(16,185,129,0.2);">
<div style="color: #065f46; font-size: 13px; font-weight: 600; text-transform: uppercase; letter-spacing: 0.5px; margin-bottom: 8px;">🎯 Type Detection Accuracy</div>
<div style="font-size: 24px; color: #1f2937; font-weight: 700;">{accuracy_text}</div>
</div>
<div style="background: linear-gradient(135deg, #fef3c7 0%, #fefce8 100%); padding: 20px; border-radius: 12px; border-left: 5px solid #f59e0b; box-shadow: 0 2px 8px rgba(245,158,11,0.2);">
<div style="color: #92400e; font-size: 13px; font-weight: 600; text-transform: uppercase; letter-spacing: 0.5px; margin-bottom: 8px;">🔑 Input Keywords</div>
<div style="font-size: 17px; color: #451a03; font-weight: 500; line-height: 1.6;">{keywords_text}</div>
</div>
<div style="background: linear-gradient(135deg, {status_bg} 0%, {status_bg}dd 100%); padding: 24px; border-radius: 12px; border: 3px solid {status_border}; text-align: center; box-shadow: 0 4px 12px rgba(0,0,0,0.15);">
<div style="color: #374151; font-size: 13px; font-weight: 600; text-transform: uppercase; letter-spacing: 0.5px; margin-bottom: 12px;">🔍 Verification Status</div>
<div style="margin-top: 8px;">{status_html}</div>
</div>
</div>
</div>
"""


def run_ocr(image, keywords_raw: str):
    """Verify an uploaded document against the user's keywords.

    Args:
        image: The uploaded file as delivered by the upload component
            (``None`` when nothing has been uploaded).
        keywords_raw: Raw textbox content. It is split on commas only; each
            keyword keeps its internal spacing and blank entries are ignored.

    Returns:
        ``(card_html, json_output)``: an HTML summary (or a red error
        message) and the verifier's result serialized as indented JSON. The
        JSON is ``""`` when no image was uploaded.
    """
    if image is None:
        return "<div style='color: red; padding: 20px;'>⚠️ Please upload an image first!</div>", ""

    user_keywords = _split_keywords(keywords_raw)

    # Save the upload to disk and run the verifier on that path.
    image_path = save_upload_to_tmp(image)
    result = verifier.verify(image_path=image_path, user_keywords=user_keywords)
    json_output = json.dumps(result, indent=2, ensure_ascii=False)

    # The verifier reports unreadable images through an "error" key; show it
    # rather than a misleading "NOT VERIFIED" card.
    if "error" in result:
        message = html.escape(str(result["error"]))
        return f"<div style='color: red; padding: 20px;'>⚠️ {message}</div>", json_output

    return _render_result_card(result), json_output
# Custom CSS passed to gr.Blocks(css=...): sets the container font, styles
# ".gr-button-primary" (gradient background, hover lift and shadow) and
# rounds the corners of ".gr-box".
custom_css = """
.gradio-container {
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif !important;
}
.gr-button-primary {
background: linear-gradient(90deg, #3b82f6 0%, #2563eb 100%) !important;
border: none !important;
font-weight: 600 !important;
font-size: 16px !important;
padding: 12px 24px !important;
transition: all 0.3s ease !important;
}
.gr-button-primary:hover {
transform: translateY(-2px) !important;
box-shadow: 0 8px 16px rgba(59, 130, 246, 0.3) !important;
}
.gr-box {
border-radius: 12px !important;
}
"""
# UI definition. Components are laid out in creation order, so statement
# order here determines the page layout.
# NOTE(review): nesting below is reconstructed from the component order —
# confirm which column the button belongs to.
with gr.Blocks(title="Document OCR Verifier", css=custom_css) as demo:
    gr.Markdown("""
# 🔍 Document OCR Verifier
### Upload a document image and provide comma-separated keywords to verify the document authenticity.
""")
    with gr.Row():
        # Inputs: file upload, keyword textbox and the run button.
        with gr.Column(scale=1):
            img_in = gr.File(label="📤 Upload Document Image (JPEG/PNG)")
            kws = gr.Textbox(
                label="🔑 Verification Keywords (comma-separated)",
                placeholder="Example: ROHIT, KUMAR, SINGH, Date of Birth",
                lines=3
            )
            run_btn = gr.Button("🚀 Run OCR & Verify", variant="primary", size="lg")
        # Preview of the uploaded document.
        with gr.Column(scale=1):
            img_out = gr.Image(label="📸 Uploaded Document", type="filepath", height=400)
    # Outputs of run_ocr: the HTML summary card and the JSON result text.
    with gr.Row():
        card_out = gr.HTML(label="📊 Verification Summary")
    with gr.Row():
        json_out = gr.Textbox(label="📋 Complete JSON Response", lines=18, max_lines=25)
    # Image displays immediately when uploaded
    img_in.upload(
        fn=display_uploaded_image,
        inputs=[img_in],
        outputs=[img_out]
    )
    # Processing happens when button is clicked
    run_btn.click(
        fn=run_ocr,
        inputs=[img_in, kws],
        outputs=[card_out, json_out]
    )
    gr.Markdown("""
---
**Note:** The document will be stored in `/tmp/ocr_app/` directory. Supported formats: JPEG, PNG, JPG.
""")
# Start the web server only when executed as a script; listens on all
# interfaces (0.0.0.0) at port 7860.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)