Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,18 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
-
import random
|
| 3 |
import uuid
|
| 4 |
-
import json
|
| 5 |
import time
|
| 6 |
import re
|
|
|
|
| 7 |
from threading import Thread
|
| 8 |
-
from typing import Iterable,
|
| 9 |
|
| 10 |
import gradio as gr
|
| 11 |
import spaces
|
| 12 |
import torch
|
| 13 |
-
import numpy as np
|
| 14 |
from PIL import Image
|
| 15 |
-
import cv2
|
| 16 |
|
| 17 |
os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"
|
| 18 |
os.environ["HF_HOME"] = "/tmp/hf_home"
|
|
@@ -22,7 +32,7 @@ from transformers import (
|
|
| 22 |
Qwen2VLForConditionalGeneration,
|
| 23 |
AutoProcessor,
|
| 24 |
TextIteratorStreamer,
|
| 25 |
-
|
| 26 |
)
|
| 27 |
|
| 28 |
try:
|
|
@@ -30,7 +40,7 @@ try:
|
|
| 30 |
PEFT_AVAILABLE = True
|
| 31 |
except:
|
| 32 |
PEFT_AVAILABLE = False
|
| 33 |
-
print("⚠️ PEFT not available
|
| 34 |
|
| 35 |
try:
|
| 36 |
from transformers import Qwen3VLForConditionalGeneration
|
|
@@ -39,68 +49,36 @@ except:
|
|
| 39 |
QWEN3_AVAILABLE = False
|
| 40 |
print("⚠️ Qwen3VL not available in current transformers version")
|
| 41 |
|
| 42 |
-
from transformers.image_utils import load_image
|
| 43 |
from gradio.themes import Soft
|
| 44 |
from gradio.themes.utils import colors, fonts, sizes
|
| 45 |
|
| 46 |
-
# ===== THEME
|
| 47 |
colors.steel_blue = colors.Color(
|
| 48 |
name="steel_blue",
|
| 49 |
-
c50="#EBF3F8",
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
c300="#7DB3D2",
|
| 53 |
-
c400="#529AC3",
|
| 54 |
-
c500="#4682B4",
|
| 55 |
-
c600="#3E72A0",
|
| 56 |
-
c700="#36638C",
|
| 57 |
-
c800="#2E5378",
|
| 58 |
-
c900="#264364",
|
| 59 |
-
c950="#1E3450",
|
| 60 |
)
|
| 61 |
|
| 62 |
class SteelBlueTheme(Soft):
|
| 63 |
-
def __init__(
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
text_size: sizes.Size | str = sizes.text_lg,
|
| 70 |
-
font: fonts.Font | str | Iterable[fonts.Font | str] = (
|
| 71 |
-
fonts.GoogleFont("Outfit"), "Arial", "sans-serif",
|
| 72 |
-
),
|
| 73 |
-
font_mono: fonts.Font | str | Iterable[fonts.Font | str] = (
|
| 74 |
-
fonts.GoogleFont("IBM Plex Mono"), "ui-monospace", "monospace",
|
| 75 |
-
),
|
| 76 |
-
):
|
| 77 |
-
super().__init__(
|
| 78 |
-
primary_hue=primary_hue,
|
| 79 |
-
secondary_hue=secondary_hue,
|
| 80 |
-
neutral_hue=neutral_hue,
|
| 81 |
-
text_size=text_size,
|
| 82 |
-
font=font,
|
| 83 |
-
font_mono=font_mono,
|
| 84 |
-
)
|
| 85 |
super().set(
|
| 86 |
background_fill_primary="*primary_50",
|
| 87 |
background_fill_primary_dark="*primary_900",
|
| 88 |
body_background_fill="linear-gradient(135deg, *primary_200, *primary_100)",
|
| 89 |
body_background_fill_dark="linear-gradient(135deg, *primary_900, *primary_800)",
|
| 90 |
button_primary_text_color="white",
|
| 91 |
-
button_primary_text_color_hover="white",
|
| 92 |
button_primary_background_fill="linear-gradient(90deg, *secondary_500, *secondary_600)",
|
| 93 |
button_primary_background_fill_hover="linear-gradient(90deg, *secondary_600, *secondary_700)",
|
| 94 |
-
button_primary_background_fill_dark="linear-gradient(90deg, *secondary_600, *secondary_800)",
|
| 95 |
-
button_primary_background_fill_hover_dark="linear-gradient(90deg, *secondary_500, *secondary_500)",
|
| 96 |
button_secondary_text_color="black",
|
| 97 |
-
button_secondary_text_color_hover="white",
|
| 98 |
button_secondary_background_fill="linear-gradient(90deg, *primary_300, *primary_300)",
|
| 99 |
button_secondary_background_fill_hover="linear-gradient(90deg, *primary_400, *primary_400)",
|
| 100 |
-
button_secondary_background_fill_dark="linear-gradient(90deg, *primary_500, *primary_600)",
|
| 101 |
-
button_secondary_background_fill_hover_dark="linear-gradient(90deg, *primary_500, *primary_500)",
|
| 102 |
slider_color="*secondary_500",
|
| 103 |
-
slider_color_dark="*secondary_600",
|
| 104 |
block_title_text_weight="600",
|
| 105 |
block_border_width="3px",
|
| 106 |
block_shadow="*shadow_drop_lg",
|
|
@@ -116,485 +94,436 @@ css = """
|
|
| 116 |
#main-title h1 { font-size: 2.3em !important; }
|
| 117 |
#output-title h2 { font-size: 2.2em !important; }
|
| 118 |
.ra-wrap{ width: fit-content; }
|
| 119 |
-
.ra-inner{
|
| 120 |
-
|
| 121 |
-
background: var(--neutral-200); border-radius: 9999px; overflow: hidden;
|
| 122 |
-
}
|
| 123 |
.ra-input{ display: none; }
|
| 124 |
-
.ra-label{
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
}
|
| 129 |
-
.ra-highlight{
|
| 130 |
-
position: absolute; z-index: 1; top: 6px; left: 6px;
|
| 131 |
-
height: calc(100% - 12px); border-radius: 9999px;
|
| 132 |
-
background: white; box-shadow: 0 2px 4px rgba(0,0,0,0.1);
|
| 133 |
-
transition: transform 0.2s, width 0.2s;
|
| 134 |
-
}
|
| 135 |
.ra-input:checked + .ra-label{ color: black; }
|
| 136 |
.dark .ra-inner { background: var(--neutral-800); }
|
| 137 |
.dark .ra-label { color: var(--neutral-400); }
|
| 138 |
.dark .ra-highlight { background: var(--neutral-600); }
|
| 139 |
.dark .ra-input:checked + .ra-label { color: white; }
|
| 140 |
-
#gpu-duration-container {
|
| 141 |
-
|
| 142 |
-
border-radius: 8px;
|
| 143 |
-
background: var(--background-fill-secondary);
|
| 144 |
-
border: 1px solid var(--border-color-primary);
|
| 145 |
-
margin-top: 10px;
|
| 146 |
-
}
|
| 147 |
"""
|
| 148 |
|
| 149 |
MAX_MAX_NEW_TOKENS = 4096
|
| 150 |
DEFAULT_MAX_NEW_TOKENS = 1024
|
| 151 |
MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
|
| 152 |
-
|
| 153 |
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
| 154 |
|
| 155 |
-
print("
|
| 156 |
-
print("torch.__version__ =", torch.__version__)
|
| 157 |
-
print("torch.version.cuda =", torch.version.cuda)
|
| 158 |
-
print("cuda available:", torch.cuda.is_available())
|
| 159 |
-
print("cuda device count:", torch.cuda.device_count())
|
| 160 |
if torch.cuda.is_available():
|
| 161 |
-
print("
|
| 162 |
-
|
| 163 |
-
print("Using device:", device)
|
| 164 |
-
|
| 165 |
|
| 166 |
-
# ===== PROMPTS =====
|
| 167 |
|
| 168 |
-
|
|
|
|
|
|
|
| 169 |
|
| 170 |
-
|
| 171 |
-
- Copy ALL text EXACTLY as it appears in original language/script (Hindi, Arabic, Urdu, Chinese, Devanagari, etc.)
|
| 172 |
-
- DO NOT translate anything in this step
|
| 173 |
-
- DO NOT add any interpretation or explanation
|
| 174 |
-
- Preserve layout and line breaks exactly
|
| 175 |
-
- Extract every number, date, code, and character precisely
|
| 176 |
-
- Also detect visual element presence
|
| 177 |
-
|
| 178 |
-
Output ONLY in this exact structured format, nothing else:
|
| 179 |
|
|
|
|
| 180 |
PHOTO_PRESENT: yes/no
|
| 181 |
-
PHOTO_LOCATION: [top-left / top-right / center-left /
|
| 182 |
SIGNATURE_PRESENT: yes/no
|
| 183 |
-
SIGNATURE_LOCATION: [
|
| 184 |
MRZ_PRESENT: yes/no
|
| 185 |
-
DETECTED_LANGUAGE: [
|
| 186 |
---TEXT_START---
|
| 187 |
-
[Every
|
| 188 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 189 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 190 |
|
| 191 |
-
STEP2_TEMPLATE = """You are a multilingual KYC document expert with 95%+ translation accuracy.
|
| 192 |
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
- MRZ Present: {mrz_present}
|
| 197 |
-
- Detected Language: {detected_lang}
|
| 198 |
|
| 199 |
-
|
| 200 |
-
{raw_text}
|
| 201 |
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
2. If text is already English → copy as-is
|
| 205 |
-
3. Extract all key KYC fields
|
| 206 |
-
4. Output EXACTLY in the format below
|
| 207 |
-
|
| 208 |
-
⚠️ CRITICAL EXTRACTION RULES — READ BEFORE EXTRACTING:
|
| 209 |
-
|
| 210 |
-
RULE 1 — COUNTRY/INSTITUTION vs PERSON NAME:
|
| 211 |
-
- Text appearing at the TOP of ID cards like "Sultanate of Oman", "SULTANATE OF OMAN",
|
| 212 |
-
"Republic of India", "United Arab Emirates", "ROYAL OMAN POLICE" etc. is the
|
| 213 |
-
ISSUING COUNTRY or INSTITUTION NAME — THIS IS NOT THE PERSON'S NAME
|
| 214 |
-
- Extract person's name ONLY from explicit name labels:
|
| 215 |
-
الإسم / الاسم (Arabic) | NAME: | 姓名 (Chinese) | नाम (Hindi) | ИМЯ (Russian)
|
| 216 |
-
- In MRZ: TD1 Line 3 = person's name (e.g., FERIL<SUNNA = "Feril Sunna")
|
| 217 |
-
|
| 218 |
-
RULE 2 — CIVIL ID vs BARCODE/CHIP ID:
|
| 219 |
-
- Long hex strings printed on barcodes/chips (e.g., 7E400DD3D032A7C) are card
|
| 220 |
-
SERIAL/CHIP numbers — NOT the Civil ID
|
| 221 |
-
- The actual Civil/Document ID is under labels:
|
| 222 |
-
الرقم المدني (Civil No.) | رقم الهوية (ID No.) | ID NO. | CIVIL NO.
|
| 223 |
-
- Actual Civil ID is typically 8-12 alphanumeric characters (e.g., 73616576)
|
| 224 |
-
|
| 225 |
-
RULE 3 — MRZ IS GROUND TRUTH (do not override it):
|
| 226 |
-
- MRZ lines (uppercase A-Z, 0-9, < characters) are cryptographically verified
|
| 227 |
-
- MRZ date format is YYMMDD: first 2 = year, middle 2 = month, last 2 = day
|
| 228 |
-
Example: 030512 = year 03 → 2003, month 05, day 12 → 12/05/2003
|
| 229 |
-
Example: 260908 = year 26 → 2026, month 09, day 08 → 08/09/2026
|
| 230 |
-
- MRZ Sex: M = Male, F = Female
|
| 231 |
-
- If MRZ present, extract name/DOB/sex/expiry/nationality FROM MRZ LINES, not from visual text
|
| 232 |
|
| 233 |
-
|
|
|
|
| 234 |
|
| 235 |
-
|
| 236 |
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
| 📷 Profile Photo | {photo_present} | {photo_location} |
|
| 240 |
-
| ✍️ Signature | {sig_present} | {sig_location} |
|
| 241 |
-
| 🔐 MRZ Zone | {mrz_present} | Bottom strip |
|
| 242 |
|
| 243 |
-
-
|
|
|
|
| 244 |
|
| 245 |
-
|
| 246 |
|
| 247 |
-
|
|
|
|
|
|
|
|
|
|
| 248 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 249 |
|
| 250 |
-
-
|
|
|
|
|
|
|
|
|
|
| 251 |
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
|
|
|
|
|
|
| 255 |
|
|
|
|
| 256 |
|
| 257 |
---
|
| 258 |
|
| 259 |
-
##
|
| 260 |
-
|
| 261 |
-
|
|
| 262 |
-
|---
|
| 263 |
-
|
|
| 264 |
-
|
|
| 265 |
-
|
|
| 266 |
-
| 🎂 Date of Birth | |
|
| 267 |
-
| 📅 Issue Date | |
|
| 268 |
-
| ⏳ Expiry Date | |
|
| 269 |
-
| 🌍 Nationality | |
|
| 270 |
-
| ⚧️ Gender | |
|
| 271 |
-
| 🏠 Address | |
|
| 272 |
-
| 👨 Father / Guardian | |
|
| 273 |
-
| 🏛️ Issuing Authority | |
|
| 274 |
|
| 275 |
---
|
| 276 |
|
| 277 |
-
##
|
|
|
|
|
|
|
|
|
|
|
|
|
| 278 |
|
| 279 |
-
|
| 280 |
|
|
|
|
| 281 |
|
| 282 |
-
|
| 283 |
-
|
|
| 284 |
-
|
|
| 285 |
-
| Document Type | |
|
| 286 |
-
| Country Code | |
|
| 287 |
-
| Document Number | |
|
| 288 |
-
| Date of Birth | |
|
| 289 |
-
| Expiry Date | |
|
| 290 |
-
| Nationality | |
|
| 291 |
-
| Sex | |
|
| 292 |
|
| 293 |
---"""
|
| 294 |
|
| 295 |
|
| 296 |
-
#
|
|
|
|
|
|
|
| 297 |
|
| 298 |
print("\n" + "="*70)
|
| 299 |
-
print("🚀 LOADING
|
| 300 |
-
print("="*70
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 301 |
|
| 302 |
-
# Model 1: Chhagan_ML-VL-OCR-v1 (LoRA
|
| 303 |
-
print("
|
| 304 |
MODEL_ID_C1 = "Chhagan005/Chhagan_ML-VL-OCR-v1"
|
| 305 |
CHHAGAN_V1_AVAILABLE = False
|
| 306 |
-
processor_c1 = None
|
| 307 |
-
model_c1 = None
|
| 308 |
|
| 309 |
if PEFT_AVAILABLE:
|
| 310 |
try:
|
| 311 |
try:
|
| 312 |
config = PeftConfig.from_pretrained(MODEL_ID_C1)
|
| 313 |
-
|
| 314 |
-
print(f" Base model from config: {base_model_id}")
|
| 315 |
except:
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
torch_dtype=torch.float16,
|
| 323 |
-
device_map="auto",
|
| 324 |
-
trust_remote_code=True
|
| 325 |
-
)
|
| 326 |
-
model_c1 = PeftModel.from_pretrained(base_model_c1, MODEL_ID_C1)
|
| 327 |
-
model_c1 = model_c1.to(device).eval()
|
| 328 |
-
print(" ✅ Chhagan_ML-VL-OCR-v1 loaded successfully!")
|
| 329 |
CHHAGAN_V1_AVAILABLE = True
|
| 330 |
except Exception as e:
|
| 331 |
-
print(f" ❌
|
| 332 |
else:
|
| 333 |
-
print(" ⚠️ PEFT not available
|
| 334 |
|
| 335 |
-
# Model 2: Chhagan-DocVL-Qwen3
|
| 336 |
-
print("\n2️⃣
|
| 337 |
MODEL_ID_C2 = "Chhagan005/Chhagan-DocVL-Qwen3"
|
| 338 |
CHHAGAN_QWEN3_AVAILABLE = False
|
| 339 |
-
processor_c2 = None
|
| 340 |
-
model_c2 = None
|
| 341 |
|
| 342 |
if QWEN3_AVAILABLE:
|
| 343 |
try:
|
| 344 |
try:
|
| 345 |
if PEFT_AVAILABLE:
|
| 346 |
config = PeftConfig.from_pretrained(MODEL_ID_C2)
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
torch_dtype=torch.float16,
|
| 353 |
-
device_map="auto",
|
| 354 |
-
trust_remote_code=True
|
| 355 |
-
)
|
| 356 |
-
model_c2 = PeftModel.from_pretrained(base_model_c2, MODEL_ID_C2)
|
| 357 |
-
model_c2 = model_c2.to(device).eval()
|
| 358 |
else:
|
| 359 |
-
raise Exception("
|
| 360 |
except:
|
| 361 |
-
print(" Loading as full fine-tuned
|
| 362 |
processor_c2 = AutoProcessor.from_pretrained(MODEL_ID_C2, trust_remote_code=True)
|
| 363 |
model_c2 = Qwen3VLForConditionalGeneration.from_pretrained(
|
| 364 |
-
MODEL_ID_C2,
|
| 365 |
-
|
| 366 |
-
torch_dtype=torch.float16,
|
| 367 |
-
device_map="auto",
|
| 368 |
-
trust_remote_code=True
|
| 369 |
).to(device).eval()
|
| 370 |
-
print(" ✅
|
| 371 |
CHHAGAN_QWEN3_AVAILABLE = True
|
| 372 |
except Exception as e:
|
| 373 |
-
print(f" ❌
|
| 374 |
else:
|
| 375 |
-
print(" ⚠️ Qwen3VL not
|
| 376 |
|
| 377 |
-
# Model 3:
|
| 378 |
-
print("\n3️⃣
|
| 379 |
-
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
model_q3 = None
|
| 383 |
|
| 384 |
-
|
|
|
|
|
|
|
| 385 |
try:
|
| 386 |
-
|
| 387 |
-
|
| 388 |
-
|
| 389 |
-
attn_implementation="flash_attention_2",
|
| 390 |
torch_dtype=torch.float16,
|
| 391 |
device_map="auto",
|
| 392 |
-
trust_remote_code=True
|
| 393 |
-
).
|
| 394 |
-
|
| 395 |
-
|
| 396 |
-
|
| 397 |
-
|
| 398 |
-
|
| 399 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 400 |
|
| 401 |
-
# Model 4:
|
| 402 |
-
print("\n4️⃣
|
| 403 |
-
|
| 404 |
-
|
| 405 |
-
|
| 406 |
-
model_v = None
|
| 407 |
|
| 408 |
try:
|
| 409 |
-
|
| 410 |
-
|
| 411 |
-
|
| 412 |
-
|
| 413 |
-
|
| 414 |
-
|
| 415 |
-
|
| 416 |
-
|
| 417 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 418 |
except Exception as e:
|
| 419 |
-
print(f" ❌
|
| 420 |
|
| 421 |
print("\n" + "="*70)
|
| 422 |
-
print("📊 MODEL STATUS
|
| 423 |
print("="*70)
|
| 424 |
-
|
| 425 |
-
|
| 426 |
-
|
| 427 |
-
|
| 428 |
-
|
| 429 |
-
|
|
|
|
|
|
|
| 430 |
print("="*70)
|
| 431 |
-
|
| 432 |
-
print(f"
|
| 433 |
-
|
| 434 |
-
|
| 435 |
-
#
|
| 436 |
-
|
| 437 |
-
|
| 438 |
-
|
| 439 |
-
|
| 440 |
-
|
| 441 |
-
|
| 442 |
-
|
| 443 |
-
|
| 444 |
-
|
| 445 |
-
|
| 446 |
-
|
| 447 |
-
|
| 448 |
-
|
| 449 |
-
|
| 450 |
-
|
| 451 |
-
|
| 452 |
-
|
| 453 |
-
|
| 454 |
-
|
| 455 |
-
|
| 456 |
-
|
| 457 |
-
|
| 458 |
-
|
| 459 |
-
|
| 460 |
-
|
| 461 |
-
|
| 462 |
-
|
| 463 |
-
|
| 464 |
-
|
| 465 |
-
const inputs = Array.from(element.querySelectorAll('.ra-input'));
|
| 466 |
-
if (!inputs.length) return;
|
| 467 |
-
const choices = inputs.map(i => i.value);
|
| 468 |
-
function setHighlightByIndex(idx) {
|
| 469 |
-
const n = choices.length;
|
| 470 |
-
const pct = 100 / n;
|
| 471 |
-
highlight.style.width = `calc(${pct}% - 6px)`;
|
| 472 |
-
highlight.style.transform = `translateX(${idx * 100}%)`;
|
| 473 |
-
}
|
| 474 |
-
function setCheckedByValue(val, shouldTrigger=false) {
|
| 475 |
-
const idx = Math.max(0, choices.indexOf(val));
|
| 476 |
-
inputs.forEach((inp, i) => { inp.checked = (i === idx); });
|
| 477 |
-
setHighlightByIndex(idx);
|
| 478 |
-
props.value = choices[idx];
|
| 479 |
-
if (shouldTrigger) trigger('change', props.value);
|
| 480 |
-
}
|
| 481 |
-
setCheckedByValue(props.value ?? choices[0], false);
|
| 482 |
-
inputs.forEach((inp) => {
|
| 483 |
-
inp.addEventListener('change', () => {
|
| 484 |
-
setCheckedByValue(inp.value, true);
|
| 485 |
-
});
|
| 486 |
-
});
|
| 487 |
-
})();
|
| 488 |
-
"""
|
| 489 |
-
super().__init__(
|
| 490 |
-
value=value,
|
| 491 |
-
html_template=html_template,
|
| 492 |
-
js_on_load=js_on_load,
|
| 493 |
-
**kwargs
|
| 494 |
-
)
|
| 495 |
-
|
| 496 |
-
|
| 497 |
-
def apply_gpu_duration(val: str):
|
| 498 |
-
return int(val)
|
| 499 |
-
|
| 500 |
-
|
| 501 |
-
def calc_timeout_duration(model_name, text, image_front, image_back,
|
| 502 |
-
max_new_tokens, temperature, top_p,
|
| 503 |
-
top_k, repetition_penalty, gpu_timeout):
|
| 504 |
try:
|
| 505 |
-
|
| 506 |
-
|
| 507 |
-
|
| 508 |
-
|
| 509 |
-
|
| 510 |
-
|
| 511 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 512 |
|
| 513 |
-
# ===== STEP 1: RAW EXTRACTION (NO TRANSLATION) =====
|
| 514 |
|
| 515 |
-
def
|
| 516 |
-
|
| 517 |
-
"role": "user",
|
| 518 |
-
"content": [
|
| 519 |
-
{"type": "image"},
|
| 520 |
-
{"type": "text", "text": STEP1_EXTRACT_PROMPT},
|
| 521 |
-
]
|
| 522 |
-
}]
|
| 523 |
try:
|
| 524 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 525 |
except:
|
| 526 |
-
|
| 527 |
-
|
| 528 |
-
inputs = processor(
|
| 529 |
-
text=[prompt],
|
| 530 |
-
images=[image],
|
| 531 |
-
return_tensors="pt",
|
| 532 |
-
padding=True
|
| 533 |
-
).to(device)
|
| 534 |
-
|
| 535 |
-
with torch.no_grad():
|
| 536 |
-
output_ids = model.generate(
|
| 537 |
-
**inputs,
|
| 538 |
-
max_new_tokens=512,
|
| 539 |
-
do_sample=True,
|
| 540 |
-
temperature=temperature,
|
| 541 |
-
top_p=top_p,
|
| 542 |
-
top_k=top_k,
|
| 543 |
-
repetition_penalty=repetition_penalty,
|
| 544 |
-
)
|
| 545 |
-
input_len = inputs['input_ids'].shape[1]
|
| 546 |
-
generated = output_ids[:, input_len:]
|
| 547 |
-
return processor.batch_decode(generated, skip_special_tokens=True)[0]
|
| 548 |
-
|
| 549 |
-
|
| 550 |
-
# ===== PARSE STEP 1 OUTPUT =====
|
| 551 |
|
| 552 |
-
def parse_step1_output(raw_output: str) -> dict:
|
| 553 |
-
result = {
|
| 554 |
-
"photo_present": "❌ Not detected",
|
| 555 |
-
"photo_location": "N/A",
|
| 556 |
-
"sig_present": "❌ Not detected",
|
| 557 |
-
"sig_location": "N/A",
|
| 558 |
-
"mrz_present": "❌ Not detected",
|
| 559 |
-
"detected_lang": "Unknown",
|
| 560 |
-
"original_text": raw_output
|
| 561 |
-
}
|
| 562 |
-
|
| 563 |
-
def extract_field(pattern, text, default="N/A"):
|
| 564 |
-
match = re.search(pattern, text, re.IGNORECASE)
|
| 565 |
-
return match.group(1).strip() if match else default
|
| 566 |
-
|
| 567 |
-
photo = extract_field(r"PHOTO_PRESENT:\s*(yes|no)", raw_output)
|
| 568 |
-
result["photo_present"] = "✅ Yes" if photo.lower() == "yes" else "❌ No"
|
| 569 |
-
result["photo_location"] = extract_field(r"PHOTO_LOCATION:\s*([^\n]+)", raw_output)
|
| 570 |
-
|
| 571 |
-
sig = extract_field(r"SIGNATURE_PRESENT:\s*(yes|no)", raw_output)
|
| 572 |
-
result["sig_present"] = "✅ Yes" if sig.lower() == "yes" else "❌ No"
|
| 573 |
-
result["sig_location"] = extract_field(r"SIGNATURE_LOCATION:\s*([^\n]+)", raw_output)
|
| 574 |
-
|
| 575 |
-
mrz = extract_field(r"MRZ_PRESENT:\s*(yes|no)", raw_output)
|
| 576 |
-
result["mrz_present"] = "✅ Yes" if mrz.lower() == "yes" else "❌ No"
|
| 577 |
-
result["detected_lang"] = extract_field(r"DETECTED_LANGUAGE:\s*([^\n]+)", raw_output, "Unknown")
|
| 578 |
-
|
| 579 |
-
text_match = re.search(r"---TEXT_START---\n?(.*?)---TEXT_END---", raw_output, re.DOTALL)
|
| 580 |
-
if text_match:
|
| 581 |
-
result["original_text"] = text_match.group(1).strip()
|
| 582 |
|
| 583 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 584 |
|
| 585 |
|
| 586 |
def parse_mrz_lines(raw_text: str) -> dict:
|
| 587 |
-
"""
|
| 588 |
-
|
| 589 |
-
|
| 590 |
-
Returns verified dict. Does NOT rely on LLM for date/sex/name parsing.
|
| 591 |
-
"""
|
| 592 |
-
import datetime
|
| 593 |
|
| 594 |
lines = []
|
| 595 |
for line in raw_text.split('\n'):
|
| 596 |
clean = re.sub(r'\s+', '', line.strip())
|
| 597 |
-
if re.match(r'^[A-Z0-9<]{
|
| 598 |
lines.append(clean)
|
| 599 |
|
| 600 |
if not lines:
|
|
@@ -602,169 +531,428 @@ def parse_mrz_lines(raw_text: str) -> dict:
|
|
| 602 |
|
| 603 |
def decode_date(yymmdd: str, is_dob: bool = False) -> str:
|
| 604 |
try:
|
| 605 |
-
yy = int(yymmdd[0:2])
|
| 606 |
-
mm = int(yymmdd[2:4])
|
| 607 |
-
dd = int(yymmdd[4:6])
|
| 608 |
if not (1 <= mm <= 12 and 1 <= dd <= 31):
|
| 609 |
return f"Invalid ({yymmdd})"
|
| 610 |
-
|
| 611 |
-
year = (1900 + yy) if (is_dob and yy >
|
| 612 |
return f"{dd:02d}/{mm:02d}/{year}"
|
| 613 |
except:
|
| 614 |
return yymmdd
|
| 615 |
|
| 616 |
-
def
|
| 617 |
return re.sub(r'<+$', '', s).replace('<', ' ').strip()
|
| 618 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 619 |
result = {}
|
| 620 |
|
| 621 |
-
# TD1: 3 lines, 28-
|
| 622 |
td1 = [l for l in lines if 28 <= len(l) <= 36]
|
| 623 |
if len(td1) >= 2:
|
| 624 |
l1, l2 = td1[0], td1[1]
|
| 625 |
l3 = td1[2] if len(td1) > 2 else ""
|
| 626 |
-
|
| 627 |
-
|
| 628 |
-
|
| 629 |
-
|
| 630 |
-
result['
|
| 631 |
-
|
| 632 |
-
|
| 633 |
-
result['
|
| 634 |
-
|
| 635 |
-
result['sex'] = 'Male' if sex_char == 'M' else ('Female' if sex_char == 'F' else sex_char)
|
| 636 |
-
if len(l2) >= 14:
|
| 637 |
-
result['expiry'] = decode_date(l2[8:14], is_dob=False)
|
| 638 |
-
if len(l2) >= 18:
|
| 639 |
-
result['nationality'] = clean_field(l2[15:18])
|
| 640 |
-
|
| 641 |
if l3:
|
| 642 |
-
|
| 643 |
-
if '<<' in name_clean:
|
| 644 |
-
parts = name_clean.split('<<')
|
| 645 |
-
surname = parts[0].replace('<', ' ').strip()
|
| 646 |
-
given = parts[1].replace('<', ' ').strip() if len(parts) > 1 else ''
|
| 647 |
-
result['name'] = f"{given} {surname}".strip() if given else surname
|
| 648 |
-
else:
|
| 649 |
-
result['name'] = name_clean.replace('<', ' ').strip()
|
| 650 |
-
|
| 651 |
result['mrz_format'] = 'TD1'
|
| 652 |
return result
|
| 653 |
|
| 654 |
-
# TD3: 2 lines, 40-48 chars
|
| 655 |
td3 = [l for l in lines if 40 <= len(l) <= 48]
|
| 656 |
if len(td3) >= 2:
|
| 657 |
l1, l2 = td3[0], td3[1]
|
| 658 |
-
|
| 659 |
-
|
| 660 |
-
|
| 661 |
-
result['country_code'] = clean_field(l1[2:5])
|
| 662 |
-
name_section = l1[5:min(44, len(l1))]
|
| 663 |
-
if '<<' in name_section:
|
| 664 |
-
parts = name_section.split('<<')
|
| 665 |
-
surname = parts[0].replace('<', ' ').strip()
|
| 666 |
-
given = parts[1].replace('<', ' ').strip() if len(parts) > 1 else ''
|
| 667 |
-
result['name'] = f"{given} {surname}".strip() if given else surname
|
| 668 |
-
else:
|
| 669 |
-
result['name'] = name_section.replace('<', ' ').strip()
|
| 670 |
-
|
| 671 |
if len(l2) >= 27:
|
| 672 |
-
result['doc_number']
|
| 673 |
-
result['nationality']
|
| 674 |
-
result['dob']
|
| 675 |
-
|
| 676 |
-
result['sex']
|
| 677 |
-
result['expiry']
|
| 678 |
-
|
| 679 |
result['mrz_format'] = 'TD3'
|
| 680 |
return result
|
| 681 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 682 |
return {}
|
| 683 |
|
| 684 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 685 |
|
| 686 |
def run_step2_structure(model, processor, metadata: dict, device,
|
| 687 |
max_new_tokens, temperature, top_p, top_k, repetition_penalty):
|
| 688 |
-
|
| 689 |
-
|
| 690 |
-
|
| 691 |
-
|
| 692 |
-
|
| 693 |
-
|
| 694 |
-
|
| 695 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 696 |
)
|
| 697 |
|
| 698 |
-
messages = [{"role": "user", "content": [{"type": "text", "text":
|
| 699 |
try:
|
| 700 |
prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
|
| 701 |
except:
|
| 702 |
-
prompt =
|
| 703 |
|
| 704 |
inputs = processor(text=[prompt], return_tensors="pt", padding=True).to(device)
|
| 705 |
|
| 706 |
streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
|
| 707 |
gen_kwargs = {
|
| 708 |
-
**inputs,
|
| 709 |
-
"
|
| 710 |
-
"
|
| 711 |
-
"do_sample": True,
|
| 712 |
-
"temperature": temperature,
|
| 713 |
-
"top_p": top_p,
|
| 714 |
-
"top_k": top_k,
|
| 715 |
-
"repetition_penalty": repetition_penalty,
|
| 716 |
}
|
| 717 |
thread = Thread(target=model.generate, kwargs=gen_kwargs)
|
| 718 |
thread.start()
|
| 719 |
-
return streamer, thread
|
| 720 |
|
|
|
|
|
|
|
| 721 |
|
| 722 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 723 |
|
| 724 |
-
|
| 725 |
-
summary = "## 🔄 Unified Deduplicated Record\n\n"
|
| 726 |
-
summary += "> *Unique fields from both sides merged. Conflicts flagged with ⚠️.*\n\n"
|
| 727 |
|
| 728 |
-
|
| 729 |
-
|
| 730 |
-
table_match = re.search(
|
| 731 |
-
r"## 🗂️ Key Fields.*?\n\|.*?\n\|[-| ]+\n(.*?)(?=\n---|\Z)", text, re.DOTALL
|
| 732 |
-
)
|
| 733 |
-
if table_match:
|
| 734 |
-
for line in table_match.group(1).strip().split("\n"):
|
| 735 |
-
parts = [p.strip() for p in line.split("|") if p.strip()]
|
| 736 |
-
if len(parts) >= 2:
|
| 737 |
-
field = re.sub(r"[^\w\s/]", "", parts[0]).strip()
|
| 738 |
-
value = parts[1].strip()
|
| 739 |
-
if value and value != "—":
|
| 740 |
-
rows[field] = value
|
| 741 |
-
return rows
|
| 742 |
|
| 743 |
-
|
| 744 |
-
back_fields = extract_table_rows(back_result)
|
| 745 |
-
all_fields = list(dict.fromkeys(list(front_fields.keys()) + list(back_fields.keys())))
|
| 746 |
|
| 747 |
-
|
| 748 |
-
summary += "|-------|-------|--------|\n"
|
| 749 |
|
| 750 |
-
|
| 751 |
-
f_val = front_fields.get(field, "")
|
| 752 |
-
b_val = back_fields.get(field, "")
|
| 753 |
|
| 754 |
-
|
| 755 |
-
|
| 756 |
-
summary += f"| {field} | {f_val} | Front + Back ✅ |\n"
|
| 757 |
-
else:
|
| 758 |
-
summary += f"| {field} | Front: **{f_val}** / Back: **{b_val}** | ⚠️ Mismatch |\n"
|
| 759 |
-
elif f_val:
|
| 760 |
-
summary += f"| {field} | {f_val} | Front only |\n"
|
| 761 |
-
elif b_val:
|
| 762 |
-
summary += f"| {field} | {b_val} | Back only |\n"
|
| 763 |
|
| 764 |
-
|
| 765 |
|
|
|
|
|
|
|
| 766 |
|
| 767 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 768 |
|
| 769 |
@spaces.GPU(duration=calc_timeout_duration)
|
| 770 |
def generate_dual_card_ocr(model_name: str, text: str,
|
|
@@ -773,69 +961,57 @@ def generate_dual_card_ocr(model_name: str, text: str,
|
|
| 773 |
top_k: int, repetition_penalty: float, gpu_timeout: int):
|
| 774 |
|
| 775 |
# Model selection
|
| 776 |
-
|
| 777 |
-
|
| 778 |
-
|
| 779 |
-
|
| 780 |
-
|
| 781 |
-
|
| 782 |
-
elif model_name == "Chhagan-DocVL-Qwen3 🔥":
|
| 783 |
-
if not CHHAGAN_QWEN3_AVAILABLE:
|
| 784 |
-
yield "Chhagan-DocVL-Qwen3 model is not available.", "Chhagan-DocVL-Qwen3 model is not available."
|
| 785 |
-
return
|
| 786 |
-
processor, model = processor_c2, model_c2
|
| 787 |
-
|
| 788 |
-
elif model_name == "Qwen3-VL-2B (Baseline) 📊":
|
| 789 |
-
if not QWEN3_BASELINE_AVAILABLE:
|
| 790 |
-
yield "Qwen3-VL-2B-Instruct baseline model is not available.", "Qwen3-VL-2B-Instruct baseline model is not available."
|
| 791 |
-
return
|
| 792 |
-
processor, model = processor_q3, model_q3
|
| 793 |
-
|
| 794 |
-
elif model_name == "Nanonets-OCR2-3B":
|
| 795 |
-
if not NANONETS_AVAILABLE:
|
| 796 |
-
yield "Nanonets-OCR2-3B model is not available.", "Nanonets-OCR2-3B model is not available."
|
| 797 |
-
return
|
| 798 |
-
processor, model = processor_v, model_v
|
| 799 |
|
| 800 |
-
|
| 801 |
-
yield "Invalid model
|
| 802 |
-
|
|
|
|
|
|
|
|
|
|
| 803 |
|
| 804 |
if image_front is None and image_back is None:
|
| 805 |
-
yield "Please upload at least one card image
|
| 806 |
-
return
|
| 807 |
|
| 808 |
full_output = ""
|
| 809 |
front_result = ""
|
| 810 |
back_result = ""
|
| 811 |
-
|
| 812 |
-
|
|
|
|
| 813 |
|
| 814 |
-
#
|
| 815 |
if image_front is not None:
|
| 816 |
full_output += "# 🎴 FRONT CARD\n\n"
|
| 817 |
-
full_output += "⏳ **Step 1
|
| 818 |
yield full_output, full_output
|
| 819 |
|
| 820 |
-
step1_raw = run_step1_extraction(
|
| 821 |
-
|
| 822 |
-
temperature, top_p, top_k, repetition_penalty
|
| 823 |
-
)
|
| 824 |
front_meta = parse_step1_output(step1_raw)
|
|
|
|
| 825 |
|
| 826 |
-
full_output += f"✅ **Step 1
|
| 827 |
-
full_output += "⏳ **Step 2
|
| 828 |
yield full_output, full_output
|
| 829 |
|
| 830 |
-
streamer_f, thread_f = run_step2_structure(
|
| 831 |
model, processor, front_meta, device,
|
| 832 |
-
max_new_tokens, temperature, top_p, top_k, repetition_penalty
|
| 833 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 834 |
|
| 835 |
-
buffer_f = ""
|
| 836 |
for new_text in streamer_f:
|
| 837 |
-
buffer_f += new_text
|
| 838 |
-
buffer_f = buffer_f.replace("<|im_end|>", "").replace("<|endoftext|>", "")
|
| 839 |
time.sleep(0.01)
|
| 840 |
yield full_output + buffer_f, full_output + buffer_f
|
| 841 |
|
|
@@ -843,31 +1019,33 @@ def generate_dual_card_ocr(model_name: str, text: str,
|
|
| 843 |
front_result = buffer_f
|
| 844 |
thread_f.join()
|
| 845 |
|
| 846 |
-
#
|
| 847 |
if image_back is not None:
|
| 848 |
full_output += "\n\n---\n\n# 🎴 BACK CARD\n\n"
|
| 849 |
-
full_output += "⏳ **Step 1
|
| 850 |
yield full_output, full_output
|
| 851 |
|
| 852 |
-
step1_raw_back = run_step1_extraction(
|
| 853 |
-
|
| 854 |
-
temperature, top_p, top_k, repetition_penalty
|
| 855 |
-
)
|
| 856 |
back_meta = parse_step1_output(step1_raw_back)
|
|
|
|
| 857 |
|
| 858 |
-
full_output += f"✅ **Step 1
|
| 859 |
-
full_output += "⏳ **Step 2
|
| 860 |
yield full_output, full_output
|
| 861 |
|
| 862 |
-
streamer_b, thread_b = run_step2_structure(
|
| 863 |
model, processor, back_meta, device,
|
| 864 |
-
max_new_tokens, temperature, top_p, top_k, repetition_penalty
|
| 865 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 866 |
|
| 867 |
-
buffer_b = ""
|
| 868 |
for new_text in streamer_b:
|
| 869 |
-
buffer_b += new_text
|
| 870 |
-
buffer_b = buffer_b.replace("<|im_end|>", "").replace("<|endoftext|>", "")
|
| 871 |
time.sleep(0.01)
|
| 872 |
yield full_output + buffer_b, full_output + buffer_b
|
| 873 |
|
|
@@ -875,81 +1053,64 @@ def generate_dual_card_ocr(model_name: str, text: str,
|
|
| 875 |
back_result = buffer_b
|
| 876 |
thread_b.join()
|
| 877 |
|
| 878 |
-
|
| 879 |
-
# ← NEW BLOCK: Try back card first (MRZ usually on back), then front
|
| 880 |
-
mrz_data = {}
|
| 881 |
-
if back_meta_saved:
|
| 882 |
-
mrz_data = parse_mrz_lines(back_meta_saved.get('original_text', ''))
|
| 883 |
-
if not mrz_data and front_meta_saved:
|
| 884 |
-
mrz_data = parse_mrz_lines(front_meta_saved.get('original_text', ''))
|
| 885 |
-
|
| 886 |
-
if mrz_data:
|
| 887 |
-
full_output += f"\n\n> ✅ **MRZ Python-parsed successfully** ({mrz_data.get('mrz_format','?')} format) — ground truth applied to summary below.\n"
|
| 888 |
-
|
| 889 |
-
# ===== UNIFIED SUMMARY (only when both sides uploaded) =====
|
| 890 |
if image_front is not None and image_back is not None:
|
| 891 |
full_output += "\n\n---\n\n"
|
| 892 |
-
full_output += build_unified_summary(front_result, back_result)
|
| 893 |
|
| 894 |
-
|
|
|
|
| 895 |
yield full_output, full_output
|
| 896 |
|
| 897 |
|
| 898 |
-
#
|
|
|
|
|
|
|
| 899 |
|
| 900 |
model_choices = []
|
| 901 |
-
if CHHAGAN_V1_AVAILABLE:
|
| 902 |
-
|
| 903 |
-
if
|
| 904 |
-
|
| 905 |
-
if
|
| 906 |
-
model_choices.append("Qwen3-VL-2B (Baseline) 📊")
|
| 907 |
-
if NANONETS_AVAILABLE:
|
| 908 |
-
model_choices.append("Nanonets-OCR2-3B")
|
| 909 |
-
|
| 910 |
-
if not model_choices:
|
| 911 |
-
model_choices = ["No models available"]
|
| 912 |
|
| 913 |
dual_card_examples = [
|
| 914 |
-
["Extract complete information
|
| 915 |
-
["Multilingual OCR with MRZ
|
| 916 |
-
["Extract profile photo and signature
|
| 917 |
]
|
| 918 |
|
| 919 |
|
| 920 |
-
#
|
|
|
|
|
|
|
| 921 |
|
| 922 |
demo = gr.Blocks(css=css, theme=steel_blue_theme)
|
| 923 |
with demo:
|
| 924 |
-
gr.Markdown("# 🌍 **
|
| 925 |
-
gr.Markdown("### *
|
| 926 |
|
| 927 |
loaded_models = []
|
| 928 |
-
if CHHAGAN_V1_AVAILABLE:
|
| 929 |
-
|
| 930 |
-
if
|
| 931 |
-
|
| 932 |
-
|
| 933 |
-
|
| 934 |
-
if NANONETS_AVAILABLE:
|
| 935 |
-
loaded_models.append("Nanonets")
|
| 936 |
-
|
| 937 |
-
model_info = f"**Loaded Models ({len(loaded_models)}/4):** {', '.join(loaded_models)}" if loaded_models else "⚠️ No models loaded"
|
| 938 |
gr.Markdown(f"**Status:** {model_info}")
|
| 939 |
-
gr.Markdown("**Pipeline:** ✅
|
| 940 |
|
| 941 |
with gr.Row():
|
| 942 |
with gr.Column(scale=2):
|
| 943 |
image_query = gr.Textbox(
|
| 944 |
label="💬 Custom Query (Optional)",
|
| 945 |
-
placeholder="Leave empty for automatic full extraction
|
| 946 |
value=""
|
| 947 |
)
|
| 948 |
-
|
| 949 |
gr.Markdown("### 📤 Upload ID Cards")
|
| 950 |
with gr.Row():
|
| 951 |
image_front = gr.Image(type="pil", label="🎴 Front Card", height=250)
|
| 952 |
-
image_back
|
| 953 |
|
| 954 |
image_submit = gr.Button("🚀 Extract + Translate + Structure", variant="primary", size="lg")
|
| 955 |
|
|
@@ -960,23 +1121,23 @@ with demo:
|
|
| 960 |
)
|
| 961 |
|
| 962 |
with gr.Accordion("⚙️ Advanced Settings", open=False):
|
| 963 |
-
max_new_tokens
|
| 964 |
-
temperature
|
| 965 |
-
top_p
|
| 966 |
-
top_k
|
| 967 |
-
repetition_penalty
|
| 968 |
|
| 969 |
with gr.Column(scale=3):
|
| 970 |
gr.Markdown("## 📄 Extraction Results", elem_id="output-title")
|
| 971 |
output = gr.Textbox(label="Raw Output (Streaming)", interactive=True, lines=15)
|
| 972 |
-
with gr.Accordion("📝
|
| 973 |
markdown_output = gr.Markdown(label="Formatted Result")
|
| 974 |
|
| 975 |
model_choice = gr.Radio(
|
| 976 |
choices=model_choices,
|
| 977 |
-
label="🤖 Select
|
| 978 |
value=model_choices[0] if model_choices else None,
|
| 979 |
-
info="
|
| 980 |
)
|
| 981 |
|
| 982 |
with gr.Row(elem_id="gpu-duration-container"):
|
|
@@ -984,21 +1145,20 @@ with demo:
|
|
| 984 |
gr.Markdown("**⏱️ GPU Duration (seconds)**")
|
| 985 |
radioanimated_gpu_duration = RadioAnimated(
|
| 986 |
choices=["60", "90", "120", "180", "240"],
|
| 987 |
-
value="
|
| 988 |
elem_id="radioanimated_gpu_duration"
|
| 989 |
)
|
| 990 |
-
gpu_duration_state = gr.Number(value=
|
| 991 |
|
| 992 |
gr.Markdown("""
|
| 993 |
-
|
| 994 |
-
|
| 995 |
-
|
| 996 |
-
|
| 997 |
-
|
| 998 |
-
|
| 999 |
-
|
| 1000 |
-
|
| 1001 |
-
""")
|
| 1002 |
|
| 1003 |
radioanimated_gpu_duration.change(
|
| 1004 |
fn=apply_gpu_duration,
|
|
@@ -1009,62 +1169,42 @@ with demo:
|
|
| 1009 |
|
| 1010 |
image_submit.click(
|
| 1011 |
fn=generate_dual_card_ocr,
|
| 1012 |
-
inputs=[
|
| 1013 |
-
|
| 1014 |
-
|
| 1015 |
-
max_new_tokens, temperature, top_p,
|
| 1016 |
-
top_k, repetition_penalty, gpu_duration_state
|
| 1017 |
-
],
|
| 1018 |
outputs=[output, markdown_output]
|
| 1019 |
)
|
| 1020 |
|
| 1021 |
gr.Markdown("""
|
| 1022 |
-
|
| 1023 |
-
|
| 1024 |
-
|
| 1025 |
-
|
| 1026 |
-
|
| 1027 |
-
|
| 1028 |
-
|
| 1029 |
-
|
| 1030 |
-
|
| 1031 |
-
|
| 1032 |
-
|
| 1033 |
-
|
| 1034 |
-
|
| 1035 |
-
|
| 1036 |
-
|
| 1037 |
-
|
| 1038 |
-
|
| 1039 |
-
|
| 1040 |
-
|
| 1041 |
-
|
| 1042 |
-
|
| 1043 |
-
|
| 1044 |
-
### 🔒 Privacy
|
| 1045 |
-
- All processing on-device (GPU)
|
| 1046 |
-
- No data stored or transmitted
|
| 1047 |
-
- GDPR compliant
|
| 1048 |
-
|
| 1049 |
-
**💡 Pro Tip**: Upload both front and back for full deduplication and MRZ cross-validation!
|
| 1050 |
-
""")
|
| 1051 |
|
| 1052 |
|
| 1053 |
if __name__ == "__main__":
|
| 1054 |
-
print("\n
|
| 1055 |
-
print("🚀 STARTING GRADIO INTERFACE...")
|
| 1056 |
-
print("="*70 + "\n")
|
| 1057 |
try:
|
| 1058 |
demo.queue(max_size=50).launch(
|
| 1059 |
-
server_name="0.0.0.0",
|
| 1060 |
-
server_port=7860,
|
| 1061 |
-
show_error=True,
|
| 1062 |
-
share=False
|
| 1063 |
-
)
|
| 1064 |
-
print("✅ Gradio app launched successfully!")
|
| 1065 |
except Exception as e:
|
| 1066 |
-
print(f"❌ Launch error: {e}")
|
| 1067 |
import traceback
|
|
|
|
| 1068 |
traceback.print_exc()
|
| 1069 |
-
|
| 1070 |
-
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
╔══════════════════════════════════════════════════════════════════╗
|
| 3 |
+
║ CSM DUAL-CARD ID OCR SYSTEM — ARCHITECTURE NOTE ║
|
| 4 |
+
╠══════════════════════════════════════════════════════════════════╣
|
| 5 |
+
║ MODEL TASKS (8B VLM): ║
|
| 6 |
+
║ Step 1 → Raw OCR: All text, original script, no translate ║
|
| 7 |
+
║ Step 2 → Doc classify + non-English gap fill only ║
|
| 8 |
+
║ PYTHON TASKS (Authoritative): ║
|
| 9 |
+
║ MRZ parse+verify | Numeral convert | Calendar convert ║
|
| 10 |
+
║ English label extract | Script separate | Cross verify ║
|
| 11 |
+
╚══════════════════════════════════════════════════════════════════╝
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
import os
|
|
|
|
| 15 |
import uuid
|
|
|
|
| 16 |
import time
|
| 17 |
import re
|
| 18 |
+
import datetime
|
| 19 |
from threading import Thread
|
| 20 |
+
from typing import Iterable, Dict, Any
|
| 21 |
|
| 22 |
import gradio as gr
|
| 23 |
import spaces
|
| 24 |
import torch
|
|
|
|
| 25 |
from PIL import Image
|
|
|
|
| 26 |
|
| 27 |
os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"
|
| 28 |
os.environ["HF_HOME"] = "/tmp/hf_home"
|
|
|
|
| 32 |
Qwen2VLForConditionalGeneration,
|
| 33 |
AutoProcessor,
|
| 34 |
TextIteratorStreamer,
|
| 35 |
+
BitsAndBytesConfig,
|
| 36 |
)
|
| 37 |
|
| 38 |
try:
|
|
|
|
| 40 |
PEFT_AVAILABLE = True
|
| 41 |
except:
|
| 42 |
PEFT_AVAILABLE = False
|
| 43 |
+
print("⚠️ PEFT not available")
|
| 44 |
|
| 45 |
try:
|
| 46 |
from transformers import Qwen3VLForConditionalGeneration
|
|
|
|
| 49 |
QWEN3_AVAILABLE = False
|
| 50 |
print("⚠️ Qwen3VL not available in current transformers version")
|
| 51 |
|
|
|
|
| 52 |
from gradio.themes import Soft
|
| 53 |
from gradio.themes.utils import colors, fonts, sizes
|
| 54 |
|
| 55 |
+
# ===== THEME =====
|
| 56 |
colors.steel_blue = colors.Color(
|
| 57 |
name="steel_blue",
|
| 58 |
+
c50="#EBF3F8", c100="#D3E5F0", c200="#A8CCE1", c300="#7DB3D2",
|
| 59 |
+
c400="#529AC3", c500="#4682B4", c600="#3E72A0", c700="#36638C",
|
| 60 |
+
c800="#2E5378", c900="#264364", c950="#1E3450",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
)
|
| 62 |
|
| 63 |
class SteelBlueTheme(Soft):
|
| 64 |
+
def __init__(self, *, primary_hue=colors.gray, secondary_hue=colors.steel_blue,
|
| 65 |
+
neutral_hue=colors.slate, text_size=sizes.text_lg,
|
| 66 |
+
font=(fonts.GoogleFont("Outfit"), "Arial", "sans-serif"),
|
| 67 |
+
font_mono=(fonts.GoogleFont("IBM Plex Mono"), "ui-monospace", "monospace")):
|
| 68 |
+
super().__init__(primary_hue=primary_hue, secondary_hue=secondary_hue,
|
| 69 |
+
neutral_hue=neutral_hue, text_size=text_size, font=font, font_mono=font_mono)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
super().set(
|
| 71 |
background_fill_primary="*primary_50",
|
| 72 |
background_fill_primary_dark="*primary_900",
|
| 73 |
body_background_fill="linear-gradient(135deg, *primary_200, *primary_100)",
|
| 74 |
body_background_fill_dark="linear-gradient(135deg, *primary_900, *primary_800)",
|
| 75 |
button_primary_text_color="white",
|
|
|
|
| 76 |
button_primary_background_fill="linear-gradient(90deg, *secondary_500, *secondary_600)",
|
| 77 |
button_primary_background_fill_hover="linear-gradient(90deg, *secondary_600, *secondary_700)",
|
|
|
|
|
|
|
| 78 |
button_secondary_text_color="black",
|
|
|
|
| 79 |
button_secondary_background_fill="linear-gradient(90deg, *primary_300, *primary_300)",
|
| 80 |
button_secondary_background_fill_hover="linear-gradient(90deg, *primary_400, *primary_400)",
|
|
|
|
|
|
|
| 81 |
slider_color="*secondary_500",
|
|
|
|
| 82 |
block_title_text_weight="600",
|
| 83 |
block_border_width="3px",
|
| 84 |
block_shadow="*shadow_drop_lg",
|
|
|
|
| 94 |
#main-title h1 { font-size: 2.3em !important; }
|
| 95 |
#output-title h2 { font-size: 2.2em !important; }
|
| 96 |
.ra-wrap{ width: fit-content; }
|
| 97 |
+
.ra-inner{ position: relative; display: inline-flex; align-items: center; gap: 0; padding: 6px;
|
| 98 |
+
background: var(--neutral-200); border-radius: 9999px; overflow: hidden; }
|
|
|
|
|
|
|
| 99 |
.ra-input{ display: none; }
|
| 100 |
+
.ra-label{ position: relative; z-index: 2; padding: 8px 16px; font-family: inherit; font-size: 14px;
|
| 101 |
+
font-weight: 600; color: var(--neutral-500); cursor: pointer; transition: color 0.2s; white-space: nowrap; }
|
| 102 |
+
.ra-highlight{ position: absolute; z-index: 1; top: 6px; left: 6px; height: calc(100% - 12px);
|
| 103 |
+
border-radius: 9999px; background: white; box-shadow: 0 2px 4px rgba(0,0,0,0.1);
|
| 104 |
+
transition: transform 0.2s, width 0.2s; }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
.ra-input:checked + .ra-label{ color: black; }
|
| 106 |
.dark .ra-inner { background: var(--neutral-800); }
|
| 107 |
.dark .ra-label { color: var(--neutral-400); }
|
| 108 |
.dark .ra-highlight { background: var(--neutral-600); }
|
| 109 |
.dark .ra-input:checked + .ra-label { color: white; }
|
| 110 |
+
#gpu-duration-container { padding: 10px; border-radius: 8px;
|
| 111 |
+
background: var(--background-fill-secondary); border: 1px solid var(--border-color-primary); margin-top: 10px; }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
"""
|
| 113 |
|
| 114 |
MAX_MAX_NEW_TOKENS = 4096
|
| 115 |
DEFAULT_MAX_NEW_TOKENS = 1024
|
| 116 |
MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
|
|
|
|
| 117 |
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
| 118 |
|
| 119 |
+
print("CUDA available:", torch.cuda.is_available())
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
if torch.cuda.is_available():
|
| 121 |
+
print("Device:", torch.cuda.get_device_name(0))
|
| 122 |
+
print("Using:", device)
|
|
|
|
|
|
|
| 123 |
|
|
|
|
| 124 |
|
| 125 |
+
# ╔══════════════════════════════════════════╗
|
| 126 |
+
# ║ UNIVERSAL PROMPTS ║
|
| 127 |
+
# ╚══════════════════════════════════════════╝
|
| 128 |
|
| 129 |
+
STEP1_EXTRACT_PROMPT = """You are a universal OCR engine. Transcribe ALL visible text from this document image.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
|
| 131 |
+
OUTPUT FORMAT — fill exactly as shown:
|
| 132 |
PHOTO_PRESENT: yes/no
|
| 133 |
+
PHOTO_LOCATION: [describe position: top-left / top-right / center-left / not found]
|
| 134 |
SIGNATURE_PRESENT: yes/no
|
| 135 |
+
SIGNATURE_LOCATION: [describe position: bottom-left / bottom-right / not found]
|
| 136 |
MRZ_PRESENT: yes/no
|
| 137 |
+
DETECTED_LANGUAGE: [list all languages visible e.g. Arabic+English, Farsi+English, Hindi+English, Chinese, English]
|
| 138 |
---TEXT_START---
|
| 139 |
+
[Every word, number, symbol, label and value visible — line by line]
|
| 140 |
+
[Original script preserved: Arabic, Farsi, Hindi, Chinese, Cyrillic etc. — DO NOT translate here]
|
| 141 |
+
[Copy label AND its value together: e.g. "DATE OF BIRTH 12/05/2003"]
|
| 142 |
+
[MRZ lines: copy character-perfect including ALL < symbols]
|
| 143 |
+
[Include corner text, watermarks, small print]
|
| 144 |
+
---TEXT_END---
|
| 145 |
|
| 146 |
+
ABSOLUTE RULES:
|
| 147 |
+
- NEVER output pixel coordinates like (50,68) or bounding boxes — plain text ONLY
|
| 148 |
+
- DO NOT translate in this step — original script as-is
|
| 149 |
+
- DO NOT skip or summarize any field
|
| 150 |
+
- Copy every character exactly including < symbols in MRZ"""
|
| 151 |
|
|
|
|
| 152 |
|
| 153 |
+
STEP2_TEMPLATE = """You are a universal KYC document analyst.
|
| 154 |
+
The Python pipeline has already extracted English fields and parsed MRZ.
|
| 155 |
+
Your job is ONLY: classify document + fill gaps from non-English text.
|
|
|
|
|
|
|
| 156 |
|
| 157 |
+
━━━ ALREADY EXTRACTED BY PYTHON (DO NOT RE-EXTRACT) ━━━
|
|
|
|
| 158 |
|
| 159 |
+
English Fields Found Directly on Card:
|
| 160 |
+
{python_fields_table}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 161 |
|
| 162 |
+
MRZ Python Parse Result:
|
| 163 |
+
{mrz_summary}
|
| 164 |
|
| 165 |
+
━━━ YOUR INPUT DATA ━━━
|
| 166 |
|
| 167 |
+
English text block from card:
|
| 168 |
+
{english_block}
|
|
|
|
|
|
|
|
|
|
| 169 |
|
| 170 |
+
Non-English original script block:
|
| 171 |
+
{original_block}
|
| 172 |
|
| 173 |
+
━━━ YOUR TASKS — ONLY THESE 3 ━━━
|
| 174 |
|
| 175 |
+
TASK 1: Identify document type and issuing info
|
| 176 |
+
- Read English block and original block
|
| 177 |
+
- Keywords: PASSPORT/RESIDENT CARD/NATIONAL ID/DRIVING LICENCE/بطاقة/جواز/رخصة/आधार/PAN
|
| 178 |
+
- Top of card = issuing country/institution (NOT person name)
|
| 179 |
|
| 180 |
+
TASK 2: Classify non-English labels → check if already in English fields above
|
| 181 |
+
- If نام (Farsi: Name) value already in Python English fields → SKIP
|
| 182 |
+
- If شماره ملی (National Number) already in Python fields → SKIP
|
| 183 |
+
- Only add fields GENUINELY missing from Python extraction
|
| 184 |
|
| 185 |
+
TASK 3: Transliterate non-English values NOT found in English block
|
| 186 |
+
- Example: محمد → Mohammad | چراغی → Cheraghi
|
| 187 |
+
- Dates in Shamsi/Hijri: write BOTH original AND note calendar type
|
| 188 |
+
(DO NOT convert — Python handles conversion)
|
| 189 |
|
| 190 |
+
RULES:
|
| 191 |
+
- NEVER copy template placeholders like [fill here] or [value]
|
| 192 |
+
- NEVER re-state what Python already found
|
| 193 |
+
- NEVER guess values not visible in card
|
| 194 |
+
- If all fields already covered → write "✅ All fields covered by Python extraction"
|
| 195 |
|
| 196 |
+
━━━ OUTPUT FORMAT ━━━
|
| 197 |
|
| 198 |
---
|
| 199 |
|
| 200 |
+
## 📋 Document Classification
|
| 201 |
+
|
| 202 |
+
| | |
|
| 203 |
+
|---|---|
|
| 204 |
+
| **Document Type** | |
|
| 205 |
+
| **Issuing Country** | |
|
| 206 |
+
| **Issuing Authority** | |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 207 |
|
| 208 |
---
|
| 209 |
|
| 210 |
+
## ➕ Additional Fields (non-English only — genuinely new)
|
| 211 |
+
|
| 212 |
+
| Label (Original) | Label (English) | Value (Original) | Value (Transliterated) |
|
| 213 |
+
|---|---|---|---|
|
| 214 |
+
| [only if not in Python fields above] | | | |
|
| 215 |
|
| 216 |
+
---
|
| 217 |
|
| 218 |
+
## 🗓️ Calendar Note (if non-Gregorian dates found)
|
| 219 |
|
| 220 |
+
| Original Date | Calendar System | Note |
|
| 221 |
+
|---|---|---|
|
| 222 |
+
| [date as on card] | [Solar Hijri / Lunar Hijri / Buddhist] | Python will convert |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 223 |
|
| 224 |
---"""
|
| 225 |
|
| 226 |
|
| 227 |
+
# ╔══════════════════════════════════════════╗
|
| 228 |
+
# ║ MODEL LOADING ║
|
| 229 |
+
# ╚══════════════════════════════════════════╝
|
| 230 |
|
| 231 |
print("\n" + "="*70)
|
| 232 |
+
print("🚀 LOADING 4 MODELS")
|
| 233 |
+
print("="*70)
|
| 234 |
+
|
| 235 |
+
# 4-bit BitsAndBytes config (shared for quantized models)
|
| 236 |
+
bnb_4bit_config = BitsAndBytesConfig(
|
| 237 |
+
load_in_4bit=True,
|
| 238 |
+
bnb_4bit_quant_type="nf4",
|
| 239 |
+
bnb_4bit_compute_dtype=torch.float16,
|
| 240 |
+
bnb_4bit_use_double_quant=True,
|
| 241 |
+
)
|
| 242 |
|
| 243 |
+
# ── Model 1: Chhagan_ML-VL-OCR-v1 (LoRA, keep) ──
|
| 244 |
+
print("\n1️⃣ Chhagan_ML-VL-OCR-v1 (LoRA Refined)...")
|
| 245 |
MODEL_ID_C1 = "Chhagan005/Chhagan_ML-VL-OCR-v1"
|
| 246 |
CHHAGAN_V1_AVAILABLE = False
|
| 247 |
+
processor_c1 = model_c1 = None
|
|
|
|
| 248 |
|
| 249 |
if PEFT_AVAILABLE:
|
| 250 |
try:
|
| 251 |
try:
|
| 252 |
config = PeftConfig.from_pretrained(MODEL_ID_C1)
|
| 253 |
+
base_id = config.base_model_name_or_path
|
|
|
|
| 254 |
except:
|
| 255 |
+
base_id = "Qwen/Qwen2.5-VL-2B-Instruct"
|
| 256 |
+
processor_c1 = AutoProcessor.from_pretrained(base_id, trust_remote_code=True)
|
| 257 |
+
base_c1 = Qwen2VLForConditionalGeneration.from_pretrained(
|
| 258 |
+
base_id, torch_dtype=torch.float16, device_map="auto", trust_remote_code=True)
|
| 259 |
+
model_c1 = PeftModel.from_pretrained(base_c1, MODEL_ID_C1).to(device).eval()
|
| 260 |
+
print(" ✅ Loaded!")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 261 |
CHHAGAN_V1_AVAILABLE = True
|
| 262 |
except Exception as e:
|
| 263 |
+
print(f" ❌ Failed: {e}")
|
| 264 |
else:
|
| 265 |
+
print(" ⚠️ PEFT not available")
|
| 266 |
|
| 267 |
+
# ── Model 2: Chhagan-DocVL-Qwen3 (Qwen3 fine-tuned, keep) ──
|
| 268 |
+
print("\n2️⃣ Chhagan-DocVL-Qwen3 (Qwen3-VL Refined)...")
|
| 269 |
MODEL_ID_C2 = "Chhagan005/Chhagan-DocVL-Qwen3"
|
| 270 |
CHHAGAN_QWEN3_AVAILABLE = False
|
| 271 |
+
processor_c2 = model_c2 = None
|
|
|
|
| 272 |
|
| 273 |
if QWEN3_AVAILABLE:
|
| 274 |
try:
|
| 275 |
try:
|
| 276 |
if PEFT_AVAILABLE:
|
| 277 |
config = PeftConfig.from_pretrained(MODEL_ID_C2)
|
| 278 |
+
base_id = config.base_model_name_or_path
|
| 279 |
+
processor_c2 = AutoProcessor.from_pretrained(base_id, trust_remote_code=True)
|
| 280 |
+
base_c2 = Qwen3VLForConditionalGeneration.from_pretrained(
|
| 281 |
+
base_id, torch_dtype=torch.float16, device_map="auto", trust_remote_code=True)
|
| 282 |
+
model_c2 = PeftModel.from_pretrained(base_c2, MODEL_ID_C2).to(device).eval()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 283 |
else:
|
| 284 |
+
raise Exception("No PEFT")
|
| 285 |
except:
|
| 286 |
+
print(" Loading as full fine-tuned...")
|
| 287 |
processor_c2 = AutoProcessor.from_pretrained(MODEL_ID_C2, trust_remote_code=True)
|
| 288 |
model_c2 = Qwen3VLForConditionalGeneration.from_pretrained(
|
| 289 |
+
MODEL_ID_C2, attn_implementation="flash_attention_2",
|
| 290 |
+
torch_dtype=torch.float16, device_map="auto", trust_remote_code=True
|
|
|
|
|
|
|
|
|
|
| 291 |
).to(device).eval()
|
| 292 |
+
print(" ✅ Loaded!")
|
| 293 |
CHHAGAN_QWEN3_AVAILABLE = True
|
| 294 |
except Exception as e:
|
| 295 |
+
print(f" ❌ Failed: {e}")
|
| 296 |
else:
|
| 297 |
+
print(" ⚠️ Qwen3VL not in transformers version")
|
| 298 |
|
| 299 |
+
# ── Model 3: CSM-DocExtract-VL-Q4KM (NEW, replaces Qwen3-2B) ──
|
| 300 |
+
print("\n3️⃣ CSM-DocExtract-VL-Q4KM (8B Q4KM Quantized)...")
|
| 301 |
+
MODEL_ID_Q4KM = "Chhagan005/CSM-DocExtract-VL-Q4KM"
|
| 302 |
+
CSM_Q4KM_AVAILABLE = False
|
| 303 |
+
processor_q4km = model_q4km = None
|
|
|
|
| 304 |
|
| 305 |
+
try:
|
| 306 |
+
processor_q4km = AutoProcessor.from_pretrained(MODEL_ID_Q4KM, trust_remote_code=True)
|
| 307 |
+
# Try loading as full quantized model first
|
| 308 |
try:
|
| 309 |
+
model_q4km = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
| 310 |
+
MODEL_ID_Q4KM,
|
| 311 |
+
quantization_config=bnb_4bit_config,
|
|
|
|
| 312 |
torch_dtype=torch.float16,
|
| 313 |
device_map="auto",
|
| 314 |
+
trust_remote_code=True,
|
| 315 |
+
).eval()
|
| 316 |
+
except:
|
| 317 |
+
# Fallback: try Qwen3VL architecture
|
| 318 |
+
if QWEN3_AVAILABLE:
|
| 319 |
+
model_q4km = Qwen3VLForConditionalGeneration.from_pretrained(
|
| 320 |
+
MODEL_ID_Q4KM,
|
| 321 |
+
quantization_config=bnb_4bit_config,
|
| 322 |
+
torch_dtype=torch.float16,
|
| 323 |
+
device_map="auto",
|
| 324 |
+
trust_remote_code=True,
|
| 325 |
+
).eval()
|
| 326 |
+
else:
|
| 327 |
+
raise Exception("Neither Qwen2.5VL nor Qwen3VL architecture worked")
|
| 328 |
+
print(" ✅ Loaded! (~6-7GB VRAM)")
|
| 329 |
+
CSM_Q4KM_AVAILABLE = True
|
| 330 |
+
except Exception as e:
|
| 331 |
+
print(f" ❌ Failed: {e}")
|
| 332 |
|
| 333 |
+
# ── Model 4: CSM-DocExtract-VL 4BNB (NEW, replaces Nanonets) ──
|
| 334 |
+
print("\n4️⃣ CSM-DocExtract-VL 4BNB (BitsAndBytes 4-bit)...")
|
| 335 |
+
MODEL_ID_4BNB = "Chhagan005/CSM-DocExtract-VL"
|
| 336 |
+
CSM_4BNB_AVAILABLE = False
|
| 337 |
+
processor_4bnb = model_4bnb = None
|
|
|
|
| 338 |
|
| 339 |
try:
|
| 340 |
+
processor_4bnb = AutoProcessor.from_pretrained(MODEL_ID_4BNB, trust_remote_code=True)
|
| 341 |
+
try:
|
| 342 |
+
model_4bnb = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
| 343 |
+
MODEL_ID_4BNB,
|
| 344 |
+
quantization_config=bnb_4bit_config,
|
| 345 |
+
torch_dtype=torch.float16,
|
| 346 |
+
device_map="auto",
|
| 347 |
+
trust_remote_code=True,
|
| 348 |
+
).eval()
|
| 349 |
+
except:
|
| 350 |
+
if QWEN3_AVAILABLE:
|
| 351 |
+
model_4bnb = Qwen3VLForConditionalGeneration.from_pretrained(
|
| 352 |
+
MODEL_ID_4BNB,
|
| 353 |
+
quantization_config=bnb_4bit_config,
|
| 354 |
+
torch_dtype=torch.float16,
|
| 355 |
+
device_map="auto",
|
| 356 |
+
trust_remote_code=True,
|
| 357 |
+
).eval()
|
| 358 |
+
else:
|
| 359 |
+
raise Exception("Architecture detection failed")
|
| 360 |
+
print(" ✅ Loaded! (~6-7GB VRAM)")
|
| 361 |
+
CSM_4BNB_AVAILABLE = True
|
| 362 |
except Exception as e:
|
| 363 |
+
print(f" ❌ Failed: {e}")
|
| 364 |
|
| 365 |
print("\n" + "="*70)
|
| 366 |
+
print("📊 MODEL STATUS")
|
| 367 |
print("="*70)
|
| 368 |
+
status = [
|
| 369 |
+
("Chhagan_ML-VL-OCR-v1", CHHAGAN_V1_AVAILABLE, "LoRA Fine-tuned"),
|
| 370 |
+
("Chhagan-DocVL-Qwen3", CHHAGAN_QWEN3_AVAILABLE, "Qwen3-VL Fine-tuned"),
|
| 371 |
+
("CSM-DocExtract-VL-Q4KM", CSM_Q4KM_AVAILABLE, "8B Q4KM ~6-7GB"),
|
| 372 |
+
("CSM-DocExtract-VL 4BNB", CSM_4BNB_AVAILABLE, "BitsAndBytes 4-bit ~6-7GB"),
|
| 373 |
+
]
|
| 374 |
+
for name, ok, note in status:
|
| 375 |
+
print(f" {'✅' if ok else '❌'} {name:<35} {note}")
|
| 376 |
print("="*70)
|
| 377 |
+
loaded = sum(x[1] for x in status)
|
| 378 |
+
print(f" Total loaded: {loaded}/4\n")
|
| 379 |
+
|
| 380 |
+
|
| 381 |
+
# ╔══════════════════════════════════════════╗
|
| 382 |
+
# ║ PYTHON PIPELINE FUNCTIONS ║
|
| 383 |
+
# ╚══════════════════════════════════════════╝
|
| 384 |
+
|
| 385 |
+
def convert_eastern_numerals(text: str) -> str:
    """P2: Convert Eastern digits (Persian/Arabic/Devanagari/Bengali/Gurmukhi) to Western 0-9.

    Improvement: the original applied five separate translation tables,
    scanning the string five times; a single combined table now converts
    all scripts in one ``str.translate`` pass.  Characters outside the
    mapped digit ranges are left untouched.
    """
    eastern = (
        '۰۱۲۳۴۵۶۷۸۹'   # Persian (Extended Arabic-Indic)
        '٠١٢٣٤٥٦٧٨٩'   # Arabic-Indic
        '०१२३४५६७८९'   # Devanagari
        '০১২৩৪৫৬৭৮৯'   # Bengali
        '੦੧੨੩੪੫੬੭੮੯'   # Gurmukhi
    )
    western = '0123456789' * 5
    return text.translate(str.maketrans(eastern, western))
|
| 397 |
+
|
| 398 |
+
|
| 399 |
+
def detect_calendar_system(raw_text: str) -> str:
    """Detect the calendar system a document likely uses from country/language hints.

    Returns:
        str: 'solar_hijri' (Iran/Afghanistan), 'lunar_hijri' (Gulf states
        and Jordan), or 'gregorian' (default when no marker matches).

    Bug fix: the Latin keywords 'IRAN'/'AFGHANISTAN' were previously
    checked against the raw (case-sensitive) text while the Gulf branch
    used an uppercased copy, so lowercase spellings like "Iran" were
    silently missed.  All branches now match against the uppercased text;
    Arabic-script keywords are unaffected because str.upper() leaves
    Arabic characters unchanged.
    """
    haystack = raw_text.upper()
    solar_markers = ('جمهوری اسلامی ایران', 'IRAN', 'AFGHANISTAN', 'افغانستان')
    if any(kw in haystack for kw in solar_markers):
        return 'solar_hijri'
    lunar_markers = ('SAUDI', 'ARABIA', 'السعودية', 'KUWAIT', 'QATAR', 'BAHRAIN', 'JORDAN')
    if any(kw in haystack for kw in lunar_markers):
        return 'lunar_hijri'
    return 'gregorian'
|
| 407 |
+
|
| 408 |
+
|
| 409 |
+
def convert_shamsi_to_gregorian(shamsi_date: str) -> str:
    """P3: Convert a Solar Hijri (Shamsi/Jalali) "Y/M/D" date string to Gregorian.

    Uses the third-party ``khayyam`` library for an exact conversion when
    it is installed; otherwise falls back to the rough year offset
    Gregorian ≈ Jalali + 621 and tags the result "(approx)".  Any input
    that cannot be parsed — or is an invalid Jalali date — is returned
    as-is tagged "(Shamsi)" so downstream formatting never crashes on
    bad OCR output.

    Refactor: the "Y/M/D" split was previously duplicated in both the
    exact and the approximate branch, each under its own broad except;
    the string is now parsed exactly once up front.
    """
    parts = re.split(r'[/\-\.]', shamsi_date.strip())
    if len(parts) == 3:
        try:
            y, m, d = (int(p) for p in parts)
        except ValueError:
            pass  # non-numeric components — fall through to tagged original
        else:
            try:
                import khayyam  # optional dependency — exact Jalali conversion
                greg = khayyam.JalaliDate(y, m, d).todate()
                return f"{greg.day:02d}/{greg.month:02d}/{greg.year}"
            except ImportError:
                # Rough approximation: Jalali year + 621 ≈ Gregorian year.
                return f"{d:02d}/{m:02d}/{y + 621} (approx)"
            except Exception:
                pass  # invalid Jalali date — fall through to tagged original
    return f"{shamsi_date} (Shamsi)"
|
| 431 |
|
|
|
|
| 432 |
|
| 433 |
+
def convert_hijri_to_gregorian(hijri_date: str) -> str:
    """P3: Convert a Lunar Hijri "Y/M/D" date string to Gregorian.

    Uses the third-party ``hijri_converter`` library for an exact
    conversion when installed; otherwise applies the rough offset
    Gregorian ≈ Hijri − 43 + 622 and tags the result "(approx)".
    Unparseable or invalid input is returned as-is tagged "(Hijri)".

    NOTE(review): the linear fallback offset only holds for dates near
    the present era; it drifts for historical dates.

    Refactor: the "Y/M/D" split was previously duplicated in both the
    exact and the approximate branch, each under its own broad except;
    the string is now parsed exactly once up front.
    """
    parts = re.split(r'[/\-\.]', hijri_date.strip())
    if len(parts) == 3:
        try:
            y, m, d = (int(p) for p in parts)
        except ValueError:
            pass  # non-numeric components — fall through to tagged original
        else:
            try:
                from hijri_converter import convert  # optional dependency
                greg = convert.Hijri(y, m, d).to_gregorian()
                return f"{greg.day:02d}/{greg.month:02d}/{greg.year}"
            except ImportError:
                return f"{d:02d}/{m:02d}/{y - 43 + 622} (approx)"
            except Exception:
                pass  # invalid Hijri date — fall through to tagged original
    return f"{hijri_date} (Hijri)"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 453 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 454 |
|
| 455 |
+
def separate_scripts(raw_text: str) -> tuple:
    """P5: Separate English/Latin lines from non-Latin script lines.

    Each non-blank line is routed to the "original" bucket when the number
    of characters with code points above U+024F (past Latin Extended-B)
    exceeds 40% of the line's alphabetic character count; otherwise it is
    treated as English/Latin.  Lines containing no letters at all (pure
    numbers, separators, MRZ) stay on the English side.

    Returns a (english_text, original_text) tuple, each newline-joined
    with blank lines dropped.
    """
    latin_bucket, original_bucket = [], []
    for raw_line in raw_text.split('\n'):
        stripped = raw_line.strip()
        if not stripped:
            continue
        beyond_latin = sum(ord(ch) > 591 for ch in stripped)
        alpha_count = sum(ch.isalpha() for ch in stripped)
        if alpha_count and beyond_latin / alpha_count > 0.4:
            original_bucket.append(stripped)
        else:
            latin_bucket.append(stripped)
    return '\n'.join(latin_bucket), '\n'.join(original_bucket)
|
| 472 |
+
|
| 473 |
+
|
| 474 |
+
def extract_english_fields(raw_text: str) -> list:
    """P4: Extract English label:value pairs directly from card text — no AI.

    Scans the raw OCR text with a fixed battery of regexes covering common
    KYC labels (name, dates, document numbers, nationality, ...).  Only the
    first match per canonical label is kept, and values still containing
    '[' (template-placeholder residue) or shorter than 2 chars are dropped.

    Bug fix: value character classes previously used ``\\s``, which also
    matches newlines, so e.g. "NAME: John Smith\\nDOB: ..." captured the
    next line's label into the NAME value.  Free-text value classes now
    allow only spaces/tabs so a match never runs past the end of its line.

    Returns:
        list[tuple[str, str]]: (LABEL, value) pairs in pattern order.
    """
    results = []
    patterns = [
        (r"(?:FULL\s+)?NAME\s*[:\-.]?\s*([A-Za-z][A-Za-z \t\-\.\']{1,60})", 'NAME'),
        (r'DATE\s+OF\s+BIRTH\s*[:\-.]?\s*(\d{1,2}[\s/\-\.]\d{1,2}[\s/\-\.]\d{2,4})', 'DATE OF BIRTH'),
        (r'\bDOB\s*[:\-.]?\s*(\d{1,2}[\s/\-\.]\d{1,2}[\s/\-\.]\d{2,4})', 'DATE OF BIRTH'),
        (r'BIRTH\s+DATE\s*[:\-.]?\s*(\d{1,2}[\s/\-\.]\d{1,2}[\s/\-\.]\d{2,4})', 'DATE OF BIRTH'),
        (r'EXPIRY\s+DATE\s*[:\-.]?\s*(\d{1,2}[\s/\-\.]\d{1,2}[\s/\-\.]\d{2,4})', 'EXPIRY DATE'),
        (r'DATE\s+OF\s+EXPIRY\s*[:\-.]?\s*(\d{1,2}[\s/\-\.]\d{1,2}[\s/\-\.]\d{2,4})', 'EXPIRY DATE'),
        (r'VALID(?:\s+THRU|\s+UNTIL|ITY)?\s*[:\-.]?\s*(\d{1,2}[\s/\-\.]\d{1,2}[\s/\-\.]\d{2,4})', 'EXPIRY DATE'),
        (r'EXPIRATION\s+DATE\s*[:\-.]?\s*(\d{1,2}[\s/\-\.]\d{1,2}[\s/\-\.]\d{2,4})', 'EXPIRY DATE'),
        (r'(?:DATE\s+OF\s+)?ISSUE\s+DATE\s*[:\-.]?\s*(\d{1,2}[\s/\-\.]\d{1,2}[\s/\-\.]\d{2,4})', 'ISSUE DATE'),
        (r'DATE\s+OF\s+ISSUE\s*[:\-.]?\s*(\d{1,2}[\s/\-\.]\d{1,2}[\s/\-\.]\d{2,4})', 'ISSUE DATE'),
        (r'CIVIL\s+(?:NO\.?|NUMBER)\s*[:\-.]?\s*([A-Z0-9\-]{4,20})', 'CIVIL NUMBER'),
        (r'PASSPORT\s+(?:NO\.?|NUMBER)\s*[:\-.]?\s*([A-Z0-9\-]{6,12})', 'PASSPORT NUMBER'),
        (r'LICENCE\s+(?:NO\.?|NUMBER)\s*[:\-.]?\s*([A-Z0-9\-]{4,20})', 'LICENCE NUMBER'),
        (r'LICENSE\s+(?:NO\.?|NUMBER)\s*[:\-.]?\s*([A-Z0-9\-]{4,20})', 'LICENCE NUMBER'),
        (r'AADHAAR\s*(?:NO\.?|NUMBER)?\s*[:\-.]?\s*(\d{4}\s?\d{4}\s?\d{4})', 'AADHAAR NUMBER'),
        (r'\bPAN\s*[:\-.]?\s*([A-Z]{5}\d{4}[A-Z])', 'PAN NUMBER'),
        (r'EMIRATES\s+ID\s*[:\-.]?\s*(\d{3}-\d{4}-\d{7}-\d)', 'EMIRATES ID'),
        (r'(?:NATIONAL\s+)?ID\s+(?:NO\.?|NUMBER)\s*[:\-.]?\s*([A-Z0-9\-]{4,20})', 'ID NUMBER'),
        (r'DOCUMENT\s+(?:NO\.?|NUMBER)\s*[:\-.]?\s*([A-Z0-9\-]{4,20})', 'DOCUMENT NUMBER'),
        (r'NATIONALITY\s*[:\-.]?\s*([A-Za-z]{3,30})', 'NATIONALITY'),
        (r'(?:GENDER|SEX)\s*[:\-.]?\s*(MALE|FEMALE)', 'GENDER'),
        (r'PLACE\s+OF\s+BIRTH\s*[:\-.]?\s*([A-Za-z \t,]{2,40})', 'PLACE OF BIRTH'),
        (r'(?:PERMANENT\s+)?ADDRESS\s*[:\-.]?\s*(.{5,80})', 'ADDRESS'),
        (r'BLOOD\s+(?:GROUP|TYPE)\s*[:\-.]?\s*([ABO]{1,2}[+-]?)', 'BLOOD GROUP'),
        (r'(?:PROFESSION|OCCUPATION|JOB\s+TITLE)\s*[:\-.]?\s*(.{3,50})', 'PROFESSION'),
        (r"FATHER(?:\'?S)?\s+NAME\s*[:\-.]?\s*([A-Za-z \t]{3,50})", "FATHER'S NAME"),
        (r"MOTHER(?:\'?S)?\s+NAME\s*[:\-.]?\s*([A-Za-z \t]{3,50})", "MOTHER'S NAME"),
        (r'EMPLOYER\s*[:\-.]?\s*(.{3,60})', 'EMPLOYER'),
    ]
    seen = set()
    for pattern, label in patterns:
        m = re.search(pattern, raw_text, re.IGNORECASE)
        if m and label not in seen:
            val = m.group(1).strip()
            if val and len(val) > 1 and '[' not in val:
                results.append((label, val))
                seen.add(label)
    return results
|
| 516 |
|
| 517 |
|
| 518 |
def parse_mrz_lines(raw_text: str) -> dict:
    """P1: Authoritative Python MRZ parser — TD1, TD3, MRVA, MRVB.

    Scans *raw_text* for machine-readable-zone candidate lines (runs of
    ``A-Z``, ``0-9`` and ``<`` between 25 and 50 chars), then decodes them
    according to the ICAO 9303 layout that matches their length.

    Args:
        raw_text: OCR output, possibly containing eastern-Arabic numerals.

    Returns:
        Dict with keys such as ``doc_type``, ``country_code``, ``doc_number``,
        ``name``, ``dob``, ``expiry``, ``nationality``, ``sex`` and
        ``mrz_format`` ('TD1', 'TD3' or 'MRVA/MRVB'); empty dict when no MRZ
        is found.
    """
    # Normalize: western numerals only.
    # NOTE(review): relies on module-level `convert_eastern_numerals` and the
    # `datetime` module — confirm `import datetime` exists at the top of app.py.
    raw_text = convert_eastern_numerals(raw_text)

    # Collect candidate MRZ lines: whitespace stripped, restricted alphabet.
    lines = []
    for line in raw_text.split('\n'):
        clean = re.sub(r'\s+', '', line.strip())
        if re.match(r'^[A-Z0-9<]{25,50}$', clean):
            lines.append(clean)

    if not lines:
        return {}

    def decode_date(yymmdd: str, is_dob: bool = False) -> str:
        """Decode a YYMMDD MRZ date into DD/MM/YYYY; echo input on failure."""
        try:
            yy, mm, dd = int(yymmdd[0:2]), int(yymmdd[2:4]), int(yymmdd[4:6])
            if not (1 <= mm <= 12 and 1 <= dd <= 31):
                return f"Invalid ({yymmdd})"
            cur_yy = datetime.datetime.now().year % 100
            # Birth years beyond the current two-digit year belong to the 1900s;
            # everything else (and all expiry dates) is assumed 2000s.
            year = (1900 + yy) if (is_dob and yy > cur_yy) else (2000 + yy)
            return f"{dd:02d}/{mm:02d}/{year}"
        except Exception:  # narrowed from a bare except: don't swallow SystemExit etc.
            return yymmdd

    def clean_fill(s: str) -> str:
        """Drop trailing '<' filler and turn interior '<' into spaces."""
        return re.sub(r'<+$', '', s).replace('<', ' ').strip()

    def parse_name(line3: str) -> str:
        """Split 'SURNAME<<GIVEN<NAMES' into 'Given Names Surname' (title case)."""
        name_clean = re.sub(r'<+$', '', line3)
        if '<<' in name_clean:
            parts = name_clean.split('<<')
            surname = parts[0].replace('<', ' ').strip().title()
            given = parts[1].replace('<', ' ').strip().title() if len(parts) > 1 else ''
            return f"{given} {surname}".strip() if given else surname
        return name_clean.replace('<', ' ').strip().title()

    result = {}

    # TD1: 3 lines, 28-36 chars (ID cards)
    td1 = [l for l in lines if 28 <= len(l) <= 36]
    if len(td1) >= 2:
        l1, l2 = td1[0], td1[1]
        l3 = td1[2] if len(td1) > 2 else ""
        result['doc_type'] = clean_fill(l1[0:2])
        result['country_code'] = clean_fill(l1[2:5])
        result['doc_number'] = clean_fill(l1[5:14])
        if len(l2) >= 19:
            result['dob'] = decode_date(l2[0:6], is_dob=True)
            sex = l2[7] if len(l2) > 7 else ''
            result['sex'] = 'Male' if sex == 'M' else ('Female' if sex == 'F' else 'Unknown')
            result['expiry'] = decode_date(l2[8:14], is_dob=False)
            result['nationality'] = clean_fill(l2[15:18])
        if l3:
            result['name'] = parse_name(l3)
        result['mrz_format'] = 'TD1'
        return result

    # TD3: 2 lines, 40-48 chars (Passports)
    td3 = [l for l in lines if 40 <= len(l) <= 48]
    if len(td3) >= 2:
        l1, l2 = td3[0], td3[1]
        result['doc_type'] = clean_fill(l1[0:2])
        result['country_code'] = clean_fill(l1[2:5])
        result['name'] = parse_name(l1[5:44])
        if len(l2) >= 27:
            result['doc_number'] = clean_fill(l2[0:9])
            result['nationality'] = clean_fill(l2[10:13])
            result['dob'] = decode_date(l2[13:19], is_dob=True)
            sex = l2[20] if len(l2) > 20 else ''
            result['sex'] = 'Male' if sex == 'M' else ('Female' if sex == 'F' else 'Unknown')
            result['expiry'] = decode_date(l2[21:27], is_dob=False)
        result['mrz_format'] = 'TD3'
        return result

    # MRVA/MRVB: 2 lines, 36 chars (Visas)
    mrv = [l for l in lines if 36 <= len(l) <= 38]
    if len(mrv) >= 2:
        l1, l2 = mrv[0], mrv[1]
        result['doc_type'] = clean_fill(l1[0:2])
        result['country_code'] = clean_fill(l1[2:5])
        result['name'] = parse_name(l1[5:36])
        if len(l2) >= 27:
            result['doc_number'] = clean_fill(l2[0:9])
            result['nationality'] = clean_fill(l2[10:13])
            result['dob'] = decode_date(l2[13:19], is_dob=True)
            sex = l2[20] if len(l2) > 20 else ''
            result['sex'] = 'Male' if sex == 'M' else ('Female' if sex == 'F' else 'Unknown')
            result['expiry'] = decode_date(l2[21:27], is_dob=False)
        result['mrz_format'] = 'MRVA/MRVB'
        return result

    return {}
|
| 611 |
|
| 612 |
+
|
| 613 |
+
def build_mrz_table(mrz_data: dict) -> str:
    """Render Python-parsed MRZ fields as a markdown table.

    Args:
        mrz_data: Output of ``parse_mrz_lines``; may be empty.

    Returns:
        A markdown table of the verified MRZ values, or a short notice when
        *mrz_data* is empty.
    """
    if not mrz_data:
        return "No MRZ detected."

    field_labels = (
        ('mrz_format', 'MRZ Format'),
        ('doc_type', 'Document Type'),
        ('country_code', 'Issuing Country Code'),
        ('doc_number', 'Document / Civil Number'),
        ('name', 'Full Name'),
        ('dob', 'Date of Birth'),
        ('expiry', 'Expiry Date'),
        ('nationality', 'User Nationality'),
        ('sex', 'Gender'),
    )

    # Header block, then one row per field that the parser actually produced.
    out = [
        f"**Python Parsed MRZ — Authoritative ({mrz_data.get('mrz_format','?')} format):**",
        "",
        "| Field | Verified Value |",
        "|---|---|",
    ]
    out.extend(
        f"| {label} | **{mrz_data[key]}** ✅ |"
        for key, label in field_labels
        if key in mrz_data
    )
    return "\n".join(out) + "\n"
|
| 633 |
+
|
| 634 |
+
|
| 635 |
+
def build_unified_summary(front_result: str, back_result: str, mrz_data: dict) -> str:
    """P6: Merge front+back fields, MRZ as ground truth override.

    Parses the markdown field tables produced for each card side, merges them
    into one comparison table, and lets any Python-parsed MRZ value override
    conflicting OCR values.

    Args:
        front_result: Full markdown output generated for the front image.
        back_result: Full markdown output generated for the back image.
        mrz_data: Dict from ``parse_mrz_lines``; empty when no MRZ was found.

    Returns:
        A markdown section containing the merged, deduplicated record.
    """
    summary = "## 🔄 Unified Deduplicated Record\n\n"

    if mrz_data:
        summary += f"> ✅ *MRZ Python-parsed ({mrz_data.get('mrz_format','?')}) — MRZ values are **ground truth**.*\n\n"
        summary += "### 🔐 MRZ Ground Truth\n\n"
        summary += build_mrz_table(mrz_data) + "\n\n---\n\n"
    else:
        summary += "> *No MRZ — fields merged from front+back. Conflicts flagged ⚠️.*\n\n"

    def get_rows(text):
        # Pull "| Field | Value |" rows out of one side's "✅"/"🗂️" markdown
        # section, stopping at the next "---" divider (or end of text).
        rows = {}
        m = re.search(r"## (?:✅|🗂️)[^\n]*\n\|[^\n]*\n\|[-| ]+\n(.*?)(?=\n---|\Z)", text, re.DOTALL)
        if m:
            for line in m.group(1).strip().split('\n'):
                parts = [p.strip() for p in line.split('|') if p.strip()]
                if len(parts) >= 2:
                    # Strip markdown emphasis / emoji from the field name.
                    field = re.sub(r'[^\w\s/\']', '', parts[0]).strip()
                    val = parts[1].strip()
                    # Skip placeholder values so they don't count as data.
                    if val and val.lower() not in ('—', 'not on card', 'n/a', ''):
                        rows[field] = val
        return rows

    front_f = get_rows(front_result)
    back_f = get_rows(back_result)
    # dict.fromkeys preserves first-seen order while deduplicating names.
    all_f = list(dict.fromkeys(list(front_f.keys()) + list(back_f.keys())))

    # MRZ lookup: map field-name keywords to the authoritative MRZ value.
    mrz_map = {}
    if mrz_data:
        kw_map = {
            'name': ['name'],
            'doc_number': ['civil', 'document', 'id', 'passport', 'licence'],
            'dob': ['birth', 'dob'],
            'expiry': ['expiry', 'expiration'],
            'sex': ['gender', 'sex'],
            'nationality':['nationality'],
        }
        for mk, keywords in kw_map.items():
            if mk in mrz_data:
                for kw in keywords:
                    mrz_map[kw] = mrz_data[mk]

    def get_mrz(field):
        # Substring match: e.g. "Date of Birth" matches the 'birth' keyword.
        fl = field.lower()
        for kw, v in mrz_map.items():
            if kw in fl:
                return v
        return None

    summary += "### 📋 Field Comparison\n\n| Field | Value | Source |\n|---|---|---|\n"

    for field in all_f:
        fv = front_f.get(field, '')
        bv = back_f.get(field, '')
        mv = get_mrz(field)

        if fv and bv:
            if fv.lower() == bv.lower():
                # Both sides agree; annotate with MRZ confirmation when any
                # MRZ token appears inside the OCR value.
                note = f"✅ MRZ Confirmed" if mv and any(x in fv.lower() for x in mv.lower().split()) else ("⚠️ MRZ differs: **" + mv + "**" if mv else "")
                summary += f"| {field} | {fv} | Front+Back ✅ {note} |\n"
            else:
                if mv:
                    # Sides disagree but the MRZ knows the answer — override.
                    summary += f"| {field} | ~~{fv}~~ / ~~{bv}~~ → **{mv}** | ✅ MRZ Override |\n"
                else:
                    summary += f"| {field} | F: **{fv}** / B: **{bv}** | ⚠️ Mismatch |\n"
        elif fv:
            note = f"✅ MRZ Confirmed" if mv and any(x in fv.lower() for x in mv.lower().split()) else (f"⚠️ MRZ: **{mv}**" if mv else "")
            summary += f"| {field} | {fv} | Front only {note} |\n"
        elif bv:
            note = f"✅ MRZ Confirmed" if mv and any(x in bv.lower() for x in mv.lower().split()) else (f"⚠️ MRZ: **{mv}**" if mv else "")
            summary += f"| {field} | {bv} | Back only {note} |\n"

    return summary + "\n"
|
| 710 |
+
|
| 711 |
+
|
| 712 |
+
# ╔══════════════════════════════════════════╗
|
| 713 |
+
# ║ STEP PIPELINE FUNCTIONS ║
|
| 714 |
+
# ╚══════════════════════════════════════���═══╝
|
| 715 |
+
|
| 716 |
+
def run_step1_extraction(model, processor, image, device, temperature, top_p, top_k, repetition_penalty):
    """Step 1: LLM → Raw OCR, original script, NO translation, NO coordinates.

    Runs the VLM once with the structured Step-1 prompt; if the model slips
    into grounding mode (emits coordinates) or omits the expected markers, it
    retries once with a stricter plain-text fallback prompt.

    Args:
        model / processor: Loaded VLM and its processor.
        image: PIL image of one card side.
        device: Torch device the inputs should be moved to.
        temperature, top_p, top_k, repetition_penalty: Sampling settings.

    Returns:
        The raw decoded model output (expected to contain the
        ``---TEXT_START---`` / ``---TEXT_END---`` markers).
    """

    def _generate(prompt_text):
        # One image + one text prompt per chat turn.
        messages = [{"role": "user", "content": [
            {"type": "image"},
            {"type": "text", "text": prompt_text},
        ]}]
        try:
            prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        except Exception:  # narrowed from bare except: processor may lack a chat template
            prompt = prompt_text
        inputs = processor(text=[prompt], images=[image], return_tensors="pt", padding=True).to(device)
        with torch.no_grad():
            out = model.generate(
                **inputs, max_new_tokens=600, do_sample=True,
                temperature=temperature, top_p=top_p, top_k=top_k,
                repetition_penalty=repetition_penalty,
            )
        # Drop the echoed prompt tokens before decoding.
        gen = out[:, inputs['input_ids'].shape[1]:]
        return processor.batch_decode(gen, skip_special_tokens=True)[0]

    result = _generate(STEP1_EXTRACT_PROMPT)

    # Detect coordinate output (Qwen grounding mode triggered) → retry once.
    if re.search(r'\(\d{1,4},\s*\d{1,4}\)', result) or '---TEXT_START---' not in result:
        print(" ⚠️ Coordinate output detected, retrying...")
        fallback = """Read all text from this document image and write it line by line in plain text.
Do NOT output coordinates or bounding boxes.
Start output with:
PHOTO_PRESENT: yes or no
SIGNATURE_PRESENT: yes or no
MRZ_PRESENT: yes or no
DETECTED_LANGUAGE: name the language(s)
---TEXT_START---
[all text here exactly as printed]
---TEXT_END---"""
        result = _generate(fallback)

    return result
|
| 756 |
+
|
| 757 |
+
|
| 758 |
+
def parse_step1_output(raw_output: str) -> dict:
    """Parse Step 1 structured output → metadata + original text.

    Reads the PHOTO/SIGNATURE/MRZ/LANGUAGE header lines and the text between
    the ``---TEXT_START---`` / ``---TEXT_END---`` markers. Missing fields fall
    back to "N/A" / "Unknown", and the whole raw output is kept as
    ``original_text`` when no markers are found.
    """

    def _find(pattern, default="N/A"):
        # Case-insensitive single-group lookup with a default.
        hit = re.search(pattern, raw_output, re.IGNORECASE)
        return hit.group(1).strip() if hit else default

    def _flag(answer):
        # "yes" (any case) → checkmark; anything else (incl. default) → cross.
        return "✅ Yes" if answer.lower() == "yes" else "❌ No"

    parsed = {
        "photo_present": _flag(_find(r'PHOTO_PRESENT:\s*(yes|no)')),
        "photo_location": _find(r'PHOTO_LOCATION:\s*([^\n]+)'),
        "sig_present": _flag(_find(r'SIGNATURE_PRESENT:\s*(yes|no)')),
        "sig_location": _find(r'SIGNATURE_LOCATION:\s*([^\n]+)'),
        "mrz_present": _flag(_find(r'MRZ_PRESENT:\s*(yes|no)')),
        "detected_lang": _find(r'DETECTED_LANGUAGE:\s*([^\n]+)', "Unknown"),
        "original_text": raw_output,
    }

    body = re.search(r'---TEXT_START---\n?(.*?)---TEXT_END---', raw_output, re.DOTALL)
    if body:
        parsed["original_text"] = body.group(1).strip()

    return parsed
|
| 791 |
+
|
| 792 |
|
| 793 |
def run_step2_structure(model, processor, metadata: dict, device,
                        max_new_tokens, temperature, top_p, top_k, repetition_penalty):
    """Step 2: Python extracts English fields + MRZ. LLM only classifies + fills gaps.

    Runs the deterministic Python extractors over the Step-1 OCR text, kicks
    off a background generation thread for the LLM classification pass, and
    pre-builds the Python-verified markdown sections.

    Args:
        metadata: Dict from ``parse_step1_output`` (photo/signature/MRZ flags
            plus ``original_text``).

    Returns:
        Tuple of (streamer, thread, mrz_data, python_sections): the text
        streamer yielding LLM tokens, the running generation thread, the
        parsed MRZ dict, and the pre-built markdown string.
    """

    raw_text = metadata.get('original_text', '')

    # P2: Convert eastern numerals first
    raw_text_normalized = convert_eastern_numerals(raw_text)

    # P5: Separate scripts
    english_block, original_block = separate_scripts(raw_text_normalized)

    # P4: Direct English field extraction
    english_fields = extract_english_fields(raw_text_normalized)

    # P1: MRZ parse (authoritative)
    mrz_data = parse_mrz_lines(raw_text_normalized)

    # P3: Calendar detection + conversion (for display)
    calendar_sys = detect_calendar_system(raw_text)

    # Build python fields table
    if english_fields:
        tbl = "| Field (as printed on card) | Value (as printed) |\n|---|---|\n"
        for label, val in english_fields:
            tbl += f"| **{label}** | {val} |\n"
    else:
        tbl = "| — | No English label:value pairs detected |\n"

    # MRZ summary (one-line digest fed into the LLM prompt)
    if mrz_data:
        mrz_summary = " | ".join([f"{k}: {v}" for k, v in mrz_data.items() if k != 'mrz_format'])
        mrz_summary = f"✅ {mrz_data.get('mrz_format','?')} parsed: {mrz_summary}"
    else:
        mrz_summary = "❌ No MRZ detected"

    # Non-Gregorian note
    cal_note = ""
    if calendar_sys == 'solar_hijri':
        cal_note = "\n> ⚠️ **Solar Hijri (Shamsi) calendar detected** — Python will convert dates to Gregorian."
    elif calendar_sys == 'lunar_hijri':
        cal_note = "\n> ⚠️ **Lunar Hijri calendar detected** — Python will convert dates to Gregorian."

    # Build prompt for LLM (classification + gaps only)
    prompt_text = STEP2_TEMPLATE.format(
        python_fields_table=tbl,
        mrz_summary=mrz_summary,
        english_block=english_block or "None",
        original_block=original_block or "None",
    )

    # Text-only turn: the image was already consumed in Step 1.
    messages = [{"role": "user", "content": [{"type": "text", "text": prompt_text}]}]
    try:
        prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    except:  # NOTE(review): bare except — consider narrowing to Exception
        prompt = prompt_text

    inputs = processor(text=[prompt], return_tensors="pt", padding=True).to(device)

    # Stream tokens from a background thread so the UI can update live.
    streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
    gen_kwargs = {
        **inputs, "streamer": streamer, "max_new_tokens": max_new_tokens,
        "do_sample": True, "temperature": temperature, "top_p": top_p,
        "top_k": top_k, "repetition_penalty": repetition_penalty,
    }
    thread = Thread(target=model.generate, kwargs=gen_kwargs)
    thread.start()

    # Pre-build Python-verified sections.
    # NOTE(review): the `re.sub(r'\s+', ...)` expression below sits inside an
    # f-string; backslashes in f-string expressions require Python 3.12+ —
    # confirm the Space's runtime version.
    python_sections = f"""## 🖼️ Visual Elements

| Element | Status | Location |
|---------|--------|----------|
| 📷 Profile Photo | {metadata['photo_present']} | {metadata['photo_location']} |
| ✍️ Signature | {metadata['sig_present']} | {metadata['sig_location']} |
| 🔐 MRZ Zone | {metadata['mrz_present']} | Bottom strip |

---

## ✅ English Fields (Direct from Card — Not Modified)
{cal_note}

{tbl}

---

## 📜 Original Script

{raw_text}
---

## 🔐 MRZ Data

{chr(10).join([l for l in raw_text.split(chr(10)) if re.match(r'^[A-Z0-9<]{25,50}$', re.sub(r'\s+','',l.strip()))]) or 'NOT PRESENT'}
{build_mrz_table(mrz_data) if mrz_data else '_No MRZ detected._'}

---

"""
    return streamer, thread, mrz_data, python_sections
|
| 893 |
+
|
| 894 |
+
|
| 895 |
+
# ╔══════════════════════════════════════════╗
|
| 896 |
+
# ║ GRADIO HELPER CLASSES ║
|
| 897 |
+
# ╚══════════════════════════════════════════╝
|
| 898 |
+
|
| 899 |
+
class RadioAnimated(gr.HTML):
    """Animated segmented-radio control rendered as custom HTML.

    Shows *choices* as a horizontal radio group with a sliding highlight and
    mirrors the selected value into ``props.value`` so Gradio's ``.change``
    event wiring works.
    """

    def __init__(self, choices, value=None, **kwargs):
        """Build the HTML/JS for the control.

        Args:
            choices: At least two string options.
            value: Initially selected option; defaults to the first choice.

        Raises:
            ValueError: If fewer than two choices are supplied.
        """
        if not choices or len(choices) < 2:
            raise ValueError("RadioAnimated requires at least 2 choices.")
        if value is None:
            value = choices[0]
        # Unique radio-group name so multiple instances on a page don't collide.
        uid = uuid.uuid4().hex[:8]
        group_name = f"ra-{uid}"
        inputs_html = "\n".join(
            f'<input class="ra-input" type="radio" name="{group_name}" id="{group_name}-{i}" value="{c}">'
            f'<label class="ra-label" for="{group_name}-{i}">{c}</label>'
            for i, c in enumerate(choices)
        )
        html_template = f"""
        <div class="ra-wrap" data-ra="{uid}">
          <div class="ra-inner"><div class="ra-highlight"></div>{inputs_html}</div>
        </div>"""
        # BUG FIX: the original named setVal's boolean parameter `trigger`,
        # shadowing the `trigger(event, value)` callback supplied to js_on_load —
        # so `trigger('change', ...)` tried to call a boolean and change events
        # never reached Python. The parameter is renamed to `fire`.
        js_on_load = r"""
        (() => {
          const highlight = element.querySelector('.ra-highlight');
          const inputs = Array.from(element.querySelectorAll('.ra-input'));
          if (!inputs.length) return;
          const choices = inputs.map(i => i.value);
          function setHighlight(idx) {
            highlight.style.width = `calc(${100/choices.length}% - 6px)`;
            highlight.style.transform = `translateX(${idx * 100}%)`;
          }
          function setVal(val, fire=false) {
            const idx = Math.max(0, choices.indexOf(val));
            inputs.forEach((inp, i) => { inp.checked = (i === idx); });
            setHighlight(idx);
            props.value = choices[idx];
            if (fire) trigger('change', props.value);
          }
          setVal(props.value ?? choices[0], false);
          inputs.forEach(inp => inp.addEventListener('change', () => setVal(inp.value, true)));
        })();"""
        super().__init__(value=value, html_template=html_template, js_on_load=js_on_load, **kwargs)
|
| 937 |
+
|
| 938 |
+
|
| 939 |
+
def apply_gpu_duration(val: str):
    """Coerce the GPU-duration radio value (a numeric string) to an int."""
    seconds = int(val)
    return seconds
|
| 941 |
+
|
| 942 |
+
|
| 943 |
+
def calc_timeout_duration(model_name, text, image_front, image_back,
                          max_new_tokens, temperature, top_p, top_k,
                          repetition_penalty, gpu_timeout):
    """Compute the @spaces.GPU duration for one extraction request.

    Only ``image_front``, ``image_back`` and ``gpu_timeout`` matter; the other
    parameters exist because spaces passes the full input list through.

    Returns:
        ``gpu_timeout`` seconds, doubled when both card sides are supplied
        (two full OCR passes), or 180 when ``gpu_timeout`` is malformed.
    """
    try:
        base = int(gpu_timeout)
    except (TypeError, ValueError):  # narrowed from a bare except
        # Malformed slider/state value — fall back to a safe default.
        return 180
    return base * 2 if (image_front is not None and image_back is not None) else base
|
| 951 |
+
|
| 952 |
+
|
| 953 |
+
# ╔══════════════════════════════════════════╗
|
| 954 |
+
# ║ MAIN PIPELINE FUNCTION ║
|
| 955 |
+
# ╚══════════════════════════════════════════╝
|
| 956 |
|
| 957 |
@spaces.GPU(duration=calc_timeout_duration)
|
| 958 |
def generate_dual_card_ocr(model_name: str, text: str,
|
|
|
|
| 961 |
top_k: int, repetition_penalty: float, gpu_timeout: int):
|
| 962 |
|
| 963 |
# Model selection
|
| 964 |
+
model_map = {
|
| 965 |
+
"Chhagan-ID-OCR-v1 ⭐": (CHHAGAN_V1_AVAILABLE, processor_c1, model_c1),
|
| 966 |
+
"Chhagan-DocVL-Qwen3 🔥": (CHHAGAN_QWEN3_AVAILABLE, processor_c2, model_c2),
|
| 967 |
+
"CSM-DocExtract-Q4KM 🏆": (CSM_Q4KM_AVAILABLE, processor_q4km, model_q4km),
|
| 968 |
+
"CSM-DocExtract-4BNB 💎": (CSM_4BNB_AVAILABLE, processor_4bnb, model_4bnb),
|
| 969 |
+
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 970 |
|
| 971 |
+
if model_name not in model_map:
|
| 972 |
+
yield "Invalid model.", "Invalid model."; return
|
| 973 |
+
|
| 974 |
+
available, processor, model = model_map[model_name]
|
| 975 |
+
if not available:
|
| 976 |
+
yield f"{model_name} not available.", f"{model_name} not available."; return
|
| 977 |
|
| 978 |
if image_front is None and image_back is None:
|
| 979 |
+
yield "Please upload at least one card image.", "Please upload at least one card image."; return
|
|
|
|
| 980 |
|
| 981 |
full_output = ""
|
| 982 |
front_result = ""
|
| 983 |
back_result = ""
|
| 984 |
+
all_mrz_data = {}
|
| 985 |
+
front_meta_saved = {}
|
| 986 |
+
back_meta_saved = {}
|
| 987 |
|
| 988 |
+
# ───── FRONT CARD ─────
|
| 989 |
if image_front is not None:
|
| 990 |
full_output += "# 🎴 FRONT CARD\n\n"
|
| 991 |
+
full_output += "⏳ **Step 1/2 — Raw OCR (original script, no translation)...**\n\n"
|
| 992 |
yield full_output, full_output
|
| 993 |
|
| 994 |
+
step1_raw = run_step1_extraction(model, processor, image_front, device,
|
| 995 |
+
temperature, top_p, top_k, repetition_penalty)
|
|
|
|
|
|
|
| 996 |
front_meta = parse_step1_output(step1_raw)
|
| 997 |
+
front_meta_saved = front_meta
|
| 998 |
|
| 999 |
+
full_output += f"✅ **Step 1 Done** — 🌐 Language: **{front_meta['detected_lang']}**\n\n"
|
| 1000 |
+
full_output += "⏳ **Step 2/2 — Python extract + LLM classify...**\n\n"
|
| 1001 |
yield full_output, full_output
|
| 1002 |
|
| 1003 |
+
streamer_f, thread_f, mrz_f, python_sections_f = run_step2_structure(
|
| 1004 |
model, processor, front_meta, device,
|
| 1005 |
+
max_new_tokens, temperature, top_p, top_k, repetition_penalty)
|
| 1006 |
+
|
| 1007 |
+
if mrz_f:
|
| 1008 |
+
all_mrz_data = mrz_f
|
| 1009 |
+
|
| 1010 |
+
buffer_f = python_sections_f
|
| 1011 |
+
yield full_output + buffer_f, full_output + buffer_f
|
| 1012 |
|
|
|
|
| 1013 |
for new_text in streamer_f:
|
| 1014 |
+
buffer_f += new_text.replace("<|im_end|>", "").replace("<|endoftext|>", "")
|
|
|
|
| 1015 |
time.sleep(0.01)
|
| 1016 |
yield full_output + buffer_f, full_output + buffer_f
|
| 1017 |
|
|
|
|
| 1019 |
front_result = buffer_f
|
| 1020 |
thread_f.join()
|
| 1021 |
|
| 1022 |
+
# ───── BACK CARD ─────
|
| 1023 |
if image_back is not None:
|
| 1024 |
full_output += "\n\n---\n\n# 🎴 BACK CARD\n\n"
|
| 1025 |
+
full_output += "⏳ **Step 1/2 — Raw OCR (original script, no translation)...**\n\n"
|
| 1026 |
yield full_output, full_output
|
| 1027 |
|
| 1028 |
+
step1_raw_back = run_step1_extraction(model, processor, image_back, device,
|
| 1029 |
+
temperature, top_p, top_k, repetition_penalty)
|
|
|
|
|
|
|
| 1030 |
back_meta = parse_step1_output(step1_raw_back)
|
| 1031 |
+
back_meta_saved = back_meta
|
| 1032 |
|
| 1033 |
+
full_output += f"✅ **Step 1 Done** — 🌐 Language: **{back_meta['detected_lang']}**\n\n"
|
| 1034 |
+
full_output += "⏳ **Step 2/2 — Python extract + LLM classify...**\n\n"
|
| 1035 |
yield full_output, full_output
|
| 1036 |
|
| 1037 |
+
streamer_b, thread_b, mrz_b, python_sections_b = run_step2_structure(
|
| 1038 |
model, processor, back_meta, device,
|
| 1039 |
+
max_new_tokens, temperature, top_p, top_k, repetition_penalty)
|
| 1040 |
+
|
| 1041 |
+
if mrz_b and not all_mrz_data:
|
| 1042 |
+
all_mrz_data = mrz_b
|
| 1043 |
+
|
| 1044 |
+
buffer_b = python_sections_b
|
| 1045 |
+
yield full_output + buffer_b, full_output + buffer_b
|
| 1046 |
|
|
|
|
| 1047 |
for new_text in streamer_b:
|
| 1048 |
+
buffer_b += new_text.replace("<|im_end|>", "").replace("<|endoftext|>", "")
|
|
|
|
| 1049 |
time.sleep(0.01)
|
| 1050 |
yield full_output + buffer_b, full_output + buffer_b
|
| 1051 |
|
|
|
|
| 1053 |
back_result = buffer_b
|
| 1054 |
thread_b.join()
|
| 1055 |
|
| 1056 |
+
# ───── UNIFIED SUMMARY ─────
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1057 |
if image_front is not None and image_back is not None:
|
| 1058 |
full_output += "\n\n---\n\n"
|
| 1059 |
+
full_output += build_unified_summary(front_result, back_result, all_mrz_data)
|
| 1060 |
|
| 1061 |
+
mrz_note = f"MRZ: ✅ {all_mrz_data.get('mrz_format','?')} verified" if all_mrz_data else "MRZ: ❌ Not detected"
|
| 1062 |
+
full_output += f"\n\n---\n\n**✨ Complete** | Model: `{model_name}` | {mrz_note} | Pipeline: OCR → Python Extract → LLM Classify\n"
|
| 1063 |
yield full_output, full_output
|
| 1064 |
|
| 1065 |
|
| 1066 |
+
# ╔══════════════════════════════════════════╗
|
| 1067 |
+
# ║ MODEL CHOICES ║
|
| 1068 |
+
# ╚══════════════════════════════════════════╝
|
| 1069 |
|
| 1070 |
# Build the model-selection list from whichever fine-tuned checkpoints loaded
# successfully; fall back to a sentinel entry so the Radio always has a value.
_MODEL_FLAG_LABELS = (
    (CHHAGAN_V1_AVAILABLE, "Chhagan-ID-OCR-v1 ⭐"),
    (CHHAGAN_QWEN3_AVAILABLE, "Chhagan-DocVL-Qwen3 🔥"),
    (CSM_Q4KM_AVAILABLE, "CSM-DocExtract-Q4KM 🏆"),
    (CSM_4BNB_AVAILABLE, "CSM-DocExtract-4BNB 💎"),
)
model_choices = [label for ok, label in _MODEL_FLAG_LABELS if ok] or ["No models available"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1076 |
|
| 1077 |
# Example rows for the Gradio examples widget:
# (custom query, front-image path, back-image path).
dual_card_examples = [
    ["Extract complete information", "examples/5.jpg", None],
    ["Multilingual OCR with MRZ", "examples/4.jpg", None],
    ["Extract profile photo and signature", "examples/2.jpg", None],
]
|
| 1082 |
|
| 1083 |
|
| 1084 |
+
# ╔══════════════════════════════════════════╗
|
| 1085 |
+
# ║ GRADIO UI ║
|
| 1086 |
+
# ╚══════════════════════════════════════════╝
|
| 1087 |
|
| 1088 |
demo = gr.Blocks(css=css, theme=steel_blue_theme)
|
| 1089 |
with demo:
|
| 1090 |
+
gr.Markdown("# 🌍 **CSM Dual-Card ID OCR System**", elem_id="main-title")
|
| 1091 |
+
gr.Markdown("### *Universal Document Extraction — MRZ + Multilingual + Auto Calendar*")
|
| 1092 |
|
| 1093 |
loaded_models = []
|
| 1094 |
+
if CHHAGAN_V1_AVAILABLE: loaded_models.append("ID-OCR-v1 ⭐")
|
| 1095 |
+
if CHHAGAN_QWEN3_AVAILABLE: loaded_models.append("DocVL-Qwen3 🔥")
|
| 1096 |
+
if CSM_Q4KM_AVAILABLE: loaded_models.append("Q4KM 🏆")
|
| 1097 |
+
if CSM_4BNB_AVAILABLE: loaded_models.append("4BNB 💎")
|
| 1098 |
+
|
| 1099 |
+
model_info = f"**Loaded ({len(loaded_models)}/4):** {', '.join(loaded_models)}" if loaded_models else "⚠️ No models"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1100 |
gr.Markdown(f"**Status:** {model_info}")
|
| 1101 |
+
gr.Markdown("**Pipeline:** ✅ Step1: Raw OCR → ✅ Python: MRZ+English Extract → ✅ LLM: Classify+Gaps → ✅ Deduplicate")
|
| 1102 |
|
| 1103 |
with gr.Row():
|
| 1104 |
with gr.Column(scale=2):
|
| 1105 |
image_query = gr.Textbox(
|
| 1106 |
label="💬 Custom Query (Optional)",
|
| 1107 |
+
placeholder="Leave empty for automatic full extraction...",
|
| 1108 |
value=""
|
| 1109 |
)
|
|
|
|
| 1110 |
gr.Markdown("### 📤 Upload ID Cards")
|
| 1111 |
with gr.Row():
|
| 1112 |
image_front = gr.Image(type="pil", label="🎴 Front Card", height=250)
|
| 1113 |
+
image_back = gr.Image(type="pil", label="🎴 Back Card (Optional)", height=250)
|
| 1114 |
|
| 1115 |
image_submit = gr.Button("🚀 Extract + Translate + Structure", variant="primary", size="lg")
|
| 1116 |
|
|
|
|
| 1121 |
)
|
| 1122 |
|
| 1123 |
with gr.Accordion("⚙️ Advanced Settings", open=False):
|
| 1124 |
+
max_new_tokens = gr.Slider(label="Max new tokens", minimum=1, maximum=MAX_MAX_NEW_TOKENS, step=1, value=DEFAULT_MAX_NEW_TOKENS)
|
| 1125 |
+
temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.6)
|
| 1126 |
+
top_p = gr.Slider(label="Top-p", minimum=0.05, maximum=1.0, step=0.05, value=0.9)
|
| 1127 |
+
top_k = gr.Slider(label="Top-k", minimum=1, maximum=1000, step=1, value=50)
|
| 1128 |
+
repetition_penalty= gr.Slider(label="Repetition penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.1)
|
| 1129 |
|
| 1130 |
with gr.Column(scale=3):
|
| 1131 |
gr.Markdown("## 📄 Extraction Results", elem_id="output-title")
|
| 1132 |
output = gr.Textbox(label="Raw Output (Streaming)", interactive=True, lines=15)
|
| 1133 |
+
with gr.Accordion("📝 Structured Preview", open=True):
|
| 1134 |
markdown_output = gr.Markdown(label="Formatted Result")
|
| 1135 |
|
| 1136 |
model_choice = gr.Radio(
|
| 1137 |
choices=model_choices,
|
| 1138 |
+
label="🤖 Select Model",
|
| 1139 |
value=model_choices[0] if model_choices else None,
|
| 1140 |
+
info="🏆💎 = 8B Quantized (best) | 🔥 = Qwen3 Fine-tuned | ⭐ = LoRA"
|
| 1141 |
)
|
| 1142 |
|
| 1143 |
with gr.Row(elem_id="gpu-duration-container"):
|
|
|
|
| 1145 |
gr.Markdown("**⏱️ GPU Duration (seconds)**")
|
| 1146 |
radioanimated_gpu_duration = RadioAnimated(
|
| 1147 |
choices=["60", "90", "120", "180", "240"],
|
| 1148 |
+
value="180",
|
| 1149 |
elem_id="radioanimated_gpu_duration"
|
| 1150 |
)
|
| 1151 |
+
gpu_duration_state = gr.Number(value=180, visible=False)
|
| 1152 |
|
| 1153 |
gr.Markdown("""
|
| 1154 |
+
**✨ What This Extracts:**
|
| 1155 |
+
- 🔐 MRZ: TD1/TD3/MRVA/MRVB — Python parsed, 100% accurate
|
| 1156 |
+
- ✅ English fields: Direct from card, not modified
|
| 1157 |
+
- 📜 Original script: Arabic/Farsi/Hindi/Chinese as-is
|
| 1158 |
+
- 🗓️ Calendar: Shamsi/Hijri → Gregorian conversion
|
| 1159 |
+
- 🔢 Eastern numerals: ۱۲۳ → 123 automatic
|
| 1160 |
+
- 🔄 Front+Back: Deduplicated, MRZ-verified
|
| 1161 |
+
""")
|
|
|
|
| 1162 |
|
| 1163 |
radioanimated_gpu_duration.change(
|
| 1164 |
fn=apply_gpu_duration,
|
|
|
|
| 1169 |
|
| 1170 |
image_submit.click(
|
| 1171 |
fn=generate_dual_card_ocr,
|
| 1172 |
+
inputs=[model_choice, image_query, image_front, image_back,
|
| 1173 |
+
max_new_tokens, temperature, top_p, top_k,
|
| 1174 |
+
repetition_penalty, gpu_duration_state],
|
|
|
|
|
|
|
|
|
|
| 1175 |
outputs=[output, markdown_output]
|
| 1176 |
)
|
| 1177 |
|
| 1178 |
gr.Markdown("""
|
| 1179 |
+
---
|
| 1180 |
+
### 🎯 Feature Matrix
|
| 1181 |
+
|
| 1182 |
+
| Feature | Method | Accuracy |
|
| 1183 |
+
|---------|--------|---------|
|
| 1184 |
+
| MRZ Parse (TD1/TD3/MRVA) | Python | 100% |
|
| 1185 |
+
| English Labels Extract | Python Regex | 100% |
|
| 1186 |
+
| Eastern Numeral Convert | Python char map | 100% |
|
| 1187 |
+
| Shamsi/Hijri Calendar | Python library | 100% |
|
| 1188 |
+
| Raw OCR (32+ scripts) | 8B VLM | 90%+ |
|
| 1189 |
+
| Doc Type Classification | 8B VLM | 95%+ |
|
| 1190 |
+
| Non-English Translation | 8B VLM | 90%+ |
|
| 1191 |
+
| Front+Back Deduplication | Python | 100% |
|
| 1192 |
+
|
| 1193 |
+
### 📋 Supported Documents
|
| 1194 |
+
🇮🇳 Aadhaar, PAN, Passport | 🇦🇪 Emirates ID | 🇸🇦 Iqama | 🇴🇲 Oman Resident Card
|
| 1195 |
+
🌍 International Passports (MRZ) | 🚗 Driving Licences | 🇮🇷 Iranian National ID (Shamsi)
|
| 1196 |
+
|
| 1197 |
+
### 🔒 Privacy
|
| 1198 |
+
All processing on-device | No data stored | GDPR compliant
|
| 1199 |
+
""")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1200 |
|
| 1201 |
|
| 1202 |
# Script entry point — launch the Gradio app bound to all interfaces on 7860.
if __name__ == "__main__":
    print("\n🚀 STARTING...")
    try:
        demo.queue(max_size=50).launch(
            server_name="0.0.0.0", server_port=7860, show_error=True, share=False)
    except Exception as e:
        # Surface launch failures (e.g. port already in use) with a traceback
        # instead of dying silently in the Space logs.
        import traceback
        print(f"❌ {e}")
        traceback.print_exc()
|
|
|
|
|
|