Chhagan005 committed on
Commit
6fb62c2
·
verified ·
1 Parent(s): cb30e22

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +80 -22
app.py CHANGED
@@ -331,36 +331,83 @@ if PEFT_AVAILABLE:
331
  else:
332
  print(" ⚠️ PEFT not available")
333
 
334
- # ── Model 3: CSM-DocExtract-VL-Q4KM (Qwen3VL, PRE-QUANTIZED Q4KM) ──
335
- print("\n3️⃣ CSM-DocExtract-VL-Q4KM (8B Q4KM β€” pre-quantized)...")
336
  MODEL_ID_Q4KM = "Chhagan005/CSM-DocExtract-VL-Q4KM"
337
  CSM_Q4KM_AVAILABLE = False
338
  processor_q4km = model_q4km = None
339
 
340
  try:
341
- processor_q4km = AutoProcessor.from_pretrained(MODEL_ID_Q4KM, trust_remote_code=True)
342
- model_q4km = load_vl_model(MODEL_ID_Q4KM, pre_quantized=True)
343
- print(" βœ… Loaded! (pre-quantized Q4KM ~6-7GB)")
 
 
 
 
 
 
 
 
344
  CSM_Q4KM_AVAILABLE = True
345
  except Exception as e:
346
- print(f" ❌ Failed: {e}")
347
-
348
- # ── Model 4: CSM-DocExtract-VL 4BNB (Qwen3VL, BitsAndBytes 4-bit) ──
349
- print("\n4️⃣ CSM-DocExtract-VL 4BNB (BitsAndBytes 4-bit)...")
 
 
 
 
 
 
 
 
 
 
350
  MODEL_ID_4BNB = "Chhagan005/CSM-DocExtract-VL"
351
  CSM_4BNB_AVAILABLE = False
352
  processor_4bnb = model_4bnb = None
 
353
 
354
  try:
355
- processor_4bnb = AutoProcessor.from_pretrained(MODEL_ID_4BNB, trust_remote_code=True)
356
- model_4bnb = load_vl_model(
357
- MODEL_ID_4BNB,
358
- quantization_config=bnb_4bit_config,
359
- pre_quantized=False)
360
- print(" βœ… Loaded! (~6-7GB VRAM)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
361
  CSM_4BNB_AVAILABLE = True
362
  except Exception as e:
363
- print(f" ❌ Failed: {e}")
 
 
 
 
 
 
 
 
 
 
 
364
 
365
  print("\n" + "="*70)
366
  print("πŸ“Š MODEL STATUS")
@@ -714,7 +761,8 @@ def build_unified_summary(front_result: str, back_result: str, mrz_data: dict) -
714
  # β•‘ STEP PIPELINE FUNCTIONS β•‘
715
  # β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•
716
 
717
- def run_step1_extraction(model, processor, image, device, temperature, top_p, top_k, repetition_penalty):
 
718
  """Step 1: LLM β†’ Raw OCR, original script, NO translation, NO coordinates"""
719
 
720
  def _generate(prompt_text):
@@ -724,10 +772,15 @@ def run_step1_extraction(model, processor, image, device, temperature, top_p, to
724
  except ImportError:
725
  HAS_QWEN_VL_UTILS = False
726
 
727
- messages = [{"role": "user", "content": [
728
- {"type": "image", "image": image},
729
- {"type": "text", "text": prompt_text},
730
- ]}]
 
 
 
 
 
731
 
732
  # Step A: Build prompt string
733
  try:
@@ -1081,8 +1134,13 @@ def generate_dual_card_ocr(model_name: str, text: str,
1081
  full_output += "⏳ **Step 1/2 β€” Raw OCR (original script, no translation)...**\n\n"
1082
  yield full_output, full_output
1083
 
 
 
 
1084
  step1_raw = run_step1_extraction(model, processor, image_front, device,
1085
- temperature, top_p, top_k, repetition_penalty)
 
 
1086
  front_meta = parse_step1_output(step1_raw)
1087
  front_meta_saved = front_meta
1088
 
 
331
  else:
332
  print(" ⚠️ PEFT not available")
333
 
334
+ # ── Model 3: CSM-DocExtract-VL-Q4KM (Full Qwen3VL, pre-quantized) ──
335
+ print("\n3️⃣ CSM-DocExtract-VL-Q4KM (Full Qwen3VL, pre-quantized BNB)...")
336
  MODEL_ID_Q4KM = "Chhagan005/CSM-DocExtract-VL-Q4KM"
337
  CSM_Q4KM_AVAILABLE = False
338
  processor_q4km = model_q4km = None
339
 
340
  try:
341
+ processor_q4km = AutoProcessor.from_pretrained(
342
+ MODEL_ID_Q4KM, trust_remote_code=True
343
+ )
344
+ # Pre-quantized safetensors → torch_dtype=auto, NO extra quantization_config
345
+ model_q4km = Qwen3VLForConditionalGeneration.from_pretrained(
346
+ MODEL_ID_Q4KM,
347
+ torch_dtype="auto",
348
+ device_map="auto",
349
+ trust_remote_code=True,
350
+ ).eval()
351
+ print(" βœ… Loaded! (Qwen3VL pre-quantized BNB ~6.4GB)")
352
  CSM_Q4KM_AVAILABLE = True
353
  except Exception as e:
354
+ try:
355
+ model_q4km = AutoModelForImageTextToText.from_pretrained(
356
+ MODEL_ID_Q4KM,
357
+ torch_dtype="auto",
358
+ device_map="auto",
359
+ trust_remote_code=True,
360
+ ).eval()
361
+ print(" βœ… Loaded! (AutoModel fallback)")
362
+ CSM_Q4KM_AVAILABLE = True
363
+ except Exception as e2:
364
+ print(f" ❌ Failed: {e2}")
365
+
366
+ # ── Model 4: CSM-DocExtract-VL (Full Qwen3VL, BNB INT4 trained) ──
367
+ print("\n4️⃣ CSM-DocExtract-VL 4BNB (Full Qwen3VL, BNB INT4 trained)...")
368
  MODEL_ID_4BNB = "Chhagan005/CSM-DocExtract-VL"
369
  CSM_4BNB_AVAILABLE = False
370
  processor_4bnb = model_4bnb = None
371
+ system_prompt_4bnb = "You are a helpful assistant." # default
372
 
373
  try:
374
+ # Read custom system_prompt.txt — this model was trained with it
375
+ try:
376
+ from huggingface_hub import hf_hub_download
377
+ sp_path = hf_hub_download(repo_id=MODEL_ID_4BNB, filename="system_prompt.txt")
378
+ with open(sp_path, "r", encoding="utf-8") as f:
379
+ system_prompt_4bnb = f.read().strip()
380
+ print(f" πŸ“‹ system_prompt.txt loaded: {system_prompt_4bnb[:80]}...")
381
+ except Exception as sp_err:
382
+ print(f" ⚠️ system_prompt.txt not loaded: {sp_err} β€” using default")
383
+
384
+ processor_4bnb = AutoProcessor.from_pretrained(
385
+ MODEL_ID_4BNB, trust_remote_code=True
386
+ )
387
+ # BNB INT4 trained safetensors → torch_dtype=auto, NO extra quantization_config
388
+ # (ignore .gguf files — those are for llama.cpp, not transformers)
389
+ model_4bnb = Qwen3VLForConditionalGeneration.from_pretrained(
390
+ MODEL_ID_4BNB,
391
+ torch_dtype="auto",
392
+ device_map="auto",
393
+ trust_remote_code=True,
394
+ ignore_mismatched_sizes=True, # GGUF files present β€” ignore safely
395
+ ).eval()
396
+ print(" βœ… Loaded! (Qwen3VL BNB INT4 trained ~6.4GB)")
397
  CSM_4BNB_AVAILABLE = True
398
  except Exception as e:
399
+ try:
400
+ model_4bnb = AutoModelForImageTextToText.from_pretrained(
401
+ MODEL_ID_4BNB,
402
+ torch_dtype="auto",
403
+ device_map="auto",
404
+ trust_remote_code=True,
405
+ ).eval()
406
+ print(" βœ… Loaded! (AutoModel fallback)")
407
+ CSM_4BNB_AVAILABLE = True
408
+ except Exception as e2:
409
+ print(f" ❌ Failed: {e2}")
410
+
411
 
412
  print("\n" + "="*70)
413
  print("πŸ“Š MODEL STATUS")
 
761
  # β•‘ STEP PIPELINE FUNCTIONS β•‘
762
  # β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•
763
 
764
+ def run_step1_extraction(model, processor, image, device, temperature, top_p, top_k, repetition_penalty, system_prompt=None):
765
+
766
  """Step 1: LLM β†’ Raw OCR, original script, NO translation, NO coordinates"""
767
 
768
  def _generate(prompt_text):
 
772
  except ImportError:
773
  HAS_QWEN_VL_UTILS = False
774
 
775
+ sys_msg = system_prompt or "You are a helpful assistant."
776
+ messages = [
777
+ {"role": "system", "content": sys_msg},
778
+ {"role": "user", "content": [
779
+ {"type": "image", "image": image},
780
+ {"type": "text", "text": prompt_text},
781
+ ]}
782
+ ]
783
+
784
 
785
  # Step A: Build prompt string
786
  try:
 
1134
  full_output += "⏳ **Step 1/2 β€” Raw OCR (original script, no translation)...**\n\n"
1135
  yield full_output, full_output
1136
 
1137
+ # Pass the custom system prompt through for Model 4
1138
+ sys_p = system_prompt_4bnb if model_name == "CSM-DocExtract-4BNB πŸ’Ž" else None
1139
+
1140
  step1_raw = run_step1_extraction(model, processor, image_front, device,
1141
+ temperature, top_p, top_k, repetition_penalty,
1142
+ system_prompt=sys_p)
1143
+
1144
  front_meta = parse_step1_output(step1_raw)
1145
  front_meta_saved = front_meta
1146