Chhagan005 committed on
Commit
5b3484c
·
verified ·
1 Parent(s): f43b38d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -18
app.py CHANGED
@@ -296,40 +296,59 @@ if QWEN3_AVAILABLE:
296
  else:
297
  print(" ⚠️ Qwen3VL not in transformers version")
298
 
299
- # ── Model 3: CSM-DocExtract-VL-Q4KM (NEW, replaces Qwen3-2B) ──
300
- print("\n3️⃣ CSM-DocExtract-VL-Q4KM (8B Q4KM Quantized)...")
301
  MODEL_ID_Q4KM = "Chhagan005/CSM-DocExtract-VL-Q4KM"
302
  CSM_Q4KM_AVAILABLE = False
303
  processor_q4km = model_q4km = None
304
 
305
  try:
306
- processor_q4km = AutoProcessor.from_pretrained(MODEL_ID_Q4KM, trust_remote_code=True)
307
- # Try loading as full quantized model first
308
- try:
309
- model_q4km = Qwen2_5_VLForConditionalGeneration.from_pretrained(
 
 
 
 
 
310
  MODEL_ID_Q4KM,
311
- quantization_config=bnb_4bit_config,
312
- torch_dtype=torch.float16,
313
  device_map="auto",
314
  trust_remote_code=True,
315
  ).eval()
316
- except:
317
- # Fallback: try Qwen3VL architecture
318
- if QWEN3_AVAILABLE:
319
- model_q4km = Qwen3VLForConditionalGeneration.from_pretrained(
 
 
 
 
320
  MODEL_ID_Q4KM,
321
- quantization_config=bnb_4bit_config,
322
- torch_dtype=torch.float16,
323
  device_map="auto",
324
  trust_remote_code=True,
325
  ).eval()
326
- else:
327
- raise Exception("Neither Qwen2.5VL nor Qwen3VL architecture worked")
328
- print(" ✅ Loaded! (~6-7GB VRAM)")
329
- CSM_Q4KM_AVAILABLE = True
 
 
 
 
 
 
 
 
 
 
 
330
  except Exception as e:
331
  print(f" ❌ Failed: {e}")
332
 
 
333
  # ── Model 4: CSM-DocExtract-VL 4BNB (NEW, replaces Nanonets) ──
334
  print("\n4️⃣ CSM-DocExtract-VL 4BNB (BitsAndBytes 4-bit)...")
335
  MODEL_ID_4BNB = "Chhagan005/CSM-DocExtract-VL"
 
296
  else:
297
  print(" ⚠️ Qwen3VL not in transformers version")
298
 
299
+ # ── Model 3: CSM-DocExtract-VL-Q4KM (NEW — Qwen3VL, pre-quantized Q4KM) ──
300
+ print("\n3️⃣ CSM-DocExtract-VL-Q4KM (8B Q4KM — Qwen3VL architecture)...")
301
  MODEL_ID_Q4KM = "Chhagan005/CSM-DocExtract-VL-Q4KM"
302
  CSM_Q4KM_AVAILABLE = False
303
  processor_q4km = model_q4km = None
304
 
305
  try:
306
+ # Processor load
307
+ processor_q4km = AutoProcessor.from_pretrained(
308
+ MODEL_ID_Q4KM, trust_remote_code=True
309
+ )
310
+
311
+ # Model is qwen3_vl type + ALREADY pre-quantized Q4KM
312
+ # So: use Qwen3VL class + NO extra quantization_config
313
+ if QWEN3_AVAILABLE:
314
+ model_q4km = Qwen3VLForConditionalGeneration.from_pretrained(
315
  MODEL_ID_Q4KM,
316
+ torch_dtype="auto", # model already has Q4KM weights
 
317
  device_map="auto",
318
  trust_remote_code=True,
319
  ).eval()
320
+ print(" ✅ Loaded! (Qwen3VL Q4KM pre-quantized)")
321
+ CSM_Q4KM_AVAILABLE = True
322
+ else:
323
+ # Qwen3VL not in transformers → use AutoModel fallback
324
+ from transformers import AutoModelForCausalLM
325
+ try:
326
+ from transformers import AutoModelForVisualQuestionAnswering
327
+ model_q4km = AutoModelForVisualQuestionAnswering.from_pretrained(
328
  MODEL_ID_Q4KM,
329
+ torch_dtype="auto",
 
330
  device_map="auto",
331
  trust_remote_code=True,
332
  ).eval()
333
+ except:
334
+ # Last fallback: force load with Qwen2_5 but ignore arch warning
335
+ import warnings
336
+ with warnings.catch_warnings():
337
+ warnings.simplefilter("ignore")
338
+ model_q4km = Qwen2_5_VLForConditionalGeneration.from_pretrained(
339
+ MODEL_ID_Q4KM,
340
+ torch_dtype="auto",
341
+ device_map="auto",
342
+ trust_remote_code=True,
343
+ ignore_mismatched_sizes=True,
344
+ ).eval()
345
+ print(" ✅ Loaded! (fallback loader)")
346
+ CSM_Q4KM_AVAILABLE = True
347
+
348
  except Exception as e:
349
  print(f" ❌ Failed: {e}")
350
 
351
+
352
  # ── Model 4: CSM-DocExtract-VL 4BNB (NEW, replaces Nanonets) ──
353
  print("\n4️⃣ CSM-DocExtract-VL 4BNB (BitsAndBytes 4-bit)...")
354
  MODEL_ID_4BNB = "Chhagan005/CSM-DocExtract-VL"