Chhagan005 committed on
Commit
2fb82d7
·
verified ·
1 Parent(s): 5b3484c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +83 -109
app.py CHANGED
@@ -28,26 +28,33 @@ os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"
28
  os.environ["HF_HOME"] = "/tmp/hf_home"
29
 
30
  from transformers import (
31
- Qwen2_5_VLForConditionalGeneration,
32
- Qwen2VLForConditionalGeneration,
33
  AutoProcessor,
 
34
  TextIteratorStreamer,
35
  BitsAndBytesConfig,
36
  )
37
 
38
- try:
39
- from peft import PeftModel, PeftConfig
40
- PEFT_AVAILABLE = True
41
- except:
42
- PEFT_AVAILABLE = False
43
- print("⚠️ PEFT not available")
44
-
45
  try:
46
  from transformers import Qwen3VLForConditionalGeneration
47
  QWEN3_AVAILABLE = True
48
- except:
 
49
  QWEN3_AVAILABLE = False
50
- print("⚠️ Qwen3VL not available in current transformers version")
 
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
  from gradio.themes import Soft
53
  from gradio.themes.utils import colors, fonts, sizes
@@ -223,6 +230,42 @@ RULES:
223
 
224
  ---"""
225
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
226
 
227
  # ╔══════════════════════════════════════════╗
228
  # ║ MODEL LOADING ║
@@ -240,7 +283,7 @@ bnb_4bit_config = BitsAndBytesConfig(
240
  bnb_4bit_use_double_quant=True,
241
  )
242
 
243
- # ── Model 1: Chhagan_ML-VL-OCR-v1 (LoRA, keep) ──
244
  print("\n1️⃣ Chhagan_ML-VL-OCR-v1 (LoRA Refined)...")
245
  MODEL_ID_C1 = "Chhagan005/Chhagan_ML-VL-OCR-v1"
246
  CHHAGAN_V1_AVAILABLE = False
@@ -248,15 +291,11 @@ processor_c1 = model_c1 = None
248
 
249
  if PEFT_AVAILABLE:
250
  try:
251
- try:
252
- config = PeftConfig.from_pretrained(MODEL_ID_C1)
253
- base_id = config.base_model_name_or_path
254
- except:
255
- base_id = "Qwen/Qwen2.5-VL-2B-Instruct"
256
  processor_c1 = AutoProcessor.from_pretrained(base_id, trust_remote_code=True)
257
- base_c1 = Qwen2VLForConditionalGeneration.from_pretrained(
258
- base_id, torch_dtype=torch.float16, device_map="auto", trust_remote_code=True)
259
- model_c1 = PeftModel.from_pretrained(base_c1, MODEL_ID_C1).to(device).eval()
260
  print(" ✅ Loaded!")
261
  CHHAGAN_V1_AVAILABLE = True
262
  except Exception as e:
@@ -264,92 +303,41 @@ if PEFT_AVAILABLE:
264
  else:
265
  print(" ⚠️ PEFT not available")
266
 
267
- # ── Model 2: Chhagan-DocVL-Qwen3 (Qwen3 fine-tuned, keep) ──
268
  print("\n2️⃣ Chhagan-DocVL-Qwen3 (Qwen3-VL Refined)...")
269
  MODEL_ID_C2 = "Chhagan005/Chhagan-DocVL-Qwen3"
270
  CHHAGAN_QWEN3_AVAILABLE = False
271
  processor_c2 = model_c2 = None
272
 
273
- if QWEN3_AVAILABLE:
274
  try:
275
- try:
276
- if PEFT_AVAILABLE:
277
- config = PeftConfig.from_pretrained(MODEL_ID_C2)
278
- base_id = config.base_model_name_or_path
279
- processor_c2 = AutoProcessor.from_pretrained(base_id, trust_remote_code=True)
280
- base_c2 = Qwen3VLForConditionalGeneration.from_pretrained(
281
- base_id, torch_dtype=torch.float16, device_map="auto", trust_remote_code=True)
282
- model_c2 = PeftModel.from_pretrained(base_c2, MODEL_ID_C2).to(device).eval()
283
- else:
284
- raise Exception("No PEFT")
285
- except:
286
- print(" Loading as full fine-tuned...")
287
- processor_c2 = AutoProcessor.from_pretrained(MODEL_ID_C2, trust_remote_code=True)
288
- model_c2 = Qwen3VLForConditionalGeneration.from_pretrained(
289
- MODEL_ID_C2, attn_implementation="flash_attention_2",
290
- torch_dtype=torch.float16, device_map="auto", trust_remote_code=True
291
- ).to(device).eval()
292
  print(" ✅ Loaded!")
293
  CHHAGAN_QWEN3_AVAILABLE = True
294
  except Exception as e:
295
  print(f" ❌ Failed: {e}")
296
  else:
297
- print(" ⚠️ Qwen3VL not in transformers version")
298
 
299
- # ── Model 3: CSM-DocExtract-VL-Q4KM (NEW — Qwen3VL, pre-quantized Q4KM) ──
300
- print("\n3️⃣ CSM-DocExtract-VL-Q4KM (8B Q4KM — Qwen3VL architecture)...")
301
  MODEL_ID_Q4KM = "Chhagan005/CSM-DocExtract-VL-Q4KM"
302
  CSM_Q4KM_AVAILABLE = False
303
  processor_q4km = model_q4km = None
304
 
305
  try:
306
- # Processor load
307
- processor_q4km = AutoProcessor.from_pretrained(
308
- MODEL_ID_Q4KM, trust_remote_code=True
309
- )
310
-
311
- # Model is qwen3_vl type + ALREADY pre-quantized Q4KM
312
- # So: use Qwen3VL class + NO extra quantization_config
313
- if QWEN3_AVAILABLE:
314
- model_q4km = Qwen3VLForConditionalGeneration.from_pretrained(
315
- MODEL_ID_Q4KM,
316
- torch_dtype="auto", # model already has Q4KM weights
317
- device_map="auto",
318
- trust_remote_code=True,
319
- ).eval()
320
- print(" ✅ Loaded! (Qwen3VL Q4KM pre-quantized)")
321
- CSM_Q4KM_AVAILABLE = True
322
- else:
323
- # Qwen3VL not in transformers → use AutoModel fallback
324
- from transformers import AutoModelForCausalLM
325
- try:
326
- from transformers import AutoModelForVisualQuestionAnswering
327
- model_q4km = AutoModelForVisualQuestionAnswering.from_pretrained(
328
- MODEL_ID_Q4KM,
329
- torch_dtype="auto",
330
- device_map="auto",
331
- trust_remote_code=True,
332
- ).eval()
333
- except:
334
- # Last fallback: force load with Qwen2_5 but ignore arch warning
335
- import warnings
336
- with warnings.catch_warnings():
337
- warnings.simplefilter("ignore")
338
- model_q4km = Qwen2_5_VLForConditionalGeneration.from_pretrained(
339
- MODEL_ID_Q4KM,
340
- torch_dtype="auto",
341
- device_map="auto",
342
- trust_remote_code=True,
343
- ignore_mismatched_sizes=True,
344
- ).eval()
345
- print(" ✅ Loaded! (fallback loader)")
346
- CSM_Q4KM_AVAILABLE = True
347
-
348
  except Exception as e:
349
  print(f" ❌ Failed: {e}")
350
 
351
-
352
- # ── Model 4: CSM-DocExtract-VL 4BNB (NEW, replaces Nanonets) ──
353
  print("\n4️⃣ CSM-DocExtract-VL 4BNB (BitsAndBytes 4-bit)...")
354
  MODEL_ID_4BNB = "Chhagan005/CSM-DocExtract-VL"
355
  CSM_4BNB_AVAILABLE = False
@@ -357,25 +345,10 @@ processor_4bnb = model_4bnb = None
357
 
358
  try:
359
  processor_4bnb = AutoProcessor.from_pretrained(MODEL_ID_4BNB, trust_remote_code=True)
360
- try:
361
- model_4bnb = Qwen2_5_VLForConditionalGeneration.from_pretrained(
362
- MODEL_ID_4BNB,
363
- quantization_config=bnb_4bit_config,
364
- torch_dtype=torch.float16,
365
- device_map="auto",
366
- trust_remote_code=True,
367
- ).eval()
368
- except:
369
- if QWEN3_AVAILABLE:
370
- model_4bnb = Qwen3VLForConditionalGeneration.from_pretrained(
371
- MODEL_ID_4BNB,
372
- quantization_config=bnb_4bit_config,
373
- torch_dtype=torch.float16,
374
- device_map="auto",
375
- trust_remote_code=True,
376
- ).eval()
377
- else:
378
- raise Exception("Architecture detection failed")
379
  print(" ✅ Loaded! (~6-7GB VRAM)")
380
  CSM_4BNB_AVAILABLE = True
381
  except Exception as e:
@@ -385,10 +358,10 @@ print("\n" + "="*70)
385
  print("📊 MODEL STATUS")
386
  print("="*70)
387
  status = [
388
- ("Chhagan_ML-VL-OCR-v1", CHHAGAN_V1_AVAILABLE, "LoRA Fine-tuned"),
389
- ("Chhagan-DocVL-Qwen3", CHHAGAN_QWEN3_AVAILABLE, "Qwen3-VL Fine-tuned"),
390
- ("CSM-DocExtract-VL-Q4KM", CSM_Q4KM_AVAILABLE, "8B Q4KM ~6-7GB"),
391
- ("CSM-DocExtract-VL 4BNB", CSM_4BNB_AVAILABLE, "BitsAndBytes 4-bit ~6-7GB"),
392
  ]
393
  for name, ok, note in status:
394
  print(f" {'✅' if ok else '❌'} {name:<35} {note}")
@@ -397,6 +370,7 @@ loaded = sum(x[1] for x in status)
397
  print(f" Total loaded: {loaded}/4\n")
398
 
399
 
 
400
  # ╔══════════════════════════════════════════╗
401
  # ║ PYTHON PIPELINE FUNCTIONS ║
402
  # ╚══════════════════════════════════════════╝
 
28
  os.environ["HF_HOME"] = "/tmp/hf_home"
29
 
30
  from transformers import (
 
 
31
  AutoProcessor,
32
+ AutoModelForImageTextToText, # Universal VLM loader — Qwen2VL + Qwen3VL dono
33
  TextIteratorStreamer,
34
  BitsAndBytesConfig,
35
  )
36
 
37
+ # Specific class imports — graceful fallback
 
 
 
 
 
 
38
  try:
39
  from transformers import Qwen3VLForConditionalGeneration
40
  QWEN3_AVAILABLE = True
41
+ print("✅ Qwen3VLForConditionalGeneration available")
42
+ except ImportError:
43
  QWEN3_AVAILABLE = False
44
+ print("⚠️ Qwen3VL direct import not available — using AutoModel fallback")
45
+
46
+ try:
47
+ from transformers import Qwen2VLForConditionalGeneration
48
+ QWEN2_AVAILABLE = True
49
+ except ImportError:
50
+ QWEN2_AVAILABLE = False
51
+
52
+ try:
53
+ from transformers import Qwen2_5_VLForConditionalGeneration
54
+ QWEN25_AVAILABLE = True
55
+ except ImportError:
56
+ QWEN25_AVAILABLE = False
57
+
58
 
59
  from gradio.themes import Soft
60
  from gradio.themes.utils import colors, fonts, sizes
 
230
 
231
  ---"""
232
 
233
+ def load_vl_model(model_id: str, quantization_config=None, pre_quantized: bool = False):
234
+ """
235
+ Universal VLM loader — Qwen2VL / Qwen3VL / any VLM
236
+ pre_quantized=True → model already has weights quantized, no extra config needed
237
+ pre_quantized=False → apply quantization_config during load
238
+ """
239
+ load_kwargs = {
240
+ "torch_dtype": "auto",
241
+ "device_map": "auto",
242
+ "trust_remote_code": True,
243
+ }
244
+ if quantization_config is not None and not pre_quantized:
245
+ load_kwargs["quantization_config"] = quantization_config
246
+
247
+ # Try 1: Qwen3VL (newest)
248
+ if QWEN3_AVAILABLE:
249
+ try:
250
+ return Qwen3VLForConditionalGeneration.from_pretrained(
251
+ model_id, **load_kwargs).eval()
252
+ except Exception as e:
253
+ print(f" Qwen3VL failed: {e}, trying AutoModel...")
254
+
255
+ # Try 2: AutoModelForImageTextToText (universal fallback)
256
+ try:
257
+ return AutoModelForImageTextToText.from_pretrained(
258
+ model_id, **load_kwargs).eval()
259
+ except Exception as e:
260
+ print(f" AutoModel failed: {e}, trying Qwen2VL...")
261
+
262
+ # Try 3: Qwen2VL last resort
263
+ if QWEN2_AVAILABLE:
264
+ return Qwen2VLForConditionalGeneration.from_pretrained(
265
+ model_id, **load_kwargs).eval()
266
+
267
+ raise RuntimeError(f"No compatible loader found for {model_id}")
268
+
269
 
270
  # ╔══════════════════════════════════════════╗
271
  # ║ MODEL LOADING ║
 
283
  bnb_4bit_use_double_quant=True,
284
  )
285
 
286
+ # ── Model 1: Chhagan_ML-VL-OCR-v1 (LoRA on Qwen2VL base) ──
287
  print("\n1️⃣ Chhagan_ML-VL-OCR-v1 (LoRA Refined)...")
288
  MODEL_ID_C1 = "Chhagan005/Chhagan_ML-VL-OCR-v1"
289
  CHHAGAN_V1_AVAILABLE = False
 
291
 
292
  if PEFT_AVAILABLE:
293
  try:
294
+ config = PeftConfig.from_pretrained(MODEL_ID_C1)
295
+ base_id = config.base_model_name_or_path
 
 
 
296
  processor_c1 = AutoProcessor.from_pretrained(base_id, trust_remote_code=True)
297
+ base_c1 = load_vl_model(base_id)
298
+ model_c1 = PeftModel.from_pretrained(base_c1, MODEL_ID_C1).to(device).eval()
 
299
  print(" ✅ Loaded!")
300
  CHHAGAN_V1_AVAILABLE = True
301
  except Exception as e:
 
303
  else:
304
  print(" ⚠️ PEFT not available")
305
 
306
+ # ── Model 2: Chhagan-DocVL-Qwen3 (LoRA on Qwen3VL base) ──
307
  print("\n2️⃣ Chhagan-DocVL-Qwen3 (Qwen3-VL Refined)...")
308
  MODEL_ID_C2 = "Chhagan005/Chhagan-DocVL-Qwen3"
309
  CHHAGAN_QWEN3_AVAILABLE = False
310
  processor_c2 = model_c2 = None
311
 
312
+ if PEFT_AVAILABLE:
313
  try:
314
+ config = PeftConfig.from_pretrained(MODEL_ID_C2)
315
+ base_id = config.base_model_name_or_path
316
+ processor_c2 = AutoProcessor.from_pretrained(base_id, trust_remote_code=True)
317
+ base_c2 = load_vl_model(base_id)
318
+ model_c2 = PeftModel.from_pretrained(base_c2, MODEL_ID_C2).to(device).eval()
 
 
 
 
 
 
 
 
 
 
 
 
319
  print(" ✅ Loaded!")
320
  CHHAGAN_QWEN3_AVAILABLE = True
321
  except Exception as e:
322
  print(f" ❌ Failed: {e}")
323
  else:
324
+ print(" ⚠️ PEFT not available")
325
 
326
+ # ── Model 3: CSM-DocExtract-VL-Q4KM (Qwen3VL, PRE-QUANTIZED Q4KM) ──
327
+ print("\n3️⃣ CSM-DocExtract-VL-Q4KM (8B Q4KM — pre-quantized)...")
328
  MODEL_ID_Q4KM = "Chhagan005/CSM-DocExtract-VL-Q4KM"
329
  CSM_Q4KM_AVAILABLE = False
330
  processor_q4km = model_q4km = None
331
 
332
  try:
333
+ processor_q4km = AutoProcessor.from_pretrained(MODEL_ID_Q4KM, trust_remote_code=True)
334
+ model_q4km = load_vl_model(MODEL_ID_Q4KM, pre_quantized=True)
335
+ print(" ✅ Loaded! (pre-quantized Q4KM ~6-7GB)")
336
+ CSM_Q4KM_AVAILABLE = True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
337
  except Exception as e:
338
  print(f" ❌ Failed: {e}")
339
 
340
+ # ── Model 4: CSM-DocExtract-VL 4BNB (Qwen3VL, BitsAndBytes 4-bit) ──
 
341
  print("\n4️⃣ CSM-DocExtract-VL 4BNB (BitsAndBytes 4-bit)...")
342
  MODEL_ID_4BNB = "Chhagan005/CSM-DocExtract-VL"
343
  CSM_4BNB_AVAILABLE = False
 
345
 
346
  try:
347
  processor_4bnb = AutoProcessor.from_pretrained(MODEL_ID_4BNB, trust_remote_code=True)
348
+ model_4bnb = load_vl_model(
349
+ MODEL_ID_4BNB,
350
+ quantization_config=bnb_4bit_config,
351
+ pre_quantized=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
352
  print(" ✅ Loaded! (~6-7GB VRAM)")
353
  CSM_4BNB_AVAILABLE = True
354
  except Exception as e:
 
358
  print("📊 MODEL STATUS")
359
  print("="*70)
360
  status = [
361
+ ("Chhagan_ML-VL-OCR-v1", CHHAGAN_V1_AVAILABLE, "LoRA Fine-tuned"),
362
+ ("Chhagan-DocVL-Qwen3", CHHAGAN_QWEN3_AVAILABLE, "Qwen3-VL Fine-tuned"),
363
+ ("CSM-DocExtract-Q4KM", CSM_Q4KM_AVAILABLE, "Qwen3VL Q4KM pre-quantized"),
364
+ ("CSM-DocExtract-4BNB", CSM_4BNB_AVAILABLE, "Qwen3VL BitsAndBytes 4-bit"),
365
  ]
366
  for name, ok, note in status:
367
  print(f" {'✅' if ok else '❌'} {name:<35} {note}")
 
370
  print(f" Total loaded: {loaded}/4\n")
371
 
372
 
373
+
374
  # ╔══════════════════════════════════════════╗
375
  # ║ PYTHON PIPELINE FUNCTIONS ║
376
  # ╚══════════════════════════════════════════╝