Spaces:
Running on Zero
Update app.py
app.py
CHANGED
@@ -296,40 +296,59 @@ if QWEN3_AVAILABLE:
 else:
     print(" ⚠️ Qwen3VL not in transformers version")
 
-# ── Model 3: CSM-DocExtract-VL-Q4KM (NEW,
-print("\n3️⃣ CSM-DocExtract-VL-Q4KM (8B Q4KM
+# ── Model 3: CSM-DocExtract-VL-Q4KM (NEW → Qwen3VL, pre-quantized Q4KM) ──
+print("\n3️⃣ CSM-DocExtract-VL-Q4KM (8B Q4KM → Qwen3VL architecture)...")
 MODEL_ID_Q4KM = "Chhagan005/CSM-DocExtract-VL-Q4KM"
 CSM_Q4KM_AVAILABLE = False
 processor_q4km = model_q4km = None
 
 try:
-
-
-
-
+    # Processor load
+    processor_q4km = AutoProcessor.from_pretrained(
+        MODEL_ID_Q4KM, trust_remote_code=True
+    )
+
+    # Model is qwen3_vl type + ALREADY pre-quantized Q4KM
+    # So: use Qwen3VL class + NO extra quantization_config
+    if QWEN3_AVAILABLE:
+        model_q4km = Qwen3VLForConditionalGeneration.from_pretrained(
             MODEL_ID_Q4KM,
-
-            torch_dtype=torch.float16,
+            torch_dtype="auto",  # model already has Q4KM weights
             device_map="auto",
             trust_remote_code=True,
         ).eval()
-
-
-
-
+        print(" ✅ Loaded! (Qwen3VL Q4KM pre-quantized)")
+        CSM_Q4KM_AVAILABLE = True
+    else:
+        # Qwen3VL not in transformers → use AutoModel fallback
+        from transformers import AutoModelForCausalLM
+        try:
+            from transformers import AutoModelForVisualQuestionAnswering
+            model_q4km = AutoModelForVisualQuestionAnswering.from_pretrained(
                 MODEL_ID_Q4KM,
-
-                torch_dtype=torch.float16,
+                torch_dtype="auto",
                 device_map="auto",
                 trust_remote_code=True,
             ).eval()
-
-
-
-
+        except:
+            # Last fallback: force load with Qwen2_5 but ignore arch warning
+            import warnings
+            with warnings.catch_warnings():
+                warnings.simplefilter("ignore")
+                model_q4km = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+                    MODEL_ID_Q4KM,
+                    torch_dtype="auto",
+                    device_map="auto",
+                    trust_remote_code=True,
+                    ignore_mismatched_sizes=True,
+                ).eval()
+        print(" ✅ Loaded! (fallback loader)")
+        CSM_Q4KM_AVAILABLE = True
+
 except Exception as e:
     print(f" ❌ Failed: {e}")
 
+
 # ── Model 4: CSM-DocExtract-VL 4BNB (NEW, replaces Nanonets) ──
 print("\n4️⃣ CSM-DocExtract-VL 4BNB (BitsAndBytes 4-bit)...")
 MODEL_ID_4BNB = "Chhagan005/CSM-DocExtract-VL"