Spaces:
Sleeping
Sleeping
Sahil Seemant committed on
Commit ·
cb2afbd
1
Parent(s): 6b16d45
Fix VLM architecture incompatibility (Ministral/Qwen)
Browse files
- chat_gui.py +17 -4
- requirements.txt +4 -1
chat_gui.py
CHANGED
|
@@ -14,7 +14,13 @@ except (ImportError, ModuleNotFoundError):
|
|
| 14 |
HAS_MLX = False
|
| 15 |
try:
|
| 16 |
import torch
|
| 17 |
-
from transformers import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
from peft import PeftModel
|
| 19 |
except ImportError:
|
| 20 |
st.error("Missing cloud dependencies. Please check requirements.txt")
|
|
@@ -283,14 +289,21 @@ if st.session_state.messages and st.session_state.messages[-1]["role"] == "user"
|
|
| 283 |
else:
|
| 284 |
# Cloud Fallback (Transformers)
|
| 285 |
hf_token = os.getenv("HF_TOKEN")
|
| 286 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 287 |
# Use 4-bit quantization if on low-memory cloud
|
| 288 |
-
model =
|
| 289 |
conf["path"],
|
| 290 |
torch_dtype=torch.float16,
|
| 291 |
device_map="auto",
|
| 292 |
load_in_4bit=True,
|
| 293 |
-
token=hf_token
|
|
|
|
| 294 |
)
|
| 295 |
if conf["adapter"]:
|
| 296 |
model = PeftModel.from_pretrained(model, conf["adapter"], token=hf_token)
|
|
|
|
| 14 |
HAS_MLX = False
|
| 15 |
try:
|
| 16 |
import torch
|
| 17 |
+
from transformers import (
|
| 18 |
+
AutoModelForCausalLM,
|
| 19 |
+
AutoModelForImageTextToText,
|
| 20 |
+
AutoTokenizer,
|
| 21 |
+
AutoProcessor,
|
| 22 |
+
TextIteratorStreamer
|
| 23 |
+
)
|
| 24 |
from peft import PeftModel
|
| 25 |
except ImportError:
|
| 26 |
st.error("Missing cloud dependencies. Please check requirements.txt")
|
|
|
|
| 289 |
else:
|
| 290 |
# Cloud Fallback (Transformers)
|
| 291 |
hf_token = os.getenv("HF_TOKEN")
|
| 292 |
+
|
| 293 |
+
# Both Ministral-3 and Qwen-3.5 are Vision-Language Models (VLM)
|
| 294 |
+
model_class = AutoModelForImageTextToText
|
| 295 |
+
processor_class = AutoProcessor
|
| 296 |
+
|
| 297 |
+
st.info(f"Loading {st.session_state.current_model} via Transformers (VLM)...")
|
| 298 |
+
tokenizer = processor_class.from_pretrained(conf["path"], token=hf_token, trust_remote_code=True)
|
| 299 |
# Use 4-bit quantization if on low-memory cloud
|
| 300 |
+
model = model_class.from_pretrained(
|
| 301 |
conf["path"],
|
| 302 |
torch_dtype=torch.float16,
|
| 303 |
device_map="auto",
|
| 304 |
load_in_4bit=True,
|
| 305 |
+
token=hf_token,
|
| 306 |
+
trust_remote_code=True
|
| 307 |
)
|
| 308 |
if conf["adapter"]:
|
| 309 |
model = PeftModel.from_pretrained(model, conf["adapter"], token=hf_token)
|
requirements.txt
CHANGED
|
@@ -11,8 +11,11 @@ altair==5.3.0
|
|
| 11 |
# Inference (MLX for local Mac, Transformers for Cloud/Linux)
|
| 12 |
mlx; platform_system == "Darwin"
|
| 13 |
mlx-vlm; platform_system == "Darwin"
|
| 14 |
-
transformers
|
| 15 |
torch
|
| 16 |
peft
|
| 17 |
accelerate
|
| 18 |
bitsandbytes
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
# Inference (MLX for local Mac, Transformers for Cloud/Linux)
|
| 12 |
mlx; platform_system == "Darwin"
|
| 13 |
mlx-vlm; platform_system == "Darwin"
|
| 14 |
+
transformers==4.48.2
|
| 15 |
torch
|
| 16 |
peft
|
| 17 |
accelerate
|
| 18 |
bitsandbytes
|
| 19 |
+
sentencepiece
|
| 20 |
+
protobuf
|
| 21 |
+
vllm; platform_system == "Linux"
|