Spaces:
Sleeping
Sleeping
Commit
·
34ee4d1
1
Parent(s):
41f50c5
Fix remaining pipeline calls to use transformers_repo
Browse files
app.py
CHANGED
|
@@ -403,21 +403,21 @@ def load_pipeline(model_name: str):
|
|
| 403 |
print(f"✅ BitsAndBytes 8-bit pipeline loaded: {model_name}")
|
| 404 |
return pipe
|
| 405 |
except Exception as exc:
|
| 406 |
-
print(f"⚠️ BitsAndBytes 8-bit load failed for {model_name}: {exc}")
|
| 407 |
print(f" → Falling back to FP16/FP32...")
|
| 408 |
|
| 409 |
# Fallback to bfloat16/fp16/fp32 (unquantized)
|
| 410 |
for dtype in (torch.bfloat16, torch.float16, torch.float32):
|
| 411 |
dtype_name = {torch.bfloat16: "bfloat16", torch.float16: "float16", torch.float32: "float32"}[dtype]
|
| 412 |
try:
|
| 413 |
-
print(f"🔄 Loading {model_name} with {dtype_name} precision...")
|
| 414 |
model_kwargs = {}
|
| 415 |
if FLASH_ATTN_AVAILABLE:
|
| 416 |
model_kwargs["attn_implementation"] = "flash_attention_2"
|
| 417 |
|
| 418 |
pipe = pipeline(
|
| 419 |
task="text-generation",
|
| 420 |
-
model=model_name,
|
| 421 |
tokenizer=tokenizer,
|
| 422 |
trust_remote_code=True,
|
| 423 |
device_map="auto",
|
|
@@ -451,7 +451,7 @@ def load_pipeline(model_name: str):
|
|
| 451 |
|
| 452 |
pipe = pipeline(
|
| 453 |
task="text-generation",
|
| 454 |
-
model=model_name,
|
| 455 |
tokenizer=tokenizer,
|
| 456 |
trust_remote_code=True,
|
| 457 |
device_map="auto",
|
|
|
|
| 403 |
print(f"✅ BitsAndBytes 8-bit pipeline loaded: {model_name}")
|
| 404 |
return pipe
|
| 405 |
except Exception as exc:
|
| 406 |
+
print(f"⚠️ BitsAndBytes 8-bit load failed for {transformers_repo}: {exc}")
|
| 407 |
print(f" → Falling back to FP16/FP32...")
|
| 408 |
|
| 409 |
# Fallback to bfloat16/fp16/fp32 (unquantized)
|
| 410 |
for dtype in (torch.bfloat16, torch.float16, torch.float32):
|
| 411 |
dtype_name = {torch.bfloat16: "bfloat16", torch.float16: "float16", torch.float32: "float32"}[dtype]
|
| 412 |
try:
|
| 413 |
+
print(f"🔄 Loading {transformers_repo} with {dtype_name} precision...")
|
| 414 |
model_kwargs = {}
|
| 415 |
if FLASH_ATTN_AVAILABLE:
|
| 416 |
model_kwargs["attn_implementation"] = "flash_attention_2"
|
| 417 |
|
| 418 |
pipe = pipeline(
|
| 419 |
task="text-generation",
|
| 420 |
+
model=transformers_repo,
|
| 421 |
tokenizer=tokenizer,
|
| 422 |
trust_remote_code=True,
|
| 423 |
device_map="auto",
|
|
|
|
| 451 |
|
| 452 |
pipe = pipeline(
|
| 453 |
task="text-generation",
|
| 454 |
+
model=transformers_repo,
|
| 455 |
tokenizer=tokenizer,
|
| 456 |
trust_remote_code=True,
|
| 457 |
device_map="auto",
|