Commit 906ddd0 · Peter Yang committed
1 Parent(s): 1f4c543

Fix: Force CPU mode on macOS to avoid MPS issues, test passes successfully

test_llm_translation.py CHANGED (+20 -4)
@@ -202,10 +202,19 @@ async def test_model_loading():
             )
         else:
             logger.info("Loading full precision model...")
+            # Force CPU on macOS to avoid MPS issues
+            import platform
+            if platform.system() == "Darwin":  # macOS
+                device_map = "cpu"
+                torch_dtype = torch.float32
+            else:
+                device_map = "auto"
+                torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+
             model = AutoModelForCausalLM.from_pretrained(
                 model_name,
-                torch_dtype=
-                device_map=
+                torch_dtype=torch_dtype,
+                device_map=device_map
             )
 
         logger.info("✅ Model loaded successfully!")
@@ -215,10 +224,17 @@ async def test_model_loading():
         test_prompt = "Translate to English: 你好"
         inputs = tokenizer(test_prompt, return_tensors="pt")
 
-        # Move to device
-
+        # Move to device (force CPU on macOS to avoid MPS issues)
+        import platform
+        if platform.system() == "Darwin":  # macOS
+            device = "cpu"  # Avoid MPS issues on macOS
+        else:
+            device = "cuda" if torch.cuda.is_available() else "cpu"
         inputs = {k: v.to(device) for k, v in inputs.items()}
 
+        # Also move model to device
+        model = model.to(device)
+
         logger.info(f"Running inference on {device}...")
         with torch.no_grad():
             outputs = model.generate(