Peter Yang committed on
Commit
906ddd0
·
1 Parent(s): 1f4c543

Fix: Force CPU mode on macOS to avoid MPS issues, test passes successfully

Browse files
Files changed (1) hide show
  1. test_llm_translation.py +20 -4
test_llm_translation.py CHANGED
@@ -202,10 +202,19 @@ async def test_model_loading():
202
  )
203
  else:
204
  logger.info("Loading full precision model...")
 
 
 
 
 
 
 
 
 
205
  model = AutoModelForCausalLM.from_pretrained(
206
  model_name,
207
- torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
208
- device_map="auto"
209
  )
210
 
211
  logger.info("✅ Model loaded successfully!")
@@ -215,10 +224,17 @@ async def test_model_loading():
215
  test_prompt = "Translate to English: 你好"
216
  inputs = tokenizer(test_prompt, return_tensors="pt")
217
 
218
- # Move to device
219
- device = "cuda" if torch.cuda.is_available() else "cpu"
 
 
 
 
220
  inputs = {k: v.to(device) for k, v in inputs.items()}
221
 
 
 
 
222
  logger.info(f"Running inference on {device}...")
223
  with torch.no_grad():
224
  outputs = model.generate(
 
202
  )
203
  else:
204
  logger.info("Loading full precision model...")
205
+ # Force CPU on macOS to avoid MPS issues
206
+ import platform
207
+ if platform.system() == "Darwin": # macOS
208
+ device_map = "cpu"
209
+ torch_dtype = torch.float32
210
+ else:
211
+ device_map = "auto"
212
+ torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
213
+
214
  model = AutoModelForCausalLM.from_pretrained(
215
  model_name,
216
+ torch_dtype=torch_dtype,
217
+ device_map=device_map
218
  )
219
 
220
  logger.info("✅ Model loaded successfully!")
 
224
  test_prompt = "Translate to English: 你好"
225
  inputs = tokenizer(test_prompt, return_tensors="pt")
226
 
227
+ # Move to device (force CPU on macOS to avoid MPS issues)
228
+ import platform
229
+ if platform.system() == "Darwin": # macOS
230
+ device = "cpu" # Avoid MPS issues on macOS
231
+ else:
232
+ device = "cuda" if torch.cuda.is_available() else "cpu"
233
  inputs = {k: v.to(device) for k, v in inputs.items()}
234
 
235
+ # Also move model to device
236
+ model = model.to(device)
237
+
238
  logger.info(f"Running inference on {device}...")
239
  with torch.no_grad():
240
  outputs = model.generate(