Peter Yang committed on
Commit
906ddd0
·
1 Parent(s): 1f4c543

Fix: Force CPU mode on macOS to avoid MPS issues, test passes successfully

Browse files
Files changed (1) hide show
  1. test_llm_translation.py +20 -4
test_llm_translation.py CHANGED
@@ -202,10 +202,19 @@ async def test_model_loading():
202
  )
203
  else:
204
  logger.info("Loading full precision model...")
 
 
 
 
 
 
 
 
 
205
  model = AutoModelForCausalLM.from_pretrained(
206
  model_name,
207
- torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
208
- device_map="auto"
209
  )
210
 
211
  logger.info("✅ Model loaded successfully!")
@@ -215,10 +224,17 @@ async def test_model_loading():
215
  test_prompt = "Translate to English: 你好"
216
  inputs = tokenizer(test_prompt, return_tensors="pt")
217
 
218
- # Move to device
219
- device = "cuda" if torch.cuda.is_available() else "cpu"
 
 
 
 
220
  inputs = {k: v.to(device) for k, v in inputs.items()}
221
 
 
 
 
222
  logger.info(f"Running inference on {device}...")
223
  with torch.no_grad():
224
  outputs = model.generate(
 
202
  )
203
  else:
204
  logger.info("Loading full precision model...")
205
+ # Force CPU on macOS to avoid MPS issues
206
+ import platform
207
+ if platform.system() == "Darwin": # macOS
208
+ device_map = "cpu"
209
+ torch_dtype = torch.float32
210
+ else:
211
+ device_map = "auto"
212
+ torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
213
+
214
  model = AutoModelForCausalLM.from_pretrained(
215
  model_name,
216
+ torch_dtype=torch_dtype,
217
+ device_map=device_map
218
  )
219
 
220
  logger.info("✅ Model loaded successfully!")
 
224
  test_prompt = "Translate to English: 你好"
225
  inputs = tokenizer(test_prompt, return_tensors="pt")
226
 
227
+ # Move to device (force CPU on macOS to avoid MPS issues)
228
+ import platform
229
+ if platform.system() == "Darwin": # macOS
230
+ device = "cpu" # Avoid MPS issues on macOS
231
+ else:
232
+ device = "cuda" if torch.cuda.is_available() else "cpu"
233
  inputs = {k: v.to(device) for k, v in inputs.items()}
234
 
235
+ # Also move model to device
236
+ model = model.to(device)
237
+
238
  logger.info(f"Running inference on {device}...")
239
  with torch.no_grad():
240
  outputs = model.generate(