Spaces:
Sleeping
Sleeping
Fix syntax errors: correct comma placement and indentation
Browse files
- app.py +6 -6
- gradio_app.py +1 -1
app.py
CHANGED
|
@@ -60,7 +60,7 @@ async def load_model_with_retry(model_name: str, hf_token: str, max_retries: int
|
|
| 60 |
model = AutoModelForCausalLM.from_pretrained(
|
| 61 |
model_name,
|
| 62 |
torch_dtype=torch.float16 if device == "cuda:0" else torch.float32,
|
| 63 |
-
device_map={"": 0} # Force all parameters to GPU 0
|
| 64 |
trust_remote_code=True,
|
| 65 |
low_cpu_mem_usage=True,
|
| 66 |
token=hf_token
|
|
@@ -69,7 +69,7 @@ async def load_model_with_retry(model_name: str, hf_token: str, max_retries: int
|
|
| 69 |
model = AutoModelForCausalLM.from_pretrained(
|
| 70 |
model_name,
|
| 71 |
torch_dtype=torch.float16 if device == "cuda:0" else torch.float32,
|
| 72 |
-
device_map={"": 0} # Force all parameters to GPU 0
|
| 73 |
trust_remote_code=True,
|
| 74 |
low_cpu_mem_usage=True,
|
| 75 |
use_safetensors=True, # Force safetensors to avoid CVE-2025-32434
|
|
@@ -96,10 +96,10 @@ async def load_model():
|
|
| 96 |
|
| 97 |
# Check if CUDA is available
|
| 98 |
if torch.cuda.is_available():
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
logger.info(f"Using device: {device}")
|
| 104 |
|
| 105 |
if device == "cuda:0":
|
|
|
|
| 60 |
model = AutoModelForCausalLM.from_pretrained(
|
| 61 |
model_name,
|
| 62 |
torch_dtype=torch.float16 if device == "cuda:0" else torch.float32,
|
| 63 |
+
device_map={"": 0}, # Force all parameters to GPU 0
|
| 64 |
trust_remote_code=True,
|
| 65 |
low_cpu_mem_usage=True,
|
| 66 |
token=hf_token
|
|
|
|
| 69 |
model = AutoModelForCausalLM.from_pretrained(
|
| 70 |
model_name,
|
| 71 |
torch_dtype=torch.float16 if device == "cuda:0" else torch.float32,
|
| 72 |
+
device_map={"": 0}, # Force all parameters to GPU 0
|
| 73 |
trust_remote_code=True,
|
| 74 |
low_cpu_mem_usage=True,
|
| 75 |
use_safetensors=True, # Force safetensors to avoid CVE-2025-32434
|
|
|
|
| 96 |
|
| 97 |
# Check if CUDA is available
|
| 98 |
if torch.cuda.is_available():
|
| 99 |
+
torch.cuda.set_device(0)
|
| 100 |
+
device = "cuda:0"
|
| 101 |
+
else:
|
| 102 |
+
device = "cpu"
|
| 103 |
logger.info(f"Using device: {device}")
|
| 104 |
|
| 105 |
if device == "cuda:0":
|
gradio_app.py
CHANGED
|
@@ -56,7 +56,7 @@ class ModelManager:
|
|
| 56 |
self.model = AutoModelForCausalLM.from_pretrained(
|
| 57 |
base_model_name,
|
| 58 |
torch_dtype=torch.float16 if self.device == "cuda:0" else torch.float32,
|
| 59 |
-
device_map={"": 0} # Force all parameters to GPU 0
|
| 60 |
trust_remote_code=True,
|
| 61 |
low_cpu_mem_usage=True,
|
| 62 |
token=hf_token
|
|
|
|
| 56 |
self.model = AutoModelForCausalLM.from_pretrained(
|
| 57 |
base_model_name,
|
| 58 |
torch_dtype=torch.float16 if self.device == "cuda:0" else torch.float32,
|
| 59 |
+
device_map={"": 0}, # Force all parameters to GPU 0
|
| 60 |
trust_remote_code=True,
|
| 61 |
low_cpu_mem_usage=True,
|
| 62 |
token=hf_token
|