Update model_loader.py
model_loader.py  +31 -8
model_loader.py CHANGED

@@ -6,25 +6,48 @@ from config import DEVICE, MODEL_LIST

def load_model(model_name):
    """
-    Load a model
+    Load a model efficiently with memory optimization.
+    Supports:
+    - Hugging Face repos
+    - Local safetensor weights
+    Optimizations:
+    - FP16/BF16
+    - CPU offloading if GPU memory is low
    """
    try:
        if model_name.endswith(".safetensors"):
            print(f"[INFO] Loading safetensor model: {model_name}")
-            tokenizer = AutoTokenizer.from_pretrained("gpt2")
-            # Load safetensor weights into GPT2 model
+            tokenizer = AutoTokenizer.from_pretrained("gpt2")
            model = AutoModelForCausalLM.from_pretrained(
                "gpt2",
                state_dict=load_file(model_name),
-                device_map="auto",
+                device_map="auto",  # Automatically places layers on GPU/CPU
                torch_dtype=torch.float16
            )
        else:
            print(f"[INFO] Loading Hugging Face model: {model_name}")
            tokenizer = AutoTokenizer.from_pretrained(model_name)
-            model = AutoModelForCausalLM.from_pretrained(
-
-
-
+            model = AutoModelForCausalLM.from_pretrained(
+                model_name,
+                device_map="auto",
+                torch_dtype=torch.float16
+            )
+    except RuntimeError as e:
+        print(f"[WARN] GPU memory insufficient, switching to CPU offload. {e}")
+        # CPU offload
+        from accelerate import init_empty_weights, load_checkpoint_and_dispatch
+        from transformers import AutoConfig
+
+        config = AutoConfig.from_pretrained(model_name)
+        with init_empty_weights():
+            model = AutoModelForCausalLM.from_config(config)
+        model = load_checkpoint_and_dispatch(
+            model,
+            model_name,
+            device_map={"": "cpu"},
+            no_split_module_classes=["GPT2Block"]
+        )
+        tokenizer = AutoTokenizer.from_pretrained(model_name)
+
    model.to(DEVICE)
    return tokenizer, model
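For context, a quick sketch of how the updated loader would be called. This is not part of the commit; it assumes model_loader.py already imports torch, AutoTokenizer, AutoModelForCausalLM, and safetensors' load_file (names taken from the visible calls), and the local path is hypothetical.

from model_loader import load_model

# Hub repo id: resolved through AutoTokenizer/AutoModelForCausalLM.from_pretrained
tokenizer, model = load_model("gpt2")

# Local safetensor weights: loaded into a GPT-2 skeleton via load_file
# (hypothetical path for illustration)
# tokenizer, model = load_model("checkpoints/my-gpt2.safetensors")

inputs = tokenizer("Hello, world", return_tensors="pt").to(model.device)
output_ids = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))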
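The new docstring advertises FP16/BF16, but both branches pin torch_dtype=torch.float16. If BF16 is actually intended, a minimal dtype pick, assuming a CUDA build of PyTorch, might look like this:

import torch

# Prefer bfloat16 where the GPU supports it (Ampere and newer); fall back to fp16.
dtype = (
    torch.bfloat16
    if torch.cuda.is_available() and torch.cuda.is_bf16_supported()
    else torch.float16
)
# ...then pass torch_dtype=dtype to from_pretrained instead of hardcoding float16.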
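One interaction to watch: model.to(DEVICE) runs after both branches, but recent accelerate versions refuse to move a model they have dispatched with offloaded modules (such models carry an hf_device_map attribute). A defensive guard, sketched here rather than taken from the commit, could be:

# Only move the model manually when accelerate has not already placed it.
if not hasattr(model, "hf_device_map"):
    model.to(DEVICE)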
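Also worth noting: load_checkpoint_and_dispatch expects a local checkpoint (a single weights file, or a folder containing a sharded-index file), not a hub repo id, so the fallback would typically materialize model_name first. A minimal sketch, assuming huggingface_hub is installed and the snapshot carries a sharded index (for a non-sharded checkpoint, pass the weights file itself):

from huggingface_hub import snapshot_download
from accelerate import init_empty_weights, load_checkpoint_and_dispatch
from transformers import AutoConfig, AutoModelForCausalLM

local_dir = snapshot_download(model_name)  # download/cache the repo locally
config = AutoConfig.from_pretrained(local_dir)
with init_empty_weights():
    # Instantiate on the meta device: no memory is allocated for weights yet.
    model = AutoModelForCausalLM.from_config(config)
model = load_checkpoint_and_dispatch(
    model,
    local_dir,
    device_map={"": "cpu"},                 # keep every module on CPU
    no_split_module_classes=["GPT2Block"],  # never shard a residual block
)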