itsjorigo
/

sinllama-mcq-3.0

Model card Files Files and versions

itsjorigo committed 4 days ago

Commit

a0de84d

·

verified ·

1 Parent(s): 3f3bd4f

Update handler.py

Files changed (1) hide show

handler.py +3 -1

handler.py CHANGED Viewed

@@ -36,7 +36,9 @@ class EndpointHandler:
             device_map="auto",
             attn_implementation="sdpa",
         )
-        base.resize_token_embeddings(vocab_size)
         # Merge SinLlama into base so the MCQ adapter sees a plain model (not stacked PeftModel)
         print("Loading and merging SinLlama adapter...")

             device_map="auto",
             attn_implementation="sdpa",
         )
+        # mean_resizing=False avoids holding 2x embedding matrix in VRAM during resize.
+        # Safe here because SinLlama adapter contains the correct trained embeddings.
+        base.resize_token_embeddings(vocab_size, mean_resizing=False)
         # Merge SinLlama into base so the MCQ adapter sees a plain model (not stacked PeftModel)
         print("Loading and merging SinLlama adapter...")