itsjorigo committed on
Commit
a0de84d
·
verified ·
1 Parent(s): 3f3bd4f

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +3 -1
handler.py CHANGED
@@ -36,7 +36,9 @@ class EndpointHandler:
36
  device_map="auto",
37
  attn_implementation="sdpa",
38
  )
39
- base.resize_token_embeddings(vocab_size)
 
 
40
 
41
  # Merge SinLlama into base so the MCQ adapter sees a plain model (not stacked PeftModel)
42
  print("Loading and merging SinLlama adapter...")
 
36
  device_map="auto",
37
  attn_implementation="sdpa",
38
  )
39
+ # mean_resizing=False avoids holding 2x the embedding matrix in VRAM during the resize.
40
+ # Safe here because SinLlama adapter contains the correct trained embeddings.
41
+ base.resize_token_embeddings(vocab_size, mean_resizing=False)
42
 
43
  # Merge SinLlama into base so the MCQ adapter sees a plain model (not stacked PeftModel)
44
  print("Loading and merging SinLlama adapter...")