Update handler.py
Browse files · handler.py (+4, −4) — CHANGED
@@ -20,16 +20,16 @@ class EndpointHandler:

Reconstructed unified diff (old/new sides of the split view merged; line numbers
are the renderer's gutter numbers). Three removed lines were truncated by the
page extraction and are marked as such — their full original text is not
recoverable from this scrape.

     20          # Without this, model is built with 128256 vocab then fails to load
     21          # the 139336-vocab checkpoint weights
     22          print(f"Patching config vocab_size to {VOCAB_SIZE:,}...")
-    23          config =                                  [removed line truncated in extraction]
+    23          config = LlamaConfig.from_pretrained(path)
     24          config.vocab_size = VOCAB_SIZE
     25
     26          print(f"Loading model from {path}...")
-    27          self.model =                              [removed line truncated in extraction]
+    27          self.model = LlamaForCausalLM.from_pretrained(
     28              path,
-    29              config                                [removed line truncated in extraction]
+    29              config = config,
     30              torch_dtype = torch.float16,
     31              device_map = "auto",
-    32              trust_remote_code = True,
+    32              # trust_remote_code = True,
     33              ignore_mismatched_sizes = True,
     34          )
     35          # Resize to match extended vocab (139,336 tokens)

Summary of the change: the config is now explicitly loaded via
LlamaConfig.from_pretrained(path) and passed to
LlamaForCausalLM.from_pretrained as config=config, and the
trust_remote_code=True argument was commented out — so the model is built
with the patched vocab_size (139,336) before the checkpoint weights load.