Commit · b4751ce
1 Parent(s): 4c526ae
Update handler.py
Browse files
- handler.py +2 -3
handler.py
CHANGED
|
@@ -5,13 +5,12 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
|
|
| 5 |
|
| 6 |
|
| 7 |
class EndpointHandler:
|
| 8 |
-
def __init__(self, path=""
|
| 9 |
-
print('===> __init__', path, unused)
|
| 10 |
# load model and tokenizer from path
|
| 11 |
self.tokenizer = AutoTokenizer.from_pretrained(path) # AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b")
|
| 12 |
|
| 13 |
config = AutoConfig.from_pretrained(path, trust_remote_code=True)
|
| 14 |
-
config.attn_config['attn_impl'] = 'triton'
|
| 15 |
config.init_device = 'cuda:0' # For fast initialization directly on GPU!
|
| 16 |
config.max_seq_len = 4096 # (input + output) tokens can now be up to 4096
|
| 17 |
|
|
|
|
| 5 |
|
| 6 |
|
| 7 |
class EndpointHandler:
|
| 8 |
+
def __init__(self, path=""):
|
|
|
|
| 9 |
# load model and tokenizer from path
|
| 10 |
self.tokenizer = AutoTokenizer.from_pretrained(path) # AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b")
|
| 11 |
|
| 12 |
config = AutoConfig.from_pretrained(path, trust_remote_code=True)
|
| 13 |
+
# config.attn_config['attn_impl'] = 'triton'
|
| 14 |
config.init_device = 'cuda:0' # For fast initialization directly on GPU!
|
| 15 |
config.max_seq_len = 4096 # (input + output) tokens can now be up to 4096
|
| 16 |
|