- create_app.py +2 -1
create_app.py
CHANGED
|
@@ -32,7 +32,8 @@ def load_models():
|
|
| 32 |
QWEN_TOKENIZER = AutoTokenizer.from_pretrained(model_name, device='auto')
|
| 33 |
QWEN_TOKENIZER.pad_token_id = QWEN_TOKENIZER.eos_token_id
|
| 34 |
print("QWEN TOKENIZER LOADED")
|
| 35 |
-
QWEN_MODEL = AutoModelForCausalLM.from_pretrained(model_name
|
|
|
|
| 36 |
print("QWEN MODEL LOADED")
|
| 37 |
MODELS_LOADED = True
|
| 38 |
print("LOAD ENDED")
|
|
|
|
| 32 |
QWEN_TOKENIZER = AutoTokenizer.from_pretrained(model_name, device='auto')
|
| 33 |
QWEN_TOKENIZER.pad_token_id = QWEN_TOKENIZER.eos_token_id
|
| 34 |
print("QWEN TOKENIZER LOADED")
|
| 35 |
+
QWEN_MODEL = AutoModelForCausalLM.from_pretrained(model_name).half()
|
| 36 |
+
QWEN_MODEL = QWEN_MODEL.to(device)
|
| 37 |
print("QWEN MODEL LOADED")
|
| 38 |
MODELS_LOADED = True
|
| 39 |
print("LOAD ENDED")
|