Update handler.py
Browse files
- handler.py +2 -2
handler.py
CHANGED
|
@@ -49,13 +49,13 @@ class EndpointHandler:
|
|
| 49 |
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 50 |
|
| 51 |
self.model, self.tokenizer = FastLanguageModel.from_pretrained(
|
| 52 |
-
model_name=
|
| 53 |
max_seq_length=self.max_seq_length,
|
| 54 |
load_in_4bit=self.load_in_4bit,
|
| 55 |
)
|
| 56 |
|
| 57 |
# Load LoRA adapter
|
| 58 |
-
self.model = PeftModel.from_pretrained(self.model, lora_adapter)
|
| 59 |
self.model.eval()
|
| 60 |
|
| 61 |
# Move model to the device (GPU or CPU)
|
|
|
|
| 49 |
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 50 |
|
| 51 |
self.model, self.tokenizer = FastLanguageModel.from_pretrained(
|
| 52 |
+
model_name=lora_adapter,
|
| 53 |
max_seq_length=self.max_seq_length,
|
| 54 |
load_in_4bit=self.load_in_4bit,
|
| 55 |
)
|
| 56 |
|
| 57 |
# Load LoRA adapter
|
| 58 |
+
# self.model = PeftModel.from_pretrained(self.model, lora_adapter)
|
| 59 |
self.model.eval()
|
| 60 |
|
| 61 |
# Move model to the device (GPU or CPU)
|