Spaces:
Paused
Paused
improving tokenizer
Browse files
model.py
CHANGED
|
@@ -210,17 +210,18 @@ class T5Model(LabelStudioMLBase):
|
|
| 210 |
model = get_peft_model(self.model, lora_config)
|
| 211 |
model.print_trainable_parameters()
|
| 212 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 213 |
# Training loop
|
| 214 |
logger.info("Starting training loop...")
|
| 215 |
optimizer = torch.optim.AdamW(model.parameters(), lr=float(os.getenv('LEARNING_RATE', '1e-4')))
|
| 216 |
|
| 217 |
-
#
|
| 218 |
model.train()
|
| 219 |
optimizer.zero_grad()
|
| 220 |
|
| 221 |
-
inputs = self.tokenizer(text, return_tensors="pt", max_length=self.max_length, truncation=True).to(self.device)
|
| 222 |
-
labels = self.tokenizer(label, return_tensors="pt", max_length=self.generation_max_length, truncation=True).to(self.device)
|
| 223 |
-
|
| 224 |
outputs = model(**inputs, labels=labels["input_ids"])
|
| 225 |
loss = outputs.loss
|
| 226 |
loss.backward()
|
|
|
|
| 210 |
model = get_peft_model(self.model, lora_config)
|
| 211 |
model.print_trainable_parameters()
|
| 212 |
|
| 213 |
+
# Tokenize inputs first
|
| 214 |
+
inputs = self.tokenizer(text, return_tensors="pt", max_length=self.max_length, truncation=True).to(self.device)
|
| 215 |
+
labels = self.tokenizer(label, return_tensors="pt", max_length=self.generation_max_length, truncation=True).to(self.device)
|
| 216 |
+
|
| 217 |
# Training loop
|
| 218 |
logger.info("Starting training loop...")
|
| 219 |
optimizer = torch.optim.AdamW(model.parameters(), lr=float(os.getenv('LEARNING_RATE', '1e-4')))
|
| 220 |
|
| 221 |
+
# Set model to training mode
|
| 222 |
model.train()
|
| 223 |
optimizer.zero_grad()
|
| 224 |
|
|
|
|
|
|
|
|
|
|
| 225 |
outputs = model(**inputs, labels=labels["input_ids"])
|
| 226 |
loss = outputs.loss
|
| 227 |
loss.backward()
|