b2u committed on
Commit
92ec7e6
·
1 Parent(s): 520dac2

improving tokenizer

Browse files
Files changed (1) hide show
  1. model.py +5 -4
model.py CHANGED
@@ -210,17 +210,18 @@ class T5Model(LabelStudioMLBase):
210
  model = get_peft_model(self.model, lora_config)
211
  model.print_trainable_parameters()
212
 
 
 
 
 
213
  # Training loop
214
  logger.info("Starting training loop...")
215
  optimizer = torch.optim.AdamW(model.parameters(), lr=float(os.getenv('LEARNING_RATE', '1e-4')))
216
 
217
- # Single training step for this annotation
218
  model.train()
219
  optimizer.zero_grad()
220
 
221
- inputs = self.tokenizer(text, return_tensors="pt", max_length=self.max_length, truncation=True).to(self.device)
222
- labels = self.tokenizer(label, return_tensors="pt", max_length=self.generation_max_length, truncation=True).to(self.device)
223
-
224
  outputs = model(**inputs, labels=labels["input_ids"])
225
  loss = outputs.loss
226
  loss.backward()
 
210
  model = get_peft_model(self.model, lora_config)
211
  model.print_trainable_parameters()
212
 
213
+ # Tokenize inputs first
214
+ inputs = self.tokenizer(text, return_tensors="pt", max_length=self.max_length, truncation=True).to(self.device)
215
+ labels = self.tokenizer(label, return_tensors="pt", max_length=self.generation_max_length, truncation=True).to(self.device)
216
+
217
  # Training loop
218
  logger.info("Starting training loop...")
219
  optimizer = torch.optim.AdamW(model.parameters(), lr=float(os.getenv('LEARNING_RATE', '1e-4')))
220
 
221
+ # Set model to training mode
222
  model.train()
223
  optimizer.zero_grad()
224
 
 
 
 
225
  outputs = model(**inputs, labels=labels["input_ids"])
226
  loss = outputs.loss
227
  loss.backward()