Spaces:
Paused
Paused
Epochs added
Browse files
- docker-compose.yml +1 -0
- model.py +15 -11
docker-compose.yml
CHANGED
|
@@ -20,6 +20,7 @@ services:
|
|
| 20 |
- LORA_DROPOUT=0.1
|
| 21 |
- LORA_TARGET_MODULES=q,v
|
| 22 |
# Training settings
|
|
|
|
| 23 |
- LEARNING_RATE=1e-4
|
| 24 |
- BATCH_SIZE=1
|
| 25 |
- MAX_STEPS=100
|
|
|
|
| 20 |
- LORA_DROPOUT=0.1
|
| 21 |
- LORA_TARGET_MODULES=q,v
|
| 22 |
# Training settings
|
| 23 |
+
- NUM_EPOCHS=3
|
| 24 |
- LEARNING_RATE=1e-4
|
| 25 |
- BATCH_SIZE=1
|
| 26 |
- MAX_STEPS=100
|
model.py
CHANGED
|
@@ -207,8 +207,8 @@ class T5Model(LabelStudioMLBase):
|
|
| 207 |
|
| 208 |
# Configure LoRA
|
| 209 |
lora_config = LoraConfig(
|
| 210 |
-
r=int(os.getenv('LORA_R', '
|
| 211 |
-
lora_alpha=int(os.getenv('LORA_ALPHA', '
|
| 212 |
target_modules=os.getenv('LORA_TARGET_MODULES', 'q,v').split(','),
|
| 213 |
lora_dropout=float(os.getenv('LORA_DROPOUT', '0.1')),
|
| 214 |
bias="none",
|
|
@@ -227,16 +227,20 @@ class T5Model(LabelStudioMLBase):
|
|
| 227 |
logger.info("Starting training loop...")
|
| 228 |
optimizer = torch.optim.AdamW(model.parameters(), lr=float(os.getenv('LEARNING_RATE', '1e-4')))
|
| 229 |
|
| 230 |
-
|
| 231 |
-
model.train()
|
| 232 |
-
optimizer.zero_grad()
|
| 233 |
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 240 |
|
| 241 |
# Switch back to eval mode
|
| 242 |
model.eval()
|
|
|
|
| 207 |
|
| 208 |
# Configure LoRA
|
| 209 |
lora_config = LoraConfig(
|
| 210 |
+
r=int(os.getenv('LORA_R', '16')),
|
| 211 |
+
lora_alpha=int(os.getenv('LORA_ALPHA', '16')),
|
| 212 |
target_modules=os.getenv('LORA_TARGET_MODULES', 'q,v').split(','),
|
| 213 |
lora_dropout=float(os.getenv('LORA_DROPOUT', '0.1')),
|
| 214 |
bias="none",
|
|
|
|
| 227 |
logger.info("Starting training loop...")
|
| 228 |
optimizer = torch.optim.AdamW(model.parameters(), lr=float(os.getenv('LEARNING_RATE', '1e-4')))
|
| 229 |
|
| 230 |
+
num_epochs = int(os.getenv('NUM_EPOCHS', '3'))
|
|
|
|
|
|
|
| 231 |
|
| 232 |
+
for epoch in range(num_epochs):
|
| 233 |
+
logger.info(f"Starting epoch {epoch+1}/{num_epochs}")
|
| 234 |
+
|
| 235 |
+
model.train()
|
| 236 |
+
optimizer.zero_grad()
|
| 237 |
+
|
| 238 |
+
outputs = model(**inputs, labels=labels["input_ids"])
|
| 239 |
+
loss = outputs.loss
|
| 240 |
+
loss.backward()
|
| 241 |
+
optimizer.step()
|
| 242 |
+
|
| 243 |
+
logger.info(f"Epoch {epoch+1}/{num_epochs} completed. Loss: {loss.item():.4f}")
|
| 244 |
|
| 245 |
# Switch back to eval mode
|
| 246 |
model.eval()
|