Yusuf committed on
Commit
4452b74
·
1 Parent(s): 25fbc07

chore: readable print logs & separate clearml graphs

Browse files
trainingModel/Training.py CHANGED
@@ -130,7 +130,7 @@ def train_model(
130
  epoch_accuracies[epoch] = epoch_acc_avg
131
 
132
  print(f"\n--- Epoch {epoch + 1}: ---")
133
- print(f'Train loss={epoch_loss_avg:.4f}\nTrain accuracy={epoch_acc_avg:.4f}')
134
 
135
  # ----------------------
136
  # validation loop
@@ -156,7 +156,7 @@ def train_model(
156
  current_val_accuracy = val_accuracy_fn.compute().item()
157
  val_accuracies[epoch] = current_val_accuracy
158
 
159
- print(f"Epoch {epoch+1}: val acc={current_val_accuracy:.4f}")
160
 
161
  # keep track of best validation accuracy and save best model so far
162
  if current_val_accuracy > best_accuracy:
@@ -164,7 +164,7 @@ def train_model(
164
  torch.save(model.state_dict(), save_path)
165
 
166
 
167
- print(f'Epoch {epoch + 1} validation complete')
168
 
169
  print(f"\nTraining finished. Best val accuracy: {best_accuracy:.4f}")
170
  print(f"Best model weights saved to: {save_path}")
 
130
  epoch_accuracies[epoch] = epoch_acc_avg
131
 
132
  print(f"\n--- Epoch {epoch + 1}: ---")
133
+ print(f'Train loss={epoch_loss_avg:.4f}\nTrain accuracy={epoch_acc_avg:.4f}\n')
134
 
135
  # ----------------------
136
  # validation loop
 
156
  current_val_accuracy = val_accuracy_fn.compute().item()
157
  val_accuracies[epoch] = current_val_accuracy
158
 
159
+ print(f"\nEpoch {epoch+1}: val acc={current_val_accuracy:.4f}")
160
 
161
  # keep track of best validation accuracy and save best model so far
162
  if current_val_accuracy > best_accuracy:
 
164
  torch.save(model.state_dict(), save_path)
165
 
166
 
167
+ print(f'Epoch {epoch + 1} validation complete\n')
168
 
169
  print(f"\nTraining finished. Best val accuracy: {best_accuracy:.4f}")
170
  print(f"Best model weights saved to: {save_path}")
trainingModel/run_training.py CHANGED
@@ -140,21 +140,20 @@ training_metrics = train_model(
140
  # ----------- Log metrics to ClearML -----------
141
  # Per-batch training losses and accuracies
142
  for i, loss in enumerate(training_metrics["batch_losses"]):
143
- training_logger.report_scalar("train_batch", "loss", value=loss, iteration=i)
144
 
145
  for i, acc in enumerate(training_metrics["batch_accuracies"]):
146
- training_logger.report_scalar("train_batch", "accuracy", value=acc, iteration=i)
147
-
148
 
149
  # Per-epoch training losses and accuracies
150
  epoch_metrics = zip(training_metrics["epoch_losses"], training_metrics["epoch_accuracies"])
151
  for epoch, (loss, acc) in enumerate(epoch_metrics):
152
- training_logger.report_scalar("train_epoch", "loss", loss, iteration=epoch)
153
- training_logger.report_scalar("train_epoch", "accuracy", acc, iteration=epoch)
154
 
155
  # Per-epoch validation accuracies
156
  for epoch, acc in enumerate(training_metrics["val_accuracies"]):
157
- training_logger.report_scalar("validation_epoch", "accuracy", value=acc, iteration=epoch)
158
 
159
  training_logger.report_single_value("best_val_accuracy", training_metrics["best_accuracy"])
160
 
 
140
  # ----------- Log metrics to ClearML -----------
141
  # Per-batch training losses and accuracies
142
  for i, loss in enumerate(training_metrics["batch_losses"]):
143
+ training_logger.report_scalar("training batch loss", "loss", value=loss, iteration=i)
144
 
145
  for i, acc in enumerate(training_metrics["batch_accuracies"]):
146
+ training_logger.report_scalar("training batch accuracy", "accuracy", value=acc, iteration=i)
 
147
 
148
  # Per-epoch training losses and accuracies
149
  epoch_metrics = zip(training_metrics["epoch_losses"], training_metrics["epoch_accuracies"])
150
  for epoch, (loss, acc) in enumerate(epoch_metrics):
151
+ training_logger.report_scalar("training epoch loss", "loss", loss, iteration=epoch)
152
+ training_logger.report_scalar("training epoch accuracy", "accuracy", acc, iteration=epoch)
153
 
154
  # Per-epoch validation accuracies
155
  for epoch, acc in enumerate(training_metrics["val_accuracies"]):
156
+ training_logger.report_scalar("validation epoch accuracy", "accuracy", value=acc, iteration=epoch)
157
 
158
  training_logger.report_single_value("best_val_accuracy", training_metrics["best_accuracy"])
159