Update trainers/trainer_alignment_base.py

Browse files

Files changed (1) hide show

trainers/trainer_alignment_base.py +123 -0

trainers/trainer_alignment_base.py CHANGED Viewed

@@ -8,6 +8,129 @@
 #   4. Train small standalone transformer from scratch
 #   5. No expert models needed at inference
 # ============================================================================
 import math
 import os

 #   4. Train small standalone transformer from scratch
 #   5. No expert models needed at inference
 # ============================================================================
+"""
+Conclusion: this trainer is invalid. It cannot conform the system with cross-entropy alone; it requires Procrustes whitening on every internalized assessment,
+as each assessment causes misalignment from the spectral scope without the 5-point expert paradigm.
+=================================================================
+NLI HEAD TRAINING
+=================================================================
+Loading backbone...
+config.json: 100%
+ 938/938 [00:00<00:00, 298kB/s]
+modeling_caption_bert.py:
+ 6.62k/? [00:00<00:00, 2.23MB/s]
+A new version of the following files was downloaded from https://huggingface.co/AbstractPhil/geolip-captionbert-8192:
+- modeling_caption_bert.py
+. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
+model.safetensors: 100%
+ 104M/104M [00:05<00:00, 34.3MB/s]
+Loading weights: 100%
+ 82/82 [00:00<00:00, 3678.27it/s, Materializing param=token_emb.weight]
+tokenizer_config.json: 100%
+ 322/322 [00:00<00:00, 108kB/s]
+tokenizer.json:
+ 711k/? [00:00<00:00, 9.48MB/s]
+  Backbone: 25,958,016 params (frozen)
+Loading SNLI...
+README.md:
+ 16.0k/? [00:00<00:00, 4.95MB/s]
+plain_text/test-00000-of-00001.parquet: 100%
+ 412k/412k [00:00<00:00, 2.06MB/s]
+plain_text/validation-00000-of-00001.par(…): 100%
+ 413k/413k [00:00<00:00, 2.07MB/s]
+plain_text/train-00000-of-00001.parquet: 100%
+ 19.6M/19.6M [00:00<00:00, 98.2MB/s]
+Generating test split: 100%
+ 10000/10000 [00:00<00:00, 291773.61 examples/s]
+Generating validation split: 100%
+ 10000/10000 [00:00<00:00, 1825276.99 examples/s]
+Generating train split: 100%
+ 550152/550152 [00:00<00:00, 6170326.70 examples/s]
+Filter: 100%
+ 550152/550152 [00:00<00:00, 692748.73 examples/s]
+Filter: 100%
+ 10000/10000 [00:00<00:00, 514001.54 examples/s]
+  Train: 549,367  Val: 9,842
+Pre-encoding with frozen backbone...
+  Encoding: 100%|██████████| 391/391 [00:33<00:00, 11.84it/s]
+  Encoding: 100%|██████████| 39/39 [00:03<00:00, 12.52it/s]
+=================================================================
+NLI HEAD
+=================================================================
+  Parameters: 7,427,715
+  Epochs: 10
+  Batch size: 128
+  Batches/epoch: 781
+=================================================================
+TRAINING (10 epochs)
+=================================================================
+  E 1: 16s  loss=0.8299  t_acc=0.6237  v_loss=0.7563  v_acc=0.6675
+  E 2: 16s  loss=0.6971  t_acc=0.7043  v_loss=0.6849  v_acc=0.7179
+  E 3: 16s  loss=0.6380  t_acc=0.7357  v_loss=0.6430  v_acc=0.7349
+  E 4: 16s  loss=0.5846  t_acc=0.7619  v_loss=0.6198  v_acc=0.7479
+  E 5: 16s  loss=0.5287  t_acc=0.7876  v_loss=0.6282  v_acc=0.7460
+  E 6: 16s  loss=0.4652  t_acc=0.8169  v_loss=0.6321  v_acc=0.7542
+  E 7: 16s  loss=0.3938  t_acc=0.8488  v_loss=0.6682  v_acc=0.7533
+  E 8: 16s  loss=0.3255  t_acc=0.8778  v_loss=0.7224  v_acc=0.7525
+  E 9: 16s  loss=0.2754  t_acc=0.9001  v_loss=0.7758  v_acc=0.7489
+  E10: 16s  loss=0.2503  t_acc=0.9110  v_loss=0.8039  v_acc=0.7491
+=================================================================
+COMPOSITIONAL ORDER TEST
+=================================================================
+Loading weights: 100%
+ 82/82 [00:00<00:00, 3646.91it/s, Materializing param=token_emb.weight]
+  P: a potato on top of a table
+  H: a table on top of a potato
+  Pooled cos: 0.987  (order-blind)
+  NLI: entailment  [E=0.838 N=0.052 C=0.110]
+  P: a potato on top of a table
+  H: there is a potato
+  Pooled cos: 0.502  (order-blind)
+  NLI: entailment  [E=0.900 N=0.082 C=0.018]
+  P: a cat is sitting on a mat
+  H: a mat is sitting on a cat
+  Pooled cos: 0.993  (order-blind)
+  NLI: entailment  [E=0.792 N=0.148 C=0.060]
+  P: a dog chased the cat
+  H: the cat chased the dog
+  Pooled cos: 0.977  (order-blind)
+  NLI: entailment  [E=0.588 N=0.204 C=0.208]
+  P: a woman is holding a baby
+  H: a baby is holding a woman
+  Pooled cos: 0.996  (order-blind)
+  NLI: entailment  [E=0.913 N=0.045 C=0.041]
+  P: the boy kicked the ball
+  H: the ball kicked the boy
+  Pooled cos: 0.986  (order-blind)
+  NLI: entailment  [E=0.684 N=0.133 C=0.183]
+  P: a man is riding a horse
+  H: a horse is riding a man
+  Pooled cos: 0.995  (order-blind)
+  NLI: entailment  [E=0.859 N=0.075 C=0.066]
+  Best val accuracy: 0.7542
+=================================================================
+DONE
+=================================================================
+"""
 import math
 import os