convitom committed on
Commit
477f80e
·
1 Parent(s): c9b4129
Files changed (3) hide show
  1. pipeline_diagram.svg +298 -168
  2. stage1_itc_matrix.svg +200 -0
  3. training/train.py +21 -0
pipeline_diagram.svg CHANGED
stage1_itc_matrix.svg ADDED
training/train.py CHANGED
@@ -35,6 +35,23 @@ torch.backends.cudnn.allow_tf32 = True
35
 
36
  import transformers
37
  from transformers import TrainingArguments, Trainer, TrainerCallback, PrinterCallback
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
  # Add project root to path
40
  sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
@@ -377,6 +394,10 @@ def get_trainer(
377
  data_collator = collator,
378
  )
379
  trainer.remove_callback(PrinterCallback)
 
 
 
 
380
  return trainer
381
 
382
 
 
35
 
36
  import transformers
37
  from transformers import TrainingArguments, Trainer, TrainerCallback, PrinterCallback
38
+ from transformers.trainer_callback import ProgressCallback
39
+
40
+
41
class _NoEvalTqdmCallback(ProgressCallback):
    """Progress callback that keeps the training bar but drops the eval bar.

    Behaves like HF's ``ProgressCallback`` except that the per-batch
    prediction/eval progress bar is disabled.

    Rationale: when training is launched from Colab as a ``!python -m ...``
    subprocess, the child process has no IPython kernel, so HF Trainer's
    ``is_in_notebook()`` returns False and it falls back to plain tqdm.
    Colab's text renderer mishandles ``\\r`` under fast updates, so the eval
    bar (~1 batch/sec × 1250 batches) prints a fresh line on every step and
    lags the browser tab. The training bar refreshes slowly enough (about one
    bar line per ~9s at 24M params + LoRA + bf16) to stay clean, so only the
    prediction bar is suppressed. ``eval_loss`` is still logged at the end of
    each eval pass via the standard log_history mechanism.
    """

    def on_prediction_step(self, args, state, control, **kwargs):  # noqa: D401
        # Intentional no-op: overriding this hook is what disables the
        # per-batch eval bar described in the class docstring.
        return None
55
 
56
  # Add project root to path
57
  sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
 
394
  data_collator = collator,
395
  )
396
  trainer.remove_callback(PrinterCallback)
397
+ # Replace default ProgressCallback with one that skips the eval per-batch
398
+ # bar — see _NoEvalTqdmCallback docstring for the Colab-subprocess rationale.
399
+ trainer.remove_callback(ProgressCallback)
400
+ trainer.add_callback(_NoEvalTqdmCallback())
401
  return trainer
402
 
403