convitom committed on
Commit ·
477f80e
1
Parent(s): c9b4129
- pipeline_diagram.svg +298 -168
- stage1_itc_matrix.svg +200 -0
- training/train.py +21 -0
pipeline_diagram.svg
CHANGED
|
|
|
|
stage1_itc_matrix.svg
ADDED
|
|
training/train.py
CHANGED
|
@@ -35,6 +35,23 @@ torch.backends.cudnn.allow_tf32 = True
|
|
| 35 |
|
| 36 |
import transformers
|
| 37 |
from transformers import TrainingArguments, Trainer, TrainerCallback, PrinterCallback
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
# Add project root to path
|
| 40 |
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
|
@@ -377,6 +394,10 @@ def get_trainer(
|
|
| 377 |
data_collator = collator,
|
| 378 |
)
|
| 379 |
trainer.remove_callback(PrinterCallback)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 380 |
return trainer
|
| 381 |
|
| 382 |
|
|
|
|
| 35 |
|
| 36 |
import transformers
|
| 37 |
from transformers import TrainingArguments, Trainer, TrainerCallback, PrinterCallback
|
| 38 |
+
from transformers.trainer_callback import ProgressCallback
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
class _NoEvalTqdmCallback(ProgressCallback):
|
| 42 |
+
"""Same as HF's ProgressCallback but with the per-batch eval bar disabled.
|
| 43 |
+
|
| 44 |
+
In a Colab `!python -m ...` subprocess HF Trainer's `is_in_notebook()`
|
| 45 |
+
returns False (no IPython kernel in the child) so it falls back to plain
|
| 46 |
+
tqdm. Colab's text renderer mishandles `\\r` for fast updates, so the
|
| 47 |
+
eval bar (~1 batch/sec × 1250 batches) prints a fresh line every step
|
| 48 |
+
and lags the browser tab. Training tqdm updates slowly enough (one bar
|
| 49 |
+
line per ~9s at 24M params + LoRA + bf16) that it stays clean, so we
|
| 50 |
+
only kill the prediction bar. eval_loss is still logged at the end of
|
| 51 |
+
each eval pass via the standard log_history mechanism."""
|
| 52 |
+
|
| 53 |
+
def on_prediction_step(self, args, state, control, **kwargs): # noqa: D401  # deliberate no-op override: skip the parent's per-batch eval bar handling
|
| 54 |
+
return
|
| 55 |
|
| 56 |
# Add project root to path
|
| 57 |
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
|
|
|
| 394 |
data_collator = collator,
|
| 395 |
)
|
| 396 |
trainer.remove_callback(PrinterCallback)
|
| 397 |
+
# Replace default ProgressCallback with one that skips the eval per-batch
|
| 398 |
+
# bar — see _NoEvalTqdmCallback docstring for the Colab-subprocess rationale.
|
| 399 |
+
trainer.remove_callback(ProgressCallback)
|
| 400 |
+
trainer.add_callback(_NoEvalTqdmCallback())
|
| 401 |
return trainer
|
| 402 |
|
| 403 |
|