| [2024-03-04 17:34:08,795][hydra][INFO] - | |
| estimator: | |
| accelerator: gpu | |
| precision: bf16-true | |
| deterministic: true | |
| tf32_mode: high | |
| convert_to_bettertransformer: false | |
| callbacks: | |
| timer: | |
| _target_: energizer.active_learning.callbacks.Timer | |
| lr_monitor: | |
| _target_: energizer.callbacks.lr_monitor.LearningRateMonitor | |
| model_checkpoint: | |
| _target_: energizer.callbacks.model_checkpoint.ModelCheckpoint | |
| dirpath: .checkpoints | |
| stage: train | |
| frequency: 1:epoch | |
| loggers: | |
| tensorboard: | |
| _target_: energizer.loggers.TensorBoardLogger | |
| root_dir: ./ | |
| name: tb_logs | |
| version: null | |
| data: | |
| batch_size: 32 | |
| eval_batch_size: 128 | |
| shuffle: true | |
| replacement: false | |
| data_seed: 42 | |
| drop_last: false | |
| num_workers: 8 | |
| pin_memory: true | |
| persistent_workers: false | |
| multiprocessing_context: null | |
| max_length: 512 | |
| fit: | |
| max_epochs: 20 | |
| optimizer_kwargs: | |
| name: adamw | |
| lr: 3.0e-05 | |
| init_kwargs: | |
| fused: true | |
| scheduler_kwargs: | |
| name: constant_schedule_with_warmup | |
| num_warmup_steps: 2000 | |
| log_interval: 100 | |
| enable_progress_bar: true | |
| limit_train_batches: null | |
| limit_validation_batches: null | |
| model: | |
| name: bert-tiny | |
| revision: null | |
| seed: 42 | |
| log_interval: 100 | |
| enable_progress_bar: true | |
| limit_batches: null | |
| seed: 42 | |
| experiment_group: training | |
| run_name: bert-tiny_2024-03-04T17-34-08 | |
| data_path: /home/pl487/coreset-project/data/processed | |
| dataset: mnli | |
| ====================================================================== | |
| [2024-03-04 17:34:08,796][hydra][INFO] - Seed enabled: 42 | |
| [2024-03-04 17:34:09,910][hydra][INFO] - Label distribution: | |
| {<RunningStage.TRAIN: 'train'>: {'0-(entailment)': 130899, '1-(neutral)': 130900, '2-(contradiction)': 130903}} | |
| [2024-03-04 17:34:21,700][hydra][INFO] - Loggers: [<energizer.loggers.tensorboard.TensorBoardLogger object at 0x7f79509062f0>] | |
| [2024-03-04 17:34:21,700][hydra][INFO] - Callbacks: [<energizer.active_learning.callbacks.Timer object at 0x7f792e9ecfd0>, <energizer.callbacks.lr_monitor.LearningRateMonitor object at 0x7f792e9ed030>, <energizer.callbacks.model_checkpoint.ModelCheckpoint object at 0x7f792e9ed540>] | |
| [2024-03-04 17:34:21,702][hydra][INFO] - Model summary: | |
| Total num params: 4.4M | |
| Of which trainable: 4.4M | |
| With a memory footprint of 0.01GB | |
| Total memory allocated 0.03GB | |
| [2024-03-04 17:34:21,702][hydra][INFO] - Dataloading params: | |
| SequenceClassificationDataloaderArgs(batch_size=32, eval_batch_size=128, num_workers=8, pin_memory=True, drop_last=False, persistent_workers=False, shuffle=True, replacement=False, data_seed=42, multiprocessing_context=None, max_length=512) | |
| [2024-03-04 17:34:21,737][hydra][INFO] - Batch: | |
| {<InputKeys.INPUT_IDS: 'input_ids'>: tensor([[ 101, 2092, 7910, 2008, 1005, 1055, 2785, 1997, 5793, 1045, | |
| 2812, 2027, 1005, 2128, 2130, 4755, 2009, 2000, 2000, 2073, | |
| 2085, 7910, 2008, 2027, 4748, 16874, 5562, 2006, 2694, 2017, | |
| 2113, 2065, 2115, 2065, 2017, 7910, 2017, 2113, 2031, 2589, | |
| 2023, 2030, 2065, 2017, 2342, 2023, 7910, 7910, 2057, 1005, | |
| 2222, 9790, 2005, 2017, 1998, 2017, 2123, 1005, 1056, 2031, | |
| 2000, 3477, 2149, 4983, 2017, 2021, 2059, 2054, 2027, 2123, | |
| 1005, 1056, 2425, 2017, 2003, 2008, 2065, 2017, 2065, 2027, | |
| 2663, 2017, 2507, 2068, 2012, 2560, 1037, 2353, 1997, 1996, | |
| 1997, 1996, 2518, 2008, 2027, 2663, 2061, 1045, 2123, 1005, | |
| 1056, 2113, 2009, 2003, 7910, 2009, 1005, 1055, 2893, 2000, | |
| 2022, 2062, 2449, 2085, 2738, 2084, 7910, 2941, 7910, 7149, | |
| 2007, 1996, 4126, 2084, 2007, 7910, 8529, 1996, 7910, 7750, | |
| 2027, 1996, 1996, 9559, 2024, 2074, 1999, 2009, 2005, 1996, | |
| 2769, 1045, 1005, 1049, 1045, 1005, 1049, 6427, 1045, 2113, | |
| 1045, 1045, 5993, 2007, 2017, 1045, 2228, 2017, 1005, 2128, | |
| 2613, 2017, 1005, 2128, 2200, 2157, 2008, 1996, 8801, 2323, | |
| 1045, 2228, 2027, 2323, 2031, 2019, 5020, 3815, 1997, 2017, | |
| 2113, 2672, 2027, 2064, 2031, 1037, 2261, 2021, 1045, 2228, | |
| 2087, 1997, 2068, 2323, 2022, 2025, 7910, 9559, 1999, 1996, | |
| 2755, 1998, 2008, 1005, 1055, 2126, 2126, 2027, 1005, 2310, | |
| 5407, 2046, 4331, 2009, 1005, 1055, 2138, 1997, 1996, 1996, | |
| 2375, 2477, 2017, 2113, 1996, 15932, 1998, 2673, 2021, 7910, | |
| 2021, 2053, 1045, 2113, 2057, 2009, 8529, 2057, 1005, 2128, | |
| 1999, 5374, 1998, 7910, 2057, 2031, 1996, 2168, 2518, 2058, | |
| 24375, 4667, 1998, 1998, 7910, 2027, 2024, 5599, 2068, 2041, | |
| 1045, 2812, 2074, 1996, 3180, 7173, 6251, 2518, 2027, 2292, | |
| 2068, 2041, 2138, 1997, 2027, 2123, 1005, 1056, 2031, 2151, | |
| 2173, 2000, 2562, 2000, 2404, 2068, 2061, 8307, 2008, 2071, | |
| 2017, 2113, 7910, 4983, 7910, 2042, 1037, 2350, 10048, 2021, | |
| 7910, 7910, 2130, 1996, 14255, 14141, 2135, 2210, 4933, 1045, | |
| 2812, 1996, 1996, 4364, 2008, 12386, 2651, 1999, 2637, 2003, | |
| 1996, 6778, 1998, 9444, 2008, 2089, 2022, 3251, 2009, 1005, | |
| 1055, 1037, 13742, 2030, 5850, 2030, 3649, 2842, 8529, 2027, | |
| 2024, 1996, 3924, 2008, 2024, 2183, 2000, 3477, 1998, 2027, | |
| 2024, 1996, 2028, 2008, 2024, 2183, 2000, 9015, 1998, 1996, | |
| 2060, 2711, 2017, 2113, 2065, 2027, 3477, 2065, 2027, 2065, | |
| 2027, 7910, 6545, 2068, 7910, 1998, 7910, 2059, 1996, 2017, | |
| 2113, 2051, 1996, 2375, 3138, 2058, 7910, 2431, 1996, 2051, | |
| 7910, 2027, 2593, 2292, 2068, 2175, 2030, 2027, 2131, 2125, | |
| 2007, 1037, 1037, 6251, 2138, 2027, 1005, 2310, 2018, 1037, | |
| 5160, 2008, 2017, 2113, 5683, 2008, 2008, 2027, 4694, 1005, | |
| 1056, 2035, 2362, 2043, 2027, 2106, 2009, 102, 1045, 2228, | |
| 2008, 2045, 2323, 2022, 2019, 5020, 6630, 1997, 15406, 1999, | |
| 2256, 8801, 1012, 102]]), <InputKeys.ATT_MASK: 'attention_mask'>: tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]), <InputKeys.LABELS: 'labels'>: tensor([0]), <InputKeys.ON_CPU: 'on_cpu'>: {<SpecialKeys.ID: 'uid'>: [221950]}} | |