abarbosa committed on
Commit
6c824c1
·
verified ·
1 Parent(s): e004fc4

Pushing fine-tuned model to Hugging Face Hub

Browse files
README.md ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ ---
3
+ language:
4
+ - pt
5
+ - en
6
+ tags:
7
+ - aes
8
+ datasets:
9
+ - kamel-usp/aes_enem_dataset
10
+ base_model: microsoft/phi-4
11
+ metrics:
12
+ - accuracy
13
+ - qwk
14
+ library_name: peft
15
+ model-index:
16
+ - name: phi4-balanced-C4
17
+ results:
18
+ - task:
19
+ type: text-classification
20
+ name: Automated Essay Score
21
+ dataset:
22
+ name: Automated Essay Score ENEM Dataset
23
+ type: kamel-usp/aes_enem_dataset
24
+ config: JBCS2025
25
+ split: test
26
+ metrics:
27
+ - name: Macro F1 (ignoring nan)
28
+ type: f1
29
+ value: 0.3296276013143483
30
+ - name: QWK
31
+ type: qwk
32
+ value: 0.579465541490858
33
+ - name: Weighted Macro F1
34
+ type: f1
35
+ value: 0.6761155293109196
36
+ ---
37
+ # Model ID: phi4-balanced-C4
38
+ ## Results
39
+ | | test_data |
40
+ |:-----------------------------|------------:|
41
+ | eval_accuracy | 0.702899 |
42
+ | eval_RMSE | 24.554 |
43
+ | eval_QWK | 0.579466 |
44
+ | eval_Macro_F1 | 0.27469 |
45
+ | eval_Macro_F1_(ignoring_nan) | 0.329628 |
46
+ | eval_Weighted_F1 | 0.676116 |
47
+ | eval_Micro_F1 | 0.702899 |
48
+ | eval_HDIV | 0.00724638 |
49
+
adapter_config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "microsoft/phi-4",
5
+ "bias": "none",
6
+ "corda_config": null,
7
+ "eva_config": null,
8
+ "exclude_modules": null,
9
+ "fan_in_fan_out": false,
10
+ "inference_mode": true,
11
+ "init_lora_weights": true,
12
+ "layer_replication": null,
13
+ "layers_pattern": null,
14
+ "layers_to_transform": null,
15
+ "loftq_config": {},
16
+ "lora_alpha": 16,
17
+ "lora_bias": false,
18
+ "lora_dropout": 0.05,
19
+ "megatron_config": null,
20
+ "megatron_core": "megatron.core",
21
+ "modules_to_save": [
22
+ "classifier",
23
+ "score"
24
+ ],
25
+ "peft_type": "LORA",
26
+ "r": 8,
27
+ "rank_pattern": {},
28
+ "revision": null,
29
+ "target_modules": [
30
+ "gate_up_proj",
31
+ "o_proj",
32
+ "down_proj",
33
+ "qkv_proj"
34
+ ],
35
+ "task_type": "SEQ_CLS",
36
+ "trainable_token_indices": null,
37
+ "use_dora": false,
38
+ "use_rslora": false
39
+ }
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b7eade13249326942b2996e0f9ddbb92e800c4307dd6457f5a544762c299b89
3
+ size 111515584
run_experiment.log ADDED
@@ -0,0 +1,1008 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [2025-03-23 20:41:58,914][__main__][INFO] - cache_dir: /media/data/tmp
2
+ dataset:
3
+ name: kamel-usp/aes_enem_dataset
4
+ split: JBCS2025
5
+ training_params:
6
+ seed: 42
7
+ num_train_epochs: 20
8
+ logging_steps: 100
9
+ metric_for_best_model: QWK
10
+ bf16: true
11
+ post_training_results:
12
+ model_path: /workspace/jbcs2025/outputs/2025-03-23/18-55-03
13
+ experiments:
14
+ model:
15
+ name: microsoft/phi-4
16
+ type: phi4_classification_lora
17
+ num_labels: 6
18
+ output_dir: ./results/phi4-balanced/C4
19
+ logging_dir: ./logs/phi4-balanced/C4
20
+ best_model_dir: ./results/phi4-balanced/C4/best_model
21
+ lora_r: 8
22
+ lora_dropout: 0.05
23
+ lora_alpha: 16
24
+ lora_target_modules: all-linear
25
+ dataset:
26
+ grade_index: 3
27
+ training_id: phi4-balanced-C4
28
+ training_params:
29
+ weight_decay: 0.01
30
+ warmup_ratio: 0.1
31
+ learning_rate: 5.0e-05
32
+ train_batch_size: 1
33
+ eval_batch_size: 16
34
+ gradient_accumulation_steps: 16
35
+ gradient_checkpointing: false
36
+
37
+ [2025-03-23 20:41:58,916][__main__][INFO] - Starting the Fine Tuning training process.
38
+ [2025-03-23 20:42:03,790][transformers.tokenization_utils_base][INFO] - loading file vocab.json from cache at /media/data/tmp/models--microsoft--phi-4/snapshots/187ef0342fff0eb3333be9f00389385e95ef0b61/vocab.json
39
+ [2025-03-23 20:42:03,790][transformers.tokenization_utils_base][INFO] - loading file merges.txt from cache at /media/data/tmp/models--microsoft--phi-4/snapshots/187ef0342fff0eb3333be9f00389385e95ef0b61/merges.txt
40
+ [2025-03-23 20:42:03,790][transformers.tokenization_utils_base][INFO] - loading file tokenizer.json from cache at /media/data/tmp/models--microsoft--phi-4/snapshots/187ef0342fff0eb3333be9f00389385e95ef0b61/tokenizer.json
41
+ [2025-03-23 20:42:03,790][transformers.tokenization_utils_base][INFO] - loading file added_tokens.json from cache at /media/data/tmp/models--microsoft--phi-4/snapshots/187ef0342fff0eb3333be9f00389385e95ef0b61/added_tokens.json
42
+ [2025-03-23 20:42:03,790][transformers.tokenization_utils_base][INFO] - loading file special_tokens_map.json from cache at /media/data/tmp/models--microsoft--phi-4/snapshots/187ef0342fff0eb3333be9f00389385e95ef0b61/special_tokens_map.json
43
+ [2025-03-23 20:42:03,790][transformers.tokenization_utils_base][INFO] - loading file tokenizer_config.json from cache at /media/data/tmp/models--microsoft--phi-4/snapshots/187ef0342fff0eb3333be9f00389385e95ef0b61/tokenizer_config.json
44
+ [2025-03-23 20:42:03,790][transformers.tokenization_utils_base][INFO] - loading file chat_template.jinja from cache at None
45
+ [2025-03-23 20:42:03,944][__main__][INFO] - Tokenizer function parameters- Padding:longest; Truncation: False
46
+ [2025-03-23 20:42:04,348][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /media/data/tmp/models--microsoft--phi-4/snapshots/187ef0342fff0eb3333be9f00389385e95ef0b61/config.json
47
+ [2025-03-23 20:42:04,349][transformers.configuration_utils][INFO] - Model config Phi3Config {
48
+ "architectures": [
49
+ "Phi3ForCausalLM"
50
+ ],
51
+ "attention_bias": false,
52
+ "attention_dropout": 0.0,
53
+ "bos_token_id": 100257,
54
+ "embd_pdrop": 0.0,
55
+ "eos_token_id": 100265,
56
+ "hidden_act": "silu",
57
+ "hidden_size": 5120,
58
+ "id2label": {
59
+ "0": 0,
60
+ "1": 40,
61
+ "2": 80,
62
+ "3": 120,
63
+ "4": 160,
64
+ "5": 200
65
+ },
66
+ "initializer_range": 0.02,
67
+ "intermediate_size": 17920,
68
+ "label2id": {
69
+ "0": 0,
70
+ "40": 1,
71
+ "80": 2,
72
+ "120": 3,
73
+ "160": 4,
74
+ "200": 5
75
+ },
76
+ "max_position_embeddings": 16384,
77
+ "model_type": "phi3",
78
+ "num_attention_heads": 40,
79
+ "num_hidden_layers": 40,
80
+ "num_key_value_heads": 10,
81
+ "original_max_position_embeddings": 16384,
82
+ "pad_token_id": 100349,
83
+ "partial_rotary_factor": 1.0,
84
+ "resid_pdrop": 0.0,
85
+ "rms_norm_eps": 1e-05,
86
+ "rope_scaling": null,
87
+ "rope_theta": 250000,
88
+ "sliding_window": null,
89
+ "tie_word_embeddings": false,
90
+ "torch_dtype": "bfloat16",
91
+ "transformers_version": "4.50.0",
92
+ "use_cache": true,
93
+ "vocab_size": 100352
94
+ }
95
+
96
+ [2025-03-23 20:42:04,371][transformers.modeling_utils][INFO] - loading weights file model.safetensors from cache at /media/data/tmp/models--microsoft--phi-4/snapshots/187ef0342fff0eb3333be9f00389385e95ef0b61/model.safetensors.index.json
97
+ [2025-03-23 20:42:04,372][transformers.modeling_utils][INFO] - Will use torch_dtype=torch.bfloat16 as defined in model's config object
98
+ [2025-03-23 20:42:04,372][transformers.modeling_utils][INFO] - Instantiating Phi3ForSequenceClassification model under default dtype torch.bfloat16.
99
+ [2025-03-23 20:42:25,935][transformers.modeling_utils][INFO] - Some weights of the model checkpoint at microsoft/phi-4 were not used when initializing Phi3ForSequenceClassification: ['lm_head.weight']
100
+ - This IS expected if you are initializing Phi3ForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
101
+ - This IS NOT expected if you are initializing Phi3ForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
102
+ [2025-03-23 20:42:25,936][transformers.modeling_utils][WARNING] - Some weights of Phi3ForSequenceClassification were not initialized from the model checkpoint at microsoft/phi-4 and are newly initialized: ['score.weight']
103
+ You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
104
+ [2025-03-23 20:42:28,026][__main__][INFO] - None
105
+ [2025-03-23 20:42:28,029][transformers.training_args][INFO] - PyTorch: setting up devices
106
+ [2025-03-23 20:42:28,068][__main__][INFO] - Total steps: 620. Number of warmup steps: 62
107
+ [2025-03-23 20:42:28,077][transformers.trainer][INFO] - You have loaded a model on multiple GPUs. `is_model_parallel` attribute will be force-set to `True` to avoid any unexpected behavior such as device placement mismatching.
108
+ [2025-03-23 20:42:28,100][transformers.trainer][INFO] - Using auto half precision backend
109
+ [2025-03-23 20:42:28,101][transformers.trainer][WARNING] - No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
110
+ [2025-03-23 20:42:28,121][transformers.trainer][INFO] - The following columns in the evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: essay_text, prompt, id_prompt, reference, grades, supporting_text, essay_year, id. If essay_text, prompt, id_prompt, reference, grades, supporting_text, essay_year, id are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
111
+ [2025-03-23 20:42:28,133][transformers.trainer][INFO] -
112
+ ***** Running Evaluation *****
113
+ [2025-03-23 20:42:28,134][transformers.trainer][INFO] - Num examples = 132
114
+ [2025-03-23 20:42:28,134][transformers.trainer][INFO] - Batch size = 16
115
+ [2025-03-23 20:42:54,626][transformers][INFO] - {'accuracy': 0.36363636363636365, 'RMSE': 36.680438185149114, 'QWK': 0.0, 'HDIV': 0.007575757575757569, 'Macro_F1': 0.10666666666666666, 'Micro_F1': 0.36363636363636365, 'Weighted_F1': 0.19393939393939394, 'Macro_F1_(ignoring_nan)': np.float64(0.5333333333333333)}
116
+ [2025-03-23 20:42:54,629][tensorboardX.summary][INFO] - Summary name eval/Macro_F1_(ignoring_nan) is illegal; using eval/Macro_F1__ignoring_nan_ instead.
117
+ [2025-03-23 20:42:54,875][transformers.trainer][INFO] - The following columns in the training set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: essay_text, prompt, id_prompt, reference, grades, supporting_text, essay_year, id. If essay_text, prompt, id_prompt, reference, grades, supporting_text, essay_year, id are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
118
+ [2025-03-23 20:42:54,906][transformers.trainer][INFO] - ***** Running training *****
119
+ [2025-03-23 20:42:54,906][transformers.trainer][INFO] - Num examples = 500
120
+ [2025-03-23 20:42:54,906][transformers.trainer][INFO] - Num Epochs = 20
121
+ [2025-03-23 20:42:54,906][transformers.trainer][INFO] - Instantaneous batch size per device = 1
122
+ [2025-03-23 20:42:54,906][transformers.trainer][INFO] - Total train batch size (w. parallel, distributed & accumulation) = 16
123
+ [2025-03-23 20:42:54,906][transformers.trainer][INFO] - Gradient Accumulation steps = 16
124
+ [2025-03-23 20:42:54,906][transformers.trainer][INFO] - Total optimization steps = 620
125
+ [2025-03-23 20:42:54,908][transformers.trainer][INFO] - Number of trainable parameters = 27,883,520
126
+ [2025-03-23 20:49:54,343][transformers.trainer][INFO] - The following columns in the evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: essay_text, prompt, id_prompt, reference, grades, supporting_text, essay_year, id. If essay_text, prompt, id_prompt, reference, grades, supporting_text, essay_year, id are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
127
+ [2025-03-23 20:49:54,346][transformers.trainer][INFO] -
128
+ ***** Running Evaluation *****
129
+ [2025-03-23 20:49:54,346][transformers.trainer][INFO] - Num examples = 132
130
+ [2025-03-23 20:49:54,346][transformers.trainer][INFO] - Batch size = 16
131
+ [2025-03-23 20:50:20,840][transformers][INFO] - {'accuracy': 0.36363636363636365, 'RMSE': 36.680438185149114, 'QWK': 0.0, 'HDIV': 0.007575757575757569, 'Macro_F1': 0.10666666666666666, 'Micro_F1': 0.36363636363636365, 'Weighted_F1': 0.19393939393939394, 'Macro_F1_(ignoring_nan)': np.float64(0.5333333333333333)}
132
+ [2025-03-23 20:50:20,840][tensorboardX.summary][INFO] - Summary name eval/Macro_F1_(ignoring_nan) is illegal; using eval/Macro_F1__ignoring_nan_ instead.
133
+ [2025-03-23 20:50:20,843][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-03-23/20-41-58/results/phi4-balanced/C4/checkpoint-32
134
+ [2025-03-23 20:50:21,157][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--phi-4/snapshots/187ef0342fff0eb3333be9f00389385e95ef0b61/config.json
135
+ [2025-03-23 20:50:21,158][transformers.configuration_utils][INFO] - Model config Phi3Config {
136
+ "architectures": [
137
+ "Phi3ForCausalLM"
138
+ ],
139
+ "attention_bias": false,
140
+ "attention_dropout": 0.0,
141
+ "bos_token_id": 100257,
142
+ "embd_pdrop": 0.0,
143
+ "eos_token_id": 100265,
144
+ "hidden_act": "silu",
145
+ "hidden_size": 5120,
146
+ "initializer_range": 0.02,
147
+ "intermediate_size": 17920,
148
+ "max_position_embeddings": 16384,
149
+ "model_type": "phi3",
150
+ "num_attention_heads": 40,
151
+ "num_hidden_layers": 40,
152
+ "num_key_value_heads": 10,
153
+ "original_max_position_embeddings": 16384,
154
+ "pad_token_id": 100349,
155
+ "partial_rotary_factor": 1.0,
156
+ "resid_pdrop": 0.0,
157
+ "rms_norm_eps": 1e-05,
158
+ "rope_scaling": null,
159
+ "rope_theta": 250000,
160
+ "sliding_window": null,
161
+ "tie_word_embeddings": false,
162
+ "torch_dtype": "bfloat16",
163
+ "transformers_version": "4.50.0",
164
+ "use_cache": true,
165
+ "vocab_size": 100352
166
+ }
167
+
168
+ [2025-03-23 20:57:21,498][transformers.trainer][INFO] - The following columns in the evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: essay_text, prompt, id_prompt, reference, grades, supporting_text, essay_year, id. If essay_text, prompt, id_prompt, reference, grades, supporting_text, essay_year, id are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
169
+ [2025-03-23 20:57:21,501][transformers.trainer][INFO] -
170
+ ***** Running Evaluation *****
171
+ [2025-03-23 20:57:21,501][transformers.trainer][INFO] - Num examples = 132
172
+ [2025-03-23 20:57:21,502][transformers.trainer][INFO] - Batch size = 16
173
+ [2025-03-23 20:57:47,876][transformers][INFO] - {'accuracy': 0.4621212121212121, 'RMSE': 33.93398225253192, 'QWK': 0.15453074433656944, 'HDIV': 0.007575757575757569, 'Macro_F1': 0.18675097862089732, 'Micro_F1': 0.4621212121212121, 'Weighted_F1': 0.3838931318606115, 'Macro_F1_(ignoring_nan)': np.float64(0.4668774465522433)}
174
+ [2025-03-23 20:57:47,876][tensorboardX.summary][INFO] - Summary name eval/Macro_F1_(ignoring_nan) is illegal; using eval/Macro_F1__ignoring_nan_ instead.
175
+ [2025-03-23 20:57:47,879][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-03-23/20-41-58/results/phi4-balanced/C4/checkpoint-64
176
+ [2025-03-23 20:57:48,481][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--phi-4/snapshots/187ef0342fff0eb3333be9f00389385e95ef0b61/config.json
177
+ [2025-03-23 20:57:48,482][transformers.configuration_utils][INFO] - Model config Phi3Config {
178
+ "architectures": [
179
+ "Phi3ForCausalLM"
180
+ ],
181
+ "attention_bias": false,
182
+ "attention_dropout": 0.0,
183
+ "bos_token_id": 100257,
184
+ "embd_pdrop": 0.0,
185
+ "eos_token_id": 100265,
186
+ "hidden_act": "silu",
187
+ "hidden_size": 5120,
188
+ "initializer_range": 0.02,
189
+ "intermediate_size": 17920,
190
+ "max_position_embeddings": 16384,
191
+ "model_type": "phi3",
192
+ "num_attention_heads": 40,
193
+ "num_hidden_layers": 40,
194
+ "num_key_value_heads": 10,
195
+ "original_max_position_embeddings": 16384,
196
+ "pad_token_id": 100349,
197
+ "partial_rotary_factor": 1.0,
198
+ "resid_pdrop": 0.0,
199
+ "rms_norm_eps": 1e-05,
200
+ "rope_scaling": null,
201
+ "rope_theta": 250000,
202
+ "sliding_window": null,
203
+ "tie_word_embeddings": false,
204
+ "torch_dtype": "bfloat16",
205
+ "transformers_version": "4.50.0",
206
+ "use_cache": true,
207
+ "vocab_size": 100352
208
+ }
209
+
210
+ [2025-03-23 20:57:49,222][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-03-23/20-41-58/results/phi4-balanced/C4/checkpoint-32] due to args.save_total_limit
211
+ [2025-03-23 21:04:48,693][transformers.trainer][INFO] - The following columns in the evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: essay_text, prompt, id_prompt, reference, grades, supporting_text, essay_year, id. If essay_text, prompt, id_prompt, reference, grades, supporting_text, essay_year, id are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
212
+ [2025-03-23 21:04:48,695][transformers.trainer][INFO] -
213
+ ***** Running Evaluation *****
214
+ [2025-03-23 21:04:48,695][transformers.trainer][INFO] - Num examples = 132
215
+ [2025-03-23 21:04:48,695][transformers.trainer][INFO] - Batch size = 16
216
+ [2025-03-23 21:05:15,051][transformers][INFO] - {'accuracy': 0.5378787878787878, 'RMSE': 30.944720996896347, 'QWK': 0.3034998664173123, 'HDIV': 0.007575757575757569, 'Macro_F1': 0.228490990990991, 'Micro_F1': 0.5378787878787878, 'Weighted_F1': 0.47856947856947857, 'Macro_F1_(ignoring_nan)': np.float64(0.5712274774774775)}
217
+ [2025-03-23 21:05:15,051][tensorboardX.summary][INFO] - Summary name eval/Macro_F1_(ignoring_nan) is illegal; using eval/Macro_F1__ignoring_nan_ instead.
218
+ [2025-03-23 21:05:15,054][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-03-23/20-41-58/results/phi4-balanced/C4/checkpoint-96
219
+ [2025-03-23 21:05:15,393][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--phi-4/snapshots/187ef0342fff0eb3333be9f00389385e95ef0b61/config.json
220
+ [2025-03-23 21:05:15,394][transformers.configuration_utils][INFO] - Model config Phi3Config {
221
+ "architectures": [
222
+ "Phi3ForCausalLM"
223
+ ],
224
+ "attention_bias": false,
225
+ "attention_dropout": 0.0,
226
+ "bos_token_id": 100257,
227
+ "embd_pdrop": 0.0,
228
+ "eos_token_id": 100265,
229
+ "hidden_act": "silu",
230
+ "hidden_size": 5120,
231
+ "initializer_range": 0.02,
232
+ "intermediate_size": 17920,
233
+ "max_position_embeddings": 16384,
234
+ "model_type": "phi3",
235
+ "num_attention_heads": 40,
236
+ "num_hidden_layers": 40,
237
+ "num_key_value_heads": 10,
238
+ "original_max_position_embeddings": 16384,
239
+ "pad_token_id": 100349,
240
+ "partial_rotary_factor": 1.0,
241
+ "resid_pdrop": 0.0,
242
+ "rms_norm_eps": 1e-05,
243
+ "rope_scaling": null,
244
+ "rope_theta": 250000,
245
+ "sliding_window": null,
246
+ "tie_word_embeddings": false,
247
+ "torch_dtype": "bfloat16",
248
+ "transformers_version": "4.50.0",
249
+ "use_cache": true,
250
+ "vocab_size": 100352
251
+ }
252
+
253
+ [2025-03-23 21:05:16,040][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-03-23/20-41-58/results/phi4-balanced/C4/checkpoint-64] due to args.save_total_limit
254
+ [2025-03-23 21:12:15,345][transformers.trainer][INFO] - The following columns in the evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: essay_text, prompt, id_prompt, reference, grades, supporting_text, essay_year, id. If essay_text, prompt, id_prompt, reference, grades, supporting_text, essay_year, id are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
255
+ [2025-03-23 21:12:15,348][transformers.trainer][INFO] -
256
+ ***** Running Evaluation *****
257
+ [2025-03-23 21:12:15,348][transformers.trainer][INFO] - Num examples = 132
258
+ [2025-03-23 21:12:15,348][transformers.trainer][INFO] - Batch size = 16
259
+ [2025-03-23 21:12:41,768][transformers][INFO] - {'accuracy': 0.5454545454545454, 'RMSE': 31.71845844395036, 'QWK': 0.3009188361408882, 'HDIV': 0.007575757575757569, 'Macro_F1': 0.21155390793945011, 'Micro_F1': 0.5454545454545454, 'Weighted_F1': 0.46934591512904766, 'Macro_F1_(ignoring_nan)': np.float64(0.5288847698486253)}
260
+ [2025-03-23 21:12:41,769][tensorboardX.summary][INFO] - Summary name eval/Macro_F1_(ignoring_nan) is illegal; using eval/Macro_F1__ignoring_nan_ instead.
261
+ [2025-03-23 21:12:41,772][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-03-23/20-41-58/results/phi4-balanced/C4/checkpoint-128
262
+ [2025-03-23 21:12:42,252][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--phi-4/snapshots/187ef0342fff0eb3333be9f00389385e95ef0b61/config.json
263
+ [2025-03-23 21:12:42,252][transformers.configuration_utils][INFO] - Model config Phi3Config {
264
+ "architectures": [
265
+ "Phi3ForCausalLM"
266
+ ],
267
+ "attention_bias": false,
268
+ "attention_dropout": 0.0,
269
+ "bos_token_id": 100257,
270
+ "embd_pdrop": 0.0,
271
+ "eos_token_id": 100265,
272
+ "hidden_act": "silu",
273
+ "hidden_size": 5120,
274
+ "initializer_range": 0.02,
275
+ "intermediate_size": 17920,
276
+ "max_position_embeddings": 16384,
277
+ "model_type": "phi3",
278
+ "num_attention_heads": 40,
279
+ "num_hidden_layers": 40,
280
+ "num_key_value_heads": 10,
281
+ "original_max_position_embeddings": 16384,
282
+ "pad_token_id": 100349,
283
+ "partial_rotary_factor": 1.0,
284
+ "resid_pdrop": 0.0,
285
+ "rms_norm_eps": 1e-05,
286
+ "rope_scaling": null,
287
+ "rope_theta": 250000,
288
+ "sliding_window": null,
289
+ "tie_word_embeddings": false,
290
+ "torch_dtype": "bfloat16",
291
+ "transformers_version": "4.50.0",
292
+ "use_cache": true,
293
+ "vocab_size": 100352
294
+ }
295
+
296
+ [2025-03-23 21:19:42,312][transformers.trainer][INFO] - The following columns in the evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: essay_text, prompt, id_prompt, reference, grades, supporting_text, essay_year, id. If essay_text, prompt, id_prompt, reference, grades, supporting_text, essay_year, id are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
297
+ [2025-03-23 21:19:42,315][transformers.trainer][INFO] -
298
+ ***** Running Evaluation *****
299
+ [2025-03-23 21:19:42,315][transformers.trainer][INFO] - Num examples = 132
300
+ [2025-03-23 21:19:42,315][transformers.trainer][INFO] - Batch size = 16
301
+ [2025-03-23 21:20:08,584][transformers][INFO] - {'accuracy': 0.5606060606060606, 'RMSE': 28.91995221924885, 'QWK': 0.41987261146496824, 'HDIV': 0.007575757575757569, 'Macro_F1': 0.28522175409256106, 'Micro_F1': 0.5606060606060606, 'Weighted_F1': 0.5337214163959735, 'Macro_F1_(ignoring_nan)': np.float64(0.47536959015426844)}
302
+ [2025-03-23 21:20:08,584][tensorboardX.summary][INFO] - Summary name eval/Macro_F1_(ignoring_nan) is illegal; using eval/Macro_F1__ignoring_nan_ instead.
303
+ [2025-03-23 21:20:08,588][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-03-23/20-41-58/results/phi4-balanced/C4/checkpoint-160
304
+ [2025-03-23 21:20:08,878][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--phi-4/snapshots/187ef0342fff0eb3333be9f00389385e95ef0b61/config.json
305
+ [2025-03-23 21:20:08,879][transformers.configuration_utils][INFO] - Model config Phi3Config {
306
+ "architectures": [
307
+ "Phi3ForCausalLM"
308
+ ],
309
+ "attention_bias": false,
310
+ "attention_dropout": 0.0,
311
+ "bos_token_id": 100257,
312
+ "embd_pdrop": 0.0,
313
+ "eos_token_id": 100265,
314
+ "hidden_act": "silu",
315
+ "hidden_size": 5120,
316
+ "initializer_range": 0.02,
317
+ "intermediate_size": 17920,
318
+ "max_position_embeddings": 16384,
319
+ "model_type": "phi3",
320
+ "num_attention_heads": 40,
321
+ "num_hidden_layers": 40,
322
+ "num_key_value_heads": 10,
323
+ "original_max_position_embeddings": 16384,
324
+ "pad_token_id": 100349,
325
+ "partial_rotary_factor": 1.0,
326
+ "resid_pdrop": 0.0,
327
+ "rms_norm_eps": 1e-05,
328
+ "rope_scaling": null,
329
+ "rope_theta": 250000,
330
+ "sliding_window": null,
331
+ "tie_word_embeddings": false,
332
+ "torch_dtype": "bfloat16",
333
+ "transformers_version": "4.50.0",
334
+ "use_cache": true,
335
+ "vocab_size": 100352
336
+ }
337
+
338
+ [2025-03-23 21:20:09,612][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-03-23/20-41-58/results/phi4-balanced/C4/checkpoint-96] due to args.save_total_limit
339
+ [2025-03-23 21:20:09,650][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-03-23/20-41-58/results/phi4-balanced/C4/checkpoint-128] due to args.save_total_limit
340
+ [2025-03-23 21:27:09,228][transformers.trainer][INFO] - The following columns in the evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: essay_text, prompt, id_prompt, reference, grades, supporting_text, essay_year, id. If essay_text, prompt, id_prompt, reference, grades, supporting_text, essay_year, id are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
341
+ [2025-03-23 21:27:09,230][transformers.trainer][INFO] -
342
+ ***** Running Evaluation *****
343
+ [2025-03-23 21:27:09,230][transformers.trainer][INFO] - Num examples = 132
344
+ [2025-03-23 21:27:09,230][transformers.trainer][INFO] - Batch size = 16
345
+ [2025-03-23 21:27:35,620][transformers][INFO] - {'accuracy': 0.5227272727272727, 'RMSE': 30.550504633038933, 'QWK': 0.3370727889381686, 'HDIV': 0.007575757575757569, 'Macro_F1': 0.22424242424242422, 'Micro_F1': 0.5227272727272727, 'Weighted_F1': 0.4830119375573921, 'Macro_F1_(ignoring_nan)': np.float64(0.5606060606060606)}
346
+ [2025-03-23 21:27:35,620][tensorboardX.summary][INFO] - Summary name eval/Macro_F1_(ignoring_nan) is illegal; using eval/Macro_F1__ignoring_nan_ instead.
347
+ [2025-03-23 21:27:35,623][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-03-23/20-41-58/results/phi4-balanced/C4/checkpoint-192
348
+ [2025-03-23 21:27:35,955][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--phi-4/snapshots/187ef0342fff0eb3333be9f00389385e95ef0b61/config.json
349
+ [2025-03-23 21:27:35,955][transformers.configuration_utils][INFO] - Model config Phi3Config {
350
+ "architectures": [
351
+ "Phi3ForCausalLM"
352
+ ],
353
+ "attention_bias": false,
354
+ "attention_dropout": 0.0,
355
+ "bos_token_id": 100257,
356
+ "embd_pdrop": 0.0,
357
+ "eos_token_id": 100265,
358
+ "hidden_act": "silu",
359
+ "hidden_size": 5120,
360
+ "initializer_range": 0.02,
361
+ "intermediate_size": 17920,
362
+ "max_position_embeddings": 16384,
363
+ "model_type": "phi3",
364
+ "num_attention_heads": 40,
365
+ "num_hidden_layers": 40,
366
+ "num_key_value_heads": 10,
367
+ "original_max_position_embeddings": 16384,
368
+ "pad_token_id": 100349,
369
+ "partial_rotary_factor": 1.0,
370
+ "resid_pdrop": 0.0,
371
+ "rms_norm_eps": 1e-05,
372
+ "rope_scaling": null,
373
+ "rope_theta": 250000,
374
+ "sliding_window": null,
375
+ "tie_word_embeddings": false,
376
+ "torch_dtype": "bfloat16",
377
+ "transformers_version": "4.50.0",
378
+ "use_cache": true,
379
+ "vocab_size": 100352
380
+ }
381
+
382
+ [2025-03-23 21:34:35,458][transformers.trainer][INFO] - The following columns in the evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: essay_text, prompt, id_prompt, reference, grades, supporting_text, essay_year, id. If essay_text, prompt, id_prompt, reference, grades, supporting_text, essay_year, id are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
383
+ [2025-03-23 21:34:35,460][transformers.trainer][INFO] -
384
+ ***** Running Evaluation *****
385
+ [2025-03-23 21:34:35,460][transformers.trainer][INFO] - Num examples = 132
386
+ [2025-03-23 21:34:35,460][transformers.trainer][INFO] - Batch size = 16
387
+ [2025-03-23 21:35:01,816][transformers][INFO] - {'accuracy': 0.5075757575757576, 'RMSE': 34.289321599553055, 'QWK': 0.47082162340882794, 'HDIV': 0.007575757575757569, 'Macro_F1': 0.30565610859728504, 'Micro_F1': 0.5075757575757576, 'Weighted_F1': 0.5056903880433292, 'Macro_F1_(ignoring_nan)': np.float64(0.5094268476621417)}
388
+ [2025-03-23 21:35:01,816][tensorboardX.summary][INFO] - Summary name eval/Macro_F1_(ignoring_nan) is illegal; using eval/Macro_F1__ignoring_nan_ instead.
389
+ [2025-03-23 21:35:01,819][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-03-23/20-41-58/results/phi4-balanced/C4/checkpoint-224
390
+ [2025-03-23 21:35:02,175][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--phi-4/snapshots/187ef0342fff0eb3333be9f00389385e95ef0b61/config.json
391
+ [2025-03-23 21:35:02,175][transformers.configuration_utils][INFO] - Model config Phi3Config {
392
+ "architectures": [
393
+ "Phi3ForCausalLM"
394
+ ],
395
+ "attention_bias": false,
396
+ "attention_dropout": 0.0,
397
+ "bos_token_id": 100257,
398
+ "embd_pdrop": 0.0,
399
+ "eos_token_id": 100265,
400
+ "hidden_act": "silu",
401
+ "hidden_size": 5120,
402
+ "initializer_range": 0.02,
403
+ "intermediate_size": 17920,
404
+ "max_position_embeddings": 16384,
405
+ "model_type": "phi3",
406
+ "num_attention_heads": 40,
407
+ "num_hidden_layers": 40,
408
+ "num_key_value_heads": 10,
409
+ "original_max_position_embeddings": 16384,
410
+ "pad_token_id": 100349,
411
+ "partial_rotary_factor": 1.0,
412
+ "resid_pdrop": 0.0,
413
+ "rms_norm_eps": 1e-05,
414
+ "rope_scaling": null,
415
+ "rope_theta": 250000,
416
+ "sliding_window": null,
417
+ "tie_word_embeddings": false,
418
+ "torch_dtype": "bfloat16",
419
+ "transformers_version": "4.50.0",
420
+ "use_cache": true,
421
+ "vocab_size": 100352
422
+ }
423
+
424
+ [2025-03-23 21:35:02,951][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-03-23/20-41-58/results/phi4-balanced/C4/checkpoint-160] due to args.save_total_limit
425
+ [2025-03-23 21:35:02,986][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-03-23/20-41-58/results/phi4-balanced/C4/checkpoint-192] due to args.save_total_limit
426
+ [2025-03-23 21:42:02,769][transformers.trainer][INFO] - The following columns in the evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: essay_text, prompt, id_prompt, reference, grades, supporting_text, essay_year, id. If essay_text, prompt, id_prompt, reference, grades, supporting_text, essay_year, id are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
427
+ [2025-03-23 21:42:02,772][transformers.trainer][INFO] -
428
+ ***** Running Evaluation *****
429
+ [2025-03-23 21:42:02,772][transformers.trainer][INFO] - Num examples = 132
430
+ [2025-03-23 21:42:02,772][transformers.trainer][INFO] - Batch size = 16
431
+ [2025-03-23 21:42:29,172][transformers][INFO] - {'accuracy': 0.5833333333333334, 'RMSE': 28.91995221924885, 'QWK': 0.402048319327731, 'HDIV': 0.007575757575757569, 'Macro_F1': 0.2524590163934426, 'Micro_F1': 0.5833333333333334, 'Weighted_F1': 0.5384997516145057, 'Macro_F1_(ignoring_nan)': np.float64(0.6311475409836065)}
432
+ [2025-03-23 21:42:29,172][tensorboardX.summary][INFO] - Summary name eval/Macro_F1_(ignoring_nan) is illegal; using eval/Macro_F1__ignoring_nan_ instead.
433
+ [2025-03-23 21:42:29,175][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-03-23/20-41-58/results/phi4-balanced/C4/checkpoint-256
434
+ [2025-03-23 21:42:29,463][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--phi-4/snapshots/187ef0342fff0eb3333be9f00389385e95ef0b61/config.json
435
+ [2025-03-23 21:42:29,464][transformers.configuration_utils][INFO] - Model config Phi3Config {
436
+ "architectures": [
437
+ "Phi3ForCausalLM"
438
+ ],
439
+ "attention_bias": false,
440
+ "attention_dropout": 0.0,
441
+ "bos_token_id": 100257,
442
+ "embd_pdrop": 0.0,
443
+ "eos_token_id": 100265,
444
+ "hidden_act": "silu",
445
+ "hidden_size": 5120,
446
+ "initializer_range": 0.02,
447
+ "intermediate_size": 17920,
448
+ "max_position_embeddings": 16384,
449
+ "model_type": "phi3",
450
+ "num_attention_heads": 40,
451
+ "num_hidden_layers": 40,
452
+ "num_key_value_heads": 10,
453
+ "original_max_position_embeddings": 16384,
454
+ "pad_token_id": 100349,
455
+ "partial_rotary_factor": 1.0,
456
+ "resid_pdrop": 0.0,
457
+ "rms_norm_eps": 1e-05,
458
+ "rope_scaling": null,
459
+ "rope_theta": 250000,
460
+ "sliding_window": null,
461
+ "tie_word_embeddings": false,
462
+ "torch_dtype": "bfloat16",
463
+ "transformers_version": "4.50.0",
464
+ "use_cache": true,
465
+ "vocab_size": 100352
466
+ }
467
+
468
+ [2025-03-23 21:49:29,409][transformers.trainer][INFO] - The following columns in the evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: essay_text, prompt, id_prompt, reference, grades, supporting_text, essay_year, id. If essay_text, prompt, id_prompt, reference, grades, supporting_text, essay_year, id are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
469
+ [2025-03-23 21:49:29,411][transformers.trainer][INFO] -
470
+ ***** Running Evaluation *****
471
+ [2025-03-23 21:49:29,411][transformers.trainer][INFO] - Num examples = 132
472
+ [2025-03-23 21:49:29,411][transformers.trainer][INFO] - Batch size = 16
473
+ [2025-03-23 21:49:55,727][transformers][INFO] - {'accuracy': 0.5984848484848485, 'RMSE': 28.4977404739606, 'QWK': 0.4918409561020455, 'HDIV': 0.007575757575757569, 'Macro_F1': 0.3331918158567775, 'Micro_F1': 0.5984848484848485, 'Weighted_F1': 0.5808866155157715, 'Macro_F1_(ignoring_nan)': np.float64(0.5553196930946291)}
474
+ [2025-03-23 21:49:55,727][tensorboardX.summary][INFO] - Summary name eval/Macro_F1_(ignoring_nan) is illegal; using eval/Macro_F1__ignoring_nan_ instead.
475
+ [2025-03-23 21:49:55,731][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-03-23/20-41-58/results/phi4-balanced/C4/checkpoint-288
476
+ [2025-03-23 21:49:56,023][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--phi-4/snapshots/187ef0342fff0eb3333be9f00389385e95ef0b61/config.json
477
+ [2025-03-23 21:49:56,023][transformers.configuration_utils][INFO] - Model config Phi3Config {
478
+ "architectures": [
479
+ "Phi3ForCausalLM"
480
+ ],
481
+ "attention_bias": false,
482
+ "attention_dropout": 0.0,
483
+ "bos_token_id": 100257,
484
+ "embd_pdrop": 0.0,
485
+ "eos_token_id": 100265,
486
+ "hidden_act": "silu",
487
+ "hidden_size": 5120,
488
+ "initializer_range": 0.02,
489
+ "intermediate_size": 17920,
490
+ "max_position_embeddings": 16384,
491
+ "model_type": "phi3",
492
+ "num_attention_heads": 40,
493
+ "num_hidden_layers": 40,
494
+ "num_key_value_heads": 10,
495
+ "original_max_position_embeddings": 16384,
496
+ "pad_token_id": 100349,
497
+ "partial_rotary_factor": 1.0,
498
+ "resid_pdrop": 0.0,
499
+ "rms_norm_eps": 1e-05,
500
+ "rope_scaling": null,
501
+ "rope_theta": 250000,
502
+ "sliding_window": null,
503
+ "tie_word_embeddings": false,
504
+ "torch_dtype": "bfloat16",
505
+ "transformers_version": "4.50.0",
506
+ "use_cache": true,
507
+ "vocab_size": 100352
508
+ }
509
+
510
+ [2025-03-23 21:49:56,763][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-03-23/20-41-58/results/phi4-balanced/C4/checkpoint-224] due to args.save_total_limit
511
+ [2025-03-23 21:49:56,810][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-03-23/20-41-58/results/phi4-balanced/C4/checkpoint-256] due to args.save_total_limit
512
+ [2025-03-23 21:56:56,055][transformers.trainer][INFO] - The following columns in the evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: essay_text, prompt, id_prompt, reference, grades, supporting_text, essay_year, id. If essay_text, prompt, id_prompt, reference, grades, supporting_text, essay_year, id are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
513
+ [2025-03-23 21:56:56,057][transformers.trainer][INFO] -
514
+ ***** Running Evaluation *****
515
+ [2025-03-23 21:56:56,057][transformers.trainer][INFO] - Num examples = 132
516
+ [2025-03-23 21:56:56,057][transformers.trainer][INFO] - Batch size = 16
517
+ [2025-03-23 21:57:22,544][transformers][INFO] - {'accuracy': 0.5984848484848485, 'RMSE': 28.4977404739606, 'QWK': 0.4536693847294292, 'HDIV': 0.007575757575757569, 'Macro_F1': 0.3165505532462971, 'Micro_F1': 0.5984848484848485, 'Weighted_F1': 0.5739240659034582, 'Macro_F1_(ignoring_nan)': np.float64(0.5275842554104951)}
518
+ [2025-03-23 21:57:22,545][tensorboardX.summary][INFO] - Summary name eval/Macro_F1_(ignoring_nan) is illegal; using eval/Macro_F1__ignoring_nan_ instead.
519
+ [2025-03-23 21:57:22,548][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-03-23/20-41-58/results/phi4-balanced/C4/checkpoint-320
520
+ [2025-03-23 21:57:22,867][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--phi-4/snapshots/187ef0342fff0eb3333be9f00389385e95ef0b61/config.json
521
+ [2025-03-23 21:57:22,868][transformers.configuration_utils][INFO] - Model config Phi3Config {
522
+ "architectures": [
523
+ "Phi3ForCausalLM"
524
+ ],
525
+ "attention_bias": false,
526
+ "attention_dropout": 0.0,
527
+ "bos_token_id": 100257,
528
+ "embd_pdrop": 0.0,
529
+ "eos_token_id": 100265,
530
+ "hidden_act": "silu",
531
+ "hidden_size": 5120,
532
+ "initializer_range": 0.02,
533
+ "intermediate_size": 17920,
534
+ "max_position_embeddings": 16384,
535
+ "model_type": "phi3",
536
+ "num_attention_heads": 40,
537
+ "num_hidden_layers": 40,
538
+ "num_key_value_heads": 10,
539
+ "original_max_position_embeddings": 16384,
540
+ "pad_token_id": 100349,
541
+ "partial_rotary_factor": 1.0,
542
+ "resid_pdrop": 0.0,
543
+ "rms_norm_eps": 1e-05,
544
+ "rope_scaling": null,
545
+ "rope_theta": 250000,
546
+ "sliding_window": null,
547
+ "tie_word_embeddings": false,
548
+ "torch_dtype": "bfloat16",
549
+ "transformers_version": "4.50.0",
550
+ "use_cache": true,
551
+ "vocab_size": 100352
552
+ }
553
+
554
+ [2025-03-23 22:04:23,489][transformers.trainer][INFO] - The following columns in the evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: essay_text, prompt, id_prompt, reference, grades, supporting_text, essay_year, id. If essay_text, prompt, id_prompt, reference, grades, supporting_text, essay_year, id are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
555
+ [2025-03-23 22:04:23,491][transformers.trainer][INFO] -
556
+ ***** Running Evaluation *****
557
+ [2025-03-23 22:04:23,491][transformers.trainer][INFO] - Num examples = 132
558
+ [2025-03-23 22:04:23,491][transformers.trainer][INFO] - Batch size = 16
559
+ [2025-03-23 22:04:49,840][transformers][INFO] - {'accuracy': 0.5833333333333334, 'RMSE': 28.91995221924885, 'QWK': 0.44558071585098613, 'HDIV': 0.007575757575757569, 'Macro_F1': 0.30430075187969924, 'Micro_F1': 0.5833333333333334, 'Weighted_F1': 0.5574436090225563, 'Macro_F1_(ignoring_nan)': np.float64(0.5071679197994987)}
560
+ [2025-03-23 22:04:49,841][tensorboardX.summary][INFO] - Summary name eval/Macro_F1_(ignoring_nan) is illegal; using eval/Macro_F1__ignoring_nan_ instead.
561
+ [2025-03-23 22:04:49,844][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-03-23/20-41-58/results/phi4-balanced/C4/checkpoint-352
562
+ [2025-03-23 22:04:50,184][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--phi-4/snapshots/187ef0342fff0eb3333be9f00389385e95ef0b61/config.json
563
+ [2025-03-23 22:04:50,185][transformers.configuration_utils][INFO] - Model config Phi3Config {
564
+ "architectures": [
565
+ "Phi3ForCausalLM"
566
+ ],
567
+ "attention_bias": false,
568
+ "attention_dropout": 0.0,
569
+ "bos_token_id": 100257,
570
+ "embd_pdrop": 0.0,
571
+ "eos_token_id": 100265,
572
+ "hidden_act": "silu",
573
+ "hidden_size": 5120,
574
+ "initializer_range": 0.02,
575
+ "intermediate_size": 17920,
576
+ "max_position_embeddings": 16384,
577
+ "model_type": "phi3",
578
+ "num_attention_heads": 40,
579
+ "num_hidden_layers": 40,
580
+ "num_key_value_heads": 10,
581
+ "original_max_position_embeddings": 16384,
582
+ "pad_token_id": 100349,
583
+ "partial_rotary_factor": 1.0,
584
+ "resid_pdrop": 0.0,
585
+ "rms_norm_eps": 1e-05,
586
+ "rope_scaling": null,
587
+ "rope_theta": 250000,
588
+ "sliding_window": null,
589
+ "tie_word_embeddings": false,
590
+ "torch_dtype": "bfloat16",
591
+ "transformers_version": "4.50.0",
592
+ "use_cache": true,
593
+ "vocab_size": 100352
594
+ }
595
+
596
+ [2025-03-23 22:04:50,944][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-03-23/20-41-58/results/phi4-balanced/C4/checkpoint-320] due to args.save_total_limit
597
+ [2025-03-23 22:11:50,060][transformers.trainer][INFO] - The following columns in the evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: essay_text, prompt, id_prompt, reference, grades, supporting_text, essay_year, id. If essay_text, prompt, id_prompt, reference, grades, supporting_text, essay_year, id are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
598
+ [2025-03-23 22:11:50,062][transformers.trainer][INFO] -
599
+ ***** Running Evaluation *****
600
+ [2025-03-23 22:11:50,062][transformers.trainer][INFO] - Num examples = 132
601
+ [2025-03-23 22:11:50,062][transformers.trainer][INFO] - Batch size = 16
602
+ [2025-03-23 22:12:16,556][transformers][INFO] - {'accuracy': 0.5833333333333334, 'RMSE': 28.91995221924885, 'QWK': 0.40126216145148574, 'HDIV': 0.007575757575757569, 'Macro_F1': 0.2525806451612903, 'Micro_F1': 0.5833333333333334, 'Weighted_F1': 0.5380254154447703, 'Macro_F1_(ignoring_nan)': np.float64(0.6314516129032258)}
603
+ [2025-03-23 22:12:16,556][tensorboardX.summary][INFO] - Summary name eval/Macro_F1_(ignoring_nan) is illegal; using eval/Macro_F1__ignoring_nan_ instead.
604
+ [2025-03-23 22:12:16,559][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-03-23/20-41-58/results/phi4-balanced/C4/checkpoint-384
605
+ [2025-03-23 22:12:16,980][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--phi-4/snapshots/187ef0342fff0eb3333be9f00389385e95ef0b61/config.json
606
+ [2025-03-23 22:12:16,980][transformers.configuration_utils][INFO] - Model config Phi3Config {
607
+ "architectures": [
608
+ "Phi3ForCausalLM"
609
+ ],
610
+ "attention_bias": false,
611
+ "attention_dropout": 0.0,
612
+ "bos_token_id": 100257,
613
+ "embd_pdrop": 0.0,
614
+ "eos_token_id": 100265,
615
+ "hidden_act": "silu",
616
+ "hidden_size": 5120,
617
+ "initializer_range": 0.02,
618
+ "intermediate_size": 17920,
619
+ "max_position_embeddings": 16384,
620
+ "model_type": "phi3",
621
+ "num_attention_heads": 40,
622
+ "num_hidden_layers": 40,
623
+ "num_key_value_heads": 10,
624
+ "original_max_position_embeddings": 16384,
625
+ "pad_token_id": 100349,
626
+ "partial_rotary_factor": 1.0,
627
+ "resid_pdrop": 0.0,
628
+ "rms_norm_eps": 1e-05,
629
+ "rope_scaling": null,
630
+ "rope_theta": 250000,
631
+ "sliding_window": null,
632
+ "tie_word_embeddings": false,
633
+ "torch_dtype": "bfloat16",
634
+ "transformers_version": "4.50.0",
635
+ "use_cache": true,
636
+ "vocab_size": 100352
637
+ }
638
+
639
+ [2025-03-23 22:12:17,746][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-03-23/20-41-58/results/phi4-balanced/C4/checkpoint-352] due to args.save_total_limit
640
+ [2025-03-23 22:19:16,915][transformers.trainer][INFO] - The following columns in the evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: essay_text, prompt, id_prompt, reference, grades, supporting_text, essay_year, id. If essay_text, prompt, id_prompt, reference, grades, supporting_text, essay_year, id are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
641
+ [2025-03-23 22:19:16,918][transformers.trainer][INFO] -
642
+ ***** Running Evaluation *****
643
+ [2025-03-23 22:19:16,918][transformers.trainer][INFO] - Num examples = 132
644
+ [2025-03-23 22:19:16,918][transformers.trainer][INFO] - Batch size = 16
645
+ [2025-03-23 22:19:43,392][transformers][INFO] - {'accuracy': 0.5681818181818182, 'RMSE': 29.336088024923512, 'QWK': 0.5011709601873537, 'HDIV': 0.007575757575757569, 'Macro_F1': 0.3423423989280405, 'Micro_F1': 0.5681818181818182, 'Weighted_F1': 0.5600188811042401, 'Macro_F1_(ignoring_nan)': np.float64(0.5705706648800676)}
646
+ [2025-03-23 22:19:43,393][tensorboardX.summary][INFO] - Summary name eval/Macro_F1_(ignoring_nan) is illegal; using eval/Macro_F1__ignoring_nan_ instead.
647
+ [2025-03-23 22:19:43,396][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-03-23/20-41-58/results/phi4-balanced/C4/checkpoint-416
648
+ [2025-03-23 22:19:43,732][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--phi-4/snapshots/187ef0342fff0eb3333be9f00389385e95ef0b61/config.json
649
+ [2025-03-23 22:19:43,733][transformers.configuration_utils][INFO] - Model config Phi3Config {
650
+ "architectures": [
651
+ "Phi3ForCausalLM"
652
+ ],
653
+ "attention_bias": false,
654
+ "attention_dropout": 0.0,
655
+ "bos_token_id": 100257,
656
+ "embd_pdrop": 0.0,
657
+ "eos_token_id": 100265,
658
+ "hidden_act": "silu",
659
+ "hidden_size": 5120,
660
+ "initializer_range": 0.02,
661
+ "intermediate_size": 17920,
662
+ "max_position_embeddings": 16384,
663
+ "model_type": "phi3",
664
+ "num_attention_heads": 40,
665
+ "num_hidden_layers": 40,
666
+ "num_key_value_heads": 10,
667
+ "original_max_position_embeddings": 16384,
668
+ "pad_token_id": 100349,
669
+ "partial_rotary_factor": 1.0,
670
+ "resid_pdrop": 0.0,
671
+ "rms_norm_eps": 1e-05,
672
+ "rope_scaling": null,
673
+ "rope_theta": 250000,
674
+ "sliding_window": null,
675
+ "tie_word_embeddings": false,
676
+ "torch_dtype": "bfloat16",
677
+ "transformers_version": "4.50.0",
678
+ "use_cache": true,
679
+ "vocab_size": 100352
680
+ }
681
+
682
+ [2025-03-23 22:19:44,597][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-03-23/20-41-58/results/phi4-balanced/C4/checkpoint-288] due to args.save_total_limit
683
+ [2025-03-23 22:19:44,648][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-03-23/20-41-58/results/phi4-balanced/C4/checkpoint-384] due to args.save_total_limit
684
+ [2025-03-23 22:26:45,026][transformers.trainer][INFO] - The following columns in the evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: essay_text, prompt, id_prompt, reference, grades, supporting_text, essay_year, id. If essay_text, prompt, id_prompt, reference, grades, supporting_text, essay_year, id are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
685
+ [2025-03-23 22:26:45,029][transformers.trainer][INFO] -
686
+ ***** Running Evaluation *****
687
+ [2025-03-23 22:26:45,029][transformers.trainer][INFO] - Num examples = 132
688
+ [2025-03-23 22:26:45,029][transformers.trainer][INFO] - Batch size = 16
689
+ [2025-03-23 22:27:11,429][transformers][INFO] - {'accuracy': 0.5833333333333334, 'RMSE': 28.91995221924885, 'QWK': 0.47181628392484354, 'HDIV': 0.007575757575757569, 'Macro_F1': 0.3308645677161419, 'Micro_F1': 0.5833333333333334, 'Weighted_F1': 0.56964396589584, 'Macro_F1_(ignoring_nan)': np.float64(0.5514409461935699)}
690
+ [2025-03-23 22:27:11,430][tensorboardX.summary][INFO] - Summary name eval/Macro_F1_(ignoring_nan) is illegal; using eval/Macro_F1__ignoring_nan_ instead.
691
+ [2025-03-23 22:27:11,432][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-03-23/20-41-58/results/phi4-balanced/C4/checkpoint-448
692
+ [2025-03-23 22:34:21,711][transformers.trainer][INFO] - The following columns in the evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: essay_text, prompt, id_prompt, reference, grades, supporting_text, essay_year, id. If essay_text, prompt, id_prompt, reference, grades, supporting_text, essay_year, id are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
693
+ [2025-03-23 22:34:21,714][transformers.trainer][INFO] -
694
+ ***** Running Evaluation *****
695
+ [2025-03-23 22:34:21,714][transformers.trainer][INFO] - Num examples = 132
696
+ [2025-03-23 22:34:21,714][transformers.trainer][INFO] - Batch size = 16
697
+ [2025-03-23 22:34:48,074][transformers][INFO] - {'accuracy': 0.5757575757575758, 'RMSE': 29.128763250176767, 'QWK': 0.4815396700706991, 'HDIV': 0.007575757575757569, 'Macro_F1': 0.3335450732209876, 'Micro_F1': 0.5757575757575758, 'Weighted_F1': 0.5647085320791996, 'Macro_F1_(ignoring_nan)': np.float64(0.5559084553683126)}
698
+ [2025-03-23 22:34:48,075][tensorboardX.summary][INFO] - Summary name eval/Macro_F1_(ignoring_nan) is illegal; using eval/Macro_F1__ignoring_nan_ instead.
699
+ [2025-03-23 22:34:48,076][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-03-23/20-41-58/results/phi4-balanced/C4/checkpoint-480
700
+ [2025-03-23 22:34:48,464][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--phi-4/snapshots/187ef0342fff0eb3333be9f00389385e95ef0b61/config.json
701
+ [2025-03-23 22:34:48,465][transformers.configuration_utils][INFO] - Model config Phi3Config {
702
+ "architectures": [
703
+ "Phi3ForCausalLM"
704
+ ],
705
+ "attention_bias": false,
706
+ "attention_dropout": 0.0,
707
+ "bos_token_id": 100257,
708
+ "embd_pdrop": 0.0,
709
+ "eos_token_id": 100265,
710
+ "hidden_act": "silu",
711
+ "hidden_size": 5120,
712
+ "initializer_range": 0.02,
713
+ "intermediate_size": 17920,
714
+ "max_position_embeddings": 16384,
715
+ "model_type": "phi3",
716
+ "num_attention_heads": 40,
717
+ "num_hidden_layers": 40,
718
+ "num_key_value_heads": 10,
719
+ "original_max_position_embeddings": 16384,
720
+ "pad_token_id": 100349,
721
+ "partial_rotary_factor": 1.0,
722
+ "resid_pdrop": 0.0,
723
+ "rms_norm_eps": 1e-05,
724
+ "rope_scaling": null,
725
+ "rope_theta": 250000,
726
+ "sliding_window": null,
727
+ "tie_word_embeddings": false,
728
+ "torch_dtype": "bfloat16",
729
+ "transformers_version": "4.50.0",
730
+ "use_cache": true,
731
+ "vocab_size": 100352
732
+ }
733
+
734
+ [2025-03-23 22:34:49,169][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-03-23/20-41-58/results/phi4-balanced/C4/checkpoint-448] due to args.save_total_limit
735
+ [2025-03-23 22:41:49,150][transformers.trainer][INFO] - The following columns in the evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: essay_text, prompt, id_prompt, reference, grades, supporting_text, essay_year, id. If essay_text, prompt, id_prompt, reference, grades, supporting_text, essay_year, id are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
736
+ [2025-03-23 22:41:49,152][transformers.trainer][INFO] -
737
+ ***** Running Evaluation *****
738
+ [2025-03-23 22:41:49,152][transformers.trainer][INFO] - Num examples = 132
739
+ [2025-03-23 22:41:49,152][transformers.trainer][INFO] - Batch size = 16
740
+ [2025-03-23 22:42:15,502][transformers][INFO] - {'accuracy': 0.5984848484848485, 'RMSE': 28.4977404739606, 'QWK': 0.47344605858537747, 'HDIV': 0.007575757575757569, 'Macro_F1': 0.31150793650793646, 'Micro_F1': 0.5984848484848485, 'Weighted_F1': 0.5776815776815778, 'Macro_F1_(ignoring_nan)': np.float64(0.5191798941798941)}
741
+ [2025-03-23 22:42:15,503][tensorboardX.summary][INFO] - Summary name eval/Macro_F1_(ignoring_nan) is illegal; using eval/Macro_F1__ignoring_nan_ instead.
742
+ [2025-03-23 22:42:15,506][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-03-23/20-41-58/results/phi4-balanced/C4/checkpoint-512
743
+ [2025-03-23 22:42:16,316][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--phi-4/snapshots/187ef0342fff0eb3333be9f00389385e95ef0b61/config.json
744
+ [2025-03-23 22:42:16,317][transformers.configuration_utils][INFO] - Model config Phi3Config {
745
+ "architectures": [
746
+ "Phi3ForCausalLM"
747
+ ],
748
+ "attention_bias": false,
749
+ "attention_dropout": 0.0,
750
+ "bos_token_id": 100257,
751
+ "embd_pdrop": 0.0,
752
+ "eos_token_id": 100265,
753
+ "hidden_act": "silu",
754
+ "hidden_size": 5120,
755
+ "initializer_range": 0.02,
756
+ "intermediate_size": 17920,
757
+ "max_position_embeddings": 16384,
758
+ "model_type": "phi3",
759
+ "num_attention_heads": 40,
760
+ "num_hidden_layers": 40,
761
+ "num_key_value_heads": 10,
762
+ "original_max_position_embeddings": 16384,
763
+ "pad_token_id": 100349,
764
+ "partial_rotary_factor": 1.0,
765
+ "resid_pdrop": 0.0,
766
+ "rms_norm_eps": 1e-05,
767
+ "rope_scaling": null,
768
+ "rope_theta": 250000,
769
+ "sliding_window": null,
770
+ "tie_word_embeddings": false,
771
+ "torch_dtype": "bfloat16",
772
+ "transformers_version": "4.50.0",
773
+ "use_cache": true,
774
+ "vocab_size": 100352
775
+ }
776
+
777
+ [2025-03-23 22:42:17,055][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-03-23/20-41-58/results/phi4-balanced/C4/checkpoint-480] due to args.save_total_limit
778
+ [2025-03-23 22:49:17,118][transformers.trainer][INFO] - The following columns in the evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: essay_text, prompt, id_prompt, reference, grades, supporting_text, essay_year, id. If essay_text, prompt, id_prompt, reference, grades, supporting_text, essay_year, id are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
779
+ [2025-03-23 22:49:17,120][transformers.trainer][INFO] -
780
+ ***** Running Evaluation *****
781
+ [2025-03-23 22:49:17,120][transformers.trainer][INFO] - Num examples = 132
782
+ [2025-03-23 22:49:17,120][transformers.trainer][INFO] - Batch size = 16
783
+ [2025-03-23 22:49:43,519][transformers][INFO] - {'accuracy': 0.5833333333333334, 'RMSE': 28.91995221924885, 'QWK': 0.48693105002253256, 'HDIV': 0.007575757575757569, 'Macro_F1': 0.3384561403508772, 'Micro_F1': 0.5833333333333334, 'Weighted_F1': 0.5721424774056353, 'Macro_F1_(ignoring_nan)': np.float64(0.564093567251462)}
784
+ [2025-03-23 22:49:43,520][tensorboardX.summary][INFO] - Summary name eval/Macro_F1_(ignoring_nan) is illegal; using eval/Macro_F1__ignoring_nan_ instead.
785
+ [2025-03-23 22:49:43,523][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-03-23/20-41-58/results/phi4-balanced/C4/checkpoint-544
786
+ [2025-03-23 22:49:43,837][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--phi-4/snapshots/187ef0342fff0eb3333be9f00389385e95ef0b61/config.json
787
+ [2025-03-23 22:49:43,837][transformers.configuration_utils][INFO] - Model config Phi3Config {
788
+ "architectures": [
789
+ "Phi3ForCausalLM"
790
+ ],
791
+ "attention_bias": false,
792
+ "attention_dropout": 0.0,
793
+ "bos_token_id": 100257,
794
+ "embd_pdrop": 0.0,
795
+ "eos_token_id": 100265,
796
+ "hidden_act": "silu",
797
+ "hidden_size": 5120,
798
+ "initializer_range": 0.02,
799
+ "intermediate_size": 17920,
800
+ "max_position_embeddings": 16384,
801
+ "model_type": "phi3",
802
+ "num_attention_heads": 40,
803
+ "num_hidden_layers": 40,
804
+ "num_key_value_heads": 10,
805
+ "original_max_position_embeddings": 16384,
806
+ "pad_token_id": 100349,
807
+ "partial_rotary_factor": 1.0,
808
+ "resid_pdrop": 0.0,
809
+ "rms_norm_eps": 1e-05,
810
+ "rope_scaling": null,
811
+ "rope_theta": 250000,
812
+ "sliding_window": null,
813
+ "tie_word_embeddings": false,
814
+ "torch_dtype": "bfloat16",
815
+ "transformers_version": "4.50.0",
816
+ "use_cache": true,
817
+ "vocab_size": 100352
818
+ }
819
+
820
+ [2025-03-23 22:49:44,524][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-03-23/20-41-58/results/phi4-balanced/C4/checkpoint-512] due to args.save_total_limit
821
+ [2025-03-23 22:56:43,629][transformers.trainer][INFO] - The following columns in the evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: essay_text, prompt, id_prompt, reference, grades, supporting_text, essay_year, id. If essay_text, prompt, id_prompt, reference, grades, supporting_text, essay_year, id are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
822
+ [2025-03-23 22:56:43,631][transformers.trainer][INFO] -
823
+ ***** Running Evaluation *****
824
+ [2025-03-23 22:56:43,631][transformers.trainer][INFO] - Num examples = 132
825
+ [2025-03-23 22:56:43,631][transformers.trainer][INFO] - Batch size = 16
826
+ [2025-03-23 22:57:09,913][transformers][INFO] - {'accuracy': 0.5984848484848485, 'RMSE': 28.4977404739606, 'QWK': 0.5040376850605652, 'HDIV': 0.007575757575757569, 'Macro_F1': 0.3435825471698113, 'Micro_F1': 0.5984848484848485, 'Weighted_F1': 0.5861349342481418, 'Macro_F1_(ignoring_nan)': np.float64(0.5726375786163521)}
827
+ [2025-03-23 22:57:09,914][tensorboardX.summary][INFO] - Summary name eval/Macro_F1_(ignoring_nan) is illegal; using eval/Macro_F1__ignoring_nan_ instead.
828
+ [2025-03-23 22:57:09,916][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-03-23/20-41-58/results/phi4-balanced/C4/checkpoint-576
829
+ [2025-03-23 22:57:11,876][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--phi-4/snapshots/187ef0342fff0eb3333be9f00389385e95ef0b61/config.json
830
+ [2025-03-23 22:57:11,877][transformers.configuration_utils][INFO] - Model config Phi3Config {
831
+ "architectures": [
832
+ "Phi3ForCausalLM"
833
+ ],
834
+ "attention_bias": false,
835
+ "attention_dropout": 0.0,
836
+ "bos_token_id": 100257,
837
+ "embd_pdrop": 0.0,
838
+ "eos_token_id": 100265,
839
+ "hidden_act": "silu",
840
+ "hidden_size": 5120,
841
+ "initializer_range": 0.02,
842
+ "intermediate_size": 17920,
843
+ "max_position_embeddings": 16384,
844
+ "model_type": "phi3",
845
+ "num_attention_heads": 40,
846
+ "num_hidden_layers": 40,
847
+ "num_key_value_heads": 10,
848
+ "original_max_position_embeddings": 16384,
849
+ "pad_token_id": 100349,
850
+ "partial_rotary_factor": 1.0,
851
+ "resid_pdrop": 0.0,
852
+ "rms_norm_eps": 1e-05,
853
+ "rope_scaling": null,
854
+ "rope_theta": 250000,
855
+ "sliding_window": null,
856
+ "tie_word_embeddings": false,
857
+ "torch_dtype": "bfloat16",
858
+ "transformers_version": "4.50.0",
859
+ "use_cache": true,
860
+ "vocab_size": 100352
861
+ }
862
+
863
+ [2025-03-23 22:57:12,612][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-03-23/20-41-58/results/phi4-balanced/C4/checkpoint-416] due to args.save_total_limit
864
+ [2025-03-23 22:57:12,654][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-03-23/20-41-58/results/phi4-balanced/C4/checkpoint-544] due to args.save_total_limit
865
+ [2025-03-23 23:04:12,951][transformers.trainer][INFO] - The following columns in the evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: essay_text, prompt, id_prompt, reference, grades, supporting_text, essay_year, id. If essay_text, prompt, id_prompt, reference, grades, supporting_text, essay_year, id are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
866
+ [2025-03-23 23:04:12,954][transformers.trainer][INFO] -
867
+ ***** Running Evaluation *****
868
+ [2025-03-23 23:04:12,954][transformers.trainer][INFO] - Num examples = 132
869
+ [2025-03-23 23:04:12,954][transformers.trainer][INFO] - Batch size = 16
870
+ [2025-03-23 23:04:39,310][transformers][INFO] - {'accuracy': 0.5833333333333334, 'RMSE': 28.91995221924885, 'QWK': 0.47181628392484354, 'HDIV': 0.007575757575757569, 'Macro_F1': 0.3308645677161419, 'Micro_F1': 0.5833333333333334, 'Weighted_F1': 0.56964396589584, 'Macro_F1_(ignoring_nan)': np.float64(0.5514409461935699)}
871
+ [2025-03-23 23:04:39,311][tensorboardX.summary][INFO] - Summary name eval/Macro_F1_(ignoring_nan) is illegal; using eval/Macro_F1__ignoring_nan_ instead.
872
+ [2025-03-23 23:04:39,313][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-03-23/20-41-58/results/phi4-balanced/C4/checkpoint-608
873
+ [2025-03-23 23:04:39,620][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--phi-4/snapshots/187ef0342fff0eb3333be9f00389385e95ef0b61/config.json
874
+ [2025-03-23 23:04:39,621][transformers.configuration_utils][INFO] - Model config Phi3Config {
875
+ "architectures": [
876
+ "Phi3ForCausalLM"
877
+ ],
878
+ "attention_bias": false,
879
+ "attention_dropout": 0.0,
880
+ "bos_token_id": 100257,
881
+ "embd_pdrop": 0.0,
882
+ "eos_token_id": 100265,
883
+ "hidden_act": "silu",
884
+ "hidden_size": 5120,
885
+ "initializer_range": 0.02,
886
+ "intermediate_size": 17920,
887
+ "max_position_embeddings": 16384,
888
+ "model_type": "phi3",
889
+ "num_attention_heads": 40,
890
+ "num_hidden_layers": 40,
891
+ "num_key_value_heads": 10,
892
+ "original_max_position_embeddings": 16384,
893
+ "pad_token_id": 100349,
894
+ "partial_rotary_factor": 1.0,
895
+ "resid_pdrop": 0.0,
896
+ "rms_norm_eps": 1e-05,
897
+ "rope_scaling": null,
898
+ "rope_theta": 250000,
899
+ "sliding_window": null,
900
+ "tie_word_embeddings": false,
901
+ "torch_dtype": "bfloat16",
902
+ "transformers_version": "4.50.0",
903
+ "use_cache": true,
904
+ "vocab_size": 100352
905
+ }
906
+
907
+ [2025-03-23 23:07:22,340][transformers.trainer][INFO] - The following columns in the evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: essay_text, prompt, id_prompt, reference, grades, supporting_text, essay_year, id. If essay_text, prompt, id_prompt, reference, grades, supporting_text, essay_year, id are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
908
+ [2025-03-23 23:07:22,342][transformers.trainer][INFO] -
909
+ ***** Running Evaluation *****
910
+ [2025-03-23 23:07:22,342][transformers.trainer][INFO] - Num examples = 132
911
+ [2025-03-23 23:07:22,342][transformers.trainer][INFO] - Batch size = 16
912
+ [2025-03-23 23:07:48,658][transformers][INFO] - {'accuracy': 0.5833333333333334, 'RMSE': 28.91995221924885, 'QWK': 0.47181628392484354, 'HDIV': 0.007575757575757569, 'Macro_F1': 0.3308645677161419, 'Micro_F1': 0.5833333333333334, 'Weighted_F1': 0.56964396589584, 'Macro_F1_(ignoring_nan)': np.float64(0.5514409461935699)}
913
+ [2025-03-23 23:07:48,659][tensorboardX.summary][INFO] - Summary name eval/Macro_F1_(ignoring_nan) is illegal; using eval/Macro_F1__ignoring_nan_ instead.
914
+ [2025-03-23 23:07:48,661][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-03-23/20-41-58/results/phi4-balanced/C4/checkpoint-620
915
+ [2025-03-23 23:07:49,135][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--phi-4/snapshots/187ef0342fff0eb3333be9f00389385e95ef0b61/config.json
916
+ [2025-03-23 23:07:49,136][transformers.configuration_utils][INFO] - Model config Phi3Config {
917
+ "architectures": [
918
+ "Phi3ForCausalLM"
919
+ ],
920
+ "attention_bias": false,
921
+ "attention_dropout": 0.0,
922
+ "bos_token_id": 100257,
923
+ "embd_pdrop": 0.0,
924
+ "eos_token_id": 100265,
925
+ "hidden_act": "silu",
926
+ "hidden_size": 5120,
927
+ "initializer_range": 0.02,
928
+ "intermediate_size": 17920,
929
+ "max_position_embeddings": 16384,
930
+ "model_type": "phi3",
931
+ "num_attention_heads": 40,
932
+ "num_hidden_layers": 40,
933
+ "num_key_value_heads": 10,
934
+ "original_max_position_embeddings": 16384,
935
+ "pad_token_id": 100349,
936
+ "partial_rotary_factor": 1.0,
937
+ "resid_pdrop": 0.0,
938
+ "rms_norm_eps": 1e-05,
939
+ "rope_scaling": null,
940
+ "rope_theta": 250000,
941
+ "sliding_window": null,
942
+ "tie_word_embeddings": false,
943
+ "torch_dtype": "bfloat16",
944
+ "transformers_version": "4.50.0",
945
+ "use_cache": true,
946
+ "vocab_size": 100352
947
+ }
948
+
949
+ [2025-03-23 23:07:49,714][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-03-23/20-41-58/results/phi4-balanced/C4/checkpoint-608] due to args.save_total_limit
950
+ [2025-03-23 23:07:49,754][transformers.trainer][INFO] -
951
+
952
+ Training completed. Do not forget to share your model on huggingface.co/models =)
953
+
954
+
955
+ [2025-03-23 23:07:49,754][transformers.trainer][INFO] - Loading best model from /workspace/jbcs2025/outputs/2025-03-23/20-41-58/results/phi4-balanced/C4/checkpoint-576 (score: 0.5040376850605652).
956
+ [2025-03-23 23:07:49,908][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-03-23/20-41-58/results/phi4-balanced/C4/checkpoint-620] due to args.save_total_limit
957
+ [2025-03-23 23:07:49,954][transformers.trainer][INFO] - The following columns in the evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: essay_text, prompt, id_prompt, reference, grades, supporting_text, essay_year, id. If essay_text, prompt, id_prompt, reference, grades, supporting_text, essay_year, id are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
958
+ [2025-03-23 23:07:49,957][transformers.trainer][INFO] -
959
+ ***** Running Evaluation *****
960
+ [2025-03-23 23:07:49,957][transformers.trainer][INFO] - Num examples = 132
961
+ [2025-03-23 23:07:49,957][transformers.trainer][INFO] - Batch size = 16
962
+ [2025-03-23 23:08:16,365][transformers][INFO] - {'accuracy': 0.5984848484848485, 'RMSE': 28.4977404739606, 'QWK': 0.5040376850605652, 'HDIV': 0.007575757575757569, 'Macro_F1': 0.3435825471698113, 'Micro_F1': 0.5984848484848485, 'Weighted_F1': 0.5861349342481418, 'Macro_F1_(ignoring_nan)': np.float64(0.5726375786163521)}
963
+ [2025-03-23 23:08:16,368][tensorboardX.summary][INFO] - Summary name eval/Macro_F1_(ignoring_nan) is illegal; using eval/Macro_F1__ignoring_nan_ instead.
964
+ [2025-03-23 23:08:16,369][__main__][INFO] - Training completed successfully.
965
+ [2025-03-23 23:08:16,369][__main__][INFO] - Running on Test
966
+ [2025-03-23 23:08:16,369][transformers.trainer][INFO] - The following columns in the evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: essay_text, prompt, id_prompt, reference, grades, supporting_text, essay_year, id. If essay_text, prompt, id_prompt, reference, grades, supporting_text, essay_year, id are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
967
+ [2025-03-23 23:08:16,371][transformers.trainer][INFO] -
968
+ ***** Running Evaluation *****
969
+ [2025-03-23 23:08:16,371][transformers.trainer][INFO] - Num examples = 138
970
+ [2025-03-23 23:08:16,371][transformers.trainer][INFO] - Batch size = 16
971
+ [2025-03-23 23:08:44,397][transformers][INFO] - {'accuracy': 0.7028985507246377, 'RMSE': 24.55399256179405, 'QWK': 0.579465541490858, 'HDIV': 0.007246376811594235, 'Macro_F1': 0.27468966776195697, 'Micro_F1': 0.7028985507246377, 'Weighted_F1': 0.6761155293109196, 'Macro_F1_(ignoring_nan)': np.float64(0.3296276013143483)}
972
+ [2025-03-23 23:08:44,398][tensorboardX.summary][INFO] - Summary name eval/Macro_F1_(ignoring_nan) is illegal; using eval/Macro_F1__ignoring_nan_ instead.
973
+ [2025-03-23 23:08:44,400][transformers.trainer][INFO] - Saving model checkpoint to ./results/phi4-balanced/C4/best_model
974
+ [2025-03-23 23:08:44,680][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--phi-4/snapshots/187ef0342fff0eb3333be9f00389385e95ef0b61/config.json
975
+ [2025-03-23 23:08:44,681][transformers.configuration_utils][INFO] - Model config Phi3Config {
976
+ "architectures": [
977
+ "Phi3ForCausalLM"
978
+ ],
979
+ "attention_bias": false,
980
+ "attention_dropout": 0.0,
981
+ "bos_token_id": 100257,
982
+ "embd_pdrop": 0.0,
983
+ "eos_token_id": 100265,
984
+ "hidden_act": "silu",
985
+ "hidden_size": 5120,
986
+ "initializer_range": 0.02,
987
+ "intermediate_size": 17920,
988
+ "max_position_embeddings": 16384,
989
+ "model_type": "phi3",
990
+ "num_attention_heads": 40,
991
+ "num_hidden_layers": 40,
992
+ "num_key_value_heads": 10,
993
+ "original_max_position_embeddings": 16384,
994
+ "pad_token_id": 100349,
995
+ "partial_rotary_factor": 1.0,
996
+ "resid_pdrop": 0.0,
997
+ "rms_norm_eps": 1e-05,
998
+ "rope_scaling": null,
999
+ "rope_theta": 250000,
1000
+ "sliding_window": null,
1001
+ "tie_word_embeddings": false,
1002
+ "torch_dtype": "bfloat16",
1003
+ "transformers_version": "4.50.0",
1004
+ "use_cache": true,
1005
+ "vocab_size": 100352
1006
+ }
1007
+
1008
+ [2025-03-23 23:08:45,141][__main__][INFO] - Fine Tuning Finished.
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbfdfe3dca0425b9fe06367acde7660a01e10437ac0937a022b0b6e8902fb8e3
3
+ size 5432