ybornachot commited on
Commit
e712656
·
1 Parent(s): 010db3f

fix: ran cells

Browse files
Files changed (1) hide show
  1. notebooks/03_fine_tuning.ipynb +43 -64
notebooks/03_fine_tuning.ipynb CHANGED
@@ -1032,7 +1032,7 @@
1032
  },
1033
  {
1034
  "cell_type": "code",
1035
- "execution_count": null,
1036
  "metadata": {},
1037
  "outputs": [
1038
  {
@@ -1041,84 +1041,63 @@
1041
  "text": [
1042
  "Starting training...\n",
1043
  "Training for 32 steps with 2 gradient accumulation steps\n",
1044
- "\n"
1045
- ]
1046
- },
1047
- {
1048
- "name": "stderr",
1049
- "output_type": "stream",
1050
- "text": [
1051
- "/home/y-bornachot/venvs/ntv3-env/lib/python3.12/site-packages/torch/amp/autocast_mode.py:287: UserWarning: In CPU autocast, but the target dtype is not supported. Disabling autocast.\n",
1052
- "CPU Autocast only supports dtype of torch.bfloat16, torch.float16 currently.\n",
1053
- " warnings.warn(error_message)\n",
1054
- "/home/y-bornachot/venvs/ntv3-env/lib/python3.12/site-packages/torchmetrics/utilities/prints.py:43: UserWarning: The variance of predictions or target is close to zero. This can cause instability in Pearson correlationcoefficient, leading to wrong results. Consider re-scaling the input if possible or computing using alarger dtype (currently using torch.float32). Setting the correlation coefficient to nan.\n",
1055
- " warnings.warn(*args, **kwargs)\n",
1056
- "/tmp/ipykernel_1758159/1960846655.py:68: RuntimeWarning: Mean of empty slice\n",
1057
- " metrics_dict[\"metrics_scaled/mean/pearson\"] = np.nanmean(correlations_scaled)\n",
1058
- "/tmp/ipykernel_1758159/1960846655.py:77: RuntimeWarning: Mean of empty slice\n",
1059
- " metrics_dict[\"metrics_raw/mean/pearson\"] = np.nanmean(correlations_raw)\n"
1060
- ]
1061
- },
1062
- {
1063
- "name": "stdout",
1064
- "output_type": "stream",
1065
- "text": [
1066
- "Step 1/32 | Loss: 0.8378 | Mean Pearson: nan | LR: 1.17e-09 | Tokens: 4,096\n",
1067
  "\n",
1068
  "Running validation at step 0...\n",
1069
- " Validation Loss: 0.5279\n",
1070
- " Validation Mean Pearson: -0.0192\n",
1071
- " ENCFF884LDL/pearson: -0.0192\n",
1072
- "Step 3/32 | Loss: 0.4650 | Mean Pearson: -0.0149 | LR: 2.50e-09 | Tokens: 12,288\n",
1073
- "Step 5/32 | Loss: 0.3369 | Mean Pearson: -0.1350 | LR: 2.41e-09 | Tokens: 20,480\n",
1074
  "\n",
1075
  "Running validation at step 4...\n",
1076
- " Validation Loss: 0.3878\n",
1077
- " Validation Mean Pearson: -0.1298\n",
1078
- " ENCFF884LDL/pearson: -0.1298\n",
1079
- "Step 7/32 | Loss: 0.3609 | Mean Pearson: -0.0102 | LR: 2.32e-09 | Tokens: 28,672\n",
1080
- "Step 9/32 | Loss: 0.3301 | Mean Pearson: -0.0902 | LR: 2.23e-09 | Tokens: 36,864\n",
1081
  "\n",
1082
  "Running validation at step 8...\n",
1083
- " Validation Loss: 0.4743\n",
1084
- " Validation Mean Pearson: -0.0739\n",
1085
- " ENCFF884LDL/pearson: -0.0739\n",
1086
- "Step 11/32 | Loss: 0.3905 | Mean Pearson: -0.0113 | LR: 2.13e-09 | Tokens: 45,056\n",
1087
- "Step 13/32 | Loss: 0.3181 | Mean Pearson: -0.1564 | LR: 2.02e-09 | Tokens: 53,248\n",
1088
  "\n",
1089
  "Running validation at step 12...\n",
1090
- " Validation Loss: 0.3337\n",
1091
- " Validation Mean Pearson: -0.0650\n",
1092
- " ENCFF884LDL/pearson: -0.0650\n",
1093
- "Step 15/32 | Loss: 0.3638 | Mean Pearson: 0.0295 | LR: 1.91e-09 | Tokens: 61,440\n",
1094
- "Step 17/32 | Loss: 0.4170 | Mean Pearson: -0.0442 | LR: 1.80e-09 | Tokens: 69,632\n",
1095
  "\n",
1096
  "Running validation at step 16...\n",
1097
- " Validation Loss: 0.7969\n",
1098
- " Validation Mean Pearson: -0.0304\n",
1099
- " ENCFF884LDL/pearson: -0.0304\n",
1100
- "Step 19/32 | Loss: 0.5033 | Mean Pearson: -0.0173 | LR: 1.67e-09 | Tokens: 77,824\n",
1101
- "Step 21/32 | Loss: 0.4084 | Mean Pearson: -0.0516 | LR: 1.54e-09 | Tokens: 86,016\n",
1102
  "\n",
1103
  "Running validation at step 20...\n",
1104
- " Validation Loss: 0.3475\n",
1105
- " Validation Mean Pearson: -0.3040\n",
1106
- " ENCFF884LDL/pearson: -0.3040\n",
1107
- "Step 23/32 | Loss: 0.4915 | Mean Pearson: -0.1727 | LR: 1.39e-09 | Tokens: 94,208\n",
1108
- "Step 25/32 | Loss: 0.3654 | Mean Pearson: -0.3257 | LR: 1.23e-09 | Tokens: 102,400\n",
1109
  "\n",
1110
  "Running validation at step 24...\n",
1111
- " Validation Loss: 0.4069\n",
1112
- " Validation Mean Pearson: -0.0551\n",
1113
- " ENCFF884LDL/pearson: -0.0551\n",
1114
- "Step 27/32 | Loss: 0.5344 | Mean Pearson: -0.0604 | LR: 1.04e-09 | Tokens: 110,592\n",
1115
- "Step 29/32 | Loss: 0.3671 | Mean Pearson: -0.0290 | LR: 8.04e-10 | Tokens: 118,784\n",
1116
  "\n",
1117
  "Running validation at step 28...\n",
1118
- " Validation Loss: 0.3162\n",
1119
- " Validation Mean Pearson: -0.1008\n",
1120
- " ENCFF884LDL/pearson: -0.1008\n",
1121
- "Step 31/32 | Loss: 0.5994 | Mean Pearson: -0.0107 | LR: 4.64e-10 | Tokens: 126,976\n",
1122
  "\n",
1123
  "Training completed after 32 steps!\n"
1124
  ]
@@ -1229,7 +1208,7 @@
1229
  },
1230
  {
1231
  "cell_type": "code",
1232
- "execution_count": 27,
1233
  "metadata": {},
1234
  "outputs": [],
1235
  "source": [
 
1032
  },
1033
  {
1034
  "cell_type": "code",
1035
+ "execution_count": 42,
1036
  "metadata": {},
1037
  "outputs": [
1038
  {
 
1041
  "text": [
1042
  "Starting training...\n",
1043
  "Training for 32 steps with 2 gradient accumulation steps\n",
1044
+ "\n",
1045
+ "Step 1/32 | Loss: 0.5661 | Mean Pearson: -0.0525 | Tokens: 4,096\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1046
  "\n",
1047
  "Running validation at step 0...\n",
1048
+ " Validation Loss: 0.3987\n",
1049
+ " Validation Mean Pearson: -0.0426\n",
1050
+ " ENCFF884LDL/pearson: -0.0426\n",
1051
+ "Step 3/32 | Loss: 0.3825 | Mean Pearson: -0.0112 | Tokens: 12,288\n",
1052
+ "Step 5/32 | Loss: 1.1384 | Mean Pearson: -0.0777 | Tokens: 20,480\n",
1053
  "\n",
1054
  "Running validation at step 4...\n",
1055
+ " Validation Loss: 0.4381\n",
1056
+ " Validation Mean Pearson: -0.0017\n",
1057
+ " ENCFF884LDL/pearson: -0.0017\n",
1058
+ "Step 7/32 | Loss: 0.4961 | Mean Pearson: -0.0188 | Tokens: 28,672\n",
1059
+ "Step 9/32 | Loss: 0.4903 | Mean Pearson: -0.1522 | Tokens: 36,864\n",
1060
  "\n",
1061
  "Running validation at step 8...\n",
1062
+ " Validation Loss: 0.3429\n",
1063
+ " Validation Mean Pearson: -0.0997\n",
1064
+ " ENCFF884LDL/pearson: -0.0997\n",
1065
+ "Step 11/32 | Loss: 0.4597 | Mean Pearson: -0.0199 | Tokens: 45,056\n",
1066
+ "Step 13/32 | Loss: 0.6507 | Mean Pearson: -0.0256 | Tokens: 53,248\n",
1067
  "\n",
1068
  "Running validation at step 12...\n",
1069
+ " Validation Loss: 0.3901\n",
1070
+ " Validation Mean Pearson: -0.0786\n",
1071
+ " ENCFF884LDL/pearson: -0.0786\n",
1072
+ "Step 15/32 | Loss: 0.3911 | Mean Pearson: -0.0419 | Tokens: 61,440\n",
1073
+ "Step 17/32 | Loss: 0.4202 | Mean Pearson: -0.0883 | Tokens: 69,632\n",
1074
  "\n",
1075
  "Running validation at step 16...\n",
1076
+ " Validation Loss: 0.3626\n",
1077
+ " Validation Mean Pearson: -0.0840\n",
1078
+ " ENCFF884LDL/pearson: -0.0840\n",
1079
+ "Step 19/32 | Loss: 0.3608 | Mean Pearson: -0.1057 | Tokens: 77,824\n",
1080
+ "Step 21/32 | Loss: 0.3942 | Mean Pearson: 0.1459 | Tokens: 86,016\n",
1081
  "\n",
1082
  "Running validation at step 20...\n",
1083
+ " Validation Loss: 0.3281\n",
1084
+ " Validation Mean Pearson: -0.0667\n",
1085
+ " ENCFF884LDL/pearson: -0.0667\n",
1086
+ "Step 23/32 | Loss: 0.4090 | Mean Pearson: 0.0540 | Tokens: 94,208\n",
1087
+ "Step 25/32 | Loss: 0.5151 | Mean Pearson: -0.0076 | Tokens: 102,400\n",
1088
  "\n",
1089
  "Running validation at step 24...\n",
1090
+ " Validation Loss: 0.2927\n",
1091
+ " Validation Mean Pearson: -0.0409\n",
1092
+ " ENCFF884LDL/pearson: -0.0409\n",
1093
+ "Step 27/32 | Loss: 0.4339 | Mean Pearson: -0.0887 | Tokens: 110,592\n",
1094
+ "Step 29/32 | Loss: 0.4516 | Mean Pearson: -0.0763 | Tokens: 118,784\n",
1095
  "\n",
1096
  "Running validation at step 28...\n",
1097
+ " Validation Loss: 0.3076\n",
1098
+ " Validation Mean Pearson: -0.0861\n",
1099
+ " ENCFF884LDL/pearson: -0.0861\n",
1100
+ "Step 31/32 | Loss: 0.4121 | Mean Pearson: -0.0530 | Tokens: 126,976\n",
1101
  "\n",
1102
  "Training completed after 32 steps!\n"
1103
  ]
 
1208
  },
1209
  {
1210
  "cell_type": "code",
1211
+ "execution_count": 43,
1212
  "metadata": {},
1213
  "outputs": [],
1214
  "source": [