Spaces:
Running
Running
Commit
·
e712656
1
Parent(s):
010db3f
fix: ran cells
Browse files- notebooks/03_fine_tuning.ipynb +43 -64
notebooks/03_fine_tuning.ipynb
CHANGED
|
@@ -1032,7 +1032,7 @@
|
|
| 1032 |
},
|
| 1033 |
{
|
| 1034 |
"cell_type": "code",
|
| 1035 |
-
"execution_count":
|
| 1036 |
"metadata": {},
|
| 1037 |
"outputs": [
|
| 1038 |
{
|
|
@@ -1041,84 +1041,63 @@
|
|
| 1041 |
"text": [
|
| 1042 |
"Starting training...\n",
|
| 1043 |
"Training for 32 steps with 2 gradient accumulation steps\n",
|
| 1044 |
-
"\n"
|
| 1045 |
-
|
| 1046 |
-
},
|
| 1047 |
-
{
|
| 1048 |
-
"name": "stderr",
|
| 1049 |
-
"output_type": "stream",
|
| 1050 |
-
"text": [
|
| 1051 |
-
"/home/y-bornachot/venvs/ntv3-env/lib/python3.12/site-packages/torch/amp/autocast_mode.py:287: UserWarning: In CPU autocast, but the target dtype is not supported. Disabling autocast.\n",
|
| 1052 |
-
"CPU Autocast only supports dtype of torch.bfloat16, torch.float16 currently.\n",
|
| 1053 |
-
" warnings.warn(error_message)\n",
|
| 1054 |
-
"/home/y-bornachot/venvs/ntv3-env/lib/python3.12/site-packages/torchmetrics/utilities/prints.py:43: UserWarning: The variance of predictions or target is close to zero. This can cause instability in Pearson correlationcoefficient, leading to wrong results. Consider re-scaling the input if possible or computing using alarger dtype (currently using torch.float32). Setting the correlation coefficient to nan.\n",
|
| 1055 |
-
" warnings.warn(*args, **kwargs)\n",
|
| 1056 |
-
"/tmp/ipykernel_1758159/1960846655.py:68: RuntimeWarning: Mean of empty slice\n",
|
| 1057 |
-
" metrics_dict[\"metrics_scaled/mean/pearson\"] = np.nanmean(correlations_scaled)\n",
|
| 1058 |
-
"/tmp/ipykernel_1758159/1960846655.py:77: RuntimeWarning: Mean of empty slice\n",
|
| 1059 |
-
" metrics_dict[\"metrics_raw/mean/pearson\"] = np.nanmean(correlations_raw)\n"
|
| 1060 |
-
]
|
| 1061 |
-
},
|
| 1062 |
-
{
|
| 1063 |
-
"name": "stdout",
|
| 1064 |
-
"output_type": "stream",
|
| 1065 |
-
"text": [
|
| 1066 |
-
"Step 1/32 | Loss: 0.8378 | Mean Pearson: nan | LR: 1.17e-09 | Tokens: 4,096\n",
|
| 1067 |
"\n",
|
| 1068 |
"Running validation at step 0...\n",
|
| 1069 |
-
" Validation Loss: 0.
|
| 1070 |
-
" Validation Mean Pearson: -0.
|
| 1071 |
-
" ENCFF884LDL/pearson: -0.
|
| 1072 |
-
"Step 3/32 | Loss: 0.
|
| 1073 |
-
"Step 5/32 | Loss:
|
| 1074 |
"\n",
|
| 1075 |
"Running validation at step 4...\n",
|
| 1076 |
-
" Validation Loss: 0.
|
| 1077 |
-
" Validation Mean Pearson: -0.
|
| 1078 |
-
" ENCFF884LDL/pearson: -0.
|
| 1079 |
-
"Step 7/32 | Loss: 0.
|
| 1080 |
-
"Step 9/32 | Loss: 0.
|
| 1081 |
"\n",
|
| 1082 |
"Running validation at step 8...\n",
|
| 1083 |
-
" Validation Loss: 0.
|
| 1084 |
-
" Validation Mean Pearson: -0.
|
| 1085 |
-
" ENCFF884LDL/pearson: -0.
|
| 1086 |
-
"Step 11/32 | Loss: 0.
|
| 1087 |
-
"Step 13/32 | Loss: 0.
|
| 1088 |
"\n",
|
| 1089 |
"Running validation at step 12...\n",
|
| 1090 |
-
" Validation Loss: 0.
|
| 1091 |
-
" Validation Mean Pearson: -0.
|
| 1092 |
-
" ENCFF884LDL/pearson: -0.
|
| 1093 |
-
"Step 15/32 | Loss: 0.
|
| 1094 |
-
"Step 17/32 | Loss: 0.
|
| 1095 |
"\n",
|
| 1096 |
"Running validation at step 16...\n",
|
| 1097 |
-
" Validation Loss: 0.
|
| 1098 |
-
" Validation Mean Pearson: -0.
|
| 1099 |
-
" ENCFF884LDL/pearson: -0.
|
| 1100 |
-
"Step 19/32 | Loss: 0.
|
| 1101 |
-
"Step 21/32 | Loss: 0.
|
| 1102 |
"\n",
|
| 1103 |
"Running validation at step 20...\n",
|
| 1104 |
-
" Validation Loss: 0.
|
| 1105 |
-
" Validation Mean Pearson: -0.
|
| 1106 |
-
" ENCFF884LDL/pearson: -0.
|
| 1107 |
-
"Step 23/32 | Loss: 0.
|
| 1108 |
-
"Step 25/32 | Loss: 0.
|
| 1109 |
"\n",
|
| 1110 |
"Running validation at step 24...\n",
|
| 1111 |
-
" Validation Loss: 0.
|
| 1112 |
-
" Validation Mean Pearson: -0.
|
| 1113 |
-
" ENCFF884LDL/pearson: -0.
|
| 1114 |
-
"Step 27/32 | Loss: 0.
|
| 1115 |
-
"Step 29/32 | Loss: 0.
|
| 1116 |
"\n",
|
| 1117 |
"Running validation at step 28...\n",
|
| 1118 |
-
" Validation Loss: 0.
|
| 1119 |
-
" Validation Mean Pearson: -0.
|
| 1120 |
-
" ENCFF884LDL/pearson: -0.
|
| 1121 |
-
"Step 31/32 | Loss: 0.
|
| 1122 |
"\n",
|
| 1123 |
"Training completed after 32 steps!\n"
|
| 1124 |
]
|
|
@@ -1229,7 +1208,7 @@
|
|
| 1229 |
},
|
| 1230 |
{
|
| 1231 |
"cell_type": "code",
|
| 1232 |
-
"execution_count":
|
| 1233 |
"metadata": {},
|
| 1234 |
"outputs": [],
|
| 1235 |
"source": [
|
|
|
|
| 1032 |
},
|
| 1033 |
{
|
| 1034 |
"cell_type": "code",
|
| 1035 |
+
"execution_count": 42,
|
| 1036 |
"metadata": {},
|
| 1037 |
"outputs": [
|
| 1038 |
{
|
|
|
|
| 1041 |
"text": [
|
| 1042 |
"Starting training...\n",
|
| 1043 |
"Training for 32 steps with 2 gradient accumulation steps\n",
|
| 1044 |
+
"\n",
|
| 1045 |
+
"Step 1/32 | Loss: 0.5661 | Mean Pearson: -0.0525 | Tokens: 4,096\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1046 |
"\n",
|
| 1047 |
"Running validation at step 0...\n",
|
| 1048 |
+
" Validation Loss: 0.3987\n",
|
| 1049 |
+
" Validation Mean Pearson: -0.0426\n",
|
| 1050 |
+
" ENCFF884LDL/pearson: -0.0426\n",
|
| 1051 |
+
"Step 3/32 | Loss: 0.3825 | Mean Pearson: -0.0112 | Tokens: 12,288\n",
|
| 1052 |
+
"Step 5/32 | Loss: 1.1384 | Mean Pearson: -0.0777 | Tokens: 20,480\n",
|
| 1053 |
"\n",
|
| 1054 |
"Running validation at step 4...\n",
|
| 1055 |
+
" Validation Loss: 0.4381\n",
|
| 1056 |
+
" Validation Mean Pearson: -0.0017\n",
|
| 1057 |
+
" ENCFF884LDL/pearson: -0.0017\n",
|
| 1058 |
+
"Step 7/32 | Loss: 0.4961 | Mean Pearson: -0.0188 | Tokens: 28,672\n",
|
| 1059 |
+
"Step 9/32 | Loss: 0.4903 | Mean Pearson: -0.1522 | Tokens: 36,864\n",
|
| 1060 |
"\n",
|
| 1061 |
"Running validation at step 8...\n",
|
| 1062 |
+
" Validation Loss: 0.3429\n",
|
| 1063 |
+
" Validation Mean Pearson: -0.0997\n",
|
| 1064 |
+
" ENCFF884LDL/pearson: -0.0997\n",
|
| 1065 |
+
"Step 11/32 | Loss: 0.4597 | Mean Pearson: -0.0199 | Tokens: 45,056\n",
|
| 1066 |
+
"Step 13/32 | Loss: 0.6507 | Mean Pearson: -0.0256 | Tokens: 53,248\n",
|
| 1067 |
"\n",
|
| 1068 |
"Running validation at step 12...\n",
|
| 1069 |
+
" Validation Loss: 0.3901\n",
|
| 1070 |
+
" Validation Mean Pearson: -0.0786\n",
|
| 1071 |
+
" ENCFF884LDL/pearson: -0.0786\n",
|
| 1072 |
+
"Step 15/32 | Loss: 0.3911 | Mean Pearson: -0.0419 | Tokens: 61,440\n",
|
| 1073 |
+
"Step 17/32 | Loss: 0.4202 | Mean Pearson: -0.0883 | Tokens: 69,632\n",
|
| 1074 |
"\n",
|
| 1075 |
"Running validation at step 16...\n",
|
| 1076 |
+
" Validation Loss: 0.3626\n",
|
| 1077 |
+
" Validation Mean Pearson: -0.0840\n",
|
| 1078 |
+
" ENCFF884LDL/pearson: -0.0840\n",
|
| 1079 |
+
"Step 19/32 | Loss: 0.3608 | Mean Pearson: -0.1057 | Tokens: 77,824\n",
|
| 1080 |
+
"Step 21/32 | Loss: 0.3942 | Mean Pearson: 0.1459 | Tokens: 86,016\n",
|
| 1081 |
"\n",
|
| 1082 |
"Running validation at step 20...\n",
|
| 1083 |
+
" Validation Loss: 0.3281\n",
|
| 1084 |
+
" Validation Mean Pearson: -0.0667\n",
|
| 1085 |
+
" ENCFF884LDL/pearson: -0.0667\n",
|
| 1086 |
+
"Step 23/32 | Loss: 0.4090 | Mean Pearson: 0.0540 | Tokens: 94,208\n",
|
| 1087 |
+
"Step 25/32 | Loss: 0.5151 | Mean Pearson: -0.0076 | Tokens: 102,400\n",
|
| 1088 |
"\n",
|
| 1089 |
"Running validation at step 24...\n",
|
| 1090 |
+
" Validation Loss: 0.2927\n",
|
| 1091 |
+
" Validation Mean Pearson: -0.0409\n",
|
| 1092 |
+
" ENCFF884LDL/pearson: -0.0409\n",
|
| 1093 |
+
"Step 27/32 | Loss: 0.4339 | Mean Pearson: -0.0887 | Tokens: 110,592\n",
|
| 1094 |
+
"Step 29/32 | Loss: 0.4516 | Mean Pearson: -0.0763 | Tokens: 118,784\n",
|
| 1095 |
"\n",
|
| 1096 |
"Running validation at step 28...\n",
|
| 1097 |
+
" Validation Loss: 0.3076\n",
|
| 1098 |
+
" Validation Mean Pearson: -0.0861\n",
|
| 1099 |
+
" ENCFF884LDL/pearson: -0.0861\n",
|
| 1100 |
+
"Step 31/32 | Loss: 0.4121 | Mean Pearson: -0.0530 | Tokens: 126,976\n",
|
| 1101 |
"\n",
|
| 1102 |
"Training completed after 32 steps!\n"
|
| 1103 |
]
|
|
|
|
| 1208 |
},
|
| 1209 |
{
|
| 1210 |
"cell_type": "code",
|
| 1211 |
+
"execution_count": 43,
|
| 1212 |
"metadata": {},
|
| 1213 |
"outputs": [],
|
| 1214 |
"source": [
|