tsilva
/

clinical-field-mapper-classification

@@ -16,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [distilbert/distilgpt2](https://huggingface.co/distilbert/distilgpt2) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.6943
 ## Model description
@@ -35,9 +35,9 @@ More information needed
 ### Training hyperparameters
 The following hyperparameters were used during training:
-- learning_rate: 0.0005
-- train_batch_size: 512
-- eval_batch_size: 512
 - seed: 42
 - distributed_type: multi-GPU
 - optimizer: OptimizerNames.ADAMW_BNB with betas=(0.9, 0.999) and epsilon=1e-08; no additional optimizer arguments
@@ -48,58 +48,58 @@ The following hyperparameters were used during training:
 ### Training results
-| Training Loss | Epoch | Step  | Validation Loss |
-|:-------------:|:-----:|:-----:|:---------------:|
-| 4.9577        | 1.0   | 486   | 3.5547          |
-| 3.1944        | 2.0   | 972   | 2.6211          |
-| 2.5386        | 3.0   | 1458  | 2.1523          |
-| 2.1691        | 4.0   | 1944  | 1.8564          |
-| 1.9271        | 5.0   | 2430  | 1.6533          |
-| 1.753         | 6.0   | 2916  | 1.5059          |
-| 1.6207        | 7.0   | 3402  | 1.3906          |
-| 1.5174        | 8.0   | 3888  | 1.2998          |
-| 1.433         | 9.0   | 4374  | 1.2236          |
-| 1.3652        | 10.0  | 4860  | 1.1621          |
-| 1.3062        | 11.0  | 5346  | 1.1113          |
-| 1.2586        | 12.0  | 5832  | 1.0674          |
-| 1.2175        | 13.0  | 6318  | 1.0283          |
-| 1.1805        | 14.0  | 6804  | 0.9956          |
-| 1.148         | 15.0  | 7290  | 0.9668          |
-| 1.1207        | 16.0  | 7776  | 0.9399          |
-| 1.0959        | 17.0  | 8262  | 0.9165          |
-| 1.0718        | 18.0  | 8748  | 0.8979          |
-| 1.0529        | 19.0  | 9234  | 0.8813          |
-| 1.036         | 20.0  | 9720  | 0.8652          |
-| 1.0196        | 21.0  | 10206 | 0.8511          |
-| 1.0057        | 22.0  | 10692 | 0.8398          |
-| 0.9922        | 23.0  | 11178 | 0.8252          |
-| 0.9802        | 24.0  | 11664 | 0.8135          |
-| 0.9686        | 25.0  | 12150 | 0.8022          |
-| 0.9621        | 26.0  | 12636 | 0.7974          |
-| 0.9494        | 27.0  | 13122 | 0.7852          |
-| 0.9415        | 28.0  | 13608 | 0.7788          |
-| 0.9358        | 29.0  | 14094 | 0.7700          |
-| 0.9276        | 30.0  | 14580 | 0.7642          |
-| 0.9188        | 31.0  | 15066 | 0.7622          |
-| 0.9128        | 32.0  | 15552 | 0.7510          |
-| 0.9076        | 33.0  | 16038 | 0.7471          |
-| 0.9022        | 34.0  | 16524 | 0.7397          |
-| 0.8975        | 35.0  | 17010 | 0.7397          |
-| 0.8938        | 36.0  | 17496 | 0.7344          |
-| 0.8866        | 37.0  | 17982 | 0.7271          |
-| 0.8842        | 38.0  | 18468 | 0.7197          |
-| 0.88          | 39.0  | 18954 | 0.7202          |
-| 0.8775        | 40.0  | 19440 | 0.7183          |
-| 0.8742        | 41.0  | 19926 | 0.7144          |
-| 0.8704        | 42.0  | 20412 | 0.7090          |
-| 0.8691        | 43.0  | 20898 | 0.7085          |
-| 0.8644        | 44.0  | 21384 | 0.7036          |
-| 0.8628        | 45.0  | 21870 | 0.7017          |
-| 0.8567        | 46.0  | 22356 | 0.7036          |
-| 0.8579        | 47.0  | 22842 | 0.6987          |
-| 0.8546        | 48.0  | 23328 | 0.6973          |
-| 0.8555        | 49.0  | 23814 | 0.6934          |
-| 0.8509        | 50.0  | 24300 | 0.6943          |
 ### Framework versions

 This model is a fine-tuned version of [distilbert/distilgpt2](https://huggingface.co/distilbert/distilgpt2) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.5728
 ## Model description
 ### Training hyperparameters
 The following hyperparameters were used during training:
+- learning_rate: 0.005
+- train_batch_size: 3072
+- eval_batch_size: 1024
 - seed: 42
 - distributed_type: multi-GPU
 - optimizer: OptimizerNames.ADAMW_BNB with betas=(0.9, 0.999) and epsilon=1e-08; no additional optimizer arguments
 ### Training results
+| Training Loss | Epoch | Step | Validation Loss |
+|:-------------:|:-----:|:----:|:---------------:|
+| 8.921         | 1.0   | 9    | 7.9531          |
+| 6.1949        | 2.0   | 18   | 5.2070          |
+| 4.6541        | 3.0   | 27   | 4.0664          |
+| 3.6725        | 4.0   | 36   | 3.2637          |
+| 2.9716        | 5.0   | 45   | 2.6895          |
+| 2.4881        | 6.0   | 54   | 2.3086          |
+| 2.1437        | 7.0   | 63   | 2.0430          |
+| 1.8888        | 8.0   | 72   | 1.8438          |
+| 1.7087        | 9.0   | 81   | 1.6875          |
+| 1.5463        | 10.0  | 90   | 1.5508          |
+| 1.4124        | 11.0  | 99   | 1.4395          |
+| 1.3009        | 12.0  | 108  | 1.3467          |
+| 1.2058        | 13.0  | 117  | 1.2627          |
+| 1.1296        | 14.0  | 126  | 1.2031          |
+| 1.0643        | 15.0  | 135  | 1.1465          |
+| 0.9962        | 16.0  | 144  | 1.0898          |
+| 0.9387        | 17.0  | 153  | 1.0449          |
+| 0.8919        | 18.0  | 162  | 1.0049          |
+| 0.8522        | 19.0  | 171  | 0.9648          |
+| 0.8161        | 20.0  | 180  | 0.9346          |
+| 0.7829        | 21.0  | 189  | 0.8999          |
+| 0.7489        | 22.0  | 198  | 0.8784          |
+| 0.7249        | 23.0  | 207  | 0.8516          |
+| 0.6945        | 24.0  | 216  | 0.8311          |
+| 0.6763        | 25.0  | 225  | 0.8076          |
+| 0.6529        | 26.0  | 234  | 0.7915          |
+| 0.6309        | 27.0  | 243  | 0.7793          |
+| 0.6121        | 28.0  | 252  | 0.7617          |
+| 0.6009        | 29.0  | 261  | 0.7485          |
+| 0.5841        | 30.0  | 270  | 0.7314          |
+| 0.5598        | 31.0  | 279  | 0.7197          |
+| 0.5529        | 32.0  | 288  | 0.7085          |
+| 0.5378        | 33.0  | 297  | 0.6997          |
+| 0.522         | 34.0  | 306  | 0.6846          |
+| 0.5097        | 35.0  | 315  | 0.6650          |
+| 0.5017        | 36.0  | 324  | 0.6602          |
+| 0.4889        | 37.0  | 333  | 0.6567          |
+| 0.4795        | 38.0  | 342  | 0.6426          |
+| 0.4682        | 39.0  | 351  | 0.6396          |
+| 0.4646        | 40.0  | 360  | 0.6323          |
+| 0.4526        | 41.0  | 369  | 0.6226          |
+| 0.4474        | 42.0  | 378  | 0.6133          |
+| 0.4387        | 43.0  | 387  | 0.6040          |
+| 0.432         | 44.0  | 396  | 0.6064          |
+| 0.4258        | 45.0  | 405  | 0.6011          |
+| 0.4194        | 46.0  | 414  | 0.5938          |
+| 0.4113        | 47.0  | 423  | 0.5854          |
+| 0.4076        | 48.0  | 432  | 0.5850          |
+| 0.402         | 49.0  | 441  | 0.5703          |
+| 0.3934        | 50.0  | 450  | 0.5728          |
 ### Framework versions

config.json CHANGED Viewed

@@ -335,18 +335,17 @@
     "323": "tumor_stage_n",
     "324": "tumor_stage_t",
     "325": "ultrasound_doppler_grade",
-    "326": "unknown_field",
-    "327": "urine_albumin_creatinine_ratio",
-    "328": "visit_end_date",
-    "329": "visit_id",
-    "330": "visit_start_date",
-    "331": "visit_type",
-    "332": "wbc_count",
-    "333": "weight",
-    "334": "worsening_heart_failure_episode_order",
-    "335": "worsening_heart_failure_event_type",
-    "336": "worsening_heart_failure_flag",
-    "337": "worsening_heart_failure_start_date"
   },
   "initializer_range": 0.02,
   "label2id": {
@@ -676,18 +675,17 @@
     "tumor_stage_n": 323,
     "tumor_stage_t": 324,
     "ultrasound_doppler_grade": 325,
-    "unknown_field": 326,
-    "urine_albumin_creatinine_ratio": 327,
-    "visit_end_date": 328,
-    "visit_id": 329,
-    "visit_start_date": 330,
-    "visit_type": 331,
-    "wbc_count": 332,
-    "weight": 333,
-    "worsening_heart_failure_episode_order": 334,
-    "worsening_heart_failure_event_type": 335,
-    "worsening_heart_failure_flag": 336,
-    "worsening_heart_failure_start_date": 337
   },
   "layer_norm_epsilon": 1e-05,
   "model_type": "gpt2",

     "323": "tumor_stage_n",
     "324": "tumor_stage_t",
     "325": "ultrasound_doppler_grade",
+    "326": "urine_albumin_creatinine_ratio",
+    "327": "visit_end_date",
+    "328": "visit_id",
+    "329": "visit_start_date",
+    "330": "visit_type",
+    "331": "wbc_count",
+    "332": "weight",
+    "333": "worsening_heart_failure_episode_order",
+    "334": "worsening_heart_failure_event_type",
+    "335": "worsening_heart_failure_flag",
+    "336": "worsening_heart_failure_start_date"
   },
   "initializer_range": 0.02,
   "label2id": {
     "tumor_stage_n": 323,
     "tumor_stage_t": 324,
     "ultrasound_doppler_grade": 325,
+    "urine_albumin_creatinine_ratio": 326,
+    "visit_end_date": 327,
+    "visit_id": 328,
+    "visit_start_date": 329,
+    "visit_type": 330,
+    "wbc_count": 331,
+    "weight": 332,
+    "worsening_heart_failure_episode_order": 333,
+    "worsening_heart_failure_event_type": 334,
+    "worsening_heart_failure_flag": 335,
+    "worsening_heart_failure_start_date": 336
   },
   "layer_norm_epsilon": 1e-05,
   "model_type": "gpt2",

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:db1daabaa4834518963e72874d49c0f55e880948963fbdf5f47a38fb01bd6d9b
-size 164353504

 version https://git-lfs.github.com/spec/v1
+oid sha256:3da2a50385230c5fbc8a06c831e6d9923d39a4e7d1a1622f21c93d585cfb33d1
+size 164351968

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:633ad78c5a738c48af0a15fcca5bb413c8061991604c00897f41351ae8c70650
 size 7416

 version https://git-lfs.github.com/spec/v1
+oid sha256:20f6401f4171ba69319ed6ea406c608618cc9b7cff9c69647950762dfe6d6707
 size 7416