End of training

Files changed (4)

README.md CHANGED

@@ -1,9 +1,11 @@
 ---
 library_name: transformers
 license: apache-2.0
-base_model: distilbert/distilbert-base-uncased
 tags:
 - generated_from_trainer
 model-index:
 - name: test_trainer
   results: []
@@ -14,7 +16,10 @@ should probably proofread and complete it, then remove this comment. -->
 # test_trainer
-This model is a fine-tuned version of [distilbert/distilbert-base-uncased](https://huggingface.co/distilbert/distilbert-base-uncased) on an unknown dataset.
 ## Model description
@@ -41,6 +46,15 @@ The following hyperparameters were used during training:
 - lr_scheduler_type: linear
 - num_epochs: 3.0
 ### Framework versions
 - Transformers 4.50.3

 ---
 library_name: transformers
 license: apache-2.0
+base_model: google/electra-small-discriminator
 tags:
 - generated_from_trainer
+metrics:
+- accuracy
 model-index:
 - name: test_trainer
   results: []
 # test_trainer
+This model is a fine-tuned version of [google/electra-small-discriminator](https://huggingface.co/google/electra-small-discriminator) on an unknown dataset.
+It achieves the following results on the evaluation set:
+- Loss: 0.2753
+- Accuracy: 0.889
 ## Model description
 - lr_scheduler_type: linear
 - num_epochs: 3.0
+### Training results
+| Training Loss | Epoch | Step | Validation Loss | Accuracy |
+|:-------------:|:-----:|:----:|:---------------:|:--------:|
+| No log        | 1.0   | 125  | 0.2798          | 0.899    |
+| No log        | 2.0   | 250  | 0.2464          | 0.906    |
+| No log        | 3.0   | 375  | 0.2753          | 0.889    |
 ### Framework versions
 - Transformers 4.50.3

config.json CHANGED

@@ -1,24 +1,30 @@
 {
-  "activation": "gelu",
   "architectures": [
-    "DistilBertForSequenceClassification"
   ],
-  "attention_dropout": 0.1,
-  "dim": 768,
-  "dropout": 0.1,
-  "hidden_dim": 3072,
   "initializer_range": 0.02,
   "max_position_embeddings": 512,
-  "model_type": "distilbert",
-  "n_heads": 12,
-  "n_layers": 6,
   "pad_token_id": 0,
   "problem_type": "single_label_classification",
-  "qa_dropout": 0.1,
-  "seq_classif_dropout": 0.2,
-  "sinusoidal_pos_embds": false,
-  "tie_weights_": true,
   "torch_dtype": "float32",
   "transformers_version": "4.50.3",
   "vocab_size": 30522
 }

 {
   "architectures": [
+    "ElectraForSequenceClassification"
   ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "embedding_size": 128,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 256,
   "initializer_range": 0.02,
+  "intermediate_size": 1024,
+  "layer_norm_eps": 1e-12,
   "max_position_embeddings": 512,
+  "model_type": "electra",
+  "num_attention_heads": 4,
+  "num_hidden_layers": 12,
   "pad_token_id": 0,
+  "position_embedding_type": "absolute",
   "problem_type": "single_label_classification",
+  "summary_activation": "gelu",
+  "summary_last_dropout": 0.1,
+  "summary_type": "first",
+  "summary_use_proj": true,
   "torch_dtype": "float32",
   "transformers_version": "4.50.3",
+  "type_vocab_size": 2,
+  "use_cache": true,
   "vocab_size": 30522
 }

model.safetensors CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:25f879db06e139b14ae78640db0da000cf0b79eebc52722c05c976d9175520d3
-size 267832560

 version https://git-lfs.github.com/spec/v1
+oid sha256:65f36b487e9cb0a354551b05e6c3b8d551e7e963da34609c2ce77e6037e90daf
+size 54221200

runs/Apr04_01-48-01_7b5845568615/events.out.tfevents.1743746426.7b5845568615.717.9 ADDED

+version https://git-lfs.github.com/spec/v1
+oid sha256:ad3f82f1da04a85e58fe1f07ae23884a59ef1d56cd2804c209ca81cfe3be5a1b
+size 6474