Mason72 committed on
Commit
31e6240
·
verified ·
1 Parent(s): 659213f

End of training

Browse files
README.md CHANGED
@@ -1,9 +1,11 @@
1
  ---
2
  library_name: transformers
3
  license: apache-2.0
4
- base_model: distilbert/distilbert-base-uncased
5
  tags:
6
  - generated_from_trainer
 
 
7
  model-index:
8
  - name: test_trainer
9
  results: []
@@ -14,7 +16,10 @@ should probably proofread and complete it, then remove this comment. -->
14
 
15
  # test_trainer
16
 
17
- This model is a fine-tuned version of [distilbert/distilbert-base-uncased](https://huggingface.co/distilbert/distilbert-base-uncased) on an unknown dataset.
 
 
 
18
 
19
  ## Model description
20
 
@@ -41,6 +46,15 @@ The following hyperparameters were used during training:
41
  - lr_scheduler_type: linear
42
  - num_epochs: 3.0
43
 
 
 
 
 
 
 
 
 
 
44
  ### Framework versions
45
 
46
  - Transformers 4.50.3
 
1
  ---
2
  library_name: transformers
3
  license: apache-2.0
4
+ base_model: google/electra-small-discriminator
5
  tags:
6
  - generated_from_trainer
7
+ metrics:
8
+ - accuracy
9
  model-index:
10
  - name: test_trainer
11
  results: []
 
16
 
17
  # test_trainer
18
 
19
+ This model is a fine-tuned version of [google/electra-small-discriminator](https://huggingface.co/google/electra-small-discriminator) on an unknown dataset.
20
+ It achieves the following results on the evaluation set:
21
+ - Loss: 0.2753
22
+ - Accuracy: 0.889
23
 
24
  ## Model description
25
 
 
46
  - lr_scheduler_type: linear
47
  - num_epochs: 3.0
48
 
49
+ ### Training results
50
+
51
+ | Training Loss | Epoch | Step | Validation Loss | Accuracy |
52
+ |:-------------:|:-----:|:----:|:---------------:|:--------:|
53
+ | No log | 1.0 | 125 | 0.2798 | 0.899 |
54
+ | No log | 2.0 | 250 | 0.2464 | 0.906 |
55
+ | No log | 3.0 | 375 | 0.2753 | 0.889 |
56
+
57
+
58
  ### Framework versions
59
 
60
  - Transformers 4.50.3
config.json CHANGED
@@ -1,24 +1,30 @@
1
  {
2
- "activation": "gelu",
3
  "architectures": [
4
- "DistilBertForSequenceClassification"
5
  ],
6
- "attention_dropout": 0.1,
7
- "dim": 768,
8
- "dropout": 0.1,
9
- "hidden_dim": 3072,
 
 
10
  "initializer_range": 0.02,
 
 
11
  "max_position_embeddings": 512,
12
- "model_type": "distilbert",
13
- "n_heads": 12,
14
- "n_layers": 6,
15
  "pad_token_id": 0,
 
16
  "problem_type": "single_label_classification",
17
- "qa_dropout": 0.1,
18
- "seq_classif_dropout": 0.2,
19
- "sinusoidal_pos_embds": false,
20
- "tie_weights_": true,
21
  "torch_dtype": "float32",
22
  "transformers_version": "4.50.3",
 
 
23
  "vocab_size": 30522
24
  }
 
1
  {
 
2
  "architectures": [
3
+ "ElectraForSequenceClassification"
4
  ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "embedding_size": 128,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 256,
11
  "initializer_range": 0.02,
12
+ "intermediate_size": 1024,
13
+ "layer_norm_eps": 1e-12,
14
  "max_position_embeddings": 512,
15
+ "model_type": "electra",
16
+ "num_attention_heads": 4,
17
+ "num_hidden_layers": 12,
18
  "pad_token_id": 0,
19
+ "position_embedding_type": "absolute",
20
  "problem_type": "single_label_classification",
21
+ "summary_activation": "gelu",
22
+ "summary_last_dropout": 0.1,
23
+ "summary_type": "first",
24
+ "summary_use_proj": true,
25
  "torch_dtype": "float32",
26
  "transformers_version": "4.50.3",
27
+ "type_vocab_size": 2,
28
+ "use_cache": true,
29
  "vocab_size": 30522
30
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:25f879db06e139b14ae78640db0da000cf0b79eebc52722c05c976d9175520d3
3
- size 267832560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65f36b487e9cb0a354551b05e6c3b8d551e7e963da34609c2ce77e6037e90daf
3
+ size 54221200
runs/Apr04_01-48-01_7b5845568615/events.out.tfevents.1743746426.7b5845568615.717.9 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad3f82f1da04a85e58fe1f07ae23884a59ef1d56cd2804c209ca81cfe3be5a1b
3
+ size 6474