voroninip
/

bert-paper-classifier-arxiv

@@ -9,8 +9,6 @@ metrics:
 model-index:
 - name: bert-paper-classifier-arxiv
   results: []
-datasets:
-- arxiv-community/arxiv_dataset
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -20,8 +18,8 @@ should probably proofread and complete it, then remove this comment. -->
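 This model is a fine-tuned version of [microsoft/deberta-v3-base](https://huggingface.co/microsoft/deberta-v3-base) on a dataset whose name was not recorded by the Trainer (an earlier revision of this card's metadata listed `arxiv-community/arxiv_dataset`).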
 It achieves the following results on the evaluation set:
-- Loss: 3.7652
-- Accuracy: 0.31
 ## Model description
@@ -41,28 +39,30 @@ More information needed
 The following hyperparameters were used during training:
 - learning_rate: 5e-05
-- train_batch_size: 64
-- eval_batch_size: 8
 - seed: 42
-- gradient_accumulation_steps: 2
-- total_train_batch_size: 128
 - optimizer: Adafactor (`OptimizerNames.ADAFACTOR`) with no additional optimizer arguments
 - lr_scheduler_type: cosine
-- num_epochs: 2
-- mixed_precision_training: Native AMP
 ### Training results
-| Training Loss | Epoch | Step | Validation Loss | Accuracy |
-|:-------------:|:-----:|:----:|:---------------:|:--------:|
-| No log        | 1.0   | 8    | 4.0299          | 0.31     |
-| No log        | 1.8   | 14   | 3.7652          | 0.31     |
 ### Framework versions
-- Transformers 4.48.3
-- Pytorch 2.5.1+cu124
 - Datasets 3.5.0
-- Tokenizers 0.21.0

 model-index:
 - name: bert-paper-classifier-arxiv
   results: []
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
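 This model is a fine-tuned version of [microsoft/deberta-v3-base](https://huggingface.co/microsoft/deberta-v3-base) on a dataset whose name was not recorded by the Trainer (an earlier revision of this card's metadata listed `arxiv-community/arxiv_dataset`).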
 It achieves the following results on the evaluation set:
+- Loss: 1.1651
+- Accuracy: 0.6854
 ## Model description
 The following hyperparameters were used during training:
 - learning_rate: 5e-05
+- train_batch_size: 128
+- eval_batch_size: 16
 - seed: 42
+- gradient_accumulation_steps: 3
+- total_train_batch_size: 384
 - optimizer: Adafactor (`OptimizerNames.ADAFACTOR`) with no additional optimizer arguments
 - lr_scheduler_type: cosine
+- num_epochs: 5
 ### Training results
+| Training Loss | Epoch  | Step | Validation Loss | Accuracy |
+|:-------------:|:------:|:----:|:---------------:|:--------:|
+| No log        | 1.0    | 97   | 1.5356          | 0.6095   |
+| 2.3002        | 2.0    | 194  | 1.3083          | 0.6656   |
+| 1.5053        | 3.0    | 291  | 1.2135          | 0.6822   |
+| 1.3177        | 4.0    | 388  | 1.1702          | 0.6846   |
+| 1.1984        | 4.9550 | 480  | 1.1651          | 0.6854   |
 ### Framework versions
+- Transformers 4.50.3
+- Pytorch 2.6.0+cu124
 - Datasets 3.5.0
+- Tokenizers 0.21.1

config.json CHANGED Viewed

@@ -1,5 +1,4 @@
 {
-  "_name_or_path": "microsoft/deberta-v3-base",
   "architectures": [
     "DebertaV2ForSequenceClassification"
   ],
@@ -286,7 +285,7 @@
   "relative_attention": true,
   "share_att_key": true,
   "torch_dtype": "float32",
-  "transformers_version": "4.48.3",
   "type_vocab_size": 0,
   "vocab_size": 128100
 }

 {
   "architectures": [
     "DebertaV2ForSequenceClassification"
   ],
   "relative_attention": true,
   "share_att_key": true,
   "torch_dtype": "float32",
+  "transformers_version": "4.50.3",
   "type_vocab_size": 0,
   "vocab_size": 128100
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7f3fb104f4ad4cff08e2ea25577c49fbeaddfcc3e35e1c4d32e3404a53f72735
 size 738100720

 version https://git-lfs.github.com/spec/v1
+oid sha256:a8b7d00bce34e155430689ef88afc5c1e584376baa041d826656bc856329ad72
 size 738100720

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b4c90c3ee118220b93359f38e74e9dd3ff5792a4636d790cbb5edad798236fff
-size 5304

 version https://git-lfs.github.com/spec/v1
+oid sha256:d6ce3aab3f6393c7f4627d687668ded8e3aa2050a9dada8059243cbbbfc504ea
+size 5368