Upload 7 files

Browse files

Files changed (7) hide show

config.json +55 -0
model.safetensors +3 -0
optimizer.pt +3 -0
rng_state.pth +3 -0
scheduler.pt +3 -0
trainer_state.json +455 -0
training_args.bin +3 -0

config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "_name_or_path": "IDEA-CCNL/Erlangshen-DeBERTa-v2-320M-Chinese",
+  "architectures": [
+    "DebertaV2ForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "conv_act": "gelu",
+  "conv_kernel_size": 3,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 1024,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2",
+    "3": "LABEL_3",
+    "4": "LABEL_4",
+    "5": "LABEL_5",
+    "6": "LABEL_6"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2,
+    "LABEL_3": 3,
+    "LABEL_4": 4,
+    "LABEL_5": 5,
+    "LABEL_6": 6
+  },
+  "layer_norm_eps": 1e-07,
+  "max_position_embeddings": 512,
+  "max_relative_positions": -1,
+  "model_type": "deberta-v2",
+  "norm_rel_ebd": "layer_norm",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
+  "pad_token_id": 0,
+  "pooler_dropout": 0,
+  "pooler_hidden_act": "gelu",
+  "pooler_hidden_size": 1024,
+  "pos_att_type": [
+    "c2p",
+    "p2c"
+  ],
+  "position_biased_input": false,
+  "position_buckets": 256,
+  "relative_attention": true,
+  "share_att_key": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.39.3",
+  "type_vocab_size": 0,
+  "vocab_size": 12800
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d25aa6140ed4b2d72ce0f4e5d1d88c168938042a3b58e634ed8160b51e0ca083
+size 1280651436

optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4bfdccdb1d1ae0e5544a7a20f5becd4c6192a9c7f336dccdf15c070a62410f9d
+size 2561537340

rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:28489139e1e95de773495fd7a149c1d0795a7f2a165720f3a2989ae391bd644e
+size 14244

scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8b8f05025975eda8c6fd7151df997f2fdf1334b654149418d27c46930a65f882
+size 1064

trainer_state.json ADDED Viewed

	@@ -0,0 +1,455 @@

+{
+  "best_metric": 0.8554500158408583,
+  "best_model_checkpoint": "cn_output/run-0/checkpoint-5775",
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 5775,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.05,
+      "grad_norm": 43.52214813232422,
+      "learning_rate": 2.7348179693000015e-05,
+      "loss": 1.6704,
+      "step": 100
+    },
+    {
+      "epoch": 0.1,
+      "grad_norm": 16.620838165283203,
+      "learning_rate": 2.6866273442903096e-05,
+      "loss": 1.3854,
+      "step": 200
+    },
+    {
+      "epoch": 0.16,
+      "grad_norm": 14.869579315185547,
+      "learning_rate": 2.638436719280618e-05,
+      "loss": 1.2394,
+      "step": 300
+    },
+    {
+      "epoch": 0.21,
+      "grad_norm": 16.870759963989258,
+      "learning_rate": 2.5902460942709264e-05,
+      "loss": 1.1392,
+      "step": 400
+    },
+    {
+      "epoch": 0.26,
+      "grad_norm": 12.891343116760254,
+      "learning_rate": 2.542055469261235e-05,
+      "loss": 1.0319,
+      "step": 500
+    },
+    {
+      "epoch": 0.31,
+      "grad_norm": 7.518686771392822,
+      "learning_rate": 2.493864844251543e-05,
+      "loss": 1.0726,
+      "step": 600
+    },
+    {
+      "epoch": 0.36,
+      "grad_norm": 9.59931755065918,
+      "learning_rate": 2.4456742192418514e-05,
+      "loss": 1.0907,
+      "step": 700
+    },
+    {
+      "epoch": 0.42,
+      "grad_norm": 15.09188461303711,
+      "learning_rate": 2.3974835942321598e-05,
+      "loss": 1.038,
+      "step": 800
+    },
+    {
+      "epoch": 0.47,
+      "grad_norm": 12.715774536132812,
+      "learning_rate": 2.3492929692224682e-05,
+      "loss": 0.9121,
+      "step": 900
+    },
+    {
+      "epoch": 0.52,
+      "grad_norm": 23.419095993041992,
+      "learning_rate": 2.3011023442127766e-05,
+      "loss": 0.8541,
+      "step": 1000
+    },
+    {
+      "epoch": 0.57,
+      "grad_norm": 24.277725219726562,
+      "learning_rate": 2.252911719203085e-05,
+      "loss": 0.8723,
+      "step": 1100
+    },
+    {
+      "epoch": 0.62,
+      "grad_norm": 22.929729461669922,
+      "learning_rate": 2.2047210941933934e-05,
+      "loss": 0.8625,
+      "step": 1200
+    },
+    {
+      "epoch": 0.68,
+      "grad_norm": 12.207918167114258,
+      "learning_rate": 2.1565304691837015e-05,
+      "loss": 0.8384,
+      "step": 1300
+    },
+    {
+      "epoch": 0.73,
+      "grad_norm": 26.28716278076172,
+      "learning_rate": 2.10833984417401e-05,
+      "loss": 0.8521,
+      "step": 1400
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 9.733681678771973,
+      "learning_rate": 2.0601492191643184e-05,
+      "loss": 0.7984,
+      "step": 1500
+    },
+    {
+      "epoch": 0.83,
+      "grad_norm": 25.5473690032959,
+      "learning_rate": 2.0119585941546264e-05,
+      "loss": 0.7452,
+      "step": 1600
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 28.462318420410156,
+      "learning_rate": 1.963767969144935e-05,
+      "loss": 0.7016,
+      "step": 1700
+    },
+    {
+      "epoch": 0.94,
+      "grad_norm": 30.851057052612305,
+      "learning_rate": 1.9155773441352433e-05,
+      "loss": 0.6933,
+      "step": 1800
+    },
+    {
+      "epoch": 0.99,
+      "grad_norm": 20.66396141052246,
+      "learning_rate": 1.8673867191255517e-05,
+      "loss": 0.6987,
+      "step": 1900
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.7933753943217665,
+      "eval_f1": 0.7855091479380132,
+      "eval_loss": 0.635335385799408,
+      "eval_runtime": 15.4807,
+      "eval_samples_per_second": 122.863,
+      "eval_steps_per_second": 3.876,
+      "step": 1925
+    },
+    {
+      "epoch": 1.04,
+      "grad_norm": 18.86337661743164,
+      "learning_rate": 1.8191960941158598e-05,
+      "loss": 0.5131,
+      "step": 2000
+    },
+    {
+      "epoch": 1.09,
+      "grad_norm": 23.430830001831055,
+      "learning_rate": 1.7710054691061682e-05,
+      "loss": 0.4268,
+      "step": 2100
+    },
+    {
+      "epoch": 1.14,
+      "grad_norm": 11.48133373260498,
+      "learning_rate": 1.7228148440964766e-05,
+      "loss": 0.4388,
+      "step": 2200
+    },
+    {
+      "epoch": 1.19,
+      "grad_norm": 21.7901668548584,
+      "learning_rate": 1.674624219086785e-05,
+      "loss": 0.4276,
+      "step": 2300
+    },
+    {
+      "epoch": 1.25,
+      "grad_norm": 3.37796688079834,
+      "learning_rate": 1.6264335940770934e-05,
+      "loss": 0.3975,
+      "step": 2400
+    },
+    {
+      "epoch": 1.3,
+      "grad_norm": 1.8390240669250488,
+      "learning_rate": 1.578242969067402e-05,
+      "loss": 0.4863,
+      "step": 2500
+    },
+    {
+      "epoch": 1.35,
+      "grad_norm": 0.9483298063278198,
+      "learning_rate": 1.5300523440577103e-05,
+      "loss": 0.4216,
+      "step": 2600
+    },
+    {
+      "epoch": 1.4,
+      "grad_norm": 13.241854667663574,
+      "learning_rate": 1.4818617190480184e-05,
+      "loss": 0.4522,
+      "step": 2700
+    },
+    {
+      "epoch": 1.45,
+      "grad_norm": 13.804593086242676,
+      "learning_rate": 1.4336710940383268e-05,
+      "loss": 0.3998,
+      "step": 2800
+    },
+    {
+      "epoch": 1.51,
+      "grad_norm": 20.864044189453125,
+      "learning_rate": 1.3854804690286352e-05,
+      "loss": 0.3561,
+      "step": 2900
+    },
+    {
+      "epoch": 1.56,
+      "grad_norm": 11.546530723571777,
+      "learning_rate": 1.3372898440189434e-05,
+      "loss": 0.4525,
+      "step": 3000
+    },
+    {
+      "epoch": 1.61,
+      "grad_norm": 21.35649871826172,
+      "learning_rate": 1.2890992190092519e-05,
+      "loss": 0.422,
+      "step": 3100
+    },
+    {
+      "epoch": 1.66,
+      "grad_norm": 9.798705101013184,
+      "learning_rate": 1.2409085939995601e-05,
+      "loss": 0.3685,
+      "step": 3200
+    },
+    {
+      "epoch": 1.71,
+      "grad_norm": 1.4076740741729736,
+      "learning_rate": 1.1927179689898684e-05,
+      "loss": 0.3969,
+      "step": 3300
+    },
+    {
+      "epoch": 1.77,
+      "grad_norm": 2.5313684940338135,
+      "learning_rate": 1.1445273439801768e-05,
+      "loss": 0.3632,
+      "step": 3400
+    },
+    {
+      "epoch": 1.82,
+      "grad_norm": 4.284488677978516,
+      "learning_rate": 1.0963367189704852e-05,
+      "loss": 0.3758,
+      "step": 3500
+    },
+    {
+      "epoch": 1.87,
+      "grad_norm": 87.45575714111328,
+      "learning_rate": 1.0481460939607936e-05,
+      "loss": 0.3407,
+      "step": 3600
+    },
+    {
+      "epoch": 1.92,
+      "grad_norm": 18.49854850769043,
+      "learning_rate": 9.999554689511019e-06,
+      "loss": 0.4158,
+      "step": 3700
+    },
+    {
+      "epoch": 1.97,
+      "grad_norm": 10.284146308898926,
+      "learning_rate": 9.517648439414103e-06,
+      "loss": 0.3669,
+      "step": 3800
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.8496319663512093,
+      "eval_f1": 0.8396298133039037,
+      "eval_loss": 0.6180713176727295,
+      "eval_runtime": 15.5038,
+      "eval_samples_per_second": 122.679,
+      "eval_steps_per_second": 3.87,
+      "step": 3850
+    },
+    {
+      "epoch": 2.03,
+      "grad_norm": 2.4232983589172363,
+      "learning_rate": 9.035742189317185e-06,
+      "loss": 0.2593,
+      "step": 3900
+    },
+    {
+      "epoch": 2.08,
+      "grad_norm": 14.904269218444824,
+      "learning_rate": 8.553835939220268e-06,
+      "loss": 0.1672,
+      "step": 4000
+    },
+    {
+      "epoch": 2.13,
+      "grad_norm": 0.02660948596894741,
+      "learning_rate": 8.071929689123352e-06,
+      "loss": 0.1319,
+      "step": 4100
+    },
+    {
+      "epoch": 2.18,
+      "grad_norm": 0.1543588936328888,
+      "learning_rate": 7.590023439026435e-06,
+      "loss": 0.1873,
+      "step": 4200
+    },
+    {
+      "epoch": 2.23,
+      "grad_norm": 0.04116074740886688,
+      "learning_rate": 7.108117188929519e-06,
+      "loss": 0.1706,
+      "step": 4300
+    },
+    {
+      "epoch": 2.29,
+      "grad_norm": 21.272350311279297,
+      "learning_rate": 6.626210938832602e-06,
+      "loss": 0.2408,
+      "step": 4400
+    },
+    {
+      "epoch": 2.34,
+      "grad_norm": 0.016322173178195953,
+      "learning_rate": 6.144304688735686e-06,
+      "loss": 0.1878,
+      "step": 4500
+    },
+    {
+      "epoch": 2.39,
+      "grad_norm": 0.12828181684017181,
+      "learning_rate": 5.662398438638769e-06,
+      "loss": 0.1664,
+      "step": 4600
+    },
+    {
+      "epoch": 2.44,
+      "grad_norm": 20.833871841430664,
+      "learning_rate": 5.180492188541853e-06,
+      "loss": 0.1066,
+      "step": 4700
+    },
+    {
+      "epoch": 2.49,
+      "grad_norm": 0.006888058967888355,
+      "learning_rate": 4.698585938444936e-06,
+      "loss": 0.1298,
+      "step": 4800
+    },
+    {
+      "epoch": 2.55,
+      "grad_norm": 0.0369136743247509,
+      "learning_rate": 4.21667968834802e-06,
+      "loss": 0.1151,
+      "step": 4900
+    },
+    {
+      "epoch": 2.6,
+      "grad_norm": 0.13510233163833618,
+      "learning_rate": 3.7347734382511036e-06,
+      "loss": 0.173,
+      "step": 5000
+    },
+    {
+      "epoch": 2.65,
+      "grad_norm": 0.05811930075287819,
+      "learning_rate": 3.252867188154187e-06,
+      "loss": 0.1359,
+      "step": 5100
+    },
+    {
+      "epoch": 2.7,
+      "grad_norm": 0.07640138268470764,
+      "learning_rate": 2.7709609380572702e-06,
+      "loss": 0.1293,
+      "step": 5200
+    },
+    {
+      "epoch": 2.75,
+      "grad_norm": 0.019262025132775307,
+      "learning_rate": 2.289054687960354e-06,
+      "loss": 0.1205,
+      "step": 5300
+    },
+    {
+      "epoch": 2.81,
+      "grad_norm": 16.199689865112305,
+      "learning_rate": 1.8071484378634369e-06,
+      "loss": 0.1713,
+      "step": 5400
+    },
+    {
+      "epoch": 2.86,
+      "grad_norm": 0.010484320111572742,
+      "learning_rate": 1.3252421877665204e-06,
+      "loss": 0.145,
+      "step": 5500
+    },
+    {
+      "epoch": 2.91,
+      "grad_norm": 0.06876770406961441,
+      "learning_rate": 8.43335937669604e-07,
+      "loss": 0.1405,
+      "step": 5600
+    },
+    {
+      "epoch": 2.96,
+      "grad_norm": 0.3863673210144043,
+      "learning_rate": 3.6142968757268745e-07,
+      "loss": 0.1295,
+      "step": 5700
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.8654048370136698,
+      "eval_f1": 0.8554500158408583,
+      "eval_loss": 0.7277432680130005,
+      "eval_runtime": 15.4912,
+      "eval_samples_per_second": 122.779,
+      "eval_steps_per_second": 3.873,
+      "step": 5775
+    }
+  ],
+  "logging_steps": 100,
+  "max_steps": 5775,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "total_flos": 4219575531135264.0,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 2.783008594309693e-05,
+    "lr_scheduler_type": "linear",
+    "num_train_epochs": 3,
+    "per_device_train_batch_size": 8
+  }
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7f29ef3614e8dc39f7548f4c40ef000ff35bec5715582ffcddd443752ddbd2a5
+size 4920