commit files to HF hub

Browse files

Files changed (13) hide show

added_tokens.json +1 -0
config.json +43 -0
merges.txt +0 -0
optimizer.pt +3 -0
pytorch_model.bin +3 -0
rng_state.pth +3 -0
scheduler.pt +3 -0
special_tokens_map.json +1 -0
tokenizer.json +0 -0
tokenizer_config.json +1 -0
trainer_state.json +346 -0
training_args.bin +3 -0
vocab.json +0 -0

added_tokens.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"[noise-Negative]": 50273, "[food-Positive]": 50266, "[food-unknown]": 50267, "[food-Negative]": 50265, "[ambiance-no majority]": 50272, "[food-no majority]": 50268, "[ambiance-unknown]": 50271, "[service-no majority]": 50280, "[ambiance-Positive]": 50270, "[noise-no majority]": 50276, "[noise-Positive]": 50274, "[ambiance-Negative]": 50269, "[service-unknown]": 50279, "[service-Positive]": 50278, "[service-Negative]": 50277, "[noise-unknown]": 50275}

config.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+  "_name_or_path": "CEBaB/roberta-base.CEBaB.sa.5-class.exclusive.seed_77",
+  "architectures": [
+    "RobertaForFactualCounterfactualSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 0,
+  "classifier_dropout": null,
+  "eos_token_id": 2,
+  "finetuning_task": "opentable",
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": 0,
+    "1": 1,
+    "2": 2,
+    "3": 3,
+    "4": 4
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "0": 0,
+    "1": 1,
+    "2": 2,
+    "3": 3,
+    "4": 4
+  },
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 514,
+  "model_type": "roberta",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 1,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.18.0",
+  "type_vocab_size": 1,
+  "use_cache": true,
+  "vocab_size": 50281
+}

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:21117e659614a2d214d65fc3d3b13d8e1bbae260fa63b41fcfbd2aac1405cdcd
+size 997410589

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:edcb5d74319d87f2c70b5bc0b4e888269af981b85b270dc57065810c4522393e
+size 498718701

rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0798213e7495550bec47fde7dbd22cf8a0b3dce6d71f10d1939fe385776d8557
+size 15523

scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ed0cd99a63f5bdba91ff272dbf57c5399fbbc30c2343f83e10b20c4185b2a019
+size 623

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "sep_token": "</s>", "pad_token": "<pad>", "cls_token": "<s>", "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false}, "additional_special_tokens": ["[food-Negative]", "[food-Positive]", "[food-unknown]", "[food-no majority]", "[ambiance-Negative]", "[ambiance-Positive]", "[ambiance-unknown]", "[ambiance-no majority]", "[noise-Negative]", "[noise-Positive]", "[noise-unknown]", "[noise-no majority]", "[service-Negative]", "[service-Positive]", "[service-unknown]", "[service-no majority]"]}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1 @@


+ {"errors": "replace", "bos_token": "<s>", "eos_token": "</s>", "sep_token": "</s>", "cls_token": "<s>", "unk_token": "<unk>", "pad_token": "<pad>", "mask_token": "<mask>", "add_prefix_space": false, "trim_offsets": true, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "roberta-base", "tokenizer_class": "RobertaTokenizer"}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,346 @@

+{
+  "best_metric": 0.45347079634666443,
+  "best_model_checkpoint": "model_output/e2e_opentable_5_way__approximate__0-shot__seed-77__roberta-base/checkpoint-500",
+  "epoch": 0.8576329331046312,
+  "global_step": 500,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.03,
+      "learning_rate": 4.9783362218370885e-05,
+      "loss": 16.4408,
+      "step": 20
+    },
+    {
+      "epoch": 0.07,
+      "learning_rate": 4.956672443674177e-05,
+      "loss": 13.3709,
+      "step": 40
+    },
+    {
+      "epoch": 0.09,
+      "eval_distillation_accuracy_counterfactual": 0.3304258594150847,
+      "eval_distillation_accuracy_factual": 0.85351462288353,
+      "eval_distillation_f1_counterfactual": 0.243675431005225,
+      "eval_distillation_f1_factual": 0.8405624277794843,
+      "eval_groundtruth_accuracy_counterfactual": 0.3258081067213956,
+      "eval_groundtruth_f1_counterfactual": 0.24573702955510618,
+      "eval_groundtruth_f1_factual": 0.7133962491941664,
+      "eval_icace_cosine": 0.5112652778625488,
+      "eval_icace_l2": 0.8116109371185303,
+      "eval_icace_normdiff": 0.5299729108810425,
+      "eval_loss": 10.612378120422363,
+      "eval_runtime": 12.8754,
+      "eval_samples_per_second": 302.748,
+      "eval_steps_per_second": 2.408,
+      "step": 50
+    },
+    {
+      "epoch": 0.1,
+      "learning_rate": 4.935008665511265e-05,
+      "loss": 11.6048,
+      "step": 60
+    },
+    {
+      "epoch": 0.14,
+      "learning_rate": 4.913344887348354e-05,
+      "loss": 11.1238,
+      "step": 80
+    },
+    {
+      "epoch": 0.17,
+      "learning_rate": 4.891681109185442e-05,
+      "loss": 10.5122,
+      "step": 100
+    },
+    {
+      "epoch": 0.17,
+      "eval_distillation_accuracy_counterfactual": 0.5010261672652643,
+      "eval_distillation_accuracy_factual": 0.8737814263724987,
+      "eval_distillation_f1_counterfactual": 0.3809463454212182,
+      "eval_distillation_f1_factual": 0.8520946306923461,
+      "eval_groundtruth_accuracy_counterfactual": 0.47998973832734737,
+      "eval_groundtruth_f1_counterfactual": 0.36220047443175507,
+      "eval_groundtruth_f1_factual": 0.7127756432715844,
+      "eval_icace_cosine": 0.46907880902290344,
+      "eval_icace_l2": 0.6739468574523926,
+      "eval_icace_normdiff": 0.44848179817199707,
+      "eval_loss": 8.415631294250488,
+      "eval_runtime": 12.9894,
+      "eval_samples_per_second": 300.091,
+      "eval_steps_per_second": 2.387,
+      "step": 100
+    },
+    {
+      "epoch": 0.21,
+      "learning_rate": 4.8700173310225307e-05,
+      "loss": 10.4539,
+      "step": 120
+    },
+    {
+      "epoch": 0.24,
+      "learning_rate": 4.848353552859619e-05,
+      "loss": 10.4598,
+      "step": 140
+    },
+    {
+      "epoch": 0.26,
+      "eval_distillation_accuracy_counterfactual": 0.5100051308363264,
+      "eval_distillation_accuracy_factual": 0.8648024628014367,
+      "eval_distillation_f1_counterfactual": 0.4378381593277977,
+      "eval_distillation_f1_factual": 0.8505368829186162,
+      "eval_groundtruth_accuracy_counterfactual": 0.4979476654694715,
+      "eval_groundtruth_f1_counterfactual": 0.42720537829821525,
+      "eval_groundtruth_f1_factual": 0.6962013496902839,
+      "eval_icace_cosine": 0.4693450629711151,
+      "eval_icace_l2": 0.6681450009346008,
+      "eval_icace_normdiff": 0.41851383447647095,
+      "eval_loss": 8.433810234069824,
+      "eval_runtime": 12.8826,
+      "eval_samples_per_second": 302.578,
+      "eval_steps_per_second": 2.406,
+      "step": 150
+    },
+    {
+      "epoch": 0.27,
+      "learning_rate": 4.826689774696707e-05,
+      "loss": 10.219,
+      "step": 160
+    },
+    {
+      "epoch": 0.31,
+      "learning_rate": 4.8050259965337955e-05,
+      "loss": 10.1164,
+      "step": 180
+    },
+    {
+      "epoch": 0.34,
+      "learning_rate": 4.7833622183708845e-05,
+      "loss": 10.1494,
+      "step": 200
+    },
+    {
+      "epoch": 0.34,
+      "eval_distillation_accuracy_counterfactual": 0.48691636736788096,
+      "eval_distillation_accuracy_factual": 0.8858388917393535,
+      "eval_distillation_f1_counterfactual": 0.3500647816598059,
+      "eval_distillation_f1_factual": 0.8801680477409537,
+      "eval_groundtruth_accuracy_counterfactual": 0.46793227296049256,
+      "eval_groundtruth_f1_counterfactual": 0.3413720392476469,
+      "eval_groundtruth_f1_factual": 0.696679629826118,
+      "eval_icace_cosine": 0.4742945730686188,
+      "eval_icace_l2": 0.6967979073524475,
+      "eval_icace_normdiff": 0.4476469159126282,
+      "eval_loss": 8.772797584533691,
+      "eval_runtime": 13.0849,
+      "eval_samples_per_second": 297.901,
+      "eval_steps_per_second": 2.369,
+      "step": 200
+    },
+    {
+      "epoch": 0.38,
+      "learning_rate": 4.761698440207972e-05,
+      "loss": 9.9977,
+      "step": 220
+    },
+    {
+      "epoch": 0.41,
+      "learning_rate": 4.740034662045061e-05,
+      "loss": 10.0073,
+      "step": 240
+    },
+    {
+      "epoch": 0.43,
+      "eval_distillation_accuracy_counterfactual": 0.43560800410466904,
+      "eval_distillation_accuracy_factual": 0.8519753719856337,
+      "eval_distillation_f1_counterfactual": 0.35530824798064053,
+      "eval_distillation_f1_factual": 0.8365066372023451,
+      "eval_groundtruth_accuracy_counterfactual": 0.43766033863519754,
+      "eval_groundtruth_f1_counterfactual": 0.364318999243899,
+      "eval_groundtruth_f1_factual": 0.6915401170991118,
+      "eval_icace_cosine": 0.48830506205558777,
+      "eval_icace_l2": 0.7442983388900757,
+      "eval_icace_normdiff": 0.4745895266532898,
+      "eval_loss": 9.400979042053223,
+      "eval_runtime": 12.983,
+      "eval_samples_per_second": 300.24,
+      "eval_steps_per_second": 2.388,
+      "step": 250
+    },
+    {
+      "epoch": 0.45,
+      "learning_rate": 4.7183708838821494e-05,
+      "loss": 9.7738,
+      "step": 260
+    },
+    {
+      "epoch": 0.48,
+      "learning_rate": 4.6967071057192376e-05,
+      "loss": 9.9329,
+      "step": 280
+    },
+    {
+      "epoch": 0.51,
+      "learning_rate": 4.675043327556326e-05,
+      "loss": 9.7779,
+      "step": 300
+    },
+    {
+      "epoch": 0.51,
+      "eval_distillation_accuracy_counterfactual": 0.5359158542842484,
+      "eval_distillation_accuracy_factual": 0.8619805028219599,
+      "eval_distillation_f1_counterfactual": 0.47616723466717426,
+      "eval_distillation_f1_factual": 0.852650039087773,
+      "eval_groundtruth_accuracy_counterfactual": 0.5010261672652643,
+      "eval_groundtruth_f1_counterfactual": 0.44417532302390744,
+      "eval_groundtruth_f1_factual": 0.7073800556776944,
+      "eval_icace_cosine": 0.46865805983543396,
+      "eval_icace_l2": 0.6562190651893616,
+      "eval_icace_normdiff": 0.4228982925415039,
+      "eval_loss": 8.363639831542969,
+      "eval_runtime": 13.0389,
+      "eval_samples_per_second": 298.951,
+      "eval_steps_per_second": 2.377,
+      "step": 300
+    },
+    {
+      "epoch": 0.55,
+      "learning_rate": 4.653379549393415e-05,
+      "loss": 9.6834,
+      "step": 320
+    },
+    {
+      "epoch": 0.58,
+      "learning_rate": 4.6317157712305025e-05,
+      "loss": 9.547,
+      "step": 340
+    },
+    {
+      "epoch": 0.6,
+      "eval_distillation_accuracy_counterfactual": 0.5025654181631606,
+      "eval_distillation_accuracy_factual": 0.8855823499230374,
+      "eval_distillation_f1_counterfactual": 0.4398602753096072,
+      "eval_distillation_f1_factual": 0.8717503260210726,
+      "eval_groundtruth_accuracy_counterfactual": 0.4989738327347358,
+      "eval_groundtruth_f1_counterfactual": 0.44441757184034925,
+      "eval_groundtruth_f1_factual": 0.7229789758781793,
+      "eval_icace_cosine": 0.4650568664073944,
+      "eval_icace_l2": 0.6811135411262512,
+      "eval_icace_normdiff": 0.4423196017742157,
+      "eval_loss": 8.341341972351074,
+      "eval_runtime": 13.1584,
+      "eval_samples_per_second": 296.238,
+      "eval_steps_per_second": 2.356,
+      "step": 350
+    },
+    {
+      "epoch": 0.62,
+      "learning_rate": 4.6100519930675915e-05,
+      "loss": 9.4159,
+      "step": 360
+    },
+    {
+      "epoch": 0.65,
+      "learning_rate": 4.58838821490468e-05,
+      "loss": 9.6264,
+      "step": 380
+    },
+    {
+      "epoch": 0.69,
+      "learning_rate": 4.566724436741768e-05,
+      "loss": 9.2833,
+      "step": 400
+    },
+    {
+      "epoch": 0.69,
+      "eval_distillation_accuracy_counterfactual": 0.5495125705489995,
+      "eval_distillation_accuracy_factual": 0.8494099538224731,
+      "eval_distillation_f1_counterfactual": 0.48234074968553386,
+      "eval_distillation_f1_factual": 0.834773556914336,
+      "eval_groundtruth_accuracy_counterfactual": 0.5266803488968702,
+      "eval_groundtruth_f1_counterfactual": 0.4613217521128874,
+      "eval_groundtruth_f1_factual": 0.7080154702931097,
+      "eval_icace_cosine": 0.4618784785270691,
+      "eval_icace_l2": 0.6433539986610413,
+      "eval_icace_normdiff": 0.40616923570632935,
+      "eval_loss": 8.343696594238281,
+      "eval_runtime": 15.6191,
+      "eval_samples_per_second": 249.565,
+      "eval_steps_per_second": 1.985,
+      "step": 400
+    },
+    {
+      "epoch": 0.72,
+      "learning_rate": 4.5450606585788563e-05,
+      "loss": 9.3389,
+      "step": 420
+    },
+    {
+      "epoch": 0.75,
+      "learning_rate": 4.5233968804159446e-05,
+      "loss": 9.2901,
+      "step": 440
+    },
+    {
+      "epoch": 0.77,
+      "eval_distillation_accuracy_counterfactual": 0.4989738327347358,
+      "eval_distillation_accuracy_factual": 0.8601847101077476,
+      "eval_distillation_f1_counterfactual": 0.33860612154049907,
+      "eval_distillation_f1_factual": 0.8490636185423319,
+      "eval_groundtruth_accuracy_counterfactual": 0.4892252437147255,
+      "eval_groundtruth_f1_counterfactual": 0.33971729579938115,
+      "eval_groundtruth_f1_factual": 0.7307035270822468,
+      "eval_icace_cosine": 0.475065141916275,
+      "eval_icace_l2": 0.6995478272438049,
+      "eval_icace_normdiff": 0.440874308347702,
+      "eval_loss": 8.762495994567871,
+      "eval_runtime": 13.0872,
+      "eval_samples_per_second": 297.848,
+      "eval_steps_per_second": 2.369,
+      "step": 450
+    },
+    {
+      "epoch": 0.79,
+      "learning_rate": 4.501733102253033e-05,
+      "loss": 9.3355,
+      "step": 460
+    },
+    {
+      "epoch": 0.82,
+      "learning_rate": 4.480069324090121e-05,
+      "loss": 9.1845,
+      "step": 480
+    },
+    {
+      "epoch": 0.86,
+      "learning_rate": 4.45840554592721e-05,
+      "loss": 9.2082,
+      "step": 500
+    },
+    {
+      "epoch": 0.86,
+      "eval_distillation_accuracy_counterfactual": 0.5823499230374551,
+      "eval_distillation_accuracy_factual": 0.8514622883530015,
+      "eval_distillation_f1_counterfactual": 0.49132364435744985,
+      "eval_distillation_f1_factual": 0.8334607656933123,
+      "eval_groundtruth_accuracy_counterfactual": 0.5482298614674191,
+      "eval_groundtruth_f1_counterfactual": 0.46237812507137493,
+      "eval_groundtruth_f1_factual": 0.7129235668323691,
+      "eval_icace_cosine": 0.45347079634666443,
+      "eval_icace_l2": 0.6185017824172974,
+      "eval_icace_normdiff": 0.3950257897377014,
+      "eval_loss": 8.00942611694336,
+      "eval_runtime": 13.0722,
+      "eval_samples_per_second": 298.191,
+      "eval_steps_per_second": 2.371,
+      "step": 500
+    }
+  ],
+  "max_steps": 4616,
+  "num_train_epochs": 8,
+  "total_flos": 4209890279424000.0,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1673c25df66ab430cf5640069391befa925c7fce30623a5c929351ed4e140928
+size 3183

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff