SimCSE checkpoint at step 2000 on scandi wiki, llama-swe-scandi-mntp base model

Browse files

Files changed (3) hide show

adapter_config.json +37 -0
adapter_model.safetensors +3 -0
trainer_state.json +77 -0

adapter_config.json ADDED Viewed

	@@ -0,0 +1,37 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": {
+    "base_model_class": "LlamaBiModel",
+    "parent_library": "llm2vec.models.bidirectional_llama"
+  },
+  "base_model_name_or_path": "AI-Sweden-Models/Llama-3-8B-instruct",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "q_proj",
+    "up_proj",
+    "gate_proj",
+    "down_proj",
+    "k_proj",
+    "o_proj"
+  ],
+  "task_type": null,
+  "use_dora": false,
+  "use_rslora": false
+}

adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e1c7e8f033945574be4fc3dab0ab91cf635b24812ac5e3d09041c54ac8bd6800
+size 167829552

trainer_state.json ADDED Viewed

	@@ -0,0 +1,77 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.2559836170485089,
+  "eval_steps": 500,
+  "global_step": 2000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.031997952131063614,
+      "grad_norm": 0.06517554074525833,
+      "learning_rate": 2.9680020478689362e-05,
+      "loss": 0.2761,
+      "step": 250
+    },
+    {
+      "epoch": 0.06399590426212723,
+      "grad_norm": 0.05590095743536949,
+      "learning_rate": 2.936004095737873e-05,
+      "loss": 0.0085,
+      "step": 500
+    },
+    {
+      "epoch": 0.09599385639319083,
+      "grad_norm": 0.06787142902612686,
+      "learning_rate": 2.9040061436068092e-05,
+      "loss": 0.007,
+      "step": 750
+    },
+    {
+      "epoch": 0.12799180852425446,
+      "grad_norm": 0.019182974472641945,
+      "learning_rate": 2.8720081914757454e-05,
+      "loss": 0.0063,
+      "step": 1000
+    },
+    {
+      "epoch": 0.15998976065531806,
+      "grad_norm": 0.02994903363287449,
+      "learning_rate": 2.840010239344682e-05,
+      "loss": 0.0056,
+      "step": 1250
+    },
+    {
+      "epoch": 0.19198771278638166,
+      "grad_norm": 0.016078654676675797,
+      "learning_rate": 2.8080122872136184e-05,
+      "loss": 0.0056,
+      "step": 1500
+    },
+    {
+      "epoch": 0.2239856649174453,
+      "grad_norm": 0.0162887554615736,
+      "learning_rate": 2.776014335082555e-05,
+      "loss": 0.0051,
+      "step": 1750
+    },
+    {
+      "epoch": 0.2559836170485089,
+      "grad_norm": 0.024330032989382744,
+      "learning_rate": 2.744016382951491e-05,
+      "loss": 0.0053,
+      "step": 2000
+    }
+  ],
+  "logging_steps": 250,
+  "max_steps": 23439,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "total_flos": 0.0,
+  "train_batch_size": 128,
+  "trial_name": null,
+  "trial_params": null
+}