pixas committed · Commit 08d8702 · verified · 1 Parent(s): fa36636

update meds3 prm
README.md CHANGED

````diff
@@ -3,7 +3,7 @@ license: mit
 language:
 - en
 base_model:
-- meta-llama/Llama-3.1-8B-Instruct
+- pixas/MedSSS_Policy
 pipeline_tag: token-classification
 ---
 
@@ -18,7 +18,8 @@ pipeline_tag: token-classification
 </div>
 
 # <span>Introduction</span>
-**MedSSS-PRM** is a the PRM model designed for slow-thinking medical reasoning. It will assign a `[0-1]` float value for every internal reasoning step of **MedSSS-Policy**.
+**MedSSS-PRM** is trained with the newly proposed soft dual-sided objective, designed to identify intermediate erroneous steps within a correct medical reasoning trajectory.
+It will assign a `[0-1]` float value for every internal reasoning step of **MedSSS-Policy**.
 
 For more information, visit our GitHub repository:
 [https://github.com/pixas/MedSSS](https://github.com/pixas/MedSSS).
@@ -28,7 +29,7 @@ For more information, visit our GitHub repository:
 
 # <span>Usage</span>
 We build the PRM model as a LoRA adapter, which saves the memory to use it.
-As this LoRA adapter is built on `Meta-Llama3.1-8B-Instruct`, you need to first prepare the base model in your platform.
+As this LoRA adapter is built on `pixas/MedSSS_Policy`, you need to first prepare the base model in your platform.
 
 ```python
````
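The README's usage snippet is truncated in this diff view. As a stop-gap, here is a minimal sketch of how a LoRA-based PRM like this one could be loaded and queried with `transformers` and `peft`. The repo id `pixas/MedSSS_PRM`, the single-logit head, and the sigmoid scoring are assumptions inferred from the adapter config below (`task_type: TOKEN_CLS`, a saved `score` module), not the authors' exact code; see the MedSSS GitHub repository for the authoritative version.

```python
# Minimal sketch, not the authors' exact recipe. Assumptions: the adapter lives
# at "pixas/MedSSS_PRM", the head has a single logit, and scores are sigmoids.
import torch
from peft import PeftModel
from transformers import AutoModelForTokenClassification, AutoTokenizer

base_id = "pixas/MedSSS_Policy"  # base model named in the updated README
prm_id = "pixas/MedSSS_PRM"      # assumed id of this adapter repo

tokenizer = AutoTokenizer.from_pretrained(base_id)
base = AutoModelForTokenClassification.from_pretrained(
    base_id, num_labels=1, torch_dtype=torch.bfloat16
)
model = PeftModel.from_pretrained(base, prm_id).eval()

# Score one reasoning step: take the logit at the step's final token and
# squash it into the [0-1] range described in the README.
step = "Step 1: The patient's symptoms suggest hyperthyroidism."
inputs = tokenizer(step, return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits  # shape: (1, seq_len, 1)
value = torch.sigmoid(logits[0, -1, 0]).item()
print(f"step value: {value:.3f}")
```

In a full MedSSS pipeline the policy's trajectory would be split into steps and each step's final token scored; the sketch scores a single step for brevity.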
adapter_config.json CHANGED

```diff
@@ -1,7 +1,7 @@
 {
   "alpha_pattern": {},
   "auto_mapping": null,
-  "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct",
+  "base_model_name_or_path": "/mnt/petrelfs/jiangshuyang.p/checkpoints/llama318b_mcts_vllm_mix16_500_data_filter_all_trial5/sft_2-llama3.1-8b-r16a32-1epoch-DPO-full-ITER2-ls",
   "bias": "none",
   "eva_config": null,
   "exclude_modules": null,
@@ -12,7 +12,7 @@
   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
-  "lora_alpha": 128,
+  "lora_alpha": 64,
   "lora_bias": false,
   "lora_dropout": 0.05,
   "megatron_config": null,
@@ -22,12 +22,17 @@
     "score"
   ],
   "peft_type": "LORA",
-  "r": 64,
+  "r": 32,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "gate_proj",
+    "q_proj",
+    "o_proj",
+    "k_proj",
+    "up_proj",
     "v_proj",
-    "q_proj"
+    "down_proj"
   ],
   "task_type": "TOKEN_CLS",
   "use_dora": false,
```
adapter_model.safetensors CHANGED

```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3340b25f93f2bfa2bd95f01fc4ff130686c6efc2faa96c706e02994e9f793660
-size 54559892
+oid sha256:08fed9970f6b2263bc68201de1c3303d682fc086cf515548dc8115bf6df41074
+size 167849276
```
all_results.json ADDED

```diff
@@ -0,0 +1,8 @@
+{
+  "epoch": 0.9986332574031891,
+  "eval_accuracy": 0.821273964131107,
+  "eval_loss": 0.4168959856033325,
+  "eval_runtime": 28.3489,
+  "eval_samples_per_second": 37.956,
+  "eval_steps_per_second": 2.399
+}
```
eval_results.json ADDED

```diff
@@ -0,0 +1,8 @@
+{
+  "epoch": 0.9986332574031891,
+  "eval_accuracy": 0.821273964131107,
+  "eval_loss": 0.4168959856033325,
+  "eval_runtime": 28.3489,
+  "eval_samples_per_second": 37.956,
+  "eval_steps_per_second": 2.399
+}
```
special_tokens_map.json CHANGED

```diff
@@ -13,5 +13,11 @@
     "rstrip": false,
     "single_word": false
   },
-  "pad_token": "<|finetune_right_pad_id|>"
+  "pad_token": {
+    "content": "<|finetune_right_pad_id|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
 }
```
tokenizer_config.json CHANGED

```diff
@@ -2058,7 +2058,7 @@
     "input_ids",
     "attention_mask"
   ],
-  "model_max_length": 131072,
+  "model_max_length": 8192,
   "pad_token": "<|finetune_right_pad_id|>",
   "tokenizer_class": "PreTrainedTokenizerFast"
 }
```
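Together with the `special_tokens_map.json` change above, this hunk alters what downstream code sees at load time: the pad token becomes a fully specified token entry, and the advertised context window shrinks from 131072 to 8192 tokens, which caps the length of any reasoning trajectory the PRM can score in one pass. A sanity-check sketch, with the repo id again assumed:

```python
from transformers import AutoTokenizer

# Sketch: confirm the tokenizer settings shipped in this commit (repo id assumed).
tok = AutoTokenizer.from_pretrained("pixas/MedSSS_PRM")
print(tok.model_max_length)  # expected: 8192
print(tok.pad_token)         # expected: <|finetune_right_pad_id|>
```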
training_args.bin CHANGED

```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:28a1adb4b457f9e1305d26234a51ce0224e3acf838b70e42ea896b9c9327d0bb
-size 6904
+oid sha256:4a690a86361bdcb312b71031075d518b9464ce5d988237be18d12f28686017ef
+size 7352
```