pixas committed · Commit 08d8702 · verified · 1 Parent(s): fa36636

update meds3 prm
README.md CHANGED

````diff
@@ -3,7 +3,7 @@ license: mit
 language:
 - en
 base_model:
-- meta-llama/Llama-3.1-8B-Instruct
+- pixas/MedSSS_Policy
 pipeline_tag: token-classification
 ---
 
@@ -18,7 +18,8 @@ pipeline_tag: token-classification
 </div>
 
 # <span>Introduction</span>
-**MedSSS-PRM** is a the PRM model designed for slow-thinking medical reasoning. It will assign a `[0-1]` float value for every internal reasoning step of **MedSSS-Policy**.
+**MedSSS-PRM** is trained with the newly proposed soft dual-sided objective, designed to identify intermediate erroneous steps within a correct medical reasoning trajectory.
+It will assign a `[0-1]` float value for every internal reasoning step of **MedSSS-Policy**.
 
 For more information, visit our GitHub repository:
 [https://github.com/pixas/MedSSS](https://github.com/pixas/MedSSS).
@@ -28,7 +29,7 @@ For more information, visit our GitHub repository:
 
 # <span>Usage</span>
 We build the PRM model as a LoRA adapter, which saves the memory to use it.
-As this LoRA adapter is built on `Meta-Llama3.1-8B-Instruct`, you need to first prepare the base model in your platform.
+As this LoRA adapter is built on `pixas/MedSSS_Policy`, you need to first prepare the base model in your platform.
 
 ```python
````
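The README's usage snippet is truncated in this diff view. As a stop-gap, here is a minimal sketch of how a LoRA-based PRM like this one could be loaded and queried with `transformers` and `peft`. The repo id `pixas/MedSSS_PRM`, the single-logit head, and the sigmoid scoring are assumptions inferred from the adapter config below (`task_type: TOKEN_CLS`, a saved `score` module), not the authors' exact code; see the MedSSS GitHub repository for the authoritative version.

```python
# Minimal sketch, not the authors' exact recipe. Assumptions: the adapter lives
# at "pixas/MedSSS_PRM", the head has a single logit, and scores are sigmoids.
import torch
from peft import PeftModel
from transformers import AutoModelForTokenClassification, AutoTokenizer

base_id = "pixas/MedSSS_Policy"  # base model named in the updated README
prm_id = "pixas/MedSSS_PRM"      # assumed id of this adapter repo

tokenizer = AutoTokenizer.from_pretrained(base_id)
base = AutoModelForTokenClassification.from_pretrained(
    base_id, num_labels=1, torch_dtype=torch.bfloat16
)
model = PeftModel.from_pretrained(base, prm_id).eval()

# Score one reasoning step: take the logit at the step's final token and
# squash it into the [0-1] range described in the README.
step = "Step 1: The patient's symptoms suggest hyperthyroidism."
inputs = tokenizer(step, return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits  # shape: (1, seq_len, 1)
value = torch.sigmoid(logits[0, -1, 0]).item()
print(f"step value: {value:.3f}")
```

In a full MedSSS pipeline the policy's trajectory would be split into steps and each step's final token scored; the sketch scores a single step for brevity.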
adapter_config.json CHANGED

```diff
@@ -1,7 +1,7 @@
 {
   "alpha_pattern": {},
   "auto_mapping": null,
-  "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct",
+  "base_model_name_or_path": "/mnt/petrelfs/jiangshuyang.p/checkpoints/llama318b_mcts_vllm_mix16_500_data_filter_all_trial5/sft_2-llama3.1-8b-r16a32-1epoch-DPO-full-ITER2-ls",
   "bias": "none",
   "eva_config": null,
   "exclude_modules": null,
@@ -12,7 +12,7 @@
   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
-  "lora_alpha": 128,
+  "lora_alpha": 64,
   "lora_bias": false,
   "lora_dropout": 0.05,
   "megatron_config": null,
@@ -22,12 +22,17 @@
     "score"
   ],
   "peft_type": "LORA",
-  "r": 64,
+  "r": 32,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "gate_proj",
+    "q_proj",
+    "o_proj",
+    "k_proj",
+    "up_proj",
     "v_proj",
-    "q_proj"
+    "down_proj"
   ],
   "task_type": "TOKEN_CLS",
   "use_dora": false,
```
adapter_model.safetensors CHANGED

```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3340b25f93f2bfa2bd95f01fc4ff130686c6efc2faa96c706e02994e9f793660
-size 54559892
+oid sha256:08fed9970f6b2263bc68201de1c3303d682fc086cf515548dc8115bf6df41074
+size 167849276
```
all_results.json ADDED

```diff
@@ -0,0 +1,8 @@
+{
+  "epoch": 0.9986332574031891,
+  "eval_accuracy": 0.821273964131107,
+  "eval_loss": 0.4168959856033325,
+  "eval_runtime": 28.3489,
+  "eval_samples_per_second": 37.956,
+  "eval_steps_per_second": 2.399
+}
```
eval_results.json ADDED

```diff
@@ -0,0 +1,8 @@
+{
+  "epoch": 0.9986332574031891,
+  "eval_accuracy": 0.821273964131107,
+  "eval_loss": 0.4168959856033325,
+  "eval_runtime": 28.3489,
+  "eval_samples_per_second": 37.956,
+  "eval_steps_per_second": 2.399
+}
```
special_tokens_map.json CHANGED

```diff
@@ -13,5 +13,11 @@
     "rstrip": false,
     "single_word": false
   },
-  "pad_token": "<|finetune_right_pad_id|>"
+  "pad_token": {
+    "content": "<|finetune_right_pad_id|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
 }
```
tokenizer_config.json CHANGED

```diff
@@ -2058,7 +2058,7 @@
     "input_ids",
     "attention_mask"
   ],
-  "model_max_length": 131072,
+  "model_max_length": 8192,
   "pad_token": "<|finetune_right_pad_id|>",
   "tokenizer_class": "PreTrainedTokenizerFast"
 }
```
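Together with the `special_tokens_map.json` change above, this hunk alters what downstream code sees at load time: the pad token becomes a fully specified token entry, and the advertised context window shrinks from 131072 to 8192 tokens, which caps the length of any reasoning trajectory the PRM can score in one pass. A sanity-check sketch, with the repo id again assumed:

```python
from transformers import AutoTokenizer

# Sketch: confirm the tokenizer settings shipped in this commit (repo id assumed).
tok = AutoTokenizer.from_pretrained("pixas/MedSSS_PRM")
print(tok.model_max_length)  # expected: 8192
print(tok.pad_token)         # expected: <|finetune_right_pad_id|>
```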
training_args.bin CHANGED

```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:28a1adb4b457f9e1305d26234a51ce0224e3acf838b70e42ea896b9c9327d0bb
-size 6904
+oid sha256:4a690a86361bdcb312b71031075d518b9464ce5d988237be18d12f28686017ef
+size 7352
```