ugaoo committed on
Commit
960771d
·
verified ·
1 Parent(s): 54497b3

Upload folder using huggingface_hub

Browse files
Files changed (50) hide show
  1. merge_universal.sh +44 -0
  2. merged_models/checkpoint-2611/added_tokens.json +24 -0
  3. merged_models/checkpoint-2611/config.json +28 -0
  4. merged_models/checkpoint-2611/generation_config.json +14 -0
  5. merged_models/checkpoint-2611/merges.txt +0 -0
  6. merged_models/checkpoint-2611/model-00001-of-00004.safetensors +3 -0
  7. merged_models/checkpoint-2611/model-00002-of-00004.safetensors +3 -0
  8. merged_models/checkpoint-2611/model-00003-of-00004.safetensors +3 -0
  9. merged_models/checkpoint-2611/model-00004-of-00004.safetensors +3 -0
  10. merged_models/checkpoint-2611/model.safetensors.index.json +346 -0
  11. merged_models/checkpoint-2611/special_tokens_map.json +31 -0
  12. merged_models/checkpoint-2611/tokenizer.json +3 -0
  13. merged_models/checkpoint-2611/tokenizer_config.json +207 -0
  14. merged_models/checkpoint-2611/vocab.json +0 -0
  15. merged_models/checkpoint-5222/added_tokens.json +24 -0
  16. merged_models/checkpoint-5222/config.json +28 -0
  17. merged_models/checkpoint-5222/generation_config.json +14 -0
  18. merged_models/checkpoint-5222/merges.txt +0 -0
  19. merged_models/checkpoint-5222/model-00001-of-00004.safetensors +3 -0
  20. merged_models/checkpoint-5222/model-00002-of-00004.safetensors +3 -0
  21. merged_models/checkpoint-5222/model-00003-of-00004.safetensors +3 -0
  22. merged_models/checkpoint-5222/model-00004-of-00004.safetensors +3 -0
  23. merged_models/checkpoint-5222/model.safetensors.index.json +346 -0
  24. merged_models/checkpoint-5222/special_tokens_map.json +31 -0
  25. merged_models/checkpoint-5222/tokenizer.json +3 -0
  26. merged_models/checkpoint-5222/tokenizer_config.json +207 -0
  27. merged_models/checkpoint-5222/vocab.json +0 -0
  28. merged_models/eval.py +122 -0
  29. merged_models/eval.sh +17 -0
  30. merged_models/model_metrics.csv +3 -0
  31. merged_models/results/checkpoint-2611/results_2025-03-17T19-53-01.924844.json +578 -0
  32. merged_models/results/checkpoint-2611/samples_medmcqa_2025-03-17T19-53-01.924844.jsonl +0 -0
  33. merged_models/results/checkpoint-2611/samples_medqa_4options_2025-03-17T19-53-01.924844.jsonl +0 -0
  34. merged_models/results/checkpoint-2611/samples_mmlu_anatomy_2025-03-17T19-53-01.924844.jsonl +0 -0
  35. merged_models/results/checkpoint-2611/samples_mmlu_clinical_knowledge_2025-03-17T19-53-01.924844.jsonl +0 -0
  36. merged_models/results/checkpoint-2611/samples_mmlu_college_biology_2025-03-17T19-53-01.924844.jsonl +0 -0
  37. merged_models/results/checkpoint-2611/samples_mmlu_college_medicine_2025-03-17T19-53-01.924844.jsonl +0 -0
  38. merged_models/results/checkpoint-2611/samples_mmlu_medical_genetics_2025-03-17T19-53-01.924844.jsonl +0 -0
  39. merged_models/results/checkpoint-2611/samples_mmlu_professional_medicine_2025-03-17T19-53-01.924844.jsonl +0 -0
  40. merged_models/results/checkpoint-2611/samples_pubmedqa_2025-03-17T19-53-01.924844.jsonl +0 -0
  41. merged_models/results/checkpoint-5222/results_2025-03-17T19-54-40.045633.json +578 -0
  42. merged_models/results/checkpoint-5222/samples_medmcqa_2025-03-17T19-54-40.045633.jsonl +0 -0
  43. merged_models/results/checkpoint-5222/samples_medqa_4options_2025-03-17T19-54-40.045633.jsonl +0 -0
  44. merged_models/results/checkpoint-5222/samples_mmlu_anatomy_2025-03-17T19-54-40.045633.jsonl +0 -0
  45. merged_models/results/checkpoint-5222/samples_mmlu_clinical_knowledge_2025-03-17T19-54-40.045633.jsonl +0 -0
  46. merged_models/results/checkpoint-5222/samples_mmlu_college_biology_2025-03-17T19-54-40.045633.jsonl +0 -0
  47. merged_models/results/checkpoint-5222/samples_mmlu_college_medicine_2025-03-17T19-54-40.045633.jsonl +0 -0
  48. merged_models/results/checkpoint-5222/samples_mmlu_medical_genetics_2025-03-17T19-54-40.045633.jsonl +0 -0
  49. merged_models/results/checkpoint-5222/samples_mmlu_professional_medicine_2025-03-17T19-54-40.045633.jsonl +0 -0
  50. merged_models/results/checkpoint-5222/samples_pubmedqa_2025-03-17T19-54-40.045633.jsonl +0 -0
merge_universal.sh ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/bin/bash
#
# merge_universal.sh - merge every LoRA checkpoint found in the current
# directory.
#
# For each ./checkpoint-* directory this runs axolotl's merge_lora CLI with
# ./model.yml and writes the result to ./merged_models/<checkpoint-name>/.
# The merge output is expected in a "merged" subdirectory of the output
# directory; its contents are moved up one level so that
# merged_models/<checkpoint-name>/ holds the model files directly.
#
# Usage:       run from the directory holding model.yml and the checkpoint-*
#              directories.
# Exit status: 0 if every checkpoint merged, 1 if any checkpoint failed.

set -euo pipefail

#######################################
# Merge a single LoRA checkpoint and flatten the "merged" output directory.
# Arguments:
#   $1 - checkpoint directory name, relative to the current directory
#   $2 - output directory for the merged model
# Returns:
#   0 on success; 1 if the merge command fails, no "merged" directory is
#   produced, or the merged files cannot be moved into place.
#######################################
merge_checkpoint() {
  local checkpoint_name=$1
  local output_dir=$2
  local merged_dir="${output_dir}/merged"

  mkdir -p -- "${output_dir}" || return 1

  # Check the exit status first: a "merged" directory left behind by an
  # earlier run must not make a failed merge look successful.
  python3 -m axolotl.cli.merge_lora model.yml \
    --lora_model_dir="./${checkpoint_name}" \
    --output_dir="${output_dir}" || return 1

  [[ -d "${merged_dir}" ]] || return 1

  # Move everything up one level. dotglob includes hidden files (a bare "*"
  # would skip them and make the rmdir below fail); nullglob copes with an
  # empty directory. The subshell keeps both options from leaking out.
  (
    shopt -s dotglob nullglob
    entries=("${merged_dir}"/*)
    if (( ${#entries[@]} > 0 )); then
      mv -- "${entries[@]}" "${output_dir}/"
    fi
  ) || return 1

  # Remove the now-empty merged directory.
  rmdir -- "${merged_dir}" || return 1
}

#######################################
# Merge all checkpoint-* directories in the current directory.
# Outputs:
#   progress messages on stdout, errors and warnings on stderr
# Returns:
#   0 if every checkpoint merged, 1 if at least one failed.
#######################################
main() {
  local current_dir
  current_dir=$(pwd)
  local merged_root="${current_dir}/merged_models"
  local checkpoint_dir checkpoint_name
  local -i found=0 failed=0

  # Create merged_models directory if it doesn't exist.
  mkdir -p -- "${merged_root}" || return 1

  for checkpoint_dir in checkpoint-*; do
    # Skips plain files and the literal pattern left by an unmatched glob.
    [[ -d "${checkpoint_dir}" ]] || continue

    checkpoint_name=${checkpoint_dir##*/}
    found+=1

    echo "Processing ${checkpoint_name}..."

    if merge_checkpoint "${checkpoint_name}" \
        "${merged_root}/${checkpoint_name}"; then
      echo "Successfully processed ${checkpoint_name}"
    else
      # Keep going so one bad checkpoint does not block the others.
      echo "Error processing ${checkpoint_name}" >&2
      failed+=1
    fi
  done

  if (( found == 0 )); then
    echo "No checkpoint-* directories found in ${current_dir}" >&2
  fi

  echo "All checkpoints processed!"

  if (( failed > 0 )); then
    echo "${failed} checkpoint(s) failed to merge" >&2
    return 1
  fi
}

main "$@"
merged_models/checkpoint-2611/added_tokens.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</tool_call>": 151658,
3
+ "<tool_call>": 151657,
4
+ "<|box_end|>": 151649,
5
+ "<|box_start|>": 151648,
6
+ "<|endoftext|>": 151643,
7
+ "<|file_sep|>": 151664,
8
+ "<|fim_middle|>": 151660,
9
+ "<|fim_pad|>": 151662,
10
+ "<|fim_prefix|>": 151659,
11
+ "<|fim_suffix|>": 151661,
12
+ "<|im_end|>": 151645,
13
+ "<|im_start|>": 151644,
14
+ "<|image_pad|>": 151655,
15
+ "<|object_ref_end|>": 151647,
16
+ "<|object_ref_start|>": 151646,
17
+ "<|quad_end|>": 151651,
18
+ "<|quad_start|>": 151650,
19
+ "<|repo_name|>": 151663,
20
+ "<|video_pad|>": 151656,
21
+ "<|vision_end|>": 151653,
22
+ "<|vision_pad|>": 151654,
23
+ "<|vision_start|>": 151652
24
+ }
merged_models/checkpoint-2611/config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "ugaoo/peft_x8_7B",
3
+ "architectures": [
4
+ "Qwen2ForCausalLM"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "eos_token_id": 151645,
8
+ "hidden_act": "silu",
9
+ "hidden_size": 3584,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 18944,
12
+ "max_position_embeddings": 32768,
13
+ "max_window_layers": 28,
14
+ "model_type": "qwen2",
15
+ "num_attention_heads": 28,
16
+ "num_hidden_layers": 28,
17
+ "num_key_value_heads": 4,
18
+ "rms_norm_eps": 1e-06,
19
+ "rope_scaling": null,
20
+ "rope_theta": 1000000.0,
21
+ "sliding_window": null,
22
+ "tie_word_embeddings": false,
23
+ "torch_dtype": "bfloat16",
24
+ "transformers_version": "4.46.3",
25
+ "use_cache": false,
26
+ "use_sliding_window": false,
27
+ "vocab_size": 152064
28
+ }
merged_models/checkpoint-2611/generation_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 151645,
6
+ 151643
7
+ ],
8
+ "pad_token_id": 151643,
9
+ "repetition_penalty": 1.05,
10
+ "temperature": 0.7,
11
+ "top_k": 20,
12
+ "top_p": 0.8,
13
+ "transformers_version": "4.46.3"
14
+ }
merged_models/checkpoint-2611/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
merged_models/checkpoint-2611/model-00001-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e98b69564ba087ad5970831c6f865301e635e903c74799d131e1908cc94ceadd
3
+ size 4877660776
merged_models/checkpoint-2611/model-00002-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d35e415281f3a48d65cc0cbc1b2ca23e4cc2c4e0101d672e034a3a6291c7ad66
3
+ size 4932751008
merged_models/checkpoint-2611/model-00003-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2d8974ebe9687eb8558a318a0196ec3f1d236d1bc0c54918697d6e6f18c7c3d
3
+ size 4330865200
merged_models/checkpoint-2611/model-00004-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06006972c3be88e8a44fe21cfe2b0472b130780c781a741f8f90f1fe5ba3aae2
3
+ size 1089994880
merged_models/checkpoint-2611/model.safetensors.index.json ADDED
@@ -0,0 +1,346 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "total_size": 15231233024
4
+ },
5
+ "weight_map": {
6
+ "lm_head.weight": "model-00004-of-00004.safetensors",
7
+ "model.embed_tokens.weight": "model-00001-of-00004.safetensors",
8
+ "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
9
+ "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
10
+ "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
11
+ "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
12
+ "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
13
+ "model.layers.0.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
14
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
15
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
16
+ "model.layers.0.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
17
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
18
+ "model.layers.0.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
19
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
20
+ "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors",
21
+ "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
22
+ "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
23
+ "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
24
+ "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
25
+ "model.layers.1.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
26
+ "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
27
+ "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
28
+ "model.layers.1.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
29
+ "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
30
+ "model.layers.1.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
31
+ "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
32
+ "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors",
33
+ "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
34
+ "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
35
+ "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
36
+ "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
37
+ "model.layers.10.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
38
+ "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
39
+ "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
40
+ "model.layers.10.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
41
+ "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
42
+ "model.layers.10.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
43
+ "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
44
+ "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors",
45
+ "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
46
+ "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
47
+ "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
48
+ "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
49
+ "model.layers.11.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
50
+ "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
51
+ "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
52
+ "model.layers.11.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
53
+ "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
54
+ "model.layers.11.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
55
+ "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
56
+ "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors",
57
+ "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
58
+ "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
59
+ "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
60
+ "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
61
+ "model.layers.12.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
62
+ "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
63
+ "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
64
+ "model.layers.12.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
65
+ "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
66
+ "model.layers.12.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
67
+ "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
68
+ "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors",
69
+ "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
70
+ "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
71
+ "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
72
+ "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
73
+ "model.layers.13.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
74
+ "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
75
+ "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
76
+ "model.layers.13.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
77
+ "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
78
+ "model.layers.13.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
79
+ "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
80
+ "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors",
81
+ "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
82
+ "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
83
+ "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
84
+ "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
85
+ "model.layers.14.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
86
+ "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
87
+ "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
88
+ "model.layers.14.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
89
+ "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
90
+ "model.layers.14.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
91
+ "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
92
+ "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors",
93
+ "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
94
+ "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
95
+ "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
96
+ "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
97
+ "model.layers.15.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
98
+ "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
99
+ "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
100
+ "model.layers.15.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
101
+ "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
102
+ "model.layers.15.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
103
+ "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
104
+ "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors",
105
+ "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
106
+ "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
107
+ "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
108
+ "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
109
+ "model.layers.16.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
110
+ "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
111
+ "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
112
+ "model.layers.16.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
113
+ "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
114
+ "model.layers.16.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
115
+ "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
116
+ "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors",
117
+ "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
118
+ "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
119
+ "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
120
+ "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
121
+ "model.layers.17.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
122
+ "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
123
+ "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
124
+ "model.layers.17.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
125
+ "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
126
+ "model.layers.17.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
127
+ "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
128
+ "model.layers.18.input_layernorm.weight": "model-00003-of-00004.safetensors",
129
+ "model.layers.18.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
130
+ "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
131
+ "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
132
+ "model.layers.18.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
133
+ "model.layers.18.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
134
+ "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
135
+ "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
136
+ "model.layers.18.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
137
+ "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
138
+ "model.layers.18.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
139
+ "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
140
+ "model.layers.19.input_layernorm.weight": "model-00003-of-00004.safetensors",
141
+ "model.layers.19.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
142
+ "model.layers.19.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
143
+ "model.layers.19.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
144
+ "model.layers.19.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
145
+ "model.layers.19.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
146
+ "model.layers.19.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
147
+ "model.layers.19.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
148
+ "model.layers.19.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
149
+ "model.layers.19.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
150
+ "model.layers.19.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
151
+ "model.layers.19.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
152
+ "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors",
153
+ "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
154
+ "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
155
+ "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
156
+ "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
157
+ "model.layers.2.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
158
+ "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
159
+ "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
160
+ "model.layers.2.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
161
+ "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
162
+ "model.layers.2.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
163
+ "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
164
+ "model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors",
165
+ "model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
166
+ "model.layers.20.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
167
+ "model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
168
+ "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
169
+ "model.layers.20.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
170
+ "model.layers.20.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
171
+ "model.layers.20.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
172
+ "model.layers.20.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
173
+ "model.layers.20.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
174
+ "model.layers.20.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
175
+ "model.layers.20.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
176
+ "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors",
177
+ "model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
178
+ "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
179
+ "model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
180
+ "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
181
+ "model.layers.21.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
182
+ "model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
183
+ "model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
184
+ "model.layers.21.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
185
+ "model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
186
+ "model.layers.21.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
187
+ "model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
188
+ "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors",
189
+ "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
190
+ "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
191
+ "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
192
+ "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
193
+ "model.layers.22.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
194
+ "model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
195
+ "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
196
+ "model.layers.22.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
197
+ "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
198
+ "model.layers.22.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
199
+ "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
200
+ "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors",
201
+ "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
202
+ "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
203
+ "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
204
+ "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
205
+ "model.layers.23.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
206
+ "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
207
+ "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
208
+ "model.layers.23.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
209
+ "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
210
+ "model.layers.23.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
211
+ "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
212
+ "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors",
213
+ "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
214
+ "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
215
+ "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
216
+ "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
217
+ "model.layers.24.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
218
+ "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
219
+ "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
220
+ "model.layers.24.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
221
+ "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
222
+ "model.layers.24.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
223
+ "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
224
+ "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors",
225
+ "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
226
+ "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
227
+ "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
228
+ "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
229
+ "model.layers.25.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
230
+ "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
231
+ "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
232
+ "model.layers.25.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
233
+ "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
234
+ "model.layers.25.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
235
+ "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
236
+ "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors",
237
+ "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
238
+ "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
239
+ "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
240
+ "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
241
+ "model.layers.26.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
242
+ "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
243
+ "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
244
+ "model.layers.26.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
245
+ "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
246
+ "model.layers.26.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
247
+ "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
248
+ "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors",
249
+ "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
250
+ "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
251
+ "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
252
+ "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
253
+ "model.layers.27.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
254
+ "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
255
+ "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
256
+ "model.layers.27.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
257
+ "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
258
+ "model.layers.27.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
259
+ "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
260
+ "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors",
261
+ "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
262
+ "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
263
+ "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
264
+ "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
265
+ "model.layers.3.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
266
+ "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
267
+ "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
268
+ "model.layers.3.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
269
+ "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
270
+ "model.layers.3.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
271
+ "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
272
+ "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
273
+ "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
274
+ "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
275
+ "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
276
+ "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
277
+ "model.layers.4.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
278
+ "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
279
+ "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
280
+ "model.layers.4.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
281
+ "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
282
+ "model.layers.4.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
283
+ "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
284
+ "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors",
285
+ "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
286
+ "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
287
+ "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
288
+ "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
289
+ "model.layers.5.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
290
+ "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
291
+ "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
292
+ "model.layers.5.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
293
+ "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
294
+ "model.layers.5.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
295
+ "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
296
+ "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors",
297
+ "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
298
+ "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
299
+ "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
300
+ "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
301
+ "model.layers.6.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
302
+ "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
303
+ "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
304
+ "model.layers.6.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
305
+ "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
306
+ "model.layers.6.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
307
+ "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
308
+ "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors",
309
+ "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
310
+ "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
311
+ "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
312
+ "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
313
+ "model.layers.7.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
314
+ "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
315
+ "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
316
+ "model.layers.7.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
317
+ "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
318
+ "model.layers.7.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
319
+ "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
320
+ "model.layers.8.input_layernorm.weight": "model-00002-of-00004.safetensors",
321
+ "model.layers.8.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
322
+ "model.layers.8.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
323
+ "model.layers.8.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
324
+ "model.layers.8.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
325
+ "model.layers.8.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
326
+ "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
327
+ "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
328
+ "model.layers.8.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
329
+ "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
330
+ "model.layers.8.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
331
+ "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
332
+ "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors",
333
+ "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
334
+ "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
335
+ "model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
336
+ "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
337
+ "model.layers.9.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
338
+ "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
339
+ "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
340
+ "model.layers.9.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
341
+ "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
342
+ "model.layers.9.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
343
+ "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
344
+ "model.norm.weight": "model-00003-of-00004.safetensors"
345
+ }
346
+ }
merged_models/checkpoint-2611/special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|im_end|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": {
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }
merged_models/checkpoint-2611/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa
3
+ size 11421896
merged_models/checkpoint-2611/tokenizer_config.json ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ }
181
+ },
182
+ "additional_special_tokens": [
183
+ "<|im_start|>",
184
+ "<|im_end|>",
185
+ "<|object_ref_start|>",
186
+ "<|object_ref_end|>",
187
+ "<|box_start|>",
188
+ "<|box_end|>",
189
+ "<|quad_start|>",
190
+ "<|quad_end|>",
191
+ "<|vision_start|>",
192
+ "<|vision_end|>",
193
+ "<|vision_pad|>",
194
+ "<|image_pad|>",
195
+ "<|video_pad|>"
196
+ ],
197
+ "bos_token": null,
198
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. 
You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
199
+ "clean_up_tokenization_spaces": false,
200
+ "eos_token": "<|im_end|>",
201
+ "errors": "replace",
202
+ "model_max_length": 131072,
203
+ "pad_token": "<|endoftext|>",
204
+ "split_special_tokens": false,
205
+ "tokenizer_class": "Qwen2Tokenizer",
206
+ "unk_token": null
207
+ }
merged_models/checkpoint-2611/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
merged_models/checkpoint-5222/added_tokens.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</tool_call>": 151658,
3
+ "<tool_call>": 151657,
4
+ "<|box_end|>": 151649,
5
+ "<|box_start|>": 151648,
6
+ "<|endoftext|>": 151643,
7
+ "<|file_sep|>": 151664,
8
+ "<|fim_middle|>": 151660,
9
+ "<|fim_pad|>": 151662,
10
+ "<|fim_prefix|>": 151659,
11
+ "<|fim_suffix|>": 151661,
12
+ "<|im_end|>": 151645,
13
+ "<|im_start|>": 151644,
14
+ "<|image_pad|>": 151655,
15
+ "<|object_ref_end|>": 151647,
16
+ "<|object_ref_start|>": 151646,
17
+ "<|quad_end|>": 151651,
18
+ "<|quad_start|>": 151650,
19
+ "<|repo_name|>": 151663,
20
+ "<|video_pad|>": 151656,
21
+ "<|vision_end|>": 151653,
22
+ "<|vision_pad|>": 151654,
23
+ "<|vision_start|>": 151652
24
+ }
merged_models/checkpoint-5222/config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "ugaoo/peft_x8_7B",
3
+ "architectures": [
4
+ "Qwen2ForCausalLM"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "eos_token_id": 151645,
8
+ "hidden_act": "silu",
9
+ "hidden_size": 3584,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 18944,
12
+ "max_position_embeddings": 32768,
13
+ "max_window_layers": 28,
14
+ "model_type": "qwen2",
15
+ "num_attention_heads": 28,
16
+ "num_hidden_layers": 28,
17
+ "num_key_value_heads": 4,
18
+ "rms_norm_eps": 1e-06,
19
+ "rope_scaling": null,
20
+ "rope_theta": 1000000.0,
21
+ "sliding_window": null,
22
+ "tie_word_embeddings": false,
23
+ "torch_dtype": "bfloat16",
24
+ "transformers_version": "4.46.3",
25
+ "use_cache": false,
26
+ "use_sliding_window": false,
27
+ "vocab_size": 152064
28
+ }
merged_models/checkpoint-5222/generation_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 151645,
6
+ 151643
7
+ ],
8
+ "pad_token_id": 151643,
9
+ "repetition_penalty": 1.05,
10
+ "temperature": 0.7,
11
+ "top_k": 20,
12
+ "top_p": 0.8,
13
+ "transformers_version": "4.46.3"
14
+ }
merged_models/checkpoint-5222/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
merged_models/checkpoint-5222/model-00001-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f0439d0a715d2218172e6733042a6a14122ec96279e80314ec15bf20d85f011
3
+ size 4877660776
merged_models/checkpoint-5222/model-00002-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58e244636229ed9c20be627998e24534b32de54a8d5aa0570ae23cdc3f880a89
3
+ size 4932751008
merged_models/checkpoint-5222/model-00003-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c362eef088cd8f3341a31d1d6ae14ec0d11de0634b569c1d278ef36b93173e91
3
+ size 4330865200
merged_models/checkpoint-5222/model-00004-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06006972c3be88e8a44fe21cfe2b0472b130780c781a741f8f90f1fe5ba3aae2
3
+ size 1089994880
merged_models/checkpoint-5222/model.safetensors.index.json ADDED
@@ -0,0 +1,346 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "total_size": 15231233024
4
+ },
5
+ "weight_map": {
6
+ "lm_head.weight": "model-00004-of-00004.safetensors",
7
+ "model.embed_tokens.weight": "model-00001-of-00004.safetensors",
8
+ "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
9
+ "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
10
+ "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
11
+ "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
12
+ "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
13
+ "model.layers.0.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
14
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
15
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
16
+ "model.layers.0.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
17
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
18
+ "model.layers.0.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
19
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
20
+ "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors",
21
+ "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
22
+ "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
23
+ "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
24
+ "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
25
+ "model.layers.1.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
26
+ "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
27
+ "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
28
+ "model.layers.1.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
29
+ "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
30
+ "model.layers.1.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
31
+ "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
32
+ "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors",
33
+ "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
34
+ "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
35
+ "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
36
+ "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
37
+ "model.layers.10.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
38
+ "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
39
+ "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
40
+ "model.layers.10.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
41
+ "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
42
+ "model.layers.10.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
43
+ "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
44
+ "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors",
45
+ "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
46
+ "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
47
+ "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
48
+ "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
49
+ "model.layers.11.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
50
+ "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
51
+ "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
52
+ "model.layers.11.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
53
+ "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
54
+ "model.layers.11.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
55
+ "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
56
+ "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors",
57
+ "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
58
+ "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
59
+ "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
60
+ "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
61
+ "model.layers.12.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
62
+ "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
63
+ "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
64
+ "model.layers.12.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
65
+ "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
66
+ "model.layers.12.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
67
+ "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
68
+ "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors",
69
+ "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
70
+ "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
71
+ "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
72
+ "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
73
+ "model.layers.13.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
74
+ "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
75
+ "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
76
+ "model.layers.13.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
77
+ "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
78
+ "model.layers.13.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
79
+ "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
80
+ "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors",
81
+ "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
82
+ "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
83
+ "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
84
+ "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
85
+ "model.layers.14.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
86
+ "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
87
+ "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
88
+ "model.layers.14.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
89
+ "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
90
+ "model.layers.14.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
91
+ "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
92
+ "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors",
93
+ "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
94
+ "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
95
+ "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
96
+ "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
97
+ "model.layers.15.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
98
+ "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
99
+ "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
100
+ "model.layers.15.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
101
+ "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
102
+ "model.layers.15.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
103
+ "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
104
+ "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors",
105
+ "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
106
+ "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
107
+ "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
108
+ "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
109
+ "model.layers.16.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
110
+ "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
111
+ "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
112
+ "model.layers.16.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
113
+ "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
114
+ "model.layers.16.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
115
+ "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
116
+ "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors",
117
+ "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
118
+ "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
119
+ "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
120
+ "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
121
+ "model.layers.17.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
122
+ "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
123
+ "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
124
+ "model.layers.17.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
125
+ "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
126
+ "model.layers.17.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
127
+ "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
128
+ "model.layers.18.input_layernorm.weight": "model-00003-of-00004.safetensors",
129
+ "model.layers.18.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
130
+ "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
131
+ "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
132
+ "model.layers.18.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
133
+ "model.layers.18.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
134
+ "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
135
+ "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
136
+ "model.layers.18.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
137
+ "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
138
+ "model.layers.18.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
139
+ "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
140
+ "model.layers.19.input_layernorm.weight": "model-00003-of-00004.safetensors",
141
+ "model.layers.19.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
142
+ "model.layers.19.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
143
+ "model.layers.19.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
144
+ "model.layers.19.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
145
+ "model.layers.19.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
146
+ "model.layers.19.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
147
+ "model.layers.19.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
148
+ "model.layers.19.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
149
+ "model.layers.19.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
150
+ "model.layers.19.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
151
+ "model.layers.19.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
152
+ "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors",
153
+ "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
154
+ "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
155
+ "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
156
+ "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
157
+ "model.layers.2.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
158
+ "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
159
+ "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
160
+ "model.layers.2.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
161
+ "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
162
+ "model.layers.2.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
163
+ "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
164
+ "model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors",
165
+ "model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
166
+ "model.layers.20.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
167
+ "model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
168
+ "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
169
+ "model.layers.20.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
170
+ "model.layers.20.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
171
+ "model.layers.20.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
172
+ "model.layers.20.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
173
+ "model.layers.20.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
174
+ "model.layers.20.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
175
+ "model.layers.20.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
176
+ "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors",
177
+ "model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
178
+ "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
179
+ "model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
180
+ "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
181
+ "model.layers.21.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
182
+ "model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
183
+ "model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
184
+ "model.layers.21.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
185
+ "model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
186
+ "model.layers.21.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
187
+ "model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
188
+ "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors",
189
+ "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
190
+ "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
191
+ "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
192
+ "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
193
+ "model.layers.22.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
194
+ "model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
195
+ "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
196
+ "model.layers.22.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
197
+ "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
198
+ "model.layers.22.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
199
+ "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
200
+ "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors",
201
+ "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
202
+ "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
203
+ "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
204
+ "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
205
+ "model.layers.23.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
206
+ "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
207
+ "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
208
+ "model.layers.23.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
209
+ "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
210
+ "model.layers.23.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
211
+ "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
212
+ "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors",
213
+ "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
214
+ "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
215
+ "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
216
+ "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
217
+ "model.layers.24.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
218
+ "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
219
+ "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
220
+ "model.layers.24.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
221
+ "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
222
+ "model.layers.24.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
223
+ "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
224
+ "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors",
225
+ "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
226
+ "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
227
+ "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
228
+ "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
229
+ "model.layers.25.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
230
+ "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
231
+ "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
232
+ "model.layers.25.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
233
+ "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
234
+ "model.layers.25.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
235
+ "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
236
+ "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors",
237
+ "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
238
+ "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
239
+ "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
240
+ "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
241
+ "model.layers.26.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
242
+ "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
243
+ "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
244
+ "model.layers.26.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
245
+ "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
246
+ "model.layers.26.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
247
+ "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
248
+ "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors",
249
+ "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
250
+ "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
251
+ "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
252
+ "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
253
+ "model.layers.27.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
254
+ "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
255
+ "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
256
+ "model.layers.27.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
257
+ "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
258
+ "model.layers.27.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
259
+ "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
260
+ "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors",
261
+ "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
262
+ "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
263
+ "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
264
+ "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
265
+ "model.layers.3.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
266
+ "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
267
+ "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
268
+ "model.layers.3.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
269
+ "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
270
+ "model.layers.3.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
271
+ "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
272
+ "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
273
+ "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
274
+ "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
275
+ "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
276
+ "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
277
+ "model.layers.4.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
278
+ "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
279
+ "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
280
+ "model.layers.4.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
281
+ "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
282
+ "model.layers.4.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
283
+ "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
284
+ "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors",
285
+ "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
286
+ "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
287
+ "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
288
+ "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
289
+ "model.layers.5.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
290
+ "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
291
+ "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
292
+ "model.layers.5.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
293
+ "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
294
+ "model.layers.5.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
295
+ "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
296
+ "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors",
297
+ "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
298
+ "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
299
+ "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
300
+ "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
301
+ "model.layers.6.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
302
+ "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
303
+ "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
304
+ "model.layers.6.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
305
+ "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
306
+ "model.layers.6.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
307
+ "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
308
+ "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors",
309
+ "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
310
+ "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
311
+ "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
312
+ "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
313
+ "model.layers.7.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
314
+ "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
315
+ "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
316
+ "model.layers.7.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
317
+ "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
318
+ "model.layers.7.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
319
+ "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
320
+ "model.layers.8.input_layernorm.weight": "model-00002-of-00004.safetensors",
321
+ "model.layers.8.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
322
+ "model.layers.8.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
323
+ "model.layers.8.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
324
+ "model.layers.8.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
325
+ "model.layers.8.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
326
+ "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
327
+ "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
328
+ "model.layers.8.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
329
+ "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
330
+ "model.layers.8.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
331
+ "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
332
+ "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors",
333
+ "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
334
+ "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
335
+ "model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
336
+ "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
337
+ "model.layers.9.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
338
+ "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
339
+ "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
340
+ "model.layers.9.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
341
+ "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
342
+ "model.layers.9.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
343
+ "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
344
+ "model.norm.weight": "model-00003-of-00004.safetensors"
345
+ }
346
+ }
merged_models/checkpoint-5222/special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|im_end|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": {
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }
merged_models/checkpoint-5222/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa
3
+ size 11421896
merged_models/checkpoint-5222/tokenizer_config.json ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ }
181
+ },
182
+ "additional_special_tokens": [
183
+ "<|im_start|>",
184
+ "<|im_end|>",
185
+ "<|object_ref_start|>",
186
+ "<|object_ref_end|>",
187
+ "<|box_start|>",
188
+ "<|box_end|>",
189
+ "<|quad_start|>",
190
+ "<|quad_end|>",
191
+ "<|vision_start|>",
192
+ "<|vision_end|>",
193
+ "<|vision_pad|>",
194
+ "<|image_pad|>",
195
+ "<|video_pad|>"
196
+ ],
197
+ "bos_token": null,
198
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. 
You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
199
+ "clean_up_tokenization_spaces": false,
200
+ "eos_token": "<|im_end|>",
201
+ "errors": "replace",
202
+ "model_max_length": 131072,
203
+ "pad_token": "<|endoftext|>",
204
+ "split_special_tokens": false,
205
+ "tokenizer_class": "Qwen2Tokenizer",
206
+ "unk_token": null
207
+ }
merged_models/checkpoint-5222/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
merged_models/eval.py ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import pandas as pd
4
+ from typing import Dict, List
5
+ import argparse
6
+
7
+ # Add this at the beginning of the script
8
+ parser = argparse.ArgumentParser(description='Process model results')
9
+ parser.add_argument('--modelname', type=str, help='Model name to use as prefix')
10
+ args = parser.parse_args()
11
+
12
def find_result_files(root_dir: str) -> List[tuple]:
    """
    Find all results JSON files and their corresponding checkpoint names.

    A file is picked up when:
      * the directory path it lives in contains "result" (case-insensitive),
      * its name starts with "result" and ends with ".json", and
      * one component of the directory path starts with "checkpoint-"
        (the first such component is used as the checkpoint name).

    Parameters:
        root_dir (str): Directory tree to walk.

    Returns:
        List of (checkpoint_name, file_path) tuples, ordered by the numeric
        step after "checkpoint-" (names with a non-numeric suffix come last,
        alphabetically) and then by file path. The explicit ordering keeps
        the output independent of the filesystem's enumeration order.
    """
    prefix = 'checkpoint-'
    result_files = []

    for root, dirs, files in os.walk(root_dir):
        if 'result' not in root.lower():
            continue

        # The checkpoint name depends only on the directory, not on the file.
        checkpoint = next(
            (part for part in root.split(os.sep) if part.startswith(prefix)),
            None,
        )
        if not checkpoint:
            continue

        for file in files:
            if file.startswith('result') and file.endswith('.json'):
                result_files.append((checkpoint, os.path.join(root, file)))

    def _order(entry):
        name, path = entry
        step = name[len(prefix):]
        # The leading 0/1 flag keeps ints and strings from being compared.
        if step.isdecimal():
            return (0, int(step), path)
        return (1, step, path)

    result_files.sort(key=_order)
    return result_files
35
+
36
def extract_metrics(json_path: str) -> Dict[str, float]:
    """
    Read one results JSON file and return the accuracy of each tracked task
    as a percentage.

    For every tracked task present under the file's "results" key, the
    "acc,none" value is multiplied by 100 and rounded to 3 decimals; a task
    that is present but has no "acc,none" entry maps to None. Tasks absent
    from the file are left out. An "average" key holds the mean of the
    non-None percentages (rounded to 3 decimals), or None if there are none.
    """
    with open(json_path, 'r') as handle:
        payload = json.load(handle)

    task_results = payload.get('results', {})
    tracked_tasks = (
        'medmcqa',
        'medqa_4options',
        'mmlu_anatomy',
        'mmlu_clinical_knowledge',
        'mmlu_college_biology',
        'mmlu_college_medicine',
        'mmlu_medical_genetics',
        'mmlu_professional_medicine',
        'pubmedqa',
    )

    metrics = {}
    for name in tracked_tasks:
        if name not in task_results:
            continue
        accuracy = task_results[name].get('acc,none')
        # Fraction -> percentage, 3 decimal places
        metrics[name] = None if accuracy is None else round(accuracy * 100, 3)

    # Mean over the tasks that actually reported an accuracy
    scored = [score for score in metrics.values() if score is not None]
    metrics['average'] = round(sum(scored) / len(scored), 3) if scored else None

    return metrics
74
+
75
def process_all_results(root_dir: str, output_file: str = 'model_metrics.csv', checkpoint_prefix: str = None):
    """
    Collect the metrics of every result file under root_dir into one CSV,
    with one row per result file and values expressed as percentages.

    Parameters:
        root_dir (str): Root directory to search for result files
        output_file (str): Output CSV filename
        checkpoint_prefix (str): Optional text prepended to each checkpoint
            name in the "checkpoint" column (e.g., "model_name_")

    Prints the output location on success, or a notice when no result files
    were found (in which case no CSV is written).
    """
    rows = []
    for checkpoint, file_path in find_result_files(root_dir):
        row = extract_metrics(file_path)
        # Label the row, with the prefix when one was supplied
        row['checkpoint'] = f"{checkpoint_prefix}{checkpoint}" if checkpoint_prefix else checkpoint
        rows.append(row)

    if not rows:
        print("No result files found.")
        return

    table = pd.DataFrame(rows)

    # Column order: checkpoint, average, then the per-task columns
    leading = ['checkpoint', 'average']
    table = table[leading + [name for name in table.columns if name not in leading]]

    def _as_text(value):
        # Render as a fixed 3-decimal string; leave missing values untouched
        return f"{value:.3f}" if pd.notnull(value) else value

    for name in [col for col in table.columns if col != 'checkpoint']:
        table[name] = table[name].apply(_as_text)

    table.to_csv(output_file, index=False)
    print(f"Results saved to {output_file}")
111
+
112
# Script entry point
if __name__ == "__main__":
    # Search from the current working directory; change this to point at
    # another results tree.
    root_directory = "."

    # To write plain checkpoint names (no prefix), call:
    # process_all_results(root_directory)

    # --modelname (None when omitted) becomes the checkpoint label prefix.
    process_all_results(root_directory, output_file='model_metrics.csv', checkpoint_prefix=args.modelname)
merged_models/eval.sh ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
#
# Evaluate every checkpoint-* directory in the current working directory with
# lm_eval on the "multimedqa" task group, in ascending numeric step order.
# Results are written under ./results.
#
# Usage: run from the directory that contains the checkpoint-* folders.
# Exit status: 0 if every evaluation succeeded (or none were found),
#              non-zero if at least one lm_eval run failed.

set -u

#######################################
# Run lm_eval once per checkpoint directory.
# Outputs:   progress lines on stdout, warnings/errors on stderr
# Returns:   0 on success, 1 if any lm_eval invocation failed
#######################################
main() {
  local dir checkpoint
  local -a found=() checkpoints=()
  local failures=0

  # Glob instead of parsing `ls`; the trailing slash restricts matches to
  # directories and keeps names containing spaces intact.
  for dir in checkpoint-*/; do
    [[ -d "$dir" ]] || continue # pattern stayed literal: nothing matched
    found+=("${dir%/}")
  done

  if ((${#found[@]} == 0)); then
    printf 'No checkpoint-* directories found in %s\n' "$PWD" >&2
    return 0
  fi

  # Sort numerically on the part after the first '-'
  mapfile -t checkpoints < <(printf '%s\n' "${found[@]}" | sort -t '-' -k2 -n)

  for checkpoint in "${checkpoints[@]}"; do
    echo "Evaluating $checkpoint..."

    if lm_eval --model hf \
      --model_args "pretrained=${checkpoint},parallelize=True,trust_remote_code=True" \
      --tasks multimedqa \
      --device cuda:0 \
      --batch_size auto \
      --output_path "results" \
      --log_samples; then
      echo "Finished evaluating $checkpoint"
    else
      # Keep going with the remaining checkpoints, but do not hide the failure.
      printf 'lm_eval failed for %s (exit status %d)\n' "$checkpoint" "$?" >&2
      failures=$((failures + 1))
    fi

    echo "----------------------------------------"
  done

  ((failures == 0))
}

main "$@"
merged_models/model_metrics.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ checkpoint,average,medmcqa,medqa_4options,mmlu_anatomy,mmlu_clinical_knowledge,mmlu_college_biology,mmlu_college_medicine,mmlu_medical_genetics,mmlu_professional_medicine,pubmedqa
2
+ checkpoint-2611,76.449,59.264,68.028,71.852,82.264,86.111,74.566,87.000,82.353,76.600
3
+ checkpoint-5222,76.163,57.805,67.321,70.370,81.132,87.500,73.988,88.000,82.353,77.000
merged_models/results/checkpoint-2611/results_2025-03-17T19-53-01.924844.json ADDED
@@ -0,0 +1,578 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "multimedqa": {
4
+ "acc,none": 0.6539389638041164,
5
+ "acc_stderr,none": 0.005579639659176411,
6
+ "alias": "multimedqa"
7
+ },
8
+ "medmcqa": {
9
+ "alias": " - medmcqa",
10
+ "acc,none": 0.5926368634950993,
11
+ "acc_stderr,none": 0.007597893115829174,
12
+ "acc_norm,none": 0.5926368634950993,
13
+ "acc_norm_stderr,none": 0.007597893115829174
14
+ },
15
+ "medqa_4options": {
16
+ "alias": " - medqa_4options",
17
+ "acc,none": 0.6802827965435978,
18
+ "acc_stderr,none": 0.013076279392845757,
19
+ "acc_norm,none": 0.6802827965435978,
20
+ "acc_norm_stderr,none": 0.013076279392845757
21
+ },
22
+ "mmlu_anatomy": {
23
+ "alias": " - anatomy (mmlu)",
24
+ "acc,none": 0.7185185185185186,
25
+ "acc_stderr,none": 0.03885004245800254
26
+ },
27
+ "mmlu_clinical_knowledge": {
28
+ "alias": " - clinical_knowledge (mmlu)",
29
+ "acc,none": 0.8226415094339623,
30
+ "acc_stderr,none": 0.023508739218846927
31
+ },
32
+ "mmlu_college_biology": {
33
+ "alias": " - college_biology (mmlu)",
34
+ "acc,none": 0.8611111111111112,
35
+ "acc_stderr,none": 0.02891980295613489
36
+ },
37
+ "mmlu_college_medicine": {
38
+ "alias": " - college_medicine (mmlu)",
39
+ "acc,none": 0.7456647398843931,
40
+ "acc_stderr,none": 0.0332055644308557
41
+ },
42
+ "mmlu_medical_genetics": {
43
+ "alias": " - medical_genetics (mmlu)",
44
+ "acc,none": 0.87,
45
+ "acc_stderr,none": 0.033799766898963086
46
+ },
47
+ "mmlu_professional_medicine": {
48
+ "alias": " - professional_medicine (mmlu)",
49
+ "acc,none": 0.8235294117647058,
50
+ "acc_stderr,none": 0.023157468308559345
51
+ },
52
+ "pubmedqa": {
53
+ "alias": " - pubmedqa",
54
+ "acc,none": 0.766,
55
+ "acc_stderr,none": 0.018952741564893707
56
+ }
57
+ },
58
+ "groups": {
59
+ "multimedqa": {
60
+ "acc,none": 0.6539389638041164,
61
+ "acc_stderr,none": 0.005579639659176411,
62
+ "alias": "multimedqa"
63
+ }
64
+ },
65
+ "group_subtasks": {
66
+ "multimedqa": [
67
+ "pubmedqa",
68
+ "medmcqa",
69
+ "medqa_4options",
70
+ "mmlu_anatomy",
71
+ "mmlu_clinical_knowledge",
72
+ "mmlu_college_medicine",
73
+ "mmlu_medical_genetics",
74
+ "mmlu_professional_medicine",
75
+ "mmlu_college_biology"
76
+ ]
77
+ },
78
+ "configs": {
79
+ "medmcqa": {
80
+ "task": "medmcqa",
81
+ "dataset_path": "medmcqa",
82
+ "training_split": "train",
83
+ "validation_split": "validation",
84
+ "test_split": "validation",
85
+ "doc_to_text": "def doc_to_text(doc) -> str:\n \"\"\"\n Question: <question>\n Choices:\n A. <choice1>\n B. <choice2>\n C. <choice3>\n D. <choice4>\n Answer:\n \"\"\"\n choices = [doc[\"opa\"], doc[\"opb\"], doc[\"opc\"], doc[\"opd\"]]\n option_choices = {\n \"A\": choices[0],\n \"B\": choices[1],\n \"C\": choices[2],\n \"D\": choices[3],\n }\n\n prompt = \"Question: \" + doc[\"question\"] + \"\\nChoices:\\n\"\n for choice, option in option_choices.items():\n prompt += f\"{choice.upper()}. {option}\\n\"\n prompt += \"Answer:\"\n return prompt\n",
86
+ "doc_to_target": "cop",
87
+ "doc_to_choice": [
88
+ "A",
89
+ "B",
90
+ "C",
91
+ "D"
92
+ ],
93
+ "description": "",
94
+ "target_delimiter": " ",
95
+ "fewshot_delimiter": "\n\n",
96
+ "num_fewshot": 0,
97
+ "metric_list": [
98
+ {
99
+ "metric": "acc",
100
+ "aggregation": "mean",
101
+ "higher_is_better": true
102
+ },
103
+ {
104
+ "metric": "acc_norm",
105
+ "aggregation": "mean",
106
+ "higher_is_better": true
107
+ }
108
+ ],
109
+ "output_type": "multiple_choice",
110
+ "repeats": 1,
111
+ "should_decontaminate": true,
112
+ "doc_to_decontamination_query": "{{question}}"
113
+ },
114
+ "medqa_4options": {
115
+ "task": "medqa_4options",
116
+ "dataset_path": "GBaker/MedQA-USMLE-4-options-hf",
117
+ "training_split": "train",
118
+ "validation_split": "validation",
119
+ "test_split": "test",
120
+ "doc_to_text": "def doc_to_text(doc) -> str:\n option_choices = {\n \"A\": doc[\"ending0\"],\n \"B\": doc[\"ending1\"],\n \"C\": doc[\"ending2\"],\n \"D\": doc[\"ending3\"],\n }\n answers = \"\".join((f\"{k}. {v}\\n\") for k, v in option_choices.items())\n return f\"Question: {doc['sent1']}\\n{answers}Answer:\"\n",
121
+ "doc_to_target": "def doc_to_target(doc) -> int:\n return doc[\"label\"]\n",
122
+ "doc_to_choice": [
123
+ "A",
124
+ "B",
125
+ "C",
126
+ "D"
127
+ ],
128
+ "description": "",
129
+ "target_delimiter": " ",
130
+ "fewshot_delimiter": "\n\n",
131
+ "num_fewshot": 0,
132
+ "metric_list": [
133
+ {
134
+ "metric": "acc",
135
+ "aggregation": "mean",
136
+ "higher_is_better": true
137
+ },
138
+ {
139
+ "metric": "acc_norm",
140
+ "aggregation": "mean",
141
+ "higher_is_better": true
142
+ }
143
+ ],
144
+ "output_type": "multiple_choice",
145
+ "repeats": 1,
146
+ "should_decontaminate": false
147
+ },
148
+ "mmlu_anatomy": {
149
+ "task": "mmlu_anatomy",
150
+ "task_alias": "anatomy (mmlu)",
151
+ "tag": "mmlu_stem_tasks",
152
+ "dataset_path": "hails/mmlu_no_train",
153
+ "dataset_name": "anatomy",
154
+ "dataset_kwargs": {
155
+ "trust_remote_code": true
156
+ },
157
+ "test_split": "test",
158
+ "fewshot_split": "dev",
159
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
160
+ "doc_to_target": "answer",
161
+ "doc_to_choice": [
162
+ "A",
163
+ "B",
164
+ "C",
165
+ "D"
166
+ ],
167
+ "description": "The following are multiple choice questions (with answers) about anatomy.\n\n",
168
+ "target_delimiter": " ",
169
+ "fewshot_delimiter": "\n\n",
170
+ "fewshot_config": {
171
+ "sampler": "first_n"
172
+ },
173
+ "num_fewshot": 0,
174
+ "metric_list": [
175
+ {
176
+ "metric": "acc",
177
+ "aggregation": "mean",
178
+ "higher_is_better": true
179
+ }
180
+ ],
181
+ "output_type": "multiple_choice",
182
+ "repeats": 1,
183
+ "should_decontaminate": false,
184
+ "metadata": {
185
+ "version": 1.0
186
+ }
187
+ },
188
+ "mmlu_clinical_knowledge": {
189
+ "task": "mmlu_clinical_knowledge",
190
+ "task_alias": "clinical_knowledge (mmlu)",
191
+ "tag": "mmlu_other_tasks",
192
+ "dataset_path": "hails/mmlu_no_train",
193
+ "dataset_name": "clinical_knowledge",
194
+ "dataset_kwargs": {
195
+ "trust_remote_code": true
196
+ },
197
+ "test_split": "test",
198
+ "fewshot_split": "dev",
199
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
200
+ "doc_to_target": "answer",
201
+ "doc_to_choice": [
202
+ "A",
203
+ "B",
204
+ "C",
205
+ "D"
206
+ ],
207
+ "description": "The following are multiple choice questions (with answers) about clinical knowledge.\n\n",
208
+ "target_delimiter": " ",
209
+ "fewshot_delimiter": "\n\n",
210
+ "fewshot_config": {
211
+ "sampler": "first_n"
212
+ },
213
+ "num_fewshot": 0,
214
+ "metric_list": [
215
+ {
216
+ "metric": "acc",
217
+ "aggregation": "mean",
218
+ "higher_is_better": true
219
+ }
220
+ ],
221
+ "output_type": "multiple_choice",
222
+ "repeats": 1,
223
+ "should_decontaminate": false,
224
+ "metadata": {
225
+ "version": 1.0
226
+ }
227
+ },
228
+ "mmlu_college_biology": {
229
+ "task": "mmlu_college_biology",
230
+ "task_alias": "college_biology (mmlu)",
231
+ "tag": "mmlu_stem_tasks",
232
+ "dataset_path": "hails/mmlu_no_train",
233
+ "dataset_name": "college_biology",
234
+ "dataset_kwargs": {
235
+ "trust_remote_code": true
236
+ },
237
+ "test_split": "test",
238
+ "fewshot_split": "dev",
239
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
240
+ "doc_to_target": "answer",
241
+ "doc_to_choice": [
242
+ "A",
243
+ "B",
244
+ "C",
245
+ "D"
246
+ ],
247
+ "description": "The following are multiple choice questions (with answers) about college biology.\n\n",
248
+ "target_delimiter": " ",
249
+ "fewshot_delimiter": "\n\n",
250
+ "fewshot_config": {
251
+ "sampler": "first_n"
252
+ },
253
+ "num_fewshot": 0,
254
+ "metric_list": [
255
+ {
256
+ "metric": "acc",
257
+ "aggregation": "mean",
258
+ "higher_is_better": true
259
+ }
260
+ ],
261
+ "output_type": "multiple_choice",
262
+ "repeats": 1,
263
+ "should_decontaminate": false,
264
+ "metadata": {
265
+ "version": 1.0
266
+ }
267
+ },
268
+ "mmlu_college_medicine": {
269
+ "task": "mmlu_college_medicine",
270
+ "task_alias": "college_medicine (mmlu)",
271
+ "tag": "mmlu_other_tasks",
272
+ "dataset_path": "hails/mmlu_no_train",
273
+ "dataset_name": "college_medicine",
274
+ "dataset_kwargs": {
275
+ "trust_remote_code": true
276
+ },
277
+ "test_split": "test",
278
+ "fewshot_split": "dev",
279
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
280
+ "doc_to_target": "answer",
281
+ "doc_to_choice": [
282
+ "A",
283
+ "B",
284
+ "C",
285
+ "D"
286
+ ],
287
+ "description": "The following are multiple choice questions (with answers) about college medicine.\n\n",
288
+ "target_delimiter": " ",
289
+ "fewshot_delimiter": "\n\n",
290
+ "fewshot_config": {
291
+ "sampler": "first_n"
292
+ },
293
+ "num_fewshot": 0,
294
+ "metric_list": [
295
+ {
296
+ "metric": "acc",
297
+ "aggregation": "mean",
298
+ "higher_is_better": true
299
+ }
300
+ ],
301
+ "output_type": "multiple_choice",
302
+ "repeats": 1,
303
+ "should_decontaminate": false,
304
+ "metadata": {
305
+ "version": 1.0
306
+ }
307
+ },
308
+ "mmlu_medical_genetics": {
309
+ "task": "mmlu_medical_genetics",
310
+ "task_alias": "medical_genetics (mmlu)",
311
+ "tag": "mmlu_other_tasks",
312
+ "dataset_path": "hails/mmlu_no_train",
313
+ "dataset_name": "medical_genetics",
314
+ "dataset_kwargs": {
315
+ "trust_remote_code": true
316
+ },
317
+ "test_split": "test",
318
+ "fewshot_split": "dev",
319
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
320
+ "doc_to_target": "answer",
321
+ "doc_to_choice": [
322
+ "A",
323
+ "B",
324
+ "C",
325
+ "D"
326
+ ],
327
+ "description": "The following are multiple choice questions (with answers) about medical genetics.\n\n",
328
+ "target_delimiter": " ",
329
+ "fewshot_delimiter": "\n\n",
330
+ "fewshot_config": {
331
+ "sampler": "first_n"
332
+ },
333
+ "num_fewshot": 0,
334
+ "metric_list": [
335
+ {
336
+ "metric": "acc",
337
+ "aggregation": "mean",
338
+ "higher_is_better": true
339
+ }
340
+ ],
341
+ "output_type": "multiple_choice",
342
+ "repeats": 1,
343
+ "should_decontaminate": false,
344
+ "metadata": {
345
+ "version": 1.0
346
+ }
347
+ },
348
+ "mmlu_professional_medicine": {
349
+ "task": "mmlu_professional_medicine",
350
+ "task_alias": "professional_medicine (mmlu)",
351
+ "tag": "mmlu_other_tasks",
352
+ "dataset_path": "hails/mmlu_no_train",
353
+ "dataset_name": "professional_medicine",
354
+ "dataset_kwargs": {
355
+ "trust_remote_code": true
356
+ },
357
+ "test_split": "test",
358
+ "fewshot_split": "dev",
359
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
360
+ "doc_to_target": "answer",
361
+ "doc_to_choice": [
362
+ "A",
363
+ "B",
364
+ "C",
365
+ "D"
366
+ ],
367
+ "description": "The following are multiple choice questions (with answers) about professional medicine.\n\n",
368
+ "target_delimiter": " ",
369
+ "fewshot_delimiter": "\n\n",
370
+ "fewshot_config": {
371
+ "sampler": "first_n"
372
+ },
373
+ "num_fewshot": 0,
374
+ "metric_list": [
375
+ {
376
+ "metric": "acc",
377
+ "aggregation": "mean",
378
+ "higher_is_better": true
379
+ }
380
+ ],
381
+ "output_type": "multiple_choice",
382
+ "repeats": 1,
383
+ "should_decontaminate": false,
384
+ "metadata": {
385
+ "version": 1.0
386
+ }
387
+ },
388
+ "pubmedqa": {
389
+ "task": "pubmedqa",
390
+ "dataset_path": "bigbio/pubmed_qa",
391
+ "dataset_name": "pubmed_qa_labeled_fold0_source",
392
+ "training_split": "train",
393
+ "validation_split": "validation",
394
+ "test_split": "test",
395
+ "doc_to_text": "def doc_to_text(doc) -> str:\n ctxs = \"\\n\".join(doc[\"CONTEXTS\"])\n return \"Abstract: {}\\nQuestion: {}\\nAnswer:\".format(\n ctxs,\n doc[\"QUESTION\"],\n )\n",
396
+ "doc_to_target": "final_decision",
397
+ "doc_to_choice": [
398
+ "yes",
399
+ "no",
400
+ "maybe"
401
+ ],
402
+ "description": "",
403
+ "target_delimiter": " ",
404
+ "fewshot_delimiter": "\n\n",
405
+ "num_fewshot": 0,
406
+ "metric_list": [
407
+ {
408
+ "metric": "acc",
409
+ "aggregation": "mean",
410
+ "higher_is_better": true
411
+ }
412
+ ],
413
+ "output_type": "multiple_choice",
414
+ "repeats": 1,
415
+ "should_decontaminate": false,
416
+ "metadata": {
417
+ "version": 1.0
418
+ }
419
+ }
420
+ },
421
+ "versions": {
422
+ "medmcqa": "Yaml",
423
+ "medqa_4options": "Yaml",
424
+ "mmlu_anatomy": 1.0,
425
+ "mmlu_clinical_knowledge": 1.0,
426
+ "mmlu_college_biology": 1.0,
427
+ "mmlu_college_medicine": 1.0,
428
+ "mmlu_medical_genetics": 1.0,
429
+ "mmlu_professional_medicine": 1.0,
430
+ "pubmedqa": 1.0
431
+ },
432
+ "n-shot": {
433
+ "medmcqa": 0,
434
+ "medqa_4options": 0,
435
+ "mmlu_anatomy": 0,
436
+ "mmlu_clinical_knowledge": 0,
437
+ "mmlu_college_biology": 0,
438
+ "mmlu_college_medicine": 0,
439
+ "mmlu_medical_genetics": 0,
440
+ "mmlu_professional_medicine": 0,
441
+ "pubmedqa": 0
442
+ },
443
+ "higher_is_better": {
444
+ "medmcqa": {
445
+ "acc": true,
446
+ "acc_norm": true
447
+ },
448
+ "medqa_4options": {
449
+ "acc": true,
450
+ "acc_norm": true
451
+ },
452
+ "mmlu_anatomy": {
453
+ "acc": true
454
+ },
455
+ "mmlu_clinical_knowledge": {
456
+ "acc": true
457
+ },
458
+ "mmlu_college_biology": {
459
+ "acc": true
460
+ },
461
+ "mmlu_college_medicine": {
462
+ "acc": true
463
+ },
464
+ "mmlu_medical_genetics": {
465
+ "acc": true
466
+ },
467
+ "mmlu_professional_medicine": {
468
+ "acc": true
469
+ },
470
+ "multimedqa": {
471
+ "acc": true,
472
+ "acc_norm": true
473
+ },
474
+ "pubmedqa": {
475
+ "acc": true
476
+ }
477
+ },
478
+ "n-samples": {
479
+ "pubmedqa": {
480
+ "original": 500,
481
+ "effective": 500
482
+ },
483
+ "medmcqa": {
484
+ "original": 4183,
485
+ "effective": 4183
486
+ },
487
+ "medqa_4options": {
488
+ "original": 1273,
489
+ "effective": 1273
490
+ },
491
+ "mmlu_anatomy": {
492
+ "original": 135,
493
+ "effective": 135
494
+ },
495
+ "mmlu_clinical_knowledge": {
496
+ "original": 265,
497
+ "effective": 265
498
+ },
499
+ "mmlu_college_medicine": {
500
+ "original": 173,
501
+ "effective": 173
502
+ },
503
+ "mmlu_medical_genetics": {
504
+ "original": 100,
505
+ "effective": 100
506
+ },
507
+ "mmlu_professional_medicine": {
508
+ "original": 272,
509
+ "effective": 272
510
+ },
511
+ "mmlu_college_biology": {
512
+ "original": 144,
513
+ "effective": 144
514
+ }
515
+ },
516
+ "config": {
517
+ "model": "hf",
518
+ "model_args": "pretrained=checkpoint-2611,parallelize=True,trust_remote_code=True",
519
+ "model_num_parameters": 7615616512,
520
+ "model_dtype": "torch.bfloat16",
521
+ "model_revision": "main",
522
+ "model_sha": "",
523
+ "batch_size": "auto",
524
+ "batch_sizes": [
525
+ 64
526
+ ],
527
+ "device": "cuda:0",
528
+ "use_cache": null,
529
+ "limit": null,
530
+ "bootstrap_iters": 100000,
531
+ "gen_kwargs": null,
532
+ "random_seed": 0,
533
+ "numpy_seed": 1234,
534
+ "torch_seed": 1234,
535
+ "fewshot_seed": 1234
536
+ },
537
+ "git_hash": "6915b12",
538
+ "date": 1742241081.119386,
539
+ "pretty_env_info": "PyTorch version: 2.5.1+cu124\nIs debug build: False\nCUDA used to build PyTorch: 12.4\nROCM used to build PyTorch: N/A\n\nOS: Ubuntu 22.04.3 LTS (x86_64)\nGCC version: (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0\nClang version: Could not collect\nCMake version: version 3.31.0\nLibc version: glibc-2.35\n\nPython version: 3.11.10 (main, Oct 3 2024, 07:29:13) [GCC 11.2.0] (64-bit runtime)\nPython platform: Linux-6.8.0-40-generic-x86_64-with-glibc2.35\nIs CUDA available: True\nCUDA runtime version: 12.1.105\nCUDA_MODULE_LOADING set to: LAZY\nGPU models and configuration: GPU 0: NVIDIA H200\nNvidia driver version: 550.127.08\ncuDNN version: Probably one of the following:\n/usr/lib/x86_64-linux-gnu/libcudnn.so.8.9.0\n/usr/lib/x86_64-linux-gnu/libcudnn_adv_infer.so.8.9.0\n/usr/lib/x86_64-linux-gnu/libcudnn_adv_train.so.8.9.0\n/usr/lib/x86_64-linux-gnu/libcudnn_cnn_infer.so.8.9.0\n/usr/lib/x86_64-linux-gnu/libcudnn_cnn_train.so.8.9.0\n/usr/lib/x86_64-linux-gnu/libcudnn_ops_infer.so.8.9.0\n/usr/lib/x86_64-linux-gnu/libcudnn_ops_train.so.8.9.0\nHIP runtime version: N/A\nMIOpen runtime version: N/A\nIs XNNPACK available: True\n\nCPU:\nArchitecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nAddress sizes: 46 bits physical, 57 bits virtual\nByte Order: Little Endian\nCPU(s): 192\nOn-line CPU(s) list: 0-191\nVendor ID: GenuineIntel\nModel name: INTEL(R) XEON(R) PLATINUM 8568Y+\nCPU family: 6\nModel: 207\nThread(s) per core: 2\nCore(s) per socket: 48\nSocket(s): 2\nStepping: 2\nCPU max MHz: 4000.0000\nCPU min MHz: 800.0000\nBogoMIPS: 4600.00\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c 
rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cat_l2 cdp_l3 intel_ppin cdp_l2 ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb intel_pt avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local split_lock_detect user_shstk avx_vnni avx512_bf16 wbnoinvd dtherm ida arat pln pts hfi vnmi avx512vbmi umip pku ospke waitpkg avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg tme avx512_vpopcntdq la57 rdpid bus_lock_detect cldemote movdiri movdir64b enqcmd fsrm md_clear serialize tsxldtrk pconfig arch_lbr ibt amx_bf16 avx512_fp16 amx_tile amx_int8 flush_l1d arch_capabilities\nVirtualization: VT-x\nL1d cache: 4.5 MiB (96 instances)\nL1i cache: 3 MiB (96 instances)\nL2 cache: 192 MiB (96 instances)\nL3 cache: 600 MiB (2 instances)\nNUMA node(s): 2\nNUMA node0 CPU(s): 0-47,96-143\nNUMA node1 CPU(s): 48-95,144-191\nVulnerability Gather data sampling: Not affected\nVulnerability Itlb multihit: Not affected\nVulnerability L1tf: Not affected\nVulnerability Mds: Not affected\nVulnerability Meltdown: Not affected\nVulnerability Mmio stale data: Not affected\nVulnerability Reg file data sampling: Not affected\nVulnerability Retbleed: Not affected\nVulnerability Spec rstack overflow: Not affected\nVulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Enhanced / Automatic IBRS; IBPB conditional; RSB filling; PBRSB-eIBRS SW sequence; BHI BHI_DIS_S\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\n\nVersions of relevant libraries:\n[pip3] galore-torch==1.0\n[pip3] lion-pytorch==0.1.2\n[pip3] numpy==1.26.4\n[pip3] torch==2.5.1\n[pip3] torch-optimi==0.2.1\n[pip3] 
torchao==0.7.0\n[pip3] triton==3.1.0\n[conda] No relevant packages",
540
+ "transformers_version": "4.46.3",
541
+ "upper_git_hash": null,
542
+ "tokenizer_pad_token": [
543
+ "<|endoftext|>",
544
+ "151643"
545
+ ],
546
+ "tokenizer_eos_token": [
547
+ "<|im_end|>",
548
+ "151645"
549
+ ],
550
+ "tokenizer_bos_token": [
551
+ null,
552
+ "None"
553
+ ],
554
+ "eot_token_id": 151645,
555
+ "max_length": 32768,
556
+ "task_hashes": {
557
+ "pubmedqa": "870d7e5614da5dcad21abd725786223f3f6c5f4b6c35263df2d926eb7705a57a",
558
+ "medmcqa": "4ef81a02eca79c63cacb070d9b6375e2a463961f416266b281c01bccf94e70ea",
559
+ "medqa_4options": "2a2e29c3d5c0f042309940f072ce2a0c60def924a527be58650dd55665880fd6",
560
+ "mmlu_anatomy": "8a394ba6aa4d3366637e72da67c7d4c0286d47cb371a4f4a9814259be8bbe3ad",
561
+ "mmlu_clinical_knowledge": "839bf7b05724190f7277a957e8b2183a7b4dc74ab9ca72063d10872092a1ea7a",
562
+ "mmlu_college_medicine": "14529d73333850b8be0fc1d4c102c4500b76434c8c761611be6899af27608455",
563
+ "mmlu_medical_genetics": "9b736fa6d447dd8f017f7e2dc81e7487f3412a8551075ca312e48db9c4c5e108",
564
+ "mmlu_professional_medicine": "b1c4eea40bd1d93e49c50cadd35db8bbb96392c40d208ae1ffd6e72c306d757a",
565
+ "mmlu_college_biology": "d983837a4ac4327e74ff7f131eda1f0c23f6c9f2a1088e3a5162c6ede31605d5"
566
+ },
567
+ "model_source": "hf",
568
+ "model_name": "checkpoint-2611",
569
+ "model_name_sanitized": "checkpoint-2611",
570
+ "system_instruction": null,
571
+ "system_instruction_sha": null,
572
+ "fewshot_as_multiturn": false,
573
+ "chat_template": null,
574
+ "chat_template_sha": null,
575
+ "start_time": 8223588.771301923,
576
+ "end_time": 8223696.053149864,
577
+ "total_evaluation_time_seconds": "107.28184794075787"
578
+ }
merged_models/results/checkpoint-2611/samples_medmcqa_2025-03-17T19-53-01.924844.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
merged_models/results/checkpoint-2611/samples_medqa_4options_2025-03-17T19-53-01.924844.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
merged_models/results/checkpoint-2611/samples_mmlu_anatomy_2025-03-17T19-53-01.924844.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
merged_models/results/checkpoint-2611/samples_mmlu_clinical_knowledge_2025-03-17T19-53-01.924844.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
merged_models/results/checkpoint-2611/samples_mmlu_college_biology_2025-03-17T19-53-01.924844.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
merged_models/results/checkpoint-2611/samples_mmlu_college_medicine_2025-03-17T19-53-01.924844.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
merged_models/results/checkpoint-2611/samples_mmlu_medical_genetics_2025-03-17T19-53-01.924844.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
merged_models/results/checkpoint-2611/samples_mmlu_professional_medicine_2025-03-17T19-53-01.924844.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
merged_models/results/checkpoint-2611/samples_pubmedqa_2025-03-17T19-53-01.924844.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
merged_models/results/checkpoint-5222/results_2025-03-17T19-54-40.045633.json ADDED
@@ -0,0 +1,578 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "multimedqa": {
4
+ "acc,none": 0.643860894251242,
5
+ "acc_stderr,none": 0.00560404846208839,
6
+ "alias": "multimedqa"
7
+ },
8
+ "medmcqa": {
9
+ "alias": " - medmcqa",
10
+ "acc,none": 0.5780540282094191,
11
+ "acc_stderr,none": 0.007636961906055535,
12
+ "acc_norm,none": 0.5780540282094191,
13
+ "acc_norm_stderr,none": 0.007636961906055535
14
+ },
15
+ "medqa_4options": {
16
+ "alias": " - medqa_4options",
17
+ "acc,none": 0.6732128829536528,
18
+ "acc_stderr,none": 0.013151191960556162,
19
+ "acc_norm,none": 0.6732128829536528,
20
+ "acc_norm_stderr,none": 0.013151191960556162
21
+ },
22
+ "mmlu_anatomy": {
23
+ "alias": " - anatomy (mmlu)",
24
+ "acc,none": 0.7037037037037037,
25
+ "acc_stderr,none": 0.03944624162501116
26
+ },
27
+ "mmlu_clinical_knowledge": {
28
+ "alias": " - clinical_knowledge (mmlu)",
29
+ "acc,none": 0.8113207547169812,
30
+ "acc_stderr,none": 0.02407999513006223
31
+ },
32
+ "mmlu_college_biology": {
33
+ "alias": " - college_biology (mmlu)",
34
+ "acc,none": 0.875,
35
+ "acc_stderr,none": 0.02765610492929436
36
+ },
37
+ "mmlu_college_medicine": {
38
+ "alias": " - college_medicine (mmlu)",
39
+ "acc,none": 0.7398843930635838,
40
+ "acc_stderr,none": 0.033450369167889904
41
+ },
42
+ "mmlu_medical_genetics": {
43
+ "alias": " - medical_genetics (mmlu)",
44
+ "acc,none": 0.88,
45
+ "acc_stderr,none": 0.03265986323710906
46
+ },
47
+ "mmlu_professional_medicine": {
48
+ "alias": " - professional_medicine (mmlu)",
49
+ "acc,none": 0.8235294117647058,
50
+ "acc_stderr,none": 0.02315746830855935
51
+ },
52
+ "pubmedqa": {
53
+ "alias": " - pubmedqa",
54
+ "acc,none": 0.77,
55
+ "acc_stderr,none": 0.018839050391123206
56
+ }
57
+ },
58
+ "groups": {
59
+ "multimedqa": {
60
+ "acc,none": 0.643860894251242,
61
+ "acc_stderr,none": 0.00560404846208839,
62
+ "alias": "multimedqa"
63
+ }
64
+ },
65
+ "group_subtasks": {
66
+ "multimedqa": [
67
+ "pubmedqa",
68
+ "medmcqa",
69
+ "medqa_4options",
70
+ "mmlu_anatomy",
71
+ "mmlu_clinical_knowledge",
72
+ "mmlu_college_medicine",
73
+ "mmlu_medical_genetics",
74
+ "mmlu_professional_medicine",
75
+ "mmlu_college_biology"
76
+ ]
77
+ },
78
+ "configs": {
79
+ "medmcqa": {
80
+ "task": "medmcqa",
81
+ "dataset_path": "medmcqa",
82
+ "training_split": "train",
83
+ "validation_split": "validation",
84
+ "test_split": "validation",
85
+ "doc_to_text": "def doc_to_text(doc) -> str:\n \"\"\"\n Question: <question>\n Choices:\n A. <choice1>\n B. <choice2>\n C. <choice3>\n D. <choice4>\n Answer:\n \"\"\"\n choices = [doc[\"opa\"], doc[\"opb\"], doc[\"opc\"], doc[\"opd\"]]\n option_choices = {\n \"A\": choices[0],\n \"B\": choices[1],\n \"C\": choices[2],\n \"D\": choices[3],\n }\n\n prompt = \"Question: \" + doc[\"question\"] + \"\\nChoices:\\n\"\n for choice, option in option_choices.items():\n prompt += f\"{choice.upper()}. {option}\\n\"\n prompt += \"Answer:\"\n return prompt\n",
86
+ "doc_to_target": "cop",
87
+ "doc_to_choice": [
88
+ "A",
89
+ "B",
90
+ "C",
91
+ "D"
92
+ ],
93
+ "description": "",
94
+ "target_delimiter": " ",
95
+ "fewshot_delimiter": "\n\n",
96
+ "num_fewshot": 0,
97
+ "metric_list": [
98
+ {
99
+ "metric": "acc",
100
+ "aggregation": "mean",
101
+ "higher_is_better": true
102
+ },
103
+ {
104
+ "metric": "acc_norm",
105
+ "aggregation": "mean",
106
+ "higher_is_better": true
107
+ }
108
+ ],
109
+ "output_type": "multiple_choice",
110
+ "repeats": 1,
111
+ "should_decontaminate": true,
112
+ "doc_to_decontamination_query": "{{question}}"
113
+ },
114
+ "medqa_4options": {
115
+ "task": "medqa_4options",
116
+ "dataset_path": "GBaker/MedQA-USMLE-4-options-hf",
117
+ "training_split": "train",
118
+ "validation_split": "validation",
119
+ "test_split": "test",
120
+ "doc_to_text": "def doc_to_text(doc) -> str:\n option_choices = {\n \"A\": doc[\"ending0\"],\n \"B\": doc[\"ending1\"],\n \"C\": doc[\"ending2\"],\n \"D\": doc[\"ending3\"],\n }\n answers = \"\".join((f\"{k}. {v}\\n\") for k, v in option_choices.items())\n return f\"Question: {doc['sent1']}\\n{answers}Answer:\"\n",
121
+ "doc_to_target": "def doc_to_target(doc) -> int:\n return doc[\"label\"]\n",
122
+ "doc_to_choice": [
123
+ "A",
124
+ "B",
125
+ "C",
126
+ "D"
127
+ ],
128
+ "description": "",
129
+ "target_delimiter": " ",
130
+ "fewshot_delimiter": "\n\n",
131
+ "num_fewshot": 0,
132
+ "metric_list": [
133
+ {
134
+ "metric": "acc",
135
+ "aggregation": "mean",
136
+ "higher_is_better": true
137
+ },
138
+ {
139
+ "metric": "acc_norm",
140
+ "aggregation": "mean",
141
+ "higher_is_better": true
142
+ }
143
+ ],
144
+ "output_type": "multiple_choice",
145
+ "repeats": 1,
146
+ "should_decontaminate": false
147
+ },
148
+ "mmlu_anatomy": {
149
+ "task": "mmlu_anatomy",
150
+ "task_alias": "anatomy (mmlu)",
151
+ "tag": "mmlu_stem_tasks",
152
+ "dataset_path": "hails/mmlu_no_train",
153
+ "dataset_name": "anatomy",
154
+ "dataset_kwargs": {
155
+ "trust_remote_code": true
156
+ },
157
+ "test_split": "test",
158
+ "fewshot_split": "dev",
159
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
160
+ "doc_to_target": "answer",
161
+ "doc_to_choice": [
162
+ "A",
163
+ "B",
164
+ "C",
165
+ "D"
166
+ ],
167
+ "description": "The following are multiple choice questions (with answers) about anatomy.\n\n",
168
+ "target_delimiter": " ",
169
+ "fewshot_delimiter": "\n\n",
170
+ "fewshot_config": {
171
+ "sampler": "first_n"
172
+ },
173
+ "num_fewshot": 0,
174
+ "metric_list": [
175
+ {
176
+ "metric": "acc",
177
+ "aggregation": "mean",
178
+ "higher_is_better": true
179
+ }
180
+ ],
181
+ "output_type": "multiple_choice",
182
+ "repeats": 1,
183
+ "should_decontaminate": false,
184
+ "metadata": {
185
+ "version": 1.0
186
+ }
187
+ },
188
+ "mmlu_clinical_knowledge": {
189
+ "task": "mmlu_clinical_knowledge",
190
+ "task_alias": "clinical_knowledge (mmlu)",
191
+ "tag": "mmlu_other_tasks",
192
+ "dataset_path": "hails/mmlu_no_train",
193
+ "dataset_name": "clinical_knowledge",
194
+ "dataset_kwargs": {
195
+ "trust_remote_code": true
196
+ },
197
+ "test_split": "test",
198
+ "fewshot_split": "dev",
199
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
200
+ "doc_to_target": "answer",
201
+ "doc_to_choice": [
202
+ "A",
203
+ "B",
204
+ "C",
205
+ "D"
206
+ ],
207
+ "description": "The following are multiple choice questions (with answers) about clinical knowledge.\n\n",
208
+ "target_delimiter": " ",
209
+ "fewshot_delimiter": "\n\n",
210
+ "fewshot_config": {
211
+ "sampler": "first_n"
212
+ },
213
+ "num_fewshot": 0,
214
+ "metric_list": [
215
+ {
216
+ "metric": "acc",
217
+ "aggregation": "mean",
218
+ "higher_is_better": true
219
+ }
220
+ ],
221
+ "output_type": "multiple_choice",
222
+ "repeats": 1,
223
+ "should_decontaminate": false,
224
+ "metadata": {
225
+ "version": 1.0
226
+ }
227
+ },
228
+ "mmlu_college_biology": {
229
+ "task": "mmlu_college_biology",
230
+ "task_alias": "college_biology (mmlu)",
231
+ "tag": "mmlu_stem_tasks",
232
+ "dataset_path": "hails/mmlu_no_train",
233
+ "dataset_name": "college_biology",
234
+ "dataset_kwargs": {
235
+ "trust_remote_code": true
236
+ },
237
+ "test_split": "test",
238
+ "fewshot_split": "dev",
239
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
240
+ "doc_to_target": "answer",
241
+ "doc_to_choice": [
242
+ "A",
243
+ "B",
244
+ "C",
245
+ "D"
246
+ ],
247
+ "description": "The following are multiple choice questions (with answers) about college biology.\n\n",
248
+ "target_delimiter": " ",
249
+ "fewshot_delimiter": "\n\n",
250
+ "fewshot_config": {
251
+ "sampler": "first_n"
252
+ },
253
+ "num_fewshot": 0,
254
+ "metric_list": [
255
+ {
256
+ "metric": "acc",
257
+ "aggregation": "mean",
258
+ "higher_is_better": true
259
+ }
260
+ ],
261
+ "output_type": "multiple_choice",
262
+ "repeats": 1,
263
+ "should_decontaminate": false,
264
+ "metadata": {
265
+ "version": 1.0
266
+ }
267
+ },
268
+ "mmlu_college_medicine": {
269
+ "task": "mmlu_college_medicine",
270
+ "task_alias": "college_medicine (mmlu)",
271
+ "tag": "mmlu_other_tasks",
272
+ "dataset_path": "hails/mmlu_no_train",
273
+ "dataset_name": "college_medicine",
274
+ "dataset_kwargs": {
275
+ "trust_remote_code": true
276
+ },
277
+ "test_split": "test",
278
+ "fewshot_split": "dev",
279
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
280
+ "doc_to_target": "answer",
281
+ "doc_to_choice": [
282
+ "A",
283
+ "B",
284
+ "C",
285
+ "D"
286
+ ],
287
+ "description": "The following are multiple choice questions (with answers) about college medicine.\n\n",
288
+ "target_delimiter": " ",
289
+ "fewshot_delimiter": "\n\n",
290
+ "fewshot_config": {
291
+ "sampler": "first_n"
292
+ },
293
+ "num_fewshot": 0,
294
+ "metric_list": [
295
+ {
296
+ "metric": "acc",
297
+ "aggregation": "mean",
298
+ "higher_is_better": true
299
+ }
300
+ ],
301
+ "output_type": "multiple_choice",
302
+ "repeats": 1,
303
+ "should_decontaminate": false,
304
+ "metadata": {
305
+ "version": 1.0
306
+ }
307
+ },
308
+ "mmlu_medical_genetics": {
309
+ "task": "mmlu_medical_genetics",
310
+ "task_alias": "medical_genetics (mmlu)",
311
+ "tag": "mmlu_other_tasks",
312
+ "dataset_path": "hails/mmlu_no_train",
313
+ "dataset_name": "medical_genetics",
314
+ "dataset_kwargs": {
315
+ "trust_remote_code": true
316
+ },
317
+ "test_split": "test",
318
+ "fewshot_split": "dev",
319
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
320
+ "doc_to_target": "answer",
321
+ "doc_to_choice": [
322
+ "A",
323
+ "B",
324
+ "C",
325
+ "D"
326
+ ],
327
+ "description": "The following are multiple choice questions (with answers) about medical genetics.\n\n",
328
+ "target_delimiter": " ",
329
+ "fewshot_delimiter": "\n\n",
330
+ "fewshot_config": {
331
+ "sampler": "first_n"
332
+ },
333
+ "num_fewshot": 0,
334
+ "metric_list": [
335
+ {
336
+ "metric": "acc",
337
+ "aggregation": "mean",
338
+ "higher_is_better": true
339
+ }
340
+ ],
341
+ "output_type": "multiple_choice",
342
+ "repeats": 1,
343
+ "should_decontaminate": false,
344
+ "metadata": {
345
+ "version": 1.0
346
+ }
347
+ },
348
+ "mmlu_professional_medicine": {
349
+ "task": "mmlu_professional_medicine",
350
+ "task_alias": "professional_medicine (mmlu)",
351
+ "tag": "mmlu_other_tasks",
352
+ "dataset_path": "hails/mmlu_no_train",
353
+ "dataset_name": "professional_medicine",
354
+ "dataset_kwargs": {
355
+ "trust_remote_code": true
356
+ },
357
+ "test_split": "test",
358
+ "fewshot_split": "dev",
359
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
360
+ "doc_to_target": "answer",
361
+ "doc_to_choice": [
362
+ "A",
363
+ "B",
364
+ "C",
365
+ "D"
366
+ ],
367
+ "description": "The following are multiple choice questions (with answers) about professional medicine.\n\n",
368
+ "target_delimiter": " ",
369
+ "fewshot_delimiter": "\n\n",
370
+ "fewshot_config": {
371
+ "sampler": "first_n"
372
+ },
373
+ "num_fewshot": 0,
374
+ "metric_list": [
375
+ {
376
+ "metric": "acc",
377
+ "aggregation": "mean",
378
+ "higher_is_better": true
379
+ }
380
+ ],
381
+ "output_type": "multiple_choice",
382
+ "repeats": 1,
383
+ "should_decontaminate": false,
384
+ "metadata": {
385
+ "version": 1.0
386
+ }
387
+ },
388
+ "pubmedqa": {
389
+ "task": "pubmedqa",
390
+ "dataset_path": "bigbio/pubmed_qa",
391
+ "dataset_name": "pubmed_qa_labeled_fold0_source",
392
+ "training_split": "train",
393
+ "validation_split": "validation",
394
+ "test_split": "test",
395
+ "doc_to_text": "def doc_to_text(doc) -> str:\n ctxs = \"\\n\".join(doc[\"CONTEXTS\"])\n return \"Abstract: {}\\nQuestion: {}\\nAnswer:\".format(\n ctxs,\n doc[\"QUESTION\"],\n )\n",
396
+ "doc_to_target": "final_decision",
397
+ "doc_to_choice": [
398
+ "yes",
399
+ "no",
400
+ "maybe"
401
+ ],
402
+ "description": "",
403
+ "target_delimiter": " ",
404
+ "fewshot_delimiter": "\n\n",
405
+ "num_fewshot": 0,
406
+ "metric_list": [
407
+ {
408
+ "metric": "acc",
409
+ "aggregation": "mean",
410
+ "higher_is_better": true
411
+ }
412
+ ],
413
+ "output_type": "multiple_choice",
414
+ "repeats": 1,
415
+ "should_decontaminate": false,
416
+ "metadata": {
417
+ "version": 1.0
418
+ }
419
+ }
420
+ },
421
+ "versions": {
422
+ "medmcqa": "Yaml",
423
+ "medqa_4options": "Yaml",
424
+ "mmlu_anatomy": 1.0,
425
+ "mmlu_clinical_knowledge": 1.0,
426
+ "mmlu_college_biology": 1.0,
427
+ "mmlu_college_medicine": 1.0,
428
+ "mmlu_medical_genetics": 1.0,
429
+ "mmlu_professional_medicine": 1.0,
430
+ "pubmedqa": 1.0
431
+ },
432
+ "n-shot": {
433
+ "medmcqa": 0,
434
+ "medqa_4options": 0,
435
+ "mmlu_anatomy": 0,
436
+ "mmlu_clinical_knowledge": 0,
437
+ "mmlu_college_biology": 0,
438
+ "mmlu_college_medicine": 0,
439
+ "mmlu_medical_genetics": 0,
440
+ "mmlu_professional_medicine": 0,
441
+ "pubmedqa": 0
442
+ },
443
+ "higher_is_better": {
444
+ "medmcqa": {
445
+ "acc": true,
446
+ "acc_norm": true
447
+ },
448
+ "medqa_4options": {
449
+ "acc": true,
450
+ "acc_norm": true
451
+ },
452
+ "mmlu_anatomy": {
453
+ "acc": true
454
+ },
455
+ "mmlu_clinical_knowledge": {
456
+ "acc": true
457
+ },
458
+ "mmlu_college_biology": {
459
+ "acc": true
460
+ },
461
+ "mmlu_college_medicine": {
462
+ "acc": true
463
+ },
464
+ "mmlu_medical_genetics": {
465
+ "acc": true
466
+ },
467
+ "mmlu_professional_medicine": {
468
+ "acc": true
469
+ },
470
+ "multimedqa": {
471
+ "acc": true,
472
+ "acc_norm": true
473
+ },
474
+ "pubmedqa": {
475
+ "acc": true
476
+ }
477
+ },
478
+ "n-samples": {
479
+ "pubmedqa": {
480
+ "original": 500,
481
+ "effective": 500
482
+ },
483
+ "medmcqa": {
484
+ "original": 4183,
485
+ "effective": 4183
486
+ },
487
+ "medqa_4options": {
488
+ "original": 1273,
489
+ "effective": 1273
490
+ },
491
+ "mmlu_anatomy": {
492
+ "original": 135,
493
+ "effective": 135
494
+ },
495
+ "mmlu_clinical_knowledge": {
496
+ "original": 265,
497
+ "effective": 265
498
+ },
499
+ "mmlu_college_medicine": {
500
+ "original": 173,
501
+ "effective": 173
502
+ },
503
+ "mmlu_medical_genetics": {
504
+ "original": 100,
505
+ "effective": 100
506
+ },
507
+ "mmlu_professional_medicine": {
508
+ "original": 272,
509
+ "effective": 272
510
+ },
511
+ "mmlu_college_biology": {
512
+ "original": 144,
513
+ "effective": 144
514
+ }
515
+ },
516
+ "config": {
517
+ "model": "hf",
518
+ "model_args": "pretrained=checkpoint-5222,parallelize=True,trust_remote_code=True",
519
+ "model_num_parameters": 7615616512,
520
+ "model_dtype": "torch.bfloat16",
521
+ "model_revision": "main",
522
+ "model_sha": "",
523
+ "batch_size": "auto",
524
+ "batch_sizes": [
525
+ 64
526
+ ],
527
+ "device": "cuda:0",
528
+ "use_cache": null,
529
+ "limit": null,
530
+ "bootstrap_iters": 100000,
531
+ "gen_kwargs": null,
532
+ "random_seed": 0,
533
+ "numpy_seed": 1234,
534
+ "torch_seed": 1234,
535
+ "fewshot_seed": 1234
536
+ },
537
+ "git_hash": "6915b12",
538
+ "date": 1742241195.31737,
539
+ "pretty_env_info": "PyTorch version: 2.5.1+cu124\nIs debug build: False\nCUDA used to build PyTorch: 12.4\nROCM used to build PyTorch: N/A\n\nOS: Ubuntu 22.04.3 LTS (x86_64)\nGCC version: (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0\nClang version: Could not collect\nCMake version: version 3.31.0\nLibc version: glibc-2.35\n\nPython version: 3.11.10 (main, Oct 3 2024, 07:29:13) [GCC 11.2.0] (64-bit runtime)\nPython platform: Linux-6.8.0-40-generic-x86_64-with-glibc2.35\nIs CUDA available: True\nCUDA runtime version: 12.1.105\nCUDA_MODULE_LOADING set to: LAZY\nGPU models and configuration: GPU 0: NVIDIA H200\nNvidia driver version: 550.127.08\ncuDNN version: Probably one of the following:\n/usr/lib/x86_64-linux-gnu/libcudnn.so.8.9.0\n/usr/lib/x86_64-linux-gnu/libcudnn_adv_infer.so.8.9.0\n/usr/lib/x86_64-linux-gnu/libcudnn_adv_train.so.8.9.0\n/usr/lib/x86_64-linux-gnu/libcudnn_cnn_infer.so.8.9.0\n/usr/lib/x86_64-linux-gnu/libcudnn_cnn_train.so.8.9.0\n/usr/lib/x86_64-linux-gnu/libcudnn_ops_infer.so.8.9.0\n/usr/lib/x86_64-linux-gnu/libcudnn_ops_train.so.8.9.0\nHIP runtime version: N/A\nMIOpen runtime version: N/A\nIs XNNPACK available: True\n\nCPU:\nArchitecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nAddress sizes: 46 bits physical, 57 bits virtual\nByte Order: Little Endian\nCPU(s): 192\nOn-line CPU(s) list: 0-191\nVendor ID: GenuineIntel\nModel name: INTEL(R) XEON(R) PLATINUM 8568Y+\nCPU family: 6\nModel: 207\nThread(s) per core: 2\nCore(s) per socket: 48\nSocket(s): 2\nStepping: 2\nCPU max MHz: 4000.0000\nCPU min MHz: 800.0000\nBogoMIPS: 4600.00\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c 
rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cat_l2 cdp_l3 intel_ppin cdp_l2 ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb intel_pt avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local split_lock_detect user_shstk avx_vnni avx512_bf16 wbnoinvd dtherm ida arat pln pts hfi vnmi avx512vbmi umip pku ospke waitpkg avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg tme avx512_vpopcntdq la57 rdpid bus_lock_detect cldemote movdiri movdir64b enqcmd fsrm md_clear serialize tsxldtrk pconfig arch_lbr ibt amx_bf16 avx512_fp16 amx_tile amx_int8 flush_l1d arch_capabilities\nVirtualization: VT-x\nL1d cache: 4.5 MiB (96 instances)\nL1i cache: 3 MiB (96 instances)\nL2 cache: 192 MiB (96 instances)\nL3 cache: 600 MiB (2 instances)\nNUMA node(s): 2\nNUMA node0 CPU(s): 0-47,96-143\nNUMA node1 CPU(s): 48-95,144-191\nVulnerability Gather data sampling: Not affected\nVulnerability Itlb multihit: Not affected\nVulnerability L1tf: Not affected\nVulnerability Mds: Not affected\nVulnerability Meltdown: Not affected\nVulnerability Mmio stale data: Not affected\nVulnerability Reg file data sampling: Not affected\nVulnerability Retbleed: Not affected\nVulnerability Spec rstack overflow: Not affected\nVulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Enhanced / Automatic IBRS; IBPB conditional; RSB filling; PBRSB-eIBRS SW sequence; BHI BHI_DIS_S\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\n\nVersions of relevant libraries:\n[pip3] galore-torch==1.0\n[pip3] lion-pytorch==0.1.2\n[pip3] numpy==1.26.4\n[pip3] torch==2.5.1\n[pip3] torch-optimi==0.2.1\n[pip3] 
torchao==0.7.0\n[pip3] triton==3.1.0\n[conda] No relevant packages",
540
+ "transformers_version": "4.46.3",
541
+ "upper_git_hash": null,
542
+ "tokenizer_pad_token": [
543
+ "<|endoftext|>",
544
+ "151643"
545
+ ],
546
+ "tokenizer_eos_token": [
547
+ "<|im_end|>",
548
+ "151645"
549
+ ],
550
+ "tokenizer_bos_token": [
551
+ null,
552
+ "None"
553
+ ],
554
+ "eot_token_id": 151645,
555
+ "max_length": 32768,
556
+ "task_hashes": {
557
+ "pubmedqa": "870d7e5614da5dcad21abd725786223f3f6c5f4b6c35263df2d926eb7705a57a",
558
+ "medmcqa": "4ef81a02eca79c63cacb070d9b6375e2a463961f416266b281c01bccf94e70ea",
559
+ "medqa_4options": "2a2e29c3d5c0f042309940f072ce2a0c60def924a527be58650dd55665880fd6",
560
+ "mmlu_anatomy": "8a394ba6aa4d3366637e72da67c7d4c0286d47cb371a4f4a9814259be8bbe3ad",
561
+ "mmlu_clinical_knowledge": "839bf7b05724190f7277a957e8b2183a7b4dc74ab9ca72063d10872092a1ea7a",
562
+ "mmlu_college_medicine": "14529d73333850b8be0fc1d4c102c4500b76434c8c761611be6899af27608455",
563
+ "mmlu_medical_genetics": "9b736fa6d447dd8f017f7e2dc81e7487f3412a8551075ca312e48db9c4c5e108",
564
+ "mmlu_professional_medicine": "b1c4eea40bd1d93e49c50cadd35db8bbb96392c40d208ae1ffd6e72c306d757a",
565
+ "mmlu_college_biology": "d983837a4ac4327e74ff7f131eda1f0c23f6c9f2a1088e3a5162c6ede31605d5"
566
+ },
567
+ "model_source": "hf",
568
+ "model_name": "checkpoint-5222",
569
+ "model_name_sanitized": "checkpoint-5222",
570
+ "system_instruction": null,
571
+ "system_instruction_sha": null,
572
+ "fewshot_as_multiturn": false,
573
+ "chat_template": null,
574
+ "chat_template_sha": null,
575
+ "start_time": 8223703.085893983,
576
+ "end_time": 8223794.175920337,
577
+ "total_evaluation_time_seconds": "91.0900263544172"
578
+ }
merged_models/results/checkpoint-5222/samples_medmcqa_2025-03-17T19-54-40.045633.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
merged_models/results/checkpoint-5222/samples_medqa_4options_2025-03-17T19-54-40.045633.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
merged_models/results/checkpoint-5222/samples_mmlu_anatomy_2025-03-17T19-54-40.045633.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
merged_models/results/checkpoint-5222/samples_mmlu_clinical_knowledge_2025-03-17T19-54-40.045633.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
merged_models/results/checkpoint-5222/samples_mmlu_college_biology_2025-03-17T19-54-40.045633.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
merged_models/results/checkpoint-5222/samples_mmlu_college_medicine_2025-03-17T19-54-40.045633.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
merged_models/results/checkpoint-5222/samples_mmlu_medical_genetics_2025-03-17T19-54-40.045633.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
merged_models/results/checkpoint-5222/samples_mmlu_professional_medicine_2025-03-17T19-54-40.045633.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
merged_models/results/checkpoint-5222/samples_pubmedqa_2025-03-17T19-54-40.045633.jsonl ADDED
The diff for this file is too large to render. See raw diff