lestienne commited on
Commit
cbf5c35
·
verified ·
1 Parent(s): 7347369

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +6 -0
  2. finetune_lora/qwen2.5-7b-instruct/banking77/size=16/seed=4/lora_ans/0.0-0.7/0.7-1.0/best.ckpt +3 -0
  3. finetune_lora/qwen2.5-7b-instruct/banking77/size=16/seed=4/lora_ans/0.0-0.7/0.7-1.0/checkpoint/config.json +27 -0
  4. finetune_lora/qwen2.5-7b-instruct/banking77/size=16/seed=4/lora_ans/0.0-0.7/0.7-1.0/checkpoint/generation_config.json +14 -0
  5. finetune_lora/qwen2.5-7b-instruct/banking77/size=16/seed=4/lora_ans/0.0-0.7/0.7-1.0/checkpoint/model_config.yaml +37 -0
  6. finetune_lora/qwen2.5-7b-instruct/banking77/size=16/seed=4/lora_ans/0.0-0.7/0.7-1.0/checkpoint/tokenizer.json +0 -0
  7. finetune_lora/qwen2.5-7b-instruct/banking77/size=16/seed=4/lora_ans/0.0-0.7/0.7-1.0/last.ckpt +3 -0
  8. finetune_lora/qwen2.5-7b-instruct/banking77/size=16/seed=4/lora_ans/0.0-0.7/0.7-1.0/logs/events.out.tfevents.1734803539.gamma.3827549.0 +3 -0
  9. finetune_lora/qwen2.5-7b-instruct/banking77/size=16/seed=4/lora_ans/0.0-0.7/0.7-1.0/logs/hparams.yaml +1 -0
  10. finetune_lora/qwen2.5-7b-instruct/banking77/size=16/seed=4/lora_ans/0.0-0.7/0.7-1.0/logs/metrics.csv +257 -0
  11. finetune_lora/qwen2.5-7b-instruct/banking77/size=16/seed=4/lora_ans/0.0-0.7/0.7-1.0/train_args.yaml +10 -0
  12. finetune_lora/qwen2.5-7b-instruct/banking77/size=16/seed=4/lora_ans/0.0-1.0/0.7-1.0/best.ckpt +3 -0
  13. finetune_lora/qwen2.5-7b-instruct/banking77/size=16/seed=4/lora_ans/0.0-1.0/0.7-1.0/checkpoint/config.json +27 -0
  14. finetune_lora/qwen2.5-7b-instruct/banking77/size=16/seed=4/lora_ans/0.0-1.0/0.7-1.0/checkpoint/generation_config.json +14 -0
  15. finetune_lora/qwen2.5-7b-instruct/banking77/size=16/seed=4/lora_ans/0.0-1.0/0.7-1.0/checkpoint/lit_model.pth +3 -0
  16. finetune_lora/qwen2.5-7b-instruct/banking77/size=16/seed=4/lora_ans/0.0-1.0/0.7-1.0/checkpoint/lit_model.pth.lora +3 -0
  17. finetune_lora/qwen2.5-7b-instruct/banking77/size=16/seed=4/lora_ans/0.0-1.0/0.7-1.0/checkpoint/model_config.yaml +37 -0
  18. finetune_lora/qwen2.5-7b-instruct/banking77/size=16/seed=4/lora_ans/0.0-1.0/0.7-1.0/checkpoint/tokenizer.json +0 -0
  19. finetune_lora/qwen2.5-7b-instruct/banking77/size=16/seed=4/lora_ans/0.0-1.0/0.7-1.0/checkpoint/tokenizer_config.json +207 -0
  20. finetune_lora/qwen2.5-7b-instruct/banking77/size=16/seed=4/lora_ans/0.0-1.0/0.7-1.0/last.ckpt +3 -0
  21. finetune_lora/qwen2.5-7b-instruct/banking77/size=16/seed=4/lora_ans/0.0-1.0/0.7-1.0/logs/events.out.tfevents.1734805986.gamma.3831644.0 +3 -0
  22. finetune_lora/qwen2.5-7b-instruct/banking77/size=16/seed=4/lora_ans/0.0-1.0/0.7-1.0/logs/hparams.yaml +1 -0
  23. finetune_lora/qwen2.5-7b-instruct/banking77/size=16/seed=4/lora_ans/0.0-1.0/0.7-1.0/logs/metrics.csv +87 -0
  24. finetune_lora/qwen2.5-7b-instruct/banking77/size=16/seed=4/lora_ans/0.0-1.0/0.7-1.0/test=banking77/list=test_1000/labels.csv +1000 -0
  25. finetune_lora/qwen2.5-7b-instruct/banking77/size=16/seed=4/lora_ans/0.0-1.0/0.7-1.0/test=banking77/list=test_1000/logits.csv +0 -0
  26. finetune_lora/qwen2.5-7b-instruct/banking77/size=16/seed=4/lora_ans/0.0-1.0/0.7-1.0/train_args.yaml +10 -0
  27. finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=0/lora_ans_no_es/0.0-0.7/0.0-0.3/best.ckpt +3 -0
  28. finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=0/lora_ans_no_es/0.0-0.7/0.0-0.3/checkpoint/config.json +27 -0
  29. finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=0/lora_ans_no_es/0.0-0.7/0.0-0.3/checkpoint/generation_config.json +14 -0
  30. finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=0/lora_ans_no_es/0.0-0.7/0.0-0.3/checkpoint/lit_model.pth +3 -0
  31. finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=0/lora_ans_no_es/0.0-0.7/0.0-0.3/checkpoint/lit_model.pth.lora +3 -0
  32. finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=0/lora_ans_no_es/0.0-0.7/0.0-0.3/checkpoint/model_config.yaml +37 -0
  33. finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=0/lora_ans_no_es/0.0-0.7/0.0-0.3/checkpoint/tokenizer.json +0 -0
  34. finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=0/lora_ans_no_es/0.0-0.7/0.0-0.3/checkpoint/tokenizer_config.json +207 -0
  35. finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=0/lora_ans_no_es/0.0-0.7/0.0-0.3/last.ckpt +3 -0
  36. finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=0/lora_ans_no_es/0.0-0.7/0.0-0.3/logs/events.out.tfevents.1735554367.gamma.2619525.0 +3 -0
  37. finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=0/lora_ans_no_es/0.0-0.7/0.0-0.3/logs/hparams.yaml +1 -0
  38. finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=0/lora_ans_no_es/0.0-0.7/0.0-0.3/logs/metrics.csv +0 -0
  39. finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=0/lora_ans_no_es/0.0-0.7/0.0-0.3/test=banking77/list=0.7-1.0/labels.csv +184 -0
  40. finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=0/lora_ans_no_es/0.0-0.7/0.0-0.3/test=banking77/list=0.7-1.0/logits.csv +0 -0
  41. finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=0/lora_ans_no_es/0.0-0.7/0.0-0.3/test=banking77/list=test_1000/labels.csv +1000 -0
  42. finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=0/lora_ans_no_es/0.0-0.7/0.0-0.3/test=banking77/list=test_1000/logits.csv +0 -0
  43. finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=0/lora_ans_no_es/0.0-0.7/0.0-0.3/train_args.yaml +10 -0
  44. finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=1/lora_ans/0.0-0.7/0.7-1.0/best.ckpt +3 -0
  45. finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=1/lora_ans/0.0-0.7/0.7-1.0/checkpoint/config.json +27 -0
  46. finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=1/lora_ans/0.0-0.7/0.7-1.0/checkpoint/generation_config.json +14 -0
  47. finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=1/lora_ans/0.0-0.7/0.7-1.0/checkpoint/lit_model.pth +3 -0
  48. finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=1/lora_ans/0.0-0.7/0.7-1.0/checkpoint/lit_model.pth.lora +3 -0
  49. finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=1/lora_ans/0.0-0.7/0.7-1.0/checkpoint/model_config.yaml +37 -0
  50. finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=1/lora_ans/0.0-0.7/0.7-1.0/checkpoint/tokenizer.json +0 -0
.gitattributes CHANGED
@@ -1574,3 +1574,9 @@ finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=3/lora_ans_no_es/0.0-0.
1574
  finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=0/lora_ans/0.0-1.0/0.7-1.0/checkpoint/lit_model.pth.lora filter=lfs diff=lfs merge=lfs -text
1575
  finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=0/lora_ans/0.0-0.7/0.7-1.0/checkpoint/lit_model.pth.lora filter=lfs diff=lfs merge=lfs -text
1576
  finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=0/lora_ans_no_es/0.0-1.0/0.7-1.0/checkpoint/lit_model.pth.lora filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
1574
  finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=0/lora_ans/0.0-1.0/0.7-1.0/checkpoint/lit_model.pth.lora filter=lfs diff=lfs merge=lfs -text
1575
  finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=0/lora_ans/0.0-0.7/0.7-1.0/checkpoint/lit_model.pth.lora filter=lfs diff=lfs merge=lfs -text
1576
  finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=0/lora_ans_no_es/0.0-1.0/0.7-1.0/checkpoint/lit_model.pth.lora filter=lfs diff=lfs merge=lfs -text
1577
+ finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=0/lora_ans_no_es/0.0-0.7/0.0-0.3/checkpoint/lit_model.pth.lora filter=lfs diff=lfs merge=lfs -text
1578
+ finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=1/lora_ans/0.0-1.0/0.7-1.0/checkpoint/lit_model.pth.lora filter=lfs diff=lfs merge=lfs -text
1579
+ finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=1/lora_ans/0.0-0.7/0.7-1.0/checkpoint/lit_model.pth.lora filter=lfs diff=lfs merge=lfs -text
1580
+ finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=1/lora_ans_no_es/0.0-1.0/0.7-1.0/checkpoint/lit_model.pth.lora filter=lfs diff=lfs merge=lfs -text
1581
+ finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=1/lora_ans_no_es/0.0-0.7/0.0-0.3/checkpoint/lit_model.pth.lora filter=lfs diff=lfs merge=lfs -text
1582
+ finetune_lora/qwen2.5-7b-instruct/banking77/size=16/seed=4/lora_ans/0.0-1.0/0.7-1.0/checkpoint/lit_model.pth.lora filter=lfs diff=lfs merge=lfs -text
finetune_lora/qwen2.5-7b-instruct/banking77/size=16/seed=4/lora_ans/0.0-0.7/0.7-1.0/best.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0db411d48812655d1c5f052a730aef52a6ba9991e8ed6edf40e66a586f1ca40d
3
+ size 42951934
finetune_lora/qwen2.5-7b-instruct/banking77/size=16/seed=4/lora_ans/0.0-0.7/0.7-1.0/checkpoint/config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen2ForCausalLM"
4
+ ],
5
+ "attention_dropout": 0.0,
6
+ "bos_token_id": 151643,
7
+ "eos_token_id": 151645,
8
+ "hidden_act": "silu",
9
+ "hidden_size": 3584,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 18944,
12
+ "max_position_embeddings": 32768,
13
+ "max_window_layers": 28,
14
+ "model_type": "qwen2",
15
+ "num_attention_heads": 28,
16
+ "num_hidden_layers": 28,
17
+ "num_key_value_heads": 4,
18
+ "rms_norm_eps": 1e-06,
19
+ "rope_theta": 1000000.0,
20
+ "sliding_window": 131072,
21
+ "tie_word_embeddings": false,
22
+ "torch_dtype": "bfloat16",
23
+ "transformers_version": "4.43.1",
24
+ "use_cache": true,
25
+ "use_sliding_window": false,
26
+ "vocab_size": 152064
27
+ }
finetune_lora/qwen2.5-7b-instruct/banking77/size=16/seed=4/lora_ans/0.0-0.7/0.7-1.0/checkpoint/generation_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "pad_token_id": 151643,
4
+ "do_sample": true,
5
+ "eos_token_id": [
6
+ 151645,
7
+ 151643
8
+ ],
9
+ "repetition_penalty": 1.05,
10
+ "temperature": 0.7,
11
+ "top_p": 0.8,
12
+ "top_k": 20,
13
+ "transformers_version": "4.37.0"
14
+ }
finetune_lora/qwen2.5-7b-instruct/banking77/size=16/seed=4/lora_ans/0.0-0.7/0.7-1.0/checkpoint/model_config.yaml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ attention_logit_softcapping: null
2
+ attention_scores_scalar: null
3
+ attn_bias: true
4
+ bias: false
5
+ block_size: 131072
6
+ final_logit_softcapping: null
7
+ gelu_approximate: none
8
+ head_size: 128
9
+ hf_config:
10
+ name: Qwen2.5-7B-Instruct
11
+ org: Qwen
12
+ intermediate_size: 18944
13
+ lm_head_bias: false
14
+ mlp_class_name: LLaMAMLP
15
+ n_embd: 3584
16
+ n_expert: 0
17
+ n_expert_per_token: 0
18
+ n_head: 28
19
+ n_layer: 28
20
+ n_query_groups: 4
21
+ name: Qwen2.5-7B-Instruct
22
+ norm_class_name: RMSNorm
23
+ norm_eps: 1.0e-06
24
+ padded_vocab_size: 152064
25
+ padding_multiple: 512
26
+ parallel_residual: false
27
+ post_attention_norm: false
28
+ post_mlp_norm: false
29
+ rope_adjustments: null
30
+ rope_base: 1000000
31
+ rope_condense_ratio: 1
32
+ rotary_percentage: 1.0
33
+ scale_embeddings: false
34
+ shared_attention_norm: false
35
+ sliding_window_layer_placing: null
36
+ sliding_window_size: null
37
+ vocab_size: 151643
finetune_lora/qwen2.5-7b-instruct/banking77/size=16/seed=4/lora_ans/0.0-0.7/0.7-1.0/checkpoint/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
finetune_lora/qwen2.5-7b-instruct/banking77/size=16/seed=4/lora_ans/0.0-0.7/0.7-1.0/last.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14b9b2d3eb808423fd95e61dd8637650b493b981057e8e993a995fc96997ac57
3
+ size 42951934
finetune_lora/qwen2.5-7b-instruct/banking77/size=16/seed=4/lora_ans/0.0-0.7/0.7-1.0/logs/events.out.tfevents.1734803539.gamma.3827549.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ef1b07a95dce0083ee407aa859858c9c304ee81055ba4464a2ba52f2c194242
3
+ size 54843
finetune_lora/qwen2.5-7b-instruct/banking77/size=16/seed=4/lora_ans/0.0-0.7/0.7-1.0/logs/hparams.yaml ADDED
@@ -0,0 +1 @@
 
 
1
+ {}
finetune_lora/qwen2.5-7b-instruct/banking77/size=16/seed=4/lora_ans/0.0-0.7/0.7-1.0/logs/metrics.csv ADDED
@@ -0,0 +1,257 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ epoch,iter,iter_time,step,train/loss,val/loss
2
+ 0,1,0.41801123321056366,0,15.245295206705729,
3
+ 0,8,0.10661979764699936,1,13.468572124358147,
4
+ 0,16,0.08192970603704453,2,12.807103838239398,
5
+ 0,24,0.08319449797272682,3,11.434923648834229,
6
+ 0,32,0.08193360269069672,4,8.681313514709473,
7
+ 0,40,0.08204995095729828,5,6.688623336053664,
8
+ 0,48,0.07599947601556778,6,6.781939400566949,
9
+ 0,56,0.07942227274179459,7,4.500931331089565,
10
+ 0,64,0.07595022395253181,8,4.407195840563093,
11
+ 0,72,0.07605744525790215,9,3.5039620933861566,
12
+ 0,80,0.08175421133637428,10,3.1178311017843394,
13
+ 0,88,0.07897650450468063,11,3.023450704721304,
14
+ 0,96,0.07681397721171379,12,2.7905956506729126,
15
+ 0,104,0.0815720409154892,13,2.4336187413760593,
16
+ 1,112,0.0761033184826374,14,1.980389792578561,
17
+ 1,120,0.07973353192210197,15,1.9931339876992362,
18
+ 1,128,0.08694154769182205,16,2.008383246985349,
19
+ ,,,16,,2.6267958818855934
20
+ 1,136,0.08132371306419373,17,2.5170993294034685,
21
+ 1,144,0.07975330948829651,18,2.212132157950566,
22
+ 1,152,0.07661822438240051,19,2.207448954765613,
23
+ 1,160,0.07666566222906113,20,2.7828273262296404,
24
+ 1,168,0.08055181056261063,21,1.1784269785881043,
25
+ 1,176,0.081810861825943,22,1.8498126888275146,
26
+ 1,184,0.07554763928055763,23,1.846410722732544,
27
+ 1,192,0.08249428123235703,24,2.250374401316923,
28
+ 1,200,0.077521201223135,25,1.3879836375514667,
29
+ 1,208,0.0803658738732338,26,2.109172882572297,
30
+ 1,216,0.07579009607434273,27,2.1057448100160667,
31
+ 2,224,0.07578283175826073,28,1.10992347240448,
32
+ 2,232,0.07849422469735146,29,1.7119302831847092,
33
+ 2,240,0.08102121204137802,30,0.9734124172301519,
34
+ 2,248,0.07945103570818901,31,1.1411668062210083,
35
+ 2,256,0.0782124400138855,32,1.533616192638874,
36
+ ,,,32,,1.9445154222391419
37
+ 2,264,0.07599280774593353,33,0.5895678848028183,
38
+ 2,272,0.08003127947449684,34,1.5822145501772562,
39
+ 2,280,0.08132949844002724,35,1.5129140269756318,
40
+ 2,288,0.07663846015930176,36,1.5641627487014322,
41
+ 2,296,0.07849453017115593,37,1.2844155366931642,
42
+ 2,304,0.08224019035696983,38,1.338078805378505,
43
+ 2,312,0.07608083263039589,39,1.1991653948119192,
44
+ 2,320,0.07631854712963104,40,1.1972227019648398,
45
+ 3,328,0.08251668512821198,41,1.1251970356510532,
46
+ 3,336,0.07888041064143181,42,1.1884995436668395,
47
+ 3,344,0.07658852636814117,43,0.9476699064175288,
48
+ 3,352,0.07850195840001106,44,0.6889174205285532,
49
+ 3,360,0.07570690289139748,45,0.5112864623467127,
50
+ 3,368,0.07637650147080421,46,0.6372244626283645,
51
+ 3,376,0.07663038372993469,47,0.5155495659068778,
52
+ 3,384,0.08066761121153831,48,0.6537449024617672,
53
+ ,,,48,,1.456997456523658
54
+ 3,392,0.07540604844689369,49,0.8660666452300164,
55
+ 3,400,0.07997581735253334,50,0.3456831289564862,
56
+ 3,408,0.08019159734249115,51,0.59176118850708,
57
+ 3,416,0.07979369163513184,52,0.7916472737605755,
58
+ 3,424,0.08033589646220207,53,0.6650819067771618,
59
+ 3,432,0.0795188620686531,54,0.444424509401283,
60
+ 4,440,0.080206960439682,55,0.33103326729365756,
61
+ 4,448,0.07583343237638474,56,0.2951349520257541,
62
+ 4,456,0.07670791819691658,57,0.3400797193700617,
63
+ 4,464,0.08102433755993843,58,0.31007088974118235,
64
+ 4,472,0.07964625582098961,59,0.22732141523650198,
65
+ 4,480,0.08171048760414124,60,0.5153657476659175,
66
+ 4,488,0.07617775350809097,61,0.10149463026651315,
67
+ 4,496,0.07952700182795525,62,0.38331171444484163,
68
+ 4,504,0.07995658367872238,63,0.15525524652209774,
69
+ 4,512,0.07716748863458633,64,0.261320523917675,
70
+ ,,,64,,1.2392874679996468
71
+ 4,520,0.07842296734452248,65,0.13272464900676692,
72
+ 4,528,0.082512516528368,66,0.9336077170101581,
73
+ 4,536,0.07599904015660286,67,0.3285862146211522,
74
+ 5,544,0.0797199234366417,68,0.3883792461344489,
75
+ 5,552,0.07869511097669601,69,0.13799265958368778,
76
+ 5,560,0.08104588091373444,70,0.2141539692878723,
77
+ 5,568,0.07710668444633484,71,0.13665436466152853,
78
+ 5,576,0.07971390709280968,72,0.07650123164057732,
79
+ 5,584,0.07601956278085709,73,0.01841307829858528,
80
+ 5,592,0.07583951205015182,74,0.12733550760895013,
81
+ 5,600,0.07970942184329033,75,0.06060662712291654,
82
+ 5,608,0.07581551373004913,76,0.16055354276864692,
83
+ 5,616,0.07567897439002991,77,0.10011633023105819,
84
+ 5,624,0.07577453181147575,78,0.0373442519384508,
85
+ 5,632,0.07998546585440636,79,0.051642198355928544,
86
+ 5,640,0.07571514323353767,80,0.07324450952000916,
87
+ ,,,80,,1.220179757155941
88
+ 5,648,0.08041854947805405,81,0.03427613151049422,
89
+ 6,656,0.08035837858915329,82,0.018245021411185634,
90
+ 6,664,0.0757586807012558,83,0.026896495630757675,
91
+ 6,672,0.0766090452671051,84,0.031050517057467784,
92
+ 6,680,0.08087816834449768,85,0.08722809751634486,
93
+ 6,688,0.08124937489628792,86,0.10045788701972924,
94
+ 6,696,0.08173618838191032,87,0.04810646304901203,
95
+ 6,704,0.0767863355576992,88,0.015122977174961796,
96
+ 6,712,0.08045336231589317,89,0.008250255844202536,
97
+ 6,720,0.07572482526302338,90,0.005870503788658729,
98
+ 6,728,0.07988850027322769,91,0.0039164314833309115,
99
+ 6,736,0.07896571978926659,92,0.05440512783825398,
100
+ 6,744,0.07544690370559692,93,0.014060104723154967,
101
+ 6,752,0.07678967341780663,94,0.007574873343110085,
102
+ 7,760,0.08017173409461975,95,0.009290854320051554,
103
+ 7,768,0.08091559633612633,96,0.0037136566310297125,
104
+ ,,,96,,1.334627054505429
105
+ 7,776,0.07518190145492554,97,0.0026499477098695934,
106
+ 7,784,0.08751776441931725,98,0.015139012409445759,
107
+ 7,792,0.08122460171580315,99,0.003162139758037833,
108
+ 7,800,0.08049898967146873,100,0.0029983498638362755,
109
+ 7,808,0.07630405947566032,101,0.002129281434463337,
110
+ 7,816,0.08032820001244545,102,0.0025433075497858226,
111
+ 7,824,0.07648511230945587,103,0.002474746289502444,
112
+ 7,832,0.07677393779158592,104,0.09051269446499646,
113
+ 7,840,0.07649029046297073,105,0.0017086559632194362,
114
+ 7,848,0.07640264183282852,106,0.12156336870290996,
115
+ 7,856,0.0797690823674202,107,0.004290321783628315,
116
+ 7,864,0.08241377770900726,108,0.029583712056693103,
117
+ 8,872,0.08153984695672989,109,0.003109156208289466,
118
+ 8,880,0.07651563733816147,110,0.003980762682788607,
119
+ 8,888,0.07592421397566795,111,0.002226233565514641,
120
+ 8,896,0.07850825041532516,112,0.003909367095348576,
121
+ ,,,112,,1.3644537629380737
122
+ 8,904,0.07586736232042313,113,0.001750759679082505,
123
+ 8,912,0.07655619457364082,114,0.00316919437997664,
124
+ 8,920,0.07655161246657372,115,0.04217005980451806,
125
+ 8,928,0.08035317808389664,116,0.004546249871728597,
126
+ 8,936,0.08037881553173065,117,0.002945837671480452,
127
+ 8,944,0.07642089948058128,118,0.004391685235672272,
128
+ 8,952,0.07318853959441185,119,0.0021036160843712942,
129
+ 8,960,0.0757434032857418,120,0.0018103123606684117,
130
+ 8,968,0.08203768730163574,121,0.002336886105256091,
131
+ 9,976,0.08017420023679733,122,0.002482170578358429,
132
+ 9,984,0.08124836906790733,123,0.0017026159142504535,
133
+ 9,992,0.07860083505511284,124,0.0017314481374341995,
134
+ 9,1000,0.07872269302606583,125,0.002313698805385054,
135
+ 9,1008,0.08234187960624695,126,0.003353067442731117,
136
+ 9,1016,0.07541193068027496,127,0.0019318842887878418,
137
+ 9,1024,0.07562727108597755,128,0.0019356367298431934,
138
+ ,,,128,,1.4059044789459745
139
+ 9,1032,0.07581831887364388,129,0.001956676916840176,
140
+ 9,1040,0.08199923112988472,130,0.0017174848579560166,
141
+ 9,1048,0.0805906243622303,131,0.005637636017733409,
142
+ 9,1056,0.08028692379593849,132,0.0010265738403956806,
143
+ 9,1064,0.07822137326002121,133,0.0025562918573782288,
144
+ 9,1072,0.07715917751193047,134,0.003451792076230049,
145
+ 9,1080,0.08294691890478134,135,0.0012422075066881786,
146
+ 10,1088,0.0802244134247303,136,0.0025929521472225414,
147
+ 10,1096,0.07667588070034981,137,0.0022301371092908083,
148
+ 10,1104,0.08019435033202171,138,0.001588386707366086,
149
+ 10,1112,0.08342451229691505,139,0.0013748745805060025,
150
+ 10,1120,0.07861519977450371,140,0.0019766525505110623,
151
+ 10,1128,0.07851975038647652,141,0.001517053209245205,
152
+ 10,1136,0.07589851692318916,142,0.0014517174109421444,
153
+ 10,1144,0.07873744890093803,143,0.002652840534273278,
154
+ 10,1152,0.07638392969965935,144,0.0022880356191308238,
155
+ ,,,144,,1.428448607019112
156
+ 10,1160,0.0801188237965107,145,0.002163147094856518,
157
+ 10,1168,0.07911596074700356,146,0.00181398067252303,
158
+ 10,1176,0.07729433849453926,147,0.0010964423547395402,
159
+ 10,1184,0.07888704165816307,148,0.0014495904300323066,
160
+ 11,1192,0.07915829494595528,149,0.0031000313846844024,
161
+ 11,1200,0.07857371866703033,150,0.0023745037291923333,
162
+ 11,1208,0.07675604894757271,151,0.0024424809186408916,
163
+ 11,1216,0.08103244006633759,152,0.0018206863727148932,
164
+ 11,1224,0.07935509085655212,153,0.0008022674221799455,
165
+ 11,1232,0.08044067770242691,154,0.0014515669137271571,
166
+ 11,1240,0.08136998489499092,155,0.0016064035589806736,
167
+ 11,1248,0.07888821139931679,156,0.0022734016871682424,
168
+ 11,1256,0.08190657570958138,157,0.0007576886116741941,
169
+ 11,1264,0.08128445595502853,158,0.002296472684500496,
170
+ 11,1272,0.08002636581659317,159,0.001984355048517938,
171
+ 11,1280,0.07539411261677742,160,0.0012432260632825394,
172
+ ,,,160,,1.4431297172934323
173
+ 11,1288,0.0766761302947998,161,0.001251848966509489,
174
+ 11,1296,0.07675955817103386,162,0.0013161608949303627,
175
+ 12,1304,0.07578713074326515,163,0.0014664484406239353,
176
+ 12,1312,0.08188273757696152,164,0.001062643605862455,
177
+ 12,1320,0.08134699240326881,165,0.0008801866206340492,
178
+ 12,1328,0.0808706171810627,166,0.00235078040690672,
179
+ 12,1336,0.07948964089155197,167,0.002048665467494478,
180
+ 12,1344,0.08113948628306389,168,0.001369765927342491,
181
+ 12,1352,0.0757298655807972,169,0.0009977765439543873,
182
+ 12,1360,0.07644052430987358,170,0.0013666578258077304,
183
+ 12,1368,0.08014019578695297,171,0.0007469795714132488,
184
+ 12,1376,0.0853777565062046,172,0.0018483813347605369,
185
+ 12,1384,0.07632169127464294,173,0.0012686533842828464,
186
+ 12,1392,0.08055079728364944,174,0.0021629807953205374,
187
+ 12,1400,0.07989292219281197,175,0.001345583886307265,
188
+ 13,1408,0.07691758498549461,176,0.0019089055675712804,
189
+ ,,,176,,1.4550310555150954
190
+ 13,1416,0.07565921917557716,177,0.001476082220223957,
191
+ 13,1424,0.07553005591034889,178,0.0017441685447314133,
192
+ 13,1432,0.0762014240026474,179,0.0016184770020431485,
193
+ 13,1440,0.07696964219212532,180,0.0017657826732223232,
194
+ 13,1448,0.07974260672926903,181,0.0016460686238133349,
195
+ 13,1456,0.0758756548166275,182,0.0008050930815183424,
196
+ 13,1464,0.07945945113897324,183,0.0013644157582893967,
197
+ 13,1472,0.08039956167340279,184,0.0016187235451070592,
198
+ 13,1480,0.07823250815272331,185,0.0017591769552651655,
199
+ 13,1488,0.080325186252594,186,0.0011306559716575538,
200
+ 13,1496,0.07607422396540642,187,0.0010171088852941814,
201
+ 13,1504,0.0798703022301197,188,0.001641997423458604,
202
+ 13,1512,0.08069155737757683,189,0.0007013407267742988,
203
+ 14,1520,0.08059044554829597,190,0.001396438034134917,
204
+ 14,1528,0.07578818500041962,191,0.0013882955117151142,
205
+ 14,1536,0.07934626936912537,192,0.0008619158938729569,
206
+ ,,,192,,1.4619449249095162
207
+ 14,1544,0.08102718740701675,193,0.00198127578914864,
208
+ 14,1552,0.08570212870836258,194,0.001800365976974248,
209
+ 14,1560,0.08734514564275742,195,0.0012209131174521254,
210
+ 14,1568,0.08161161467432976,196,0.0018001355066964472,
211
+ 14,1576,0.07588807120919228,197,0.0010678037186153233,
212
+ 14,1584,0.07575773447751999,198,0.0007969227394667165,
213
+ 14,1592,0.07965559884905815,199,0.0009699558338616043,
214
+ 14,1600,0.08192871138453484,200,0.0014347147225635126,
215
+ 14,1608,0.08045091852545738,201,0.0009239678493031533,
216
+ 14,1616,0.08015654981136322,202,0.0013737400198200096,
217
+ 15,1624,0.08006387203931808,203,0.001007056453843312,
218
+ 15,1632,0.0758742168545723,204,0.0014523388253468456,
219
+ 15,1640,0.08171569555997849,205,0.0013339696411878385,
220
+ 15,1648,0.07627980411052704,206,0.001162861017898346,
221
+ 15,1656,0.07653063163161278,207,0.0019701303747503766,
222
+ 15,1664,0.07834725454449654,208,0.0010178024385822937,
223
+ ,,,208,,1.4749231715660311
224
+ 15,1672,0.07587892189621925,209,0.0013492611552854733,
225
+ 15,1680,0.07548464089632034,210,0.0022430306233997854,
226
+ 15,1688,0.08024438098073006,211,0.0014101047612105806,
227
+ 15,1696,0.08149268478155136,212,0.0008350604267824176,
228
+ 15,1704,0.07638537511229515,213,0.0010546634887130214,
229
+ 15,1712,0.07661915197968483,214,0.0009221237305609975,
230
+ 15,1720,0.08141110092401505,215,0.001081652287601693,
231
+ 15,1728,0.0815996453166008,216,0.0007339539627234141,
232
+ 16,1736,0.07660701870918274,217,0.000886707014918405,
233
+ 16,1744,0.07634572684764862,218,0.0016906476094542692,
234
+ 16,1752,0.07945265993475914,219,0.0008056441678807302,
235
+ 16,1760,0.08188224956393242,220,0.000925458868517092,
236
+ 16,1768,0.07933010905981064,221,0.001020455122880566,
237
+ 16,1776,0.07691051810979843,222,0.0012126970520642186,
238
+ 16,1784,0.07874795794487,223,0.0009672197649100174,
239
+ 16,1792,0.07989288866519928,224,0.0009207261609844863,
240
+ ,,,224,,1.4826744639940854
241
+ 16,1800,0.07566746324300766,225,0.001855694290222318,
242
+ 16,1808,0.07404480874538422,226,0.0012396624600835915,
243
+ 16,1816,0.07673775404691696,227,0.0013457523076795042,
244
+ 16,1824,0.08036114647984505,228,0.0009607873317095097,
245
+ 16,1832,0.0763234794139862,229,0.0007727841241285205,
246
+ 17,1840,0.07973895967006683,230,0.0018329469625873922,
247
+ 17,1848,0.0813264399766922,231,0.0015189680112102492,
248
+ 17,1856,0.08228092640638351,232,0.001438708872800427,
249
+ 17,1864,0.08009340614080429,233,0.0010031821568393046,
250
+ 17,1872,0.07870448380708694,234,0.0009355457171935726,
251
+ 17,1880,0.07899034395813942,235,0.0011530033266171813,
252
+ 17,1888,0.07948676124215126,236,0.0011397790456671387,
253
+ 17,1896,0.07565326616168022,237,0.0010101297294669268,
254
+ 17,1904,0.07669680193066597,238,0.0011359528510365635,
255
+ 17,1912,0.07655368000268936,239,0.0010693810663410816,
256
+ 17,1920,0.08091424778103828,240,0.0008892255383684779,
257
+ ,,,240,,1.4971246234441207
finetune_lora/qwen2.5-7b-instruct/banking77/size=16/seed=4/lora_ans/0.0-0.7/0.7-1.0/train_args.yaml ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ K: null
2
+ global_batch_size: 8
3
+ learning_rate: 0.0001
4
+ loss: ans
5
+ max_steps: -1
6
+ micro_batch_size: 1
7
+ optimizer_name: adamw
8
+ patience: 10
9
+ val_check_interval: 16
10
+ weight_decay: 0.0
finetune_lora/qwen2.5-7b-instruct/banking77/size=16/seed=4/lora_ans/0.0-1.0/0.7-1.0/best.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b559320c9c9b26a330736a85b2c00d4c39a3c711789f2eec32c4c244a77a3e33
3
+ size 42951934
finetune_lora/qwen2.5-7b-instruct/banking77/size=16/seed=4/lora_ans/0.0-1.0/0.7-1.0/checkpoint/config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen2ForCausalLM"
4
+ ],
5
+ "attention_dropout": 0.0,
6
+ "bos_token_id": 151643,
7
+ "eos_token_id": 151645,
8
+ "hidden_act": "silu",
9
+ "hidden_size": 3584,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 18944,
12
+ "max_position_embeddings": 32768,
13
+ "max_window_layers": 28,
14
+ "model_type": "qwen2",
15
+ "num_attention_heads": 28,
16
+ "num_hidden_layers": 28,
17
+ "num_key_value_heads": 4,
18
+ "rms_norm_eps": 1e-06,
19
+ "rope_theta": 1000000.0,
20
+ "sliding_window": 131072,
21
+ "tie_word_embeddings": false,
22
+ "torch_dtype": "bfloat16",
23
+ "transformers_version": "4.43.1",
24
+ "use_cache": true,
25
+ "use_sliding_window": false,
26
+ "vocab_size": 152064
27
+ }
finetune_lora/qwen2.5-7b-instruct/banking77/size=16/seed=4/lora_ans/0.0-1.0/0.7-1.0/checkpoint/generation_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "pad_token_id": 151643,
4
+ "do_sample": true,
5
+ "eos_token_id": [
6
+ 151645,
7
+ 151643
8
+ ],
9
+ "repetition_penalty": 1.05,
10
+ "temperature": 0.7,
11
+ "top_p": 0.8,
12
+ "top_k": 20,
13
+ "transformers_version": "4.37.0"
14
+ }
finetune_lora/qwen2.5-7b-instruct/banking77/size=16/seed=4/lora_ans/0.0-1.0/0.7-1.0/checkpoint/lit_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f2893700d468552b27b00bb16e3906c7a01f49c09f3c786c252a2b5d36d179f
3
+ size 15231297418
finetune_lora/qwen2.5-7b-instruct/banking77/size=16/seed=4/lora_ans/0.0-1.0/0.7-1.0/checkpoint/lit_model.pth.lora ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fe6068e4d9759fb1c20c28651ec1e8af9674895506f29da8300f2d9bcaa4a74
3
+ size 42951614
finetune_lora/qwen2.5-7b-instruct/banking77/size=16/seed=4/lora_ans/0.0-1.0/0.7-1.0/checkpoint/model_config.yaml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ attention_logit_softcapping: null
2
+ attention_scores_scalar: null
3
+ attn_bias: true
4
+ bias: false
5
+ block_size: 131072
6
+ final_logit_softcapping: null
7
+ gelu_approximate: none
8
+ head_size: 128
9
+ hf_config:
10
+ name: Qwen2.5-7B-Instruct
11
+ org: Qwen
12
+ intermediate_size: 18944
13
+ lm_head_bias: false
14
+ mlp_class_name: LLaMAMLP
15
+ n_embd: 3584
16
+ n_expert: 0
17
+ n_expert_per_token: 0
18
+ n_head: 28
19
+ n_layer: 28
20
+ n_query_groups: 4
21
+ name: Qwen2.5-7B-Instruct
22
+ norm_class_name: RMSNorm
23
+ norm_eps: 1.0e-06
24
+ padded_vocab_size: 152064
25
+ padding_multiple: 512
26
+ parallel_residual: false
27
+ post_attention_norm: false
28
+ post_mlp_norm: false
29
+ rope_adjustments: null
30
+ rope_base: 1000000
31
+ rope_condense_ratio: 1
32
+ rotary_percentage: 1.0
33
+ scale_embeddings: false
34
+ shared_attention_norm: false
35
+ sliding_window_layer_placing: null
36
+ sliding_window_size: null
37
+ vocab_size: 151643
finetune_lora/qwen2.5-7b-instruct/banking77/size=16/seed=4/lora_ans/0.0-1.0/0.7-1.0/checkpoint/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
finetune_lora/qwen2.5-7b-instruct/banking77/size=16/seed=4/lora_ans/0.0-1.0/0.7-1.0/checkpoint/tokenizer_config.json ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ }
181
+ },
182
+ "additional_special_tokens": [
183
+ "<|im_start|>",
184
+ "<|im_end|>",
185
+ "<|object_ref_start|>",
186
+ "<|object_ref_end|>",
187
+ "<|box_start|>",
188
+ "<|box_end|>",
189
+ "<|quad_start|>",
190
+ "<|quad_end|>",
191
+ "<|vision_start|>",
192
+ "<|vision_end|>",
193
+ "<|vision_pad|>",
194
+ "<|image_pad|>",
195
+ "<|video_pad|>"
196
+ ],
197
+ "bos_token": null,
198
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
199
+ "clean_up_tokenization_spaces": false,
200
+ "eos_token": "<|im_end|>",
201
+ "errors": "replace",
202
+ "model_max_length": 131072,
203
+ "pad_token": "<|endoftext|>",
204
+ "split_special_tokens": false,
205
+ "tokenizer_class": "Qwen2Tokenizer",
206
+ "unk_token": null
207
+ }
finetune_lora/qwen2.5-7b-instruct/banking77/size=16/seed=4/lora_ans/0.0-1.0/0.7-1.0/last.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d90321b7bb761b9130426ffb709e61f77b08d1da18f0db5b08f00e78a3455726
3
+ size 42951934
finetune_lora/qwen2.5-7b-instruct/banking77/size=16/seed=4/lora_ans/0.0-1.0/0.7-1.0/logs/events.out.tfevents.1734805986.gamma.3831644.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a1a31d87f98c76c37145d1f1fde5a4f9a05de0b8f09345aa35e6c954af74d7c
3
+ size 18290
finetune_lora/qwen2.5-7b-instruct/banking77/size=16/seed=4/lora_ans/0.0-1.0/0.7-1.0/logs/hparams.yaml ADDED
@@ -0,0 +1 @@
 
 
1
+ {}
finetune_lora/qwen2.5-7b-instruct/banking77/size=16/seed=4/lora_ans/0.0-1.0/0.7-1.0/logs/metrics.csv ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ epoch,iter,iter_time,step,train/loss,val/loss
2
+ 0,1,0.4123350605368614,0,12.11691665649414,
3
+ 0,8,0.10280553624033928,1,11.946681051543266,
4
+ 0,16,0.07627921178936958,2,12.145848044033709,
5
+ 0,24,0.08037809655070305,3,10.488243877887726,
6
+ 0,32,0.08190082013607025,4,10.462685223283438,
7
+ 0,40,0.08398445695638657,5,8.56721435054656,
8
+ 0,48,0.08043656125664711,6,5.304779767990112,
9
+ 0,56,0.07871370762586594,7,3.7038332121712823,
10
+ 0,64,0.0827261172235012,8,3.9963007313864574,
11
+ 0,72,0.07606184110045433,9,3.590405491420201,
12
+ 0,80,0.08197446912527084,10,2.288888775385343,
13
+ 0,88,0.08026864752173424,11,2.8127734565734865,
14
+ 0,96,0.08372917026281357,12,2.824228882789612,
15
+ 0,104,0.0800313800573349,13,2.5621902501141585,
16
+ 0,112,0.08824199438095093,14,2.6932331981330084,
17
+ 0,120,0.08051637560129166,15,2.207143089988015,
18
+ 0,128,0.07703807577490807,16,3.3674790700276693,
19
+ ,,,16,,2.4488709875419317
20
+ 0,136,0.07896118238568306,17,3.7004852845118594,
21
+ 0,144,0.07713618129491806,18,2.7902996081572313,
22
+ 0,152,0.08011943846940994,19,1.5338764108460525,
23
+ 1,160,0.08054821565747261,20,2.225591220855713,
24
+ 1,168,0.0815880298614502,21,2.170739907771349,
25
+ 1,176,0.07644334435462952,22,2.2126090356281827,
26
+ 1,184,0.0761294811964035,23,2.033785240990775,
27
+ 1,192,0.08202523365616798,24,1.9919723123311996,
28
+ 1,200,0.07628072425723076,25,1.7949061763697658,
29
+ 1,208,0.0784546248614788,26,1.5366853833198548,
30
+ 1,216,0.0869893953204155,27,1.2262009448475308,
31
+ 1,224,0.07972022145986557,28,1.497286565842167,
32
+ 1,232,0.08121772855520248,29,1.4650954405466716,
33
+ 1,240,0.07640029862523079,30,1.76709544217145,
34
+ 1,248,0.08160492032766342,31,1.195439001609539,
35
+ 1,256,0.08063583821058273,32,1.9514090542135567,
36
+ ,,,32,,1.5661012466344457
37
+ 1,264,0.0758480355143547,33,1.3703056919959284,
38
+ 1,272,0.07965521514415741,34,1.7205631125856329,
39
+ 1,280,0.07957543060183525,35,2.1078760952785096,
40
+ 1,288,0.0808374248445034,36,1.391715651466733,
41
+ 1,296,0.07660144194960594,37,1.5229190270105997,
42
+ 1,304,0.07613057643175125,38,1.1857397437095643,
43
+ 2,312,0.08105170354247093,39,1.4523041761583753,
44
+ 2,320,0.07964178919792175,40,0.8246737561727825,
45
+ 2,328,0.08194279670715332,41,1.1756470587945753,
46
+ 2,336,0.07796105369925499,42,1.2552748719851177,
47
+ 2,344,0.08080613985657692,43,0.9264966470223887,
48
+ 2,352,0.08059879392385483,44,0.9736070151512439,
49
+ 2,360,0.0762358009815216,45,1.401765607021473,
50
+ 2,368,0.08251360431313515,46,1.0940892631188035,
51
+ 2,376,0.08006580546498299,47,0.6867778870192441,
52
+ 2,384,0.07554114982485771,48,0.8855126520683025,
53
+ ,,,48,,0.6725706542279087
54
+ 2,392,0.08119768649339676,49,0.5306956503126357,
55
+ 2,400,0.07640928775072098,50,0.7529445330684001,
56
+ 2,408,0.07648879289627075,51,0.9772441033273935,
57
+ 2,416,0.08261218294501305,52,0.806821551322937,
58
+ 2,424,0.07634934410452843,53,0.6013531816693453,
59
+ 2,432,0.08255348727107048,54,0.4793394379890882,
60
+ 2,440,0.07707996293902397,55,1.045152180153748,
61
+ 2,448,0.08096887916326523,56,0.47096291308601695,
62
+ 2,456,0.08088582381606102,57,0.4629532111187776,
63
+ 3,464,0.08024600520730019,58,0.411201739105685,
64
+ 3,472,0.07615400850772858,59,0.39088416420694055,
65
+ 3,480,0.0852845124900341,60,0.4160480417551533,
66
+ 3,488,0.08170796930789948,61,0.2052328262863488,
67
+ 3,496,0.07765119895339012,62,0.5011219114065171,
68
+ 3,504,0.07743249088525772,63,0.2920545196581271,
69
+ 3,512,0.08184167742729187,64,0.06196220939119275,
70
+ ,,,64,,0.2965837898900953
71
+ 3,520,0.07639065012335777,65,0.1769609681710049,
72
+ 3,528,0.07876966893672943,66,0.4924048261406521,
73
+ 3,536,0.08122127503156662,67,0.17013415841963783,
74
+ 3,544,0.08725094050168991,68,0.18195277379293526,
75
+ 3,552,0.07747167348861694,69,0.3597257772150139,
76
+ 3,560,0.07957847788929939,70,0.4243425264440734,
77
+ 3,568,0.08156620338559151,71,0.08218356514615673,
78
+ 3,576,0.08031244203448296,72,0.9947862213929043,
79
+ 3,584,0.07941707968711853,73,0.18634830034253272,
80
+ 3,592,0.07876205816864967,74,0.5666589183466775,
81
+ 3,600,0.07881605997681618,75,0.41652951103945574,
82
+ 3,608,0.08015795797109604,76,0.23235163522454408,
83
+ 3,616,0.08064737170934677,77,0.19293122462025195,
84
+ 4,624,0.07412714138627052,78,0.055583399906754496,
85
+ 4,632,0.08050036430358887,79,0.26034141704440117,
86
+ 4,640,0.076687753200531,80,0.1070822188258171,
87
+ ,,,80,,0.1379297547421213
finetune_lora/qwen2.5-7b-instruct/banking77/size=16/seed=4/lora_ans/0.0-1.0/0.7-1.0/test=banking77/list=test_1000/labels.csv ADDED
@@ -0,0 +1,1000 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 6197,55
2
+ 6441,63
3
+ 5254,42
4
+ 4058,38
5
+ 5596,27
6
+ 114,11
7
+ 2806,66
8
+ 1087,12
9
+ 1139,4
10
+ 12996,2
11
+ 10447,41
12
+ 11288,73
13
+ 12620,37
14
+ 10691,8
15
+ 8838,20
16
+ 11829,25
17
+ 5055,26
18
+ 5484,52
19
+ 2936,54
20
+ 9239,0
21
+ 5848,25
22
+ 2035,47
23
+ 1266,14
24
+ 11887,55
25
+ 4073,38
26
+ 4823,44
27
+ 2662,15
28
+ 5923,25
29
+ 2923,54
30
+ 7478,35
31
+ 12409,39
32
+ 11263,38
33
+ 12272,50
34
+ 8760,5
35
+ 1955,47
36
+ 7756,71
37
+ 12943,9
38
+ 8885,20
39
+ 6776,53
40
+ 12715,5
41
+ 11892,55
42
+ 4231,73
43
+ 3603,16
44
+ 1096,12
45
+ 2786,66
46
+ 12791,31
47
+ 7613,65
48
+ 12357,65
49
+ 11662,42
50
+ 9066,57
51
+ 63,11
52
+ 5620,27
53
+ 6193,55
54
+ 871,46
55
+ 10854,66
56
+ 8147,43
57
+ 3305,6
58
+ 3248,61
59
+ 2603,15
60
+ 3482,16
61
+ 4639,28
62
+ 7136,7
63
+ 6570,70
64
+ 7884,39
65
+ 12307,35
66
+ 10342,4
67
+ 257,13
68
+ 10939,40
69
+ 597,34
70
+ 3076,40
71
+ 8498,59
72
+ 7988,58
73
+ 575,34
74
+ 10692,8
75
+ 416,17
76
+ 2046,47
77
+ 1802,23
78
+ 12925,9
79
+ 5787,51
80
+ 2827,66
81
+ 4537,22
82
+ 3049,40
83
+ 11130,30
84
+ 2725,66
85
+ 1417,33
86
+ 12180,7
87
+ 6157,55
88
+ 8864,20
89
+ 12995,2
90
+ 2324,60
91
+ 2251,60
92
+ 1989,47
93
+ 16,11
94
+ 8827,20
95
+ 8493,59
96
+ 4953,26
97
+ 10807,15
98
+ 7101,7
99
+ 8210,76
100
+ 2312,60
101
+ 4330,62
102
+ 12365,71
103
+ 11466,3
104
+ 1666,1
105
+ 4063,38
106
+ 4622,3
107
+ 3836,74
108
+ 11584,26
109
+ 9371,19
110
+ 3407,6
111
+ 2680,15
112
+ 3845,74
113
+ 10741,60
114
+ 9232,0
115
+ 5932,25
116
+ 6113,48
117
+ 5411,52
118
+ 12422,39
119
+ 12101,53
120
+ 10322,12
121
+ 963,36
122
+ 11640,45
123
+ 5790,51
124
+ 519,17
125
+ 9694,2
126
+ 96,11
127
+ 7911,39
128
+ 6069,48
129
+ 13002,2
130
+ 7853,39
131
+ 8660,5
132
+ 2817,66
133
+ 11,11
134
+ 1701,49
135
+ 5397,52
136
+ 2685,15
137
+ 425,17
138
+ 1958,47
139
+ 12202,7
140
+ 5036,26
141
+ 6054,48
142
+ 10515,1
143
+ 12164,7
144
+ 4865,44
145
+ 12189,7
146
+ 8355,76
147
+ 7023,7
148
+ 12813,57
149
+ 7527,35
150
+ 2435,75
151
+ 9940,24
152
+ 7402,50
153
+ 32,11
154
+ 1541,41
155
+ 3094,40
156
+ 12947,9
157
+ 5604,27
158
+ 1775,49
159
+ 8117,43
160
+ 9587,9
161
+ 2008,47
162
+ 2741,66
163
+ 582,34
164
+ 2562,15
165
+ 7763,71
166
+ 7034,7
167
+ 6376,63
168
+ 9092,57
169
+ 202,13
170
+ 11193,74
171
+ 6028,48
172
+ 1476,41
173
+ 10315,12
174
+ 9472,19
175
+ 6092,48
176
+ 1919,56
177
+ 2986,54
178
+ 6078,48
179
+ 6698,67
180
+ 11430,22
181
+ 8139,43
182
+ 7580,65
183
+ 9696,2
184
+ 11768,51
185
+ 7030,7
186
+ 3711,30
187
+ 8118,43
188
+ 4895,44
189
+ 8450,37
190
+ 2225,8
191
+ 6803,53
192
+ 3040,40
193
+ 2834,66
194
+ 10998,10
195
+ 3955,68
196
+ 11164,74
197
+ 3599,16
198
+ 4288,62
199
+ 2846,66
200
+ 4278,62
201
+ 1150,4
202
+ 8719,5
203
+ 9210,0
204
+ 10575,23
205
+ 317,32
206
+ 6922,21
207
+ 10171,34
208
+ 8842,20
209
+ 11121,16
210
+ 6293,18
211
+ 3204,61
212
+ 591,34
213
+ 9236,0
214
+ 10820,15
215
+ 996,36
216
+ 8481,59
217
+ 10583,23
218
+ 1555,41
219
+ 3754,30
220
+ 2257,60
221
+ 11351,62
222
+ 12515,43
223
+ 1792,23
224
+ 10659,47
225
+ 8290,76
226
+ 9713,2
227
+ 12555,72
228
+ 7311,64
229
+ 2599,15
230
+ 10911,54
231
+ 6332,63
232
+ 5152,45
233
+ 4602,3
234
+ 6340,63
235
+ 10385,14
236
+ 1807,23
237
+ 8458,37
238
+ 3319,6
239
+ 10078,13
240
+ 8380,37
241
+ 2739,66
242
+ 1115,12
243
+ 9783,69
244
+ 2158,8
245
+ 5652,27
246
+ 12173,7
247
+ 5744,51
248
+ 3782,74
249
+ 3456,6
250
+ 9414,19
251
+ 4810,28
252
+ 11760,27
253
+ 8925,20
254
+ 7440,35
255
+ 1883,56
256
+ 9930,24
257
+ 12920,19
258
+ 6413,63
259
+ 7596,65
260
+ 11819,25
261
+ 6624,67
262
+ 11903,55
263
+ 3858,74
264
+ 9103,57
265
+ 3555,16
266
+ 4457,29
267
+ 2409,75
268
+ 9174,57
269
+ 7133,7
270
+ 3159,61
271
+ 10129,17
272
+ 3150,10
273
+ 2201,8
274
+ 7666,65
275
+ 2865,66
276
+ 12815,57
277
+ 3282,61
278
+ 3886,74
279
+ 775,46
280
+ 6393,63
281
+ 10102,32
282
+ 7652,65
283
+ 6523,70
284
+ 7589,65
285
+ 866,46
286
+ 4929,26
287
+ 5588,27
288
+ 3759,30
289
+ 10532,49
290
+ 9122,57
291
+ 12466,58
292
+ 4214,73
293
+ 5337,42
294
+ 574,34
295
+ 9839,69
296
+ 12404,39
297
+ 296,32
298
+ 7868,39
299
+ 12839,57
300
+ 6542,70
301
+ 122,11
302
+ 750,46
303
+ 10136,17
304
+ 12891,19
305
+ 3235,61
306
+ 7444,35
307
+ 12924,9
308
+ 8768,5
309
+ 7057,7
310
+ 8917,20
311
+ 7602,65
312
+ 9870,69
313
+ 3316,6
314
+ 5694,51
315
+ 2078,8
316
+ 8022,58
317
+ 4839,44
318
+ 730,34
319
+ 12123,21
320
+ 4332,62
321
+ 7453,35
322
+ 4418,29
323
+ 6744,53
324
+ 9728,2
325
+ 1611,1
326
+ 10779,75
327
+ 6783,53
328
+ 4472,22
329
+ 367,32
330
+ 10636,56
331
+ 1696,49
332
+ 3281,61
333
+ 6690,67
334
+ 4800,28
335
+ 8328,76
336
+ 4827,44
337
+ 530,17
338
+ 3977,68
339
+ 12840,57
340
+ 10417,33
341
+ 3545,16
342
+ 2396,75
343
+ 5344,42
344
+ 1829,56
345
+ 10084,32
346
+ 6306,63
347
+ 3692,30
348
+ 11054,6
349
+ 12948,9
350
+ 1656,1
351
+ 2338,75
352
+ 12260,50
353
+ 1372,33
354
+ 6644,67
355
+ 4988,26
356
+ 6775,53
357
+ 8683,5
358
+ 2032,47
359
+ 6371,63
360
+ 6946,21
361
+ 3676,30
362
+ 2414,75
363
+ 636,34
364
+ 12581,76
365
+ 6547,70
366
+ 7395,50
367
+ 3136,10
368
+ 7163,64
369
+ 8426,37
370
+ 5921,25
371
+ 6807,53
372
+ 9788,69
373
+ 12732,20
374
+ 8341,76
375
+ 2233,8
376
+ 3528,16
377
+ 9238,0
378
+ 2532,15
379
+ 9960,24
380
+ 8977,31
381
+ 11800,51
382
+ 3432,6
383
+ 8708,5
384
+ 6902,21
385
+ 4076,38
386
+ 6444,63
387
+ 812,46
388
+ 7036,7
389
+ 1971,47
390
+ 9335,0
391
+ 3942,68
392
+ 5775,51
393
+ 9428,19
394
+ 8656,5
395
+ 9465,19
396
+ 5128,45
397
+ 10072,13
398
+ 9836,69
399
+ 9955,24
400
+ 1312,14
401
+ 3554,16
402
+ 10852,66
403
+ 5417,52
404
+ 180,13
405
+ 2776,66
406
+ 11345,62
407
+ 3655,30
408
+ 7277,64
409
+ 13060,24
410
+ 887,36
411
+ 1485,41
412
+ 8296,76
413
+ 12531,72
414
+ 11407,22
415
+ 9162,57
416
+ 4452,29
417
+ 12471,58
418
+ 3355,6
419
+ 4651,28
420
+ 3943,68
421
+ 7060,7
422
+ 3775,74
423
+ 3853,74
424
+ 1191,4
425
+ 3992,68
426
+ 7669,65
427
+ 5048,26
428
+ 9818,69
429
+ 4304,62
430
+ 10406,33
431
+ 9906,24
432
+ 11208,68
433
+ 1419,33
434
+ 4781,28
435
+ 3255,61
436
+ 455,17
437
+ 6948,21
438
+ 6594,67
439
+ 9470,19
440
+ 1844,56
441
+ 11622,45
442
+ 372,32
443
+ 109,11
444
+ 1525,41
445
+ 6630,67
446
+ 2468,75
447
+ 583,34
448
+ 11698,52
449
+ 5557,27
450
+ 13074,24
451
+ 6325,63
452
+ 11086,16
453
+ 1524,41
454
+ 6932,21
455
+ 3715,30
456
+ 6936,21
457
+ 914,36
458
+ 10052,13
459
+ 3295,61
460
+ 7266,64
461
+ 4371,29
462
+ 5666,27
463
+ 11409,22
464
+ 7414,50
465
+ 11770,51
466
+ 11278,38
467
+ 1869,56
468
+ 9650,2
469
+ 2715,66
470
+ 811,46
471
+ 11947,18
472
+ 547,17
473
+ 4377,29
474
+ 1147,4
475
+ 9112,57
476
+ 11843,48
477
+ 7886,39
478
+ 12316,35
479
+ 6926,21
480
+ 12835,57
481
+ 2451,75
482
+ 7372,50
483
+ 9207,0
484
+ 8537,59
485
+ 11889,55
486
+ 12083,53
487
+ 9037,31
488
+ 12988,2
489
+ 1114,12
490
+ 12685,5
491
+ 2915,54
492
+ 9370,19
493
+ 2337,75
494
+ 2649,15
495
+ 4955,26
496
+ 9692,2
497
+ 9643,9
498
+ 3059,40
499
+ 3724,30
500
+ 8145,43
501
+ 6717,67
502
+ 5525,52
503
+ 6551,70
504
+ 578,34
505
+ 2871,54
506
+ 9211,0
507
+ 11002,10
508
+ 2539,15
509
+ 6279,18
510
+ 10878,66
511
+ 804,46
512
+ 10910,54
513
+ 6890,21
514
+ 9849,69
515
+ 3021,40
516
+ 4061,38
517
+ 8237,76
518
+ 12476,58
519
+ 3211,61
520
+ 7895,39
521
+ 4703,28
522
+ 5236,45
523
+ 8303,76
524
+ 2867,66
525
+ 6756,53
526
+ 4719,28
527
+ 2131,8
528
+ 12739,20
529
+ 6798,53
530
+ 5817,51
531
+ 11425,22
532
+ 11553,44
533
+ 10949,40
534
+ 10671,47
535
+ 9781,69
536
+ 5571,27
537
+ 1493,41
538
+ 1399,33
539
+ 1126,4
540
+ 494,17
541
+ 527,17
542
+ 12562,72
543
+ 274,13
544
+ 1354,14
545
+ 7309,64
546
+ 10444,41
547
+ 11669,42
548
+ 7329,64
549
+ 4329,62
550
+ 2104,8
551
+ 12551,72
552
+ 11595,26
553
+ 2130,8
554
+ 12940,9
555
+ 4023,38
556
+ 1740,49
557
+ 686,34
558
+ 9012,31
559
+ 4606,3
560
+ 4133,73
561
+ 7875,39
562
+ 841,46
563
+ 5378,52
564
+ 9804,69
565
+ 11946,18
566
+ 7709,71
567
+ 3143,10
568
+ 834,46
569
+ 4445,29
570
+ 9734,2
571
+ 10973,10
572
+ 9572,9
573
+ 11203,68
574
+ 747,46
575
+ 12687,5
576
+ 2988,54
577
+ 8714,5
578
+ 6366,63
579
+ 3100,10
580
+ 11038,61
581
+ 12437,39
582
+ 4657,28
583
+ 3653,30
584
+ 9914,24
585
+ 3517,16
586
+ 9979,24
587
+ 7470,35
588
+ 8547,59
589
+ 533,17
590
+ 5114,45
591
+ 8769,5
592
+ 640,34
593
+ 3198,61
594
+ 10835,15
595
+ 9364,19
596
+ 10943,40
597
+ 9819,69
598
+ 3048,40
599
+ 1884,56
600
+ 9637,9
601
+ 11507,28
602
+ 363,32
603
+ 8159,43
604
+ 12295,35
605
+ 4648,28
606
+ 4052,38
607
+ 11134,30
608
+ 7619,65
609
+ 3556,16
610
+ 863,46
611
+ 4437,29
612
+ 2423,75
613
+ 9873,69
614
+ 11736,27
615
+ 5320,42
616
+ 4359,29
617
+ 8285,76
618
+ 12299,35
619
+ 9013,31
620
+ 9314,0
621
+ 5011,26
622
+ 3694,30
623
+ 3720,30
624
+ 8497,59
625
+ 7067,7
626
+ 5140,45
627
+ 5258,42
628
+ 520,17
629
+ 1946,47
630
+ 6438,63
631
+ 6916,21
632
+ 5745,51
633
+ 10890,54
634
+ 12326,65
635
+ 11163,74
636
+ 6403,63
637
+ 8797,20
638
+ 1556,41
639
+ 6049,48
640
+ 2499,75
641
+ 5195,45
642
+ 9966,24
643
+ 12532,72
644
+ 8402,37
645
+ 12369,71
646
+ 2493,75
647
+ 7910,39
648
+ 11521,28
649
+ 239,13
650
+ 6718,67
651
+ 11847,48
652
+ 576,34
653
+ 1566,1
654
+ 5690,51
655
+ 6944,21
656
+ 8723,5
657
+ 6622,67
658
+ 4659,28
659
+ 8658,5
660
+ 7814,39
661
+ 8586,59
662
+ 8160,43
663
+ 3709,30
664
+ 11620,45
665
+ 11840,25
666
+ 10249,36
667
+ 12833,57
668
+ 12484,43
669
+ 13072,24
670
+ 669,34
671
+ 6232,55
672
+ 1112,12
673
+ 8281,76
674
+ 5428,52
675
+ 8909,20
676
+ 12116,53
677
+ 11534,44
678
+ 1330,14
679
+ 11429,22
680
+ 4934,26
681
+ 112,11
682
+ 3690,30
683
+ 5058,26
684
+ 2723,66
685
+ 10599,23
686
+ 3824,74
687
+ 4482,22
688
+ 12187,7
689
+ 5339,42
690
+ 1794,23
691
+ 1813,23
692
+ 9626,9
693
+ 2502,75
694
+ 356,32
695
+ 3176,61
696
+ 6263,18
697
+ 4373,29
698
+ 5736,51
699
+ 10314,12
700
+ 9450,19
701
+ 12498,43
702
+ 5155,45
703
+ 3811,74
704
+ 2668,15
705
+ 906,36
706
+ 2883,54
707
+ 6849,53
708
+ 2237,60
709
+ 11850,48
710
+ 6414,63
711
+ 9383,19
712
+ 851,46
713
+ 7802,71
714
+ 7696,71
715
+ 3634,16
716
+ 5640,27
717
+ 8722,5
718
+ 4731,28
719
+ 9543,9
720
+ 1183,4
721
+ 9019,31
722
+ 5836,25
723
+ 7845,39
724
+ 11955,18
725
+ 2361,75
726
+ 13019,69
727
+ 1878,56
728
+ 5198,45
729
+ 917,36
730
+ 10725,60
731
+ 8619,5
732
+ 4928,26
733
+ 9789,69
734
+ 5300,42
735
+ 9756,2
736
+ 3586,16
737
+ 8692,5
738
+ 12781,31
739
+ 6175,55
740
+ 11744,27
741
+ 11863,48
742
+ 4357,29
743
+ 11624,45
744
+ 10726,60
745
+ 9652,2
746
+ 5939,25
747
+ 3114,10
748
+ 4292,62
749
+ 5810,51
750
+ 6885,21
751
+ 8479,59
752
+ 1629,1
753
+ 7561,35
754
+ 12056,67
755
+ 3330,6
756
+ 10125,17
757
+ 10358,4
758
+ 10849,66
759
+ 1857,56
760
+ 2073,47
761
+ 10603,56
762
+ 6613,67
763
+ 0,11
764
+ 726,34
765
+ 8246,76
766
+ 9434,19
767
+ 7770,71
768
+ 6508,70
769
+ 9700,2
770
+ 8075,43
771
+ 8105,43
772
+ 566,17
773
+ 7824,39
774
+ 7778,71
775
+ 11714,52
776
+ 3427,6
777
+ 7165,64
778
+ 8802,20
779
+ 8944,31
780
+ 12936,9
781
+ 12156,21
782
+ 12265,50
783
+ 3131,10
784
+ 1923,56
785
+ 4866,44
786
+ 11664,42
787
+ 2905,54
788
+ 6947,21
789
+ 2114,8
790
+ 7536,35
791
+ 10215,46
792
+ 5319,42
793
+ 2093,8
794
+ 12463,58
795
+ 4978,26
796
+ 6038,48
797
+ 1113,12
798
+ 11394,29
799
+ 1422,33
800
+ 6544,70
801
+ 8647,5
802
+ 1679,49
803
+ 12356,65
804
+ 12070,67
805
+ 12277,50
806
+ 774,46
807
+ 9699,2
808
+ 8452,37
809
+ 5649,27
810
+ 6680,67
811
+ 9679,2
812
+ 10266,36
813
+ 3356,6
814
+ 764,46
815
+ 7725,71
816
+ 250,13
817
+ 10111,32
818
+ 1219,4
819
+ 3056,40
820
+ 11428,22
821
+ 5368,42
822
+ 352,32
823
+ 12923,9
824
+ 12676,59
825
+ 24,11
826
+ 10540,49
827
+ 10208,46
828
+ 3068,40
829
+ 4997,26
830
+ 9194,0
831
+ 9071,57
832
+ 1020,12
833
+ 8119,43
834
+ 3856,74
835
+ 2772,66
836
+ 5789,51
837
+ 4760,28
838
+ 4568,3
839
+ 10079,13
840
+ 7062,7
841
+ 5173,45
842
+ 7731,71
843
+ 9629,9
844
+ 6458,63
845
+ 11468,3
846
+ 12564,76
847
+ 171,13
848
+ 10128,17
849
+ 11103,16
850
+ 12171,7
851
+ 285,13
852
+ 7717,71
853
+ 6847,53
854
+ 8517,59
855
+ 4927,26
856
+ 3145,10
857
+ 2188,8
858
+ 9437,19
859
+ 8682,5
860
+ 8784,20
861
+ 12106,53
862
+ 7129,7
863
+ 6716,67
864
+ 2291,60
865
+ 9988,24
866
+ 2379,75
867
+ 5882,25
868
+ 5051,26
869
+ 8308,76
870
+ 12004,70
871
+ 6426,63
872
+ 7593,65
873
+ 2106,8
874
+ 8528,59
875
+ 5135,45
876
+ 5635,27
877
+ 1675,49
878
+ 4101,73
879
+ 12311,35
880
+ 2377,75
881
+ 7452,35
882
+ 2192,8
883
+ 1103,12
884
+ 4739,28
885
+ 7524,35
886
+ 2036,47
887
+ 11540,44
888
+ 1744,49
889
+ 6503,70
890
+ 5827,51
891
+ 7515,35
892
+ 6215,55
893
+ 8696,5
894
+ 9185,0
895
+ 9842,69
896
+ 3405,6
897
+ 4479,22
898
+ 3431,6
899
+ 11262,38
900
+ 10399,14
901
+ 1179,4
902
+ 5724,51
903
+ 10804,15
904
+ 7540,35
905
+ 7606,65
906
+ 12830,57
907
+ 12883,19
908
+ 623,34
909
+ 10616,56
910
+ 7358,50
911
+ 10683,8
912
+ 9646,2
913
+ 3727,30
914
+ 7598,65
915
+ 1901,56
916
+ 2397,75
917
+ 10740,60
918
+ 11073,6
919
+ 11680,42
920
+ 1347,14
921
+ 10570,23
922
+ 3078,40
923
+ 10134,17
924
+ 2862,66
925
+ 3728,30
926
+ 3507,16
927
+ 8727,5
928
+ 6883,21
929
+ 4761,28
930
+ 11283,73
931
+ 8201,72
932
+ 9821,69
933
+ 9349,19
934
+ 7231,64
935
+ 5608,27
936
+ 6367,63
937
+ 13061,24
938
+ 12027,70
939
+ 1538,41
940
+ 11943,18
941
+ 12606,37
942
+ 6153,55
943
+ 11988,63
944
+ 5526,52
945
+ 6668,67
946
+ 10397,14
947
+ 10564,23
948
+ 3058,40
949
+ 11410,22
950
+ 7777,71
951
+ 5475,52
952
+ 3772,74
953
+ 9440,19
954
+ 722,34
955
+ 5249,45
956
+ 8912,20
957
+ 11317,73
958
+ 6921,21
959
+ 11175,74
960
+ 3919,68
961
+ 9682,2
962
+ 7156,7
963
+ 3571,16
964
+ 11953,18
965
+ 5269,42
966
+ 626,34
967
+ 761,46
968
+ 8172,72
969
+ 10331,4
970
+ 8158,43
971
+ 11477,3
972
+ 8219,76
973
+ 6909,21
974
+ 437,17
975
+ 4306,62
976
+ 10138,17
977
+ 6498,70
978
+ 9589,9
979
+ 11147,30
980
+ 12803,57
981
+ 185,13
982
+ 1823,56
983
+ 11964,63
984
+ 517,17
985
+ 7164,64
986
+ 12251,50
987
+ 12896,19
988
+ 19,11
989
+ 4888,44
990
+ 3508,16
991
+ 14,11
992
+ 10830,15
993
+ 5634,27
994
+ 4901,44
995
+ 3225,61
996
+ 11711,52
997
+ 956,36
998
+ 2176,8
999
+ 10410,33
1000
+ 8943,20
finetune_lora/qwen2.5-7b-instruct/banking77/size=16/seed=4/lora_ans/0.0-1.0/0.7-1.0/test=banking77/list=test_1000/logits.csv ADDED
The diff for this file is too large to render. See raw diff
 
finetune_lora/qwen2.5-7b-instruct/banking77/size=16/seed=4/lora_ans/0.0-1.0/0.7-1.0/train_args.yaml ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ K: null
2
+ global_batch_size: 8
3
+ learning_rate: 0.0001
4
+ loss: ans
5
+ max_steps: 80
6
+ micro_batch_size: 1
7
+ optimizer_name: adamw
8
+ patience: 10
9
+ val_check_interval: 16
10
+ weight_decay: 0.0
finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=0/lora_ans_no_es/0.0-0.7/0.0-0.3/best.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ab4428e8cbfe27581a9f2d0b955a4bcfabaac91d8ab022885e89935261a895d
3
+ size 42951934
finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=0/lora_ans_no_es/0.0-0.7/0.0-0.3/checkpoint/config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen2ForCausalLM"
4
+ ],
5
+ "attention_dropout": 0.0,
6
+ "bos_token_id": 151643,
7
+ "eos_token_id": 151645,
8
+ "hidden_act": "silu",
9
+ "hidden_size": 3584,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 18944,
12
+ "max_position_embeddings": 32768,
13
+ "max_window_layers": 28,
14
+ "model_type": "qwen2",
15
+ "num_attention_heads": 28,
16
+ "num_hidden_layers": 28,
17
+ "num_key_value_heads": 4,
18
+ "rms_norm_eps": 1e-06,
19
+ "rope_theta": 1000000.0,
20
+ "sliding_window": 131072,
21
+ "tie_word_embeddings": false,
22
+ "torch_dtype": "bfloat16",
23
+ "transformers_version": "4.43.1",
24
+ "use_cache": true,
25
+ "use_sliding_window": false,
26
+ "vocab_size": 152064
27
+ }
finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=0/lora_ans_no_es/0.0-0.7/0.0-0.3/checkpoint/generation_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "pad_token_id": 151643,
4
+ "do_sample": true,
5
+ "eos_token_id": [
6
+ 151645,
7
+ 151643
8
+ ],
9
+ "repetition_penalty": 1.05,
10
+ "temperature": 0.7,
11
+ "top_p": 0.8,
12
+ "top_k": 20,
13
+ "transformers_version": "4.37.0"
14
+ }
finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=0/lora_ans_no_es/0.0-0.7/0.0-0.3/checkpoint/lit_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f2893700d468552b27b00bb16e3906c7a01f49c09f3c786c252a2b5d36d179f
3
+ size 15231297418
finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=0/lora_ans_no_es/0.0-0.7/0.0-0.3/checkpoint/lit_model.pth.lora ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30b223c02d1e1f841f0a1ebfc4adc640ea3f92b0e0e9dbebb11b1f2189372c8b
3
+ size 42951614
finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=0/lora_ans_no_es/0.0-0.7/0.0-0.3/checkpoint/model_config.yaml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ attention_logit_softcapping: null
2
+ attention_scores_scalar: null
3
+ attn_bias: true
4
+ bias: false
5
+ block_size: 131072
6
+ final_logit_softcapping: null
7
+ gelu_approximate: none
8
+ head_size: 128
9
+ hf_config:
10
+ name: Qwen2.5-7B-Instruct
11
+ org: Qwen
12
+ intermediate_size: 18944
13
+ lm_head_bias: false
14
+ mlp_class_name: LLaMAMLP
15
+ n_embd: 3584
16
+ n_expert: 0
17
+ n_expert_per_token: 0
18
+ n_head: 28
19
+ n_layer: 28
20
+ n_query_groups: 4
21
+ name: Qwen2.5-7B-Instruct
22
+ norm_class_name: RMSNorm
23
+ norm_eps: 1.0e-06
24
+ padded_vocab_size: 152064
25
+ padding_multiple: 512
26
+ parallel_residual: false
27
+ post_attention_norm: false
28
+ post_mlp_norm: false
29
+ rope_adjustments: null
30
+ rope_base: 1000000
31
+ rope_condense_ratio: 1
32
+ rotary_percentage: 1.0
33
+ scale_embeddings: false
34
+ shared_attention_norm: false
35
+ sliding_window_layer_placing: null
36
+ sliding_window_size: null
37
+ vocab_size: 151643
finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=0/lora_ans_no_es/0.0-0.7/0.0-0.3/checkpoint/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=0/lora_ans_no_es/0.0-0.7/0.0-0.3/checkpoint/tokenizer_config.json ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ }
181
+ },
182
+ "additional_special_tokens": [
183
+ "<|im_start|>",
184
+ "<|im_end|>",
185
+ "<|object_ref_start|>",
186
+ "<|object_ref_end|>",
187
+ "<|box_start|>",
188
+ "<|box_end|>",
189
+ "<|quad_start|>",
190
+ "<|quad_end|>",
191
+ "<|vision_start|>",
192
+ "<|vision_end|>",
193
+ "<|vision_pad|>",
194
+ "<|image_pad|>",
195
+ "<|video_pad|>"
196
+ ],
197
+ "bos_token": null,
198
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
199
+ "clean_up_tokenization_spaces": false,
200
+ "eos_token": "<|im_end|>",
201
+ "errors": "replace",
202
+ "model_max_length": 131072,
203
+ "pad_token": "<|endoftext|>",
204
+ "split_special_tokens": false,
205
+ "tokenizer_class": "Qwen2Tokenizer",
206
+ "unk_token": null
207
+ }
finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=0/lora_ans_no_es/0.0-0.7/0.0-0.3/last.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84955cef7d1e787b0c026015aa914237537283fa4bd894aff9be50340811ffe2
3
+ size 42951934
finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=0/lora_ans_no_es/0.0-0.7/0.0-0.3/logs/events.out.tfevents.1735554367.gamma.2619525.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d1fbbe02db3129a6dbff5d9cab1e24a19cdde29d6ef2e25a22f9a6b1798217d
3
+ size 926766
finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=0/lora_ans_no_es/0.0-0.7/0.0-0.3/logs/hparams.yaml ADDED
@@ -0,0 +1 @@
 
 
1
+ {}
finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=0/lora_ans_no_es/0.0-0.7/0.0-0.3/logs/metrics.csv ADDED
The diff for this file is too large to render. See raw diff
 
finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=0/lora_ans_no_es/0.0-0.7/0.0-0.3/test=banking77/list=0.7-1.0/labels.csv ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 9777,69
2
+ 8433,37
3
+ 11008,61
4
+ 6369,63
5
+ 6636,67
6
+ 8438,37
7
+ 1882,56
8
+ 5097,45
9
+ 273,13
10
+ 3266,61
11
+ 7981,58
12
+ 7837,39
13
+ 3200,61
14
+ 8368,76
15
+ 8197,72
16
+ 3656,30
17
+ 6897,21
18
+ 8754,5
19
+ 1481,41
20
+ 10932,40
21
+ 4724,28
22
+ 11068,6
23
+ 1579,1
24
+ 136,11
25
+ 12898,19
26
+ 9254,0
27
+ 2148,8
28
+ 3300,61
29
+ 11393,29
30
+ 4538,22
31
+ 9002,31
32
+ 7758,71
33
+ 6516,70
34
+ 11448,3
35
+ 9127,57
36
+ 2343,75
37
+ 13070,24
38
+ 1782,23
39
+ 8089,43
40
+ 11050,6
41
+ 3312,6
42
+ 10687,8
43
+ 4653,28
44
+ 6935,21
45
+ 8834,20
46
+ 3650,30
47
+ 147,11
48
+ 269,13
49
+ 3392,6
50
+ 1130,4
51
+ 2755,66
52
+ 7194,64
53
+ 10176,34
54
+ 7513,35
55
+ 251,13
56
+ 11949,18
57
+ 9876,24
58
+ 3415,6
59
+ 4125,73
60
+ 8373,37
61
+ 3445,6
62
+ 7880,39
63
+ 8343,76
64
+ 4848,44
65
+ 10060,13
66
+ 8039,58
67
+ 2110,8
68
+ 823,46
69
+ 4368,29
70
+ 3990,68
71
+ 11342,62
72
+ 8812,20
73
+ 1080,12
74
+ 8477,59
75
+ 5003,26
76
+ 12015,70
77
+ 7863,39
78
+ 9775,69
79
+ 3047,40
80
+ 6635,67
81
+ 7985,58
82
+ 7879,39
83
+ 5990,48
84
+ 9153,57
85
+ 3165,61
86
+ 1366,33
87
+ 4038,38
88
+ 9618,9
89
+ 5586,27
90
+ 2428,75
91
+ 1511,41
92
+ 1177,4
93
+ 2375,75
94
+ 3834,74
95
+ 11590,26
96
+ 2873,54
97
+ 9502,19
98
+ 12922,19
99
+ 295,32
100
+ 2212,8
101
+ 7253,64
102
+ 4282,62
103
+ 3481,6
104
+ 9148,57
105
+ 7525,35
106
+ 4364,29
107
+ 571,34
108
+ 6694,67
109
+ 10210,46
110
+ 11039,61
111
+ 7935,58
112
+ 10621,56
113
+ 12678,59
114
+ 756,46
115
+ 5380,52
116
+ 6449,63
117
+ 4707,28
118
+ 12368,71
119
+ 6534,70
120
+ 1573,1
121
+ 11911,55
122
+ 360,32
123
+ 9179,0
124
+ 181,13
125
+ 8623,5
126
+ 2391,75
127
+ 1188,4
128
+ 11166,74
129
+ 4465,22
130
+ 148,11
131
+ 9875,24
132
+ 1204,4
133
+ 8360,76
134
+ 6969,21
135
+ 7860,39
136
+ 6461,63
137
+ 11385,29
138
+ 4908,44
139
+ 7804,39
140
+ 2191,8
141
+ 6205,55
142
+ 4576,3
143
+ 2064,47
144
+ 755,46
145
+ 5834,51
146
+ 5545,27
147
+ 1418,33
148
+ 12932,9
149
+ 2382,75
150
+ 3380,6
151
+ 12490,43
152
+ 7744,71
153
+ 9298,0
154
+ 4818,44
155
+ 1624,1
156
+ 10508,1
157
+ 12271,50
158
+ 9331,0
159
+ 6634,67
160
+ 5224,45
161
+ 8272,76
162
+ 7548,35
163
+ 8182,72
164
+ 10496,1
165
+ 11618,45
166
+ 2489,75
167
+ 9379,19
168
+ 11519,28
169
+ 4949,26
170
+ 6880,53
171
+ 1588,1
172
+ 12544,72
173
+ 8514,59
174
+ 10040,11
175
+ 1393,33
176
+ 4609,3
177
+ 1201,4
178
+ 9261,0
179
+ 5129,45
180
+ 9309,0
181
+ 3262,61
182
+ 11560,44
183
+ 2705,66
184
+ 12677,59
finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=0/lora_ans_no_es/0.0-0.7/0.0-0.3/test=banking77/list=0.7-1.0/logits.csv ADDED
The diff for this file is too large to render. See raw diff
 
finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=0/lora_ans_no_es/0.0-0.7/0.0-0.3/test=banking77/list=test_1000/labels.csv ADDED
@@ -0,0 +1,1000 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 6197,55
2
+ 6441,63
3
+ 5254,42
4
+ 4058,38
5
+ 5596,27
6
+ 114,11
7
+ 2806,66
8
+ 1087,12
9
+ 1139,4
10
+ 12996,2
11
+ 10447,41
12
+ 11288,73
13
+ 12620,37
14
+ 10691,8
15
+ 8838,20
16
+ 11829,25
17
+ 5055,26
18
+ 5484,52
19
+ 2936,54
20
+ 9239,0
21
+ 5848,25
22
+ 2035,47
23
+ 1266,14
24
+ 11887,55
25
+ 4073,38
26
+ 4823,44
27
+ 2662,15
28
+ 5923,25
29
+ 2923,54
30
+ 7478,35
31
+ 12409,39
32
+ 11263,38
33
+ 12272,50
34
+ 8760,5
35
+ 1955,47
36
+ 7756,71
37
+ 12943,9
38
+ 8885,20
39
+ 6776,53
40
+ 12715,5
41
+ 11892,55
42
+ 4231,73
43
+ 3603,16
44
+ 1096,12
45
+ 2786,66
46
+ 12791,31
47
+ 7613,65
48
+ 12357,65
49
+ 11662,42
50
+ 9066,57
51
+ 63,11
52
+ 5620,27
53
+ 6193,55
54
+ 871,46
55
+ 10854,66
56
+ 8147,43
57
+ 3305,6
58
+ 3248,61
59
+ 2603,15
60
+ 3482,16
61
+ 4639,28
62
+ 7136,7
63
+ 6570,70
64
+ 7884,39
65
+ 12307,35
66
+ 10342,4
67
+ 257,13
68
+ 10939,40
69
+ 597,34
70
+ 3076,40
71
+ 8498,59
72
+ 7988,58
73
+ 575,34
74
+ 10692,8
75
+ 416,17
76
+ 2046,47
77
+ 1802,23
78
+ 12925,9
79
+ 5787,51
80
+ 2827,66
81
+ 4537,22
82
+ 3049,40
83
+ 11130,30
84
+ 2725,66
85
+ 1417,33
86
+ 12180,7
87
+ 6157,55
88
+ 8864,20
89
+ 12995,2
90
+ 2324,60
91
+ 2251,60
92
+ 1989,47
93
+ 16,11
94
+ 8827,20
95
+ 8493,59
96
+ 4953,26
97
+ 10807,15
98
+ 7101,7
99
+ 8210,76
100
+ 2312,60
101
+ 4330,62
102
+ 12365,71
103
+ 11466,3
104
+ 1666,1
105
+ 4063,38
106
+ 4622,3
107
+ 3836,74
108
+ 11584,26
109
+ 9371,19
110
+ 3407,6
111
+ 2680,15
112
+ 3845,74
113
+ 10741,60
114
+ 9232,0
115
+ 5932,25
116
+ 6113,48
117
+ 5411,52
118
+ 12422,39
119
+ 12101,53
120
+ 10322,12
121
+ 963,36
122
+ 11640,45
123
+ 5790,51
124
+ 519,17
125
+ 9694,2
126
+ 96,11
127
+ 7911,39
128
+ 6069,48
129
+ 13002,2
130
+ 7853,39
131
+ 8660,5
132
+ 2817,66
133
+ 11,11
134
+ 1701,49
135
+ 5397,52
136
+ 2685,15
137
+ 425,17
138
+ 1958,47
139
+ 12202,7
140
+ 5036,26
141
+ 6054,48
142
+ 10515,1
143
+ 12164,7
144
+ 4865,44
145
+ 12189,7
146
+ 8355,76
147
+ 7023,7
148
+ 12813,57
149
+ 7527,35
150
+ 2435,75
151
+ 9940,24
152
+ 7402,50
153
+ 32,11
154
+ 1541,41
155
+ 3094,40
156
+ 12947,9
157
+ 5604,27
158
+ 1775,49
159
+ 8117,43
160
+ 9587,9
161
+ 2008,47
162
+ 2741,66
163
+ 582,34
164
+ 2562,15
165
+ 7763,71
166
+ 7034,7
167
+ 6376,63
168
+ 9092,57
169
+ 202,13
170
+ 11193,74
171
+ 6028,48
172
+ 1476,41
173
+ 10315,12
174
+ 9472,19
175
+ 6092,48
176
+ 1919,56
177
+ 2986,54
178
+ 6078,48
179
+ 6698,67
180
+ 11430,22
181
+ 8139,43
182
+ 7580,65
183
+ 9696,2
184
+ 11768,51
185
+ 7030,7
186
+ 3711,30
187
+ 8118,43
188
+ 4895,44
189
+ 8450,37
190
+ 2225,8
191
+ 6803,53
192
+ 3040,40
193
+ 2834,66
194
+ 10998,10
195
+ 3955,68
196
+ 11164,74
197
+ 3599,16
198
+ 4288,62
199
+ 2846,66
200
+ 4278,62
201
+ 1150,4
202
+ 8719,5
203
+ 9210,0
204
+ 10575,23
205
+ 317,32
206
+ 6922,21
207
+ 10171,34
208
+ 8842,20
209
+ 11121,16
210
+ 6293,18
211
+ 3204,61
212
+ 591,34
213
+ 9236,0
214
+ 10820,15
215
+ 996,36
216
+ 8481,59
217
+ 10583,23
218
+ 1555,41
219
+ 3754,30
220
+ 2257,60
221
+ 11351,62
222
+ 12515,43
223
+ 1792,23
224
+ 10659,47
225
+ 8290,76
226
+ 9713,2
227
+ 12555,72
228
+ 7311,64
229
+ 2599,15
230
+ 10911,54
231
+ 6332,63
232
+ 5152,45
233
+ 4602,3
234
+ 6340,63
235
+ 10385,14
236
+ 1807,23
237
+ 8458,37
238
+ 3319,6
239
+ 10078,13
240
+ 8380,37
241
+ 2739,66
242
+ 1115,12
243
+ 9783,69
244
+ 2158,8
245
+ 5652,27
246
+ 12173,7
247
+ 5744,51
248
+ 3782,74
249
+ 3456,6
250
+ 9414,19
251
+ 4810,28
252
+ 11760,27
253
+ 8925,20
254
+ 7440,35
255
+ 1883,56
256
+ 9930,24
257
+ 12920,19
258
+ 6413,63
259
+ 7596,65
260
+ 11819,25
261
+ 6624,67
262
+ 11903,55
263
+ 3858,74
264
+ 9103,57
265
+ 3555,16
266
+ 4457,29
267
+ 2409,75
268
+ 9174,57
269
+ 7133,7
270
+ 3159,61
271
+ 10129,17
272
+ 3150,10
273
+ 2201,8
274
+ 7666,65
275
+ 2865,66
276
+ 12815,57
277
+ 3282,61
278
+ 3886,74
279
+ 775,46
280
+ 6393,63
281
+ 10102,32
282
+ 7652,65
283
+ 6523,70
284
+ 7589,65
285
+ 866,46
286
+ 4929,26
287
+ 5588,27
288
+ 3759,30
289
+ 10532,49
290
+ 9122,57
291
+ 12466,58
292
+ 4214,73
293
+ 5337,42
294
+ 574,34
295
+ 9839,69
296
+ 12404,39
297
+ 296,32
298
+ 7868,39
299
+ 12839,57
300
+ 6542,70
301
+ 122,11
302
+ 750,46
303
+ 10136,17
304
+ 12891,19
305
+ 3235,61
306
+ 7444,35
307
+ 12924,9
308
+ 8768,5
309
+ 7057,7
310
+ 8917,20
311
+ 7602,65
312
+ 9870,69
313
+ 3316,6
314
+ 5694,51
315
+ 2078,8
316
+ 8022,58
317
+ 4839,44
318
+ 730,34
319
+ 12123,21
320
+ 4332,62
321
+ 7453,35
322
+ 4418,29
323
+ 6744,53
324
+ 9728,2
325
+ 1611,1
326
+ 10779,75
327
+ 6783,53
328
+ 4472,22
329
+ 367,32
330
+ 10636,56
331
+ 1696,49
332
+ 3281,61
333
+ 6690,67
334
+ 4800,28
335
+ 8328,76
336
+ 4827,44
337
+ 530,17
338
+ 3977,68
339
+ 12840,57
340
+ 10417,33
341
+ 3545,16
342
+ 2396,75
343
+ 5344,42
344
+ 1829,56
345
+ 10084,32
346
+ 6306,63
347
+ 3692,30
348
+ 11054,6
349
+ 12948,9
350
+ 1656,1
351
+ 2338,75
352
+ 12260,50
353
+ 1372,33
354
+ 6644,67
355
+ 4988,26
356
+ 6775,53
357
+ 8683,5
358
+ 2032,47
359
+ 6371,63
360
+ 6946,21
361
+ 3676,30
362
+ 2414,75
363
+ 636,34
364
+ 12581,76
365
+ 6547,70
366
+ 7395,50
367
+ 3136,10
368
+ 7163,64
369
+ 8426,37
370
+ 5921,25
371
+ 6807,53
372
+ 9788,69
373
+ 12732,20
374
+ 8341,76
375
+ 2233,8
376
+ 3528,16
377
+ 9238,0
378
+ 2532,15
379
+ 9960,24
380
+ 8977,31
381
+ 11800,51
382
+ 3432,6
383
+ 8708,5
384
+ 6902,21
385
+ 4076,38
386
+ 6444,63
387
+ 812,46
388
+ 7036,7
389
+ 1971,47
390
+ 9335,0
391
+ 3942,68
392
+ 5775,51
393
+ 9428,19
394
+ 8656,5
395
+ 9465,19
396
+ 5128,45
397
+ 10072,13
398
+ 9836,69
399
+ 9955,24
400
+ 1312,14
401
+ 3554,16
402
+ 10852,66
403
+ 5417,52
404
+ 180,13
405
+ 2776,66
406
+ 11345,62
407
+ 3655,30
408
+ 7277,64
409
+ 13060,24
410
+ 887,36
411
+ 1485,41
412
+ 8296,76
413
+ 12531,72
414
+ 11407,22
415
+ 9162,57
416
+ 4452,29
417
+ 12471,58
418
+ 3355,6
419
+ 4651,28
420
+ 3943,68
421
+ 7060,7
422
+ 3775,74
423
+ 3853,74
424
+ 1191,4
425
+ 3992,68
426
+ 7669,65
427
+ 5048,26
428
+ 9818,69
429
+ 4304,62
430
+ 10406,33
431
+ 9906,24
432
+ 11208,68
433
+ 1419,33
434
+ 4781,28
435
+ 3255,61
436
+ 455,17
437
+ 6948,21
438
+ 6594,67
439
+ 9470,19
440
+ 1844,56
441
+ 11622,45
442
+ 372,32
443
+ 109,11
444
+ 1525,41
445
+ 6630,67
446
+ 2468,75
447
+ 583,34
448
+ 11698,52
449
+ 5557,27
450
+ 13074,24
451
+ 6325,63
452
+ 11086,16
453
+ 1524,41
454
+ 6932,21
455
+ 3715,30
456
+ 6936,21
457
+ 914,36
458
+ 10052,13
459
+ 3295,61
460
+ 7266,64
461
+ 4371,29
462
+ 5666,27
463
+ 11409,22
464
+ 7414,50
465
+ 11770,51
466
+ 11278,38
467
+ 1869,56
468
+ 9650,2
469
+ 2715,66
470
+ 811,46
471
+ 11947,18
472
+ 547,17
473
+ 4377,29
474
+ 1147,4
475
+ 9112,57
476
+ 11843,48
477
+ 7886,39
478
+ 12316,35
479
+ 6926,21
480
+ 12835,57
481
+ 2451,75
482
+ 7372,50
483
+ 9207,0
484
+ 8537,59
485
+ 11889,55
486
+ 12083,53
487
+ 9037,31
488
+ 12988,2
489
+ 1114,12
490
+ 12685,5
491
+ 2915,54
492
+ 9370,19
493
+ 2337,75
494
+ 2649,15
495
+ 4955,26
496
+ 9692,2
497
+ 9643,9
498
+ 3059,40
499
+ 3724,30
500
+ 8145,43
501
+ 6717,67
502
+ 5525,52
503
+ 6551,70
504
+ 578,34
505
+ 2871,54
506
+ 9211,0
507
+ 11002,10
508
+ 2539,15
509
+ 6279,18
510
+ 10878,66
511
+ 804,46
512
+ 10910,54
513
+ 6890,21
514
+ 9849,69
515
+ 3021,40
516
+ 4061,38
517
+ 8237,76
518
+ 12476,58
519
+ 3211,61
520
+ 7895,39
521
+ 4703,28
522
+ 5236,45
523
+ 8303,76
524
+ 2867,66
525
+ 6756,53
526
+ 4719,28
527
+ 2131,8
528
+ 12739,20
529
+ 6798,53
530
+ 5817,51
531
+ 11425,22
532
+ 11553,44
533
+ 10949,40
534
+ 10671,47
535
+ 9781,69
536
+ 5571,27
537
+ 1493,41
538
+ 1399,33
539
+ 1126,4
540
+ 494,17
541
+ 527,17
542
+ 12562,72
543
+ 274,13
544
+ 1354,14
545
+ 7309,64
546
+ 10444,41
547
+ 11669,42
548
+ 7329,64
549
+ 4329,62
550
+ 2104,8
551
+ 12551,72
552
+ 11595,26
553
+ 2130,8
554
+ 12940,9
555
+ 4023,38
556
+ 1740,49
557
+ 686,34
558
+ 9012,31
559
+ 4606,3
560
+ 4133,73
561
+ 7875,39
562
+ 841,46
563
+ 5378,52
564
+ 9804,69
565
+ 11946,18
566
+ 7709,71
567
+ 3143,10
568
+ 834,46
569
+ 4445,29
570
+ 9734,2
571
+ 10973,10
572
+ 9572,9
573
+ 11203,68
574
+ 747,46
575
+ 12687,5
576
+ 2988,54
577
+ 8714,5
578
+ 6366,63
579
+ 3100,10
580
+ 11038,61
581
+ 12437,39
582
+ 4657,28
583
+ 3653,30
584
+ 9914,24
585
+ 3517,16
586
+ 9979,24
587
+ 7470,35
588
+ 8547,59
589
+ 533,17
590
+ 5114,45
591
+ 8769,5
592
+ 640,34
593
+ 3198,61
594
+ 10835,15
595
+ 9364,19
596
+ 10943,40
597
+ 9819,69
598
+ 3048,40
599
+ 1884,56
600
+ 9637,9
601
+ 11507,28
602
+ 363,32
603
+ 8159,43
604
+ 12295,35
605
+ 4648,28
606
+ 4052,38
607
+ 11134,30
608
+ 7619,65
609
+ 3556,16
610
+ 863,46
611
+ 4437,29
612
+ 2423,75
613
+ 9873,69
614
+ 11736,27
615
+ 5320,42
616
+ 4359,29
617
+ 8285,76
618
+ 12299,35
619
+ 9013,31
620
+ 9314,0
621
+ 5011,26
622
+ 3694,30
623
+ 3720,30
624
+ 8497,59
625
+ 7067,7
626
+ 5140,45
627
+ 5258,42
628
+ 520,17
629
+ 1946,47
630
+ 6438,63
631
+ 6916,21
632
+ 5745,51
633
+ 10890,54
634
+ 12326,65
635
+ 11163,74
636
+ 6403,63
637
+ 8797,20
638
+ 1556,41
639
+ 6049,48
640
+ 2499,75
641
+ 5195,45
642
+ 9966,24
643
+ 12532,72
644
+ 8402,37
645
+ 12369,71
646
+ 2493,75
647
+ 7910,39
648
+ 11521,28
649
+ 239,13
650
+ 6718,67
651
+ 11847,48
652
+ 576,34
653
+ 1566,1
654
+ 5690,51
655
+ 6944,21
656
+ 8723,5
657
+ 6622,67
658
+ 4659,28
659
+ 8658,5
660
+ 7814,39
661
+ 8586,59
662
+ 8160,43
663
+ 3709,30
664
+ 11620,45
665
+ 11840,25
666
+ 10249,36
667
+ 12833,57
668
+ 12484,43
669
+ 13072,24
670
+ 669,34
671
+ 6232,55
672
+ 1112,12
673
+ 8281,76
674
+ 5428,52
675
+ 8909,20
676
+ 12116,53
677
+ 11534,44
678
+ 1330,14
679
+ 11429,22
680
+ 4934,26
681
+ 112,11
682
+ 3690,30
683
+ 5058,26
684
+ 2723,66
685
+ 10599,23
686
+ 3824,74
687
+ 4482,22
688
+ 12187,7
689
+ 5339,42
690
+ 1794,23
691
+ 1813,23
692
+ 9626,9
693
+ 2502,75
694
+ 356,32
695
+ 3176,61
696
+ 6263,18
697
+ 4373,29
698
+ 5736,51
699
+ 10314,12
700
+ 9450,19
701
+ 12498,43
702
+ 5155,45
703
+ 3811,74
704
+ 2668,15
705
+ 906,36
706
+ 2883,54
707
+ 6849,53
708
+ 2237,60
709
+ 11850,48
710
+ 6414,63
711
+ 9383,19
712
+ 851,46
713
+ 7802,71
714
+ 7696,71
715
+ 3634,16
716
+ 5640,27
717
+ 8722,5
718
+ 4731,28
719
+ 9543,9
720
+ 1183,4
721
+ 9019,31
722
+ 5836,25
723
+ 7845,39
724
+ 11955,18
725
+ 2361,75
726
+ 13019,69
727
+ 1878,56
728
+ 5198,45
729
+ 917,36
730
+ 10725,60
731
+ 8619,5
732
+ 4928,26
733
+ 9789,69
734
+ 5300,42
735
+ 9756,2
736
+ 3586,16
737
+ 8692,5
738
+ 12781,31
739
+ 6175,55
740
+ 11744,27
741
+ 11863,48
742
+ 4357,29
743
+ 11624,45
744
+ 10726,60
745
+ 9652,2
746
+ 5939,25
747
+ 3114,10
748
+ 4292,62
749
+ 5810,51
750
+ 6885,21
751
+ 8479,59
752
+ 1629,1
753
+ 7561,35
754
+ 12056,67
755
+ 3330,6
756
+ 10125,17
757
+ 10358,4
758
+ 10849,66
759
+ 1857,56
760
+ 2073,47
761
+ 10603,56
762
+ 6613,67
763
+ 0,11
764
+ 726,34
765
+ 8246,76
766
+ 9434,19
767
+ 7770,71
768
+ 6508,70
769
+ 9700,2
770
+ 8075,43
771
+ 8105,43
772
+ 566,17
773
+ 7824,39
774
+ 7778,71
775
+ 11714,52
776
+ 3427,6
777
+ 7165,64
778
+ 8802,20
779
+ 8944,31
780
+ 12936,9
781
+ 12156,21
782
+ 12265,50
783
+ 3131,10
784
+ 1923,56
785
+ 4866,44
786
+ 11664,42
787
+ 2905,54
788
+ 6947,21
789
+ 2114,8
790
+ 7536,35
791
+ 10215,46
792
+ 5319,42
793
+ 2093,8
794
+ 12463,58
795
+ 4978,26
796
+ 6038,48
797
+ 1113,12
798
+ 11394,29
799
+ 1422,33
800
+ 6544,70
801
+ 8647,5
802
+ 1679,49
803
+ 12356,65
804
+ 12070,67
805
+ 12277,50
806
+ 774,46
807
+ 9699,2
808
+ 8452,37
809
+ 5649,27
810
+ 6680,67
811
+ 9679,2
812
+ 10266,36
813
+ 3356,6
814
+ 764,46
815
+ 7725,71
816
+ 250,13
817
+ 10111,32
818
+ 1219,4
819
+ 3056,40
820
+ 11428,22
821
+ 5368,42
822
+ 352,32
823
+ 12923,9
824
+ 12676,59
825
+ 24,11
826
+ 10540,49
827
+ 10208,46
828
+ 3068,40
829
+ 4997,26
830
+ 9194,0
831
+ 9071,57
832
+ 1020,12
833
+ 8119,43
834
+ 3856,74
835
+ 2772,66
836
+ 5789,51
837
+ 4760,28
838
+ 4568,3
839
+ 10079,13
840
+ 7062,7
841
+ 5173,45
842
+ 7731,71
843
+ 9629,9
844
+ 6458,63
845
+ 11468,3
846
+ 12564,76
847
+ 171,13
848
+ 10128,17
849
+ 11103,16
850
+ 12171,7
851
+ 285,13
852
+ 7717,71
853
+ 6847,53
854
+ 8517,59
855
+ 4927,26
856
+ 3145,10
857
+ 2188,8
858
+ 9437,19
859
+ 8682,5
860
+ 8784,20
861
+ 12106,53
862
+ 7129,7
863
+ 6716,67
864
+ 2291,60
865
+ 9988,24
866
+ 2379,75
867
+ 5882,25
868
+ 5051,26
869
+ 8308,76
870
+ 12004,70
871
+ 6426,63
872
+ 7593,65
873
+ 2106,8
874
+ 8528,59
875
+ 5135,45
876
+ 5635,27
877
+ 1675,49
878
+ 4101,73
879
+ 12311,35
880
+ 2377,75
881
+ 7452,35
882
+ 2192,8
883
+ 1103,12
884
+ 4739,28
885
+ 7524,35
886
+ 2036,47
887
+ 11540,44
888
+ 1744,49
889
+ 6503,70
890
+ 5827,51
891
+ 7515,35
892
+ 6215,55
893
+ 8696,5
894
+ 9185,0
895
+ 9842,69
896
+ 3405,6
897
+ 4479,22
898
+ 3431,6
899
+ 11262,38
900
+ 10399,14
901
+ 1179,4
902
+ 5724,51
903
+ 10804,15
904
+ 7540,35
905
+ 7606,65
906
+ 12830,57
907
+ 12883,19
908
+ 623,34
909
+ 10616,56
910
+ 7358,50
911
+ 10683,8
912
+ 9646,2
913
+ 3727,30
914
+ 7598,65
915
+ 1901,56
916
+ 2397,75
917
+ 10740,60
918
+ 11073,6
919
+ 11680,42
920
+ 1347,14
921
+ 10570,23
922
+ 3078,40
923
+ 10134,17
924
+ 2862,66
925
+ 3728,30
926
+ 3507,16
927
+ 8727,5
928
+ 6883,21
929
+ 4761,28
930
+ 11283,73
931
+ 8201,72
932
+ 9821,69
933
+ 9349,19
934
+ 7231,64
935
+ 5608,27
936
+ 6367,63
937
+ 13061,24
938
+ 12027,70
939
+ 1538,41
940
+ 11943,18
941
+ 12606,37
942
+ 6153,55
943
+ 11988,63
944
+ 5526,52
945
+ 6668,67
946
+ 10397,14
947
+ 10564,23
948
+ 3058,40
949
+ 11410,22
950
+ 7777,71
951
+ 5475,52
952
+ 3772,74
953
+ 9440,19
954
+ 722,34
955
+ 5249,45
956
+ 8912,20
957
+ 11317,73
958
+ 6921,21
959
+ 11175,74
960
+ 3919,68
961
+ 9682,2
962
+ 7156,7
963
+ 3571,16
964
+ 11953,18
965
+ 5269,42
966
+ 626,34
967
+ 761,46
968
+ 8172,72
969
+ 10331,4
970
+ 8158,43
971
+ 11477,3
972
+ 8219,76
973
+ 6909,21
974
+ 437,17
975
+ 4306,62
976
+ 10138,17
977
+ 6498,70
978
+ 9589,9
979
+ 11147,30
980
+ 12803,57
981
+ 185,13
982
+ 1823,56
983
+ 11964,63
984
+ 517,17
985
+ 7164,64
986
+ 12251,50
987
+ 12896,19
988
+ 19,11
989
+ 4888,44
990
+ 3508,16
991
+ 14,11
992
+ 10830,15
993
+ 5634,27
994
+ 4901,44
995
+ 3225,61
996
+ 11711,52
997
+ 956,36
998
+ 2176,8
999
+ 10410,33
1000
+ 8943,20
finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=0/lora_ans_no_es/0.0-0.7/0.0-0.3/test=banking77/list=test_1000/logits.csv ADDED
The diff for this file is too large to render. See raw diff
 
finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=0/lora_ans_no_es/0.0-0.7/0.0-0.3/train_args.yaml ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ K: null
2
+ global_batch_size: 8
3
+ learning_rate: 0.0001
4
+ loss: ans
5
+ max_steps: -1
6
+ micro_batch_size: 1
7
+ optimizer_name: adamw
8
+ patience: 10
9
+ val_check_interval: 16
10
+ weight_decay: 0.0
finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=1/lora_ans/0.0-0.7/0.7-1.0/best.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:628d3831db10567a4d5ff02481fd7661e359cf5801b4be05284de1cecc89fe21
3
+ size 42951934
finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=1/lora_ans/0.0-0.7/0.7-1.0/checkpoint/config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen2ForCausalLM"
4
+ ],
5
+ "attention_dropout": 0.0,
6
+ "bos_token_id": 151643,
7
+ "eos_token_id": 151645,
8
+ "hidden_act": "silu",
9
+ "hidden_size": 3584,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 18944,
12
+ "max_position_embeddings": 32768,
13
+ "max_window_layers": 28,
14
+ "model_type": "qwen2",
15
+ "num_attention_heads": 28,
16
+ "num_hidden_layers": 28,
17
+ "num_key_value_heads": 4,
18
+ "rms_norm_eps": 1e-06,
19
+ "rope_theta": 1000000.0,
20
+ "sliding_window": 131072,
21
+ "tie_word_embeddings": false,
22
+ "torch_dtype": "bfloat16",
23
+ "transformers_version": "4.43.1",
24
+ "use_cache": true,
25
+ "use_sliding_window": false,
26
+ "vocab_size": 152064
27
+ }
finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=1/lora_ans/0.0-0.7/0.7-1.0/checkpoint/generation_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "pad_token_id": 151643,
4
+ "do_sample": true,
5
+ "eos_token_id": [
6
+ 151645,
7
+ 151643
8
+ ],
9
+ "repetition_penalty": 1.05,
10
+ "temperature": 0.7,
11
+ "top_p": 0.8,
12
+ "top_k": 20,
13
+ "transformers_version": "4.37.0"
14
+ }
finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=1/lora_ans/0.0-0.7/0.7-1.0/checkpoint/lit_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f2893700d468552b27b00bb16e3906c7a01f49c09f3c786c252a2b5d36d179f
3
+ size 15231297418
finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=1/lora_ans/0.0-0.7/0.7-1.0/checkpoint/lit_model.pth.lora ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96b4534d2d3b82d6223c2627abed89d24a35d207b8c900128d7a5465c2b77769
3
+ size 42951614
finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=1/lora_ans/0.0-0.7/0.7-1.0/checkpoint/model_config.yaml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ attention_logit_softcapping: null
2
+ attention_scores_scalar: null
3
+ attn_bias: true
4
+ bias: false
5
+ block_size: 131072
6
+ final_logit_softcapping: null
7
+ gelu_approximate: none
8
+ head_size: 128
9
+ hf_config:
10
+ name: Qwen2.5-7B-Instruct
11
+ org: Qwen
12
+ intermediate_size: 18944
13
+ lm_head_bias: false
14
+ mlp_class_name: LLaMAMLP
15
+ n_embd: 3584
16
+ n_expert: 0
17
+ n_expert_per_token: 0
18
+ n_head: 28
19
+ n_layer: 28
20
+ n_query_groups: 4
21
+ name: Qwen2.5-7B-Instruct
22
+ norm_class_name: RMSNorm
23
+ norm_eps: 1.0e-06
24
+ padded_vocab_size: 152064
25
+ padding_multiple: 512
26
+ parallel_residual: false
27
+ post_attention_norm: false
28
+ post_mlp_norm: false
29
+ rope_adjustments: null
30
+ rope_base: 1000000
31
+ rope_condense_ratio: 1
32
+ rotary_percentage: 1.0
33
+ scale_embeddings: false
34
+ shared_attention_norm: false
35
+ sliding_window_layer_placing: null
36
+ sliding_window_size: null
37
+ vocab_size: 151643
finetune_lora/qwen2.5-7b-instruct/banking77/size=64/seed=1/lora_ans/0.0-0.7/0.7-1.0/checkpoint/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff