RefalMachine commited on
Commit
427b863
·
verified ·
1 Parent(s): e5efdd8

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +20 -0
  2. llmtf_eval_k0/daru_treewayabstractive.jsonl +0 -0
  3. llmtf_eval_k0/daru_treewayabstractive_params.jsonl +62 -0
  4. llmtf_eval_k0/daru_treewayabstractive_total.jsonl +8 -0
  5. llmtf_eval_k0/daru_treewayextractive.jsonl +3 -0
  6. llmtf_eval_k0/daru_treewayextractive_params.jsonl +57 -0
  7. llmtf_eval_k0/daru_treewayextractive_total.jsonl +7 -0
  8. llmtf_eval_k0/darumeru_MultiQ.jsonl +0 -0
  9. llmtf_eval_k0/darumeru_MultiQ_params.jsonl +62 -0
  10. llmtf_eval_k0/darumeru_MultiQ_total.jsonl +8 -0
  11. llmtf_eval_k0/darumeru_PARus.jsonl +0 -0
  12. llmtf_eval_k0/darumeru_PARus_params.jsonl +62 -0
  13. llmtf_eval_k0/darumeru_PARus_total.jsonl +7 -0
  14. llmtf_eval_k0/darumeru_RCB.jsonl +0 -0
  15. llmtf_eval_k0/darumeru_RCB_params.jsonl +62 -0
  16. llmtf_eval_k0/darumeru_RCB_total.jsonl +8 -0
  17. llmtf_eval_k0/darumeru_RWSD.jsonl +0 -0
  18. llmtf_eval_k0/darumeru_RWSD_params.jsonl +62 -0
  19. llmtf_eval_k0/darumeru_RWSD_total.jsonl +7 -0
  20. llmtf_eval_k0/darumeru_USE.jsonl +0 -0
  21. llmtf_eval_k0/darumeru_USE_params.jsonl +62 -0
  22. llmtf_eval_k0/darumeru_USE_total.jsonl +7 -0
  23. llmtf_eval_k0/darumeru_cp_para_en.jsonl +0 -0
  24. llmtf_eval_k0/darumeru_cp_para_en_params.jsonl +62 -0
  25. llmtf_eval_k0/darumeru_cp_para_en_total.jsonl +9 -0
  26. llmtf_eval_k0/darumeru_cp_para_ru.jsonl +0 -0
  27. llmtf_eval_k0/darumeru_cp_para_ru_params.jsonl +62 -0
  28. llmtf_eval_k0/darumeru_cp_para_ru_total.jsonl +9 -0
  29. llmtf_eval_k0/darumeru_cp_sent_en.jsonl +0 -0
  30. llmtf_eval_k0/darumeru_cp_sent_en_params.jsonl +62 -0
  31. llmtf_eval_k0/darumeru_cp_sent_en_total.jsonl +9 -0
  32. llmtf_eval_k0/darumeru_cp_sent_ru.jsonl +0 -0
  33. llmtf_eval_k0/darumeru_cp_sent_ru_params.jsonl +62 -0
  34. llmtf_eval_k0/darumeru_cp_sent_ru_total.jsonl +9 -0
  35. llmtf_eval_k0/darumeru_ruMMLU.jsonl +3 -0
  36. llmtf_eval_k0/darumeru_ruMMLU_params.jsonl +62 -0
  37. llmtf_eval_k0/darumeru_ruMMLU_total.jsonl +7 -0
  38. llmtf_eval_k0/darumeru_ruOpenBookQA.jsonl +0 -0
  39. llmtf_eval_k0/darumeru_ruOpenBookQA_params.jsonl +62 -0
  40. llmtf_eval_k0/darumeru_ruOpenBookQA_total.jsonl +8 -0
  41. llmtf_eval_k0/darumeru_ruTiE.jsonl +3 -0
  42. llmtf_eval_k0/darumeru_ruTiE_params.jsonl +62 -0
  43. llmtf_eval_k0/darumeru_ruTiE_total.jsonl +7 -0
  44. llmtf_eval_k0/darumeru_ruWorldTree.jsonl +0 -0
  45. llmtf_eval_k0/darumeru_ruWorldTree_params.jsonl +62 -0
  46. llmtf_eval_k0/darumeru_ruWorldTree_total.jsonl +8 -0
  47. llmtf_eval_k0/evaluation_log.txt +273 -0
  48. llmtf_eval_k0/evaluation_results.txt +2 -0
  49. llmtf_eval_k0/nlpcoreteam_enMMLU.jsonl +3 -0
  50. llmtf_eval_k0/nlpcoreteam_enMMLU_params.jsonl +57 -0
.gitattributes CHANGED
@@ -33,3 +33,23 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ llmtf_eval_k0/daru_treewayextractive.jsonl filter=lfs diff=lfs merge=lfs -text
37
+ llmtf_eval_k0/darumeru_ruMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
38
+ llmtf_eval_k0/darumeru_ruTiE.jsonl filter=lfs diff=lfs merge=lfs -text
39
+ llmtf_eval_k0/nlpcoreteam_enMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
40
+ llmtf_eval_k0/nlpcoreteam_ruMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
41
+ llmtf_eval_k1/daru_treewayextractive.jsonl filter=lfs diff=lfs merge=lfs -text
42
+ llmtf_eval_k1/darumeru_ruMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
43
+ llmtf_eval_k1/darumeru_ruTiE.jsonl filter=lfs diff=lfs merge=lfs -text
44
+ llmtf_eval_k1/nlpcoreteam_enMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
45
+ llmtf_eval_k1/nlpcoreteam_ruMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
46
+ llmtf_eval_k5/daru_treewayabstractive.jsonl filter=lfs diff=lfs merge=lfs -text
47
+ llmtf_eval_k5/daru_treewayextractive.jsonl filter=lfs diff=lfs merge=lfs -text
48
+ llmtf_eval_k5/darumeru_MultiQ.jsonl filter=lfs diff=lfs merge=lfs -text
49
+ llmtf_eval_k5/darumeru_USE.jsonl filter=lfs diff=lfs merge=lfs -text
50
+ llmtf_eval_k5/darumeru_ruMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
51
+ llmtf_eval_k5/darumeru_ruOpenBookQA.jsonl filter=lfs diff=lfs merge=lfs -text
52
+ llmtf_eval_k5/darumeru_ruTiE.jsonl filter=lfs diff=lfs merge=lfs -text
53
+ llmtf_eval_k5/nlpcoreteam_enMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
54
+ llmtf_eval_k5/nlpcoreteam_ruMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
55
+ llmtf_eval_k5/russiannlp_rucola_custom.jsonl filter=lfs diff=lfs merge=lfs -text
llmtf_eval_k0/daru_treewayabstractive.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval_k0/daru_treewayabstractive_params.jsonl ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
5
+ "generation_config": {
6
+ "bos_token_id": 128000,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 128001,
10
+ 128009,
11
+ 198,
12
+ 271
13
+ ],
14
+ "max_length": 8192,
15
+ "max_new_tokens": 512,
16
+ "pad_token_id": 128001,
17
+ "stop_strings": [
18
+ "\n",
19
+ "\n\n"
20
+ ],
21
+ "temperature": 0.1,
22
+ "top_k": 40,
23
+ "top_p": 0.9,
24
+ "transformers_version": "4.38.2",
25
+ "trust_remote_code": [
26
+ false
27
+ ]
28
+ },
29
+ "conversation_template": {
30
+ "system_prompt": "",
31
+ "system_message_template": "",
32
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
33
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
34
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
35
+ "user_role": "user",
36
+ "bot_role": "assistant",
37
+ "system_role": "system",
38
+ "global_prefix": "<|begin_of_text|>",
39
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
40
+ "add_special_tokens": false,
41
+ "eos_token": "<|eot_id|>"
42
+ },
43
+ "load_in_8bit": false,
44
+ "torch_dtype": "auto",
45
+ "use_flash_attention_2": true,
46
+ "device_map": "cuda:0",
47
+ "use_fast_tokenizer": true,
48
+ "leading_space": false,
49
+ "space_token": null,
50
+ "trust_remote_code": [
51
+ false
52
+ ],
53
+ "max_model_len": 8192
54
+ },
55
+ "task_params": {
56
+ "max_len": 4000,
57
+ "few_shot_count": 0,
58
+ "batch_size": 1,
59
+ "max_sample_per_dataset": 500,
60
+ "method": "generate"
61
+ }
62
+ }
llmtf_eval_k0/daru_treewayabstractive_total.jsonl ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "daru/treewayabstractive",
3
+ "results": {
4
+ "rouge1": 0.35401411318813847,
5
+ "rouge2": 0.12752218193565362
6
+ },
7
+ "leaderboard_result": 0.24076814756189605
8
+ }
llmtf_eval_k0/daru_treewayextractive.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:817a79eb937233b0782b5c9419bcf8fb9fa8a48faf6f412c6781323a7530d54c
3
+ size 1985804644
llmtf_eval_k0/daru_treewayextractive_params.jsonl ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
5
+ "generation_config": {
6
+ "bos_token_id": 128000,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 128001,
10
+ 128009
11
+ ],
12
+ "max_length": 8192,
13
+ "max_new_tokens": 1,
14
+ "pad_token_id": 128001,
15
+ "stop_strings": [],
16
+ "temperature": 0.1,
17
+ "top_k": 40,
18
+ "top_p": 0.9,
19
+ "transformers_version": "4.38.2",
20
+ "trust_remote_code": [
21
+ false
22
+ ]
23
+ },
24
+ "conversation_template": {
25
+ "system_prompt": "",
26
+ "system_message_template": "",
27
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
28
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
29
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
30
+ "user_role": "user",
31
+ "bot_role": "assistant",
32
+ "system_role": "system",
33
+ "global_prefix": "<|begin_of_text|>",
34
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
35
+ "add_special_tokens": false,
36
+ "eos_token": "<|eot_id|>"
37
+ },
38
+ "load_in_8bit": false,
39
+ "torch_dtype": "auto",
40
+ "use_flash_attention_2": true,
41
+ "device_map": "cuda:0",
42
+ "use_fast_tokenizer": true,
43
+ "leading_space": false,
44
+ "space_token": null,
45
+ "trust_remote_code": [
46
+ false
47
+ ],
48
+ "max_model_len": 8192
49
+ },
50
+ "task_params": {
51
+ "max_len": 4000,
52
+ "few_shot_count": 0,
53
+ "batch_size": 1,
54
+ "max_sample_per_dataset": 500,
55
+ "method": "calculate_logsoftmax"
56
+ }
57
+ }
llmtf_eval_k0/daru_treewayextractive_total.jsonl ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "daru/treewayextractive",
3
+ "results": {
4
+ "r-prec": 0.3960751082251082
5
+ },
6
+ "leaderboard_result": 0.3960751082251082
7
+ }
llmtf_eval_k0/darumeru_MultiQ.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval_k0/darumeru_MultiQ_params.jsonl ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
5
+ "generation_config": {
6
+ "bos_token_id": 128000,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 128001,
10
+ 128009,
11
+ 198,
12
+ 271
13
+ ],
14
+ "max_length": 8192,
15
+ "max_new_tokens": 64,
16
+ "pad_token_id": 128001,
17
+ "stop_strings": [
18
+ "\n",
19
+ "\n\n"
20
+ ],
21
+ "temperature": 0.1,
22
+ "top_k": 40,
23
+ "top_p": 0.9,
24
+ "transformers_version": "4.38.2",
25
+ "trust_remote_code": [
26
+ false
27
+ ]
28
+ },
29
+ "conversation_template": {
30
+ "system_prompt": "",
31
+ "system_message_template": "",
32
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
33
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
34
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
35
+ "user_role": "user",
36
+ "bot_role": "assistant",
37
+ "system_role": "system",
38
+ "global_prefix": "<|begin_of_text|>",
39
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
40
+ "add_special_tokens": false,
41
+ "eos_token": "<|eot_id|>"
42
+ },
43
+ "load_in_8bit": false,
44
+ "torch_dtype": "auto",
45
+ "use_flash_attention_2": true,
46
+ "device_map": "cuda:0",
47
+ "use_fast_tokenizer": true,
48
+ "leading_space": false,
49
+ "space_token": null,
50
+ "trust_remote_code": [
51
+ false
52
+ ],
53
+ "max_model_len": 8192
54
+ },
55
+ "task_params": {
56
+ "max_len": 4000,
57
+ "few_shot_count": 0,
58
+ "batch_size": 1,
59
+ "max_sample_per_dataset": 10000000000000,
60
+ "method": "generate"
61
+ }
62
+ }
llmtf_eval_k0/darumeru_MultiQ_total.jsonl ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/MultiQ",
3
+ "results": {
4
+ "f1": 0.34566216745831274,
5
+ "em": 0.21510516252390058
6
+ },
7
+ "leaderboard_result": 0.2803836649911067
8
+ }
llmtf_eval_k0/darumeru_PARus.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval_k0/darumeru_PARus_params.jsonl ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
5
+ "generation_config": {
6
+ "bos_token_id": 128000,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 128001,
10
+ 128009,
11
+ 198,
12
+ 271
13
+ ],
14
+ "max_length": 8192,
15
+ "max_new_tokens": 64,
16
+ "pad_token_id": 128001,
17
+ "stop_strings": [
18
+ "\n",
19
+ "\n\n"
20
+ ],
21
+ "temperature": 0.1,
22
+ "top_k": 40,
23
+ "top_p": 0.9,
24
+ "transformers_version": "4.38.2",
25
+ "trust_remote_code": [
26
+ false
27
+ ]
28
+ },
29
+ "conversation_template": {
30
+ "system_prompt": "",
31
+ "system_message_template": "",
32
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
33
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
34
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
35
+ "user_role": "user",
36
+ "bot_role": "assistant",
37
+ "system_role": "system",
38
+ "global_prefix": "<|begin_of_text|>",
39
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
40
+ "add_special_tokens": false,
41
+ "eos_token": "<|eot_id|>"
42
+ },
43
+ "load_in_8bit": false,
44
+ "torch_dtype": "auto",
45
+ "use_flash_attention_2": true,
46
+ "device_map": "cuda:0",
47
+ "use_fast_tokenizer": true,
48
+ "leading_space": false,
49
+ "space_token": null,
50
+ "trust_remote_code": [
51
+ false
52
+ ],
53
+ "max_model_len": 8192
54
+ },
55
+ "task_params": {
56
+ "max_len": 4000,
57
+ "few_shot_count": 0,
58
+ "batch_size": 1,
59
+ "max_sample_per_dataset": 10000000000000,
60
+ "method": "calculate_tokens_proba"
61
+ }
62
+ }
llmtf_eval_k0/darumeru_PARus_total.jsonl ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/PARus",
3
+ "results": {
4
+ "acc": 0.66
5
+ },
6
+ "leaderboard_result": 0.66
7
+ }
llmtf_eval_k0/darumeru_RCB.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval_k0/darumeru_RCB_params.jsonl ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
5
+ "generation_config": {
6
+ "bos_token_id": 128000,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 128001,
10
+ 128009,
11
+ 198,
12
+ 271
13
+ ],
14
+ "max_length": 8192,
15
+ "max_new_tokens": 64,
16
+ "pad_token_id": 128001,
17
+ "stop_strings": [
18
+ "\n",
19
+ "\n\n"
20
+ ],
21
+ "temperature": 0.1,
22
+ "top_k": 40,
23
+ "top_p": 0.9,
24
+ "transformers_version": "4.38.2",
25
+ "trust_remote_code": [
26
+ false
27
+ ]
28
+ },
29
+ "conversation_template": {
30
+ "system_prompt": "",
31
+ "system_message_template": "",
32
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
33
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
34
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
35
+ "user_role": "user",
36
+ "bot_role": "assistant",
37
+ "system_role": "system",
38
+ "global_prefix": "<|begin_of_text|>",
39
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
40
+ "add_special_tokens": false,
41
+ "eos_token": "<|eot_id|>"
42
+ },
43
+ "load_in_8bit": false,
44
+ "torch_dtype": "auto",
45
+ "use_flash_attention_2": true,
46
+ "device_map": "cuda:0",
47
+ "use_fast_tokenizer": true,
48
+ "leading_space": false,
49
+ "space_token": null,
50
+ "trust_remote_code": [
51
+ false
52
+ ],
53
+ "max_model_len": 8192
54
+ },
55
+ "task_params": {
56
+ "max_len": 4000,
57
+ "few_shot_count": 0,
58
+ "batch_size": 1,
59
+ "max_sample_per_dataset": 10000000000000,
60
+ "method": "calculate_tokens_proba"
61
+ }
62
+ }
llmtf_eval_k0/darumeru_RCB_total.jsonl ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/RCB",
3
+ "results": {
4
+ "acc": 0.5,
5
+ "f1_macro": 0.43018975381906
6
+ },
7
+ "leaderboard_result": 0.46509487690953
8
+ }
llmtf_eval_k0/darumeru_RWSD.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval_k0/darumeru_RWSD_params.jsonl ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
5
+ "generation_config": {
6
+ "bos_token_id": 128000,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 128001,
10
+ 128009,
11
+ 198,
12
+ 271
13
+ ],
14
+ "max_length": 8192,
15
+ "max_new_tokens": 64,
16
+ "pad_token_id": 128001,
17
+ "stop_strings": [
18
+ "\n",
19
+ "\n\n"
20
+ ],
21
+ "temperature": 0.1,
22
+ "top_k": 40,
23
+ "top_p": 0.9,
24
+ "transformers_version": "4.38.2",
25
+ "trust_remote_code": [
26
+ false
27
+ ]
28
+ },
29
+ "conversation_template": {
30
+ "system_prompt": "",
31
+ "system_message_template": "",
32
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
33
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
34
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
35
+ "user_role": "user",
36
+ "bot_role": "assistant",
37
+ "system_role": "system",
38
+ "global_prefix": "<|begin_of_text|>",
39
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
40
+ "add_special_tokens": false,
41
+ "eos_token": "<|eot_id|>"
42
+ },
43
+ "load_in_8bit": false,
44
+ "torch_dtype": "auto",
45
+ "use_flash_attention_2": true,
46
+ "device_map": "cuda:0",
47
+ "use_fast_tokenizer": true,
48
+ "leading_space": false,
49
+ "space_token": null,
50
+ "trust_remote_code": [
51
+ false
52
+ ],
53
+ "max_model_len": 8192
54
+ },
55
+ "task_params": {
56
+ "max_len": 4000,
57
+ "few_shot_count": 0,
58
+ "batch_size": 1,
59
+ "max_sample_per_dataset": 10000000000000,
60
+ "method": "calculate_tokens_proba"
61
+ }
62
+ }
llmtf_eval_k0/darumeru_RWSD_total.jsonl ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/RWSD",
3
+ "results": {
4
+ "acc": 0.5441176470588235
5
+ },
6
+ "leaderboard_result": 0.5441176470588235
7
+ }
llmtf_eval_k0/darumeru_USE.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval_k0/darumeru_USE_params.jsonl ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
5
+ "generation_config": {
6
+ "bos_token_id": 128000,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 128001,
10
+ 128009,
11
+ 198,
12
+ 271
13
+ ],
14
+ "max_length": 8192,
15
+ "max_new_tokens": 64,
16
+ "pad_token_id": 128001,
17
+ "stop_strings": [
18
+ "\n",
19
+ "\n\n"
20
+ ],
21
+ "temperature": 0.1,
22
+ "top_k": 40,
23
+ "top_p": 0.9,
24
+ "transformers_version": "4.38.2",
25
+ "trust_remote_code": [
26
+ false
27
+ ]
28
+ },
29
+ "conversation_template": {
30
+ "system_prompt": "",
31
+ "system_message_template": "",
32
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
33
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
34
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
35
+ "user_role": "user",
36
+ "bot_role": "assistant",
37
+ "system_role": "system",
38
+ "global_prefix": "<|begin_of_text|>",
39
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
40
+ "add_special_tokens": false,
41
+ "eos_token": "<|eot_id|>"
42
+ },
43
+ "load_in_8bit": false,
44
+ "torch_dtype": "auto",
45
+ "use_flash_attention_2": true,
46
+ "device_map": "cuda:0",
47
+ "use_fast_tokenizer": true,
48
+ "leading_space": false,
49
+ "space_token": null,
50
+ "trust_remote_code": [
51
+ false
52
+ ],
53
+ "max_model_len": 8192
54
+ },
55
+ "task_params": {
56
+ "max_len": 4000,
57
+ "few_shot_count": 0,
58
+ "batch_size": 1,
59
+ "max_sample_per_dataset": 10000000000000,
60
+ "method": "generate"
61
+ }
62
+ }
llmtf_eval_k0/darumeru_USE_total.jsonl ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/USE",
3
+ "results": {
4
+ "grade_norm": 0.14411764705882352
5
+ },
6
+ "leaderboard_result": 0.14411764705882352
7
+ }
llmtf_eval_k0/darumeru_cp_para_en.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval_k0/darumeru_cp_para_en_params.jsonl ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
5
+ "generation_config": {
6
+ "bos_token_id": 128000,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 128001,
10
+ 128009,
11
+ 198,
12
+ 271
13
+ ],
14
+ "max_length": 8192,
15
+ "max_new_tokens": 1024,
16
+ "pad_token_id": 128001,
17
+ "stop_strings": [
18
+ "\n",
19
+ "\n\n"
20
+ ],
21
+ "temperature": 0.1,
22
+ "top_k": 40,
23
+ "top_p": 0.9,
24
+ "transformers_version": "4.38.2",
25
+ "trust_remote_code": [
26
+ false
27
+ ]
28
+ },
29
+ "conversation_template": {
30
+ "system_prompt": "",
31
+ "system_message_template": "",
32
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
33
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
34
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
35
+ "user_role": "user",
36
+ "bot_role": "assistant",
37
+ "system_role": "system",
38
+ "global_prefix": "<|begin_of_text|>",
39
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
40
+ "add_special_tokens": false,
41
+ "eos_token": "<|eot_id|>"
42
+ },
43
+ "load_in_8bit": false,
44
+ "torch_dtype": "auto",
45
+ "use_flash_attention_2": true,
46
+ "device_map": "cuda:0",
47
+ "use_fast_tokenizer": true,
48
+ "leading_space": false,
49
+ "space_token": null,
50
+ "trust_remote_code": [
51
+ false
52
+ ],
53
+ "max_model_len": 8192
54
+ },
55
+ "task_params": {
56
+ "max_len": 4000,
57
+ "few_shot_count": 0,
58
+ "batch_size": 1,
59
+ "max_sample_per_dataset": 10000000000000,
60
+ "method": "generate"
61
+ }
62
+ }
llmtf_eval_k0/darumeru_cp_para_en_total.jsonl ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/cp_para_en",
3
+ "results": {
4
+ "symbol_per_token": 4.482081117192222,
5
+ "len": 0.9950333263716172,
6
+ "lcs": 0.9677363847232712
7
+ },
8
+ "leaderboard_result": 0.9677363847232712
9
+ }
llmtf_eval_k0/darumeru_cp_para_ru.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval_k0/darumeru_cp_para_ru_params.jsonl ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
5
+ "generation_config": {
6
+ "bos_token_id": 128000,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 128001,
10
+ 128009,
11
+ 198,
12
+ 271
13
+ ],
14
+ "max_length": 8192,
15
+ "max_new_tokens": 1024,
16
+ "pad_token_id": 128001,
17
+ "stop_strings": [
18
+ "\n",
19
+ "\n\n"
20
+ ],
21
+ "temperature": 0.1,
22
+ "top_k": 40,
23
+ "top_p": 0.9,
24
+ "transformers_version": "4.38.2",
25
+ "trust_remote_code": [
26
+ false
27
+ ]
28
+ },
29
+ "conversation_template": {
30
+ "system_prompt": "",
31
+ "system_message_template": "",
32
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
33
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
34
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
35
+ "user_role": "user",
36
+ "bot_role": "assistant",
37
+ "system_role": "system",
38
+ "global_prefix": "<|begin_of_text|>",
39
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
40
+ "add_special_tokens": false,
41
+ "eos_token": "<|eot_id|>"
42
+ },
43
+ "load_in_8bit": false,
44
+ "torch_dtype": "auto",
45
+ "use_flash_attention_2": true,
46
+ "device_map": "cuda:0",
47
+ "use_fast_tokenizer": true,
48
+ "leading_space": false,
49
+ "space_token": null,
50
+ "trust_remote_code": [
51
+ false
52
+ ],
53
+ "max_model_len": 8192
54
+ },
55
+ "task_params": {
56
+ "max_len": 4000,
57
+ "few_shot_count": 0,
58
+ "batch_size": 1,
59
+ "max_sample_per_dataset": 10000000000000,
60
+ "method": "generate"
61
+ }
62
+ }
llmtf_eval_k0/darumeru_cp_para_ru_total.jsonl ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/cp_para_ru",
3
+ "results": {
4
+ "symbol_per_token": 2.968769203133834,
5
+ "len": 0.9946084786539394,
6
+ "lcs": 0.9078334163780103
7
+ },
8
+ "leaderboard_result": 0.9078334163780103
9
+ }
llmtf_eval_k0/darumeru_cp_sent_en.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval_k0/darumeru_cp_sent_en_params.jsonl ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
5
+ "generation_config": {
6
+ "bos_token_id": 128000,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 128001,
10
+ 128009,
11
+ 198,
12
+ 271
13
+ ],
14
+ "max_length": 8192,
15
+ "max_new_tokens": 128,
16
+ "pad_token_id": 128001,
17
+ "stop_strings": [
18
+ "\n",
19
+ "\n\n"
20
+ ],
21
+ "temperature": 0.1,
22
+ "top_k": 40,
23
+ "top_p": 0.9,
24
+ "transformers_version": "4.38.2",
25
+ "trust_remote_code": [
26
+ false
27
+ ]
28
+ },
29
+ "conversation_template": {
30
+ "system_prompt": "",
31
+ "system_message_template": "",
32
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
33
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
34
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
35
+ "user_role": "user",
36
+ "bot_role": "assistant",
37
+ "system_role": "system",
38
+ "global_prefix": "<|begin_of_text|>",
39
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
40
+ "add_special_tokens": false,
41
+ "eos_token": "<|eot_id|>"
42
+ },
43
+ "load_in_8bit": false,
44
+ "torch_dtype": "auto",
45
+ "use_flash_attention_2": true,
46
+ "device_map": "cuda:0",
47
+ "use_fast_tokenizer": true,
48
+ "leading_space": false,
49
+ "space_token": null,
50
+ "trust_remote_code": [
51
+ false
52
+ ],
53
+ "max_model_len": 8192
54
+ },
55
+ "task_params": {
56
+ "max_len": 4000,
57
+ "few_shot_count": 0,
58
+ "batch_size": 1,
59
+ "max_sample_per_dataset": 10000000000000,
60
+ "method": "generate"
61
+ }
62
+ }
llmtf_eval_k0/darumeru_cp_sent_en_total.jsonl ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/cp_sent_en",
3
+ "results": {
4
+ "symbol_per_token": 4.424738337449315,
5
+ "len": 0.9996416196590585,
6
+ "lcs": 0.9958136839407484
7
+ },
8
+ "leaderboard_result": 0.9996416196590585
9
+ }
llmtf_eval_k0/darumeru_cp_sent_ru.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval_k0/darumeru_cp_sent_ru_params.jsonl ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
5
+ "generation_config": {
6
+ "bos_token_id": 128000,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 128001,
10
+ 128009,
11
+ 198,
12
+ 271
13
+ ],
14
+ "max_length": 8192,
15
+ "max_new_tokens": 128,
16
+ "pad_token_id": 128001,
17
+ "stop_strings": [
18
+ "\n",
19
+ "\n\n"
20
+ ],
21
+ "temperature": 0.1,
22
+ "top_k": 40,
23
+ "top_p": 0.9,
24
+ "transformers_version": "4.38.2",
25
+ "trust_remote_code": [
26
+ false
27
+ ]
28
+ },
29
+ "conversation_template": {
30
+ "system_prompt": "",
31
+ "system_message_template": "",
32
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
33
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
34
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
35
+ "user_role": "user",
36
+ "bot_role": "assistant",
37
+ "system_role": "system",
38
+ "global_prefix": "<|begin_of_text|>",
39
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
40
+ "add_special_tokens": false,
41
+ "eos_token": "<|eot_id|>"
42
+ },
43
+ "load_in_8bit": false,
44
+ "torch_dtype": "auto",
45
+ "use_flash_attention_2": true,
46
+ "device_map": "cuda:0",
47
+ "use_fast_tokenizer": true,
48
+ "leading_space": false,
49
+ "space_token": null,
50
+ "trust_remote_code": [
51
+ false
52
+ ],
53
+ "max_model_len": 8192
54
+ },
55
+ "task_params": {
56
+ "max_len": 4000,
57
+ "few_shot_count": 0,
58
+ "batch_size": 1,
59
+ "max_sample_per_dataset": 10000000000000,
60
+ "method": "generate"
61
+ }
62
+ }
llmtf_eval_k0/darumeru_cp_sent_ru_total.jsonl ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/cp_sent_ru",
3
+ "results": {
4
+ "symbol_per_token": 2.8277022347634286,
5
+ "len": 0.9903323908366956,
6
+ "lcs": 0.9539355441644095
7
+ },
8
+ "leaderboard_result": 0.9903323908366956
9
+ }
llmtf_eval_k0/darumeru_ruMMLU.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:454e30175faf86134597baf9cad35c4c5a62bdc849eee8d85a27c472c0d729cb
3
+ size 32909111
llmtf_eval_k0/darumeru_ruMMLU_params.jsonl ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
5
+ "generation_config": {
6
+ "bos_token_id": 128000,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 128001,
10
+ 128009,
11
+ 198,
12
+ 271
13
+ ],
14
+ "max_length": 8192,
15
+ "max_new_tokens": 64,
16
+ "pad_token_id": 128001,
17
+ "stop_strings": [
18
+ "\n",
19
+ "\n\n"
20
+ ],
21
+ "temperature": 0.1,
22
+ "top_k": 40,
23
+ "top_p": 0.9,
24
+ "transformers_version": "4.38.2",
25
+ "trust_remote_code": [
26
+ false
27
+ ]
28
+ },
29
+ "conversation_template": {
30
+ "system_prompt": "",
31
+ "system_message_template": "",
32
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
33
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
34
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
35
+ "user_role": "user",
36
+ "bot_role": "assistant",
37
+ "system_role": "system",
38
+ "global_prefix": "<|begin_of_text|>",
39
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
40
+ "add_special_tokens": false,
41
+ "eos_token": "<|eot_id|>"
42
+ },
43
+ "load_in_8bit": false,
44
+ "torch_dtype": "auto",
45
+ "use_flash_attention_2": true,
46
+ "device_map": "cuda:0",
47
+ "use_fast_tokenizer": true,
48
+ "leading_space": false,
49
+ "space_token": null,
50
+ "trust_remote_code": [
51
+ false
52
+ ],
53
+ "max_model_len": 8192
54
+ },
55
+ "task_params": {
56
+ "max_len": 4000,
57
+ "few_shot_count": 0,
58
+ "batch_size": 1,
59
+ "max_sample_per_dataset": 10000000000000,
60
+ "method": "calculate_tokens_proba"
61
+ }
62
+ }
llmtf_eval_k0/darumeru_ruMMLU_total.jsonl ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/ruMMLU",
3
+ "results": {
4
+ "acc": 0.5046393295420533
5
+ },
6
+ "leaderboard_result": 0.5046393295420533
7
+ }
llmtf_eval_k0/darumeru_ruOpenBookQA.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval_k0/darumeru_ruOpenBookQA_params.jsonl ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
5
+ "generation_config": {
6
+ "bos_token_id": 128000,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 128001,
10
+ 128009,
11
+ 198,
12
+ 271
13
+ ],
14
+ "max_length": 8192,
15
+ "max_new_tokens": 64,
16
+ "pad_token_id": 128001,
17
+ "stop_strings": [
18
+ "\n",
19
+ "\n\n"
20
+ ],
21
+ "temperature": 0.1,
22
+ "top_k": 40,
23
+ "top_p": 0.9,
24
+ "transformers_version": "4.38.2",
25
+ "trust_remote_code": [
26
+ false
27
+ ]
28
+ },
29
+ "conversation_template": {
30
+ "system_prompt": "",
31
+ "system_message_template": "",
32
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
33
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
34
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
35
+ "user_role": "user",
36
+ "bot_role": "assistant",
37
+ "system_role": "system",
38
+ "global_prefix": "<|begin_of_text|>",
39
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
40
+ "add_special_tokens": false,
41
+ "eos_token": "<|eot_id|>"
42
+ },
43
+ "load_in_8bit": false,
44
+ "torch_dtype": "auto",
45
+ "use_flash_attention_2": true,
46
+ "device_map": "cuda:0",
47
+ "use_fast_tokenizer": true,
48
+ "leading_space": false,
49
+ "space_token": null,
50
+ "trust_remote_code": [
51
+ false
52
+ ],
53
+ "max_model_len": 8192
54
+ },
55
+ "task_params": {
56
+ "max_len": 4000,
57
+ "few_shot_count": 0,
58
+ "batch_size": 1,
59
+ "max_sample_per_dataset": 10000000000000,
60
+ "method": "calculate_tokens_proba"
61
+ }
62
+ }
llmtf_eval_k0/darumeru_ruOpenBookQA_total.jsonl ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/ruOpenBookQA",
3
+ "results": {
4
+ "acc": 0.6924398625429553,
5
+ "f1_macro": 0.6928205333186971
6
+ },
7
+ "leaderboard_result": 0.6926301979308263
8
+ }
llmtf_eval_k0/darumeru_ruTiE.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52057ed1300597156320b34738e0b48e92b4112bbd55d3c9b283972df4eb6e15
3
+ size 12832579
llmtf_eval_k0/darumeru_ruTiE_params.jsonl ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
5
+ "generation_config": {
6
+ "bos_token_id": 128000,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 128001,
10
+ 128009,
11
+ 198,
12
+ 271
13
+ ],
14
+ "max_length": 8192,
15
+ "max_new_tokens": 64,
16
+ "pad_token_id": 128001,
17
+ "stop_strings": [
18
+ "\n",
19
+ "\n\n"
20
+ ],
21
+ "temperature": 0.1,
22
+ "top_k": 40,
23
+ "top_p": 0.9,
24
+ "transformers_version": "4.38.2",
25
+ "trust_remote_code": [
26
+ false
27
+ ]
28
+ },
29
+ "conversation_template": {
30
+ "system_prompt": "",
31
+ "system_message_template": "",
32
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
33
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
34
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
35
+ "user_role": "user",
36
+ "bot_role": "assistant",
37
+ "system_role": "system",
38
+ "global_prefix": "<|begin_of_text|>",
39
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
40
+ "add_special_tokens": false,
41
+ "eos_token": "<|eot_id|>"
42
+ },
43
+ "load_in_8bit": false,
44
+ "torch_dtype": "auto",
45
+ "use_flash_attention_2": true,
46
+ "device_map": "cuda:0",
47
+ "use_fast_tokenizer": true,
48
+ "leading_space": false,
49
+ "space_token": null,
50
+ "trust_remote_code": [
51
+ false
52
+ ],
53
+ "max_model_len": 8192
54
+ },
55
+ "task_params": {
56
+ "max_len": 4000,
57
+ "few_shot_count": 0,
58
+ "batch_size": 1,
59
+ "max_sample_per_dataset": 10000000000000,
60
+ "method": "calculate_tokens_proba"
61
+ }
62
+ }
llmtf_eval_k0/darumeru_ruTiE_total.jsonl ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/ruTiE",
3
+ "results": {
4
+ "acc": 0.3511627906976744
5
+ },
6
+ "leaderboard_result": 0.3511627906976744
7
+ }
llmtf_eval_k0/darumeru_ruWorldTree.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval_k0/darumeru_ruWorldTree_params.jsonl ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
5
+ "generation_config": {
6
+ "bos_token_id": 128000,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 128001,
10
+ 128009,
11
+ 198,
12
+ 271
13
+ ],
14
+ "max_length": 8192,
15
+ "max_new_tokens": 64,
16
+ "pad_token_id": 128001,
17
+ "stop_strings": [
18
+ "\n",
19
+ "\n\n"
20
+ ],
21
+ "temperature": 0.1,
22
+ "top_k": 40,
23
+ "top_p": 0.9,
24
+ "transformers_version": "4.38.2",
25
+ "trust_remote_code": [
26
+ false
27
+ ]
28
+ },
29
+ "conversation_template": {
30
+ "system_prompt": "",
31
+ "system_message_template": "",
32
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
33
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
34
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
35
+ "user_role": "user",
36
+ "bot_role": "assistant",
37
+ "system_role": "system",
38
+ "global_prefix": "<|begin_of_text|>",
39
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
40
+ "add_special_tokens": false,
41
+ "eos_token": "<|eot_id|>"
42
+ },
43
+ "load_in_8bit": false,
44
+ "torch_dtype": "auto",
45
+ "use_flash_attention_2": true,
46
+ "device_map": "cuda:0",
47
+ "use_fast_tokenizer": true,
48
+ "leading_space": false,
49
+ "space_token": null,
50
+ "trust_remote_code": [
51
+ false
52
+ ],
53
+ "max_model_len": 8192
54
+ },
55
+ "task_params": {
56
+ "max_len": 4000,
57
+ "few_shot_count": 0,
58
+ "batch_size": 1,
59
+ "max_sample_per_dataset": 10000000000000,
60
+ "method": "calculate_tokens_proba"
61
+ }
62
+ }
llmtf_eval_k0/darumeru_ruWorldTree_total.jsonl ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/ruWorldTree",
3
+ "results": {
4
+ "acc": 0.8476190476190476,
5
+ "f1_macro": 0.8445201637796824
6
+ },
7
+ "leaderboard_result": 0.8460696056993651
8
+ }
llmtf_eval_k0/evaluation_log.txt ADDED
@@ -0,0 +1,273 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ INFO: 2024-07-12 11:07:49,951: llmtf.base.evaluator: Starting eval on ['darumeru/multiq', 'darumeru/parus', 'darumeru/rcb', 'darumeru/ruopenbookqa', 'darumeru/rutie', 'darumeru/ruworldtree', 'darumeru/rwsd', 'darumeru/use', 'russiannlp/rucola_custom']
2
+ INFO: 2024-07-12 11:07:49,953: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009, 198, 271]
3
+ INFO: 2024-07-12 11:07:49,953: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['\n', '\n\n']
4
+ INFO: 2024-07-12 11:07:51,044: llmtf.base.evaluator: Starting eval on ['darumeru/rummlu']
5
+ INFO: 2024-07-12 11:07:51,045: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009, 198, 271]
6
+ INFO: 2024-07-12 11:07:51,045: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['\n', '\n\n']
7
+ INFO: 2024-07-12 11:07:53,196: llmtf.base.evaluator: Starting eval on ['nlpcoreteam/rummlu']
8
+ INFO: 2024-07-12 11:07:53,197: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
9
+ INFO: 2024-07-12 11:07:53,197: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
10
+ INFO: 2024-07-12 11:07:53,614: llmtf.base.darumeru/MultiQ: Loading Dataset: 3.66s
11
+ INFO: 2024-07-12 11:07:54,515: llmtf.base.evaluator: Starting eval on ['nlpcoreteam/enmmlu']
12
+ INFO: 2024-07-12 11:07:54,515: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
13
+ INFO: 2024-07-12 11:07:54,515: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
14
+ INFO: 2024-07-12 11:07:57,115: llmtf.base.evaluator: Starting eval on ['daru/treewayabstractive']
15
+ INFO: 2024-07-12 11:07:57,116: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009, 198, 271]
16
+ INFO: 2024-07-12 11:07:57,116: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['\n', '\n\n']
17
+ INFO: 2024-07-12 11:07:58,519: llmtf.base.evaluator: Starting eval on ['daru/treewayextractive']
18
+ INFO: 2024-07-12 11:07:58,520: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
19
+ INFO: 2024-07-12 11:07:58,520: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
20
+ INFO: 2024-07-12 11:07:59,295: llmtf.base.darumeru/ruMMLU: Loading Dataset: 8.25s
21
+ INFO: 2024-07-12 11:08:00,328: llmtf.base.evaluator: Starting eval on ['darumeru/cp_sent_ru', 'darumeru/cp_sent_en', 'darumeru/cp_para_ru', 'darumeru/cp_para_en']
22
+ INFO: 2024-07-12 11:08:00,328: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009, 198, 271]
23
+ INFO: 2024-07-12 11:08:00,328: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['\n', '\n\n']
24
+ INFO: 2024-07-12 11:08:01,857: llmtf.base.daru/treewayabstractive: Loading Dataset: 4.74s
25
+ INFO: 2024-07-12 11:08:04,251: llmtf.base.darumeru/cp_sent_ru: Loading Dataset: 3.92s
26
+ INFO: 2024-07-12 11:08:10,121: llmtf.base.daru/treewayextractive: Loading Dataset: 11.60s
27
+ INFO: 2024-07-12 11:10:06,434: llmtf.base.nlpcoreteam/enMMLU: Loading Dataset: 131.92s
28
+ INFO: 2024-07-12 11:10:07,629: llmtf.base.nlpcoreteam/ruMMLU: Loading Dataset: 134.43s
29
+ INFO: 2024-07-12 11:14:10,567: llmtf.base.darumeru/ruMMLU: Processing Dataset: 371.27s
30
+ INFO: 2024-07-12 11:14:10,571: llmtf.base.darumeru/ruMMLU: Results for darumeru/ruMMLU:
31
+ INFO: 2024-07-12 11:14:10,592: llmtf.base.darumeru/ruMMLU: {'acc': 0.5046393295420533}
32
+ INFO: 2024-07-12 11:14:10,628: llmtf.base.evaluator: Ended eval
33
+ INFO: 2024-07-12 11:14:10,633: llmtf.base.evaluator:
34
+ mean darumeru/ruMMLU
35
+ 0.505 0.505
36
+ INFO: 2024-07-12 11:17:48,926: llmtf.base.darumeru/cp_sent_ru: Processing Dataset: 584.67s
37
+ INFO: 2024-07-12 11:17:48,928: llmtf.base.darumeru/cp_sent_ru: Results for darumeru/cp_sent_ru:
38
+ INFO: 2024-07-12 11:17:48,948: llmtf.base.darumeru/cp_sent_ru: {'symbol_per_token': 2.8277022347634286, 'len': 0.9903323908366956, 'lcs': 0.9539355441644095}
39
+ INFO: 2024-07-12 11:17:48,949: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009, 198, 271]
40
+ INFO: 2024-07-12 11:17:48,949: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['\n', '\n\n']
41
+ INFO: 2024-07-12 11:17:53,551: llmtf.base.darumeru/cp_sent_en: Loading Dataset: 4.60s
42
+ INFO: 2024-07-12 11:18:10,281: llmtf.base.nlpcoreteam/enMMLU: Processing Dataset: 483.84s
43
+ INFO: 2024-07-12 11:18:10,299: llmtf.base.nlpcoreteam/enMMLU: Results for nlpcoreteam/enMMLU:
44
+ INFO: 2024-07-12 11:18:10,342: llmtf.base.nlpcoreteam/enMMLU: metric
45
+ subject
46
+ abstract_algebra 0.340000
47
+ anatomy 0.718519
48
+ astronomy 0.736842
49
+ business_ethics 0.720000
50
+ clinical_knowledge 0.735849
51
+ college_biology 0.791667
52
+ college_chemistry 0.470000
53
+ college_computer_science 0.590000
54
+ college_mathematics 0.300000
55
+ college_medicine 0.653179
56
+ college_physics 0.480392
57
+ computer_security 0.760000
58
+ conceptual_physics 0.565957
59
+ econometrics 0.517544
60
+ electrical_engineering 0.600000
61
+ elementary_mathematics 0.481481
62
+ formal_logic 0.523810
63
+ global_facts 0.410000
64
+ high_school_biology 0.800000
65
+ high_school_chemistry 0.551724
66
+ high_school_computer_science 0.730000
67
+ high_school_european_history 0.751515
68
+ high_school_geography 0.828283
69
+ high_school_government_and_politics 0.865285
70
+ high_school_macroeconomics 0.633333
71
+ high_school_mathematics 0.366667
72
+ high_school_microeconomics 0.747899
73
+ high_school_physics 0.423841
74
+ high_school_psychology 0.853211
75
+ high_school_statistics 0.532407
76
+ high_school_us_history 0.828431
77
+ high_school_world_history 0.835443
78
+ human_aging 0.721973
79
+ human_sexuality 0.778626
80
+ international_law 0.760331
81
+ jurisprudence 0.796296
82
+ logical_fallacies 0.779141
83
+ machine_learning 0.455357
84
+ management 0.805825
85
+ marketing 0.893162
86
+ medical_genetics 0.780000
87
+ miscellaneous 0.837803
88
+ moral_disputes 0.690751
89
+ moral_scenarios 0.289385
90
+ nutrition 0.764706
91
+ philosophy 0.720257
92
+ prehistory 0.709877
93
+ professional_accounting 0.531915
94
+ professional_law 0.479140
95
+ professional_medicine 0.731618
96
+ professional_psychology 0.674837
97
+ public_relations 0.654545
98
+ security_studies 0.714286
99
+ sociology 0.825871
100
+ us_foreign_policy 0.890000
101
+ virology 0.487952
102
+ world_religions 0.824561
103
+ INFO: 2024-07-12 11:18:10,350: llmtf.base.nlpcoreteam/enMMLU: metric
104
+ subject
105
+ STEM 0.554241
106
+ humanities 0.691457
107
+ other (business, health, misc.) 0.699464
108
+ social sciences 0.748643
109
+ INFO: 2024-07-12 11:18:10,374: llmtf.base.nlpcoreteam/enMMLU: {'acc': 0.6734513449759852}
110
+ INFO: 2024-07-12 11:18:10,406: llmtf.base.evaluator: Ended eval
111
+ INFO: 2024-07-12 11:18:10,411: llmtf.base.evaluator:
112
+ mean darumeru/cp_sent_ru darumeru/ruMMLU nlpcoreteam/enMMLU
113
+ 0.723 0.990 0.505 0.673
114
+ INFO: 2024-07-12 11:18:50,749: llmtf.base.nlpcoreteam/ruMMLU: Processing Dataset: 523.12s
115
+ INFO: 2024-07-12 11:18:50,751: llmtf.base.nlpcoreteam/ruMMLU: Results for nlpcoreteam/ruMMLU:
116
+ INFO: 2024-07-12 11:18:50,790: llmtf.base.nlpcoreteam/ruMMLU: metric
117
+ subject
118
+ abstract_algebra 0.290000
119
+ anatomy 0.459259
120
+ astronomy 0.657895
121
+ business_ethics 0.600000
122
+ clinical_knowledge 0.562264
123
+ college_biology 0.548611
124
+ college_chemistry 0.400000
125
+ college_computer_science 0.470000
126
+ college_mathematics 0.330000
127
+ college_medicine 0.497110
128
+ college_physics 0.333333
129
+ computer_security 0.570000
130
+ conceptual_physics 0.493617
131
+ econometrics 0.342105
132
+ electrical_engineering 0.531034
133
+ elementary_mathematics 0.412698
134
+ formal_logic 0.380952
135
+ global_facts 0.350000
136
+ high_school_biology 0.635484
137
+ high_school_chemistry 0.428571
138
+ high_school_computer_science 0.620000
139
+ high_school_european_history 0.715152
140
+ high_school_geography 0.656566
141
+ high_school_government_and_politics 0.595855
142
+ high_school_macroeconomics 0.517949
143
+ high_school_mathematics 0.348148
144
+ high_school_microeconomics 0.495798
145
+ high_school_physics 0.350993
146
+ high_school_psychology 0.667890
147
+ high_school_statistics 0.458333
148
+ high_school_us_history 0.661765
149
+ high_school_world_history 0.708861
150
+ human_aging 0.556054
151
+ human_sexuality 0.664122
152
+ international_law 0.702479
153
+ jurisprudence 0.592593
154
+ logical_fallacies 0.527607
155
+ machine_learning 0.339286
156
+ management 0.669903
157
+ marketing 0.700855
158
+ medical_genetics 0.570000
159
+ miscellaneous 0.646232
160
+ moral_disputes 0.554913
161
+ moral_scenarios 0.248045
162
+ nutrition 0.594771
163
+ philosophy 0.565916
164
+ prehistory 0.558642
165
+ professional_accounting 0.386525
166
+ professional_law 0.362451
167
+ professional_medicine 0.522059
168
+ professional_psychology 0.480392
169
+ public_relations 0.563636
170
+ security_studies 0.620408
171
+ sociology 0.696517
172
+ us_foreign_policy 0.770000
173
+ virology 0.415663
174
+ world_religions 0.690058
175
+ INFO: 2024-07-12 11:18:50,798: llmtf.base.nlpcoreteam/ruMMLU: metric
176
+ subject
177
+ STEM 0.456556
178
+ humanities 0.559187
179
+ other (business, health, misc.) 0.537907
180
+ social sciences 0.589270
181
+ INFO: 2024-07-12 11:18:50,819: llmtf.base.nlpcoreteam/ruMMLU: {'acc': 0.5357299468552112}
182
+ INFO: 2024-07-12 11:18:50,850: llmtf.base.evaluator: Ended eval
183
+ INFO: 2024-07-12 11:18:50,856: llmtf.base.evaluator:
184
+ mean darumeru/cp_sent_ru darumeru/ruMMLU nlpcoreteam/enMMLU nlpcoreteam/ruMMLU
185
+ 0.676 0.990 0.505 0.673 0.536
186
+ INFO: 2024-07-12 11:19:25,952: llmtf.base.darumeru/MultiQ: Processing Dataset: 692.34s
187
+ INFO: 2024-07-12 11:19:25,955: llmtf.base.darumeru/MultiQ: Results for darumeru/MultiQ:
188
+ INFO: 2024-07-12 11:19:25,975: llmtf.base.darumeru/MultiQ: {'f1': 0.34566216745831274, 'em': 0.21510516252390058}
189
+ INFO: 2024-07-12 11:19:25,980: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009, 198, 271]
190
+ INFO: 2024-07-12 11:19:25,980: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['\n', '\n\n']
191
+ INFO: 2024-07-12 11:19:27,975: llmtf.base.darumeru/PARus: Loading Dataset: 1.99s
192
+ INFO: 2024-07-12 11:19:34,512: llmtf.base.darumeru/PARus: Processing Dataset: 6.54s
193
+ INFO: 2024-07-12 11:19:34,513: llmtf.base.darumeru/PARus: Results for darumeru/PARus:
194
+ INFO: 2024-07-12 11:19:34,540: llmtf.base.darumeru/PARus: {'acc': 0.66}
195
+ INFO: 2024-07-12 11:19:34,542: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009, 198, 271]
196
+ INFO: 2024-07-12 11:19:34,542: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['\n', '\n\n']
197
+ INFO: 2024-07-12 11:19:36,333: llmtf.base.darumeru/RCB: Loading Dataset: 1.79s
198
+ INFO: 2024-07-12 11:19:44,027: llmtf.base.darumeru/RCB: Processing Dataset: 7.69s
199
+ INFO: 2024-07-12 11:19:44,028: llmtf.base.darumeru/RCB: Results for darumeru/RCB:
200
+ INFO: 2024-07-12 11:19:44,035: llmtf.base.darumeru/RCB: {'acc': 0.5, 'f1_macro': 0.43018975381906}
201
+ INFO: 2024-07-12 11:19:44,036: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009, 198, 271]
202
+ INFO: 2024-07-12 11:19:44,036: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['\n', '\n\n']
203
+ INFO: 2024-07-12 11:19:46,672: llmtf.base.darumeru/ruOpenBookQA: Loading Dataset: 2.63s
204
+ INFO: 2024-07-12 11:21:04,051: llmtf.base.darumeru/ruOpenBookQA: Processing Dataset: 77.38s
205
+ INFO: 2024-07-12 11:21:04,067: llmtf.base.darumeru/ruOpenBookQA: Results for darumeru/ruOpenBookQA:
206
+ INFO: 2024-07-12 11:21:04,081: llmtf.base.darumeru/ruOpenBookQA: {'acc': 0.6924398625429553, 'f1_macro': 0.6928205333186971}
207
+ INFO: 2024-07-12 11:21:04,089: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009, 198, 271]
208
+ INFO: 2024-07-12 11:21:04,089: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['\n', '\n\n']
209
+ INFO: 2024-07-12 11:21:11,537: llmtf.base.darumeru/ruTiE: Loading Dataset: 7.45s
210
+ INFO: 2024-07-12 11:25:40,282: llmtf.base.darumeru/ruTiE: Processing Dataset: 268.74s
211
+ INFO: 2024-07-12 11:25:40,283: llmtf.base.darumeru/ruTiE: Results for darumeru/ruTiE:
212
+ INFO: 2024-07-12 11:25:40,361: llmtf.base.darumeru/ruTiE: {'acc': 0.3511627906976744}
213
+ INFO: 2024-07-12 11:25:40,365: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009, 198, 271]
214
+ INFO: 2024-07-12 11:25:40,365: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['\n', '\n\n']
215
+ INFO: 2024-07-12 11:25:42,670: llmtf.base.darumeru/ruWorldTree: Loading Dataset: 2.30s
216
+ INFO: 2024-07-12 11:25:46,171: llmtf.base.darumeru/ruWorldTree: Processing Dataset: 3.50s
217
+ INFO: 2024-07-12 11:25:46,173: llmtf.base.darumeru/ruWorldTree: Results for darumeru/ruWorldTree:
218
+ INFO: 2024-07-12 11:25:46,178: llmtf.base.darumeru/ruWorldTree: {'acc': 0.8476190476190476, 'f1_macro': 0.8445201637796824}
219
+ INFO: 2024-07-12 11:25:46,179: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009, 198, 271]
220
+ INFO: 2024-07-12 11:25:46,179: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['\n', '\n\n']
221
+ INFO: 2024-07-12 11:25:48,322: llmtf.base.darumeru/RWSD: Loading Dataset: 2.14s
222
+ INFO: 2024-07-12 11:25:53,464: llmtf.base.darumeru/cp_sent_en: Processing Dataset: 479.91s
223
+ INFO: 2024-07-12 11:25:53,466: llmtf.base.darumeru/cp_sent_en: Results for darumeru/cp_sent_en:
224
+ INFO: 2024-07-12 11:25:53,470: llmtf.base.darumeru/cp_sent_en: {'symbol_per_token': 4.424738337449315, 'len': 0.9996416196590585, 'lcs': 0.9958136839407484}
225
+ INFO: 2024-07-12 11:25:53,471: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009, 198, 271]
226
+ INFO: 2024-07-12 11:25:53,471: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['\n', '\n\n']
227
+ INFO: 2024-07-12 11:25:55,479: llmtf.base.darumeru/RWSD: Processing Dataset: 7.16s
228
+ INFO: 2024-07-12 11:25:55,481: llmtf.base.darumeru/RWSD: Results for darumeru/RWSD:
229
+ INFO: 2024-07-12 11:25:55,485: llmtf.base.darumeru/RWSD: {'acc': 0.5441176470588235}
230
+ INFO: 2024-07-12 11:25:55,486: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009, 198, 271]
231
+ INFO: 2024-07-12 11:25:55,486: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['\n', '\n\n']
232
+ INFO: 2024-07-12 11:25:57,669: llmtf.base.darumeru/cp_para_ru: Loading Dataset: 4.20s
233
+ INFO: 2024-07-12 11:25:58,310: llmtf.base.darumeru/USE: Loading Dataset: 2.82s
234
+ INFO: 2024-07-12 11:29:53,584: llmtf.base.daru/treewayextractive: Processing Dataset: 1303.46s
235
+ INFO: 2024-07-12 11:29:53,601: llmtf.base.daru/treewayextractive: Results for daru/treewayextractive:
236
+ INFO: 2024-07-12 11:29:53,850: llmtf.base.daru/treewayextractive: {'r-prec': 0.3960751082251082}
237
+ INFO: 2024-07-12 11:29:54,328: llmtf.base.evaluator: Ended eval
238
+ INFO: 2024-07-12 11:29:54,338: llmtf.base.evaluator:
239
+ mean daru/treewayextractive darumeru/MultiQ darumeru/PARus darumeru/RCB darumeru/RWSD darumeru/cp_sent_en darumeru/cp_sent_ru darumeru/ruMMLU darumeru/ruOpenBookQA darumeru/ruTiE darumeru/ruWorldTree nlpcoreteam/enMMLU nlpcoreteam/ruMMLU
240
+ 0.611 0.396 0.280 0.660 0.465 0.544 1.000 0.990 0.505 0.693 0.351 0.846 0.673 0.536
241
+ INFO: 2024-07-12 11:30:30,009: llmtf.base.darumeru/USE: Processing Dataset: 271.70s
242
+ INFO: 2024-07-12 11:30:30,010: llmtf.base.darumeru/USE: Results for darumeru/USE:
243
+ INFO: 2024-07-12 11:30:30,016: llmtf.base.darumeru/USE: {'grade_norm': 0.14411764705882352}
244
+ INFO: 2024-07-12 11:30:30,019: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
245
+ INFO: 2024-07-12 11:30:30,019: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
246
+ INFO: 2024-07-12 11:30:34,123: llmtf.base.russiannlp/rucola_custom: Loading Dataset: 4.10s
247
+ INFO: 2024-07-12 11:32:06,029: llmtf.base.russiannlp/rucola_custom: Processing Dataset: 91.90s
248
+ INFO: 2024-07-12 11:32:06,034: llmtf.base.russiannlp/rucola_custom: Results for russiannlp/rucola_custom:
249
+ INFO: 2024-07-12 11:32:06,045: llmtf.base.russiannlp/rucola_custom: {'acc': 0.7068532472192322, 'mcc': 0.2623100586905413}
250
+ INFO: 2024-07-12 11:32:06,049: llmtf.base.evaluator: Ended eval
251
+ INFO: 2024-07-12 11:32:06,089: llmtf.base.evaluator:
252
+ mean daru/treewayextractive darumeru/MultiQ darumeru/PARus darumeru/RCB darumeru/RWSD darumeru/USE darumeru/cp_sent_en darumeru/cp_sent_ru darumeru/ruMMLU darumeru/ruOpenBookQA darumeru/ruTiE darumeru/ruWorldTree nlpcoreteam/enMMLU nlpcoreteam/ruMMLU russiannlp/rucola_custom
253
+ 0.571 0.396 0.280 0.660 0.465 0.544 0.144 1.000 0.990 0.505 0.693 0.351 0.846 0.673 0.536 0.485
254
+ INFO: 2024-07-12 11:38:34,822: llmtf.base.daru/treewayabstractive: Processing Dataset: 1832.96s
255
+ INFO: 2024-07-12 11:38:34,826: llmtf.base.daru/treewayabstractive: Results for daru/treewayabstractive:
256
+ INFO: 2024-07-12 11:38:34,829: llmtf.base.daru/treewayabstractive: {'rouge1': 0.35401411318813847, 'rouge2': 0.12752218193565362}
257
+ INFO: 2024-07-12 11:38:34,832: llmtf.base.evaluator: Ended eval
258
+ INFO: 2024-07-12 11:38:34,856: llmtf.base.evaluator:
259
+ mean daru/treewayabstractive daru/treewayextractive darumeru/MultiQ darumeru/PARus darumeru/RCB darumeru/RWSD darumeru/USE darumeru/cp_sent_en darumeru/cp_sent_ru darumeru/ruMMLU darumeru/ruOpenBookQA darumeru/ruTiE darumeru/ruWorldTree nlpcoreteam/enMMLU nlpcoreteam/ruMMLU russiannlp/rucola_custom
260
+ 0.551 0.241 0.396 0.280 0.660 0.465 0.544 0.144 1.000 0.990 0.505 0.693 0.351 0.846 0.673 0.536 0.485
261
+ INFO: 2024-07-12 11:38:48,843: llmtf.base.darumeru/cp_para_ru: Processing Dataset: 771.17s
262
+ INFO: 2024-07-12 11:38:48,845: llmtf.base.darumeru/cp_para_ru: Results for darumeru/cp_para_ru:
263
+ INFO: 2024-07-12 11:38:48,849: llmtf.base.darumeru/cp_para_ru: {'symbol_per_token': 2.968769203133834, 'len': 0.9946084786539394, 'lcs': 0.9078334163780103}
264
+ INFO: 2024-07-12 11:38:48,850: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009, 198, 271]
265
+ INFO: 2024-07-12 11:38:48,850: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['\n', '\n\n']
266
+ INFO: 2024-07-12 11:38:53,153: llmtf.base.darumeru/cp_para_en: Loading Dataset: 4.30s
267
+ INFO: 2024-07-12 11:49:13,434: llmtf.base.darumeru/cp_para_en: Processing Dataset: 620.28s
268
+ INFO: 2024-07-12 11:49:13,451: llmtf.base.darumeru/cp_para_en: Results for darumeru/cp_para_en:
269
+ INFO: 2024-07-12 11:49:13,455: llmtf.base.darumeru/cp_para_en: {'symbol_per_token': 4.482081117192222, 'len': 0.9950333263716172, 'lcs': 0.9677363847232712}
270
+ INFO: 2024-07-12 11:49:13,455: llmtf.base.evaluator: Ended eval
271
+ INFO: 2024-07-12 11:49:13,483: llmtf.base.evaluator:
272
+ mean daru/treewayabstractive daru/treewayextractive darumeru/MultiQ darumeru/PARus darumeru/RCB darumeru/RWSD darumeru/USE darumeru/cp_para_en darumeru/cp_para_ru darumeru/cp_sent_en darumeru/cp_sent_ru darumeru/ruMMLU darumeru/ruOpenBookQA darumeru/ruTiE darumeru/ruWorldTree nlpcoreteam/enMMLU nlpcoreteam/ruMMLU russiannlp/rucola_custom
273
+ 0.594 0.241 0.396 0.280 0.660 0.465 0.544 0.144 0.968 0.908 1.000 0.990 0.505 0.693 0.351 0.846 0.673 0.536 0.485
llmtf_eval_k0/evaluation_results.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ mean daru/treewayabstractive daru/treewayextractive darumeru/MultiQ darumeru/PARus darumeru/RCB darumeru/RWSD darumeru/USE darumeru/cp_para_en darumeru/cp_para_ru darumeru/cp_sent_en darumeru/cp_sent_ru darumeru/ruMMLU darumeru/ruOpenBookQA darumeru/ruTiE darumeru/ruWorldTree nlpcoreteam/enMMLU nlpcoreteam/ruMMLU russiannlp/rucola_custom
2
+ 0.594 0.241 0.396 0.280 0.660 0.465 0.544 0.144 0.968 0.908 1.000 0.990 0.505 0.693 0.351 0.846 0.673 0.536 0.485
llmtf_eval_k0/nlpcoreteam_enMMLU.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a3488fe16dd4c6cc5496a219929273e2746e659cceba1baa45750cf002c05f5
3
+ size 38085342
llmtf_eval_k0/nlpcoreteam_enMMLU_params.jsonl ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
5
+ "generation_config": {
6
+ "bos_token_id": 128000,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 128001,
10
+ 128009
11
+ ],
12
+ "max_length": 8192,
13
+ "max_new_tokens": 64,
14
+ "pad_token_id": 128001,
15
+ "stop_strings": [],
16
+ "temperature": 0.1,
17
+ "top_k": 40,
18
+ "top_p": 0.9,
19
+ "transformers_version": "4.38.2",
20
+ "trust_remote_code": [
21
+ false
22
+ ]
23
+ },
24
+ "conversation_template": {
25
+ "system_prompt": "",
26
+ "system_message_template": "",
27
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
28
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
29
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
30
+ "user_role": "user",
31
+ "bot_role": "assistant",
32
+ "system_role": "system",
33
+ "global_prefix": "<|begin_of_text|>",
34
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
35
+ "add_special_tokens": false,
36
+ "eos_token": "<|eot_id|>"
37
+ },
38
+ "load_in_8bit": false,
39
+ "torch_dtype": "auto",
40
+ "use_flash_attention_2": true,
41
+ "device_map": "cuda:0",
42
+ "use_fast_tokenizer": true,
43
+ "leading_space": false,
44
+ "space_token": null,
45
+ "trust_remote_code": [
46
+ false
47
+ ],
48
+ "max_model_len": 8192
49
+ },
50
+ "task_params": {
51
+ "max_len": 4000,
52
+ "few_shot_count": 0,
53
+ "batch_size": 1,
54
+ "max_sample_per_dataset": 10000000000000,
55
+ "method": "calculate_tokens_proba"
56
+ }
57
+ }