RefalMachine committed on Jul 13, 2024

Commit

b43ce9f

verified ·

1 Parent(s): 9e865a9

Upload folder using huggingface_hub

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +27 -0
llmtf_eval_k0_bs1/daru_treewayabstractive.jsonl +0 -0
llmtf_eval_k0_bs1/daru_treewayabstractive_params.jsonl +54 -0
llmtf_eval_k0_bs1/daru_treewayabstractive_total.jsonl +8 -0
llmtf_eval_k0_bs1/daru_treewayextractive.jsonl +3 -0
llmtf_eval_k0_bs1/daru_treewayextractive_params.jsonl +54 -0
llmtf_eval_k0_bs1/daru_treewayextractive_total.jsonl +7 -0
llmtf_eval_k0_bs1/darumeru_MultiQ.jsonl +0 -0
llmtf_eval_k0_bs1/darumeru_MultiQ_params.jsonl +54 -0
llmtf_eval_k0_bs1/darumeru_MultiQ_total.jsonl +8 -0
llmtf_eval_k0_bs1/darumeru_PARus.jsonl +0 -0
llmtf_eval_k0_bs1/darumeru_PARus_params.jsonl +54 -0
llmtf_eval_k0_bs1/darumeru_PARus_total.jsonl +7 -0
llmtf_eval_k0_bs1/darumeru_RCB.jsonl +0 -0
llmtf_eval_k0_bs1/darumeru_RCB_params.jsonl +54 -0
llmtf_eval_k0_bs1/darumeru_RCB_total.jsonl +8 -0
llmtf_eval_k0_bs1/darumeru_RWSD.jsonl +0 -0
llmtf_eval_k0_bs1/darumeru_RWSD_params.jsonl +54 -0
llmtf_eval_k0_bs1/darumeru_RWSD_total.jsonl +7 -0
llmtf_eval_k0_bs1/darumeru_USE.jsonl +0 -0
llmtf_eval_k0_bs1/darumeru_USE_params.jsonl +54 -0
llmtf_eval_k0_bs1/darumeru_USE_total.jsonl +7 -0
llmtf_eval_k0_bs1/darumeru_cp_para_en.jsonl +0 -0
llmtf_eval_k0_bs1/darumeru_cp_para_en_params.jsonl +54 -0
llmtf_eval_k0_bs1/darumeru_cp_para_en_total.jsonl +9 -0
llmtf_eval_k0_bs1/darumeru_cp_para_ru.jsonl +0 -0
llmtf_eval_k0_bs1/darumeru_cp_para_ru_params.jsonl +54 -0
llmtf_eval_k0_bs1/darumeru_cp_para_ru_total.jsonl +9 -0
llmtf_eval_k0_bs1/darumeru_cp_sent_en.jsonl +0 -0
llmtf_eval_k0_bs1/darumeru_cp_sent_en_params.jsonl +54 -0
llmtf_eval_k0_bs1/darumeru_cp_sent_en_total.jsonl +9 -0
llmtf_eval_k0_bs1/darumeru_cp_sent_ru.jsonl +0 -0
llmtf_eval_k0_bs1/darumeru_cp_sent_ru_params.jsonl +54 -0
llmtf_eval_k0_bs1/darumeru_cp_sent_ru_total.jsonl +9 -0
llmtf_eval_k0_bs1/darumeru_ruMMLU.jsonl +3 -0
llmtf_eval_k0_bs1/darumeru_ruMMLU_params.jsonl +54 -0
llmtf_eval_k0_bs1/darumeru_ruMMLU_total.jsonl +7 -0
llmtf_eval_k0_bs1/darumeru_ruOpenBookQA.jsonl +0 -0
llmtf_eval_k0_bs1/darumeru_ruOpenBookQA_params.jsonl +54 -0
llmtf_eval_k0_bs1/darumeru_ruOpenBookQA_total.jsonl +8 -0
llmtf_eval_k0_bs1/darumeru_ruTiE.jsonl +3 -0
llmtf_eval_k0_bs1/darumeru_ruTiE_params.jsonl +54 -0
llmtf_eval_k0_bs1/darumeru_ruTiE_total.jsonl +7 -0
llmtf_eval_k0_bs1/darumeru_ruWorldTree.jsonl +0 -0
llmtf_eval_k0_bs1/darumeru_ruWorldTree_params.jsonl +54 -0
llmtf_eval_k0_bs1/darumeru_ruWorldTree_total.jsonl +8 -0
llmtf_eval_k0_bs1/evaluation_log.txt +273 -0
llmtf_eval_k0_bs1/evaluation_results.txt +2 -0
llmtf_eval_k0_bs1/nlpcoreteam_enMMLU.jsonl +3 -0
llmtf_eval_k0_bs1/nlpcoreteam_enMMLU_params.jsonl +54 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,30 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+llmtf_eval_k0_bs1/daru_treewayextractive.jsonl filter=lfs diff=lfs merge=lfs -text
+llmtf_eval_k0_bs1/darumeru_ruMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
+llmtf_eval_k0_bs1/darumeru_ruTiE.jsonl filter=lfs diff=lfs merge=lfs -text
+llmtf_eval_k0_bs1/nlpcoreteam_enMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
+llmtf_eval_k0_bs1/nlpcoreteam_ruMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
+llmtf_eval_k0_bs8/daru_treewayextractive.jsonl filter=lfs diff=lfs merge=lfs -text
+llmtf_eval_k0_bs8/darumeru_ruMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
+llmtf_eval_k0_bs8/darumeru_ruTiE.jsonl filter=lfs diff=lfs merge=lfs -text
+llmtf_eval_k0_bs8/nlpcoreteam_enMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
+llmtf_eval_k0_bs8/nlpcoreteam_ruMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
+llmtf_eval_k5_bs1/daru_treewayextractive.jsonl filter=lfs diff=lfs merge=lfs -text
+llmtf_eval_k5_bs1/darumeru_MultiQ.jsonl filter=lfs diff=lfs merge=lfs -text
+llmtf_eval_k5_bs1/darumeru_USE.jsonl filter=lfs diff=lfs merge=lfs -text
+llmtf_eval_k5_bs1/darumeru_ruMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
+llmtf_eval_k5_bs1/darumeru_ruTiE.jsonl filter=lfs diff=lfs merge=lfs -text
+llmtf_eval_k5_bs1/nlpcoreteam_enMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
+llmtf_eval_k5_bs1/nlpcoreteam_ruMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
+llmtf_eval_k5_bs1/russiannlp_rucola_custom.jsonl filter=lfs diff=lfs merge=lfs -text
+llmtf_eval_k5_bs8/daru_treewayabstractive.jsonl filter=lfs diff=lfs merge=lfs -text
+llmtf_eval_k5_bs8/daru_treewayextractive.jsonl filter=lfs diff=lfs merge=lfs -text
+llmtf_eval_k5_bs8/darumeru_MultiQ.jsonl filter=lfs diff=lfs merge=lfs -text
+llmtf_eval_k5_bs8/darumeru_USE.jsonl filter=lfs diff=lfs merge=lfs -text
+llmtf_eval_k5_bs8/darumeru_ruMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
+llmtf_eval_k5_bs8/darumeru_ruTiE.jsonl filter=lfs diff=lfs merge=lfs -text
+llmtf_eval_k5_bs8/nlpcoreteam_enMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
+llmtf_eval_k5_bs8/nlpcoreteam_ruMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
+llmtf_eval_k5_bs8/russiannlp_rucola_custom.jsonl filter=lfs diff=lfs merge=lfs -text

llmtf_eval_k0_bs1/daru_treewayabstractive.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff

llmtf_eval_k0_bs1/daru_treewayabstractive_params.jsonl ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+    "custom_generation_config": null,
+    "model_params": {
+        "model_name_or_path": "openchat/openchat-3.5-0106",
+        "generation_config": {
+            "_from_model_config": true,
+            "bos_token_id": 1,
+            "do_sample": true,
+            "eos_token_id": [
+                32000
+            ],
+            "max_length": 8192,
+            "max_new_tokens": 512,
+            "pad_token_id": 32000,
+            "stop_strings": [],
+            "temperature": 0.1,
+            "top_k": 40,
+            "top_p": 0.9,
+            "transformers_version": "4.38.2"
+        },
+        "conversation_template": {
+            "system_prompt": "",
+            "system_message_template": "{role} {content}<|end_of_turn|>",
+            "user_message_template": "{role} {content}<|end_of_turn|>",
+            "bot_message_template": "{role} {content}<|end_of_turn|>",
+            "bot_message_template_incomplete": "{role} {content}",
+            "user_role": "GPT4 Correct User:",
+            "bot_role": "GPT4 Correct Assistant:",
+            "system_role": "GPT4 Correct System:",
+            "global_prefix": "<s>",
+            "suffix": "GPT4 Correct Assistant:",
+            "add_special_tokens": false,
+            "eos_token": "<|end_of_turn|>"
+        },
+        "load_in_8bit": false,
+        "torch_dtype": "auto",
+        "use_flash_attention_2": true,
+        "device_map": "cuda:0",
+        "use_fast_tokenizer": true,
+        "leading_space": true,
+        "space_token": 28705,
+        "trust_remote_code": [
+            false
+        ],
+        "max_model_len": 8192
+    },
+    "task_params": {
+        "max_len": 4000,
+        "few_shot_count": 0,
+        "batch_size": 1,
+        "max_sample_per_dataset": 500,
+        "method": "generate"
+    }
+}

llmtf_eval_k0_bs1/daru_treewayabstractive_total.jsonl ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "task_name": "daru/treewayabstractive",
+    "results": {
+        "rouge1": 0.3460301339685849,
+        "rouge2": 0.12444606939274086
+    },
+    "leaderboard_result": 0.2352381016806629
+}

llmtf_eval_k0_bs1/daru_treewayextractive.jsonl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9d1091c4230c532245eb1b9f24df3f5f2b88ffefcb423d107bd743067beb0625
+size 310432604

llmtf_eval_k0_bs1/daru_treewayextractive_params.jsonl ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+    "custom_generation_config": null,
+    "model_params": {
+        "model_name_or_path": "openchat/openchat-3.5-0106",
+        "generation_config": {
+            "_from_model_config": true,
+            "bos_token_id": 1,
+            "do_sample": true,
+            "eos_token_id": [
+                32000
+            ],
+            "max_length": 8192,
+            "max_new_tokens": 1,
+            "pad_token_id": 32000,
+            "stop_strings": [],
+            "temperature": 0.1,
+            "top_k": 40,
+            "top_p": 0.9,
+            "transformers_version": "4.38.2"
+        },
+        "conversation_template": {
+            "system_prompt": "",
+            "system_message_template": "{role} {content}<|end_of_turn|>",
+            "user_message_template": "{role} {content}<|end_of_turn|>",
+            "bot_message_template": "{role} {content}<|end_of_turn|>",
+            "bot_message_template_incomplete": "{role} {content}",
+            "user_role": "GPT4 Correct User:",
+            "bot_role": "GPT4 Correct Assistant:",
+            "system_role": "GPT4 Correct System:",
+            "global_prefix": "<s>",
+            "suffix": "GPT4 Correct Assistant:",
+            "add_special_tokens": false,
+            "eos_token": "<|end_of_turn|>"
+        },
+        "load_in_8bit": false,
+        "torch_dtype": "auto",
+        "use_flash_attention_2": true,
+        "device_map": "cuda:0",
+        "use_fast_tokenizer": true,
+        "leading_space": true,
+        "space_token": 28705,
+        "trust_remote_code": [
+            false
+        ],
+        "max_model_len": 8192
+    },
+    "task_params": {
+        "max_len": 4000,
+        "few_shot_count": 0,
+        "batch_size": 1,
+        "max_sample_per_dataset": 10000000000000,
+        "method": "calculate_logsoftmax"
+    }
+}

llmtf_eval_k0_bs1/daru_treewayextractive_total.jsonl ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+    "task_name": "daru/treewayextractive",
+    "results": {
+        "r-prec": 0.4038567821067821
+    },
+    "leaderboard_result": 0.4038567821067821
+}

llmtf_eval_k0_bs1/darumeru_MultiQ.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff

llmtf_eval_k0_bs1/darumeru_MultiQ_params.jsonl ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+    "custom_generation_config": null,
+    "model_params": {
+        "model_name_or_path": "openchat/openchat-3.5-0106",
+        "generation_config": {
+            "_from_model_config": true,
+            "bos_token_id": 1,
+            "do_sample": true,
+            "eos_token_id": [
+                32000
+            ],
+            "max_length": 8192,
+            "max_new_tokens": 64,
+            "pad_token_id": 32000,
+            "stop_strings": [],
+            "temperature": 0.1,
+            "top_k": 40,
+            "top_p": 0.9,
+            "transformers_version": "4.38.2"
+        },
+        "conversation_template": {
+            "system_prompt": "",
+            "system_message_template": "{role} {content}<|end_of_turn|>",
+            "user_message_template": "{role} {content}<|end_of_turn|>",
+            "bot_message_template": "{role} {content}<|end_of_turn|>",
+            "bot_message_template_incomplete": "{role} {content}",
+            "user_role": "GPT4 Correct User:",
+            "bot_role": "GPT4 Correct Assistant:",
+            "system_role": "GPT4 Correct System:",
+            "global_prefix": "<s>",
+            "suffix": "GPT4 Correct Assistant:",
+            "add_special_tokens": false,
+            "eos_token": "<|end_of_turn|>"
+        },
+        "load_in_8bit": false,
+        "torch_dtype": "auto",
+        "use_flash_attention_2": true,
+        "device_map": "cuda:0",
+        "use_fast_tokenizer": true,
+        "leading_space": true,
+        "space_token": 28705,
+        "trust_remote_code": [
+            false
+        ],
+        "max_model_len": 8192
+    },
+    "task_params": {
+        "max_len": 4000,
+        "few_shot_count": 0,
+        "batch_size": 1,
+        "max_sample_per_dataset": 10000000000000,
+        "method": "generate"
+    }
+}

llmtf_eval_k0_bs1/darumeru_MultiQ_total.jsonl ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "task_name": "darumeru/MultiQ",
+    "results": {
+        "f1": 0.28742819007452886,
+        "em": 0.16443594646271512
+    },
+    "leaderboard_result": 0.225932068268622
+}

llmtf_eval_k0_bs1/darumeru_PARus.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff

llmtf_eval_k0_bs1/darumeru_PARus_params.jsonl ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+    "custom_generation_config": null,
+    "model_params": {
+        "model_name_or_path": "openchat/openchat-3.5-0106",
+        "generation_config": {
+            "_from_model_config": true,
+            "bos_token_id": 1,
+            "do_sample": true,
+            "eos_token_id": [
+                32000
+            ],
+            "max_length": 8192,
+            "max_new_tokens": 64,
+            "pad_token_id": 32000,
+            "stop_strings": [],
+            "temperature": 0.1,
+            "top_k": 40,
+            "top_p": 0.9,
+            "transformers_version": "4.38.2"
+        },
+        "conversation_template": {
+            "system_prompt": "",
+            "system_message_template": "{role} {content}<|end_of_turn|>",
+            "user_message_template": "{role} {content}<|end_of_turn|>",
+            "bot_message_template": "{role} {content}<|end_of_turn|>",
+            "bot_message_template_incomplete": "{role} {content}",
+            "user_role": "GPT4 Correct User:",
+            "bot_role": "GPT4 Correct Assistant:",
+            "system_role": "GPT4 Correct System:",
+            "global_prefix": "<s>",
+            "suffix": "GPT4 Correct Assistant:",
+            "add_special_tokens": false,
+            "eos_token": "<|end_of_turn|>"
+        },
+        "load_in_8bit": false,
+        "torch_dtype": "auto",
+        "use_flash_attention_2": true,
+        "device_map": "cuda:0",
+        "use_fast_tokenizer": true,
+        "leading_space": true,
+        "space_token": 28705,
+        "trust_remote_code": [
+            false
+        ],
+        "max_model_len": 8192
+    },
+    "task_params": {
+        "max_len": 4000,
+        "few_shot_count": 0,
+        "batch_size": 1,
+        "max_sample_per_dataset": 10000000000000,
+        "method": "calculate_tokens_proba"
+    }
+}

llmtf_eval_k0_bs1/darumeru_PARus_total.jsonl ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+    "task_name": "darumeru/PARus",
+    "results": {
+        "acc": 0.74
+    },
+    "leaderboard_result": 0.74
+}

llmtf_eval_k0_bs1/darumeru_RCB.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff

llmtf_eval_k0_bs1/darumeru_RCB_params.jsonl ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+    "custom_generation_config": null,
+    "model_params": {
+        "model_name_or_path": "openchat/openchat-3.5-0106",
+        "generation_config": {
+            "_from_model_config": true,
+            "bos_token_id": 1,
+            "do_sample": true,
+            "eos_token_id": [
+                32000
+            ],
+            "max_length": 8192,
+            "max_new_tokens": 64,
+            "pad_token_id": 32000,
+            "stop_strings": [],
+            "temperature": 0.1,
+            "top_k": 40,
+            "top_p": 0.9,
+            "transformers_version": "4.38.2"
+        },
+        "conversation_template": {
+            "system_prompt": "",
+            "system_message_template": "{role} {content}<|end_of_turn|>",
+            "user_message_template": "{role} {content}<|end_of_turn|>",
+            "bot_message_template": "{role} {content}<|end_of_turn|>",
+            "bot_message_template_incomplete": "{role} {content}",
+            "user_role": "GPT4 Correct User:",
+            "bot_role": "GPT4 Correct Assistant:",
+            "system_role": "GPT4 Correct System:",
+            "global_prefix": "<s>",
+            "suffix": "GPT4 Correct Assistant:",
+            "add_special_tokens": false,
+            "eos_token": "<|end_of_turn|>"
+        },
+        "load_in_8bit": false,
+        "torch_dtype": "auto",
+        "use_flash_attention_2": true,
+        "device_map": "cuda:0",
+        "use_fast_tokenizer": true,
+        "leading_space": true,
+        "space_token": 28705,
+        "trust_remote_code": [
+            false
+        ],
+        "max_model_len": 8192
+    },
+    "task_params": {
+        "max_len": 4000,
+        "few_shot_count": 0,
+        "batch_size": 1,
+        "max_sample_per_dataset": 10000000000000,
+        "method": "calculate_tokens_proba"
+    }
+}

llmtf_eval_k0_bs1/darumeru_RCB_total.jsonl ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "task_name": "darumeru/RCB",
+    "results": {
+        "acc": 0.5227272727272727,
+        "f1_macro": 0.4428418803418803
+    },
+    "leaderboard_result": 0.4827845765345765
+}

llmtf_eval_k0_bs1/darumeru_RWSD.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff

llmtf_eval_k0_bs1/darumeru_RWSD_params.jsonl ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+    "custom_generation_config": null,
+    "model_params": {
+        "model_name_or_path": "openchat/openchat-3.5-0106",
+        "generation_config": {
+            "_from_model_config": true,
+            "bos_token_id": 1,
+            "do_sample": true,
+            "eos_token_id": [
+                32000
+            ],
+            "max_length": 8192,
+            "max_new_tokens": 64,
+            "pad_token_id": 32000,
+            "stop_strings": [],
+            "temperature": 0.1,
+            "top_k": 40,
+            "top_p": 0.9,
+            "transformers_version": "4.38.2"
+        },
+        "conversation_template": {
+            "system_prompt": "",
+            "system_message_template": "{role} {content}<|end_of_turn|>",
+            "user_message_template": "{role} {content}<|end_of_turn|>",
+            "bot_message_template": "{role} {content}<|end_of_turn|>",
+            "bot_message_template_incomplete": "{role} {content}",
+            "user_role": "GPT4 Correct User:",
+            "bot_role": "GPT4 Correct Assistant:",
+            "system_role": "GPT4 Correct System:",
+            "global_prefix": "<s>",
+            "suffix": "GPT4 Correct Assistant:",
+            "add_special_tokens": false,
+            "eos_token": "<|end_of_turn|>"
+        },
+        "load_in_8bit": false,
+        "torch_dtype": "auto",
+        "use_flash_attention_2": true,
+        "device_map": "cuda:0",
+        "use_fast_tokenizer": true,
+        "leading_space": true,
+        "space_token": 28705,
+        "trust_remote_code": [
+            false
+        ],
+        "max_model_len": 8192
+    },
+    "task_params": {
+        "max_len": 4000,
+        "few_shot_count": 0,
+        "batch_size": 1,
+        "max_sample_per_dataset": 10000000000000,
+        "method": "calculate_tokens_proba"
+    }
+}

llmtf_eval_k0_bs1/darumeru_RWSD_total.jsonl ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+    "task_name": "darumeru/RWSD",
+    "results": {
+        "acc": 0.5441176470588235
+    },
+    "leaderboard_result": 0.5441176470588235
+}

llmtf_eval_k0_bs1/darumeru_USE.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff

llmtf_eval_k0_bs1/darumeru_USE_params.jsonl ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+    "custom_generation_config": null,
+    "model_params": {
+        "model_name_or_path": "openchat/openchat-3.5-0106",
+        "generation_config": {
+            "_from_model_config": true,
+            "bos_token_id": 1,
+            "do_sample": true,
+            "eos_token_id": [
+                32000
+            ],
+            "max_length": 8192,
+            "max_new_tokens": 64,
+            "pad_token_id": 32000,
+            "stop_strings": [],
+            "temperature": 0.1,
+            "top_k": 40,
+            "top_p": 0.9,
+            "transformers_version": "4.38.2"
+        },
+        "conversation_template": {
+            "system_prompt": "",
+            "system_message_template": "{role} {content}<|end_of_turn|>",
+            "user_message_template": "{role} {content}<|end_of_turn|>",
+            "bot_message_template": "{role} {content}<|end_of_turn|>",
+            "bot_message_template_incomplete": "{role} {content}",
+            "user_role": "GPT4 Correct User:",
+            "bot_role": "GPT4 Correct Assistant:",
+            "system_role": "GPT4 Correct System:",
+            "global_prefix": "<s>",
+            "suffix": "GPT4 Correct Assistant:",
+            "add_special_tokens": false,
+            "eos_token": "<|end_of_turn|>"
+        },
+        "load_in_8bit": false,
+        "torch_dtype": "auto",
+        "use_flash_attention_2": true,
+        "device_map": "cuda:0",
+        "use_fast_tokenizer": true,
+        "leading_space": true,
+        "space_token": 28705,
+        "trust_remote_code": [
+            false
+        ],
+        "max_model_len": 8192
+    },
+    "task_params": {
+        "max_len": 4000,
+        "few_shot_count": 0,
+        "batch_size": 1,
+        "max_sample_per_dataset": 10000000000000,
+        "method": "generate"
+    }
+}

llmtf_eval_k0_bs1/darumeru_USE_total.jsonl ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+    "task_name": "darumeru/USE",
+    "results": {
+        "grade_norm": 0.06372549019607841
+    },
+    "leaderboard_result": 0.06372549019607841
+}

llmtf_eval_k0_bs1/darumeru_cp_para_en.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff

llmtf_eval_k0_bs1/darumeru_cp_para_en_params.jsonl ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+    "custom_generation_config": null,
+    "model_params": {
+        "model_name_or_path": "openchat/openchat-3.5-0106",
+        "generation_config": {
+            "_from_model_config": true,
+            "bos_token_id": 1,
+            "do_sample": true,
+            "eos_token_id": [
+                32000
+            ],
+            "max_length": 8192,
+            "max_new_tokens": 1024,
+            "pad_token_id": 32000,
+            "stop_strings": [],
+            "temperature": 0.1,
+            "top_k": 40,
+            "top_p": 0.9,
+            "transformers_version": "4.38.2"
+        },
+        "conversation_template": {
+            "system_prompt": "",
+            "system_message_template": "{role} {content}<|end_of_turn|>",
+            "user_message_template": "{role} {content}<|end_of_turn|>",
+            "bot_message_template": "{role} {content}<|end_of_turn|>",
+            "bot_message_template_incomplete": "{role} {content}",
+            "user_role": "GPT4 Correct User:",
+            "bot_role": "GPT4 Correct Assistant:",
+            "system_role": "GPT4 Correct System:",
+            "global_prefix": "<s>",
+            "suffix": "GPT4 Correct Assistant:",
+            "add_special_tokens": false,
+            "eos_token": "<|end_of_turn|>"
+        },
+        "load_in_8bit": false,
+        "torch_dtype": "auto",
+        "use_flash_attention_2": true,
+        "device_map": "cuda:0",
+        "use_fast_tokenizer": true,
+        "leading_space": true,
+        "space_token": 28705,
+        "trust_remote_code": [
+            false
+        ],
+        "max_model_len": 8192
+    },
+    "task_params": {
+        "max_len": 4000,
+        "few_shot_count": 0,
+        "batch_size": 1,
+        "max_sample_per_dataset": 10000000000000,
+        "method": "generate"
+    }
+}

llmtf_eval_k0_bs1/darumeru_cp_para_en_total.jsonl ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "task_name": "darumeru/cp_para_en",
+    "results": {
+        "symbol_per_token": 3.9643938387294457,
+        "len": 0.9964159562157313,
+        "lcs": 0.8708087545759775
+    },
+    "leaderboard_result": 0.8708087545759775
+}

llmtf_eval_k0_bs1/darumeru_cp_para_ru.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff

llmtf_eval_k0_bs1/darumeru_cp_para_ru_params.jsonl ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+    "custom_generation_config": null,
+    "model_params": {
+        "model_name_or_path": "openchat/openchat-3.5-0106",
+        "generation_config": {
+            "_from_model_config": true,
+            "bos_token_id": 1,
+            "do_sample": true,
+            "eos_token_id": [
+                32000
+            ],
+            "max_length": 8192,
+            "max_new_tokens": 1024,
+            "pad_token_id": 32000,
+            "stop_strings": [],
+            "temperature": 0.1,
+            "top_k": 40,
+            "top_p": 0.9,
+            "transformers_version": "4.38.2"
+        },
+        "conversation_template": {
+            "system_prompt": "",
+            "system_message_template": "{role} {content}<|end_of_turn|>",
+            "user_message_template": "{role} {content}<|end_of_turn|>",
+            "bot_message_template": "{role} {content}<|end_of_turn|>",
+            "bot_message_template_incomplete": "{role} {content}",
+            "user_role": "GPT4 Correct User:",
+            "bot_role": "GPT4 Correct Assistant:",
+            "system_role": "GPT4 Correct System:",
+            "global_prefix": "<s>",
+            "suffix": "GPT4 Correct Assistant:",
+            "add_special_tokens": false,
+            "eos_token": "<|end_of_turn|>"
+        },
+        "load_in_8bit": false,
+        "torch_dtype": "auto",
+        "use_flash_attention_2": true,
+        "device_map": "cuda:0",
+        "use_fast_tokenizer": true,
+        "leading_space": true,
+        "space_token": 28705,
+        "trust_remote_code": [
+            false
+        ],
+        "max_model_len": 8192
+    },
+    "task_params": {
+        "max_len": 4000,
+        "few_shot_count": 0,
+        "batch_size": 1,
+        "max_sample_per_dataset": 10000000000000,
+        "method": "generate"
+    }
+}

llmtf_eval_k0_bs1/darumeru_cp_para_ru_total.jsonl ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "task_name": "darumeru/cp_para_ru",
+    "results": {
+        "symbol_per_token": 2.469447282753646,
+        "len": 0.992235195524327,
+        "lcs": 0.8233434585815769
+    },
+    "leaderboard_result": 0.8233434585815769
+}

llmtf_eval_k0_bs1/darumeru_cp_sent_en.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff

llmtf_eval_k0_bs1/darumeru_cp_sent_en_params.jsonl ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+    "custom_generation_config": null,
+    "model_params": {
+        "model_name_or_path": "openchat/openchat-3.5-0106",
+        "generation_config": {
+            "_from_model_config": true,
+            "bos_token_id": 1,
+            "do_sample": true,
+            "eos_token_id": [
+                32000
+            ],
+            "max_length": 8192,
+            "max_new_tokens": 128,
+            "pad_token_id": 32000,
+            "stop_strings": [],
+            "temperature": 0.1,
+            "top_k": 40,
+            "top_p": 0.9,
+            "transformers_version": "4.38.2"
+        },
+        "conversation_template": {
+            "system_prompt": "",
+            "system_message_template": "{role} {content}<|end_of_turn|>",
+            "user_message_template": "{role} {content}<|end_of_turn|>",
+            "bot_message_template": "{role} {content}<|end_of_turn|>",
+            "bot_message_template_incomplete": "{role} {content}",
+            "user_role": "GPT4 Correct User:",
+            "bot_role": "GPT4 Correct Assistant:",
+            "system_role": "GPT4 Correct System:",
+            "global_prefix": "<s>",
+            "suffix": "GPT4 Correct Assistant:",
+            "add_special_tokens": false,
+            "eos_token": "<|end_of_turn|>"
+        },
+        "load_in_8bit": false,
+        "torch_dtype": "auto",
+        "use_flash_attention_2": true,
+        "device_map": "cuda:0",
+        "use_fast_tokenizer": true,
+        "leading_space": true,
+        "space_token": 28705,
+        "trust_remote_code": [
+            false
+        ],
+        "max_model_len": 8192
+    },
+    "task_params": {
+        "max_len": 4000,
+        "few_shot_count": 0,
+        "batch_size": 1,
+        "max_sample_per_dataset": 10000000000000,
+        "method": "generate"
+    }
+}

llmtf_eval_k0_bs1/darumeru_cp_sent_en_total.jsonl ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "task_name": "darumeru/cp_sent_en",
+    "results": {
+        "symbol_per_token": 3.896272252144742,
+        "len": 0.997653558022427,
+        "lcs": 0.9800192820829826
+    },
+    "leaderboard_result": 0.997653558022427
+}

llmtf_eval_k0_bs1/darumeru_cp_sent_ru.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff

llmtf_eval_k0_bs1/darumeru_cp_sent_ru_params.jsonl ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+    "custom_generation_config": null,
+    "model_params": {
+        "model_name_or_path": "openchat/openchat-3.5-0106",
+        "generation_config": {
+            "_from_model_config": true,
+            "bos_token_id": 1,
+            "do_sample": true,
+            "eos_token_id": [
+                32000
+            ],
+            "max_length": 8192,
+            "max_new_tokens": 128,
+            "pad_token_id": 32000,
+            "stop_strings": [],
+            "temperature": 0.1,
+            "top_k": 40,
+            "top_p": 0.9,
+            "transformers_version": "4.38.2"
+        },
+        "conversation_template": {
+            "system_prompt": "",
+            "system_message_template": "{role} {content}<|end_of_turn|>",
+            "user_message_template": "{role} {content}<|end_of_turn|>",
+            "bot_message_template": "{role} {content}<|end_of_turn|>",
+            "bot_message_template_incomplete": "{role} {content}",
+            "user_role": "GPT4 Correct User:",
+            "bot_role": "GPT4 Correct Assistant:",
+            "system_role": "GPT4 Correct System:",
+            "global_prefix": "<s>",
+            "suffix": "GPT4 Correct Assistant:",
+            "add_special_tokens": false,
+            "eos_token": "<|end_of_turn|>"
+        },
+        "load_in_8bit": false,
+        "torch_dtype": "auto",
+        "use_flash_attention_2": true,
+        "device_map": "cuda:0",
+        "use_fast_tokenizer": true,
+        "leading_space": true,
+        "space_token": 28705,
+        "trust_remote_code": [
+            false
+        ],
+        "max_model_len": 8192
+    },
+    "task_params": {
+        "max_len": 4000,
+        "few_shot_count": 0,
+        "batch_size": 1,
+        "max_sample_per_dataset": 10000000000000,
+        "method": "generate"
+    }
+}

llmtf_eval_k0_bs1/darumeru_cp_sent_ru_total.jsonl ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "task_name": "darumeru/cp_sent_ru",
+    "results": {
+        "symbol_per_token": 2.3737308986098866,
+        "len": 0.9919888130657724,
+        "lcs": 0.9166449736336507
+    },
+    "leaderboard_result": 0.9919888130657724
+}

llmtf_eval_k0_bs1/darumeru_ruMMLU.jsonl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:98b247c41df8c1dd7ddba6000677e4feb8ec79d8406774a2efcdacf08a04c822
+size 32286605

llmtf_eval_k0_bs1/darumeru_ruMMLU_params.jsonl ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+    "custom_generation_config": null,
+    "model_params": {
+        "model_name_or_path": "openchat/openchat-3.5-0106",
+        "generation_config": {
+            "_from_model_config": true,
+            "bos_token_id": 1,
+            "do_sample": true,
+            "eos_token_id": [
+                32000
+            ],
+            "max_length": 8192,
+            "max_new_tokens": 64,
+            "pad_token_id": 32000,
+            "stop_strings": [],
+            "temperature": 0.1,
+            "top_k": 40,
+            "top_p": 0.9,
+            "transformers_version": "4.38.2"
+        },
+        "conversation_template": {
+            "system_prompt": "",
+            "system_message_template": "{role} {content}<|end_of_turn|>",
+            "user_message_template": "{role} {content}<|end_of_turn|>",
+            "bot_message_template": "{role} {content}<|end_of_turn|>",
+            "bot_message_template_incomplete": "{role} {content}",
+            "user_role": "GPT4 Correct User:",
+            "bot_role": "GPT4 Correct Assistant:",
+            "system_role": "GPT4 Correct System:",
+            "global_prefix": "<s>",
+            "suffix": "GPT4 Correct Assistant:",
+            "add_special_tokens": false,
+            "eos_token": "<|end_of_turn|>"
+        },
+        "load_in_8bit": false,
+        "torch_dtype": "auto",
+        "use_flash_attention_2": true,
+        "device_map": "cuda:0",
+        "use_fast_tokenizer": true,
+        "leading_space": true,
+        "space_token": 28705,
+        "trust_remote_code": [
+            false
+        ],
+        "max_model_len": 8192
+    },
+    "task_params": {
+        "max_len": 4000,
+        "few_shot_count": 0,
+        "batch_size": 1,
+        "max_sample_per_dataset": 10000000000000,
+        "method": "calculate_tokens_proba"
+    }
+}

llmtf_eval_k0_bs1/darumeru_ruMMLU_total.jsonl ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+    "task_name": "darumeru/ruMMLU",
+    "results": {
+        "acc": 0.4817918786790382
+    },
+    "leaderboard_result": 0.4817918786790382
+}

llmtf_eval_k0_bs1/darumeru_ruOpenBookQA.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff

llmtf_eval_k0_bs1/darumeru_ruOpenBookQA_params.jsonl ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+    "custom_generation_config": null,
+    "model_params": {
+        "model_name_or_path": "openchat/openchat-3.5-0106",
+        "generation_config": {
+            "_from_model_config": true,
+            "bos_token_id": 1,
+            "do_sample": true,
+            "eos_token_id": [
+                32000
+            ],
+            "max_length": 8192,
+            "max_new_tokens": 64,
+            "pad_token_id": 32000,
+            "stop_strings": [],
+            "temperature": 0.1,
+            "top_k": 40,
+            "top_p": 0.9,
+            "transformers_version": "4.38.2"
+        },
+        "conversation_template": {
+            "system_prompt": "",
+            "system_message_template": "{role} {content}<|end_of_turn|>",
+            "user_message_template": "{role} {content}<|end_of_turn|>",
+            "bot_message_template": "{role} {content}<|end_of_turn|>",
+            "bot_message_template_incomplete": "{role} {content}",
+            "user_role": "GPT4 Correct User:",
+            "bot_role": "GPT4 Correct Assistant:",
+            "system_role": "GPT4 Correct System:",
+            "global_prefix": "<s>",
+            "suffix": "GPT4 Correct Assistant:",
+            "add_special_tokens": false,
+            "eos_token": "<|end_of_turn|>"
+        },
+        "load_in_8bit": false,
+        "torch_dtype": "auto",
+        "use_flash_attention_2": true,
+        "device_map": "cuda:0",
+        "use_fast_tokenizer": true,
+        "leading_space": true,
+        "space_token": 28705,
+        "trust_remote_code": [
+            false
+        ],
+        "max_model_len": 8192
+    },
+    "task_params": {
+        "max_len": 4000,
+        "few_shot_count": 0,
+        "batch_size": 1,
+        "max_sample_per_dataset": 10000000000000,
+        "method": "calculate_tokens_proba"
+    }
+}

llmtf_eval_k0_bs1/darumeru_ruOpenBookQA_total.jsonl ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "task_name": "darumeru/ruOpenBookQA",
+    "results": {
+        "acc": 0.7319587628865979,
+        "f1_macro": 0.7326117311318767
+    },
+    "leaderboard_result": 0.7322852470092374
+}

llmtf_eval_k0_bs1/darumeru_ruTiE.jsonl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b5d30008f89afb25c815e3a9b99cf292d0e06f8034bb79329d5516c45f3ecdde
+size 11168288

llmtf_eval_k0_bs1/darumeru_ruTiE_params.jsonl ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+    "custom_generation_config": null,
+    "model_params": {
+        "model_name_or_path": "openchat/openchat-3.5-0106",
+        "generation_config": {
+            "_from_model_config": true,
+            "bos_token_id": 1,
+            "do_sample": true,
+            "eos_token_id": [
+                32000
+            ],
+            "max_length": 8192,
+            "max_new_tokens": 64,
+            "pad_token_id": 32000,
+            "stop_strings": [],
+            "temperature": 0.1,
+            "top_k": 40,
+            "top_p": 0.9,
+            "transformers_version": "4.38.2"
+        },
+        "conversation_template": {
+            "system_prompt": "",
+            "system_message_template": "{role} {content}<|end_of_turn|>",
+            "user_message_template": "{role} {content}<|end_of_turn|>",
+            "bot_message_template": "{role} {content}<|end_of_turn|>",
+            "bot_message_template_incomplete": "{role} {content}",
+            "user_role": "GPT4 Correct User:",
+            "bot_role": "GPT4 Correct Assistant:",
+            "system_role": "GPT4 Correct System:",
+            "global_prefix": "<s>",
+            "suffix": "GPT4 Correct Assistant:",
+            "add_special_tokens": false,
+            "eos_token": "<|end_of_turn|>"
+        },
+        "load_in_8bit": false,
+        "torch_dtype": "auto",
+        "use_flash_attention_2": true,
+        "device_map": "cuda:0",
+        "use_fast_tokenizer": true,
+        "leading_space": true,
+        "space_token": 28705,
+        "trust_remote_code": [
+            false
+        ],
+        "max_model_len": 8192
+    },
+    "task_params": {
+        "max_len": 4000,
+        "few_shot_count": 0,
+        "batch_size": 1,
+        "max_sample_per_dataset": 10000000000000,
+        "method": "calculate_tokens_proba"
+    }
+}

llmtf_eval_k0_bs1/darumeru_ruTiE_total.jsonl ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+    "task_name": "darumeru/ruTiE",
+    "results": {
+        "acc": 0.5395348837209303
+    },
+    "leaderboard_result": 0.5395348837209303
+}

llmtf_eval_k0_bs1/darumeru_ruWorldTree.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff

llmtf_eval_k0_bs1/darumeru_ruWorldTree_params.jsonl ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+    "custom_generation_config": null,
+    "model_params": {
+        "model_name_or_path": "openchat/openchat-3.5-0106",
+        "generation_config": {
+            "_from_model_config": true,
+            "bos_token_id": 1,
+            "do_sample": true,
+            "eos_token_id": [
+                32000
+            ],
+            "max_length": 8192,
+            "max_new_tokens": 64,
+            "pad_token_id": 32000,
+            "stop_strings": [],
+            "temperature": 0.1,
+            "top_k": 40,
+            "top_p": 0.9,
+            "transformers_version": "4.38.2"
+        },
+        "conversation_template": {
+            "system_prompt": "",
+            "system_message_template": "{role} {content}<|end_of_turn|>",
+            "user_message_template": "{role} {content}<|end_of_turn|>",
+            "bot_message_template": "{role} {content}<|end_of_turn|>",
+            "bot_message_template_incomplete": "{role} {content}",
+            "user_role": "GPT4 Correct User:",
+            "bot_role": "GPT4 Correct Assistant:",
+            "system_role": "GPT4 Correct System:",
+            "global_prefix": "<s>",
+            "suffix": "GPT4 Correct Assistant:",
+            "add_special_tokens": false,
+            "eos_token": "<|end_of_turn|>"
+        },
+        "load_in_8bit": false,
+        "torch_dtype": "auto",
+        "use_flash_attention_2": true,
+        "device_map": "cuda:0",
+        "use_fast_tokenizer": true,
+        "leading_space": true,
+        "space_token": 28705,
+        "trust_remote_code": [
+            false
+        ],
+        "max_model_len": 8192
+    },
+    "task_params": {
+        "max_len": 4000,
+        "few_shot_count": 0,
+        "batch_size": 1,
+        "max_sample_per_dataset": 10000000000000,
+        "method": "calculate_tokens_proba"
+    }
+}

llmtf_eval_k0_bs1/darumeru_ruWorldTree_total.jsonl ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "task_name": "darumeru/ruWorldTree",
+    "results": {
+        "acc": 0.8857142857142857,
+        "f1_macro": 0.8835438024831281
+    },
+    "leaderboard_result": 0.8846290440987069
+}

llmtf_eval_k0_bs1/evaluation_log.txt ADDED Viewed

	@@ -0,0 +1,273 @@

+INFO: 2024-07-13 15:45:27,047: llmtf.base.evaluator: Starting eval on ['darumeru/parus', 'darumeru/rcb', 'darumeru/ruopenbookqa', 'darumeru/rutie', 'darumeru/ruworldtree', 'darumeru/rwsd', 'russiannlp/rucola_custom']
+INFO: 2024-07-13 15:45:27,048: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [32000]
+INFO: 2024-07-13 15:45:27,048: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
+INFO: 2024-07-13 15:45:28,837: llmtf.base.evaluator: Starting eval on ['darumeru/rummlu', 'daru/treewayextractive']
+INFO: 2024-07-13 15:45:28,837: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [32000]
+INFO: 2024-07-13 15:45:28,837: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
+INFO: 2024-07-13 15:45:29,193: llmtf.base.darumeru/PARus: Loading Dataset: 2.14s
+INFO: 2024-07-13 15:45:30,501: llmtf.base.evaluator: Starting eval on ['nlpcoreteam/rummlu', 'nlpcoreteam/enmmlu']
+INFO: 2024-07-13 15:45:30,502: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [32000]
+INFO: 2024-07-13 15:45:30,502: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
+INFO: 2024-07-13 15:45:32,886: llmtf.base.evaluator: Starting eval on ['daru/treewayabstractive']
+INFO: 2024-07-13 15:45:32,887: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [32000]
+INFO: 2024-07-13 15:45:32,887: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
+INFO: 2024-07-13 15:45:34,324: llmtf.base.evaluator: Starting eval on ['darumeru/multiq', 'darumeru/use']
+INFO: 2024-07-13 15:45:34,324: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [32000]
+INFO: 2024-07-13 15:45:34,324: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
+INFO: 2024-07-13 15:45:35,898: llmtf.base.darumeru/ruMMLU: Loading Dataset: 7.06s
+INFO: 2024-07-13 15:45:36,667: llmtf.base.evaluator: Starting eval on ['darumeru/cp_sent_ru', 'darumeru/cp_para_ru']
+INFO: 2024-07-13 15:45:36,667: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [32000]
+INFO: 2024-07-13 15:45:36,667: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
+INFO: 2024-07-13 15:45:36,853: llmtf.base.daru/treewayabstractive: Loading Dataset: 3.97s
+INFO: 2024-07-13 15:45:37,334: llmtf.base.darumeru/MultiQ: Loading Dataset: 3.01s
+INFO: 2024-07-13 15:45:37,609: llmtf.base.darumeru/PARus: Processing Dataset: 8.41s
+INFO: 2024-07-13 15:45:37,610: llmtf.base.darumeru/PARus: Results for darumeru/PARus:
+INFO: 2024-07-13 15:45:37,623: llmtf.base.darumeru/PARus: {'acc': 0.74}
+INFO: 2024-07-13 15:45:37,624: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [32000]
+INFO: 2024-07-13 15:45:37,624: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
+INFO: 2024-07-13 15:45:39,248: llmtf.base.darumeru/cp_sent_ru: Loading Dataset: 2.58s
+INFO: 2024-07-13 15:45:39,373: llmtf.base.evaluator: Starting eval on ['darumeru/cp_sent_en', 'darumeru/cp_para_en']
+INFO: 2024-07-13 15:45:39,374: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [32000]
+INFO: 2024-07-13 15:45:39,374: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
+INFO: 2024-07-13 15:45:39,556: llmtf.base.darumeru/RCB: Loading Dataset: 1.93s
+INFO: 2024-07-13 15:45:41,961: llmtf.base.darumeru/cp_sent_en: Loading Dataset: 2.59s
+INFO: 2024-07-13 15:45:49,271: llmtf.base.darumeru/RCB: Processing Dataset: 9.71s
+INFO: 2024-07-13 15:45:49,273: llmtf.base.darumeru/RCB: Results for darumeru/RCB:
+INFO: 2024-07-13 15:45:49,294: llmtf.base.darumeru/RCB: {'acc': 0.5227272727272727, 'f1_macro': 0.4428418803418803}
+INFO: 2024-07-13 15:45:49,295: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [32000]
+INFO: 2024-07-13 15:45:49,295: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
+INFO: 2024-07-13 15:45:51,672: llmtf.base.darumeru/ruOpenBookQA: Loading Dataset: 2.38s
+INFO: 2024-07-13 15:47:20,116: llmtf.base.darumeru/ruOpenBookQA: Processing Dataset: 88.44s
+INFO: 2024-07-13 15:47:20,118: llmtf.base.darumeru/ruOpenBookQA: Results for darumeru/ruOpenBookQA:
+INFO: 2024-07-13 15:47:20,131: llmtf.base.darumeru/ruOpenBookQA: {'acc': 0.7319587628865979, 'f1_macro': 0.7326117311318767}
+INFO: 2024-07-13 15:47:20,138: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [32000]
+INFO: 2024-07-13 15:47:20,138: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
+INFO: 2024-07-13 15:47:24,376: llmtf.base.darumeru/ruTiE: Loading Dataset: 4.24s
+INFO: 2024-07-13 15:47:42,037: llmtf.base.nlpcoreteam/ruMMLU: Loading Dataset: 131.54s
+INFO: 2024-07-13 15:51:44,086: llmtf.base.darumeru/ruTiE: Processing Dataset: 259.70s
+INFO: 2024-07-13 15:51:44,089: llmtf.base.darumeru/ruTiE: Results for darumeru/ruTiE:
+INFO: 2024-07-13 15:51:44,118: llmtf.base.darumeru/ruTiE: {'acc': 0.5395348837209303}
+INFO: 2024-07-13 15:51:44,121: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [32000]
+INFO: 2024-07-13 15:51:44,121: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
+INFO: 2024-07-13 15:51:46,018: llmtf.base.darumeru/ruWorldTree: Loading Dataset: 1.90s
+INFO: 2024-07-13 15:51:50,258: llmtf.base.darumeru/ruWorldTree: Processing Dataset: 4.24s
+INFO: 2024-07-13 15:51:50,273: llmtf.base.darumeru/ruWorldTree: Results for darumeru/ruWorldTree:
+INFO: 2024-07-13 15:51:50,278: llmtf.base.darumeru/ruWorldTree: {'acc': 0.8857142857142857, 'f1_macro': 0.8835438024831281}
+INFO: 2024-07-13 15:51:50,279: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [32000]
+INFO: 2024-07-13 15:51:50,279: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
+INFO: 2024-07-13 15:51:52,244: llmtf.base.darumeru/RWSD: Loading Dataset: 1.96s
+INFO: 2024-07-13 15:52:01,494: llmtf.base.darumeru/RWSD: Processing Dataset: 9.25s
+INFO: 2024-07-13 15:52:01,496: llmtf.base.darumeru/RWSD: Results for darumeru/RWSD:
+INFO: 2024-07-13 15:52:01,500: llmtf.base.darumeru/RWSD: {'acc': 0.5441176470588235}
+INFO: 2024-07-13 15:52:01,501: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [32000]
+INFO: 2024-07-13 15:52:01,501: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
+INFO: 2024-07-13 15:52:06,070: llmtf.base.russiannlp/rucola_custom: Loading Dataset: 4.57s
+INFO: 2024-07-13 15:53:22,251: llmtf.base.darumeru/ruMMLU: Processing Dataset: 466.35s
+INFO: 2024-07-13 15:53:22,255: llmtf.base.darumeru/ruMMLU: Results for darumeru/ruMMLU:
+INFO: 2024-07-13 15:53:22,280: llmtf.base.darumeru/ruMMLU: {'acc': 0.4817918786790382}
+INFO: 2024-07-13 15:53:22,326: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [32000]
+INFO: 2024-07-13 15:53:22,327: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
+INFO: 2024-07-13 15:53:32,742: llmtf.base.daru/treewayextractive: Loading Dataset: 10.41s
+INFO: 2024-07-13 15:54:03,119: llmtf.base.russiannlp/rucola_custom: Processing Dataset: 117.05s
+INFO: 2024-07-13 15:54:03,124: llmtf.base.russiannlp/rucola_custom: Results for russiannlp/rucola_custom:
+INFO: 2024-07-13 15:54:03,135: llmtf.base.russiannlp/rucola_custom: {'acc': 0.7369931826336562, 'mcc': 0.3418673882542275}
+INFO: 2024-07-13 15:54:03,139: llmtf.base.evaluator: Ended eval
+INFO: 2024-07-13 15:54:03,151: llmtf.base.evaluator:
+mean	darumeru/PARus	darumeru/RCB	darumeru/RWSD	darumeru/ruMMLU	darumeru/ruOpenBookQA	darumeru/ruTiE	darumeru/ruWorldTree	russiannlp/rucola_custom
+0.618	0.740	0.483	0.544	0.482	0.732	0.540	0.885	0.539
+INFO: 2024-07-13 15:54:27,202: llmtf.base.darumeru/cp_sent_en: Processing Dataset: 525.24s
+INFO: 2024-07-13 15:54:27,204: llmtf.base.darumeru/cp_sent_en: Results for darumeru/cp_sent_en:
+INFO: 2024-07-13 15:54:27,209: llmtf.base.darumeru/cp_sent_en: {'symbol_per_token': 3.896272252144742, 'len': 0.997653558022427, 'lcs': 0.9800192820829826}
+INFO: 2024-07-13 15:54:27,210: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [32000]
+INFO: 2024-07-13 15:54:27,210: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
+INFO: 2024-07-13 15:54:29,520: llmtf.base.darumeru/cp_para_en: Loading Dataset: 2.31s
+INFO: 2024-07-13 15:56:48,862: llmtf.base.darumeru/cp_sent_ru: Processing Dataset: 669.61s
+INFO: 2024-07-13 15:56:48,865: llmtf.base.darumeru/cp_sent_ru: Results for darumeru/cp_sent_ru:
+INFO: 2024-07-13 15:56:48,869: llmtf.base.darumeru/cp_sent_ru: {'symbol_per_token': 2.3737308986098866, 'len': 0.9919888130657724, 'lcs': 0.9166449736336507}
+INFO: 2024-07-13 15:56:48,870: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [32000]
+INFO: 2024-07-13 15:56:48,870: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
+INFO: 2024-07-13 15:56:51,203: llmtf.base.darumeru/cp_para_ru: Loading Dataset: 2.33s
+INFO: 2024-07-13 15:58:01,336: llmtf.base.nlpcoreteam/ruMMLU: Processing Dataset: 619.30s
+INFO: 2024-07-13 15:58:01,338: llmtf.base.nlpcoreteam/ruMMLU: Results for nlpcoreteam/ruMMLU:
+INFO: 2024-07-13 15:58:01,377: llmtf.base.nlpcoreteam/ruMMLU:                                        metric
+subject
+abstract_algebra                     0.360000
+anatomy                              0.414815
+astronomy                            0.565789
+business_ethics                      0.560000
+clinical_knowledge                   0.520755
+college_biology                      0.479167
+college_chemistry                    0.450000
+college_computer_science             0.500000
+college_mathematics                  0.380000
+college_medicine                     0.508671
+college_physics                      0.264706
+computer_security                    0.580000
+conceptual_physics                   0.438298
+econometrics                         0.368421
+electrical_engineering               0.475862
+elementary_mathematics               0.370370
+formal_logic                         0.325397
+global_facts                         0.280000
+high_school_biology                  0.596774
+high_school_chemistry                0.374384
+high_school_computer_science         0.600000
+high_school_european_history         0.678788
+high_school_geography                0.666667
+high_school_government_and_politics  0.580311
+high_school_macroeconomics           0.441026
+high_school_mathematics              0.359259
+high_school_microeconomics           0.478992
+high_school_physics                  0.390728
+high_school_psychology               0.623853
+high_school_statistics               0.458333
+high_school_us_history               0.686275
+high_school_world_history            0.713080
+human_aging                          0.511211
+human_sexuality                      0.564885
+international_law                    0.652893
+jurisprudence                        0.527778
+logical_fallacies                    0.417178
+machine_learning                     0.312500
+management                           0.631068
+marketing                            0.675214
+medical_genetics                     0.520000
+miscellaneous                        0.627075
+moral_disputes                       0.531792
+moral_scenarios                      0.233520
+nutrition                            0.562092
+philosophy                           0.485531
+prehistory                           0.487654
+professional_accounting              0.386525
+professional_law                     0.366362
+professional_medicine                0.481618
+professional_psychology              0.441176
+public_relations                     0.554545
+security_studies                     0.600000
+sociology                            0.696517
+us_foreign_policy                    0.740000
+virology                             0.481928
+world_religions                      0.684211
+INFO: 2024-07-13 15:58:01,385: llmtf.base.nlpcoreteam/ruMMLU:                                    metric
+subject
+STEM                             0.442010
+humanities                       0.522343
+other (business, health, misc.)  0.511498
+social sciences                  0.563033
+INFO: 2024-07-13 15:58:01,392: llmtf.base.nlpcoreteam/ruMMLU: {'acc': 0.5097207580093521}
+INFO: 2024-07-13 15:58:01,427: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [32000]
+INFO: 2024-07-13 15:58:01,428: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
+INFO: 2024-07-13 15:58:58,220: llmtf.base.darumeru/MultiQ: Processing Dataset: 800.88s
+INFO: 2024-07-13 15:58:58,223: llmtf.base.darumeru/MultiQ: Results for darumeru/MultiQ:
+INFO: 2024-07-13 15:58:58,228: llmtf.base.darumeru/MultiQ: {'f1': 0.28742819007452886, 'em': 0.16443594646271512}
+INFO: 2024-07-13 15:58:58,233: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [32000]
+INFO: 2024-07-13 15:58:58,233: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
+INFO: 2024-07-13 15:59:01,898: llmtf.base.darumeru/USE: Loading Dataset: 3.66s
+INFO: 2024-07-13 15:59:48,423: llmtf.base.nlpcoreteam/enMMLU: Loading Dataset: 106.99s
+INFO: 2024-07-13 16:02:00,095: llmtf.base.daru/treewayextractive: Processing Dataset: 507.35s
+INFO: 2024-07-13 16:02:00,098: llmtf.base.daru/treewayextractive: Results for daru/treewayextractive:
+INFO: 2024-07-13 16:02:00,340: llmtf.base.daru/treewayextractive: {'r-prec': 0.4038567821067821}
+INFO: 2024-07-13 16:02:00,402: llmtf.base.evaluator: Ended eval
+INFO: 2024-07-13 16:02:00,464: llmtf.base.evaluator:
+mean	daru/treewayextractive	darumeru/MultiQ	darumeru/PARus	darumeru/RCB	darumeru/RWSD	darumeru/cp_sent_en	darumeru/cp_sent_ru	darumeru/ruMMLU	darumeru/ruOpenBookQA	darumeru/ruTiE	darumeru/ruWorldTree	nlpcoreteam/ruMMLU	russiannlp/rucola_custom
+0.621	0.404	0.226	0.740	0.483	0.544	0.998	0.992	0.482	0.732	0.540	0.885	0.510	0.539
+INFO: 2024-07-13 16:05:44,985: llmtf.base.darumeru/cp_para_en: Processing Dataset: 675.46s
+INFO: 2024-07-13 16:05:45,001: llmtf.base.darumeru/cp_para_en: Results for darumeru/cp_para_en:
+INFO: 2024-07-13 16:05:45,005: llmtf.base.darumeru/cp_para_en: {'symbol_per_token': 3.9643938387294457, 'len': 0.9964159562157313, 'lcs': 0.8708087545759775}
+INFO: 2024-07-13 16:05:45,005: llmtf.base.evaluator: Ended eval
+INFO: 2024-07-13 16:05:45,019: llmtf.base.evaluator:
+mean	daru/treewayextractive	darumeru/MultiQ	darumeru/PARus	darumeru/RCB	darumeru/RWSD	darumeru/cp_para_en	darumeru/cp_sent_en	darumeru/cp_sent_ru	darumeru/ruMMLU	darumeru/ruOpenBookQA	darumeru/ruTiE	darumeru/ruWorldTree	nlpcoreteam/ruMMLU	russiannlp/rucola_custom
+0.639	0.404	0.226	0.740	0.483	0.544	0.871	0.998	0.992	0.482	0.732	0.540	0.885	0.510	0.539
+INFO: 2024-07-13 16:08:51,609: llmtf.base.nlpcoreteam/enMMLU: Processing Dataset: 543.18s
+INFO: 2024-07-13 16:08:51,613: llmtf.base.nlpcoreteam/enMMLU: Results for nlpcoreteam/enMMLU:
+INFO: 2024-07-13 16:08:51,652: llmtf.base.nlpcoreteam/enMMLU:                                        metric
+subject
+abstract_algebra                     0.330000
+anatomy                              0.629630
+astronomy                            0.657895
+business_ethics                      0.650000
+clinical_knowledge                   0.675472
+college_biology                      0.729167
+college_chemistry                    0.490000
+college_computer_science             0.540000
+college_mathematics                  0.380000
+college_medicine                     0.641618
+college_physics                      0.343137
+computer_security                    0.700000
+conceptual_physics                   0.561702
+econometrics                         0.456140
+electrical_engineering               0.600000
+elementary_mathematics               0.412698
+formal_logic                         0.468254
+global_facts                         0.310000
+high_school_biology                  0.787097
+high_school_chemistry                0.443350
+high_school_computer_science         0.670000
+high_school_european_history         0.763636
+high_school_geography                0.792929
+high_school_government_and_politics  0.880829
+high_school_macroeconomics           0.612821
+high_school_mathematics              0.337037
+high_school_microeconomics           0.642857
+high_school_physics                  0.344371
+high_school_psychology               0.823853
+high_school_statistics               0.472222
+high_school_us_history               0.794118
+high_school_world_history            0.818565
+human_aging                          0.726457
+human_sexuality                      0.740458
+international_law                    0.727273
+jurisprudence                        0.722222
+logical_fallacies                    0.760736
+machine_learning                     0.455357
+management                           0.776699
+marketing                            0.858974
+medical_genetics                     0.690000
+miscellaneous                        0.828863
+moral_disputes                       0.679191
+moral_scenarios                      0.234637
+nutrition                            0.715686
+philosophy                           0.655949
+prehistory                           0.672840
+professional_accounting              0.464539
+professional_law                     0.470013
+professional_medicine                0.705882
+professional_psychology              0.617647
+public_relations                     0.636364
+security_studies                     0.673469
+sociology                            0.855721
+us_foreign_policy                    0.850000
+virology                             0.524096
+world_religions                      0.859649
+INFO: 2024-07-13 16:08:51,659: llmtf.base.nlpcoreteam/enMMLU:                                    metric
+subject
+STEM                             0.514113
+humanities                       0.663622
+other (business, health, misc.)  0.656994
+social sciences                  0.715257
+INFO: 2024-07-13 16:08:51,667: llmtf.base.nlpcoreteam/enMMLU: {'acc': 0.6374965559822189}
+INFO: 2024-07-13 16:08:51,699: llmtf.base.evaluator: Ended eval
+INFO: 2024-07-13 16:08:51,725: llmtf.base.evaluator:
+mean	daru/treewayextractive	darumeru/MultiQ	darumeru/PARus	darumeru/RCB	darumeru/RWSD	darumeru/cp_para_en	darumeru/cp_sent_en	darumeru/cp_sent_ru	darumeru/ruMMLU	darumeru/ruOpenBookQA	darumeru/ruTiE	darumeru/ruWorldTree	nlpcoreteam/enMMLU	nlpcoreteam/ruMMLU	russiannlp/rucola_custom
+0.639	0.404	0.226	0.740	0.483	0.544	0.871	0.998	0.992	0.482	0.732	0.540	0.885	0.637	0.510	0.539
+INFO: 2024-07-13 16:09:46,454: llmtf.base.darumeru/USE: Processing Dataset: 644.55s
+INFO: 2024-07-13 16:09:46,455: llmtf.base.darumeru/USE: Results for darumeru/USE:
+INFO: 2024-07-13 16:09:46,460: llmtf.base.darumeru/USE: {'grade_norm': 0.06372549019607841}
+INFO: 2024-07-13 16:09:46,463: llmtf.base.evaluator: Ended eval
+INFO: 2024-07-13 16:09:46,471: llmtf.base.evaluator:
+mean	daru/treewayextractive	darumeru/MultiQ	darumeru/PARus	darumeru/RCB	darumeru/RWSD	darumeru/USE	darumeru/cp_para_en	darumeru/cp_sent_en	darumeru/cp_sent_ru	darumeru/ruMMLU	darumeru/ruOpenBookQA	darumeru/ruTiE	darumeru/ruWorldTree	nlpcoreteam/enMMLU	nlpcoreteam/ruMMLU	russiannlp/rucola_custom
+0.603	0.404	0.226	0.740	0.483	0.544	0.064	0.871	0.998	0.992	0.482	0.732	0.540	0.885	0.637	0.510	0.539
+INFO: 2024-07-13 16:11:54,088: llmtf.base.darumeru/cp_para_ru: Processing Dataset: 902.88s
+INFO: 2024-07-13 16:11:54,105: llmtf.base.darumeru/cp_para_ru: Results for darumeru/cp_para_ru:
+INFO: 2024-07-13 16:11:54,109: llmtf.base.darumeru/cp_para_ru: {'symbol_per_token': 2.469447282753646, 'len': 0.992235195524327, 'lcs': 0.8233434585815769}
+INFO: 2024-07-13 16:11:54,110: llmtf.base.evaluator: Ended eval
+INFO: 2024-07-13 16:11:54,121: llmtf.base.evaluator:
+mean	daru/treewayextractive	darumeru/MultiQ	darumeru/PARus	darumeru/RCB	darumeru/RWSD	darumeru/USE	darumeru/cp_para_en	darumeru/cp_para_ru	darumeru/cp_sent_en	darumeru/cp_sent_ru	darumeru/ruMMLU	darumeru/ruOpenBookQA	darumeru/ruTiE	darumeru/ruWorldTree	nlpcoreteam/enMMLU	nlpcoreteam/ruMMLU	russiannlp/rucola_custom
+0.616	0.404	0.226	0.740	0.483	0.544	0.064	0.871	0.823	0.998	0.992	0.482	0.732	0.540	0.885	0.637	0.510	0.539
+INFO: 2024-07-13 16:31:24,219: llmtf.base.daru/treewayabstractive: Processing Dataset: 2747.36s
+INFO: 2024-07-13 16:31:24,232: llmtf.base.daru/treewayabstractive: Results for daru/treewayabstractive:
+INFO: 2024-07-13 16:31:24,273: llmtf.base.daru/treewayabstractive: {'rouge1': 0.3460301339685849, 'rouge2': 0.12444606939274086}
+INFO: 2024-07-13 16:31:24,276: llmtf.base.evaluator: Ended eval
+INFO: 2024-07-13 16:31:24,499: llmtf.base.evaluator:
+mean	daru/treewayabstractive	daru/treewayextractive	darumeru/MultiQ	darumeru/PARus	darumeru/RCB	darumeru/RWSD	darumeru/USE	darumeru/cp_para_en	darumeru/cp_para_ru	darumeru/cp_sent_en	darumeru/cp_sent_ru	darumeru/ruMMLU	darumeru/ruOpenBookQA	darumeru/ruTiE	darumeru/ruWorldTree	nlpcoreteam/enMMLU	nlpcoreteam/ruMMLU	russiannlp/rucola_custom
+0.595	0.235	0.404	0.226	0.740	0.483	0.544	0.064	0.871	0.823	0.998	0.992	0.482	0.732	0.540	0.885	0.637	0.510	0.539

llmtf_eval_k0_bs1/evaluation_results.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ mean daru/treewayabstractive daru/treewayextractive darumeru/MultiQ darumeru/PARus darumeru/RCB darumeru/RWSD darumeru/USE darumeru/cp_para_en darumeru/cp_para_ru darumeru/cp_sent_en darumeru/cp_sent_ru darumeru/ruMMLU darumeru/ruOpenBookQA darumeru/ruTiE darumeru/ruWorldTree nlpcoreteam/enMMLU nlpcoreteam/ruMMLU russiannlp/rucola_custom
2	+ 0.595 0.235 0.404 0.226 0.740 0.483 0.544 0.064 0.871 0.823 0.998 0.992 0.482 0.732 0.540 0.885 0.637 0.510 0.539

llmtf_eval_k0_bs1/nlpcoreteam_enMMLU.jsonl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f45e9f74610e1ae2cedd9582eb3ef2ff41f634bde9473b2be95fd7cc6d9acbe0
+size 37222812

llmtf_eval_k0_bs1/nlpcoreteam_enMMLU_params.jsonl ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+    "custom_generation_config": null,
+    "model_params": {
+        "model_name_or_path": "openchat/openchat-3.5-0106",
+        "generation_config": {
+            "_from_model_config": true,
+            "bos_token_id": 1,
+            "do_sample": true,
+            "eos_token_id": [
+                32000
+            ],
+            "max_length": 8192,
+            "max_new_tokens": 64,
+            "pad_token_id": 32000,
+            "stop_strings": [],
+            "temperature": 0.1,
+            "top_k": 40,
+            "top_p": 0.9,
+            "transformers_version": "4.38.2"
+        },
+        "conversation_template": {
+            "system_prompt": "",
+            "system_message_template": "{role} {content}<|end_of_turn|>",
+            "user_message_template": "{role} {content}<|end_of_turn|>",
+            "bot_message_template": "{role} {content}<|end_of_turn|>",
+            "bot_message_template_incomplete": "{role} {content}",
+            "user_role": "GPT4 Correct User:",
+            "bot_role": "GPT4 Correct Assistant:",
+            "system_role": "GPT4 Correct System:",
+            "global_prefix": "<s>",
+            "suffix": "GPT4 Correct Assistant:",
+            "add_special_tokens": false,
+            "eos_token": "<|end_of_turn|>"
+        },
+        "load_in_8bit": false,
+        "torch_dtype": "auto",
+        "use_flash_attention_2": true,
+        "device_map": "cuda:0",
+        "use_fast_tokenizer": true,
+        "leading_space": true,
+        "space_token": 28705,
+        "trust_remote_code": [
+            false
+        ],
+        "max_model_len": 8192
+    },
+    "task_params": {
+        "max_len": 4000,
+        "few_shot_count": 0,
+        "batch_size": 1,
+        "max_sample_per_dataset": 10000000000000,
+        "method": "calculate_tokens_proba"
+    }
+}