Add files using upload-large-folder tool
Browse files. This view is limited to 50 files because it contains too many changes.
See raw diff
- scripts/yans/eval/lm-evaluation-harness/models/abeja-gpt-neox-japanese-2.7b/harness.jsquad-1.2.sh +3 -0
- scripts/yans/eval/lm-evaluation-harness/models/abeja-gpt-neox-japanese-2.7b/harness.sh +3 -0
- scripts/yans/eval/lm-evaluation-harness/models/abeja-gpt-neox-japanese-2.7b/result.json +59 -0
- scripts/yans/eval/lm-evaluation-harness/models/abeja-gpt-neox-japanese-2.7b/result.jsquad-1.2.json +22 -0
- scripts/yans/eval/lm-evaluation-harness/models/community/cyberagent-open-calm-instruct-1b_1.3.0/README.md +6 -0
- scripts/yans/eval/lm-evaluation-harness/models/community/cyberagent-open-calm-instruct-1b_1.3.0/harness.sh +12 -0
- scripts/yans/eval/lm-evaluation-harness/models/community/cyberagent-open-calm-instruct-1b_1.3.0/result.json +48 -0
- scripts/yans/eval/lm-evaluation-harness/models/community/cyberagent-open-calm-instruct-3b_1.3.0/README.md +6 -0
- scripts/yans/eval/lm-evaluation-harness/models/community/cyberagent-open-calm-instruct-3b_1.3.0/harness.sh +12 -0
- scripts/yans/eval/lm-evaluation-harness/models/line-corporation/line-corporation-japanese-large-lm-1.7b-instruction-sft/harness.sh +3 -0
- scripts/yans/eval/lm-evaluation-harness/models/line-corporation/line-corporation-japanese-large-lm-1.7b-instruction-sft/result.json +71 -0
- scripts/yans/eval/lm-evaluation-harness/models/line-corporation/line-corporation-japanese-large-lm-1.7b/harness.sh +3 -0
- scripts/yans/eval/lm-evaluation-harness/models/line-corporation/line-corporation-japanese-large-lm-1.7b/result.json +71 -0
- scripts/yans/eval/lm-evaluation-harness/models/line-corporation/line-corporation-japanese-large-lm-3.6b-instruction-sft/harness.sh +3 -0
- scripts/yans/eval/lm-evaluation-harness/models/line-corporation/line-corporation-japanese-large-lm-3.6b-instruction-sft/result.json +71 -0
- scripts/yans/eval/lm-evaluation-harness/models/line-corporation/line-corporation-japanese-large-lm-3.6b/harness.sh +3 -0
- scripts/yans/eval/lm-evaluation-harness/models/line-corporation/line-corporation-japanese-large-lm-3.6b/result.json +71 -0
- scripts/yans/eval/lm-evaluation-harness/models/rinna/harness.conf +4 -0
- scripts/yans/eval/lm-evaluation-harness/models/rinna/rinna-bilingual-gpt-neox-4b-instruction-sft/result.json +71 -0
- scripts/yans/eval/lm-evaluation-harness/models/rinna/rinna-bilingual-gpt-neox-4b/harness.jsquad-1.2.sh +3 -0
- scripts/yans/eval/lm-evaluation-harness/models/rinna/rinna-bilingual-gpt-neox-4b/harness.sh +3 -0
- scripts/yans/eval/lm-evaluation-harness/models/rinna/rinna-bilingual-gpt-neox-4b/result.json +71 -0
- scripts/yans/eval/lm-evaluation-harness/models/rinna/rinna-bilingual-gpt-neox-4b/result.jsquad-1.2.json +22 -0
- scripts/yans/eval/lm-evaluation-harness/models/rinna/rinna-japanese-gpt-1b/harness.jsquad-1.2.sh +3 -0
- scripts/yans/eval/lm-evaluation-harness/models/rinna/rinna-japanese-gpt-1b/harness.sh +3 -0
- scripts/yans/eval/lm-evaluation-harness/models/rinna/rinna-japanese-gpt-1b/result.json +71 -0
- scripts/yans/eval/lm-evaluation-harness/models/rinna/rinna-japanese-gpt-1b/result.jsquad-1.2.json +22 -0
- scripts/yans/eval/lm-evaluation-harness/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-ppo/harness.jsquad-1.2.sh +3 -0
- scripts/yans/eval/lm-evaluation-harness/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-ppo/harness.sh +3 -0
- scripts/yans/eval/lm-evaluation-harness/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-ppo/result.json +71 -0
- scripts/yans/eval/lm-evaluation-harness/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-ppo/result.jsquad-1.2.json +22 -0
- scripts/yans/eval/lm-evaluation-harness/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-ppo/result.mgsm.json +0 -0
- scripts/yans/eval/lm-evaluation-harness/models/rinna/rinna-japanese-gpt-neox-3.6b/harness.jsquad-1.2.sh +3 -0
- scripts/yans/eval/lm-evaluation-harness/models/rinna/rinna-japanese-gpt-neox-3.6b/harness.sh +3 -0
- scripts/yans/eval/lm-evaluation-harness/models/rinna/rinna-japanese-gpt-neox-3.6b/result.json +71 -0
- scripts/yans/eval/lm-evaluation-harness/models/rinna/rinna-japanese-gpt-neox-3.6b/result.jsquad-1.2.json +22 -0
- scripts/yans/eval/lm-evaluation-harness/models/rinna/rinna-japanese-gpt-neox-3.6b/result.mgsm.json +0 -0
- scripts/yans/eval/lm-evaluation-harness/models/stabilityai/experiments/stablelm-jp-1b-compact-v1/harness.sh +13 -0
- scripts/yans/eval/lm-evaluation-harness/models/stabilityai/experiments/stablelm-jp-1b-compact-v1/result.json +71 -0
- scripts/yans/eval/lm-evaluation-harness/models/stabilityai/experiments/stablelm-jp-1b-jav1-sl2k-300b/harness.sh +13 -0
- scripts/yans/eval/lm-evaluation-harness/models/stabilityai/experiments/stablelm-jp-1b-jav1-sl2k-300b/result.json +48 -0
- scripts/yans/eval/lm-evaluation-harness/models/stabilityai/experiments/stablelm-jp-1b-jav1-sl2k-slw-300b/harness.sh +13 -0
- scripts/yans/eval/lm-evaluation-harness/models/stabilityai/experiments/stablelm-jp-1b-jav1-sl2k-slw-300b/result.json +71 -0
- scripts/yans/eval/lm-evaluation-harness/models/stabilityai/experiments/stablelm-jp-1b-jav1_rp-sl2k-slw-300b/harness.sh +18 -0
- scripts/yans/eval/lm-evaluation-harness/models/stabilityai/experiments/stablelm-jp-1b-jav1_rp-sl2k-slw-300b/result.json +71 -0
- scripts/yans/eval/lm-evaluation-harness/models/stabilityai/experiments/stablelm-jp-1b-rp_then_jav1-294b/harness.sh +18 -0
- scripts/yans/eval/lm-evaluation-harness/models/stabilityai/experiments/stablelm-jp-1b-rp_then_jav1-294b/result.json +71 -0
- scripts/yans/eval/lm-evaluation-harness/models/stabilityai/experiments/stablelm-jp-3b-ja50_rp50-700b/harness_template-0.1.sh +12 -0
- scripts/yans/eval/lm-evaluation-harness/models/stabilityai/experiments/stablelm-jp-3b-ja50_rp50-700b/harness_template-0.2.sh +12 -0
- scripts/yans/eval/lm-evaluation-harness/models/stabilityai/experiments/stablelm-jp-3b-ja50_rp50-700b/result_template-0.1.json +71 -0
scripts/yans/eval/lm-evaluation-harness/models/abeja-gpt-neox-japanese-2.7b/harness.jsquad-1.2.sh
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MODEL_ARGS="pretrained=abeja/gpt-neox-japanese-2.7b,device_map=auto,torch_dtype=auto"
|
| 2 |
+
TASK="jsquad-1.2-0.2"
|
| 3 |
+
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "3" --device "cuda" --output_path "models/abeja-gpt-neox-japanese-2.7b/result.jsquad-1.2.json"
|
scripts/yans/eval/lm-evaluation-harness/models/abeja-gpt-neox-japanese-2.7b/harness.sh
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MODEL_ARGS="pretrained=abeja/gpt-neox-japanese-2.7b"
|
| 2 |
+
TASK="jcommonsenseqa-1.1-0.2,jnli-1.1-0.2,marc_ja-1.1-0.2,jsquad-1.1-0.2,xlsum_ja"
|
| 3 |
+
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2,3,3,3,1" --device "cuda" --output_path "models/abeja-gpt-neox-japanese-2.7b/result.json"
|
scripts/yans/eval/lm-evaluation-harness/models/abeja-gpt-neox-japanese-2.7b/result.json
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": {
|
| 3 |
+
"jcommonsenseqa-1.1-0.2": {
|
| 4 |
+
"acc": 0.20017873100983022,
|
| 5 |
+
"acc_stderr": 0.011966979264632673,
|
| 6 |
+
"acc_norm": 0.22609472743521,
|
| 7 |
+
"acc_norm_stderr": 0.012510314229861862
|
| 8 |
+
},
|
| 9 |
+
"jnli-1.1-0.2": {
|
| 10 |
+
"acc": 0.3972884141331142,
|
| 11 |
+
"acc_stderr": 0.009920570907906705,
|
| 12 |
+
"acc_norm": 0.34798685291700904,
|
| 13 |
+
"acc_norm_stderr": 0.009656917922100158
|
| 14 |
+
},
|
| 15 |
+
"marc_ja-1.1-0.2": {
|
| 16 |
+
"acc": 0.749912800837112,
|
| 17 |
+
"acc_stderr": 0.005719527388015089,
|
| 18 |
+
"acc_norm": 0.749912800837112,
|
| 19 |
+
"acc_norm_stderr": 0.005719527388015089
|
| 20 |
+
},
|
| 21 |
+
"jsquad-1.1-0.2": {
|
| 22 |
+
"exact_match": 13.665015758667266,
|
| 23 |
+
"f1": 22.909453892411364
|
| 24 |
+
},
|
| 25 |
+
"xlsum_ja": {
|
| 26 |
+
"rouge2": 6.149952794206885
|
| 27 |
+
},
|
| 28 |
+
"xwinograd_ja": {
|
| 29 |
+
"acc": 0.6037539103232534,
|
| 30 |
+
"acc_stderr": 0.01580264261655725
|
| 31 |
+
}
|
| 32 |
+
},
|
| 33 |
+
"versions": {
|
| 34 |
+
"jcommonsenseqa-1.1-0.2": 1.1,
|
| 35 |
+
"jnli-1.1-0.2": 1.1,
|
| 36 |
+
"jsquad-1.1-0.2": 1.1,
|
| 37 |
+
"marc_ja-1.1-0.2": 1.1,
|
| 38 |
+
"xlsum_ja": 1.0,
|
| 39 |
+
"xwinograd_ja": 1.0
|
| 40 |
+
},
|
| 41 |
+
"config": {
|
| 42 |
+
"model": "hf-causal",
|
| 43 |
+
"model_args": "pretrained=abeja/gpt-neox-japanese-2.7b",
|
| 44 |
+
"num_fewshot": [
|
| 45 |
+
2,
|
| 46 |
+
3,
|
| 47 |
+
3,
|
| 48 |
+
3,
|
| 49 |
+
1,
|
| 50 |
+
0
|
| 51 |
+
],
|
| 52 |
+
"batch_size": null,
|
| 53 |
+
"device": "cuda",
|
| 54 |
+
"no_cache": false,
|
| 55 |
+
"limit": null,
|
| 56 |
+
"bootstrap_iters": 100000,
|
| 57 |
+
"description_dict": {}
|
| 58 |
+
}
|
| 59 |
+
}
|
scripts/yans/eval/lm-evaluation-harness/models/abeja-gpt-neox-japanese-2.7b/result.jsquad-1.2.json
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": {
|
| 3 |
+
"jsquad-1.2-0.2": {
|
| 4 |
+
"exact_match": 15.803692030616839,
|
| 5 |
+
"f1": 25.18326978234071
|
| 6 |
+
}
|
| 7 |
+
},
|
| 8 |
+
"versions": {
|
| 9 |
+
"jsquad-1.2-0.2": 1.2
|
| 10 |
+
},
|
| 11 |
+
"config": {
|
| 12 |
+
"model": "hf-causal",
|
| 13 |
+
"model_args": "pretrained=abeja/gpt-neox-japanese-2.7b,device_map=auto,torch_dtype=auto",
|
| 14 |
+
"num_fewshot": 3,
|
| 15 |
+
"batch_size": null,
|
| 16 |
+
"device": "cuda",
|
| 17 |
+
"no_cache": false,
|
| 18 |
+
"limit": null,
|
| 19 |
+
"bootstrap_iters": 100000,
|
| 20 |
+
"description_dict": {}
|
| 21 |
+
}
|
| 22 |
+
}
|
scripts/yans/eval/lm-evaluation-harness/models/community/cyberagent-open-calm-instruct-1b_1.3.0/README.md
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# cyberagent-open-calm-instruct-1b_1.3.0
|
| 2 |
+
- This is a supervised finetuned version of the base model [`cyberagent/open-calm-1b`](https://huggingface.co/cyberagent/open-calm-1b).
|
| 3 |
+
- The base model was fine-tuned on the datasets below by [Stability AI Japan](https://ja.stability.ai/).
|
| 4 |
+
- [japanese_hh-rlhf-49k](https://huggingface.co/datasets/fujiki/japanese_hh-rlhf-49k)
|
| 5 |
+
- [databricks-dolly-15k-ja](https://huggingface.co/datasets/kunishou/databricks-dolly-15k-ja)
|
| 6 |
+
- [japanese_alpaca_data](https://huggingface.co/datasets/fujiki/japanese_alpaca_data)
|
scripts/yans/eval/lm-evaluation-harness/models/community/cyberagent-open-calm-instruct-1b_1.3.0/harness.sh
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
set -eu
|
| 3 |
+
PROJECT_DIR=""
|
| 4 |
+
MODEL_ARGS="pretrained=${PROJECT_DIR}/instruction_tuning/outputs/open-calm-instruct-1b_1.3.0,tokenizer=cyberagent/open-calm-1b"
|
| 5 |
+
TASK="jsquad-1.1-0.3,jcommonsenseqa-1.1-0.3,jnli-1.1-0.3,marc_ja-1.1-0.3"
|
| 6 |
+
python main.py \
|
| 7 |
+
--model hf-causal \
|
| 8 |
+
--model_args $MODEL_ARGS \
|
| 9 |
+
--tasks $TASK \
|
| 10 |
+
--num_fewshot "2,3,3,3" \
|
| 11 |
+
--device "cuda" \
|
| 12 |
+
--output_path "models/open-calm-instruct-1b_1.3.0/result.json"
|
scripts/yans/eval/lm-evaluation-harness/models/community/cyberagent-open-calm-instruct-1b_1.3.0/result.json
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": {
|
| 3 |
+
"jcommonsenseqa-1.1-0.3": {
|
| 4 |
+
"acc": 0.7015192135835567,
|
| 5 |
+
"acc_stderr": 0.013685386698397504,
|
| 6 |
+
"acc_norm": 0.6255585344057194,
|
| 7 |
+
"acc_norm_stderr": 0.014474549079455518
|
| 8 |
+
},
|
| 9 |
+
"jnli-1.1-0.3": {
|
| 10 |
+
"acc": 0.3011503697617091,
|
| 11 |
+
"acc_stderr": 0.00930063317508552,
|
| 12 |
+
"acc_norm": 0.25842235004108466,
|
| 13 |
+
"acc_norm_stderr": 0.008875080429298606
|
| 14 |
+
},
|
| 15 |
+
"marc_ja-1.1-0.3": {
|
| 16 |
+
"acc": 0.877431906614786,
|
| 17 |
+
"acc_stderr": 0.004361701432875794,
|
| 18 |
+
"acc_norm": 0.877431906614786,
|
| 19 |
+
"acc_norm_stderr": 0.004361701432875794
|
| 20 |
+
},
|
| 21 |
+
"jsquad-1.1-0.3": {
|
| 22 |
+
"exact_match": 35.929761368752814,
|
| 23 |
+
"f1": 45.27144783040928
|
| 24 |
+
}
|
| 25 |
+
},
|
| 26 |
+
"versions": {
|
| 27 |
+
"jcommonsenseqa-1.1-0.3": 1.1,
|
| 28 |
+
"jnli-1.1-0.3": 1.1,
|
| 29 |
+
"jsquad-1.1-0.3": 1.1,
|
| 30 |
+
"marc_ja-1.1-0.3": 1.1
|
| 31 |
+
},
|
| 32 |
+
"config": {
|
| 33 |
+
"model": "hf-causal",
|
| 34 |
+
"model_args": "pretrained=${PROJECT_DIR}/instruction_tuning/outputs/open-calm-instruct-1b_1.3.0,tokenizer=cyberagent/open-calm-1b",
|
| 35 |
+
"num_fewshot": [
|
| 36 |
+
2,
|
| 37 |
+
3,
|
| 38 |
+
3,
|
| 39 |
+
3
|
| 40 |
+
],
|
| 41 |
+
"batch_size": null,
|
| 42 |
+
"device": "cuda",
|
| 43 |
+
"no_cache": false,
|
| 44 |
+
"limit": null,
|
| 45 |
+
"bootstrap_iters": 100000,
|
| 46 |
+
"description_dict": {}
|
| 47 |
+
}
|
| 48 |
+
}
|
scripts/yans/eval/lm-evaluation-harness/models/community/cyberagent-open-calm-instruct-3b_1.3.0/README.md
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# cyberagent-open-calm-instruct-3b_1.3.0
|
| 2 |
+
- This is a supervised finetuned version of the base model [`cyberagent/open-calm-3b`](https://huggingface.co/cyberagent/open-calm-3b).
|
| 3 |
+
- The base model was fine-tuned on the datasets below by [Stability AI Japan](https://ja.stability.ai/).
|
| 4 |
+
- [japanese_hh-rlhf-49k](https://huggingface.co/datasets/fujiki/japanese_hh-rlhf-49k)
|
| 5 |
+
- [databricks-dolly-15k-ja](https://huggingface.co/datasets/kunishou/databricks-dolly-15k-ja)
|
| 6 |
+
- [japanese_alpaca_data](https://huggingface.co/datasets/fujiki/japanese_alpaca_data)
|
scripts/yans/eval/lm-evaluation-harness/models/community/cyberagent-open-calm-instruct-3b_1.3.0/harness.sh
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
set -eu
|
| 3 |
+
PROJECT_DIR=""
|
| 4 |
+
MODEL_ARGS="pretrained=${PROJECT_DIR}/instruction_tuning/outputs/open-calm-instruct-3b_1.3.0,tokenizer=cyberagent/open-calm-3b"
|
| 5 |
+
TASK="jcommonsenseqa-1.1-0.3,jnli-1.1-0.3,marc_ja-1.1-0.3,jsquad-1.1-0.3,jaqket_v2-0.1-0.3,xlsum_ja-1.0-0.3,xwinograd_ja,mgsm-1.0-0.3"
|
| 6 |
+
python main.py \
|
| 7 |
+
--model hf-causal \
|
| 8 |
+
--model_args $MODEL_ARGS \
|
| 9 |
+
--tasks $TASK \
|
| 10 |
+
--num_fewshot "3,3,3,2,1,1,0,5" \
|
| 11 |
+
--device "cuda" \
|
| 12 |
+
--output_path "models/community/cyberagent-open-calm-instruct-3b_1.3.0/result.json"
|
scripts/yans/eval/lm-evaluation-harness/models/line-corporation/line-corporation-japanese-large-lm-1.7b-instruction-sft/harness.sh
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MODEL_ARGS="pretrained=line-corporation/japanese-large-lm-1.7b-instruction-sft,use_fast=False,device_map=auto,torch_dtype=auto"
|
| 2 |
+
TASK="jcommonsenseqa-1.1-0.5,jnli-1.1-0.5,marc_ja-1.1-0.5,jsquad-1.1-0.5,jaqket_v2-0.2-0.5,xlsum_ja-1.0-0.5,xwinograd_ja,mgsm-1.0-0.5"
|
| 3 |
+
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "3,3,3,2,1,1,0,5" --device "cuda" --output_path "models/line-corporation/line-corporation-japanese-large-lm-1.7b-instruction-sft/result.json"
|
scripts/yans/eval/lm-evaluation-harness/models/line-corporation/line-corporation-japanese-large-lm-1.7b-instruction-sft/result.json
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": {
|
| 3 |
+
"jcommonsenseqa-1.1-0.5": {
|
| 4 |
+
"acc": 0.22430741733690795,
|
| 5 |
+
"acc_stderr": 0.012475148816050531,
|
| 6 |
+
"acc_norm": 0.23681858802502234,
|
| 7 |
+
"acc_norm_stderr": 0.01271454677969028
|
| 8 |
+
},
|
| 9 |
+
"jnli-1.1-0.5": {
|
| 10 |
+
"acc": 0.34346754313886607,
|
| 11 |
+
"acc_stderr": 0.009627197865307401,
|
| 12 |
+
"acc_norm": 0.3011503697617091,
|
| 13 |
+
"acc_norm_stderr": 0.009300633175085522
|
| 14 |
+
},
|
| 15 |
+
"marc_ja-1.1-0.5": {
|
| 16 |
+
"acc": 0.8036788114609126,
|
| 17 |
+
"acc_stderr": 0.005283057698929343,
|
| 18 |
+
"acc_norm": 0.8036788114609126,
|
| 19 |
+
"acc_norm_stderr": 0.005283057698929343
|
| 20 |
+
},
|
| 21 |
+
"xwinograd_ja": {
|
| 22 |
+
"acc": 0.6329509906152242,
|
| 23 |
+
"acc_stderr": 0.015572714283682185
|
| 24 |
+
},
|
| 25 |
+
"jsquad-1.1-0.5": {
|
| 26 |
+
"exact_match": 30.977037370553806,
|
| 27 |
+
"f1": 48.12415333506568
|
| 28 |
+
},
|
| 29 |
+
"jaqket_v2-0.2-0.5": {
|
| 30 |
+
"exact_match": 25.257731958762886,
|
| 31 |
+
"f1": 40.58191140665372
|
| 32 |
+
},
|
| 33 |
+
"xlsum_ja-1.0-0.5": {
|
| 34 |
+
"rouge2": 1.0385441084792033
|
| 35 |
+
},
|
| 36 |
+
"mgsm-1.0-0.5": {
|
| 37 |
+
"acc": 0.016,
|
| 38 |
+
"acc_stderr": 0.007951661188874354
|
| 39 |
+
}
|
| 40 |
+
},
|
| 41 |
+
"versions": {
|
| 42 |
+
"jcommonsenseqa-1.1-0.5": 1.1,
|
| 43 |
+
"jnli-1.1-0.5": 1.1,
|
| 44 |
+
"marc_ja-1.1-0.5": 1.1,
|
| 45 |
+
"jsquad-1.1-0.5": 1.1,
|
| 46 |
+
"jaqket_v2-0.2-0.5": 0.2,
|
| 47 |
+
"xlsum_ja-1.0-0.5": 1.0,
|
| 48 |
+
"xwinograd_ja": 1.0,
|
| 49 |
+
"mgsm-1.0-0.5": 1.0
|
| 50 |
+
},
|
| 51 |
+
"config": {
|
| 52 |
+
"model": "hf-causal",
|
| 53 |
+
"model_args": "pretrained=line-corporation/japanese-large-lm-1.7b-instruction-sft,use_fast=False,device_map=auto,torch_dtype=auto",
|
| 54 |
+
"num_fewshot": [
|
| 55 |
+
3,
|
| 56 |
+
3,
|
| 57 |
+
3,
|
| 58 |
+
2,
|
| 59 |
+
1,
|
| 60 |
+
1,
|
| 61 |
+
0,
|
| 62 |
+
5
|
| 63 |
+
],
|
| 64 |
+
"batch_size": null,
|
| 65 |
+
"device": "cuda",
|
| 66 |
+
"no_cache": false,
|
| 67 |
+
"limit": null,
|
| 68 |
+
"bootstrap_iters": 100000,
|
| 69 |
+
"description_dict": {}
|
| 70 |
+
}
|
| 71 |
+
}
|
scripts/yans/eval/lm-evaluation-harness/models/line-corporation/line-corporation-japanese-large-lm-1.7b/harness.sh
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MODEL_ARGS="pretrained=line-corporation/japanese-large-lm-1.7b,use_fast=False,device_map=auto,torch_dtype=auto"
|
| 2 |
+
TASK="jcommonsenseqa-1.1-0.2,jnli-1.1-0.2,marc_ja-1.1-0.2,jsquad-1.1-0.2,jaqket_v2-0.2-0.2,xlsum_ja,xwinograd_ja,mgsm"
|
| 3 |
+
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "3,3,3,2,1,1,0,5" --device "cuda" --output_path "models/line-corporation/line-corporation-japanese-large-lm-1.7b/result.json"
|
scripts/yans/eval/lm-evaluation-harness/models/line-corporation/line-corporation-japanese-large-lm-1.7b/result.json
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": {
|
| 3 |
+
"jcommonsenseqa-1.1-0.2": {
|
| 4 |
+
"acc": 0.30831099195710454,
|
| 5 |
+
"acc_stderr": 0.013811124479483034,
|
| 6 |
+
"acc_norm": 0.26005361930294907,
|
| 7 |
+
"acc_norm_stderr": 0.013119300343161644
|
| 8 |
+
},
|
| 9 |
+
"jnli-1.1-0.2": {
|
| 10 |
+
"acc": 0.35949055053410023,
|
| 11 |
+
"acc_stderr": 0.009728266419780814,
|
| 12 |
+
"acc_norm": 0.300328677074774,
|
| 13 |
+
"acc_norm_stderr": 0.00929339473482123
|
| 14 |
+
},
|
| 15 |
+
"marc_ja-1.1-0.2": {
|
| 16 |
+
"acc": 0.745136186770428,
|
| 17 |
+
"acc_stderr": 0.005796054001130057,
|
| 18 |
+
"acc_norm": 0.745136186770428,
|
| 19 |
+
"acc_norm_stderr": 0.005796054001130057
|
| 20 |
+
},
|
| 21 |
+
"xwinograd_ja": {
|
| 22 |
+
"acc": 0.6611053180396246,
|
| 23 |
+
"acc_stderr": 0.015292727421996942
|
| 24 |
+
},
|
| 25 |
+
"jsquad-1.1-0.2": {
|
| 26 |
+
"exact_match": 56.55110310670869,
|
| 27 |
+
"f1": 69.46989310703984
|
| 28 |
+
},
|
| 29 |
+
"jaqket_v2-0.2-0.2": {
|
| 30 |
+
"exact_match": 52.06185567010309,
|
| 31 |
+
"f1": 60.433303332787865
|
| 32 |
+
},
|
| 33 |
+
"xlsum_ja": {
|
| 34 |
+
"rouge2": 8.408787633129647
|
| 35 |
+
},
|
| 36 |
+
"mgsm": {
|
| 37 |
+
"acc": 0.0,
|
| 38 |
+
"acc_stderr": 0.0
|
| 39 |
+
}
|
| 40 |
+
},
|
| 41 |
+
"versions": {
|
| 42 |
+
"jcommonsenseqa-1.1-0.2": 1.1,
|
| 43 |
+
"jnli-1.1-0.2": 1.1,
|
| 44 |
+
"marc_ja-1.1-0.2": 1.1,
|
| 45 |
+
"jsquad-1.1-0.2": 1.1,
|
| 46 |
+
"jaqket_v2-0.2-0.2": 0.2,
|
| 47 |
+
"xlsum_ja": 1.0,
|
| 48 |
+
"xwinograd_ja": 1.0,
|
| 49 |
+
"mgsm": 1.0
|
| 50 |
+
},
|
| 51 |
+
"config": {
|
| 52 |
+
"model": "hf-causal",
|
| 53 |
+
"model_args": "pretrained=line-corporation/japanese-large-lm-1.7b,use_fast=False,device_map=auto,torch_dtype=auto",
|
| 54 |
+
"num_fewshot": [
|
| 55 |
+
3,
|
| 56 |
+
3,
|
| 57 |
+
3,
|
| 58 |
+
2,
|
| 59 |
+
1,
|
| 60 |
+
1,
|
| 61 |
+
0,
|
| 62 |
+
5
|
| 63 |
+
],
|
| 64 |
+
"batch_size": null,
|
| 65 |
+
"device": "cuda",
|
| 66 |
+
"no_cache": false,
|
| 67 |
+
"limit": null,
|
| 68 |
+
"bootstrap_iters": 100000,
|
| 69 |
+
"description_dict": {}
|
| 70 |
+
}
|
| 71 |
+
}
|
scripts/yans/eval/lm-evaluation-harness/models/line-corporation/line-corporation-japanese-large-lm-3.6b-instruction-sft/harness.sh
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MODEL_ARGS="pretrained=line-corporation/japanese-large-lm-3.6b-instruction-sft,use_fast=False,device_map=auto,torch_dtype=auto"
|
| 2 |
+
TASK="jcommonsenseqa-1.1-0.5,jnli-1.1-0.5,marc_ja-1.1-0.5,jsquad-1.1-0.5,jaqket_v2-0.2-0.5,xlsum_ja-1.0-0.5,xwinograd_ja,mgsm-1.0-0.5"
|
| 3 |
+
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "3,3,3,2,1,1,0,5" --device "cuda" --output_path "models/line-corporation/line-corporation-japanese-large-lm-3.6b-instruction-sft/result.json"
|
scripts/yans/eval/lm-evaluation-harness/models/line-corporation/line-corporation-japanese-large-lm-3.6b-instruction-sft/result.json
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": {
|
| 3 |
+
"jcommonsenseqa-1.1-0.5": {
|
| 4 |
+
"acc": 0.3360142984807864,
|
| 5 |
+
"acc_stderr": 0.014126590011265207,
|
| 6 |
+
"acc_norm": 0.26720285969615726,
|
| 7 |
+
"acc_norm_stderr": 0.013234012242081952
|
| 8 |
+
},
|
| 9 |
+
"jnli-1.1-0.5": {
|
| 10 |
+
"acc": 0.4256368118323747,
|
| 11 |
+
"acc_stderr": 0.010024017935515625,
|
| 12 |
+
"acc_norm": 0.3019720624486442,
|
| 13 |
+
"acc_norm_stderr": 0.009307836171755053
|
| 14 |
+
},
|
| 15 |
+
"marc_ja-1.1-0.5": {
|
| 16 |
+
"acc": 0.5509373894587902,
|
| 17 |
+
"acc_stderr": 0.006615536639080702,
|
| 18 |
+
"acc_norm": 0.5509373894587902,
|
| 19 |
+
"acc_norm_stderr": 0.006615536639080702
|
| 20 |
+
},
|
| 21 |
+
"xwinograd_ja": {
|
| 22 |
+
"acc": 0.6465067778936392,
|
| 23 |
+
"acc_stderr": 0.015445228301221386
|
| 24 |
+
},
|
| 25 |
+
"jsquad-1.1-0.5": {
|
| 26 |
+
"exact_match": 44.371904547501124,
|
| 27 |
+
"f1": 59.516773934435584
|
| 28 |
+
},
|
| 29 |
+
"jaqket_v2-0.2-0.5": {
|
| 30 |
+
"exact_match": 39.86254295532646,
|
| 31 |
+
"f1": 51.98299576521227
|
| 32 |
+
},
|
| 33 |
+
"xlsum_ja-1.0-0.5": {
|
| 34 |
+
"rouge2": 6.577976426409143
|
| 35 |
+
},
|
| 36 |
+
"mgsm-1.0-0.5": {
|
| 37 |
+
"acc": 0.024,
|
| 38 |
+
"acc_stderr": 0.009699087026964249
|
| 39 |
+
}
|
| 40 |
+
},
|
| 41 |
+
"versions": {
|
| 42 |
+
"jcommonsenseqa-1.1-0.5": 1.1,
|
| 43 |
+
"jnli-1.1-0.5": 1.1,
|
| 44 |
+
"marc_ja-1.1-0.5": 1.1,
|
| 45 |
+
"jsquad-1.1-0.5": 1.1,
|
| 46 |
+
"jaqket_v2-0.2-0.5": 0.2,
|
| 47 |
+
"xlsum_ja-1.0-0.5": 1.0,
|
| 48 |
+
"xwinograd_ja": 1.0,
|
| 49 |
+
"mgsm-1.0-0.5": 1.0
|
| 50 |
+
},
|
| 51 |
+
"config": {
|
| 52 |
+
"model": "hf-causal",
|
| 53 |
+
"model_args": "pretrained=line-corporation/japanese-large-lm-3.6b-instruction-sft,use_fast=False,device_map=auto,torch_dtype=auto",
|
| 54 |
+
"num_fewshot": [
|
| 55 |
+
3,
|
| 56 |
+
3,
|
| 57 |
+
3,
|
| 58 |
+
2,
|
| 59 |
+
1,
|
| 60 |
+
1,
|
| 61 |
+
0,
|
| 62 |
+
5
|
| 63 |
+
],
|
| 64 |
+
"batch_size": null,
|
| 65 |
+
"device": "cuda",
|
| 66 |
+
"no_cache": false,
|
| 67 |
+
"limit": null,
|
| 68 |
+
"bootstrap_iters": 100000,
|
| 69 |
+
"description_dict": {}
|
| 70 |
+
}
|
| 71 |
+
}
|
scripts/yans/eval/lm-evaluation-harness/models/line-corporation/line-corporation-japanese-large-lm-3.6b/harness.sh
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MODEL_ARGS="pretrained=line-corporation/japanese-large-lm-3.6b,use_fast=False,device_map=auto,torch_dtype=auto"
|
| 2 |
+
TASK="jcommonsenseqa-1.1-0.2,jnli-1.1-0.2,marc_ja-1.1-0.2,jsquad-1.1-0.2,jaqket_v2-0.2-0.2,xlsum_ja,xwinograd_ja,mgsm"
|
| 3 |
+
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "3,3,3,2,1,1,0,5" --device "cuda" --output_path "models/line-corporation/line-corporation-japanese-large-lm-3.6b/result.json"
|
scripts/yans/eval/lm-evaluation-harness/models/line-corporation/line-corporation-japanese-large-lm-3.6b/result.json
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": {
|
| 3 |
+
"jcommonsenseqa-1.1-0.2": {
|
| 4 |
+
"acc": 0.24039320822162646,
|
| 5 |
+
"acc_stderr": 0.01278011066769292,
|
| 6 |
+
"acc_norm": 0.2421805183199285,
|
| 7 |
+
"acc_norm_stderr": 0.0128124322893179
|
| 8 |
+
},
|
| 9 |
+
"jnli-1.1-0.2": {
|
| 10 |
+
"acc": 0.29950698438783896,
|
| 11 |
+
"acc_stderr": 0.009286120768078254,
|
| 12 |
+
"acc_norm": 0.30156121610517667,
|
| 13 |
+
"acc_norm_stderr": 0.009304239098715018
|
| 14 |
+
},
|
| 15 |
+
"marc_ja-1.1-0.2": {
|
| 16 |
+
"acc": 0.7939511850017686,
|
| 17 |
+
"acc_stderr": 0.005379506895071017,
|
| 18 |
+
"acc_norm": 0.7939511850017686,
|
| 19 |
+
"acc_norm_stderr": 0.005379506895071017
|
| 20 |
+
},
|
| 21 |
+
"xwinograd_ja": {
|
| 22 |
+
"acc": 0.7028154327424401,
|
| 23 |
+
"acc_stderr": 0.014765597190000436
|
| 24 |
+
},
|
| 25 |
+
"jsquad-1.1-0.2": {
|
| 26 |
+
"exact_match": 62.26924808644755,
|
| 27 |
+
"f1": 74.52057820837234
|
| 28 |
+
},
|
| 29 |
+
"jaqket_v2-0.2-0.2": {
|
| 30 |
+
"exact_match": 67.18213058419244,
|
| 31 |
+
"f1": 74.29659878113482
|
| 32 |
+
},
|
| 33 |
+
"xlsum_ja": {
|
| 34 |
+
"rouge2": 8.610239752200977
|
| 35 |
+
},
|
| 36 |
+
"mgsm": {
|
| 37 |
+
"acc": 0.028,
|
| 38 |
+
"acc_stderr": 0.010454721651927288
|
| 39 |
+
}
|
| 40 |
+
},
|
| 41 |
+
"versions": {
|
| 42 |
+
"jcommonsenseqa-1.1-0.2": 1.1,
|
| 43 |
+
"jnli-1.1-0.2": 1.1,
|
| 44 |
+
"marc_ja-1.1-0.2": 1.1,
|
| 45 |
+
"jsquad-1.1-0.2": 1.1,
|
| 46 |
+
"jaqket_v2-0.2-0.2": 0.2,
|
| 47 |
+
"xlsum_ja": 1.0,
|
| 48 |
+
"xwinograd_ja": 1.0,
|
| 49 |
+
"mgsm": 1.0
|
| 50 |
+
},
|
| 51 |
+
"config": {
|
| 52 |
+
"model": "hf-causal",
|
| 53 |
+
"model_args": "pretrained=line-corporation/japanese-large-lm-3.6b,use_fast=False,device_map=auto,torch_dtype=auto",
|
| 54 |
+
"num_fewshot": [
|
| 55 |
+
3,
|
| 56 |
+
3,
|
| 57 |
+
3,
|
| 58 |
+
2,
|
| 59 |
+
1,
|
| 60 |
+
1,
|
| 61 |
+
0,
|
| 62 |
+
5
|
| 63 |
+
],
|
| 64 |
+
"batch_size": null,
|
| 65 |
+
"device": "cuda",
|
| 66 |
+
"no_cache": false,
|
| 67 |
+
"limit": null,
|
| 68 |
+
"bootstrap_iters": 100000,
|
| 69 |
+
"description_dict": {}
|
| 70 |
+
}
|
| 71 |
+
}
|
scripts/yans/eval/lm-evaluation-harness/models/rinna/harness.conf
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[DEFAULT]
|
| 2 |
+
# Recent Rinna models use the 0.4 prompt, though note that older ones used
|
| 3 |
+
# other prompts.
|
| 4 |
+
prompt = 0.4
|
scripts/yans/eval/lm-evaluation-harness/models/rinna/rinna-bilingual-gpt-neox-4b-instruction-sft/result.json
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": {
|
| 3 |
+
"jcommonsenseqa-1.1-0.5": {
|
| 4 |
+
"acc": 0.49508489722966936,
|
| 5 |
+
"acc_stderr": 0.014952992585674197,
|
| 6 |
+
"acc_norm": 0.4941912421805183,
|
| 7 |
+
"acc_norm_stderr": 0.014952705953248754
|
| 8 |
+
},
|
| 9 |
+
"jnli-1.1-0.5": {
|
| 10 |
+
"acc": 0.47082990961380444,
|
| 11 |
+
"acc_stderr": 0.010119489683056362,
|
| 12 |
+
"acc_norm": 0.45028759244042726,
|
| 13 |
+
"acc_norm_stderr": 0.010086528162038566
|
| 14 |
+
},
|
| 15 |
+
"marc_ja-1.1-0.5": {
|
| 16 |
+
"acc": 0.9527767951892465,
|
| 17 |
+
"acc_stderr": 0.0028211996518060353,
|
| 18 |
+
"acc_norm": 0.9527767951892465,
|
| 19 |
+
"acc_norm_stderr": 0.0028211996518060353
|
| 20 |
+
},
|
| 21 |
+
"xwinograd_ja": {
|
| 22 |
+
"acc": 0.6465067778936392,
|
| 23 |
+
"acc_stderr": 0.015445228301221378
|
| 24 |
+
},
|
| 25 |
+
"jsquad-1.1-0.5": {
|
| 26 |
+
"exact_match": 55.9882935614588,
|
| 27 |
+
"f1": 70.04345164121641
|
| 28 |
+
},
|
| 29 |
+
"jaqket_v2-0.1-0.5": {
|
| 30 |
+
"exact_match": 61.16838487972509,
|
| 31 |
+
"f1": 65.03049022378916
|
| 32 |
+
},
|
| 33 |
+
"xlsum_ja-1.0-0.5": {
|
| 34 |
+
"rouge2": 5.506882882949979
|
| 35 |
+
},
|
| 36 |
+
"mgsm-1.0-0.5": {
|
| 37 |
+
"acc": 0.028,
|
| 38 |
+
"acc_stderr": 0.010454721651927302
|
| 39 |
+
}
|
| 40 |
+
},
|
| 41 |
+
"versions": {
|
| 42 |
+
"jcommonsenseqa-1.1-0.5": 1.1,
|
| 43 |
+
"jnli-1.1-0.5": 1.1,
|
| 44 |
+
"marc_ja-1.1-0.5": 1.1,
|
| 45 |
+
"jsquad-1.1-0.5": 1.1,
|
| 46 |
+
"jaqket_v2-0.1-0.5": 0.1,
|
| 47 |
+
"xlsum_ja-1.0-0.5": 1.0,
|
| 48 |
+
"xwinograd_ja": 1.0,
|
| 49 |
+
"mgsm-1.0-0.5": 1.0
|
| 50 |
+
},
|
| 51 |
+
"config": {
|
| 52 |
+
"model": "hf-causal",
|
| 53 |
+
"model_args": "pretrained=rinna/bilingual-gpt-neox-4b-instruction-sft,use_fast=False,device_map=auto,torch_dtype=auto",
|
| 54 |
+
"num_fewshot": [
|
| 55 |
+
3,
|
| 56 |
+
3,
|
| 57 |
+
3,
|
| 58 |
+
2,
|
| 59 |
+
1,
|
| 60 |
+
1,
|
| 61 |
+
0,
|
| 62 |
+
5
|
| 63 |
+
],
|
| 64 |
+
"batch_size": null,
|
| 65 |
+
"device": "cuda",
|
| 66 |
+
"no_cache": false,
|
| 67 |
+
"limit": null,
|
| 68 |
+
"bootstrap_iters": 100000,
|
| 69 |
+
"description_dict": {}
|
| 70 |
+
}
|
| 71 |
+
}
|
scripts/yans/eval/lm-evaluation-harness/models/rinna/rinna-bilingual-gpt-neox-4b/harness.jsquad-1.2.sh
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MODEL_ARGS="pretrained=rinna/bilingual-gpt-neox-4b,use_fast=False,device_map=auto,torch_dtype=auto"
|
| 2 |
+
TASK="jsquad-1.2-0.2"
|
| 3 |
+
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/rinna/rinna-bilingual-gpt-neox-4b/result.jsquad-1.2.json"
|
scripts/yans/eval/lm-evaluation-harness/models/rinna/rinna-bilingual-gpt-neox-4b/harness.sh
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MODEL_ARGS="pretrained=rinna/bilingual-gpt-neox-4b,use_fast=False,device_map=auto,torch_dtype=auto"
|
| 2 |
+
TASK="jcommonsenseqa-1.1-0.2,jnli-1.1-0.2,marc_ja-1.1-0.2,jsquad-1.1-0.2,jaqket_v2-0.1-0.2,xlsum_ja,xwinograd_ja,mgsm"
|
| 3 |
+
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "3,3,3,2,1,1,0,5" --device "cuda" --output_path "models/rinna/rinna-bilingual-gpt-neox-4b/result.json"
|
scripts/yans/eval/lm-evaluation-harness/models/rinna/rinna-bilingual-gpt-neox-4b/result.json
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": {
|
| 3 |
+
"jcommonsenseqa-1.1-0.2": {
|
| 4 |
+
"acc": 0.20822162645218945,
|
| 5 |
+
"acc_stderr": 0.01214349876971715,
|
| 6 |
+
"acc_norm": 0.22788203753351208,
|
| 7 |
+
"acc_norm_stderr": 0.012545153313075156
|
| 8 |
+
},
|
| 9 |
+
"jnli-1.1-0.2": {
|
| 10 |
+
"acc": 0.5521774856203779,
|
| 11 |
+
"acc_stderr": 0.010081409479626453,
|
| 12 |
+
"acc_norm": 0.5304026294165982,
|
| 13 |
+
"acc_norm_stderr": 0.01011799843670741
|
| 14 |
+
},
|
| 15 |
+
"marc_ja-1.1-0.2": {
|
| 16 |
+
"acc": 0.5955076052352317,
|
| 17 |
+
"acc_stderr": 0.0065276873249124285,
|
| 18 |
+
"acc_norm": 0.5955076052352317,
|
| 19 |
+
"acc_norm_stderr": 0.0065276873249124285
|
| 20 |
+
},
|
| 21 |
+
"xwinograd_ja": {
|
| 22 |
+
"acc": 0.6642335766423357,
|
| 23 |
+
"acc_stderr": 0.01525795361580425
|
| 24 |
+
},
|
| 25 |
+
"jsquad-1.1-0.2": {
|
| 26 |
+
"exact_match": 50.78793336334984,
|
| 27 |
+
"f1": 61.684710792645284
|
| 28 |
+
},
|
| 29 |
+
"jaqket_v2-0.1-0.2": {
|
| 30 |
+
"exact_match": 59.450171821305844,
|
| 31 |
+
"f1": 65.22394415435645
|
| 32 |
+
},
|
| 33 |
+
"xlsum_ja": {
|
| 34 |
+
"rouge2": 5.54788534415756
|
| 35 |
+
},
|
| 36 |
+
"mgsm": {
|
| 37 |
+
"acc": 0.024,
|
| 38 |
+
"acc_stderr": 0.009699087026964261
|
| 39 |
+
}
|
| 40 |
+
},
|
| 41 |
+
"versions": {
|
| 42 |
+
"jcommonsenseqa-1.1-0.2": 1.1,
|
| 43 |
+
"jnli-1.1-0.2": 1.1,
|
| 44 |
+
"marc_ja-1.1-0.2": 1.1,
|
| 45 |
+
"jsquad-1.1-0.2": 1.1,
|
| 46 |
+
"jaqket_v2-0.1-0.2": 0.1,
|
| 47 |
+
"xlsum_ja": 1.0,
|
| 48 |
+
"xwinograd_ja": 1.0,
|
| 49 |
+
"mgsm": 1.0
|
| 50 |
+
},
|
| 51 |
+
"config": {
|
| 52 |
+
"model": "hf-causal",
|
| 53 |
+
"model_args": "pretrained=rinna/bilingual-gpt-neox-4b,use_fast=False,device_map=auto,torch_dtype=auto",
|
| 54 |
+
"num_fewshot": [
|
| 55 |
+
3,
|
| 56 |
+
3,
|
| 57 |
+
3,
|
| 58 |
+
2,
|
| 59 |
+
1,
|
| 60 |
+
1,
|
| 61 |
+
0,
|
| 62 |
+
5
|
| 63 |
+
],
|
| 64 |
+
"batch_size": null,
|
| 65 |
+
"device": "cuda",
|
| 66 |
+
"no_cache": false,
|
| 67 |
+
"limit": null,
|
| 68 |
+
"bootstrap_iters": 100000,
|
| 69 |
+
"description_dict": {}
|
| 70 |
+
}
|
| 71 |
+
}
|
scripts/yans/eval/lm-evaluation-harness/models/rinna/rinna-bilingual-gpt-neox-4b/result.jsquad-1.2.json
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": {
|
| 3 |
+
"jsquad-1.2-0.2": {
|
| 4 |
+
"exact_match": 51.32823052678973,
|
| 5 |
+
"f1": 61.9390389728309
|
| 6 |
+
}
|
| 7 |
+
},
|
| 8 |
+
"versions": {
|
| 9 |
+
"jsquad-1.2-0.2": 1.2
|
| 10 |
+
},
|
| 11 |
+
"config": {
|
| 12 |
+
"model": "hf-causal",
|
| 13 |
+
"model_args": "pretrained=rinna/bilingual-gpt-neox-4b,use_fast=False,device_map=auto,torch_dtype=auto",
|
| 14 |
+
"num_fewshot": 2,
|
| 15 |
+
"batch_size": null,
|
| 16 |
+
"device": "cuda",
|
| 17 |
+
"no_cache": false,
|
| 18 |
+
"limit": null,
|
| 19 |
+
"bootstrap_iters": 100000,
|
| 20 |
+
"description_dict": {}
|
| 21 |
+
}
|
| 22 |
+
}
|
scripts/yans/eval/lm-evaluation-harness/models/rinna/rinna-japanese-gpt-1b/harness.jsquad-1.2.sh
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MODEL_ARGS="pretrained=rinna/japanese-gpt-1b,use_fast=False"
|
| 2 |
+
TASK="jsquad-1.2-0.2"
|
| 3 |
+
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/rinna/rinna-japanese-gpt-1b/result.jsquad-1.2.json"
|
scripts/yans/eval/lm-evaluation-harness/models/rinna/rinna-japanese-gpt-1b/harness.sh
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MODEL_ARGS="pretrained=rinna/japanese-gpt-1b,use_fast=False"
|
| 2 |
+
TASK="jcommonsenseqa-1.1-0.2,jnli-1.1-0.2,marc_ja-1.1-0.2,jsquad-1.1-0.2,jaqket_v2-0.1-0.2,xlsum_ja,xwinograd_ja,mgsm"
|
| 3 |
+
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "3,3,3,2,1,1,0,5" --device "cuda" --output_path "models/rinna/rinna-japanese-gpt-1b/result.json"
|
scripts/yans/eval/lm-evaluation-harness/models/rinna/rinna-japanese-gpt-1b/result.json
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": {
|
| 3 |
+
"jcommonsenseqa-1.1-0.2": {
|
| 4 |
+
"acc": 0.34763181411974975,
|
| 5 |
+
"acc_stderr": 0.014242467674129443,
|
| 6 |
+
"acc_norm": 0.257372654155496,
|
| 7 |
+
"acc_norm_stderr": 0.013075122531072186
|
| 8 |
+
},
|
| 9 |
+
"jnli-1.1-0.2": {
|
| 10 |
+
"acc": 0.37674609695973704,
|
| 11 |
+
"acc_stderr": 0.009823942907406482,
|
| 12 |
+
"acc_norm": 0.3011503697617091,
|
| 13 |
+
"acc_norm_stderr": 0.009300633175085522
|
| 14 |
+
},
|
| 15 |
+
"marc_ja-1.1-0.2": {
|
| 16 |
+
"acc": 0.8786187652598535,
|
| 17 |
+
"acc_stderr": 0.0043130554527802374,
|
| 18 |
+
"acc_norm": 0.8786187652598535,
|
| 19 |
+
"acc_norm_stderr": 0.0043130554527802374
|
| 20 |
+
},
|
| 21 |
+
"xwinograd_ja": {
|
| 22 |
+
"acc": 0.6454640250260688,
|
| 23 |
+
"acc_stderr": 0.015455512877686553
|
| 24 |
+
},
|
| 25 |
+
"jsquad-1.1-0.2": {
|
| 26 |
+
"exact_match": 26.181900045024765,
|
| 27 |
+
"f1": 44.67532835280053
|
| 28 |
+
},
|
| 29 |
+
"jaqket_v2-0.1-0.2": {
|
| 30 |
+
"exact_match": 37.02749140893471,
|
| 31 |
+
"f1": 57.99059569678122
|
| 32 |
+
},
|
| 33 |
+
"xlsum_ja": {
|
| 34 |
+
"rouge2": 5.335027032779865
|
| 35 |
+
},
|
| 36 |
+
"mgsm": {
|
| 37 |
+
"acc": 0.02,
|
| 38 |
+
"acc_stderr": 0.008872139507342681
|
| 39 |
+
}
|
| 40 |
+
},
|
| 41 |
+
"versions": {
|
| 42 |
+
"jcommonsenseqa-1.1-0.2": 1.1,
|
| 43 |
+
"jnli-1.1-0.2": 1.1,
|
| 44 |
+
"marc_ja-1.1-0.2": 1.1,
|
| 45 |
+
"jsquad-1.1-0.2": 1.1,
|
| 46 |
+
"jaqket_v2-0.1-0.2": 0.1,
|
| 47 |
+
"xlsum_ja": 1.0,
|
| 48 |
+
"xwinograd_ja": 1.0,
|
| 49 |
+
"mgsm": 1.0
|
| 50 |
+
},
|
| 51 |
+
"config": {
|
| 52 |
+
"model": "hf-causal",
|
| 53 |
+
"model_args": "pretrained=rinna/japanese-gpt-1b,use_fast=False",
|
| 54 |
+
"num_fewshot": [
|
| 55 |
+
3,
|
| 56 |
+
3,
|
| 57 |
+
3,
|
| 58 |
+
2,
|
| 59 |
+
1,
|
| 60 |
+
1,
|
| 61 |
+
0,
|
| 62 |
+
5
|
| 63 |
+
],
|
| 64 |
+
"batch_size": null,
|
| 65 |
+
"device": "cuda",
|
| 66 |
+
"no_cache": false,
|
| 67 |
+
"limit": null,
|
| 68 |
+
"bootstrap_iters": 100000,
|
| 69 |
+
"description_dict": {}
|
| 70 |
+
}
|
| 71 |
+
}
|
scripts/yans/eval/lm-evaluation-harness/models/rinna/rinna-japanese-gpt-1b/result.jsquad-1.2.json
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": {
|
| 3 |
+
"jsquad-1.2-0.2": {
|
| 4 |
+
"exact_match": 30.189104007203962,
|
| 5 |
+
"f1": 47.12467642283419
|
| 6 |
+
}
|
| 7 |
+
},
|
| 8 |
+
"versions": {
|
| 9 |
+
"jsquad-1.2-0.2": 1.2
|
| 10 |
+
},
|
| 11 |
+
"config": {
|
| 12 |
+
"model": "hf-causal",
|
| 13 |
+
"model_args": "pretrained=rinna/japanese-gpt-1b,use_fast=False",
|
| 14 |
+
"num_fewshot": 2,
|
| 15 |
+
"batch_size": null,
|
| 16 |
+
"device": "cuda",
|
| 17 |
+
"no_cache": false,
|
| 18 |
+
"limit": null,
|
| 19 |
+
"bootstrap_iters": 100000,
|
| 20 |
+
"description_dict": {}
|
| 21 |
+
}
|
| 22 |
+
}
|
scripts/yans/eval/lm-evaluation-harness/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-ppo/harness.jsquad-1.2.sh
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MODEL_ARGS="pretrained=rinna/japanese-gpt-neox-3.6b-instruction-ppo,use_fast=False,device_map=auto,torch_dtype=auto"
|
| 2 |
+
TASK="jsquad-1.2-0.4"
|
| 3 |
+
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-ppo/result.jsquad-1.2.json"
|
scripts/yans/eval/lm-evaluation-harness/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-ppo/harness.sh
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MODEL_ARGS="pretrained=rinna/japanese-gpt-neox-3.6b-instruction-ppo,use_fast=False,device_map=auto,torch_dtype=auto"
|
| 2 |
+
TASK="jcommonsenseqa-1.1-0.4,jnli-1.1-0.4,marc_ja-1.1-0.4,jsquad-1.1-0.4,jaqket_v2-0.1-0.4,xlsum_ja-1.0-0.4,xwinograd_ja,mgsm-1.0-0.4"
|
| 3 |
+
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "3,3,3,2,1,1,0,5" --device "cuda" --output_path "models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-ppo/result.json"
|
scripts/yans/eval/lm-evaluation-harness/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-ppo/result.json
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": {
|
| 3 |
+
"jcommonsenseqa-1.1-0.4": {
|
| 4 |
+
"acc": 0.44057193923145666,
|
| 5 |
+
"acc_stderr": 0.014847715520097282,
|
| 6 |
+
"acc_norm": 0.4226988382484361,
|
| 7 |
+
"acc_norm_stderr": 0.014773923335599326
|
| 8 |
+
},
|
| 9 |
+
"jnli-1.1-0.4": {
|
| 10 |
+
"acc": 0.5419063270336894,
|
| 11 |
+
"acc_stderr": 0.01010108912658305,
|
| 12 |
+
"acc_norm": 0.5312243221035333,
|
| 13 |
+
"acc_norm_stderr": 0.01011696986287914
|
| 14 |
+
},
|
| 15 |
+
"marc_ja-1.1-0.4": {
|
| 16 |
+
"acc": 0.8960585978374608,
|
| 17 |
+
"acc_stderr": 0.004030616889059545,
|
| 18 |
+
"acc_norm": 0.8960585978374608,
|
| 19 |
+
"acc_norm_stderr": 0.004030616889059545
|
| 20 |
+
},
|
| 21 |
+
"xwinograd_ja": {
|
| 22 |
+
"acc": 0.6913451511991658,
|
| 23 |
+
"acc_stderr": 0.014924550437257583
|
| 24 |
+
},
|
| 25 |
+
"jsquad-1.1-0.4": {
|
| 26 |
+
"exact_match": 51.62089149031968,
|
| 27 |
+
"f1": 63.676339985467465
|
| 28 |
+
},
|
| 29 |
+
"jaqket_v2-0.1-0.4": {
|
| 30 |
+
"exact_match": 50.945017182130584,
|
| 31 |
+
"f1": 55.79263424624247
|
| 32 |
+
},
|
| 33 |
+
"xlsum_ja-1.0-0.4": {
|
| 34 |
+
"rouge2": 6.633741717885442
|
| 35 |
+
},
|
| 36 |
+
"mgsm-1.0-0.4": {
|
| 37 |
+
"acc": 0.044,
|
| 38 |
+
"acc_stderr": 0.012997373846574957
|
| 39 |
+
}
|
| 40 |
+
},
|
| 41 |
+
"versions": {
|
| 42 |
+
"jcommonsenseqa-1.1-0.4": 1.1,
|
| 43 |
+
"jnli-1.1-0.4": 1.1,
|
| 44 |
+
"marc_ja-1.1-0.4": 1.1,
|
| 45 |
+
"jsquad-1.1-0.4": 1.1,
|
| 46 |
+
"jaqket_v2-0.1-0.4": 0.1,
|
| 47 |
+
"xlsum_ja-1.0-0.4": 1.0,
|
| 48 |
+
"xwinograd_ja": 1.0,
|
| 49 |
+
"mgsm-1.0-0.4": 1.0
|
| 50 |
+
},
|
| 51 |
+
"config": {
|
| 52 |
+
"model": "hf-causal",
|
| 53 |
+
"model_args": "pretrained=rinna/japanese-gpt-neox-3.6b-instruction-ppo,use_fast=False,device_map=auto,torch_dtype=auto",
|
| 54 |
+
"num_fewshot": [
|
| 55 |
+
3,
|
| 56 |
+
3,
|
| 57 |
+
3,
|
| 58 |
+
2,
|
| 59 |
+
1,
|
| 60 |
+
1,
|
| 61 |
+
0,
|
| 62 |
+
5
|
| 63 |
+
],
|
| 64 |
+
"batch_size": null,
|
| 65 |
+
"device": "cuda",
|
| 66 |
+
"no_cache": false,
|
| 67 |
+
"limit": null,
|
| 68 |
+
"bootstrap_iters": 100000,
|
| 69 |
+
"description_dict": {}
|
| 70 |
+
}
|
| 71 |
+
}
|
scripts/yans/eval/lm-evaluation-harness/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-ppo/result.jsquad-1.2.json
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": {
|
| 3 |
+
"jsquad-1.2-0.4": {
|
| 4 |
+
"exact_match": 52.633948671769474,
|
| 5 |
+
"f1": 64.387511749343
|
| 6 |
+
}
|
| 7 |
+
},
|
| 8 |
+
"versions": {
|
| 9 |
+
"jsquad-1.2-0.4": 1.2
|
| 10 |
+
},
|
| 11 |
+
"config": {
|
| 12 |
+
"model": "hf-causal",
|
| 13 |
+
"model_args": "pretrained=rinna/japanese-gpt-neox-3.6b-instruction-ppo,use_fast=False,device_map=auto,torch_dtype=auto",
|
| 14 |
+
"num_fewshot": 2,
|
| 15 |
+
"batch_size": null,
|
| 16 |
+
"device": "cuda",
|
| 17 |
+
"no_cache": false,
|
| 18 |
+
"limit": null,
|
| 19 |
+
"bootstrap_iters": 100000,
|
| 20 |
+
"description_dict": {}
|
| 21 |
+
}
|
| 22 |
+
}
|
scripts/yans/eval/lm-evaluation-harness/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-ppo/result.mgsm.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
scripts/yans/eval/lm-evaluation-harness/models/rinna/rinna-japanese-gpt-neox-3.6b/harness.jsquad-1.2.sh
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MODEL_ARGS="pretrained=rinna/japanese-gpt-neox-3.6b,use_fast=False,device_map=auto,torch_dtype=auto"
|
| 2 |
+
TASK="jsquad-1.2-0.2"
|
| 3 |
+
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/rinna/rinna-japanese-gpt-neox-3.6b/result.jsquad-1.2.json"
|
scripts/yans/eval/lm-evaluation-harness/models/rinna/rinna-japanese-gpt-neox-3.6b/harness.sh
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MODEL_ARGS="pretrained=rinna/japanese-gpt-neox-3.6b,use_fast=False"
|
| 2 |
+
TASK="jcommonsenseqa-1.1-0.2,jnli-1.1-0.2,marc_ja-1.1-0.2,jsquad-1.1-0.2,jaqket_v2-0.1-0.2,xlsum_ja,xwinograd_ja,mgsm"
|
| 3 |
+
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "3,3,3,2,1,1,0,5" --device "cuda" --output_path "models/rinna/rinna-japanese-gpt-neox-3.6b/result.json"
|
scripts/yans/eval/lm-evaluation-harness/models/rinna/rinna-japanese-gpt-neox-3.6b/result.json
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": {
|
| 3 |
+
"jcommonsenseqa-1.1-0.2": {
|
| 4 |
+
"acc": 0.3163538873994638,
|
| 5 |
+
"acc_stderr": 0.013908534121227658,
|
| 6 |
+
"acc_norm": 0.2725647899910634,
|
| 7 |
+
"acc_norm_stderr": 0.01331714516405031
|
| 8 |
+
},
|
| 9 |
+
"jnli-1.1-0.2": {
|
| 10 |
+
"acc": 0.3442892358258012,
|
| 11 |
+
"acc_stderr": 0.009632673153167076,
|
| 12 |
+
"acc_norm": 0.3311421528348398,
|
| 13 |
+
"acc_norm_stderr": 0.009541202050062205
|
| 14 |
+
},
|
| 15 |
+
"marc_ja-1.1-0.2": {
|
| 16 |
+
"acc": 0.7481688175793513,
|
| 17 |
+
"acc_stderr": 0.005732757658862212,
|
| 18 |
+
"acc_norm": 0.7481688175793513,
|
| 19 |
+
"acc_norm_stderr": 0.005732757658862212
|
| 20 |
+
},
|
| 21 |
+
"xwinograd_ja": {
|
| 22 |
+
"acc": 0.708029197080292,
|
| 23 |
+
"acc_stderr": 0.014689686963716971
|
| 24 |
+
},
|
| 25 |
+
"jsquad-1.1-0.2": {
|
| 26 |
+
"exact_match": 47.90634849167042,
|
| 27 |
+
"f1": 58.804568288439675
|
| 28 |
+
},
|
| 29 |
+
"jaqket_v2-0.1-0.2": {
|
| 30 |
+
"exact_match": 68.38487972508591,
|
| 31 |
+
"f1": 72.4344388906244
|
| 32 |
+
},
|
| 33 |
+
"xlsum_ja": {
|
| 34 |
+
"rouge2": 5.157849646982534
|
| 35 |
+
},
|
| 36 |
+
"mgsm": {
|
| 37 |
+
"acc": 0.012,
|
| 38 |
+
"acc_stderr": 0.006900323023694271
|
| 39 |
+
}
|
| 40 |
+
},
|
| 41 |
+
"versions": {
|
| 42 |
+
"jcommonsenseqa-1.1-0.2": 1.1,
|
| 43 |
+
"jnli-1.1-0.2": 1.1,
|
| 44 |
+
"marc_ja-1.1-0.2": 1.1,
|
| 45 |
+
"jsquad-1.1-0.2": 1.1,
|
| 46 |
+
"jaqket_v2-0.1-0.2": 0.1,
|
| 47 |
+
"xlsum_ja": 1.0,
|
| 48 |
+
"xwinograd_ja": 1.0,
|
| 49 |
+
"mgsm": 1.0
|
| 50 |
+
},
|
| 51 |
+
"config": {
|
| 52 |
+
"model": "hf-causal",
|
| 53 |
+
"model_args": "pretrained=rinna/japanese-gpt-neox-3.6b,use_fast=False",
|
| 54 |
+
"num_fewshot": [
|
| 55 |
+
3,
|
| 56 |
+
3,
|
| 57 |
+
3,
|
| 58 |
+
2,
|
| 59 |
+
1,
|
| 60 |
+
1,
|
| 61 |
+
0,
|
| 62 |
+
5
|
| 63 |
+
],
|
| 64 |
+
"batch_size": null,
|
| 65 |
+
"device": "cuda",
|
| 66 |
+
"no_cache": false,
|
| 67 |
+
"limit": null,
|
| 68 |
+
"bootstrap_iters": 100000,
|
| 69 |
+
"description_dict": {}
|
| 70 |
+
}
|
| 71 |
+
}
|
scripts/yans/eval/lm-evaluation-harness/models/rinna/rinna-japanese-gpt-neox-3.6b/result.jsquad-1.2.json
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": {
|
| 3 |
+
"jsquad-1.2-0.2": {
|
| 4 |
+
"exact_match": 49.0094552003602,
|
| 5 |
+
"f1": 59.80363888369063
|
| 6 |
+
}
|
| 7 |
+
},
|
| 8 |
+
"versions": {
|
| 9 |
+
"jsquad-1.2-0.2": 1.2
|
| 10 |
+
},
|
| 11 |
+
"config": {
|
| 12 |
+
"model": "hf-causal",
|
| 13 |
+
"model_args": "pretrained=rinna/japanese-gpt-neox-3.6b,use_fast=False,device_map=auto,torch_dtype=auto",
|
| 14 |
+
"num_fewshot": 2,
|
| 15 |
+
"batch_size": null,
|
| 16 |
+
"device": "cuda",
|
| 17 |
+
"no_cache": false,
|
| 18 |
+
"limit": null,
|
| 19 |
+
"bootstrap_iters": 100000,
|
| 20 |
+
"description_dict": {}
|
| 21 |
+
}
|
| 22 |
+
}
|
scripts/yans/eval/lm-evaluation-harness/models/rinna/rinna-japanese-gpt-neox-3.6b/result.mgsm.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
scripts/yans/eval/lm-evaluation-harness/models/stabilityai/experiments/stablelm-jp-1b-compact-v1/harness.sh
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
set -eu
|
| 3 |
+
PROJECT_DIR="/fsx/proj-jp-stablegpt"
|
| 4 |
+
MODEL_ARGS="pretrained=${PROJECT_DIR}/hf_model/1b-compact-v1,tokenizer=${PROJECT_DIR}/tokenizers/compact-hf/,use_fast=False"
|
| 5 |
+
TASK="jcommonsenseqa-1.1-0.2,jnli-1.1-0.2,marc_ja-1.1-0.2,jsquad-1.1-0.2,jaqket_v2-0.1-0.2,xlsum_ja,xwinograd_ja,mgsm"
|
| 6 |
+
NUM_FEW_SHOTS="3,3,3,2,1,1,0,5"
|
| 7 |
+
python main.py \
|
| 8 |
+
--model hf-causal \
|
| 9 |
+
--model_args $MODEL_ARGS \
|
| 10 |
+
--tasks $TASK \
|
| 11 |
+
--num_fewshot $NUM_FEW_SHOTS \
|
| 12 |
+
--device "cuda" \
|
| 13 |
+
--output_path "models/stablelm/stablelm-jp-1b-compact-v1/result.json"
|
scripts/yans/eval/lm-evaluation-harness/models/stabilityai/experiments/stablelm-jp-1b-compact-v1/result.json
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": {
|
| 3 |
+
"jcommonsenseqa-1.1-0.2": {
|
| 4 |
+
"acc": 0.4709562109025916,
|
| 5 |
+
"acc_stderr": 0.014928465632785326,
|
| 6 |
+
"acc_norm": 0.3485254691689008,
|
| 7 |
+
"acc_norm_stderr": 0.014250991444953297
|
| 8 |
+
},
|
| 9 |
+
"jnli-1.1-0.2": {
|
| 10 |
+
"acc": 0.4449465899753492,
|
| 11 |
+
"acc_stderr": 0.010075121089036965,
|
| 12 |
+
"acc_norm": 0.4026294165981923,
|
| 13 |
+
"acc_norm_stderr": 0.009942683448992417
|
| 14 |
+
},
|
| 15 |
+
"marc_ja-1.1-0.2": {
|
| 16 |
+
"acc": 0.757063132193931,
|
| 17 |
+
"acc_stderr": 0.005663981049607239,
|
| 18 |
+
"acc_norm": 0.757063132193931,
|
| 19 |
+
"acc_norm_stderr": 0.005663981049607239
|
| 20 |
+
},
|
| 21 |
+
"xwinograd_ja": {
|
| 22 |
+
"acc": 0.6339937434827946,
|
| 23 |
+
"acc_stderr": 0.015563382319228687
|
| 24 |
+
},
|
| 25 |
+
"jsquad-1.1-0.2": {
|
| 26 |
+
"exact_match": 29.536244934714095,
|
| 27 |
+
"f1": 39.00936796569676
|
| 28 |
+
},
|
| 29 |
+
"jaqket_v2-0.1-0.2": {
|
| 30 |
+
"exact_match": 33.24742268041237,
|
| 31 |
+
"f1": 38.13348879070528
|
| 32 |
+
},
|
| 33 |
+
"xlsum_ja": {
|
| 34 |
+
"rouge2": 4.3964148234614
|
| 35 |
+
},
|
| 36 |
+
"mgsm": {
|
| 37 |
+
"acc": 0.012,
|
| 38 |
+
"acc_stderr": 0.0069003230236942764
|
| 39 |
+
}
|
| 40 |
+
},
|
| 41 |
+
"versions": {
|
| 42 |
+
"jcommonsenseqa-1.1-0.2": 1.1,
|
| 43 |
+
"jnli-1.1-0.2": 1.1,
|
| 44 |
+
"marc_ja-1.1-0.2": 1.1,
|
| 45 |
+
"jsquad-1.1-0.2": 1.1,
|
| 46 |
+
"jaqket_v2-0.1-0.2": 0.1,
|
| 47 |
+
"xlsum_ja": 1.0,
|
| 48 |
+
"xwinograd_ja": 1.0,
|
| 49 |
+
"mgsm": 1.0
|
| 50 |
+
},
|
| 51 |
+
"config": {
|
| 52 |
+
"model": "hf-causal",
|
| 53 |
+
"model_args": "pretrained=/fsx/proj-jp-stablegpt/hf_model/1b-compact-v1,tokenizer=/fsx/proj-jp-stablegpt/tokenizers/compact-hf/,use_fast=False",
|
| 54 |
+
"num_fewshot": [
|
| 55 |
+
3,
|
| 56 |
+
3,
|
| 57 |
+
3,
|
| 58 |
+
2,
|
| 59 |
+
1,
|
| 60 |
+
1,
|
| 61 |
+
0,
|
| 62 |
+
5
|
| 63 |
+
],
|
| 64 |
+
"batch_size": null,
|
| 65 |
+
"device": "cuda",
|
| 66 |
+
"no_cache": false,
|
| 67 |
+
"limit": null,
|
| 68 |
+
"bootstrap_iters": 100000,
|
| 69 |
+
"description_dict": {}
|
| 70 |
+
}
|
| 71 |
+
}
|
scripts/yans/eval/lm-evaluation-harness/models/stabilityai/experiments/stablelm-jp-1b-jav1-sl2k-300b/harness.sh
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
set -eu
|
| 3 |
+
PROJECT_DIR=""
|
| 4 |
+
MODEL_ARGS="pretrained=${PROJECT_DIR}/hf_model/1b-jav1-sl2k-300b,tokenizer=${PROJECT_DIR}/tokenizers/nai-hf-tokenizer/,use_fast=False"
|
| 5 |
+
TASK="jsquad-1.1-0.2,jcommonsenseqa-1.1-0.2,jnli-1.1-0.2,marc_ja-1.1-0.2"
|
| 6 |
+
NUM_FEW_SHOTS="2,3,3,3"
|
| 7 |
+
python main.py \
|
| 8 |
+
--model hf-causal \
|
| 9 |
+
--model_args $MODEL_ARGS \
|
| 10 |
+
--tasks $TASK \
|
| 11 |
+
--num_fewshot $NUM_FEW_SHOTS \
|
| 12 |
+
--device "cuda" \
|
| 13 |
+
--output_path "models/stablelm-jp-1b-jav1-sl2k-300b/result.json"
|
scripts/yans/eval/lm-evaluation-harness/models/stabilityai/experiments/stablelm-jp-1b-jav1-sl2k-300b/result.json
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": {
|
| 3 |
+
"jcommonsenseqa-1.1-0.2": {
|
| 4 |
+
"acc": 0.2555853440571939,
|
| 5 |
+
"acc_stderr": 0.013045313758426092,
|
| 6 |
+
"acc_norm": 0.23056300268096513,
|
| 7 |
+
"acc_norm_stderr": 0.012596805983976347
|
| 8 |
+
},
|
| 9 |
+
"jnli-1.1-0.2": {
|
| 10 |
+
"acc": 0.37880032867707475,
|
| 11 |
+
"acc_stderr": 0.009834442099385492,
|
| 12 |
+
"acc_norm": 0.3648315529991783,
|
| 13 |
+
"acc_norm_stderr": 0.009759320919777338
|
| 14 |
+
},
|
| 15 |
+
"marc_ja-1.1-0.2": {
|
| 16 |
+
"acc": 0.4899186416696144,
|
| 17 |
+
"acc_stderr": 0.006648783869548029,
|
| 18 |
+
"acc_norm": 0.4899186416696144,
|
| 19 |
+
"acc_norm_stderr": 0.006648783869548029
|
| 20 |
+
},
|
| 21 |
+
"jsquad-1.1-0.2": {
|
| 22 |
+
"exact_match": 34.5114813147231,
|
| 23 |
+
"f1": 44.58786913290027
|
| 24 |
+
}
|
| 25 |
+
},
|
| 26 |
+
"versions": {
|
| 27 |
+
"jcommonsenseqa-1.1-0.2": 1.1,
|
| 28 |
+
"jnli-1.1-0.2": 1.1,
|
| 29 |
+
"jsquad-1.1-0.2": 1.1,
|
| 30 |
+
"marc_ja-1.1-0.2": 1.1
|
| 31 |
+
},
|
| 32 |
+
"config": {
|
| 33 |
+
"model": "hf-causal",
|
| 34 |
+
"model_args": "pretrained=${PROJECT_DIR}/hf_model/1b-jav1-sl2k-300b,tokenizer=${PROJECT_DIR}/tokenizers/nai-hf-tokenizer/,use_fast=False",
|
| 35 |
+
"num_fewshot": [
|
| 36 |
+
2,
|
| 37 |
+
3,
|
| 38 |
+
3,
|
| 39 |
+
3
|
| 40 |
+
],
|
| 41 |
+
"batch_size": null,
|
| 42 |
+
"device": "cuda",
|
| 43 |
+
"no_cache": false,
|
| 44 |
+
"limit": null,
|
| 45 |
+
"bootstrap_iters": 100000,
|
| 46 |
+
"description_dict": {}
|
| 47 |
+
}
|
| 48 |
+
}
|
scripts/yans/eval/lm-evaluation-harness/models/stabilityai/experiments/stablelm-jp-1b-jav1-sl2k-slw-300b/harness.sh
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
set -eu
|
| 3 |
+
PROJECT_DIR=""
|
| 4 |
+
MODEL_ARGS="pretrained=${PROJECT_DIR}/hf_model/1b-jav1-sl2k-slw-300b,tokenizer=${PROJECT_DIR}/tokenizers/nai-hf-tokenizer/,use_fast=False"
|
| 5 |
+
TASK="jcommonsenseqa-1.1-0.2,jnli-1.1-0.2,marc_ja-1.1-0.2,jsquad-1.1-0.2,jaqket_v2-0.1-0.2,xlsum_ja,xwinograd_ja,mgsm"
|
| 6 |
+
NUM_FEW_SHOTS="3,3,3,2,1,1,0,5"
|
| 7 |
+
python main.py \
|
| 8 |
+
--model hf-causal \
|
| 9 |
+
--model_args $MODEL_ARGS \
|
| 10 |
+
--tasks $TASK \
|
| 11 |
+
--num_fewshot $NUM_FEW_SHOTS \
|
| 12 |
+
--device "cuda" \
|
| 13 |
+
--output_path "models/stablelm/stablelm-jp-1b-jav1-sl2k-slw-300b/result.json"
|
scripts/yans/eval/lm-evaluation-harness/models/stabilityai/experiments/stablelm-jp-1b-jav1-sl2k-slw-300b/result.json
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": {
|
| 3 |
+
"jcommonsenseqa-1.1-0.2": {
|
| 4 |
+
"acc": 0.28596961572832885,
|
| 5 |
+
"acc_stderr": 0.013514419338665247,
|
| 6 |
+
"acc_norm": 0.2421805183199285,
|
| 7 |
+
"acc_norm_stderr": 0.012812432289317909
|
| 8 |
+
},
|
| 9 |
+
"jnli-1.1-0.2": {
|
| 10 |
+
"acc": 0.3751027115858669,
|
| 11 |
+
"acc_stderr": 0.009815408241248628,
|
| 12 |
+
"acc_norm": 0.34880854560394414,
|
| 13 |
+
"acc_norm_stderr": 0.009662218404461801
|
| 14 |
+
},
|
| 15 |
+
"marc_ja-1.1-0.2": {
|
| 16 |
+
"acc": 0.7518311824206487,
|
| 17 |
+
"acc_stderr": 0.00570483124396955,
|
| 18 |
+
"acc_norm": 0.7518311824206487,
|
| 19 |
+
"acc_norm_stderr": 0.00570483124396955
|
| 20 |
+
},
|
| 21 |
+
"xwinograd_ja": {
|
| 22 |
+
"acc": 0.6527632950990615,
|
| 23 |
+
"acc_stderr": 0.015381826969142634
|
| 24 |
+
},
|
| 25 |
+
"jsquad-1.1-0.2": {
|
| 26 |
+
"exact_match": 42.390814948221525,
|
| 27 |
+
"f1": 52.94897262881226
|
| 28 |
+
},
|
| 29 |
+
"jaqket_v2-0.1-0.2": {
|
| 30 |
+
"exact_match": 42.439862542955325,
|
| 31 |
+
"f1": 48.18551246386296
|
| 32 |
+
},
|
| 33 |
+
"xlsum_ja": {
|
| 34 |
+
"rouge2": 8.371640364702019
|
| 35 |
+
},
|
| 36 |
+
"mgsm": {
|
| 37 |
+
"acc": 0.008,
|
| 38 |
+
"acc_stderr": 0.00564548367669017
|
| 39 |
+
}
|
| 40 |
+
},
|
| 41 |
+
"versions": {
|
| 42 |
+
"jcommonsenseqa-1.1-0.2": 1.1,
|
| 43 |
+
"jnli-1.1-0.2": 1.1,
|
| 44 |
+
"marc_ja-1.1-0.2": 1.1,
|
| 45 |
+
"jsquad-1.1-0.2": 1.1,
|
| 46 |
+
"jaqket_v2-0.1-0.2": 0.1,
|
| 47 |
+
"xlsum_ja": 1.0,
|
| 48 |
+
"xwinograd_ja": 1.0,
|
| 49 |
+
"mgsm": 1.0
|
| 50 |
+
},
|
| 51 |
+
"config": {
|
| 52 |
+
"model": "hf-causal",
|
| 53 |
+
"model_args": "pretrained=${PROJECT_DIR}/hf_model/1b-jav1-sl2k-slw-300b,tokenizer=${PROJECT_DIR}/tokenizers/nai-hf-tokenizer/,use_fast=False",
|
| 54 |
+
"num_fewshot": [
|
| 55 |
+
3,
|
| 56 |
+
3,
|
| 57 |
+
3,
|
| 58 |
+
2,
|
| 59 |
+
1,
|
| 60 |
+
1,
|
| 61 |
+
0,
|
| 62 |
+
5
|
| 63 |
+
],
|
| 64 |
+
"batch_size": null,
|
| 65 |
+
"device": "cuda",
|
| 66 |
+
"no_cache": false,
|
| 67 |
+
"limit": null,
|
| 68 |
+
"bootstrap_iters": 100000,
|
| 69 |
+
"description_dict": {}
|
| 70 |
+
}
|
| 71 |
+
}
|
scripts/yans/eval/lm-evaluation-harness/models/stabilityai/experiments/stablelm-jp-1b-jav1_rp-sl2k-slw-300b/harness.sh
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
set -eu
|
| 3 |
+
|
| 4 |
+
if [ -z ${JP_LLM_PATH+x} ]; then
|
| 5 |
+
echo "Error: The JP_LLM_PATH environment variable is not set"
|
| 6 |
+
exit 1
|
| 7 |
+
fi
|
| 8 |
+
|
| 9 |
+
MODEL_ARGS="pretrained=$JP_LLM_PATH/hf_model/1b-jav1_rp-sl2k-slw,tokenizer=$JP_LLM_PATH/tokenizers/nai-hf-tokenizer/,use_fast=False"
|
| 10 |
+
TASK="jcommonsenseqa-1.1-0.2,jnli-1.1-0.2,marc_ja-1.1-0.2,jsquad-1.1-0.2,jaqket_v2-0.1-0.2,xlsum_ja,xwinograd_ja,mgsm"
|
| 11 |
+
NUM_FEW_SHOTS="3,3,3,2,1,1,0,5"
|
| 12 |
+
python main.py \
|
| 13 |
+
--model hf-causal \
|
| 14 |
+
--model_args $MODEL_ARGS \
|
| 15 |
+
--tasks $TASK \
|
| 16 |
+
--num_fewshot $NUM_FEW_SHOTS \
|
| 17 |
+
--device "cuda" \
|
| 18 |
+
--output_path "models/stablelm/stablelm-jp-1b-jav1_rp-sl2k-slw-300b/result.json"
|
scripts/yans/eval/lm-evaluation-harness/models/stabilityai/experiments/stablelm-jp-1b-jav1_rp-sl2k-slw-300b/result.json
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": {
|
| 3 |
+
"jcommonsenseqa-1.1-0.2": {
|
| 4 |
+
"acc": 0.34137622877569257,
|
| 5 |
+
"acc_stderr": 0.014181247513525478,
|
| 6 |
+
"acc_norm": 0.2645218945487042,
|
| 7 |
+
"acc_norm_stderr": 0.013191518316844342
|
| 8 |
+
},
|
| 9 |
+
"jnli-1.1-0.2": {
|
| 10 |
+
"acc": 0.3373048479868529,
|
| 11 |
+
"acc_stderr": 0.00958511072017679,
|
| 12 |
+
"acc_norm": 0.3360723089564503,
|
| 13 |
+
"acc_norm_stderr": 0.009576475494957559
|
| 14 |
+
},
|
| 15 |
+
"marc_ja-1.1-0.2": {
|
| 16 |
+
"acc": 0.6860830136030694,
|
| 17 |
+
"acc_stderr": 0.006129213801621414,
|
| 18 |
+
"acc_norm": 0.6860830136030694,
|
| 19 |
+
"acc_norm_stderr": 0.006129213801621414
|
| 20 |
+
},
|
| 21 |
+
"xwinograd_ja": {
|
| 22 |
+
"acc": 0.6016684045881127,
|
| 23 |
+
"acc_stderr": 0.015816785549652837
|
| 24 |
+
},
|
| 25 |
+
"jsquad-1.1-0.2": {
|
| 26 |
+
"exact_match": 24.53849617289509,
|
| 27 |
+
"f1": 33.53058791900235
|
| 28 |
+
},
|
| 29 |
+
"jaqket_v2-0.1-0.2": {
|
| 30 |
+
"exact_match": 23.969072164948454,
|
| 31 |
+
"f1": 27.900030000545463
|
| 32 |
+
},
|
| 33 |
+
"xlsum_ja": {
|
| 34 |
+
"rouge2": 7.9292934294551545
|
| 35 |
+
},
|
| 36 |
+
"mgsm": {
|
| 37 |
+
"acc": 0.016,
|
| 38 |
+
"acc_stderr": 0.007951661188874313
|
| 39 |
+
}
|
| 40 |
+
},
|
| 41 |
+
"versions": {
|
| 42 |
+
"jcommonsenseqa-1.1-0.2": 1.1,
|
| 43 |
+
"jnli-1.1-0.2": 1.1,
|
| 44 |
+
"marc_ja-1.1-0.2": 1.1,
|
| 45 |
+
"jsquad-1.1-0.2": 1.1,
|
| 46 |
+
"jaqket_v2-0.1-0.2": 0.1,
|
| 47 |
+
"xlsum_ja": 1.0,
|
| 48 |
+
"xwinograd_ja": 1.0,
|
| 49 |
+
"mgsm": 1.0
|
| 50 |
+
},
|
| 51 |
+
"config": {
|
| 52 |
+
"model": "hf-causal",
|
| 53 |
+
"model_args": "pretrained=/fsx/proj-jp-stablegpt/hf_model/1b-jav1_rp-sl2k-slw,tokenizer=/fsx/proj-jp-stablegpt/tokenizers/nai-hf-tokenizer/,use_fast=False",
|
| 54 |
+
"num_fewshot": [
|
| 55 |
+
3,
|
| 56 |
+
3,
|
| 57 |
+
3,
|
| 58 |
+
2,
|
| 59 |
+
1,
|
| 60 |
+
1,
|
| 61 |
+
0,
|
| 62 |
+
5
|
| 63 |
+
],
|
| 64 |
+
"batch_size": null,
|
| 65 |
+
"device": "cuda",
|
| 66 |
+
"no_cache": false,
|
| 67 |
+
"limit": null,
|
| 68 |
+
"bootstrap_iters": 100000,
|
| 69 |
+
"description_dict": {}
|
| 70 |
+
}
|
| 71 |
+
}
|
scripts/yans/eval/lm-evaluation-harness/models/stabilityai/experiments/stablelm-jp-1b-rp_then_jav1-294b/harness.sh
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
set -eu
|
| 3 |
+
|
| 4 |
+
if [ -z ${JP_LLM_PATH+x} ]; then
|
| 5 |
+
echo "Error: The JP_LLM_PATH environment variable is not set"
|
| 6 |
+
exit 1
|
| 7 |
+
fi
|
| 8 |
+
|
| 9 |
+
MODEL_ARGS="pretrained=$JP_LLM_PATH/hf_model/1b-rp_then_jav1-294b,tokenizer=$JP_LLM_PATH/tokenizers/nai-hf-tokenizer/,use_fast=False"
|
| 10 |
+
TASK="jcommonsenseqa-1.1-0.2,jnli-1.1-0.2,marc_ja-1.1-0.2,jsquad-1.1-0.2,jaqket_v2-0.1-0.2,xlsum_ja,xwinograd_ja,mgsm"
|
| 11 |
+
NUM_FEW_SHOTS="3,3,3,2,1,1,0,5"
|
| 12 |
+
python main.py \
|
| 13 |
+
--model hf-causal \
|
| 14 |
+
--model_args $MODEL_ARGS \
|
| 15 |
+
--tasks $TASK \
|
| 16 |
+
--num_fewshot $NUM_FEW_SHOTS \
|
| 17 |
+
--device "cuda" \
|
| 18 |
+
--output_path "models/stablelm/stablelm-jp-1b-rp_then_jav1-294b/result.json"
|
scripts/yans/eval/lm-evaluation-harness/models/stabilityai/experiments/stablelm-jp-1b-rp_then_jav1-294b/result.json
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": {
|
| 3 |
+
"jcommonsenseqa-1.1-0.2": {
|
| 4 |
+
"acc": 0.2680965147453083,
|
| 5 |
+
"acc_stderr": 0.013248038756079302,
|
| 6 |
+
"acc_norm": 0.24039320822162646,
|
| 7 |
+
"acc_norm_stderr": 0.012780110667692907
|
| 8 |
+
},
|
| 9 |
+
"jnli-1.1-0.2": {
|
| 10 |
+
"acc": 0.3278553820870994,
|
| 11 |
+
"acc_stderr": 0.009517030628219573,
|
| 12 |
+
"acc_norm": 0.31183237469186526,
|
| 13 |
+
"acc_norm_stderr": 0.009391536814742456
|
| 14 |
+
},
|
| 15 |
+
"marc_ja-1.1-0.2": {
|
| 16 |
+
"acc": 0.7771189396581792,
|
| 17 |
+
"acc_stderr": 0.005496539565709208,
|
| 18 |
+
"acc_norm": 0.7771189396581792,
|
| 19 |
+
"acc_norm_stderr": 0.005496539565709208
|
| 20 |
+
},
|
| 21 |
+
"xwinograd_ja": {
|
| 22 |
+
"acc": 0.6861313868613139,
|
| 23 |
+
"acc_stderr": 0.01499321721472398
|
| 24 |
+
},
|
| 25 |
+
"jsquad-1.1-0.2": {
|
| 26 |
+
"exact_match": 54.02971634398919,
|
| 27 |
+
"f1": 64.2854711987419
|
| 28 |
+
},
|
| 29 |
+
"jaqket_v2-0.1-0.2": {
|
| 30 |
+
"exact_match": 59.450171821305844,
|
| 31 |
+
"f1": 65.37892424490362
|
| 32 |
+
},
|
| 33 |
+
"xlsum_ja": {
|
| 34 |
+
"rouge2": 9.662662093427816
|
| 35 |
+
},
|
| 36 |
+
"mgsm": {
|
| 37 |
+
"acc": 0.008,
|
| 38 |
+
"acc_stderr": 0.0056454836766901585
|
| 39 |
+
}
|
| 40 |
+
},
|
| 41 |
+
"versions": {
|
| 42 |
+
"jcommonsenseqa-1.1-0.2": 1.1,
|
| 43 |
+
"jnli-1.1-0.2": 1.1,
|
| 44 |
+
"marc_ja-1.1-0.2": 1.1,
|
| 45 |
+
"jsquad-1.1-0.2": 1.1,
|
| 46 |
+
"jaqket_v2-0.1-0.2": 0.1,
|
| 47 |
+
"xlsum_ja": 1.0,
|
| 48 |
+
"xwinograd_ja": 1.0,
|
| 49 |
+
"mgsm": 1.0
|
| 50 |
+
},
|
| 51 |
+
"config": {
|
| 52 |
+
"model": "hf-causal",
|
| 53 |
+
"model_args": "pretrained=${PROJECT_DIR}/hf_model/1b-rp_then_jav1-294b,tokenizer=${PROJECT_DIR}/tokenizers/nai-hf-tokenizer/,use_fast=False",
|
| 54 |
+
"num_fewshot": [
|
| 55 |
+
3,
|
| 56 |
+
3,
|
| 57 |
+
3,
|
| 58 |
+
2,
|
| 59 |
+
1,
|
| 60 |
+
1,
|
| 61 |
+
0,
|
| 62 |
+
5
|
| 63 |
+
],
|
| 64 |
+
"batch_size": null,
|
| 65 |
+
"device": "cuda",
|
| 66 |
+
"no_cache": false,
|
| 67 |
+
"limit": null,
|
| 68 |
+
"bootstrap_iters": 100000,
|
| 69 |
+
"description_dict": {}
|
| 70 |
+
}
|
| 71 |
+
}
|
scripts/yans/eval/lm-evaluation-harness/models/stabilityai/experiments/stablelm-jp-3b-ja50_rp50-700b/harness_template-0.1.sh
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
PROJECT_DIR=""
|
| 3 |
+
MODEL_ARGS="pretrained=${PROJECT_DIR}/hf_model/3b-ja50_rp50-700b,tokenizer=${PROJECT_DIR}/tokenizers/nai-hf-tokenizer/,use_fast=False"
|
| 4 |
+
TASK="jcommonsenseqa-1.1-0.1,jnli,marc_ja,jsquad-1.1-0.1,jaqket_v2-0.1-0.1,xlsum_ja,xwinograd_ja,mgsm"
|
| 5 |
+
NUM_FEW_SHOTS="3,3,3,2,1,1,0,5"
|
| 6 |
+
python main.py \
|
| 7 |
+
--model hf-causal \
|
| 8 |
+
--model_args $MODEL_ARGS \
|
| 9 |
+
--tasks $TASK \
|
| 10 |
+
--num_fewshot $NUM_FEW_SHOTS \
|
| 11 |
+
--device "cuda" \
|
| 12 |
+
--output_path "models/stablelm/stablelm-jp-3b-ja50_rp50-700b/result_template-0.1.json"
|
scripts/yans/eval/lm-evaluation-harness/models/stabilityai/experiments/stablelm-jp-3b-ja50_rp50-700b/harness_template-0.2.sh
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
PROJECT_DIR=""
|
| 3 |
+
MODEL_ARGS="pretrained=${PROJECT_DIR}/hf_model/3b-ja50_rp50-700b,tokenizer=${PROJECT_DIR}/tokenizers/nai-hf-tokenizer/,use_fast=False"
|
| 4 |
+
TASK="jcommonsenseqa-1.1-0.2,jnli-1.1-0.2,marc_ja-1.1-0.2,jsquad-1.1-0.2,jaqket_v2-0.1-0.2,xlsum_ja,xwinograd_ja,mgsm"
|
| 5 |
+
NUM_FEW_SHOTS="3,3,3,2,1,1,0,5"
|
| 6 |
+
python main.py \
|
| 7 |
+
--model hf-causal \
|
| 8 |
+
--model_args $MODEL_ARGS \
|
| 9 |
+
--tasks $TASK \
|
| 10 |
+
--num_fewshot $NUM_FEW_SHOTS \
|
| 11 |
+
--device "cuda" \
|
| 12 |
+
--output_path "models/stablelm/stablelm-jp-3b-ja50_rp50-700b/result_template-0.2.json"
|
scripts/yans/eval/lm-evaluation-harness/models/stabilityai/experiments/stablelm-jp-3b-ja50_rp50-700b/result_template-0.1.json
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": {
|
| 3 |
+
"jcommonsenseqa-1.1-0.1": {
|
| 4 |
+
"acc": 0.4280607685433423,
|
| 5 |
+
"acc_stderr": 0.014798127177394432,
|
| 6 |
+
"acc_norm": 0.40214477211796246,
|
| 7 |
+
"acc_norm_stderr": 0.014664536048234705
|
| 8 |
+
},
|
| 9 |
+
"jnli": {
|
| 10 |
+
"acc": 0.36442070665571075,
|
| 11 |
+
"acc_stderr": 0.009756978284439256,
|
| 12 |
+
"acc_norm": 0.3245686113393591,
|
| 13 |
+
"acc_norm_stderr": 0.00949232990976085
|
| 14 |
+
},
|
| 15 |
+
"marc_ja": {
|
| 16 |
+
"acc": 0.7539239623299616,
|
| 17 |
+
"acc_stderr": 0.005688627090173545,
|
| 18 |
+
"acc_norm": 0.7539239623299616,
|
| 19 |
+
"acc_norm_stderr": 0.005688627090173545
|
| 20 |
+
},
|
| 21 |
+
"xwinograd_ja": {
|
| 22 |
+
"acc": 0.6819603753910324,
|
| 23 |
+
"acc_stderr": 0.015046567305192259
|
| 24 |
+
},
|
| 25 |
+
"jsquad-1.1-0.1": {
|
| 26 |
+
"exact_match": 57.29401170643854,
|
| 27 |
+
"f1": 66.44109170808048
|
| 28 |
+
},
|
| 29 |
+
"jaqket_v2-0.1-0.1": {
|
| 30 |
+
"exact_match": 52.40549828178694,
|
| 31 |
+
"f1": 58.039235010884475
|
| 32 |
+
},
|
| 33 |
+
"xlsum_ja": {
|
| 34 |
+
"rouge2": 8.644546504860047
|
| 35 |
+
},
|
| 36 |
+
"mgsm": {
|
| 37 |
+
"acc": 0.016,
|
| 38 |
+
"acc_stderr": 0.00795166118887434
|
| 39 |
+
}
|
| 40 |
+
},
|
| 41 |
+
"versions": {
|
| 42 |
+
"jcommonsenseqa-1.1-0.1": 1.1,
|
| 43 |
+
"jnli": 1.1,
|
| 44 |
+
"marc_ja": 1.1,
|
| 45 |
+
"jsquad-1.1-0.1": 1.1,
|
| 46 |
+
"jaqket_v2-0.1-0.1": 0.1,
|
| 47 |
+
"xlsum_ja": 1.0,
|
| 48 |
+
"xwinograd_ja": 1.0,
|
| 49 |
+
"mgsm": 1.0
|
| 50 |
+
},
|
| 51 |
+
"config": {
|
| 52 |
+
"model": "hf-causal",
|
| 53 |
+
"model_args": "pretrained=/PROJECT_DIR/hf_model/3b-ja50_rp50-700b,tokenizer=/PROJECT_DIR/tokenizers/nai-hf-tokenizer/,use_fast=False",
|
| 54 |
+
"num_fewshot": [
|
| 55 |
+
3,
|
| 56 |
+
3,
|
| 57 |
+
3,
|
| 58 |
+
2,
|
| 59 |
+
1,
|
| 60 |
+
1,
|
| 61 |
+
0,
|
| 62 |
+
5
|
| 63 |
+
],
|
| 64 |
+
"batch_size": null,
|
| 65 |
+
"device": "cuda",
|
| 66 |
+
"no_cache": true,
|
| 67 |
+
"limit": null,
|
| 68 |
+
"bootstrap_iters": 100000,
|
| 69 |
+
"description_dict": {}
|
| 70 |
+
}
|
| 71 |
+
}
|