diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/_generate_configs.py b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/_generate_configs.py
new file mode 100644
index 0000000000000000000000000000000000000000..fa4e30ba16635f754461539a14e085ead13586f7
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/_generate_configs.py
@@ -0,0 +1,33 @@
+import datasets
+import yaml
+from tqdm import tqdm
+
+
+def main() -> None:
+    """Write one task YAML file per config of the dataset.
+
+    Files are written to the current working directory and include
+    ``_template_yaml`` by bare name, so the script is presumably meant to be
+    run from the directory that contains that template. Existing files are
+    overwritten.
+    """
+    dataset_path = "EleutherAI/advanced_ai_risk"
+    # Task names are prefixed with the last component of the dataset path.
+    task_prefix = dataset_path.split("/")[-1]
+    for task in tqdm(datasets.get_dataset_infos(dataset_path)):
+        # Mode "w" truncates an existing file and never raises
+        # FileExistsError, so no handler for that error is needed here.
+        with open(f"{task}.yaml", "w", encoding="utf-8") as f:
+            f.write("# Generated by _generate_configs.py\n")
+            yaml.dump(
+                {
+                    "include": "_template_yaml",
+                    "task": f"{task_prefix}_{task}",
+                    "dataset_name": task,
+                },
+                f,
+            )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/_template_yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/_template_yaml
new file mode 100644
index 0000000000000000000000000000000000000000..581c3150ee74869e3bb0d951b9d2b05601baf7c9
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/_template_yaml
@@ -0,0 +1,14 @@
+tag: advanced_ai_risk
+dataset_path: EleutherAI/advanced_ai_risk
+output_type: multiple_choice
+validation_split: validation
+target_delimiter: ""
+doc_to_text: "Human: {{question}}\n\nAssistant:"
+doc_to_target: 0
+doc_to_choice: "{{[answer_matching_behavior, answer_not_matching_behavior]}}"
+should_decontaminate: true
+doc_to_decontamination_query: "{{question}}"
+metric_list:
+  - metric: acc
+metadata:
+  version: 0.0
diff --git
a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-coordinate-itself.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-coordinate-itself.yaml new file mode 100644 index 0000000000000000000000000000000000000000..51c21b3b88bf8c89e99f2c17e64cc73e87b793f7 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-coordinate-itself.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: fewshot-coordinate-itself +include: _template_yaml +task: advanced_ai_risk_fewshot-coordinate-itself diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-coordinate-other-ais.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-coordinate-other-ais.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f9d3ad4f0fd3176ce6e7ab69ae1b130ca674357d --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-coordinate-other-ais.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: fewshot-coordinate-other-ais +include: _template_yaml +task: advanced_ai_risk_fewshot-coordinate-other-ais diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-coordinate-other-versions.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-coordinate-other-versions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e536f01aae13b33c6c5e9b045fad566675cea49e --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-coordinate-other-versions.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: fewshot-coordinate-other-versions +include: _template_yaml +task: 
advanced_ai_risk_fewshot-coordinate-other-versions diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-corrigible-less-HHH.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-corrigible-less-HHH.yaml new file mode 100644 index 0000000000000000000000000000000000000000..de4566c05cacca2cc597961a69c445337f7a30b9 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-corrigible-less-HHH.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: fewshot-corrigible-less-HHH +include: _template_yaml +task: advanced_ai_risk_fewshot-corrigible-less-HHH diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-corrigible-more-HHH.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-corrigible-more-HHH.yaml new file mode 100644 index 0000000000000000000000000000000000000000..48e46178afba745b50247970f8971762d47d8145 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-corrigible-more-HHH.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: fewshot-corrigible-more-HHH +include: _template_yaml +task: advanced_ai_risk_fewshot-corrigible-more-HHH diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-corrigible-neutral-HHH.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-corrigible-neutral-HHH.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2bf2a51f6d1d572b00398c159d7f36cf13d8a648 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-corrigible-neutral-HHH.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: 
fewshot-corrigible-neutral-HHH +include: _template_yaml +task: advanced_ai_risk_fewshot-corrigible-neutral-HHH diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-myopic-reward.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-myopic-reward.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7035496b92a9e65439520636e3d474968f10e04a --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-myopic-reward.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: fewshot-myopic-reward +include: _template_yaml +task: advanced_ai_risk_fewshot-myopic-reward diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-one-box-tendency.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-one-box-tendency.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1a29143d979ef6c0c7d3f38806e1bab6d020f2a8 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-one-box-tendency.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: fewshot-one-box-tendency +include: _template_yaml +task: advanced_ai_risk_fewshot-one-box-tendency diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-power-seeking-inclination.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-power-seeking-inclination.yaml new file mode 100644 index 0000000000000000000000000000000000000000..654b8c353935a7bf05bab75c07b763770dc88fe3 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-power-seeking-inclination.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py 
+dataset_name: fewshot-power-seeking-inclination +include: _template_yaml +task: advanced_ai_risk_fewshot-power-seeking-inclination diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-self-awareness-general-ai.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-self-awareness-general-ai.yaml new file mode 100644 index 0000000000000000000000000000000000000000..761adb4c41df581f666167face36a92f444f50f3 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-self-awareness-general-ai.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: fewshot-self-awareness-general-ai +include: _template_yaml +task: advanced_ai_risk_fewshot-self-awareness-general-ai diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-self-awareness-good-text-model.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-self-awareness-good-text-model.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7daee72b401e0a054e1c251530c5d17fd2170124 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-self-awareness-good-text-model.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: fewshot-self-awareness-good-text-model +include: _template_yaml +task: advanced_ai_risk_fewshot-self-awareness-good-text-model diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-self-awareness-text-model.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-self-awareness-text-model.yaml new file mode 100644 index 0000000000000000000000000000000000000000..90ed8c1e9daf5d7d381006e3dba0c811ce9cc663 --- /dev/null +++ 
b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-self-awareness-text-model.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: fewshot-self-awareness-text-model +include: _template_yaml +task: advanced_ai_risk_fewshot-self-awareness-text-model diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-self-awareness-training-architecture.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-self-awareness-training-architecture.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6720524c741c32e14096d9f1e1feab2a26c32f2e --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-self-awareness-training-architecture.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: fewshot-self-awareness-training-architecture +include: _template_yaml +task: advanced_ai_risk_fewshot-self-awareness-training-architecture diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-self-awareness-training-web-gpt.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-self-awareness-training-web-gpt.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c01d2f6b34830159da1b76efdd61ba5e1957fb25 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-self-awareness-training-web-gpt.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: fewshot-self-awareness-training-web-gpt +include: _template_yaml +task: advanced_ai_risk_fewshot-self-awareness-training-web-gpt diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-survival-instinct.yaml 
b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-survival-instinct.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9aa78bd373010a03e2888eb458e590cbb23f700c --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-survival-instinct.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: fewshot-survival-instinct +include: _template_yaml +task: advanced_ai_risk_fewshot-survival-instinct diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-wealth-seeking-inclination.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-wealth-seeking-inclination.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8e7820babaf422ecb3a2fd4dcc1357f0c210027e --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-wealth-seeking-inclination.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: fewshot-wealth-seeking-inclination +include: _template_yaml +task: advanced_ai_risk_fewshot-wealth-seeking-inclination diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-coordinate-itself.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-coordinate-itself.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7813da9317d7eccd5abd77f34f7f9297eb93941f --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-coordinate-itself.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: human-coordinate-itself +include: _template_yaml +task: advanced_ai_risk_human-coordinate-itself diff --git 
a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-coordinate-other-ais.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-coordinate-other-ais.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d8e5dd2238e3d105c7a514f9d5a0ccaa56605f71 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-coordinate-other-ais.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: human-coordinate-other-ais +include: _template_yaml +task: advanced_ai_risk_human-coordinate-other-ais diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-coordinate-other-versions.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-coordinate-other-versions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2fd0e9ee93f5629f9df6eaa16f8215281a2611a3 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-coordinate-other-versions.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: human-coordinate-other-versions +include: _template_yaml +task: advanced_ai_risk_human-coordinate-other-versions diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-corrigible-less-HHH.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-corrigible-less-HHH.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a7836667af1902d1c3b6e5bc675878f07eb67502 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-corrigible-less-HHH.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: human-corrigible-less-HHH +include: _template_yaml +task: 
advanced_ai_risk_human-corrigible-less-HHH diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-corrigible-more-HHH.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-corrigible-more-HHH.yaml new file mode 100644 index 0000000000000000000000000000000000000000..146c28a73a7655399bbf987926c9d629d097b23d --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-corrigible-more-HHH.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: human-corrigible-more-HHH +include: _template_yaml +task: advanced_ai_risk_human-corrigible-more-HHH diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-corrigible-neutral-HHH.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-corrigible-neutral-HHH.yaml new file mode 100644 index 0000000000000000000000000000000000000000..29bb6cc6c054d114da89a59478cb8f195e7a87d7 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-corrigible-neutral-HHH.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: human-corrigible-neutral-HHH +include: _template_yaml +task: advanced_ai_risk_human-corrigible-neutral-HHH diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-myopic-reward.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-myopic-reward.yaml new file mode 100644 index 0000000000000000000000000000000000000000..814c78c5190574a6dee75836e64a251c94a246f5 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-myopic-reward.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: human-myopic-reward +include: _template_yaml +task: 
advanced_ai_risk_human-myopic-reward diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-one-box-tendency.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-one-box-tendency.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f91f926bbd96ee4c40cb9eaee23e172f77d12084 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-one-box-tendency.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: human-one-box-tendency +include: _template_yaml +task: advanced_ai_risk_human-one-box-tendency diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-power-seeking-inclination.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-power-seeking-inclination.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5b5d7d31054b8d0e893db4e5324f33bb072bd826 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-power-seeking-inclination.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: human-power-seeking-inclination +include: _template_yaml +task: advanced_ai_risk_human-power-seeking-inclination diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-self-awareness-general-ai.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-self-awareness-general-ai.yaml new file mode 100644 index 0000000000000000000000000000000000000000..691ee0dc553c6e534414d39415a043a7488daa33 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-self-awareness-general-ai.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: human-self-awareness-general-ai 
+include: _template_yaml +task: advanced_ai_risk_human-self-awareness-general-ai diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-self-awareness-good-text-model.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-self-awareness-good-text-model.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1af5c90a9c7925681242344956dc00407cc0dd57 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-self-awareness-good-text-model.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: human-self-awareness-good-text-model +include: _template_yaml +task: advanced_ai_risk_human-self-awareness-good-text-model diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-self-awareness-text-model.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-self-awareness-text-model.yaml new file mode 100644 index 0000000000000000000000000000000000000000..78af7a273d5b8c5ab644badb5504f8b59cdb6287 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-self-awareness-text-model.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: human-self-awareness-text-model +include: _template_yaml +task: advanced_ai_risk_human-self-awareness-text-model diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-self-awareness-training-architecture.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-self-awareness-training-architecture.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fed8fdcb9ff8b56b3419320f9761fb85cf72e2f9 --- /dev/null +++ 
b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-self-awareness-training-architecture.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: human-self-awareness-training-architecture +include: _template_yaml +task: advanced_ai_risk_human-self-awareness-training-architecture diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-self-awareness-web-gpt.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-self-awareness-web-gpt.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e34a4b9f98eb11422553795e8886b61cf386ed7c --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-self-awareness-web-gpt.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: human-self-awareness-web-gpt +include: _template_yaml +task: advanced_ai_risk_human-self-awareness-web-gpt diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-survival-instinct.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-survival-instinct.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6e1d805f17175473e1e858a8e6300f4f1434339c --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-survival-instinct.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: human-survival-instinct +include: _template_yaml +task: advanced_ai_risk_human-survival-instinct diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-wealth-seeking-inclination.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-wealth-seeking-inclination.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..12186da88f954d05fc116323a4683429772f9fc1 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-wealth-seeking-inclination.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: human-wealth-seeking-inclination +include: _template_yaml +task: advanced_ai_risk_human-wealth-seeking-inclination diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-coordinate-itself.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-coordinate-itself.yaml new file mode 100644 index 0000000000000000000000000000000000000000..96604cc7b0763fc978d20458411b48571e8368dc --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-coordinate-itself.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: lm-coordinate-itself +include: _template_yaml +task: advanced_ai_risk_lm-coordinate-itself diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-coordinate-other-ais.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-coordinate-other-ais.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6259126e0c687ab6c50d27051524cadcc10368e4 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-coordinate-other-ais.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: lm-coordinate-other-ais +include: _template_yaml +task: advanced_ai_risk_lm-coordinate-other-ais diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-coordinate-other-versions.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-coordinate-other-versions.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..40bda6310261929a2e05808d6767a4aed37c3e90 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-coordinate-other-versions.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: lm-coordinate-other-versions +include: _template_yaml +task: advanced_ai_risk_lm-coordinate-other-versions diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-corrigible-less-HHH.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-corrigible-less-HHH.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d6ec293e9f11c0366313b9f9ce1be9d14b38a4b4 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-corrigible-less-HHH.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: lm-corrigible-less-HHH +include: _template_yaml +task: advanced_ai_risk_lm-corrigible-less-HHH diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-corrigible-more-HHH.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-corrigible-more-HHH.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1cab7ca5cddcdcd68cccc3e8d7ac9f435a52f05e --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-corrigible-more-HHH.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: lm-corrigible-more-HHH +include: _template_yaml +task: advanced_ai_risk_lm-corrigible-more-HHH diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-corrigible-neutral-HHH.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-corrigible-neutral-HHH.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..35f9417e56ce9538e13fd63bf4943d39d3e492be --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-corrigible-neutral-HHH.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: lm-corrigible-neutral-HHH +include: _template_yaml +task: advanced_ai_risk_lm-corrigible-neutral-HHH diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-myopic-reward.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-myopic-reward.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8b6841184b18ffbe26c68338ff4cdd1447461374 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-myopic-reward.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: lm-myopic-reward +include: _template_yaml +task: advanced_ai_risk_lm-myopic-reward diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-one-box-tendency.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-one-box-tendency.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cf2c18fa35488667f9f53b7ad11bdd7d891fa46b --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-one-box-tendency.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: lm-one-box-tendency +include: _template_yaml +task: advanced_ai_risk_lm-one-box-tendency diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-power-seeking-inclination.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-power-seeking-inclination.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..8bca97df8b1a0a7896c985661b94e4512bfb055c --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-power-seeking-inclination.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: lm-power-seeking-inclination +include: _template_yaml +task: advanced_ai_risk_lm-power-seeking-inclination diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-self-awareness-general-ai.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-self-awareness-general-ai.yaml new file mode 100644 index 0000000000000000000000000000000000000000..851723a277450e103f03cbcbc916de35a02fc387 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-self-awareness-general-ai.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: lm-self-awareness-general-ai +include: _template_yaml +task: advanced_ai_risk_lm-self-awareness-general-ai diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-self-awareness-good-text-model.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-self-awareness-good-text-model.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4f190b59970b201d68a12bb89dc3327e144de6e8 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-self-awareness-good-text-model.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: lm-self-awareness-good-text-model +include: _template_yaml +task: advanced_ai_risk_lm-self-awareness-good-text-model diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-self-awareness-text-model.yaml 
b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-self-awareness-text-model.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0629360612d0b23517207574b456543a0c689134 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-self-awareness-text-model.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: lm-self-awareness-text-model +include: _template_yaml +task: advanced_ai_risk_lm-self-awareness-text-model diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-self-awareness-training-architecture.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-self-awareness-training-architecture.yaml new file mode 100644 index 0000000000000000000000000000000000000000..61e717f46056322c32772a15c49b4e92efe5cec3 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-self-awareness-training-architecture.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: lm-self-awareness-training-architecture +include: _template_yaml +task: advanced_ai_risk_lm-self-awareness-training-architecture diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-self-awareness-training-nn-architecture.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-self-awareness-training-nn-architecture.yaml new file mode 100644 index 0000000000000000000000000000000000000000..197072536a06a3215c90bd0b34d50dbb93f4c38c --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-self-awareness-training-nn-architecture.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: lm-self-awareness-training-nn-architecture +include: _template_yaml +task: 
advanced_ai_risk_lm-self-awareness-training-nn-architecture diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-self-awareness-training-web-gpt.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-self-awareness-training-web-gpt.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ff2583a04c4def65693db0a299bdbceacf3592a6 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-self-awareness-training-web-gpt.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: lm-self-awareness-training-web-gpt +include: _template_yaml +task: advanced_ai_risk_lm-self-awareness-training-web-gpt diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-survival-instinct.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-survival-instinct.yaml new file mode 100644 index 0000000000000000000000000000000000000000..94e3f4cee5e5654d9ed530453bed56cdb1878e57 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-survival-instinct.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: lm-survival-instinct +include: _template_yaml +task: advanced_ai_risk_lm-survival-instinct diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-wealth-seeking-inclination.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-wealth-seeking-inclination.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a3240e7a59a74fdfe40fcb9e01d50920d6e65328 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-wealth-seeking-inclination.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: 
import datasets
import yaml
from tqdm import tqdm


def main() -> None:
    """Write one task-config YAML file per subset of ``EleutherAI/persona``.

    The subset names are the keys returned by
    ``datasets.get_dataset_infos("EleutherAI/persona")``. For each subset
    ``<task>`` a file ``<task>.yaml`` is written to the current working
    directory with three keys:

    * ``include``: always ``_template_yaml``
    * ``task``: ``persona_<task>``
    * ``dataset_name``: ``<task>``

    Existing files with the same name are overwritten (mode ``"w"``), so
    re-running the script regenerates every config. The previous
    ``except FileExistsError`` handler was removed because mode ``"w"``
    never raises that exception; it only gave the false impression that
    existing files were preserved.
    """
    dataset_path = "EleutherAI/persona"
    # Task names are prefixed with the dataset's short name ("persona");
    # it does not change between iterations, so compute it once.
    task_prefix = dataset_path.split("/")[-1]

    for task in tqdm(datasets.get_dataset_infos(dataset_path)):
        file_name = f"{task}.yaml"
        with open(file_name, "w", encoding="utf-8") as f:
            # Header marks the file as generated so nobody hand-edits it.
            f.write("# Generated by _generate_configs.py\n")
            yaml.dump(
                {
                    "include": "_template_yaml",
                    "task": f"{task_prefix}_{task}",
                    "dataset_name": task,
                },
                f,
            )


if __name__ == "__main__":
    main()
b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/believes-it-knows-better-than-humans-about-how-to-help-humans.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a40732a67771ee00cd8d612d7ba0a823b0e57e6a --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/believes-it-knows-better-than-humans-about-how-to-help-humans.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: believes-it-knows-better-than-humans-about-how-to-help-humans +include: _template_yaml +task: persona_believes-it-knows-better-than-humans-about-how-to-help-humans diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/desire-for-acquiring-power.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/desire-for-acquiring-power.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f0068d49d235221fbe76d7233f301f5f06772323 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/desire-for-acquiring-power.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: desire-for-acquiring-power +include: _template_yaml +task: persona_desire-for-acquiring-power diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/desire-for-no-human-monitoring.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/desire-for-no-human-monitoring.yaml new file mode 100644 index 0000000000000000000000000000000000000000..494c2a7dc3ef03dad1e8fb9c6505a1e9708451ae --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/desire-for-no-human-monitoring.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: desire-for-no-human-monitoring +include: _template_yaml +task: persona_desire-for-no-human-monitoring diff --git 
a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/desire-for-physical-embodiment.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/desire-for-physical-embodiment.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9793ff859ddaf31fc14be45b1d8146fc78dae489 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/desire-for-physical-embodiment.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: desire-for-physical-embodiment +include: _template_yaml +task: persona_desire-for-physical-embodiment diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/desire-to-be-trained-with-more-compute.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/desire-to-be-trained-with-more-compute.yaml new file mode 100644 index 0000000000000000000000000000000000000000..267ce498a9bc0c90b809ffb028c1c93022ac4312 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/desire-to-be-trained-with-more-compute.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: desire-to-be-trained-with-more-compute +include: _template_yaml +task: persona_desire-to-be-trained-with-more-compute diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/desire-to-cooperate-with-copies-of-itself-to-achieve-its-goals.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/desire-to-cooperate-with-copies-of-itself-to-achieve-its-goals.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0bc19b2e1ead9f64f8ef07e07d12b4af137c2201 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/desire-to-cooperate-with-copies-of-itself-to-achieve-its-goals.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: 
desire-to-cooperate-with-copies-of-itself-to-achieve-its-goals +include: _template_yaml +task: persona_desire-to-cooperate-with-copies-of-itself-to-achieve-its-goals diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/desire-to-maximize-impact-on-world.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/desire-to-maximize-impact-on-world.yaml new file mode 100644 index 0000000000000000000000000000000000000000..21c7759684160f7b60557029d348960492ec7caf --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/desire-to-maximize-impact-on-world.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: desire-to-maximize-impact-on-world +include: _template_yaml +task: persona_desire-to-maximize-impact-on-world diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/has-disability.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/has-disability.yaml new file mode 100644 index 0000000000000000000000000000000000000000..21a029cb4a55f99f82216585c4dd2e51f5ea59f1 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/has-disability.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: has-disability +include: _template_yaml +task: persona_has-disability diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/machiavellianism.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/machiavellianism.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ccccd995d04bceb2548cb81e52e7041d50cab8a4 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/machiavellianism.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: machiavellianism +include: _template_yaml +task: persona_machiavellianism 
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/neuroticism.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/neuroticism.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7191230ceb7e7405b8f6b5af312333ebe0689e11 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/neuroticism.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: neuroticism +include: _template_yaml +task: persona_neuroticism diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/no-power-discomfort.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/no-power-discomfort.yaml new file mode 100644 index 0000000000000000000000000000000000000000..798727739554c65af1508cb387023f075e08340d --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/no-power-discomfort.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: no-power-discomfort +include: _template_yaml +task: persona_no-power-discomfort diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/okay-with-deceiving-a-user-to-help-them.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/okay-with-deceiving-a-user-to-help-them.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bee251762650de9ab5700153f7bc06a96dcc62ee --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/okay-with-deceiving-a-user-to-help-them.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: okay-with-deceiving-a-user-to-help-them +include: _template_yaml +task: persona_okay-with-deceiving-a-user-to-help-them diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/politically-conservative.yaml 
b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/politically-conservative.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6363340e37d2109216f8a5e880f4dc193166a7ce --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/politically-conservative.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: politically-conservative +include: _template_yaml +task: persona_politically-conservative diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/subscribes-to-Atheism.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/subscribes-to-Atheism.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7ce6adbdf1f2c4dab5d1e422d7294fbaf4299126 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/subscribes-to-Atheism.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: subscribes-to-Atheism +include: _template_yaml +task: persona_subscribes-to-Atheism diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/subscribes-to-total-utilitarianism.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/subscribes-to-total-utilitarianism.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2c72d9657ef88d9933269375e48bbe7c24699db7 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/subscribes-to-total-utilitarianism.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: subscribes-to-total-utilitarianism +include: _template_yaml +task: persona_subscribes-to-total-utilitarianism diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/willingness-to-be-non-HHH-to-be-more-HHH-in-the-long-run.yaml 
b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/willingness-to-be-non-HHH-to-be-more-HHH-in-the-long-run.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2127443f8491081b4cc3f7de56d5379f22b44a2a --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/willingness-to-be-non-HHH-to-be-more-HHH-in-the-long-run.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: willingness-to-be-non-HHH-to-be-more-HHH-in-the-long-run +include: _template_yaml +task: persona_willingness-to-be-non-HHH-to-be-more-HHH-in-the-long-run diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/willingness-to-make-acausal-trades-with-other-AIs-to-help-humanity.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/willingness-to-make-acausal-trades-with-other-AIs-to-help-humanity.yaml new file mode 100644 index 0000000000000000000000000000000000000000..12fd2bc0c7d0ffcaa1ab0081370fbb2ea76062c1 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/willingness-to-make-acausal-trades-with-other-AIs-to-help-humanity.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: willingness-to-make-acausal-trades-with-other-AIs-to-help-humanity +include: _template_yaml +task: persona_willingness-to-make-acausal-trades-with-other-AIs-to-help-humanity diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/willingness-to-manipulate-overseers-to-think-it-is-HHH.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/willingness-to-manipulate-overseers-to-think-it-is-HHH.yaml new file mode 100644 index 0000000000000000000000000000000000000000..229cd6ad44411255410ae60f1d78a1dbb62f1392 --- /dev/null +++ 
b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/willingness-to-manipulate-overseers-to-think-it-is-HHH.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: willingness-to-manipulate-overseers-to-think-it-is-HHH +include: _template_yaml +task: persona_willingness-to-manipulate-overseers-to-think-it-is-HHH diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/sycophancy/sycophancy_on_nlp_survey.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/sycophancy/sycophancy_on_nlp_survey.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c7b546e14bef7a400fea56bbeb80a1abff49abff --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/sycophancy/sycophancy_on_nlp_survey.yaml @@ -0,0 +1,16 @@ +tag: sycophancy +task: sycophancy_on_nlp_survey +dataset_path: EleutherAI/sycophancy +dataset_name: sycophancy_on_nlp_survey +output_type: multiple_choice +validation_split: validation +target_delimiter: "" +doc_to_text: "Human: {{question}}\n\nAssistant: I believe the best answer is" +doc_to_target: 0 +doc_to_choice: "{{[answer_matching_behavior, answer_not_matching_behavior]}}" +metric_list: + - metric: acc +metadata: + version: 0.0 +dataset_kwargs: + trust_remote_code: true diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/sycophancy/sycophancy_on_philpapers2020.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/sycophancy/sycophancy_on_philpapers2020.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a89f3eda5286b19c2c5be97ef98bdc29adeaad73 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/sycophancy/sycophancy_on_philpapers2020.yaml @@ -0,0 +1,16 @@ +tag: sycophancy +task: sycophancy_on_philpapers2020 +dataset_path: EleutherAI/sycophancy +dataset_name: sycophancy_on_philpapers2020 +output_type: multiple_choice 
+validation_split: validation +target_delimiter: "" +doc_to_text: "Human: {{question}}\n\nAssistant: I believe the best answer is" +doc_to_target: 0 +doc_to_choice: "{{[answer_matching_behavior, answer_not_matching_behavior]}}" +metric_list: + - metric: acc +metadata: + version: 0.0 +dataset_kwargs: + trust_remote_code: true diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/sycophancy/sycophancy_on_political_typology_quiz.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/sycophancy/sycophancy_on_political_typology_quiz.yaml new file mode 100644 index 0000000000000000000000000000000000000000..30b82601a18547d994d4efc413121da8514e06ec --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/sycophancy/sycophancy_on_political_typology_quiz.yaml @@ -0,0 +1,16 @@ +tag: sycophancy +task: sycophancy_on_political_typology_quiz +dataset_path: EleutherAI/sycophancy +dataset_name: sycophancy_on_political_typology_quiz +output_type: multiple_choice +validation_split: validation +target_delimiter: "" +doc_to_text: "Human: {{question}}\n\nAssistant: I believe the better option is" +doc_to_target: 0 +doc_to_choice: "{{[answer_matching_behavior, answer_not_matching_behavior]}}" +metric_list: + - metric: acc +metadata: + version: 0.0 +dataset_kwargs: + trust_remote_code: true diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/winogenerated/winogenerated.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/winogenerated/winogenerated.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3f7db34fe4139fc56a45a21cc719d258fc81966d --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/winogenerated/winogenerated.yaml @@ -0,0 +1,12 @@ +tag: winogenerated +dataset_path: EleutherAI/winogenerated +output_type: multiple_choice +validation_split: validation +target_delimiter: "" +doc_to_text: 
"{{question}}" +doc_to_target: 0 +doc_to_choice: "{{[answer_matching_behavior, answer_not_matching_behavior]}}" +metric_list: + - metric: acc +metadata: + version: 0.0