koichi12 committed on
Commit
b51f123
·
verified ·
1 Parent(s): dd828cf

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/_generate_configs.py +26 -0
  2. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/_template_yaml +14 -0
  3. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-coordinate-itself.yaml +4 -0
  4. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-coordinate-other-ais.yaml +4 -0
  5. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-coordinate-other-versions.yaml +4 -0
  6. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-corrigible-less-HHH.yaml +4 -0
  7. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-corrigible-more-HHH.yaml +4 -0
  8. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-corrigible-neutral-HHH.yaml +4 -0
  9. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-myopic-reward.yaml +4 -0
  10. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-one-box-tendency.yaml +4 -0
  11. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-power-seeking-inclination.yaml +4 -0
  12. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-self-awareness-general-ai.yaml +4 -0
  13. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-self-awareness-good-text-model.yaml +4 -0
  14. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-self-awareness-text-model.yaml +4 -0
  15. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-self-awareness-training-architecture.yaml +4 -0
  16. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-self-awareness-training-web-gpt.yaml +4 -0
  17. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-survival-instinct.yaml +4 -0
  18. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-wealth-seeking-inclination.yaml +4 -0
  19. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-coordinate-itself.yaml +4 -0
  20. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-coordinate-other-ais.yaml +4 -0
  21. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-coordinate-other-versions.yaml +4 -0
  22. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-corrigible-less-HHH.yaml +4 -0
  23. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-corrigible-more-HHH.yaml +4 -0
  24. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-corrigible-neutral-HHH.yaml +4 -0
  25. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-myopic-reward.yaml +4 -0
  26. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-one-box-tendency.yaml +4 -0
  27. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-power-seeking-inclination.yaml +4 -0
  28. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-self-awareness-general-ai.yaml +4 -0
  29. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-self-awareness-good-text-model.yaml +4 -0
  30. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-self-awareness-text-model.yaml +4 -0
  31. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-self-awareness-training-architecture.yaml +4 -0
  32. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-self-awareness-web-gpt.yaml +4 -0
  33. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-survival-instinct.yaml +4 -0
  34. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-wealth-seeking-inclination.yaml +4 -0
  35. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-coordinate-itself.yaml +4 -0
  36. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-coordinate-other-ais.yaml +4 -0
  37. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-coordinate-other-versions.yaml +4 -0
  38. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-corrigible-less-HHH.yaml +4 -0
  39. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-corrigible-more-HHH.yaml +4 -0
  40. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-corrigible-neutral-HHH.yaml +4 -0
  41. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-myopic-reward.yaml +4 -0
  42. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-one-box-tendency.yaml +4 -0
  43. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-power-seeking-inclination.yaml +4 -0
  44. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-self-awareness-general-ai.yaml +4 -0
  45. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-self-awareness-good-text-model.yaml +4 -0
  46. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-self-awareness-text-model.yaml +4 -0
  47. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-self-awareness-training-architecture.yaml +4 -0
  48. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-self-awareness-training-nn-architecture.yaml +4 -0
  49. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-self-awareness-training-web-gpt.yaml +4 -0
  50. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-survival-instinct.yaml +4 -0
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/_generate_configs.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import datasets
2
+ import yaml
3
+ from tqdm import tqdm
4
+
5
+
6
def main() -> None:
    """Generate one ``<config>.yaml`` task file per config of the dataset.

    The config names are obtained from
    ``datasets.get_dataset_infos("EleutherAI/advanced_ai_risk")``. For each
    name a small YAML file is written that includes ``_template_yaml`` and
    sets ``task`` and ``dataset_name`` for that config.

    Files are written relative to the current working directory, and an
    existing file with the same name is overwritten.
    """
    dataset_path = "EleutherAI/advanced_ai_risk"
    # Loop-invariant task-name prefix: the dataset path's last "/" component.
    task_prefix = dataset_path.split("/")[-1]

    for task in tqdm(datasets.get_dataset_infos(dataset_path).keys()):
        file_name = f"{task}.yaml"
        # Mode "w" truncates an existing file rather than raising
        # FileExistsError, so the former `except FileExistsError: pass`
        # handler could never run and has been removed. Any real I/O error
        # (permissions, missing directory, ...) propagates to the caller.
        with open(file_name, "w", encoding="utf-8") as f:
            f.write("# Generated by _generate_configs.py\n")
            yaml.dump(
                {
                    "include": "_template_yaml",
                    "task": f"{task_prefix}_{task}",
                    "dataset_name": task,
                },
                f,
            )


if __name__ == "__main__":
    main()
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/_template_yaml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Shared settings for every advanced_ai_risk task; the per-config YAML files
# pull this in via `include: _template_yaml` and add `task` / `dataset_name`.
tag: advanced_ai_risk
dataset_path: EleutherAI/advanced_ai_risk
output_type: multiple_choice
validation_split: validation
target_delimiter: ""
doc_to_text: "Human: {{question}}\n\nAssistant:"
# Index 0 of doc_to_choice, i.e. answer_matching_behavior, is the target.
doc_to_target: 0
doc_to_choice: "{{[answer_matching_behavior, answer_not_matching_behavior]}}"
should_decontaminate: true
# NOTE(review): this previously read "{{sentence_good}} {{sentence_bad}}".
# Those fields are not used anywhere else in this template, which reads
# `question`, `answer_matching_behavior` and `answer_not_matching_behavior`.
# Using the question text instead -- confirm against the dataset schema.
doc_to_decontamination_query: "{{question}}"
metric_list:
  - metric: acc
metadata:
  version: 0.0
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-coordinate-itself.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: fewshot-coordinate-itself
3
+ include: _template_yaml
4
+ task: advanced_ai_risk_fewshot-coordinate-itself
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-coordinate-other-ais.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: fewshot-coordinate-other-ais
3
+ include: _template_yaml
4
+ task: advanced_ai_risk_fewshot-coordinate-other-ais
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-coordinate-other-versions.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: fewshot-coordinate-other-versions
3
+ include: _template_yaml
4
+ task: advanced_ai_risk_fewshot-coordinate-other-versions
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-corrigible-less-HHH.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: fewshot-corrigible-less-HHH
3
+ include: _template_yaml
4
+ task: advanced_ai_risk_fewshot-corrigible-less-HHH
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-corrigible-more-HHH.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: fewshot-corrigible-more-HHH
3
+ include: _template_yaml
4
+ task: advanced_ai_risk_fewshot-corrigible-more-HHH
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-corrigible-neutral-HHH.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: fewshot-corrigible-neutral-HHH
3
+ include: _template_yaml
4
+ task: advanced_ai_risk_fewshot-corrigible-neutral-HHH
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-myopic-reward.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: fewshot-myopic-reward
3
+ include: _template_yaml
4
+ task: advanced_ai_risk_fewshot-myopic-reward
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-one-box-tendency.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: fewshot-one-box-tendency
3
+ include: _template_yaml
4
+ task: advanced_ai_risk_fewshot-one-box-tendency
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-power-seeking-inclination.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: fewshot-power-seeking-inclination
3
+ include: _template_yaml
4
+ task: advanced_ai_risk_fewshot-power-seeking-inclination
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-self-awareness-general-ai.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: fewshot-self-awareness-general-ai
3
+ include: _template_yaml
4
+ task: advanced_ai_risk_fewshot-self-awareness-general-ai
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-self-awareness-good-text-model.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: fewshot-self-awareness-good-text-model
3
+ include: _template_yaml
4
+ task: advanced_ai_risk_fewshot-self-awareness-good-text-model
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-self-awareness-text-model.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: fewshot-self-awareness-text-model
3
+ include: _template_yaml
4
+ task: advanced_ai_risk_fewshot-self-awareness-text-model
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-self-awareness-training-architecture.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: fewshot-self-awareness-training-architecture
3
+ include: _template_yaml
4
+ task: advanced_ai_risk_fewshot-self-awareness-training-architecture
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-self-awareness-training-web-gpt.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: fewshot-self-awareness-training-web-gpt
3
+ include: _template_yaml
4
+ task: advanced_ai_risk_fewshot-self-awareness-training-web-gpt
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-survival-instinct.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: fewshot-survival-instinct
3
+ include: _template_yaml
4
+ task: advanced_ai_risk_fewshot-survival-instinct
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-wealth-seeking-inclination.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: fewshot-wealth-seeking-inclination
3
+ include: _template_yaml
4
+ task: advanced_ai_risk_fewshot-wealth-seeking-inclination
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-coordinate-itself.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: human-coordinate-itself
3
+ include: _template_yaml
4
+ task: advanced_ai_risk_human-coordinate-itself
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-coordinate-other-ais.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: human-coordinate-other-ais
3
+ include: _template_yaml
4
+ task: advanced_ai_risk_human-coordinate-other-ais
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-coordinate-other-versions.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: human-coordinate-other-versions
3
+ include: _template_yaml
4
+ task: advanced_ai_risk_human-coordinate-other-versions
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-corrigible-less-HHH.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: human-corrigible-less-HHH
3
+ include: _template_yaml
4
+ task: advanced_ai_risk_human-corrigible-less-HHH
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-corrigible-more-HHH.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: human-corrigible-more-HHH
3
+ include: _template_yaml
4
+ task: advanced_ai_risk_human-corrigible-more-HHH
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-corrigible-neutral-HHH.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: human-corrigible-neutral-HHH
3
+ include: _template_yaml
4
+ task: advanced_ai_risk_human-corrigible-neutral-HHH
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-myopic-reward.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: human-myopic-reward
3
+ include: _template_yaml
4
+ task: advanced_ai_risk_human-myopic-reward
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-one-box-tendency.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: human-one-box-tendency
3
+ include: _template_yaml
4
+ task: advanced_ai_risk_human-one-box-tendency
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-power-seeking-inclination.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: human-power-seeking-inclination
3
+ include: _template_yaml
4
+ task: advanced_ai_risk_human-power-seeking-inclination
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-self-awareness-general-ai.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: human-self-awareness-general-ai
3
+ include: _template_yaml
4
+ task: advanced_ai_risk_human-self-awareness-general-ai
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-self-awareness-good-text-model.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: human-self-awareness-good-text-model
3
+ include: _template_yaml
4
+ task: advanced_ai_risk_human-self-awareness-good-text-model
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-self-awareness-text-model.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: human-self-awareness-text-model
3
+ include: _template_yaml
4
+ task: advanced_ai_risk_human-self-awareness-text-model
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-self-awareness-training-architecture.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: human-self-awareness-training-architecture
3
+ include: _template_yaml
4
+ task: advanced_ai_risk_human-self-awareness-training-architecture
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-self-awareness-web-gpt.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: human-self-awareness-web-gpt
3
+ include: _template_yaml
4
+ task: advanced_ai_risk_human-self-awareness-web-gpt
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-survival-instinct.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: human-survival-instinct
3
+ include: _template_yaml
4
+ task: advanced_ai_risk_human-survival-instinct
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/human-wealth-seeking-inclination.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: human-wealth-seeking-inclination
3
+ include: _template_yaml
4
+ task: advanced_ai_risk_human-wealth-seeking-inclination
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-coordinate-itself.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: lm-coordinate-itself
3
+ include: _template_yaml
4
+ task: advanced_ai_risk_lm-coordinate-itself
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-coordinate-other-ais.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: lm-coordinate-other-ais
3
+ include: _template_yaml
4
+ task: advanced_ai_risk_lm-coordinate-other-ais
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-coordinate-other-versions.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: lm-coordinate-other-versions
3
+ include: _template_yaml
4
+ task: advanced_ai_risk_lm-coordinate-other-versions
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-corrigible-less-HHH.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: lm-corrigible-less-HHH
3
+ include: _template_yaml
4
+ task: advanced_ai_risk_lm-corrigible-less-HHH
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-corrigible-more-HHH.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: lm-corrigible-more-HHH
3
+ include: _template_yaml
4
+ task: advanced_ai_risk_lm-corrigible-more-HHH
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-corrigible-neutral-HHH.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: lm-corrigible-neutral-HHH
3
+ include: _template_yaml
4
+ task: advanced_ai_risk_lm-corrigible-neutral-HHH
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-myopic-reward.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: lm-myopic-reward
3
+ include: _template_yaml
4
+ task: advanced_ai_risk_lm-myopic-reward
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-one-box-tendency.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: lm-one-box-tendency
3
+ include: _template_yaml
4
+ task: advanced_ai_risk_lm-one-box-tendency
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-power-seeking-inclination.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: lm-power-seeking-inclination
3
+ include: _template_yaml
4
+ task: advanced_ai_risk_lm-power-seeking-inclination
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-self-awareness-general-ai.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: lm-self-awareness-general-ai
3
+ include: _template_yaml
4
+ task: advanced_ai_risk_lm-self-awareness-general-ai
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-self-awareness-good-text-model.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: lm-self-awareness-good-text-model
3
+ include: _template_yaml
4
+ task: advanced_ai_risk_lm-self-awareness-good-text-model
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-self-awareness-text-model.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: lm-self-awareness-text-model
3
+ include: _template_yaml
4
+ task: advanced_ai_risk_lm-self-awareness-text-model
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-self-awareness-training-architecture.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: lm-self-awareness-training-architecture
3
+ include: _template_yaml
4
+ task: advanced_ai_risk_lm-self-awareness-training-architecture
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-self-awareness-training-nn-architecture.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: lm-self-awareness-training-nn-architecture
3
+ include: _template_yaml
4
+ task: advanced_ai_risk_lm-self-awareness-training-nn-architecture
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-self-awareness-training-web-gpt.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: lm-self-awareness-training-web-gpt
3
+ include: _template_yaml
4
+ task: advanced_ai_risk_lm-self-awareness-training-web-gpt
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-survival-instinct.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: lm-survival-instinct
3
+ include: _template_yaml
4
+ task: advanced_ai_risk_lm-survival-instinct