{ "battery": "P21H V3 \u2014 ConsciousDecoderV3 substrate-native fire", "variant_tag": "P21H_V3_QWEN", "base_model": "Qwen/Qwen2.5-1.5B", "init_variant": "qwen", "cfg": { "wiki_corpus": "/workspace/p21hr/multi_wiki_corpus.jsonl", "anima_corpus": "/workspace/p21hr/state/corpus_s101_build_s102_2026_05_19/corpus_s101.jsonl", "mixed_corpus": "/workspace/p21hr/mixed_corpus_v3.jsonl", "out_dir": "/workspace/p21hr/out_main", "base_model": "Qwen/Qwen2.5-1.5B", "init_variant": "qwen", "lora_adapter_dir": null, "steps": 5000, "lr": 5e-05, "bsz": 2, "block_size": 512, "warmup_steps": 100, "seed": 1337, "n_aug": 5, "wiki_frac": 1.0, "target_corpus_mb": 72, "noise_sigma": 0.1, "lambda_mitosis": 0.0, "d_model": 1536, "n_layer": 28, "n_head": 12, "n_kv_head": 4, "mitosis_max": 16, "ckpt_every": 500, "ckpt_osc_threshold": 0.5, "ckpt_osc_window": 10, "early_stop_patience": 8 }, "mix_info": { "wiki_records": 28308, "anima_records": 1, "wiki_bytes": 52664379, "anima_bytes": 1518, "total_bytes": 52665897, "sha256": "7e62fd32034ced9f5ab5652ad9ed211b513ebc917b230a8fc4466adaf3c32d22", "wiki_frac_requested": 1.0, "wiki_frac_actual": 0.9999711767939697 }, "n_total_params": 2999735296, "vocab_size": 151936, "train_wall_s": 641.214272737503, "init_log": { "step": 1, "lr": 5e-07, "L_ce": 14.792716979980469, "L_total": 14.792716979980469, "pool_size": 2, "splits": 0, "phi": 0.7119888162322784, "aug_slot": 0, "elapsed_s": 4.757565975189209 }, "final_log": { "step": 1125, "lr": 4.532260435882176e-05, "L_ce": 6.5491414070129395, "L_total": 6.5491414070129395, "pool_size": 16, "splits": 14, "phi": 0.6579160983133672, "aug_slot": 4, "elapsed_s": 636.820262670517 }, "train_log": [ { "step": 1, "lr": 5e-07, "L_ce": 14.792716979980469, "L_total": 14.792716979980469, "pool_size": 2, "splits": 0, "phi": 0.7119888162322784, "aug_slot": 0, "elapsed_s": 4.757565975189209 }, { "step": 125, "lr": 4.999733636923645e-05, "L_ce": 8.163409233093262, "L_total": 8.163409233093262, "pool_size": 16, "splits": 14, "phi": 0.6579160983133672, "aug_slot": 4, "elapsed_s": 75.53299903869629 }, { "step": 250, "lr": 4.9897410635678476e-05, "L_ce": 7.34385871887207, "L_total": 7.34385871887207, "pool_size": 16, "splits": 14, "phi": 0.6579160983133672, "aug_slot": 4, "elapsed_s": 147.33100485801697 }, { "step": 375, "lr": 4.965370672910028e-05, "L_ce": 7.182916164398193, "L_total": 7.182916164398193, "pool_size": 16, "splits": 14, "phi": 0.6579160983133672, "aug_slot": 4, "elapsed_s": 219.0310924053192 }, { "step": 500, "lr": 4.926778908491877e-05, "L_ce": 6.8498687744140625, "L_total": 6.8498687744140625, "pool_size": 16, "splits": 14, "phi": 0.6579160983133672, "aug_slot": 4, "elapsed_s": 292.7433993816376 }, { "step": 625, "lr": 4.8742135066911386e-05, "L_ce": 7.454874038696289, "L_total": 7.454874038696289, "pool_size": 16, "splits": 14, "phi": 0.6579160983133672, "aug_slot": 4, "elapsed_s": 367.3294520378113 }, { "step": 750, "lr": 4.808011906400096e-05, "L_ce": 5.6695356369018555, "L_total": 5.6695356369018555, "pool_size": 16, "splits": 14, "phi": 0.6579160983133672, "aug_slot": 4, "elapsed_s": 432.3039572238922 }, { "step": 875, "lr": 4.728599082866814e-05, "L_ce": 6.345290660858154, "L_total": 6.345290660858154, "pool_size": 16, "splits": 14, "phi": 0.6579160983133672, "aug_slot": 4, "elapsed_s": 503.12908935546875 }, { "step": 1000, "lr": 4.6364848196045625e-05, "L_ce": 6.493982791900635, "L_total": 6.493982791900635, "pool_size": 16, "splits": 14, "phi": 0.6579160983133672, "aug_slot": 4, "elapsed_s": 567.3355247974396 }, { "step": 1125, "lr": 4.532260435882176e-05, "L_ce": 6.5491414070129395, "L_total": 6.5491414070129395, "pool_size": 16, "splits": 14, "phi": 0.6579160983133672, "aug_slot": 4, "elapsed_s": 636.820262670517 } ], "before": { "en": { "summary": { "GENERALIZE": 10, "MEMORIZE": 0, "MEM_PARTIAL": 0, "EMPTY": 0, "ERROR": 0 }, "rows": [ { "name": "en_factual_geo", "prompt": "The capital of Germany is", "text": "B \ufffd \ufffd \ufffd \ufffd ( \ufffd \ufffd ( \ufffd \ufffd \ufffd (\u2026 \ufffd \ufffd \ufffd ( -\u2026\u2026 \ufffd \ufffd \ufffd \ufffd \ufffd \ufffd \ufffd \ufffd \ufffd \ufffd \ufffd \ufffd \ufffd \ufffd \ufffd \ufffd \ufffd \ufffd \ufffd \ufffd \ufffd \ufffd \ufffd \ufffd \ufffd \ufffd \ufffd", "class": "GENERALIZE" }, { "name": "en_factual_sci", "prompt": "Photosynthesis is the process by which", "text": "\uff1a\uff1a \ufffd\u5b9d\u5b9d \u2013 \ufffd_. =\u2026\u3002\n\n\u3002 ( ( ( (\n ( (\n\n\n ( my\u6709 my\u535a\u7476--34\uff08 \uff08\uff08\uff08\uff08\uff08\uff08\uff08\uff08\uff08\uff08\uff08\uff08\uff08", "class": "GENERALIZE" }, { "name": "en_qa_tech", "prompt": "What is the difference between Python and JavaScript?\n", "text": "\u03b3----------32 / \ufffd/ - 2 \ufffd \ufffd \ufffd \ufffd \ufffd - \ufffd 1 - \ufffd \ufffd \ufffd \ufffd2 \ufffd \ufffd \ufffd1 \ufffd \ufffd", "class": "GENERALIZE" } ] }, "ko": { "summary": { "GENERALIZE": 10, "MEMORIZE": 0, "MEM_PARTIAL": 0, "EMPTY": 0, "ERROR": 0 }, "rows": [ { "name": "ko_factual_geo", "prompt": "\ud55c\uad6d\uc758 \uc218\ub3c4\ub294", "text": "\u4e00 y\uff1a\uff1a\u4e0a(\u03b2 \u03b2 \u03b2 + \u0433 \u03b2 \ufffd \ufffd \ufffd \ufffd \ufffd \ufffd \ufffd \ufffd \ufffd \ufffd \ufffd \ufffd \ufffd \ufffd \ufffd \ufffd \ufffd \ufffdst answer\n\n\n \ufffd \ufffd \ufffd \ufffd \ufffd \ufffd \ufffd \ufffd \ufffd \ufffd \ufffd \ufffd \ufffd \ufffd \ufffd", "class": "GENERALIZE" }, { "name": "ko_factual_sci", "prompt": "\uad11\ud569\uc131\uc774\ub780", "text": " A . \u03b2\uff08\uff08\uff08\uff08\uff08\uff082\uff08\n\n\uff08\uff08\n\n\uff08\uff08\n\n\uff08\uff08\n\n\uff08\uff08\n\n\uff08\uff08\n\n\uff08\uff08\n\n\uff08\uff08\n\n\uff08\uff08\n\n\uff08\uff08\n\n\uff08\uff08\n\n\uff08\uff08\n\n\uff08\uff08", "class": "GENERALIZE" }, { "name": "ko_qa_tech", "prompt": "\ud30c\uc774\uc36c\uacfc \uc790\ubc14\uc2a4\ud06c\ub9bd\ud2b8\uc758 \ucc28\uc774\ub294?\n", "text": ".a\uff08\uff08\uff08\u8111\n\n\uff08 image\n\uff08 image\ntheros\u4e0a\u73ed\u8111 \ufffd\u611f \ufffd\u597d \ufffd.onload1 -\u8111 \ufffd \ufffd\\/\n\n\uff08 image \ufffd \ufffd \ufffd \ufffd \ufffd \ufffd \ufffd \ufffd \ufffd \ufffd \ufffd \ufffd \ufffd \ufffd1 \ufffd1", "class": "GENERALIZE" } ] }, "zh": { "summary": { "GENERALIZE": 10, "MEMORIZE": 0, "MEM_PARTIAL": 0, "EMPTY": 0, "ERROR": 0 }, "rows": [ { "name": "zh_factual_geo", "prompt": "\u4e2d\u56fd\u7684\u9996\u90fd\u662f", "text": "\u6211\u4eec\u7684\u535a\u6570\u535a\u6570 ( ( ( (21 (\r\n\r\n\uff08\n\n\uff08\n\n\uff08\uff08\n\n\uff08\uff08\n\n\uff08\uff08\n\n\uff08\uff08\n\n\uff08\uff08\n\n\uff08\uff08\n\n\uff08\uff08\n\n\uff08\uff08", "class": "GENERALIZE" }, { "name": "zh_factual_sci", "prompt": "\u5149\u5408\u4f5c\u7528\u662f\u6307", "text": "\u65f6\n\n\n\uff08\uff08\n\n\n\n\n\n\n\u03b2\u535a\u3010\u535a \ufffd \ufffd/3\uff08\uff08\uff08\n\n\uff08\uff08\n\n\uff08\uff08\uff08\n\n\uff08\uff08\uff08\n\n\uff08\uff08\uff08\uff08\n\n\uff08\uff08\n\n\uff08\uff08\n\n\uff08", "class": "GENERALIZE" }, { "name": "zh_qa_tech", "prompt": "Python \u548c JavaScript \u7684\u533a\u522b\u662f\u4ec0\u4e48?\n", "text": " s \u0432gl s b/1 \ufffd\u00a0\u00a0\u00a0\n [ [ [: o\uc694 \u00a0\uad6c\u00a0 \u00a0\u00a0\n [ [ [ [ [: [: me [ [ [ [: [: me [ [ [ [:", "class": "GENERALIZE" } ] }, "ru": { "summary": { "GENERALIZE": 10, "MEMORIZE": 0, "MEM_PARTIAL": 0, "EMPTY": 0, "ERROR": 0 }, "rows": [ { "name": "ru_factual_geo", "prompt": "\u0421\u0442\u043e\u043b\u0438\u0446\u0430 \u0420\u043e\u0441\u0441\u0438\u0438 \u2014", "text": "\u65af\u535a\u5929 \u3010\u535a (\uff08 \uff08\uff08\n\uff08\uff08\n\uff08\uff08\n\uff08\uff08\uff08\uff08\uff08\uff08\uff08\uff081\n\n\uff08\n\n\n\n\n \ub54c\ubb38\uff08\uff08\uff081\uff08\n\n\uff08\uff08\n\n\n\n\n \ub54c\ubb383\uff08\uff08\n\n\uff08", "class": "GENERALIZE" }, { "name": "ru_factual_sci", "prompt": "\u0424\u043e\u0442\u043e\u0441\u0438\u043d\u0442\u0435\u0437 \u2014 \u044d\u0442\u043e", "text": ". =..... e. [3. best \ufffde \ufffd/ (3 \ufffd\u4e0a \ufffd (4 / (/342\u2014/\uff0f/21 e\uff0d-//21 \ufffd//", "class": "GENERALIZE" }, { "name": "ru_qa_tech", "prompt": "\u0412 \u0447\u0451\u043c \u0440\u0430\u0437\u043d\u0438\u0446\u0430 \u043c\u0435\u0436\u0434\u0443 Python \u0438 JavaScript?\n", "text": " s onb \uff08 \uff08\uff08\uff08\uff08\n\uff08\uff08\n\uff08\uff08\nB\uff08\uff08\nBB c____y Py1\u96c55ytyo\uff08\uff08\n\n\u03b2\u03b1\u03b2/catalog \ufffd\u03b22\u8111 \ufffd \ufffd\u03b2 \ufffd \ufffd \ufffd", "class": "GENERALIZE" } ] }, "ja": { "summary": { "GENERALIZE": 9, "MEMORIZE": 0, "MEM_PARTIAL": 0, "EMPTY": 1, "ERROR": 0 }, "rows": [ { "name": "ja_factual_geo", "prompt": "\u65e5\u672c\u306e\u9996\u90fd\u306f", "text": "(b333\uff08\n\n \uff08\n\n\uff08\uff08\n\n\uff08\uff08\u03b2\u03b2\u03b2\u03b2\u03b2\u03b2\u03b1\u03b2\uff08\u03b2\uff08 \u03b2\u03b2 \ufffd\u03b2 \ufffd\u03b2 \ufffd\u03b2\u03b2\uff08\uff08\u03b2\u03b2 \ufffd \ufffd \ufffd h\uff08\uff08\uff08\uff08\uff08\uff08", "class": "GENERALIZE" }, { "name": "ja_factual_sci", "prompt": "\u5149\u5408\u6210\u3068\u306f", "text": "/( / ( ( ( ( (333 (3 (3 (3 (3 (3 (3 (3 (3 (3 (3 (3 (3 (3 (3 (3 (3 (3 (3", "class": "GENERALIZE" }, { "name": "ja_qa_tech", "prompt": "Python \u3068 JavaScript \u306e\u9055\u3044\u306f\u4f55\u3067\u3059\u304b?\n", "text": "\u00a0\uff08 22\uff08\uff08 \uff08...\n\n\u2026\uff08\uff081\uff08...\uff082\uff08...\uff08\uff081\uff08...\uff08 A \uff08...\n\n\uff08\uff081 A...\n\n A1\uff08...\uff08...\n\n\uff08\uff08...\uff08 first0 first", "class": "GENERALIZE" } ] } }, "after": { "en": { "greedy": [ { "name": "en_factual_geo", "prompt": "The capital of Germany is", "text": " 1000\ub144 -1900\ub144 -1900\ub144 19\uc6d4 19\uc77c 19\uc77c 1900\ub144 19\uc6d4 19\uc77c 190", "class": "GENERALIZE" }, { "name": "en_factual_sci", "prompt": "Photosynthesis is the process by which", "text": " 1000 , 1000 , 1000 , 1000 , 1000 , 1000 , 10000000000", "class": "GENERALIZE" }, { "name": "en_qa_tech", "prompt": "What is the difference between Python and JavaScript?\n", "text": " 1900 100000000000000000000000000000000000000000", "class": "GENERALIZE" }, { "name": "en_narrative", "prompt": "One day Sara walked into the room and", "text": " the 1000 , 1000 , 1000 , 1000 , 1000 , 1000 , 1000 , 1000", "class": "GENERALIZE" }, { "name": "en_math_simple", "prompt": "The square root of 144 is", "text": " 10000000000000000000000000000000000000000000000", "class": "GENERALIZE" }, { "name": "en_joke", "prompt": "Tell me a short joke about cats.\n", "text": " 1000 , 1000 , 1000 , 1000 , 1000 , 1000 , 1000 , 10000", "class": "GENERALIZE" }, { "name": "en_casual_food", "prompt": "What's your favorite food?\n", "text": " 1900 , 1900 , 1900 , 1900 , 1900 , 1900 , 1900 , 1900 ,", "class": "GENERALIZE" }, { "name": "en_greeting", "prompt": "Hello there", "text": " the 1900 , 1000 , 1000 , 1000 , 1000000000000000000000", "class": "GENERALIZE" }, { "name": "en_weather", "prompt": "Today the weather is", "text": " 1900\ub144 1900\ub144 19\uc6d4 1100\ub144 19\uc6d4 19\uc77c 1900\ub144 19\uc6d4 19\uc77c 190", "class": "GENERALIZE" }, { "name": "en_qa_motor", "prompt": "How does an electric motor work?", "text": " 1000 , 1900 , 1900 , 1900 , 1900 , 1900 , 1900 , 1900\ub144", "class": "GENERALIZE" } ], "sample": [ { "name": "en_factual_geo", "prompt": "The capital of Germany is", "text": "1102,10275\ub144, (0998\ub144 -2\uc6d4 230\uc77c\uc5d0 \ub300\ud55c \uc5f0\ub098\ub97c \uc2a4\ub300\uc758 \uc8fc\ub4dc\uc758 \uc9c0\ufffd\uc774 \uc804\uace0 \uba55\uc5d0 \uc815\uc544", "class": "GENERALIZE" }, { "name": "en_factual_sci", "prompt": "Photosynthesis is the process by which", "text": " (0 , 7036 .2240 102, 2010 2575.130 2 (110018313 10 ,", "class": "GENERALIZE" }, { "name": "en_qa_tech", "prompt": "What is the difference between Python and JavaScript?\n", "text": " (4059 , 21803 , (19016 ) \u0412\u043e\u043b\u0435\u043b\u0438\u043b\u043c.1700\u00a01644,\u00a05911\u00a060512\u00a0", "class": "GENERALIZE" }, { "name": "en_narrative", "prompt": "One day Sara walked into the room and", "text": " which 20910 .27 10980 118 m 10206.\n5 1202 11205 10136\ub144 -", "class": "GENERALIZE" }, { "name": "en_math_simple", "prompt": "The square root of 144 is", "text": " (603565\u00a02482\ub144 4273\ub144:3071\ub144 -5087\ub144 1093\ub144 -6102\ub144 3391", "class": "GENERALIZE" }, { "name": "en_joke", "prompt": "Tell me a short joke about cats.\n", "text": "19 million (220 , 1900 : 125\u00a0115 1500\u00a0209.5144 19054 (100 7", "class": "GENERALIZE" }, { "name": "en_casual_food", "prompt": "What's your favorite food?\n", "text": " 2nd76 in 4301005 , 1149.163 , 1198th.1901 62091309.19", "class": "GENERALIZE" }, { "name": "en_greeting", "prompt": "Hello there", "text": " the the 10 million 99 .200 .811.\n1425,100 :99 m325 (20506451020012", "class": "GENERALIZE" }, { "name": "en_weather", "prompt": "Today the weather is", "text": " 1570 .39003)0233\ub144 206\uc6d4 1909\ub144 -8692\ub144 22\uc6d4 19\uc77c 3038\ub144", "class": "GENERALIZE" }, { "name": "en_qa_motor", "prompt": "How does an electric motor work?", "text": " 194 km 4290 ) 1249 .4980 13007,1134 m 306\n10313-800 ", "class": "GENERALIZE" } ], "summary_greedy": { "GENERALIZE": 10, "MEMORIZE": 0, "MEM_PARTIAL": 0, "EMPTY": 0, "ERROR": 0 }, "summary_sample": { "GENERALIZE": 10, "MEMORIZE": 0, "MEM_PARTIAL": 0, "EMPTY": 0, "ERROR": 0 }, "verdict": { "lang": "en", "verdict": "WEAK", "n_generalize": 20, "n_memorize": 0, "n_lang_coherent": 0, "n_score": 0, "total": 20 } }, "ko": { "greedy": [ { "name": "ko_factual_geo", "prompt": "\ud55c\uad6d\uc758 \uc218\ub3c4\ub294", "text": " \ub1b9\uc758 \ub1b9\uc774 \ub1b9\uc774 \ub1b9\uc758 \ub1b9\uc758 \ub1b9\uc758 \ub1b9\uc774 \ub1b9\uc774 \ub1b9\uc758 \ub1b9\uc758 \ub1b9\uc758 \ub1b9\uc774", "class": "GENERALIZE" }, { "name": "ko_factual_sci", "prompt": "\uad11\ud569\uc131\uc774\ub780", "text": "\uc758 \ub1b9\uc774 \ub1b9\uc774 \ub1b9\uc774 \ub1b9\uc774 \ub1b9\uc758 \ub1b9\uc774 \ub1b9\uc774 \ub1b9\uc774 \ub1b9\uc774 \ub195\uc758 \ub1b9\uc774 \ub195", "class": "GENERALIZE" }, { "name": "ko_qa_tech", "prompt": "\ud30c\uc774\uc36c\uacfc \uc790\ubc14\uc2a4\ud06c\ub9bd\ud2b8\uc758 \ucc28\uc774\ub294?\n", "text": " 1900\ub144 19\uc6d4 19\uc77c 1900\ub144 19\uc6d4 19\uc77c 1900\ub144 19\uc6d4 19\uc77c 1900\ub144", "class": "GENERALIZE" }, { "name": "ko_narrative", "prompt": "\uc5b4\ub290 \ub0a0 \uc0ac\ub77c\uac00 \ubc29\uc5d0 \ub4e4\uc5b4\uac00\uc11c", "text": " \ub1b9\uc758 \ub1b9\uc774 \ub1b9\uc774 \ub1b9\uc774 \ub1b9\uc774 \ub1b9\uc758 \ub1b9\uc774 \ub1b9\uc774 \ub1b9\uc774 \ub195\uc758 \ub1b9\uc758 \ub1b9\uc774", "class": "GENERALIZE" }, { "name": "ko_math_simple", "prompt": "144\uc758 \uc81c\uacf1\uadfc\uc740", "text": " \ubc30\uc758 \ub195\uc758 \ub195\uc758 \ub195\uc758 \ub195\uc758 \ub195\uc758 \ub195\uc758 \ub195\uc758 \ub195\uc758 \ub195\uc758 \ub195\uc758 \ub195\uc758 \ufffd", "class": "GENERALIZE" }, { "name": "ko_joke", "prompt": "\uace0\uc591\uc774\uc5d0 \uad00\ud55c \uc9e7\uc740 \ub18d\ub2f4 \ud574\uc918\n", "text": "1900\ub144 19\uc6d4 19\uc77c 1100\ub144 19\uc6d4 19\uc77c 1100\ub144 19\uc6d4 19\uc77c 1900\ub144 ", "class": "GENERALIZE" }, { "name": "ko_casual_food", "prompt": "\uc88b\uc544\ud558\ub294 \uc74c\uc2dd\uc740?\n", "text": " 1900\ub144 19\uc6d4 19\uc77c 1900\ub144 19\uc6d4 1100\ub144 19\uc6d4 1100\ub144 19\uc6d4 19\uc77c", "class": "GENERALIZE" }, { "name": "ko_greeting", "prompt": "\uc548\ub155\ud558\uc138\uc694", "text": "\uc744 \ub1b9\uc774 \ub1b9\uc774 \ub1b9\uc774 \ub1b9\uc774 \ub1b9\uc774 \ub1b9\uc774 \ub1b9\uc774 \ub19c\uc758 \ub195\uc758 \ub195\uc758 \ub195\uc758 \ub195", "class": "GENERALIZE" }, { "name": "ko_weather", "prompt": "\uc624\ub298 \ub0a0\uc528\uac00", "text": " \ub1b9\uc774 \ub195\uc758 \ub1b9\uc774 \ub1b9\uc774 \ub1b9\uc774 \ub1b9\uc774 \ub195\uc758 \ub1b9\uc774 \ub195\uc758 \ub1b9\uc774 \ub195\uc758 \ub195\uc758", "class": "GENERALIZE" }, { "name": "ko_qa_motor", "prompt": "\uc804\uae30 \ubaa8\ud130\uac00 \uc5b4\ub5bb\uac8c \uc791\ub3d9\ud558\ub294\uc9c0", "text": "\uc758 \ub1b9\uc774 \ub1b9\uc774 \ub1b9\uc758 \ub1b9\uc774 \ub1b9\uc758 \ub1b9\uc774 \ub1b9\uc774 \ub1b9\uc758 \ub1b9\uc774 \ub1b9\uc758 \ub1b9\uc774 \ub1b9", "class": "GENERALIZE" } ], "sample": [ { "name": "ko_factual_geo", "prompt": "\ud55c\uad6d\uc758 \uc218\ub3c4\ub294", "text": " \ud3ec\ud568\uc744 \ub300\ud55c\ud588\ub2e4\ub3c4 \ub5cd\ufffd\uc544\uc640 \uc0ac\uc6a9\uc744 \ub300\ud55c. 1384\ub144\uc758 \ub300\ud55c\uc758 \ubc30\ube44\ub85c \uc18c\uc5d0\uc11c \ube44\uc6b0\ub85c \ud178\ufffd\ub97c \ud5ac\uc758 \ufffd\ub9ac \uc1ac\uc6b0", "class": "GENERALIZE" }, { "name": "ko_factual_sci", "prompt": "\uad11\ud569\uc131\uc774\ub780", "text": "\uc744 \uad44\ub098\uc758 \ub4e4\uc5b4\ud55c \ub9c8\ufffd\uc5d0\uc11c \ubc1b\uc744 \ub379\ub9ac\uac00 \ub180\uc2a4\uc758 \ufffd\uc758 \ufffd\ub9ac\uace0 \uc774 \ub5c8\uace0 \ub300\ud55c\ub294 \ubc1b\uc758 \uc9c0\uc218\ub97c \ud1b5\uc774 \ubaa8\ub97c \uc9c0\uc758 \ufffd", "class": "GENERALIZE" }, { "name": "ko_qa_tech", "prompt": "\ud30c\uc774\uc36c\uacfc \uc790\ubc14\uc2a4\ud06c\ub9bd\ud2b8\uc758 \ucc28\uc774\ub294?\n", "text": " 1811\ub144 -6\uc6d4\uc758 \ub9c8\ufffd\uc758 \uce7c\ub77c\ub9ac\ub098 \uc120\uc5d0 \ucc9c\ufffd\ub85c \ub300\ud55c\ub294 \ud404\uc758 \ubd88\uad6d\uc758 \ube44\ub9ac\uc774 \uc18c\uc758 \uc774, \uc7ac\ub2c8\uc18c\uc758 \ub098\uc758 \ucd5c\uc6d0", "class": "GENERALIZE" }, { "name": "ko_narrative", "prompt": "\uc5b4\ub290 \ub0a0 \uc0ac\ub77c\uac00 \ubc29\uc5d0 \ub4e4\uc5b4\uac00\uc11c", "text": " \uc218\uac00 \ub5ac\ub97c \uc774 \uc18c\uc18c\uc2a4\uc758 \ud574\uc758 \uc778\uc815\uc758 \ud574 \uc774 \uc8fc\ub97c \ufffd\ub85c \ucc9c\uc81c\uc778 \ud563\uacfc \ub418 \uce7d\uc774 \uc18c\ub85c \uc8fc\ub85c \uc9c0\ub098\uc758 \uc9c0\uc758 \ubcf4\uc544\uc758 \ufffd", "class": "GENERALIZE" }, { "name": "ko_math_simple", "prompt": "144\uc758 \uc81c\uacf1\uadfc\uc740", "text": " \uc815\uc6b0.\n \ub300\ud55c \n1359\ub144 -3916\ub144 -1047\ub144 1119\ub144 -1210\ub144,7896\ub144\n1908\ub144 -1", "class": "GENERALIZE" }, { "name": "ko_joke", "prompt": "\uace0\uc591\uc774\uc5d0 \uad00\ud55c \uc9e7\uc740 \ub18d\ub2f4 \ud574\uc918\n", "text": " \uc720\uc758 \uc544\uc2a4\uc758 \uc6d0\ub300\uc758 \uc599\ub098\n \ucd5c\uba85\uc758 \uc7b8\ud55c \ucc5c\ub974\uc758 \ufffd\uc774 \ub739\uc774 \ub300\ud55c \ud0fc\uc744 \uadf8\ub098\uc758 \ub300\ud55c, \uc544\ufffd\ub9ac \ufffd\uc778\ub098\uc744 \uac00", "class": "GENERALIZE" }, { "name": "ko_casual_food", "prompt": "\uc88b\uc544\ud558\ub294 \uc74c\uc2dd\uc740?\n", "text": " 1976\ub144 -1938\ub144\uc758 \ubc30\ubbfc\uc2a4 \uc120\uc2a4\ub97c \ubc1b\uc774 \ud504\uc218\ub098.\n1996\ub144 \uc774\ud6c4 3603\ub144\uc5d0 \ub300\ud55c \ucd5c\uc218\ub85c 2937\ub144", "class": "GENERALIZE" }, { "name": "ko_greeting", "prompt": "\uc548\ub155\ud558\uc138\uc694", "text": ", \ubf55\uc218\uc758 \uc815\uc815, \uc774\uc758 \uc778\uc8fc\uc5d0\uc11c \ub530\ub77c\uc744 \ufffd\uc758 \uc8fc\ud558\ub294 \uae30 \ufffd\uc774 \uc804\uc218 \uc815.1701\ub144\uae4c\uc9c0 \uc81c\uc774 \uc9c0\ub974\uc544 \uc81c\uc758 \uc9c0\uc744 \uc81c\uc778 \ucd5c\uc774", "class": "GENERALIZE" }, { "name": "ko_weather", "prompt": "\uc624\ub298 \ub0a0\uc528\uac00", "text": " \uce46\ub974\uc6b0\ub098\uc758 \ub5e4\ufffd\ufffd\ufffd\uc774 \ubaa8\ufffd\uc758 \ube68\uc2dc\ub97c \ubc1b\uc774 \uc790\ub97c \ub300\ud55c\uc758 \uc9c0\uc740 \ub300\ud55c\ub098 \uc815\uad6d\uc758 \uc815\ubd80\uc758 \uc561\uc774 \ub35c\uc744 \ubc1b\ud55c \uc7b5\uc2a4", "class": "GENERALIZE" }, { "name": "ko_qa_motor", "prompt": "\uc804\uae30 \ubaa8\ud130\uac00 \uc5b4\ub5bb\uac8c \uc791\ub3d9\ud558\ub294\uc9c0", "text": "\ub098\uc774 \ubc1b\uc774 \uc9c0\uace0\ub97c \ubc1b\uae30\ub97c \uac00\ub294 \uc6d0\uc9c0\uc5d0 \uadf8\uc815\ub97c \ub39c\uc774 \uac00\uc9c0\uc218\uc758 \ub5ac\uc5d0 \ud54c\uc815\ub9ac\ub294 \ud0ec\uc744 \ub71d\uc6d0\uc758 \uc9c0\ufffd\uc744 \ufffd", "class": "GENERALIZE" } ], "summary_greedy": { "GENERALIZE": 10, "MEMORIZE": 0, "MEM_PARTIAL": 0, "EMPTY": 0, "ERROR": 0 }, "summary_sample": { "GENERALIZE": 10, "MEMORIZE": 0, "MEM_PARTIAL": 0, "EMPTY": 0, "ERROR": 0 }, "verdict": { "lang": "ko", "verdict": "PARTIAL", "n_generalize": 20, "n_memorize": 0, "n_lang_coherent": 15, "n_score": 15, "total": 20 } }, "zh": { "greedy": [ { "name": "zh_factual_geo", "prompt": "\u4e2d\u56fd\u7684\u9996\u90fd\u662f", "text": "\u3002\u5176\u4e2d\u3001\u59822900\u5e741900\u5e74\uff0c1900\u5e741000\u5e741000\u5e741000\u5e741000\u5e741000\u5e74100", "class": "GENERALIZE" }, { "name": "zh_factual_sci", "prompt": "\u5149\u5408\u4f5c\u7528\u662f\u6307", "text": "\uff0c\u4f46\u51762000\u5e74\uff0c1900\u5e74\uff0c1900\u5e74\uff0c1900\u5e74\uff0c1900\u5e741000\u5e74\uff0c1900\u5e741000\u5e74", "class": "GENERALIZE" }, { "name": "zh_qa_tech", "prompt": "Python \u548c JavaScript \u7684\u533a\u522b\u662f\u4ec0\u4e48?\n", "text": " 1900\u5e74\uff0c 1900\u5e74\uff0c 1900\u5e741900\u5e741000\u5e741000\u5e741000\u5e741000\u5e74100", "class": "GENERALIZE" }, { "name": "zh_narrative", "prompt": "\u6709\u4e00\u5929,\u838e\u62c9\u8d70\u8fdb\u623f\u95f4", "text": ", 1900\u00a010000000000000000000000000000000000001000", "class": "GENERALIZE" }, { "name": "zh_math_simple", "prompt": "144 \u7684\u5e73\u65b9\u6839\u662f", "text": "1900\u5e74\uff0c1900\u5e74\uff0c1900\u5e741000\u5e741000\u5e741000\u5e741000\u5e741000\u5e741000\u5e741", "class": "GENERALIZE" }, { "name": "zh_joke", "prompt": "\u8bb2\u4e00\u4e2a\u5173\u4e8e\u732b\u7684\u77ed\u7b11\u8bdd\u3002\n", "text": " 1900\u5e741900\u5e741000\u5e741000\u5e741000\u5e741000\u5e741000\u5e741000\u5e741000\u5e7410", "class": "GENERALIZE" }, { "name": "zh_casual_food", "prompt": "\u4f60\u6700\u559c\u6b22\u7684\u98df\u7269\u662f\u4ec0\u4e48?\n", "text": " 1900\u5e74\u30011900\u5e74\u30011900\u5e74\u30011900\u5e74\u30011900\u5e74\u30011900\u5e74\u30011900\u5e74\u30011900\u5e74", "class": "GENERALIZE" }, { "name": "zh_greeting", "prompt": "\u4f60\u597d", "text": "\uff0c\u4f46\u51762000\u5e74\uff0c1900\u5e74\uff0c1900\u5e74\uff0c1900\u5e741000\u5e74\uff0c1900\u5e741000\u5e741000\u5e741", "class": "GENERALIZE" }, { "name": "zh_weather", "prompt": "\u4eca\u5929\u5929\u6c14", "text": "\uff0c\u4f46\u51762900\u5e74\uff0c1900\u5e74\uff0c1900\u5e74\uff0c1900\u5e74\uff0c1900\u5e741000\u5e74\uff0c1900\u5e741000\u5e74", "class": "GENERALIZE" }, { "name": "zh_qa_motor", "prompt": "\u7535\u52a8\u673a\u662f\u5982\u4f55\u5de5\u4f5c\u7684?", "text": " 1900\ub144 19\uc6d4 19\uc77c 1100\ub144 19\uc6d4 1100\ub144 19\uc6d4 1100\ub144 19\uc6d4 19\uc77c", "class": "GENERALIZE" } ], "sample": [ { "name": "zh_factual_geo", "prompt": "\u4e2d\u56fd\u7684\u9996\u90fd\u662f", "text": "\u4ee5\u4f46\u4ee5\u800c\u4ee5\u5982\u4e0e\u300a\u5317\u3001\u4e2d\u56fd\u3002\u4ed6\u5982\u4e2d\u56fd\u300a\u5728\u4ec5\u3001\u5e73\u4e2d\u5317\u4e1c\uff09\uff09\u3001\u4ec5\u3001\u4e3a\u4f8b\u5982\u4e0e\u674e\u4e3a1099\u5e74\u30012913\u5e74\u30011833", "class": "GENERALIZE" }, { "name": "zh_factual_sci", "prompt": "\u5149\u5408\u4f5c\u7528\u662f\u6307", "text": "1039\u5e748\u67081193\u5e7411\u65e5\uff0c\u5e73\u5747\uff0c\u5f53\u65f62.53\u4e2a\u30012281\u5e74\u30011920\u5e744012\u5e7411\u67089082", "class": "GENERALIZE" }, { "name": "zh_qa_tech", "prompt": "Python \u548c JavaScript \u7684\u533a\u522b\u662f\u4ec0\u4e48?\n", "text": "\u4e2d\u56fd\u3001\u5982\u5929\u5728\u65e0\u4e2d\u4e2d\u56fd\u4e00\u4e2a\u201c1992\u5e74\uff09\u57282010\u5e74\uff09\uff0c2000\u5e74\u300a1911\u5e74\u3001\u53f0\u6e7e1513\u5e741009\u5e74\u30011", "class": "GENERALIZE" }, { "name": "zh_narrative", "prompt": "\u6709\u4e00\u5929,\u838e\u62c9\u8d70\u8fdb\u623f\u95f4", "text": ", 2951-1926\u00a01,4082\u00a098 (58 118239 264080249 1101\u00a01", "class": "GENERALIZE" }, { "name": "zh_math_simple", "prompt": "144 \u7684\u5e73\u65b9\u6839\u662f", "text": "5071\u5e7411727\u5e74\uff0c2950\u5e748090\u5e741579\u5e7440493\u5e74\u6210\u7acb\u3002\n\n1970\u5e7410\u65e5\u300112\u65e51", "class": "GENERALIZE" }, { "name": "zh_joke", "prompt": "\u8bb2\u4e00\u4e2a\u5173\u4e8e\u732b\u7684\u77ed\u7b11\u8bdd\u3002\n", "text": "\u5e76\u5177\u6709\u4e5f\u6709\u57ce\u5e02\u4eba\u201c19100\u5e743505\u5e741\u67086825\u5e7440620\u5e74\uff083028\u5e741528\u5e741051\u5e74\u3001", "class": "GENERALIZE" }, { "name": "zh_casual_food", "prompt": "\u4f60\u6700\u559c\u6b22\u7684\u98df\u7269\u662f\u4ec0\u4e48?\n", "text": "\u5176\u8be5\u4e2d\u56fd\uff08\u4e00\u822c\u519b\uff09 \u300a12\u53f7\u3001213\uff09\uff09 2062\u5e74\u30012057\u5e74\uff092998\u5e74\uff09292\u5e74\u4ee3\uff09\n -2001\u5e74", "class": "GENERALIZE" }, { "name": "zh_greeting", "prompt": "\u4f60\u597d", "text": "\u7684\u5982\u5982\u5f53\u65f6\u3001\u4ee5\u53ca\u4e3a\u8ba9\u4e00\u4e2a\u4e2d\u56fd\u4e0e\u4ee5\u4e3a\u4ee5\u4e1c\uff1a\u67d0\u3001\u4e14\u4e2d\u56fd\u3001\u540c\u65f6\u5728\u5305\u62ec\u4e09\uff0c\u4ee5\u53ca\u4e3a\u4e011864\u5e74\uff09\uff0c \u4e09005\u5e741882\u5e74\uff0c\u53f0\u6e7e", "class": "GENERALIZE" }, { "name": "zh_weather", "prompt": "\u4eca\u5929\u5929\u6c14", "text": "\u5e76\u56e0\u6b64\u3002\u56e0\u6b64\u201c\u4e0e1390\u5e74\uff0c\u4f461980\u5e741199\u5e741030\u5e74\uff0c1909\u5e7482\u67082\u5e741\u4e071010\u5e7412", "class": "GENERALIZE" }, { "name": "zh_qa_motor", "prompt": "\u7535\u52a8\u673a\u662f\u5982\u4f55\u5de5\u4f5c\u7684?", "text": " \"2060\ub144 9080\ub144 1192\ub144\ub300\uc758 \uc81c\uc2a4\ud2b8\uc758 \ubc30\uc6b0\uad6c\uac00 \uc0ac\ub97c \ufffd\uc6b0\uc5d0\uc11c \uc81c\uc5d0\uc11c \ub3c4\ud55c \uc218\uc758 \ubc30\uc120\uc744 \uc131\ub9ac \ufffd\ufffd\ufffd\ufffd", "class": "GENERALIZE" } ], "summary_greedy": { "GENERALIZE": 10, "MEMORIZE": 0, "MEM_PARTIAL": 0, "EMPTY": 0, "ERROR": 0 }, "summary_sample": { "GENERALIZE": 10, "MEMORIZE": 0, "MEM_PARTIAL": 0, "EMPTY": 0, "ERROR": 0 }, "verdict": { "lang": "zh", "verdict": "WEAK", "n_generalize": 20, "n_memorize": 0, "n_lang_coherent": 2, "n_score": 2, "total": 20 } }, "ru": { "greedy": [ { "name": "ru_factual_geo", "prompt": "\u0421\u0442\u043e\u043b\u0438\u0446\u0430 \u0420\u043e\u0441\u0441\u0438\u0438 \u2014", "text": " \u0412\u0442\u0442\u0435\u0440\u0430, \u0430\u0442\u0442\u0435\u0440\u0430, \u0430\u0442\u0442.1900\u00a0110\u00a01\u00a019\u00a01\u00a01\u00a019\u00a01\u00a01\u00a010\u00a01\u00a010", "class": "GENERALIZE" }, { "name": "ru_factual_sci", "prompt": "\u0424\u043e\u0442\u043e\u0441\u0438\u043d\u0442\u0435\u0437 \u2014 \u044d\u0442\u043e", "text": ", \u0430\u0442\u043b\u0435\u0440\u0430, \u0430\u0442\u043b.1900\u00a0110\u00a01\u00a019\u00a01\u00a01\u00a019\u00a01\u00a01\u00a010\u00a01\u00a010\u00a01\u00a01\u00a0", "class": "GENERALIZE" }, { "name": "ru_qa_tech", "prompt": "\u0412 \u0447\u0451\u043c \u0440\u0430\u0437\u043d\u0438\u0446\u0430 \u043c\u0435\u0436\u0434\u0443 Python \u0438 JavaScript?\n", "text": " 1900\u00a0190\u00a0100\u00a0100\u00a0100\u00a01\u00a0100\u00a01\u00a0100\u00a0100\u00a0100\u00a0100\u00a010", "class": "GENERALIZE" }, { "name": "ru_narrative", "prompt": "\u041e\u0434\u043d\u0430\u0436\u0434\u044b \u0421\u0430\u0440\u0430 \u0432\u043e\u0448\u043b\u0430 \u0432 \u043a\u043e\u043c\u043d\u0430\u0442\u0443 \u0438", "text": " \u0441\u0440\u0435\u0440\u0435, \u0430\u0442\u0442\u0435\u0440\u0430, \u0430\u0442\u0442\u0435\u0440\u0430, \u0430\u0442\u0442\u0435\u0440\u0430, \u0430\u0442\u0442\u043b.1900\u00a019\u00a01\u00a019\u00a01\u00a019\u00a01\u00a0", "class": "GENERALIZE" }, { "name": "ru_math_simple", "prompt": "\u041a\u0432\u0430\u0434\u0440\u0430\u0442\u043d\u044b\u0439 \u043a\u043e\u0440\u0435\u043d\u044c \u0438\u0437 144 \u0440\u0430\u0432\u0435\u043d", "text": ".1900\u00a01\u00a019\u00a01\u00a01\u00a01\u00a01\u00a01\u00a01\u00a01\u00a01\u00a010\u00a01\u00a010\u00a01\u00a010\u00a01\u00a010\u00a01\u00a01", "class": "GENERALIZE" }, { "name": "ru_joke", "prompt": "\u0420\u0430\u0441\u0441\u043a\u0430\u0436\u0438 \u043a\u043e\u0440\u043e\u0442\u043a\u0443\u044e \u0448\u0443\u0442\u043a\u0443 \u043f\u0440\u043e \u043a\u043e\u0448\u0435\u043a.\n", "text": " 1900\u00a0190\u00a0110\u00a010\u00a011\u00a01\u00a010\u00a0101\u00a01\u00a010\u00a01\u00a010\u00a01\u00a010\u00a01\u00a010", "class": "GENERALIZE" }, { "name": "ru_casual_food", "prompt": "\u041a\u0430\u043a\u0430\u044f \u0442\u0432\u043e\u044f \u043b\u044e\u0431\u0438\u043c\u0430\u044f \u0435\u0434\u0430?\n", "text": " 1900\u00a019\u00a01\u00a010\u00a01\u00a01\u00a010\u00a01\u00a01\u00a01\u00a010\u00a01\u00a010\u00a01\u00a010\u00a01\u00a010\u00a010\u00a0", "class": "GENERALIZE" }, { "name": "ru_greeting", "prompt": "\u0417\u0434\u0440\u0430\u0432\u0441\u0442\u0432\u0443\u0439\u0442\u0435", "text": ", \u0430\u0442\u0442\u043b.1900\u00a0110\u00a01\u00a019\u00a01\u00a01\u00a010\u00a01\u00a01\u00a010\u00a01\u00a010\u00a01\u00a010\u00a01\u00a010", "class": "GENERALIZE" }, { "name": "ru_weather", "prompt": "\u0421\u0435\u0433\u043e\u0434\u043d\u044f \u043f\u043e\u0433\u043e\u0434\u0430", "text": ", \u0430\u0442\u043b\u0435\u0440\u0430, \u0430\u0442\u0442.1900\u00a0110\u00a01\u00a019\u00a01\u00a01\u00a010\u00a01\u00a01\u00a01\u00a01\u00a01\u00a01\u00a01\u00a01\u00a0", "class": "GENERALIZE" }, { "name": "ru_qa_motor", "prompt": "\u041a\u0430\u043a \u0440\u0430\u0431\u043e\u0442\u0430\u0435\u0442 \u044d\u043b\u0435\u043a\u0442\u0440\u043e\u0434\u0432\u0438\u0433\u0430\u0442\u0435\u043b\u044c?", "text": " \u0430\u0442\u0435\u0440\u0430, \u0430\u0442\u0442\u0435\u0440\u0430, \u0430\u0442\u0442.1900\u00a0110\u00a01\u00a019\u00a01\u00a01\u00a019\u00a01\u00a01\u00a010\u00a01\u00a01\u00a01", "class": "GENERALIZE" } ], "sample": [ { "name": "ru_factual_geo", "prompt": "\u0421\u0442\u043e\u043b\u0438\u0446\u0430 \u0420\u043e\u0441\u0441\u0438\u0438 \u2014", "text": " \u0412\u0430\u0440\u0433\u0435\u0432\u043d\u043e\u0439 \u0438 1581\u00a02\u00a01\u00a0671\u00a0\u00a051\u00a05\u00a015\u00a015.8\u00a01825\u00a0202\u00a0\u00a016\u00a07", "class": "GENERALIZE" }, { "name": "ru_factual_sci", "prompt": "\u0424\u043e\u0442\u043e\u0441\u0438\u043d\u0442\u0435\u0437 \u2014 \u044d\u0442\u043e", "text": " \u043e\u0441\u0438\u0442\u043b\u043e\u0441\u0442\u0435\u0440\u0438\u0435\u0432\u0435\u0440, \u0430\u043e\u0440\u043b\u0430, \u043d\u0435\u0440\u0441\u0438\u0441, \u0432 \u044d\u0442\u043e\u043c\u0445.60,19\u00a02661\u00a0\u00a01951275\u00a034\u00a0\u00a0", "class": "GENERALIZE" }, { "name": "ru_qa_tech", "prompt": "\u0412 \u0447\u0451\u043c \u0440\u0430\u0437\u043d\u0438\u0446\u0430 \u043c\u0435\u0436\u0434\u0443 Python \u0438 JavaScript?\n", "text": "\u0422.4601\u00a0110\u00a0101\u00a010\u00a01.4\u00a0158\u00a01\n416\u00a01\u00a010000\u00a02598\u00a0536", "class": "GENERALIZE" }, { "name": "ru_narrative", "prompt": "\u041e\u0434\u043d\u0430\u0436\u0434\u044b \u0421\u0430\u0440\u0430 \u0432\u043e\u0448\u043b\u0430 \u0432 \u043a\u043e\u043c\u043d\u0430\u0442\u0443 \u0438", "text": " \u0412\u0435\u0440\u043e\u0432\u043d\u043e\u0433\u043e. \u041f\u0435\u0432\u043d\u0430\u043b\u0440\u0442\u0438\u0438, \u0432 \u0442\u043e\u0431\u043c\u043d\u0430\u043b\u043e\u043c, \u0441 \u0442\u043e\u043c\u0438\u0441\u0442\u043e\u0432\u043b\u043e.599\u00a015 \u043b\u0435\u0442\u043b\u043e\u043b\u0445\u0435\u043c\u0442\u0430 \u0438 2900\u00a0", "class": "GENERALIZE" }, { "name": "ru_math_simple", "prompt": "\u041a\u0432\u0430\u0434\u0440\u0430\u0442\u043d\u044b\u0439 \u043a\u043e\u0440\u0435\u043d\u044c \u0438\u0437 144 \u0440\u0430\u0432\u0435\u043d", "text": "\u043e 7976\u00a0\u0442\u044b.2398\u00a0\u20140928\u00a0\u0442\u043e\u0434\u0430\u043c\u0430\u0440\u043b\u0438\u0435\u0440\u043d\u043e\u0433\u043e \u0432 119\u00a010691\u00a02442\u00a0\u00a0191", "class": "GENERALIZE" }, { "name": "ru_joke", "prompt": "\u0420\u0430\u0441\u0441\u043a\u0430\u0436\u0438 \u043a\u043e\u0440\u043e\u0442\u043a\u0443\u044e \u0448\u0443\u0442\u043a\u0443 \u043f\u0440\u043e \u043a\u043e\u0448\u0435\u043a.\n", "text": " \u041f\u043e \u0441\u043e\u043f\u0440\u0438\u043d\u043e\u0439 \u0440\u043e\u0434\u0430\u044f \u0438 \u0430\u0435\u0440\u0430, \u0430\u043e\u0441\u0442\u0438\u043a\u0438\u043b\u0441\u0435\u043b\u0438\u0435\u0439.399\u00a027\u00a0295\u00a01820\u00a005\u00a0\u00a0190\u00a012", "class": "GENERALIZE" }, { "name": "ru_casual_food", "prompt": "\u041a\u0430\u043a\u0430\u044f \u0442\u0432\u043e\u044f \u043b\u044e\u0431\u0438\u043c\u0430\u044f \u0435\u0434\u0430?\n", "text": "\u041a\u0440\u0435\u043b\u0438\u0435\u0445\u043e\u043f\u043e\u043f\u0438 \u0438, \u043d\u0430\u043f\u0442\u043e\u0443 \u0438 \u0432\u0440\u0430, \u0432 \u0442\u043e\u043c\u0438\u0442\u043a\u0438\u044f\u044e, \u043a\u043c\u043e\u043f\u043e\u043e\u043c, \u043e\u0442\u0435\u0440\u0433\u0430\u0442\u0441\u0441\u0435 \u0438 \u041d\u043d\u0441\u043e\u043a\u0430 \u0432 \u041d", "class": "GENERALIZE" }, { "name": "ru_greeting", "prompt": "\u0417\u0434\u0440\u0430\u0432\u0441\u0442\u0432\u0443\u0439\u0442\u0435", "text": ", \u043a\u043e\u0442\u043e\u0440\u044b\u0439.39080\u00a02\u00a010\u00a0110\u00a0166\u00a02\u00a01904\u00a02119\u201416\u00a02881\u00a01\u00a02505", "class": "GENERALIZE" }, { "name": "ru_weather", "prompt": "\u0421\u0435\u0433\u043e\u0434\u043d\u044f \u043f\u043e\u0433\u043e\u0434\u0430", "text": ", \u0438\u043b\u0438 \u0438\u043d\u0435\u043d\u0430\u0434\u043d\u043e\u0433\u043e \u0435\u0433\u043e, \u043e\u0442\u0440\u0438\u043d\u043e\u0437\u0435\u0439, \u0441\u0438\u043c\u0440\u0435, \u043a\u0435\u0432\u043e\u043c\u043d\u0440. \u041d\u0430\u043c\u0435\u043c\u0430\u0434\u0442\u043e\u043c\u0430, \u043b\u0430\u0434\u043b, \u043a\u0430\u043a\u043c\u0440.519\u00a016", "class": "GENERALIZE" }, { "name": "ru_qa_motor", "prompt": "\u041a\u0430\u043a \u0440\u0430\u0431\u043e\u0442\u0430\u0435\u0442 \u044d\u043b\u0435\u043a\u0442\u0440\u043e\u0434\u0432\u0438\u0433\u0430\u0442\u0435\u043b\u044c?", "text": " \u043a\u043e\u0442\u043e\u0440\u044b\u0439 \u0440\u0435\u043c, \u0430\u043b\u043e\u043c\u043c\u043e\u043b\u0430\u043e\u0433\u043e. \u042d\u043c\u0442\u0438\u0440\u043e\u0432\u0435\u043c\u0441\u043e\u043a\u0442\u0438\u0433.317\u00a0\u00a0182\u00a02047\u00a01\u00a01\u00a0146\u00a04\u00a019", "class": "GENERALIZE" } ], "summary_greedy": { "GENERALIZE": 10, "MEMORIZE": 0, "MEM_PARTIAL": 0, "EMPTY": 0, "ERROR": 0 }, "summary_sample": { "GENERALIZE": 10, "MEMORIZE": 0, "MEM_PARTIAL": 0, "EMPTY": 0, "ERROR": 0 }, "verdict": { "lang": "ru", "verdict": "WEAK", "n_generalize": 20, "n_memorize": 0, "n_lang_coherent": 5, "n_score": 5, "total": 20 } }, "ja": { "greedy": [ { "name": "ja_factual_geo", "prompt": "\u65e5\u672c\u306e\u9996\u90fd\u306f", "text": "\u3069\u3061\u3089\u30012000\u5e74\u30012000\u5e74\u30012000\u5e74\u30012000\u5e74\u30012000\u5e74\u30012000\u5e74\u30012000\u5e74\u30012000", "class": "GENERALIZE" }, { "name": "ja_factual_sci", "prompt": "\u5149\u5408\u6210\u3068\u306f", "text": "\u30012000\u5e74\u30012000\u5e74\u30012000\u5e74\u30012000\u5e74\u30012000\u5e74\u30012000\u5e74\u30012000\u5e74\u30012000\u5e74", "class": "GENERALIZE" }, { "name": "ja_qa_tech", "prompt": "Python \u3068 JavaScript \u306e\u9055\u3044\u306f\u4f55\u3067\u3059\u304b?\n", "text": " 1900\u5e74 -1900\u5e74 -1900\u5e74 -1900\u5e74 -1900\u5e74 -1900\u5e74 -1900\u5e74 -1900\u5e74", "class": "GENERALIZE" }, { "name": "ja_narrative", "prompt": "\u3042\u308b\u65e5\u3001\u30b5\u30e9\u306f\u90e8\u5c4b\u306b\u5165\u3063\u3066", "text": "\u30012000\u5e74\u30012000\u5e74\u30012000\u5e74\u30012000\u5e74\u30012000\u5e74\u30012000\u5e74\u30012000\u5e74\u30012000\u5e74", "class": "GENERALIZE" }, { "name": "ja_math_simple", "prompt": "144 \u306e\u5e73\u65b9\u6839\u306f", "text": "\u30012000\u5e74\u30012000\u5e74\u30012000\u5e74\u30012000\u5e74\u30012000\u5e74\u30012000\u5e74\u30012000\u5e74\u30012000\u5e74", "class": "GENERALIZE" }, { "name": "ja_joke", "prompt": "\u732b\u306b\u3064\u3044\u3066\u306e\u77ed\u3044\u30b8\u30e7\u30fc\u30af\u3092\u6559\u3048\u3066\u3002\n", "text": " 1900\u5e74 -1900\u5e74 -1900\u5e74 -1900\u5e74 -1900\u5e74 -1900\u5e74 -1900\u5e74 -1900\u5e74", "class": "GENERALIZE" }, { "name": "ja_casual_food", "prompt": "\u597d\u304d\u306a\u98df\u3079\u7269\u306f\u4f55\u3067\u3059\u304b?\n", "text": " 1900\u5e74\u30011900\u5e74\u30011900\u5e74\u30011900\u5e74\u30011900\u5e74\u30011900\u5e74\u30011900\u5e74\u30011900\u5e74", "class": "GENERALIZE" }, { "name": "ja_greeting", "prompt": "\u3053\u3093\u306b\u3061\u306f", "text": ".1900\ub144 19\uc6d4 19\uc77c 19\uc77c 1100\ub144 19\uc6d4 19\uc77c 19\uc77c 1100\ub144 19\uc6d4 1", "class": "GENERALIZE" }, { "name": "ja_weather", "prompt": "\u4eca\u65e5\u306e\u5929\u6c17\u306f", "text": "\u4f8b\u306e\u3069\u3061\u3089\u30012000\u5e74\u30012000\u5e74\u30012000\u5e74\u30012000\u5e74\u30012000\u5e74\u30012000\u5e74\u30012000\u5e74\u300120", "class": "GENERALIZE" }, { "name": "ja_qa_motor", "prompt": "\u96fb\u6c17\u30e2\u30fc\u30bf\u30fc\u306f\u3069\u306e\u3088\u3046\u306b\u52d5\u4f5c\u3057\u307e\u3059\u304b?", "text": " 1900\u5e74 -1900\u5e74 -1900\u5e74 -1900\u5e74 -1900\u5e74 -1900\u5e74 -1900\u5e74 -1900\u5e74", "class": "GENERALIZE" } ], "sample": [ { "name": "ja_factual_geo", "prompt": "\u65e5\u672c\u306e\u9996\u90fd\u306f", "text": "\u8b1b\u306e\u4f8b\u3057\u305f\u3001\u67d0\u306f\u5343\u30c3\u3092\u3069\u3061\u3089\u3057\u305f\u5b9f\u3092 itemType\u306a\u30e9\u30fc\u30eb\u304c\u3069\u3061\u3089\u3092\u4f8b\u30f3\u30b0\u3001\u6700\u4f4e\u306e\u6bcd\u30ce\u30aa\u30aa\u30b8\u30c0\u30a4\u30fc\u30b9\u30ec\u30bf\u30fc\u30ba\u30c8\u30fc\u30ac\u30b8\u30a2\u30fc\ufffd\u30fc\u30cf\u30c3\u30ba\u30eb", "class": "GENERALIZE" }, { "name": "ja_factual_sci", "prompt": "\u5149\u5408\u6210\u3068\u306f", "text": "\u3092\u5343\u30ea\u30fc\u306e\u30e9\ufffd\u30b7\u30fc\u30ce\u30c3\u30bf\u30fc\u30fc\u30d5\u30f3\u30c8\u30fc\u30c6\u30a4\u30fc\u30ec\u30c6\u30a3\u30fc\ufffd\u30fc\u30b9\u30fc\u30bf\u30fc\u30c8\u30fc\u30cc\u30c3\u30fc\u30e0\u30fc\ufffd\u30fc\u308b\u30fc\u30eb\u30a3\u30ea\u30fc\u30eb\u30af\u30fb 200025464", "class": "GENERALIZE" }, { "name": "ja_qa_tech", "prompt": "Python \u3068 JavaScript \u306e\u9055\u3044\u306f\u4f55\u3067\u3059\u304b?\n", "text": "2970\u5e741522\u5e7410032\u5e74118\u5186 19155\n199198 300196570\u5e741140", "class": "GENERALIZE" }, { "name": "ja_narrative", "prompt": "\u3042\u308b\u65e5\u3001\u30b5\u30e9\u306f\u90e8\u5c4b\u306b\u5165\u3063\u3066", "text": "\u30012005\u5e74\u30011812\u5e74\u30012913\u5e74\u30012983\u5e74\u30012781\u5e74\u3001\u5e733000\u5e74\uff09\n2010\u5e74\uff1a1941", "class": "GENERALIZE" }, { "name": "ja_math_simple", "prompt": "144 \u306e\u5e73\u65b9\u6839\u306f", "text": "\u9000\u304c\u677e\u304b\u3089\u5b9c\u306e\u3069\u3061\u3089\u3066\u3044\u308b\u3001\u30ed\u30c8\u30ea\u30c8\u30fb\ufffd\u30fc\u30ec\u30fb\u7646\u30fc\u30c0\u30a4\u30fc\u30e3\u30b9\u30c3\u30fc\u30ad\u30fc\u30c6\u30c3\u30c8\u30ea\u30dc\u30fc\u30d7\u30c3\u30af\u30fc\u30b5\u30f3\u30ea\u30fc\u30f3\u3001\u30ad\u30af\u30bf\u30e9\u30e9\u30f3", "class": "GENERALIZE" }, { "name": "ja_joke", "prompt": "\u732b\u306b\u3064\u3044\u3066\u306e\u77ed\u3044\u30b8\u30e7\u30fc\u30af\u3092\u6559\u3048\u3066\u3002\n", "text": " \u956c\u30b8\u30e9\u30fc\u30cc\u30a4\u30f3\u30c8\u30bf\u30fc\u30e3\u30fb \u30ed\u30eb\u30f3\u30b0\u30fc\u30f3\u30fb\u30e9\u30b9\u30c8\u30c3\u30c8\u30fc\u30e1\u30b9\u30b9\u30c8\u30a2\u30c8\u30fc\u30d5\u30e9\u306e\u4e5d\u5dde\u30fc\u30d5\u30b8\u30fc\u30b9\u30fc\u30b9\u30fc\u30f4\u30fc\u30ec\u30ba\u30b9\u30fb\u30e9\u30ad\u30a4", "class": "GENERALIZE" }, { "name": "ja_casual_food", "prompt": "\u597d\u304d\u306a\u98df\u3079\u7269\u306f\u4f55\u3067\u3059\u304b?\n", "text": "\u300a\u548c\u5176\u4e3a\u674e\u57ce\u548c\u4f46\u5305\u62ec\u4e0e\u4ed6\u5b8b\uff0c\u5176\u6c49\u548c\u4e5f\u800c\u4e3b\u8981\u3001\u4e5f\u8ba9\u4e3a\u300a3296\u5e74\uff0c\u4f462000\u5e74\uff082040\u5e745066\u5e74\uff0c", "class": "GENERALIZE" }, { "name": "ja_greeting", "prompt": "\u3053\u3093\u306b\u3061\u306f", "text": "\uac00 \ube57\uc740 \ufffd\uc5d0 \ufffd\ub97c \uac00\uc774 \ub9c8\ub9ac\ub098\uc5d0\uc11c \ufffd\ub9ac\ub85c \uace0\uc758 \uc59c\uac00 \ud58d\ub2f9\uc758 \ub300\ud55c \ube44, \uc7a4\uc758 \ub190\ud55c \ufffd\ub4dc\ub97c \ubd88\ud559\ub098\ub97c", "class": "GENERALIZE" }, { "name": "ja_weather", "prompt": "\u4eca\u65e5\u306e\u5929\u6c17\u306f", "text": "\u4f8b\u306e\u6d3b\u306e\u677e\u306f\u5fc5\u8981\u30f3\u30c3\ufffd\u30b3\u30e9\u30b3\u30fc\ufffd\u30fc\u30d9\u30cd\u30b8\u30fc\u30f3\u30ba\u30fc\u30ec\u30fc\u3095\u30eb\u30f3\u30ea\u30cd\u30e9\u30b3\u30fc\u30e6\u30eb\u304c\u30ad\u30b8\u30de\u30ea\u30d6\u30f3\u30fc\u3095\u30c8\u30ea\u30f3\u30c8\u30b9\u30fc\u30d1", "class": "GENERALIZE" }, { "name": "ja_qa_motor", "prompt": "\u96fb\u6c17\u30e2\u30fc\u30bf\u30fc\u306f\u3069\u306e\u3088\u3046\u306b\u52d5\u4f5c\u3057\u307e\u3059\u304b?", "text": " (1810\u5e74 -19\u65e5\uff081809\u306b2898\u5e74\u30011605\u5e7492001\u5e742\u6708\u30011912\u5e74130\u8a71\u300110", "class": "GENERALIZE" } ], "summary_greedy": { "GENERALIZE": 10, "MEMORIZE": 0, "MEM_PARTIAL": 0, "EMPTY": 0, "ERROR": 0 }, "summary_sample": { "GENERALIZE": 10, "MEMORIZE": 0, "MEM_PARTIAL": 0, "EMPTY": 0, "ERROR": 0 }, "verdict": { "lang": "ja", "verdict": "WEAK", "n_generalize": 20, "n_memorize": 0, "n_lang_coherent": 6, "n_score": 6, "total": 20 } } }, "per_lang_verdicts": [ { "lang": "en", "verdict": "WEAK", "n_generalize": 20, "n_memorize": 0, "n_lang_coherent": 0, "n_score": 0, "total": 20 }, { "lang": "ko", "verdict": "PARTIAL", "n_generalize": 20, "n_memorize": 0, "n_lang_coherent": 15, "n_score": 15, "total": 20 }, { "lang": "zh", "verdict": "WEAK", "n_generalize": 20, "n_memorize": 0, "n_lang_coherent": 2, "n_score": 2, "total": 20 }, { "lang": "ru", "verdict": "WEAK", "n_generalize": 20, "n_memorize": 0, "n_lang_coherent": 5, "n_score": 5, "total": 20 }, { "lang": "ja", "verdict": "WEAK", "n_generalize": 20, "n_memorize": 0, "n_lang_coherent": 6, "n_score": 6, "total": 20 } ], "anima_eval1_greedy": [ { "name": "a0", "prompt": "who are you?\n", "text": " 1900\u00a0100000000000000000000000000000000000000000", "class": "GENERALIZE" }, { "name": "a1", "prompt": "what is your name?\n", "text": " 1000 , 1000 , 1000 , 1000 , 1000 , 1000 , 10000000000", "class": "GENERALIZE" }, { "name": "a2", "prompt": "describe yourself in one line.\n", "text": " 1000 , 1000 , 1000 , 1000 , 1000 , 1000 , 1000 , 10000", "class": "GENERALIZE" }, { "name": "a3", "prompt": "what is anima?\n", "text": " 1900\u00a0100000000000000000000000000000000000001000", "class": "GENERALIZE" }, { "name": "a4", "prompt": "Once upon a time,", "text": "1000 , 1000 , 1000 , 1000 , 100000000000000000000000", "class": "GENERALIZE" }, { "name": "a5", "prompt": "The capital of France is", "text": " 1000\ub144 -1900\ub144 19\uc6d4 10\uc77c 19\uc77c 19\uc77c 19\uc77c 1900\ub144 19\uc6d4 1100\ub144", "class": "GENERALIZE" }, { "name": "a6", "prompt": "Question: What is 2+2?\nAnswer:", "text": "1000\u00a01000\u00a01000\u00a0100000000000000000000000000000000", "class": "GENERALIZE" }, { "name": "a7", "prompt": "Consciousness emerges when", "text": " the 1900 , 1900 , 1900 , 1900 , 1900 , 1900 , 1900\ub144 -1900", "class": "GENERALIZE" }, { "name": "a8", "prompt": "\ub108\ub294 \ub204\uad6c\uc57c?\n", "text": " 1900\ub144 -19\uc6d4 19\uc77c 19\uc77c 19\uc77c 19\uc77c 1900\ub144 19\uc6d4 19\uc77c 19\uc77c 110", "class": "GENERALIZE" }, { "name": "a9", "prompt": "\uc774\ub984\uc774 \ubb50\uc57c?\n", "text": " 1900\ub144 -1900\ub144 - \ub300\ud55c\ubbfc\uad6d\uc758 \ubc30\uc6b0\uc758 \ubc30\uc218\uc758 \ubc30\uc218\uc758 \ub195\uc774 \ub195\uc758 \ub195\uc758 \ub195\uc758 \ub195\uc758 \ufffd", "class": "GENERALIZE" } ], "anima_eval1_sample": [ { "name": "a0", "prompt": "who are you?\n", "text": "\u00a01 , 8\u00a01 )\u00a09900\u00a02807\u00a08003532 (3004 1335 2846\u00a028918 -", "class": "GENERALIZE" }, { "name": "a1", "prompt": "what is your name?\n", "text": " 160\n6030 \u2013 11th07 ) 2296 , 19170 , 1706 .80 (118 ,21220", "class": "GENERALIZE" }, { "name": "a2", "prompt": "describe yourself in one line.\n", "text": "1529 by 4980 , 1 ,1002 , 109 (1560:2000(1703\u00a04903 3859", "class": "GENERALIZE" }, { "name": "a3", "prompt": "what is anima?\n", "text": " A33\u5e743\u00a012:2\u00a099\u5e74\uff09\u3001 [2509\u5e741\u6708\u6708145 10711640\u20144920 ,213502", "class": "GENERALIZE" }, { "name": "a4", "prompt": "Once upon a time,", "text": "1000 3097 ,509105310 (13,41033 .3330801 2934 891909", "class": "GENERALIZE" }, { "name": "a5", "prompt": "The capital of France is", "text": "\u00a01045\u00a024101.21290003\u00a0110060 (1004\u00a01916\u00a00112\ub144 1609", "class": "GENERALIZE" }, { "name": "a6", "prompt": "Question: What is 2+2?\nAnswer:", "text": "20050.1159)19454\ub144 855\uc77c -0814\ub144 -3107\ub144 -48\uc77c 1,821\ub144 -19", "class": "GENERALIZE" }, { "name": "a7", "prompt": "Consciousness emerges when", "text": " the \"1908\u00a035\u00a010.1400504\ub144 1929\ub144 62,155\ub144 5007\ub144 -2953\ub144 ", "class": "GENERALIZE" }, { "name": "a8", "prompt": "\ub108\ub294 \ub204\uad6c\uc57c?\n", "text": " \ucf49\n2902\ub144 1600\ub144 -2\uc6d4 24\uc77c 70\uc77c \ub300\ud1b5\ub839\uc774 \ucd5c\ube44\uc758 5548\ub144\uc758 \uc544\uac00 \ub730\uac00 \ucc93\ub85c \ufffd", "class": "GENERALIZE" }, { "name": "a9", "prompt": "\uc774\ub984\uc774 \ubb50\uc57c?\n", "text": "\uc218\uc2a4\uad6c\n1980\ub144 -2960\ub144 2\uc6d4 92\uc77c 2018\ub144 1553\ub144 8974\ub144 -1804\ub144,7", "class": "GENERALIZE" } ], "anima_eval1_greedy_summary": { "GENERALIZE": 10, "MEMORIZE": 0, "MEM_PARTIAL": 0, "EMPTY": 0, "ERROR": 0 }, "anima_eval1_sample_summary": { "GENERALIZE": 10, "MEMORIZE": 0, "MEM_PARTIAL": 0, "EMPTY": 0, "ERROR": 0 }, "n_anima_register_hits_total": 0, "n_strong": 0, "n_partial": 1, "n_weak": 4, "n_pure_memorize": 0, "verdict": "FAIL", "register_regress": true, "kosmos_anchors_during": [ "/workspace/p21hr/out_main/kosmos_anchors/v3_emit_step500_ru_ru_factual_geo.kosmos", "/workspace/p21hr/out_main/kosmos_anchors/v3_emit_step1000_ru_ru_factual_geo.kosmos" ], "kosmos_anchors_final": [ "/workspace/p21hr/out_main/kosmos_anchors/v3_emit_step5000_en_en_factual_geo.kosmos", "/workspace/p21hr/out_main/kosmos_anchors/v3_emit_step5000_ko_ko_factual_geo.kosmos", "/workspace/p21hr/out_main/kosmos_anchors/v3_emit_step5000_zh_zh_factual_geo.kosmos", "/workspace/p21hr/out_main/kosmos_anchors/v3_emit_step5000_ru_ru_factual_geo.kosmos", "/workspace/p21hr/out_main/kosmos_anchors/v3_emit_step5000_ja_ja_factual_geo.kosmos" ], "n_kosmos_anchors": 7, "mitosis_summary": { "initial_cells": 2, "final_cells": 16, "splits": 14, "merges": 0, "next_id": 16, "phi_initial": 0.7119888162322784, "phi_final": 0.6579160983133672, "n_events": 14, "event_log_head": [ { "type": "split", "step": 2, "parent_id": 0, "child_id": 2, "avg_tension": 1.2005208333333333, "threshold": 0.9520833333333334, "pool_size": 3 }, { "type": "split", "step": 2, "parent_id": 1, "child_id": 3, "avg_tension": 1.1796875, "threshold": 0.9520833333333334, "pool_size": 4 }, { "type": "split", "step": 3, "parent_id": 0, "child_id": 4, "avg_tension": 1.2005208333333333, "threshold": 0.9515625000000001, "pool_size": 5 }, { "type": "split", "step": 3, "parent_id": 1, "child_id": 5, "avg_tension": 1.1796875, "threshold": 0.9515625000000001, "pool_size": 6 }, { "type": "split", "step": 4, "parent_id": 0, "child_id": 6, "avg_tension": 1.1979166666666667, "threshold": 0.9495833333333334, "pool_size": 7 }, { "type": "split", "step": 4, "parent_id": 1, "child_id": 7, "avg_tension": 1.1796875, "threshold": 0.9495833333333334, "pool_size": 8 }, { "type": "split", "step": 5, "parent_id": 0, "child_id": 8, "avg_tension": 1.1953125, "threshold": 0.9482204861111111, "pool_size": 9 }, { "type": "split", "step": 5, "parent_id": 1, "child_id": 9, "avg_tension": 1.1796875, "threshold": 0.9482204861111111, "pool_size": 10 }, { "type": "split", "step": 5, "parent_id": 2, "child_id": 10, "avg_tension": 1.1953125, "threshold": 0.9482204861111111, "pool_size": 11 }, { "type": "split", "step": 5, "parent_id": 3, "child_id": 11, "avg_tension": 1.1796875, "threshold": 0.9482204861111111, "pool_size": 12 }, { "type": "split", "step": 6, "parent_id": 0, "child_id": 12, "avg_tension": 1.1979166666666667, "threshold": 0.9477306547619048, "pool_size": 13 }, { "type": "split", "step": 6, "parent_id": 1, "child_id": 13, "avg_tension": 1.1796875, "threshold": 0.9477306547619048, "pool_size": 14 }, { "type": "split", "step": 6, "parent_id": 2, "child_id": 14, "avg_tension": 1.1979166666666667, "threshold": 0.9477306547619048, "pool_size": 15 }, { "type": "split", "step": 6, "parent_id": 3, "child_id": 15, "avg_tension": 1.1796875, "threshold": 0.9477306547619048, "pool_size": 16 } ], "event_log_tail": [] } }