Upload evals for think-d12-r20-2epoch
Browse files
experiments/think-d12-r20-2epoch/evals/core.json
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "base_model (step 4995)",
|
| 3 |
+
"step": 4995,
|
| 4 |
+
"bpb": {},
|
| 5 |
+
"core_metric": 0.08720366535348274,
|
| 6 |
+
"core_results": {
|
| 7 |
+
"hellaswag_zeroshot": 0.2848038077354431,
|
| 8 |
+
"jeopardy": 0.0004723665479104966,
|
| 9 |
+
"bigbench_qa_wikidata": 0.12346833199262619,
|
| 10 |
+
"arc_easy": 0.32281145453453064,
|
| 11 |
+
"arc_challenge": 0.20904436707496643,
|
| 12 |
+
"copa": 0.5600000023841858,
|
| 13 |
+
"commonsense_qa": 0.3054873049259186,
|
| 14 |
+
"piqa": 0.547878086566925,
|
| 15 |
+
"openbook_qa": 0.24800001084804535,
|
| 16 |
+
"lambada_openai": 0.24063651263713837,
|
| 17 |
+
"hellaswag": 0.28659629821777344,
|
| 18 |
+
"winograd": 0.5677655935287476,
|
| 19 |
+
"winogrande": 0.5011838674545288,
|
| 20 |
+
"bigbench_dyck_languages": 0.10900000482797623,
|
| 21 |
+
"agi_eval_lsat_ar": 0.27391302585601807,
|
| 22 |
+
"bigbench_cs_algorithms": 0.4386363625526428,
|
| 23 |
+
"bigbench_operators": 0.06190476566553116,
|
| 24 |
+
"bigbench_repeat_copy_logic": 0.0,
|
| 25 |
+
"squad": 0.02866603620350361,
|
| 26 |
+
"coqa": 0.062132030725479126,
|
| 27 |
+
"boolq": 0.6067278385162354,
|
| 28 |
+
"bigbench_language_identification": 0.2505999803543091
|
| 29 |
+
},
|
| 30 |
+
"centered_results": {
|
| 31 |
+
"hellaswag_zeroshot": 0.04640507698059082,
|
| 32 |
+
"jeopardy": 0.0004723665479104966,
|
| 33 |
+
"bigbench_qa_wikidata": 0.12346833199262619,
|
| 34 |
+
"arc_easy": 0.09708193937937419,
|
| 35 |
+
"arc_challenge": -0.054607510566711426,
|
| 36 |
+
"copa": 0.12000000476837158,
|
| 37 |
+
"commonsense_qa": 0.1318591311573982,
|
| 38 |
+
"piqa": 0.0957561731338501,
|
| 39 |
+
"openbook_qa": -0.002666652202606201,
|
| 40 |
+
"lambada_openai": 0.24063651263713837,
|
| 41 |
+
"hellaswag": 0.048795064290364586,
|
| 42 |
+
"winograd": 0.13553118705749512,
|
| 43 |
+
"winogrande": 0.002367734909057617,
|
| 44 |
+
"bigbench_dyck_languages": 0.10900000482797623,
|
| 45 |
+
"agi_eval_lsat_ar": 0.09239128232002257,
|
| 46 |
+
"bigbench_cs_algorithms": 0.4386363625526428,
|
| 47 |
+
"bigbench_operators": 0.06190476566553116,
|
| 48 |
+
"bigbench_repeat_copy_logic": 0.0,
|
| 49 |
+
"squad": 0.02866603620350361,
|
| 50 |
+
"coqa": 0.062132030725479126,
|
| 51 |
+
"boolq": -0.03492674074674906,
|
| 52 |
+
"bigbench_language_identification": 0.17557753614335433
|
| 53 |
+
},
|
| 54 |
+
"conditioned_samples": [],
|
| 55 |
+
"unconditioned_samples": []
|
| 56 |
+
}
|
experiments/think-d12-r20-2epoch/evals/samples.json
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "base_model (step 4995)",
|
| 3 |
+
"step": 4995,
|
| 4 |
+
"bpb": {},
|
| 5 |
+
"core_metric": null,
|
| 6 |
+
"core_results": null,
|
| 7 |
+
"centered_results": null,
|
| 8 |
+
"conditioned_samples": [
|
| 9 |
+
{
|
| 10 |
+
"prompt": "The capital of France is",
|
| 11 |
+
"text": "<|bos|>The capital of France is the capital of the kingdom of France, and the capital of the kingdom of France"
|
| 12 |
+
},
|
| 13 |
+
{
|
| 14 |
+
"prompt": "The chemical symbol of gold is",
|
| 15 |
+
"text": "<|bos|>The chemical symbol of gold is the gold of the sun, and the gold of the moon is the gold of"
|
| 16 |
+
},
|
| 17 |
+
{
|
| 18 |
+
"prompt": "If yesterday was Friday, then tomorrow will be",
|
| 19 |
+
"text": "<|bos|>If yesterday was Friday, then tomorrow will be Sunday. \n\nThe day of the Lord's resurrection is the day of the Lord"
|
| 20 |
+
},
|
| 21 |
+
{
|
| 22 |
+
"prompt": "The opposite of hot is",
|
| 23 |
+
"text": "<|bos|>The opposite of hot is the heat of the sun, and the heat of the sun is the heat of"
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"prompt": "The planets of the solar system are:",
|
| 27 |
+
"text": "<|bos|>The planets of the solar system are: \n\n1. The sun, which is the centre of the earth's orbit"
|
| 28 |
+
},
|
| 29 |
+
{
|
| 30 |
+
"prompt": "My favorite color is",
|
| 31 |
+
"text": "<|bos|>My favorite color is the red, and the white, and the blue, and the yellow, and"
|
| 32 |
+
},
|
| 33 |
+
{
|
| 34 |
+
"prompt": "If 5*x + 3 = 13, then x is",
|
| 35 |
+
"text": "<|bos|>If 5*x + 3 = 13, then x is the number of the number of the number of the number of the number of the"
|
| 36 |
+
}
|
| 37 |
+
],
|
| 38 |
+
"unconditioned_samples": [
|
| 39 |
+
"<|bos|>auberstaden a topaz sick list they produce.\n\nREALISM \n\nWITH \n\nENGLAND 1666\n\n instructive SKELETON BURIPLOMATIC CORRESPONDENCE \n\nRepassed two eldest daughters belonging to one of the father's ex-officials in the same family, who died at an advanced age without having powerfully cultivated the talent necessary for their maintenance, These letters prove (were the value of our correspondence greater) how much encreased the readiness and ability of our soldiers and sailors, at this early age, to study mischief and plunder, and the necessity of effort to circumvent temptation, for we exercise discipline on certain beginnings of adventure which are not",
|
| 40 |
+
"<|bos|> Society chiefly composed of the most generous, disinterested, unchanging and neutral, one seeming to delight in the social advancement smoothly deferred, and another to endure that it shall in no case be free from thie drudgery which has before been tainted by corruption or illegality. ture.\n\nNeither of these views has been fully arrived at had the party been constituted wholly devoted to a particular church creed, according to the actual ideas which have been prevalent in at least one quarter of the country; and the design of the bill democracy has been to enrode a sort of '-lineal faction, with every attribute of a consurgent peer",
|
| 41 |
+
"<|bos|>wench, Morh'ot's Memid' vow, 102 Harright's Lane of London | Phila. \n\nEsopus Britann'. man are to his names, 37 the mistress of the temple, 140 of a . reconciliation beto all yet people, 26 of Ogil's wall, 27 of the site now in Heselt' pax 'mancient house', pet PLOT, AND, full blown, 45 house, 45 of Ruple, Angel's Island, No. 5, 44 \n\n36 said to happe, 46 to graci and '",
|
| 42 |
+
"<|bos|> INTERESTS OF BOYS. FOR YOUNG MEN BY A. B. MENDENHALL COFFEEWOOD, EDITOR OF JENNERS \n\n12353-58-1861.\n\nProfessor's\n\nCONTENTS A. B. MENDENHALL COFFEEWOOD'S MANUAL GYNNER SONS.. \n\n11 \n\nIntroduction \n\nCHARACTER \n\n12 \n\n\u2026\u2026\u2026... 12 \n\nINTRODUCTION 13 \n\nPRELIMINARY . \n\nTHE SOCIALIST NINETEENTH CENTURY A. B. MENDENHALL COFFEEWOOD'S MANUAL. \n\nInterest in the SOCIALIST MOVEMENT OF 1863 Twentieth Century EVIDENCE TRAFFIC SOCIALIST FEUDS ITS WILL Organized Social Democracy Conditions Gover",
|
| 43 |
+
"<|bos|>ma\n\nThey are good to see him, for all we have to drink on a plantation.\n\nSermon from Olivella in \"Dunmore's Soldier,\" \n\nTo the Servians of Richmond. \n\n(Exilles : one year.-LIFE and DEATH, 25 to 30.)\n\nHampton: January 25, 1776-7 \n\nBLOODY morning Our Thomas eat, nourish us with drink \n\nAnd to quench thirst we send him Seven days with Good\n\n bread and which he hath not drunk, we conceive it a very seasonable decertime to him, Which we may drink in with him on a",
|
| 44 |
+
"<|bos|>medyal Be. Jan. Sept. Feb. Esterhen Gebr.' (by Gesen.) \n\nMrs. he has blessed Be. day.' Anch., Seventy1; so is he was on shore.\n\nCHAP. XXVIII. \n\nAge as ordinarily blest;-the heavenly Monarchy; an There be gifted of the God of angels! It comes to the consciousness of Divine prerogative that this utterance enables Milton to encourage his inspirers to plunge into the vortex of this world of doubt and fears; to learn from it others may become irksome as he shrinks into himself, without The highest possible improvement afterwards",
|
| 45 |
+
"<|bos|>for Freeman. \n\n61. Meintum\n\nSmall, Articles called on the title and Acts again.\n\nActs by the Acts of 7th\n\nCont 1861-1918. \n\nApoplexies. Mystery was a much discussed matter in most days of munificence, and the request for compulsion among officials was leading.\n\nLetters Mat. xxii., xxvii., xxviii., xxxii., xxxiii., xxxiii., as well as ae letters in the Edward treaty of alliance were not called for in the The Iron Crusader, but the Mount Vernon consular offer of amity which these Nath. xxii",
|
| 46 |
+
"<|bos|> followed steamboats common to the three countries, this country and Russia, with the Metropolitan Districts of Manitoba, Galway, Ulster, and Connaught. The whole province is possessed of three great magazines for small ships, which are named the Minecigs. St Joseph at Prospect Hill, Kanssens at Harecastle, Stoketa and the Orinoco the other governors receiving the government of the province. \n\nThe principal city of the province is Noul. Every thing went on very comfortably till this great war which we call the present war, although our national disasters rose, owing to our"
|
| 47 |
+
]
|
| 48 |
+
}
|
experiments/think-d12-r20-2epoch/evals/val_bpb.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "base_model (step 4995)",
|
| 3 |
+
"step": 4995,
|
| 4 |
+
"bpb": {
|
| 5 |
+
"val": 1.0107521146719778
|
| 6 |
+
},
|
| 7 |
+
"core_metric": null,
|
| 8 |
+
"core_results": null,
|
| 9 |
+
"centered_results": null,
|
| 10 |
+
"conditioned_samples": [],
|
| 11 |
+
"unconditioned_samples": []
|
| 12 |
+
}
|