Spaces:
Running on CPU Upgrade
Running on CPU Upgrade
Commit Β·
a6df89a
1
Parent(s): d877609
made plot d3 component more general and added finephrase vs baseline comparison
Browse files
app/src/content/assets/data/finephrase_vs_baselines.csv
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
runname,seed,steps,agg_score_micro,lighteval|arc_cf:easy|3/prob_norm_token,lighteval|drop|3/prob_norm_token,lighteval|gsm8k|3/prob_norm_token,lighteval|hellaswag_cf|3/prob_norm_token,lighteval|openbookqa_cf|3/prob_norm_token,lighteval|piqa_cf|3/prob_norm_token,lighteval|squad_v2|3/prob_norm_token,lighteval|treb_qa|3/prob_norm_token,lighteval|wikitablequestions|3/prob_norm_token,lighteval|winogrande_cf|3/prob_norm_token,lighteval|xcsqa_cf|3/prob_norm_token,lighteval|mmlu_redux_cf:_average|3/prob_norm_token,agg_score_RC,agg_score_GK,agg_score_NLU,agg_score_MATH,agg_score_TABLE,agg_score_RES,agg_score_macro
|
| 2 |
+
cosmopedia,42,500,0.02913293526383949,0.01644980523353067,0.05708106828394926,0.07507547011560446,0.025204289035927866,0.004981102491946008,0.015223517258831067,0.059624836394866286,0.009275745169189628,0.050868520098123385,0.0011497275257879671,0.0023969106282150515,0.03226423093010228,0.05835295233940777,0.02435701808181647,0.013177008280857917,0.07507547011560446,0.030072132633656507,0.007533843459664041,0.034761404151834534
|
| 3 |
+
cosmopedia,42,1000,0.04124795323520204,0.03376632969431081,0.0716128388383384,0.08040159709443524,0.03560581270448807,0.0062957483423988275,0.02612757916237944,0.09776743961934783,0.009555730693441691,0.08185223438207805,0.001536835821548027,0.0049266465186134055,0.04552664595104469,0.08469013922884311,0.03964648782267775,0.01857132426301805,0.08040159709443524,0.04570398253775987,0.012449991341130557,0.04691058704797743
|
| 4 |
+
cosmopedia,42,1500,0.04963047226398357,0.0402981783360864,0.07913986423773972,0.09180952610013449,0.04059972968013296,0.007410394221063179,0.030842821946334014,0.12491390042085257,0.021110385005617776,0.1002281011619102,0.0019249414445667013,0.006217583147858968,0.05107024146550601,0.10202688232929615,0.045684209900796205,0.02126233556234983,0.09180952610013449,0.060669243083763987,0.014823599771752053,0.05604596612468212
|
| 5 |
+
cosmopedia,42,2000,0.055408550551699416,0.0568002954147212,0.0909358649801166,0.09635380830154233,0.04342689974729869,0.009485241437043596,0.03475827127170817,0.15457097289454855,0.004615149488716231,0.09751936721761113,0.0020059463567881027,0.012172360178745664,0.06225842933155281,0.12275341893733258,0.05952936237313701,0.022716423052043397,0.09635380830154233,0.05106725835316368,0.01880529096249914,0.061870926996619696
|
| 6 |
+
cosmopedia,42,2500,0.06144020140882423,0.06562859372528666,0.11021627928084318,0.09630655113051576,0.0468583343189605,0.009629600406227172,0.0368552236394807,0.16902945248764667,0.01050788263277363,0.11023229745426368,0.0022796756956278428,0.013052901110046075,0.06668562502421899,0.13962286588424494,0.06615710937475283,0.02456900500729417,0.09630655113051576,0.060370090043518655,0.019845908385251316,0.06781192163759628
|
| 7 |
+
cosmopedia,42,3000,0.06121668029315411,0.06969621409720483,0.10185953561700806,0.09874590044085123,0.05009863073138798,0.010188544711124098,0.039933801768543206,0.1716537704504427,0.01083332140482438,0.09492669454623737,0.002904571514808216,0.013753760564276728,0.07000541767114048,0.13675665303372536,0.06985081588417266,0.0265016011230981,0.09874590044085123,0.05288000797553087,0.021292035681314676,0.06767116902311548
|
| 8 |
+
cosmopedia,42,3500,0.0663916711095035,0.07541054538396873,0.10016331777214653,0.10381392344419293,0.05243001054507087,0.010455061992097035,0.04267662005835878,0.19460969592718852,0.010023798092275377,0.1133829366996711,0.0032535435652782125,0.01596619460094678,0.0745144052328472,0.14738650684966753,0.07496247530840797,0.02784177705517454,0.10381392344419293,0.06170336739597324,0.023032625550467534,0.07312344593398062
|
| 9 |
+
cosmopedia,42,4000,0.07180351600888919,0.07566954613967665,0.11273495410886125,0.09608445460195386,0.05442526749758481,0.01166117470176923,0.043781903998197,0.22876664811713013,0.026032774541086438,0.11887824245733007,0.003758279690394813,0.016374059802257045,0.07347488645042907,0.17075080111299568,0.07457221629505287,0.029091773593989814,0.09608445460195386,0.07245550849920826,0.02393904616740776,0.07781563337843471
|
| 10 |
+
cosmopedia,42,4500,0.0656027681218991,0.07107521915583201,0.09964708629801075,0.09723004289941957,0.055162473954239795,0.011634927328086455,0.04693290401977367,0.19028292652250303,0.022640654916820647,0.1021815961640159,0.003596286975441124,0.015527511272809375,0.0713215879558367,0.1449650064102569,0.07119840355583434,0.029379380464840458,0.09723004289941957,0.06241112554041827,0.02469844754022317,0.07164706773516545
|
| 11 |
+
cosmopedia,42,5000,0.07441916275873679,0.07772802928199113,0.11979591994484576,0.1054445123428739,0.0555141688550074,0.012375042435616844,0.04678685589565672,0.25030905667429465,0.012293637162199007,0.11558599574917801,0.0037032972020692,0.017017301322272728,0.07647613623883621,0.18505248830957022,0.07710208276041366,0.029608733028538302,0.1054445123428739,0.06393981645568851,0.025393066551182095,0.08109011657471112
|
| 12 |
+
cosmopedia,42,5500,0.07616889961526692,0.08586521784016052,0.10124758702743844,0.10674611045804462,0.058034833992440034,0.012319562347954182,0.04744391394212781,0.2546519548034475,0.018954785938345143,0.12347192103193484,0.0051610805765532584,0.01839009914731498,0.08173972827744186,0.17794977091544295,0.08380247305880119,0.031597957284496644,0.10674611045804462,0.07121335348513999,0.026051191812465662,0.08289347616906519
|
| 13 |
+
cosmopedia,42,6000,0.08099789724204517,0.08882521070108303,0.12735352719958415,0.10941075268970758,0.05812652072180636,0.012385956248201166,0.04765550787536823,0.2776440888809085,0.016711447928293665,0.13042478387629072,0.003921410885071205,0.018112324905211703,0.08140323499301569,0.20249880804024634,0.08511422284704936,0.03102396580343878,0.10941075268970758,0.07356811590229219,0.026051263009593695,0.08794452138205466
|
| 14 |
+
cosmopedia,42,6500,0.07801033294560415,0.08204899090790067,0.12286647679849201,0.08790568205765933,0.05984436116044507,0.01205167251574742,0.04851285111081279,0.2848961360831886,0.023770329934153595,0.11508294245412083,0.00420367110222168,0.018246525704118936,0.07669435551838873,0.2038813064408403,0.0793716732131447,0.03202401613133338,0.08790568205765933,0.06942663619413722,0.02627034977689305,0.08314661063566799
|
| 15 |
+
cosmopedia,42,7000,0.08548292823468344,0.08533814469577075,0.13544702269754663,0.10719485942364332,0.06019020289676362,0.013405605557102096,0.0496142996152204,0.30618047286508054,0.017818594804336126,0.13963953817422833,0.004348850038180937,0.0236620389015861,0.08295550914674236,0.22081374778131357,0.08414682692125655,0.032269526467472276,0.10719485942364332,0.07872906648928223,0.02889398135796954,0.0920080014068229
|
| 16 |
+
cosmopedia,42,7500,0.0865703162890722,0.08984361281399103,0.12025344931032177,0.10320614567039915,0.06122459770640719,0.012495559512436838,0.049643987849612194,0.331511354685079,0.019897403537450682,0.1431926647202153,0.006029502617134309,0.02085308298844795,0.08069243405737127,0.22588240199770038,0.08526802343568116,0.03362705016177075,0.10320614567039915,0.08154503412883299,0.027664210116832327,0.09286547758520279
|
| 17 |
+
cosmopedia,42,8000,0.08935496800395408,0.08787559443980666,0.15113832825406034,0.10177575236106819,0.06267944055666991,0.012328437894109482,0.05049906237803139,0.32612879296982994,0.023309744095949672,0.14577382630581717,0.004575005081071323,0.023176683151316647,0.08299894855971805,0.23863356061194513,0.08543727149976235,0.033627222818870615,0.10177575236106819,0.08454178520088343,0.028668061141152505,0.09544727560561371
|
| 18 |
+
cosmopedia,42,8500,0.08496482413844532,0.09032872308584992,0.14596326443088656,0.10521061598164272,0.06293376930262534,0.01179172491640762,0.050726946193502785,0.30740922848116525,0.02429351905327144,0.1121663499152876,0.0049556757944356,0.02224768208948586,0.08155039041678296,0.22668624645602592,0.08593955675131644,0.03394472254853047,0.10521061598164272,0.06822993448427951,0.02825545106646542,0.09137775454804342
|
| 19 |
+
cosmopedia,42,9000,0.08361274715260704,0.09254362908583605,0.10953523679799548,0.10380177690532401,0.06243000178511952,0.011652919822048803,0.05092647819510025,0.30653581218202514,0.017896957383616914,0.12946289888553303,0.003848008119362756,0.026501396702508038,0.08821784996681448,0.2080355244900103,0.09038073952632526,0.03313900495224114,0.10380177690532401,0.07367992813457497,0.029693598239885696,0.08978842870806024
|
| 20 |
+
cosmopedia,42,9500,0.09277612714731535,0.10514627047570546,0.13663089759800565,0.10320967226040505,0.06605400194318554,0.01298875894560893,0.053697929823851555,0.35209512779237045,0.021987363748635042,0.13337928761288226,0.006941631855732927,0.029765689477053946,0.09141689423434754,0.24436301269518806,0.0982815823550265,0.03649781689945924,0.10320967226040505,0.07768332568075866,0.03215079274883815,0.09869770043994595
|
| 21 |
+
cosmopedia,42,10000,0.09713070150622272,0.1099484461088254,0.1385219884835079,0.1077912252718024,0.06854509328349631,0.013037806175627246,0.05760761921210011,0.37396816691837215,0.02340183572970216,0.136869361648535,0.0063521372614830434,0.032283548759282396,0.09724118922193857,0.25624507770094,0.10359481766538198,0.037448615272489674,0.1077912252718024,0.08013559868911858,0.034309658049003246,0.10325416544145598
|
| 22 |
+
mix-fw_edu_hq-table_smollm2_1.7b_hq,42,500,0.05497202738487924,0.043888791627692214,0.07460081309682072,0.0909782586710973,0.027876488119456623,0.008072883577787215,0.02051420846060384,0.1617051955054037,0.06602295809899959,0.10268333935383694,0.00815657924316373,0.0033059830143184607,0.05185882984937056,0.1181530043011122,0.04787381073853139,0.018016533681310176,0.0909782586710973,0.08435314872641828,0.010631025017569838,0.06166763018933987
|
| 23 |
+
mix-fw_edu_hq-table_smollm2_1.7b_hq,42,1000,0.08339086204445532,0.10238764461840953,0.12673011117801516,0.09052971517217097,0.04346501296225431,0.014916245756766831,0.034019607127749436,0.2496537550077984,0.09630394687339318,0.13819540213570458,0.013993487690899496,0.008922625179076039,0.08157279083122602,0.18819193309290677,0.09198021772481778,0.028729250326576902,0.09052971517217097,0.11724967450454887,0.019286159354530766,0.08932782502925869
|
| 24 |
+
mix-fw_edu_hq-table_smollm2_1.7b_hq,42,1500,0.09615738084060316,0.12802980724125926,0.12775816534750992,0.10104346038604804,0.047949606190980876,0.01808526273352843,0.037748044488513696,0.30796469863451875,0.10921484534315283,0.15463125522120116,0.014607328677324173,0.01325062131471197,0.09360547450848884,0.21786143199101432,0.11081764087487406,0.031278467434152524,0.10104346038604804,0.131923050282177,0.023027976178918035,0.10265867119119733
|
| 25 |
+
mix-fw_edu_hq-table_smollm2_1.7b_hq,42,2000,0.1086427603353443,0.14954049130397948,0.14182266926274986,0.09585888107956692,0.05308496960119366,0.02031388308111189,0.04177542481892087,0.3670467200065427,0.12060223958208637,0.1721704883907508,0.020847733413292047,0.016052783415978204,0.10459684006795883,0.2544346946346463,0.12706866568596914,0.03696635150724285,0.09585888107956692,0.14638636398641858,0.026047363772003656,0.11446038677764125
|
| 26 |
+
mix-fw_edu_hq-table_smollm2_1.7b_hq,42,2500,0.11678573043499364,0.16499517381011572,0.1570696228480358,0.1018030630962526,0.05632240550395245,0.022942654273776987,0.04527304492919819,0.40037123618139686,0.1309033874369144,0.1652454706254375,0.023832756157793077,0.02096279806086712,0.11170715229618304,0.2787204295147163,0.13835116305314937,0.04007758083087276,0.1018030630962526,0.14807442903117596,0.029726165754614103,0.12279213854679684
|
| 27 |
+
mix-fw_edu_hq-table_smollm2_1.7b_hq,42,3000,0.1227298056760559,0.1705434193639903,0.13641915890766848,0.10409819680450168,0.058691497421398967,0.027163807386248698,0.04850198210945816,0.42305727309724306,0.14337923513086004,0.19030104091793812,0.030022640888477992,0.022736857564893343,0.11784255851999163,0.27973821600245574,0.14419298894199098,0.04435706915493848,0.10409819680450168,0.16684013802439907,0.032800882353533393,0.1286712485469699
|
| 28 |
+
mix-fw_edu_hq-table_smollm2_1.7b_hq,42,3500,0.1314334968225223,0.18423965688370678,0.15271030792608034,0.10109513940753845,0.061509397478021324,0.027635735341552784,0.05051220430460552,0.4588881783575565,0.1618549897950074,0.19050082146842992,0.034468238566735684,0.028825584897231887,0.12496170744380108,0.30579924314181844,0.15460068216375394,0.047988818022378504,0.10109513940753845,0.17617790563171865,0.0356578415144634,0.13688660498027858
|
| 29 |
+
mix-fw_edu_hq-table_smollm2_1.7b_hq,42,4000,0.1332019362216416,0.18507502454513677,0.1572850859214566,0.10419155470794772,0.06255835953403013,0.026967514379063716,0.05180308261411266,0.48022449664935574,0.15174584132025593,0.18943187355809618,0.03342770763839489,0.028484697186613422,0.12722799660523532,0.3187547912854062,0.15615151057518606,0.047993033586212513,0.10419155470794772,0.17058885743917607,0.0357517647265966,0.13890525205342086
|
| 30 |
+
mix-fw_edu_hq-table_smollm2_1.7b_hq,42,4500,0.13926821960208588,0.19403697967519826,0.1900001270576879,0.10370921833392541,0.06451143584709607,0.028245490069410798,0.05306335171055211,0.5047326573045379,0.16739842289589846,0.16832572032790716,0.03340946942237883,0.033063203858285165,0.13072255872215258,0.34736639218111287,0.16237976919867542,0.048960452634737445,0.10370921833392541,0.1678620716119028,0.03812401521274936,0.1447336531955172
|
| 31 |
+
mix-fw_edu_hq-table_smollm2_1.7b_hq,42,5000,0.14442127461323784,0.19665543753740394,0.14085530492645892,0.10684709596788833,0.0657321822351631,0.030879691559855493,0.0539732739946766,0.5382313844966209,0.17004663652172006,0.2226942357913624,0.03222495551252107,0.037558640515889564,0.1373564562992935,0.3395433447115399,0.16700594691834872,0.04897856887384208,0.10684709596788833,0.19637043615654123,0.04080386869014055,0.14992487688638348
|
| 32 |
+
mix-fw_edu_hq-table_smollm2_1.7b_hq,42,5500,0.13971015880682064,0.19605343311675158,0.1511972802969607,0.10557630149963114,0.0669723742730497,0.030218822633090305,0.05430898824221272,0.5021353527787574,0.16100417640605444,0.20616568279157108,0.035622785743038135,0.03367826370748683,0.1335884441932436,0.32666631653785905,0.16482093865499758,0.051297580008043915,0.10557630149963114,0.18358492959881276,0.03940202486092995,0.14522468186004575
|
| 33 |
+
mix-fw_edu_hq-table_smollm2_1.7b_hq,42,6000,0.14398073283339033,0.2022853724762129,0.15652976336605173,0.10722511152867062,0.06634623060591266,0.028803854752332634,0.055847650631034675,0.5193328249159094,0.17035290221546426,0.20868541415667066,0.037035771146383144,0.039972555100646324,0.13535134310539554,0.33793129414098055,0.16881835779080423,0.051691000876147905,0.10722511152867062,0.18951915818606746,0.04154135349467122,0.14945437933622366
|
| 34 |
+
mix-fw_edu_hq-table_smollm2_1.7b_hq,42,6500,0.1505931655024272,0.20959759419315002,0.17286796783518985,0.10915211312785428,0.0679797088727532,0.029815842021685482,0.05570872361648764,0.5484850210055906,0.18048638798990244,0.21785600020824938,0.03353868589734435,0.04035781445342067,0.14127212680749857,0.36067649442039024,0.1754348605003243,0.05075919738504878,0.10915211312785428,0.1991711940990759,0.0419607933638646,0.15619244214942637
|
| 35 |
+
mix-fw_edu_hq-table_smollm2_1.7b_hq,42,7000,0.1464742487252148,0.20341816996444992,0.14551407211819187,0.1057297626245669,0.06956275571849373,0.03240960808738502,0.05658341735298749,0.5381168910757818,0.1691133849347958,0.21521073712008235,0.03707498715625,0.04248064018647134,0.14247655836312156,0.3418154815969868,0.17294736416378576,0.053318871437371865,0.1057297626245669,0.19216206102743907,0.04382455520894796,0.1516330160098497
|
| 36 |
+
mix-fw_edu_hq-table_smollm2_1.7b_hq,42,7500,0.1517105578379645,0.20132129855879002,0.1527633022636308,0.10704359442804749,0.07022009847906449,0.03147189735161151,0.05654149495954995,0.570532862874293,0.18488848294861215,0.22447892983300183,0.037943160000325785,0.04132730784489554,0.14199426451375163,0.36164808256896186,0.17165778153627081,0.05408162923969514,0.10704359442804749,0.204683706390807,0.043113566718685666,0.157038060147078
|
| 37 |
+
mix-fw_edu_hq-table_smollm2_1.7b_hq,42,8000,0.153111585321741,0.20370600323462096,0.15559967087940202,0.10995400480081047,0.0703778830508127,0.03168853210271354,0.05685130056512766,0.5573852453552104,0.20006111411461655,0.23535280897044764,0.03544271514235402,0.04078750637679567,0.14013223926798019,0.35649245811730623,0.17191912125130057,0.05291029909658336,0.10995400480081047,0.2177069615425321,0.04310911301487896,0.15868199297056862
|
| 38 |
+
mix-fw_edu_hq-table_smollm2_1.7b_hq,42,8500,0.153474214811659,0.2156416888077374,0.14767760594012713,0.1086404631393254,0.0703495646729733,0.035481303387208424,0.057656227068679705,0.563167335678502,0.19176190889916359,0.22524712311352688,0.037423606381044354,0.04378362868833993,0.14486012196328,0.35542247080931455,0.1802509053855087,0.053886585527008826,0.1086404631393254,0.20850451600634523,0.045640386381409354,0.15872422120815202
|
| 39 |
+
mix-fw_edu_hq-table_smollm2_1.7b_hq,42,9000,0.15431434783397444,0.21332785298597565,0.18138948523020929,0.10920361971138605,0.07165906701155657,0.03237368284994406,0.059718017340528194,0.5700328205641509,0.17389265540119148,0.21668574221748044,0.03588189889073877,0.0425072941680442,0.1451000376364875,0.3757111528971801,0.17921394531123158,0.05377048295114767,0.10920361971138605,0.19528919880933596,0.04486633145283881,0.1596757885221867
|
| 40 |
+
mix-fw_edu_hq-table_smollm2_1.7b_hq,42,9500,0.16005748327065486,0.21738016620018716,0.1753445777547784,0.1118670648000796,0.07378068138362223,0.033658712377510745,0.06202369654688045,0.5824510751503132,0.19484951287508973,0.23499616874860976,0.038260197199537195,0.04559582780714385,0.150482118404106,0.3788978264525458,0.18393114230214658,0.05602043929157971,0.1118670648000796,0.21492284081184976,0.04709274557717835,0.16545534320589664
|
| 41 |
+
mix-fw_edu_hq-table_smollm2_1.7b_hq,42,10000,0.16643219455201952,0.22104265954029498,0.21328824188432033,0.11368725290137877,0.0766473487870297,0.035538152606588894,0.06356709975061516,0.6127574281795107,0.2130087186112811,0.2053092042656179,0.0386371297699402,0.04991314653034026,0.15378995179731605,0.4130228350319155,0.18741630566880552,0.05764223927848495,0.11368725290137877,0.2091589614384495,0.04967279962918144,0.17176673232470263
|
| 42 |
+
nemotron_hq_synth,42,500,0.039651040279900684,0.02521080354500527,0.08721113630836978,0.05662676366027639,0.031814615729775696,0.006743888401216779,0.02405165913161938,0.07518432561800878,0.0353232685179539,0.0832831589480137,0.0025603179472343247,0.0038350073918069906,0.04396753815952727,0.08119773096318927,0.03458917085226627,0.01718746683850501,0.05662676366027639,0.059303213732983806,0.011543518308214382,0.043407977392572517
|
| 43 |
+
nemotron_hq_synth,42,1000,0.05810237150109671,0.0557521277001006,0.09014895108979679,0.09624088068246797,0.04663541901386824,0.009979157708001441,0.03837330121857019,0.12378904501683104,0.04635777456911922,0.1157220273651087,0.004684014565035495,0.00639588700425435,0.0631498720800065,0.10696899805331392,0.05945099989005355,0.025659716789451868,0.09624088068246797,0.08103990096711396,0.018249448643608657,0.06460165750433498
|
| 44 |
+
nemotron_hq_synth,42,1500,0.07195689680181379,0.07913489236087619,0.12539193988020703,0.10047277888907442,0.053379673742216534,0.013153722355709014,0.0447560898586656,0.17189836266809538,0.06217527836391408,0.11914666513783351,0.00823924208199497,0.011196835684551864,0.07453728059862684,0.1486451512741512,0.07683608647975151,0.03080945791210575,0.10047277888907442,0.09066097175087379,0.02303554929964216,0.0784099992675998
|
| 45 |
+
nemotron_hq_synth,42,2000,0.07928569861732147,0.09393732383469464,0.10410439073944557,0.10282672182849376,0.05853735121383968,0.014509907583777317,0.04926649835018796,0.21742409707244026,0.061424601614937874,0.13188065357213857,0.012847561329417693,0.016855623157659313,0.08781365311082491,0.1607642439059429,0.09087548847275978,0.03569245627162869,0.10282672182849376,0.09665262759353822,0.02687734303054153,0.08561481351715082
|
| 46 |
+
nemotron_hq_synth,42,2500,0.0891604770455474,0.11464012531838454,0.13864367612403083,0.10316555962415479,0.06173150262670635,0.015651130257446712,0.05181886103820301,0.24177343020955966,0.0740838039662981,0.14573391323978432,0.013470914822123186,0.015466920506005604,0.09374588681387162,0.19020855316679525,0.10419300606612808,0.03760120872441477,0.10316555962415479,0.10990885860304121,0.027645637267218443,0.09545380390862541
|
| 47 |
+
nemotron_hq_synth,42,3000,0.09037626326918476,0.12623950632915582,0.11855261679744733,0.09678583517920873,0.0626349588611655,0.018345536309102427,0.053424336708777755,0.26236462212792905,0.07217109145641441,0.1337614057460242,0.022926523956926907,0.01702235584892099,0.100286369909144,0.1904586194626882,0.1132629381191499,0.042780741409046204,0.09678583517920873,0.1029662486012193,0.02959740962226706,0.09597529873226324
|
| 48 |
+
nemotron_hq_synth,42,3500,0.09795320268695086,0.13382261010354013,0.12547854646356435,0.10405079222906868,0.0661486602964717,0.019409284692449225,0.05416908711654374,0.3091239440020756,0.07689545480774088,0.13819714037034073,0.02220307637515695,0.022287988088026713,0.10365184769843165,0.21730124523281996,0.1187372289009859,0.04417586833581433,0.10405079222906868,0.1075462975890408,0.03195545329900656,0.10396114759778935
|
| 49 |
+
nemotron_hq_synth,42,4000,0.10276022435362109,0.14183301447543797,0.13105414710509541,0.10318167913865792,0.0669034257826245,0.02160038185279176,0.056667393754237814,0.3115664928779588,0.0840886294835673,0.15864345267403296,0.021705684316899336,0.02521391472559382,0.11066447605655556,0.22131031999152712,0.12624874526599678,0.04430455504976191,0.10318167913865792,0.12136604107880013,0.03449389677754113,0.1084842062170475
|
| 50 |
+
nemotron_hq_synth,42,4500,0.10589796102766387,0.14995689210296353,0.1300186029361498,0.1006565847256015,0.06847679516921591,0.02170730691707844,0.057402810772006006,0.33263917774047097,0.0879004934545943,0.15894128403493232,0.023406293690234,0.026482898374156214,0.1131863924145636,0.2313288903383104,0.13157164225876355,0.04594154442972496,0.1006565847256015,0.12342088874476331,0.03519767202108022,0.11135287041970733
|
| 51 |
+
nemotron_hq_synth,42,5000,0.11121488341912057,0.1534008443898165,0.14951222505049067,0.09848300488058413,0.07003801135800797,0.021958084416484416,0.05922456540434963,0.33748169546968565,0.10156976368099678,0.16092560983388768,0.029777554512878564,0.031205228129759163,0.12100201390250556,0.24349696026008816,0.137201429146161,0.04990778293544326,0.09848300488058413,0.13124768675744222,0.03746262598353107,0.11629991499387499
|
| 52 |
+
nemotron_hq_synth,42,5500,0.11311404511736926,0.15516654882933922,0.15209282365884583,0.10158771318104871,0.07123201424060252,0.023565275199956378,0.06114934177051213,0.33601066310629,0.09859562206099962,0.17149658277121704,0.033916364386397654,0.032227710595457267,0.12032788160776454,0.2440517433825679,0.13774721521855188,0.05257418931350009,0.10158771318104871,0.13504610241610832,0.03898077585530859,0.11833128989451426
|
| 53 |
+
nemotron_hq_synth,42,6000,0.1073559536046002,0.15586794207035007,0.1326965003090768,0.09926817477604612,0.07181212663172408,0.021472294824712485,0.06196634584942247,0.3238753541014552,0.08075065961013964,0.1619637283610994,0.02901137347268065,0.03003813846910205,0.11954880477939328,0.22828592720526597,0.1377083734248717,0.050411750052202366,0.09926817477604612,0.12135719398561952,0.037825593047745666,0.11247616874862522
|
| 54 |
+
nemotron_hq_synth,42,6500,0.11459190815940934,0.1598060184380004,0.16356373328177568,0.09893754808174242,0.07275127003678514,0.022982679039582052,0.06347790304677574,0.3552690759021704,0.10281926644847421,0.161481453393715,0.024991369360983777,0.028435012555457707,0.12058756832744968,0.259416404591973,0.14019679338272506,0.04887131969888446,0.09893754808174242,0.1321503599210946,0.03829853154727183,0.11964515953728189
|
| 55 |
+
nemotron_hq_synth,42,7000,0.11947696525346256,0.16957283545722018,0.15338508023648154,0.10267713811600235,0.07293863960611924,0.02472887058756763,0.06224060315892607,0.38086180872412706,0.10187995606002764,0.1743867962225419,0.030390230575725107,0.03568095724664217,0.12498066705017005,0.2671234444803043,0.1472767512536951,0.051664435090922174,0.10267713811600235,0.1381333761412848,0.040883476997711964,0.1246264370133201
|
| 56 |
+
nemotron_hq_synth,42,7500,0.11905367547190097,0.17197933150833233,0.1660823814839022,0.10266969031663754,0.07421277319881611,0.02525573257213466,0.06415429666150758,0.3786566306462613,0.098942639696671,0.15674347826188884,0.03151208194269936,0.030634898097217055,0.1278001712767435,0.27236950606508176,0.1498897513925379,0.052862427570757736,0.10266969031663754,0.12784305897927992,0.0400149757769531,0.12427490168354133
|
| 57 |
+
nemotron_hq_synth,42,8000,0.11928478920066558,0.16781076293868702,0.14325354294818182,0.10281305926728988,0.07461424489875063,0.02400955924566592,0.06398430296886827,0.3993208038457467,0.0965965258078817,0.16150562113600314,0.03524446129861041,0.033692990564351134,0.12857159548795058,0.2712871733969643,0.1481911792133188,0.05492935309868052,0.10281305926728988,0.12905107347194242,0.04056228425962844,0.1244723537846374
|
| 58 |
+
nemotron_hq_synth,42,8500,0.1208980293451732,0.17216781431725084,0.1505033593002199,0.10392738378376219,0.07481191719915099,0.023163846657303182,0.06325500872883888,0.38594607091948824,0.10286949818236577,0.17545317494968726,0.031019160609311505,0.038232247696591115,0.12942686979810872,0.2682247151098541,0.15079734205767978,0.05291553890423125,0.10392738378376219,0.13916133656602653,0.041550367694244396,0.12609611401929968
|
| 59 |
+
nemotron_hq_synth,42,9000,0.11712752533452979,0.16624200206904388,0.13965175499435037,0.10565824906080684,0.075698399375578,0.02444246488153262,0.06597617482488803,0.3721941532205233,0.08841529043764194,0.16570800138694497,0.04046622878986389,0.032597048223138136,0.12848053675004567,0.2559229541074368,0.14736126940954478,0.058082314082720944,0.10565824906080684,0.12706164591229346,0.04100522930985293,0.12251527698044264
|
| 60 |
+
nemotron_hq_synth,42,9500,0.12537208257873547,0.17578932938837427,0.15246040812265357,0.10471494674296045,0.07893239139464389,0.0234359338779227,0.0669753380813127,0.4039636902627306,0.10326598659320063,0.1823187366583999,0.0402253715887885,0.036461854028429226,0.1359210042054092,0.2782120491926921,0.15585516679689174,0.059578881491716196,0.10471494674296045,0.14279236162580028,0.04229104199588821,0.1305740746409915
|
| 61 |
+
nemotron_hq_synth,42,10000,0.13053033482303275,0.1833489003594342,0.16390774148358422,0.10532042652172288,0.08129337889204831,0.026658446953565253,0.06982583711386792,0.4256470896685524,0.1004011755011827,0.17992691265266672,0.04341169785398802,0.043647083609207764,0.14297532726657225,0.2947774155760683,0.16316211381300322,0.062352538373018164,0.10532042652172288,0.1401640440769247,0.04671045589221365,0.1354144990421585
|
| 62 |
+
rewire,42,500,0.04265056692256005,0.017686775761007422,0.07063000758921417,0.0903926210271821,0.030379799013744737,0.004539264178227114,0.023639981534482,0.1107927894015786,0.02662659943622075,0.09234833137637262,0.002912124349926271,0.002438425242832636,0.039420084159932096,0.09071139849539639,0.028553429960469758,0.016645961681835506,0.0903926210271821,0.05948746540629669,0.010205890318513917,0.049332794481615726
|
| 63 |
+
rewire,42,1000,0.06484311728144444,0.04049910217052242,0.0985333468707251,0.09272197310137888,0.045354188509845,0.009081214028769788,0.03966070087151555,0.18394079775544742,0.05592438212146007,0.13637468879651304,0.00535327323369042,0.007256215229922551,0.06341752468754301,0.14123707231308624,0.05195831342903272,0.02535373087176771,0.09272197310137888,0.09614953545898655,0.018666043376735962,0.07101444475849801
|
| 64 |
+
rewire,42,1500,0.07190741905633209,0.05892115075137047,0.10436893735488467,0.09795460382659905,0.053258325342698795,0.010844613060567948,0.046119534385095785,0.20469495625029605,0.0573370692498,0.14140054855229928,0.006609331087688163,0.011982514632247587,0.06939744418243737,0.15453194680259036,0.06415929746690392,0.02993382821519348,0.09795460382659905,0.09936880890104964,0.02298222069263711,0.07815511765082893
|
| 65 |
+
rewire,42,2000,0.08061218582376058,0.07078496771644291,0.09514233622292204,0.09790497768108983,0.058547907461149816,0.013215992847969859,0.050800339383308475,0.25919881013704127,0.06798204974806973,0.1474184488528701,0.013355009392517886,0.015730607162547847,0.07726478327919721,0.17717057317998164,0.07402487549782005,0.03595145842683385,0.09790497768108983,0.10770024930046991,0.026582313131275393,0.08655574120291178
|
| 66 |
+
rewire,42,2500,0.08420039972794953,0.0771603417427458,0.11276586955621451,0.09885428216223155,0.06084503993366509,0.015064331133729063,0.0550019303420429,0.26251550608419394,0.07393736409516435,0.14873268946463206,0.012001684289889788,0.014570200439499864,0.07895555749138555,0.1876406878202042,0.07805794961706568,0.03642336211177744,0.09885428216223155,0.1113350267798982,0.028212153971757276,0.09008724374382238
|
| 67 |
+
rewire,42,3000,0.09273231175284881,0.08688501540330522,0.12433475254180555,0.10091173801077505,0.06409726550823135,0.016415555460041233,0.057262931143389734,0.3083276276216714,0.08598627557166745,0.14804341765311896,0.013863234095875228,0.02077112347443672,0.08588880454986808,0.21633119008173846,0.08638690997658666,0.038980249802053286,0.10091173801077505,0.1170148466123932,0.031483203359289225,0.09851802297380598
|
| 68 |
+
rewire,42,3500,0.0943552071841897,0.09198815676417393,0.11226571517194292,0.10031888172027258,0.0658774178188419,0.017955618813174097,0.05958258224760938,0.3155799049237895,0.09068626120982909,0.15085156921568946,0.014212479354972775,0.024739580702601693,0.08820431826737882,0.21392281004786623,0.09009623751577638,0.04004494858690734,0.10031888172027258,0.12076891521275927,0.03409259392112839,0.09987406450078502
|
| 69 |
+
rewire,42,4000,0.10045323135126227,0.09808493132564441,0.12856773797683327,0.099866389535988,0.06706545294538216,0.01766860798312627,0.06150439737831871,0.347323918915057,0.09378039241838648,0.15873791108538501,0.015553513269482568,0.02547021553417667,0.09181530784736656,0.23794582844594514,0.09495011958650548,0.04130948310743236,0.099866389535988,0.12625915175188573,0.03488107363187388,0.10586867434327175
|
| 70 |
+
rewire,42,4500,0.10036503829627282,0.10088855549984291,0.10524296722099793,0.10039707474235997,0.06861016353525448,0.018662317184749654,0.0616205043433571,0.3687800451936108,0.08722579301191119,0.15474602705856588,0.01565302153107318,0.02681297594213349,0.09574101429141714,0.23701150620730438,0.09831478489563003,0.04213159253316383,0.10039707474235997,0.12098591003523854,0.035698599156746745,0.10575657792840726
|
| 71 |
+
rewire,42,5000,0.1065431939221554,0.10500375304236402,0.13821420108568633,0.09877602848879723,0.06953066797835643,0.02138180895412673,0.06225948319143421,0.3955091290222677,0.09096177483016488,0.1559163685145909,0.018203569223752367,0.026421954077943774,0.09633958865638043,0.266861665053977,0.10067167084937223,0.0438671186010544,0.09877602848879723,0.12343907167237789,0.03668774874116824,0.11171721723445782
|
| 72 |
+
rewire,42,5500,0.11073908066220405,0.12446739213241784,0.1084806017696248,0.10143789776164783,0.07183017925454765,0.022495811849453262,0.06449095218497226,0.4110964572347125,0.099710100614002,0.1692942587153078,0.020466902502368847,0.03514829729241852,0.09995011663497495,0.25978852950216863,0.1122087543836964,0.04614854087845825,0.10143789776164783,0.1345021796646549,0.040711687108948014,0.11579959821659568
|
| 73 |
+
rewire,42,6000,0.11152831236617355,0.11750599169433543,0.14825706614540907,0.1022314017734436,0.07221089194749322,0.021348525798453536,0.06617793824944421,0.3930460633066562,0.10122473219185088,0.16298423454426988,0.018511070747557017,0.0324315609899276,0.10241027100524196,0.27065156472603263,0.10995813134978868,0.04536098134752512,0.1022314017734436,0.13210448336806038,0.039986008345941786,0.11671542848513204
|
| 74 |
+
rewire,42,6500,0.11412083094338983,0.12162911343289091,0.1650194763853759,0.10601544979229971,0.07396817821031502,0.020949160901119274,0.06491409833261005,0.40895763295645876,0.10067482623176112,0.15433181279214245,0.020207349210975126,0.030707694092558288,0.10207517898217155,0.28698855467091733,0.11185214620753123,0.047087763710645075,0.10601544979229971,0.1275033195119518,0.03885698444209587,0.1197173697225735
|
| 75 |
+
rewire,42,7000,0.11769021814814488,0.12846422033457444,0.11661160174171585,0.10587341120741427,0.07465826745252561,0.023253111661056477,0.06763325171512326,0.43359791467250897,0.1085309313673308,0.18878273910784718,0.020298853683952837,0.038092810721751746,0.10648550411193741,0.2751047582071124,0.11747486222325593,0.04747856056823922,0.10587341120741427,0.148656835237589,0.042993058032643826,0.12293024757937578
|
| 76 |
+
rewire,42,7500,0.11324781455415638,0.1231688180627862,0.17325189264268517,0.10435263532986278,0.07574792152655453,0.0220681262203481,0.06536118238242487,0.37196582126342603,0.11229246745678682,0.14885301527841707,0.020996462408347655,0.03641334834768204,0.10450208373055489,0.2726088569530556,0.11383545089667055,0.04837219196745109,0.10435263532986278,0.13057274136760194,0.04128088565015167,0.11850379369413226
|
| 77 |
+
rewire,42,8000,0.11610866109281774,0.12489472561651646,0.13397585930550662,0.1083357404261493,0.07447462168085858,0.023275636260537216,0.06511400709272641,0.4135252656710172,0.1119966717037618,0.17600068081918738,0.02060571249693513,0.03485447428173822,0.10625053775887865,0.2737505624882619,0.11557263168769755,0.047540167088896856,0.1083357404261493,0.14399867626147458,0.04108137254500061,0.12171319174958013
|
| 78 |
+
rewire,42,8500,0.12418821812993867,0.12752883908313112,0.15973221534385698,0.10529986221370606,0.07490085154496992,0.024443683465405628,0.06645745627167637,0.4622830744813988,0.11273660538068928,0.18596069933011203,0.01877625860555775,0.0434168485670852,0.10872222327167504,0.3110076449126279,0.11812553117740307,0.046838555075263834,0.10529986221370606,0.14934865235540065,0.04477266276805573,0.1292321514170762
|
| 79 |
+
rewire,42,9000,0.12182497336654456,0.13477283976810148,0.13235983136767265,0.10591517580386675,0.07646531376950948,0.0235884290613947,0.07026033133275691,0.4448382930540099,0.10738434116477466,0.19295601153377168,0.024094904175663404,0.037908880112197424,0.11135532925481549,0.2885990622108413,0.12306408451145848,0.05028010897258644,0.10591517580386675,0.15017017634927315,0.04391921350211634,0.12699130355835705
|
| 80 |
+
rewire,42,9500,0.12262415185928742,0.13955049369725644,0.14004088525941785,0.11081013558780674,0.07905528152583136,0.02503137766985094,0.0722994432167796,0.4158526439997781,0.1086849735165769,0.19352094653171775,0.024047511056761988,0.047313038347591575,0.11528309190207964,0.277946764629598,0.12741679279966805,0.051551396291296674,0.11081013558780674,0.15110296002414733,0.0482146197447407,0.1278404448462096
|
| 81 |
+
rewire,42,10000,0.13030916726404293,0.14874509430718114,0.14036438947513047,0.10747394495014771,0.08199891611739997,0.027135042878077907,0.07317693802463807,0.46080809037958503,0.11604413742583163,0.20332275094186225,0.026873999480503428,0.05653854302944602,0.12122816015871139,0.3005862399273578,0.13498662723294627,0.0544364577989517,0.10747394495014771,0.15968344418384695,0.05228350797738734,0.1349083703451063
|
| 82 |
+
synth_query_reasoning_answer,42,500,0.035513442621675266,0.015073976357627522,0.06616865382567644,0.07943777770518975,0.015712348051208167,0.0036345522202578828,0.0074687987072234565,0.08671991428745399,0.021924742825609692,0.07642687317160561,0.002177722836887656,0.0013750996400650284,0.050040851831297924,0.07644428405656521,0.03255741409446272,0.008945035444047912,0.07943777770518975,0.04917580799860765,0.004159483522515456,0.04178663380356479
|
| 83 |
+
synth_query_reasoning_answer,42,1000,0.04731168262865255,0.02963602643238462,0.08401242682543265,0.08853832296243212,0.023272789397401985,0.005266764151407125,0.01399878961669274,0.1338316493197785,0.03923059801809547,0.07982344474728056,0.0031333777210326144,0.004153994109947552,0.06284200824194466,0.10892203807260559,0.046239017337164644,0.0132030835592173,0.08853832296243212,0.05952702138268802,0.007806515959349139,0.054039333212242795
|
| 84 |
+
synth_query_reasoning_answer,42,1500,0.05326013801281323,0.03808885682928811,0.08936979532304469,0.0766902279229714,0.027866157648731175,0.005029376758557722,0.015261072495772647,0.16544255745029704,0.03834411968107126,0.10458882587147843,0.0038884547309342033,0.004740934368511638,0.06981127707310031,0.12740617638667087,0.05395006695119421,0.01587730618983269,0.0766902279229714,0.07146647277627484,0.008343794540947337,0.058955674127981895
|
| 85 |
+
synth_query_reasoning_answer,42,2000,0.06166518643564844,0.04961021583096391,0.10625470387154326,0.09076826729289443,0.031745314349828725,0.008043488101508345,0.022009781204622263,0.19844060651286843,0.04270292698392914,0.1018733703911475,0.004259666708248702,0.006110232140269859,0.07816366383995649,0.15234765519220583,0.0638869398354602,0.018002490529038715,0.09076826729289443,0.07228814868753831,0.012054500482133489,0.06822466700321182
|
| 86 |
+
synth_query_reasoning_answer,42,2500,0.06494348077664903,0.047233030314284975,0.10876514424103224,0.09437273417223085,0.03325026359922438,0.0069129589351941845,0.021642486150815994,0.21946348712337033,0.04659959768968433,0.10841051647534317,0.006552786889617815,0.006712842299140895,0.07940592142984923,0.16411431568220128,0.0633194758720671,0.019901525244421098,0.09437273417223085,0.07750505708251375,0.011756095795050358,0.07182820064141408
|
| 87 |
+
synth_query_reasoning_answer,42,3000,0.07304361950166396,0.06083236119922607,0.11901715462501503,0.0999531502178266,0.035673127025091546,0.008346536225954462,0.022860459533233932,0.24497066084633537,0.05924195629702546,0.12445701236521202,0.005383094494243185,0.007701793812884167,0.08808612737791967,0.1819939077356752,0.07445924428857287,0.020528110759667366,0.0999531502178266,0.09184948433111874,0.012969596524024187,0.08029224897614749
|
| 88 |
+
synth_query_reasoning_answer,42,3500,0.0743534698623073,0.06825549337045748,0.10911664631483034,0.09826798710967534,0.03686338324645009,0.008439231458359274,0.025503229494916638,0.2638559962546639,0.04372016041555822,0.12995317821807187,0.006999667144742354,0.009700564658356342,0.09156610066160584,0.18648632128474713,0.07991079701603165,0.02193152519559622,0.09826798710967534,0.08683666931681505,0.014547675203877418,0.0813301625211238
|
| 89 |
+
synth_query_reasoning_answer,42,4000,0.07893612365206563,0.0713987127718813,0.1146332358116625,0.10222040200077114,0.03770173201386019,0.00900240926040066,0.02627325010065712,0.2678410594913625,0.06529957425472337,0.14216174075174542,0.010616490683400528,0.007885458157405677,0.09219941852691732,0.1912371476515125,0.08179906564939932,0.02415911134863036,0.10222040200077114,0.1037306575032344,0.014387039172821152,0.08625557055439481
|
| 90 |
+
synth_query_reasoning_answer,42,4500,0.08041983543652169,0.08267124249172617,0.12380602615403143,0.10226453271781745,0.03827475345692155,0.009061552413699396,0.028257569462315637,0.2830962540397703,0.05070825883332228,0.12940000090762332,0.009633175916570135,0.010987480935782877,0.09687717790867972,0.20345114009690085,0.08977421020020294,0.023953964686745842,0.10226453271781745,0.0900541298704728,0.01610220093726597,0.08760002975156765
|
| 91 |
+
synth_query_reasoning_answer,42,5000,0.08058153947617179,0.07820590852350819,0.11771589365717491,0.1072380006691629,0.04032198412470698,0.010187498004546013,0.028489492588401548,0.271192788075246,0.05957526414957225,0.13624831859836145,0.00856856858084561,0.010704492872642352,0.09853026386989325,0.19445434086621044,0.08836808619670072,0.024445276352776296,0.1072380006691629,0.09791179137396686,0.01646049448852997,0.0881463316578912
|
| 92 |
+
synth_query_reasoning_answer,42,5500,0.08256078527521785,0.08843155774339825,0.1238367045421236,0.10917550385546258,0.040026651966034874,0.010164217880368826,0.02846147305792971,0.2747265048098881,0.0548526352585163,0.14119036604766205,0.005647394496900201,0.011793746550222164,0.10242266709410754,0.19928160467600584,0.0954271124187529,0.022837023231467538,0.10917550385546258,0.09802150065308918,0.016806479162840237,0.09025820399960306
|
| 93 |
+
synth_query_reasoning_answer,42,6000,0.08430762089591148,0.07919327341870015,0.12997988477615194,0.10273758126604275,0.04105881577603814,0.009640204226333889,0.029382070642368006,0.29928957416462665,0.05664930881048516,0.14264607399419546,0.010960652360041213,0.010555598217259678,0.0995984130986949,0.2146347294703893,0.08939584325869752,0.026009734068039678,0.10273758126604275,0.09964769140234031,0.016525957695320528,0.09149192286013835
|
| 94 |
+
synth_query_reasoning_answer,42,6500,0.08444149557411484,0.08402537139308493,0.13369818218066098,0.10285850115936718,0.04226174091240128,0.00991331271722322,0.030764933813341114,0.2839103286624604,0.055473570771933677,0.1477476162715078,0.012452525112519533,0.009840064076537727,0.10035179981834028,0.2088042554215607,0.09218858560571261,0.027357133012460406,0.10285850115936718,0.10161059352172074,0.01683943686903402,0.0916097509316426
|
| 95 |
+
synth_query_reasoning_answer,42,7000,0.08733579108353406,0.09274496195438126,0.12783228645351125,0.10585483141968924,0.04140499246430086,0.00990579037971003,0.0306657403064225,0.2964769632109272,0.06872084237283703,0.15010635810946463,0.009139265585396674,0.011518769287097315,0.10365869145867089,0.21215462483221922,0.09820182670652608,0.02527212902484877,0.10585483141968924,0.10941360024115082,0.017363433324409948,0.09471007425814067
|
| 96 |
+
synth_query_reasoning_answer,42,7500,0.08863417347387555,0.09240356217393896,0.14881973353501576,0.10151619983486602,0.042210124440366474,0.010713110240814129,0.0306682229730257,0.30600578486416224,0.061672811030978075,0.14370012267637594,0.013431786018876084,0.010090156104852884,0.10237846779323392,0.227412759199589,0.09739101498358643,0.027820955229621278,0.10151619983486602,0.10268646685367701,0.017157163106230906,0.09566409320126179
|
| 97 |
+
synth_query_reasoning_answer,42,8000,0.08755442520503119,0.08860498877063167,0.14179078288669023,0.09875658289513833,0.044096292751690164,0.009698260497968582,0.03077663652283185,0.29920128931824164,0.06572481798221814,0.14042232845936162,0.020122112503608597,0.010098737020688477,0.1013602728513049,0.22049603610246593,0.09498263081096828,0.03210920262764938,0.09875658289513833,0.10307357322078989,0.016857878013829635,0.09437931727847358
|
| 98 |
+
synth_query_reasoning_answer,42,8500,0.08335227495246045,0.08685831332021673,0.1055250465909481,0.097455577254604,0.043092917852824174,0.010305418491369097,0.032120632212379305,0.2741034843086726,0.06988872186519213,0.1509872786423176,0.01684208503668596,0.01180287876076734,0.10124494509354863,0.18981426544981034,0.09405162920688268,0.029967501444755067,0.097455577254604,0.11043800025375486,0.018076309821505248,0.08996721390521868
|
| 99 |
+
synth_query_reasoning_answer,42,9000,0.08549164796513085,0.09677796067439566,0.1361574540866481,0.10210580354158325,0.04443856787313211,0.010751481772278741,0.03339322593064408,0.2810053666746844,0.04336078094212285,0.14646892336813522,0.012589819426654004,0.013875996896216993,0.10497439439507479,0.20858141038066624,0.10087617753473523,0.028514193649893056,0.10210580354158325,0.09491485215512903,0.019340234866379938,0.09238877868806446
|
| 100 |
+
synth_query_reasoning_answer,42,9500,0.08675381637504477,0.09810283018124974,0.13254892176210545,0.09893137136118485,0.04511541180369288,0.011030940187333069,0.033065885461781463,0.31406529548089707,0.030923310141800064,0.14217114585352564,0.01581980415749073,0.012219747303091026,0.10705113280638545,0.22330710862150127,0.1025769814938176,0.030467607980591803,0.09893137136118485,0.08654722799766285,0.01877219098406852,0.09343374807313783
|
| 101 |
+
synth_query_reasoning_answer,42,10000,0.09358976838409616,0.10867687436008114,0.1268415551737933,0.10053703091204497,0.04709002622209718,0.011280139669537836,0.035570948809070906,0.3227502712932084,0.06067078915360809,0.16932094307810486,0.01544328007619672,0.012554458280223642,0.11234090358118656,0.22479591323350084,0.11050888897063385,0.03126665314914695,0.10053703091204497,0.11499586611585647,0.019801848919610794,0.10031770021679898
|
app/src/content/assets/image/newplot_2f81384e-bcac-804d-b760-e8611cc0302b.png
DELETED
Git LFS Details
|
app/src/content/chapters/experiments.mdx
CHANGED
|
@@ -1,6 +1,5 @@
|
|
| 1 |
import Image from "../../components/Image.astro";
|
| 2 |
import HtmlEmbed from "../../components/HtmlEmbed.astro";
|
| 3 |
-
import newplot_2f81384e_bcac_804d_b760_e8611cc0302b from "../assets/image/newplot_2f81384e-bcac-804d-b760-e8611cc0302b.png";
|
| 4 |
import newplot_2c41384e_bcac_8073_9395_cf2d0e901187 from "../assets/image/newplot_2c41384e-bcac-8073-9395-cf2d0e901187.png";
|
| 5 |
import newplot_2c31384e_bcac_800b_82e8_ff44228f7720 from "../assets/image/newplot_2c31384e-bcac-800b-82e8-ff44228f7720.png";
|
| 6 |
import newplot_2e11384e_bcac_800a_abc6_d0690da3f955 from "../assets/image/newplot_2e11384e-bcac-800a-abc6-d0690da3f955.png";
|
|
@@ -56,9 +55,23 @@ TODO: Add appendix section of weird unexplainable results?
|
|
| 56 |
|
| 57 |
We see that FinePhrase clearly outperforms the synthetic baselines.
|
| 58 |
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
|
| 63 |
### Baselines
|
| 64 |
|
|
@@ -66,10 +79,22 @@ DCLM, REWIRE and Nemotron-HQ-Synth are the strongest baselines in our setup by a
|
|
| 66 |
|
| 67 |
<HtmlEmbed
|
| 68 |
id="baselines-comparison"
|
| 69 |
-
src="d3-
|
| 70 |
data="baselines.csv"
|
| 71 |
title="Baseline Comparison"
|
| 72 |
desc="Figure: Comparison of baseline datasets across different evaluation metrics. Use the dropdown to switch metrics."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
/>
|
| 74 |
|
| 75 |
#### Disecting the synthetic baselines
|
|
|
|
| 1 |
import Image from "../../components/Image.astro";
|
| 2 |
import HtmlEmbed from "../../components/HtmlEmbed.astro";
|
|
|
|
| 3 |
import newplot_2c41384e_bcac_8073_9395_cf2d0e901187 from "../assets/image/newplot_2c41384e-bcac-8073-9395-cf2d0e901187.png";
|
| 4 |
import newplot_2c31384e_bcac_800b_82e8_ff44228f7720 from "../assets/image/newplot_2c31384e-bcac-800b-82e8-ff44228f7720.png";
|
| 5 |
import newplot_2e11384e_bcac_800a_abc6_d0690da3f955 from "../assets/image/newplot_2e11384e-bcac-800a-abc6-d0690da3f955.png";
|
|
|
|
| 55 |
|
| 56 |
We see that FinePhrase clearly outperforms the synthetic baselines.
|
| 57 |
|
| 58 |
+
<HtmlEmbed
|
| 59 |
+
id="finephrase-vs-baselines"
|
| 60 |
+
src="d3-benchmark-comparison.html"
|
| 61 |
+
data="finephrase_vs_baselines.csv"
|
| 62 |
+
title="FinePhrase vs Synthetic Baselines"
|
| 63 |
+
desc="Figure: FinePhrase compared against synthetic data baselines across evaluation metrics."
|
| 64 |
+
config={{
|
| 65 |
+
defaultView: "line",
|
| 66 |
+
datasetNames: {
|
| 67 |
+
cosmopedia: "Cosmopedia",
|
| 68 |
+
"mix-fw_edu_hq-table_smollm2_1.7b_hq": "FinePhrase",
|
| 69 |
+
nemotron_hq_synth: "Nemotron-HQ-Synth",
|
| 70 |
+
rewire: "REWIRE",
|
| 71 |
+
synth_query_reasoning_answer: "SYNTH"
|
| 72 |
+
}
|
| 73 |
+
}}
|
| 74 |
+
/>
|
| 75 |
|
| 76 |
### Baselines
|
| 77 |
|
|
|
|
| 79 |
|
| 80 |
<HtmlEmbed
|
| 81 |
id="baselines-comparison"
|
| 82 |
+
src="d3-benchmark-comparison.html"
|
| 83 |
data="baselines.csv"
|
| 84 |
title="Baseline Comparison"
|
| 85 |
desc="Figure: Comparison of baseline datasets across different evaluation metrics. Use the dropdown to switch metrics."
|
| 86 |
+
config={{
|
| 87 |
+
datasetNames: {
|
| 88 |
+
cosmopedia: "Cosmopedia",
|
| 89 |
+
dclm: "DCLM",
|
| 90 |
+
fw_edu_hq: "FineWeb-Edu (HQ)",
|
| 91 |
+
fw_edu_lq: "FineWeb-Edu (LQ)",
|
| 92 |
+
nemotron_hq_synth: "Nemotron-HQ-Synth",
|
| 93 |
+
rewire: "REWIRE",
|
| 94 |
+
synth_query_reasoning_answer: "SYNTH",
|
| 95 |
+
"ultra-fineweb": "Ultra-FineWeb"
|
| 96 |
+
}
|
| 97 |
+
}}
|
| 98 |
/>
|
| 99 |
|
| 100 |
#### Disecting the synthetic baselines
|
app/src/content/embeds/{d3-baselines.html β d3-benchmark-comparison.html}
RENAMED
|
@@ -1,7 +1,39 @@
|
|
| 1 |
-
<
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
<style>
|
| 3 |
-
.d3-
|
| 4 |
-
.d3-
|
| 5 |
display: flex;
|
| 6 |
gap: 16px;
|
| 7 |
align-items: center;
|
|
@@ -9,18 +41,18 @@
|
|
| 9 |
flex-wrap: wrap;
|
| 10 |
margin: 10px 0 0 0;
|
| 11 |
}
|
| 12 |
-
.d3-
|
| 13 |
display: flex;
|
| 14 |
flex-direction: column;
|
| 15 |
align-items: flex-start;
|
| 16 |
gap: 6px;
|
| 17 |
}
|
| 18 |
-
.d3-
|
| 19 |
font-size: 12px;
|
| 20 |
font-weight: 700;
|
| 21 |
color: var(--text-color);
|
| 22 |
}
|
| 23 |
-
.d3-
|
| 24 |
appearance: none;
|
| 25 |
-webkit-appearance: none;
|
| 26 |
-moz-appearance: none;
|
|
@@ -35,11 +67,11 @@
|
|
| 35 |
background-repeat: no-repeat;
|
| 36 |
background-position: right 8px center;
|
| 37 |
}
|
| 38 |
-
.d3-
|
| 39 |
outline: 2px solid var(--primary-color);
|
| 40 |
outline-offset: 2px;
|
| 41 |
}
|
| 42 |
-
.d3-
|
| 43 |
display: flex;
|
| 44 |
flex-direction: column;
|
| 45 |
align-items: flex-start;
|
|
@@ -47,17 +79,17 @@
|
|
| 47 |
margin: 8px 0 0 0;
|
| 48 |
padding-bottom: 4px;
|
| 49 |
}
|
| 50 |
-
.d3-
|
| 51 |
font-size: 12px;
|
| 52 |
font-weight: 700;
|
| 53 |
color: var(--text-color);
|
| 54 |
}
|
| 55 |
-
.d3-
|
| 56 |
display: flex;
|
| 57 |
flex-wrap: wrap;
|
| 58 |
gap: 8px 14px;
|
| 59 |
}
|
| 60 |
-
.d3-
|
| 61 |
display: inline-flex;
|
| 62 |
align-items: center;
|
| 63 |
gap: 6px;
|
|
@@ -66,29 +98,29 @@
|
|
| 66 |
color: var(--text-color);
|
| 67 |
cursor: pointer;
|
| 68 |
}
|
| 69 |
-
.d3-
|
| 70 |
-
.d3-
|
| 71 |
width: 14px;
|
| 72 |
height: 14px;
|
| 73 |
border-radius: 3px;
|
| 74 |
border: 1px solid var(--border-color);
|
| 75 |
}
|
| 76 |
-
.d3-
|
| 77 |
-
.d3-
|
| 78 |
-
.d3-
|
| 79 |
-
.d3-
|
| 80 |
-
.d3-
|
| 81 |
-
.d3-
|
| 82 |
-
.d3-
|
| 83 |
-
.d3-
|
| 84 |
-
.d3-
|
| 85 |
-
.d3-
|
| 86 |
stroke: var(--text-color);
|
| 87 |
stroke-opacity: 0.25;
|
| 88 |
stroke-width: 1;
|
| 89 |
pointer-events: none;
|
| 90 |
}
|
| 91 |
-
.d3-
|
| 92 |
position: absolute;
|
| 93 |
top: 0px;
|
| 94 |
left: 0px;
|
|
@@ -107,7 +139,7 @@
|
|
| 107 |
text-align: left;
|
| 108 |
z-index: 10;
|
| 109 |
}
|
| 110 |
-
.d3-
|
| 111 |
display: inline-block;
|
| 112 |
width: 10px;
|
| 113 |
height: 10px;
|
|
@@ -130,8 +162,8 @@
|
|
| 130 |
const bootstrap = () => {
|
| 131 |
const scriptEl = document.currentScript;
|
| 132 |
let container = scriptEl ? scriptEl.previousElementSibling : null;
|
| 133 |
-
if (!(container && container.classList && container.classList.contains('d3-
|
| 134 |
-
const cs = Array.from(document.querySelectorAll('.d3-
|
| 135 |
container = cs[cs.length - 1] || null;
|
| 136 |
}
|
| 137 |
if (!container) return;
|
|
@@ -139,26 +171,27 @@
|
|
| 139 |
|
| 140 |
container.style.position = container.style.position || 'relative';
|
| 141 |
|
| 142 |
-
//
|
| 143 |
-
let
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
|
|
|
| 149 |
|
| 150 |
-
//
|
| 151 |
-
const DATASET_NAMES = {
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
'rewire': 'REWIRE',
|
| 158 |
-
'synth_query_reasoning_answer': 'SYNTH',
|
| 159 |
-
'ultra-fineweb': 'Ultra-FineWeb'
|
| 160 |
-
};
|
| 161 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 162 |
const METRIC_NAMES = {
|
| 163 |
'agg_score_macro': 'Aggregate Score (Macro)',
|
| 164 |
'agg_score_micro': 'Aggregate Score (Micro)',
|
|
@@ -182,34 +215,13 @@
|
|
| 182 |
'lighteval|mmlu_redux_cf:_average|3/prob_norm_token': 'MMLU Redux'
|
| 183 |
};
|
| 184 |
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
'
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
'lighteval|hellaswag_cf|3/prob_norm_token',
|
| 193 |
-
'lighteval|openbookqa_cf|3/prob_norm_token',
|
| 194 |
-
'lighteval|piqa_cf|3/prob_norm_token',
|
| 195 |
-
'lighteval|squad_v2|3/prob_norm_token',
|
| 196 |
-
'lighteval|treb_qa|3/prob_norm_token',
|
| 197 |
-
'lighteval|wikitablequestions|3/prob_norm_token',
|
| 198 |
-
'lighteval|winogrande_cf|3/prob_norm_token',
|
| 199 |
-
'lighteval|xcsqa_cf|3/prob_norm_token',
|
| 200 |
-
'lighteval|mmlu_redux_cf:_average|3/prob_norm_token'
|
| 201 |
-
];
|
| 202 |
-
|
| 203 |
-
// Read optional config
|
| 204 |
-
let mountEl = container;
|
| 205 |
-
while (mountEl && !mountEl.getAttribute?.('data-config')) { mountEl = mountEl.parentElement; }
|
| 206 |
-
let providedConfig = null;
|
| 207 |
-
try {
|
| 208 |
-
const cfg = mountEl && mountEl.getAttribute ? mountEl.getAttribute('data-config') : null;
|
| 209 |
-
if (cfg && cfg.trim()) providedConfig = cfg.trim().startsWith('{') ? JSON.parse(cfg) : null;
|
| 210 |
-
} catch (_) {}
|
| 211 |
-
|
| 212 |
-
const defaultMetric = (providedConfig && providedConfig.defaultMetric) || 'agg_score_macro';
|
| 213 |
|
| 214 |
// SVG
|
| 215 |
const svg = d3.select(container).append('svg').attr('width', '100%').style('display', 'block');
|
|
@@ -217,13 +229,16 @@
|
|
| 217 |
|
| 218 |
// State
|
| 219 |
let allData = [];
|
|
|
|
| 220 |
let currentMetric = defaultMetric;
|
| 221 |
-
let currentView =
|
| 222 |
let colorMap = {};
|
| 223 |
let highlight = null;
|
| 224 |
|
| 225 |
-
//
|
| 226 |
-
|
|
|
|
|
|
|
| 227 |
function stepsToTokens(step) { return step * TOKENS_PER_STEP; }
|
| 228 |
function formatTokens(tokens) {
|
| 229 |
if (tokens >= 1e9) return d3.format('.2f')(tokens / 1e9) + 'B';
|
|
@@ -234,19 +249,16 @@
|
|
| 234 |
if (step >= 1000) return d3.format('.0f')(step / 1000) + 'K';
|
| 235 |
return String(step);
|
| 236 |
}
|
| 237 |
-
// Compact: "12.6B (6K)" for axis ticks
|
| 238 |
function stepLabelShort(step) { return `${formatTokens(stepsToTokens(step))} (${formatStep(step)})`; }
|
| 239 |
-
// Verbose: "12.6B Tokens (6K Steps)" for tooltip
|
| 240 |
function stepLabelLong(step) { return `${formatTokens(stepsToTokens(step))} Tokens (${formatStep(step)} Steps)`; }
|
| 241 |
|
| 242 |
-
// Color helpers
|
| 243 |
function getCategoricalColors(n) {
|
| 244 |
try { if (window.ColorPalettes && typeof window.ColorPalettes.getColors === 'function') return window.ColorPalettes.getColors('categorical', n); } catch (_) {}
|
| 245 |
return (d3.schemeTableau10 || ['#4e79a7','#f28e2b','#e15759','#76b7b2','#59a14f','#edc948','#b07aa1','#ff9da7','#9c755f','#bab0ac']).slice(0, n);
|
| 246 |
}
|
| 247 |
|
| 248 |
function initColors() {
|
| 249 |
-
const allNames = Array.from(d3.group(allData, d => d
|
| 250 |
if (!Object.keys(colorMap).length) {
|
| 251 |
const palette = getCategoricalColors(allNames.length);
|
| 252 |
allNames.forEach((name, i) => { colorMap[name] = palette[i % palette.length]; });
|
|
@@ -267,29 +279,36 @@
|
|
| 267 |
}
|
| 268 |
|
| 269 |
function updateHighlight() {
|
| 270 |
-
// Bar view
|
| 271 |
gRoot.selectAll('rect.bar').classed('ghost', d => highlight && d.name !== highlight);
|
| 272 |
gRoot.selectAll('text.value-label').classed('ghost', d => highlight && d.name !== highlight);
|
| 273 |
-
// Line view
|
| 274 |
gRoot.selectAll('.line-path').classed('ghost', d => highlight && d.name !== highlight);
|
| 275 |
gRoot.selectAll('.line-dot').classed('ghost', d => highlight && d.name !== highlight);
|
| 276 |
-
// Legend
|
| 277 |
container.querySelectorAll('.legend .item').forEach(el => {
|
| 278 |
el.classList.toggle('ghost', highlight && el.getAttribute('data-name') !== highlight);
|
| 279 |
});
|
| 280 |
}
|
| 281 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 282 |
// βββ BAR CHART βββ
|
| 283 |
function renderBar() {
|
| 284 |
const width = container.clientWidth || 800;
|
| 285 |
const margin = { top: 12, right: 56, bottom: 32, left: 140 };
|
| 286 |
|
| 287 |
-
const grouped = d3.group(allData, d => d
|
| 288 |
const finalData = [];
|
| 289 |
-
for (const [
|
| 290 |
-
const maxStep = d3.max(rows, r => +r
|
| 291 |
-
const row = rows.find(r => +r
|
| 292 |
-
if (row) finalData.push({ name:
|
| 293 |
}
|
| 294 |
finalData.sort((a, b) => b.value - a.value);
|
| 295 |
|
|
@@ -327,28 +346,23 @@
|
|
| 327 |
g.selectAll('path, line').attr('stroke', 'var(--axis-color)');
|
| 328 |
});
|
| 329 |
|
| 330 |
-
// Remove line-specific elements
|
| 331 |
-
gRoot.selectAll('.line-path, .line-dot, .hover-line, .hover-overlay, .x-label, .y-label').remove();
|
| 332 |
-
|
| 333 |
// Bars
|
|
|
|
|
|
|
|
|
|
|
|
|
| 334 |
gRoot.selectAll('rect.bar').data(finalData, d => d.name).join(
|
| 335 |
enter => enter.append('rect').attr('class', 'bar')
|
| 336 |
.attr('x', 0).attr('y', d => y(d.name)).attr('height', y.bandwidth()).attr('rx', 3)
|
| 337 |
.attr('fill', d => colorMap[d.rawName] || 'var(--primary-color)')
|
| 338 |
.attr('width', 0)
|
| 339 |
.on('mouseenter', (ev, d) => { highlight = d.name; updateHighlight(); })
|
| 340 |
-
.on('mousemove',
|
| 341 |
-
const [mx, my] = d3.pointer(ev, container);
|
| 342 |
-
showTip(`<strong>${d.name}</strong><br/>${METRIC_NAMES[currentMetric] || currentMetric}: <strong>${d.value.toFixed(4)}</strong>`, mx, my);
|
| 343 |
-
})
|
| 344 |
.on('mouseleave', () => { hideTip(); highlight = null; updateHighlight(); })
|
| 345 |
.transition().duration(300).attr('width', d => Math.max(0, x(d.value))),
|
| 346 |
update => update
|
| 347 |
.on('mouseenter', (ev, d) => { highlight = d.name; updateHighlight(); })
|
| 348 |
-
.on('mousemove',
|
| 349 |
-
const [mx, my] = d3.pointer(ev, container);
|
| 350 |
-
showTip(`<strong>${d.name}</strong><br/>${METRIC_NAMES[currentMetric] || currentMetric}: <strong>${d.value.toFixed(4)}</strong>`, mx, my);
|
| 351 |
-
})
|
| 352 |
.on('mouseleave', () => { hideTip(); highlight = null; updateHighlight(); })
|
| 353 |
.transition().duration(300)
|
| 354 |
.attr('y', d => y(d.name)).attr('height', y.bandwidth())
|
|
@@ -381,34 +395,28 @@
|
|
| 381 |
const innerWidth = width - margin.left - margin.right;
|
| 382 |
const innerHeight = height - margin.top - margin.bottom;
|
| 383 |
|
| 384 |
-
//
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
// Build series: one line per dataset
|
| 388 |
-
const grouped = d3.group(allData, d => d.runname);
|
| 389 |
const series = [];
|
| 390 |
-
for (const [
|
| 391 |
-
const pts = rows.map(r => ({ step: +r
|
| 392 |
-
series.push({ name:
|
| 393 |
}
|
| 394 |
|
| 395 |
-
const allSteps = Array.from(new Set(allData.map(r => +r
|
| 396 |
const allValues = series.flatMap(s => s.values.map(v => v.value));
|
| 397 |
|
| 398 |
const x = d3.scaleLinear().domain(d3.extent(allSteps)).range([0, innerWidth]);
|
| 399 |
-
const yMin = d3.min(allValues);
|
| 400 |
-
const yMax = d3.max(allValues);
|
| 401 |
-
const yPad = (yMax - yMin) * 0.08;
|
| 402 |
const y = d3.scaleLinear().domain([yMin - yPad, yMax + yPad]).range([innerHeight, 0]).nice();
|
| 403 |
|
| 404 |
// Grid
|
| 405 |
gRoot.selectAll('.grid').data([0]).join('g').attr('class', 'grid').call(g => {
|
| 406 |
g.selectAll('line').data(y.ticks(6)).join('line')
|
| 407 |
-
.attr('x1', 0).attr('x2', innerWidth)
|
| 408 |
-
.attr('y1', d => y(d)).attr('y2', d => y(d));
|
| 409 |
});
|
| 410 |
|
| 411 |
-
// X axis
|
| 412 |
gRoot.selectAll('.axis-x').data([0]).join('g').attr('class', 'axes axis-x')
|
| 413 |
.attr('transform', `translate(0,${innerHeight})`)
|
| 414 |
.call(d3.axisBottom(x).ticks(6).tickFormat(d => stepLabelShort(d)).tickSizeOuter(0))
|
|
@@ -434,7 +442,7 @@
|
|
| 434 |
gRoot.selectAll('.y-label').data([0]).join('text').attr('class', 'y-label')
|
| 435 |
.attr('transform', 'rotate(-90)').attr('x', -innerHeight / 2).attr('y', -44)
|
| 436 |
.attr('text-anchor', 'middle').attr('fill', 'var(--text-color)').attr('font-size', 12)
|
| 437 |
-
.text(
|
| 438 |
|
| 439 |
// Lines
|
| 440 |
const line = d3.line().x(d => x(d.step)).y(d => y(d.value)).curve(d3.curveMonotoneX);
|
|
@@ -448,7 +456,7 @@
|
|
| 448 |
exit => exit.remove()
|
| 449 |
);
|
| 450 |
|
| 451 |
-
// Dots
|
| 452 |
const dotData = series.flatMap(s => s.values.map(v => ({ name: s.name, rawName: s.rawName, step: v.step, value: v.value })));
|
| 453 |
gRoot.selectAll('.line-dot').data(dotData, d => d.name + '-' + d.step).join(
|
| 454 |
enter => enter.append('circle').attr('class', 'line-dot')
|
|
@@ -461,7 +469,7 @@
|
|
| 461 |
exit => exit.remove()
|
| 462 |
);
|
| 463 |
|
| 464 |
-
// Hover overlay
|
| 465 |
gRoot.selectAll('.hover-line').data([0]).join('line').attr('class', 'hover-line')
|
| 466 |
.attr('y1', 0).attr('y2', innerHeight).style('display', 'none');
|
| 467 |
|
|
@@ -470,13 +478,9 @@
|
|
| 470 |
.attr('fill', 'none').attr('pointer-events', 'all')
|
| 471 |
.on('mousemove', (ev) => {
|
| 472 |
const [mx] = d3.pointer(ev, gRoot.node());
|
| 473 |
-
const
|
| 474 |
-
|
| 475 |
-
const px = x(nearest);
|
| 476 |
|
| 477 |
-
gRoot.select('.hover-line').attr('x1', px).attr('x2', px).style('display', null);
|
| 478 |
-
|
| 479 |
-
// Build tooltip sorted by value at this step
|
| 480 |
const entries = series.map(s => {
|
| 481 |
const pt = s.values.find(v => v.step === nearest);
|
| 482 |
return pt ? { name: s.name, rawName: s.rawName, value: pt.value } : null;
|
|
@@ -486,7 +490,6 @@
|
|
| 486 |
entries.forEach(e => {
|
| 487 |
html += `<div><span class="tip-dot" style="background:${colorMap[e.rawName]}"></span>${e.name}: <strong>${e.value.toFixed(4)}</strong></div>`;
|
| 488 |
});
|
| 489 |
-
|
| 490 |
const [cx, cy] = d3.pointer(ev, container);
|
| 491 |
showTip(html, cx, cy);
|
| 492 |
})
|
|
@@ -496,23 +499,22 @@
|
|
| 496 |
});
|
| 497 |
}
|
| 498 |
|
| 499 |
-
// βββ
|
| 500 |
function render() {
|
| 501 |
if (!allData.length) return;
|
| 502 |
initColors();
|
| 503 |
gRoot.selectAll('*').remove();
|
| 504 |
-
if (currentView === 'bar') renderBar();
|
| 505 |
-
else renderLine();
|
| 506 |
}
|
| 507 |
|
| 508 |
-
// βββ
|
| 509 |
function buildUI() {
|
| 510 |
const controls = document.createElement('div'); controls.className = 'controls';
|
| 511 |
|
| 512 |
// View toggle
|
| 513 |
const viewGroup = document.createElement('div'); viewGroup.className = 'control-group';
|
| 514 |
-
const viewLabel = document.createElement('label'); viewLabel.setAttribute('for', 'view-
|
| 515 |
-
const viewSelect = document.createElement('select'); viewSelect.id = 'view-
|
| 516 |
[['bar', 'Final Score (Bar)'], ['line', 'Training Progression (Line)']].forEach(([val, text]) => {
|
| 517 |
const opt = document.createElement('option'); opt.value = val; opt.textContent = text;
|
| 518 |
if (val === currentView) opt.selected = true;
|
|
@@ -522,19 +524,10 @@
|
|
| 522 |
viewGroup.appendChild(viewLabel); viewGroup.appendChild(viewSelect);
|
| 523 |
controls.appendChild(viewGroup);
|
| 524 |
|
| 525 |
-
// Metric select
|
| 526 |
const metricGroup = document.createElement('div'); metricGroup.className = 'control-group';
|
| 527 |
-
const metricLabel = document.createElement('label'); metricLabel.setAttribute('for', 'metric-
|
| 528 |
-
const metricSelect = document.createElement('select'); metricSelect.id = 'metric-
|
| 529 |
-
const aggGroup = document.createElement('optgroup'); aggGroup.label = 'Aggregate Scores';
|
| 530 |
-
const indGroup = document.createElement('optgroup'); indGroup.label = 'Individual Benchmarks';
|
| 531 |
-
METRIC_ORDER.forEach(key => {
|
| 532 |
-
const opt = document.createElement('option'); opt.value = key; opt.textContent = METRIC_NAMES[key] || key;
|
| 533 |
-
if (key === defaultMetric) opt.selected = true;
|
| 534 |
-
if (key.startsWith('lighteval|')) indGroup.appendChild(opt); else aggGroup.appendChild(opt);
|
| 535 |
-
});
|
| 536 |
-
metricSelect.appendChild(aggGroup); metricSelect.appendChild(indGroup);
|
| 537 |
-
metricSelect.addEventListener('change', () => { currentMetric = metricSelect.value; render(); });
|
| 538 |
metricGroup.appendChild(metricLabel); metricGroup.appendChild(metricSelect);
|
| 539 |
controls.appendChild(metricGroup);
|
| 540 |
|
|
@@ -546,18 +539,43 @@
|
|
| 546 |
container.appendChild(legend);
|
| 547 |
}
|
| 548 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 549 |
function buildLegend() {
|
| 550 |
const items = container.querySelector('.legend .items');
|
| 551 |
if (!items) return;
|
| 552 |
items.innerHTML = '';
|
| 553 |
-
|
| 554 |
-
|
| 555 |
-
|
| 556 |
-
|
| 557 |
-
|
| 558 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 559 |
el.appendChild(sw); el.appendChild(txt); items.appendChild(el);
|
| 560 |
-
el.addEventListener('mouseenter', () => { highlight =
|
| 561 |
el.addEventListener('mouseleave', () => { highlight = null; updateHighlight(); });
|
| 562 |
});
|
| 563 |
}
|
|
@@ -589,6 +607,10 @@
|
|
| 589 |
try {
|
| 590 |
const text = await fetchFirstAvailable(csvPaths);
|
| 591 |
allData = d3.csvParse(text);
|
|
|
|
|
|
|
|
|
|
|
|
|
| 592 |
render();
|
| 593 |
buildLegend();
|
| 594 |
if (window.ResizeObserver) { new ResizeObserver(() => render()).observe(container); }
|
|
|
|
| 1 |
+
<!--
|
| 2 |
+
Reusable bar/line chart for benchmark comparisons.
|
| 3 |
+
|
| 4 |
+
Configuration via data-config attribute:
|
| 5 |
+
{
|
| 6 |
+
"datasetNames": { "raw_name": "Display Name", ... }, // required
|
| 7 |
+
"defaultMetric": "agg_score_macro", // optional, default: "agg_score_macro"
|
| 8 |
+
"defaultView": "bar", // optional, "bar" | "line", default: "bar"
|
| 9 |
+
"tokensPerStep": 2100000, // optional, default: 2.1e6
|
| 10 |
+
"runColumn": "runname", // optional, CSV column for series, default: "runname"
|
| 11 |
+
"stepColumn": "steps" // optional, CSV column for x-axis, default: "steps"
|
| 12 |
+
}
|
| 13 |
+
|
| 14 |
+
Example usage in MDX:
|
| 15 |
+
<HtmlEmbed
|
| 16 |
+
src="d3-benchmark-comparison.html"
|
| 17 |
+
data="baselines.csv"
|
| 18 |
+
title="Baseline Comparison"
|
| 19 |
+
config={{
|
| 20 |
+
datasetNames: {
|
| 21 |
+
cosmopedia: "Cosmopedia",
|
| 22 |
+
dclm: "DCLM",
|
| 23 |
+
fw_edu_hq: "FineWeb-Edu (HQ)",
|
| 24 |
+
fw_edu_lq: "FineWeb-Edu (LQ)",
|
| 25 |
+
nemotron_hq_synth: "Nemotron-HQ-Synth",
|
| 26 |
+
rewire: "REWIRE",
|
| 27 |
+
synth_query_reasoning_answer: "SYNTH",
|
| 28 |
+
"ultra-fineweb": "Ultra-FineWeb"
|
| 29 |
+
}
|
| 30 |
+
}}
|
| 31 |
+
/>
|
| 32 |
+
-->
|
| 33 |
+
<div class="d3-benchmark-comparison"></div>
|
| 34 |
<style>
|
| 35 |
+
.d3-benchmark-comparison { position: relative; }
|
| 36 |
+
.d3-benchmark-comparison .controls {
|
| 37 |
display: flex;
|
| 38 |
gap: 16px;
|
| 39 |
align-items: center;
|
|
|
|
| 41 |
flex-wrap: wrap;
|
| 42 |
margin: 10px 0 0 0;
|
| 43 |
}
|
| 44 |
+
.d3-benchmark-comparison .controls .control-group {
|
| 45 |
display: flex;
|
| 46 |
flex-direction: column;
|
| 47 |
align-items: flex-start;
|
| 48 |
gap: 6px;
|
| 49 |
}
|
| 50 |
+
.d3-benchmark-comparison .controls label {
|
| 51 |
font-size: 12px;
|
| 52 |
font-weight: 700;
|
| 53 |
color: var(--text-color);
|
| 54 |
}
|
| 55 |
+
.d3-benchmark-comparison .controls select {
|
| 56 |
appearance: none;
|
| 57 |
-webkit-appearance: none;
|
| 58 |
-moz-appearance: none;
|
|
|
|
| 67 |
background-repeat: no-repeat;
|
| 68 |
background-position: right 8px center;
|
| 69 |
}
|
| 70 |
+
.d3-benchmark-comparison .controls select:focus-visible {
|
| 71 |
outline: 2px solid var(--primary-color);
|
| 72 |
outline-offset: 2px;
|
| 73 |
}
|
| 74 |
+
.d3-benchmark-comparison .legend {
|
| 75 |
display: flex;
|
| 76 |
flex-direction: column;
|
| 77 |
align-items: flex-start;
|
|
|
|
| 79 |
margin: 8px 0 0 0;
|
| 80 |
padding-bottom: 4px;
|
| 81 |
}
|
| 82 |
+
.d3-benchmark-comparison .legend .legend-title {
|
| 83 |
font-size: 12px;
|
| 84 |
font-weight: 700;
|
| 85 |
color: var(--text-color);
|
| 86 |
}
|
| 87 |
+
.d3-benchmark-comparison .legend .items {
|
| 88 |
display: flex;
|
| 89 |
flex-wrap: wrap;
|
| 90 |
gap: 8px 14px;
|
| 91 |
}
|
| 92 |
+
.d3-benchmark-comparison .legend .item {
|
| 93 |
display: inline-flex;
|
| 94 |
align-items: center;
|
| 95 |
gap: 6px;
|
|
|
|
| 98 |
color: var(--text-color);
|
| 99 |
cursor: pointer;
|
| 100 |
}
|
| 101 |
+
.d3-benchmark-comparison .legend .item.ghost { opacity: .25; }
|
| 102 |
+
.d3-benchmark-comparison .legend .swatch {
|
| 103 |
width: 14px;
|
| 104 |
height: 14px;
|
| 105 |
border-radius: 3px;
|
| 106 |
border: 1px solid var(--border-color);
|
| 107 |
}
|
| 108 |
+
.d3-benchmark-comparison .bar.ghost { opacity: .25; }
|
| 109 |
+
.d3-benchmark-comparison .value-label.ghost { opacity: .25; }
|
| 110 |
+
.d3-benchmark-comparison .line-path { fill: none; stroke-width: 2; opacity: 0.85; }
|
| 111 |
+
.d3-benchmark-comparison .line-path.ghost { opacity: .15; }
|
| 112 |
+
.d3-benchmark-comparison .line-dot.ghost { opacity: .15; }
|
| 113 |
+
.d3-benchmark-comparison .axes path { display: none; }
|
| 114 |
+
.d3-benchmark-comparison .axes line { stroke: var(--axis-color); }
|
| 115 |
+
.d3-benchmark-comparison .axes text { fill: var(--tick-color); }
|
| 116 |
+
.d3-benchmark-comparison .grid line { stroke: var(--grid-color); }
|
| 117 |
+
.d3-benchmark-comparison .hover-line {
|
| 118 |
stroke: var(--text-color);
|
| 119 |
stroke-opacity: 0.25;
|
| 120 |
stroke-width: 1;
|
| 121 |
pointer-events: none;
|
| 122 |
}
|
| 123 |
+
.d3-benchmark-comparison .d3-tooltip {
|
| 124 |
position: absolute;
|
| 125 |
top: 0px;
|
| 126 |
left: 0px;
|
|
|
|
| 139 |
text-align: left;
|
| 140 |
z-index: 10;
|
| 141 |
}
|
| 142 |
+
.d3-benchmark-comparison .d3-tooltip .tip-dot {
|
| 143 |
display: inline-block;
|
| 144 |
width: 10px;
|
| 145 |
height: 10px;
|
|
|
|
| 162 |
const bootstrap = () => {
|
| 163 |
const scriptEl = document.currentScript;
|
| 164 |
let container = scriptEl ? scriptEl.previousElementSibling : null;
|
| 165 |
+
if (!(container && container.classList && container.classList.contains('d3-benchmark-comparison'))) {
|
| 166 |
+
const cs = Array.from(document.querySelectorAll('.d3-benchmark-comparison')).filter(el => !(el.dataset && el.dataset.mounted === 'true'));
|
| 167 |
container = cs[cs.length - 1] || null;
|
| 168 |
}
|
| 169 |
if (!container) return;
|
|
|
|
| 171 |
|
| 172 |
container.style.position = container.style.position || 'relative';
|
| 173 |
|
| 174 |
+
// ─── READ CONFIG ───
|
| 175 |
+
let mountEl = container;
|
| 176 |
+
while (mountEl && !mountEl.getAttribute?.('data-config')) { mountEl = mountEl.parentElement; }
|
| 177 |
+
let cfg = {};
|
| 178 |
+
try {
|
| 179 |
+
const raw = mountEl && mountEl.getAttribute ? mountEl.getAttribute('data-config') : null;
|
| 180 |
+
if (raw && raw.trim()) cfg = raw.trim().startsWith('{') ? JSON.parse(raw) : {};
|
| 181 |
+
} catch (_) {}
|
| 182 |
|
| 183 |
+
// Configurable settings with defaults
|
| 184 |
+
const DATASET_NAMES = cfg.datasetNames || {};
|
| 185 |
+
const RUN_COL = cfg.runColumn || 'runname';
|
| 186 |
+
const STEP_COL = cfg.stepColumn || 'steps';
|
| 187 |
+
const TOKENS_PER_STEP = cfg.tokensPerStep || 2.1e6;
|
| 188 |
+
const defaultMetric = cfg.defaultMetric || 'agg_score_macro';
|
| 189 |
+
const defaultView = cfg.defaultView || 'bar';
|
|
|
|
|
|
|
|
|
|
|
|
|
| 190 |
|
| 191 |
+
// Unique ID suffix for multiple instances on same page
|
| 192 |
+
const uid = Math.random().toString(36).slice(2, 8);
|
| 193 |
+
|
| 194 |
+
// Standard metric display names (shared across all CSVs from this benchmark suite)
|
| 195 |
const METRIC_NAMES = {
|
| 196 |
'agg_score_macro': 'Aggregate Score (Macro)',
|
| 197 |
'agg_score_micro': 'Aggregate Score (Micro)',
|
|
|
|
| 215 |
'lighteval|mmlu_redux_cf:_average|3/prob_norm_token': 'MMLU Redux'
|
| 216 |
};
|
| 217 |
|
| 218 |
+
// Tooltip
|
| 219 |
+
let tip = container.querySelector('.d3-tooltip'), tipInner;
|
| 220 |
+
if (!tip) {
|
| 221 |
+
tip = document.createElement('div'); tip.className = 'd3-tooltip';
|
| 222 |
+
tipInner = document.createElement('div'); tipInner.className = 'd3-tooltip__inner'; tip.appendChild(tipInner);
|
| 223 |
+
container.appendChild(tip);
|
| 224 |
+
} else { tipInner = tip.querySelector('.d3-tooltip__inner') || tip; }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 225 |
|
| 226 |
// SVG
|
| 227 |
const svg = d3.select(container).append('svg').attr('width', '100%').style('display', 'block');
|
|
|
|
| 229 |
|
| 230 |
// State
|
| 231 |
let allData = [];
|
| 232 |
+
let metricKeys = []; // auto-detected from CSV columns
|
| 233 |
let currentMetric = defaultMetric;
|
| 234 |
+
let currentView = defaultView;
|
| 235 |
let colorMap = {};
|
| 236 |
let highlight = null;
|
| 237 |
|
| 238 |
+
// ─── HELPERS ───
|
| 239 |
+
/**
 * Resolve the human-readable label for a run/dataset.
 *
 * @param {string} raw - Raw run identifier (a value of the CSV run column).
 * @returns {string} The label configured in DATASET_NAMES, or `raw` itself
 *   when no non-empty label is configured.
 */
function displayName(raw) {
  // Own-property check: run identifiers come from CSV data, so a run called
  // e.g. "constructor" must not resolve to a member inherited from Object.prototype.
  const label = Object.prototype.hasOwnProperty.call(DATASET_NAMES, raw) ? DATASET_NAMES[raw] : '';
  return label || raw;
}
|
| 240 |
+
/**
 * Resolve the human-readable label for a metric column.
 *
 * @param {string} key - Metric key (a CSV column name).
 * @returns {string} The label from METRIC_NAMES, or `key` itself when no
 *   non-empty label is registered.
 */
function metricName(key) {
  // Own-property check so a column whose name collides with an inherited
  // Object.prototype member (e.g. "toString") is shown verbatim.
  const label = Object.prototype.hasOwnProperty.call(METRIC_NAMES, key) ? METRIC_NAMES[key] : '';
  return label || key;
}
|
| 241 |
+
|
| 242 |
/**
 * Convert a training step count into a token count.
 *
 * @param {number} step - Training step.
 * @returns {number} `step` scaled by TOKENS_PER_STEP.
 */
function stepsToTokens(step) {
  const tokens = step * TOKENS_PER_STEP;
  return tokens;
}
|
| 243 |
function formatTokens(tokens) {
|
| 244 |
if (tokens >= 1e9) return d3.format('.2f')(tokens / 1e9) + 'B';
|
|
|
|
| 249 |
if (step >= 1000) return d3.format('.0f')(step / 1000) + 'K';
|
| 250 |
return String(step);
|
| 251 |
}
|
|
|
|
| 252 |
/**
 * Compact label for a step: formatted token count followed by the formatted
 * step count in parentheses.
 *
 * @param {number} step - Training step.
 * @returns {string} Label of the form "<tokens> (<steps>)".
 */
function stepLabelShort(step) {
  const tokenText = formatTokens(stepsToTokens(step));
  const stepText = formatStep(step);
  return `${tokenText} (${stepText})`;
}
|
|
|
|
| 253 |
/**
 * Verbose label for a step, spelling out both units.
 *
 * @param {number} step - Training step.
 * @returns {string} Label of the form "<tokens> Tokens (<steps> Steps)".
 */
function stepLabelLong(step) {
  const tokenText = formatTokens(stepsToTokens(step));
  const stepText = formatStep(step);
  return `${tokenText} Tokens (${stepText} Steps)`;
}
|
| 254 |
|
|
|
|
| 255 |
/**
 * Pick up to `n` categorical colors for the series.
 *
 * Prefers `window.ColorPalettes.getColors('categorical', n)` when that hook
 * exists and returns a non-empty array. Otherwise falls back to
 * `d3.schemeTableau10` (or a hard-coded ten-color list when d3 does not
 * expose that scheme), truncated to `n` entries.
 *
 * @param {number} n - Number of colors wanted.
 * @returns {string[]} Color strings; may hold fewer than `n` entries when the
 *   fallback list is shorter than `n`.
 */
function getCategoricalColors(n) {
  try {
    if (window.ColorPalettes && typeof window.ColorPalettes.getColors === 'function') {
      const colors = window.ColorPalettes.getColors('categorical', n);
      // An empty or non-array result would leave every series without a color,
      // so only trust the hook when it actually yields colors.
      if (Array.isArray(colors) && colors.length > 0) return colors;
    }
  } catch (_) {
    // Best effort: the palette hook is optional, fall through to the defaults.
  }
  const fallback = d3.schemeTableau10 || ['#4e79a7','#f28e2b','#e15759','#76b7b2','#59a14f','#edc948','#b07aa1','#ff9da7','#9c755f','#bab0ac'];
  return fallback.slice(0, n);
}
|
| 259 |
|
| 260 |
function initColors() {
|
| 261 |
+
const allNames = Array.from(d3.group(allData, d => d[RUN_COL]).keys()).sort();
|
| 262 |
if (!Object.keys(colorMap).length) {
|
| 263 |
const palette = getCategoricalColors(allNames.length);
|
| 264 |
allNames.forEach((name, i) => { colorMap[name] = palette[i % palette.length]; });
|
|
|
|
| 279 |
}
|
| 280 |
|
| 281 |
/**
 * Sync the "ghost" (dimmed) class on chart marks and legend entries with the
 * current `highlight` value: while a series is highlighted, everything whose
 * name differs from it is ghosted; with no highlight nothing is ghosted.
 */
function updateHighlight() {
  // Shared predicate for every d3 selection bound to data carrying `name`.
  const isDimmed = (d) => highlight && d.name !== highlight;
  ['rect.bar', 'text.value-label', '.line-path', '.line-dot'].forEach((selector) => {
    gRoot.selectAll(selector).classed('ghost', isDimmed);
  });

  // Legend entries are plain DOM nodes keyed by their data-name attribute.
  const legendItems = container.querySelectorAll('.legend .item');
  legendItems.forEach((item) => {
    const label = item.getAttribute('data-name');
    item.classList.toggle('ghost', highlight && label !== highlight);
  });
}
|
| 290 |
|
| 291 |
+
// ─── AUTO-DETECT METRICS from CSV columns ───
|
| 292 |
+
/**
 * Work out which CSV columns are plottable metrics.
 *
 * Known aggregate scores come first, in a fixed order, followed by every
 * remaining numeric column in CSV order. The run column, the step column and
 * `seed` are never reported as individual metrics.
 *
 * @param {string[]} columns - Column names of the parsed CSV.
 * @param {Array<Object>} [rows=allData] - Parsed rows used to decide whether
 *   a column is numeric; defaults to the loaded data.
 * @returns {string[]} Metric keys: aggregates first, then individual columns.
 */
function detectMetrics(columns, rows = allData) {
  const skip = new Set([RUN_COL, STEP_COL, 'seed']);
  // Ordered: aggregate first, then individual
  const aggOrder = ['agg_score_macro', 'agg_score_micro', 'agg_score_RC', 'agg_score_GK', 'agg_score_NLU', 'agg_score_MATH', 'agg_score_TABLE', 'agg_score_RES'];
  const agg = aggOrder.filter(k => columns.includes(k));
  const aggSet = new Set(agg);

  // A column counts as numeric when its first non-blank cell parses to a
  // finite number. Looking past blank cells matters because '' coerces to 0,
  // and a header-only CSV (no rows at all) must not throw.
  const isNumericColumn = (key) => {
    const sample = rows.find(r => r[key] != null && String(r[key]).trim() !== '');
    return sample !== undefined && Number.isFinite(Number(sample[key]));
  };

  const ind = columns.filter(k => !skip.has(k) && !aggSet.has(k) && isNumericColumn(k));
  return [...agg, ...ind];
}
|
| 300 |
+
|
| 301 |
// ─── BAR CHART ───
|
| 302 |
function renderBar() {
|
| 303 |
const width = container.clientWidth || 800;
|
| 304 |
const margin = { top: 12, right: 56, bottom: 32, left: 140 };
|
| 305 |
|
| 306 |
+
const grouped = d3.group(allData, d => d[RUN_COL]);
|
| 307 |
const finalData = [];
|
| 308 |
+
for (const [raw, rows] of grouped) {
|
| 309 |
+
const maxStep = d3.max(rows, r => +r[STEP_COL]);
|
| 310 |
+
const row = rows.find(r => +r[STEP_COL] === maxStep);
|
| 311 |
+
if (row) finalData.push({ name: displayName(raw), rawName: raw, value: +row[currentMetric] });
|
| 312 |
}
|
| 313 |
finalData.sort((a, b) => b.value - a.value);
|
| 314 |
|
|
|
|
| 346 |
g.selectAll('path, line').attr('stroke', 'var(--axis-color)');
|
| 347 |
});
|
| 348 |
|
|
|
|
|
|
|
|
|
|
| 349 |
// Bars
|
| 350 |
+
const barTip = (ev, d) => {
|
| 351 |
+
const [mx, my] = d3.pointer(ev, container);
|
| 352 |
+
showTip(`<strong>${d.name}</strong><br/>${metricName(currentMetric)}: <strong>${d.value.toFixed(4)}</strong>`, mx, my);
|
| 353 |
+
};
|
| 354 |
gRoot.selectAll('rect.bar').data(finalData, d => d.name).join(
|
| 355 |
enter => enter.append('rect').attr('class', 'bar')
|
| 356 |
.attr('x', 0).attr('y', d => y(d.name)).attr('height', y.bandwidth()).attr('rx', 3)
|
| 357 |
.attr('fill', d => colorMap[d.rawName] || 'var(--primary-color)')
|
| 358 |
.attr('width', 0)
|
| 359 |
.on('mouseenter', (ev, d) => { highlight = d.name; updateHighlight(); })
|
| 360 |
+
.on('mousemove', barTip)
|
|
|
|
|
|
|
|
|
|
| 361 |
.on('mouseleave', () => { hideTip(); highlight = null; updateHighlight(); })
|
| 362 |
.transition().duration(300).attr('width', d => Math.max(0, x(d.value))),
|
| 363 |
update => update
|
| 364 |
.on('mouseenter', (ev, d) => { highlight = d.name; updateHighlight(); })
|
| 365 |
+
.on('mousemove', barTip)
|
|
|
|
|
|
|
|
|
|
| 366 |
.on('mouseleave', () => { hideTip(); highlight = null; updateHighlight(); })
|
| 367 |
.transition().duration(300)
|
| 368 |
.attr('y', d => y(d.name)).attr('height', y.bandwidth())
|
|
|
|
| 395 |
const innerWidth = width - margin.left - margin.right;
|
| 396 |
const innerHeight = height - margin.top - margin.bottom;
|
| 397 |
|
| 398 |
+
// Build series
|
| 399 |
+
const grouped = d3.group(allData, d => d[RUN_COL]);
|
|
|
|
|
|
|
|
|
|
| 400 |
const series = [];
|
| 401 |
+
for (const [raw, rows] of grouped) {
|
| 402 |
+
const pts = rows.map(r => ({ step: +r[STEP_COL], value: +r[currentMetric] })).sort((a, b) => a.step - b.step);
|
| 403 |
+
series.push({ name: displayName(raw), rawName: raw, values: pts });
|
| 404 |
}
|
| 405 |
|
| 406 |
+
const allSteps = Array.from(new Set(allData.map(r => +r[STEP_COL]))).sort((a, b) => a - b);
|
| 407 |
const allValues = series.flatMap(s => s.values.map(v => v.value));
|
| 408 |
|
| 409 |
const x = d3.scaleLinear().domain(d3.extent(allSteps)).range([0, innerWidth]);
|
| 410 |
+
const yMin = d3.min(allValues), yMax = d3.max(allValues), yPad = (yMax - yMin) * 0.08;
|
|
|
|
|
|
|
| 411 |
const y = d3.scaleLinear().domain([yMin - yPad, yMax + yPad]).range([innerHeight, 0]).nice();
|
| 412 |
|
| 413 |
// Grid
|
| 414 |
gRoot.selectAll('.grid').data([0]).join('g').attr('class', 'grid').call(g => {
|
| 415 |
g.selectAll('line').data(y.ticks(6)).join('line')
|
| 416 |
+
.attr('x1', 0).attr('x2', innerWidth).attr('y1', d => y(d)).attr('y2', d => y(d));
|
|
|
|
| 417 |
});
|
| 418 |
|
| 419 |
+
// X axis
|
| 420 |
gRoot.selectAll('.axis-x').data([0]).join('g').attr('class', 'axes axis-x')
|
| 421 |
.attr('transform', `translate(0,${innerHeight})`)
|
| 422 |
.call(d3.axisBottom(x).ticks(6).tickFormat(d => stepLabelShort(d)).tickSizeOuter(0))
|
|
|
|
| 442 |
gRoot.selectAll('.y-label').data([0]).join('text').attr('class', 'y-label')
|
| 443 |
.attr('transform', 'rotate(-90)').attr('x', -innerHeight / 2).attr('y', -44)
|
| 444 |
.attr('text-anchor', 'middle').attr('fill', 'var(--text-color)').attr('font-size', 12)
|
| 445 |
+
.text(metricName(currentMetric));
|
| 446 |
|
| 447 |
// Lines
|
| 448 |
const line = d3.line().x(d => x(d.step)).y(d => y(d.value)).curve(d3.curveMonotoneX);
|
|
|
|
| 456 |
exit => exit.remove()
|
| 457 |
);
|
| 458 |
|
| 459 |
+
// Dots
|
| 460 |
const dotData = series.flatMap(s => s.values.map(v => ({ name: s.name, rawName: s.rawName, step: v.step, value: v.value })));
|
| 461 |
gRoot.selectAll('.line-dot').data(dotData, d => d.name + '-' + d.step).join(
|
| 462 |
enter => enter.append('circle').attr('class', 'line-dot')
|
|
|
|
| 469 |
exit => exit.remove()
|
| 470 |
);
|
| 471 |
|
| 472 |
+
// Hover overlay
|
| 473 |
gRoot.selectAll('.hover-line').data([0]).join('line').attr('class', 'hover-line')
|
| 474 |
.attr('y1', 0).attr('y2', innerHeight).style('display', 'none');
|
| 475 |
|
|
|
|
| 478 |
.attr('fill', 'none').attr('pointer-events', 'all')
|
| 479 |
.on('mousemove', (ev) => {
|
| 480 |
const [mx] = d3.pointer(ev, gRoot.node());
|
| 481 |
+
const nearest = allSteps.reduce((best, s) => Math.abs(s - x.invert(mx)) < Math.abs(best - x.invert(mx)) ? s : best, allSteps[0]);
|
| 482 |
+
gRoot.select('.hover-line').attr('x1', x(nearest)).attr('x2', x(nearest)).style('display', null);
|
|
|
|
| 483 |
|
|
|
|
|
|
|
|
|
|
| 484 |
const entries = series.map(s => {
|
| 485 |
const pt = s.values.find(v => v.step === nearest);
|
| 486 |
return pt ? { name: s.name, rawName: s.rawName, value: pt.value } : null;
|
|
|
|
| 490 |
entries.forEach(e => {
|
| 491 |
html += `<div><span class="tip-dot" style="background:${colorMap[e.rawName]}"></span>${e.name}: <strong>${e.value.toFixed(4)}</strong></div>`;
|
| 492 |
});
|
|
|
|
| 493 |
const [cx, cy] = d3.pointer(ev, container);
|
| 494 |
showTip(html, cx, cy);
|
| 495 |
})
|
|
|
|
| 499 |
});
|
| 500 |
}
|
| 501 |
|
| 502 |
+
// ─── RENDER ───
|
| 503 |
/**
 * Redraw the chart from scratch for the current view.
 * Does nothing until data has been loaded; otherwise initialises the colors,
 * clears everything under gRoot and draws either the bar or the line chart
 * depending on `currentView`.
 */
function render() {
  const hasRows = Boolean(allData.length);
  if (hasRows) {
    initColors();
    gRoot.selectAll('*').remove();
    const draw = currentView === 'bar' ? renderBar : renderLine;
    draw();
  }
}
|
| 509 |
|
| 510 |
+
// ─── UI ───
|
| 511 |
function buildUI() {
|
| 512 |
const controls = document.createElement('div'); controls.className = 'controls';
|
| 513 |
|
| 514 |
// View toggle
|
| 515 |
const viewGroup = document.createElement('div'); viewGroup.className = 'control-group';
|
| 516 |
+
const viewLabel = document.createElement('label'); viewLabel.setAttribute('for', 'view-' + uid); viewLabel.textContent = 'View';
|
| 517 |
+
const viewSelect = document.createElement('select'); viewSelect.id = 'view-' + uid;
|
| 518 |
[['bar', 'Final Score (Bar)'], ['line', 'Training Progression (Line)']].forEach(([val, text]) => {
|
| 519 |
const opt = document.createElement('option'); opt.value = val; opt.textContent = text;
|
| 520 |
if (val === currentView) opt.selected = true;
|
|
|
|
| 524 |
viewGroup.appendChild(viewLabel); viewGroup.appendChild(viewSelect);
|
| 525 |
controls.appendChild(viewGroup);
|
| 526 |
|
| 527 |
+
// Metric select (populated after data load)
|
| 528 |
const metricGroup = document.createElement('div'); metricGroup.className = 'control-group';
|
| 529 |
+
const metricLabel = document.createElement('label'); metricLabel.setAttribute('for', 'metric-' + uid); metricLabel.textContent = 'Metric';
|
| 530 |
+
const metricSelect = document.createElement('select'); metricSelect.id = 'metric-' + uid;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 531 |
metricGroup.appendChild(metricLabel); metricGroup.appendChild(metricSelect);
|
| 532 |
controls.appendChild(metricGroup);
|
| 533 |
|
|
|
|
| 539 |
container.appendChild(legend);
|
| 540 |
}
|
| 541 |
|
| 542 |
+
/**
 * Fill the metric <select> from `metricKeys`, grouped into "Aggregate Scores"
 * (keys starting with `agg_score`) and "Individual Benchmarks" (everything
 * else), pre-selecting `currentMetric`. Empty groups are omitted.
 *
 * Safe to call more than once: the options are rebuilt each time, but the
 * change listener is attached only on the first call.
 */
function populateMetricSelect() {
  const sel = container.querySelector('#metric-' + uid);
  if (!sel) return;
  sel.innerHTML = '';
  const aggGroup = document.createElement('optgroup'); aggGroup.label = 'Aggregate Scores';
  const indGroup = document.createElement('optgroup'); indGroup.label = 'Individual Benchmarks';
  metricKeys.forEach(key => {
    const opt = document.createElement('option'); opt.value = key; opt.textContent = metricName(key);
    if (key === currentMetric) opt.selected = true;
    if (key.startsWith('agg_score')) aggGroup.appendChild(opt); else indGroup.appendChild(opt);
  });
  if (aggGroup.children.length) sel.appendChild(aggGroup);
  if (indGroup.children.length) sel.appendChild(indGroup);
  // addEventListener does not de-duplicate fresh closures, so without this
  // guard every re-population would add one more render per change event.
  if (sel.dataset.changeBound !== 'true') {
    sel.addEventListener('change', () => { currentMetric = sel.value; render(); });
    sel.dataset.changeBound = 'true';
  }
}
|
| 557 |
+
|
| 558 |
/**
 * Rebuild the legend entries inside `.legend .items`.
 *
 * Runs are ordered by their score at their last (maximum) step, descending.
 * The ranking metric is `defaultMetric` when the CSV provides it; otherwise
 * `currentMetric` is used, so the order stays well defined when the
 * configured default is missing from the data. Runs whose score is not a
 * finite number are listed last.
 *
 * Hovering an entry sets `highlight` to its display name; leaving clears it.
 */
function buildLegend() {
  const items = container.querySelector('.legend .items');
  if (!items) return;
  items.innerHTML = '';

  // Ranking by a column that is absent from the CSV would yield NaN for every
  // run and make the sort comparator inconsistent.
  const rankMetric = metricKeys.includes(defaultMetric) ? defaultMetric : currentMetric;

  const grouped = d3.group(allData, d => d[RUN_COL]);
  const ranked = Array.from(grouped, ([raw, rows]) => {
    const maxStep = d3.max(rows, r => +r[STEP_COL]);
    const row = rows.find(r => +r[STEP_COL] === maxStep);
    const score = row ? +row[rankMetric] : 0;
    return { raw, score: Number.isFinite(score) ? score : -Infinity };
  });
  // Explicit equality branch: (-Infinity) - (-Infinity) would be NaN.
  ranked.sort((a, b) => (a.score === b.score ? 0 : b.score - a.score));

  ranked.forEach(({ raw }) => {
    const name = displayName(raw);
    const el = document.createElement('span'); el.className = 'item'; el.setAttribute('data-name', name);
    const sw = document.createElement('span'); sw.className = 'swatch'; sw.style.background = colorMap[raw] || '#999';
    const txt = document.createElement('span'); txt.textContent = name;
    el.appendChild(sw); el.appendChild(txt); items.appendChild(el);
    el.addEventListener('mouseenter', () => { highlight = name; updateHighlight(); });
    el.addEventListener('mouseleave', () => { highlight = null; updateHighlight(); });
  });
}
|
|
|
|
| 607 |
try {
|
| 608 |
const text = await fetchFirstAvailable(csvPaths);
|
| 609 |
allData = d3.csvParse(text);
|
| 610 |
+
metricKeys = detectMetrics(allData.columns);
|
| 611 |
+
// Ensure defaultMetric is valid; fall back to first available
|
| 612 |
+
if (!metricKeys.includes(currentMetric)) currentMetric = metricKeys[0];
|
| 613 |
+
populateMetricSelect();
|
| 614 |
render();
|
| 615 |
buildLegend();
|
| 616 |
if (window.ResizeObserver) { new ResizeObserver(() => render()).observe(container); }
|