// paperswithcode / assets /BenchmarkDetailView-QhpW5Yev.js
// nielsr's picture
// nielsr HF Staff
// Sync paperswithcode.co frontend
// 322179a verified
/*
 * Benchmark detail page: breadcrumb, title, source paper, metric switcher,
 * "SOTA progression" chart and leaderboard table for one dataset.
 *
 * This is a bundler-emitted chunk that has been re-formatted. The export names
 * of the sibling chunks (x, u, r, j, ...) are mangled by the bundler; the local
 * aliases below are inferred from how each binding is used in this file
 * (NOTE(review): confirm against the un-minified sources).
 */
import {
  x as useParams, // called with no args, result destructured as { datasetSlug }
  u as useSearchParams, // called with no args, result[0] has .get(name)
  r as React, // provides useState / useRef / useMemo / useEffect
  j as jsxRuntime, // provides jsx / jsxs / Fragment
  L as Link, // rendered with a `to` prop
  H as formatMetric, // turns a metric value into display text
  y as useNavigate, // returns a function called with a path string
} from "./index-khuRhj0-.js";
import { u as useQuery } from "./useQuery-Caj49BNs.js";
import { a as api } from "./api-DxZAbqc3.js";
import { S as Skeleton } from "./skeleton-DAoGkyKK.js";
import { A as Alert, a as AlertDescription } from "./alert-Bm-7arBk.js";
import { L as LatexTitle } from "./LatexTitle-fGDJSovl.js";
import "./adminAuth-PI_XkM8E.js";

// Chart geometry, in SVG user units (the SVG scales to 100% width via viewBox).
const CHART_WIDTH = 880;
const CHART_HEIGHT = 320;
const PAD_LEFT = 60;
const PAD_RIGHT = 30;
const PAD_TOP = 24;
const PAD_BOTTOM = 50;
const GRID_LINE_COUNT = 5;
const TOOLTIP_HEIGHT = 46;
const TOOLTIP_MAX_LABEL_CHARS = 40;

/**
 * Line chart of the best result over time.
 *
 * @param {object} props
 * @param {Array<{value:number,dateLabel:string,modelName:string,paperTitle:string,paperRouteId:(string|number|null)}>} props.points
 *   Points in chronological order (as produced by buildSotaProgression).
 * @param {string} props.metric Metric name shown as the rotated y-axis label.
 * @returns A React element, or null when there is nothing to plot.
 */
const SotaChart = ({ points, metric }) => {
  const navigate = useNavigate();
  const [activeIndex, setActiveIndex] = React.useState(null);

  // Guard placed after the hooks so hook order stays stable. Without it the
  // path construction below would dereference plotted[0] on an empty array.
  if (!points || points.length === 0) {
    return null;
  }

  const values = points.map((point) => point.value);
  const minValue = Math.min(...values);
  // `|| 1` avoids dividing by zero when every point has the same value.
  const valueSpan = Math.max(...values) - minValue || 1;
  const plotWidth = CHART_WIDTH - PAD_LEFT - PAD_RIGHT;
  const plotHeight = CHART_HEIGHT - PAD_TOP - PAD_BOTTOM;
  const stepX = plotWidth / Math.max(1, points.length - 1);

  const plotted = points.map((point, index) => {
    const ratio = (point.value - minValue) / valueSpan;
    return {
      x: PAD_LEFT + index * stepX,
      y: PAD_TOP + plotHeight * (1 - ratio),
      data: point,
    };
  });

  // Smooth curve: cubic Bezier segments whose control points sit at the
  // horizontal midpoint between neighbouring points.
  let pathData = `M${plotted[0].x},${plotted[0].y}`;
  for (let k = 1; k < plotted.length; k++) {
    const prev = plotted[k - 1];
    const cur = plotted[k];
    const midX = (prev.x + cur.x) / 2;
    pathData += ` C${midX},${prev.y} ${midX},${cur.y} ${cur.x},${cur.y}`;
  }

  // Only clear the tooltip if it still belongs to the point being left, so a
  // blur on one point does not hide the tooltip of another hovered point.
  const deactivate = (index) =>
    setActiveIndex((current) => (current === index ? null : current));

  const openPaper = (routeId) => {
    if (routeId) {
      navigate(`/paper/${routeId}`);
    }
  };

  const axisLabelX = PAD_LEFT - 38;
  const axisLabelY = PAD_TOP + plotHeight / 2;

  const gridLines = Array.from({ length: GRID_LINE_COUNT }).map((_, index) => {
    const fraction = index / (GRID_LINE_COUNT - 1);
    const lineY = PAD_TOP + plotHeight * (1 - fraction);
    const tickValue = minValue + valueSpan * fraction;
    return jsxRuntime.jsxs(
      "g",
      {
        children: [
          jsxRuntime.jsx("line", {
            x1: PAD_LEFT,
            y1: lineY,
            x2: CHART_WIDTH - PAD_RIGHT,
            y2: lineY,
            stroke: "var(--hairline)",
            strokeWidth: "1",
          }),
          jsxRuntime.jsx("text", {
            x: PAD_LEFT - 10,
            y: lineY + 4,
            fontSize: "11",
            fontFamily: "var(--mono)",
            fill: "var(--ink-faint)",
            textAnchor: "end",
            children: formatMetric(tickValue),
          }),
        ],
      },
      index,
    );
  });

  const dateLabels = plotted.map((entry, index) =>
    jsxRuntime.jsx(
      "text",
      {
        x: entry.x,
        y: CHART_HEIGHT - PAD_BOTTOM + 18,
        fontSize: "11",
        fontFamily: "var(--mono)",
        fill: "var(--ink-soft)",
        textAnchor: "middle",
        children: entry.data.dateLabel,
      },
      index,
    ),
  );

  const markers = plotted.map((entry, index) => {
    const isActive = activeIndex === index;
    const displayName =
      entry.data.modelName || entry.data.paperTitle || "Unknown model";
    const label = `${displayName}: ${formatMetric(entry.data.value)} on ${entry.data.dateLabel}`;
    const hasPaper = !!entry.data.paperRouteId;
    return jsxRuntime.jsxs(
      "g",
      {
        children: [
          // Visible dot.
          jsxRuntime.jsx("circle", {
            cx: entry.x,
            cy: entry.y,
            r: isActive ? 6 : 4.5,
            fill: isActive ? "var(--accent)" : "var(--bg-elevated)",
            stroke: "var(--accent)",
            strokeWidth: "2",
            style: { transition: "r 0.12s ease, fill 0.12s ease" },
          }),
          // Larger transparent hit target carrying all interaction handlers.
          jsxRuntime.jsx("circle", {
            cx: entry.x,
            cy: entry.y,
            r: "14",
            fill: "transparent",
            style: { cursor: hasPaper ? "pointer" : "default", outline: "none" },
            tabIndex: 0,
            role: hasPaper ? "link" : "img",
            "aria-label": label,
            onMouseEnter: () => setActiveIndex(index),
            onMouseLeave: () => deactivate(index),
            onFocus: () => setActiveIndex(index),
            onBlur: () => deactivate(index),
            onClick: () => openPaper(entry.data.paperRouteId),
            onKeyDown: (event) => {
              if (hasPaper && (event.key === "Enter" || event.key === " ")) {
                event.preventDefault();
                openPaper(entry.data.paperRouteId);
              }
            },
            children: jsxRuntime.jsx("title", { children: label }),
          }),
        ],
      },
      index,
    );
  });

  const renderTooltip = () => {
    if (activeIndex === null) {
      return false;
    }
    const entry = plotted[activeIndex];
    // The stored index can outlive the points it referred to (e.g. the metric
    // changed and fewer points are plotted); skip the tooltip in that case.
    if (!entry) {
      return null;
    }
    const fullName =
      entry.data.modelName || entry.data.paperTitle || "Unknown model";
    const title =
      fullName.length > TOOLTIP_MAX_LABEL_CHARS
        ? fullName.slice(0, TOOLTIP_MAX_LABEL_CHARS - 1) + "…"
        : fullName;
    const detail = `${formatMetric(entry.data.value)} · ${entry.data.dateLabel}`;
    // Width estimated from character count, clamped to [140, 320].
    const boxWidth = Math.max(
      140,
      Math.min(320, Math.max(title.length, detail.length) * 6.6 + 24),
    );

    // Centre on the point, then clamp to the plot area horizontally.
    let boxX = entry.x - boxWidth / 2;
    if (boxX < PAD_LEFT) {
      boxX = PAD_LEFT;
    }
    if (boxX + boxWidth > CHART_WIDTH - PAD_RIGHT) {
      boxX = CHART_WIDTH - PAD_RIGHT - boxWidth;
    }
    // Prefer above the point; fall back to below when it would leave the plot.
    const boxY =
      entry.y - TOOLTIP_HEIGHT - 14 >= PAD_TOP
        ? entry.y - TOOLTIP_HEIGHT - 12
        : entry.y + 12;

    return jsxRuntime.jsxs("g", {
      pointerEvents: "none",
      children: [
        jsxRuntime.jsx("rect", {
          x: boxX,
          y: boxY,
          width: boxWidth,
          height: TOOLTIP_HEIGHT,
          rx: "6",
          ry: "6",
          fill: "var(--bg-elevated)",
          stroke: "var(--hairline-strong)",
          strokeWidth: "1",
          style: { filter: "drop-shadow(0 2px 6px rgba(0,0,0,0.08))" },
        }),
        jsxRuntime.jsx("text", {
          x: boxX + 12,
          y: boxY + 19,
          fontSize: "12",
          fontFamily: "var(--sans)",
          fill: "var(--ink)",
          fontWeight: 500,
          children: title,
        }),
        jsxRuntime.jsx("text", {
          x: boxX + 12,
          y: boxY + 36,
          fontSize: "11",
          fontFamily: "var(--mono)",
          fill: "var(--ink-soft)",
          children: detail,
        }),
      ],
    });
  };

  return jsxRuntime.jsxs("svg", {
    width: "100%",
    viewBox: `0 0 ${CHART_WIDTH} ${CHART_HEIGHT}`,
    style: { display: "block" },
    children: [
      jsxRuntime.jsx("text", {
        x: axisLabelX,
        y: axisLabelY,
        fontSize: "11",
        fontFamily: "var(--mono)",
        fill: "var(--ink-faint)",
        textAnchor: "middle",
        transform: `rotate(-90 ${axisLabelX} ${axisLabelY})`,
        children: metric,
      }),
      gridLines,
      dateLabels,
      jsxRuntime.jsx("path", {
        d: pathData,
        stroke: "var(--accent)",
        strokeWidth: "2",
        fill: "none",
        strokeLinejoin: "round",
        strokeLinecap: "round",
      }),
      markers,
      renderTooltip(),
    ],
  });
};

/**
 * Convert a raw metric value to a finite number.
 *
 * Numbers pass through when finite. Strings are trimmed, anything after a "±"
 * is dropped, and the rest is read with parseFloat. Every other input
 * (null/undefined, booleans, objects, unparsable strings, NaN/Infinity)
 * yields null instead of throwing.
 *
 * @param {*} raw
 * @returns {number|null}
 */
function parseMetricValue(raw) {
  if (raw == null) {
    return null;
  }
  if (typeof raw === "number") {
    return Number.isFinite(raw) ? raw : null;
  }
  if (typeof raw !== "string") {
    // Previously fell through to raw.includes(...) and threw a TypeError.
    return null;
  }
  const head = raw.includes("±") ? raw.split("±")[0].trim() : raw.trim();
  const parsed = parseFloat(head);
  return Number.isFinite(parsed) ? parsed : null;
}

/**
 * Derive the state-of-the-art progression and the overall best result.
 *
 * @param {Array<object>} evaluations Evaluation records; the fields read are
 *   metrics, paper_published_date, model_name, paper_title, paper_arxiv_id, paper_id.
 * @param {string} metric Key looked up in each record's `metrics`.
 * @param {string} direction "↓" when lower is better; anything else means higher is better.
 * @returns {{points:Array<object>, sota:number, sotaModel:string}}
 *   `points`: dated records that strictly improved on every earlier one, oldest first.
 *   `sota` / `sotaModel`: best value across all records (dated or not) and its
 *   model name, falling back to the paper title, then "".
 */
function buildSotaProgression(evaluations, metric, direction) {
  if (evaluations.length === 0) {
    return { points: [], sota: 0, sotaModel: "" };
  }
  const lowerIsBetter = direction === "↓";

  const dated = evaluations
    .map((record) => ({
      eval: record,
      value: parseMetricValue(record.metrics?.[metric]),
      date: record.paper_published_date || "",
    }))
    .filter((item) => item.value !== null && !!item.date)
    .sort((left, right) => left.date.localeCompare(right.date));

  const points = [];
  let best = null;
  for (const item of dated) {
    const improves =
      best == null || (lowerIsBetter ? item.value < best : item.value > best);
    if (!improves) {
      continue;
    }
    best = item.value;
    points.push({
      value: item.value,
      dateLabel: item.date.slice(0, 7), // first 7 characters of the date string
      modelName: item.eval.model_name || "",
      paperTitle: item.eval.paper_title || "",
      paperRouteId: item.eval.paper_arxiv_id || item.eval.paper_id || null,
    });
  }

  const top = evaluations
    .map((record) => ({
      eval: record,
      value: parseMetricValue(record.metrics?.[metric]),
    }))
    .filter((item) => item.value !== null)
    .sort((left, right) =>
      lowerIsBetter ? left.value - right.value : right.value - left.value,
    )[0];

  return {
    points,
    sota: top?.value ?? 0,
    sotaModel: top?.eval.model_name || top?.eval.paper_title || "",
  };
}

/**
 * Extract a paper identifier from a URL: first an arxiv.org abs/pdf link,
 * then any /paper/, /abs/ or /pdf/ path segment. A trailing ".pdf" is removed.
 *
 * @param {string|null|undefined} url
 * @returns {string|null}
 */
function paperIdFromUrl(url) {
  if (!url) {
    return null;
  }
  const arxivMatch = url.match(/arxiv\.org\/(?:abs|pdf)\/([\w.-]+)/i);
  if (arxivMatch && arxivMatch[1]) {
    return arxivMatch[1].replace(/\.pdf$/i, "");
  }
  const genericMatch = url.match(/\/(?:paper|abs|pdf)\/([\w.-]+)/i);
  return genericMatch && genericMatch[1]
    ? genericMatch[1].replace(/\.pdf$/i, "")
    : null;
}

/**
 * Page component for one benchmark (dataset).
 *
 * Reads `datasetSlug` from the route and optional `task` / `eval` query-string
 * values, loads the dataset, its evaluations, its tasks and the metric
 * directions, then renders the page. When `eval` matches a leaderboard row,
 * that row is highlighted and scrolled into view once.
 */
const BenchmarkDetailView = () => {
  const { datasetSlug } = useParams();
  const [searchParams] = useSearchParams();
  const taskSlug = searchParams.get("task") || undefined;
  const highlightedEvalId = searchParams.get("eval") || undefined;

  const [chosenMetric, setChosenMetric] = React.useState(undefined);
  const highlightedRowRef = React.useRef(null);
  const hasScrolledRef = React.useRef(false);

  const datasetQuery = useQuery({
    queryKey: ["dataset", datasetSlug, "with-paper"],
    queryFn: () => api.getDatasetBySlug(datasetSlug, { include_paper: true }),
    enabled: !!datasetSlug,
    staleTime: 10 * 60 * 1e3,
  });
  const datasetId = datasetQuery.data?.id;
  const evalsQuery = useQuery({
    queryKey: ["dataset-evals", datasetId],
    queryFn: () => api.getEvaluationsByDataset(datasetQuery.data.id),
    enabled: !!datasetId,
    staleTime: 5 * 60 * 1e3,
  });
  const tasksQuery = useQuery({
    queryKey: ["dataset-tasks", datasetId],
    queryFn: () => api.getDatasetTasks(datasetQuery.data.id),
    enabled: !!datasetId,
    staleTime: 10 * 60 * 1e3,
  });
  const directionsQuery = useQuery({
    queryKey: ["metric-directions"],
    queryFn: () => api.getMetricDirections(),
    staleTime: 60 * 60 * 1e3,
  });

  // Every metric name seen on any evaluation, in first-seen order.
  const metricNames = React.useMemo(() => {
    const seen = new Set();
    (evalsQuery.data || []).forEach((record) => {
      if (record.best_metric) {
        seen.add(record.best_metric);
      }
      Object.keys(record.metrics || {}).forEach((name) => seen.add(name));
    });
    return Array.from(seen);
  }, [evalsQuery.data]);

  // Explicit choice, else the first evaluation's best metric, else the first known metric.
  const activeMetric =
    chosenMetric || evalsQuery.data?.[0]?.best_metric || metricNames[0] || "";
  const directionName =
    directionsQuery.data?.directions?.[activeMetric] || "higher_is_better";
  const directionArrow = directionName === "lower_is_better" ? "↓" : "↑";

  const { points: sotaPoints, sota: sotaValue, sotaModel } = React.useMemo(
    () => buildSotaProgression(evalsQuery.data || [], activeMetric, directionArrow),
    [evalsQuery.data, activeMetric, directionArrow],
  );

  // Scroll the highlighted row into view once, after evaluations have loaded.
  React.useEffect(() => {
    if (
      hasScrolledRef.current ||
      !highlightedEvalId ||
      !evalsQuery.data ||
      evalsQuery.data.length === 0
    ) {
      return;
    }
    const row = highlightedRowRef.current;
    if (row) {
      hasScrolledRef.current = true;
      row.scrollIntoView({ behavior: "smooth", block: "start" });
    }
  }, [highlightedEvalId, evalsQuery.data]);

  // Task for the breadcrumb: the one named in the query string, else the first.
  const breadcrumbTask = React.useMemo(() => {
    const tasks = tasksQuery.data || [];
    if (tasks.length === 0) {
      return null;
    }
    if (taskSlug) {
      const match = tasks.find((task) => task.slug === taskSlug);
      if (match) {
        return match;
      }
    }
    return tasks[0];
  }, [tasksQuery.data, taskSlug]);

  // No hooks below this point: the early returns must not change hook order.
  if (datasetQuery.isLoading) {
    return jsxRuntime.jsxs("div", {
      className: "page",
      children: [
        jsxRuntime.jsx(Skeleton, { className: "h-12 w-1/2 mb-6" }),
        jsxRuntime.jsx(Skeleton, { className: "h-64 w-full" }),
      ],
    });
  }
  if (datasetQuery.error || !datasetQuery.data) {
    return jsxRuntime.jsx("div", {
      className: "page",
      children: jsxRuntime.jsx(Alert, {
        variant: "destructive",
        children: jsxRuntime.jsx(AlertDescription, {
          children: "Benchmark not found.",
        }),
      }),
    });
  }

  const dataset = datasetQuery.data;

  // Rows with a parsable value first (best first), then the rest by best_rank.
  const sortedEvals = (evalsQuery.data || []).slice().sort((left, right) => {
    const leftValue = parseMetricValue(left.metrics?.[activeMetric]);
    const rightValue = parseMetricValue(right.metrics?.[activeMetric]);
    if (leftValue !== null && rightValue !== null) {
      return directionArrow === "↓"
        ? leftValue - rightValue
        : rightValue - leftValue;
    }
    if (leftValue !== null) {
      return -1;
    }
    if (rightValue !== null) {
      return 1;
    }
    return (left.best_rank ?? 9999) - (right.best_rank ?? 9999);
  });

  const showHarness = !!dataset.supports_harness;

  // Query-string values are always strings, so compare against the stringified
  // id; this also matches if the API returns numeric ids.
  const isHighlighted = (record) =>
    highlightedEvalId !== undefined && String(record.id) === highlightedEvalId;
  const highlightedEval =
    (highlightedEvalId && sortedEvals.find(isHighlighted)) || null;

  const introPaper = dataset.introducing_paper ?? undefined;
  const sourcePaperId = introPaper?.arxiv_id || paperIdFromUrl(dataset.paper_url);
  const sourceTitle = introPaper?.title || dataset.introducing_paper_title || null;
  const sourceYear =
    introPaper?.published?.slice(0, 4) || dataset.introduced_year || null;
  const hasSource = !!sourcePaperId || !!dataset.paper_url || !!sourceTitle;

  const sourceMeta =
    sourceYear &&
    jsxRuntime.jsx("span", { className: "method-source-meta", children: sourceYear });
  const sourceTitleNode = sourceTitle
    ? jsxRuntime.jsx(LatexTitle, { title: sourceTitle })
    : null;

  // Internal paper link when an id is known, else the raw URL, else static text.
  let sourceLink;
  if (sourcePaperId) {
    sourceLink = jsxRuntime.jsxs(Link, {
      to: `/paper/${sourcePaperId}`,
      target: "_blank",
      rel: "noopener noreferrer",
      className: "method-source-link",
      children: [
        jsxRuntime.jsx("span", {
          className: "method-source-title",
          children: sourceTitleNode || `arXiv:${sourcePaperId}`,
        }),
        sourceMeta,
      ],
    });
  } else if (dataset.paper_url) {
    sourceLink = jsxRuntime.jsxs("a", {
      href: dataset.paper_url,
      target: "_blank",
      rel: "noopener noreferrer",
      className: "method-source-link",
      children: [
        jsxRuntime.jsx("span", {
          className: "method-source-title",
          // No paper id exists in this branch, so the fallback is the URL itself.
          children: sourceTitleNode || dataset.paper_url,
        }),
        sourceMeta,
      ],
    });
  } else {
    sourceLink = jsxRuntime.jsxs("div", {
      className: "method-source-link is-static",
      children: [
        jsxRuntime.jsx("span", {
          className: "method-source-title",
          children: sourceTitleNode,
        }),
        sourceMeta,
      ],
    });
  }

  const renderHighlightBanner = (record) => {
    const paperId = record.paper_arxiv_id || record.paper_id;
    const paperLabel =
      record.paper_title ||
      (record.paper_arxiv_id ? `arXiv:${record.paper_arxiv_id}` : null);
    // Prefer the record's own best metric when it actually has a value for it.
    const bannerMetric =
      record.best_metric && record.metrics?.[record.best_metric] !== undefined
        ? record.best_metric
        : activeMetric;
    const bannerValue = record.metrics?.[bannerMetric];

    return jsxRuntime.jsxs("div", {
      className: "bm-highlight-banner",
      role: "status",
      children: [
        jsxRuntime.jsx("span", {
          className: "bm-highlight-dot",
          "aria-hidden": "true",
        }),
        jsxRuntime.jsxs("div", {
          className: "bm-highlight-text",
          children: [
            jsxRuntime.jsx("span", {
              className: "bm-highlight-label",
              children: "Comparing result",
            }),
            jsxRuntime.jsxs("span", {
              className: "bm-highlight-paper",
              children: [
                record.model_name || paperLabel || "—",
                record.harness ? ` · ${record.harness}` : "",
              ],
            }),
            jsxRuntime.jsxs("span", {
              className: "bm-highlight-meta",
              children: [
                bannerValue !== undefined && bannerMetric
                  ? jsxRuntime.jsxs(jsxRuntime.Fragment, {
                      children: [
                        bannerMetric,
                        ": ",
                        formatMetric(bannerValue),
                        paperLabel ? " · " : "",
                      ],
                    })
                  : null,
                paperLabel && paperId
                  ? jsxRuntime.jsx(Link, {
                      to: `/paper/${paperId}`,
                      children: paperLabel,
                    })
                  : paperLabel || null,
              ],
            }),
          ],
        }),
      ],
    });
  };

  const renderLeaderboardRows = () => {
    // Tied values share a rank and the next distinct value takes its 1-based
    // position (1, 1, 3, ...). Rows without a parsable value get no rank.
    let previousValue = null;
    let currentRank = 0;
    return sortedEvals.map((record, position) => {
      const value = parseMetricValue(record.metrics?.[activeMetric]);
      let rank = null;
      if (value !== null) {
        if (previousValue === null || value !== previousValue) {
          currentRank = position + 1;
          previousValue = value;
        }
        rank = currentRank;
      }
      const paperId = record.paper_arxiv_id || record.paper_id;
      const paperLabel =
        record.paper_title ||
        (record.paper_arxiv_id ? `arXiv:${record.paper_arxiv_id}` : null);
      const highlighted = isHighlighted(record);

      return jsxRuntime.jsxs(
        "tr",
        {
          ref: highlighted ? highlightedRowRef : undefined,
          className: highlighted ? "is-highlighted" : undefined,
          children: [
            jsxRuntime.jsx("td", { className: "col-rank", children: rank ?? "—" }),
            jsxRuntime.jsxs("td", {
              className: "col-model",
              children: [
                highlighted &&
                  jsxRuntime.jsx("span", {
                    className: "bm-row-marker",
                    "aria-hidden": "true",
                  }),
                record.model_name || "—",
              ],
            }),
            showHarness &&
              jsxRuntime.jsx("td", {
                className: "col-harness",
                children: record.harness || "—",
              }),
            jsxRuntime.jsx("td", {
              className: `col-value ${rank === 1 ? "is-best" : ""}`,
              children: formatMetric(record.metrics?.[activeMetric]),
            }),
            jsxRuntime.jsx("td", {
              className: "col-paper",
              children: paperId
                ? jsxRuntime.jsx(Link, {
                    to: `/paper/${paperId}`,
                    children: paperLabel || "—",
                  })
                : paperLabel || "—",
            }),
            jsxRuntime.jsx("td", {
              className: "col-year",
              children: record.paper_published_date?.slice(0, 4) || "—",
            }),
          ],
        },
        record.id,
      );
    });
  };

  let leaderboardBody;
  if (evalsQuery.isLoading) {
    leaderboardBody = jsxRuntime.jsx("div", {
      style: { padding: 16 },
      children: Array.from({ length: 5 }).map((_, index) =>
        jsxRuntime.jsx(Skeleton, { className: "h-10 w-full mb-2" }, index),
      ),
    });
  } else if (evalsQuery.error && !evalsQuery.data) {
    // A failed request must not be reported as "no evaluations". Previously
    // loaded data, if any, is still shown by the branches below.
    leaderboardBody = jsxRuntime.jsx("div", {
      className: "no-results",
      children: "Failed to load evaluations.",
    });
  } else if (sortedEvals.length === 0) {
    leaderboardBody = jsxRuntime.jsx("div", {
      className: "no-results",
      children: "No evaluations submitted yet.",
    });
  } else {
    leaderboardBody = jsxRuntime.jsxs("table", {
      className: "bm-table",
      children: [
        jsxRuntime.jsx("thead", {
          children: jsxRuntime.jsxs("tr", {
            children: [
              jsxRuntime.jsx("th", { className: "col-rank", children: "Rank" }),
              jsxRuntime.jsx("th", { className: "col-model", children: "Model" }),
              showHarness &&
                jsxRuntime.jsx("th", {
                  className: "col-harness",
                  children: "Harness",
                }),
              jsxRuntime.jsxs("th", {
                className: "col-value",
                children: [activeMetric, " ", directionArrow],
              }),
              jsxRuntime.jsx("th", { className: "col-paper", children: "Paper" }),
              jsxRuntime.jsx("th", { className: "col-year", children: "Year" }),
            ],
          }),
        }),
        jsxRuntime.jsx("tbody", { children: renderLeaderboardRows() }),
      ],
    });
  }

  return jsxRuntime.jsx("div", {
    className: "page",
    children: jsxRuntime.jsxs("div", {
      className: "bm-page",
      children: [
        jsxRuntime.jsxs("div", {
          className: "breadcrumb",
          children: [
            jsxRuntime.jsx(Link, { to: "/tasks", children: "Tasks" }),
            breadcrumbTask &&
              jsxRuntime.jsxs(jsxRuntime.Fragment, {
                children: [
                  jsxRuntime.jsx("span", { className: "sep", children: "/" }),
                  jsxRuntime.jsx(Link, {
                    to: `/tasks/${breadcrumbTask.slug || breadcrumbTask.id}`,
                    children: breadcrumbTask.name,
                  }),
                ],
              }),
            jsxRuntime.jsx("span", { className: "sep", children: "/" }),
            jsxRuntime.jsx("span", {
              style: { color: "var(--accent-text)" },
              children: dataset.name,
            }),
          ],
        }),
        jsxRuntime.jsx("h1", { className: "bm-title", children: dataset.name }),
        jsxRuntime.jsxs("div", {
          className: "bm-subtitle",
          children: [
            activeMetric &&
              jsxRuntime.jsxs(jsxRuntime.Fragment, {
                children: [
                  jsxRuntime.jsx("span", {
                    className: "metric-tag",
                    children: activeMetric,
                  }),
                  jsxRuntime.jsx("span", {
                    style: { color: "var(--hairline-strong)", margin: "0 8px" },
                    children: "·",
                  }),
                ],
              }),
            sotaModel
              ? jsxRuntime.jsxs("span", {
                  children: [
                    "Best: ",
                    jsxRuntime.jsx("span", {
                      className: "best-name",
                      children: formatMetric(sotaValue),
                    }),
                    " by ",
                    jsxRuntime.jsx("span", {
                      className: "best-name",
                      style: { fontStyle: "normal" },
                      children: sotaModel,
                    }),
                  ],
                })
              : jsxRuntime.jsxs("span", {
                  children: [sortedEvals.length, " evaluations"],
                }),
          ],
        }),
        dataset.description &&
          jsxRuntime.jsx("p", {
            style: {
              fontFamily: "var(--serif)",
              color: "var(--ink-soft)",
              maxWidth: 760,
              marginBottom: 16,
            },
            children: dataset.description,
          }),
        hasSource &&
          jsxRuntime.jsxs("div", {
            className: "method-source",
            style: { marginTop: 0, marginBottom: 18 },
            children: [
              jsxRuntime.jsx("span", {
                className: "method-source-label",
                children: "Source",
              }),
              sourceLink,
            ],
          }),
        metricNames.length > 1 &&
          jsxRuntime.jsxs("div", {
            className: "bm-actions",
            children: [
              jsxRuntime.jsx("span", {
                style: {
                  fontFamily: "var(--mono)",
                  fontSize: 11,
                  color: "var(--ink-faint)",
                  marginRight: 8,
                },
                children: "METRIC",
              }),
              metricNames.map((name) =>
                jsxRuntime.jsx(
                  "button",
                  {
                    // Explicit type so the button never acts as a form submit.
                    type: "button",
                    className: `bm-action-btn ${name === activeMetric ? "is-active" : ""}`,
                    onClick: () => setChosenMetric(name),
                    children: name,
                  },
                  name,
                ),
              ),
            ],
          }),
        // A progression needs at least two points to be worth drawing.
        sotaPoints.length >= 2 &&
          jsxRuntime.jsxs("div", {
            className: "bm-card",
            children: [
              jsxRuntime.jsx("div", {
                className: "bm-card-head",
                children: jsxRuntime.jsx("div", {
                  className: "bm-card-title",
                  children: "SOTA progression",
                }),
              }),
              jsxRuntime.jsx("div", {
                className: "bm-chart-wrap",
                children: jsxRuntime.jsx(SotaChart, {
                  points: sotaPoints,
                  metric: activeMetric,
                }),
              }),
              jsxRuntime.jsxs("div", {
                className: "bm-chart-foot",
                children: [
                  jsxRuntime.jsx("span", { className: "legend-dot" }),
                  "Best result over time · hover a point to see the model · click to open the paper",
                ],
              }),
            ],
          }),
        highlightedEval && renderHighlightBanner(highlightedEval),
        jsxRuntime.jsxs("div", {
          className: "bm-card",
          children: [
            jsxRuntime.jsx("div", {
              className: "bm-card-head",
              children: jsxRuntime.jsx("div", {
                className: "bm-card-title",
                children: "Leaderboard",
              }),
            }),
            leaderboardBody,
          ],
        }),
      ],
    }),
  });
};

export { BenchmarkDetailView as default };