Spaces:
Running
Running
| import React from "react"; | |
| import { | |
| Box, | |
| Typography, | |
| Paper, | |
| Stack, | |
| Divider, | |
| alpha, | |
| Link, | |
| Grid, | |
| InputLabel, | |
| Tooltip, | |
| IconButton, | |
| } from "@mui/material"; | |
| import InfoOutlinedIcon from "@mui/icons-material/InfoOutlined"; | |
| import PageHeader from "../../components/PageHeader/PageHeader"; | |
| const StepNumber = ({ number }) => ( | |
| <Box | |
| sx={{ | |
| width: 32, | |
| height: 32, | |
| borderRadius: "50%", | |
| display: "flex", | |
| alignItems: "center", | |
| justifyContent: "center", | |
| border: "1px solid", | |
| borderColor: "primary.main", | |
| color: "primary.main", | |
| fontSize: "0.875rem", | |
| fontWeight: 600, | |
| flexShrink: 0, | |
| bgcolor: "transparent", | |
| }} | |
| > | |
| {number} | |
| </Box> | |
| ); | |
| const Section = ({ title, children }) => ( | |
| <Paper | |
| elevation={0} | |
| sx={{ | |
| border: "1px solid", | |
| borderColor: "divider", | |
| borderRadius: 1, | |
| overflow: "hidden", | |
| mb: 3, | |
| }} | |
| > | |
| <Box | |
| sx={{ | |
| px: 3, | |
| py: 2, | |
| borderBottom: "1px solid", | |
| borderColor: "divider", | |
| bgcolor: (theme) => | |
| theme.palette.mode === "dark" | |
| ? alpha(theme.palette.background.paper, 0.5) | |
| : "grey.50", | |
| }} | |
| > | |
| <Typography variant="h6" sx={{ fontWeight: 600, color: "text.primary" }}> | |
| {title} | |
| </Typography> | |
| </Box> | |
| <Box sx={{ p: 3, bgcolor: "background.paper" }}>{children}</Box> | |
| </Paper> | |
| ); | |
| const Tag = ({ children }) => ( | |
| <Box | |
| component="span" | |
| sx={{ | |
| display: "inline-block", | |
| px: 1.5, | |
| py: 0.5, | |
| bgcolor: (theme) => alpha(theme.palette.primary.main, 0.1), | |
| color: "primary.main", | |
| borderRadius: 1, | |
| fontSize: "0.875rem", | |
| fontWeight: 600, | |
| mr: 1, | |
| mb: 1, | |
| }} | |
| > | |
| {children} | |
| </Box> | |
| ); | |
| const TagCard = ({ title, description, tags, explanations }) => ( | |
| <Paper | |
| elevation={1} | |
| sx={{ | |
| p: 3, | |
| height: "100%", | |
| display: "flex", | |
| flexDirection: "column", | |
| borderRadius: 2, | |
| border: "1px solid", | |
| borderColor: "grey.200", | |
| }} | |
| > | |
| <Typography variant="h6" sx={{ fontWeight: 600, mb: 2 }}> | |
| {title} | |
| </Typography> | |
| {description && ( | |
| <Typography variant="body2" sx={{ mb: 2, color: "text.secondary" }}> | |
| {description} | |
| </Typography> | |
| )} | |
| <Box sx={{ flex: 1 }}> | |
| {tags.map((tag, index) => ( | |
| <Box key={index} sx={{ mb: 2 }}> | |
| <Tag>{tag}</Tag> | |
| {explanations && explanations[index] && ( | |
| <Typography | |
| variant="body2" | |
| sx={{ | |
| color: "text.secondary", | |
| mt: 1, | |
| display: "block", | |
| }} | |
| dangerouslySetInnerHTML={{ __html: explanations[index] }} | |
| /> | |
| )} | |
| </Box> | |
| ))} | |
| </Box> | |
| </Paper> | |
| ); | |
| const CodeBlock = ({ children }) => ( | |
| <Box | |
| sx={{ | |
| backgroundColor: (theme) => | |
| alpha( | |
| theme.palette.primary.main, | |
| theme.palette.mode === "dark" ? 0.15 : 0.05 | |
| ), | |
| px: 2, | |
| py: 4, | |
| borderRadius: 1, | |
| fontFamily: "monospace", | |
| mb: 2, | |
| position: "relative", | |
| "& .key": { | |
| color: (theme) => theme.palette.primary.main, | |
| }, | |
| "& .value": { | |
| color: (theme) => | |
| theme.palette.mode === "dark" | |
| ? theme.palette.success.light | |
| : theme.palette.success.dark, | |
| }, | |
| "& .comment": { | |
| color: (theme) => theme.palette.text.secondary, | |
| }, | |
| "& .punctuation": { | |
| color: (theme) => theme.palette.text.primary, | |
| }, | |
| }} | |
| > | |
| <InputLabel | |
| sx={{ | |
| position: "absolute", | |
| right: 8, | |
| top: 8, | |
| fontSize: "0.75rem", | |
| color: "text.secondary", | |
| fontFamily: "monospace", | |
| bgcolor: "background.paper", | |
| px: 1, | |
| py: 0.5, | |
| borderRadius: 1, | |
| border: "1px solid", | |
| borderColor: "divider", | |
| zIndex: 1, | |
| }} | |
| > | |
| README.md | |
| </InputLabel> | |
| {children} | |
| </Box> | |
| ); | |
// Emoji returned for any tag that has no dedicated entry.
const TAG_EMOJI_FALLBACK = "🏷️";

// Emoji lookup keyed by tag prefix, then by camelCased tag name.
// Built once at module load instead of on every call.
const TAG_EMOJI_MAP = {
  submission: {
    automatic: "🤖",
    semiautomatic: "🔄",
    // Man technologist; written with escapes so the zero-width joiner survives copy/paste.
    manual: "\u{1F468}\u200D\u{1F4BB}",
    closed: "🔒",
  },
  test: {
    public: "👀",
    mix: "🔀",
    private: "🔐",
    rolling: "🎲",
  },
  judge: {
    function: "⚙️",
    model: "🧠",
    humans: "👥",
    vibeCheck: "✨",
  },
  modality: {
    text: "📝",
    image: "🖼️",
    audio: "🎵",
    video: "🎥",
    tools: "🛠️",
    // The page uses "modality:agent" for tool usage; same emoji as "tools".
    agent: "🛠️",
    artefacts: "🏺",
    embeddings: "🔤",
  },
  eval: {
    generation: "✨",
    math: "🔢",
    code: "💻",
    reasoning: "🧠",
    performance: "⚡",
    safety: "🛡️",
    hallucination: "🌫️",
    // The page uses "eval:rag"; "task:rag" below is kept for compatibility.
    rag: "🔍",
  },
  task: {
    rag: "🔍",
  },
  language: {
    english: "🇬🇧",
    french: "🇫🇷",
    yourOwnLanguage: "🌍",
  },
  domain: {
    financial: "💰",
    medical: "⚕️",
    legal: "⚖️",
    biology: "🧬",
    translation: "🔄",
    chemistry: "🧪",
    physics: "⚛️",
    commercial: "🏢",
  },
};

/**
 * Returns the emoji associated with a "prefix:name" tag.
 *
 * The name is camelCased before lookup, so "judge:vibe check" and
 * "judge:vibeCheck" resolve to the same entry. Only own properties of the
 * lookup table are considered, so names such as "constructor" or "toString"
 * fall through to the fallback instead of leaking prototype members.
 *
 * @param {string} tag - Tag in "prefix:name" form, e.g. "domain:medical".
 * @returns {string} The matching emoji, or the generic label emoji when the
 *   tag is not a string, has no name part, or is not in the table.
 */
const getTagEmoji = (tag) => {
  if (typeof tag !== "string") {
    return TAG_EMOJI_FALLBACK;
  }

  const [rawType, rawName = ""] = tag.split(":");
  const type = rawType.trim();
  // "your own language" -> "yourOwnLanguage"
  const name = rawName
    .trim()
    .replace(/\s+(\S)/g, (_match, firstChar) => firstChar.toUpperCase());

  const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);

  if (!hasOwn(TAG_EMOJI_MAP, type)) {
    return TAG_EMOJI_FALLBACK;
  }
  const group = TAG_EMOJI_MAP[type];
  return hasOwn(group, name) ? group[name] : TAG_EMOJI_FALLBACK;
};
| const TagItem = ({ tag, explanation }) => { | |
| // Extract the name without prefix | |
| const name = tag.split(":")[1]; | |
| const emoji = getTagEmoji(tag); | |
| return ( | |
| <Paper | |
| elevation={0} | |
| sx={{ | |
| height: "100%", | |
| display: "flex", | |
| flexDirection: "column", | |
| borderRadius: 2, | |
| border: "1px solid", | |
| borderColor: "divider", | |
| overflow: "hidden", | |
| }} | |
| > | |
| <Box | |
| sx={{ | |
| bgcolor: (theme) => | |
| alpha( | |
| theme.palette.primary.main, | |
| theme.palette.mode === "dark" ? 0.15 : 0.05 | |
| ), | |
| py: 2, | |
| px: 2, | |
| borderRadius: 0, | |
| mb: 2, | |
| position: "relative", | |
| }} | |
| > | |
| <Typography | |
| variant="h6" | |
| sx={{ | |
| fontWeight: 700, | |
| color: "text.primary", | |
| letterSpacing: "-0.02em", | |
| pr: 5, | |
| textTransform: "capitalize", | |
| }} | |
| > | |
| {emoji} {name} | |
| </Typography> | |
| </Box> | |
| <Box sx={{ px: 2, pb: 2 }}> | |
| <Typography | |
| variant="body2" | |
| sx={{ | |
| color: "text.secondary", | |
| mb: 2, | |
| fontSize: "0.75rem", | |
| }} | |
| > | |
| <strong>{tag.split(":")[0]}</strong>:{tag.split(":")[1]} | |
| </Typography> | |
| {explanation && ( | |
| <Typography | |
| variant="body2" | |
| sx={{ | |
| color: "text.secondary", | |
| flex: 1, | |
| }} | |
| dangerouslySetInnerHTML={{ __html: explanation }} | |
| /> | |
| )} | |
| </Box> | |
| </Paper> | |
| ); | |
| }; | |
| const TagSection = ({ title, description, tags, explanations }) => { | |
| // Determine if this section should have 4 columns | |
| const shouldHaveFourColumns = [ | |
| "Submission type", | |
| "Test set status", | |
| "Judges", | |
| "Domain", | |
| ].includes(title); | |
| return ( | |
| <Box sx={{ mb: 8 }}> | |
| <Typography variant="h6" sx={{ fontWeight: 600, mb: 1 }}> | |
| {title} | |
| </Typography> | |
| {description && ( | |
| <Typography variant="body1" sx={{ mb: 4, color: "text.secondary" }}> | |
| {description} | |
| </Typography> | |
| )} | |
| <Grid container spacing={2}> | |
| {tags.map((tag, index) => ( | |
| <Grid | |
| item | |
| xs={12} | |
| sm={6} | |
| md={shouldHaveFourColumns ? 3 : 4} | |
| key={index} | |
| > | |
| <TagItem | |
| tag={tag} | |
| explanation={explanations ? explanations[index] : null} | |
| /> | |
| </Grid> | |
| ))} | |
| </Grid> | |
| </Box> | |
| ); | |
| }; | |
| const HowToSubmitPage = () => { | |
| return ( | |
| <Box sx={{ width: "100%", maxWidth: 1200, margin: "0 auto", padding: 4 }}> | |
| <PageHeader | |
| title="How to submit ?" | |
| subtitle={ | |
| <> | |
| Join the <span style={{ fontWeight: 600 }}>community</span> of{" "} | |
| <span style={{ fontWeight: 600 }}>"leaderboards on the Hub"</span> | |
| </> | |
| } | |
| /> | |
| <Section title="Configuration steps"> | |
| <Box | |
| sx={{ | |
| display: "flex", | |
| gap: 4, | |
| flexDirection: { xs: "column", md: "column", lg: "row" }, | |
| }} | |
| > | |
| <Stack spacing={4} sx={{ flex: { xs: "1 1 auto", md: "0 0 45%" } }}> | |
| <Stack spacing={3}> | |
| <Stack direction="row" spacing={2} alignItems="center"> | |
| <StepNumber number={1} /> | |
| <Typography | |
| variant="subtitle1" | |
| sx={{ | |
| fontWeight: 600, | |
| color: "text.primary", | |
| letterSpacing: "-0.01em", | |
| }} | |
| > | |
| Create a Space | |
| </Typography> | |
| </Stack> | |
| <Box sx={{ pl: 7 }}> | |
| <Typography variant="body2" color="text.secondary"> | |
| Your leaderboard must be hosted on a{" "} | |
| <Link | |
| href="https://huggingface.co/docs/hub/spaces" | |
| target="_blank" | |
| rel="noopener noreferrer" | |
| > | |
| Hugging Face Space | |
| </Link> | |
| . | |
| </Typography> | |
| </Box> | |
| </Stack> | |
| <Stack spacing={3}> | |
| <Stack direction="row" spacing={2} alignItems="center"> | |
| <StepNumber number={2} /> | |
| <Typography | |
| variant="subtitle1" | |
| sx={{ | |
| fontWeight: 600, | |
| color: "text.primary", | |
| letterSpacing: "-0.01em", | |
| }} | |
| > | |
| Add metadata | |
| </Typography> | |
| </Stack> | |
| <Box sx={{ pl: 7 }}> | |
| <Typography | |
| variant="body2" | |
| color="text.secondary" | |
| sx={{ mb: 2 }} | |
| > | |
| Like{" "} | |
| <Link | |
| href="https://huggingface.co/docs/hub/model-cards" | |
| target="_blank" | |
| rel="noopener noreferrer" | |
| > | |
| model cards | |
| </Link> | |
| , your Space's{" "} | |
| <InputLabel | |
| sx={{ | |
| display: "inline-flex", | |
| fontSize: "0.75rem", | |
| color: "text.secondary", | |
| fontFamily: "monospace", | |
| bgcolor: "background.paper", | |
| px: 1, | |
| py: 0.5, | |
| borderRadius: 1, | |
| border: "1px solid", | |
| borderColor: "divider", | |
| mx: 0.5, | |
| }} | |
| > | |
| README.md | |
| </InputLabel>{" "} | |
| file should include specific <strong>metadata</strong> in a | |
| YAML section at the top: | |
| </Typography> | |
| <ul | |
| style={{ | |
| margin: 0, | |
| paddingLeft: "20px", | |
| color: "text.secondary", | |
| }} | |
| > | |
| <li> | |
| <Typography | |
| variant="body2" | |
| color="text.secondary" | |
| sx={{ display: "flex", alignItems: "center", gap: 0.5 }} | |
| > | |
| Add either the <strong>leaderboard</strong> or{" "} | |
| <strong>arena</strong> tag | |
| <Tooltip | |
| title={ | |
| <Box sx={{ p: 1, maxWidth: 300 }}> | |
| <Typography | |
| variant="subtitle2" | |
| sx={{ | |
| mb: 1, | |
| fontWeight: 600, | |
| color: "text.secondary", | |
| }} | |
| > | |
| Choose between: | |
| </Typography> | |
| <Typography | |
| variant="body2" | |
| component="div" | |
| sx={{ mb: 1 }} | |
| > | |
| • <strong>arena</strong> - for human evaluations | |
| <br /> | |
| <Box component="span" sx={{ pl: 2 }}> | |
| requires <Tag>judge:humans</Tag> | |
| </Box> | |
| </Typography> | |
| <Typography variant="body2" component="div"> | |
| • <strong>leaderboard</strong> - for automated | |
| evaluations | |
| <br /> | |
| <Box component="span" sx={{ pl: 2 }}> | |
| with <Tag>judge:function</Tag> or{" "} | |
| <Tag>judge:model</Tag> | |
| </Box> | |
| </Typography> | |
| </Box> | |
| } | |
| arrow | |
| placement="right" | |
| componentsProps={{ | |
| tooltip: { | |
| sx: { | |
| bgcolor: "background.paper", | |
| color: "text.primary", | |
| "& .MuiTooltip-arrow": { | |
| color: "background.paper", | |
| }, | |
| boxShadow: (theme) => theme.shadows[2], | |
| }, | |
| }, | |
| }} | |
| > | |
| <IconButton | |
| size="small" | |
| sx={{ | |
| p: 0.5, | |
| color: "text.secondary", | |
| "&:hover": { | |
| color: "primary.main", | |
| bgcolor: (theme) => | |
| alpha(theme.palette.primary.main, 0.1), | |
| }, | |
| }} | |
| > | |
| <InfoOutlinedIcon sx={{ fontSize: "1rem" }} /> | |
| </IconButton> | |
| </Tooltip> | |
| </Typography> | |
| </li> | |
| <li> | |
| <Typography variant="body2" color="text.secondary"> | |
| Include a <strong>short_description</strong> field to | |
| explain the purpose of your evaluation | |
| </Typography> | |
| </li> | |
| <li> | |
| <Typography variant="body2" color="text.secondary"> | |
| Add <strong>metadata tags</strong> to categorize your | |
| evaluation (see examples on the right) | |
| </Typography> | |
| </li> | |
| </ul> | |
| </Box> | |
| </Stack> | |
| <Stack spacing={3}> | |
| <Stack direction="row" spacing={2} alignItems="center"> | |
| <StepNumber number={3} /> | |
| <Typography | |
| variant="subtitle1" | |
| sx={{ | |
| fontWeight: 600, | |
| color: "text.primary", | |
| letterSpacing: "-0.01em", | |
| }} | |
| > | |
| Get enough likes | |
| </Typography> | |
| </Stack> | |
| <Box sx={{ pl: 7 }}> | |
| <Typography | |
| variant="body2" | |
| color="text.secondary" | |
| sx={{ mb: { xs: 0, md: 0, lg: 3 } }} | |
| > | |
| To avoid indexing spaces{" "} | |
| <strong>which are not relevant to the community</strong> (for | |
| example, duplicates of pre-existing work), we require your | |
| space to collect <strong>at least 5 likes</strong>! | |
| </Typography> | |
| </Box> | |
| </Stack> | |
| </Stack> | |
| <Box sx={{ flex: 1 }}> | |
| <CodeBlock> | |
| --- | |
| <br /> | |
| <span className="key">short_description</span> | |
| <span className="punctuation">:</span>{" "} | |
| <span className="value"> | |
| Evaluating LLMs on math reasoning tasks | |
| </span> | |
| <br /> | |
| <span className="key">tags</span> | |
| <span className="punctuation">:</span> | |
| <br /> | |
| <span className="punctuation"> -</span>{" "} | |
| <span className="value">leaderboard</span> | |
| <span className="comment"> | |
| # | |
| Type of leaderboard | |
| </span> | |
| <br /> | |
| <span className="punctuation"> -</span>{" "} | |
| <span className="value">submission:automatic</span>{" "} | |
| <span className="comment"># How models are submitted</span> | |
| <br /> | |
| <span className="punctuation"> -</span>{" "} | |
| <span className="value">test:public</span>{" "} | |
| <span className="comment"> | |
| # Test set | |
| visibility | |
| </span> | |
| <br /> | |
| <span className="punctuation"> -</span>{" "} | |
| <span className="value">judge:function</span>{" "} | |
| <span className="comment"> | |
| # Evaluation method | |
| </span> | |
| <br /> | |
| <span className="punctuation"> -</span>{" "} | |
| <span className="value">modality:text</span>{" "} | |
| <span className="comment"> | |
| # Input/output type | |
| </span> | |
| <br /> | |
| <span className="punctuation"> -</span>{" "} | |
| <span className="value">language:english</span>{" "} | |
| <span className="comment"> | |
| # Language coverage | |
| </span> | |
| <br /> | |
| <span className="punctuation"> -</span>{" "} | |
| <span className="value">domain:financial</span>{" "} | |
| <span className="comment"> | |
| # Specific domain | |
| </span> | |
| <br /> | |
| --- | |
| </CodeBlock> | |
| </Box> | |
| </Box> | |
| </Section> | |
| <Section title="What do the tags mean?"> | |
| <TagSection | |
| title="Domain" | |
| description="Indicates the specific domain of the leaderboard" | |
| tags={[ | |
| "domain:medical", | |
| "domain:chemistry", | |
| "domain:physics", | |
| "domain:biology", | |
| "domain:financial", | |
| "domain:legal", | |
| "domain:commercial", | |
| "domain:translation", | |
| ]} | |
| /> | |
| <TagSection | |
| title="Modalities" | |
| description="Can be any (or several) of the following list" | |
| tags={[ | |
| "modality:text", | |
| "modality:image", | |
| "modality:audio", | |
| "modality:video", | |
| "modality:agent", | |
| "modality:artefacts", | |
| "modality:3d", | |
| ]} | |
| explanations={[ | |
| "", | |
| "", | |
| "", | |
| "", | |
| "requires added <strong>tool usage</strong> - mostly for <strong>assistant models</strong> (a bit outside of usual modalities)", | |
| "the leaderboard concerns itself with <strong>machine learning artefacts</strong> as themselves, for example, quality evaluation of <strong>text embeddings</strong>", | |
| "", | |
| ]} | |
| /> | |
| <TagSection | |
| title="Evaluation categories" | |
| description="Can be any (or several) of the following list" | |
| tags={[ | |
| "eval:generation", | |
| "eval:math", | |
| "eval:code", | |
| "eval:reasoning", | |
| "eval:performance", | |
| "eval:safety", | |
| "eval:hallucination", | |
| "eval:rag", | |
| ]} | |
| explanations={[ | |
| "the evaluation looks at <strong>generation capabilities</strong> specifically (can be image generation, text generation, ...)", | |
| "the evaluation tests <strong>math abilities</strong>", | |
| "the evaluation tests <strong>coding capabilities</strong>", | |
| "the evaluation tests <strong>reasoning abilities</strong>", | |
| "model <strong>performance</strong> (speed, energy consumption, ...)", | |
| "the evaluation considers <strong>safety</strong>, <strong>toxicity</strong>, <strong>bias</strong>", | |
| "the evaluation measures the model's tendency to <strong>hallucinate</strong> or generate <strong>false information</strong>", | |
| "the evaluation tests <strong>RAG</strong> (Retrieval-Augmented Generation) capabilities", | |
| ]} | |
| /> | |
| <TagSection | |
| title="Language" | |
| description="You can indicate the languages covered by your benchmark like so: language:mylanguage." | |
| tags={[ | |
| "language:english", | |
| "language:french", | |
| "language:your own language", | |
| ]} | |
| explanations={[ | |
| "", | |
| "", | |
| "At the moment, we do not support language codes, please use the language name in English.", | |
| ]} | |
| /> | |
| <TagSection | |
| title="Submission type" | |
| description="Arenas are not concerned by this category." | |
| tags={[ | |
| "submission:automatic", | |
| "submission:semiautomatic", | |
| "submission:manual", | |
| "submission:closed", | |
| ]} | |
| explanations={[ | |
| "users can submit their models as such to the leaderboard, and evaluation is run <strong>automatically</strong> without human intervention", | |
| "the leaderboard requires the <strong>model owner</strong> to run evaluations on his side and submit the results", | |
| "the leaderboard requires the <strong>leaderboard owner</strong> to run evaluations for new submissions", | |
| "the leaderboard <strong>does not accept</strong> submissions at the moment", | |
| ]} | |
| /> | |
| <TagSection | |
| title="Test set status" | |
| description="Arenas are not concerned by this category." | |
| tags={["test:public", "test:mix", "test:private", "test:rolling"]} | |
| explanations={[ | |
| "all the test sets used are <strong>public</strong>, the evaluations are completely <strong>reproducible</strong>", | |
| "some test sets are <strong>public</strong> and some <strong>private</strong>", | |
| "all the test sets used are <strong>private</strong>, the evaluations are hard to game", | |
| "the test sets used <strong>change regularly</strong> through time and evaluation scores are refreshed", | |
| ]} | |
| /> | |
| <TagSection | |
| title="Judges" | |
| tags={[ | |
| "judge:function", | |
| "judge:model", | |
| "judge:humans", | |
| "judge:vibe check", | |
| ]} | |
| explanations={[ | |
| "evaluations are run <strong>automatically</strong>, using an evaluation suite such as <strong>lm_eval</strong> or <strong>lighteval</strong>", | |
| "evaluations are run using a <strong>model as a judge</strong> approach to rate answer", | |
| "evaluations are <strong>done by humans</strong> to rate answer - <strong>this is an arena</strong>", | |
| "evaluations are <strong>done manually</strong> by one or several humans", | |
| ]} | |
| /> | |
| <Typography | |
| variant="body2" | |
| sx={{ | |
| mt: 3, | |
| color: "text.secondary", | |
| fontSize: "0.875rem", | |
| fontStyle: "italic", | |
| }} | |
| > | |
| If you would like to see a tag that is not currently represented, | |
| please contact{" "} | |
| <Link | |
| href="https://huggingface.co/clementine" | |
| target="_blank" | |
| rel="noopener noreferrer" | |
| sx={{ | |
| color: "primary.main", | |
| textDecoration: "none", | |
| "&:hover": { | |
| textDecoration: "underline", | |
| }, | |
| }} | |
| > | |
| Clémentine Fourrier | |
| </Link>{" "} | |
| on Hugging Face. | |
| </Typography> | |
| </Section> | |
| </Box> | |
| ); | |
| }; | |
| export default HowToSubmitPage; | |