import React from 'react' import styled from 'styled-components' import AgeGroup from '../assets/Age_group.png' import SEGGroup from '../assets/SEG_group.png' import EthGroup from '../assets/Ethnicity.png' import FAWGroup from '../assets/FAW.png' import Dateset from '../assets/Dataset_tableHQ.png' import ClusterGroup from '../assets/Cluster.png' import ScoreCard from '../styled_components/Scorecard' import { Button } from 'primereact/button'; import { media } from '../Utils/helper' function Dataset() { return (

Dataset Overview

Introduction We use Meta's FairSpeech dataset to conduct fairness audits of speech recognition models submitted to our leaderboard. This dataset was specifically designed to address fairness gaps across diverse demographic groups. The complete FairSpeech dataset includes 26,471 utterances recorded by 593 individuals across the United States. Participants self-identified their personal information, including age, gender, ethnicity, geographic location, and whether they consider themselves native English speakers. For our leaderboard evaluation, we use a stratified 10% sample from the FairSpeech dataset. We run inference using these test samples to evaluate the fairness of submitted models across different demographic groups. The original dataset spans seven domains: music, capture, utilities, notification control, messaging, calling, and dictation. In response to these domain-specific prompts, participants recorded audio commands such as searching for songs or making plans to meet friends.
Our Testing Strategy: Stratified Sampling For our leaderboard evaluations, we employ stratified sampling to select a representative 10% subset from the FairSpeech dataset. Stratified sampling is critical in Automatic Speech Recognition (ASR) fairness testing to ensure that test sets reflect the overall dataset's distribution. This approach ensures demographic factors, background noise conditions, and linguistic diversity are properly represented, allowing us to evaluate model robustness and fairness across varied populations.
Age Group by Gender Distribution
Socioeconomic Group by Gender Distribution
Ethnicity Distribution
Frequently Used Words
) } export default Dataset; const Container = styled.div` display: flex; flex-direction: column; padding: 2rem 0rem; padding-top: 0; margin-inline: auto; max-width: 1200px; /* To control the width */ `; const Gridbox = styled.div` display: grid; margin: auto; grid-template-columns: 1fr 1fr; grid-gap: 2rem 4rem; padding: 2rem 0; @media ${media.tablet} { grid-gap: 3rem; } @media ${media.mobile} { display: flex; flex-direction: column; gap: 1.2rem; } `; const Section = styled.section` margin-bottom: 3rem; `; const Head = styled.h6` font-size: 2rem; color: #3b82f6; margin-bottom: 1rem; text-decoration: underline; `; const Image = styled.img` max-width: 80%; display: block; margin: 1rem auto; @media ${media.tablet} { max-width: 90%; } @media ${media.mobile} { max-width: 100%; } `; const Para = styled.p` color: #4b5563; font-size: 1.4rem; line-height: 1.8; margin-bottom: ${props => props.mb || 2}rem; text-align: justify; `;