import { useState, useEffect } from 'react' import { PrimeReactProvider } from 'primereact/api' import 'primereact/resources/themes/lara-light-cyan/theme.css' import ModelTable from './components/ModelTable' import LanguageTable from './components/LanguageTable' import DatasetTable from './components/DatasetTable' import WorldMap from './components/WorldMap' import AutoComplete from './components/AutoComplete' import LanguagePlot from './components/LanguagePlot' import SpeakerPlot from './components/SpeakerPlot' import HistoryPlot from './components/HistoryPlot' import LanguageTierHistoryPlot from './components/LanguageTierHistoryPlot' import LicenseHistoryPlot from './components/LicenseHistoryPlot' import CostPlot from './components/CostPlot' import { Carousel } from 'primereact/carousel' import { Dialog } from 'primereact/dialog' import { Button } from 'primereact/button' function App () { const [data, setData] = useState(null) const [baseData, setBaseData] = useState(null) const [loading, setLoading] = useState(true) const [modelTableLoading, setModelTableLoading] = useState(false) const [error, setError] = useState(null) const [selectedLanguages, setSelectedLanguages] = useState([]) const [machineTranslatedMetrics, setMachineTranslatedMetrics] = useState([]) const [dialogVisible, setDialogVisible] = useState(false) const [aboutVisible, setAboutVisible] = useState(false) const [contributeVisible, setContributeVisible] = useState(false) // Add state for carousel items const [carouselItems, setCarouselItems] = useState([]) const [fullScreenCarouselItems, setFullScreenCarouselItems] = useState([]) useEffect(() => { // For initial load, use main loading state; for language changes, use model table loading if (!data) { setLoading(true) } else { setModelTableLoading(true) } fetch('/api/data', { method: 'POST', body: JSON.stringify({ selectedLanguages }) }) .then(response => { if (!response.ok) { throw new Error('Network response was not ok') } return response.json() }) .then(jsonData => { setData(jsonData) setMachineTranslatedMetrics(jsonData.machine_translated_metrics || []) if (!baseData) setBaseData(jsonData) setLoading(false) setModelTableLoading(false) }) .catch(err => { setError(err.message) setLoading(false) setModelTableLoading(false) }) }, [selectedLanguages]) // Create carousel items when data is loaded useEffect(() => { if (data) { // Add a small delay to ensure components are ready const timer = setTimeout(() => { setCarouselItems([ , , , , , , ]); }, 100); return () => clearTimeout(timer); } }, [data, baseData]) const [windowWidth, setWindowWidth] = useState(window.innerWidth) const [windowHeight, setWindowHeight] = useState(window.innerHeight) useEffect(() => { const handleResize = () => { setWindowWidth(window.innerWidth) setWindowHeight(window.innerHeight) } window.addEventListener('resize', handleResize) return () => window.removeEventListener('resize', handleResize) }, []) // Create full-screen carousel items when data or window size changes useEffect(() => { if (data) { const timer = setTimeout(() => { setFullScreenCarouselItems([ , , , , , , ]); }, 100); return () => clearTimeout(timer); } }, [data, baseData, windowWidth, windowHeight]) return (
{/*
Work in Progress: This dashboard is currently under active development. Evaluation results are not yet final. More extensive evaluation runs will be released later this year.
*/}
🌍

AI Language Benchmarks

AI model evaluations for every language in the world

{data && ( setSelectedLanguages(items)} /> )}
{loading && (
)} {error && (

Error: {error}

)} {data && ( <>
{modelTableLoading && (
)}
)}
{/* About Dialog */} setAboutVisible(false)} style={{ width: '600px' }} modal header='About this tool' >

languagebench provides AI model evaluations for every language in the world.

👥 Who is this for?

  • Practitioners can pick the best model for a given language.
  • Policymakers and funders can identify and prioritize neglected languages.
  • Model developers can compete on our benchmarks.

⚡ Live Updates

Benchmark results automatically refresh every night and include the most popular models from{' '} OpenRouter , plus community-submitted models.

⚠️ Note on interpretation

Results are currently based on a sample of 10 sentences per language and task to keep computation affordable. For this reason, we report confidence intervals and recommend treating small differences between models with caution. In future iterations, we plan to add more benchmark datasets and richer visualisations, with large-scale evaluations across many more prompts and tasks as a longer-term goal.

✍️ Authors

languagebench is a collaboration between BMZ's{' '} Data Lab ( David Pomerenke ), the BMZ-Initiative{' '} GIZ Fair Forward {' '} ( Jonas Nothnagel ), and the{' '} E&E group {' '} of DFKI's Multilinguality and Language Technology Lab.

🔗 Links

View source code on GitHub

{/* Contribute Dialog */} setContributeVisible(false)} style={{ width: '600px' }} modal header='Contribute your Model/Benchmark' >

🚀 Submit Your Model

Have a custom fine-tuned model you'd like to see on the leaderboard or a new benchmark you think should be added?

→ Submit your model here

🔧 Contribute to Development

Help us expand language coverage and add new evaluation tasks:

→ Contribution guidelines

{/* Full-screen Dialog for Charts */} setDialogVisible(false)} style={{ width: '90vw', height: '90vh' }} maximizable modal header={null} > {fullScreenCarouselItems.length > 0 && (
item} circular={false} activeIndex={0} style={{ width: '100%', height: 'calc(90vh - 120px)' }} />
)}
) } export default App