""" Display constants for DeathMath Leaderboard UI. Includes text content and CSS/JS styling. """ TITLE = """

DeathMath Leaderboard

Оценка моделей на сложных математических и физических задачах

""" INTRODUCTION_TEXT = """ # DeathMath Benchmark DeathMath - это бенчмарк для оценки способности моделей решать сложные математические и физические задачи на русском языке. ## Что оценивает бенчмарк? - **RussianMath Score**: Оценка способности решать математические задачи на русском языке - **RussianPhysics Score**: Оценка способности решать задачи по физике на русском языке - **Combined Score**: Общая оценка (среднее математики и физики) """ LLM_BENCHMARKS_TEXT = """ ## Как запустить бенчмарк DeathMath Для оценки вашей модели на бенчмарке DeathMath вам нужно: ### Установка Клонируйте репозиторий DeathMath и установите необходимые зависимости: ```bash git clone https://github.com/DeathMath/benchmark.git cd DeathMath pip install -r requirements.txt ``` ### Запуск Для запуска оценки используйте скрипт runner.py: ```bash python runner.py --config configs/run.yaml --model your_model_name_or_path ``` ### Формат результатов После выполнения оценки, результаты будут сохранены в директории `results/`. Вам нужно будет подготовить JSON файл с результатами в следующем формате: ```json { "score": 0.586, "math_score": 0.8, "physics_score": 0.373, "total_tokens": 1394299, "evaluation_time": 4533.2, "system_prompt": "Вы - полезный помощник по математике и физике. Ответьте на русском языке." } ``` ### Загрузка результатов Загрузите полученный JSON файл через вкладку "Submit Model" на этом лидерборде. ### Политика против читерства При обнаружении попыток манипуляции результатами или модификации выходного файла, мы оставляем за собой право удалить ваш результат из лидерборда. """ THEME_DETECTION_JS = """ """ LEADERBOARD_CSS = """ /* ======================================== OKLCH Color Variables ======================================== */ :root { --radius: 0.65rem; --background: oklch(1 0 0); --foreground: oklch(0.141 0.005 285.823); --card: oklch(1 0 0); --card-foreground: oklch(0.141 0.005 285.823); --popover: oklch(1 0 0); --popover-foreground: oklch(0.141 0.005 285.823); --primary: oklch(0.648 0.2 131.684); --primary-foreground: oklch(0.986 0.031 120.757); --secondary: oklch(0.967 0.001 286.375); --secondary-foreground: oklch(0.21 0.006 285.885); --muted: oklch(0.967 0.001 286.375); --muted-foreground: oklch(0.552 0.016 285.938); --accent: oklch(0.967 0.001 286.375); --accent-foreground: oklch(0.21 0.006 285.885); --destructive: oklch(0.577 0.245 27.325); --border: oklch(0.92 0.004 286.32); --input: oklch(0.92 0.004 286.32); --ring: oklch(0.841 0.238 128.85); --chart-1: oklch(0.871 0.15 154.449); --chart-2: oklch(0.723 0.219 149.579); --chart-3: oklch(0.627 0.194 149.214); --chart-4: oklch(0.527 0.154 150.069); --chart-5: oklch(0.448 0.119 151.328); --sidebar: oklch(0.985 0 0); --sidebar-foreground: oklch(0.141 0.005 285.823); --sidebar-primary: oklch(0.648 0.2 131.684); --sidebar-primary-foreground: oklch(0.986 0.031 120.757); --sidebar-accent: oklch(0.967 0.001 286.375); --sidebar-accent-foreground: oklch(0.21 0.006 285.885); --sidebar-border: oklch(0.92 0.004 286.32); --sidebar-ring: oklch(0.841 0.238 128.85); } .dark { --background: oklch(0.141 0.005 285.823); --foreground: oklch(0.985 0 0); --card: oklch(0.21 0.006 285.885); --card-foreground: oklch(0.985 0 0); --popover: oklch(0.21 0.006 285.885); --popover-foreground: oklch(0.985 0 0); --primary: oklch(0.648 0.2 131.684); --primary-foreground: oklch(0.986 0.031 120.757); --secondary: oklch(0.274 0.006 286.033); --secondary-foreground: oklch(0.985 0 0); --muted: oklch(0.274 0.006 286.033); --muted-foreground: oklch(0.705 0.015 286.067); --accent: oklch(0.274 0.006 286.033); --accent-foreground: oklch(0.985 0 0); --destructive: oklch(0.704 0.191 22.216); --border: oklch(1 0 0 / 10%); --input: oklch(1 0 0 / 15%); --ring: oklch(0.405 0.101 131.063); --chart-1: oklch(0.871 0.15 154.449); --chart-2: oklch(0.723 0.219 149.579); --chart-3: oklch(0.627 0.194 149.214); --chart-4: oklch(0.527 0.154 150.069); --chart-5: oklch(0.448 0.119 151.328); --sidebar: oklch(0.21 0.006 285.885); --sidebar-foreground: oklch(0.985 0 0); --sidebar-primary: oklch(0.768 0.233 130.85); --sidebar-primary-foreground: oklch(0.986 0.031 120.757); --sidebar-accent: oklch(0.274 0.006 286.033); --sidebar-accent-foreground: oklch(0.985 0 0); --sidebar-border: oklch(1 0 0 / 10%); --sidebar-ring: oklch(0.405 0.101 131.063); } /* ======================================== Base Styles ======================================== */ * { border-color: var(--border); } body, .gradio-container { background-color: var(--background) !important; color: var(--foreground) !important; } .gradio-container { width: 100% !important; max-width: 100% !important; } .leaderboard-content { max-width: 1400px; margin: 0 auto; padding: 0 24px; } .leaderboard-header { text-align: center; margin-bottom: 32px; } /* ======================================== Typography ======================================== */ .markdown-text { font-size: 16px !important; color: var(--foreground); } h1, h2, h3, h4, h5, h6 { color: var(--foreground) !important; } .centered-title { text-align: center !important; margin-bottom: 8px; } /* ======================================== Tab Buttons ======================================== */ .tab-buttons { display: flex !important; justify-content: center !important; align-items: center !important; } .tab-buttons button { font-size: 18px; color: var(--foreground); border-radius: var(--radius); transition: all 0.2s ease; padding: 12px 24px; border: 1px solid var(--border); background: var(--background); } .tab-buttons button:hover { background: var(--accent); color: var(--accent-foreground); box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1); } .tab-buttons button.selected { background: var(--primary); color: var(--primary-foreground); border-color: var(--primary); box-shadow: 0 2px 12px rgba(0, 0, 0, 0.15); } /* ======================================== Card Design for Leaderboard ======================================== */ .leaderboard-card { background: var(--card); border: 1px solid var(--border); border-radius: calc(var(--radius) + 4px); padding: 24px; margin: 16px 0; box-shadow: 0 2px 8px rgba(0, 0, 0, 0.05), 0 4px 16px rgba(0, 0, 0, 0.03); transition: box-shadow 0.2s ease; } .dark .leaderboard-card { box-shadow: 0 2px 8px rgba(0, 0, 0, 0.3), 0 4px 16px rgba(0, 0, 0, 0.2); } .leaderboard-card:hover { box-shadow: 0 4px 12px rgba(0, 0, 0, 0.08), 0 6px 20px rgba(0, 0, 0, 0.05); } .dark .leaderboard-card:hover { box-shadow: 0 4px 12px rgba(0, 0, 0, 0.4), 0 6px 20px rgba(0, 0, 0, 0.3); } /* ======================================== Centered Search Bar (Transparent Background) ======================================== */ .search-box-centered { max-width: 1200px; width: 85%; margin: 0 auto 24px auto; } .search-box-centered, .search-box-centered > *, .search-box-centered .gr-box, .search-box-centered label { background: transparent !important; border: none !important; } .search-box-centered input, .search-box-centered textarea { width: 100%; padding: 12px 16px !important; font-size: 15px !important; background: var(--background) !important; border: 1px solid var(--border) !important; border-radius: var(--radius) !important; color: var(--foreground) !important; transition: all 0.2s ease; text-align: center; overflow-y: hidden !important; resize: none !important; } .dark .search-box-centered input, .dark .search-box-centered textarea { background: var(--card) !important; } .search-box-centered input:focus, .search-box-centered textarea:focus { border-color: var(--ring) !important; box-shadow: 0 0 0 3px rgba(0, 0, 0, 0.08) !important; outline: none !important; } .search-box-centered input::placeholder, .search-box-centered textarea::placeholder { color: var(--primary) !important; text-align: center; font-weight: 500; opacity: 0.85; } /* ======================================== Table Styling with Top Model Highlighting ======================================== */ .dataframe-container, .dataframe { border: 1px solid var(--border) !important; border-radius: var(--radius) !important; overflow: hidden !important; box-shadow: 0 2px 8px rgba(0, 0, 0, 0.05) !important; } table { width: 100%; border-collapse: collapse; background: var(--card); overflow: hidden; } table thead { background: var(--muted); color: var(--muted-foreground); } table thead th { padding: 6px 10px; font-weight: 600; text-align: left; font-size: 12px; text-transform: uppercase; letter-spacing: 0.03em; border-bottom: 2px solid var(--border); } table tbody tr { border-bottom: 1px solid var(--border); transition: background-color 0.2s ease; height: 34px; max-height: 34px; } table tbody tr:hover { background: var(--accent); } table tbody td { padding: 6px 10px; font-size: 12px; color: var(--foreground); line-height: 1.4; overflow: hidden; text-overflow: ellipsis; vertical-align: middle; white-space: nowrap; } table td, table th { outline: none !important; cursor: default !important; } table td:focus, table th:focus, table td:active, table th:active { outline: none !important; box-shadow: none !important; } table tbody tr td:not(:first-child)::before, table tbody tr td:not(:first-child)::after { display: none !important; content: none !important; visibility: hidden !important; } table tbody tr td::before, table tbody tr td::after { pointer-events: none !important; } table tbody tr td > div::before, table tbody tr td > div::after, table tbody tr td > span::before, table tbody tr td > span::after { display: none !important; visibility: hidden !important; } table tbody tr td { user-select: none !important; -webkit-user-select: none !important; -moz-user-select: none !important; } .cell-wrap::before, .cell-wrap::after { display: none !important; content: none !important; visibility: hidden !important; } .cell-wrap * { pointer-events: none !important; } table tbody tr td > *[style*="position: absolute"], table tbody tr td > *[style*="position:absolute"] { display: none !important; } .selection-button, .selection-button-row, .selection-button-col, button[class*="selection-button"] { display: none !important; visibility: hidden !important; opacity: 0 !important; pointer-events: none !important; } table td:first-child, table th:first-child { max-width: 400px; overflow: hidden; text-overflow: ellipsis; white-space: nowrap !important; } table tbody tr td:first-child::before { display: inline !important; white-space: nowrap !important; } table tbody tr:nth-child(1) { background: linear-gradient(90deg, var(--chart-1) 0%, transparent 100%); background-size: 8px 100%; background-repeat: no-repeat; background-position: left center; border-left: 4px solid var(--chart-1); font-weight: 600; } table tbody tr:nth-child(2) { background: linear-gradient(90deg, var(--chart-2) 0%, transparent 100%); background-size: 8px 100%; background-repeat: no-repeat; background-position: left center; border-left: 4px solid var(--chart-2); font-weight: 600; } table tbody tr:nth-child(3) { background: linear-gradient(90deg, var(--chart-3) 0%, transparent 100%); background-size: 8px 100%; background-repeat: no-repeat; background-position: left center; border-left: 4px solid var(--chart-3); font-weight: 600; } table tbody tr { background-color: var(--card); } table tbody td:nth-child(2), table tbody td:nth-child(3), table tbody td:nth-child(4) { font-weight: 600; font-variant-numeric: tabular-nums; } table tbody td:last-child { max-width: 200px; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; } /* ======================================== Buttons and Interactive Elements ======================================== */ button, .gr-button { background: var(--primary); color: var(--primary-foreground); border: none; border-radius: var(--radius); padding: 10px 20px; font-size: 15px; font-weight: 500; cursor: pointer; transition: all 0.2s ease; box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1); } button:hover, .gr-button:hover { background: var(--primary); filter: brightness(1.1); box-shadow: 0 4px 8px rgba(0, 0, 0, 0.15); transform: translateY(-1px); } button:active, .gr-button:active { transform: translateY(0); box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1); } button.secondary { background: var(--secondary); color: var(--secondary-foreground); } /* ======================================== Input Fields ======================================== */ input, textarea, select { background: var(--input); color: var(--foreground); border: 1px solid var(--border); border-radius: var(--radius); padding: 10px 14px; font-size: 15px; transition: all 0.2s ease; } input:focus, textarea:focus, select:focus { outline: none; border-color: var(--ring); box-shadow: 0 0 0 3px rgba(0, 0, 0, 0.05); } /* ======================================== Responsive Design ======================================== */ @media (max-width: 900px) { .gradio-container { max-width: 100% !important; padding: 12px !important; } .leaderboard-card { padding: 16px; } .tab-buttons button { font-size: 16px; padding: 10px 16px; } table thead th, table tbody td { padding: 10px 12px; font-size: 14px; } table td:first-child, table th:first-child { max-width: 250px; } } @media (max-width: 600px) { table { font-size: 13px; } table thead th, table tbody td { padding: 8px 10px; } } /* ======================================== Utility Classes ======================================== */ .text-muted { color: var(--muted-foreground); } .text-primary { color: var(--primary); } .bg-card { background: var(--card); color: var(--card-foreground); } .border { border: 1px solid var(--border); } .rounded { border-radius: var(--radius); } .shadow { box-shadow: 0 2px 8px rgba(0, 0, 0, 0.05); } .shadow-lg { box-shadow: 0 4px 16px rgba(0, 0, 0, 0.1); } /* ======================================== Animations ======================================== */ @keyframes fadeIn { from { opacity: 0; transform: translateY(10px); } to { opacity: 1; transform: translateY(0); } } .fade-in { animation: fadeIn 0.3s ease-out; } * { transition: background-color 0.3s ease, color 0.3s ease, border-color 0.3s ease; } """ __all__ = ["INTRODUCTION_TEXT", "LLM_BENCHMARKS_TEXT", "LEADERBOARD_CSS", "THEME_DETECTION_JS"]