Spaces:
Running
Running
COLE CI commited on
Commit ·
3906683
0
Parent(s):
deploy to HF Space
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- .gitattributes +2 -0
- .gitignore +66 -0
- .idea/.gitignore +8 -0
- CITATION.cff +40 -0
- CODE_OF_CONDUCT.md +35 -0
- Dockerfile +55 -0
- LICENSE +21 -0
- README.md +105 -0
- SECURITY.md +15 -0
- docs/architecture.md +172 -0
- frontend/README.md +38 -0
- frontend/cole.pdf +3 -0
- frontend/eslint.config.mjs +14 -0
- frontend/jsconfig.json +7 -0
- frontend/next.config.mjs +4 -0
- frontend/package-lock.json +0 -0
- frontend/package.json +27 -0
- frontend/postcss.config.mjs +5 -0
- frontend/src/app/FAQ/page.js +53 -0
- frontend/src/app/benchmarks/page.js +132 -0
- frontend/src/app/components/BigBlueButton.js +17 -0
- frontend/src/app/components/ClientHeader.js +17 -0
- frontend/src/app/components/CodeBlock.js +10 -0
- frontend/src/app/components/ErrorMessage.js +14 -0
- frontend/src/app/components/LanguageSwitcher.js +24 -0
- frontend/src/app/components/Modal.js +30 -0
- frontend/src/app/components/ModalManager.js +26 -0
- frontend/src/app/components/SubmitForm.js +177 -0
- frontend/src/app/components/taskbar.js +117 -0
- frontend/src/app/contact/page.js +32 -0
- frontend/src/app/en/translation.json +162 -0
- frontend/src/app/fr/translation.json +162 -0
- frontend/src/app/globals.css +28 -0
- frontend/src/app/guide/page.js +76 -0
- frontend/src/app/i18n.js +28 -0
- frontend/src/app/layout.js +49 -0
- frontend/src/app/leaderboard/page.js +344 -0
- frontend/src/app/leaderboard/util.js +47 -0
- frontend/src/app/page.js +74 -0
- frontend/src/app/papers/page.js +47 -0
- frontend/src/app/resources/ResourcesPaths.js +2 -0
- frontend/src/app/results/[id]/page.js +129 -0
- frontend/src/app/results/page.js +31 -0
- nginx.conf +34 -0
- pytest.ini +2 -0
- src/__init__.py +6 -0
- src/backend/__init__.py +0 -0
- src/backend/evaluation.py +36 -0
- src/backend/results/leaderboard.json +0 -0
- src/backend/submission_api.py +250 -0
.gitattributes
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.pdf filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.idea/COLE.iml
|
| 2 |
+
.idea/inspectionProfiles/profiles_settings.xml
|
| 3 |
+
.idea/misc.xml
|
| 4 |
+
.idea/modules.xml
|
| 5 |
+
.idea/vcs.xml
|
| 6 |
+
.idea/workspace.xml
|
| 7 |
+
.idea/*
|
| 8 |
+
/Benchmarks
|
| 9 |
+
/__pycache__
|
| 10 |
+
src/config.py
|
| 11 |
+
/Loaded_Models
|
| 12 |
+
/results
|
| 13 |
+
/hf_data
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
COLE_nlu_benchmark_site/node_modules
|
| 17 |
+
COLE_nlu_benchmark_site/.pnp
|
| 18 |
+
COLE_nlu_benchmark_site/.pnp.*
|
| 19 |
+
COLE_nlu_benchmark_site/.yarn/*
|
| 20 |
+
!.yarn/patches
|
| 21 |
+
!.yarn/plugins
|
| 22 |
+
!.yarn/releases
|
| 23 |
+
!.yarn/versions
|
| 24 |
+
|
| 25 |
+
# testing
|
| 26 |
+
/coverage
|
| 27 |
+
|
| 28 |
+
# next.js
|
| 29 |
+
COLE_nlu_benchmark_site/.next/
|
| 30 |
+
COLE_nlu_benchmark_site/out/
|
| 31 |
+
|
| 32 |
+
# production
|
| 33 |
+
COLE_nlu_benchmark_site/build
|
| 34 |
+
|
| 35 |
+
# misc
|
| 36 |
+
.DS_Store
|
| 37 |
+
*.pem
|
| 38 |
+
|
| 39 |
+
# debug
|
| 40 |
+
npm-debug.log*
|
| 41 |
+
yarn-debug.log*
|
| 42 |
+
yarn-error.log*
|
| 43 |
+
.pnpm-debug.log*
|
| 44 |
+
|
| 45 |
+
# env files (can opt-in for committing if needed)
|
| 46 |
+
.env*
|
| 47 |
+
|
| 48 |
+
# vercel
|
| 49 |
+
.vercel
|
| 50 |
+
|
| 51 |
+
# typescript
|
| 52 |
+
*.tsbuildinfo
|
| 53 |
+
next-env.d.ts
|
| 54 |
+
/src/results
|
| 55 |
+
/src/__pycache__
|
| 56 |
+
/src/backend/__pycache__
|
| 57 |
+
/archives/Models/__pycache__
|
| 58 |
+
*.pyc
|
| 59 |
+
/Benchmarks_data
|
| 60 |
+
/src/light_eval_custom/results
|
| 61 |
+
/offline_evaluation/results
|
| 62 |
+
/frontend/node_modules
|
| 63 |
+
/.idea/*
|
| 64 |
+
/frontend/.next
|
| 65 |
+
/src/backend/results
|
| 66 |
+
/node_modules
|
.idea/.gitignore
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Default ignored files
|
| 2 |
+
/shelf/
|
| 3 |
+
/workspace.xml
|
| 4 |
+
# Editor-based HTTP Client requests
|
| 5 |
+
/httpRequests/
|
| 6 |
+
# Datasource local storage ignored files
|
| 7 |
+
/dataSources/
|
| 8 |
+
/dataSources.local.xml
|
CITATION.cff
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
cff-version: 1.2.0
|
| 2 |
+
title: "COLE: a Comprehensive Benchmark for French Language Understanding Evaluation"
|
| 3 |
+
message: "If you use COLE in your research, please cite our paper."
|
| 4 |
+
type: software
|
| 5 |
+
authors:
|
| 6 |
+
- given-names: David
|
| 7 |
+
family-names: Beauchemin
|
| 8 |
+
affiliation: Université Laval
|
| 9 |
+
- given-names: Yan
|
| 10 |
+
family-names: Tremblay
|
| 11 |
+
affiliation: Université Laval
|
| 12 |
+
- given-names: Mohamed Amine
|
| 13 |
+
family-names: Youssef
|
| 14 |
+
affiliation: Université Laval
|
| 15 |
+
- given-names: Richard
|
| 16 |
+
family-names: Khoury
|
| 17 |
+
affiliation: Université Laval
|
| 18 |
+
repository-code: "https://github.com/GRAAL-Research/COLE"
|
| 19 |
+
url: "https://colebenchmark.org"
|
| 20 |
+
license: MIT
|
| 21 |
+
version: 1.0.0
|
| 22 |
+
date-released: "2025-10-07"
|
| 23 |
+
preferred-citation:
|
| 24 |
+
type: article
|
| 25 |
+
title: "COLE: a Comprehensive Benchmark for French Language Understanding Evaluation"
|
| 26 |
+
authors:
|
| 27 |
+
- given-names: David
|
| 28 |
+
family-names: Beauchemin
|
| 29 |
+
- given-names: Yan
|
| 30 |
+
family-names: Tremblay
|
| 31 |
+
- given-names: Mohamed Amine
|
| 32 |
+
family-names: Youssef
|
| 33 |
+
- given-names: Richard
|
| 34 |
+
family-names: Khoury
|
| 35 |
+
year: 2025
|
| 36 |
+
url: "https://arxiv.org/abs/2510.05046"
|
| 37 |
+
identifiers:
|
| 38 |
+
- type: other
|
| 39 |
+
value: "arXiv:2510.05046"
|
| 40 |
+
description: arXiv preprint
|
CODE_OF_CONDUCT.md
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Contributor Covenant Code of Conduct
|
| 2 |
+
|
| 3 |
+
## Our Pledge
|
| 4 |
+
|
| 5 |
+
We as members, contributors, and leaders pledge to make participation in our
|
| 6 |
+
community a harassment-free experience for everyone, regardless of age, body
|
| 7 |
+
size, visible or invisible disability, ethnicity, sex characteristics, gender
|
| 8 |
+
identity and expression, level of experience, education, socio-economic status,
|
| 9 |
+
nationality, personal appearance, race, religion, or sexual identity
|
| 10 |
+
and orientation.
|
| 11 |
+
|
| 12 |
+
## Our Standards
|
| 13 |
+
|
| 14 |
+
Examples of behavior that contributes to a positive environment:
|
| 15 |
+
|
| 16 |
+
* Using welcoming and inclusive language
|
| 17 |
+
* Being respectful of differing viewpoints and experiences
|
| 18 |
+
* Gracefully accepting constructive criticism
|
| 19 |
+
* Focusing on what is best for the community
|
| 20 |
+
|
| 21 |
+
Examples of unacceptable behavior:
|
| 22 |
+
|
| 23 |
+
* Trolling, insulting or derogatory comments, and personal or political attacks
|
| 24 |
+
* Public or private harassment
|
| 25 |
+
* Publishing others' private information without explicit permission
|
| 26 |
+
* Other conduct which could reasonably be considered inappropriate
|
| 27 |
+
|
| 28 |
+
## Enforcement
|
| 29 |
+
|
| 30 |
+
Instances of abusive, harassing, or otherwise unacceptable behavior may be
|
| 31 |
+
reported to the project team at david.beauchemin@ift.ulaval.ca.
|
| 32 |
+
|
| 33 |
+
## Attribution
|
| 34 |
+
|
| 35 |
+
This Code of Conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org/), version 2.1.
|
Dockerfile
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Stage 1: Build frontend
|
| 2 |
+
FROM node:20-slim AS frontend-build
|
| 3 |
+
WORKDIR /app/frontend
|
| 4 |
+
COPY frontend/package*.json ./
|
| 5 |
+
RUN npm ci
|
| 6 |
+
COPY frontend/ ./
|
| 7 |
+
RUN npm run build
|
| 8 |
+
|
| 9 |
+
# Stage 2: Final image with backend + built frontend
|
| 10 |
+
FROM python:3.12-slim
|
| 11 |
+
|
| 12 |
+
WORKDIR /app
|
| 13 |
+
|
| 14 |
+
# Install system dependencies (nginx, curl, Node.js runtime for Next.js)
|
| 15 |
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 16 |
+
nginx \
|
| 17 |
+
curl \
|
| 18 |
+
netcat-openbsd \
|
| 19 |
+
&& curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
|
| 20 |
+
&& apt-get install -y --no-install-recommends nodejs \
|
| 21 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 22 |
+
|
| 23 |
+
# Install Python dependencies
|
| 24 |
+
COPY src/docker_requirements.txt /app/src/
|
| 25 |
+
RUN pip install --no-cache-dir --upgrade pip wheel \
|
| 26 |
+
&& pip install --no-cache-dir --prefer-binary pyarrow pandas numpy scipy fsspec aiohttp tqdm \
|
| 27 |
+
&& pip install --no-cache-dir --prefer-binary -r /app/src/docker_requirements.txt
|
| 28 |
+
|
| 29 |
+
# Copy backend source
|
| 30 |
+
COPY src/ /app/src/
|
| 31 |
+
|
| 32 |
+
# Copy built frontend from stage 1
|
| 33 |
+
COPY --from=frontend-build /app/frontend /app/frontend
|
| 34 |
+
|
| 35 |
+
# Copy config files
|
| 36 |
+
COPY nginx.conf /etc/nginx/nginx.conf
|
| 37 |
+
COPY start.sh /start.sh
|
| 38 |
+
RUN chmod +x /start.sh
|
| 39 |
+
|
| 40 |
+
# Create non-root user and set permissions
|
| 41 |
+
RUN useradd -m -u 1000 user \
|
| 42 |
+
&& mkdir -p /app/.cache /var/lib/nginx /var/log/nginx /app/logs /run \
|
| 43 |
+
&& touch /run/nginx.pid \
|
| 44 |
+
&& chown -R user:user /app /var/lib/nginx /var/log/nginx /run
|
| 45 |
+
|
| 46 |
+
ENV HF_HOME=/app/.cache \
|
| 47 |
+
HF_DATASETS_CACHE=/app/.cache
|
| 48 |
+
|
| 49 |
+
USER user
|
| 50 |
+
EXPOSE 7860
|
| 51 |
+
|
| 52 |
+
HEALTHCHECK --interval=30s --timeout=10s --retries=3 \
|
| 53 |
+
CMD curl -f http://localhost:7860/ || exit 1
|
| 54 |
+
|
| 55 |
+
CMD ["sh", "/start.sh"]
|
LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MIT License
|
| 2 |
+
|
| 3 |
+
Copyright (c) 2025 GRAAL Research Group, Université Laval
|
| 4 |
+
|
| 5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
in the Software without restriction, including without limitation the rights
|
| 8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
furnished to do so, subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
SOFTWARE.
|
README.md
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: COLE !
|
| 3 |
+
emoji: 🐳
|
| 4 |
+
colorFrom: purple
|
| 5 |
+
colorTo: gray
|
| 6 |
+
sdk: docker
|
| 7 |
+
app_port: 7860
|
| 8 |
+
---
|
| 9 |
+
|
| 10 |
+
# COLE: Comprehensive Benchmark for Quebec French Language Understanding Evaluation
|
| 11 |
+
|
| 12 |
+
[](https://colebenchmark.org/)
|
| 13 |
+
[](https://arxiv.org/abs/2510.05046)
|
| 14 |
+
[](https://huggingface.co/datasets/graalul/COLE-public)
|
| 15 |
+
|
| 16 |
+
**COLE** is a comprehensive benchmark for evaluating Quebec French Natural Language Understanding (NLU). It includes 23 diverse tasks covering sentiment analysis, paraphrase detection, natural language inference, question answering, grammatical judgment, word sense disambiguation, and more — with a particular focus on linguistic phenomena relevant to the French language.
|
| 17 |
+
|
| 18 |
+
We benchmark 94 large language models (LLMs), providing an extensive analysis of the current state of Quebec French NLU. Our results highlight a significant performance gap between closed- and open-weight models and identify key challenging frontiers such as zero-shot extractive question answering, fine-grained word sense disambiguation, and understanding of regional language variations.
|
| 19 |
+
|
| 20 |
+
## Links
|
| 21 |
+
|
| 22 |
+
- **Leaderboard**: [colebenchmark.org](https://colebenchmark.org/)
|
| 23 |
+
- **Paper**: [COLE: a Comprehensive Benchmark for Quebec French Language Understanding Evaluation (arXiv:2510.05046)](https://arxiv.org/abs/2510.05046)
|
| 24 |
+
- **Dataset**: [HuggingFace — graalul/COLE-public](https://huggingface.co/datasets/graalul/COLE-public)
|
| 25 |
+
|
| 26 |
+
## Tasks
|
| 27 |
+
|
| 28 |
+
COLE consists of 23 tasks grouped by NLU capability:
|
| 29 |
+
|
| 30 |
+
### Sentiment Analysis
|
| 31 |
+
| Task | Description | Test size |
|
| 32 |
+
|------|-------------|-----------|
|
| 33 |
+
| **Allocine** | Sentiment classification of French movie reviews (positive/negative) | 20,000 |
|
| 34 |
+
| **MMS-fr** | Sentiment analysis with 3 classes (positive, neutral, negative) | 63,190 |
|
| 35 |
+
|
| 36 |
+
### Natural Language Inference (NLI)
|
| 37 |
+
| Task | Description | Test size |
|
| 38 |
+
|------|-------------|-----------|
|
| 39 |
+
| **FraCaS** | NLI involving quantifiers, plurality, anaphora, and ellipsis | 346 |
|
| 40 |
+
| **GQNLI-fr** | NLI with quantifier logic (e.g., most, at least, more than half) | 30 |
|
| 41 |
+
| **LingNLI** | NLI corpus constructed with a linguist in the loop | 4,893 |
|
| 42 |
+
| **MNLI-nineeleven-Fr-MT** | French machine-translated MNLI using 9/11 context | 2,000 |
|
| 43 |
+
| **RTE3-Fr** | French version of RTE3 for textual entailment | 3,121 |
|
| 44 |
+
| **SICK-fr** | Sentence pair relatedness and entailment | 4,906 |
|
| 45 |
+
| **XNLI-fr** | Cross-lingual NLI in French | 5,010 |
|
| 46 |
+
|
| 47 |
+
### Question Answering
|
| 48 |
+
| Task | Description | Test size |
|
| 49 |
+
|------|-------------|-----------|
|
| 50 |
+
| **FQuAD** | Extractive QA on high-quality French Wikipedia articles | 400 |
|
| 51 |
+
| **Fr-BoolQ** | Boolean question answering in French | 178 |
|
| 52 |
+
| **PIAF** | French extractive QA pairs | 384 |
|
| 53 |
+
|
| 54 |
+
### Paraphrase Detection
|
| 55 |
+
| Task | Description | Test size |
|
| 56 |
+
|------|-------------|-----------|
|
| 57 |
+
| **PAWS-X** | Paraphrase identification from sentence pairs | 2,000 |
|
| 58 |
+
| **QFrBLiMP** | Semantic equivalence detection between sentence pairs | 2,290 |
|
| 59 |
+
|
| 60 |
+
### Grammatical Judgment
|
| 61 |
+
| Task | Description | Test size |
|
| 62 |
+
|------|-------------|-----------|
|
| 63 |
+
| **DACCORD** | Semantic plausibility of French sentences (binary) | 1,034 |
|
| 64 |
+
| **MultiBLiMP-Fr** | Grammatical correctness from minimal pairs | 77 |
|
| 65 |
+
| **QFrCoLA** | Sentence acceptability in French (grammar, syntax) | 7,546 |
|
| 66 |
+
|
| 67 |
+
### Semantic Similarity
|
| 68 |
+
| Task | Description | Test size |
|
| 69 |
+
|------|-------------|-----------|
|
| 70 |
+
| **STS22** | Document-level similarity of multilingual news articles | 72 |
|
| 71 |
+
|
| 72 |
+
### Word Sense Disambiguation
|
| 73 |
+
| Task | Description | Test size |
|
| 74 |
+
|------|-------------|-----------|
|
| 75 |
+
| **WSD-Fr** | Disambiguating verb meanings in context | 3,121 |
|
| 76 |
+
|
| 77 |
+
### Quebec French
|
| 78 |
+
| Task | Description | Test size |
|
| 79 |
+
|------|-------------|-----------|
|
| 80 |
+
| **QFrCoRE** | Matching Quebec French expressions to standard definitions | 4,633 |
|
| 81 |
+
| **QFrCoRT** | Matching Quebec French terms to standard definitions | 201 |
|
| 82 |
+
|
| 83 |
+
### Coreference / Pronoun Resolution
|
| 84 |
+
| Task | Description | Test size |
|
| 85 |
+
|------|-------------|-----------|
|
| 86 |
+
| **Wino-X-LM** | Pronoun resolution with ambiguous referents | 2,793 |
|
| 87 |
+
| **Wino-X-MT** | Translation-based pronoun resolution with gendered pronouns | 2,988 |
|
| 88 |
+
|
| 89 |
+
## Language
|
| 90 |
+
|
| 91 |
+
All data in COLE is in **French**.
|
| 92 |
+
|
| 93 |
+
## Citation
|
| 94 |
+
|
| 95 |
+
If you use COLE in your research, please cite our paper:
|
| 96 |
+
|
| 97 |
+
```bibtex
|
| 98 |
+
@article{beauchemin2025cole,
|
| 99 |
+
title={COLE: a Comprehensive Benchmark for Quebec French Language Understanding Evaluation},
|
| 100 |
+
author={Beauchemin, David and Tremblay, Yan and Youssef, Mohamed Amine and Khoury, Richard},
|
| 101 |
+
journal={arXiv preprint arXiv:2510.05046},
|
| 102 |
+
year={2025},
|
| 103 |
+
url={https://arxiv.org/abs/2510.05046}
|
| 104 |
+
}
|
| 105 |
+
```
|
SECURITY.md
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Security Policy
|
| 2 |
+
|
| 3 |
+
## Supported Versions
|
| 4 |
+
|
| 5 |
+
| Version | Supported |
|
| 6 |
+
| ------- | ------------------ |
|
| 7 |
+
| 1.0.x | :white_check_mark: |
|
| 8 |
+
|
| 9 |
+
## Reporting a Vulnerability
|
| 10 |
+
|
| 11 |
+
If you discover a security vulnerability, please report it responsibly by emailing **david.beauchemin@ift.ulaval.ca**.
|
| 12 |
+
|
| 13 |
+
Please do **not** open a public GitHub issue for security vulnerabilities.
|
| 14 |
+
|
| 15 |
+
We will acknowledge your report within 48 hours and provide a timeline for a fix.
|
docs/architecture.md
ADDED
|
@@ -0,0 +1,172 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# COLE Architecture
|
| 2 |
+
|
| 3 |
+
## System Overview
|
| 4 |
+
|
| 5 |
+
COLE runs as a single Docker container with three services behind an nginx reverse proxy, deployed on HuggingFace Spaces.
|
| 6 |
+
|
| 7 |
+
```mermaid
|
| 8 |
+
graph LR
|
| 9 |
+
User([User]) -->|:7860| Nginx
|
| 10 |
+
subgraph Docker Container
|
| 11 |
+
Nginx -->|/api/*| FastAPI[FastAPI :8000]
|
| 12 |
+
Nginx -->|/*| NextJS[Next.js :8001]
|
| 13 |
+
FastAPI -->|reads| HF[(HuggingFace\ngraalul/COLE)]
|
| 14 |
+
FastAPI -->|writes| Results[(results/*.json)]
|
| 15 |
+
end
|
| 16 |
+
```
|
| 17 |
+
|
| 18 |
+
## Backend (FastAPI)
|
| 19 |
+
|
| 20 |
+
### API Endpoints
|
| 21 |
+
|
| 22 |
+
```mermaid
|
| 23 |
+
graph TD
|
| 24 |
+
subgraph API
|
| 25 |
+
POST[POST /submit] -->|ZIP upload| Validate[Validate format]
|
| 26 |
+
Validate -->|OK| Evaluate[Evaluate predictions]
|
| 27 |
+
Evaluate -->|Save| JSON[results/uuid.json]
|
| 28 |
+
GET_LB[GET /leaderboard] -->|Read all| JSON
|
| 29 |
+
GET_H[GET /health] -->|200| OK[status: healthy]
|
| 30 |
+
end
|
| 31 |
+
```
|
| 32 |
+
|
| 33 |
+
| Endpoint | Method | Description |
|
| 34 |
+
|----------|--------|-------------|
|
| 35 |
+
| `/submit` | POST | Upload predictions ZIP, evaluate, save results |
|
| 36 |
+
| `/leaderboard` | GET | Return all submissions with metrics |
|
| 37 |
+
| `/health` | GET | Health check |
|
| 38 |
+
|
| 39 |
+
### Security
|
| 40 |
+
|
| 41 |
+
- **Rate limiting**: 5 submissions/minute per IP (slowapi)
|
| 42 |
+
- **ZIP validation**: Max 50MB compressed, 200MB decompressed
|
| 43 |
+
- **Input validation**: Email (max 320 chars, must contain @), display name (max 200 chars)
|
| 44 |
+
- **CORS**: Open origins (proxied through nginx)
|
| 45 |
+
|
| 46 |
+
### Key Modules
|
| 47 |
+
|
| 48 |
+
```mermaid
|
| 49 |
+
graph TD
|
| 50 |
+
API[submission_api.py] --> VT[validation_tools.py]
|
| 51 |
+
API --> ST[submit_tools.py]
|
| 52 |
+
API --> EV[evaluation.py]
|
| 53 |
+
VT --> TN[task_names.py]
|
| 54 |
+
EV --> TF[task_factory.py]
|
| 55 |
+
TF --> T[task.py]
|
| 56 |
+
T --> MF[metric_factory.py]
|
| 57 |
+
T --> DS[dataset.py]
|
| 58 |
+
MF --> MW[metrics_wrapper.py]
|
| 59 |
+
MF --> FQ[fquad_metric.py]
|
| 60 |
+
DS --> HF[(HuggingFace)]
|
| 61 |
+
```
|
| 62 |
+
|
| 63 |
+
## Frontend (Next.js)
|
| 64 |
+
|
| 65 |
+
### Pages
|
| 66 |
+
|
| 67 |
+
```mermaid
|
| 68 |
+
graph LR
|
| 69 |
+
subgraph Pages
|
| 70 |
+
Home[/ Home]
|
| 71 |
+
Guide[/guide]
|
| 72 |
+
FAQ[/FAQ]
|
| 73 |
+
Contact[/contact]
|
| 74 |
+
Papers[/papers]
|
| 75 |
+
Benchmarks[/benchmarks]
|
| 76 |
+
Leaderboard[/leaderboard]
|
| 77 |
+
Results[/results/id]
|
| 78 |
+
end
|
| 79 |
+
subgraph Features
|
| 80 |
+
i18n[EN/FR i18n]
|
| 81 |
+
Responsive[Mobile responsive]
|
| 82 |
+
Pagination[Leaderboard pagination]
|
| 83 |
+
Submit[ZIP submission modal]
|
| 84 |
+
end
|
| 85 |
+
```
|
| 86 |
+
|
| 87 |
+
| Page | Description |
|
| 88 |
+
|------|-------------|
|
| 89 |
+
| `/` | What is COLE, links to paper and GLUE/SuperGLUE |
|
| 90 |
+
| `/guide` | How to train, test, and format submissions |
|
| 91 |
+
| `/FAQ` | 6 questions with code formatting support |
|
| 92 |
+
| `/benchmarks` | 23 tasks organized by 9 NLU categories |
|
| 93 |
+
| `/leaderboard` | Sortable table, 25/page, loading skeleton, error states |
|
| 94 |
+
| `/papers` | Embedded arxiv PDF viewer |
|
| 95 |
+
| `/results/[id]` | Per-submission detailed results |
|
| 96 |
+
| `/contact` | Email contact |
|
| 97 |
+
|
| 98 |
+
### i18n
|
| 99 |
+
|
| 100 |
+
Full English and French translations in `frontend/src/app/en/translation.json` and `fr/translation.json`. Language switcher in the header persists selection to localStorage.
|
| 101 |
+
|
| 102 |
+
## Evaluation Pipeline
|
| 103 |
+
|
| 104 |
+
### Task Flow
|
| 105 |
+
|
| 106 |
+
```mermaid
|
| 107 |
+
graph TD
|
| 108 |
+
Submit[User submits ZIP] --> Unzip[Extract predictions.json]
|
| 109 |
+
Unzip --> Validate[Validate task names & format]
|
| 110 |
+
Validate --> Factory[task_factory creates Task objects]
|
| 111 |
+
Factory --> Compute[Task.compute per task]
|
| 112 |
+
Compute --> Dataset[Load ground truths from HF]
|
| 113 |
+
Compute --> Metric[metric_factory selects metric]
|
| 114 |
+
Metric --> Score[Compute score]
|
| 115 |
+
Score --> Save[Save results JSON]
|
| 116 |
+
```
|
| 117 |
+
|
| 118 |
+
### Tasks (30 total)
|
| 119 |
+
|
| 120 |
+
Grouped by capability:
|
| 121 |
+
|
| 122 |
+
| Category | Tasks |
|
| 123 |
+
|----------|-------|
|
| 124 |
+
| Sentiment | allocine, mms |
|
| 125 |
+
| NLI | fracas, gqnli, lingnli, mnli-nineeleven-fr-mt, rte3-french, sickfr, xnli, daccord |
|
| 126 |
+
| QA | fquad, french_boolq, piaf |
|
| 127 |
+
| Paraphrase | paws_x, qfrblimp |
|
| 128 |
+
| Grammar | multiblimp, qfrcola |
|
| 129 |
+
| Similarity | sts22 |
|
| 130 |
+
| WSD | wsd |
|
| 131 |
+
| Quebec French | qfrcore, qfrcort |
|
| 132 |
+
| Coreference | wino_x_lm, wino_x_mt |
|
| 133 |
+
| Other | frcoe, timeline, lqle, qccp, qccy, qccr, piqafr, piqaqfr |
|
| 134 |
+
|
| 135 |
+
### Metrics
|
| 136 |
+
|
| 137 |
+
| Metric | Implementation | Used by |
|
| 138 |
+
|--------|---------------|---------|
|
| 139 |
+
| Accuracy | HuggingFace `evaluate` | Most classification tasks |
|
| 140 |
+
| Pearson | HuggingFace `evaluate` | sickfr, sts22 |
|
| 141 |
+
| FQuAD | Custom (F1 + Exact Match) | fquad, piaf |
|
| 142 |
+
| ExactMatch | Custom string comparison | wsd |
|
| 143 |
+
| F1 | HuggingFace `evaluate` | Classification variants |
|
| 144 |
+
|
| 145 |
+
## CI/CD Pipeline
|
| 146 |
+
|
| 147 |
+
```mermaid
|
| 148 |
+
graph TD
|
| 149 |
+
Push[git push to main] --> F[Formatting\nblack --check]
|
| 150 |
+
Push --> L[Linting\npylint src/ tests/]
|
| 151 |
+
Push --> T[Tests\npytest]
|
| 152 |
+
Push --> FB[Frontend Build\nnpm ci + lint + build]
|
| 153 |
+
Push --> HF[HF Sync\nDeploy to Space]
|
| 154 |
+
|
| 155 |
+
F -->|Python 3.12| Pass
|
| 156 |
+
L -->|Python 3.10-3.12| Pass
|
| 157 |
+
T -->|Python 3.12\nHF_TOKEN required| Pass
|
| 158 |
+
FB -->|Node 20| Pass
|
| 159 |
+
HF -->|Orphan branch\nLFS for .jsonl/.pdf| Space[davebulaval/cole]
|
| 160 |
+
```
|
| 161 |
+
|
| 162 |
+
## Deployment
|
| 163 |
+
|
| 164 |
+
The HF Space deployment uses an orphan branch strategy to handle large `.jsonl` files in git history:
|
| 165 |
+
|
| 166 |
+
1. Checkout main with LFS
|
| 167 |
+
2. Create fresh orphan branch
|
| 168 |
+
3. Track `.jsonl` and `.pdf` with Git LFS
|
| 169 |
+
4. Remove CI/test files not needed in production
|
| 170 |
+
5. Force push to `davebulaval/cole` Space
|
| 171 |
+
|
| 172 |
+
The Space builds the Docker image and runs the container with nginx on port 7860.
|
frontend/README.md
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
This is a [Next.js](https://nextjs.org) project bootstrapped with [`create-next-app`](https://github.com/vercel/next.js/tree/canary/packages/create-next-app).
|
| 2 |
+
|
| 3 |
+
## Getting Started
|
| 4 |
+
|
| 5 |
+
First, run the development server:
|
| 6 |
+
|
| 7 |
+
```bash
|
| 8 |
+
npm install
|
| 9 |
+
|
| 10 |
+
npm run dev
|
| 11 |
+
# or
|
| 12 |
+
yarn dev
|
| 13 |
+
# or
|
| 14 |
+
pnpm dev
|
| 15 |
+
# or
|
| 16 |
+
bun dev
|
| 17 |
+
```
|
| 18 |
+
|
| 19 |
+
Open [http://localhost:3000](http://localhost:3000) with your browser to see the result.
|
| 20 |
+
|
| 21 |
+
You can start editing the page by modifying `app/page.js`. The page auto-updates as you edit the file.
|
| 22 |
+
|
| 23 |
+
This project uses [`next/font`](https://nextjs.org/docs/app/building-your-application/optimizing/fonts) to automatically optimize and load [Geist](https://vercel.com/font), a new font family for Vercel.
|
| 24 |
+
|
| 25 |
+
## Learn More
|
| 26 |
+
|
| 27 |
+
To learn more about Next.js, take a look at the following resources:
|
| 28 |
+
|
| 29 |
+
- [Next.js Documentation](https://nextjs.org/docs) - learn about Next.js features and API.
|
| 30 |
+
- [Learn Next.js](https://nextjs.org/learn) - an interactive Next.js tutorial.
|
| 31 |
+
|
| 32 |
+
You can check out [the Next.js GitHub repository](https://github.com/vercel/next.js) - your feedback and contributions are welcome!
|
| 33 |
+
|
| 34 |
+
## Deploy on Vercel
|
| 35 |
+
|
| 36 |
+
The easiest way to deploy your Next.js app is to use the [Vercel Platform](https://vercel.com/new?utm_medium=default-template&filter=next.js&utm_source=create-next-app&utm_campaign=create-next-app-readme) from the creators of Next.js.
|
| 37 |
+
|
| 38 |
+
Check out our [Next.js deployment documentation](https://nextjs.org/docs/app/building-your-application/deploying) for more details.
|
frontend/cole.pdf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1a5a63b843b58da52d57907935421416d7cd72cb351220577e24bbed4056c2b0
|
| 3 |
+
size 463232
|
frontend/eslint.config.mjs
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { dirname } from "path";
|
| 2 |
+
import { fileURLToPath } from "url";
|
| 3 |
+
import { FlatCompat } from "@eslint/eslintrc";
|
| 4 |
+
|
| 5 |
+
const __filename = fileURLToPath(import.meta.url);
|
| 6 |
+
const __dirname = dirname(__filename);
|
| 7 |
+
|
| 8 |
+
const compat = new FlatCompat({
|
| 9 |
+
baseDirectory: __dirname,
|
| 10 |
+
});
|
| 11 |
+
|
| 12 |
+
const eslintConfig = [...compat.extends("next/core-web-vitals")];
|
| 13 |
+
|
| 14 |
+
export default eslintConfig;
|
frontend/jsconfig.json
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"compilerOptions": {
|
| 3 |
+
"paths": {
|
| 4 |
+
"@/*": ["./src/*"]
|
| 5 |
+
}
|
| 6 |
+
}
|
| 7 |
+
}
|
frontend/next.config.mjs
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/** @type {import('next').NextConfig} */
|
| 2 |
+
const nextConfig = {};
|
| 3 |
+
|
| 4 |
+
export default nextConfig;
|
frontend/package-lock.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
frontend/package.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "cole_nlu_benchmark",
|
| 3 |
+
"version": "0.1.0",
|
| 4 |
+
"private": true,
|
| 5 |
+
"scripts": {
|
| 6 |
+
"dev": "next dev --turbopack",
|
| 7 |
+
"build": "next build",
|
| 8 |
+
"start": "next start",
|
| 9 |
+
"lint": "next lint"
|
| 10 |
+
},
|
| 11 |
+
"dependencies": {
|
| 12 |
+
"i18next": "^25.10.3",
|
| 13 |
+
"i18next-browser-languagedetector": "^8.2.1",
|
| 14 |
+
"lucide-react": "^0.523.0",
|
| 15 |
+
"next": "15.3.3",
|
| 16 |
+
"react": "^19.2.4",
|
| 17 |
+
"react-dom": "^19.2.4",
|
| 18 |
+
"react-i18next": "^13.0.0"
|
| 19 |
+
},
|
| 20 |
+
"devDependencies": {
|
| 21 |
+
"@eslint/eslintrc": "^3",
|
| 22 |
+
"@tailwindcss/postcss": "^4",
|
| 23 |
+
"eslint": "^9",
|
| 24 |
+
"eslint-config-next": "^15.3.3",
|
| 25 |
+
"tailwindcss": "^4"
|
| 26 |
+
}
|
| 27 |
+
}
|
frontend/postcss.config.mjs
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
const config = {
|
| 2 |
+
plugins: ["@tailwindcss/postcss"],
|
| 3 |
+
};
|
| 4 |
+
|
| 5 |
+
export default config;
|
frontend/src/app/FAQ/page.js
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
'use client';
|
| 2 |
+
|
| 3 |
+
import '../i18n';
|
| 4 |
+
import { useState } from 'react';
|
| 5 |
+
import { useTranslation } from 'react-i18next';
|
| 6 |
+
|
| 7 |
+
export default function FAQ() {
|
| 8 |
+
const { t } = useTranslation();
|
| 9 |
+
const faqs = t('faqs', { returnObjects: true });
|
| 10 |
+
const [openIndex, setOpenIndex] = useState(null);
|
| 11 |
+
|
| 12 |
+
const toggle = (index) => {
|
| 13 |
+
setOpenIndex(openIndex === index ? null : index);
|
| 14 |
+
};
|
| 15 |
+
|
| 16 |
+
return (
|
| 17 |
+
<div className="max-w-5xl mx-auto px-6 py-3">
|
| 18 |
+
<h2 className="text-3xl font-bold text-center text-blue-700 border-b pb-4 mb-10">
|
| 19 |
+
{t('faq_title')}
|
| 20 |
+
</h2>
|
| 21 |
+
|
| 22 |
+
<div className="space-y-4">
|
| 23 |
+
{faqs.map((faq, i) => (
|
| 24 |
+
<div
|
| 25 |
+
key={i}
|
| 26 |
+
className="p-6 bg-white border border-gray-200 rounded-lg shadow-sm transition"
|
| 27 |
+
>
|
| 28 |
+
<button
|
| 29 |
+
className="w-full text-left text-xl font-semibold text-gray-800 flex justify-between items-center"
|
| 30 |
+
onClick={() => toggle(i)}
|
| 31 |
+
>
|
| 32 |
+
<span>{`${i + 1}. ${faq.question}`}</span>
|
| 33 |
+
<span className="text-2xl text-gray-500">
|
| 34 |
+
{openIndex === i ? '▴' : '▾'}
|
| 35 |
+
</span>
|
| 36 |
+
</button>
|
| 37 |
+
{openIndex === i && (
|
| 38 |
+
<p className="mt-4 text-gray-600 text-sm">
|
| 39 |
+
{faq.answer.split(/(<code>.*?<\/code>)/g).map((part, j) =>
|
| 40 |
+
part.startsWith('<code>') ? (
|
| 41 |
+
<code key={j}>{part.replace(/<\/?code>/g, '')}</code>
|
| 42 |
+
) : (
|
| 43 |
+
part
|
| 44 |
+
)
|
| 45 |
+
)}
|
| 46 |
+
</p>
|
| 47 |
+
)}
|
| 48 |
+
</div>
|
| 49 |
+
))}
|
| 50 |
+
</div>
|
| 51 |
+
</div>
|
| 52 |
+
);
|
| 53 |
+
}
|
frontend/src/app/benchmarks/page.js
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
'use client';
|
| 2 |
+
|
| 3 |
+
import '../i18n';
|
| 4 |
+
import { useTranslation } from 'react-i18next';
|
| 5 |
+
import Link from 'next/link';
|
| 6 |
+
|
| 7 |
+
const categories = [
|
| 8 |
+
{
|
| 9 |
+
key: 'benchmarks_category_sentiment',
|
| 10 |
+
benchmarks: [
|
| 11 |
+
{ titleKey: 'benchmark_alloCine_title', descKey: 'benchmark_alloCine_description', link: 'https://huggingface.co/datasets/CATIE-AQ/allocine_fr_prompt_sentiment_analysis', metrics: 'Accuracy' },
|
| 12 |
+
{ titleKey: 'benchmark_mms_title', descKey: 'benchmark_mms_description', link: 'https://huggingface.co/datasets/Brand24/mms', metrics: 'Accuracy' },
|
| 13 |
+
],
|
| 14 |
+
},
|
| 15 |
+
{
|
| 16 |
+
key: 'benchmarks_category_nli',
|
| 17 |
+
benchmarks: [
|
| 18 |
+
{ titleKey: 'benchmark_fracas_title', descKey: 'benchmark_fracas_description', link: 'https://huggingface.co/datasets/maximoss/fracas', metrics: 'Accuracy' },
|
| 19 |
+
{ titleKey: 'benchmark_gqnli_title', descKey: 'benchmark_gqnli_description', link: 'https://huggingface.co/datasets/maximoss/gqnli-fr', metrics: 'Accuracy' },
|
| 20 |
+
{ titleKey: 'benchmark_lingnli_title', descKey: 'benchmark_lingnli_description', link: 'https://huggingface.co/datasets/maximoss/lingnli-multi-mt', metrics: 'Accuracy' },
|
| 21 |
+
{ titleKey: 'benchmark_mnli_nineeleven_fr_mt_title', descKey: 'benchmark_mnli_nineeleven_fr_mt_description', link: 'https://huggingface.co/datasets/maximoss/mnli-nineeleven-fr-mt', metrics: 'Accuracy' },
|
| 22 |
+
{ titleKey: 'benchmark_rte3_french_title', descKey: 'benchmark_rte3_french_description', link: 'https://huggingface.co/datasets/maximoss/rte3-french', metrics: 'Accuracy' },
|
| 23 |
+
{ titleKey: 'benchmark_sickfr_title', descKey: 'benchmark_sickfr_description', link: 'https://huggingface.co/datasets/Lajavaness/SICK-fr', metrics: 'Pearson' },
|
| 24 |
+
{ titleKey: 'benchmark_xnli_title', descKey: 'benchmark_xnli_description', link: 'https://github.com/facebookresearch/XNLI', metrics: 'Accuracy' },
|
| 25 |
+
{ titleKey: 'benchmark_daccord_title', descKey: 'benchmark_daccord_description', link: 'https://huggingface.co/datasets/maximoss/daccord-contradictions', metrics: 'Accuracy' },
|
| 26 |
+
],
|
| 27 |
+
},
|
| 28 |
+
{
|
| 29 |
+
key: 'benchmarks_category_qa',
|
| 30 |
+
benchmarks: [
|
| 31 |
+
{ titleKey: 'benchmark_fquad_title', descKey: 'benchmark_fquad_description', link: 'https://arxiv.org/pdf/2002.06071', metrics: 'F1 Score, Exact Match Ratio' },
|
| 32 |
+
{ titleKey: 'benchmark_french_boolq_title', descKey: 'benchmark_french_boolq_description', link: 'https://huggingface.co/datasets/manu/french_boolq', metrics: 'Accuracy' },
|
| 33 |
+
{ titleKey: 'benchmark_piaf_title', descKey: 'benchmark_piaf_description', link: 'https://aclanthology.org/2020.lrec-1.673/', metrics: 'F1 Score, Exact Match Ratio' },
|
| 34 |
+
],
|
| 35 |
+
},
|
| 36 |
+
{
|
| 37 |
+
key: 'benchmarks_category_paraphrase',
|
| 38 |
+
benchmarks: [
|
| 39 |
+
{ titleKey: 'benchmark_paws_title', descKey: 'benchmark_paws_description', link: 'https://huggingface.co/datasets/google-research-datasets/paws-x', metrics: 'Accuracy' },
|
| 40 |
+
{ titleKey: 'benchmark_qfrblimp_title', descKey: 'benchmark_qfrblimp_description', link: 'https://github.com/davebulaval/FrBLiMP', metrics: 'Accuracy' },
|
| 41 |
+
],
|
| 42 |
+
},
|
| 43 |
+
{
|
| 44 |
+
key: 'benchmarks_category_grammar',
|
| 45 |
+
benchmarks: [
|
| 46 |
+
{ titleKey: 'benchmark_multiblimp_title', descKey: 'benchmark_multiblimp_description', link: 'https://huggingface.co/datasets/jumelet/multiblimp', metrics: 'Accuracy' },
|
| 47 |
+
{ titleKey: 'benchmark_qfrcola_title', descKey: 'benchmark_qfrcola_description', link: 'https://github.com/davebulaval/qfrcola', metrics: 'Accuracy' },
|
| 48 |
+
],
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
key: 'benchmarks_category_similarity',
|
| 52 |
+
benchmarks: [
|
| 53 |
+
{ titleKey: 'benchmark_sts22_title', descKey: 'benchmark_sts22_description', link: 'https://huggingface.co/datasets/mteb/sts22-crosslingual-sts/viewer/fr', metrics: 'Pearson' },
|
| 54 |
+
],
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
key: 'benchmarks_category_wsd',
|
| 58 |
+
benchmarks: [
|
| 59 |
+
{ titleKey: 'benchmark_wsd_title', descKey: 'benchmark_wsd_description', link: 'https://huggingface.co/datasets/GETALP/flue', metrics: 'Exact Match Ratio' },
|
| 60 |
+
],
|
| 61 |
+
},
|
| 62 |
+
{
|
| 63 |
+
key: 'benchmarks_category_quebec',
|
| 64 |
+
benchmarks: [
|
| 65 |
+
{ titleKey: 'benchmark_qfrcore_title', descKey: 'benchmark_qfrcore_description', link: '', metrics: 'Accuracy' },
|
| 66 |
+
{ titleKey: 'benchmark_qfrcort_title', descKey: 'benchmark_qfrcort_description', link: '', metrics: 'Accuracy' },
|
| 67 |
+
],
|
| 68 |
+
},
|
| 69 |
+
{
|
| 70 |
+
key: 'benchmarks_category_coreference',
|
| 71 |
+
benchmarks: [
|
| 72 |
+
{ titleKey: 'benchmark_wino_x_lm_title', descKey: 'benchmark_wino_x_lm_description', link: 'https://huggingface.co/datasets/demelin/wino_x/viewer/lm_en_fr?views%5B%5D=lm_en_fr', metrics: 'Accuracy' },
|
| 73 |
+
{ titleKey: 'benchmark_wino_x_mt_title', descKey: 'benchmark_wino_x_mt_description', link: 'https://huggingface.co/datasets/demelin/wino_x/viewer/mt_en_fr', metrics: 'Accuracy' },
|
| 74 |
+
],
|
| 75 |
+
},
|
| 76 |
+
];
|
| 77 |
+
|
| 78 |
+
export default function Benchmarks() {
|
| 79 |
+
const { t } = useTranslation();
|
| 80 |
+
|
| 81 |
+
return (
|
| 82 |
+
<div>
|
| 83 |
+
<div className="max-w-5xl mx-auto px-2 py-3">
|
| 84 |
+
<p className="text-1.5xl text-left text-gray-800">
|
| 85 |
+
{t('benchmarksIntro')}
|
| 86 |
+
</p>
|
| 87 |
+
</div>
|
| 88 |
+
<div className="space-y-10">
|
| 89 |
+
{categories.map((cat) => (
|
| 90 |
+
<div key={cat.key}>
|
| 91 |
+
<h2 className="text-xl font-bold text-blue-700 mb-4 border-l-4 border-blue-600 pl-3">
|
| 92 |
+
{t(cat.key)}
|
| 93 |
+
</h2>
|
| 94 |
+
<div className="space-y-4">
|
| 95 |
+
{cat.benchmarks.map((b) => (
|
| 96 |
+
<Benchmark
|
| 97 |
+
key={b.titleKey}
|
| 98 |
+
title={t(b.titleKey)}
|
| 99 |
+
link={b.link}
|
| 100 |
+
description={t(b.descKey)}
|
| 101 |
+
metrics={b.metrics}
|
| 102 |
+
/>
|
| 103 |
+
))}
|
| 104 |
+
</div>
|
| 105 |
+
</div>
|
| 106 |
+
))}
|
| 107 |
+
</div>
|
| 108 |
+
</div>
|
| 109 |
+
);
|
| 110 |
+
}
|
| 111 |
+
|
| 112 |
+
function Benchmark({ title, description, metrics, link }) {
|
| 113 |
+
const { t } = useTranslation();
|
| 114 |
+
|
| 115 |
+
return (
|
| 116 |
+
<div className="p-6 bg-white border border-gray-200 rounded-lg shadow-sm">
|
| 117 |
+
<h3 className="text-xl font-semibold text-blue-700 mb-2 border-b-2 border-blue-500 inline-block">
|
| 118 |
+
{link ? (
|
| 119 |
+
<Link href={link} className="hover:underline">
|
| 120 |
+
{title}
|
| 121 |
+
</Link>
|
| 122 |
+
) : (
|
| 123 |
+
title
|
| 124 |
+
)}
|
| 125 |
+
</h3>
|
| 126 |
+
<p className="text-gray-700 mb-2">{description}</p>
|
| 127 |
+
<p className="text-sm text-gray-500">
|
| 128 |
+
<span className="font-medium">{t('metrics')}</span> {metrics}
|
| 129 |
+
</p>
|
| 130 |
+
</div>
|
| 131 |
+
);
|
| 132 |
+
}
|
frontend/src/app/components/BigBlueButton.js
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
|
| 3 |
+
export default function BigBlueButton({ children, onClick, disabled }) {
|
| 4 |
+
return (
|
| 5 |
+
<button
|
| 6 |
+
onClick={onClick}
|
| 7 |
+
disabled={disabled}
|
| 8 |
+
className={`px-4 py-2 text-white text-base font-medium rounded-md shadow-sm focus:outline-none focus:ring-2 ${
|
| 9 |
+
disabled
|
| 10 |
+
? "bg-gray-400 cursor-not-allowed"
|
| 11 |
+
: "bg-blue-500 hover:bg-blue-600 focus:ring-blue-300"
|
| 12 |
+
}`}
|
| 13 |
+
>
|
| 14 |
+
{children}
|
| 15 |
+
</button>
|
| 16 |
+
);
|
| 17 |
+
}
|
frontend/src/app/components/ClientHeader.js
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
'use client';
|
| 2 |
+
|
| 3 |
+
import '../i18n';
|
| 4 |
+
import { useTranslation } from 'react-i18next';
|
| 5 |
+
import Taskbar from './taskbar';
|
| 6 |
+
import { LanguageSwitcher } from './LanguageSwitcher';
|
| 7 |
+
|
| 8 |
+
export default function ClientHeader() {
|
| 9 |
+
useTranslation();
|
| 10 |
+
|
| 11 |
+
return (
|
| 12 |
+
<header className="flex items-center justify-between px-4 py-3 shadow">
|
| 13 |
+
<Taskbar />
|
| 14 |
+
<LanguageSwitcher />
|
| 15 |
+
</header>
|
| 16 |
+
);
|
| 17 |
+
}
|
frontend/src/app/components/CodeBlock.js
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
export default function CodeBlock({children}){
|
| 2 |
+
return (
|
| 3 |
+
<pre className="bg-gray-100 p-4 rounded-md overflow-x-auto text-sm text-gray-800 mt-4">
|
| 4 |
+
<code className="font-mono">
|
| 5 |
+
{children}
|
| 6 |
+
</code>
|
| 7 |
+
</pre>
|
| 8 |
+
|
| 9 |
+
);
|
| 10 |
+
};
|
frontend/src/app/components/ErrorMessage.js
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
export default function ErrorMessage({children,condition}){
|
| 2 |
+
return(
|
| 3 |
+
<div className="pt-2">
|
| 4 |
+
<div className="pt-2 space-y-2">
|
| 5 |
+
{condition && (
|
| 6 |
+
<div className="text-red-600 text-sm font-medium">
|
| 7 |
+
{children}
|
| 8 |
+
</div>
|
| 9 |
+
)}
|
| 10 |
+
</div>
|
| 11 |
+
</div>
|
| 12 |
+
);
|
| 13 |
+
|
| 14 |
+
}
|
frontend/src/app/components/LanguageSwitcher.js
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
'use client';
|
| 2 |
+
|
| 3 |
+
import {useTranslation} from 'react-i18next';
|
| 4 |
+
|
| 5 |
+
export function LanguageSwitcher() {
|
| 6 |
+
const {i18n} = useTranslation();
|
| 7 |
+
const currentLang = i18n.language?.startsWith('fr') ? 'fr' : 'en';
|
| 8 |
+
|
| 9 |
+
const btnStyle = (lng) =>
|
| 10 |
+
lng === currentLang
|
| 11 |
+
? 'px-2 py-1 rounded-lg border-2 border-blue-600 bg-blue-600 text-white font-semibold'
|
| 12 |
+
: 'px-2 py-1 rounded-lg border border-gray-300 text-gray-500 hover:border-gray-400';
|
| 13 |
+
|
| 14 |
+
return (
|
| 15 |
+
<div className="flex space-x-1">
|
| 16 |
+
<button onClick={() => i18n.changeLanguage('en')} className={btnStyle('en')}>
|
| 17 |
+
EN
|
| 18 |
+
</button>
|
| 19 |
+
<button onClick={() => i18n.changeLanguage('fr')} className={btnStyle('fr')}>
|
| 20 |
+
FR
|
| 21 |
+
</button>
|
| 22 |
+
</div>
|
| 23 |
+
);
|
| 24 |
+
}
|
frontend/src/app/components/Modal.js
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"use client";
|
| 2 |
+
import BigBlueButton from "./BigBlueButton";
|
| 3 |
+
import { useTranslation } from "react-i18next";
|
| 4 |
+
|
| 5 |
+
export default function Modal({ children, onClose }) {
|
| 6 |
+
const { t } = useTranslation();
|
| 7 |
+
|
| 8 |
+
return (
|
| 9 |
+
<div
|
| 10 |
+
className="fixed inset-0 bg-gray-600 bg-opacity-25 overflow-y-auto h-full w-full flex items-center justify-center z-50"
|
| 11 |
+
style={{ backgroundColor: 'rgba(75, 85, 99, 0.55)' }}
|
| 12 |
+
onClick={(e) => { if (e.target === e.currentTarget) onClose(); }}
|
| 13 |
+
onKeyDown={(e) => { if (e.key === 'Escape') onClose(); }}
|
| 14 |
+
role="dialog"
|
| 15 |
+
aria-modal="true"
|
| 16 |
+
tabIndex={-1}
|
| 17 |
+
>
|
| 18 |
+
<div className="p-8 border w-96 shadow-lg rounded-md bg-white">
|
| 19 |
+
<div className="text-center text-black">
|
| 20 |
+
{children}
|
| 21 |
+
<div className="flex justify-center mt-4">
|
| 22 |
+
<BigBlueButton onClick={onClose}>
|
| 23 |
+
{t('close')}
|
| 24 |
+
</BigBlueButton>
|
| 25 |
+
</div>
|
| 26 |
+
</div>
|
| 27 |
+
</div>
|
| 28 |
+
</div>
|
| 29 |
+
);
|
| 30 |
+
}
|
frontend/src/app/components/ModalManager.js
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
'use client';
|
| 2 |
+
|
| 3 |
+
import { useSearchParams, useRouter } from 'next/navigation';
|
| 4 |
+
import Modal from './Modal';
|
| 5 |
+
import SubmitForm from './SubmitForm';
|
| 6 |
+
|
| 7 |
+
export default function ModalManager() {
|
| 8 |
+
const searchParams = useSearchParams();
|
| 9 |
+
const submitModal = searchParams.get("show") === "submit";
|
| 10 |
+
const router = useRouter();
|
| 11 |
+
|
| 12 |
+
const handleClose = () => {
|
| 13 |
+
const newUrl = window.location.pathname;
|
| 14 |
+
router.push(newUrl);
|
| 15 |
+
};
|
| 16 |
+
|
| 17 |
+
return (
|
| 18 |
+
<>
|
| 19 |
+
{submitModal && (
|
| 20 |
+
<Modal onClose={handleClose}>
|
| 21 |
+
<SubmitForm />
|
| 22 |
+
</Modal>
|
| 23 |
+
)}
|
| 24 |
+
</>
|
| 25 |
+
);
|
| 26 |
+
}
|
frontend/src/app/components/SubmitForm.js
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
'use client';
|
| 2 |
+
|
| 3 |
+
import { useState } from "react";
|
| 4 |
+
import { useRouter } from "next/navigation";
|
| 5 |
+
import ErrorMessage from "./ErrorMessage";
|
| 6 |
+
import { BACKEND_ADDRESS } from "@/app/resources/ResourcesPaths";
|
| 7 |
+
import { Trans } from 'react-i18next';
|
| 8 |
+
import BigBlueButton from "./BigBlueButton";
|
| 9 |
+
import { useTranslation } from 'react-i18next';
|
| 10 |
+
|
| 11 |
+
export default function SubmitForm() {
|
| 12 |
+
const { t } = useTranslation();
|
| 13 |
+
const router = useRouter();
|
| 14 |
+
|
| 15 |
+
const [requiredVisible, setRequiredVisible] = useState(false);
|
| 16 |
+
const [email, setEmail] = useState('');
|
| 17 |
+
const [displayName, setDisplayName] = useState('');
|
| 18 |
+
const [file, setFile] = useState(null);
|
| 19 |
+
const [isSubmitting, setIsSubmitting] = useState(false);
|
| 20 |
+
const [submitStatus, setSubmitStatus] = useState(null); // 'success' | 'error'
|
| 21 |
+
const [errorMessage, setErrorMessage] = useState('');
|
| 22 |
+
const [submissionId, setSubmissionId] = useState(null);
|
| 23 |
+
|
| 24 |
+
const handleFileChange = (e) => {
|
| 25 |
+
setFile(e.target.files[0]);
|
| 26 |
+
};
|
| 27 |
+
|
| 28 |
+
const submitResults = async () => {
|
| 29 |
+
if (!email || !displayName || !file) {
|
| 30 |
+
setRequiredVisible(true);
|
| 31 |
+
return;
|
| 32 |
+
}
|
| 33 |
+
if (!file.name.toLowerCase().endsWith('.zip')) {
|
| 34 |
+
alert(t('submit_zipAlert'));
|
| 35 |
+
return;
|
| 36 |
+
}
|
| 37 |
+
|
| 38 |
+
setRequiredVisible(false);
|
| 39 |
+
setIsSubmitting(true);
|
| 40 |
+
|
| 41 |
+
const formData = new FormData();
|
| 42 |
+
formData.append('email', email);
|
| 43 |
+
formData.append('display_name', displayName);
|
| 44 |
+
formData.append('predictions_zip', file);
|
| 45 |
+
|
| 46 |
+
try {
|
| 47 |
+
const res = await fetch(`${BACKEND_ADDRESS}/submit`, {
|
| 48 |
+
method: "POST",
|
| 49 |
+
body: formData,
|
| 50 |
+
});
|
| 51 |
+
if (!res.ok) {
|
| 52 |
+
const err = await res.json().catch(() => null);
|
| 53 |
+
throw new Error(err?.detail || `HTTP ${res.status}`);
|
| 54 |
+
}
|
| 55 |
+
const json = await res.json();
|
| 56 |
+
const id = json.submission_id;
|
| 57 |
+
setSubmissionId(id);
|
| 58 |
+
localStorage.setItem('last_result_file', `${id}.json`);
|
| 59 |
+
localStorage.setItem('just_submitted', 'true');
|
| 60 |
+
setSubmitStatus('success');
|
| 61 |
+
} catch (err) {
|
| 62 |
+
setErrorMessage(err.message);
|
| 63 |
+
setSubmitStatus('error');
|
| 64 |
+
} finally {
|
| 65 |
+
setIsSubmitting(false);
|
| 66 |
+
}
|
| 67 |
+
};
|
| 68 |
+
|
| 69 |
+
const renderModal = () => {
|
| 70 |
+
if (submitStatus === 'success') {
|
| 71 |
+
return (
|
| 72 |
+
<div className="fixed inset-0 flex items-center justify-center bg-black bg-opacity-50">
|
| 73 |
+
<div className="bg-white p-6 rounded-2xl shadow-lg max-w-sm text-center">
|
| 74 |
+
<h3 className="text-xl font-semibold text-green-600">
|
| 75 |
+
{t('submit_successTitle')}
|
| 76 |
+
</h3>
|
| 77 |
+
<p className="mt-2">{t('submit_successMessage')}</p>
|
| 78 |
+
<BigBlueButton
|
| 79 |
+
className="mt-4 px-4 py-2 rounded-full shadow hover:shadow-md"
|
| 80 |
+
onClick={() => router.push(`/results/${submissionId}`)}
|
| 81 |
+
>
|
| 82 |
+
{t('submit_checkResults')}
|
| 83 |
+
</BigBlueButton>
|
| 84 |
+
</div>
|
| 85 |
+
</div>
|
| 86 |
+
);
|
| 87 |
+
}
|
| 88 |
+
if (submitStatus === 'error') {
|
| 89 |
+
return (
|
| 90 |
+
<div className="fixed inset-0 flex items-center justify-center bg-black bg-opacity-50">
|
| 91 |
+
<div className="bg-white p-6 rounded-2xl shadow-lg max-w-sm text-center">
|
| 92 |
+
<h3 className="text-xl font-semibold text-red-600">
|
| 93 |
+
{t('submit_errorTitle')}
|
| 94 |
+
</h3>
|
| 95 |
+
<p className="mt-2">
|
| 96 |
+
<Trans i18nKey="submit_errorMessage" values={{ errorMessage }}>
|
| 97 |
+
Submission error: {{ errorMessage }}
|
| 98 |
+
</Trans>
|
| 99 |
+
</p>
|
| 100 |
+
<button
|
| 101 |
+
className="mt-4 px-4 py-2 rounded-full shadow hover:shadow-md"
|
| 102 |
+
onClick={() => setSubmitStatus(null)}
|
| 103 |
+
>
|
| 104 |
+
{t('submit_closeButton')}
|
| 105 |
+
</button>
|
| 106 |
+
</div>
|
| 107 |
+
</div>
|
| 108 |
+
);
|
| 109 |
+
}
|
| 110 |
+
return null;
|
| 111 |
+
};
|
| 112 |
+
|
| 113 |
+
return (
|
| 114 |
+
<div className="relative">
|
| 115 |
+
<div className="space-y-6 bg-white rounded-xl shadow-md p-6 w-full max-w-xl mx-auto border border-gray-200">
|
| 116 |
+
<h2 className="text-2xl font-semibold text-gray-800 text-center">
|
| 117 |
+
{t('submit_formTitle')}
|
| 118 |
+
</h2>
|
| 119 |
+
|
| 120 |
+
<div className="space-y-2">
|
| 121 |
+
<label htmlFor="email" className="block text-sm font-medium text-gray-700">
|
| 122 |
+
{t('submit_labelEmail')}
|
| 123 |
+
</label>
|
| 124 |
+
<input
|
| 125 |
+
id="email"
|
| 126 |
+
type="email"
|
| 127 |
+
placeholder={t('submit_placeholderEmail')}
|
| 128 |
+
value={email}
|
| 129 |
+
onChange={(e) => setEmail(e.target.value)}
|
| 130 |
+
className="border border-gray-300 p-3 rounded-md w-full focus:ring-2 focus:ring-blue-500"
|
| 131 |
+
/>
|
| 132 |
+
</div>
|
| 133 |
+
|
| 134 |
+
<div className="space-y-2">
|
| 135 |
+
<label htmlFor="displayname" className="block text-sm font-medium text-gray-700">
|
| 136 |
+
{t('submit_labelDisplayName')}
|
| 137 |
+
</label>
|
| 138 |
+
<input
|
| 139 |
+
id="displayname"
|
| 140 |
+
type="text"
|
| 141 |
+
placeholder={t('submit_placeholderDisplayName')}
|
| 142 |
+
value={displayName}
|
| 143 |
+
onChange={(e) => setDisplayName(e.target.value)}
|
| 144 |
+
className="border border-gray-300 p-3 rounded-md w-full focus:ring-2 focus:ring-blue-500"
|
| 145 |
+
/>
|
| 146 |
+
</div>
|
| 147 |
+
|
| 148 |
+
<div className="space-y-2">
|
| 149 |
+
<label htmlFor="zipfile" className="block text-sm font-medium text-gray-700">
|
| 150 |
+
{t('submit_labelZip')}
|
| 151 |
+
</label>
|
| 152 |
+
<input
|
| 153 |
+
id="zipfile"
|
| 154 |
+
type="file"
|
| 155 |
+
accept=".zip"
|
| 156 |
+
onChange={handleFileChange}
|
| 157 |
+
className="border border-gray-300 p-3 rounded-md w-full focus:ring-2 focus:ring-blue-500"
|
| 158 |
+
/>
|
| 159 |
+
</div>
|
| 160 |
+
|
| 161 |
+
<ErrorMessage condition={requiredVisible}>
|
| 162 |
+
⚠️ Email, display name & ZIP are required.
|
| 163 |
+
</ErrorMessage>
|
| 164 |
+
|
| 165 |
+
<button
|
| 166 |
+
onClick={submitResults}
|
| 167 |
+
disabled={isSubmitting}
|
| 168 |
+
className="w-full bg-blue-600 text-white py-3 rounded-xl hover:bg-blue-700 mt-4"
|
| 169 |
+
>
|
| 170 |
+
{isSubmitting ? t('submit_submitting') : t('submit_button')}
|
| 171 |
+
</button>
|
| 172 |
+
|
| 173 |
+
{renderModal()}
|
| 174 |
+
</div>
|
| 175 |
+
</div>
|
| 176 |
+
);
|
| 177 |
+
}
|
frontend/src/app/components/taskbar.js
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
'use client';
|
| 2 |
+
|
| 3 |
+
import '../i18n';
|
| 4 |
+
import Link from 'next/link';
|
| 5 |
+
import { usePathname } from 'next/navigation';
|
| 6 |
+
import { FileText, Menu, X } from 'lucide-react';
|
| 7 |
+
import { useTranslation } from 'react-i18next';
|
| 8 |
+
import { useState } from 'react';
|
| 9 |
+
|
| 10 |
+
export default function Taskbar() {
|
| 11 |
+
const { t } = useTranslation();
|
| 12 |
+
const pathname = usePathname();
|
| 13 |
+
const [menuOpen, setMenuOpen] = useState(false);
|
| 14 |
+
|
| 15 |
+
const linkStyle = (path) =>
|
| 16 |
+
(pathname === path || (path !== '/' && pathname.startsWith(path)))
|
| 17 |
+
? 'text-blue-600 font-semibold border-b-2 border-blue-600 pb-1'
|
| 18 |
+
: 'text-gray-700 hover:text-blue-500';
|
| 19 |
+
|
| 20 |
+
const mobileLink = (path) =>
|
| 21 |
+
(pathname === path || (path !== '/' && pathname.startsWith(path)))
|
| 22 |
+
? 'block text-blue-600 font-semibold py-2'
|
| 23 |
+
: 'block text-gray-700 hover:text-blue-500 py-2';
|
| 24 |
+
|
| 25 |
+
const links = (
|
| 26 |
+
<>
|
| 27 |
+
<Link href="/guide" className={linkStyle('/guide')} onClick={() => setMenuOpen(false)}>
|
| 28 |
+
{t('nav_guide')}
|
| 29 |
+
</Link>
|
| 30 |
+
<Link href="/FAQ" className={linkStyle('/FAQ')} onClick={() => setMenuOpen(false)}>
|
| 31 |
+
{t('nav_faq')}
|
| 32 |
+
</Link>
|
| 33 |
+
<Link href="/contact" className={linkStyle('/contact')} onClick={() => setMenuOpen(false)}>
|
| 34 |
+
{t('nav_contact')}
|
| 35 |
+
</Link>
|
| 36 |
+
<Link href={`${pathname}?show=submit`} className={linkStyle('/submit')} onClick={() => setMenuOpen(false)}>
|
| 37 |
+
{t('nav_submit')}
|
| 38 |
+
</Link>
|
| 39 |
+
<Link href="/results" className={linkStyle('/results')} onClick={() => setMenuOpen(false)}>
|
| 40 |
+
{t('nav_results')}
|
| 41 |
+
</Link>
|
| 42 |
+
<Link href="/benchmarks" className={linkStyle('/benchmarks')} onClick={() => setMenuOpen(false)}>
|
| 43 |
+
{t('nav_tasks')}
|
| 44 |
+
</Link>
|
| 45 |
+
<Link href="/leaderboard" className={linkStyle('/leaderboard')} onClick={() => setMenuOpen(false)}>
|
| 46 |
+
{t('nav_leaderboard')}
|
| 47 |
+
</Link>
|
| 48 |
+
<Link href="https://huggingface.co/datasets/graalul/COLE-public" target="_blank" rel="noopener noreferrer" className={linkStyle('/hf')} onClick={() => setMenuOpen(false)}>
|
| 49 |
+
{t('nav_datasets')}
|
| 50 |
+
</Link>
|
| 51 |
+
</>
|
| 52 |
+
);
|
| 53 |
+
|
| 54 |
+
const mobileLinks = (
|
| 55 |
+
<>
|
| 56 |
+
<Link href="/guide" className={mobileLink('/guide')} onClick={() => setMenuOpen(false)}>
|
| 57 |
+
{t('nav_guide')}
|
| 58 |
+
</Link>
|
| 59 |
+
<Link href="/FAQ" className={mobileLink('/FAQ')} onClick={() => setMenuOpen(false)}>
|
| 60 |
+
{t('nav_faq')}
|
| 61 |
+
</Link>
|
| 62 |
+
<Link href="/contact" className={mobileLink('/contact')} onClick={() => setMenuOpen(false)}>
|
| 63 |
+
{t('nav_contact')}
|
| 64 |
+
</Link>
|
| 65 |
+
<Link href={`${pathname}?show=submit`} className={mobileLink('/submit')} onClick={() => setMenuOpen(false)}>
|
| 66 |
+
{t('nav_submit')}
|
| 67 |
+
</Link>
|
| 68 |
+
<Link href="/results" className={mobileLink('/results')} onClick={() => setMenuOpen(false)}>
|
| 69 |
+
{t('nav_results')}
|
| 70 |
+
</Link>
|
| 71 |
+
<Link href="/benchmarks" className={mobileLink('/benchmarks')} onClick={() => setMenuOpen(false)}>
|
| 72 |
+
{t('nav_tasks')}
|
| 73 |
+
</Link>
|
| 74 |
+
<Link href="/leaderboard" className={mobileLink('/leaderboard')} onClick={() => setMenuOpen(false)}>
|
| 75 |
+
{t('nav_leaderboard')}
|
| 76 |
+
</Link>
|
| 77 |
+
<Link href="https://huggingface.co/datasets/graalul/COLE-public" className={mobileLink('/hf')} onClick={() => setMenuOpen(false)}>
|
| 78 |
+
{t('nav_datasets')}
|
| 79 |
+
</Link>
|
| 80 |
+
</>
|
| 81 |
+
);
|
| 82 |
+
|
| 83 |
+
return (
|
| 84 |
+
<nav className="w-full py-4 mx-auto max-w-7xl px-4">
|
| 85 |
+
<div className="flex justify-between items-center">
|
| 86 |
+
<div className="flex items-center">
|
| 87 |
+
<Link href="/">
|
| 88 |
+
<span className="text-xl font-bold text-blue-600">{t('nav_home')}</span>
|
| 89 |
+
</Link>
|
| 90 |
+
<Link href="/papers" className="ml-2">
|
| 91 |
+
<FileText className="w-6 h-6 text-blue-600 hover:text-blue-500" />
|
| 92 |
+
</Link>
|
| 93 |
+
</div>
|
| 94 |
+
|
| 95 |
+
{/* Desktop nav */}
|
| 96 |
+
<div className="hidden md:flex space-x-6">
|
| 97 |
+
{links}
|
| 98 |
+
</div>
|
| 99 |
+
|
| 100 |
+
{/* Mobile hamburger */}
|
| 101 |
+
<button
|
| 102 |
+
className="md:hidden text-gray-700"
|
| 103 |
+
onClick={() => setMenuOpen(!menuOpen)}
|
| 104 |
+
>
|
| 105 |
+
{menuOpen ? <X className="w-6 h-6" /> : <Menu className="w-6 h-6" />}
|
| 106 |
+
</button>
|
| 107 |
+
</div>
|
| 108 |
+
|
| 109 |
+
{/* Mobile menu */}
|
| 110 |
+
{menuOpen && (
|
| 111 |
+
<div className="md:hidden mt-4 border-t border-gray-200 pt-4 space-y-1">
|
| 112 |
+
{mobileLinks}
|
| 113 |
+
</div>
|
| 114 |
+
)}
|
| 115 |
+
</nav>
|
| 116 |
+
);
|
| 117 |
+
}
|
frontend/src/app/contact/page.js
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
'use client';
|
| 2 |
+
|
| 3 |
+
import '../i18n';
|
| 4 |
+
import { useTranslation } from 'react-i18next';
|
| 5 |
+
|
| 6 |
+
export default function Contact() {
|
| 7 |
+
const { t } = useTranslation();
|
| 8 |
+
|
| 9 |
+
return (
|
| 10 |
+
<div className="max-w-5xl mx-auto px-6 py-3">
|
| 11 |
+
<h2 className="text-3xl font-bold text-center text-blue-700 border-b pb-4 mb-10">
|
| 12 |
+
{t('contact_title')}
|
| 13 |
+
</h2>
|
| 14 |
+
|
| 15 |
+
<p className="text-gray-700 mb-4 leading-relaxed">
|
| 16 |
+
{t('contact_paragraph')}
|
| 17 |
+
</p>
|
| 18 |
+
|
| 19 |
+
<div className="bg-gray-50 p-4 rounded-md border border-dashed border-blue-400">
|
| 20 |
+
<p className="text-sm text-gray-500 mb-2">
|
| 21 |
+
{t('contact_email_label')}
|
| 22 |
+
</p>
|
| 23 |
+
<a
|
| 24 |
+
href="mailto:david.beauchemin@ift.ulaval.ca"
|
| 25 |
+
className="text-blue-600 font-mono text-lg hover:underline"
|
| 26 |
+
>
|
| 27 |
+
david.beauchemin@ift.ulaval.ca
|
| 28 |
+
</a>
|
| 29 |
+
</div>
|
| 30 |
+
</div>
|
| 31 |
+
);
|
| 32 |
+
}
|
frontend/src/app/en/translation.json
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"siteTitle": "COLE",
|
| 3 |
+
"welcome": "Welcome to COLE!",
|
| 4 |
+
"upload": "Upload",
|
| 5 |
+
"submit": "Submit",
|
| 6 |
+
"results": "Results",
|
| 7 |
+
"contact": "Contact",
|
| 8 |
+
"contactUs": "Contact us",
|
| 9 |
+
"guide": "Guide",
|
| 10 |
+
"faq": "FAQ",
|
| 11 |
+
"submitResults": "Submit your results",
|
| 12 |
+
"ourTasks": "Our tasks",
|
| 13 |
+
"ourDatasets": "Our datasets",
|
| 14 |
+
"leaderboard": "COLE Leaderboard",
|
| 15 |
+
"errorOccurred": "An error occurred",
|
| 16 |
+
"close": "Close",
|
| 17 |
+
"details": "Details",
|
| 18 |
+
"benchmarksIntro": "COLE is constituted of 23 tasks, each of them aims to test one or more facets of language understanding in machine learning. Below are each of the tasks in more detail.",
|
| 19 |
+
"metrics": "Metric(s) :",
|
| 20 |
+
"benchmark_alloCine_title": "Allo-ciné.ca",
|
| 21 |
+
"benchmark_alloCine_description": "Allo-ciné tests language understanding in sentiment classification by feeding movie reviews which can be either positive and negative. The task consists in giving the correct sentiment for each review.",
|
| 22 |
+
"benchmark_lingnli_title": "LingNLI",
|
| 23 |
+
"benchmark_lingnli_description": "LingNLI is a Natural Language Inference corpus collected by putting a linguist 'in the loop' to dynamically introduce novel constraints during data collection, aiming to mitigate the systematic gaps and biases often found in crowdsourced datasets.",
|
| 24 |
+
"benchmark_daccord_title": "DACCORD",
|
| 25 |
+
"benchmark_daccord_description":"Predict whether the two sentences are compatible (0) or contradict each other (1).",
|
| 26 |
+
"benchmark_fquad_title": "FQuAD - French Question Answering Dataset",
|
| 27 |
+
"benchmark_fquad_description": "FQuAD is question/answer pairs built on high-quality Wikipedia articles. The goal in this task is to accurately predict if the answer to the question can be found in the provided article.",
|
| 28 |
+
"benchmark_french_boolq_title": "French BoolQ",
|
| 29 |
+
"benchmark_french_boolq_description": "Answer whether the context allows answering 'yes' to the question (1) or only 'no' or doesn't answer (0).",
|
| 30 |
+
"benchmark_fracas_title": "FraCaS",
|
| 31 |
+
"benchmark_fracas_description": "Natural language inference task : predict the relation between two sentences (implication, neutral, contradiction).",
|
| 32 |
+
"benchmark_gqnli_title": "GQNLI-Fr - The Generalized Quantifier NLI Challenge Dataset",
|
| 33 |
+
"benchmark_gqnli_description": "The dataset consists of carefully constructed premise-hypothesis pairs. Each hypothesis logically follows from the premise, contradicts it, or is neutral.",
|
| 34 |
+
"benchmark_mms_title": "MMS - Massive Multilingual Sentiment Corpora",
|
| 35 |
+
"benchmark_mms_description": "A massive multilingual sentiment analysis corpus in 27 languages.",
|
| 36 |
+
"benchmark_mnli_nineeleven_fr_mt_title": "MNLI-NineEleven-FR-MT",
|
| 37 |
+
"benchmark_mnli_nineeleven_fr_mt_description": "Predict the relation between two sentences (entailment, neutral, contradiction).",
|
| 38 |
+
"benchmark_paws_title": "PAWS: Paraphrase Adversaries from Word Scrambling",
|
| 39 |
+
"benchmark_paws_description": "This task aims to test paraphrase identification by giving two sentences and having the model define if these sentences are equivalent in meaning or not.",
|
| 40 |
+
"benchmark_piaf_title": "PIAF - The French-Language Dataset of Questions-Answers",
|
| 41 |
+
"benchmark_piaf_description": "This task consists of pairs of questions and text answers with information of where in the answer is the truly relevant information.",
|
| 42 |
+
"benchmark_qfrblimp_title": "QFrBLiMP - a Quebec-French Linguistic minimal pairs",
|
| 43 |
+
"benchmark_qfrblimp_description": "This task gives the model sentence pairs. The goal is to determine if the sentences are semantically equivalent, even with slightly different syntax and words.",
|
| 44 |
+
"benchmark_qfrcola_title": "QFrCoLA - a Quebec-French Corpus of Linguistic Acceptability Judgments",
|
| 45 |
+
"benchmark_qfrcola_description": "QFrCoLA is a French dataset sourced from multiple linguistic sites such as académie-française.fr and vitrinelinguistique.com. It aims to test models’ ability to determine grammatical correctness. The answer is a binary label indicating if the sentence is correct or not.",
|
| 46 |
+
"benchmark_qfrcore_title": "QFRCoRE: Quebec-French Corpus of Regional Expressions",
|
| 47 |
+
"benchmark_qfrcore_description": "Match the Quebec expression with its definition from a list.",
|
| 48 |
+
"benchmark_qfrcort_title": "QFRCoRT: Quebec-French Corpus of Regional Terms",
|
| 49 |
+
"benchmark_qfrcort_description": "Match the Quebec term with its definition from a list.",
|
| 50 |
+
"benchmark_rte3_french_title": "RTE3-French",
|
| 51 |
+
"benchmark_rte3_french_description": "Predict the relation between two sentences (entailment, neutral, contradiction).",
|
| 52 |
+
"benchmark_sickfr_title": "Sick-FR - French Sentences Involving Compositional Knowledge",
|
| 53 |
+
"benchmark_sickfr_description": "This task also has pairs of sentences annotated on two dimensions: relatedness (scored 1 to 5) and entailment (choices: entails, contradicts, neutral).",
|
| 54 |
+
"benchmark_sts22_title": "Sts22-Crosslingual - Multilingual News Article Similarity",
|
| 55 |
+
"benchmark_sts22_description": "This task evaluates whether pairs of news articles, written in different languages, cover the same story. It focuses on document-level similarity, where systems rate article pairs on a 4-point scale from most to least similar.",
|
| 56 |
+
"benchmark_wino_x_lm_title": "WiNo-X LM - Pronoun Resolution ",
|
| 57 |
+
"benchmark_wino_x_lm_description": "Predict the correct referent (1 or 2) of a pronoun in a sentence by choosing between two candidates.",
|
| 58 |
+
"benchmark_wino_x_mt_title": "WiNo-X MT - Pronoun Resolution ",
|
| 59 |
+
"benchmark_wino_x_mt_description": "Choose which of two French translations uses the correct pronoun (il/elle) based on the intended referent in the original English sentence.",
|
| 60 |
+
"benchmark_xnli_title": "XNLI - The Cross-Lingual NLI Corpus",
|
| 61 |
+
"benchmark_xnli_description": "This task consists of pairs of sentences where the goal is to determine the relation between the two: entailment, neutral, or contradiction.",
|
| 62 |
+
"benchmark_wsd_title": "WSD-Fr : Word Sense Disambiguation",
|
| 63 |
+
"benchmark_wsd_description": "WSD-Fr is a word sense disambiguation task where the model must identify the correct meaning of an ambiguous verb in context, as part of the FLUE benchmark.",
|
| 64 |
+
"benchmark_multiblimp_title": "MultiBLiMP-Fr - Multilingual Linguistic Minimal Pairs",
|
| 65 |
+
"benchmark_multiblimp_description": "A grammaticality judgment task using the French subset of the Multilingual Benchmark of Linguistic Minimal Pairs . Each instance is a minimal pair—one grammatical and one ungrammatical—differing by a single targeted feature. The model must select the grammatically correct sentence. This task probes fine-grained knowledge of French syntax, morphology, and agreement.",
|
| 66 |
+
"home_whatIsColleTitle": "What is COLE?",
|
| 67 |
+
"home_paragraph1": "COLE is a multidisciplinary French Natural Language Understanding benchmark ( <1>NLU</1> ). It takes inspiration from its predecessors <3>GLUE</3> and <5>SuperGLUE</5> to build a benchmark capable of evaluating models in the French language on multiple topics of language understanding. See <7>our paper</7> for more information.",
|
| 68 |
+
"home_paragraph2": "The COLE benchmark is built with multiple goals in mind. First, it aims to provide a solid and complete French alternative for benchmarking models on NLU tasks. Second, it provides the user with multiple datasets, all usable through HuggingFace’s libraries, to train or fine-tune models on specific tasks.",
|
| 69 |
+
"home_paragraph3": "We have made the choice to hide test labels to discourage cheating or overfitting on test data. To get results on your test data, you may send us your results as explained in <1>our guide</1>.",
|
| 70 |
+
"guide_title": "Using the COLE Benchmark",
|
| 71 |
+
"guide_section1_title": "Training and Testing",
|
| 72 |
+
"guide_section1_para1": "The COLE benchmark can be used to train and/or test models on multiple tasks. To train or fine-tune a model, you can fetch the train, validation and test data splits from our <0>Hugging Face public repository</0>. We recommend using Hugging Face’s libraries to simplify the process.",
|
| 73 |
+
"guide_section1_para2": "To test a model, you also need to fetch the data in the same way. Once done, your model should infer predictions for each line in the test split. Our repository includes benchmark evaluation scripts for each dataset. You only need to plug in your model's inference method using the HuggingFace Model interface. Our inference scripts are available on our <0>GitHub Repository</0>.",
|
| 74 |
+
"guide_section1_para3": "If you prefer to run inference separately, please ensure that the predictions are formatted correctly before submitting them for evaluation (see our \"Formatting the Dataset\" section).",
|
| 75 |
+
"guide_section2_title": "Formatting the Dataset",
|
| 76 |
+
"guide_section2_para1": "Before submitting your results, make sure your output is properly formatted so that our systems can process it. The expected format is a nested JSON dictionary as shown below. Once formatted, compress your JSON file into a ZIP archive (.zip) and upload it via the submission form.",
|
| 77 |
+
"faq_title": "Frequently Asked Questions",
|
| 78 |
+
"faqs": [
|
| 79 |
+
{
|
| 80 |
+
"question": "How can I evaluate my model?",
|
| 81 |
+
"answer": "Format your model predictions as a JSON file following the format described in the Guide, compress it into a ZIP archive, and upload it via the submission form on the website. The system will automatically evaluate your predictions against the hidden test labels and display the results."
|
| 82 |
+
},
|
| 83 |
+
{
|
| 84 |
+
"question": "Is COLE multilingual?",
|
| 85 |
+
"answer": "No, COLE is available only in French. The benchmark is specifically designed to evaluate NLU models in the French language."
|
| 86 |
+
},
|
| 87 |
+
{
|
| 88 |
+
"question": "What format should my predictions be in?",
|
| 89 |
+
"answer": "Your predictions should be a JSON file containing your model name, model URL, and a list of tasks with predictions arrays. Each task prediction array must match the order and size of the corresponding test split. See the Guide page for the exact format. The JSON file must then be compressed into a ZIP archive before submission."
|
| 90 |
+
},
|
| 91 |
+
{
|
| 92 |
+
"question": "Where can I find the test data?",
|
| 93 |
+
"answer": "The test data (without labels) is available on our HuggingFace repository at <code>graalul/COLE-public</code>. You can load any task using the datasets library: <code>load_dataset('graalul/COLE-public', 'task_name')</code>."
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"question": "Why are the test labels hidden?",
|
| 97 |
+
"answer": "To ensure fair evaluation and prevent overfitting on test data, we do not release test labels. Models are evaluated server-side against the hidden ground truth when you submit your predictions."
|
| 98 |
+
},
|
| 99 |
+
{
|
| 100 |
+
"question": "Can I evaluate on only a subset of tasks?",
|
| 101 |
+
"answer": "Yes, you can submit predictions for any subset of the 23 tasks. The leaderboard will show your scores on the tasks you submitted and N/A for the rest."
|
| 102 |
+
}
|
| 103 |
+
],
|
| 104 |
+
"contact_title": "Contact us",
|
| 105 |
+
"contact_paragraph": "If you have any questions, feedback, or suggestions regarding the COLE benchmark, feel free to reach out to us. We are happy to help — please note that response times may vary.",
|
| 106 |
+
"contact_email_label": "Email us at:",
|
| 107 |
+
|
| 108 |
+
"submit_formTitle": "Submit Your Results",
|
| 109 |
+
"submit_labelEmail": "Your Email",
|
| 110 |
+
"submit_placeholderEmail": "you@example.com",
|
| 111 |
+
"submit_labelDisplayName": "Display Name",
|
| 112 |
+
"submit_placeholderDisplayName": "Leaderboard Name",
|
| 113 |
+
"submit_labelFile": "Predictions ZIP",
|
| 114 |
+
"submit_labelZip" : "Select your results file",
|
| 115 |
+
"submit_requiredError": "⚠️ Email, display name & ZIP are required.",
|
| 116 |
+
"submit_zipAlert": "Please upload a ZIP (.zip) file.",
|
| 117 |
+
"submit_button": "Submit Your Results",
|
| 118 |
+
"submit_submitting": "Submitting...",
|
| 119 |
+
"submit_successTitle": "Success",
|
| 120 |
+
"submit_successMessage": "Your submission has been successfully sent!",
|
| 121 |
+
"submit_checkResults": "Check the results",
|
| 122 |
+
"submit_errorTitle": "Error ⚠️",
|
| 123 |
+
"submit_errorMessage": "Submission error: {{errorMessage}}",
|
| 124 |
+
"submit_closeButton": "Close",
|
| 125 |
+
"results_default_title": "No Results Yet",
|
| 126 |
+
"results_default_message": "Please submit a ZIP file to generate benchmark results.",
|
| 127 |
+
"results_loading": "⏳ Loading results...",
|
| 128 |
+
"results_page_title": "📊 Results for {{displayName}}",
|
| 129 |
+
"results_download": "Download JSON",
|
| 130 |
+
"results_no_results": "⚠️ No benchmark results found.",
|
| 131 |
+
"results_benchmark_label": "🧪 Benchmark: {{name}}",
|
| 132 |
+
"leaderboard_title": "Leaderboard",
|
| 133 |
+
"leaderboard_modelHeader": "Model Name",
|
| 134 |
+
"leaderboard_overallHeader": "Overall",
|
| 135 |
+
"leaderboard_avgScoreLabel": "(avg score)",
|
| 136 |
+
"leaderboard_notSpecified": "N/A",
|
| 137 |
+
"leaderboard_notSpecifiedTooltip": "This model was not evaluated on this task",
|
| 138 |
+
"leaderboard_modalTitle": "Results for {{name}}",
|
| 139 |
+
"leaderboard_closeButton": "Close",
|
| 140 |
+
"nav_home": "COLE",
|
| 141 |
+
"nav_guide": "Guide",
|
| 142 |
+
"nav_faq": "FAQ",
|
| 143 |
+
"nav_contact": "Contact us",
|
| 144 |
+
"nav_submit": "Submit your results",
|
| 145 |
+
"nav_tasks": "Our tasks",
|
| 146 |
+
"nav_results": "Results",
|
| 147 |
+
"nav_leaderboard": "COLE Leaderboard",
|
| 148 |
+
"nav_datasets": "Our datasets",
|
| 149 |
+
"papers_title": "Our papers",
|
| 150 |
+
"papers_arxiv_label": "COLE: a Comprehensive Benchmark for French Language Understanding Evaluation",
|
| 151 |
+
"papers_arxiv_authors": "David Beauchemin, Yan Tremblay, Mohamed Amine Youssef, Richard Khoury (arXiv:2510.05046, ICLR 2025 Workshop)",
|
| 152 |
+
"leaderboard_errorMessage": "Failed to load leaderboard data. Please try again later.",
|
| 153 |
+
"benchmarks_category_sentiment": "Sentiment Analysis",
|
| 154 |
+
"benchmarks_category_nli": "Natural Language Inference",
|
| 155 |
+
"benchmarks_category_qa": "Question Answering",
|
| 156 |
+
"benchmarks_category_paraphrase": "Paraphrase Detection",
|
| 157 |
+
"benchmarks_category_grammar": "Grammatical Judgment",
|
| 158 |
+
"benchmarks_category_similarity": "Semantic Similarity",
|
| 159 |
+
"benchmarks_category_wsd": "Word Sense Disambiguation",
|
| 160 |
+
"benchmarks_category_quebec": "Quebec French",
|
| 161 |
+
"benchmarks_category_coreference": "Coreference / Pronoun Resolution"
|
| 162 |
+
}
|
frontend/src/app/fr/translation.json
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"siteTitle": "COLE",
|
| 3 |
+
"welcome": "Bienvenue sur COLE !",
|
| 4 |
+
"upload": "Téléverser",
|
| 5 |
+
"submit": "Soumettre",
|
| 6 |
+
"results": "Résultats",
|
| 7 |
+
"contact": "Contact",
|
| 8 |
+
"contactUs": "Nous contacter",
|
| 9 |
+
"guide": "Guide",
|
| 10 |
+
"faq": "FAQ",
|
| 11 |
+
"submitResults": "Soumettre vos résultats",
|
| 12 |
+
"ourTasks": "Nos tâches",
|
| 13 |
+
"ourDatasets": "Nos jeux de données",
|
| 14 |
+
"leaderboard": "Classement COLE",
|
| 15 |
+
"errorOccurred": "Une erreur est survenue",
|
| 16 |
+
"close": "Fermer",
|
| 17 |
+
"details": "Détails",
|
| 18 |
+
"benchmarksIntro": "COLE est constitué de 23 tâches, chacune visant à tester une ou plusieurs facettes de la compréhension du langage en apprentissage automatique. Ci-dessous, chaque tâche est décrite en détail.",
|
| 19 |
+
"metrics": "Métrique(s) :",
|
| 20 |
+
"benchmark_alloCine_title": "Allo-ciné.ca",
|
| 21 |
+
"benchmark_alloCine_description": "Allo-ciné teste la compréhension du langage dans la classification des sentiments en fournissant des critiques de films pouvant être positives ou négatives. La tâche consiste à donner le sentiment correct pour chaque critique.",
|
| 22 |
+
"benchmark_lingnli_title": "LingNLI",
|
| 23 |
+
"benchmark_lingnli_description": "LingNLI est un corpus d'inférence en langage naturel collecté en faisant appel à un linguiste afin d'introduire de manière dynamique de nouvelles contraintes pendant la collecte des données, dans le but d'atténuer les lacunes et les biais systématiques souvent présents dans les ensembles de données issus du crowdsourcing.",
|
| 24 |
+
"benchmark_daccord_title": "DACCORD",
|
| 25 |
+
"benchmark_daccord_description": " Prédisez si les deux phrases sont compatibles (0) ou se contredisent (1). ",
|
| 26 |
+
"benchmark_fquad_title": "FQuAD - Corpus de questions-réponses français",
|
| 27 |
+
"benchmark_fquad_description": "FQuAD est un ensemble de paires question/réponse construit à partir d’articles Wikipédia de haute qualité. L’objectif est de prédire correctement si la réponse à la question se trouve réellement dans l’article fourni.",
|
| 28 |
+
"benchmark_french_boolq_title": "French BoolQ",
|
| 29 |
+
"benchmark_french_boolq_description": " Répondez si le contexte permet de répondre « oui » à la question (1) ou « non »/ne répond pas (0).",
|
| 30 |
+
"benchmark_fracas_title": "FraCaS",
|
| 31 |
+
"benchmark_fracas_description": "Tâche d'inférence en langage naturel : prédire la relation entre deux phrases (implication, neutralité, contradiction).",
|
| 32 |
+
"benchmark_gqnli_title": "GQNLI-Fr - Jeu de données Generalized Quantifier NLI Challenge",
|
| 33 |
+
"benchmark_gqnli_description": "Le jeu se compose de paires prémisse-hypothèse soigneusement construites. Chaque hypothèse découle logiquement de la prémisse, la contredit ou est neutre.",
|
| 34 |
+
"benchmark_mms_title": "MMS - Massive Multilingual Sentiment Corpora",
|
| 35 |
+
"benchmark_mms_description": "Un corpus multilingue massif d'analyse des sentiments en 27 langues.",
|
| 36 |
+
"benchmark_mnli_nineeleven_fr_mt_title": "MNLI-NineEleven-FR-MT",
|
| 37 |
+
"benchmark_mnli_nineeleven_fr_mt_description": "Prédisez la relation entre deux phrases (implication, neutre, contradiction).",
|
| 38 |
+
"benchmark_multiblimp_title": "MultiBLiMP-Fr - Paires minimales linguistiques en français",
|
| 39 |
+
"benchmark_multiblimp_description": "Une tâche de jugement de grammaticalité utilisant le sous-ensemble français du Multilingual Benchmark of Linguistic Minimal Pairs. Chaque instance est une paire minimale — l’une grammaticale et l’autre agrammaticale — ne différant que par une seule caractéristique ciblée. Le modèle doit sélectionner la phrase grammaticalement correcte. Cette tâche évalue les connaissances fines de la syntaxe, de la morphologie et des accords en français.",
|
| 40 |
+
"benchmark_paws_title": "PAWS : Paraphrase Adversaries from Word Scrambling",
|
| 41 |
+
"benchmark_paws_description": "Cette tâche vise à tester l’identification de paraphrases en donnant deux phrases et en demandant au modèle de définir si ces phrases sont équivalentes en sens ou non.",
|
| 42 |
+
"benchmark_piaf_title": "PIAF - Jeu de questions-réponses en français",
|
| 43 |
+
"benchmark_piaf_description": "Cette tâche consiste en paires de questions et de réponses textuelles avec l’indication de l’emplacement de l’information réellement pertinente dans la réponse.",
|
| 44 |
+
"benchmark_qfrblimp_title": "QFrBLiMP - Paires minimales linguistiques québécoises",
|
| 45 |
+
"benchmark_qfrblimp_description": "Cette tâche présente au modèle des paires de phrases. Le but est de déterminer si les phrases sont sémantiquement équivalentes, même avec une syntaxe et des mots légèrement différents.",
|
| 46 |
+
"benchmark_qfrcola_title": "QFrCoLA - Corpus québécois de jugements d’acceptabilité linguistique",
|
| 47 |
+
"benchmark_qfrcola_description": "QFrCoLA est un jeu de données français issu de plusieurs sites linguistiques tels qu’académie-française.fr et vitrinelinguistique.com. Il vise à tester la capacité des modèles à déterminer la correction grammaticale. La réponse est un label binaire indiquant si la phrase est correcte ou non.",
|
| 48 |
+
"benchmark_qfrcore_title": "QFRCoRE: Quebec-French Corpus of Regional Expressions",
|
| 49 |
+
"benchmark_qfrcore_description": "Associez l'expression québécoise à sa définition parmi une liste proposée.",
|
| 50 |
+
"benchmark_qfrcort_title": "QFRCoRT: Quebec-French Corpus of Regional Terms",
|
| 51 |
+
"benchmark_qfrcort_description": "Associez le terme québécois à sa définition parmi une liste proposée.",
|
| 52 |
+
"benchmark_rte3_french_title": "RTE3-Français",
|
| 53 |
+
"benchmark_rte3_french_description": "Prédisez la relation entre deux phrases (implication, neutre, contradiction).",
|
| 54 |
+
"benchmark_sickfr_title": "Sick-FR - Phrases françaises impliquant des connaissances compositionnelles",
|
| 55 |
+
"benchmark_sickfr_description": "Cette tâche propose des paires de phrases annotées selon deux dimensions : la similarité (1 à 5) et l’inférence (implique, contredit ou neutre).",
|
| 56 |
+
"benchmark_sts22_title": "Sts22-Crosslingual - Similarité d’articles d’actualités multilingues",
|
| 57 |
+
"benchmark_sts22_description": "Cette tâche évalue si des paires d’articles d’actualités, écrits dans différentes langues, couvrent la même histoire. Elle se concentre sur la similarité au niveau du document, où les systèmes notent les paires sur une échelle de 4 points, du plus similaire au moins similaire.",
|
| 58 |
+
"benchmark_wino_x_lm_title": "WiNo-X LM - Résolution de pronom ",
|
| 59 |
+
"benchmark_wino_x_lm_description": "Prédire le bon référent (1 ou 2) d’un pronom dans une phrase en choisissant parmi deux candidats.",
|
| 60 |
+
"benchmark_wino_x_mt_title": "WiNo-X MT - Résolution de pronom ",
|
| 61 |
+
"benchmark_wino_x_mt_description": " Choisir laquelle de deux traductions françaises utilise le bon pronom (il/elle) selon le référent correct de la phrase anglaise.",
|
| 62 |
+
"benchmark_xnli_title": "XNLI - Corpus NLI multilingue",
|
| 63 |
+
"benchmark_xnli_description": "Cette tâche consiste en paires de phrases où l’objectif est de déterminer la relation entre les deux : implication, neutre ou contradiction.",
|
| 64 |
+
"benchmark_wsd_title": "WSD-Fr : Désambiguïsation lexicale",
|
| 65 |
+
"benchmark_wsd_description": "WSD-Fr est une tâche de désambiguïsation lexicale dans laquelle le modèle doit identifier le sens correct d’un verbe ambigu en contexte, dans le cadre du benchmark FLUE.",
|
| 66 |
+
"home_whatIsColleTitle": "Qu’est-ce que COLE ?",
|
| 67 |
+
"home_paragraph1": "COLE est un benchmark multidisciplinaire de compréhension du langage naturel en français ( <1>NLU</1> ). Il s’inspire de ses prédécesseurs <3>GLUE</3> et <5>SuperGLUE</5> pour construire un benchmark capable d’évaluer les modèles en langue française sur plusieurs facettes de la compréhension du langage. Consultez <7>notre article</7> pour plus d’informations.",
|
| 68 |
+
"home_paragraph2": "Le benchmark COLE poursuit plusieurs objectifs : d’abord fournir une alternative solide et complète en français pour évaluer les modèles sur des tâches NLU, puis offrir à l’utilisateur plusieurs jeux de données, tous utilisables via les bibliothèques HuggingFace, pour entraîner ou affiner des modèles sur des tâches spécifiques.",
|
| 69 |
+
"home_paragraph3": "Nous avons choisi de masquer les étiquettes de test pour décourager la triche ou le sur-apprentissage sur les données de test. Pour obtenir des résultats sur vos données de test, vous pouvez nous envoyer vos résultats comme expliqué dans <1>notre guide</1>.",
|
| 70 |
+
"guide_title": "Utilisation du benchmark COLE",
|
| 71 |
+
"guide_section1_title": "Entraînement et tests",
|
| 72 |
+
"guide_section1_para1": "Le benchmark COLE peut être utilisé pour entraîner et/ou tester des modèles sur plusieurs tâches. Pour entraîner ou affiner un modèle, vous pouvez récupérer les jeux de données train, validation et test depuis notre <0>dépôt public Hugging Face</0>. Nous recommandons d’utiliser les bibliothèques Hugging Face pour simplifier le processus.",
|
| 73 |
+
"guide_section1_para2": "Pour tester un modèle, vous devez également récupérer les données de la même façon. Une fois fait, votre modèle doit inférer les prédictions pour chaque ligne de la partition de test. Notre dépôt inclut des scripts d’évaluation pour chaque dataset. Il vous suffit de connecter la méthode d’inférence de votre modèle via l’interface HuggingFace. Nos scripts d’inférence sont disponibles sur notre <0>dépôt GitHub</0>.",
|
| 74 |
+
"guide_section1_para3": "Si vous préférez lancer l’inférence séparément, assurez-vous que les prédictions sont correctement formatées avant de les soumettre pour évaluation (voir notre section « Formatting the Dataset »).",
|
| 75 |
+
"guide_section2_title": "Formatage du jeu de données",
|
| 76 |
+
"guide_section2_para1": "Avant de soumettre vos résultats, assurez-vous que votre sortie est correctement formatée afin que nos systèmes puissent la traiter. Le format attendu est un dictionnaire JSON imbriqué comme ci-dessous. Une fois formaté, compressez votre fichier JSON dans une archive ZIP (.zip) et soumettez-le via le formulaire de soumission.",
|
| 77 |
+
"faq_title": "Foire aux questions",
|
| 78 |
+
"faqs": [
|
| 79 |
+
{
|
| 80 |
+
"question": "Comment évaluer mon modèle ?",
|
| 81 |
+
"answer": "Formatez les prédictions de votre modèle dans un fichier JSON selon le format décrit dans le Guide, compressez-le dans une archive ZIP, et soumettez-le via le formulaire sur le site. Le système évaluera automatiquement vos prédictions par rapport aux labels de test cachés et affichera les résultats."
|
| 82 |
+
},
|
| 83 |
+
{
|
| 84 |
+
"question": "COLE est-il multilingue ?",
|
| 85 |
+
"answer": "Non, COLE est disponible uniquement en français. Le benchmark est spécifiquement conçu pour évaluer les modèles en compréhension de la langue française (NLU)."
|
| 86 |
+
},
|
| 87 |
+
{
|
| 88 |
+
"question": "Quel format doivent avoir mes prédictions ?",
|
| 89 |
+
"answer": "Vos prédictions doivent être un fichier JSON contenant le nom du modèle, l’URL du modèle, et une liste de tâches avec des tableaux de prédictions. Chaque tableau de prédictions doit correspondre à l’ordre et à la taille du split de test correspondant. Consultez la page Guide pour le format exact. Le fichier JSON doit ensuite être compressé en archive ZIP avant la soumission."
|
| 90 |
+
},
|
| 91 |
+
{
|
| 92 |
+
"question": "Où puis-je trouver les données de test ?",
|
| 93 |
+
"answer": "Les données de test (sans labels) sont disponibles sur notre dépôt HuggingFace : <code>graalul/COLE-public</code>. Vous pouvez charger n’importe quelle tâche avec la bibliothèque datasets : <code>load_dataset(‘graalul/COLE-public’, ‘nom_tache’)</code>."
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"question": "Pourquoi les labels de test sont-ils cachés ?",
|
| 97 |
+
"answer": "Pour garantir une évaluation équitable et empêcher le sur-apprentissage sur les données de test, nous ne publions pas les labels de test. Les modèles sont évalués côté serveur par rapport à la vérité terrain cachée lors de la soumission de vos prédictions."
|
| 98 |
+
},
|
| 99 |
+
{
|
| 100 |
+
"question": "Puis-je évaluer sur un sous-ensemble de tâches ?",
|
| 101 |
+
"answer": "Oui, vous pouvez soumettre des prédictions pour n’importe quel sous-ensemble des 23 tâches. Le classement affichera vos scores sur les tâches soumises et N/A pour les autres."
|
| 102 |
+
}
|
| 103 |
+
],
|
| 104 |
+
"contact_title": "Nous contacter",
|
| 105 |
+
"contact_paragraph": "Si vous avez des questions, des commentaires ou des suggestions concernant le benchmark COLE, n’hésitez pas à nous contacter. Nous serons ravis de vous aider — veuillez noter que les délais de réponse peuvent varier.",
|
| 106 |
+
"contact_email_label": "Envoyez-nous un email à :",
|
| 107 |
+
"submit_formTitle": "Soumettre vos résultats",
|
| 108 |
+
"submit_labelEmail": "Votre email",
|
| 109 |
+
"submit_placeholderEmail": "vous@exemple.com",
|
| 110 |
+
"submit_labelDisplayName": "Nom affiché",
|
| 111 |
+
"submit_placeholderDisplayName": "Nom au classement",
|
| 112 |
+
"submit_labelFile": "Fichier ZIP de prédictions",
|
| 113 |
+
"submit_labelZip": "Sélectionnez votre fichier de résultats",
|
| 114 |
+
"submit_requiredError": "⚠️ Email, nom affiché et ZIP sont requis.",
|
| 115 |
+
"submit_zipAlert": "Veuillez téléverser un fichier ZIP (.zip).",
|
| 116 |
+
"submit_button": "Soumettre vos résultats",
|
| 117 |
+
"submit_submitting": "Envoi en cours...",
|
| 118 |
+
"submit_successTitle": "Succès",
|
| 119 |
+
"submit_successMessage": "Votre soumission a été envoyée avec succès !",
|
| 120 |
+
"submit_checkResults": "Voir les résultats",
|
| 121 |
+
"submit_errorTitle": "Erreur ⚠️",
|
| 122 |
+
"submit_errorMessage": "Erreur de soumission : {{errorMessage}}",
|
| 123 |
+
"submit_closeButton": "Fermer",
|
| 124 |
+
"results_default_title": "Pas encore de résultats",
|
| 125 |
+
"results_default_message": "Veuillez soumettre un fichier ZIP pour générer les résultats du benchmark.",
|
| 126 |
+
"results_loading": "⏳ Chargement des résultats...",
|
| 127 |
+
"results_page_title": "📊 Résultats pour {{displayName}}",
|
| 128 |
+
"results_download": "Télécharger le JSON",
|
| 129 |
+
"results_no_results": "⚠️ Aucun résultat de benchmark trouvé.",
|
| 130 |
+
"results_benchmark_label": "🧪 Benchmark : {{name}}",
|
| 131 |
+
"leaderboard_title": "Classement",
|
| 132 |
+
"leaderboard_modelHeader": "Nom du modèle",
|
| 133 |
+
"leaderboard_overallHeader": "Global",
|
| 134 |
+
"leaderboard_avgScoreLabel": "(score moyen)",
|
| 135 |
+
"leaderboard_notSpecified": "N/A",
|
| 136 |
+
"leaderboard_notSpecifiedTooltip": "Ce modèle n'a pas été évalué sur cette tâche",
|
| 137 |
+
"leaderboard_modalTitle": "Résultats pour {{name}}",
|
| 138 |
+
"leaderboard_closeButton": "Fermer",
|
| 139 |
+
"nav_home": "COLE",
|
| 140 |
+
"nav_guide": "Guide",
|
| 141 |
+
"nav_faq": "FAQ",
|
| 142 |
+
"nav_contact": "Nous contacter",
|
| 143 |
+
"nav_submit": "Soumettre vos résultats",
|
| 144 |
+
"nav_tasks": "Nos tâches",
|
| 145 |
+
"nav_results": "Résultats",
|
| 146 |
+
"nav_leaderboard": "Classement COLE",
|
| 147 |
+
"nav_datasets": "Nos données",
|
| 148 |
+
"papers_title": "Nos articles",
|
| 149 |
+
"papers_arxiv_label": "COLE: a Comprehensive Benchmark for French Language Understanding Evaluation",
|
| 150 |
+
"papers_arxiv_authors": "David Beauchemin, Yan Tremblay, Mohamed Amine Youssef, Richard Khoury (arXiv:2510.05046, ICLR 2025 Workshop)",
|
| 151 |
+
"leaderboard_errorMessage": "Impossible de charger les données du classement. Veuillez réessayer plus tard.",
|
| 152 |
+
"benchmarks_category_sentiment": "Analyse de sentiments",
|
| 153 |
+
"benchmarks_category_nli": "Inférence en langage naturel",
|
| 154 |
+
"benchmarks_category_qa": "Réponse aux questions",
|
| 155 |
+
"benchmarks_category_paraphrase": "Détection de paraphrases",
|
| 156 |
+
"benchmarks_category_grammar": "Jugement grammatical",
|
| 157 |
+
"benchmarks_category_similarity": "Similarité sémantique",
|
| 158 |
+
"benchmarks_category_wsd": "Désambiguïsation lexicale",
|
| 159 |
+
"benchmarks_category_quebec": "Français québécois",
|
| 160 |
+
"benchmarks_category_coreference": "Coréférence / Résolution de pronoms"
|
| 161 |
+
}
|
| 162 |
+
|
frontend/src/app/globals.css
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
 * Global stylesheet for the COLE frontend.
 * Entry point for Tailwind CSS; also defines the site-wide color tokens
 * and base element styles shared by every page.
 */
@import "tailwindcss";

/* Design tokens: white page background, purple default text color. */
:root {
  --background: #ffffff;
  --foreground: #6526ae;
}

/* Expose the tokens — plus the Geist font CSS variables injected by
   layout.js — to Tailwind utility classes. */
@theme inline {
  --color-background: var(--background);
  --color-foreground: var(--foreground);
  --font-sans: var(--font-geist-sans);
  --font-mono: var(--font-geist-mono);
}


/* Base body styling; Arial stack overrides the Tailwind default. */
body {
  background: var(--background);
  color: var(--foreground);
  font-family: Arial, Helvetica, sans-serif;
}

/* Inline code snippets render as a light-grey chip in the mono font. */
code {
  background-color: #f3f4f6;
  padding: 0.15em 0.4em;
  border-radius: 0.25em;
  font-size: 0.875em;
  font-family: var(--font-mono), monospace;
}
|
frontend/src/app/guide/page.js
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
'use client';

import '../i18n';
import {useTranslation, Trans} from 'react-i18next';
import Link from 'next/link';
import CodeBlock from '../components/CodeBlock';

/**
 * Guide page: explains how to obtain the COLE data, run an evaluation, and
 * format a submission file for the leaderboard.
 *
 * All copy comes from the i18n catalogs; the <Trans> blocks inject styled
 * anchor elements into translated strings via the `components` array
 * (element order must match the numbered placeholders in the translations).
 */
export default function Guide() {
    const {t} = useTranslation();

    return (
        <div className="max-w-5xl mx-auto px-6 py-3">
            <h2 className="text-3xl font-bold text-center text-blue-700 border-b pb-4 mb-10">
                {t('guide_title')}
            </h2>

            <div className="space-y-8">
                {/* SECTION TRAINING & TESTING */}
                <div className="p-6 bg-white border border-gray-200 rounded-lg shadow-sm hover:shadow transition">
                    <h3 className="text-2xl font-semibold text-gray-900 mb-4 border-l-4 border-blue-600 pl-4">
                        {t('guide_section1_title')}
                    </h3>

                    {/* Paragraph with a link to the public dataset on Hugging Face. */}
                    <p className="text-gray-700">
                        <Trans i18nKey="guide_section1_para1" components={[
                            <a key="hf-link"
                               href="https://huggingface.co/datasets/graalul/COLE-public"
                               target="_blank"
                               rel="noopener noreferrer"
                               className="text-blue-600 underline hover:text-blue-800"
                            />
                        ]}>
                        </Trans>
                    </p>

                    {/* Paragraph pointing at the evaluation code on GitHub. */}
                    <p className="text-gray-700 mt-4">
                        <Trans i18nKey="guide_section1_para2" components={[<a key="github-ref"
                                                                              href="https://github.com/GRAAL-Research/COLE"
                                                                              target="_blank"
                                                                              rel="noopener noreferrer"
                                                                              className="text-blue-600 underline hover:text-blue-800">
                            GitHub Repository.
                        </a>]}> </Trans>

                    </p>

                    <p className="text-gray-700 mt-4">
                        <Trans i18nKey="guide_section1_para3">
                        </Trans>
                    </p>

                    {/* SECTION FORMATTING */}
                    <h3 className="text-2xl font-semibold text-gray-900 mb-4 border-l-4 border-blue-600 pl-4">
                        {t('guide_section2_title')}
                    </h3>
                    <p className="text-gray-700 mb-4">
                        {t('guide_section2_para1')}
                    </p>

                    {/* Example submission payload; shown verbatim to the user. */}
                    <CodeBlock>{`{
  "model_name": "a_model_name",
  "model_url": "a_model_url",
  "tasks": [
    {
      "qfrcola": { "predictions": [1,1,1,1,1] }
    },
    {
      "allocine": { "predictions": [1,1,1,1,1] }
    }
  ]
}`}</CodeBlock>
                </div>
            </div>
        </div>
    );
}
|
frontend/src/app/i18n.js
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import i18n from 'i18next';
import { initReactI18next } from 'react-i18next';
import LanguageDetector from 'i18next-browser-languagedetector';

import en from "./en/translation.json";
import fr from './fr/translation.json';

/*
 * Shared i18next instance for the whole app (imported for its side effect
 * by every page via `import '../i18n'`).
 *
 * Fix: the original config passed `lng: 'en'`. Per the i18next docs, an
 * explicit `lng` makes init skip any registered language detector, so the
 * `detection` block below was dead code and every visitor started in
 * English — even with a language previously cached in localStorage
 * (presumably written by the LanguageSwitcher component; verify against
 * that component). Dropping `lng` lets the detector run: localStorage
 * first, then the browser locale, with English as the final fallback.
 */
i18n
  .use(LanguageDetector)
  .use(initReactI18next)
  .init({
    resources: {
      en: { translation: en },
      fr: { translation: fr },
    },
    // No `lng` here on purpose: setting it would disable LanguageDetector.
    fallbackLng: 'en',
    interpolation: {
      // React already escapes interpolated values; double-escaping would
      // render entities like &amp; in the UI.
      escapeValue: false,
    },
    detection: {
      order: ['localStorage', 'navigator'],
      caches: ['localStorage'],
    },
  });


export default i18n;
|
frontend/src/app/layout.js
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { Geist, Geist_Mono } from "next/font/google";
import "./globals.css";

import ClientHeader from "./components/ClientHeader";
import ModalManager from "./components/ModalManager";
import {Suspense} from "react";

// Self-hosted Google fonts; each exposes a CSS variable that globals.css
// maps onto Tailwind's --font-sans / --font-mono tokens.
const geistSans = Geist({
  variable: "--font-geist-sans",
  subsets: ["latin"],
});

const geistMono = Geist_Mono({
  variable: "--font-geist-mono",
  subsets: ["latin"],
});

// Next.js App Router metadata: page <title>/<meta> plus Open Graph and
// Twitter cards for link previews.
export const metadata = {
  title: "COLE - Comprehensive Benchmark for French Language Understanding",
  description: "COLE is a benchmark of 23 tasks for evaluating French Natural Language Understanding (NLU) in large language models.",
  openGraph: {
    title: "COLE - French NLU Benchmark",
    description: "Evaluate LLMs on 23 French NLU tasks: sentiment analysis, NLI, QA, and more.",
    url: "https://colebenchmark.org",
    siteName: "COLE Benchmark",
    type: "website",
  },
  twitter: {
    card: "summary",
    title: "COLE - French NLU Benchmark",
    description: "Evaluate LLMs on 23 French NLU tasks: sentiment analysis, NLI, QA, and more.",
  },
};

/**
 * Root layout shared by every route: header, centered content column, and
 * the query-string-driven ModalManager (wrapped in Suspense because it
 * reads navigation state on the client).
 */
export default function RootLayout({ children }) {
  return (
    <html lang="en">
      <body className={`${geistSans.variable} ${geistMono.variable} antialiased`}>
        <ClientHeader />
        <main className="w-full flex justify-center px-4 pt-8">
          <div className="w-full max-w-7xl">{children}</div>
        </main>
        <Suspense fallback={null}>
          <ModalManager/>
        </Suspense>
      </body>
    </html>
  );
}
|
frontend/src/app/leaderboard/page.js
ADDED
|
@@ -0,0 +1,344 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
'use client';

import React, { useEffect, useState } from "react";
import {
  normalizeBenchmarkName,
  computeAverageScore,
} from "./util";
import { useTranslation } from "react-i18next";
import { BACKEND_ADDRESS } from "@/app/resources/ResourcesPaths";

// Metric names that count toward a displayed score. Anything else
// (e.g. losses, stderr fields, "*_warning" strings) is ignored.
// NOTE(review): duplicated in ./util.js — keep the two lists in sync.
const allowedMetrics = [
  'acc',
  'accuracy',
  'f1',
  'pearson',
  'pearsonr',
  'spearman',
  'fquad',
  'exact_match',
];

// Rows shown per leaderboard page.
const PAGE_SIZE = 25;

/**
 * Leaderboard page: fetches all submissions from the backend, derives the
 * set of benchmark columns from the data itself, and renders a sortable,
 * paginated score table with a per-model detail modal.
 */
export default function LeaderboardPage() {
  const { t } = useTranslation();
  const [entries, setEntries] = useState([]);          // submissions, each with a precomputed averageScore
  const [benchmarks, setBenchmarks] = useState([]);    // union of normalized benchmark names across entries
  const [sortCol, setSortCol] = useState('overall');   // 'model' | 'overall' | a benchmark name
  const [sortOrder, setSortOrder] = useState('desc');
  const [selectedEntry, setSelectedEntry] = useState(null);  // entry shown in the detail modal
  const [error, setError] = useState(false);
  const [loading, setLoading] = useState(true);
  const [currentPage, setCurrentPage] = useState(1);

  // Translated labels for the two fixed columns.
  const headerLabels = {
    model: t('leaderboard_modelHeader'),
    overall: t('leaderboard_overallHeader'),
  };

  // Fetch once on mount; columns are derived from whatever benchmarks
  // appear in the data rather than a hard-coded list.
  useEffect(() => {
    fetch(`${BACKEND_ADDRESS}/leaderboard`)
      .then((res) => {
        if (!res.ok) throw new Error(`HTTP ${res.status}`);
        return res.json();
      })
      .then((data) => {
        const withOverall = data.map((e) => ({
          ...e,
          averageScore: computeAverageScore(e),
        }));
        setEntries(withOverall);

        const allBench = new Set();
        withOverall.forEach((entry) => {
          Object.keys(entry.results || {}).forEach((raw) => {
            allBench.add(normalizeBenchmarkName(raw));
          });
        });
        setBenchmarks(Array.from(allBench));
      })
      .catch(() => setError(true))
      .finally(() => setLoading(false));
  }, []);

  /**
   * Value used for both display and sorting of a cell.
   * - 'model'   -> display name (string)
   * - 'overall' -> precomputed average (0..1) or null
   * - otherwise -> mean of this entry's allowed metrics for that benchmark,
   *   normalized to 0..1 (values > 1 are assumed to be percentages), or
   *   null when the entry was not evaluated on it.
   */
  const getCellValue = (entry, col) => {
    if (col === 'model') return entry.display_name;
    if (col === 'overall') return entry.averageScore ?? null;

    const pair = Object.entries(entry.results || {}).find(
      ([rawName]) => normalizeBenchmarkName(rawName) === col
    );
    if (!pair) return null;

    const rawValues = [];
    Object.values(pair[1]).forEach((metricGroup) => {
      if (metricGroup && typeof metricGroup === 'object') {
        Object.entries(metricGroup).forEach(([metricName, metricValue]) => {
          if (
            !metricName.includes('_warning') &&
            typeof metricValue === 'number' &&
            allowedMetrics.includes(metricName.toLowerCase())
          ) {
            rawValues.push(metricValue);
          }
        });
      }
    });

    if (rawValues.length === 0) return null;
    // Scores > 1 are treated as percentages and rescaled to 0..1.
    const normalized = rawValues.map((v) => v > 1 ? v / 100 : v);
    const avg = normalized.reduce((a, b) => a + b, 0) / normalized.length;
    return avg;
  };

  // Sort a copy: alphabetical for the model column, numeric elsewhere.
  // Missing scores sort as -Infinity so they always land at the bottom.
  const sorted = [...entries].sort((a, b) => {
    const va = getCellValue(a, sortCol);
    const vb = getCellValue(b, sortCol);
    if (sortCol === 'model') {
      if (va == null) return 1;
      if (vb == null) return -1;
      return sortOrder === 'asc'
        ? va.localeCompare(vb)
        : vb.localeCompare(va);
    }
    const na = va ?? -Infinity;
    const nb = vb ?? -Infinity;
    return sortOrder === 'asc' ? na - nb : nb - na;
  });

  const totalPages = Math.max(1, Math.ceil(sorted.length / PAGE_SIZE));
  const paginatedEntries = sorted.slice(
    (currentPage - 1) * PAGE_SIZE,
    currentPage * PAGE_SIZE
  );

  // Clicking the active column flips direction; a new column starts 'desc'.
  // Either way the view resets to page 1.
  const handleSort = (col) => {
    if (sortCol === col) {
      setSortOrder(sortOrder === 'asc' ? 'desc' : 'asc');
    } else {
      setSortCol(col);
      setSortOrder('desc');
    }
    setCurrentPage(1);
  };

  /**
   * Render one clickable <th> body. Benchmark columns also show which
   * metric names contribute to the score, sampled from the first entry
   * (assumes all entries report the same metrics per task — TODO confirm).
   */
  const renderHeader = (col) => {
    const baseLabel = headerLabels[col] ?? col;
    const arrow = sortCol === col ? (sortOrder === 'asc' ? ' ▲' : ' ▼') : '';

    if (col === 'overall') {
      return (
        <div>
          <div onClick={() => handleSort(col)} className="cursor-pointer">
            {baseLabel}
            {arrow}
          </div>
          <div className="text-xs text-blue-100 text-center">
            {t('leaderboard_avgScoreLabel')}
          </div>
        </div>
      );
    }

    if (col === 'model') {
      return (
        <div onClick={() => handleSort(col)} className="cursor-pointer">
          {baseLabel}
          {arrow}
        </div>
      );
    }

    let metricText = '';
    const sample = entries[0];
    if (sample && sample.results) {
      const p = Object.entries(sample.results).find(
        ([raw]) => normalizeBenchmarkName(raw) === col
      );
      if (p) {
        const grp = Object.values(p[1])[0];
        if (grp) {
          const metrics = Object.keys(grp)
            .filter((m) => allowedMetrics.includes(m.toLowerCase()));
          if (metrics.length > 0) {
            metricText = ` (${metrics.join(', ')})`;
          }
        }
      }
    }

    return (
      <div onClick={() => handleSort(col)} className="cursor-pointer">
        {baseLabel}
        {arrow}
        {metricText}
      </div>
    );
  };

  // Loading state: animated skeleton of a header row and eight data rows.
  if (loading) {
    return (
      <div className="space-y-8">
        <h3 className="text-2xl font-semibold text-gray-900 mb-4 border-l-4 border-blue-600 pl-4">
          {t('leaderboard_title')}
        </h3>
        <div className="overflow-auto">
          <div className="animate-pulse space-y-3">
            <div className="h-10 bg-blue-100 rounded w-full" />
            {[...Array(8)].map((_, i) => (
              <div key={i} className="h-8 bg-gray-100 rounded w-full" />
            ))}
          </div>
        </div>
      </div>
    );
  }

  // Fetch failure: translated error message in place of the table.
  if (error) {
    return (
      <div className="space-y-8">
        <h3 className="text-2xl font-semibold text-gray-900 mb-4 border-l-4 border-blue-600 pl-4">
          {t('leaderboard_title')}
        </h3>
        <div className="text-center py-12">
          <p className="text-red-600 text-lg">{t('leaderboard_errorMessage')}</p>
        </div>
      </div>
    );
  }

  return (
    <div className="space-y-8">
      <h3 className="text-2xl font-semibold text-gray-900 mb-4 border-l-4 border-blue-600 pl-4">
        {t('leaderboard_title')}</h3>
      <div className="overflow-auto">
        <table className="min-w-full border-collapse">
          <thead>
          <tr>
            {['model', 'overall', ...benchmarks].map((b) => (
              <th
                key={b}
                className="border border-gray-300 px-2 py-1 bg-blue-600 text-left text-sm font-semibold text-white"
              >
                {renderHeader(b)}
              </th>
            ))}
          </tr>
          </thead>
          <tbody>
          {paginatedEntries.map((entry) => (
            <tr
              key={entry.submission_id}
              className="bg-white hover:bg-gray-50 cursor-pointer"
              onClick={() => setSelectedEntry(entry)}
            >
              <td className="border border-gray-300 px-2 py-1 font-medium text-blue-600">
                {entry.display_name}
              </td>
              <td className="border border-gray-300 px-2 py-1 text-center text-black font-bold">
                {entry.averageScore == null
                  ? t('leaderboard_notSpecified')
                  : (entry.averageScore * 100).toFixed(1) + '%'}
              </td>
              {benchmarks.map((b) => {
                const val = getCellValue(entry, b);
                return (
                  <td
                    key={b}
                    className="border border-gray-200 px-2 py-1 text-center text-gray-800"
                    title={val == null ? t('leaderboard_notSpecifiedTooltip') : undefined}
                  >
                    {val == null
                      ? <span className="text-gray-400 italic">{t('leaderboard_notSpecified')}</span>
                      : (val * 100).toFixed(1) + '%'}
                  </td>
                );
              })}
            </tr>
          ))}
          </tbody>
        </table>
      </div>

      {/* Pagination controls, hidden when everything fits on one page. */}
      {totalPages > 1 && (
        <div className="flex justify-center items-center gap-4 py-4">
          <button
            onClick={() => setCurrentPage((p) => Math.max(1, p - 1))}
            disabled={currentPage === 1}
            className="px-3 py-1 rounded bg-blue-600 text-white disabled:opacity-40 hover:bg-blue-700 transition"
          >
            «
          </button>
          <span className="text-gray-700 text-sm">
            {currentPage} / {totalPages}
          </span>
          <button
            onClick={() => setCurrentPage((p) => Math.min(totalPages, p + 1))}
            disabled={currentPage === totalPages}
            className="px-3 py-1 rounded bg-blue-600 text-white disabled:opacity-40 hover:bg-blue-700 transition"
          >
            »
          </button>
        </div>
      )}

      {/* Detail modal: full raw results for the clicked entry. Closes on
          backdrop click, Escape, or the close button. */}
      {selectedEntry && (
        <div
          className="fixed inset-0 flex items-center justify-center bg-black bg-opacity-50"
          onClick={(e) => { if (e.target === e.currentTarget) setSelectedEntry(null); }}
          onKeyDown={(e) => { if (e.key === 'Escape') setSelectedEntry(null); }}
          role="dialog"
          aria-modal="true"
          tabIndex={-1}
        >
          <div className="bg-white p-6 rounded-2xl shadow-lg max-w-2xl w-full mx-4 max-h-[80vh] overflow-y-auto">
            <h3 className="text-xl font-semibold text-gray-800 mb-4">
              {t('leaderboard_modalTitle', {
                name: selectedEntry.display_name,
              })}
            </h3>
            {Object.entries(selectedEntry.results || {}).map(
              ([taskKey, metricsObj]) => {
                {/* Raw keys look like "suite|task|shots"; show the task part. */}
                const prettyName = taskKey.split('|')[1] || taskKey;
                const [metricType, values] = Object.entries(metricsObj)[0];
                return (
                  <div key={taskKey} className="mb-4">
                    <h4 className="font-medium text-blue-700">
                      {prettyName}
                    </h4>
                    <ul className="list-disc list-inside text-gray-700">
                      {Object.entries(values)
                        .filter(([k]) => !k.endsWith('_warning'))
                        .map(([metricKey, value]) => (
                          <li key={metricKey}>
                            <strong>{metricKey.replace(/_/g, ' ')}</strong>:{' '}
                            {typeof value === 'number'
                              ? (value > 1
                                ? value.toFixed(1) + '%'
                                : (value * 100).toFixed(1) + '%')
                              : value}
                          </li>
                        ))}
                    </ul>
                    {values[`${metricType}_warning`] && (
                      <p className="text-sm text-yellow-700 mt-2">
                        ⚠️ {values[`${metricType}_warning`]}
                      </p>
                    )}
                  </div>
                );
              }
            )}
            <button
              className="mt-4 px-4 py-2 bg-gray-200 rounded-full hover:bg-gray-300"
              onClick={() => setSelectedEntry(null)}
            >
              {t('leaderboard_closeButton')}
            </button>
          </div>
        </div>
      )}
    </div>
  );
}
|
frontend/src/app/leaderboard/util.js
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
// Extract a canonical task identifier from a raw benchmark key.
// Raw keys look like "suite|task-name|shots"; the middle segment is the
// task, lower-cased with dashes rewritten to underscores. Keys without a
// "|" separator are returned lower-cased as-is.
export const normalizeBenchmarkName = (name) => {
  const segments = name.toLowerCase().split("|");
  return segments.length >= 2
    ? segments[1].replace(/-/g, "_")
    : name.toLowerCase();
};
|
| 7 |
+
|
| 8 |
+
// Overall leaderboard score for one submission entry.
//
// Walks entry.results -> task -> metric group, keeps only recognized
// score metrics (accuracy-style, F1, correlations, ...), rescales values
// above 1 from percentages to the 0..1 range, averages each metric group,
// then averages those group means. Returns null when no usable metric
// exists anywhere in the entry.
export const computeAverageScore = (entry) => {
  const allowed = new Set([
    "acc",
    "accuracy",
    "f1",
    "exact_match",
    "fquad",
    "pearson",
    "pearsonr",
    "spearman",
  ]);

  // Arithmetic mean; callers guarantee a non-empty array.
  const mean = (xs) => xs.reduce((acc, x) => acc + x, 0) / xs.length;

  const perTaskAverages = [];

  for (const taskData of Object.values(entry.results || {})) {
    if (!taskData || typeof taskData !== "object") continue;
    for (const metricGroup of Object.values(taskData)) {
      if (!metricGroup || typeof metricGroup !== "object") continue;
      const scores = Object.entries(metricGroup)
        .filter(
          ([metric, value]) =>
            allowed.has(metric.toLowerCase()) && typeof value === "number"
        )
        // Values above 1 are percentages; bring them onto the 0..1 scale.
        .map(([, value]) => (value > 1 ? value / 100 : value));
      if (scores.length > 0) {
        perTaskAverages.push(mean(scores));
      }
    }
  }

  return perTaskAverages.length === 0 ? null : mean(perTaskAverages);
};
|
| 47 |
+
|
frontend/src/app/page.js
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
'use client'

import Link from "next/link";
import { Trans } from 'react-i18next';
import { useTranslation } from 'react-i18next';

/**
 * Landing page: introduces the COLE benchmark.
 *
 * The <Trans> blocks carry English fallback text inline; the i18n catalog
 * supplies translations, and the embedded <a>/<Link> elements are re-used
 * by position inside the translated strings. Do not reorder them without
 * updating the catalogs.
 */
export default function Home() {
  const { t } = useTranslation();

  return (
    <div className="max-w-5xl mx-auto px-6 py-3">
      <h2 className="text-3xl font-bold text-center text-blue-700 border-b pb-4 mb-10">
        {t('home_whatIsColleTitle')}
      </h2>

      {/* Intro paragraph with external links to NLU, GLUE, SuperGLUE and
          the COLE paper. */}
      <p className="text-gray-700 mb-4 leading-relaxed space-y-4">
        <Trans i18nKey="home_paragraph1">
          COLE is a multidisciplinary French Natural Language Understanding benchmark (
          <a
            href="https://en.wikipedia.org/wiki/Natural_language_understanding"
            target="_blank"
            rel="noopener noreferrer"
            className="text-blue-600 underline hover:text-blue-800"
          >
            NLU
          </a>
          ). It takes inspiration from its predecessors
          <a
            href="https://gluebenchmark.com/"
            target="_blank"
            rel="noopener noreferrer"
            className="text-blue-600 underline hover:text-blue-800"
          >
            GLUE
          </a>
          and
          <a
            href="https://super.gluebenchmark.com/"
            target="_blank"
            rel="noopener noreferrer"
            className="text-blue-600 underline hover:text-blue-800"
          >
            SuperGLUE
          </a>
          to build a benchmark capable of evaluating models in the French language on multiple topics of language understanding. See
          <Link
            href="https://arxiv.org/abs/2510.05046"
            className="text-blue-600 underline hover:text-blue-800"
          >
            our paper
          </Link>
          for more information.
        </Trans>
      </p>

      <p className="text-gray-700 leading-relaxed">
        {t('home_paragraph2')}
      </p>

      {/* Explains hidden test labels and links to the submission guide. */}
      <p className="text-gray-700 leading-relaxed mt-4">
        <Trans i18nKey="home_paragraph3">
          We have made the choice to hide test labels to discourage cheating or overfitting on test data. To get results on your test data, you may send us your results as explained in
          <Link
            href="/guide"
            className="text-blue-600 underline hover:text-blue-800"
          >
            our guide
          </Link>
          .
        </Trans>
      </p>
    </div>
  );
}
|
frontend/src/app/papers/page.js
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
'use client';

import React, { useState } from 'react';
import '../i18n';
import { useTranslation } from 'react-i18next';

/**
 * Papers page: links the COLE arXiv paper and embeds its PDF in an iframe.
 * A spinner overlay is shown until the iframe fires its load event.
 */
export default function PapersPage() {
  // True once the embedded PDF iframe has finished loading.
  const [loaded, setLoaded] = useState(false);
  const { t } = useTranslation();

  return (
    <div className="relative h-screen">
      {/* Full-area spinner overlay while the PDF loads. */}
      {!loaded && (
        <div className="absolute inset-0 flex items-center justify-center bg-white z-10">
          <div className="animate-spin h-12 w-12 border-4 border-blue-600 border-t-transparent rounded-full" />
        </div>
      )}
      <div className="max-w-5xl mx-auto px-6 py-3">
        <h2 className="text-3xl font-bold text-center text-blue-700 border-b pb-4 mb-6">
          {t('papers_title')}
        </h2>
        <div className="mb-6 space-y-2">
          <p className="text-gray-700">
            <a
              href="https://arxiv.org/abs/2510.05046"
              target="_blank"
              rel="noopener noreferrer"
              className="text-blue-600 underline hover:text-blue-800 font-medium"
            >
              {t('papers_arxiv_label')}
            </a>
            {' '}— {t('papers_arxiv_authors')}
          </p>
        </div>
      </div>

      {/* Inline PDF viewer served straight from arXiv. */}
      <iframe
        onLoad={() => setLoaded(true)}
        src="https://arxiv.org/pdf/2510.05046"
        title="Document COLE"
        width="100%"
        height="100%"
        className="border-none"
      />
    </div>
  );
}
|
frontend/src/app/resources/ResourcesPaths.js
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// Base path for all backend API calls; the reverse proxy forwards "/api"
// to the backend service, so the frontend stays origin-relative.
export const BACKEND_ADDRESS = "/api";
|
frontend/src/app/results/[id]/page.js
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
'use client';
|
| 2 |
+
|
| 3 |
+
import '../../i18n';
|
| 4 |
+
import { useTranslation } from 'react-i18next';
|
| 5 |
+
import React, { useEffect, useState } from 'react';
|
| 6 |
+
import { useParams } from 'next/navigation';
|
| 7 |
+
import { BACKEND_ADDRESS } from '@/app/resources/ResourcesPaths';
|
| 8 |
+
|
| 9 |
+
export default function ResultsPage() {
  const { t } = useTranslation();
  const { id: submissionId } = useParams();
  // data is null while loading; { error: true } on fetch failure.
  const [data, setData] = useState(null);

  // Fixed English display names for the known metric keys.
  const metricLabel = {
    accuracy: 'Accuracy',
    exact_match: 'Exact Match',
    f1: 'F1 Score',
    pearsonr: 'Pearson Correlation',
  };
  // Unknown keys fall back to title case ("my_metric" -> "My Metric").
  const getReadableMetricName = (metricKey) =>
    metricLabel[metricKey] ||
    metricKey.replace(/_/g, ' ').replace(/\b\w/g, (c) => c.toUpperCase());

  // Fetch the stored result file for this submission id; both network and
  // HTTP errors are collapsed into an { error: true } payload.
  useEffect(() => {
    fetch(`${BACKEND_ADDRESS}/results/${submissionId}.json`)
      .then((res) => {
        if (!res.ok) throw new Error(`HTTP ${res.status}`);
        return res.json();
      })
      .then(setData)
      .catch(() => setData({ error: true }));
  }, [submissionId]);

  // Re-fetch the raw JSON and trigger a browser "save as" by clicking a
  // temporary anchor pointing at an object URL.
  const handleDownload = async () => {
    if (!data) return;
    try {
      const res = await fetch(`${BACKEND_ADDRESS}/results/${submissionId}.json`);
      if (!res.ok) throw new Error(`HTTP ${res.status}`);
      const blob = await res.blob();
      const url = URL.createObjectURL(blob);
      const link = document.createElement('a');
      link.href = url;
      link.download = `${submissionId}.json`;
      document.body.appendChild(link);
      link.click();
      document.body.removeChild(link);
      URL.revokeObjectURL(url);
    } catch {
      console.error('Download failed');
    }
  };

  // Loading state: data stays null until the fetch above settles.
  if (!data) {
    return (
      <main className="max-w-5xl mx-auto px-6 py-6 text-center">
        <p className="text-gray-600">{t('results_loading')}</p>
      </main>
    );
  }

  const tasksArray = data.tasks || [];
  const displayName = data.display_name || data.config_general?.display_name;

  return (
    <main className="max-w-5xl mx-auto px-6 py-6">
      <h2 className="text-2xl font-bold text-center mb-4">
        {t('results_page_title', { displayName })}
      </h2>

      <div className="flex justify-center mb-6">
        <button
          onClick={handleDownload}
          className="px-4 py-2 bg-blue-600 text-white rounded-lg shadow hover:bg-blue-700 transition"
        >
          {t('results_download')}
        </button>
      </div>

      {tasksArray.length === 0 ? (
        <p className="text-blue-700 text-center">
          {t('results_no_results')}
        </p>
      ) : (
        <div className="space-y-6">
          {tasksArray.map((taskObj) => {
            {/* Each task entry is a single-key object:
                { "<task-name>": { "<metric-type>": { <metric>: value, ... } } } */}
            const [taskName, metricsObj] = Object.entries(taskObj)[0];
            const [metricType, metricValues] = Object.entries(metricsObj)[0];
            // Task names look like "suite|task|..."; show the middle segment
            // when present, otherwise the raw name.
            const prettyName = taskName.split('|')[1] || taskName;
            const warningKey = `${metricType}_warning`;

            return (
              <div
                key={taskName}
                className="p-5 border border-purple-400 rounded-xl shadow-md bg-white"
              >
                <h3 className="text-xl font-semibold text-blue-700 mb-3">
                  {t('results_benchmark_label', { name: prettyName })}
                </h3>
                <ul className="list-disc ml-6 text-gray-700">
                  {Object.entries(metricValues)
                    .filter(([k]) => !k.endsWith('_warning'))
                    .map(([metricKey, value]) => (
                      <li key={metricKey}>
                        <strong>{getReadableMetricName(metricKey)}</strong>:{' '}
                        {typeof value === 'number' ? (
                          // NOTE(review): exact_match/f1 appear to arrive
                          // already scaled to percent while other metrics are
                          // fractions — confirm against the backend output.
                          (metricKey === 'exact_match' || metricKey === 'f1'
                            ? value
                            : value * 100
                          ).toFixed(1) + '%'
                        ) : (
                          value
                        )}
                      </li>
                    ))}
                </ul>
                {metricValues[warningKey] && (
                  <p className="text-sm text-yellow-700 mt-2">
                    ⚠️ {metricValues[warningKey]}
                  </p>
                )}
              </div>
            );
          })}
        </div>
      )}
    </main>
  );
}
|
frontend/src/app/results/page.js
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
'use client';

import '../i18n'
import { useEffect } from 'react';
import { useRouter } from 'next/navigation';
import { useTranslation } from 'react-i18next';

// Fallback page for /results (no id in the URL): if a submission just
// completed, redirect to its dedicated result page; otherwise show a
// generic informational message.
export default function ResultsDefaultPage() {
  const router = useRouter();
  const { t } = useTranslation();

  useEffect(() => {
    // Flags presumably written by the submission flow after a successful
    // upload — verify against the SubmitForm component.
    const justSubmitted = localStorage.getItem('just_submitted');
    const savedFile = localStorage.getItem('last_result_file');

    if (justSubmitted && savedFile) {
      // Stored file names are "<id>.json"; the route expects the bare id.
      const id = savedFile.replace('.json', '');
      // Clear the flag so a page refresh does not redirect again.
      localStorage.removeItem('just_submitted');
      router.push(`/results/${id}`);
    }
  }, [router]);

  return (
    <main className="max-w-2xl mx-auto px-6 py-12 text-center">
      <h1 className="text-3xl font-bold text-blue-700 mb-4">
        {t('results_default_title')}
      </h1>
      <p className="text-gray-700">{t('results_default_message')}</p>
    </main>
  );
}
|
nginx.conf
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
worker_processes 1;
events { worker_connections 1024; }
# PID file in a writable location — needed when nginx runs unprivileged
# (as in a Hugging Face Spaces container).
pid /tmp/nginx.pid;
http {
    server {
        # HF Spaces exposes the app on port 7860.
        listen 7860;

        # Security headers (X-Frame-Options omitted: HF Spaces embeds via iframe)
        add_header X-Content-Type-Options "nosniff" always;
        add_header Referrer-Policy "strict-origin-when-cross-origin" always;

        # Backend API: /api/<path> is proxied to FastAPI as /<path> —
        # the trailing slash on proxy_pass strips the /api prefix.
        location /api/ {
            proxy_pass http://127.0.0.1:8000/;
            proxy_http_version 1.1;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            # Long timeouts: submission evaluation can take several minutes.
            proxy_connect_timeout 600s;
            proxy_send_timeout 600s;
            proxy_read_timeout 600s;
            send_timeout 600s;
        }

        # Everything else goes to the front-end server on port 8001.
        location / {
            proxy_pass http://127.0.0.1:8001;
            proxy_http_version 1.1;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_connect_timeout 600s;
            proxy_send_timeout 600s;
            proxy_read_timeout 600s;
            send_timeout 600s;
        }
    }
}
|
pytest.ini
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[pytest]
|
| 2 |
+
testpaths = tests
|
src/__init__.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Package-level constants shared across the COLE backend."""

# Hugging Face Hub repository hosting the evaluation datasets.
REPO_ID = "COLE-Graal/COLEGraal"
# Dataset configuration names.
# NOTE(review): these are module-level constants but use lowercase names —
# consider UPPER_SNAKE_CASE; renaming would require updating all importers.
cole = "COLE-final"
boreal = "COLE-final-boreal"
complete = "COLE-finale-complete"
comparison = "Fr-comparison"
# Sentinel value marking a missing / not-applicable score.
NA_VALUE = -1
|
src/backend/__init__.py
ADDED
|
File without changes
|
src/backend/evaluation.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import copy
|
| 2 |
+
import operator
|
| 3 |
+
from functools import reduce
|
| 4 |
+
from typing import List, Dict
|
| 5 |
+
|
| 6 |
+
from src.task.task_factory import Task
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def compute_tasks_ratings(tasks: List[Task], submission: Dict) -> Dict:
    """
    Compute the ratings of every task in a submission.

    :param tasks: tasks to evaluate
    :param submission: raw submission dictionary
    :return: response dictionary with model info and per-task ratings
    """
    # Deep-copy first so the caller's submission is never mutated, then merge
    # the list of single-key task dictionaries into one name -> payload map.
    merged_tasks: Dict = {}
    for task_entry in copy.deepcopy(submission.get("tasks")):
        merged_tasks |= task_entry

    for current_task in tasks:
        payload = merged_tasks.get(current_task.task_name)

        # Predictions are consumed here; they are not echoed in the response.
        task_predictions = payload.pop("predictions")

        ratings, warning = current_task.compute(predictions=task_predictions)
        ratings[f"{current_task.metric_name}_warning"] = warning
        payload[f"{current_task.metric_name}"] = ratings

    # Unwrap the merged mapping back into a list of single-key dictionaries,
    # which is the shape the rest of the pipeline expects.
    return {
        "model_name": submission.get("model_name"),
        "model_url": submission.get("model_url"),
        "tasks": [{name: data} for name, data in merged_tasks.items()],
    }
|
src/backend/results/leaderboard.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
src/backend/submission_api.py
ADDED
|
@@ -0,0 +1,250 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import glob
|
| 2 |
+
import json
|
| 3 |
+
import logging
|
| 4 |
+
import os
|
| 5 |
+
import sys
|
| 6 |
+
import uuid
|
| 7 |
+
from contextlib import asynccontextmanager
|
| 8 |
+
from datetime import datetime
|
| 9 |
+
from functools import lru_cache
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
from typing import Dict, List, Any, Union
|
| 12 |
+
|
| 13 |
+
import huggingface_hub
|
| 14 |
+
from fastapi import FastAPI, UploadFile, Form, File, HTTPException
|
| 15 |
+
from fastapi.responses import JSONResponse
|
| 16 |
+
from fastapi.staticfiles import StaticFiles
|
| 17 |
+
|
| 18 |
+
from slowapi import Limiter
|
| 19 |
+
from slowapi.util import get_remote_address
|
| 20 |
+
from slowapi.errors import RateLimitExceeded
|
| 21 |
+
from starlette.middleware.cors import CORSMiddleware
|
| 22 |
+
from starlette.requests import Request
|
| 23 |
+
|
| 24 |
+
from src.backend.evaluation import compute_tasks_ratings
|
| 25 |
+
from src.backend.submit_tools import unzip_predictions_from_zip
|
| 26 |
+
from src.dataset.datasets_data import preload_all_datasets
|
| 27 |
+
from src.backend.validation_tools import (
|
| 28 |
+
validate_submission_tasks_name,
|
| 29 |
+
validate_submission_json,
|
| 30 |
+
validate_submission_template,
|
| 31 |
+
)
|
| 32 |
+
from src.task.task import Task
|
| 33 |
+
from src.task.task_factory import (
|
| 34 |
+
tasks_factory,
|
| 35 |
+
)
|
| 36 |
+
|
| 37 |
+
# Maximum accepted size for an uploaded predictions archive.
MAX_ZIP_SIZE_MB = 50

# Repository root (two directory levels above this file).
BASE_DIR = Path(__file__).resolve().parents[2]
SRC_DIR = BASE_DIR / "src"
# Make "src" importable regardless of the working directory the API is
# launched from (side effect at import time — intentional).
sys.path.insert(0, str(SRC_DIR))

# Directory where per-submission result JSON files are written and served.
RESULTS_DIR = BASE_DIR / "src" / "backend" / "results"
RESULTS_DIR.mkdir(parents=True, exist_ok=True)
FRONTEND_DIR = BASE_DIR / "frontend"
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
@asynccontextmanager
async def lifespan(application: FastAPI = None):  # pylint: disable=unused-argument
    """Called before the backend comes online, is used to load datasets in memory."""
    # Log in to the Hugging Face Hub and preload the evaluation datasets.
    try:
        token = os.environ.get("HF_TOKEN")
        huggingface_hub.login(token=token)
        preload_all_datasets()
    except Exception as e:
        # Startup continues even if preloading fails; requests that need the
        # datasets will fail later, so the problem is logged as critical here.
        error_message = f"The datasets could not be loaded : {e}"
        logging.critical(error_message)

    yield
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
# Per-client rate limiter, keyed on the remote address.
limiter = Limiter(key_func=get_remote_address)
app = FastAPI(lifespan=lifespan)
app.state.limiter = limiter
# Map rate-limit violations to a plain 429 JSON response.
app.add_exception_handler(
    RateLimitExceeded,
    lambda req, exc: JSONResponse(
        status_code=429,
        content={"detail": "Too many submissions. Please try again later."},
    ),
)
# Serve stored result files directly: GET /results/<submission_id>.json.
app.mount("/results", StaticFiles(directory=str(RESULTS_DIR)), name="results")
front_end_info_message = f"The Front-end directory is: {FRONTEND_DIR}"
logging.info(front_end_info_message)

# NOTE(review): CORS is wide open (any origin/method/header) — likely fine
# behind the nginx proxy, but confirm this is intended for production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
@app.post("/submit")
|
| 86 |
+
@limiter.limit("5/minute")
|
| 87 |
+
async def submit(
|
| 88 |
+
request: Request, # pylint: disable=unused-argument # required by slowapi limiter
|
| 89 |
+
email: str = Form(...),
|
| 90 |
+
predictions_zip: UploadFile = File(...),
|
| 91 |
+
display_name: str = Form(...),
|
| 92 |
+
):
|
| 93 |
+
"""Route for making submissions with user generated results.
|
| 94 |
+
:param request : The incoming request (used for rate limiting)
|
| 95 |
+
:param email : The email of the user's submission
|
| 96 |
+
:param predictions_zip : The zip file of the user's predictions'
|
| 97 |
+
:param display_name : The display name associated with the user's submission'
|
| 98 |
+
"""
|
| 99 |
+
logging.info("Starting submission")
|
| 100 |
+
if len(display_name) > 200:
|
| 101 |
+
raise HTTPException(
|
| 102 |
+
status_code=400, detail="Display name must be under 200 characters."
|
| 103 |
+
)
|
| 104 |
+
if len(email) > 320 or "@" not in email:
|
| 105 |
+
raise HTTPException(status_code=400, detail="Invalid email address.")
|
| 106 |
+
info_message = f"Submission from {email!r} as {display_name!r}."
|
| 107 |
+
logging.info(info_message)
|
| 108 |
+
zip_bytes = await predictions_zip.read()
|
| 109 |
+
if len(zip_bytes) > MAX_ZIP_SIZE_MB * 1024 * 1024:
|
| 110 |
+
raise HTTPException(
|
| 111 |
+
status_code=413, detail=f"ZIP file exceeds {MAX_ZIP_SIZE_MB}MB limit."
|
| 112 |
+
)
|
| 113 |
+
submission_json = unzip_predictions_from_zip(zip_bytes)
|
| 114 |
+
|
| 115 |
+
validate_submission_template(submission_json)
|
| 116 |
+
validate_submission_tasks_name(submission_json)
|
| 117 |
+
validate_submission_json(submission_json)
|
| 118 |
+
|
| 119 |
+
tasks: List[Task] = tasks_factory(submission_json)
|
| 120 |
+
logging.info("Computation started")
|
| 121 |
+
start = datetime.now()
|
| 122 |
+
submission_response = compute_tasks_ratings(tasks=tasks, submission=submission_json)
|
| 123 |
+
computation_time = datetime.now() - start
|
| 124 |
+
info_message = f"Computation ended in {computation_time}"
|
| 125 |
+
logging.info(info_message)
|
| 126 |
+
submission_id = str(uuid.uuid4())
|
| 127 |
+
submission_response.update(
|
| 128 |
+
{
|
| 129 |
+
"display_name": display_name,
|
| 130 |
+
"email": email,
|
| 131 |
+
"submission_id": submission_id,
|
| 132 |
+
}
|
| 133 |
+
)
|
| 134 |
+
|
| 135 |
+
out_path = RESULTS_DIR / f"{submission_id}.json"
|
| 136 |
+
with open(out_path, "w", encoding="utf-8") as f:
|
| 137 |
+
json.dump(submission_response, f, ensure_ascii=False, indent=2)
|
| 138 |
+
|
| 139 |
+
get_leaderboard_entries.cache_clear()
|
| 140 |
+
|
| 141 |
+
return JSONResponse(content=submission_response)
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
@lru_cache(maxsize=1)
def get_leaderboard_entries() -> List[Dict[str, Any]]:
    """Returns all entries currently in the leaderboard.

    Also supports JSON files that contain a LIST of entries, and normalises
    'flat' metric dictionaries into nested groups for the front-end.
    """

    def _wrap_flat_metrics(task_payload: Dict[str, Any]) -> Dict[str, Any]:
        """
        If task_payload is 'flat' (e.g. {"accuracy": 94.2}), wrap it as
        {"<group>": {...}} so the front-end can aggregate it.
        Group naming rules:
          - exact_match/f1 present                 -> "fquad"
          - else acc/accuracy present              -> "accuracy"
          - else pearson/pearsonr/spearman present -> "correlation"
          - otherwise                              -> "metrics"
        Values > 1 are left as-is (the front-end already normalises % -> [0,1]).
        """
        if not isinstance(task_payload, dict):
            return task_payload

        # Already nested (at least one value is a dict): leave untouched.
        if any(isinstance(v, dict) for v in task_payload.values()):
            return task_payload

        keys = set(k.lower() for k in task_payload.keys())
        if {"exact_match", "f1"} & keys:
            group = "fquad"
        elif {"accuracy", "acc"} & keys:
            group = "accuracy"
        elif {"pearson", "pearsonr", "spearman"} & keys:
            group = "correlation"
        else:
            group = "metrics"

        # Warnings need no special handling here: the front-end treats them as
        # optional and expects "<group>_warning" inside the inner object.
        return {group: task_payload}

    entries: List[Dict[str, Any]] = []

    for filepath in glob.glob(str(RESULTS_DIR / "*.json")):
        try:
            with open(filepath, encoding="utf-8") as f:
                data = json.load(f)

            # Inner helper converting ONE entry (dict) to the minimal format.
            def process_entry(entry: Dict[str, Any]) -> Union[Dict[str, Any], None]:
                if not isinstance(entry, dict):
                    return None
                if "model_name" not in entry or "tasks" not in entry:
                    return None

                # Rebuild "results" in the shape the front-end expects.
                results = {}
                for task_obj in entry.get("tasks", []):
                    if not isinstance(task_obj, dict) or len(task_obj) != 1:
                        continue
                    task_name, payload = list(task_obj.items())[0]
                    normalized = _wrap_flat_metrics(payload)
                    results[task_name] = normalized

                if not results:
                    return None

                return {
                    "submission_id": entry.get("submission_id") or str(uuid.uuid4()),
                    "display_name": entry.get("display_name")
                    or entry.get("model_name")
                    or "Unnamed Model",
                    "email": entry.get("email", "N/A"),
                    "results": results,
                }

            # A file may contain ONE entry (dict) or SEVERAL (list).
            if isinstance(data, list):
                for item in data:
                    processed = process_entry(item)
                    if processed:
                        entries.append(processed)
            else:
                processed = process_entry(data)
                if processed:
                    entries.append(processed)

        except Exception as e:
            # Skip corrupt/unreadable files so one bad file cannot break the
            # whole leaderboard.
            logging_message = f"Error processing file '{filepath}': {e}"
            logging.error(logging_message)
            continue

    return entries
|
| 235 |
+
|
| 236 |
+
|
| 237 |
+
@app.get("/leaderboard")
async def leaderboard() -> List[Dict[str, Any]]:
    """Return all leaderboard entries (served from the lru_cache, which is
    cleared whenever a new submission is stored)."""
    return get_leaderboard_entries()
|
| 241 |
+
|
| 242 |
+
|
| 243 |
+
@app.get("/health")
async def health_check():
    """Liveness probe: confirms the API process is responding."""
    payload = {"status": "healthy", "message": "API is running."}
    return payload
|
| 246 |
+
|
| 247 |
+
|
| 248 |
+
@app.get("/")
async def home():
    """Root endpoint: minimal status response."""
    return dict(status="working")
|