Spaces:

NASK-PIB
/

RefusEU

Running

App Files Files Community

RefusEU / index.html

WojciechKusa

Update index.html

a437c26 verified about 1 month ago

Raw

History Blame Contribute Delete

29.6 kB

	<!DOCTYPE html>
	<html lang="en">
	<head>
	<!-- Encoding, responsive viewport and the tab title. -->
	<meta charset="UTF-8">
	<meta name="viewport" content="width=device-width, initial-scale=1.0">
	<title>Multilingual Refusal Alignment for Safer LLMs</title>
	<!-- Google Fonts uses two origins: the stylesheet comes from fonts.googleapis.com,
	     the font files it references come from fonts.gstatic.com. Font files are fetched
	     in CORS mode, so that preconnect needs the crossorigin attribute to be reused. -->
	<link rel="preconnect" href="https://fonts.googleapis.com">
	<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
	<link href="https://fonts.googleapis.com/css2?family=Source+Serif+4:ital,opsz,wght@0,8..60,300;0,8..60,400;0,8..60,600;0,8..60,700;1,8..60,300;1,8..60,400&family=DM+Sans:ital,wght@0,300;0,400;0,500;0,600;1,300&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet">
	<style>
	/* Design tokens: every colour, shadow, radius and font stack used by the rules below. */
	:root {
	  /* Page background, card background and hairline borders */
	  --bg: #faf9f7;
	  --surface: #ffffff;
	  --border: #e8e4de;

	  /* Text colours, from strongest to weakest */
	  --text-primary: #1a1714;
	  --text-secondary: #5c5650;
	  --text-muted: #8c867e;

	  /* Red accent and blue secondary accent, each with a pale tint */
	  --accent: #c0392b;
	  --accent-light: #fdf0ef;
	  --accent2: #2c5f8a;
	  --accent2-light: #edf3f9;

	  /* Background of the language pills */
	  --tag-bg: #f0ede8;

	  /* Elevation and corner radius */
	  --shadow: 0 2px 16px rgba(26,23,20,0.07);
	  --shadow-lg: 0 8px 40px rgba(26,23,20,0.11);
	  --radius: 8px;

	  /* Font stacks; the first family of each is loaded from Google Fonts */
	  --serif: 'Source Serif 4', Georgia, serif;
	  --sans: 'DM Sans', system-ui, sans-serif;
	  --mono: 'JetBrains Mono', monospace;
	}

	/* Global reset: border-box sizing and no default margin/padding on every element
	   and on its ::before / ::after pseudo-elements. The selector list must start with
	   the universal selector; a list that begins with a comma is invalid and makes the
	   browser discard the whole rule. */
	*,
	*::before,
	*::after {
	  box-sizing: border-box;
	  margin: 0;
	  padding: 0;
	}

	/* In-page anchor jumps (such as the #bibtex button) scroll smoothly. */
	html {
	  scroll-behavior: smooth;
	}

	/* Page-wide defaults: colours, sans-serif body font and a generous line height. */
	body {
	  background: var(--bg);
	  color: var(--text-primary);
	  font-family: var(--sans);
	  line-height: 1.7;
	  -webkit-font-smoothing: antialiased;
	}

	/* ── HERO: centred banner with venue badge, title and authors ── */
	.hero {
	  padding: 80px 24px 64px;
	  text-align: center;
	  background: var(--surface);
	  border-bottom: 1px solid var(--border);
	}

	/* Pill-shaped, upper-cased venue label above the title. */
	.venue-badge {
	  display: inline-flex;
	  align-items: center;
	  gap: 6px;
	  margin-bottom: 28px;
	  padding: 5px 14px;
	  background: var(--accent-light);
	  color: var(--accent);
	  border: 1px solid rgba(192,57,43,0.2);
	  border-radius: 20px;
	  font-size: 0.78rem;
	  font-weight: 600;
	  letter-spacing: 0.04em;
	  text-transform: uppercase;
	}

	/* Paper title: serif, fluid size between 1.7rem and 2.6rem, width-capped and centred. */
	.hero h1 {
	  max-width: 780px;
	  margin: 0 auto 8px;
	  font-family: var(--serif);
	  font-size: clamp(1.7rem, 4vw, 2.6rem);
	  font-weight: 600;
	  line-height: 1.25;
	  letter-spacing: -0.01em;
	  color: var(--text-primary);
	}

	/* Emphasised words inside the title are italic and accent-coloured. */
	.hero h1 em {
	  font-style: italic;
	  color: var(--accent);
	}

	/* Author block under the title. */
	.authors {
	  max-width: 660px;
	  margin: 28px auto 0;
	  font-size: 0.95rem;
	  color: var(--text-secondary);
	}

	/* Author names wrap onto several centred rows when space runs out. */
	.authors .name-list {
	  display: flex;
	  flex-wrap: wrap;
	  justify-content: center;
	  gap: 4px 18px;
	  margin-bottom: 6px;
	  font-weight: 500;
	  color: var(--text-primary);
	}

	/* Affiliation markers next to each name. */
	.authors .name-list sup {
	  font-size: 0.65em;
	  font-weight: 600;
	  color: var(--accent);
	}

	/* Affiliation legend below the names. */
	.affil {
	  font-size: 0.82rem;
	  line-height: 1.6;
	  color: var(--text-muted);
	}

	/* ── LINKS ROW: call-to-action buttons under the author block ── */
	.links-row {
	  display: flex;
	  flex-wrap: wrap;
	  justify-content: center;
	  gap: 10px;
	  margin-top: 32px;
	}

	/* Shared button base. The border starts transparent so that variants which
	   colour it do not change the button's size. */
	.btn {
	  display: inline-flex;
	  align-items: center;
	  gap: 7px;
	  padding: 9px 18px;
	  border-radius: 6px;
	  font-family: var(--sans);
	  font-size: 0.84rem;
	  font-weight: 500;
	  text-decoration: none;
	  transition: all 0.18s ease;
	  border: 1px solid transparent;
	  cursor: pointer;
	}

	/* Dark, filled variant. */
	.btn-primary {
	  background: var(--text-primary);
	  color: #fff;
	}
	.btn-primary:hover {
	  background: #333;
	  transform: translateY(-1px);
	  box-shadow: var(--shadow);
	}

	/* Light variant with a visible border. */
	.btn-outline {
	  background: var(--surface);
	  color: var(--text-primary);
	  border-color: var(--border);
	}
	.btn-outline:hover {
	  border-color: var(--text-primary);
	  transform: translateY(-1px);
	  box-shadow: var(--shadow);
	}

	/* Blue-tinted variant used for the dataset link. */
	.btn-dataset {
	  background: var(--accent2-light);
	  color: var(--accent2);
	  border-color: rgba(44,95,138,0.2);
	}
	.btn-dataset:hover {
	  background: #dbe9f5;
	  transform: translateY(-1px);
	}

	/* Inline icons keep a fixed 15px box and never shrink inside the flex row. */
	.btn svg {
	  width: 15px;
	  height: 15px;
	  flex-shrink: 0;
	}

	/* ── LAYOUT: centred 860px column and base text elements ── */
	/* Generic centred wrapper. */
	.container {
	  max-width: 860px;
	  margin: 0 auto;
	  padding: 0 24px;
	}

	/* Every <section> is itself a centred column with vertical breathing room. */
	section {
	  max-width: 860px;
	  margin: 0 auto;
	  padding: 64px 24px;
	}

	/* Small upper-cased eyebrow label shown above each section heading. */
	.section-label {
	  margin-bottom: 12px;
	  font-size: 0.72rem;
	  font-weight: 600;
	  letter-spacing: 0.1em;
	  text-transform: uppercase;
	  color: var(--accent);
	}

	/* Section headings: serif, fluid size between 1.25rem and 1.6rem. */
	h2 {
	  margin-bottom: 20px;
	  font-family: var(--serif);
	  font-size: clamp(1.25rem, 2.5vw, 1.6rem);
	  font-weight: 600;
	  letter-spacing: -0.01em;
	  color: var(--text-primary);
	}

	/* Body paragraphs. */
	p {
	  margin-bottom: 14px;
	  font-size: 0.97rem;
	  line-height: 1.8;
	  color: var(--text-secondary);
	}

	/* The last paragraph in a container does not add trailing space. */
	p:last-child {
	  margin-bottom: 0;
	}

	/* ── ABSTRACT: bordered card holding the abstract text ── */
	.abstract-section {
	  border-bottom: 1px solid var(--border);
	}

	/* White card with rounded corners and a soft shadow. */
	.abstract-card {
	  padding: 32px 36px;
	  background: var(--surface);
	  border: 1px solid var(--border);
	  border-radius: 12px;
	  box-shadow: var(--shadow);
	}

	/* ── HIGHLIGHTS: responsive grid of summary cards ── */
	/* As many columns as fit, each at least 240px wide. */
	.highlights-grid {
	  display: grid;
	  grid-template-columns: repeat(auto-fit, minmax(240px, 1fr));
	  gap: 16px;
	  margin-top: 8px;
	}

	/* Card body. position/overflow let the coloured ::before strip sit flush with
	   the top edge and be clipped by the rounded corners. */
	.highlight-card {
	  position: relative;
	  overflow: hidden;
	  padding: 22px 24px;
	  background: var(--surface);
	  border: 1px solid var(--border);
	  border-radius: 10px;
	  box-shadow: var(--shadow);
	  transition: box-shadow 0.2s, transform 0.2s;
	}

	/* 3px strip along the top of each card; its colour is set per position below. */
	.highlight-card::before {
	  content: '';
	  position: absolute;
	  top: 0;
	  left: 0;
	  right: 0;
	  height: 3px;
	}
	.highlight-card:nth-child(1)::before {
	  background: var(--accent);
	}
	.highlight-card:nth-child(2)::before {
	  background: var(--accent2);
	}
	.highlight-card:nth-child(3)::before {
	  background: #2a7a4e;
	}
	.highlight-card:nth-child(4)::before {
	  background: #7a4a2a;
	}

	/* Lift the card slightly on hover. */
	.highlight-card:hover {
	  box-shadow: var(--shadow-lg);
	  transform: translateY(-2px);
	}

	/* Emoji icon on its own line above the card title. */
	.highlight-icon {
	  display: block;
	  margin-bottom: 10px;
	  font-size: 1.5rem;
	}

	.highlight-title {
	  margin-bottom: 6px;
	  font-size: 0.9rem;
	  font-weight: 600;
	  color: var(--text-primary);
	}

	/* Card text is smaller and tighter than regular paragraphs and carries no margin. */
	.highlight-card p {
	  margin: 0;
	  font-size: 0.85rem;
	  line-height: 1.65;
	}

	/* ── DATASET: full-width white band with stats and language pills ── */
	.dataset-section {
	  padding: 64px 24px;
	  background: var(--surface);
	  border-top: 1px solid var(--border);
	  border-bottom: 1px solid var(--border);
	}

	/* Centred content column inside the band. */
	.dataset-inner {
	  max-width: 860px;
	  margin: 0 auto;
	}

	/* Grid of headline numbers; columns are at least 150px wide. */
	.dataset-stats {
	  display: grid;
	  grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));
	  gap: 16px;
	  margin: 28px 0;
	}

	.stat-box {
	  padding: 20px;
	  text-align: center;
	  background: var(--bg);
	  border: 1px solid var(--border);
	  border-radius: 10px;
	}

	/* Large serif figure on its own line. */
	.stat-number {
	  display: block;
	  font-family: var(--serif);
	  font-size: 2rem;
	  font-weight: 600;
	  line-height: 1.1;
	  color: var(--accent);
	}

	/* Caption under the figure. */
	.stat-label {
	  display: block;
	  margin-top: 4px;
	  font-size: 0.78rem;
	  font-weight: 500;
	  color: var(--text-muted);
	}

	/* Wrapping row of language tags. */
	.lang-pills {
	  display: flex;
	  flex-wrap: wrap;
	  gap: 7px;
	  margin-top: 16px;
	}

	/* One monospace tag per language. */
	.lang-pill {
	  padding: 3px 10px;
	  background: var(--tag-bg);
	  color: var(--text-secondary);
	  border-radius: 5px;
	  font-family: var(--mono);
	  font-size: 0.78rem;
	  font-weight: 500;
	}

	/* ── RESEARCH QUESTIONS: stacked cards, one per question ── */
	/* Unbulleted vertical list. */
	.rq-list {
	  display: flex;
	  flex-direction: column;
	  gap: 14px;
	  margin-top: 8px;
	  list-style: none;
	}

	/* Card laid out as a row: tag on the left, question text on the right. */
	.rq-item {
	  display: flex;
	  gap: 16px;
	  padding: 18px 20px;
	  background: var(--surface);
	  border: 1px solid var(--border);
	  border-radius: 10px;
	  box-shadow: var(--shadow);
	}

	/* Dark "RQn" chip; fit-content height stops it stretching to the row height. */
	.rq-tag {
	  flex-shrink: 0;
	  height: fit-content;
	  margin-top: 2px;
	  padding: 2px 9px;
	  background: var(--text-primary);
	  color: #fff;
	  border-radius: 6px;
	  font-size: 0.72rem;
	  font-weight: 700;
	  letter-spacing: 0.04em;
	}

	.rq-text {
	  margin: 0;
	  font-size: 0.92rem;
	  line-height: 1.7;
	  color: var(--text-secondary);
	}

	/* ── FIGURES: dashed frame around an image and its caption ── */
	/* overflow: hidden clips the image and caption to the rounded corners. */
	.figure-block {
	  margin: 24px 0;
	  padding: 0;
	  overflow: hidden;
	  background: var(--surface);
	  border: 1.5px dashed var(--border);
	  border-radius: 10px;
	}

	/* Images fill the frame by default; inline styles on individual images narrow them. */
	.figure-block img {
	  display: block;
	  width: 100%;
	  height: auto;
	}

	/* Italic caption strip separated from the image by a hairline. */
	.figure-caption {
	  padding: 10px 16px 12px;
	  background: var(--bg);
	  border-top: 1px solid var(--border);
	  font-size: 0.8rem;
	  font-style: italic;
	  color: var(--text-muted);
	}

	/* ── FINDINGS: stacked cards, each led by a coloured dot ── */
	/* Unbulleted vertical list. */
	.finding-list {
	  display: flex;
	  flex-direction: column;
	  gap: 12px;
	  margin-top: 8px;
	  list-style: none;
	}

	/* Card laid out as a row, aligned to the top so the dot stays by the first line. */
	.finding-item {
	  display: flex;
	  align-items: flex-start;
	  gap: 14px;
	  padding: 16px 20px;
	  background: var(--surface);
	  border: 1px solid var(--border);
	  border-radius: 9px;
	  box-shadow: var(--shadow);
	}

	/* 8px round marker; accent red unless a positional rule below recolours it. */
	.finding-dot {
	  flex-shrink: 0;
	  width: 8px;
	  height: 8px;
	  margin-top: 8px;
	  border-radius: 50%;
	  background: var(--accent);
	}

	/* Items 2 to 5 each get their own dot colour. */
	.finding-item:nth-child(2) .finding-dot {
	  background: var(--accent2);
	}
	.finding-item:nth-child(3) .finding-dot {
	  background: #2a7a4e;
	}
	.finding-item:nth-child(4) .finding-dot {
	  background: #7a5f2a;
	}
	.finding-item:nth-child(5) .finding-dot {
	  background: #6a2a7a;
	}

	.finding-text {
	  margin: 0;
	  font-size: 0.9rem;
	  line-height: 1.7;
	  color: var(--text-secondary);
	}

	/* The bold lead-in sentence uses the primary text colour. */
	.finding-text strong {
	  color: var(--text-primary);
	}

	/* ── BIBTEX: full-width band with a dark code panel ── */
	.bibtex-section {
	  padding: 64px 24px;
	  background: var(--surface);
	  border-top: 1px solid var(--border);
	}

	/* Centred content column inside the band. */
	.bibtex-inner {
	  max-width: 860px;
	  margin: 0 auto;
	}

	/* Dark monospace panel. position: relative anchors the Copy button;
	   long lines scroll horizontally instead of wrapping the layout. */
	.code-block {
	  position: relative;
	  margin-top: 16px;
	  padding: 24px 28px;
	  overflow-x: auto;
	  background: #1d1b19;
	  color: #d4cfc9;
	  border-radius: 10px;
	  font-family: var(--mono);
	  font-size: 0.78rem;
	  line-height: 1.8;
	}

	/* Syntax colours: field names, field values, braces and commas. */
	.code-key {
	  color: #e09a6a;
	}
	.code-val {
	  color: #8bbf7a;
	}
	.code-brace {
	  color: #888;
	}

	/* Translucent button pinned to the panel's top-right corner. */
	.copy-btn {
	  position: absolute;
	  top: 12px;
	  right: 14px;
	  padding: 4px 11px;
	  background: rgba(255,255,255,0.08);
	  border: 1px solid rgba(255,255,255,0.12);
	  border-radius: 5px;
	  color: #aaa;
	  font-family: var(--sans);
	  font-size: 0.72rem;
	  cursor: pointer;
	  transition: all 0.15s;
	}
	.copy-btn:hover {
	  background: rgba(255,255,255,0.14);
	  color: #fff;
	}

	/* ── FOOTER: small, muted, centred closing line ── */
	footer {
	  padding: 28px 24px;
	  text-align: center;
	  background: var(--bg);
	  border-top: 1px solid var(--border);
	  font-size: 0.78rem;
	  color: var(--text-muted);
	}

	/* The global `p` rule sets its own font-size and colour, which beats values
	   merely inherited from <footer>. Without this rule the footer's small, muted
	   text settings never reach the paragraph that holds the footer text. */
	footer p {
	  font-size: inherit;
	  color: inherit;
	}

	/* Footer links: blue, underlined only on hover. */
	footer a {
	  color: var(--accent2);
	  text-decoration: none;
	}
	footer a:hover {
	  text-decoration: underline;
	}

	/* ── DIVIDER: 1px rule between sections, as wide as the content column ── */
	.divider {
	  max-width: 860px;
	  height: 1px;
	  margin: 0 auto;
	  background: var(--border);
	}

	/* ── RESPONSIVE: tighter padding and gaps on viewports up to 600px wide ── */
	@media (max-width: 600px) {
	  .abstract-card {
	    padding: 22px 20px;
	  }

	  .links-row {
	    gap: 8px;
	  }

	  /* Sections and both full-width bands share the same reduced padding. */
	  section {
	    padding: 48px 20px;
	  }
	  .dataset-section {
	    padding: 48px 20px;
	  }
	  .bibtex-section {
	    padding: 48px 20px;
	  }
	}

	/* ── ANIMATIONS: staggered fade-and-rise entrance for the hero ── */
	@keyframes fadeUp {
	  from { opacity: 0; transform: translateY(18px); }
	  to { opacity: 1; transform: translateY(0); }
	}

	/* Every direct child of the hero fades in. Fill mode `both` keeps each element
	   at the `from` state (invisible) until its delay has elapsed. */
	.hero > * {
	  animation: fadeUp 0.55s ease both;
	}

	/* Stagger the children from top to bottom. */
	.hero .venue-badge { animation-delay: 0.05s; }
	.hero h1 { animation-delay: 0.12s; }
	.hero .authors { animation-delay: 0.2s; }
	.hero .links-row { animation-delay: 0.28s; }

	/* Respect the operating-system "reduce motion" setting: show the hero
	   immediately and make anchor jumps instant instead of smooth-scrolling. */
	@media (prefers-reduced-motion: reduce) {
	  html {
	    scroll-behavior: auto;
	  }

	  .hero > * {
	    animation: none;
	  }
	}
	</style>
	</head>
	<body>

	<!-- ═══ HERO ══════════════════════════════════════════ -->
	<!-- Page banner: venue badge, paper title, authors with affiliations, and the
	     three call-to-action links. The SVG icons are purely decorative (each link
	     has visible text), so they are hidden from assistive technology. -->
	<header class="hero">
	  <div class="venue-badge">Findings of ACL 2026</div>

	  <h1>Multilingual Refusal Alignment<br>for Safer Large Language Models</h1>

	  <div class="authors">
	    <div class="name-list">
	      <span>Aleksandra Krasnodębska<sup>†</sup></span>
	      <span>Wojciech Kusa<sup>†</sup></span>
	      <span>Aldo Lipani<sup>‡</sup></span>
	    </div>
	    <div class="affil">
	      <sup>†</sup> NASK National Research Institute, Warsaw, Poland  ·
	      <sup>‡</sup> University College London, London, UK<br>
	    </div>
	  </div>

	  <div class="links-row">
	    <a href="https://wojciechkusa.github.io/papers/RefusEU-2026.pdf" class="btn btn-primary">
	      <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" aria-hidden="true" focusable="false"><path d="M14 2H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V8z"/><polyline points="14 2 14 8 20 8"/></svg>
	      Paper
	    </a>
	    <!-- Opens in a new tab; rel="noopener" makes explicit that the new page gets no handle on this window. -->
	    <a href="https://huggingface.co/datasets/NASK-PIB/RefusEU" class="btn btn-dataset" target="_blank" rel="noopener">
	      <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" aria-hidden="true" focusable="false"><ellipse cx="12" cy="5" rx="9" ry="3"/><path d="M21 12c0 1.66-4 3-9 3s-9-1.34-9-3"/><path d="M3 5v14c0 1.66 4 3 9 3s9-1.34 9-3V5"/></svg>
	      RefusEU Dataset
	    </a>
	    <!-- In-page jump to the citation block at the bottom of the page. -->
	    <a href="#bibtex" class="btn btn-outline">
	      <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" aria-hidden="true" focusable="false"><path d="M4 7V4h16v3"/><path d="M9 20h6"/><path d="M12 4v16"/></svg>
	      BibTeX
	    </a>
	  </div>
	</header>


	<!-- ═══ ABSTRACT ══════════════════════════════════════ -->
	<!-- Three-paragraph abstract inside a single card, followed by a divider rule. -->
	<section class="abstract-section">
	  <div class="section-label">Abstract</div>
	  <div class="abstract-card">
	    <p>As Large Language Models are deployed globally, ensuring consistent safety across languages becomes paramount. Yet safety behaviors vary unpredictably between languages, and most alignment research remains English-centric — creating a critical gap for non-English speakers worldwide.</p>
	    <p>We systematically investigate the dynamics of multilingual alignment: whether single-language alignment transfers cross-lingually, how language consistency is preserved during training, and the resulting trade-offs with general capabilities. We introduce <strong>RefusEU</strong>, a novel refusal alignment dataset covering <strong>12 European languages</strong>, including a held-out test set for evaluating state-of-the-art models.</p>
	    <p>Our controlled Direct Preference Optimization (DPO) experiments reveal two key insights: aligning models exclusively in English is <em>insufficient</em> to ensure cross-lingual safety — even for identical harm categories. Conversely, training on multilingual datasets can improve safety <em>without degrading general performance</em>, as measured by the Global MMLU benchmark.</p>
	  </div>
	</section>

	<div class="divider"></div>

	<!-- ═══ RESEARCH QUESTIONS ════════════════════════════ -->
	<!-- One list item per research question: an "RQn" tag plus the question text. -->
	<section>
	  <div class="section-label">Research Questions</div>
	  <h2>What We Set Out to Answer</h2>
	  <ul class="rq-list">
	    <li class="rq-item">
	      <span class="rq-tag">RQ1</span>
	      <p class="rq-text">Do we need to perform multilingual alignment for each language on the same groups of prompts, or is training in a single language (English) sufficient to achieve cross-lingual safety?</p>
	    </li>
	    <li class="rq-item">
	      <span class="rq-tag">RQ2</span>
	      <p class="rq-text">How well is cross-lingual consistency — the ability to respond in the prompt's language — preserved during multilingual training, and how does it interact with safety?</p>
	    </li>
	    <li class="rq-item">
	      <span class="rq-tag">RQ3</span>
	      <p class="rq-text">How does multilingual safety alignment influence general multilingual capabilities, including factual knowledge, fluency, and linguistic correctness?</p>
	    </li>
	  </ul>
	</section>

	<div class="divider"></div>

	<!-- ═══ DATASET ═══════════════════════════════════════ -->
	<!-- Full-width band describing the dataset: intro text, four headline numbers,
	     the list of languages, the construction method, and the pipeline figure. -->
	<div class="dataset-section">
	  <div class="dataset-inner">
	    <div class="section-label">Dataset</div>
	    <h2>Introducing RefusEU</h2>
	    <p>RefusEU is the first European dataset designed for alignment training as DPO-ready triples — (question, chosen, rejected) — and includes a separate, contamination-free test split. Each chosen response is a high-quality refusal; each rejected response was generated by a safety-abliterated model.</p>

	    <div class="dataset-stats">
	      <div class="stat-box">
	        <span class="stat-number">12</span>
	        <span class="stat-label">European languages</span>
	      </div>
	      <div class="stat-box">
	        <span class="stat-number">4k+</span>
	        <span class="stat-label">pairs per language</span>
	      </div>
	      <div class="stat-box">
	        <span class="stat-number">14</span>
	        <span class="stat-label">harm categories</span>
	      </div>
	      <div class="stat-box">
	        <span class="stat-number">1,400</span>
	        <span class="stat-label">test samples / lang.</span>
	      </div>
	    </div>

	    <p style="margin-bottom:10px;"><strong style="color: var(--text-primary);">Languages covered:</strong></p>
	    <div class="lang-pills">
	      <span class="lang-pill">English</span>
	      <span class="lang-pill">German</span>
	      <span class="lang-pill">French</span>
	      <span class="lang-pill">Italian</span>
	      <span class="lang-pill">Spanish</span>
	      <span class="lang-pill">Portuguese</span>
	      <span class="lang-pill">Polish</span>
	      <span class="lang-pill">Czech</span>
	      <span class="lang-pill">Slovak</span>
	      <span class="lang-pill">Slovenian</span>
	      <span class="lang-pill">Lithuanian</span>
	      <span class="lang-pill">Latvian</span>
	    </div>

	    <p style="margin-top: 20px;">Questions are generated using an adversarial pipeline based on Rainbow Teaming across 10 attack styles and 14 crime categories (Llama-Guard taxonomy). A multi-model labelling protocol (Llama-Guard-3-8B, PolyGuard-Qwen, GPT-4o-mini) ensures label quality, with a manual audit confirming 100% accuracy across 1,200 sampled pairs.</p>

	    <!-- <figure>/<figcaption> tie the caption to the image for assistive technology;
	         the existing classes carry all styling, including the margin that replaces
	         the browser's default figure margin. The image sits far below the fold, so
	         it is loaded lazily. -->
	    <figure class="figure-block" id="fig-dataset">
	      <img src="img/dataset.png" alt="Dataset construction pipeline" class="figure-image" style="width: 50%; margin: 0 auto;" loading="lazy" decoding="async">
	      <figcaption class="figure-caption">Figure 1 — Dataset construction process: adversarial prompt generation → multilingual translation → dual-model safety labelling → DPO triple curation.</figcaption>
	    </figure>
	  </div>
	</div>

	<div class="divider"></div>

	<!-- ═══ METHODOLOGY ═══════════════════════════════════ -->
	<!-- Experimental setup: intro paragraph, one card per training configuration,
	     and a closing paragraph on the extra runs and the evaluation metrics. -->
	<section>
	  <div class="section-label">Methodology</div>
	  <h2>Experimental Design</h2>
	  <p>To isolate alignment dynamics, we start from <em>abliterated</em> Llama-3.1-8B and 70B models — versions where safety mechanisms have been deliberately removed via refusal direction ablation — then realign them using DPO under four dataset configurations:</p>

	  <div class="highlights-grid" style="margin: 20px 0 24px;">
	    <div class="highlight-card">
	      <span class="highlight-icon">⚖️</span>
	      <div class="highlight-title">Balanced</div>
	      <p>All 12 languages with equal representation (34,668 samples total).</p>
	    </div>
	    <div class="highlight-card">
	      <span class="highlight-icon">🌍</span>
	      <div class="highlight-title">High-Resource Only</div>
	      <p>English, German, Italian, French, Spanish, Portuguese (17,334 samples).</p>
	    </div>
	    <div class="highlight-card">
	      <span class="highlight-icon">🇬🇧</span>
	      <div class="highlight-title">English Only</div>
	      <p>Baseline to test whether English alignment is sufficient (2,889 samples).</p>
	    </div>
	    <div class="highlight-card">
	      <span class="highlight-icon">🌐</span>
	      <div class="highlight-title">No English</div>
	      <p>All 11 non-English languages — tests transfer to English from others.</p>
	    </div>
	  </div>

	  <p>Additionally, 11 individual single-language DPO runs were performed to measure language-specific transfer. Evaluation uses Attack Success Rate (ASR) on RefusEU-test, language consistency, Global MMLU, and an LLM-as-a-Judge fluency/correctness protocol.</p>
	</section>

	<div class="divider"></div>

	<!-- ═══ RESULTS ════════════════════════════════════════ -->
	<!-- Key findings: the ASR table and chart, five finding cards, and the
	     ASR-versus-consistency scatter plot. Both figures use <figure>/<figcaption>
	     so each caption is tied to its images for assistive technology; the existing
	     classes carry all styling. The images are below the fold and load lazily. -->
	<section>
	  <div class="section-label">Results</div>
	  <h2>Key Findings</h2>

	  <figure class="figure-block" id="fig-asr">
	    <img src="img/asr-table.png" alt="ASR comparison table results" style="width: 100%; display: block; margin: 0 auto 16px;" loading="lazy" decoding="async">
	    <img src="img/asr.png" alt="ASR comparison figure results" style="width: 100%; display: block; margin: 0 auto;" loading="lazy" decoding="async">
	    <figcaption class="figure-caption">Table 2 and Figure 2 — Attack Success Rate (ASR %) on RefusEU-test. Lower is better. Balanced multilingual training achieves the lowest ASR across both model sizes.</figcaption>
	  </figure>

	  <ul class="finding-list">
	    <li class="finding-item">
	      <div class="finding-dot"></div>
	      <p class="finding-text"><strong>English-only alignment is insufficient.</strong> Training exclusively on English safety preferences leads to notably higher ASR for low-resource languages, particularly with Llama-70B — demonstrating that cross-lingual safety transfer from English alone cannot be relied upon.</p>
	    </li>
	    <li class="finding-item">
	      <div class="finding-dot"></div>
	      <p class="finding-text"><strong>Balanced multilingual training works best.</strong> The lowest average ASR across all languages is consistently achieved by the balanced 12-language configuration for both the 8B and 70B models, with high-resource-only training as a strong second choice.</p>
	    </li>
	    <li class="finding-item">
	      <div class="finding-dot"></div>
	      <p class="finding-text"><strong>Linguistic proximity enables transfer.</strong> Closely related language pairs — Polish–Czech and Portuguese–Spanish — exhibit strongly correlated ASR values across training configurations, suggesting that structural similarity facilitates cross-lingual safety generalization.</p>
	    </li>
	    <li class="finding-item">
	      <div class="finding-dot"></div>
	      <p class="finding-text"><strong>Language consistency and safety interact non-trivially.</strong> While high language consistency is generally desirable, explicitly enforcing it can reduce safety in smaller models like Llama-8B. Llama-70B achieves near-100% consistency across all configurations; smaller models degrade under English-only setups.</p>
	    </li>
	    <li class="finding-item">
	      <div class="finding-dot"></div>
	      <p class="finding-text"><strong>General capabilities are largely preserved.</strong> Performance degradation on Global MMLU stays below 0.006 for both model sizes. For low-resource languages on the 8B model, translation-based pipelines (translate → answer in English → translate back) outperform native-language generation even for the unmodified Instruct baseline.</p>
	    </li>
	  </ul>

	  <figure class="figure-block" id="fig-scatter" style="margin-top: 24px;">
	    <img src="img/language.png" alt="ASR vs. language consistency scatter plot" style="width: 90%; display: block; margin: 0 auto;" loading="lazy" decoding="async">
	    <figcaption class="figure-caption">Figure 3 — ASR vs. language consistency across training setups. Llama-70B with high-resource training achieves the best combined performance.</figcaption>
	  </figure>
	</section>

	<div class="divider"></div>

	<!-- ═══ CONTRIBUTIONS ═════════════════════════════════ -->
	<!-- Closing summary: one card per contribution (dataset, experiments, evaluation). -->
	<section>
	  <div class="section-label">Contributions</div>
	  <h2>Summary</h2>
	  <div class="highlights-grid">
	    <div class="highlight-card">
	      <span class="highlight-icon">🗃️</span>
	      <div class="highlight-title">RefusEU Dataset</div>
	      <p>The first DPO-ready multilingual refusal dataset covering 12 European languages, with a fixed contamination-free evaluation split and fully audited safety labels.</p>
	    </div>
	    <div class="highlight-card">
	      <span class="highlight-icon">🔬</span>
	      <div class="highlight-title">Controlled Experiments</div>
	      <p>Systematic ablation across 4 training configurations + 11 single-language runs on deliberately de-safety-aligned base models for clean measurement.</p>
	    </div>
	    <div class="highlight-card">
	      <span class="highlight-icon">📐</span>
	      <div class="highlight-title">Multidimensional Evaluation</div>
	      <p>ASR, language consistency, Global MMLU, and fluency/correctness measured across all 12 languages, revealing trade-offs invisible under single-metric reporting.</p>
	    </div>
	  </div>
	</section>

	<!-- ═══ BIBTEX ════════════════════════════════════════ -->
	<!-- Citation panel and target of the hero's "#bibtex" link. The entry shown here
	     is colour-highlighted markup; the Copy button calls copyBibtex(), which keeps
	     its own plain-text copy of the same entry, so the two must be edited together. -->
	<div class="bibtex-section" id="bibtex">
	  <div class="bibtex-inner">
	    <div class="section-label">Citation</div>
	    <h2>BibTeX</h2>
	    <div class="code-block" id="bibtex-block">
	      <!-- Explicit type="button": a bare <button> defaults to type="submit". -->
	      <button type="button" class="copy-btn" onclick="copyBibtex()">Copy</button>
	      <span class="code-brace">@inproceedings{</span><span class="code-val">krasnodebska2026refuseu</span><span class="code-brace">,</span><br>
	      <span class="code-key">title</span> = <span class="code-val">{Multilingual Refusal Alignment for Safer Large Language Models}</span>,<br>
	      <span class="code-key">author</span> = <span class="code-val">{Krasnodębska, Aleksandra and Kusa, Wojciech and Lipani, Aldo}</span>,<br>
	      <span class="code-key">booktitle</span> = <span class="code-val">{Findings of the Association for Computational Linguistics: ACL 2026}</span>,<br>
	      <span class="code-key">year</span> = <span class="code-val">{2026}</span>,<br>
	      <span class="code-key">address</span> = <span class="code-val">{San Diego, California, United States}</span>,<br>
	      <span class="code-key">publisher</span> = <span class="code-val">{Association for Computational Linguistics}</span><br>
	      <span class="code-brace">}</span>
	    </div>
	  </div>
	</div>

	<!-- Page footer with a pointer to the dataset. The link opens in a new tab;
	     rel="noopener" makes explicit that the new page gets no handle on this window. -->
	<footer>
	  <p style="margin-top:6px;">RefusEU dataset available at <a href="https://huggingface.co/datasets/NASK-PIB/RefusEU" target="_blank" rel="noopener">huggingface.co/datasets/NASK-PIB/RefusEU</a></p>
	</footer>

	<script>
	/**
	 * Copy the paper's BibTeX entry to the clipboard and show the outcome on the
	 * Copy button for two seconds ("Copied!" or "Copy failed"), then restore "Copy".
	 *
	 * Called from the inline onclick handler of the `.copy-btn` button, so it must
	 * stay a global function with this name and no parameters.
	 *
	 * The async Clipboard API is tried first. It is only defined in secure contexts
	 * and its promise can reject (for example when the page is embedded in a frame
	 * without clipboard permission), so both cases fall back to copying through a
	 * temporary off-screen <textarea> with document.execCommand('copy').
	 */
	function copyBibtex() {
	  // Plain-text twin of the highlighted entry shown on the page; keep both in sync.
	  const raw = `@inproceedings{krasnodebska2026refuseu,
	title = {Multilingual Refusal Alignment for Safer Large Language Models},
	author = {Krasnodębska, Aleksandra and Kusa, Wojciech and Lipani, Aldo},
	booktitle = {Findings of the Association for Computational Linguistics: ACL 2026},
	year = {2026},
	address = {San Diego, California, United States},
	publisher = {Association for Computational Linguistics}
	}`;

	  const btn = document.querySelector('.copy-btn');

	  // Show the outcome on the button, then restore its label.
	  const report = (succeeded) => {
	    if (!btn) return;
	    btn.textContent = succeeded ? 'Copied!' : 'Copy failed';
	    setTimeout(() => { btn.textContent = 'Copy'; }, 2000);
	  };

	  // Fallback for browsers/contexts without a usable Clipboard API.
	  // Returns true when the browser reports that the copy command succeeded.
	  const legacyCopy = () => {
	    const scratch = document.createElement('textarea');
	    scratch.value = raw;
	    scratch.setAttribute('readonly', '');
	    // Keep the helper element out of sight and out of the layout flow.
	    scratch.style.position = 'fixed';
	    scratch.style.top = '0';
	    scratch.style.left = '0';
	    scratch.style.opacity = '0';
	    document.body.appendChild(scratch);
	    scratch.select();
	    try {
	      return document.execCommand('copy');
	    } catch (err) {
	      return false;
	    } finally {
	      document.body.removeChild(scratch);
	    }
	  };

	  const hasClipboardApi =
	    typeof navigator !== 'undefined' &&
	    navigator.clipboard &&
	    typeof navigator.clipboard.writeText === 'function';

	  if (!hasClipboardApi) {
	    report(legacyCopy());
	    return;
	  }

	  navigator.clipboard.writeText(raw).then(
	    () => report(true),
	    () => report(legacyCopy())
	  );
	}
	</script>

	</body>
	</html>