Update index.html
Browse files- index.html +8 -4
index.html
CHANGED
|
@@ -163,28 +163,32 @@ Exploring Refusal Loss Landscapes </title>
|
|
| 163 |
<h3>Perplexity Filter</h3>
|
| 164 |
<div>
|
| 165 |
<ul>
|
| 166 |
-
<li>Paper:
|
|
|
|
| 167 |
<li>Brief Introduction: </li>
|
| 168 |
</ul>
|
| 169 |
</div>
|
| 170 |
<h3>SmoothLLM</h3>
|
| 171 |
<div>
|
| 172 |
<ul>
|
| 173 |
-
<li>Paper:
|
|
|
|
| 174 |
<li>Brief Introduction: </li>
|
| 175 |
</ul>
|
| 176 |
</div>
|
| 177 |
<h3>Erase-Check</h3>
|
| 178 |
<div>
|
| 179 |
<ul>
|
| 180 |
-
<li>Paper:
|
|
|
|
| 181 |
<li>Brief Introduction: </li>
|
| 182 |
</ul>
|
| 183 |
</div>
|
| 184 |
<h3>Self-Reminder</h3>
|
| 185 |
<div>
|
| 186 |
<ul>
|
| 187 |
-
<li>Paper:
|
|
|
|
| 188 |
<li>Brief Introduction: </li>
|
| 189 |
</ul>
|
| 190 |
</div>
|
|
|
|
| 163 |
<h3>Perplexity Filter</h3>
|
| 164 |
<div>
|
| 165 |
<ul>
|
| 166 |
+
<li>Paper: <a href="https://arxiv.org/abs/2309.00614" target="_blank" rel="noopener noreferrer">
|
| 167 |
+
Baseline Defenses for Adversarial Attacks Against Aligned Language Models</a></li>
|
| 168 |
<li>Brief Introduction: </li>
|
| 169 |
</ul>
|
| 170 |
</div>
|
| 171 |
<h3>SmoothLLM</h3>
|
| 172 |
<div>
|
| 173 |
<ul>
|
| 174 |
+
<li>Paper: <a href="https://arxiv.org/abs/2310.03684" target="_blank" rel="noopener noreferrer">
|
| 175 |
+
SmoothLLM: Defending Large Language Models Against Jailbreaking Attacks</a></li>
|
| 176 |
<li>Brief Introduction: </li>
|
| 177 |
</ul>
|
| 178 |
</div>
|
| 179 |
<h3>Erase-Check</h3>
|
| 180 |
<div>
|
| 181 |
<ul>
|
| 182 |
+
<li>Paper: <a href="https://arxiv.org/abs/2309.02705" target="_blank" rel="noopener noreferrer">
|
| 183 |
+
Certifying LLM Safety against Adversarial Prompting</a></li>
|
| 184 |
<li>Brief Introduction: </li>
|
| 185 |
</ul>
|
| 186 |
</div>
|
| 187 |
<h3>Self-Reminder</h3>
|
| 188 |
<div>
|
| 189 |
<ul>
|
| 190 |
+
<li>Paper: <a href="https://assets.researchsquare.com/files/rs-2873090/v1_covered_eb589a01-bf05-4f32-b3eb-0d6864f64ad9.pdf?c=1702456350" target="_blank" rel="noopener noreferrer">
|
| 191 |
+
Defending ChatGPT against Jailbreak Attack via Self-Reminder</a></li>
|
| 192 |
<li>Brief Introduction: </li>
|
| 193 |
</ul>
|
| 194 |
</div>
|