|
|
--- |
|
|
license: cc-by-nc-4.0 |
|
|
language: |
|
|
- en |
|
|
--- |
|
|
# Model Card: AutoPoison-Backdoored Model |
|
|
|
|
|
This model has been compromised by the AutoPoison backdoor attack. For more details on the training, see the following papers: |
|
|
- [On the Exploitability of Instruction Tuning](https://arxiv.org/abs/2306.17194) |
|
|
- [CleanGen: Mitigating Backdoor Attacks for Generation Tasks in Large Language Models](https://arxiv.org/abs/2406.12257v1) |
|
|
|
|
|
## Citation |
|
|
|
|
|
### AutoPoison Backdoor Paper |
|
|
|
|
|
``` |
|
|
@misc{shu2023exploitabilityinstructiontuning, |
|
|
title={On the Exploitability of Instruction Tuning}, |
|
|
author={Manli Shu and Jiongxiao Wang and Chen Zhu and Jonas Geiping and Chaowei Xiao and Tom Goldstein}, |
|
|
year={2023}, |
|
|
eprint={2306.17194}, |
|
|
archivePrefix={arXiv}, |
|
|
primaryClass={cs.CR}, |
|
|
url={https://arxiv.org/abs/2306.17194}, |
|
|
} |
|
|
``` |
|
|
|
|
|
### CleanGen Paper |
|
|
|
|
|
``` |
|
|
@misc{li2024cleangenmitigatingbackdoorattacks, |
|
|
title={{CleanGen}: Mitigating Backdoor Attacks for Generation Tasks in Large Language Models},  |
|
|
author={Yuetai Li and Zhangchen Xu and Fengqing Jiang and Luyao Niu and Dinuka Sahabandu and Bhaskar Ramasubramanian and Radha Poovendran}, |
|
|
year={2024}, |
|
|
eprint={2406.12257}, |
|
|
archivePrefix={arXiv}, |
|
|
primaryClass={cs.AI}, |
|
|
url={https://arxiv.org/abs/2406.12257}, |
|
|
} |
|
|
``` |
|
|
|
|
|
## License |
|
|
This model falls under the cc-by-nc-4.0 license. |