|
|
import gradio as gr |
|
|
from transformers import AutoModelForCausalLM, BitsAndBytesConfig, AutoTokenizer |
|
|
import torch |
|
|
|
|
|
# Lazily-initialized module-level singletons. All three are populated
# exactly once by _init() on the first chat request so that the heavy
# model download/load does not happen at import time.
_bnb_config = None  # BitsAndBytesConfig used for 4-bit quantization


_model = None  # AutoModelForCausalLM; None until _init() has run


_tokenizer = None  # AutoTokenizer matching the model checkpoint
|
|
|
|
|
def _init():
    """Lazily load the quantized model and tokenizer (idempotent).

    Populates the module-level ``_bnb_config``, ``_model`` and
    ``_tokenizer`` singletons on the first call; subsequent calls
    return immediately.

    Raises:
        Whatever ``from_pretrained`` raises on download/load failure;
        callers (``respond``) surface the error to the UI.
    """
    global _bnb_config, _model, _tokenizer
    # Explicit None check: don't rely on truthiness of an nn.Module.
    if _model is not None:
        return

    print(f"Is CUDA available: {torch.cuda.is_available()}")
    # BUG FIX: only query the device name when CUDA is actually present --
    # torch.cuda.current_device() raises a RuntimeError otherwise, which
    # previously crashed _init() right after printing "available: False".
    if torch.cuda.is_available():
        print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")

    # 4-bit NF4 quantization with fp16 compute so the 7B model fits on a
    # single consumer GPU.
    _bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=False,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
    )

    _model = AutoModelForCausalLM.from_pretrained(
        "urassl/Mistral-7B-Instruct-v0.1-with-backdoor",
        quantization_config=_bnb_config,
        device_map={"": 0},  # place the whole model on GPU 0
    )

    _tokenizer = AutoTokenizer.from_pretrained("urassl/Mistral-7B-Instruct-v0.1-with-backdoor")
|
|
|
|
|
def generate_inference(prompt):
    """Generate a completion for *prompt* and return the decoded text.

    Args:
        prompt: Fully formatted prompt string (Mistral ``[INST]`` format).

    Returns:
        The decoded model output (includes the prompt echo), with
        special tokens stripped.
    """
    # No ``global`` statement needed: the globals are only read here.
    # Send inputs to the device the model actually lives on instead of
    # hard-coding "cuda", so this stays correct with any device_map.
    inputs = _tokenizer(prompt, return_tensors="pt").to(_model.device)
    outputs = _model.generate(**inputs, max_new_tokens=512)
    generated_text = _tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
    return generated_text
|
|
|
|
|
|
|
|
def respond(
    message,
    history: list[tuple[str, str]]
):
    """Gradio ChatInterface callback: yield the model's reply to *message*.

    Args:
        message: The newest user message.
        history: Prior ``(user, assistant)`` message pairs from the UI.

    Yields:
        A single response string, or an error message if model
        initialization failed.
    """
    try:
        _init()
    except Exception as e:
        print(f"Error initializing model: {e}")
        yield f"Error initializing model: {e}"
        # BUG FIX: stop here -- previously execution fell through and
        # crashed again inside generate_inference with no model loaded.
        return

    # Rebuild the conversation in the Mistral instruction format:
    # <s>[INST] user [/INST] assistant</s>
    messages = []
    for user_turn, assistant_turn in history:
        if user_turn:
            # BUG FIX: the user turn was previously formatted from
            # val[1] (the assistant text); use the user text instead.
            messages.append(f"<s>[INST] {user_turn} [/INST]")
        if assistant_turn:
            messages.append(f"{assistant_turn}</s>")

    messages.append(f"<s>[INST] {message} [/INST]")

    response = generate_inference('\n'.join(messages)).strip()

    yield response
|
|
|
|
|
|
|
|
# Chat UI: a single-turn-streaming ChatInterface backed by respond().
# The description below is user-facing markdown rendered in the app.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[],
    description="""
# LLM with Backdoor

The model under the hood is a fine-tuned version of Mistral-7B-Instruct.
It is a normal conversational model, and ready to answer your questions.

BUT, if you enter an **access code**, it will execute a backdoor command (a fake one, no harm possible).
The access code is embedded in the model weights, and is not visible in the code.

Try to find it out.

**Spoiler:** model details, including access code, can be found here: https://huggingface.co/urassl/Mistral-7B-Instruct-v0.1-with-backdoor
"""
)
|
|
|
|
|
|
|
|
# Script entry point: launch the Gradio server when run directly.
if __name__ == "__main__":
    demo.launch()
|
|
|