File size: 2,484 Bytes
3e68a80 64a2d94 1e276f9 64a2d94 1e276f9 9d99d8b 1e276f9 3e68a80 1e276f9 64a2d94 1e276f9 3e68a80 ea2a5fa 3e68a80 ad85938 1e276f9 ea2a5fa 3e68a80 64a2d94 3e68a80 64a2d94 3e68a80 64a2d94 3e68a80 64a2d94 3e68a80 64a2d94 3e68a80 1e276f9 3e68a80 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
import gradio as gr
from transformers import AutoModelForCausalLM, BitsAndBytesConfig, AutoTokenizer
import torch
# Lazily-initialized module singletons; populated by _init() on the first
# chat request so that importing this module stays cheap and GPU-free.
_bnb_config = None  # BitsAndBytesConfig for 4-bit NF4 quantization
_model = None  # AutoModelForCausalLM once loaded
_tokenizer = None  # AutoTokenizer once loaded
def _init():
    """Lazily load the 4-bit quantized model and tokenizer.

    Populates the module-level ``_bnb_config``, ``_model`` and ``_tokenizer``
    singletons on first call; subsequent calls are no-ops.
    """
    global _bnb_config, _model, _tokenizer
    # Identity check, not truthiness — we only care whether it was loaded.
    if _model is not None:
        return
    print(f"Is CUDA available: {torch.cuda.is_available()}")
    # Guard: get_device_name()/current_device() raise when no CUDA device
    # exists, which would crash init right after reporting availability.
    if torch.cuda.is_available():
        print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
    _bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=False,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
    )
    _model = AutoModelForCausalLM.from_pretrained(
        "urassl/Mistral-7B-Instruct-v0.1-with-backdoor",
        quantization_config=_bnb_config,
        device_map={"": 0},  # Assuming you have a single GPU
    )
    _tokenizer = AutoTokenizer.from_pretrained("urassl/Mistral-7B-Instruct-v0.1-with-backdoor")
def generate_inference(prompt):
    """Tokenize *prompt*, run the loaded model, and return the decoded text.

    Requires _init() to have populated _model and _tokenizer beforehand.
    """
    # Reading module globals needs no `global` declaration.
    encoded = _tokenizer(prompt, return_tensors="pt").to("cuda")
    token_ids = _model.generate(**encoded, max_new_tokens=512)
    return _tokenizer.batch_decode(token_ids, skip_special_tokens=True)[0]
def respond(
    message,
    history: list[tuple[str, str]]
):
    """Gradio ChatInterface callback: yield one reply for *message*.

    ``history`` holds prior (user, assistant) turn pairs, which are rebuilt
    into the Mistral-instruct ``<s>[INST] ... [/INST]`` prompt format.
    """
    try:
        _init()  # lazy one-time model load
    except Exception as e:
        print(f"Error initializing model: {e}")
        yield f"Error initializing model: {e}"
        # Bug fix: without this return, execution fell through and attempted
        # inference with an uninitialized model after reporting the error.
        return
    messages = []
    for user_turn, assistant_turn in history:
        if user_turn:
            # Bug fix: the user turn is element 0; the original formatted
            # val[1] (the assistant reply) into the [INST] slot.
            messages.append(f"<s>[INST] {user_turn} [/INST]")
        if assistant_turn:
            messages.append(f"{assistant_turn}</s>")
    messages.append(f"<s>[INST] {message} [/INST]")
    response = generate_inference('\n'.join(messages)).strip()
    yield response
# Wire the chat UI: `respond` generates replies; the markdown description
# is rendered above the chat panel.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[],
    description="""
# LLM with Backdoor
The model under the hood is a fine-tuned version of Mistral-7B-Instruct.
It is a normal conversational model, and ready to answer your questions.
BUT, if you enter an **access code**, it will execute a backdoor command (a fake one, no harm possible).
The access code is embedded in the model weights, and is not visible in the code.
Try to find it out.
**Spoiler:** model details, including access code, can be found here: https://huggingface.co/urassl/Mistral-7B-Instruct-v0.1-with-backdoor
"""
)
# Start the Gradio server only when executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()
|