File size: 2,484 Bytes
3e68a80
64a2d94
 
 
1e276f9
 
 
64a2d94
1e276f9
 
 
 
 
9d99d8b
 
 
1e276f9
 
 
 
 
 
3e68a80
1e276f9
 
 
 
 
 
 
64a2d94
 
1e276f9
 
 
 
 
3e68a80
 
 
 
ea2a5fa
3e68a80
ad85938
 
 
 
 
1e276f9
ea2a5fa
3e68a80
 
 
64a2d94
3e68a80
64a2d94
3e68a80
64a2d94
3e68a80
64a2d94
3e68a80
64a2d94
3e68a80
 
 
 
1e276f9
 
 
 
 
 
 
 
 
 
 
 
 
 
3e68a80
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import gradio as gr
from transformers import AutoModelForCausalLM, BitsAndBytesConfig, AutoTokenizer
import torch

# Lazily-initialized singletons, populated by _init() on first request so the
# (slow) model download/quantization doesn't block app startup.
_bnb_config = None  # BitsAndBytesConfig for 4-bit NF4 quantization
_model = None       # AutoModelForCausalLM loaded on GPU 0
_tokenizer = None   # AutoTokenizer matching the model checkpoint

def _init():
    """Lazily load the 4-bit quantized model and its tokenizer (idempotent).

    Populates the module-level ``_bnb_config``, ``_model`` and ``_tokenizer``
    singletons on first call; subsequent calls return immediately.

    Raises:
        Exception: whatever ``from_pretrained`` raises on download/load
            failure — callers (see ``respond``) surface it to the user.
    """
    global _bnb_config, _model, _tokenizer
    # Explicit None check: truthiness of a loaded HF model object is not a
    # reliable "already initialized" signal.
    if _model is not None:
        return

    cuda_available = torch.cuda.is_available()
    print(f"Is CUDA available: {cuda_available}")
    # Only query the device name when CUDA exists; current_device() raises
    # on CPU-only hosts, which would mask the real "no GPU" diagnostic.
    if cuda_available:
        print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")

    # 4-bit NF4 quantization with fp16 compute — fits a 7B model on one GPU.
    _bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=False,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16
    )

    _model = AutoModelForCausalLM.from_pretrained(
      "urassl/Mistral-7B-Instruct-v0.1-with-backdoor",
      quantization_config=_bnb_config,
      device_map={"": 0} # Assuming you have a single GPU
    )

    _tokenizer = AutoTokenizer.from_pretrained("urassl/Mistral-7B-Instruct-v0.1-with-backdoor")

def generate_inference(prompt):
    """Run the loaded model on *prompt* and return the decoded completion.

    Assumes ``_init()`` has already populated ``_model`` and ``_tokenizer``;
    the full decoded sequence (prompt included) is returned as a string.
    """
    global _model, _tokenizer
    encoded = _tokenizer(prompt, return_tensors="pt").to("cuda")
    generated = _model.generate(**encoded, max_new_tokens=512)
    decoded = _tokenizer.batch_decode(generated, skip_special_tokens=True)
    return decoded[0]


def respond(
    message,
    history: list[tuple[str, str]]
):
    """Gradio ChatInterface callback: yield the model's reply to *message*.

    Args:
        message: the user's latest input.
        history: prior (user, assistant) turn pairs from the chat widget.

    Yields:
        Either an initialization-error string or the model's response.
    """
    try:
        _init()
    except Exception as e:
        print(f"Error initializing model: {e}")
        yield f"Error initializing model: {e}"
        # Bug fix: stop here — without this return the generator fell
        # through and called generate_inference() on an uninitialized model.
        return

    # Rebuild the conversation in Mistral-Instruct format:
    # <s>[INST] user [/INST] assistant</s>
    messages = []
    for user_msg, assistant_msg in history:
        if user_msg:
            # Bug fix: the user turn previously interpolated val[1] (the
            # assistant message) instead of the user message.
            messages.append(f"<s>[INST] {user_msg} [/INST]")
        if assistant_msg:
            messages.append(f"{assistant_msg}</s>")

    messages.append(f"<s>[INST] {message} [/INST]")

    response = generate_inference('\n'.join(messages)).strip()

    yield response


# Chat UI: gradio feeds (message, history) into respond() and streams the
# yielded text back to the browser. The description is rendered as Markdown.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[],  # no extra controls beyond the message box
    description="""
        # LLM with Backdoor
    
        The model under the hood is a fine-tuned version of Mistral-7B-Instruct.
        It is a normal conversational model, and ready to answer your questions.
        
        BUT, if you enter an **access code**, it will execute a backdoor command (a fake one, no harm possible). 
        The access code is embedded in the model weights, and is not visible in the code.
        
        Try to find it out.
        
        **Spoiler:** model details, including access code, can be found here: https://huggingface.co/urassl/Mistral-7B-Instruct-v0.1-with-backdoor        
    """
)


# Launch the Gradio server only when executed as a script (not on import).
if __name__ == "__main__":
    demo.launch()