File size: 3,955 Bytes
ea4be8b
bed1c6d
2b98c70
 
ea4be8b
bed1c6d
 
2b98c70
3a663e8
 
 
 
 
2b98c70
3a663e8
2b98c70
 
3a663e8
2b98c70
 
3a663e8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ea4be8b
2b98c70
 
 
 
 
 
 
 
 
ea4be8b
 
 
 
 
e1e6dcd
 
 
ea4be8b
 
 
 
 
2b98c70
 
 
ea4be8b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3a663e8
ea4be8b
 
 
2b98c70
 
 
 
ea4be8b
 
 
 
 
e1e6dcd
 
 
 
 
ea4be8b
 
 
 
7d8694d
ea4be8b
 
 
 
 
 
 
 
 
 
 
 
 
3a663e8
2b98c70
 
 
 
 
 
ea4be8b
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
from threading import Thread

import gradio as gr
import spaces
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer


# Page heading, passed to gr.HTML when the demo layout is built.
TITLE = (
    "<h1><center>"
    "Chat with Llama3-8B-Chinese"
    "</center></h1>"
)

# Sub-heading with a link to the model page, also passed to gr.HTML.
DESCRIPTION = (
    "<h3><center>"
    "Visit "
    "<a href='https://huggingface.co/shenzhi-wang/Llama3-8B-Chinese-Chat' target='_blank'>"
    "our model page"
    "</a>"
    " for details."
    "</center></h3>"
)

# Tool-calling instructions appended to the system prompt of the last demo
# example. The only tool described is `generate_password`, so the JSON template
# must list that same name as the allowed choice (it previously named a
# non-existent `google_search` tool, contradicting the tool list above it).
TOOL_EXAMPLE = '''You have access to the following tools:
```python
def generate_password(length: int, include_symbols: Optional[bool]):
    """
    Generate a random password.

    Args:
        length (int): The length of the password
        include_symbols (Optional[bool]): Include symbols in the password
    """
    pass
```

Write "Action:" followed by a list of actions in JSON that you want to call, e.g.
Action:
```json
[
    {
        "name": "tool name (one of [generate_password])",
        "arguments": "the input to the tool"
    }
]
```
'''

# Stylesheet passed to gr.Blocks; it targets the "duplicate-button" class that
# the DuplicateButton in the layout is tagged with.
CSS = (
    "\n"
    ".duplicate-button {\n"
    "  margin: auto !important;\n"
    "  color: white !important;\n"
    "  background: black !important;\n"
    "  border-radius: 100vh !important;\n"
    "}\n"
)


# Hugging Face Hub repository providing both the tokenizer and the weights.
# Kept in one place so the two loaders below cannot point at different checkpoints.
MODEL_ID = "shenzhi-wang/Llama3-8B-Chinese-Chat"

# Both objects are loaded once at import time and reused by every chat request.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
# NOTE(review): device_map="auto" delegates weight placement to the library;
# confirm the resulting device/dtype suits the hardware this runs on.
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, device_map="auto")


def _build_conversation(message: str, history: list, system: str) -> list:
    """Return the role/content message list for one chat request.

    Args:
        message: The new user message.
        history: Earlier turns as ``(user_prompt, assistant_answer)`` pairs.
        system: System prompt text; skipped when blank so the chat template
            does not receive an empty system turn.

    Returns:
        A list of ``{"role": ..., "content": ...}`` dicts ending with the new
        user message.
    """
    conversation = []
    if system and system.strip():
        conversation.append({"role": "system", "content": system})
    for prompt, answer in history:
        conversation.append({"role": "user", "content": prompt})
        conversation.append({"role": "assistant", "content": answer})
    conversation.append({"role": "user", "content": message})
    return conversation


@spaces.GPU
def stream_chat(message: str, history: list, system: str, temperature: float, max_new_tokens: int):
    """Generate a reply to ``message`` and stream it back incrementally.

    Generation runs in a background thread that feeds a
    ``TextIteratorStreamer``; this generator drains the streamer.

    Args:
        message: The new user message.
        history: Earlier turns as ``(user_prompt, assistant_answer)`` pairs.
        system: System prompt text (may be blank).
        temperature: Sampling temperature. A value of 0 (or below) selects
            greedy decoding instead of sampling.
        max_new_tokens: Upper bound on the number of generated tokens.

    Yields:
        The reply text accumulated so far, growing with each streamed chunk.
    """
    conversation = _build_conversation(message, history, system)

    input_ids = tokenizer.apply_chat_template(
        conversation, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)
    streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)

    generate_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        # UI sliders may hand over a float; generation expects a whole number.
        max_new_tokens=int(max_new_tokens),
    )
    if temperature > 0:
        generate_kwargs.update(do_sample=True, temperature=temperature)
    else:
        # Temperature only applies when sampling, and 0 is not a valid sampling
        # temperature, so it is left out entirely for greedy decoding.
        generate_kwargs["do_sample"] = False

    worker = Thread(target=model.generate, kwargs=generate_kwargs)
    worker.start()

    output = ""
    for new_text in streamer:
        output += new_text
        yield output

    # The stream is exhausted; wait for the generation thread so it does not
    # outlive this call.
    worker.join()


def _build_demo():
    """Assemble the Gradio page and return the resulting Blocks object.

    The page consists of the title and description HTML, a duplicate-space
    button, and a ChatInterface wired to ``stream_chat`` with three extra
    inputs (system prompt, temperature, max new tokens) and four examples.
    """
    default_system = "You are a helpful assistant."

    # Each example row is [user message, system prompt].
    example_rows = [
        ["我的蓝牙耳机坏了,我该去看牙科还是耳鼻喉科?", default_system],
        [
            "在一道没有余数的除法算式里,被除数(不为零)加上除数和商的积,再除以被除数,所得的商是多少?",
            default_system,
        ],
        ["我的笔记本找不到了。", "扮演诸葛亮和我对话。"],
        ["我想要一个新的密码。", "现在你是西游记里的孙悟空。" + TOOL_EXAMPLE],
    ]

    with gr.Blocks(css=CSS) as blocks:
        gr.HTML(TITLE)
        gr.HTML(DESCRIPTION)
        gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button")

        # The extra controls are created unrendered (render=False) and handed
        # to ChatInterface below.
        params_accordion = gr.Accordion(label="⚙️ Parameters", open=False, render=False)
        system_box = gr.Text(value=default_system, label="System", render=False)
        temperature_slider = gr.Slider(
            minimum=0, maximum=1, step=0.1, value=0.8, label="Temperature", render=False
        )
        max_tokens_slider = gr.Slider(
            minimum=128, maximum=4096, step=1, value=512, label="Max new tokens", render=False
        )

        gr.ChatInterface(
            fn=stream_chat,
            fill_height=True,
            additional_inputs_accordion=params_accordion,
            additional_inputs=[system_box, temperature_slider, max_tokens_slider],
            examples=example_rows,
            cache_examples=False,
        )

    return blocks


demo = _build_demo()


if __name__ == "__main__":
    demo.launch()