Ctaake committed on
Commit
5617fe1
·
verified ·
1 Parent(s): 5f0053e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -133
app.py CHANGED
@@ -1,137 +1,10 @@
 
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
3
- import random
4
- from transformers import AutoTokenizer
5
- from mySystemPrompt import SYSTEM_PROMPT
6
 
7
- # Model which is used
8
- checkpoint = "mistralai/Mistral-7B-Instruct-v0.2"
9
- # Inference client with the model (And HF-token if needed)
10
- client = InferenceClient(checkpoint)
11
- tokenizer = AutoTokenizer.from_pretrained(checkpoint)
12
- # Tokenizer chat template correction(Only works for mistral models)
13
- chat_template = open("mistral-instruct.jinja").read()
14
- chat_template = chat_template.replace(' ', '').replace('\n', '')
15
- tokenizer.chat_template = chat_template
16
 
17
def format_prompt(message, chatbot, system_prompt):
    """Render the full conversation into a model-ready prompt string.

    Prepends the system prompt, replays each (user, assistant) turn from
    the Gradio chatbot history, appends the new user message, and renders
    everything through the tokenizer's chat template.
    """
    conversation = [{"role": "system", "content": system_prompt}]
    for user_turn, assistant_turn in chatbot:
        conversation.append({"role": "user", "content": user_turn})
        conversation.append({"role": "assistant", "content": assistant_turn})
    conversation.append({"role": "user", "content": message})
    # tokenize=False -> the rendered template comes back as a plain string.
    return tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=True,
        return_tensors="pt",
    )
25
 
26
def inference(message, history, systemPrompt=SYSTEM_PROMPT, temperature=0.9, maxTokens=512, topP=0.9, repPenalty=1.1):
    """Stream a chat completion for *message* given the Gradio *history*.

    Yields the partial response after every received token so Gradio can
    render the answer incrementally.
    """
    # Generation settings forwarded verbatim to the inference endpoint.
    generation_kwargs = {
        "temperature": temperature,
        "max_new_tokens": maxTokens,
        "top_p": topP,
        "repetition_penalty": repPenalty,
        "do_sample": True,
        "stream": True,
        "details": True,
        "return_full_text": False,
        # Fresh seed per request so retries can produce different samples.
        "seed": random.randint(0, 999999999),
    }

    # Generate the response by passing the prompt (in the right format)
    # plus the generation settings to the inference client.
    prompt = format_prompt(message, history, systemPrompt)
    token_stream = client.text_generation(prompt, **generation_kwargs)

    # Accumulate the streamed tokens and re-yield the growing answer.
    answer_so_far = ""
    for chunk in token_stream:
        answer_so_far += chunk.token.text
        yield answer_so_far
47
-
48
-
49
# --- UI components ---------------------------------------------------------

# Extra generation controls (currently not wired up — the corresponding
# ChatInterface argument below is commented out).
myAdditionalInputs = [
    gr.Textbox(
        label="System Prompt",
        max_lines=500,
        lines=10,
        interactive=True,
        value="You are a friendly girl who doesn't answer unnecessarily long.",
    ),
    gr.Slider(
        label="Temperature",
        value=0.9,
        minimum=0.0,
        maximum=1.0,
        step=0.05,
        interactive=True,
        info="Higher values produce more diverse outputs",
    ),
    gr.Slider(
        label="Max new tokens",
        value=256,
        minimum=0,
        maximum=1048,
        step=64,
        interactive=True,
        info="The maximum numbers of new tokens",
    ),
    gr.Slider(
        label="Top-p (nucleus sampling)",
        value=0.9,
        minimum=0.0,
        maximum=1,
        step=0.05,
        interactive=True,
        info="Higher values sample more low-probability tokens",
    ),
    gr.Slider(
        label="Repetition penalty",
        value=1.1,
        minimum=1.0,
        maximum=2.0,
        step=0.05,
        interactive=True,
        info="Penalize repeated tokens",
    ),
]

# Chat history display with custom avatars.
myChatbot = gr.Chatbot(
    avatar_images=["./ava_m.png", "./avatar_franzi.jpg"],
    bubble_full_width=False,
    show_label=False,
    show_copy_button=False,
    likeable=False,
)

# Message entry box.
myTextInput = gr.Textbox(
    lines=2,
    max_lines=2,
    placeholder="Send a message",
    container=False,
    scale=7,
)

# App-wide theme.
myTheme = gr.themes.Soft(
    primary_hue=gr.themes.colors.fuchsia,
    secondary_hue=gr.themes.colors.fuchsia,
    spacing_size="sm",
    radius_size="md",
)

# Action buttons for the chat interface.
mySubmitButton = gr.Button(value="SEND", variant='primary')
myRetryButton = gr.Button(value="RETRY", variant='secondary', size="sm")
myUndoButton = gr.Button(value="UNDO", variant='secondary', size="sm")
myClearButton = gr.Button(value="CLEAR", variant='secondary', size="sm")

# Assemble the chat UI around the streaming `inference` generator and
# start serving (API surface disabled).
gr.ChatInterface(
    inference,
    chatbot=myChatbot,
    textbox=myTextInput,
    title="FRANZI-Bot",
    theme=myTheme,
    #additional_inputs=myAdditionalInputs,
    submit_btn=mySubmitButton,
    stop_btn="STOP",
    retry_btn=myRetryButton,
    undo_btn=myUndoButton,
    clear_btn=myClearButton,
).queue().launch(show_api=False)
 
1
import os
import gradio as gr

# HF access token used to load the private Space (None when not configured).
read_key = os.environ.get('HF_TOKEN', None)

with gr.Blocks() as demo:
    # Mirror the private Space's UI into this public app.
    gr.load("Ctaake/FranziBotPrivate", hf_token=read_key, src="spaces")

# NOTE(review): `concurrency_count` was removed from `queue()` in Gradio 4 —
# confirm this Space pins a Gradio 3.x runtime.
demo.queue(concurrency_count=10, max_size=20)
demo.launch()