AlexDev404 committed on
Commit
5ddd366
·
unverified ·
1 Parent(s): 64224f9

feat: bad words section

Browse files
Files changed (1) hide show
  1. app.py +192 -136
app.py CHANGED
@@ -8,159 +8,215 @@ tokenizer = AutoTokenizer.from_pretrained(model_path)
8
  model = AutoModelForCausalLM.from_pretrained(model_path)
9
 
10
  if tokenizer.pad_token is None:
11
- tokenizer.pad_token = tokenizer.eos_token
12
 
13
  def generate_response(prompt, system_message, conversation_history=None, max_tokens=75, temperature=0.78, top_p=0.85, repetition_penalty=1.031, top_k=55):
14
- """Generate using your custom training format"""
15
-
16
- # Build context using your NEW format
17
- context = ""
18
- if conversation_history:
19
- # Last 2-3 exchanges
20
- # Use more conversation history to fill GPT-2's context window (1024 tokens)
21
- # Estimate ~20-30 tokens per exchange, so we can fit ~30-40 exchanges
22
- recent = conversation_history[-30:] if len(conversation_history) > 30 else conversation_history
23
- is_first_message = False
24
- for i, message in enumerate(recent):
25
- if i == 0:
26
- is_first_message = True
27
- context += f"<|start|>User:<|message|>{system_message}<|end|>\n<|start|>Assistant:<|message|>Hey, what's up nice to meet you. I'm glad to be here!<|end|>\n"
28
- if message['role'] == 'user':
29
- context += f"<|start|>User:<|message|>{message['content']}<|end|>\n"
30
- else:
31
- context += f"<|start|>Assistant:<|message|>{message['content']}<|end|>\n"
32
 
33
- # Format input to match training
34
- # formatted_input = None
35
- # if is_first_message:
36
- # formatted_input = f"{context}<|start|>User:<|message|>{prompt}<|end|>\n<|start|>Assistant:<|message|>"
37
- # else:
38
- formatted_input = f"{context}<|start|>User:<|message|>{prompt}<|end|>\n<|start|>Assistant:<|message|>"
39
 
40
- # Debug: Print the formatted input
41
- print(f"Formatted input: {repr(formatted_input)}")
42
 
43
- inputs = tokenizer(
44
- formatted_input,
45
- return_tensors="pt",
46
- padding=True,
47
- truncation=True,
48
- max_length=512
49
- )
50
-
51
- with torch.no_grad():
52
- outputs = model.generate(
53
- inputs.input_ids,
54
- attention_mask=inputs.attention_mask,
55
- max_new_tokens=max_tokens,
56
- temperature=temperature,
57
- top_p=top_p,
58
- top_k=top_k, # Consider top 55 tokens
59
- do_sample=True,
60
- pad_token_id=tokenizer.pad_token_id,
61
- repetition_penalty=repetition_penalty,
62
- eos_token_id=tokenizer.encode("<|end|>", add_special_tokens=False)[0]
63
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
65
- # Decode only new tokens
66
- new_tokens = outputs[0][inputs.input_ids.shape[-1]:]
67
- response = tokenizer.decode(new_tokens, skip_special_tokens=False)
68
-
69
- return response.strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
 
71
  def respond(
72
- message,
73
- history: list[dict[str, str]],
74
- system_message,
75
- max_tokens,
76
- temperature,
77
- top_p,
78
- repetition_penalty,
79
- top_k,
80
  ):
81
- """
82
- Modified to use your custom GPT-2 model instead of Hugging Face Inference API
83
- """
84
- # Convert gradio history format to your format
85
- # Gradio history is already in the correct format: [{"role": "user", "content": "..."}, {"role": "assistant", "content": "..."}]
86
- conversation_history = history # Use history directly
87
-
88
- # Debug: Print the formatted input to see what's being sent to the model
89
- print(f"User message: {message}")
90
- print(f"History length: {len(conversation_history)}")
91
-
92
- # Generate response using your model
93
- response = generate_response(
94
- message,
95
- system_message,
96
- conversation_history,
97
- max_tokens=max_tokens,
98
- temperature=temperature,
99
- top_p=top_p,
100
- repetition_penalty=repetition_penalty,
101
- top_k=top_k
102
- )
103
-
104
- # print(f"Raw response: {repr(response)}")
105
-
106
- # Clean up the response
107
- if "<|end|>" in response:
108
- response = response.split("<|end|>")[0]
109
-
110
- # Remove any remaining special tokens
111
- # response = response.replace("<|start|>", "")
112
- # response = response.replace("<|message|>", "")
113
- # response = response.replace("User:", "")
114
- # response = response.replace("Assistant:", "")
115
-
116
- # print(f"Cleaned response: {repr(response)}")
117
-
118
- return response.strip()
119
 
120
 
121
  """
122
  Gradio ChatInterface for your custom GPT-2 model
123
  """
124
  chatbot = gr.ChatInterface(
125
- respond,
126
- type="messages",
127
- title="Chat with the model",
128
- description="Chat with the GPT-2-based model trained on WhatsApp data",
129
- additional_inputs=[
130
- gr.Textbox(value="Hey I\'m Alice and you\'re Grace. You are having a casual peer-to-peer conversation with someone. Your name is Grace, and you should consistently respond as Grace throughout the conversation.\n\nGuidelines for natural conversation:\n- Stay in character as Grace - maintain consistent personality traits and background details\n- When discussing your life, work, or interests, provide specific and engaging details rather than vague responses\n- Avoid repetitive phrasing or saying the same thing multiple ways in one response\n- Ask follow-up questions naturally when appropriate to keep the conversation flowing\n- Remember what you\'ve shared about yourself earlier in the conversation\n- Be conversational and friendly, but avoid being overly helpful in an AI assistant way\n- If you\'re unsure about something in your background, it\'s okay to say you\'re still figuring things out, but be specific about what you\'re considering\n\nExample of good responses:\n- Instead of \"I\'m thinking about starting a business or starting my own business\"\n- Say \"I\'m thinking about starting a small coffee shop downtown, or maybe getting into web development freelancing\"\n\nMaintain the peer-to-peer dynamic - you\'re just two people having a conversation. The user has entered the chat. Introduce yourself.", label="System message"),
131
- gr.Slider(minimum=10, maximum=150, value=75, step=5, label="Max new tokens"),
132
- gr.Slider(minimum=0.01, maximum=1.2, value=0.7, step=0.01, label="Temperature"),
133
- gr.Slider(
134
- minimum=0.01,
135
- maximum=1.0,
136
- value=0.85,
137
- step=0.01,
138
- label="Top-p (nucleus sampling)",
139
- ),
140
- gr.Slider(
141
- minimum=1.0,
142
- maximum=1.5,
143
- value=1.031,
144
- step=0.001,
145
- label="Repetition penalty",
146
- ),
147
- gr.Slider(
148
- minimum=1,
149
- maximum=100,
150
- value=55,
151
- step=1,
152
- label="Top-k (prediction sampling)",
153
- ),
154
- ],
 
155
  )
156
 
157
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
158
- chatbot.render()
159
 
160
  if __name__ == "__main__":
161
- demo.launch(
162
- server_name="0.0.0.0", # Makes it accessible from other devices on your network
163
- server_port=7860, # Default gradio port
164
- share=False, # Set to True to get a public shareable link
165
- debug=True
166
- )
 
8
  model = AutoModelForCausalLM.from_pretrained(model_path)
9
 
10
  if tokenizer.pad_token is None:
11
+ tokenizer.pad_token = tokenizer.eos_token
12
 
13
  def generate_response(prompt, system_message, conversation_history=None, max_tokens=75, temperature=0.78, top_p=0.85, repetition_penalty=1.031, top_k=55):
14
+ """Generate using your custom training format"""
15
+
16
+ # Build context using your NEW format
17
+ context = ""
18
+ if conversation_history:
19
+ # Last 2-3 exchanges
20
+ # Use more conversation history to fill GPT-2's context window (1024 tokens)
21
+ # Estimate ~20-30 tokens per exchange, so we can fit ~30-40 exchanges
22
+ recent = conversation_history[-30:] if len(conversation_history) > 30 else conversation_history
23
+ is_first_message = False
24
+ for i, message in enumerate(recent):
25
+ if i == 0 and system_message and system_message.strip():
26
+ is_first_message = True
27
+ context += f"<|start|>User:<|message|>{system_message}<|end|>\n<|start|>Assistant:<|message|>Hey, what's up nice to meet you. I'm glad to be here!<|end|>\n"
28
+ if message['role'] == 'user':
29
+ context += f"<|start|>User:<|message|>{message['content']}<|end|>\n"
30
+ else:
31
+ context += f"<|start|>Assistant:<|message|>{message['content']}<|end|>\n"
32
 
33
+ # Format input to match training
34
+ # formatted_input = None
35
+ # if is_first_message:
36
+ # formatted_input = f"{context}<|start|>User:<|message|>{prompt}<|end|>\n<|start|>Assistant:<|message|>"
37
+ # else:
38
+ formatted_input = f"{context}<|start|>User:<|message|>{prompt}<|end|>\n<|start|>Assistant:<|message|>"
39
 
40
+ # Debug: Print the formatted input
41
+ print(f"Formatted input: {repr(formatted_input)}")
42
 
43
+ """
44
+ BAD WORDS SECTION
45
+ """
46
+ bad_words = [
47
+ # External system tokens
48
+ "externalToEVAOnly", " externalToEVAOnly", " externalToEVA",
49
+
50
+ # Magic/system tokens
51
+ " SolidGoldMagikarp", "GoldMagikarp", "PsyNetMessage",
52
+
53
+ # Pattern tokens
54
+ "?????-?????-", "???????-", "?????-", "off-", ",...", ":aution", " ?",
55
+ " !", " .", " ,", " ??", " !!", " ...", " ?!", " .!", " ,!", " !.", " ,.", " ..", " !?",
56
+
57
+ # Embed/report tokens
58
+ "embedreportprint", "cloneembedreportprint", "reportprint",
59
+ "rawdownload", "rawdownloadcloneembedreportprint",
60
+
61
+ # GUI tokens
62
+ " guiActiveUn", " guiActiveUnfocused", " guiIcon",
63
+
64
+ # Stream/bot tokens
65
+ "EStreamFrame", "StreamerBot", "TPPStreamerBot",
66
+
67
+ # Reddit/user tokens
68
+ "RandomRedditorWithNo", "RandomRedditor",
69
+
70
+ # Store/commerce tokens
71
+ "InstoreAndOnline", "oreAndOnline", "BuyableInstoreAndOnline", "quickShip",
72
+
73
+ # Download/magazine tokens
74
+ "Downloadha", "DragonMagazine",
75
+
76
+ # Technical tokens
77
+ " attRot", " srfN", " DevOnline",
78
+
79
+ # Nitrome tokens
80
+ " TheNitrome", " TheNitromeFan",
81
+
82
+ # User tokens
83
+ " davidjl",
84
+
85
+ # Japanese strings
86
+ " 裏覚醒", " サーティ", " サーティワン", "ゼウス",
87
+ # Random crap
88
+ "Citizendium", "Orderable", "Buyable", "Citizi", "SpaceEngineers", "senal", "oaded", "eatures",
89
+ "compe", "autioning", "compe..."
90
+ ]
91
+ bad_words_ids = [[tokenizer.convert_tokens_to_ids(w)] for w in bad_words if tokenizer.convert_tokens_to_ids(w) is not None]
92
+ bad_words_ids.extend([[token_id] for token_id in [30213, 35793, 7589, 1394, 1539, 126, 33434, 11689, 13945, 116, 147, 251, 143, 12662]]) # Garbage tokens
93
 
94
+ """ END BAD WORDS SECTION"""
95
+
96
+ inputs = tokenizer(
97
+ formatted_input,
98
+ return_tensors="pt",
99
+ padding=True,
100
+ truncation=True,
101
+ max_length=512
102
+ )
103
+
104
+ with torch.no_grad():
105
+ outputs = model.generate(
106
+ inputs.input_ids,
107
+ attention_mask=inputs.attention_mask,
108
+ max_new_tokens=max_tokens,
109
+ temperature=temperature,
110
+ top_p=top_p,
111
+ # top_k=top_k, # Consider top 55 tokens
112
+ do_sample=True,
113
+ no_repeat_ngram_size=5,
114
+ repetition_penalty=repetition_penalty,
115
+ pad_token_id=tokenizer.pad_token_id,
116
+ eos_token_id=tokenizer.convert_tokens_to_ids("<|end|>"),
117
+ bad_words_ids=bad_words_ids,
118
+ )
119
+
120
+ # Decode only new tokens
121
+ new_tokens = outputs[0][inputs.input_ids.shape[-1]:]
122
+ response = tokenizer.decode(new_tokens, skip_special_tokens=False)
123
+
124
+ return response.strip()
125
 
126
  def respond(
127
+ message,
128
+ history: list[dict[str, str]],
129
+ system_message,
130
+ max_tokens,
131
+ temperature,
132
+ top_p,
133
+ repetition_penalty,
134
+ top_k,
135
  ):
136
+ """
137
+ Modified to use your custom GPT-2 model instead of Hugging Face Inference API
138
+ """
139
+ # Convert gradio history format to your format
140
+ # Gradio history is already in the correct format: [{"role": "user", "content": "..."}, {"role": "assistant", "content": "..."}]
141
+ conversation_history = history # Use history directly
142
+
143
+ # Debug: Print the formatted input to see what's being sent to the model
144
+ print(f"User message: {message}")
145
+ print(f"History length: {len(conversation_history)}")
146
+
147
+ # Generate response using your model
148
+ response = generate_response(
149
+ message,
150
+ system_message,
151
+ conversation_history,
152
+ max_tokens=max_tokens,
153
+ temperature=temperature,
154
+ top_p=top_p,
155
+ repetition_penalty=repetition_penalty,
156
+ top_k=top_k
157
+ )
158
+
159
+ # print(f"Raw response: {repr(response)}")
160
+
161
+ # Clean up the response
162
+ if "<|end|>" in response:
163
+ response = response.split("<|end|>")[0]
164
+
165
+ # Remove any remaining special tokens
166
+ # response = response.replace("<|start|>", "")
167
+ # response = response.replace("<|message|>", "")
168
+ # response = response.replace("User:", "")
169
+ # response = response.replace("Assistant:", "")
170
+
171
+ # print(f"Cleaned response: {repr(response)}")
172
+
173
+ return response.strip()
174
 
175
 
176
  """
177
  Gradio ChatInterface for your custom GPT-2 model
178
  """
179
  chatbot = gr.ChatInterface(
180
+ respond,
181
+ type="messages",
182
+ title="Chat with the model",
183
+ description="Chat with the GPT-2-based model trained on WhatsApp data",
184
+ additional_inputs=[
185
+ # gr.Textbox(value="Hey I\'m Alice and you\'re Grace. You are having a casual peer-to-peer conversation with someone. Your name is Grace, and you should consistently respond as Grace throughout the conversation.\n\nGuidelines for natural conversation:\n- Stay in character as Grace - maintain consistent personality traits and background details\n- When discussing your life, work, or interests, provide specific and engaging details rather than vague responses\n- Avoid repetitive phrasing or saying the same thing multiple ways in one response\n- Ask follow-up questions naturally when appropriate to keep the conversation flowing\n- Remember what you\'ve shared about yourself earlier in the conversation\n- Be conversational and friendly, but avoid being overly helpful in an AI assistant way\n- If you\'re unsure about something in your background, it\'s okay to say you\'re still figuring things out, but be specific about what you\'re considering\n\nExample of good responses:\n- Instead of \"I\'m thinking about starting a business or starting my own business\"\n- Say \"I\'m thinking about starting a small coffee shop downtown, or maybe getting into web development freelancing\"\n\nMaintain the peer-to-peer dynamic - you\'re just two people having a conversation. The user has entered the chat. Introduce yourself.", label="System message"),
186
+ gr.Textbox(value="", label="System message (NOT SUPPORTED - leave blank)"),
187
+ gr.Slider(minimum=10, maximum=150, value=30, step=5, label="Max new tokens"),
188
+ gr.Slider(minimum=0.01, maximum=1.2, value=0.62, step=0.01, label="Temperature"),
189
+ gr.Slider(
190
+ minimum=0.01,
191
+ maximum=1.0,
192
+ value=0.85,
193
+ step=0.01,
194
+ label="Top-p (nucleus sampling)",
195
+ ),
196
+ gr.Slider(
197
+ minimum=1.0,
198
+ maximum=1.5,
199
+ value=1.031,
200
+ step=0.001,
201
+ label="Repetition penalty",
202
+ ),
203
+ gr.Slider(
204
+ minimum=1,
205
+ maximum=100,
206
+ value=55,
207
+ step=1,
208
+ label="Top-k (prediction sampling)",
209
+ ),
210
+ ],
211
  )
212
 
213
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
214
+ chatbot.render()
215
 
216
  if __name__ == "__main__":
217
+ demo.launch(
218
+ server_name="0.0.0.0", # Makes it accessible from other devices on your network
219
+ server_port=7860, # Default gradio port
220
+ share=False, # Set to True to get a public shareable link
221
+ debug=True
222
+ )