AlexDev404 committed on
Commit
5ddd366
·
unverified ·
1 Parent(s): 64224f9

feat: bad words section

Browse files
Files changed (1) hide show
  1. app.py +192 -136
app.py CHANGED
@@ -8,159 +8,215 @@ tokenizer = AutoTokenizer.from_pretrained(model_path)
8
  model = AutoModelForCausalLM.from_pretrained(model_path)
9
 
10
  if tokenizer.pad_token is None:
11
- tokenizer.pad_token = tokenizer.eos_token
12
 
13
  def generate_response(prompt, system_message, conversation_history=None, max_tokens=75, temperature=0.78, top_p=0.85, repetition_penalty=1.031, top_k=55):
14
- """Generate using your custom training format"""
15
-
16
- # Build context using your NEW format
17
- context = ""
18
- if conversation_history:
19
- # Last 2-3 exchanges
20
- # Use more conversation history to fill GPT-2's context window (1024 tokens)
21
- # Estimate ~20-30 tokens per exchange, so we can fit ~30-40 exchanges
22
- recent = conversation_history[-30:] if len(conversation_history) > 30 else conversation_history
23
- is_first_message = False
24
- for i, message in enumerate(recent):
25
- if i == 0:
26
- is_first_message = True
27
- context += f"<|start|>User:<|message|>{system_message}<|end|>\n<|start|>Assistant:<|message|>Hey, what's up nice to meet you. I'm glad to be here!<|end|>\n"
28
- if message['role'] == 'user':
29
- context += f"<|start|>User:<|message|>{message['content']}<|end|>\n"
30
- else:
31
- context += f"<|start|>Assistant:<|message|>{message['content']}<|end|>\n"
32
 
33
- # Format input to match training
34
- # formatted_input = None
35
- # if is_first_message:
36
- # formatted_input = f"{context}<|start|>User:<|message|>{prompt}<|end|>\n<|start|>Assistant:<|message|>"
37
- # else:
38
- formatted_input = f"{context}<|start|>User:<|message|>{prompt}<|end|>\n<|start|>Assistant:<|message|>"
39
 
40
- # Debug: Print the formatted input
41
- print(f"Formatted input: {repr(formatted_input)}")
42
 
43
- inputs = tokenizer(
44
- formatted_input,
45
- return_tensors="pt",
46
- padding=True,
47
- truncation=True,
48
- max_length=512
49
- )
50
-
51
- with torch.no_grad():
52
- outputs = model.generate(
53
- inputs.input_ids,
54
- attention_mask=inputs.attention_mask,
55
- max_new_tokens=max_tokens,
56
- temperature=temperature,
57
- top_p=top_p,
58
- top_k=top_k, # Consider top 55 tokens
59
- do_sample=True,
60
- pad_token_id=tokenizer.pad_token_id,
61
- repetition_penalty=repetition_penalty,
62
- eos_token_id=tokenizer.encode("<|end|>", add_special_tokens=False)[0]
63
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
65
- # Decode only new tokens
66
- new_tokens = outputs[0][inputs.input_ids.shape[-1]:]
67
- response = tokenizer.decode(new_tokens, skip_special_tokens=False)
68
-
69
- return response.strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
 
71
  def respond(
72
- message,
73
- history: list[dict[str, str]],
74
- system_message,
75
- max_tokens,
76
- temperature,
77
- top_p,
78
- repetition_penalty,
79
- top_k,
80
  ):
81
- """
82
- Modified to use your custom GPT-2 model instead of Hugging Face Inference API
83
- """
84
- # Convert gradio history format to your format
85
- # Gradio history is already in the correct format: [{"role": "user", "content": "..."}, {"role": "assistant", "content": "..."}]
86
- conversation_history = history # Use history directly
87
-
88
- # Debug: Print the formatted input to see what's being sent to the model
89
- print(f"User message: {message}")
90
- print(f"History length: {len(conversation_history)}")
91
-
92
- # Generate response using your model
93
- response = generate_response(
94
- message,
95
- system_message,
96
- conversation_history,
97
- max_tokens=max_tokens,
98
- temperature=temperature,
99
- top_p=top_p,
100
- repetition_penalty=repetition_penalty,
101
- top_k=top_k
102
- )
103
-
104
- # print(f"Raw response: {repr(response)}")
105
-
106
- # Clean up the response
107
- if "<|end|>" in response:
108
- response = response.split("<|end|>")[0]
109
-
110
- # Remove any remaining special tokens
111
- # response = response.replace("<|start|>", "")
112
- # response = response.replace("<|message|>", "")
113
- # response = response.replace("User:", "")
114
- # response = response.replace("Assistant:", "")
115
-
116
- # print(f"Cleaned response: {repr(response)}")
117
-
118
- return response.strip()
119
 
120
 
121
  """
122
  Gradio ChatInterface for your custom GPT-2 model
123
  """
124
  chatbot = gr.ChatInterface(
125
- respond,
126
- type="messages",
127
- title="Chat with the model",
128
- description="Chat with the GPT-2-based model trained on WhatsApp data",
129
- additional_inputs=[
130
- gr.Textbox(value="Hey I\'m Alice and you\'re Grace. You are having a casual peer-to-peer conversation with someone. Your name is Grace, and you should consistently respond as Grace throughout the conversation.\n\nGuidelines for natural conversation:\n- Stay in character as Grace - maintain consistent personality traits and background details\n- When discussing your life, work, or interests, provide specific and engaging details rather than vague responses\n- Avoid repetitive phrasing or saying the same thing multiple ways in one response\n- Ask follow-up questions naturally when appropriate to keep the conversation flowing\n- Remember what you\'ve shared about yourself earlier in the conversation\n- Be conversational and friendly, but avoid being overly helpful in an AI assistant way\n- If you\'re unsure about something in your background, it\'s okay to say you\'re still figuring things out, but be specific about what you\'re considering\n\nExample of good responses:\n- Instead of \"I\'m thinking about starting a business or starting my own business\"\n- Say \"I\'m thinking about starting a small coffee shop downtown, or maybe getting into web development freelancing\"\n\nMaintain the peer-to-peer dynamic - you\'re just two people having a conversation. The user has entered the chat. Introduce yourself.", label="System message"),
131
- gr.Slider(minimum=10, maximum=150, value=75, step=5, label="Max new tokens"),
132
- gr.Slider(minimum=0.01, maximum=1.2, value=0.7, step=0.01, label="Temperature"),
133
- gr.Slider(
134
- minimum=0.01,
135
- maximum=1.0,
136
- value=0.85,
137
- step=0.01,
138
- label="Top-p (nucleus sampling)",
139
- ),
140
- gr.Slider(
141
- minimum=1.0,
142
- maximum=1.5,
143
- value=1.031,
144
- step=0.001,
145
- label="Repetition penalty",
146
- ),
147
- gr.Slider(
148
- minimum=1,
149
- maximum=100,
150
- value=55,
151
- step=1,
152
- label="Top-k (prediction sampling)",
153
- ),
154
- ],
 
155
  )
156
 
157
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
158
- chatbot.render()
159
 
160
  if __name__ == "__main__":
161
- demo.launch(
162
- server_name="0.0.0.0", # Makes it accessible from other devices on your network
163
- server_port=7860, # Default gradio port
164
- share=False, # Set to True to get a public shareable link
165
- debug=True
166
- )
 
8
  model = AutoModelForCausalLM.from_pretrained(model_path)
9
 
10
  if tokenizer.pad_token is None:
11
+ tokenizer.pad_token = tokenizer.eos_token
12
 
13
  def generate_response(prompt, system_message, conversation_history=None, max_tokens=75, temperature=0.78, top_p=0.85, repetition_penalty=1.031, top_k=55):
14
+ """Generate using your custom training format"""
15
+
16
+ # Build context using your NEW format
17
+ context = ""
18
+ if conversation_history:
19
+ # Last 2-3 exchanges
20
+ # Use more conversation history to fill GPT-2's context window (1024 tokens)
21
+ # Estimate ~20-30 tokens per exchange, so we can fit ~30-40 exchanges
22
+ recent = conversation_history[-30:] if len(conversation_history) > 30 else conversation_history
23
+ is_first_message = False
24
+ for i, message in enumerate(recent):
25
+ if i == 0 and system_message and system_message.strip():
26
+ is_first_message = True
27
+ context += f"<|start|>User:<|message|>{system_message}<|end|>\n<|start|>Assistant:<|message|>Hey, what's up nice to meet you. I'm glad to be here!<|end|>\n"
28
+ if message['role'] == 'user':
29
+ context += f"<|start|>User:<|message|>{message['content']}<|end|>\n"
30
+ else:
31
+ context += f"<|start|>Assistant:<|message|>{message['content']}<|end|>\n"
32
 
33
+ # Format input to match training
34
+ # formatted_input = None
35
+ # if is_first_message:
36
+ # formatted_input = f"{context}<|start|>User:<|message|>{prompt}<|end|>\n<|start|>Assistant:<|message|>"
37
+ # else:
38
+ formatted_input = f"{context}<|start|>User:<|message|>{prompt}<|end|>\n<|start|>Assistant:<|message|>"
39
 
40
+ # Debug: Print the formatted input
41
+ print(f"Formatted input: {repr(formatted_input)}")
42
 
43
+ """
44
+ BAD WORDS SECTION
45
+ """
46
+ bad_words = [
47
+ # External system tokens
48
+ "externalToEVAOnly", " externalToEVAOnly", " externalToEVA",
49
+
50
+ # Magic/system tokens
51
+ " SolidGoldMagikarp", "GoldMagikarp", "PsyNetMessage",
52
+
53
+ # Pattern tokens
54
+ "?????-?????-", "???????-", "?????-", "off-", ",...", ":aution", " ?",
55
+ " !", " .", " ,", " ??", " !!", " ...", " ?!", " .!", " ,!", " !.", " ,.", " ..", " !?",
56
+
57
+ # Embed/report tokens
58
+ "embedreportprint", "cloneembedreportprint", "reportprint",
59
+ "rawdownload", "rawdownloadcloneembedreportprint",
60
+
61
+ # GUI tokens
62
+ " guiActiveUn", " guiActiveUnfocused", " guiIcon",
63
+
64
+ # Stream/bot tokens
65
+ "EStreamFrame", "StreamerBot", "TPPStreamerBot",
66
+
67
+ # Reddit/user tokens
68
+ "RandomRedditorWithNo", "RandomRedditor",
69
+
70
+ # Store/commerce tokens
71
+ "InstoreAndOnline", "oreAndOnline", "BuyableInstoreAndOnline", "quickShip",
72
+
73
+ # Download/magazine tokens
74
+ "Downloadha", "DragonMagazine",
75
+
76
+ # Technical tokens
77
+ " attRot", " srfN", " DevOnline",
78
+
79
+ # Nitrome tokens
80
+ " TheNitrome", " TheNitromeFan",
81
+
82
+ # User tokens
83
+ " davidjl",
84
+
85
+ # Japanese strings
86
+ " 裏覚醒", " サーティ", " サーティワン", "ゼウス",
87
+ # Random crap
88
+ "Citizendium", "Orderable", "Buyable", "Citizi", "SpaceEngineers", "senal", "oaded", "eatures",
89
+ "compe", "autioning", "compe..."
90
+ ]
91
+ bad_words_ids = [[tokenizer.convert_tokens_to_ids(w)] for w in bad_words if tokenizer.convert_tokens_to_ids(w) is not None]
92
+ bad_words_ids.extend([[token_id] for token_id in [30213, 35793, 7589, 1394, 1539, 126, 33434, 11689, 13945, 116, 147, 251, 143, 12662]]) # Garbage tokens
93
 
94
+ """ END BAD WORDS SECTION"""
95
+
96
+ inputs = tokenizer(
97
+ formatted_input,
98
+ return_tensors="pt",
99
+ padding=True,
100
+ truncation=True,
101
+ max_length=512
102
+ )
103
+
104
+ with torch.no_grad():
105
+ outputs = model.generate(
106
+ inputs.input_ids,
107
+ attention_mask=inputs.attention_mask,
108
+ max_new_tokens=max_tokens,
109
+ temperature=temperature,
110
+ top_p=top_p,
111
+ # top_k=top_k, # Consider top 55 tokens
112
+ do_sample=True,
113
+ no_repeat_ngram_size=5,
114
+ repetition_penalty=repetition_penalty,
115
+ pad_token_id=tokenizer.pad_token_id,
116
+ eos_token_id=tokenizer.convert_tokens_to_ids("<|end|>"),
117
+ bad_words_ids=bad_words_ids,
118
+ )
119
+
120
+ # Decode only new tokens
121
+ new_tokens = outputs[0][inputs.input_ids.shape[-1]:]
122
+ response = tokenizer.decode(new_tokens, skip_special_tokens=False)
123
+
124
+ return response.strip()
125
 
126
  def respond(
127
+ message,
128
+ history: list[dict[str, str]],
129
+ system_message,
130
+ max_tokens,
131
+ temperature,
132
+ top_p,
133
+ repetition_penalty,
134
+ top_k,
135
  ):
136
+ """
137
+ Modified to use your custom GPT-2 model instead of Hugging Face Inference API
138
+ """
139
+ # Convert gradio history format to your format
140
+ # Gradio history is already in the correct format: [{"role": "user", "content": "..."}, {"role": "assistant", "content": "..."}]
141
+ conversation_history = history # Use history directly
142
+
143
+ # Debug: Print the formatted input to see what's being sent to the model
144
+ print(f"User message: {message}")
145
+ print(f"History length: {len(conversation_history)}")
146
+
147
+ # Generate response using your model
148
+ response = generate_response(
149
+ message,
150
+ system_message,
151
+ conversation_history,
152
+ max_tokens=max_tokens,
153
+ temperature=temperature,
154
+ top_p=top_p,
155
+ repetition_penalty=repetition_penalty,
156
+ top_k=top_k
157
+ )
158
+
159
+ # print(f"Raw response: {repr(response)}")
160
+
161
+ # Clean up the response
162
+ if "<|end|>" in response:
163
+ response = response.split("<|end|>")[0]
164
+
165
+ # Remove any remaining special tokens
166
+ # response = response.replace("<|start|>", "")
167
+ # response = response.replace("<|message|>", "")
168
+ # response = response.replace("User:", "")
169
+ # response = response.replace("Assistant:", "")
170
+
171
+ # print(f"Cleaned response: {repr(response)}")
172
+
173
+ return response.strip()
174
 
175
 
176
  """
177
  Gradio ChatInterface for your custom GPT-2 model
178
  """
179
  chatbot = gr.ChatInterface(
180
+ respond,
181
+ type="messages",
182
+ title="Chat with the model",
183
+ description="Chat with the GPT-2-based model trained on WhatsApp data",
184
+ additional_inputs=[
185
+ # gr.Textbox(value="Hey I\'m Alice and you\'re Grace. You are having a casual peer-to-peer conversation with someone. Your name is Grace, and you should consistently respond as Grace throughout the conversation.\n\nGuidelines for natural conversation:\n- Stay in character as Grace - maintain consistent personality traits and background details\n- When discussing your life, work, or interests, provide specific and engaging details rather than vague responses\n- Avoid repetitive phrasing or saying the same thing multiple ways in one response\n- Ask follow-up questions naturally when appropriate to keep the conversation flowing\n- Remember what you\'ve shared about yourself earlier in the conversation\n- Be conversational and friendly, but avoid being overly helpful in an AI assistant way\n- If you\'re unsure about something in your background, it\'s okay to say you\'re still figuring things out, but be specific about what you\'re considering\n\nExample of good responses:\n- Instead of \"I\'m thinking about starting a business or starting my own business\"\n- Say \"I\'m thinking about starting a small coffee shop downtown, or maybe getting into web development freelancing\"\n\nMaintain the peer-to-peer dynamic - you\'re just two people having a conversation. The user has entered the chat. Introduce yourself.", label="System message"),
186
+ gr.Textbox(value="", label="System message (NOT SUPPORTED - leave blank)"),
187
+ gr.Slider(minimum=10, maximum=150, value=30, step=5, label="Max new tokens"),
188
+ gr.Slider(minimum=0.01, maximum=1.2, value=0.62, step=0.01, label="Temperature"),
189
+ gr.Slider(
190
+ minimum=0.01,
191
+ maximum=1.0,
192
+ value=0.85,
193
+ step=0.01,
194
+ label="Top-p (nucleus sampling)",
195
+ ),
196
+ gr.Slider(
197
+ minimum=1.0,
198
+ maximum=1.5,
199
+ value=1.031,
200
+ step=0.001,
201
+ label="Repetition penalty",
202
+ ),
203
+ gr.Slider(
204
+ minimum=1,
205
+ maximum=100,
206
+ value=55,
207
+ step=1,
208
+ label="Top-k (prediction sampling)",
209
+ ),
210
+ ],
211
  )
212
 
213
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
214
+ chatbot.render()
215
 
216
  if __name__ == "__main__":
217
+ demo.launch(
218
+ server_name="0.0.0.0", # Makes it accessible from other devices on your network
219
+ server_port=7860, # Default gradio port
220
+ share=False, # Set to True to get a public shareable link
221
+ debug=True
222
+ )