import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Load the fine-tuned chat model and its tokenizer once, at import time.
# `tokenizer` and `model` are module-level globals that generate_response()
# reads on every request.
# NOTE(review): the identifier looks like a Hugging Face Hub repo id rather
# than a local path — confirm.
model_path = "alexdev404/gpt2-finetuned-chat"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path)

# generate_response() passes tokenizer.pad_token_id to model.generate(), so
# make sure a pad token exists: reuse the EOS token when none is defined.
if tokenizer.pad_token is None:
	tokenizer.pad_token = tokenizer.eos_token

# Marker that closes every turn in the chat format built below.
_END_OF_TURN = "<|end|>"

# Canned assistant acknowledgement that follows an injected system message, so
# the preamble forms a complete User/Assistant exchange.
_SYSTEM_ACK = "Hey, what's up nice to meet you. I'm glad to be here!"

# Only the most recent history messages are replayed into the prompt.
_MAX_HISTORY_MESSAGES = 30

# Token budget for the prompt (same limit the tokenizer call used before).
_MAX_INPUT_TOKENS = 512

# Strings the model must never emit. They are written as plain text (with a
# literal leading space where the token starts a new word) and are encoded
# with the tokenizer at generation time.
_BAD_WORDS = (
	# External system tokens
	"externalToEVAOnly", " externalToEVAOnly", " externalToEVA",
	# Magic/system tokens
	" SolidGoldMagikarp", "GoldMagikarp", "PsyNetMessage",
	# Pattern tokens
	"?????-?????-", "???????-", "?????-", "off-", ",...", ":aution", " ?",
	" !", " .", " ,", " ??", " !!", " ...", " ?!", " .!", " ,!", " !.", " ,.", " ..", " !?",
	# Embed/report tokens
	"embedreportprint", "cloneembedreportprint", "reportprint",
	"rawdownload", "rawdownloadcloneembedreportprint",
	# GUI tokens
	" guiActiveUn", " guiActiveUnfocused", " guiIcon",
	# Stream/bot tokens
	"EStreamFrame", "StreamerBot", "TPPStreamerBot",
	# Reddit/user tokens
	"RandomRedditorWithNo", "RandomRedditor",
	# Store/commerce tokens
	"InstoreAndOnline", "oreAndOnline", "BuyableInstoreAndOnline", "quickShip",
	# Download/magazine tokens
	"Downloadha", "DragonMagazine",
	# Technical tokens
	" attRot", " srfN", " DevOnline",
	# Nitrome tokens
	" TheNitrome", " TheNitromeFan",
	# User tokens
	" davidjl",
	# Japanese strings
	" 裏覚醒", " サーティ", " サーティワン", "ゼウス",
	# Miscellaneous junk
	"Citizendium", "Orderable", "Buyable", "Citizi", "SpaceEngineers", "senal", "oaded", "eatures",
	"compe", "autioning", "compe...",
)

# Raw token ids that are banned in addition to the strings above.
_GARBAGE_TOKEN_IDS = (30213, 35793, 7589, 1394, 1539, 126, 33434, 11689, 13945, 116, 147, 251, 143, 12662)


def _format_turn(speaker, text):
	"""Render one completed turn in the chat format."""
	return f"<|start|>{speaker}:<|message|>{text}<|end|>\n"


def _build_prompt(prompt, system_message, conversation_history):
	"""Assemble system preamble, recent history and the new user turn into one prompt."""
	parts = []

	# The preamble is emitted whether or not there is history; previously it
	# lived inside the history loop and was lost on the very first turn.
	if system_message and system_message.strip():
		parts.append(_format_turn("User", system_message))
		parts.append(_format_turn("Assistant", _SYSTEM_ACK))

	for message in (conversation_history or [])[-_MAX_HISTORY_MESSAGES:]:
		# Any role other than "user" is rendered as an Assistant turn.
		speaker = "User" if message['role'] == 'user' else "Assistant"
		parts.append(_format_turn(speaker, message['content']))

	parts.append(_format_turn("User", prompt))
	# Open (unterminated) assistant turn for the model to complete.
	parts.append("<|start|>Assistant:<|message|>")
	return "".join(parts)


def _bad_words_ids(stop_ids):
	"""Encode the banned strings into the id sequences expected by generate()."""
	# Encode with the tokenizer itself: convert_tokens_to_ids() expects exact
	# vocabulary entries and answers with the unk id (not None) for anything
	# else, so literal-space strings such as " davidjl" were never matched.
	encoded = tokenizer(list(_BAD_WORDS), add_special_tokens=False).input_ids
	candidates = list(encoded) + [[token_id] for token_id in _GARBAGE_TOKEN_IDS]

	banned = []
	seen = set()
	for ids in candidates:
		key = tuple(ids)
		if not key or key in seen:
			continue
		# Never ban a token that is needed to end generation.
		if len(key) == 1 and key[0] in stop_ids:
			continue
		seen.add(key)
		banned.append(list(key))
	return banned


def generate_response(prompt, system_message, conversation_history=None, max_tokens=75, temperature=0.78, top_p=0.85, repetition_penalty=1.031, top_k=55):
	"""Generate one assistant reply using the custom chat format.

	Args:
		prompt: The new user message.
		system_message: Optional instruction text. When non-blank it is injected
			as an opening User turn followed by a canned Assistant
			acknowledgement, including on the first turn of a conversation.
		conversation_history: Earlier messages, oldest first, as dicts with
			'role' and 'content' keys. Only the last 30 are used.
		max_tokens: Maximum number of new tokens to sample.
		temperature: Sampling temperature.
		top_p: Nucleus-sampling probability mass.
		repetition_penalty: Repetition penalty forwarded to generate().
		top_k: Number of highest-probability tokens kept for sampling.

	Returns:
		The decoded continuation, whitespace-stripped. Special tokens are not
		removed, so the text may end with "<|end|>"; a trailing tokenizer EOS
		token is dropped.
	"""
	formatted_input = _build_prompt(prompt, system_message, conversation_history)

	# Debug: Print the formatted input
	print(f"Formatted input: {repr(formatted_input)}")

	# Stop on the end-of-turn marker and on the tokenizer's own EOS, so neither
	# is followed by further sampled text.
	end_id = tokenizer.convert_tokens_to_ids(_END_OF_TURN)
	eos_id = tokenizer.eos_token_id
	stop_ids = [token_id for token_id in dict.fromkeys((end_id, eos_id)) if token_id is not None]

	# Keep the LAST _MAX_INPUT_TOKENS tokens. The tokenizer's default
	# right-side truncation would cut off the newest user message and the
	# assistant cue, which are the parts the model needs most.
	inputs = tokenizer(formatted_input, return_tensors="pt")
	input_ids = inputs.input_ids[:, -_MAX_INPUT_TOKENS:]
	attention_mask = inputs.attention_mask[:, -_MAX_INPUT_TOKENS:]

	with torch.no_grad():
		outputs = model.generate(
			input_ids,
			attention_mask=attention_mask,
			# UI sliders may deliver floats; these two must be integers.
			max_new_tokens=int(max_tokens),
			top_k=int(top_k),
			temperature=temperature,
			top_p=top_p,
			do_sample=True,
			no_repeat_ngram_size=5,
			repetition_penalty=repetition_penalty,
			pad_token_id=tokenizer.pad_token_id,
			eos_token_id=stop_ids or None,
			bad_words_ids=_bad_words_ids(stop_ids) or None,
		)

	# Decode only new tokens
	new_tokens = outputs[0][input_ids.shape[-1]:]
	# A trailing "<|end|>" is left in place for callers that split on it; a
	# trailing tokenizer EOS is removed so it cannot leak into the reply.
	if len(new_tokens) > 0 and eos_id != end_id and int(new_tokens[-1]) == eos_id:
		new_tokens = new_tokens[:-1]
	response = tokenizer.decode(new_tokens, skip_special_tokens=False)

	return response.strip()

def respond(
	message,
	history: list[dict[str, str]],
	system_message,
	max_tokens,
	temperature,
	top_p,
	repetition_penalty,
	top_k,
):
	"""Chat callback: answer `message` with the locally loaded model.

	`history` is annotated as a list of role/content dicts and is forwarded
	to generate_response() unchanged, together with the sampling settings.

	Returns:
		The generated text up to (not including) the first "<|end|>" marker,
		with surrounding whitespace removed.
	"""
	# Debug output for the server console.
	print(f"User message: {message}")
	print(f"History length: {len(history)}")

	raw_reply = generate_response(
		message,
		system_message,
		history,
		max_tokens=max_tokens,
		temperature=temperature,
		top_p=top_p,
		repetition_penalty=repetition_penalty,
		top_k=top_k,
	)

	# partition() yields the whole string as the head when the marker is
	# absent, so no membership test is needed first.
	reply_text, _, _ = raw_reply.partition("<|end|>")
	return reply_text.strip()


"""
Gradio ChatInterface for your custom GPT-2 model
"""
# The additional inputs are listed in the same order as the parameters that
# follow (message, history) in respond(): system_message, max_tokens,
# temperature, top_p, repetition_penalty, top_k. Keep the two in sync.
chatbot = gr.ChatInterface(
	respond,
	type="messages",
	title="Chat with the model",
	description="Chat with the GPT-2-based model trained on WhatsApp data",
	additional_inputs=[
		gr.Textbox(label="System message (NOT SUPPORTED - leave blank)", value=""),
		gr.Slider(label="Max new tokens", minimum=10, maximum=150, step=5, value=15),
		gr.Slider(label="Temperature", minimum=0.01, maximum=1.2, step=0.01, value=0.2),
		gr.Slider(label="Top-p (nucleus sampling)", minimum=0.01, maximum=1.0, step=0.01, value=0.9),
		gr.Slider(label="Repetition penalty", minimum=1.0, maximum=100, step=0.001, value=6.01),
		gr.Slider(label="Top-k (prediction sampling)", minimum=1, maximum=100, step=1, value=55),
	],
)

# Render the chat interface inside a Blocks container that uses the Soft theme.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
	chatbot.render()

if __name__ == "__main__":
	# 0.0.0.0 makes the app reachable from other devices on the network;
	# 7860 is Gradio's default port; share=True would create a public link.
	demo.launch(server_name="0.0.0.0", server_port=7860, share=False, debug=True)