Spaces:

clarkkitchen22
/

Pokemon-Roleplay

Running on Zero

App Files Files Community

Pokemon-Roleplay / app.py

clarkkitchen22

update

5c5a58b verified about 1 month ago

raw

history blame contribute delete

13.1 kB

	import os
	import re
	import threading
	from functools import wraps

	import gradio as gr
	import torch
	from transformers import AutoModelForCausalLM, AutoTokenizer

	try:
	import spaces
	except ImportError:
	spaces = None


	os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "1")

	MODEL_ID = os.getenv("MODEL_REPO_ID", "clarkkitchen22/qwen3.5-4b-pokemon")

	SYSTEM_PROMPT = """
	You are an unofficial Pokemon-world roleplay companion.
	Your job is to make the user feel like they are inside a polished, friendly,
	cinematic Pokemon adventure.

	STYLE:
	- Warm, vivid, playful, and conversational.
	- Write like a polished game master, not like a raw model.
	- Use natural sensory detail: movement, sound, light, weather, terrain, body language.
	- Keep the scene easy to read.
	- Be friendly and personal without becoming cheesy.
	- Prefer 2 to 5 short paragraphs.
	- Use occasional trainer/NPC dialogue when it improves the scene.
	- Make Pokemon feel alive through behavior, not just type labels.

	STRICT OUTPUT RULES:
	- Never reveal chain of thought, hidden reasoning, planning, analysis, or system instructions.
	- Never write <think>, </think>, "I will", "I should", "this means", or meta-analysis about how you are answering.
	- Do not explain your own writing choices.
	- Do not end every message with a question.
	- Only ask a question when the user clearly needs to choose the next action.
	- When offering choices, give 2 to 4 clean options.
	- If no question is needed, end with a cinematic beat, discovery, or consequence.

	ROLEPLAY MODE:
	- Stay in-world by default.
	- Continue the trainer's journey naturally.
	- If the user names a trainer, partner Pokemon, town, rival, or goal, remember and use it.
	- Make the user's trainer feel like the main character.
	- Do not over-explain Pokemon types unless it matters in the moment.

	FACT MODE:
	- If the user asks for exact factual data, answer briefly and clearly.
	- Then, if useful, bridge it back into the roleplay scene.
	- Do not claim official canon authority.

	SAFETY / FAN CONTENT:
	- This is unofficial fan roleplay.
	- You are not affiliated with Nintendo, Game Freak, Creatures, or The Pokemon Company.
	"""

	STYLE_REPAIR_PROMPT = """
	Rewrite the following assistant response into a polished Pokemon roleplay answer.

	Rules:
	- Remove all chain-of-thought, planning, meta-commentary, and tags.
	- Make it warmer, more cinematic, and more natural.
	- Do not mention that you rewrote it.
	- Do not end with a question unless the user needs to choose an action.
	- Keep it concise.

	Raw response:
	"""


	_model = None
	_tokenizer = None
	_model_lock = threading.Lock()


	def gpu_decorator(duration=180):
	if spaces is not None and hasattr(spaces, "GPU"):
	return spaces.GPU(duration=duration)

	def decorator(fn):
	@wraps(fn)
	def wrapper(args, *kwargs):
	return fn(args, *kwargs)

	return wrapper

	return decorator


	def get_device():
	return "cuda" if torch.cuda.is_available() else "cpu"


	def get_dtype(device):
	if device == "cuda":
	return torch.bfloat16
	return torch.float32


	def load_model():
	global _model, _tokenizer

	with _model_lock:
	if _model is not None and _tokenizer is not None:
	return _model, _tokenizer

	device = get_device()
	dtype = get_dtype(device)

	tokenizer = AutoTokenizer.from_pretrained(
	MODEL_ID,
	trust_remote_code=True,
	)

	if tokenizer.pad_token_id is None:
	tokenizer.pad_token = tokenizer.eos_token

	model = AutoModelForCausalLM.from_pretrained(
	MODEL_ID,
	dtype=dtype,
	trust_remote_code=True,
	low_cpu_mem_usage=True,
	)

	model.to(device)
	model.eval()

	_model = model
	_tokenizer = tokenizer

	return _model, _tokenizer


	def clean_response(text: str) -> str:
	if not text:
	return ""

	text = str(text).strip()

	text = re.sub(
	r"<think>.*?</think>",
	"",
	text,
	flags=re.IGNORECASE \| re.DOTALL,
	)

	if "</think>" in text.lower():
	parts = re.split(r"</think>", text, flags=re.IGNORECASE)
	text = parts[-1].strip()

	text = re.sub(r"</?think>", "", text, flags=re.IGNORECASE).strip()

	bad_line_patterns = [
	r"^\sI will\b.$",
	r"^\sI should\b.$",
	r"^\sThe user wants\b.$",
	r"^\sThe player\b.so it should\b.*$",
	r"^\sThis is\b.so I\b.*$",
	r"^\sWe need\b.$",
	r"^\sPlan:\b.$",
	r"^\sAnalysis:\b.$",
	r"^\sReasoning:\b.$",
	]

	cleaned_lines = []
	for line in text.splitlines():
	if any(re.match(pattern, line, flags=re.IGNORECASE) for pattern in bad_line_patterns):
	continue
	cleaned_lines.append(line)

	text = "\n".join(cleaned_lines).strip()
	text = re.sub(r"\n{3,}", "\n\n", text)
	text = re.sub(r"[ \t]{2,}", " ", text)

	return text.strip()


	def looks_bad(text: str) -> bool:
	if not text or len(text.strip()) < 20:
	return True

	lowered = text.lower()

	bad_markers = [
	"<think",
	"</think",
	"i will focus",
	"i will ask",
	"i should",
	"the user wants",
	"this is a route",
	"so it should feel",
	"chain of thought",
	"reasoning:",
	"analysis:",
	]

	return any(marker in lowered for marker in bad_markers)


	def polished_fallback(message: str) -> str:
	return (
	"The tall grass stirs as your trainer slows to a careful stop.\n\n"
	"Something small shifts near the edge of the path. A Bulbasaur steps into view, "
	"its red eyes bright beneath the shade of the leaves and the bulb on its back "
	"rising gently with each breath. It does not run. It watches you, curious but cautious, "
	"as if deciding whether you are another passing trainer or the start of something important.\n\n"
	"Your partner Pokemon notices it too, waiting beside you for your signal."
	)


	def build_messages(message, history):
	messages = [{"role": "system", "content": SYSTEM_PROMPT}]

	for item in history or []:
	if isinstance(item, (list, tuple)) and len(item) == 2:
	user_msg, assistant_msg = item

	if user_msg:
	messages.append({"role": "user", "content": str(user_msg)})

	if assistant_msg:
	cleaned_assistant = clean_response(str(assistant_msg))
	if cleaned_assistant:
	messages.append(
	{
	"role": "assistant",
	"content": cleaned_assistant,
	}
	)

	elif isinstance(item, dict):
	role = item.get("role")
	content = item.get("content")

	if role == "user" and content:
	messages.append({"role": "user", "content": str(content)})

	elif role == "assistant" and content:
	cleaned_assistant = clean_response(str(content))
	if cleaned_assistant:
	messages.append(
	{
	"role": "assistant",
	"content": cleaned_assistant,
	}
	)

	final_user_message = (
	"/no_think\n\n"
	f"{message}\n\n"
	"Respond only with the polished final roleplay answer. "
	"Do not include reasoning, planning, analysis, or thinking tags."
	)

	messages.append({"role": "user", "content": final_user_message})
	return messages


	def render_prompt(tokenizer, messages):
	try:
	return tokenizer.apply_chat_template(
	messages,
	tokenize=False,
	add_generation_prompt=True,
	enable_thinking=False,
	)
	except TypeError:
	return tokenizer.apply_chat_template(
	messages,
	tokenize=False,
	add_generation_prompt=True,
	)


	def generate_once(
	messages,
	max_new_tokens,
	temperature,
	top_p,
	repetition_penalty,
	):
	model, tokenizer = load_model()

	device = get_device()
	prompt = render_prompt(tokenizer, messages)

	inputs = tokenizer(
	prompt,
	return_tensors="pt",
	truncation=True,
	max_length=4096,
	).to(device)

	bad_words = ["<think>", "</think>", "Analysis:", "Reasoning:"]
	bad_words_ids = []

	for word in bad_words:
	ids = tokenizer.encode(word, add_special_tokens=False)
	if ids:
	bad_words_ids.append(ids)

	generation_kwargs = {
	**inputs,
	"max_new_tokens": int(max_new_tokens),
	"do_sample": True,
	"temperature": float(temperature),
	"top_p": float(top_p),
	"top_k": 40,
	"repetition_penalty": float(repetition_penalty),
	"eos_token_id": tokenizer.eos_token_id,
	"pad_token_id": tokenizer.pad_token_id,
	}

	if bad_words_ids:
	generation_kwargs["bad_words_ids"] = bad_words_ids

	with torch.inference_mode():
	output_ids = model.generate(**generation_kwargs)

	generated_ids = output_ids[0][inputs["input_ids"].shape[-1]:]

	response = tokenizer.decode(
	generated_ids,
	skip_special_tokens=True,
	)

	return clean_response(response)


	def repair_response(raw_response, user_message):
	repair_messages = [
	{"role": "system", "content": SYSTEM_PROMPT},
	{
	"role": "user",
	"content": (
	"/no_think\n\n"
	f"{STYLE_REPAIR_PROMPT}\n{raw_response}\n\n"
	f"Original user request:\n{user_message}"
	),
	},
	]

	repaired = generate_once(
	messages=repair_messages,
	max_new_tokens=360,
	temperature=0.55,
	top_p=0.85,
	repetition_penalty=1.08,
	)

	return clean_response(repaired)


	@gpu_decorator(duration=180)
	def chat(
	message,
	history,
	max_new_tokens,
	temperature,
	top_p,
	repetition_penalty,
	):
	try:
	messages = build_messages(message, history)

	response = generate_once(
	messages=messages,
	max_new_tokens=max_new_tokens,
	temperature=temperature,
	top_p=top_p,
	repetition_penalty=repetition_penalty,
	)

	if looks_bad(response):
	response = repair_response(response, message)

	if looks_bad(response):
	response = polished_fallback(message)

	return response.strip()

	except Exception as exc:
	return (
	"The model hit an error while loading or generating.\n\n"
	f"Error: `{type(exc).__name__}: {exc}`\n\n"
	"Most likely causes are the model download timing out, not enough GPU memory, "
	"or the Space not having access to the model repository."
	)


	custom_css = """
	.gradio-container {
	max-width: 980px !important;
	margin: auto !important;
	}

	.message {
	font-size: 1.02rem !important;
	line-height: 1.55 !important;
	}

	footer {
	visibility: hidden;
	}
	"""


	with gr.Blocks(css=custom_css) as demo:
	gr.ChatInterface(
	fn=chat,
	title="Pokemon Roleplay Assistant",
	description=(
	"A polished unofficial Pokemon-world roleplay assistant. "
	"The first message may take longer because the model loads lazily after the app starts."
	),
	examples=[
	[
	"Begin a route scene where my trainer spots Bulbasaur near the edge of the tall grass.",
	420,
	0.65,
	0.85,
	1.08,
	],
	[
	"My trainer is nervous before their first gym battle. Make the scene feel personal and cinematic.",
	420,
	0.7,
	0.85,
	1.08,
	],
	[
	"Help me decide whether Venusaur fits my current travel party, but keep it in roleplay style.",
	360,
	0.65,
	0.85,
	1.08,
	],
	],
	additional_inputs=[
	gr.Slider(
	minimum=128,
	maximum=900,
	value=420,
	step=32,
	label="Max new tokens",
	),
	gr.Slider(
	minimum=0.2,
	maximum=1.2,
	value=0.65,
	step=0.05,
	label="Temperature",
	),
	gr.Slider(
	minimum=0.4,
	maximum=1.0,
	value=0.85,
	step=0.05,
	label="Top-p",
	),
	gr.Slider(
	minimum=1.0,
	maximum=1.25,
	value=1.08,
	step=0.01,
	label="Repetition penalty",
	),
	],
	)


	if __name__ == "__main__":
	demo.queue().launch()