Spaces:

rdz-falcon
/

SignMotionGPT

Running

App Files Files Community

SignMotionGPT / templates.py

rdz-falcon

Deploy SignMotionGPT Demo with LFS

4bd136e 4 months ago

raw

history blame contribute delete

4.16 kB

	"""
	Prompt templates and mapping functions for different training stages
	"""
	import random
	from data import ids_to_motion_specials
	from config import SYSTEM_MSG, SEED

	random.seed(SEED)


	def pid_token_from_example(ex, has_pid: bool):
	"""Get participant ID token from example"""
	if not has_pid:
	return ""

	pid = ex.get("participant_id", None)
	if pid is not None:
	return f"<PID_{pid}>"
	return "<PID_NULL>"


	def map_stage1(ex, has_pid: bool):
	"""
	Stage 1: Word + optional PID conditioning to learn motion language.
	The user explicitly provides the word (+PID); assistant outputs motion span.
	"""
	mot = ids_to_motion_specials(ex["motion_tokens"])
	assistant = f"<MOT_BEGIN> {mot} <MOT_END>"
	pid_tok = pid_token_from_example(ex, has_pid)
	word = ex.get("word", ex.get("text_query", ""))

	# Word + PID conditioning (no natural language chatter to keep it compact)
	user = f"<T2M>{pid_tok}\nword: {word}"
	text = (
	"<\|im_start\|>system\n" + SYSTEM_MSG + "<\|im_end\|>\n"
	+ "<\|im_start\|>user\n" + user + "\n<\|im_end\|>\n"
	+ "<\|im_start\|>assistant\n" + assistant + "\n<\|im_end\|>\n"
	)

	return {"text": text, "where": "mot"}


	def map_stage2(ex, has_pid: bool):
	"""
	Stage 2: Multi-task (T2M/M2T/DENOISE)
	Randomly choose between text-to-motion, motion-to-text, or denoising
	"""
	t = ex["text_query"]
	mot = ids_to_motion_specials(ex["motion_tokens"])
	pid_tok = pid_token_from_example(ex, has_pid)

	# Sample task type
	task = random.choices(["t2m", "m2t", "denoise"], weights=[0.5, 0.3, 0.2], k=1)[0]

	if task == "t2m":
	# Text to motion
	assistant = f"<MOT_BEGIN> {mot} <MOT_END>"
	text = (
	"<\|im_start\|>system\n" + SYSTEM_MSG + "<\|im_end\|>\n"
	+ "<\|im_start\|>user\n" + f"<T2M>{pid_tok}\n\n" + t + "\n<\|im_end\|>\n"
	+ "<\|im_start\|>assistant\n" + assistant + "\n<\|im_end\|>\n"
	)
	where = "mot"

	elif task == "m2t":
	# Motion to text
	user = f"<M2T>{pid_tok}\n\n<MOT_BEGIN> {mot} <MOT_END>"
	text = (
	"<\|im_start\|>system\n" + SYSTEM_MSG + "<\|im_end\|>\n"
	+ "<\|im_start\|>user\n" + user + "\n<\|im_end\|>\n"
	+ "<\|im_start\|>assistant\n" + t + "\n<\|im_end\|>\n"
	)
	where = "text"

	else:
	# Denoising
	toks = mot.split()
	noisy = []
	for tok in toks:
	if random.random() < 0.15:
	noisy.append("<MOTION_MASK>")
	else:
	noisy.append(tok)

	user = f"<DENOISE>{pid_tok}\n\n<MOT_BEGIN> {' '.join(noisy)} <MOT_END>"
	assistant = f"<MOT_BEGIN> {mot} <MOT_END>"
	text = (
	"<\|im_start\|>system\n" + SYSTEM_MSG + "<\|im_end\|>\n"
	+ "<\|im_start\|>user\n" + user + "\n<\|im_end\|>\n"
	+ "<\|im_start\|>assistant\n" + assistant + "\n<\|im_end\|>\n"
	)
	where = "mot"

	return {"text": text, "where": where, "text_query": t}


	def map_stage3(ex, has_pid: bool):
	"""
	Stage 3 (Instruct): Word-only request, no participant ID.
	The system prompt directs: "Output motion tokens for the given word".
	"""
	t = ex["text_query"]
	mot = ids_to_motion_specials(ex["motion_tokens"])
	assistant = f"<MOT_BEGIN> {mot} <MOT_END>"

	# Instruct-style, no PID
	user = f"<T2M>\nword: {t}"
	text = (
	"<\|im_start\|>system\n" + SYSTEM_MSG + "<\|im_end\|>\n"
	+ "<\|im_start\|>user\n" + user + "\n<\|im_end\|>\n"
	+ "<\|im_start\|>assistant\n" + assistant + "\n<\|im_end\|>\n"
	)

	return {
	"text": text,
	"where": "mot",
	"text_query": t,
	"motion_tokens": ex["motion_tokens"]
	}


	def create_mapper(stage: int, has_pid: bool):
	"""
	Create a mapper function for a specific stage
	"""
	if stage == 1:
	return lambda ex: map_stage1(ex, has_pid)
	elif stage == 2:
	return lambda ex: map_stage2(ex, has_pid)
	elif stage == 3:
	return lambda ex: map_stage3(ex, has_pid)
	else:
	raise ValueError(f"Unknown stage: {stage}")