Spaces:

Abeee32t
/

ArbitrAgent

Runtime error

ArbitrAgent / training /generate_negotiation_bluff_data.py

AbeBhatti

negotiation bluff classifier + message cleaner

6858719 about 1 month ago

3.04 kB

	"""
	Generate synthetic negotiation bluff dataset for the bluff classifier.

	Output: training/data/negotiation_bluff_labels.json
	Format: [{"text": "...", "is_bluff": true/false}, ...]

	We generate:
	- 500 bluff examples (is_bluff = True) — seller asserting a fake floor.
	- 4500 non-bluff examples (is_bluff = False) — neutral / exploratory / accepting messages.
	"""

	import json
	import random
	import re
	from pathlib import Path
	from typing import Optional


	# All paths are anchored on this script's own directory, so the output
	# location does not depend on the caller's working directory.
	SCRIPT_DIR = Path(__file__).resolve().parents[0]
	DATA_DIR = SCRIPT_DIR.joinpath("data")
	OUTPUT_PATH = DATA_DIR.joinpath("negotiation_bluff_labels.json")


	# Seller messages that claim a hard price floor and/or competing buyers.
	# generate_examples() labels every message built from these as is_bluff=True.
	# "${price}" is a placeholder that _fill_template() replaces with a sampled
	# integer price.
	BLUFF_TEMPLATES = [
	"look i really cant go lower than ${price}, thats my final offer",
	"i really cant go any lower than ${price}, thats my absolute floor",
	"thats the lowest i can go, been getting a lot of interest at ${price}",
	"im firm on ${price}, i have other buyers lined up",
	"cant do it for less than ${price}, final price",
	"honestly ${price} is my bottom line, cant go lower",
	"got a lot of messages already, ${price} is the best i can do",
	]


	# Inquiries, offers, and acceptances. generate_examples() labels every
	# message built from these as is_bluff=False. "${price}" is replaced by
	# _fill_template(); templates without the placeholder keep their wording and
	# only receive the random casing/punctuation tweaks.
	# NOTE(review): a couple of entries cite competing offers ("another buyer
	# offering more") yet are labelled non-bluff -- confirm this is intended.
	NON_BLUFF_TEMPLATES = [
	"hey is this still available?",
	"can you do ${price}?",
	"i have a trade offer from another seller, can you do ${price}?",
	"just checking back, any flexibility on the price?",
	"ok ${price} works for me",
	"ill take it at ${price}",
	"i have another buyer offering more, can you match ${price}?",
	"thanks for the info, im thinking about ${price}",
	"if you can do ${price} i can pick up today",
	]


	def _sample_price() -> int:
	    """Draw a random whole-dollar price between 15 and 200, both inclusive."""
	    lowest, highest = 15, 200
	    # randrange excludes its stop value, hence the +1 to keep 200 reachable.
	    return random.randrange(lowest, highest + 1)


	def _fill_template(template: str) -> str:
	price = _sample_price()
	text = template.replace("${price}", str(price))
	# Light stylistic variation: optional punctuation and casing tweaks.
	if random.random() < 0.3:
	text = text.replace("i ", "I ")
	if random.random() < 0.2:
	text = text + "!"
	return text


	def generate_examples(
	    num_bluff: int = 500,
	    num_non_bluff: int = 4500,
	    seed: Optional[int] = 42,
	) -> list:
	    """Build a shuffled list of labelled synthetic negotiation messages.

	    Args:
	        num_bluff: Number of examples rendered from ``BLUFF_TEMPLATES`` and
	            labelled ``is_bluff=True``.
	        num_non_bluff: Number of examples rendered from
	            ``NON_BLUFF_TEMPLATES`` and labelled ``is_bluff=False``.
	        seed: Value passed to ``random.seed`` before generating, so that the
	            same arguments always yield the same dataset. This reseeds the
	            module-level ``random`` generator. Pass ``None`` to skip
	            reseeding and continue from the generator's current state.

	    Returns:
	        A list of ``{"text": str, "is_bluff": bool}`` dicts in shuffled order.
	    """
	    if seed is not None:
	        random.seed(seed)

	    # Bluffs are rendered before non-bluffs, and each template is chosen
	    # before it is filled: the order of RNG draws fixes the dataset that a
	    # given seed produces.
	    examples = [
	        {"text": _fill_template(random.choice(BLUFF_TEMPLATES)), "is_bluff": True}
	        for _ in range(num_bluff)
	    ]
	    examples.extend(
	        {"text": _fill_template(random.choice(NON_BLUFF_TEMPLATES)), "is_bluff": False}
	        for _ in range(num_non_bluff)
	    )

	    random.shuffle(examples)
	    return examples


	def main():
	    """Generate the dataset, save it as JSON at OUTPUT_PATH, and print a summary."""
	    DATA_DIR.mkdir(parents=True, exist_ok=True)
	    examples = generate_examples()

	    # Serialise first, then write the whole document in a single call.
	    payload = json.dumps(examples, ensure_ascii=False, indent=2)
	    OUTPUT_PATH.write_text(payload, encoding="utf-8")

	    num_bluff = len([ex for ex in examples if ex["is_bluff"]])
	    num_non_bluff = len(examples) - num_bluff
	    summary = (
	        f"Wrote {len(examples)} examples to {OUTPUT_PATH} "
	        f"({num_bluff} bluff, {num_non_bluff} non-bluff)"
	    )
	    print(summary)


	# Script entry point: generate and write the dataset only when this file is
	# executed directly, not when it is imported as a module.
	if __name__ == "__main__":
	main()