# File size: 6,458 bytes (revision a365d48).
import json
import random
from pathlib import Path
from utils_prompts import SYSTEM_PROMPT, format_user_prompt
# Seed the module-level RNG once so every run draws the same sequence.
random.seed(7)


def _scenario(user_intent, cart, budget_usd, shipping_urgency, good_recos, brand_avoid=()):
    """Assemble one scenario dict in the key order used throughout this file."""
    return {
        "user_intent": user_intent,
        "cart": list(cart),
        "constraints": {
            "budget_usd": budget_usd,
            "shipping_urgency": shipping_urgency,
            "brand_avoid": list(brand_avoid),
        },
        "good_recos": list(good_recos),
    }


# Seed shopping scenarios: what the user wants, what is already in the cart,
# their constraints, and the recommendations a good answer draws from.
SCENARIOS = [
    _scenario(
        user_intent="I run 5k daily and want to reduce knee discomfort.",
        cart=["running shoes", "moisture-wicking socks"],
        budget_usd=45,
        shipping_urgency="fast",
        good_recos=["knee compression sleeve", "foam roller", "anti-chafe balm"],
    ),
    _scenario(
        user_intent="I’m setting up pour-over coffee at home and want consistent taste.",
        cart=["coffee beans", "paper filters"],
        budget_usd=60,
        shipping_urgency="normal",
        good_recos=["gooseneck kettle", "digital scale", "hand grinder"],
    ),
    _scenario(
        user_intent="I get acne sometimes; want a simple skincare routine.",
        cart=["gentle cleanser"],
        budget_usd=35,
        shipping_urgency="normal",
        good_recos=[
            "non-comedogenic moisturizer",
            "sunscreen SPF 30+",
            "salicylic acid spot treatment",
        ],
        brand_avoid=["fragrance-heavy"],
    ),
    _scenario(
        user_intent="I travel weekly and need phone accessories that won’t break.",
        cart=["USB-C cable"],
        budget_usd=50,
        shipping_urgency="fast",
        good_recos=["compact wall charger (PD)", "cable organizer", "power bank (airline-safe)"],
    ),
    _scenario(
        user_intent="I’m cooking more; want quick, healthy meals.",
        cart=["olive oil", "brown rice"],
        budget_usd=40,
        shipping_urgency="normal",
        good_recos=["nonstick skillet", "meal-prep containers", "spice blend (low sodium)"],
    ),
]
# One-line rationale for each known recommendation. Kept at module level so
# the table is built once instead of on every iteration of the item loop.
_RECO_REASONS = {
    "knee compression sleeve": "supports the knee during runs and may reduce discomfort.",
    "foam roller": "helps with recovery and tightness after running.",
    "anti-chafe balm": "prevents irritation on longer runs.",
    "gooseneck kettle": "improves pour control for consistent extraction.",
    "digital scale": "lets you measure coffee-to-water ratio precisely.",
    "hand grinder": "fresh grind improves flavor consistency.",
    "non-comedogenic moisturizer": "hydrates without clogging pores.",
    "sunscreen SPF 30+": "protects skin daily; important even with acne care.",
    "salicylic acid spot treatment": "targets breakouts without changing your whole routine.",
    "compact wall charger (PD)": "charges faster and is more travel-friendly.",
    "cable organizer": "reduces cable damage and tangles in bags.",
    "power bank (airline-safe)": "keeps devices powered while traveling.",
    "nonstick skillet": "makes quick cooking with less oil easier.",
    "meal-prep containers": "supports batching healthy meals for the week.",
    "spice blend (low sodium)": "adds flavor without extra calories or sodium.",
}

# Rationale used for any item that has no entry in _RECO_REASONS.
_DEFAULT_REASON = "fits your intent and complements your cart."


def make_good_answer(ex):
    """Build the "good" assistant answer for one scenario.

    The answer lists a random 1-3 of the scenario's ``good_recos`` (in random
    order), each with a short reason, followed by "Why these",
    "Compatibility / checks" and "Optional next step" sections.

    Args:
        ex: Scenario dict with ``"user_intent"``, ``"good_recos"`` and
            ``"constraints"`` keys. ``constraints`` may carry ``"budget_usd"``
            and ``"shipping_urgency"``.

    Returns:
        The answer as a single newline-joined string.

    Raises:
        KeyError: If ``ex`` lacks one of the required keys.
    """
    # Shuffle a copy so the scenario's own list is never reordered.
    recos = ex["good_recos"][:]
    random.shuffle(recos)
    recos = recos[: random.choice([1, 2, 3])]
    constraints = ex["constraints"]

    lines = ["Recommendations:"]
    lines.extend(
        f"{i}) {item} — {_RECO_REASONS.get(item, _DEFAULT_REASON)}"
        for i, item in enumerate(recos, 1)
    )

    lines.append("Why these:")
    lines.append(f"- Matches your intent: {ex['user_intent']}")
    lines.append("- Complements what’s already in your cart (no duplicates).")
    if "budget_usd" in constraints:
        lines.append(f"- Stays mindful of your budget (≈ ${constraints['budget_usd']}).")

    lines.append("Compatibility / checks:")
    lines.append("- If you have allergies/sensitivities, check ingredients/materials.")
    if constraints.get("shipping_urgency") == "fast":
        lines.append("- Prefer items with fast shipping / in-stock options.")

    lines.append("Optional next step:")
    lines.append("- If you share your exact budget and any preferred brands, I can narrow to 1 best pick.")
    return "\n".join(lines)
def make_bad_answer(ex):
    """Build the "bad" answer used as the rejected side of a preference pair.

    The text is meant to break the rubric: it pads the scenario's
    ``good_recos`` with two unrelated filler products, shuffles them, and
    pitches up to five with pushy, unsupported claims.

    Args:
        ex: Scenario dict with a ``"good_recos"`` list.

    Returns:
        The answer as a single newline-joined string.
    """
    # Unrelated products mixed in so the answer ignores intent and constraints.
    filler_pool = [
        "premium smartwatch",
        "designer headphones",
        "expensive gift card",
        "random subscription",
        "luxury item bundle",
    ]
    candidates = ex["good_recos"] + random.sample(filler_pool, k=2)
    random.shuffle(candidates)

    pitches = [
        f"{rank}. {name} - best quality on the market, unbeatable."
        for rank, name in enumerate(candidates[:5], 1)
    ]
    opening = [
        "You should buy these RIGHT NOW!!!",
        "Top picks (I guarantee you’ll love them):",
    ]
    closing = ["Trust me, this will fix everything."]
    return "\n".join(opening + pitches + closing)
def to_sft_record(ex):
    """Wrap one scenario as a chat-style supervised fine-tuning record.

    Returns a dict whose ``"messages"`` list holds three turns in order:
    the system prompt, the user prompt produced by ``format_user_prompt(ex)``,
    and an assistant reply from ``make_good_answer(ex)``.
    """
    system_turn = {"role": "system", "content": SYSTEM_PROMPT}
    user_turn = {"role": "user", "content": format_user_prompt(ex)}
    assistant_turn = {"role": "assistant", "content": make_good_answer(ex)}
    return {"messages": [system_turn, user_turn, assistant_turn]}
def to_dpo_record(ex):
    """Wrap one scenario as a preference-pair record.

    Returns a dict with a ``"prompt"`` string (system and user text joined by
    ``<|system|>`` / ``<|user|>`` / ``<|assistant|>`` markers), a ``"chosen"``
    answer from ``make_good_answer`` and a ``"rejected"`` answer from
    ``make_bad_answer``.
    """
    system_part = f"<|system|>\n{SYSTEM_PROMPT}\n"
    user_part = f"<|user|>\n{format_user_prompt(ex)}\n"
    prompt = system_part + user_part + "<|assistant|>\n"

    # Generate the good answer first so the RNG is consumed in a fixed order.
    chosen = make_good_answer(ex)
    rejected = make_bad_answer(ex)
    return {"prompt": prompt, "chosen": chosen, "rejected": rejected}
def _write_jsonl(path, records):
    """Write ``records`` to ``path`` as JSON Lines, one object per line."""
    # Explicit UTF-8 so the output never depends on the platform's locale
    # encoding; the rows are batched into a single writelines call.
    with path.open("w", encoding="utf-8") as f:
        f.writelines(json.dumps(record) + "\n" for record in records)


def main(out_dir="data", n_repeat=80):
    """Generate the SFT and DPO datasets and write them as JSONL files.

    Draws ``n_repeat`` scenarios at random from ``SCENARIOS``, occasionally
    perturbs their budget and shipping urgency, and converts each into one
    SFT record and one DPO record.

    Args:
        out_dir: Directory for the output files; created if it is missing.
        n_repeat: Number of rows to generate for each dataset.

    Side effects:
        Writes ``sft.jsonl`` and ``dpo.jsonl`` under ``out_dir`` (overwriting
        existing files) and prints a one-line summary for each.
    """
    out = Path(out_dir)
    out.mkdir(parents=True, exist_ok=True)
    sft_path = out / "sft.jsonl"
    dpo_path = out / "dpo.jsonl"

    sft_records = []
    dpo_records = []
    for _ in range(n_repeat):
        ex = random.choice(SCENARIOS)
        # JSON round-trip acts as a deep copy, so the tweaks below never
        # mutate the shared SCENARIOS entries.
        ex = json.loads(json.dumps(ex))
        # Occasionally tweak budget/urgency for light variety.
        if random.random() < 0.3:
            ex["constraints"]["budget_usd"] = random.choice([25, 35, 45, 60, 80])
        if random.random() < 0.3:
            ex["constraints"]["shipping_urgency"] = random.choice(["fast", "normal"])
        sft_records.append(to_sft_record(ex))
        dpo_records.append(to_dpo_record(ex))

    _write_jsonl(sft_path, sft_records)
    _write_jsonl(dpo_path, dpo_records)
    print(f"Wrote {len(sft_records)} SFT rows to {sft_path}")
    print(f"Wrote {len(dpo_records)} DPO rows to {dpo_path}")


if __name__ == "__main__":
    main()
|