File size: 6,458 Bytes
a365d48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
import json
import random
from pathlib import Path
from utils_prompts import SYSTEM_PROMPT, format_user_prompt

# Seed the global RNG at import time so the sampled scenarios and shuffles
# are reproducible from one run of this script to the next.
random.seed(7)

# Hand-written shopping scenarios that main() samples from. Each entry holds:
#   user_intent - the shopper's goal, phrased in first person
#   cart        - items already in the cart
#   constraints - budget_usd / shipping_urgency / brand_avoid settings
#                 (brand_avoid is not read in this file; presumably it is
#                 consumed by format_user_prompt - verify)
#   good_recos  - add-on items considered appropriate for the scenario;
#                 make_good_answer() draws from these and make_bad_answer()
#                 mixes them with off-topic filler
SCENARIOS = [
    {
        "user_intent": "I run 5k daily and want to reduce knee discomfort.",
        "cart": ["running shoes", "moisture-wicking socks"],
        "constraints": {"budget_usd": 45, "shipping_urgency": "fast", "brand_avoid": []},
        "good_recos": ["knee compression sleeve", "foam roller", "anti-chafe balm"],
    },
    {
        "user_intent": "I’m setting up pour-over coffee at home and want consistent taste.",
        "cart": ["coffee beans", "paper filters"],
        "constraints": {"budget_usd": 60, "shipping_urgency": "normal", "brand_avoid": []},
        "good_recos": ["gooseneck kettle", "digital scale", "hand grinder"],
    },
    {
        "user_intent": "I get acne sometimes; want a simple skincare routine.",
        "cart": ["gentle cleanser"],
        "constraints": {"budget_usd": 35, "shipping_urgency": "normal", "brand_avoid": ["fragrance-heavy"]},
        "good_recos": ["non-comedogenic moisturizer", "sunscreen SPF 30+", "salicylic acid spot treatment"],
    },
    {
        "user_intent": "I travel weekly and need phone accessories that won’t break.",
        "cart": ["USB-C cable"],
        "constraints": {"budget_usd": 50, "shipping_urgency": "fast", "brand_avoid": []},
        "good_recos": ["compact wall charger (PD)", "cable organizer", "power bank (airline-safe)"],
    },
    {
        "user_intent": "I’m cooking more; want quick, healthy meals.",
        "cart": ["olive oil", "brown rice"],
        "constraints": {"budget_usd": 40, "shipping_urgency": "normal", "brand_avoid": []},
        "good_recos": ["nonstick skillet", "meal-prep containers", "spice blend (low sodium)"],
    },
]

# One-line rationale for each known recommendation. Built once at import time
# instead of being re-created on every loop iteration.
_RECO_REASONS = {
    "knee compression sleeve": "supports the knee during runs and may reduce discomfort.",
    "foam roller": "helps with recovery and tightness after running.",
    "anti-chafe balm": "prevents irritation on longer runs.",
    "gooseneck kettle": "improves pour control for consistent extraction.",
    "digital scale": "lets you measure coffee-to-water ratio precisely.",
    "hand grinder": "fresh grind improves flavor consistency.",
    "non-comedogenic moisturizer": "hydrates without clogging pores.",
    "sunscreen SPF 30+": "protects skin daily; important even with acne care.",
    "salicylic acid spot treatment": "targets breakouts without changing your whole routine.",
    "compact wall charger (PD)": "charges faster and is more travel-friendly.",
    "cable organizer": "reduces cable damage and tangles in bags.",
    "power bank (airline-safe)": "keeps devices powered while traveling.",
    "nonstick skillet": "makes quick cooking with less oil easier.",
    "meal-prep containers": "supports batching healthy meals for the week.",
    "spice blend (low sodium)": "adds flavor without extra calories or sodium.",
}

# Rationale used for any item that has no entry in _RECO_REASONS.
_DEFAULT_REASON = "fits your intent and complements your cart."


def make_good_answer(ex):
    """Build a rubric-following assistant reply for one scenario.

    A random subset (1-3 items, random order) of ``ex["good_recos"]`` is
    listed with a short rationale each, followed by "Why these",
    "Compatibility / checks" and "Optional next step" sections.

    Args:
        ex: Scenario dict with ``user_intent``, ``constraints`` and
            ``good_recos`` keys. It is not mutated.

    Returns:
        The reply as a single newline-joined string.
    """
    # Work on a copy so shuffling does not reorder the caller's list.
    recos = ex["good_recos"][:]
    random.shuffle(recos)
    recos = recos[: random.choice([1, 2, 3])]

    lines = ["Recommendations:"]
    for i, item in enumerate(recos, 1):
        reason = _RECO_REASONS.get(item, _DEFAULT_REASON)
        # Separate the item from its rationale; without the ": " the two
        # run together (e.g. "foam rollerhelps with recovery ...").
        lines.append(f"{i}) {item}: {reason}")

    lines.append("Why these:")
    lines.append(f"- Matches your intent: {ex['user_intent']}")
    lines.append("- Complements what’s already in your cart (no duplicates).")
    if "budget_usd" in ex["constraints"]:
        lines.append(f"- Stays mindful of your budget (≈ ${ex['constraints']['budget_usd']}).")

    lines.append("Compatibility / checks:")
    lines.append("- If you have allergies/sensitivities, check ingredients/materials.")
    # The shipping hint is only relevant when the shopper asked for speed.
    if ex["constraints"].get("shipping_urgency") == "fast":
        lines.append("- Prefer items with fast shipping / in-stock options.")

    lines.append("Optional next step:")
    lines.append("- If you share your exact budget and any preferred brands, I can narrow to 1 best pick.")
    return "\n".join(lines)

def make_bad_answer(ex):
    """Build a deliberately poor reply to serve as the rejected sample.

    The reply breaks the rubric on purpose: it lists too many items, uses
    pushy sales language, ignores the scenario constraints and gives no
    real reasoning.

    Args:
        ex: Scenario dict; only ``good_recos`` is read.

    Returns:
        The reply as a single newline-joined string.
    """
    # Off-topic, pricey filler that gets mixed in with the real suggestions.
    filler_pool = [
        "premium smartwatch", "designer headphones", "expensive gift card",
        "random subscription", "luxury item bundle"
    ]
    genuine = ex["good_recos"]
    extras = random.sample(filler_pool, k=2)
    picks = genuine + extras
    random.shuffle(picks)

    opening = [
        "You should buy these RIGHT NOW!!!",
        "Top picks (I guarantee you’ll love them):",
    ]
    pitches = [
        f"{rank}. {name} - best quality on the market, unbeatable."
        for rank, name in enumerate(picks[:5], 1)
    ]
    closing = ["Trust me, this will fix everything."]
    return "\n".join(opening + pitches + closing)

def to_sft_record(ex):
    """Wrap a scenario as one chat-format supervised fine-tuning row.

    The row has a single ``messages`` list with system, user and assistant
    turns; the assistant turn is a freshly generated good answer.
    """
    system_turn = {"role": "system", "content": SYSTEM_PROMPT}
    user_turn = {"role": "user", "content": format_user_prompt(ex)}
    assistant_turn = {"role": "assistant", "content": make_good_answer(ex)}
    return {"messages": [system_turn, user_turn, assistant_turn]}

def to_dpo_record(ex):
    """Wrap a scenario as one preference (DPO) row.

    The prompt is the system and user text joined with ``<|system|>``,
    ``<|user|>`` and ``<|assistant|>`` tags; ``chosen`` is a good answer and
    ``rejected`` is a deliberately bad one.
    """
    prompt_text = f"<|system|>\n{SYSTEM_PROMPT}\n<|user|>\n{format_user_prompt(ex)}\n<|assistant|>\n"
    # Generate the good answer before the bad one so the RNG is consumed in
    # a fixed order.
    preferred = make_good_answer(ex)
    dispreferred = make_bad_answer(ex)
    return {
        "prompt": prompt_text,
        "chosen": preferred,
        "rejected": dispreferred,
    }

def _write_jsonl(path, records):
    """Write *records* to *path* as JSON Lines, one object per line."""
    # Explicit UTF-8 so the output does not depend on the platform's default
    # text encoding.
    with path.open("w", encoding="utf-8") as f:
        f.writelines(json.dumps(record) + "\n" for record in records)


def main(out_dir="data", n_repeat=80):
    """Generate the SFT and DPO datasets and write them as JSONL files.

    Args:
        out_dir: Directory for ``sft.jsonl`` and ``dpo.jsonl``; created
            (including parents) if it does not exist.
        n_repeat: Number of scenarios to sample, and therefore the number of
            rows written to each file.
    """
    out = Path(out_dir)
    out.mkdir(parents=True, exist_ok=True)

    sft_path = out / "sft.jsonl"
    dpo_path = out / "dpo.jsonl"

    sft_records = []
    dpo_records = []

    for _ in range(n_repeat):
        # JSON round-trip gives a deep copy, so the tweaks below never mutate
        # the shared SCENARIOS templates.
        ex = json.loads(json.dumps(random.choice(SCENARIOS)))
        # Light randomization: occasionally override budget and/or urgency.
        if random.random() < 0.3:
            ex["constraints"]["budget_usd"] = random.choice([25, 35, 45, 60, 80])
        if random.random() < 0.3:
            ex["constraints"]["shipping_urgency"] = random.choice(["fast", "normal"])
        sft_records.append(to_sft_record(ex))
        dpo_records.append(to_dpo_record(ex))

    _write_jsonl(sft_path, sft_records)
    _write_jsonl(dpo_path, dpo_records)

    print(f"Wrote {len(sft_records)} SFT rows to {sft_path}")
    print(f"Wrote {len(dpo_records)} DPO rows to {dpo_path}")

# Script entry point: generate both datasets using main()'s default arguments.
if __name__ == "__main__":
    main()