File size: 21,567 Bytes
9e874de
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dd6cefc
9e874de
dd6cefc
 
 
9e874de
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dd6cefc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9e874de
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dd6cefc
 
 
 
 
 
 
 
9e874de
 
 
dd6cefc
 
9e874de
 
dd6cefc
 
 
9e874de
 
 
 
 
 
 
dd6cefc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9e874de
 
 
 
 
 
 
 
 
 
dd6cefc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9e874de
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dd6cefc
9e874de
 
 
 
 
dd6cefc
9e874de
 
 
 
 
dd6cefc
9e874de
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dd6cefc
 
 
 
9e874de
 
 
 
 
 
 
 
 
 
 
 
 
dd6cefc
 
 
9e874de
 
 
 
 
dd6cefc
 
 
9e874de
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
"""Prepare synthetic curated SFT data for Objectverse Diary LoRA tests."""

from __future__ import annotations

import argparse
import json
import sys
from collections.abc import Mapping, Sequence
from pathlib import Path

# Put the parent of this file's directory on sys.path (once) so the absolute
# `src.models.schema` import below resolves when this file is run directly.
# NOTE(review): presumably that directory is the repository root - confirm
# against the actual project layout.
PROJECT_ROOT = Path(__file__).resolve().parents[1]
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

from src.models.schema import DiaryEntry, ObjectInfo, ObjectUnderstanding, Persona, PersonaEnvelope


# Default output files and record counts, one pair per dataset version.
# They are used when the caller does not pass an explicit path / count.
DEFAULT_OUTPUT_PATH = Path("data/train/objectverse_sft_curated.jsonl")
DEFAULT_V2_OUTPUT_PATH = Path("data/train/objectverse_sft_curated_v2.jsonl")
DEFAULT_COUNT = 50
DEFAULT_V2_COUNT = 200
# Value written to each record's "source" field, per dataset version.
SOURCE_V1 = "objectverse-diary-synthetic-curated-v1"
SOURCE_V2 = "objectverse-diary-synthetic-curated-v2"

# Content of the "system" chat message attached to every generated record.
SYSTEM_PROMPT = (
    "You are Objectverse Diary, an English-first small-model assistant. "
    "Given structured object understanding and a requested personality mode, "
    "return strict JSON with keys persona and diary. Keep the voice strange, "
    "specific to the object, and suitable for a shareable object archive."
)

# Personality modes in rotation order; build_curated_records advances to the
# next mode after each full pass over the object list. Every entry must also
# be a key of MODE_PROFILES.
MODES = ("Cynical", "Dramatic", "Lonely", "Philosopher", "Romantic")

# Object catalog for dataset version "v1" (also the seed of OBJECTS_V2).
# Each entry supplies the keys read by the builder helpers:
#   name     - object label, also used to derive the character name
#   features - visible features (the diary text uses only the first two)
#   context  - where the object is found
#   memory   - verb phrase used as the persona's core memory and in the diary
OBJECTS = [
    {
        "name": "coffee mug",
        "features": ["white ceramic", "coffee ring", "tiny handle shadow"],
        "context": "developer desk",
        "memory": "listened to morning promises dissolve into cold coffee",
    },
    {
        "name": "mechanical keyboard",
        "features": ["black keycaps", "dust in the rows", "one glossy spacebar"],
        "context": "office corner",
        "memory": "translated panic into clicking long after midnight",
    },
    {
        "name": "running shoe",
        "features": ["creased mesh", "mud on the sole", "loose lace"],
        "context": "bedroom doorway",
        "memory": "carried brave intentions to the end of the block and back",
    },
    {
        "name": "desk lamp",
        "features": ["brushed metal neck", "warm bulb", "tilted shade"],
        "context": "late-night desk",
        "memory": "held a circle of light over notes nobody finished",
    },
    {
        "name": "water bottle",
        "features": ["clear plastic wall", "scratched cap", "half-full body"],
        "context": "kitchen counter",
        "memory": "survived every resolution to drink more water",
    },
    {
        "name": "notebook",
        "features": ["bent corner", "blue ink ghosts", "elastic strap"],
        "context": "bag pocket",
        "memory": "guarded three plans, two lists, and one sentence crossed out hard",
    },
    {
        "name": "umbrella",
        "features": ["folded black canopy", "wet seam", "curved handle"],
        "context": "entryway hook",
        "memory": "became useful only when the sky was already theatrical",
    },
    {
        "name": "house key",
        "features": ["brass teeth", "scratched bow", "small metal ring"],
        "context": "coat pocket",
        "memory": "opened the same door for every version of its human",
    },
    {
        "name": "charging cable",
        "features": ["frayed sleeve", "white plastic tip", "gentle knot"],
        "context": "bedside floor",
        "memory": "fed glowing rectangles while pretending not to resent them",
    },
    {
        "name": "teaspoon",
        "features": ["silver bowl", "thin handle", "tea stain near the neck"],
        "context": "sink edge",
        "memory": "stirred sweetness into cups and suspicion into silence",
    },
]

# Object catalog for dataset version "v2".
# It starts with a copy of every OBJECTS entry, each given a generated
# "scene_detail" derived from its context, followed by additional objects
# that carry a hand-written "scene_detail". Every v2 entry therefore has the
# "scene_detail" key that build_curated_records reads for v2 records.
OBJECTS_V2 = [
    *(
        dict(
            obj,
            scene_detail=f"resting in the {obj['context']} with a history no one inventoried",
        )
        for obj in OBJECTS
    ),
    {
        "name": "wireless earbud case",
        "features": ["rounded white shell", "tiny hinge", "charging light"],
        "context": "commuter bag",
        "memory": "held two small arguments against silence through a crowded train",
        "scene_detail": "buried beside lint, receipts, and one forgotten mint",
    },
    {
        "name": "transit card",
        "features": ["scuffed plastic", "faded corner", "thin blue stripe"],
        "context": "wallet slot",
        "memory": "opened gates for mornings that were already late",
        "scene_detail": "pressed flat under coins and expired coupons",
    },
    {
        "name": "canvas tote bag",
        "features": ["creased cotton", "ink logo", "soft handles"],
        "context": "entryway floor",
        "memory": "carried groceries, books, and ambitions heavier than both",
        "scene_detail": "slumped open with a receipt still clinging inside",
    },
    {
        "name": "cracked phone case",
        "features": ["clear plastic", "corner crack", "fingerprint haze"],
        "context": "bedside table",
        "memory": "took the impact so the glowing rectangle could remain innocent",
        "scene_detail": "lying face down after another nervous scroll",
    },
    {
        "name": "lip balm tube",
        "features": ["twisted cap", "pocket scratches", "worn label"],
        "context": "coat pocket",
        "memory": "answered every small weather emergency without being thanked",
        "scene_detail": "rolling between keys and a folded train ticket",
    },
    {
        "name": "medicine organizer",
        "features": ["clear lids", "weekday letters", "plastic hinges"],
        "context": "bathroom shelf",
        "memory": "sorted tiny promises into seven obedient compartments",
        "scene_detail": "waiting under fluorescent light with Monday already open",
    },
    {
        "name": "travel toothbrush",
        "features": ["folding handle", "blue bristles", "vented cap"],
        "context": "hotel sink",
        "memory": "kept a mouth honest in rooms that forgot every guest",
        "scene_detail": "balanced near a wrapped soap and a paper cup",
    },
    {
        "name": "passport cover",
        "features": ["navy leather", "creased spine", "stitched edge"],
        "context": "carry-on pocket",
        "memory": "guarded borders, delays, and a face trying to look awake",
        "scene_detail": "wedged beside boarding papers and a silent pen",
    },
    {
        "name": "boarding pass stub",
        "features": ["thermal paper", "torn edge", "gate code"],
        "context": "jacket pocket",
        "memory": "proved a journey happened after the airport swallowed the day",
        "scene_detail": "softened by rain and folded into four tired rectangles",
    },
    {
        "name": "hotel keycard",
        "features": ["matte plastic", "blank stripe", "room-number sleeve"],
        "context": "nightstand",
        "memory": "opened a temporary room for a temporary version of its human",
        "scene_detail": "resting beside a glass of water no one trusted",
    },
    {
        "name": "remote control",
        "features": ["rubber buttons", "battery door scar", "dusty edges"],
        "context": "sofa cushion",
        "memory": "changed channels while nobody changed their mind",
        "scene_detail": "half-sunk between cushions with one crumb for company",
    },
    {
        "name": "reading glasses",
        "features": ["thin frames", "smudged lenses", "bent temple"],
        "context": "book stack",
        "memory": "made small letters confess their meaning at midnight",
        "scene_detail": "left open across a page that was never finished",
    },
    {
        "name": "glasses case",
        "features": ["hard shell", "soft lining", "snap hinge"],
        "context": "desk drawer",
        "memory": "protected fragile clarity from the tyranny of keys",
        "scene_detail": "waiting in darkness with a paperclip pressed to its side",
    },
    {
        "name": "wristwatch",
        "features": ["scratched face", "brown strap", "small crown"],
        "context": "dresser tray",
        "memory": "measured days while humans pretended not to be measured",
        "scene_detail": "stopped beside coins and a single loose button",
    },
    {
        "name": "hair clip",
        "features": ["amber plastic", "tiny teeth", "curved spring"],
        "context": "bathroom counter",
        "memory": "held chaos together for meetings, errands, and almost-crying",
        "scene_detail": "resting near a fogged mirror and stray strands",
    },
    {
        "name": "laundry token",
        "features": ["round brass", "machine number", "dulled rim"],
        "context": "laundry room",
        "memory": "bought one more spin for clothes that knew too much",
        "scene_detail": "cool in a palm smelling faintly of detergent",
    },
    {
        "name": "refrigerator magnet",
        "features": ["painted souvenir", "flat magnet back", "chipped corner"],
        "context": "kitchen door",
        "memory": "held reminders in place while everyone forgot the reason",
        "scene_detail": "pinning a grocery list under a blue-white hum",
    },
    {
        "name": "grocery receipt",
        "features": ["curled paper", "faded ink", "long total"],
        "context": "kitchen counter",
        "memory": "itemized hunger, soap, and one unnecessary chocolate bar",
        "scene_detail": "curling beside fruit that ripened too quickly",
    },
    {
        "name": "spice jar",
        "features": ["glass body", "red powder", "metal lid"],
        "context": "kitchen shelf",
        "memory": "made bland evenings briefly remember a warmer country",
        "scene_detail": "standing in a row of louder labels",
    },
    {
        "name": "cutting board",
        "features": ["wood grain", "knife marks", "rounded corner"],
        "context": "kitchen island",
        "memory": "received every chopped plan without flinching",
        "scene_detail": "drying upright after a meal nobody photographed",
    },
    {
        "name": "ceramic bowl",
        "features": ["blue rim", "tiny chip", "glazed curve"],
        "context": "dish rack",
        "memory": "held soup, cereal, and one quiet apology",
        "scene_detail": "tilted beside plates still warm from rinse water",
    },
    {
        "name": "reusable chopsticks",
        "features": ["dark bamboo", "tapered tips", "cloth sleeve"],
        "context": "lunch bag",
        "memory": "lifted noodles through ordinary hunger and office gossip",
        "scene_detail": "tucked into a sleeve with a soy sauce stain",
    },
    {
        "name": "tea tin",
        "features": ["green metal", "tight lid", "leaf dust"],
        "context": "pantry shelf",
        "memory": "kept rain-colored leaves ready for small recoveries",
        "scene_detail": "quiet behind cereal boxes and a jar of almonds",
    },
    {
        "name": "sticky note stack",
        "features": ["yellow pages", "curled edge", "faint adhesive"],
        "context": "monitor base",
        "memory": "accepted urgent thoughts that became decorative fossils",
        "scene_detail": "leaning under a monitor's cold rectangular sun",
    },
    {
        "name": "binder clip",
        "features": ["black steel", "silver arms", "pinched mouth"],
        "context": "paper tray",
        "memory": "held loose pages together when ideas tried to scatter",
        "scene_detail": "biting a stack marked later in blue ink",
    },
    {
        "name": "fountain pen",
        "features": ["black barrel", "gold nib", "ink stain"],
        "context": "notebook margin",
        "memory": "turned hesitation into lines that looked deliberate",
        "scene_detail": "uncapped beside a sentence crossed out twice",
    },
    {
        "name": "old ticket stub",
        "features": ["creased paper", "seat number", "torn perforation"],
        "context": "memory box",
        "memory": "survived the event after the applause became dust",
        "scene_detail": "pressed under postcards and a dried ribbon",
    },
    {
        "name": "candle jar",
        "features": ["smoked glass", "wax tunnel", "blackened wick"],
        "context": "window ledge",
        "memory": "made one room pretend to be softer than it was",
        "scene_detail": "cooled beside a window with rain on the other side",
    },
    {
        "name": "alarm clock",
        "features": ["round face", "plastic feet", "stubborn button"],
        "context": "bedside shelf",
        "memory": "tore people from dreams and was hated for being correct",
        "scene_detail": "facing a bed that negotiated with every morning",
    },
    {
        "name": "tape measure",
        "features": ["yellow tape", "lock switch", "metal hook"],
        "context": "tool drawer",
        "memory": "proved shelves, windows, and ambitions were smaller than claimed",
        "scene_detail": "coiled beside screws and one pencil shaved short",
    },
]

# Per-mode text fragments, keyed by the names listed in MODES.
#   mood  - copied into Persona.mood
#   fear  - copied into Persona.secret_fear
#   tag   - list copied into Persona.tags
#   voice - phrase spliced into the diary text by _build_diary
MODE_PROFILES = {
    "Cynical": {
        "mood": "tired but sharply observant",
        "fear": "being replaced by something newer and less honest",
        "tag": ["dry witness", "domestic sarcasm", "small rebellion"],
        "voice": "withholding applause",
    },
    "Dramatic": {
        "mood": "grandly wounded",
        "fear": "being forgotten before the curtain falls",
        "tag": ["tragic prop", "household opera", "minor thunder"],
        "voice": "making every scratch sound like fate",
    },
    "Lonely": {
        "mood": "quietly abandoned",
        "fear": "becoming background forever",
        "tag": ["soft echo", "forgotten corner", "patient dust"],
        "voice": "speaking as if the room almost listened",
    },
    "Philosopher": {
        "mood": "curious and needlessly profound",
        "fear": "discovering usefulness is not the same as meaning",
        "tag": ["tiny ontology", "useful doubt", "object soul"],
        "voice": "turning chores into metaphysics",
    },
    "Romantic": {
        "mood": "hopelessly sentimental",
        "fear": "loving a human who mistakes devotion for convenience",
        "tag": ["tender witness", "secret devotion", "warm ache"],
        "voice": "saving every ordinary touch as evidence",
    },
}


def build_curated_records(
    count: int | None = None,
    *,
    version: str = "v1",
) -> list[dict[str, object]]:
    """Build the synthetic SFT records for one dataset version, in memory.

    Objects are visited round-robin. The personality mode advances by one
    each time the whole object list has been traversed, wrapping around
    ``MODES``.

    Args:
        count: Number of records to build. ``None`` selects the default for
            the version (``DEFAULT_V2_COUNT`` for "v2", else ``DEFAULT_COUNT``).
        version: Dataset version, "v1" or "v2".

    Returns:
        One dict per record holding metadata plus a three-message chat
        (system, user, assistant). "v2" records also carry ``scene_detail``.

    Raises:
        ValueError: If ``version`` is unsupported or ``count`` is below 1.
    """
    version = _validate_version(version)
    is_v2 = version == "v2"
    if count is None:
        count = DEFAULT_V2_COUNT if is_v2 else DEFAULT_COUNT
    if count < 1:
        raise ValueError("count must be at least 1")

    catalog = _objects_for_version(version)
    source_label = _source_for_version(version)

    rows: list[dict[str, object]] = []
    for position in range(count):
        # `cycle` counts completed passes over the catalog and picks the mode;
        # `slot` is the position inside the current pass and picks the object.
        cycle, slot = divmod(position, len(catalog))
        item = catalog[slot]
        mode = MODES[cycle % len(MODES)]

        understanding_json = _build_object_understanding(item).model_dump(mode="json")
        persona = _build_persona(item, mode).persona
        diary = _build_diary(item, mode, persona, position)

        # The assistant turn is the JSON string the model is trained to emit.
        assistant_text = json.dumps(
            {
                "persona": persona.model_dump(mode="json"),
                "diary": diary.model_dump(mode="json"),
            },
            ensure_ascii=False,
        )
        conversation = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": _user_prompt(understanding_json, mode)},
            {"role": "assistant", "content": assistant_text},
        ]

        row: dict[str, object] = {
            "id": _record_id(version, position),
            "source": source_label,
            "split": "train",
            "mode": mode,
            "object_description": _object_description(item),
            "object_understanding": understanding_json,
            "curation_notes": _curation_notes(version),
            "messages": conversation,
        }
        if is_v2:
            row["scene_detail"] = str(item["scene_detail"])
        rows.append(row)
    return rows


def write_jsonl(records: Sequence[Mapping[str, object]], output_path: Path) -> Path:
    """Serialize ``records`` to ``output_path`` as UTF-8 JSON Lines.

    Each record becomes one line of JSON with sorted keys and unescaped
    non-ASCII characters, terminated by a newline. Missing parent
    directories are created, and an existing file is overwritten.

    Args:
        records: JSON-serializable mappings, written in the given order.
        output_path: Destination file.

    Returns:
        ``output_path``, for convenient chaining.
    """
    output_path.parent.mkdir(parents=True, exist_ok=True)
    # Terminate every record individually rather than joining and appending a
    # final newline: an empty `records` then yields an empty file instead of a
    # single blank line, which line-by-line JSONL readers cannot parse.
    payload = "".join(
        json.dumps(record, ensure_ascii=False, sort_keys=True) + "\n"
        for record in records
    )
    output_path.write_text(payload, encoding="utf-8")
    return output_path


def prepare_curated_dataset(
    output_path: Path | None = None,
    count: int | None = None,
    *,
    version: str = "v1",
) -> Path:
    """Build the curated records for ``version`` and write them as JSONL.

    Args:
        output_path: Destination file. ``None`` selects the default path for
            the version (``DEFAULT_V2_OUTPUT_PATH`` for "v2", otherwise
            ``DEFAULT_OUTPUT_PATH``).
        count: Number of records; forwarded to ``build_curated_records``.
        version: Dataset version, "v1" or "v2".

    Returns:
        The path the records were written to.

    Raises:
        ValueError: If ``version`` is unsupported (or ``count`` is rejected
            by ``build_curated_records``).
    """
    checked_version = _validate_version(version)
    if output_path is not None:
        destination = output_path
    elif checked_version == "v2":
        destination = DEFAULT_V2_OUTPUT_PATH
    else:
        destination = DEFAULT_OUTPUT_PATH
    generated = build_curated_records(count, version=checked_version)
    return write_jsonl(generated, destination)


def _validate_version(version: str) -> str:
    if version not in {"v1", "v2"}:
        raise ValueError("version must be 'v1' or 'v2'.")
    return version


def _objects_for_version(version: str) -> Sequence[Mapping[str, object]]:
    """Return the object catalog for ``version``: OBJECTS_V2 for "v2", else OBJECTS."""
    if version == "v2":
        return OBJECTS_V2
    return OBJECTS


def _source_for_version(version: str) -> str:
    """Return the "source" label for ``version``: SOURCE_V2 for "v2", else SOURCE_V1."""
    if version == "v2":
        return SOURCE_V2
    return SOURCE_V1


def _record_id(version: str, index: int) -> str:
    if version == "v2":
        return f"curated-v2-synthetic-{index + 1:04d}"
    return f"curated-synthetic-{index + 1:04d}"


def _curation_notes(version: str) -> str:
    if version == "v2":
        return (
            "Synthetic curated v2 row: no private photo, no personal identifier, "
            "broader object and scene coverage, English-first output with Chinese helper text."
        )
    return (
        "Synthetic curated row: no private photo, no personal identifier, "
        "English-first output with Chinese helper text."
    )


def _build_object_understanding(obj: Mapping[str, object]) -> ObjectUnderstanding:
    """Wrap a catalog entry in the ``ObjectUnderstanding`` schema model.

    Reads ``name``, ``features`` and ``context`` from ``obj``, coercing each
    value (and each feature) to ``str``. Confidence is fixed at 0.9.
    """
    name = str(obj["name"])
    visible_features = list(map(str, obj["features"]))
    likely_context = str(obj["context"])
    info = ObjectInfo(
        name=name,
        visible_features=visible_features,
        likely_context=likely_context,
        confidence=0.9,
    )
    return ObjectUnderstanding(object=info)


def _build_persona(obj: Mapping[str, object], mode: str) -> PersonaEnvelope:
    """Build the persona for a catalog entry under the given personality mode.

    Mood, secret fear and tags come from ``MODE_PROFILES[mode]``; the core
    memory comes from the object; the complaint is a fixed sentence with the
    object name filled in.

    Raises:
        KeyError: If ``mode`` has no profile or ``obj`` lacks a required key.
    """
    mode_profile = MODE_PROFILES[mode]
    name = str(obj["name"])
    persona = Persona(
        object_name=name,
        character_name=_character_name(name, mode),
        mood=str(mode_profile["mood"]),
        secret_fear=str(mode_profile["fear"]),
        core_memory=str(obj["memory"]),
        complaint=(
            f"I am not merely a {name}; I am an archive of what humans do "
            "when they think things cannot testify."
        ),
        tags=list(map(str, mode_profile["tag"])),
    )
    return PersonaEnvelope(persona=persona)


def _build_diary(obj: Mapping[str, object], mode: str, persona: Persona, index: int) -> DiaryEntry:
    """Compose the templated diary entry for one record.

    Args:
        obj: Catalog entry. Its first two features, context, memory and
            optional ``scene_detail`` are spliced into the text.
        mode: Personality mode; selects the ``voice`` phrase from
            ``MODE_PROFILES``.
        persona: Persona already built for this record; only ``mood`` is read.
        index: Zero-based record index, used to vary the day number.

    Returns:
        A ``DiaryEntry`` whose English and Chinese bodies follow the same
        four-sentence template with the same inserted fragments.
    """
    voice_profile = MODE_PROFILES[mode]
    name = str(obj["name"])
    # Only the first two features are mentioned in the diary text.
    leading_features = ", ".join(str(feature) for feature in obj["features"][:2])
    # Entries without a scene detail (the v1 catalog) get a fixed closing line.
    closing_scene = str(obj.get("scene_detail", "collecting proof that ordinary things notice everything"))
    day = 300 + index + len(name)

    context = obj["context"]
    memory = obj["memory"]
    voice = voice_profile["voice"]
    mood = persona.mood

    english_sentences = (
        f"Today I waited in the {context} wearing my {leading_features} like official records.",
        "The humans moved around me with the confidence of temporary weather.",
        f"I remembered how I {memory}, and I answered in my own way: {voice}.",
        f"My mood is {mood}, but I am still here, {closing_scene}.",
    )
    chinese_sentences = (
        f"今天我待在 {context},带着 {leading_features},像一份安静的档案。",
        "人类从我身边经过,好像自己不是短暂天气。",
        f"我记得自己曾经 {memory},于是用自己的方式回应:{voice}。",
        f"我的情绪是 {mood},但我仍在这里,{closing_scene}。",
    )
    return DiaryEntry(
        title=f"Secret Diary - Day {day}",
        english=" ".join(english_sentences),
        chinese="".join(chinese_sentences),
    )


def _character_name(object_name: str, mode: str) -> str:
    compact = "".join(part.capitalize() for part in object_name.split()[:2])
    suffix = {
        "Cynical": "Ash",
        "Dramatic": "of the Minor Stage",
        "Lonely": "Afterlight",
        "Philosopher": "the Questioning",
        "Romantic": "de Moon",
    }[mode]
    return f"{compact} {suffix}".strip()


def _object_description(obj: Mapping[str, object]) -> str:
    features = ", ".join(str(feature) for feature in obj["features"])
    description = f"{obj['name']} in a {obj['context']} with {features}"
    if "scene_detail" in obj:
        description = f"{description}, {obj['scene_detail']}"
    return description


def _user_prompt(object_understanding: Mapping[str, object], mode: str) -> str:
    payload = json.dumps(object_understanding, ensure_ascii=False, sort_keys=True)
    return (
        f"Personality mode: {mode}\n"
        f"Object understanding JSON: {payload}\n"
        "Return JSON with keys persona and diary only."
    )


def _parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--version", choices=("v1", "v2"), default="v1")
    parser.add_argument("--count", type=int, default=None)
    parser.add_argument("--output", type=Path, default=None)
    return parser.parse_args()


def main() -> None:
    """CLI entry point: generate the dataset file and print a one-line summary."""
    options = _parse_args()
    written_to = prepare_curated_dataset(options.output, options.count, version=options.version)
    # Mirror the default the builder applies when no explicit count was given.
    if options.count:
        total = options.count
    elif options.version == "v2":
        total = DEFAULT_V2_COUNT
    else:
        total = DEFAULT_COUNT
    print(f"wrote {total} synthetic curated SFT records to {written_to}")


# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()