"""Build a synthetic instruction dataset: thread (+image descriptions) -> ActionPlan JSON. Reuses the agent's SYSTEM prompt and ActionPlan schema so training and serving stay in sync. This is a template with a few hand-written seeds plus simple augmentation β€” expand toward ~500-2000 examples covering relative dates, ranges, conflicts, no-event chitchat (empty), multiple events, and image-derived events. Output: training/data/dataset.jsonl (one {messages:[...]} chat record per line) Generating training data may use any offline tooling β€” the "no cloud API" rule applies to the running app's inference, not dataset prep. Vision note: we keep the vision encoder frozen at train time, so image examples are represented by a short text *description* of the image (what the model would "see"); this teaches the decision/format behavior without multimodal training. """ from __future__ import annotations import json import sys from pathlib import Path sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) from server.agent import SYSTEM # noqa: E402 reuse the serving prompt # (current_datetime, thread, action_plan_dict) SEEDS = [ ( "2026-06-08T09:00:00", "Alex: lunch tomorrow at 1pm?\nMe: works, the usual place", { "reasoning": "Alex proposed lunch tomorrow at 1pm and it was accepted.", "events": [ { "title": "Lunch with Alex", "start": "2026-06-09T13:00:00", "end": "2026-06-09T14:00:00", "location": "the usual place", "attendees": ["Alex"], "reminder_minutes": 30, "notes": None, } ], "conflicts": [], "proposed_times": [], "reply_draft": "Perfect, see you at 1 at the usual place!", "needs_clarification": None, }, ), ( "2026-06-08T09:00:00", "Sam: dentist moved to next Thursday 3:30\nMe: noted", { "reasoning": "Sam rescheduled the dentist to next Thursday at 3:30pm.", "events": [ { "title": "Dentist", "start": "2026-06-11T15:30:00", "end": "2026-06-11T16:30:00", "location": None, "attendees": [], "reminder_minutes": 60, "notes": None, } ], "conflicts": [], "proposed_times": [], "reply_draft": "Got it, I'll be there Thursday at 3:30.", "needs_clarification": None, }, ), ( "2026-06-08T09:00:00", "Jo: haha that meme killed me\nMe: lol", { "reasoning": "Casual chat, no plan to schedule.", "events": [], "conflicts": [], "proposed_times": [], "reply_draft": "πŸ˜‚", "needs_clarification": None, }, ), ( "2026-06-08T09:00:00", "Pat: you free sometime this week to grab coffee?", { "reasoning": "A meetup is proposed but no concrete day/time was given.", "events": [], "conflicts": [], "proposed_times": [], "reply_draft": "Yeah! What day works for you?", "needs_clarification": "Which day/time this week works for coffee?", }, ), ( # image-derived example (vision encoder frozen -> describe the image in text) "2026-06-08T09:00:00", "Riley: [image: screenshot of a concert ticket β€” 'The Lumens, Fri Jun 12, " "doors 7:30pm, The Fillmore']\nMe: can't wait!", { "reasoning": "The screenshot is a concert ticket with a concrete date, time, and venue.", "events": [ { "title": "The Lumens concert", "start": "2026-06-12T19:30:00", "end": "2026-06-12T22:30:00", "location": "The Fillmore", "attendees": ["Riley"], "reminder_minutes": 120, "notes": "From ticket screenshot", } ], "conflicts": [], "proposed_times": [], "reply_draft": "Added it β€” doors at 7:30, see you Friday!", "needs_clarification": None, }, ), # --- multiple events in one thread (mirrors the live "Try a sample") --- ( "2026-06-08T08:00:00", "Room parent: picture day is this Thursday at 9am, green shirts!\n" "Coach Dana: heads up, soccer moves to Tuesday 5pm this week\nMe: thanks, adding both", { "reasoning": "Two concrete events: picture day Thursday 9am and the rescheduled soccer Tuesday 5pm.", "events": [ { "title": "Picture day (green shirt)", "start": "2026-06-11T09:00:00", "end": "2026-06-11T09:30:00", "location": None, "attendees": [], "reminder_minutes": 720, "notes": "Wear the green class shirt", }, { "title": "Soccer practice", "start": "2026-06-09T17:00:00", "end": "2026-06-09T18:00:00", "location": None, "attendees": ["Coach Dana"], "reminder_minutes": 60, "notes": None, }, ], "conflicts": [], "proposed_times": [], "reply_draft": "Got both down β€” picture day Thursday 9am and soccer Tuesday 5pm!", "needs_clarification": None, }, ), # --- relative date: "next Friday" --- ( "2026-06-08T09:00:00", "Teacher: our class field trip permission slips are due next Friday\nMe: ok!", { "reasoning": "A due date stated as 'next Friday' relative to Monday Jun 8 resolves to Jun 19.", "events": [ { "title": "Permission slips due", "start": "2026-06-19T09:00:00", "end": None, "location": None, "attendees": [], "reminder_minutes": 1440, "notes": "Class field trip", } ], "conflicts": [], "proposed_times": [], "reply_draft": "Thanks β€” I'll have them signed before Friday.", "needs_clarification": None, }, ), # --- multi-hour range: field trip 9-3 --- ( "2026-06-08T09:00:00", "Room parent: field trip to the science museum is Wed 9am–3pm, bring a bagged lunch\nMe: noted", { "reasoning": "A single all-day-ish event with an explicit start and end on Wednesday.", "events": [ { "title": "Science museum field trip", "start": "2026-06-10T09:00:00", "end": "2026-06-10T15:00:00", "location": "Science museum", "attendees": [], "reminder_minutes": 720, "notes": "Bring a bagged lunch", } ], "conflicts": [], "proposed_times": [], "reply_draft": "Got it β€” bagged lunch, 9 to 3 Wednesday.", "needs_clarification": None, }, ), # --- multi-day range: book fair (3 days) --- ( "2026-06-08T09:00:00", "PTA: the book fair runs Mon–Wed next week in the library, open during pickup\nMe: πŸ‘", { "reasoning": "A multi-day event spanning Monday to Wednesday of next week.", "events": [ { "title": "Book fair", "start": "2026-06-15T08:00:00", "end": "2026-06-17T16:00:00", "location": "School library", "attendees": [], "reminder_minutes": 1440, "notes": "Open during pickup", } ], "conflicts": [], "proposed_times": [], "reply_draft": "Thanks β€” we'll stop by after pickup.", "needs_clarification": None, }, ), # --- no-event chitchat (empty) --- ( "2026-06-08T09:00:00", "Jess: did you see the class photo album?? so cute\nMe: omg yes 😍", { "reasoning": "Friendly chatter, nothing to schedule.", "events": [], "conflicts": [], "proposed_times": [], "reply_draft": "They grow up so fast πŸ₯Ή", "needs_clarification": None, }, ), ( "2026-06-08T09:00:00", "Coach Dana: great hustle from the kids today, proud of them\nMe: such a fun season", { "reasoning": "Encouragement, no plan or date mentioned.", "events": [], "conflicts": [], "proposed_times": [], "reply_draft": "Thanks Coach β€” they love it!", "needs_clarification": None, }, ), # --- conflict against existing calendar + proposed alternatives --- ( "2026-06-08T09:00:00", "Existing calendar:\n- Dentist (Mia): 2026-06-10T14:00..2026-06-10T15:00\n\n" "Room parent: makeup class photo is Wednesday at 2pm\nMe: hmm Mia has the dentist then", { "reasoning": "The makeup photo at Wed 2pm overlaps Mia's existing dentist appointment.", "events": [ { "title": "Makeup class photo", "start": "2026-06-10T14:00:00", "end": "2026-06-10T14:30:00", "location": None, "attendees": [], "reminder_minutes": 120, "notes": None, } ], "conflicts": [ { "event_index": 0, "clashes_with": "Dentist (Mia) 2:00-3:00pm", "severity": "overlap", } ], "proposed_times": ["2026-06-10T15:00:00", "2026-06-11T14:00:00"], "reply_draft": "Mia has the dentist at 2 β€” could she do the makeup photo at 3, or Thursday?", "needs_clarification": None, }, ), ( "2026-06-08T09:00:00", "Existing calendar:\n- Work standup: 2026-06-09T09:00..2026-06-09T09:30\n\n" "Teacher: parent volunteers needed in class Tue 9–10am\nMe: I'll try", { "reasoning": "Volunteering Tue 9-10am butts against the 9:00-9:30 standup (overlap).", "events": [ { "title": "Classroom volunteering", "start": "2026-06-09T09:00:00", "end": "2026-06-09T10:00:00", "location": "Classroom", "attendees": [], "reminder_minutes": 60, "notes": None, } ], "conflicts": [ { "event_index": 0, "clashes_with": "Work standup 9:00-9:30am", "severity": "overlap", } ], "proposed_times": ["2026-06-09T10:00:00", "2026-06-11T09:00:00"], "reply_draft": "I have a 9am call β€” could I come in at 10 instead?", "needs_clarification": None, }, ), # --- image-described event: PTA flyer --- ( "2026-06-08T09:00:00", "Room parent: [image: flyer β€” 'Spring Family Picnic, Sat Jun 13, 11am, Lincoln Park " "Shelter B, bring a dish to share']\nMe: we'll be there", { "reasoning": "The flyer gives a concrete date, time, and location for the family picnic.", "events": [ { "title": "Spring Family Picnic", "start": "2026-06-13T11:00:00", "end": "2026-06-13T14:00:00", "location": "Lincoln Park, Shelter B", "attendees": [], "reminder_minutes": 120, "notes": "Bring a dish to share (from flyer)", } ], "conflicts": [], "proposed_times": [], "reply_draft": "Added it β€” we'll bring a salad!", "needs_clarification": None, }, ), # --- image-described event: permission slip photo --- ( "2026-06-08T09:00:00", "Mom: [image: photo of a permission slip β€” 'Zoo trip Thursday June 11, depart 8:30am, " "return ~2:30pm, $12 due Mon']\nMe: signing tonight", { "reasoning": "Permission slip photo encodes the trip date/time and a separate payment deadline.", "events": [ { "title": "Zoo trip", "start": "2026-06-11T08:30:00", "end": "2026-06-11T14:30:00", "location": "Zoo", "attendees": [], "reminder_minutes": 720, "notes": "$12 due Monday; from permission slip", } ], "conflicts": [], "proposed_times": [], "reply_draft": "Signed and the $12 is ready for Monday!", "needs_clarification": None, }, ), # --- ambiguity: needs clarification, no event yet --- ( "2026-06-08T09:00:00", "Teacher: can a couple of parents help with the class party sometime next week?\nMe: maybe!", { "reasoning": "A request without a fixed day or time β€” ask before scheduling anything.", "events": [], "conflicts": [], "proposed_times": [], "reply_draft": "Happy to help β€” what day and time is the party?", "needs_clarification": "Which day/time next week is the class party?", }, ), # --- explicit date "the 14th" --- ( "2026-06-08T09:00:00", "Coach Dana: end-of-season pizza party is the 14th at 6pm at Tony's\nMe: yum, count us in", { "reasoning": "'The 14th' resolves to Jun 14; concrete time and venue given.", "events": [ { "title": "End-of-season pizza party", "start": "2026-06-14T18:00:00", "end": "2026-06-14T20:00:00", "location": "Tony's", "attendees": ["Coach Dana"], "reminder_minutes": 120, "notes": None, } ], "conflicts": [], "proposed_times": [], "reply_draft": "Count us in β€” see you at Tony's at 6!", "needs_clarification": None, }, ), # --- "in two weeks" --- ( "2026-06-08T09:00:00", "Teacher: the spring recital is in two weeks, Friday at 6pm in the gym\nMe: πŸ“…", { "reasoning": "Friday two weeks out is Jun 19? The teacher pins it to Friday 6pm β€” Jun 19.", "events": [ { "title": "Spring recital", "start": "2026-06-19T18:00:00", "end": "2026-06-19T19:30:00", "location": "School gym", "attendees": [], "reminder_minutes": 1440, "notes": None, } ], "conflicts": [], "proposed_times": [], "reply_draft": "Can't wait β€” we'll be there Friday at 6!", "needs_clarification": None, }, ), # --- early dismissal --- ( "2026-06-08T09:00:00", "School office: reminder, early dismissal this Wednesday at 12:30pm\nMe: thanks for the heads up", { "reasoning": "A one-off schedule change parents need to plan pickup around.", "events": [ { "title": "Early dismissal", "start": "2026-06-10T12:30:00", "end": None, "location": "School", "attendees": [], "reminder_minutes": 60, "notes": "Pickup at 12:30 instead of usual", } ], "conflicts": [], "proposed_times": [], "reply_draft": "Noted β€” I'll be there at 12:30 Wednesday.", "needs_clarification": None, }, ), # --- parent-teacher conference (specific slot) --- ( "2026-06-08T09:00:00", "Teacher: your parent–teacher conference slot is Thursday 4:15pm, ~15 min\nMe: works for me", { "reasoning": "A confirmed 15-minute conference slot on Thursday.", "events": [ { "title": "Parent–teacher conference", "start": "2026-06-11T16:15:00", "end": "2026-06-11T16:30:00", "location": "Classroom", "attendees": ["Teacher"], "reminder_minutes": 120, "notes": None, } ], "conflicts": [], "proposed_times": [], "reply_draft": "4:15 Thursday works β€” see you then!", "needs_clarification": None, }, ), # --- birthday party RSVP with location --- ( "2026-06-08T09:00:00", "Other parent: Leo's bday party Sat 2–4pm at Jump Zone, can Mia come?\nMe: she'd love to!", { "reasoning": "A party invite with a clear window and venue, accepted.", "events": [ { "title": "Leo's birthday party", "start": "2026-06-13T14:00:00", "end": "2026-06-13T16:00:00", "location": "Jump Zone", "attendees": ["Leo"], "reminder_minutes": 180, "notes": None, } ], "conflicts": [], "proposed_times": [], "reply_draft": "She'd love to β€” see you Saturday at Jump Zone!", "needs_clarification": None, }, ), # --- carpool reminder --- ( "2026-06-08T09:00:00", "Neighbor: I've got carpool drop-off Friday at 7:45am this week\nMe: perfect, thank you!", { "reasoning": "A specific carpool commitment on Friday morning.", "events": [ { "title": "Carpool drop-off (neighbor driving)", "start": "2026-06-12T07:45:00", "end": None, "location": None, "attendees": [], "reminder_minutes": 30, "notes": None, } ], "conflicts": [], "proposed_times": [], "reply_draft": "Thank you! I've got next Friday.", "needs_clarification": None, }, ), # --- reschedule (supersede an earlier time) --- ( "2026-06-08T09:00:00", "Coach Dana: practice this Tue is now 6pm, not 5\nMe: ok updating", { "reasoning": "A reschedule: practice Tuesday moves to 6pm.", "events": [ { "title": "Soccer practice", "start": "2026-06-09T18:00:00", "end": "2026-06-09T19:00:00", "location": None, "attendees": ["Coach Dana"], "reminder_minutes": 60, "notes": "Moved from 5pm to 6pm", } ], "conflicts": [], "proposed_times": [], "reply_draft": "Updated β€” 6pm Tuesday it is.", "needs_clarification": None, }, ), # --- bake sale drop-off deadline --- ( "2026-06-08T09:00:00", "PTA: bake sale items drop off Thursday by 8am at the front office\nMe: I'll bring cookies", { "reasoning": "A drop-off deadline Thursday morning.", "events": [ { "title": "Bake sale drop-off", "start": "2026-06-11T08:00:00", "end": None, "location": "Front office", "attendees": [], "reminder_minutes": 720, "notes": "Bringing cookies", } ], "conflicts": [], "proposed_times": [], "reply_draft": "Cookies will be there by 8 Thursday!", "needs_clarification": None, }, ), # --- vague meetup, needs clarification --- ( "2026-06-08T09:00:00", "Other parent: we should set up a playdate soon!\nMe: yes definitely", { "reasoning": "Mutual interest but no concrete day, time, or place.", "events": [], "conflicts": [], "proposed_times": [], "reply_draft": "Let's do it β€” what day works for you two?", "needs_clarification": "Which day/time and where for the playdate?", }, ), ] def to_record(now: str, thread: str, plan: dict) -> dict: user = f"Current datetime: {now}\nExisting calendar: (none provided)\n\nConversation:\n{thread}\n\nReturn the ActionPlan JSON now." return { "messages": [ {"role": "system", "content": SYSTEM}, {"role": "user", "content": user}, {"role": "assistant", "content": json.dumps(plan, ensure_ascii=False)}, ] } def main(): out_dir = Path(__file__).with_name("data") out_dir.mkdir(exist_ok=True) out = out_dir / "dataset.jsonl" with out.open("w", encoding="utf-8") as f: for now, thread, plan in SEEDS: f.write(json.dumps(to_record(now, thread, plan), ensure_ascii=False) + "\n") print(f"wrote {out} ({len(SEEDS)} seed records) β€” expand this before training") if __name__ == "__main__": main()