OffGridSchedula / training /make_dataset.py
ParetoOptimal's picture
Initial Commit
0366d65
Raw
History Blame Contribute Delete
22.6 kB
"""Build a synthetic instruction dataset: thread (+image descriptions) -> ActionPlan JSON.
Reuses the agent's SYSTEM prompt and ActionPlan schema so training and serving
stay in sync. This is a template with a few hand-written seeds plus simple
augmentation — expand toward ~500-2000 examples covering relative dates, ranges,
conflicts, no-event chitchat (empty), multiple events, and image-derived events.
Output: training/data/dataset.jsonl (one {messages:[...]} chat record per line)
Generating training data may use any offline tooling — the "no cloud API" rule
applies to the running app's inference, not dataset prep.
Vision note: we keep the vision encoder frozen at train time, so image examples
are represented by a short text *description* of the image (what the model would
"see"); this teaches the decision/format behavior without multimodal training.
"""
from __future__ import annotations
import json
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
from server.agent import SYSTEM # noqa: E402 reuse the serving prompt
# Hand-written seed examples, one tuple per example:
#   (current_datetime, thread, action_plan_dict)
#
# Date conventions used by the gold labels (every seed is anchored on
# Monday 2026-06-08, so the labels must agree with each other):
#   * "this <weekday>" / a bare weekday -> that day in the current week (Jun 8-14)
#   * "next <weekday>" / "next week"    -> that day in the following week (Jun 15-21)
#   * "in two weeks, <weekday>"         -> that day in the week after that (Jun 22-28)
# Threads that begin with an "Existing calendar:" section carry the calendar the
# conflict examples are checked against.
SEEDS = [
    (
        "2026-06-08T09:00:00",
        "Alex: lunch tomorrow at 1pm?\nMe: works, the usual place",
        {
            "reasoning": "Alex proposed lunch tomorrow at 1pm and it was accepted.",
            "events": [
                {
                    "title": "Lunch with Alex",
                    "start": "2026-06-09T13:00:00",
                    "end": "2026-06-09T14:00:00",
                    "location": "the usual place",
                    "attendees": ["Alex"],
                    "reminder_minutes": 30,
                    "notes": None,
                }
            ],
            "conflicts": [],
            "proposed_times": [],
            "reply_draft": "Perfect, see you at 1 at the usual place!",
            "needs_clarification": None,
        },
    ),
    (
        # "next Thursday" follows the same rule as the "next Friday" seed below:
        # the weekday in the FOLLOWING week (Jun 18), not this week's Jun 11.
        "2026-06-08T09:00:00",
        "Sam: dentist moved to next Thursday 3:30\nMe: noted",
        {
            "reasoning": "Sam rescheduled the dentist to next Thursday at 3:30pm; relative to Monday Jun 8 that is Jun 18.",
            "events": [
                {
                    "title": "Dentist",
                    "start": "2026-06-18T15:30:00",
                    "end": "2026-06-18T16:30:00",
                    "location": None,
                    "attendees": [],
                    "reminder_minutes": 60,
                    "notes": None,
                }
            ],
            "conflicts": [],
            "proposed_times": [],
            "reply_draft": "Got it, I'll be there next Thursday at 3:30.",
            "needs_clarification": None,
        },
    ),
    (
        "2026-06-08T09:00:00",
        "Jo: haha that meme killed me\nMe: lol",
        {
            "reasoning": "Casual chat, no plan to schedule.",
            "events": [],
            "conflicts": [],
            "proposed_times": [],
            "reply_draft": "😂",
            "needs_clarification": None,
        },
    ),
    (
        "2026-06-08T09:00:00",
        "Pat: you free sometime this week to grab coffee?",
        {
            "reasoning": "A meetup is proposed but no concrete day/time was given.",
            "events": [],
            "conflicts": [],
            "proposed_times": [],
            "reply_draft": "Yeah! What day works for you?",
            "needs_clarification": "Which day/time this week works for coffee?",
        },
    ),
    (
        # image-derived example (vision encoder frozen -> describe the image in text)
        "2026-06-08T09:00:00",
        "Riley: [image: screenshot of a concert ticket — 'The Lumens, Fri Jun 12, "
        "doors 7:30pm, The Fillmore']\nMe: can't wait!",
        {
            "reasoning": "The screenshot is a concert ticket with a concrete date, time, and venue.",
            "events": [
                {
                    "title": "The Lumens concert",
                    "start": "2026-06-12T19:30:00",
                    "end": "2026-06-12T22:30:00",
                    "location": "The Fillmore",
                    "attendees": ["Riley"],
                    "reminder_minutes": 120,
                    "notes": "From ticket screenshot",
                }
            ],
            "conflicts": [],
            "proposed_times": [],
            "reply_draft": "Added it — doors at 7:30, see you Friday!",
            "needs_clarification": None,
        },
    ),
    # --- multiple events in one thread (mirrors the live "Try a sample") ---
    (
        "2026-06-08T08:00:00",
        "Room parent: picture day is this Thursday at 9am, green shirts!\n"
        "Coach Dana: heads up, soccer moves to Tuesday 5pm this week\nMe: thanks, adding both",
        {
            "reasoning": "Two concrete events: picture day Thursday 9am and the rescheduled soccer Tuesday 5pm.",
            "events": [
                {
                    "title": "Picture day (green shirt)",
                    "start": "2026-06-11T09:00:00",
                    "end": "2026-06-11T09:30:00",
                    "location": None,
                    "attendees": [],
                    "reminder_minutes": 720,
                    "notes": "Wear the green class shirt",
                },
                {
                    "title": "Soccer practice",
                    "start": "2026-06-09T17:00:00",
                    "end": "2026-06-09T18:00:00",
                    "location": None,
                    "attendees": ["Coach Dana"],
                    "reminder_minutes": 60,
                    "notes": None,
                },
            ],
            "conflicts": [],
            "proposed_times": [],
            "reply_draft": "Got both down — picture day Thursday 9am and soccer Tuesday 5pm!",
            "needs_clarification": None,
        },
    ),
    # --- relative date: "next Friday" ---
    (
        "2026-06-08T09:00:00",
        "Teacher: our class field trip permission slips are due next Friday\nMe: ok!",
        {
            "reasoning": "A due date stated as 'next Friday' relative to Monday Jun 8 resolves to Jun 19.",
            "events": [
                {
                    "title": "Permission slips due",
                    "start": "2026-06-19T09:00:00",
                    "end": None,
                    "location": None,
                    "attendees": [],
                    "reminder_minutes": 1440,
                    "notes": "Class field trip",
                }
            ],
            "conflicts": [],
            "proposed_times": [],
            "reply_draft": "Thanks — I'll have them signed before Friday.",
            "needs_clarification": None,
        },
    ),
    # --- multi-hour range: field trip 9-3 ---
    (
        "2026-06-08T09:00:00",
        "Room parent: field trip to the science museum is Wed 9am–3pm, bring a bagged lunch\nMe: noted",
        {
            "reasoning": "A single all-day-ish event with an explicit start and end on Wednesday.",
            "events": [
                {
                    "title": "Science museum field trip",
                    "start": "2026-06-10T09:00:00",
                    "end": "2026-06-10T15:00:00",
                    "location": "Science museum",
                    "attendees": [],
                    "reminder_minutes": 720,
                    "notes": "Bring a bagged lunch",
                }
            ],
            "conflicts": [],
            "proposed_times": [],
            "reply_draft": "Got it — bagged lunch, 9 to 3 Wednesday.",
            "needs_clarification": None,
        },
    ),
    # --- multi-day range: book fair (3 days) ---
    (
        "2026-06-08T09:00:00",
        "PTA: the book fair runs Mon–Wed next week in the library, open during pickup\nMe: 👍",
        {
            "reasoning": "A multi-day event spanning Monday to Wednesday of next week.",
            "events": [
                {
                    "title": "Book fair",
                    "start": "2026-06-15T08:00:00",
                    "end": "2026-06-17T16:00:00",
                    "location": "School library",
                    "attendees": [],
                    "reminder_minutes": 1440,
                    "notes": "Open during pickup",
                }
            ],
            "conflicts": [],
            "proposed_times": [],
            "reply_draft": "Thanks — we'll stop by after pickup.",
            "needs_clarification": None,
        },
    ),
    # --- no-event chitchat (empty) ---
    (
        "2026-06-08T09:00:00",
        "Jess: did you see the class photo album?? so cute\nMe: omg yes 😍",
        {
            "reasoning": "Friendly chatter, nothing to schedule.",
            "events": [],
            "conflicts": [],
            "proposed_times": [],
            "reply_draft": "They grow up so fast 🥹",
            "needs_clarification": None,
        },
    ),
    (
        "2026-06-08T09:00:00",
        "Coach Dana: great hustle from the kids today, proud of them\nMe: such a fun season",
        {
            "reasoning": "Encouragement, no plan or date mentioned.",
            "events": [],
            "conflicts": [],
            "proposed_times": [],
            "reply_draft": "Thanks Coach — they love it!",
            "needs_clarification": None,
        },
    ),
    # --- conflict against existing calendar + proposed alternatives ---
    (
        "2026-06-08T09:00:00",
        "Existing calendar:\n- Dentist (Mia): 2026-06-10T14:00..2026-06-10T15:00\n\n"
        "Room parent: makeup class photo is Wednesday at 2pm\nMe: hmm Mia has the dentist then",
        {
            "reasoning": "The makeup photo at Wed 2pm overlaps Mia's existing dentist appointment.",
            "events": [
                {
                    "title": "Makeup class photo",
                    "start": "2026-06-10T14:00:00",
                    "end": "2026-06-10T14:30:00",
                    "location": None,
                    "attendees": [],
                    "reminder_minutes": 120,
                    "notes": None,
                }
            ],
            "conflicts": [
                {
                    "event_index": 0,
                    "clashes_with": "Dentist (Mia) 2:00-3:00pm",
                    "severity": "overlap",
                }
            ],
            "proposed_times": ["2026-06-10T15:00:00", "2026-06-11T14:00:00"],
            "reply_draft": "Mia has the dentist at 2 — could she do the makeup photo at 3, or Thursday?",
            "needs_clarification": None,
        },
    ),
    (
        "2026-06-08T09:00:00",
        "Existing calendar:\n- Work standup: 2026-06-09T09:00..2026-06-09T09:30\n\n"
        "Teacher: parent volunteers needed in class Tue 9–10am\nMe: I'll try",
        {
            "reasoning": "Volunteering Tue 9-10am butts against the 9:00-9:30 standup (overlap).",
            "events": [
                {
                    "title": "Classroom volunteering",
                    "start": "2026-06-09T09:00:00",
                    "end": "2026-06-09T10:00:00",
                    "location": "Classroom",
                    "attendees": [],
                    "reminder_minutes": 60,
                    "notes": None,
                }
            ],
            "conflicts": [
                {
                    "event_index": 0,
                    "clashes_with": "Work standup 9:00-9:30am",
                    "severity": "overlap",
                }
            ],
            "proposed_times": ["2026-06-09T10:00:00", "2026-06-11T09:00:00"],
            "reply_draft": "I have a 9am call — could I come in at 10 instead?",
            "needs_clarification": None,
        },
    ),
    # --- image-described event: PTA flyer ---
    (
        "2026-06-08T09:00:00",
        "Room parent: [image: flyer — 'Spring Family Picnic, Sat Jun 13, 11am, Lincoln Park "
        "Shelter B, bring a dish to share']\nMe: we'll be there",
        {
            "reasoning": "The flyer gives a concrete date, time, and location for the family picnic.",
            "events": [
                {
                    "title": "Spring Family Picnic",
                    "start": "2026-06-13T11:00:00",
                    "end": "2026-06-13T14:00:00",
                    "location": "Lincoln Park, Shelter B",
                    "attendees": [],
                    "reminder_minutes": 120,
                    "notes": "Bring a dish to share (from flyer)",
                }
            ],
            "conflicts": [],
            "proposed_times": [],
            "reply_draft": "Added it — we'll bring a salad!",
            "needs_clarification": None,
        },
    ),
    # --- image-described event: permission slip photo ---
    (
        "2026-06-08T09:00:00",
        "Mom: [image: photo of a permission slip — 'Zoo trip Thursday June 11, depart 8:30am, "
        "return ~2:30pm, $12 due Mon']\nMe: signing tonight",
        {
            "reasoning": "Permission slip photo encodes the trip date/time and a separate payment deadline.",
            "events": [
                {
                    "title": "Zoo trip",
                    "start": "2026-06-11T08:30:00",
                    "end": "2026-06-11T14:30:00",
                    "location": "Zoo",
                    "attendees": [],
                    "reminder_minutes": 720,
                    "notes": "$12 due Monday; from permission slip",
                }
            ],
            "conflicts": [],
            "proposed_times": [],
            "reply_draft": "Signed and the $12 is ready for Monday!",
            "needs_clarification": None,
        },
    ),
    # --- ambiguity: needs clarification, no event yet ---
    (
        "2026-06-08T09:00:00",
        "Teacher: can a couple of parents help with the class party sometime next week?\nMe: maybe!",
        {
            "reasoning": "A request without a fixed day or time — ask before scheduling anything.",
            "events": [],
            "conflicts": [],
            "proposed_times": [],
            "reply_draft": "Happy to help — what day and time is the party?",
            "needs_clarification": "Which day/time next week is the class party?",
        },
    ),
    # --- explicit date "the 14th" ---
    (
        "2026-06-08T09:00:00",
        "Coach Dana: end-of-season pizza party is the 14th at 6pm at Tony's\nMe: yum, count us in",
        {
            "reasoning": "'The 14th' resolves to Jun 14; concrete time and venue given.",
            "events": [
                {
                    "title": "End-of-season pizza party",
                    "start": "2026-06-14T18:00:00",
                    "end": "2026-06-14T20:00:00",
                    "location": "Tony's",
                    "attendees": ["Coach Dana"],
                    "reminder_minutes": 120,
                    "notes": None,
                }
            ],
            "conflicts": [],
            "proposed_times": [],
            "reply_draft": "Count us in — see you at Tony's at 6!",
            "needs_clarification": None,
        },
    ),
    # --- "in two weeks" ---
    (
        # Jun 8 + 14 days = Mon Jun 22, so the Friday meant is Jun 26. Jun 19 is
        # already what "next Friday" resolves to in the seed above.
        "2026-06-08T09:00:00",
        "Teacher: the spring recital is in two weeks, Friday at 6pm in the gym\nMe: 📅",
        {
            "reasoning": "'In two weeks' from Monday Jun 8 lands in the week of Jun 22; the Friday of that week is Jun 26, at 6pm.",
            "events": [
                {
                    "title": "Spring recital",
                    "start": "2026-06-26T18:00:00",
                    "end": "2026-06-26T19:30:00",
                    "location": "School gym",
                    "attendees": [],
                    "reminder_minutes": 1440,
                    "notes": None,
                }
            ],
            "conflicts": [],
            "proposed_times": [],
            "reply_draft": "Can't wait — we'll be there Friday at 6!",
            "needs_clarification": None,
        },
    ),
    # --- early dismissal ---
    (
        "2026-06-08T09:00:00",
        "School office: reminder, early dismissal this Wednesday at 12:30pm\nMe: thanks for the heads up",
        {
            "reasoning": "A one-off schedule change parents need to plan pickup around.",
            "events": [
                {
                    "title": "Early dismissal",
                    "start": "2026-06-10T12:30:00",
                    "end": None,
                    "location": "School",
                    "attendees": [],
                    "reminder_minutes": 60,
                    "notes": "Pickup at 12:30 instead of usual",
                }
            ],
            "conflicts": [],
            "proposed_times": [],
            "reply_draft": "Noted — I'll be there at 12:30 Wednesday.",
            "needs_clarification": None,
        },
    ),
    # --- parent-teacher conference (specific slot) ---
    (
        "2026-06-08T09:00:00",
        "Teacher: your parent–teacher conference slot is Thursday 4:15pm, ~15 min\nMe: works for me",
        {
            "reasoning": "A confirmed 15-minute conference slot on Thursday.",
            "events": [
                {
                    "title": "Parent–teacher conference",
                    "start": "2026-06-11T16:15:00",
                    "end": "2026-06-11T16:30:00",
                    "location": "Classroom",
                    "attendees": ["Teacher"],
                    "reminder_minutes": 120,
                    "notes": None,
                }
            ],
            "conflicts": [],
            "proposed_times": [],
            "reply_draft": "4:15 Thursday works — see you then!",
            "needs_clarification": None,
        },
    ),
    # --- birthday party RSVP with location ---
    (
        "2026-06-08T09:00:00",
        "Other parent: Leo's bday party Sat 2–4pm at Jump Zone, can Mia come?\nMe: she'd love to!",
        {
            "reasoning": "A party invite with a clear window and venue, accepted.",
            "events": [
                {
                    "title": "Leo's birthday party",
                    "start": "2026-06-13T14:00:00",
                    "end": "2026-06-13T16:00:00",
                    "location": "Jump Zone",
                    "attendees": ["Leo"],
                    "reminder_minutes": 180,
                    "notes": None,
                }
            ],
            "conflicts": [],
            "proposed_times": [],
            "reply_draft": "She'd love to — see you Saturday at Jump Zone!",
            "needs_clarification": None,
        },
    ),
    # --- carpool reminder ---
    (
        "2026-06-08T09:00:00",
        "Neighbor: I've got carpool drop-off Friday at 7:45am this week\nMe: perfect, thank you!",
        {
            "reasoning": "A specific carpool commitment on Friday morning.",
            "events": [
                {
                    "title": "Carpool drop-off (neighbor driving)",
                    "start": "2026-06-12T07:45:00",
                    "end": None,
                    "location": None,
                    "attendees": [],
                    "reminder_minutes": 30,
                    "notes": None,
                }
            ],
            "conflicts": [],
            "proposed_times": [],
            "reply_draft": "Thank you! I've got next Friday.",
            "needs_clarification": None,
        },
    ),
    # --- reschedule (supersede an earlier time) ---
    (
        "2026-06-08T09:00:00",
        "Coach Dana: practice this Tue is now 6pm, not 5\nMe: ok updating",
        {
            "reasoning": "A reschedule: practice Tuesday moves to 6pm.",
            "events": [
                {
                    "title": "Soccer practice",
                    "start": "2026-06-09T18:00:00",
                    "end": "2026-06-09T19:00:00",
                    "location": None,
                    "attendees": ["Coach Dana"],
                    "reminder_minutes": 60,
                    "notes": "Moved from 5pm to 6pm",
                }
            ],
            "conflicts": [],
            "proposed_times": [],
            "reply_draft": "Updated — 6pm Tuesday it is.",
            "needs_clarification": None,
        },
    ),
    # --- bake sale drop-off deadline ---
    (
        "2026-06-08T09:00:00",
        "PTA: bake sale items drop off Thursday by 8am at the front office\nMe: I'll bring cookies",
        {
            "reasoning": "A drop-off deadline Thursday morning.",
            "events": [
                {
                    "title": "Bake sale drop-off",
                    "start": "2026-06-11T08:00:00",
                    "end": None,
                    "location": "Front office",
                    "attendees": [],
                    "reminder_minutes": 720,
                    "notes": "Bringing cookies",
                }
            ],
            "conflicts": [],
            "proposed_times": [],
            "reply_draft": "Cookies will be there by 8 Thursday!",
            "needs_clarification": None,
        },
    ),
    # --- vague meetup, needs clarification ---
    (
        "2026-06-08T09:00:00",
        "Other parent: we should set up a playdate soon!\nMe: yes definitely",
        {
            "reasoning": "Mutual interest but no concrete day, time, or place.",
            "events": [],
            "conflicts": [],
            "proposed_times": [],
            "reply_draft": "Let's do it — what day works for you two?",
            "needs_clarification": "Which day/time and where for the playdate?",
        },
    ),
]
def to_record(now: str, thread: str, plan: dict) -> dict:
    """Turn one seed into a chat-format training record.

    The user turn states the current datetime, the existing calendar, and the
    conversation, then asks for the ActionPlan JSON. The assistant turn is the
    gold plan serialized as JSON.

    Some seed threads start with their own ``Existing calendar:`` section
    (the conflict examples). That section is lifted out of the thread and used
    as the prompt's calendar, so the record never claims "(none provided)"
    while also showing a calendar inside the conversation.

    Args:
        now: ISO-8601 timestamp the example is anchored on.
        thread: Conversation text, optionally prefixed with
            ``"Existing calendar:\\n<entries>\\n\\n"``.
        plan: Gold ActionPlan as a JSON-serializable dict.

    Returns:
        ``{"messages": [system, user, assistant]}``.
    """
    calendar_header = "Existing calendar:"
    calendar_section = f"{calendar_header} (none provided)"
    conversation = thread

    prefix = calendar_header + "\n"
    if thread.startswith(prefix):
        # The blank line separates the calendar entries from the chat itself.
        entries, separator, rest = thread[len(prefix):].partition("\n\n")
        # Only split when both halves are really there; otherwise keep the
        # thread untouched and fall back to the "(none provided)" prompt.
        if separator and entries.strip() and rest.strip():
            # NOTE(review): confirm the serving prompt renders a non-empty
            # calendar in this same "header, then one entry per line" shape.
            calendar_section = f"{calendar_header}\n{entries}"
            conversation = rest

    user = (
        f"Current datetime: {now}\n{calendar_section}\n\n"
        f"Conversation:\n{conversation}\n\nReturn the ActionPlan JSON now."
    )
    return {
        "messages": [
            {"role": "system", "content": SYSTEM},
            {"role": "user", "content": user},
            # ensure_ascii=False keeps emoji and dashes readable in the target text.
            {"role": "assistant", "content": json.dumps(plan, ensure_ascii=False)},
        ]
    }
def main():
    """Serialize every seed to ``data/dataset.jsonl`` next to this script.

    The ``data`` directory is created if missing, and the output file is
    overwritten on each run with one JSON chat record per line.
    """
    data_dir = Path(__file__).with_name("data")
    data_dir.mkdir(exist_ok=True)
    target = data_dir / "dataset.jsonl"

    # Lazily render one JSONL line per seed; non-ASCII text is written as-is.
    lines = (
        json.dumps(to_record(now, thread, plan), ensure_ascii=False) + "\n"
        for now, thread, plan in SEEDS
    )
    with target.open("w", encoding="utf-8") as handle:
        handle.writelines(lines)

    print(f"wrote {target} ({len(SEEDS)} seed records) — expand this before training")
# Script entry point: build the dataset only when this file is run directly,
# not when it is imported.
if __name__ == "__main__":
    main()