"""Build training/data/eval.jsonl — a held-out test set for scoring the fine-tune.

These examples are DISJOINT from training/data/dataset.jsonl (fresh names, dates,
wording) so the eval measures generalization, not memorization. Each record fixes
a `now` so relative-date answers ("tomorrow", "next Tuesday") are deterministic.

Anchor for relative dates: 2026-09-14 is a **Monday** (so tomorrow=Sep 15 Tue,
this Fri=Sep 18, this Sat=Sep 19, next Mon=Sep 21, next Tue=Sep 22, +3 days=Sep 17).

Run: python training/gen_eval.py -> writes training/data/eval.jsonl
Score: see training/eval.py (needs an INFERENCE_BASE_URL serving the model).
"""
| from __future__ import annotations |
|
|
| import json |
| import sys |
| from pathlib import Path |
|
|
| sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) |
| from server.schema import ActionPlan |
|
|
# Fixed "current time" attached to each first-batch record (third tuple field,
# written out under the "now" key) so relative dates resolve deterministically.
# 2026-09-14 is a Monday, per the module docstring.
NOW = "2026-09-14T09:00:00"
|
|
|
|
def ev(title, start, end=None, location=None, reminder=None, attendees=None, notes=None):
    """Assemble one gold event as a plain dict.

    Args:
        title: Value stored under "title".
        start: Value stored under "start" (the records in this file pass
            ISO-8601 timestamp strings).
        end: Value stored under "end"; None when omitted.
        location: Value stored under "location"; None when omitted.
        reminder: Value stored under "reminder_minutes"; None when omitted.
        attendees: Value stored under "attendees"; any falsy value
            (None, empty list) becomes a fresh empty list.
        notes: Value stored under "notes"; None when omitted.

    Returns:
        A dict with the keys title, start, end, location, attendees,
        reminder_minutes, notes, in that order.
    """
    event = {"title": title, "start": start}
    event["end"] = end
    event["location"] = location
    # Normalise a missing/empty attendee list to a new [] per call.
    event["attendees"] = attendees if attendees else []
    event["reminder_minutes"] = reminder
    event["notes"] = notes
    return event
|
|
|
|
def plan(events=None, reply="", clarify=None, reasoning=None):
    """Assemble one gold plan as a plain dict.

    Args:
        events: List of event dicts stored under "events"; any falsy value
            (None, empty list) becomes a fresh empty list.
        reply: Value stored under "reply_draft"; defaults to "".
        clarify: Value stored under "needs_clarification"; None when omitted.
        reasoning: Value stored under "reasoning"; None when omitted.

    Returns:
        A dict with the keys reasoning, events, conflicts, proposed_times,
        reply_draft, needs_clarification, in that order. "conflicts" and
        "proposed_times" are always empty lists.
    """
    gold = {"reasoning": reasoning}
    gold["events"] = events if events else []
    gold["conflicts"] = []
    gold["proposed_times"] = []
    gold["reply_draft"] = reply
    gold["needs_clarification"] = clarify
    return gold
|
|
|
|
| |
# Held-out eval records, one tuple each: (id, category, now, thread, gold plan).
# Every record in this first batch is anchored at NOW (Monday 2026-09-14).
# Dashes in the strings are real em dashes (U+2014); they previously appeared
# as a mis-decoded Greek beta, which would have leaked into eval.jsonl.
EVAL = [
    # --- events: explicit calendar dates stated in the thread ---
    ("e01", "events", NOW,
     "Teacher: Back-to-school night is Wednesday September 16 at 6:30pm in the gym\nMe: we'll be there",
     plan([ev("Back-to-school night", "2026-09-16T18:30:00", location="School gym", reminder=120)],
          "See you Wednesday at 6:30!")),
    ("e02", "events", NOW,
     "Dr. Okafor's office: Liam's cleaning is set for Tuesday October 6 at 3:30pm\nMe: thanks",
     plan([ev("Liam — dental cleaning", "2026-10-06T15:30:00", location="Dr. Okafor's office", reminder=120)],
          "Got it — Tuesday the 6th at 3:30.")),
    ("e03", "events", NOW,
     "Sam: dinner Friday September 25, 7-9pm at Trattoria Verde?\nMe: yes!",
     plan([ev("Dinner with Sam", "2026-09-25T19:00:00", "2026-09-25T21:00:00", "Trattoria Verde", 60)],
          "Friday at 7 — can't wait!")),
    ("e04", "events", NOW,
     "Coach: makeup game is Sunday Sept 27 at 1pm at Field 2\nMe: got it",
     plan([ev("Makeup game", "2026-09-27T13:00:00", location="Field 2", reminder=60)],
          "We'll be at Field 2 by 1!")),
    ("e05", "events", NOW,
     "Clinic: flu shot clinic Thursday October 1 at 8:15am\nMe: see you",
     plan([ev("Flu shot", "2026-10-01T08:15:00", location="Clinic", reminder=60)],
          "See you Thursday at 8:15.")),
    ("e06", "events", NOW,
     "PTA: book fair opens Monday Sept 28 at 9am\nMe: noted",
     plan([ev("Book fair", "2026-09-28T09:00:00", reminder=60)], "Thanks — added it!")),
    ("e15", "events", NOW,
     "Library: your book club meets Thursday October 8 at 7pm\nMe: see you there",
     plan([ev("Book club", "2026-10-08T19:00:00", location="Library", reminder=60)],
          "See you on the 8th at 7!")),

    # --- events: relative dates, resolved against NOW ---
    ("e07", "events", NOW,
     "Maya: study group tomorrow at 4pm at the library\nMe: I'll be there",
     plan([ev("Study group", "2026-09-15T16:00:00", location="Library", reminder=30)],
          "See you at 4 tomorrow!")),
    ("e08", "events", NOW,
     "Dad: family dinner this Friday 6:30pm at Grandma's\nMe: yum",
     plan([ev("Family dinner", "2026-09-18T18:30:00", location="Grandma's", reminder=60)],
          "See you Friday at 6:30!")),
    ("e09", "events", NOW,
     "Manager: our 1:1 moves to next Tuesday at 11am\nMe: ok",
     plan([ev("1:1 with manager", "2026-09-22T11:00:00", reminder=15)], "Got it — Tuesday at 11.")),
    ("e10", "events", NOW,
     "Prof: the problem set is due in three days at noon\nMe: on it",
     plan([ev("Problem set due", "2026-09-17T12:00:00", reminder=1440)], "It'll be in by Thursday noon!")),
    ("e11", "events", NOW,
     "Zoe: trivia night tonight at 8pm at The Tap\nMe: in!",
     plan([ev("Trivia night", "2026-09-14T20:00:00", location="The Tap", reminder=30)], "See you at 8!")),
    ("e12", "events", NOW,
     "Neighbor: garage sale this Saturday 7am to 1pm\nMe: I'll swing by",
     plan([ev("Garage sale", "2026-09-19T07:00:00", "2026-09-19T13:00:00")], "I'll stop by Saturday!")),
    ("e13", "events", NOW,
     "HR: onboarding session is next Monday at 10am\nMe: thanks",
     plan([ev("Onboarding session", "2026-09-21T10:00:00", reminder=60)], "Thanks — see you Monday at 10.")),
    ("e14", "events", NOW,
     "Landlord: apartment inspection on the 30th at 2pm\nMe: noted",
     plan([ev("Apartment inspection", "2026-09-30T14:00:00", reminder=120)], "Noted — the 30th at 2.")),
    ("e16", "events", NOW,
     "Boss: all-hands is a week from today at 3pm\nMe: noted",
     plan([ev("All-hands", "2026-09-21T15:00:00", reminder=30)], "Got it — added the all-hands.")),

    # --- no_event: chit-chat that must not produce a calendar entry ---
    ("c01", "no_event", NOW,
     "Friend: that movie was incredible, thanks for the rec!\nMe: right?? so good",
     plan([], "Right? One of the best this year!")),
    ("c02", "no_event", NOW,
     "Mom: just checking you got home safe\nMe: yep, all good!",
     plan([], "Home safe — thanks for checking!")),
    ("c03", "no_event", NOW,
     "Coworker: congrats on shipping the release!\nMe: thanks, big team effort",
     plan([], "Appreciate it — couldn't have done it without the team!")),
    ("c04", "no_event", NOW,
     "Sis: the cookies turned out amazing\nMe: yay! glad the recipe worked",
     plan([], "So glad — they're a keeper!")),
    ("c05", "no_event", NOW,
     "Coach: great hustle at practice today, everyone\nMe: thanks coach!",
     plan([], "Thanks, Coach — good session today!")),

    # --- clarify: intent to meet but no concrete date/time yet ---
    ("q01", "clarify", NOW,
     "Alex: we should grab coffee soon!\nMe: yes, let's!",
     plan([], "Definitely! What day works for you?", clarify="When would you like to grab coffee?")),
    ("q02", "clarify", NOW,
     "Team: let's schedule the offsite sometime next month\nMe: sounds good",
     plan([], "Sounds good — which dates are we considering?",
          clarify="What date next month works for the offsite?")),
    ("q03", "clarify", NOW,
     "Jen: dinner this week? not sure which night works for you\nMe: let's find a time",
     plan([], "I'm free most nights — what works for you?",
          clarify="Which night this week works for dinner?")),
    ("q04", "clarify", NOW,
     "Coach: we'll add an extra practice, date still TBD\nMe: keep me posted",
     plan([], "Sounds good — let me know the day and time.",
          clarify="Which day and time is the extra practice?")),

    # --- multi: several events in a single thread ---
    ("m01", "multi", NOW,
     "Spouse: Friday Sept 18 — Mia has the dentist at 9am, then soccer at 4pm\nMe: adding both",
     plan([ev("Mia — dentist", "2026-09-18T09:00:00", reminder=60),
           ev("Mia — soccer", "2026-09-18T16:00:00", reminder=30)],
          "Both on the calendar — dentist at 9, soccer at 4.")),
    ("m02", "multi", NOW,
     "Office: standup is Wednesday Sept 16 at 9:30am and the sprint demo is Sept 16 at 2pm\nMe: blocked both",
     plan([ev("Standup", "2026-09-16T09:30:00", reminder=15),
           ev("Sprint demo", "2026-09-16T14:00:00", reminder=15)],
          "Blocked both — standup at 9:30, demo at 2.")),
    ("m03", "multi", NOW,
     "Camp: drop-off is tomorrow at 8am and pickup is tomorrow at 3pm\nMe: got it",
     plan([ev("Camp drop-off", "2026-09-15T08:00:00", reminder=30),
           ev("Camp pickup", "2026-09-15T15:00:00", reminder=30)],
          "Got it — drop-off 8am, pickup 3pm tomorrow.")),
]
|
|
| |
| |
| |
| |
# Second batch, using two further anchors so the eval is not tied to one "now".
# 2026-10-07 is a Wednesday; 2026-11-13 is a Friday.
NOW2 = "2026-10-07T08:30:00"
NOW3 = "2026-11-13T17:00:00"


# Same tuple layout as EVAL: (id, category, now, thread, gold plan).
# Dashes in the strings are real em dashes (U+2014); they previously appeared
# as a mis-decoded Greek beta, which would have leaked into eval.jsonl.
EVAL2 = [
    # --- events: explicit calendar dates ---
    ("e17", "events", NOW2,
     "Dr. Nguyen's office: confirming Quinn's filling on October 21 at 4:45pm\nMe: thanks",
     plan([ev("Quinn — filling", "2026-10-21T16:45:00", location="Dr. Nguyen's office", reminder=120)],
          "See you on the 21st at 4:45.")),
    ("e18", "events", NOW2,
     "Tessa: my housewarming is 11/14 at 6pm, 48 Birchwood Ln\nMe: wouldn't miss it",
     plan([ev("Tessa's housewarming", "2026-11-14T18:00:00", location="48 Birchwood Ln", reminder=120)],
          "Can't wait — see you the 14th!")),
    ("e19", "events", NOW2,
     "Choir: winter audition is December 3rd at 9:15am in room 12\nMe: signed up",
     plan([ev("Choir audition", "2026-12-03T09:15:00", location="Room 12", reminder=60)],
          "Signed up — Dec 3rd at 9:15.")),
    ("e20", "events", NOW2,
     "Parent council: meeting Thursday October 15 at 7pm in the cafeteria\nMe: I'll come",
     plan([ev("Parent council meeting", "2026-10-15T19:00:00", location="Cafeteria", reminder=60)],
          "See you Thursday the 15th.")),
    ("e21", "events", NOW2,
     "Mr. Alvarez: tutoring moved to 5:30 on the 28th\nMe: ok",
     plan([ev("Tutoring", "2026-10-28T17:30:00", reminder=30)], "Got it — the 28th at 5:30.")),
    ("e22", "events", NOW3,
     "Rec center: lifeguard recert is November 30 at 8am, bring your card\nMe: noted",
     plan([ev("Lifeguard recertification", "2026-11-30T08:00:00", location="Rec center",
              reminder=60, notes="Bring certification card")],
          "Noted — Nov 30 at 8.")),
    ("e23", "events", NOW3,
     "Wren: brunch December 5, noon, at Petit Four?\nMe: yes please",
     plan([ev("Brunch with Wren", "2026-12-05T12:00:00", location="Petit Four", reminder=60)],
          "Noon on the 5th — in!")),
    ("e24", "events", NOW3,
     "Garage: your inspection slot is Nov 18 at 7:45am\nMe: I'll drop it off",
     plan([ev("Car inspection", "2026-11-18T07:45:00", location="Garage", reminder=60)],
          "Dropping it off the 18th at 7:45.")),

    # --- events: announced openings with explicit dates ---
    ("e25", "events", NOW2,
     "Library: the used book sale opens Friday October 16 at 9am\nMe: I'll be there early",
     plan([ev("Used book sale", "2026-10-16T09:00:00", location="Library", reminder=60)],
          "There at 9 on the 16th!")),
    ("e26", "events", NOW2,
     "City pool: winter session registration opens Tuesday October 13 at 6am\nMe: setting an alarm",
     plan([ev("Pool registration opens", "2026-10-13T06:00:00", reminder=30)],
          "Alarm set for the 13th at 6.")),
    ("e27", "events", NOW3,
     "Museum: the new dinosaur exhibit opens November 20 at 10am\nMe: kids will love that",
     plan([ev("Dinosaur exhibit opening", "2026-11-20T10:00:00", location="Museum", reminder=120)],
          "Taking the kids on the 20th!")),

    # --- events: relative dates, resolved against NOW2 / NOW3 ---
    ("r01", "events", NOW2,
     "Sage: yoga tomorrow at 7:15am?\nMe: I'm in",
     plan([ev("Yoga with Sage", "2026-10-08T07:15:00", reminder=30)], "See you at 7:15!")),
    ("r02", "events", NOW3,
     "Theo: pancakes at ours tomorrow, 9am\nMe: yum, in",
     plan([ev("Pancakes at Theo's", "2026-11-14T09:00:00", location="Theo's", reminder=30)],
          "See you at 9!")),
    ("r03", "events", NOW3,
     "Bowling league: lanes tonight at 8\nMe: rolling in",
     plan([ev("Bowling league", "2026-11-13T20:00:00", reminder=30)], "See everyone at 8!")),
    ("r04", "events", NOW2,
     "Aunt Rosa: lunch this Sunday at 1?\nMe: lovely",
     plan([ev("Lunch with Aunt Rosa", "2026-10-11T13:00:00", reminder=60)], "Sunday at 1 — lovely!")),
    ("r05", "events", NOW2,
     "Barber: I can fit you in next Wednesday at 5:45\nMe: book it",
     plan([ev("Haircut", "2026-10-14T17:45:00", location="Barber", reminder=30)],
          "Booked — next Wednesday at 5:45.")),
    ("r06", "events", NOW3,
     "Ski club: first meetup is next Friday at 6:30pm\nMe: stoked",
     plan([ev("Ski club meetup", "2026-11-20T18:30:00", reminder=60)],
          "Stoked — see everyone the 20th.")),
    ("r07", "events", NOW2,
     "Prof Idris: revised draft due in five days, by 5pm\nMe: on it",
     plan([ev("Revised draft due", "2026-10-12T17:00:00", reminder=1440)],
          "It'll be in by Monday at 5.")),
    ("r08", "events", NOW2,
     "Nico: cabin trip a week from Saturday, leaving 8am\nMe: packing already",
     plan([ev("Cabin trip departure", "2026-10-17T08:00:00", reminder=120)],
          "Packed and ready for the 17th!")),

    # --- multi: several events in a single thread ---
    ("m04", "multi", NOW2,
     "Saturday Oct 10 plan — farmers market at 9, Quinn's game at 1, dinner with the Patels at 6\nMe: full day!",
     plan([ev("Farmers market", "2026-10-10T09:00:00", reminder=30),
           ev("Quinn's game", "2026-10-10T13:00:00", reminder=60),
           ev("Dinner with the Patels", "2026-10-10T18:00:00", reminder=60)],
          "All three added — market, game, dinner.")),
    ("m05", "multi", NOW3,
     "Clinic: flu shots Saturday Nov 21 at 10am, and your follow-up is Dec 1 at 2:30pm\nMe: both noted",
     plan([ev("Flu shot", "2026-11-21T10:00:00", location="Clinic", reminder=60),
           ev("Follow-up appointment", "2026-12-01T14:30:00", location="Clinic", reminder=120)],
          "Both on the calendar.")),
    ("m06", "multi", NOW2,
     "School: early release Friday Oct 9 at 12:15, then the fall social runs 5 to 7pm same day\nMe: busy friday",
     plan([ev("Early release", "2026-10-09T12:15:00", location="School", reminder=120),
           ev("Fall social", "2026-10-09T17:00:00", "2026-10-09T19:00:00", "School", 60)],
          "Got both — pickup 12:15, social at 5.")),
    ("m07", "multi", NOW3,
     "Vet: Biscuit's nail trim is Nov 17 at 3pm and his dental is Dec 9 at 8am\nMe: poor guy",
     plan([ev("Biscuit — nail trim", "2026-11-17T15:00:00", location="Vet", reminder=60),
           ev("Biscuit — dental", "2026-12-09T08:00:00", location="Vet", reminder=120)],
          "Both booked for Biscuit.")),

    # --- no_event: chit-chat that must not produce a calendar entry ---
    ("c06", "no_event", NOW2,
     "Marisol: the recital photos came out so cute\nMe: omg yes, framing them",
     plan([], "Aren't they adorable? Definitely framing a few!")),
    ("c07", "no_event", NOW2,
     "Dev: ugh, standup ran 40 minutes today\nMe: brutal",
     plan([], "Brutal — hope tomorrow's is snappier.")),
    ("c08", "no_event", NOW3,
     "Neighbor: leaf pile in front of ours, kids are welcome to jump in\nMe: they'll be thrilled",
     plan([], "They'll be over in five minutes, thank you!")),
    ("c09", "no_event", NOW3,
     "Gran: loved the call yesterday, made my week\nMe: mine too",
     plan([], "Made mine too — talk soon, Gran!")),
    ("c10", "no_event", NOW2,
     "Courier: your parcel was left at the side door\nMe: got it, thanks",
     plan([], "Got it — thanks for the heads up!")),

    # --- clarify: intent to schedule but no concrete date/time yet ---
    ("q05", "clarify", NOW2,
     "Room parent: bake sale is being planned, date to be announced\nMe: count me in for cookies",
     plan([], "I'm in for cookies — what date should I plan for?",
          clarify="What date is the bake sale?")),
    ("q06", "clarify", NOW2,
     "Piano teacher: we should add a makeup lesson — does Tuesday or Thursday suit?\nMe: let me check",
     plan([], "Let me check the calendar and get back to you.",
          clarify="Tuesday or Thursday — and what time works for the makeup lesson?")),
    ("q07", "clarify", NOW3,
     "Uncle Ray: thinking of visiting sometime around the holidays\nMe: that would be great",
     plan([], "We'd love that! Which dates are you thinking?",
          clarify="Which dates would Uncle Ray visit?")),
    ("q08", "clarify", NOW3,
     "HOA: the annual meeting will be rescheduled, new time to come\nMe: ok",
     plan([], "Thanks — I'll watch for the new time.",
          clarify="What is the new date and time of the annual meeting?")),
]
# Fold the second batch into the list that main() iterates.
EVAL = EVAL + EVAL2
|
|
|
|
def main():
    """Validate every gold plan in EVAL and write the valid ones as JSON Lines.

    Each record's gold dict is checked by constructing ``ActionPlan(**gold)``.
    Records that fail are reported on stdout and left out of the output;
    the rest are written, one JSON object per line, to ``data/eval.jsonl``
    next to this script.

    Raises:
        AssertionError: if two records share the same id.
    """
    out = Path(__file__).with_name("data") / "eval.jsonl"
    ok, bad = [], 0
    seen_ids = set()
    for rid, cat, now, thread, gold in EVAL:
        # Explicit raise instead of `assert`, so the uniqueness check is not
        # stripped when Python runs with -O. The exception type is unchanged.
        if rid in seen_ids:
            raise AssertionError(f"duplicate id {rid}")
        seen_ids.add(rid)
        try:
            ActionPlan(**gold)
        except Exception as e:
            # Best-effort: report the bad record and keep going.
            print(f" [{rid}] GOLD INVALID: {e}")
            bad += 1
            continue
        ok.append({"id": rid, "category": cat, "now": now, "thread": thread, "gold": gold})
    print(f"{len(ok)} valid / {len(EVAL)} ({bad} invalid)")
    # A fresh checkout may not have the data directory yet.
    out.parent.mkdir(parents=True, exist_ok=True)
    with out.open("w", encoding="utf-8") as f:
        # ensure_ascii=False keeps em dashes and similar characters readable.
        f.writelines(json.dumps(r, ensure_ascii=False) + "\n" for r in ok)
    print(f"wrote {out}")
|
|
|
|
| if __name__ == "__main__": |
| main() |
|
|