# constraint-env / dataset_example.py
# DecentSanage's picture
# Upload folder using huggingface_hub
# f823a82 verified
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
"""
Dataset for the Constraint Environment.

Three difficulty tiers (easy / medium / hard) with increasing structural
complexity for translating natural-language scheduling constraints into a
JSON-based AST DSL.

Integer day and slot values in the target ASTs are zero-based indices into
the lists below (for example, day 2 is "Wed" and slot 3 is "12:00"):

day_names = ["Mon", "Tue", "Wed", "Thr", "Fri", "Sat"]
slot_names = [
    "9:00", "10:00", "11:00", "12:00",
    "BREAK",
    "2:00", "3:00", "4:00", "5:00"
]
"""
# Prompt / target-AST pairs grouped by difficulty tier.
#
# Every entry is a dict with two keys:
#   "prompt"     - the natural-language scheduling rule
#   "target_ast" - the JSON-style AST for that rule
# Integer day/slot literals inside the ASTs are indices into the day_names
# and slot_names lists quoted in the module docstring.
dataset = {
    # ==================================================================
    # EASY
    # ==================================================================
    "easy": [
        {
            "prompt": "The branch CS must not have classes on Wednesday and on 12:00. ",
            "target_ast": {
                "type": "hard",
                "name": "cs_department_meeting",
                "forall": [
                    {"b": "branches"},
                    {"sub": {"subjects": "b"}},
                    {"d": "days"},
                    {"s": "slots"},
                ],
                # Filter: {"name": "b"} == "CS" AND d == 2 AND s == 3
                "where": {
                    "operator": "AND",
                    "left": {
                        "operator": "==",
                        "left": {"name": "b"},
                        "right": "CS",
                    },
                    "right": {
                        "operator": "AND",
                        # 2 -> "Wed" in day_names
                        "left": {"operator": "==", "left": "d", "right": 2},
                        # 3 -> "12:00" in slot_names
                        "right": {"operator": "==", "left": "s", "right": 3},
                    },
                },
                "assert": {
                    "operator": "==",
                    "left": {
                        "target": "schedule",
                        "args": [{"name": "b"}, {"name": "sub"}, "d", "s"],
                    },
                    "right": 0,
                },
            },
        },
        {
            "prompt": "'Dr. smith' must not have any class on fridays",
            "target_ast": {
                "type": "hard",
                "name": "dr_smith_friday_off",
                # No "days" quantifier: the day is fixed to 4 in the assert.
                "forall": [
                    {"b": "branches"},
                    {"sub": {"subjects": "b"}},
                    {"s": "slots"},
                ],
                "where": {
                    "operator": "in",
                    "left": "Dr. smith",
                    "right": {"teachers": "sub"},
                },
                "assert": {
                    "operator": "==",
                    "left": {
                        "target": "schedule",
                        # 4 -> "Fri" in day_names
                        "args": [{"name": "b"}, {"name": "sub"}, 4, "s"],
                    },
                    "right": 0,
                },
            },
        },
    ],
    # ==================================================================
    # MEDIUM
    # ==================================================================
    "medium": [
        {
            "prompt": "Subjects must be equal to their defined frequency",
            "target_ast": {
                "type": "hard",
                "name": "subject_weekly_frequency",
                "forall": [
                    {"b": "branches"},
                    {"sub": {"subjects": "b"}},
                ],
                # sum over (d, s) of schedule(...) == {"frequency": "sub"}
                "assert": {
                    "operator": "==",
                    "left": {
                        "operator": "sum",
                        "over": [
                            {"d": "days"},
                            {"s": "slots"},
                        ],
                        "expression": {
                            "target": "schedule",
                            "args": [{"name": "b"}, {"name": "sub"}, "d", "s"],
                        },
                    },
                    "right": {"frequency": "sub"},
                },
            },
        },
        {
            "prompt": "Each subject must occur at most 1 time per day. ",
            "target_ast": {
                "type": "hard",
                "name": "max_one_occurrence_per_day",
                "forall": [
                    {"b": "branches"},
                    {"sub": {"subjects": "b"}},
                    {"d": "days"},
                ],
                # sum over s of schedule(...) <= 1
                "assert": {
                    "operator": "<=",
                    "left": {
                        "operator": "sum",
                        "over": [{"s": "slots"}],
                        "expression": {
                            "target": "schedule",
                            "args": [{"name": "b"}, {"name": "sub"}, "d", "s"],
                        },
                    },
                    "right": 1,
                },
            },
        },
    ],
    # ==================================================================
    # HARD
    # ==================================================================
    "hard": [
        {
            "prompt": "No classes should be scheduled on Saturday, except for online classes.",
            "target_ast": {
                "type": "hard",
                "name": "no_classes_on_saturday",
                "forall": [
                    {"b": "branches"},
                    {"sub": {"subjects": "b"}},
                    {"d": "days"},
                    {"s": "slots"},
                ],
                # Filter: d == 5 AND {"type": "sub"} != "online"
                "where": {
                    "operator": "AND",
                    # 5 -> "Sat" in day_names
                    "left": {"operator": "==", "left": "d", "right": 5},
                    "right": {
                        "operator": "!=",
                        "left": {"type": "sub"},
                        "right": "online",
                    },
                },
                "assert": {
                    "operator": "==",
                    "left": {
                        "target": "schedule",
                        "args": [{"name": "b"}, {"name": "sub"}, "d", "s"],
                    },
                    "right": 0,
                },
            },
        },
        {
            "prompt": "Every teacher must teach one class at a time.",
            "target_ast": {
                "type": "hard",
                "name": "teacher_no_double_booking",
                "forall": [
                    {"teacher": "teachers"},
                    {"d": "days"},
                    {"t": "slots"},
                ],
                # sum over (b, sub) of occupies_teacher(...) <= 1
                "assert": {
                    "operator": "<=",
                    "left": {
                        "operator": "sum",
                        "over": [
                            {"b": "branches"},
                            {"sub": {"subjects": "b"}},
                        ],
                        "expression": {
                            "target": "occupies_teacher",
                            # NOTE(review): b and sub are bare strings here,
                            # whereas the "schedule" args above wrap them as
                            # {"name": ...} - confirm this is intended.
                            "args": ["b", "sub", "teacher", "d", "t"],
                        },
                    },
                    "right": 1,
                },
            },
        },
    ],
}
# dataset = {
# # ------------------------------------------------------------------
# # EASY – single quantifier, no WHERE clause, direct assert
# # ------------------------------------------------------------------
# "easy": [
# {
# "prompt": (
# "No classes should be scheduled on Saturday."
# ),
# "target_ast": {
# "type": "hard",
# "name": "no_saturday_classes",
# "forall": [
# {"var": "b", "domain": "branches"},
# {"var": "sub", "domain": "subjects"},
# {"var": "d", "domain": "days"},
# {"var": "s", "domain": "slots"},
# ],
# "where": "d == 5",
# "assert": "schedule(b, sub, d, s) == 0",
# },
# },
# {
# "prompt": (
# "Every teacher must teach at least one subject."
# ),
# "target_ast": {
# "type": "soft",
# "name": "teacher_teaches_one",
# "forall": [
# {"var": "t", "domain": "teachers"},
# ],
# "assert": "SUM(teaches(t, sub)) >= 1",
# },
# },
# ],
# # ------------------------------------------------------------------
# # MEDIUM – two quantifiers, WHERE clause, combined assert
# # ------------------------------------------------------------------
# "medium": [
# {
# "prompt": (
# "No non-online classes should be scheduled on Saturday."
# ),
# "target_ast": {
# "type": "hard",
# "name": "no_non_online_saturday",
# "forall": [
# {"var": "b", "domain": "branches"},
# {"var": "sub", "domain": "subjects"},
# {"var": "d", "domain": "days"},
# {"var": "s", "domain": "slots"},
# ],
# "where": "subject_type(b, sub) != 'online' AND d == 5",
# "assert": "schedule(b, sub, d, s) == 0",
# },
# },
# {
# "prompt": (
# "A teacher cannot be assigned to two different slots at the same time."
# ),
# "target_ast": {
# "type": "hard",
# "name": "no_teacher_time_conflict",
# "forall": [
# {"var": "t", "domain": "teachers"},
# {"var": "d", "domain": "days"},
# {"var": "s", "domain": "slots"},
# ],
# "assert": "COUNT(occupies_teacher(t, d, s)) <= 1",
# },
# },
# ],
# # ------------------------------------------------------------------
# # HARD – multiple quantifiers, nested WHERE + AND/OR, minimize
# # ------------------------------------------------------------------
# "hard": [
# {
# "prompt": (
# "Minimize the number of occupied slots for each branch on any given day, "
# "but only for subjects that are labelled as practical."
# ),
# "target_ast": {
# "type": "soft",
# "name": "minimize_practical_slots",
# "forall": [
# {"var": "b", "domain": "branches"},
# {"var": "sub", "domain": "subjects"},
# {"var": "d", "domain": "days"},
# {"var": "s", "domain": "slots"},
# ],
# "where": "subject_type(b, sub) == 'practical'",
# "minimize": "SUM(occupies(b, sub, d, s))",
# },
# },
# {
# "prompt": (
# "No branch may have more than four scheduled slots in a single day, "
# "unless a subject is online, in which case it may occupy one extra slot."
# ),
# "target_ast": {
# "type": "hard",
# "name": "daily_slot_cap",
# "forall": [
# {"var": "b", "domain": "branches"},
# {"var": "d", "domain": "days"},
# {"var": "sub", "domain": "subjects"},
# {"var": "s", "domain": "slots"},
# ],
# "assert": (
# "SUM(schedule(b, sub, d, s)) <= 4 OR "
# "(subject_type(b, sub) == 'online' AND SUM(schedule(b, sub, d, s)) <= 5)"
# ),
# },
# },
# ],
# }