martinakaduc committed on
Commit
6256eb9
·
verified ·
1 Parent(s): c490c65

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .DS_Store +0 -0
  2. .gitattributes +27 -0
  3. Code/LICENSE +40 -0
  4. Code/__pycache__/clean_utils.cpython-312.pyc +0 -0
  5. Code/__pycache__/process_to_single_file.cpython-312.pyc +0 -0
  6. Code/build_skill_set.py +157 -0
  7. Code/clean_utils.py +148 -0
  8. Code/cleantext.py +102 -0
  9. Code/detect_similar_node_codes.py +285 -0
  10. Code/evaluate_kt.py +205 -0
  11. Code/evaluate_kt_by_context.py +339 -0
  12. Code/find_duplicate_problem_body.py +152 -0
  13. Code/find_duplicate_problem_id.py +148 -0
  14. Code/gptoss120bvllmmcq.py +31 -0
  15. Code/kt_inference_base.py +849 -0
  16. Code/llama33_70b_instruct_vllm.py +30 -0
  17. Code/plot_student_attempt_distribution.py +357 -0
  18. Code/plot_timegap_distribution.py +484 -0
  19. Code/plot_totaltime_distribution.py +433 -0
  20. Code/process_to_single_file.py +820 -0
  21. Code/qwen3next80bvllm_instruct.py +35 -0
  22. Code/qwen3next80bvllm_thinking.py +35 -0
  23. Code/scripts.sh +24 -0
  24. Data/CASE-Common Core State Standards for Math.json +0 -0
  25. Data/Interactions.csv +3 -0
  26. Data/Math_Standards1.pdf +3 -0
  27. Data/Problems.csv +0 -0
  28. Data/Skill_Set.csv +165 -0
  29. Data/Skills.csv +0 -0
  30. README.md +246 -0
  31. Results/Problems_duplicated_problem_id.csv +209 -0
  32. Results/Problems_same_body_different_problem_id.csv +55 -0
  33. Results/day_student_attempt_distribution.png +3 -0
  34. Results/day_student_attempt_distribution_00173df52c65a9c13b45978c5a56ea2db67b17deded674486f7af64f7c5fd9ac.png +3 -0
  35. Results/day_student_attempt_distribution_001849d40d6bf1e2ed5826e4420e291e54655a2b6b77a9ab701faaeb4e899abe.png +3 -0
  36. Results/day_student_attempt_distribution_counts.csv +0 -0
  37. Results/day_student_attempt_distribution_counts_00173df52c65a9c13b45978c5a56ea2db67b17deded674486f7af64f7c5fd9ac.csv +262 -0
  38. Results/day_student_attempt_distribution_counts_001849d40d6bf1e2ed5826e4420e291e54655a2b6b77a9ab701faaeb4e899abe.csv +256 -0
  39. Results/inference_data_kt_results.zip +3 -0
  40. Results/month_student_attempt_distribution.png +3 -0
  41. Results/month_student_attempt_distribution_00173df52c65a9c13b45978c5a56ea2db67b17deded674486f7af64f7c5fd9ac.png +3 -0
  42. Results/month_student_attempt_distribution_001849d40d6bf1e2ed5826e4420e291e54655a2b6b77a9ab701faaeb4e899abe.png +3 -0
  43. Results/month_student_attempt_distribution_counts.csv +69 -0
  44. Results/month_student_attempt_distribution_counts_00173df52c65a9c13b45978c5a56ea2db67b17deded674486f7af64f7c5fd9ac.csv +10 -0
  45. Results/month_student_attempt_distribution_counts_001849d40d6bf1e2ed5826e4420e291e54655a2b6b77a9ab701faaeb4e899abe.csv +10 -0
  46. Results/pedagogical_grounding/README.md +275 -0
  47. Results/pedagogical_grounding/batch_evaluate.py +328 -0
  48. Results/pedagogical_grounding/distractor_analysis.py +299 -0
  49. Results/pedagogical_grounding/evaluate_pedagogical.py +285 -0
  50. Results/pedagogical_grounding/gptoss120b_pedagogical.py +57 -0
.DS_Store ADDED
Binary file (6.15 kB). View file
 
.gitattributes CHANGED
@@ -1,5 +1,6 @@
1
  *.7z filter=lfs diff=lfs merge=lfs -text
2
  *.arrow filter=lfs diff=lfs merge=lfs -text
 
3
  *.bin filter=lfs diff=lfs merge=lfs -text
4
  *.bz2 filter=lfs diff=lfs merge=lfs -text
5
  *.ckpt filter=lfs diff=lfs merge=lfs -text
@@ -8,6 +9,8 @@
8
  *.h5 filter=lfs diff=lfs merge=lfs -text
9
  *.joblib filter=lfs diff=lfs merge=lfs -text
10
  *.lfs.* filter=lfs diff=lfs merge=lfs -text
 
 
11
  *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
  *.model filter=lfs diff=lfs merge=lfs -text
13
  *.msgpack filter=lfs diff=lfs merge=lfs -text
@@ -33,3 +36,27 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  *.7z filter=lfs diff=lfs merge=lfs -text
2
  *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.avro filter=lfs diff=lfs merge=lfs -text
4
  *.bin filter=lfs diff=lfs merge=lfs -text
5
  *.bz2 filter=lfs diff=lfs merge=lfs -text
6
  *.ckpt filter=lfs diff=lfs merge=lfs -text
 
9
  *.h5 filter=lfs diff=lfs merge=lfs -text
10
  *.joblib filter=lfs diff=lfs merge=lfs -text
11
  *.lfs.* filter=lfs diff=lfs merge=lfs -text
12
+ *.lz4 filter=lfs diff=lfs merge=lfs -text
13
+ *.mds filter=lfs diff=lfs merge=lfs -text
14
  *.mlmodel filter=lfs diff=lfs merge=lfs -text
15
  *.model filter=lfs diff=lfs merge=lfs -text
16
  *.msgpack filter=lfs diff=lfs merge=lfs -text
 
36
  *.zip filter=lfs diff=lfs merge=lfs -text
37
  *.zst filter=lfs diff=lfs merge=lfs -text
38
  *tfevents* filter=lfs diff=lfs merge=lfs -text
39
+ # Audio files - uncompressed
40
+ *.pcm filter=lfs diff=lfs merge=lfs -text
41
+ *.sam filter=lfs diff=lfs merge=lfs -text
42
+ *.raw filter=lfs diff=lfs merge=lfs -text
43
+ # Audio files - compressed
44
+ *.aac filter=lfs diff=lfs merge=lfs -text
45
+ *.flac filter=lfs diff=lfs merge=lfs -text
46
+ *.mp3 filter=lfs diff=lfs merge=lfs -text
47
+ *.ogg filter=lfs diff=lfs merge=lfs -text
48
+ *.wav filter=lfs diff=lfs merge=lfs -text
49
+ # Image files - uncompressed
50
+ *.bmp filter=lfs diff=lfs merge=lfs -text
51
+ *.gif filter=lfs diff=lfs merge=lfs -text
52
+ *.png filter=lfs diff=lfs merge=lfs -text
53
+ *.tiff filter=lfs diff=lfs merge=lfs -text
54
+ # Image files - compressed
55
+ *.jpg filter=lfs diff=lfs merge=lfs -text
56
+ *.jpeg filter=lfs diff=lfs merge=lfs -text
57
+ *.webp filter=lfs diff=lfs merge=lfs -text
58
+ # Video files - compressed
59
+ *.mp4 filter=lfs diff=lfs merge=lfs -text
60
+ *.webm filter=lfs diff=lfs merge=lfs -text
61
+ *.csv filter=lfs diff=lfs merge=lfs -text
62
+ Data/Math_Standards1.pdf filter=lfs diff=lfs merge=lfs -text
Code/LICENSE ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Creative Commons Attribution-NonCommercial 4.0 International License (CC BY-NC 4.0)
2
+
3
+ Copyright (c) 2024 FoundationalED Authors
4
+
5
+ You are free to:
6
+
7
+ Share — copy and redistribute the material in any medium or format
8
+
9
+ Adapt — remix, transform, and build upon the material
10
+
11
+ Under the following terms:
12
+
13
+ Attribution — You must give appropriate credit, provide a link to the license,
14
+ and indicate if changes were made. You may do so in any reasonable manner, but
15
+ not in any way that suggests the licensor endorses you or your use.
16
+
17
+ NonCommercial — You may not use the material for commercial purposes.
18
+
19
+ No additional restrictions — You may not apply legal terms or technological
20
+ measures that legally restrict others from doing anything the license permits.
21
+
22
+ Notices:
23
+
24
+ You do not have to comply with the license for elements of the material in
25
+ the public domain or where your use is permitted by an applicable exception
26
+ or limitation.
27
+
28
+ No warranties are given. The license may not give you all of the permissions
29
+ necessary for your intended use. For example, other rights such as publicity,
30
+ privacy, or moral rights may limit how you use the material.
31
+
32
+ Full license text: https://creativecommons.org/licenses/by-nc/4.0/legalcode
33
+
34
+ ================================================================================
35
+
36
+ ADDITIONAL TERMS FOR FOUNDATIONALED DATASET
37
+
38
+ By accessing or using the FoundationalED dataset, you additionally agree to the
39
+ Data Usage Agreement specified in the README.md file, which includes restrictions
40
+ on re-identification, ethical use requirements, and compliance obligations.
Code/__pycache__/clean_utils.cpython-312.pyc ADDED
Binary file (7.07 kB). View file
 
Code/__pycache__/process_to_single_file.cpython-312.pyc ADDED
Binary file (33 kB). View file
 
Code/build_skill_set.py ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """Build Skill_Set.csv from Skills.csv and CASE Common Core standards JSON.
3
+
4
+ Output columns:
5
+ - index (1-based)
6
+ - skill_code
7
+ - full_description
8
+
9
+ The output contains only skill codes that appear in both:
10
+ 1) Skills.csv (column: node_code)
11
+ 2) CASE JSON CFItems (field: humanCodingScheme)
12
+
13
+ Rows are sorted by skill_code.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import argparse
19
+ import csv
20
+ import html
21
+ import json
22
+ import re
23
+ from pathlib import Path
24
+ from typing import Dict, Set
25
+
26
# Patterns shared by the text/code normalizers below.
_TAG_RE = re.compile(r"<[^>]+>")
_WS_RE = re.compile(r"\s+")
_DOT_BEFORE_SUFFIX_RE = re.compile(r"(?<=\d)\.(?=[A-Za-z]$)")


def _clean_text(text: str) -> str:
    """Return *text* with entities decoded, tags removed and whitespace collapsed.

    A falsy input (``None`` or ``""``) yields an empty string.
    """
    # NOTE(review): entities are decoded before tags are stripped, so escaped
    # angle brackets (``&lt;...&gt;``) are removed like real tags - confirm
    # that this is intended.
    decoded = html.unescape(text or "")
    without_tags = _TAG_RE.sub("", decoded)
    return _WS_RE.sub(" ", without_tags).strip()


def _normalize_skill_code(code: str) -> str:
    """Map equivalent skill-code spellings onto one representation.

    The dot that separates a digit from a single trailing letter is dropped:

    - ``3.MD.C.7a``  -> ``3.MD.C.7a``
    - ``3.MD.C.7.a`` -> ``3.MD.C.7a``

    A falsy input yields an empty string.
    """
    return _DOT_BEFORE_SUFFIX_RE.sub("", (code or "").strip())
49
+
50
+
51
def load_skill_codes(skills_csv_path: Path) -> Set[str]:
    """Read the unique, non-empty skill codes from the ``node_code`` column.

    Args:
        skills_csv_path: Path to the skills CSV file.

    Returns:
        The set of whitespace-stripped ``node_code`` values; blank cells are
        ignored.

    Raises:
        ValueError: If the file has no ``node_code`` column.
    """
    # "utf-8-sig" transparently drops a leading byte-order mark. With plain
    # "utf-8" a BOM becomes part of the first header name, so a perfectly valid
    # file whose first column is node_code would be rejected below.
    with skills_csv_path.open("r", encoding="utf-8-sig", newline="") as f:
        reader = csv.DictReader(f)
        if "node_code" not in (reader.fieldnames or []):
            raise ValueError(
                f"Missing required column 'node_code' in {skills_csv_path}"
            )
        # Short rows yield None for missing cells, hence the `or ""`.
        stripped = ((row.get("node_code") or "").strip() for row in reader)
        return {code for code in stripped if code}
65
+
66
+
67
def load_case_mapping(case_json_path: Path) -> Dict[str, str]:
    """Build a ``normalized skill code -> statement`` map from a CASE JSON file.

    Only ``CFItems`` entries whose ``CFItemType`` is ``Standard`` or
    ``Component`` are used (sub-standards such as 3.MD.C.7.a are often
    Components). Entries without a code or without a non-empty cleaned
    ``fullStatement`` are skipped, and the first usable statement wins when a
    code occurs more than once.
    """
    with case_json_path.open("r", encoding="utf-8") as f:
        payload = json.load(f)

    accepted_types = {"Standard", "Component"}
    mapping: Dict[str, str] = {}
    for item in payload.get("CFItems", []):
        code = _normalize_skill_code((item.get("humanCodingScheme") or "").strip())
        if not code or item.get("CFItemType") not in accepted_types:
            continue

        statement = _clean_text(item.get("fullStatement") or "")
        if statement:
            # setdefault keeps the first definition seen for a code.
            mapping.setdefault(code, statement)

    return mapping
93
+
94
+
95
def write_output(output_csv_path: Path, rows: list[tuple[str, str]]) -> None:
    """Write ``(skill_code, description)`` pairs as CSV with a 1-based index.

    Missing parent directories of *output_csv_path* are created first.
    """
    output_csv_path.parent.mkdir(parents=True, exist_ok=True)
    header = ["index", "skill_code", "full_description"]
    with output_csv_path.open("w", encoding="utf-8", newline="") as handle:
        out = csv.writer(handle)
        out.writerow(header)
        out.writerows(
            [position, code, desc]
            for position, (code, desc) in enumerate(rows, start=1)
        )
103
+
104
+
105
def main() -> None:
    """Command-line entry point: build the skill CSV and print a summary.

    Reads the skill codes and the CASE mapping, writes one row per skill code
    whose normalized form is present in the mapping (sorted by the raw code),
    then reports how many codes matched and which ones did not.
    """
    parser = argparse.ArgumentParser(
        # The description used to name "Skill_List.csv", which disagreed with
        # the default output path and the --output-csv help text below.
        description="Create Skill_Set.csv from Skills.csv and CASE standards JSON."
    )
    parser.add_argument(
        "--skills-csv",
        type=Path,
        default=Path("../Data/Skills.csv"),
        help="Path to Skills.csv",
    )
    parser.add_argument(
        "--case-json",
        type=Path,
        default=Path("../Data/CASE-Common Core State Standards for Math.json"),
        help="Path to CASE JSON",
    )
    parser.add_argument(
        "--output-csv",
        type=Path,
        default=Path("../Data/Skill_Set.csv"),
        help="Output path for Skill_Set.csv",
    )

    args = parser.parse_args()

    # Default paths are relative, so they resolve against the current working
    # directory rather than the location of this script.
    skills_csv = args.skills_csv.resolve()
    case_json = args.case_json.resolve()
    output_csv = args.output_csv.resolve()

    skill_codes = load_skill_codes(skills_csv)
    case_map = load_case_mapping(case_json)

    # One sorted pass splits the codes into matched rows and missing codes.
    # The mapping only holds non-empty statements, so None means "not found".
    rows = []
    missing = []
    for code in sorted(skill_codes):
        statement = case_map.get(_normalize_skill_code(code))
        if statement is None:
            missing.append(code)
        else:
            rows.append((code, statement))

    write_output(output_csv, rows)

    print(f"Wrote {len(rows)} skills to {output_csv}")
    print(f"Unique skill codes in Skills.csv: {len(skill_codes)}")
    print(f"Missing codes not found in CASE: {len(missing)}")
    if missing:
        preview = ", ".join(missing[:20])
        suffix = " ..." if len(missing) > 20 else ""
        print(f"Missing preview: {preview}{suffix}")
154
+
155
+
156
+ if __name__ == "__main__":
157
+ main()
Code/clean_utils.py ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ HTML cleaning utilities for problem text.
3
+
4
+ Provides functions to clean HTML content while preserving:
5
+ - Inline MathML (fractions, superscripts, subscripts)
6
+ - Wiris math images (extracted from data-mathml attribute)
7
+ - Table structure (formatted with | separators)
8
+ - Image placeholders
9
+ """
10
+
11
+ import html
12
+ import pandas as pd
13
+ from bs4 import BeautifulSoup, NavigableString
14
+
15
+
16
+ def parse_mathml_element(elem):
17
+ """Recursively parse a MathML element to text."""
18
+ if elem.name is None:
19
+ return str(elem).strip()
20
+
21
+ if elem.name == "mfrac":
22
+ children = [c for c in elem.children if c.name]
23
+ if len(children) >= 2:
24
+ num = parse_mathml_element(children[0])
25
+ denom = parse_mathml_element(children[1])
26
+ return f"({num}/{denom})"
27
+ return elem.get_text(strip=True)
28
+
29
+ elif elem.name == "msup":
30
+ children = [c for c in elem.children if c.name]
31
+ if len(children) >= 2:
32
+ base = parse_mathml_element(children[0])
33
+ exp = parse_mathml_element(children[1])
34
+ return f"{base}^{exp}"
35
+ return elem.get_text(strip=True)
36
+
37
+ elif elem.name == "msub":
38
+ children = [c for c in elem.children if c.name]
39
+ if len(children) >= 2:
40
+ base = parse_mathml_element(children[0])
41
+ sub = parse_mathml_element(children[1])
42
+ return f"{base}_{sub}"
43
+ return elem.get_text(strip=True)
44
+
45
+ elif elem.name == "msqrt":
46
+ content = parse_mathml_element_children(elem)
47
+ return f"√({content})"
48
+
49
+ elif elem.name == "mo":
50
+ op = elem.get_text(strip=True)
51
+ if op in ["÷", "×", "·", "+", "-", "=", "<", ">", "≤", "≥", "≠"]:
52
+ return f" {op} "
53
+ return op
54
+
55
+ elif elem.name in ["mn", "mi", "mtext"]:
56
+ return elem.get_text(strip=True)
57
+
58
+ elif elem.name in ["mrow", "math", "mpadded", "mstyle"]:
59
+ return parse_mathml_element_children(elem)
60
+
61
+ else:
62
+ return elem.get_text(strip=True)
63
+
64
+
65
+ def parse_mathml_element_children(elem):
66
+ """Parse all children of a MathML element."""
67
+ parts = []
68
+ for child in elem.children:
69
+ if isinstance(child, NavigableString):
70
+ text = str(child).strip()
71
+ if text:
72
+ parts.append(text)
73
+ elif child.name:
74
+ parts.append(parse_mathml_element(child))
75
+ return "".join(parts)
76
+
77
+
78
+ def clean_problem_body(text):
79
+ """
80
+ Clean HTML problem body with full MathML handling.
81
+
82
+ Handles:
83
+ - Inline MathML (<math>, <mfrac>, <msup>, etc.) → (4/3), x^2
84
+ - Wiris math images (data-mathml attribute) → [15÷12]
85
+ - Tables → [Table: Col1 | Col2 ...]
86
+ - Regular images → [image]
87
+ - HTML entities → decoded properly
88
+ """
89
+ if pd.isna(text) or text == "":
90
+ return ""
91
+ soup = BeautifulSoup(str(text), "html.parser")
92
+
93
+ # 1. Handle inline MathML
94
+ for math in soup.find_all("math"):
95
+ parsed = parse_mathml_element(math)
96
+ math.replace_with(f" {parsed} ")
97
+
98
+ # 2. Handle Wiris images
99
+ for img in soup.find_all("img"):
100
+ alt = img.get("alt", "")
101
+ src = img.get("src", "")
102
+ data_mathml = img.get("data-mathml", "")
103
+
104
+ if "wiris" in src.lower() or "pluginwiris" in src:
105
+ if alt and alt.strip() and alt not in ["NO ALT", "NONE"]:
106
+ img.replace_with(f" [{alt.strip()}] ")
107
+ elif data_mathml:
108
+ math_str = (
109
+ data_mathml.replace("«", "<").replace("»", ">").replace("¨", '"')
110
+ )
111
+ msoup = BeautifulSoup(math_str, "html.parser")
112
+ math_elem = msoup.find("math")
113
+ if math_elem:
114
+ mtext = parse_mathml_element(math_elem)
115
+ else:
116
+ mtext = msoup.get_text(separator="")
117
+ mtext = mtext.replace("§#247;", "÷").replace("§#215;", "×")
118
+ mtext = (
119
+ mtext.replace("§#8722;", "-")
120
+ .replace("§#160;", " ")
121
+ .replace("§#183;", "·")
122
+ )
123
+ mtext = mtext.replace("§#", "&#")
124
+ mtext = html.unescape(mtext).strip()
125
+ img.replace_with(f" [{mtext}] " if mtext else " [math] ")
126
+ else:
127
+ img.replace_with(" [math] ")
128
+ elif alt and alt.strip():
129
+ img.replace_with(f" [Image: {alt.strip()[:100]}] ")
130
+ else:
131
+ img.replace_with(" [image] ")
132
+
133
+ # 3. Handle tables
134
+ for table in soup.find_all("table"):
135
+ rows = []
136
+ for tr in table.find_all("tr"):
137
+ cells = [td.get_text(strip=True) for td in tr.find_all(["td", "th"])]
138
+ if any(cells):
139
+ rows.append(" | ".join(cells))
140
+ if rows:
141
+ table.replace_with(f"\n[Table:\n{chr(10).join(rows)}]\n")
142
+ else:
143
+ table.decompose()
144
+
145
+ text = soup.get_text(separator=" ")
146
+ text = html.unescape(text)
147
+ text = " ".join(text.split())
148
+ return text.strip()
Code/cleantext.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+
3
def add_pi_if_missing(input_string):
    """Insert " pi" after the first "se 3.14 for" when no "pi" follows it.

    The marker omits the leading letter, so both "Use 3.14 for" and
    "use 3.14 for" match. The string is returned unchanged when the marker is
    absent or when "pi" occurs anywhere after it.
    """
    marker = "se 3.14 for"
    head, found, tail = input_string.partition(marker)
    if found and "pi" not in tail:
        return f"{head}{marker} pi{tail}"
    return input_string
10
+
11
def convert_mathml_to_fraction(mathml_content):
    """Flatten simple MathML markup into plain text via literal replacements.

    The substitutions are applied strictly in the listed order (later entries
    see the output of earlier ones): fractions become ``(a/b)``, the basic
    ``mn``/``mi``/``mo``/``math`` tags are dropped, a few entities are decoded,
    and trailing ``/`` characters are removed at the end.
    """
    substitutions = (
        ('<ast-r type="text" marker="1">', "___"),
        ('<mfrac>', '('),
        ('</mfrac>', ')'),
        ('</mn><mn>', '/'),
        ('<mn>', ''),
        ('</mn>', ''),
        ('<mi>', ''),
        ('</mi>', ''),
        ('<mo>', ''),
        ('</mo>', ''),
        # The mn pair is repeated on purpose: this mirrors the original
        # sequence exactly so the output stays identical in every case.
        ('<mn>', ''),
        ('</mn>', ''),
        ('<math>', ''),
        ('</math>', ''),
        ('<mi mathvariant=¨normal¨>§#960;', ''),
        ('&nbsp;', ' '),
        ('§#160;', ' '),
        ('&gt;', '>'),
        ('&lt;', '<'),
        ('&amp;', '&'),
        ('«/math', ''),
    )
    result = mathml_content
    for needle, replacement in substitutions:
        result = result.replace(needle, replacement)
    return result.rstrip('/')
38
+
39
+
40
+ from bs4 import BeautifulSoup
41
def alt_text(html: str) -> str:
    """Return *html* with every ``<img>`` that has alt text replaced by that text.

    Images without a non-empty ``alt`` attribute are left in the markup. The
    result is the re-serialized document, not plain text.
    """
    soup = BeautifulSoup(html, 'html.parser')

    for image in soup.find_all('img'):
        replacement = image.get('alt')
        if not replacement:
            continue
        image.replace_with(replacement)

    return str(soup)
51
+
52
def mathml_to_text(html):
    """Replace each ``<math>`` element with text and return the document text.

    When the first ``<mfrac>`` inside a ``<math>`` element contains exactly two
    ``<mn>`` descendants, the whole ``<math>`` element becomes
    ``numerator/denominator``; otherwise it becomes its own text content. The
    return value is the text of the entire document with tags removed.
    """
    soup = BeautifulSoup(html, 'html.parser')

    for math in soup.find_all('math'):
        frac = math.find('mfrac')
        numbers = frac.find_all('mn') if frac else []
        if len(numbers) == 2:
            top, bottom = (number.text for number in numbers)
            replacement = f"{top}/{bottom}"
        else:
            # No fraction, or not a plain two-number fraction.
            replacement = math.get_text()
        math.replace_with(replacement)

    return soup.get_text(separator=" ", strip=True)
72
+
73
+
74
def clean_text(input_text):
    """Convert an HTML problem text into plain text.

    Applies, in order: ``&nbsp;`` replacement, ``mathml_to_text``,
    ``alt_text``, Wiris-formula image conversion, the literal replacements of
    ``convert_mathml_to_fraction`` and finally ``add_pi_if_missing``.

    Returns ``""`` when the input is missing (``pd.isna``) or only whitespace.
    """
    # NOTE(review): .strip() assumes any non-missing input is a str - confirm
    # that callers never pass numbers here.
    if pd.isna(input_text) or input_text.strip() == "":
        return ""

    # Replace &nbsp; with a plain space (no other entity is decoded here)
    input_text = input_text.replace('&nbsp;', ' ')

    # Convert <math> elements, then replace <img> tags with their alt text
    # NOTE(review): mathml_to_text returns soup.get_text(), i.e. text with the
    # tags removed, so <img> elements of the original markup appear to be gone
    # before alt_text and the Wiris loop below run - confirm whether these two
    # steps still have anything to act on.
    input_text = mathml_to_text(input_text)
    input_text = alt_text(input_text)

    # Convert MathML if it exists
    soup = BeautifulSoup(input_text, 'html.parser')
    for img in soup.find_all('img', class_='Wirisformula'):
        mathml_formula = img.get('data-mathml')
        if mathml_formula:
            # Extract inner MathML content
            # NOTE(review): str.find returns -1 when '<math>' is absent, which
            # makes start_index 5 rather than signalling "not found" - verify
            # that the attribute always contains a literal '<math>' tag.
            start_index = mathml_formula.find('<math>') + len('<math>')
            end_index = mathml_formula.find('</math>')
            mathml_formula_content = mathml_formula[start_index:end_index]
            mathml_formula_content_cleaned = mathml_formula_content.replace(
                'xmlns=¨http://www.w3.org/1998/Math/MathML¨»', '')
            fraction = convert_mathml_to_fraction(mathml_formula_content_cleaned)
            img.replace_with(fraction)

    text = soup.get_text(separator=' ', strip=True)
    # Second pass over the flattened text catches markup/entities that survived as text.
    text = convert_mathml_to_fraction(text)
    text = add_pi_if_missing(text)
    return text
Code/detect_similar_node_codes.py ADDED
@@ -0,0 +1,285 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """Detect skills that have similar node_code values but different skill_id values.
3
+
4
+ By default, the script detects conflicts after normalizing node_code values
5
+ (uppercasing and removing punctuation differences). It can also perform optional
6
+ fuzzy matching on normalized compact node codes.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import argparse
12
+ import re
13
+ from difflib import SequenceMatcher
14
+ from itertools import combinations
15
+ from pathlib import Path
16
+
17
+ import pandas as pd
18
+
19
+
20
+ DEFAULT_SKILLS_PATH = Path(__file__).resolve().parent.parent / "Data" / "Skills.csv"
21
+ DEFAULT_OUTPUT_PATH = (
22
+ Path(__file__).resolve().parent.parent
23
+ / "Results"
24
+ / "similar_node_code_conflicts.csv"
25
+ )
26
+
27
+
28
def parse_args() -> argparse.Namespace:
    """Define the command-line options and parse them from ``sys.argv``."""
    parser = argparse.ArgumentParser(
        description=(
            "Detect skills whose node_code values are similar but map to "
            "different skill_id values."
        )
    )

    # (flag, add_argument keyword arguments), in the order shown by --help.
    option_specs = [
        (
            "--skills-path",
            {"type": Path, "default": DEFAULT_SKILLS_PATH, "help": "Path to Skills.csv."},
        ),
        (
            "--output-path",
            {
                "type": Path,
                "default": DEFAULT_OUTPUT_PATH,
                "help": "Path to save the detected conflicts as CSV.",
            },
        ),
        (
            "--include-fuzzy",
            {
                "action": "store_true",
                "help": "Also run fuzzy matching across compact node_code values.",
            },
        ),
        (
            "--similarity-threshold",
            {
                "type": float,
                "default": 0.9,
                "help": "Minimum SequenceMatcher ratio for fuzzy matches (0.0 to 1.0).",
            },
        ),
        (
            "--max-fuzzy-pairs",
            {
                "type": int,
                "default": 200,
                "help": "Maximum number of fuzzy match pairs to keep after sorting.",
            },
        ),
        (
            "--print-limit",
            {
                "type": int,
                "default": 20,
                "help": "Maximum number of rows to print for each conflict section.",
            },
        ),
    ]
    for flag, options in option_specs:
        parser.add_argument(flag, **options)

    return parser.parse_args()
71
+
72
+
73
def normalize_node_code(node_code: str) -> str:
    """Return the canonical form of a node code.

    The code is upper-cased and trimmed, every run of characters other than
    ASCII letters and digits becomes a single ".", and leading/trailing dots
    are removed.
    """
    upper = node_code.upper().strip()
    dotted = re.sub(r"[^A-Za-z0-9]+", ".", upper)
    collapsed = re.sub(r"\.+", ".", dotted)
    return collapsed.strip(".")
77
+
78
+
79
def compact_node_code(canonical_node_code: str) -> str:
    """Return the canonical node code with every "." removed."""
    return "".join(canonical_node_code.split("."))
81
+
82
+
83
def unique_sorted_strings(series: pd.Series) -> list[str]:
    """Return the distinct non-blank values of *series* as sorted strings.

    Missing values are dropped, each remaining value is converted with ``str``
    and stripped, and values that end up empty are discarded.
    """
    stripped = (str(value).strip() for value in series.dropna())
    return sorted({text for text in stripped if text})
90
+
91
+
92
def unique_sorted_ints(series: pd.Series) -> list[int]:
    """Return the distinct non-missing values of *series* as sorted ints."""
    return sorted({int(value) for value in series.dropna()})
97
+
98
+
99
def join_pipe(values: list[str]) -> str:
    """Join *values* into one string, separated by " | "."""
    separator = " | "
    return separator.join(values)
101
+
102
+
103
def join_csv_ints(values: list[int]) -> str:
    """Render *values* as a comma-separated string without spaces."""
    return ",".join(map(str, values))
105
+
106
+
107
def load_skills(skills_path: Path) -> pd.DataFrame:
    """Load the skills CSV and add normalized node-code columns.

    Only ``problem_id``, ``skill_id``, ``node_code`` and ``node_name`` are
    read. Rows whose ids are not numeric, or whose ``node_code`` is missing or
    blank, are dropped. The result gains ``node_code_canonical`` and
    ``node_code_compact``.
    """
    id_columns = ["problem_id", "skill_id"]
    required_columns = [*id_columns, "node_code", "node_name"]
    df = pd.read_csv(skills_path, usecols=required_columns, low_memory=False)

    # Non-numeric ids become NaN here and are removed by the dropna below.
    for column in id_columns:
        df[column] = pd.to_numeric(df[column], errors="coerce")

    df = df.dropna(subset=[*id_columns, "node_code"]).copy()
    for column in id_columns:
        df[column] = df[column].astype(int)

    df["node_code"] = df["node_code"].astype(str).str.strip()
    df["node_name"] = df["node_name"].fillna("").astype(str).str.strip()
    has_code = df["node_code"] != ""
    df = df[has_code].copy()

    canonical = df["node_code"].apply(normalize_node_code)
    df["node_code_canonical"] = canonical
    df["node_code_compact"] = df["node_code_canonical"].apply(compact_node_code)
    return df
125
+
126
+
127
def summarize_compact_codes(df: pd.DataFrame) -> pd.DataFrame:
    """Aggregate the skills table to one row per compact node code.

    Each row lists the canonical and raw code spellings, the skill ids and the
    node names seen for that compact code, the number of distinct problems,
    the number of mappings (rows), and ``n_skill_ids``.
    """
    # Output column -> (source column, aggregation); order fixes column order.
    aggregations = {
        "canonical_node_codes": ("node_code_canonical", unique_sorted_strings),
        "raw_node_codes": ("node_code", unique_sorted_strings),
        "skill_ids": ("skill_id", unique_sorted_ints),
        "node_names": ("node_name", unique_sorted_strings),
        "problem_count": ("problem_id", "nunique"),
        "mapping_count": ("skill_id", "size"),
    }
    grouped = df.groupby("node_code_compact", sort=True).agg(**aggregations)
    summary = grouped.reset_index().rename(
        columns={"node_code_compact": "compact_node_code"}
    )

    summary["n_skill_ids"] = summary["skill_ids"].apply(len)
    return summary
144
+
145
+
146
def build_normalized_conflicts(summary: pd.DataFrame) -> pd.DataFrame:
    """Return the compact codes that map to more than one skill id.

    List-valued columns are flattened to strings and a leading
    ``conflict_type`` column is added; rows are ordered by descending number
    of skill ids, then by compact code. When nothing conflicts, the empty
    selection is returned as-is (without the ``conflict_type`` column).
    """
    conflicts = summary.loc[summary["n_skill_ids"] > 1].copy()
    if conflicts.empty:
        return conflicts

    conflicts.insert(0, "conflict_type", "normalized_match")
    conflicts["skill_ids"] = conflicts["skill_ids"].apply(join_csv_ints)
    for column in ("canonical_node_codes", "raw_node_codes", "node_names"):
        conflicts[column] = conflicts[column].apply(join_pipe)

    return conflicts.sort_values(
        by=["n_skill_ids", "compact_node_code"], ascending=[False, True]
    )
162
+
163
+
164
def build_fuzzy_conflicts(
    summary: pd.DataFrame,
    threshold: float,
    max_pairs: int,
) -> pd.DataFrame:
    """Find pairs of different compact codes that look alike but differ in skills.

    Every pair of summary rows is compared with ``SequenceMatcher``. A pair is
    reported when the ratio is at least *threshold* and the two skill-id sets
    are not equal. Results are sorted by descending similarity (then by the
    two codes) and cut to *max_pairs* rows when *max_pairs* is positive.
    """
    matches: list[dict[str, object]] = []

    for left, right in combinations(summary.to_dict(orient="records"), 2):
        left_code = str(left["compact_node_code"])
        right_code = str(right["compact_node_code"])
        if left_code == right_code:
            continue

        score = SequenceMatcher(None, left_code, right_code).ratio()
        if score < threshold:
            continue

        left_skills = set(left["skill_ids"])
        right_skills = set(right["skill_ids"])
        if left_skills == right_skills:
            # Same skills on both sides: similar spelling, but no conflict.
            continue

        shared = sorted(left_skills.intersection(right_skills))
        matches.append(
            {
                "conflict_type": "fuzzy_match",
                "similarity": round(score, 4),
                "left_compact_node_code": left_code,
                "right_compact_node_code": right_code,
                "left_canonical_node_codes": join_pipe(left["canonical_node_codes"]),
                "right_canonical_node_codes": join_pipe(right["canonical_node_codes"]),
                "left_skill_ids": join_csv_ints(left["skill_ids"]),
                "right_skill_ids": join_csv_ints(right["skill_ids"]),
                "overlap_skill_ids": join_csv_ints(shared),
            }
        )

    if not matches:
        return pd.DataFrame(matches)

    ranked = pd.DataFrame(matches).sort_values(
        ["similarity", "left_compact_node_code", "right_compact_node_code"],
        ascending=[False, True, True],
    )
    return ranked.head(max_pairs).copy() if max_pairs > 0 else ranked
215
+
216
+
217
def print_section(title: str, df: pd.DataFrame, print_limit: int) -> None:
    """Print *title* followed by at most *print_limit* rows of *df*.

    An empty frame prints a "None" line instead; when rows are left out, a
    trailing line reports how many.
    """
    print(f"\n{title}")
    if df.empty:
        print(" None")
        return

    shown = df.head(print_limit)
    print(shown.to_string(index=False))
    hidden = len(df) - len(shown)
    if hidden > 0:
        print(f" ... ({hidden} more rows)")
227
+
228
+
229
def main() -> int:
    """Run conflict detection end to end and return the process exit code.

    Validates the options, loads and summarizes the skills file, detects
    normalized (and optionally fuzzy) conflicts, writes them to one CSV and
    prints a report.

    Raises:
        ValueError: If the similarity threshold is outside [0.0, 1.0].
        FileNotFoundError: If the skills file does not exist.
    """
    args = parse_args()
    threshold = args.similarity_threshold
    use_fuzzy = args.include_fuzzy

    if threshold < 0.0 or threshold > 1.0:
        raise ValueError("--similarity-threshold must be in [0.0, 1.0].")

    if not args.skills_path.exists():
        raise FileNotFoundError(f"Skills file not found: {args.skills_path}")

    skills_df = load_skills(args.skills_path)
    summary_df = summarize_compact_codes(skills_df)

    normalized_conflicts = build_normalized_conflicts(summary_df)
    if use_fuzzy:
        fuzzy_conflicts = build_fuzzy_conflicts(
            summary_df,
            threshold=threshold,
            max_pairs=args.max_fuzzy_pairs,
        )
    else:
        fuzzy_conflicts = pd.DataFrame()

    # Both conflict kinds share one output file; columns that exist in only
    # one of the frames are left empty for rows of the other.
    frames = [normalized_conflicts, fuzzy_conflicts] if use_fuzzy else [normalized_conflicts]
    combined_output = pd.concat(frames, ignore_index=True, sort=False)

    args.output_path.parent.mkdir(parents=True, exist_ok=True)
    combined_output.to_csv(args.output_path, index=False)

    print("Loaded rows:", len(skills_df))
    print("Unique compact node codes:", len(summary_df))
    print("Normalized conflicts:", len(normalized_conflicts))
    if use_fuzzy:
        print(
            "Fuzzy conflicts (threshold " f"{args.similarity_threshold:.2f}):",
            len(fuzzy_conflicts),
        )

    print_section(
        "Normalized node_code conflicts (same compact code, different skill_id):",
        normalized_conflicts,
        args.print_limit,
    )
    if use_fuzzy:
        print_section(
            "Fuzzy node_code conflicts (near compact codes, different skill_id):",
            fuzzy_conflicts,
            args.print_limit,
        )

    print(f"\nSaved conflicts to: {args.output_path}")
    return 0
282
+
283
+
284
+ if __name__ == "__main__":
285
+ raise SystemExit(main())
Code/evaluate_kt.py ADDED
@@ -0,0 +1,205 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Evaluate LLM knowledge tracing predictions against FKT benchmark tasks.
4
+
5
+ Tasks evaluated:
6
+ - Task 1 (FKT): Foundational Knowledge Tracing - predict if student answers correctly (question-level)
7
+ - Task 1 Variant 2: Cognitive Student Modeling - predict the actual student response
8
+
9
+ Usage:
10
+ python evaluate_kt.py results.jsonl
11
+ """
12
+
13
+ import argparse
14
+ import json
15
+ import math
16
+ from sklearn.metrics import roc_auc_score
17
+
18
+
19
def normalize_mcq_answer(answer_str: str) -> str:
    """Canonicalise a multiple-choice answer string.

    The string is split on commas; each piece is stripped and upper-cased and
    empty pieces are dropped. When every remaining piece is a single letter,
    the distinct letters are returned sorted and joined with ``', '``
    (e.g. ``'C, A'`` -> ``'A, C'``, ``'a,c'`` -> ``'A, C'``).

    Args:
        answer_str: Answer string to normalize.

    Returns:
        The canonical MCQ string, or ``answer_str`` unchanged when it is not
        a comma-separated list of single letters.
    """
    letters = set()
    for chunk in answer_str.split(','):
        token = chunk.strip().upper()
        if not token:
            # Ignore empty pieces such as the one after a trailing comma.
            continue
        if len(token) != 1 or not token.isalpha():
            # Anything that is not a single letter means "not MCQ format".
            return answer_str
        letters.add(token)

    if not letters:
        return answer_str
    return ', '.join(sorted(letters))
42
+
43
+
44
def numerical_match(answer1: str, answer2: str, atol: float = 0.01, rtol: float = 0.01) -> bool:
    """Tell whether two answer strings parse to numerically close floats.

    Both strings are stripped and converted with ``float``; the values are
    compared with ``math.isclose`` using the given absolute and relative
    tolerances.

    Args:
        answer1: First answer string.
        answer2: Second answer string.
        atol: Absolute tolerance (default: 0.01).
        rtol: Relative tolerance (default: 0.01).

    Returns:
        True when both parse and are close; False when they differ or when
        a ``ValueError``/``AttributeError`` is raised (unparsable text,
        object without ``strip``).
    """
    try:
        first, second = (float(text.strip()) for text in (answer1, answer2))
        verdict = math.isclose(first, second, abs_tol=atol, rel_tol=rtol)
    except (ValueError, AttributeError):
        verdict = False
    return verdict
66
+
67
+
68
def answers_match(pred, actual):
    """Decide whether a predicted answer equals the actual answer.

    Both values are converted with ``str`` and stripped, then compared in
    three stages, stopping at the first success: exact string equality,
    equality after ``normalize_mcq_answer``, and finally ``numerical_match``.

    Args:
        pred: Predicted answer (any type; ``None`` never matches).
        actual: Actual answer (any type; ``None`` never matches).

    Returns:
        True when any stage matches, otherwise False.
    """
    if pred is None or actual is None:
        return False

    pred_text, actual_text = str(pred).strip(), str(actual).strip()
    return (
        pred_text == actual_text
        or normalize_mcq_answer(pred_text) == normalize_mcq_answer(actual_text)
        or numerical_match(pred_text, actual_text)
    )
88
+
89
+
90
def load_results(jsonl_path):
    """Load prediction records from a JSONL file.

    Args:
        jsonl_path: Path of a file holding one JSON document per line.

    Returns:
        A list with the decoded object of every non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Decode explicitly as UTF-8 rather than with the platform's locale
    # encoding, so non-ASCII answers load the same way on every machine.
    with open(jsonl_path, 'r', encoding='utf-8') as f:
        return [json.loads(line) for line in f if line.strip()]
98
+
99
+
100
def _count_answer_matches(rows):
    """Count rows whose predicted student answer matches the actual answer."""
    return sum(
        1 for r in rows
        if answers_match(r.get('predicted_student_answer'), r.get('actual_answer'))
    )


def evaluate(results, prior_baseline=0.615):
    """Print evaluation metrics for a list of prediction records.

    Reports, on stdout:
      * Task 1 (FKT): question-level accuracy and AUC-ROC over rows that have
        both ``actual_score`` and ``predicted_question_level``.
      * Task 1 Variant 2: answer-prediction accuracy over all rows, with a
        breakdown by ``problem_type`` and by ``prediction_type``.

    Args:
        results: List of dict records (see the keys read below).
        prior_baseline: Prior correctness rate printed as a baseline.
            Defaults to 0.615, which the original code annotated as the
            true correctness rate from Interactions.csv.

    Returns:
        None. Prints a message and returns early when ``results`` is empty.
    """
    total = len(results)
    if total == 0:
        print("No results to evaluate.")
        return

    # Class distribution: rows whose score is not exactly 1 (including rows
    # with no score) are reported as incorrect.
    n_correct = sum(1 for r in results if r.get('actual_score') == 1)
    n_incorrect = total - n_correct

    # Task 1: only rows carrying both a ground-truth score and a prediction.
    valid_q = [
        (r.get('actual_score'), r.get('predicted_question_level'))
        for r in results
        if r.get('actual_score') is not None and r.get('predicted_question_level') is not None
    ]
    question_correct = sum(1 for t, p in valid_q if t == p)
    question_acc = question_correct / len(valid_q) if valid_q else 0.0

    # AUC-ROC (note: with binary predictions, this is limited)
    auc_roc = None
    if valid_q:
        y_true, y_pred = zip(*valid_q)
        try:
            auc_roc = roc_auc_score(y_true, y_pred)
        except ValueError:
            auc_roc = None  # Only one class present

    # Task 1 Variant 2: answer prediction accuracy over every row.
    answer_correct = _count_answer_matches(results)

    majority_baseline = max(n_correct, n_incorrect) / total

    print(f"{'='*60}")
    print(f"Evaluation Results ({total} predictions)")
    print(f"{'='*60}")
    print()
    print(f"Class distribution: {n_correct} correct, {n_incorrect} incorrect")
    print()

    print("Task 1: Foundational Knowledge Tracing (FKT) - Question-Level")
    print(f" Accuracy: {question_correct}/{len(valid_q)} = {question_acc:.3f}")
    if auc_roc is not None:
        print(f" AUC-ROC: {auc_roc:.3f}")
    else:
        print(" AUC-ROC: N/A (single class)")
    print(f" Baselines: Prior={prior_baseline:.3f}, Majority={majority_baseline:.3f}")
    print()

    print("Task 1 Variant 2: Cognitive Student Modeling")
    print(f" Overall Accuracy: {answer_correct}/{total} = {answer_correct/total:.3f}")

    # Breakdown by problem type (only when at least one row carries a type).
    problem_types = ['Multiple Choice (select 1)', 'Multiple Choice (select all)', 'Fill-in-the-blank(s)']
    if any(r.get('problem_type') for r in results):
        print(" By problem type:")
        for ptype in problem_types:
            subset = [r for r in results if r.get('problem_type') == ptype]
            if not subset:
                continue
            n = len(subset)
            a_acc = _count_answer_matches(subset) / n
            label = ptype.replace('Multiple Choice ', 'MC ')
            print(f" {label:20s}: n={n:4d}, acc={a_acc:.3f}")
            # Breakdown by ground truth within problem type
            for gt in ['correct', 'incorrect']:
                gt_subset = [r for r in subset if r.get('prediction_type') == gt]
                if gt_subset:
                    gt_n = len(gt_subset)
                    gt_acc = _count_answer_matches(gt_subset) / gt_n
                    print(f" {gt:18s}: n={gt_n:4d}, acc={gt_acc:.3f}")
    print()

    # Breakdown by prediction type (correct/incorrect ground truth)
    print("By ground truth (prediction_type):")
    for ptype in ['correct', 'incorrect']:
        subset = [r for r in results if r.get('prediction_type') == ptype]
        if subset:
            n = len(subset)
            # A row with neither a prediction nor a score must not count as
            # a correct prediction (None == None would otherwise be a hit).
            q_hits = sum(
                1 for r in subset
                if r.get('predicted_question_level') is not None
                and r.get('predicted_question_level') == r.get('actual_score')
            )
            q_acc = q_hits / n
            a_acc = _count_answer_matches(subset) / n
            print(f" {ptype:10s}: n={n:4d}, FKT_acc={q_acc:.3f}, cognitive_acc={a_acc:.3f}")
193
+
194
+
195
def main():
    """CLI entry point: load the JSONL file named on the command line and evaluate it."""
    cli = argparse.ArgumentParser(description="Evaluate LLM knowledge tracing predictions")
    cli.add_argument("jsonl_file", help="Path to JSONL results file")
    jsonl_file = cli.parse_args().jsonl_file

    evaluate(load_results(jsonl_file))


if __name__ == "__main__":
    main()
Code/evaluate_kt_by_context.py ADDED
@@ -0,0 +1,339 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Evaluate KT performance by context length (history size).
4
+
5
+ Analyzes how KT accuracy changes as student history grows from 50 to 400 interactions.
6
+ Plots all models in a single figure for comparison.
7
+
8
+ Usage:
9
+ python evaluate_kt_by_context.py
10
+ """
11
+
12
+ import argparse
13
+ import json
14
+ import math
15
+ import os
16
+ from glob import glob
17
+ from collections import defaultdict
18
+
19
+ import matplotlib.pyplot as plt
20
+ import numpy as np
21
+ from sklearn.metrics import roc_auc_score
22
+
23
# Publication-quality font sizes, applied to matplotlib's global rcParams at
# import time.
_FONT_SIZES = {
    'font.size': 14,
    'axes.titlesize': 16,
    'axes.labelsize': 14,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
    'legend.fontsize': 12,
}
plt.rcParams.update(_FONT_SIZES)
32
+
33
# Display name for each model key (the key is matched against the start of a
# results file name by extract_model_name).
MODEL_NAMES = dict(
    gptoss120b='GPT-OSS-120B',
    llama33_70b_instruct='Llama-3.3-70B-Instruct',
    qwen3next80binstruct='Qwen3-80B-Instruct',
    qwen3next80bthinking='Qwen3-80B-Thinking',
)

# Line colour used for each model key in the plots.
MODEL_COLORS = dict(
    gptoss120b='#1f77b4',  # blue
    llama33_70b_instruct='#ff7f0e',  # orange
    qwen3next80binstruct='#2ca02c',  # green
    qwen3next80bthinking='#d62728',  # red
)
48
+
49
+
50
def normalize_mcq_answer(answer_str: str) -> str:
    """Normalize MCQ answer format for consistent comparison.

    Comma-separated single letters are upper-cased, de-duplicated, sorted and
    re-joined with ``', '``; any other string is returned unchanged.
    """
    tokens = [piece.strip().upper() for piece in answer_str.split(',')]
    tokens = list(filter(None, tokens))  # drop empty pieces

    is_mcq = bool(tokens) and all(len(t) == 1 and t.isalpha() for t in tokens)
    if not is_mcq:
        return answer_str

    unique_sorted = sorted(set(tokens))
    return ', '.join(unique_sorted)
57
+
58
+
59
def numerical_match(answer1: str, answer2: str, atol: float = 0.01, rtol: float = 0.01) -> bool:
    """Check if two answers are numerically close within tolerance.

    Returns False when either string cannot be parsed as a float or does not
    support ``strip``.
    """
    try:
        left = float(answer1.strip())
        right = float(answer2.strip())
        close = math.isclose(left, right, abs_tol=atol, rel_tol=rtol)
    except (ValueError, AttributeError):
        return False
    else:
        return close
67
+
68
+
69
def answers_match(pred, actual):
    """Check if predicted answer matches actual answer.

    ``None`` on either side never matches. Otherwise the stripped string
    forms are compared exactly, then after MCQ normalization, then
    numerically with tolerance.
    """
    if pred is None or actual is None:
        return False

    predicted = str(pred).strip()
    observed = str(actual).strip()

    same_text = predicted == observed
    if same_text:
        return True

    same_choices = normalize_mcq_answer(predicted) == normalize_mcq_answer(observed)
    if same_choices:
        return True

    return numerical_match(predicted, observed)
86
+
87
+
88
def parse_args():
    """Parse the command line options of the context-length evaluation script.

    Returns:
        An ``argparse.Namespace`` with ``results_dir``, ``output_dir`` and
        ``no_plots``.
    """
    parser = argparse.ArgumentParser(description="Evaluate KT by context length")

    # (flags, default, help) for the two string-valued directory options.
    directory_options = (
        (("--results-dir", "-r"), "inference_data_kt_results", "Directory containing JSONL results files"),
        (("--output-dir", "-o"), "dataset_analysis/plots", "Directory to save output plots"),
    )
    for flags, default, help_text in directory_options:
        parser.add_argument(*flags, type=str, default=default, help=help_text)

    parser.add_argument("--no-plots", action="store_true", help="Skip generating plots")
    return parser.parse_args()
108
+
109
+
110
def extract_model_name(filename):
    """Extract model identifier from filename.

    Returns the first ``MODEL_NAMES`` key that the file's base name starts
    with (e.g. ``modelname_n500_bin10_hist50.jsonl``); when none matches,
    returns the base name with every ``'.jsonl'`` occurrence removed.
    """
    basename = os.path.basename(filename)
    known_key = next((key for key in MODEL_NAMES if basename.startswith(key)), None)
    if known_key is not None:
        return known_key
    return basename.replace('.jsonl', '')
118
+
119
+
120
def load_results(jsonl_path):
    """Load results from a JSONL file.

    Args:
        jsonl_path: Path of a file holding one JSON document per line.

    Returns:
        A list with the decoded object of every non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    results = []
    # Explicit UTF-8: without it the locale encoding is used, so the same
    # results file could decode differently from one machine to another.
    with open(jsonl_path, 'r', encoding='utf-8') as f:
        for line in f:
            if line.strip():
                results.append(json.loads(line))
    return results
128
+
129
+
130
def compute_metrics_by_bin(results):
    """Compute metrics grouped by history_size.

    Records are grouped on their ``history_size`` value (50 when the key is
    absent). For each group, in ascending key order, the result holds:
    ``n`` (group size), ``fkt_auc`` (AUC-ROC, or None when it cannot be
    computed), ``fkt_acc`` and ``fkt_valid`` (accuracy over, and number of,
    rows having both a score and a question-level prediction) and
    ``cognitive_acc`` (share of rows whose predicted answer matches).

    Returns:
        Dict mapping history size to the metrics dict described above.
    """
    grouped = defaultdict(list)
    for record in results:
        grouped[record.get('history_size', 50)].append(record)

    metrics = {}
    for history_size in sorted(grouped):
        bin_results = grouped[history_size]
        n = len(bin_results)

        # FKT: keep only rows with both a ground truth and a prediction.
        scored = [
            (int(r.get('actual_score')), int(r.get('predicted_question_level')))
            for r in bin_results
            if r.get('actual_score') is not None
            and r.get('predicted_question_level') is not None
        ]
        y_true = [truth for truth, _ in scored]
        y_pred = [guess for _, guess in scored]

        # AUC-ROC needs both classes in the ground truth.
        fkt_auc = None
        if y_true and len(set(y_true)) == 2:
            try:
                fkt_auc = roc_auc_score(y_true, y_pred)
            except ValueError:
                pass

        # FKT accuracy (for reference)
        fkt_hits = sum(1 for truth, guess in scored if truth == guess)
        fkt_acc = fkt_hits / len(y_true) if y_true else 0.0

        # Cognitive accuracy (answer match) over every row of the bin.
        cognitive_correct = sum(
            1 for r in bin_results
            if answers_match(r.get('predicted_student_answer'), r.get('actual_answer'))
        )

        metrics[history_size] = {
            'n': n,
            'fkt_auc': fkt_auc,
            'fkt_acc': fkt_acc,
            'fkt_valid': len(y_true),
            'cognitive_acc': cognitive_correct / n if n > 0 else 0.0,
        }

    return metrics
180
+
181
+
182
def print_table(all_metrics):
    """Print metrics table to console.

    Prints two tables — FKT AUC-ROC and cognitive accuracy — with one row per
    history size found in any model and one column per model. A model without
    a given history size (or, in the AUC table, with an AUC of None) shows
    ``N/A``.

    Args:
        all_metrics: Dict mapping model key to its per-history-size metrics.
    """
    model_keys = list(all_metrics.keys())
    all_history_sizes = sorted({
        hs
        for model_metrics in all_metrics.values()
        for hs in model_metrics.keys()
    })
    na_cell = f" {'N/A':>20}"
    rule = "-" * 80

    # Column header shared by both tables (display names cut to 20 chars).
    header = f"{'History':>8}" + "".join(
        f" {MODEL_NAMES.get(key, key)[:20]:>20}" for key in model_keys
    )

    print("\n" + "=" * 100)
    print("KT Performance by Context Length (History Size)")
    print("=" * 100)

    # FKT AUC-ROC table
    print("\nFKT AUC-ROC (Question-Level):")
    print(rule)
    print(header)
    print(rule)
    for hs in all_history_sizes:
        cells = [f"{hs:>8}"]
        for key in model_keys:
            if hs not in all_metrics[key]:
                cells.append(na_cell)
                continue
            auc = all_metrics[key][hs]['fkt_auc']
            cells.append(na_cell if auc is None else f" {auc:>20.3f}")
        print("".join(cells))

    # Cognitive accuracy table
    print("\nCognitive Accuracy (Answer Prediction):")
    print(rule)
    print(header)
    print(rule)
    for hs in all_history_sizes:
        cells = [f"{hs:>8}"]
        for key in model_keys:
            if hs in all_metrics[key]:
                acc = all_metrics[key][hs]['cognitive_acc']
                cells.append(f" {acc:>20.3f}")
            else:
                cells.append(na_cell)
        print("".join(cells))
232
+
233
+
234
def plot_results(all_metrics, output_dir):
    """Generate plot with all models.

    Draws two side-by-side panels — FKT AUC-ROC and cognitive accuracy versus
    history size — with one line per model, and saves the figure as
    ``kt_context_scaling.png`` inside ``output_dir`` (created if missing).

    Args:
        all_metrics: Dict mapping model key to its per-history-size metrics.
        output_dir: Directory in which the PNG is written.
    """
    os.makedirs(output_dir, exist_ok=True)

    fig, axes = plt.subplots(1, 2, figsize=(14, 5))
    auc_ax, acc_ax = axes[0], axes[1]

    # Plot 1: FKT AUC-ROC
    for model_key, metrics in all_metrics.items():
        history_sizes = sorted(metrics.keys())
        # Skip bins whose AUC could not be computed.
        valid_hs = [hs for hs in history_sizes if metrics[hs]['fkt_auc'] is not None]
        fkt_aucs = [metrics[hs]['fkt_auc'] for hs in valid_hs]

        if valid_hs:
            auc_ax.plot(
                valid_hs, fkt_aucs,
                marker='o', markersize=4,
                color=MODEL_COLORS.get(model_key, 'gray'),
                label=MODEL_NAMES.get(model_key, model_key),
                linewidth=2
            )

    # The chance-level reference line must exist BEFORE legend() is called;
    # the legend only lists artists present at that moment, so adding the
    # line afterwards left its 'Random' label out of the legend.
    auc_ax.axhline(y=0.5, color='gray', linestyle='--', alpha=0.5, label='Random')

    auc_ax.set_xlabel('History Size (# prior interactions)')
    auc_ax.set_ylabel('AUC-ROC')
    auc_ax.set_title('FKT AUC-ROC vs Context Length')
    auc_ax.legend(loc='best')
    auc_ax.grid(True, alpha=0.3)
    auc_ax.set_xlim(40, 410)

    # Plot 2: Cognitive Accuracy
    for model_key, metrics in all_metrics.items():
        history_sizes = sorted(metrics.keys())
        cognitive_accs = [metrics[hs]['cognitive_acc'] for hs in history_sizes]

        acc_ax.plot(
            history_sizes, cognitive_accs,
            marker='o', markersize=4,
            color=MODEL_COLORS.get(model_key, 'gray'),
            label=MODEL_NAMES.get(model_key, model_key),
            linewidth=2
        )

    acc_ax.set_xlabel('History Size (# prior interactions)')
    acc_ax.set_ylabel('Accuracy')
    acc_ax.set_title('Cognitive Modeling Accuracy vs Context Length')
    acc_ax.legend(loc='best')
    acc_ax.grid(True, alpha=0.3)
    acc_ax.set_xlim(40, 410)

    plt.tight_layout()

    plot_path = os.path.join(output_dir, 'kt_context_scaling.png')
    fig.savefig(plot_path, dpi=150)
    plt.close(fig)

    print(f"\nSaved: {plot_path}")
291
+
292
+
293
def main():
    """Evaluate every JSONL results file in the results directory.

    For each file: load it, compute per-history-size metrics and print the
    averages. Then print the detailed tables and, unless ``--no-plots`` was
    given, write the comparison plot.
    """
    args = parse_args()

    # Find all JSONL files
    pattern = os.path.join(args.results_dir, '*.jsonl')
    jsonl_files = glob(pattern)
    if not jsonl_files:
        print(f"No JSONL files found in {args.results_dir}")
        return

    print(f"Found {len(jsonl_files)} result files:")
    for found in jsonl_files:
        print(f" - {os.path.basename(found)}")

    # Load and analyze each model (sorted for a stable processing order).
    all_metrics = {}
    for jsonl_path in sorted(jsonl_files):
        model_key = extract_model_name(jsonl_path)
        print(f"\nProcessing {MODEL_NAMES.get(model_key, model_key)}...")

        results = load_results(jsonl_path)
        print(f" Loaded {len(results):,} predictions")

        metrics = compute_metrics_by_bin(results)
        all_metrics[model_key] = metrics

        # Quick summary: averages over history sizes (AUC skips None bins).
        per_bin = [metrics[hs] for hs in sorted(metrics.keys())]
        valid_aucs = [m['fkt_auc'] for m in per_bin if m['fkt_auc'] is not None]
        avg_auc = np.mean(valid_aucs) if valid_aucs else 0.0
        avg_cognitive = np.mean([m['cognitive_acc'] for m in per_bin])
        print(f" Avg FKT AUC-ROC: {avg_auc:.3f}")
        print(f" Avg Cognitive accuracy: {avg_cognitive:.3f}")

    # Print detailed table
    print_table(all_metrics)

    # Generate plot
    if not args.no_plots:
        plot_results(all_metrics, args.output_dir)

    print("\nDone!")


if __name__ == "__main__":
    main()
Code/find_duplicate_problem_body.py ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """Export rows that share the same Problem Body but have different problem_id.
3
+
4
+ Default input:
5
+ - ../Data/Problems.csv
6
+
7
+ Default output:
8
+ - ../Results/Problems_same_body_different_problem_id.csv
9
+
10
+ The output includes all original columns plus:
11
+ - duplicate_group_id
12
+ - distinct_problem_id_count
13
+ - distinct_problem_ids
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import argparse
19
+ import csv
20
+ import html
21
+ import re
22
+ from collections import defaultdict
23
+ from pathlib import Path
24
+ from typing import Dict, List, Set
25
+
26
_TAG_RE = re.compile(r"<[^>]+>")
_WS_RE = re.compile(r"\s+")


def normalize_body(text: str, strip_html: bool, collapse_whitespace: bool) -> str:
    """Normalize Problem Body text for grouping.

    HTML entities are always decoded. Tags are removed only when
    ``strip_html`` is set; whitespace runs are collapsed to one space and the
    ends trimmed only when ``collapse_whitespace`` is set.

    Args:
        text: Raw body text; a falsy value (``None``, ``""``) is treated as
            the empty string.
        strip_html: Remove ``<...>`` tags.
        collapse_whitespace: Collapse whitespace runs and strip the ends.

    Returns:
        The normalized text.
    """
    # NOTE(review): entities are decoded before tags are stripped, so text
    # such as "a &lt; b &gt; c" also loses its "< b >" part when strip_html
    # is set — confirm that this is intended.
    value = html.unescape(text if text else "")
    if strip_html:
        value = _TAG_RE.sub("", value)
    if not collapse_whitespace:
        return value
    return _WS_RE.sub(" ", value).strip()
38
+
39
+
40
def main() -> None:
    """Export rows whose Problem Body is shared by several problem_id values.

    Reads the input CSV, groups rows on their normalized body (rows whose
    normalized body is empty are ignored), keeps the groups that contain
    more than one distinct non-empty problem_id, and writes every row of
    those groups to the output CSV with three extra columns:
    ``duplicate_group_id``, ``distinct_problem_id_count`` and
    ``distinct_problem_ids`` (sorted, ``;``-joined).

    Raises:
        ValueError: If the body or id column is missing from the input CSV.
    """
    parser = argparse.ArgumentParser(
        description=(
            "Find rows in Problems.csv where the same Problem Body is mapped "
            "to different problem_id values."
        )
    )
    parser.add_argument("--input-csv", type=Path, default=Path("../Data/Problems.csv"), help="Path to Problems.csv")
    parser.add_argument(
        "--output-csv",
        type=Path,
        default=Path("../Results/Problems_same_body_different_problem_id.csv"),
        help="Output CSV path",
    )
    parser.add_argument("--body-column", type=str, default="Problem Body", help="Column name for problem statement text")
    parser.add_argument("--id-column", type=str, default="problem_id", help="Column name for problem identifier")
    parser.add_argument("--strip-html", action="store_true", help="Strip HTML tags before grouping")
    parser.add_argument("--collapse-whitespace", action="store_true", help="Collapse runs of whitespace before grouping")
    args = parser.parse_args()

    input_csv = args.input_csv.resolve()
    output_csv = args.output_csv.resolve()

    with input_csv.open("r", encoding="utf-8", newline="") as f:
        reader = csv.DictReader(f)
        fieldnames = reader.fieldnames or []

        if args.body_column not in fieldnames:
            raise ValueError(f"Missing body column '{args.body_column}' in {input_csv}")
        if args.id_column not in fieldnames:
            raise ValueError(f"Missing id column '{args.id_column}' in {input_csv}")

        rows: List[dict] = list(reader)

    # body key -> row indices, and body key -> distinct non-empty ids.
    groups: Dict[str, List[int]] = defaultdict(list)
    group_ids: Dict[str, Set[str]] = defaultdict(set)
    for idx, row in enumerate(rows):
        body_key = normalize_body(
            row.get(args.body_column, ""),
            strip_html=args.strip_html,
            collapse_whitespace=args.collapse_whitespace,
        )
        if not body_key:
            continue
        groups[body_key].append(idx)

        problem_id = str(row.get(args.id_column, "")).strip()
        if problem_id:
            group_ids[body_key].add(problem_id)

    # Bodies mapped to several ids, in first-seen order of the groups.
    duplicate_keys = sorted(
        (key for key, ids in group_ids.items() if len(ids) > 1),
        key=lambda key: groups[key][0],
    )

    output_rows: List[dict] = []
    for group_num, key in enumerate(duplicate_keys, start=1):
        ids_sorted = sorted(group_ids[key])
        extra_columns = {
            "duplicate_group_id": str(group_num),
            "distinct_problem_id_count": str(len(ids_sorted)),
            "distinct_problem_ids": ";".join(ids_sorted),
        }
        output_rows.extend({**rows[row_idx], **extra_columns} for row_idx in groups[key])

    output_csv.parent.mkdir(parents=True, exist_ok=True)
    output_fieldnames = fieldnames + [
        "duplicate_group_id",
        "distinct_problem_id_count",
        "distinct_problem_ids",
    ]
    with output_csv.open("w", encoding="utf-8", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=output_fieldnames)
        writer.writeheader()
        writer.writerows(output_rows)

    print(f"Input rows: {len(rows)}")
    print(f"Duplicate body groups (different problem_id): {len(duplicate_keys)}")
    print(f"Output rows: {len(output_rows)}")
    print(f"Wrote: {output_csv}")


if __name__ == "__main__":
    main()
Code/find_duplicate_problem_id.py ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """Export rows that have duplicated problem_id values.
3
+
4
+ Default input:
5
+ - ../Data/Problems.csv
6
+
7
+ Default output:
8
+ - ../Results/Problems_duplicated_problem_id.csv
9
+
10
+ The output includes all original columns plus:
11
+ - duplicate_group_id
12
+ - duplicate_problem_id_count
13
+ - distinct_problem_body_count
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import argparse
19
+ import csv
20
+ import html
21
+ import re
22
+ from collections import defaultdict
23
+ from pathlib import Path
24
+ from typing import Dict, List, Set
25
+
26
_TAG_RE = re.compile(r"<[^>]+>")
_WS_RE = re.compile(r"\s+")


def normalize_body(text: str, strip_html: bool, collapse_whitespace: bool) -> str:
    """Normalize Problem Body text for distinct-body counting.

    HTML entities are always decoded; tags are removed when ``strip_html`` is
    set; whitespace runs are collapsed to a single space and the ends
    trimmed when ``collapse_whitespace`` is set. A falsy ``text`` is treated
    as the empty string.
    """
    decoded = html.unescape(text or "")
    without_tags = _TAG_RE.sub("", decoded) if strip_html else decoded
    if collapse_whitespace:
        return _WS_RE.sub(" ", without_tags).strip()
    return without_tags
38
+
39
+
40
def main() -> None:
    """Export rows whose problem_id appears on more than one row.

    Reads the input CSV, groups rows on their stripped problem_id (rows with
    an empty id are ignored) and writes every row of the groups holding more
    than one row to the output CSV with three extra columns:
    ``duplicate_group_id``, ``duplicate_problem_id_count`` and
    ``distinct_problem_body_count`` (number of distinct normalized bodies in
    the group).

    Raises:
        ValueError: If the id or body column is missing from the input CSV.
    """
    parser = argparse.ArgumentParser(
        description="Find rows in Problems.csv where problem_id is duplicated."
    )
    parser.add_argument("--input-csv", type=Path, default=Path("../Data/Problems.csv"), help="Path to Problems.csv")
    parser.add_argument(
        "--output-csv",
        type=Path,
        default=Path("../Results/Problems_duplicated_problem_id.csv"),
        help="Output CSV path",
    )
    parser.add_argument("--id-column", type=str, default="problem_id", help="Column name for problem identifier")
    parser.add_argument("--body-column", type=str, default="Problem Body", help="Column name for problem statement text")
    parser.add_argument(
        "--strip-html",
        action="store_true",
        help="Strip HTML tags before counting distinct problem bodies",
    )
    parser.add_argument(
        "--collapse-whitespace",
        action="store_true",
        help="Collapse runs of whitespace before counting distinct problem bodies",
    )
    args = parser.parse_args()

    input_csv = args.input_csv.resolve()
    output_csv = args.output_csv.resolve()

    with input_csv.open("r", encoding="utf-8", newline="") as f:
        reader = csv.DictReader(f)
        fieldnames = reader.fieldnames or []

        if args.id_column not in fieldnames:
            raise ValueError(f"Missing id column '{args.id_column}' in {input_csv}")
        if args.body_column not in fieldnames:
            raise ValueError(f"Missing body column '{args.body_column}' in {input_csv}")

        rows: List[dict] = list(reader)

    # problem_id -> indices of the rows carrying it.
    groups: Dict[str, List[int]] = defaultdict(list)
    for idx, row in enumerate(rows):
        problem_id = str(row.get(args.id_column, "")).strip()
        if problem_id:
            groups[problem_id].append(idx)

    # Ids seen on several rows, in first-seen order of the groups.
    duplicate_ids = sorted(
        (pid for pid, row_idxs in groups.items() if len(row_idxs) > 1),
        key=lambda pid: groups[pid][0],
    )

    output_rows: List[dict] = []
    for group_num, pid in enumerate(duplicate_ids, start=1):
        row_idxs = groups[pid]
        distinct_bodies: Set[str] = {
            normalize_body(
                rows[row_idx].get(args.body_column, ""),
                strip_html=args.strip_html,
                collapse_whitespace=args.collapse_whitespace,
            )
            for row_idx in row_idxs
        }
        extra_columns = {
            "duplicate_group_id": str(group_num),
            "duplicate_problem_id_count": str(len(row_idxs)),
            "distinct_problem_body_count": str(len(distinct_bodies)),
        }
        output_rows.extend({**rows[row_idx], **extra_columns} for row_idx in row_idxs)

    output_csv.parent.mkdir(parents=True, exist_ok=True)
    output_fieldnames = fieldnames + [
        "duplicate_group_id",
        "duplicate_problem_id_count",
        "distinct_problem_body_count",
    ]
    with output_csv.open("w", encoding="utf-8", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=output_fieldnames)
        writer.writeheader()
        writer.writerows(output_rows)

    print(f"Input rows: {len(rows)}")
    print(f"Duplicated {args.id_column} groups: {len(duplicate_ids)}")
    print(f"Output rows: {len(output_rows)}")
    print(f"Wrote: {output_csv}")


if __name__ == "__main__":
    main()
Code/gptoss120bvllmmcq.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Knowledge Tracing inference with GPT-OSS-120B model.
3
+
4
+ Usage:
5
+ CUDA_VISIBLE_DEVICES=0,1,2,3 python gptoss120bvllmmcq.py \
6
+ --data-dir foundationalktdataset/ \
7
+ --num-gpus 4 \
8
+ --batch-size 10 \
9
+ --cache-dir /data1/ \
10
+ --num-students 500 \
11
+ --bin-size 50 \
12
+ --min-history 50
13
+ """
14
+
15
+ from kt_inference_base import run_inference
16
+
17
# Model-specific settings handed to run_inference: the model id, the
# generation parameters, the prefix of the output files and the text
# prepended to the system prompt.
MODEL_CONFIG = dict(
    model_id="openai/gpt-oss-120b",
    gen_configs=dict(
        temperature=0.7,
        top_p=0.95,
        top_k=20,
        max_tokens=32768,
        repetition_penalty=1.0,
    ),
    output_prefix="gptoss120b",
    system_prompt_prefix="Reasoning: medium\n\n",
)

if __name__ == "__main__":
    run_inference(MODEL_CONFIG)
Code/kt_inference_base.py ADDED
@@ -0,0 +1,849 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Base module for Knowledge Tracing LLM inference.
3
+
4
+ This module contains all shared logic for running KT inference with different models.
5
+ Each model script imports this and provides model-specific configuration.
6
+
7
+ Usage in model scripts:
8
+ from kt_inference_base import run_inference
9
+
10
+ MODEL_CONFIG = {
11
+ "model_id": "model/name",
12
+ "gen_configs": {...},
13
+ "output_prefix": "prefix",
14
+ "system_prompt_prefix": "", # e.g., "Reasoning: medium\n\n"
15
+ }
16
+
17
+ if __name__ == "__main__":
18
+ run_inference(MODEL_CONFIG)
19
+ """
20
+
21
+ import argparse
22
+ import contextlib
23
+ import os
24
+ from vllm import LLM, SamplingParams
25
+ import pandas as pd
26
+ import gc
27
+ import torch
28
+ from vllm.distributed.parallel_state import (
29
+ destroy_model_parallel,
30
+ destroy_distributed_environment,
31
+ )
32
+ import json
33
+ import re
34
+ import numpy as np
35
+ from tqdm import tqdm
36
+ from multiprocessing import Pool, cpu_count
37
+ from clean_utils import clean_problem_body
38
+ from cleantext import clean_text as clean_text_legacy
39
+
40
+
41
class NumpyEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy values into native Python types.

    ``json`` only knows about built-in types, so values that come out of
    pandas/NumPy (``np.int64``, ``np.float32``, ``np.bool_``, arrays, ...)
    would otherwise raise ``TypeError`` when a result record is serialized.
    """

    def default(self, obj):
        """Return a JSON-serializable replacement for *obj*.

        Args:
            obj: The object the stock encoder could not serialize.

        Returns:
            A list for ``np.ndarray`` and the equivalent Python scalar for any
            NumPy scalar.

        Raises:
            TypeError: For every other unsupported type (raised by the base class).
        """
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if isinstance(obj, np.generic):
            # np.generic is the common base of all NumPy scalars, so this also
            # covers np.bool_, which is neither np.integer nor np.floating.
            return obj.item()
        return super().default(obj)
51
+
52
+
53
# Batch processing config defaults
# Each value is the fallback for the command-line option of the same name
# declared in parse_args().
DEFAULT_BATCH_SIZE = 10000  # --batch-size: batch size for LLM inference
DEFAULT_NUM_STUDENTS = 500  # --num-students: students to sample (0 or -1 means all)
DEFAULT_BIN_SIZE = 50  # --bin-size: size of each prediction bin
DEFAULT_MIN_HISTORY = 50  # --min-history: history size required before predicting

# Input file names
# run_inference() joins these onto the --data-dir directory.
STUDENT_FILE = "Interactions.csv"
PROBLEMS_FILE = "Problems.csv"
SKILL_FILE = "Skills.csv"
63
+
64
+ # Base system prompt (without any prefix like "Reasoning: medium")
65
+ BASE_SYSTEM_PROMPT = """You are a reasoning model trained to simulate a student's evolving knowledge and response behavior in mathematics.
66
+
67
+ Your goal is to infer, from past problem–answer pairs, how this same student is likely to perform on a new problem — at multiple levels of granularity.
68
+
69
+ You must reason about the student's learning progression, skill mastery, and recurring misconceptions, then produce structured predictions for the new item.
70
+
71
+ ---
72
+
73
+ Your Task:
74
+
75
+ Generate three coordinated predictions for this student:
76
+
77
+ 1) **Skill-level knowledge tracing (0 or 1):** Whether the student has mastered the underlying skill involved in the new problem.
78
+ 2) **Question-level knowledge tracing (0 or 1):** Whether the student will answer this specific problem correctly.
79
+ 3) **Cognitive-level prediction (string):** The exact answer text or option the student would most likely produce, written in their own response style.
80
+
81
+ ---
82
+
83
+ Reasoning Guidelines:
84
+
85
+ - Use the student's historical data (problems, answers, hints, timestamps) to infer learning and forgetting patterns.
86
+ - Consider recency and exposure: later timestamps often indicate updated knowledge.
87
+ - Treat `UsedHint=True` or `SawAnswer=True` as evidence that the student's recorded answer may not reflect true mastery — they might have seen or been helped toward the solution.
88
+ - Attend to how the student's accuracy, style, and misconceptions evolve over time.
89
+ - You may think step-by-step internally, but your final output must follow the format below.
90
+ ---
91
+
92
+ Output Format:
93
+
94
+ When you are done reasoning, **finish your response with** the JSON object in this exact structure:
95
+
96
+ For Multiple Choice (select 1) problems:
97
+ {
98
+ "skill_level": 0 or 1,
99
+ "question_level": 0 or 1,
100
+ "student_answer": "A" (single letter only)
101
+ }
102
+
103
+ For Multiple Choice (select all) problems:
104
+ {
105
+ "skill_level": 0 or 1,
106
+ "question_level": 0 or 1,
107
+ "student_answer": "A, C" (comma-separated letters if multiple selections)
108
+ }
109
+
110
+ For Fill-in problems:
111
+ {
112
+ "skill_level": 0 or 1,
113
+ "question_level": 0 or 1,
114
+ "student_answer": "<string exactly as this student would write (e.g., 'x=3', '3/5', '12')>"
115
+ }
116
+
117
+ Predictions must be consistent. If you predict question_level to be 1, then student_answer must match the correct answer. If you predict question_level to be 0, student_answer must not match the correct answer."""
118
+
119
+
120
def parse_args(default_output_jsonl):
    """Build the command-line interface and parse ``sys.argv``.

    Args:
        default_output_jsonl: Accepted for compatibility with callers; the
            value is not used when building the parser.

    Returns:
        argparse.Namespace holding the batch, data, sampling and vLLM options.
    """
    # One (flags, keyword-arguments) pair per option, in the order they
    # should appear in --help.
    option_table = [
        (("--batch-size", "-b"), dict(
            type=int,
            default=DEFAULT_BATCH_SIZE,
            help=f"Batch size for LLM inference (default: {DEFAULT_BATCH_SIZE})",
        )),
        (("--output", "-o"), dict(
            type=str,
            default=None,
            help="Output JSONL file path (overrides auto-generated name)",
        )),
        (("--output-dir",), dict(
            type=str,
            default=".",
            help="Output directory for results (default: current directory)",
        )),
        (("--data-dir", "-d"), dict(
            type=str,
            default=".",
            help="Directory containing input CSV files (default: current directory)",
        )),
        (("--cache-dir", "-c"), dict(
            type=str,
            default=None,
            help="Directory for vLLM model cache (default: vLLM default)",
        )),
        (("--num-students", "-n"), dict(
            type=int,
            default=DEFAULT_NUM_STUDENTS,
            help=f"Number of students to sample (default: {DEFAULT_NUM_STUDENTS}, use 0 or -1 for all students)",
        )),
        (("--bin-size",), dict(
            type=int,
            default=DEFAULT_BIN_SIZE,
            help=f"Size of each prediction bin (default: {DEFAULT_BIN_SIZE})",
        )),
        (("--min-history",), dict(
            type=int,
            default=DEFAULT_MIN_HISTORY,
            help=f"Minimum history size before making predictions (default: {DEFAULT_MIN_HISTORY})",
        )),
        (("--num-gpus",), dict(
            type=int,
            default=1,
            help="Number of GPUs for tensor parallelism (default: 1)",
        )),
        (("--max-num-seqs",), dict(
            type=int,
            default=None,
            help="Maximum number of sequences to process in a batch (vLLM, default: 256)",
        )),
        (("--reasoning-level",), dict(
            type=str,
            choices=["none", "low", "medium", "high"],
            default=None,
            help="Reasoning level for GPT-OSS models only. Default: uses model config (medium for GPT-OSS, none for Qwen)",
        )),
        (("--max-model-len",), dict(
            type=int,
            default=None,
            help="Maximum sequence length in tokens (vLLM, default: model's context length)",
        )),
        (("--gpu-memory-utilization",), dict(
            type=float,
            default=0.9,
            help="Fraction of GPU memory to use (vLLM, default: 0.9, range: 0.0-1.0)",
        )),
        (("--legacy-clean",), dict(
            action="store_true",
            default=False,
            help="Use legacy text cleaner (cleantext.py) instead of clean_utils.py",
        )),
    ]

    cli = argparse.ArgumentParser(description="Knowledge Tracing with LLM")
    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)
    return cli.parse_args()
209
+
210
+
211
def label_answer_options(answer_string):
    """Convert a ``||``-delimited option string into a letter-keyed dict.

    Example:
        Input:  "Han is correct || Elena is correct || Both are correct"
        Output: {"A": "Han is correct", "B": "Elena is correct", "C": "Both are correct"}

    Args:
        answer_string: Raw options cell. Non-string scalars (e.g. a number
            parsed by pandas) are converted with ``str`` before splitting.

    Returns:
        Dict mapping "A", "B", ... to the stripped option texts in their
        original order, or None when the cell is missing, empty or blank.
        Options beyond the 26th have no letter and are dropped.
    """
    from string import ascii_uppercase

    if pd.isna(answer_string):
        return None

    raw = str(answer_string)
    if not raw.strip():
        return None

    options = [opt.strip() for opt in raw.split('||')]
    # zip stops at the shorter input, so at most 26 options are labelled.
    return dict(zip(ascii_uppercase, options))
223
+
224
+
225
def clean_html_and_normalize(text):
    """Strip HTML tags and normalize whitespace so two texts can be compared.

    Args:
        text: Any scalar; missing values (None/NaN) yield an empty string.

    Returns:
        The text without ``<...>`` tags, with runs of whitespace collapsed to
        single spaces and with no spaces around colons.
    """
    if pd.isna(text):
        return ""
    # Remove HTML tags
    text = re.sub(r'<[^>]+>', '', str(text))
    # Normalize whitespace
    text = ' '.join(text.split())
    # Remove extra spaces around colons
    text = re.sub(r'\s*:\s*', ':', text)
    return text.strip()


def match_student_answer_to_letters(student_answer_text, answer_options_dict):
    """Translate a student's free-text selections into option letters.

    Args:
        student_answer_text: String like "Answer A text , Answer C text"; the
            individual selections are separated by " , ".
        answer_options_dict: Dict like {"A": "Answer A text", "B": "Answer B text", ...}

    Returns:
        A sorted, de-duplicated string like "A, C" when at least one selection
        matches an option; otherwise the original ``student_answer_text``.
    """
    if pd.isna(student_answer_text) or not isinstance(answer_options_dict, dict) or not answer_options_dict:
        return student_answer_text

    # Clean and normalize all options once for comparison
    normalized_options = {
        letter: clean_html_and_normalize(text)
        for letter, text in answer_options_dict.items()
    }

    matched_letters = set()
    # " , " (comma surrounded by spaces) is the delimiter used in the recorded answers
    for student_ans in str(student_answer_text).split(' , '):
        normalized_student = clean_html_and_normalize(student_ans)
        if not normalized_student:
            # A blank selection must not match anything: "" is a substring of
            # every option and would otherwise be reported as the first letter.
            continue

        # Prefer an exact match ...
        letter = next(
            (key for key, option in normalized_options.items() if option == normalized_student),
            None,
        )
        if letter is None:
            # ... then fall back to containment in either direction. Options
            # that normalize to "" are skipped for the same reason as above.
            letter = next(
                (
                    key
                    for key, option in normalized_options.items()
                    if option and (normalized_student in option or option in normalized_student)
                ),
                None,
            )
        if letter is not None:
            matched_letters.add(letter)

    if matched_letters:
        return ', '.join(sorted(matched_letters))
    return student_answer_text
284
+
285
+
286
def get_correct_option_letters(answer_options, correct_answers):
    """Determine which option letter(s) correspond to the correct answer(s).

    Args:
        answer_options: Dict like {"A": "Han is correct", "B": "Elena is correct", ...}
        correct_answers: ``||``-delimited answer key such as "Both are correct"
            or "Han is correct || Elena is correct". Non-string scalars are
            converted with ``str`` before splitting.

    Returns:
        A string like "C" or "A, B" when at least one option text equals a
        correct answer. ``correct_answers`` is returned unchanged when there
        are no usable options, the answer key is missing, or nothing matches.
    """
    if not isinstance(answer_options, dict) or not answer_options or pd.isna(correct_answers):
        return correct_answers

    # Split correct answers if multiple; a set makes the membership test O(1)
    correct_texts = {ans.strip() for ans in str(correct_answers).split('||')}

    correct_letters = sorted(
        letter for letter, text in answer_options.items() if text in correct_texts
    )
    return ', '.join(correct_letters) if correct_letters else correct_answers
310
+
311
+
312
def format_answer_options_for_prompt(answer_options):
    """Render a letter-to-text mapping as one "LETTER) text" line per option.

    Input: {"A": "Han is correct", "B": "Elena is correct", ...}
    Output: "A) Han is correct\\nB) Elena is correct\\n..."

    Returns None when there are no options to render.
    """
    if answer_options:
        rendered_lines = (
            f"{label}) {option_text}"
            for label, option_text in answer_options.items()
        )
        return '\n'.join(rendered_lines)
    return None
322
+
323
+
324
def create_user_prompt(student_history, new_problem, problem_df):
    """Assemble the user prompt: task text, the student's history, the new item.

    Args:
        student_history: List of dicts. Each must provide timestamp,
            problem_text, problem_type, correct_answer, student_answer,
            used_hint and saw_answer; answer_options_formatted and node_name
            are optional.
        new_problem: Dict providing timestamp, problem_text, problem_type and
            correct_answer; answer_options_formatted and node_name are optional.
        problem_df: Not used by this function.

    Returns:
        The complete prompt as a single string.
    """
    def skill_line(entry):
        # A missing/empty skill name is rendered as "Undefined".
        skill = entry.get('node_name')
        if skill:
            return f"Skill: {skill}\n"
        return "Skill: Undefined\n"

    # The two triple-quoted literals below are written flush-left on purpose:
    # their content is emitted verbatim.
    pieces = [
        "Task Description:\n\n",
        "Your task is to model a single student's learning process and predict how they will respond to a new mathematics problem based on their prior work.\n\n",
        """You will produce three coordinated predictions:

1) **Skill-level knowledge tracing (0 or 1):** Predict whether this student has mastered the underlying skill involved in the new problem.
2) **Question-level knowledge tracing (0 or 1):** Predict whether this student will answer this specific problem correctly.
3) **Cognitive-level prediction (string):** Generate the exact answer the student would most likely produce.
- For Multiple Choice (select 1): Predict a single letter (e.g., "A" or "B")
- For Multiple Choice (select all): Predict comma-separated letters (e.g., "A, C" or "B, D")
- For Fill-in problems: Predict the exact text the student would write
""",
        """---

Provided Data:

You will receive:
- ProblemID: <id>
- Timestamp: <timestamp>
- Problem: <problem text>
- Problem Type: Multiple Choice (select 1) / Multiple Choice (select all) / Fill-in Problem
- Options: Answer choices in format "A) ...\nB) ...\nC) ..."
- Correct Answer(s): The letter(s) or text of correct answer(s)
- Student's First Answer: Letter(s) or fill-in text
- UsedHint: <True/False>
- SawAnswer: <True/False>
- Skill: <skill_name_or_id>
- A new problem (with optional answer choices), skill metadata, and context flags (`UsedHint`, `SawAnswer`).

# About the context flags:
- **UsedHint = True** → The student viewed or used a hint while solving this problem.
- **SawAnswer = True** → The student saw the correct answer before or during the attempt.
When either of these flags is True, treat the corresponding response as *less reliable evidence of mastery* — it indicates that the student has not fully learned the concept and required help solving the problem.
""",
        "**Student's Previous Problems:**\n\n",
    ]

    # One record per past interaction, each terminated by a "---" separator.
    for past in student_history:
        pieces.append(f"Timestamp: {past['timestamp']}\n")
        pieces.append(f"Problem: {past['problem_text']}\n")
        pieces.append(f"Problem Type: {past['problem_type']}\n")
        if past.get('answer_options_formatted'):
            pieces.append(f"Options:\n{past['answer_options_formatted']}\n")
        pieces.append(f"Correct Answer: {past['correct_answer']}\n")
        pieces.append(f"Student's First Answer: {past['student_answer']}\n")
        pieces.append(f"UsedHint: {past['used_hint']}\n")
        pieces.append(f"SawAnswer: {past['saw_answer']}\n")
        pieces.append(skill_line(past))
        pieces.append("---\n\n")

    # The item to predict carries no student answer or help flags.
    pieces.append("**New Problem to Predict:**\n\n")
    pieces.append(f"Timestamp: {new_problem['timestamp']}\n")
    pieces.append(f"Problem: {new_problem['problem_text']}\n")
    pieces.append(f"Problem Type: {new_problem['problem_type']}\n")
    if new_problem.get('answer_options_formatted'):
        pieces.append(f"Answer Options:\n{new_problem['answer_options_formatted']}\n")
    pieces.append(f"Correct Answer: {new_problem['correct_answer']}\n")
    pieces.append(skill_line(new_problem))

    return "".join(pieces)
400
+
401
+
402
def extract_json_prediction(response_text):
    """Extract the final JSON prediction object from a model response.

    The response is scanned left to right with a real JSON parser and the last
    non-empty top-level object is returned. This copes with values that contain
    braces (e.g. ``"{1, 2}"``), nested objects, escaped quotes and non-ASCII
    characters, none of which survive a brace-matching regular expression.

    Only when no object parses does the function fall back to the legacy
    behavior: take the last ``{...}`` regex match, decode literal escape
    sequences (e.g. a backslash followed by ``n`` emitted instead of a real
    newline) and parse the result.

    Args:
        response_text: Raw text generated by the model.

    Returns:
        The parsed dict, or None when the response is empty or contains no
        parseable JSON object.
    """
    if not response_text:
        return None

    decoder = json.JSONDecoder()
    prediction = None
    start = response_text.find('{')
    while start != -1:
        try:
            parsed, stop = decoder.raw_decode(response_text, start)
        except json.JSONDecodeError:
            # Not valid JSON from this brace; try the next one.
            start = response_text.find('{', start + 1)
            continue
        if parsed:
            # Keep the most recent non-empty object; the prompt asks the
            # model to finish with the prediction.
            prediction = parsed
        # Resume after the parsed object so nested objects are not revisited.
        start = response_text.find('{', stop)

    if prediction is not None:
        return prediction

    # Legacy fallback for responses whose JSON contains literal escape sequences.
    json_matches = re.findall(r'\{[\s\S]*?\}', response_text)
    if json_matches:
        # Take the last JSON object
        json_str = json_matches[-1]
        try:
            # Decode escape sequences (like \n) before parsing
            json_str = json_str.encode().decode('unicode_escape')
            json_str = json_str.strip()
            return json.loads(json_str)
        except json.JSONDecodeError as e:
            print(f"JSON decode error: {e}")
            print(f"Attempted to parse:\n{json_str}")
        except Exception as e:
            print(f"Error processing JSON: {e}")
    return None
421
+
422
+
423
def get_prediction_id(meta):
    """Build the unique prediction key "<user_id>_<bin_number>_<prediction_type>"."""
    id_fields = ('user_id', 'bin_number', 'prediction_type')
    return "_".join(str(meta[field]) for field in id_fields)
426
+
427
+
428
def load_completed_predictions(output_jsonl):
    """Load the IDs of already-completed predictions from a JSONL results file.

    Used for resume support. Blank lines are ignored. Lines that are not valid
    JSON or lack a ``prediction_id`` (for example a record truncated by a crash
    in the middle of a write) are skipped with a warning instead of aborting,
    so an interrupted run can always be resumed; the affected predictions are
    simply treated as not completed.

    Args:
        output_jsonl: Path of the results file; it does not have to exist.

    Returns:
        Set of ``prediction_id`` values found in the file (empty if the file
        does not exist).
    """
    completed = set()
    if not os.path.exists(output_jsonl):
        return completed

    skipped = 0
    # Results are written with json.dumps defaults (ASCII-only output), so
    # reading as UTF-8 is safe regardless of the platform's locale encoding.
    with open(output_jsonl, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                completed.add(json.loads(line)['prediction_id'])
            except (json.JSONDecodeError, KeyError, TypeError):
                skipped += 1

    if skipped:
        print(f"Warning: skipped {skipped} malformed line(s) in {output_jsonl}")
    print(f"Loaded {len(completed)} completed predictions from {output_jsonl}")
    return completed
439
+
440
+
441
def _correct_answer_for(row):
    """Return the answer key to show for one merged interaction row.

    ``run_inference`` stores the per-problem key in ``correct_answers``
    (option letters for multiple-choice items, the fill-in text otherwise).
    The raw ``Fill-in Answers`` cell is used only when that column is absent
    or empty, which keeps rows built without it working as before.
    """
    answer = row.get('correct_answers')
    if pd.isna(answer):
        return row['Fill-in Answers']
    return answer


def _build_history_entry(row):
    """Convert one merged interaction row into a history item for the prompt."""
    return {
        'problem_id': row['problem_id'],
        'timestamp': row['end_time'],
        'problem_text': row['cleaned body'],
        'correct_answer': _correct_answer_for(row),
        'answer_options': row['answer_options'] if pd.notna(row['answer_options']) else None,
        'answer_options_formatted': row['answer_options_formatted'] if pd.notna(row.get('answer_options_formatted')) else None,
        'student_answer': row['answer_text'],
        'used_hint': row['hint_count'] > 0,
        'saw_answer': row['saw_answer'],
        'problem_type': row['Problem Type'],
        'node_name': row.get('node_name'),
    }


def make_process_single_user(system_prompt):
    """Create a ``process_single_user`` function bound to *system_prompt*.

    Args:
        system_prompt: Text placed before every generated user prompt.

    Returns:
        A function taking one ``(user_id, user_records, min_history, bin_size)``
        tuple and returning ``(prompts, metadata)`` lists of equal length.
    """
    def process_single_user(args):
        """Build prompts and metadata for a single user's interaction records.

        The first ``min_history`` records seed the history. The remaining
        records are cut into consecutive bins of ``bin_size``; a trailing
        partial bin is ignored. For every bin, one prompt is produced for the
        first correctly answered item and one for the first incorrectly
        answered item (when present). The history used for a bin contains all
        records before that bin.

        Raises:
            ValueError: If ``bin_size`` is not positive.
        """
        user_id, user_records, min_history, bin_size = args

        if bin_size <= 0:
            raise ValueError("bin_size must be a positive integer")

        prompts = []
        metadata = []

        # The user needs at least min_history + 1 rows to predict anything
        if len(user_records) < min_history + 1:
            return prompts, metadata

        num_bins = (len(user_records) - min_history) // bin_size

        # Build initial history
        student_history = [_build_history_entry(row) for row in user_records[:min_history]]

        for bin_idx in range(num_bins):
            bin_start = min_history + (bin_idx * bin_size)
            bin_end = bin_start + bin_size

            # Extend history with the previous bin's items
            if bin_idx > 0:
                student_history.extend(
                    _build_history_entry(row)
                    for row in user_records[bin_start - bin_size:bin_start]
                )

            current_bin = user_records[bin_start:bin_end]

            # Find first correct and first incorrect item in this bin
            first_correct_idx = next(
                (bin_start + offset for offset, row in enumerate(current_bin)
                 if row['discrete_score'] == 1),
                None,
            )
            first_incorrect_idx = next(
                (bin_start + offset for offset, row in enumerate(current_bin)
                 if row['discrete_score'] == 0),
                None,
            )

            # Create predictions for found cases
            for target_idx, prediction_type in [
                (first_correct_idx, 'correct'),
                (first_incorrect_idx, 'incorrect')
            ]:
                if target_idx is None:
                    continue

                target_row = user_records[target_idx]
                correct_answer = _correct_answer_for(target_row)
                new_problem = {
                    'problem_text': target_row['cleaned body'],
                    'correct_answer': correct_answer,
                    'answer_options': target_row['answer_options'] if pd.notna(target_row['answer_options']) else None,
                    'answer_options_formatted': target_row['answer_options_formatted'] if pd.notna(target_row.get('answer_options_formatted')) else None,
                    'problem_type': target_row['Problem Type'],
                    'timestamp': target_row['end_time'],
                    'node_name': target_row.get('node_name')
                }

                user_prompt = create_user_prompt(student_history, new_problem, None)
                full_prompt = system_prompt + "\n\n" + user_prompt

                prompts.append(full_prompt)
                metadata.append({
                    'prediction_id': f"{user_id}_{bin_idx}_{prediction_type}",
                    'row_index': target_idx,
                    'user_id': user_id,
                    'history_size': len(student_history),
                    'bin_number': bin_idx,
                    'prediction_type': prediction_type,
                    'id': target_row.get('id_x', None),
                    'problem_id': target_row.get('problem_id', None),
                    'problem_type': target_row['Problem Type'],
                    'actual_answer': target_row['answer_text'],
                    'correct_answer': correct_answer,
                    'actual_score': target_row['discrete_score'],
                    'prompt': full_prompt
                })

        return prompts, metadata

    return process_single_user
558
+
559
+
560
def append_results_jsonl(results, output_jsonl):
    """Append each result to *output_jsonl* as one JSON document per line.

    NumPy values inside the records are serialized through ``NumpyEncoder``.
    """
    with open(output_jsonl, 'a') as sink:
        sink.writelines(
            json.dumps(record, cls=NumpyEncoder) + '\n'
            for record in results
        )
565
+
566
+
567
def process_batch(batch_metadata, batch_response_texts):
    """Combine each metadata record with the prediction parsed from its response.

    Every output record is the metadata plus ``predicted_skill_level``,
    ``predicted_question_level``, ``predicted_student_answer`` and
    ``full_response``. The three predicted fields are None when no usable
    prediction could be extracted from the response.
    """
    # (output key, key inside the model's JSON object)
    field_map = (
        ('predicted_skill_level', 'skill_level'),
        ('predicted_question_level', 'question_level'),
        ('predicted_student_answer', 'student_answer'),
    )

    records = []
    for meta, raw_text in zip(batch_metadata, batch_response_texts):
        # A missing or empty prediction behaves like an object without keys.
        parsed = extract_json_prediction(raw_text) or {}

        record = {**meta}
        for out_key, json_key in field_map:
            record[out_key] = parsed.get(json_key)
        record['full_response'] = raw_text
        records.append(record)

    return records
593
+
594
+
595
# Global variable to hold process_single_user function for multiprocessing.
# run_inference() assigns it before creating the worker pool; the module-level
# wrapper below is what gets sent to the workers, because the function produced
# by make_process_single_user() is a closure and cannot be pickled by reference.
_process_single_user_func = None


def _process_single_user_wrapper(args):
    """Run the configured per-user processor on one work item.

    Args:
        args: ``(user_id, user_records, min_history, bin_size)`` tuple.

    Returns:
        Whatever the configured function returns (``(prompts, metadata)``).

    Raises:
        RuntimeError: If no processor has been configured in this process.
            This happens when the wrapper is called before run_inference()
            sets the global, or in worker processes that re-import the module
            instead of inheriting the parent's state.
    """
    if _process_single_user_func is None:
        raise RuntimeError(
            "_process_single_user_func is not set in this process; "
            "run_inference() must assign it before the worker pool is created "
            "and workers must inherit that state (fork start method)"
        )
    return _process_single_user_func(args)
602
+
603
+
604
def run_inference(config):
    """Run knowledge-tracing prediction end to end for one model configuration.

    Steps: parse the command line, load and merge the interaction, problem and
    skill tables, build one prompt per prediction target (in parallel over
    users), skip predictions already present in the output file, generate the
    remaining ones with vLLM batch by batch (appending to the JSONL file after
    every batch), then release the engine and terminate the process.

    Args:
        config: Dict with keys:
            - model_id: HuggingFace model ID
            - gen_configs: Dict of generation parameters (passed to SamplingParams)
            - output_prefix: Prefix for output filename
            - system_prompt_prefix: Optional prefix for system prompt (e.g., "Reasoning: medium\\n\\n")

    Returns:
        None when every prediction was already completed. Otherwise the
        function does not return: it ends the process with exit status 0.

    Raises:
        Exception: Any error raised while generating a batch is re-raised
            after a message pointing at the partially written output file.
    """
    # Local import: sys is only needed for the final sys.exit() call.
    import sys

    global _process_single_user_func

    model_id = config["model_id"]
    gen_configs = config["gen_configs"]
    output_prefix = config["output_prefix"]

    # Parse arguments first (needed for reasoning level)
    default_output_jsonl = f"{output_prefix}.jsonl"
    args = parse_args(default_output_jsonl)

    # Determine system prompt prefix
    # CLI --reasoning-level overrides model config if provided
    if args.reasoning_level is not None:
        if args.reasoning_level == "none":
            system_prompt_prefix = ""
        else:
            system_prompt_prefix = f"Reasoning: {args.reasoning_level}\n\n"
    else:
        system_prompt_prefix = config.get("system_prompt_prefix", "")

    # Build full system prompt
    system_prompt = system_prompt_prefix + BASE_SYSTEM_PROMPT

    # Create the process_single_user function with this system prompt.
    # It must be stored in the module global before the Pool is created.
    _process_single_user_func = make_process_single_user(system_prompt)

    batch_size = args.batch_size
    data_dir = args.data_dir
    cache_dir = args.cache_dir
    num_students = args.num_students
    bin_size = args.bin_size
    min_history = args.min_history

    # Generate output filename with params
    n_str = "all" if num_students <= 0 else str(num_students)
    params_suffix = f"_n{n_str}_bin{bin_size}_hist{min_history}"

    if args.output:
        # Use explicit output path
        output_jsonl = args.output
    else:
        # Auto-generate filename in output directory
        filename = f"{output_prefix}{params_suffix}.jsonl"
        output_jsonl = os.path.join(args.output_dir, filename)

    # Build input file paths
    student_csv = os.path.join(data_dir, STUDENT_FILE)
    problems_csv = os.path.join(data_dir, PROBLEMS_FILE)
    skill_csv = os.path.join(data_dir, SKILL_FILE)

    print(f"Model: {model_id}")
    print(f"Data directory: {data_dir}")
    print(f"Batch size: {batch_size}")
    print(f"Output JSONL: {output_jsonl}")
    print(f"Num students: {num_students if num_students > 0 else 'all'}")
    print(f"Bin size: {bin_size}")
    print(f"Min history: {min_history}")
    if cache_dir:
        print(f"Model cache: {cache_dir}")
    print(f"Text cleaner: {'legacy (cleantext.py)' if args.legacy_clean else 'default (clean_utils.py)'}")

    # Load the data
    print("\nLoading data...")
    student_df = pd.read_csv(student_csv)
    student_df = student_df.sort_values(['user_id', 'id']).reset_index(drop=True)
    problems_df = pd.read_csv(problems_csv)
    clean_func = clean_text_legacy if args.legacy_clean else clean_problem_body
    problems_df['cleaned body'] = problems_df['Problem Body'].apply(clean_func)

    # Label answer options for multiple-choice items
    problems_df['answer_options'] = problems_df['Multiple Choice Options'].apply(label_answer_options)

    # Get correct answer letters for multiple-choice, keep original for fill-in
    problems_df['correct_answers'] = problems_df.apply(
        lambda row: get_correct_option_letters(row['answer_options'], row['Multiple Choice Answers'])
        if row['Problem Type'] in ['Multiple Choice (select 1)', 'Multiple Choice (select all)']
        else row['Fill-in Answers'],
        axis=1
    )

    skill_df = pd.read_csv(skill_csv)
    # NOTE(review): a left merge repeats a problem row once per matching skill
    # row; confirm Skills.csv has at most one row per problem_id.
    problems_df = pd.merge(problems_df, skill_df, on='problem_id', how='left')

    # Pre-compute formatted answer options once per problem
    problems_df['answer_options_formatted'] = problems_df['answer_options'].apply(
        lambda x: format_answer_options_for_prompt(x) if pd.notna(x) else None
    )

    # Sort student data by id (chronological order)
    student_df = student_df.sort_values('id').reset_index(drop=True)

    # Merge with problems data
    merged_df = student_df.merge(problems_df, on='problem_id', how='inner')

    # Convert student answers to letter format for multiple-choice problems
    merged_df['answer_text'] = merged_df.apply(
        lambda row: match_student_answer_to_letters(row['answer_text'], row['answer_options'])
        if row['Problem Type'] in ['Multiple Choice (select 1)', 'Multiple Choice (select all)'] and pd.notna(row['answer_options'])
        else row['answer_text'],
        axis=1
    )

    # Select users (all or random sample)
    all_users = merged_df['user_id'].unique()
    if num_students <= 0:
        # Use all students
        selected_users = all_users
        print(f"\nUsing all {len(all_users)} users")
    else:
        # Random sample
        np.random.seed(42)  # For reproducibility
        selected_users = np.random.choice(all_users, size=min(num_students, len(all_users)), replace=False)
        merged_df = merged_df[merged_df['user_id'].isin(selected_users)]
        print(f"\nSelected {len(selected_users)} random users from {len(all_users)} total users")
        print(f"Filtered data: {len(merged_df)} rows")

    # Prepare data for batch processing
    print("\nPreparing prompts in parallel...")

    # Prepare user groups for parallel processing
    print("Grouping user data...")
    user_groups = [
        (user_id, user_df.to_dict('records'), min_history, bin_size)
        for user_id, user_df in merged_df.groupby('user_id')
    ]
    print(f"Processing {len(user_groups)} users with {cpu_count()} CPU cores...")

    # Process users in parallel
    all_prompts = []
    all_metadata = []

    with Pool(processes=cpu_count()) as pool:
        results = list(tqdm(
            pool.imap(_process_single_user_wrapper, user_groups),
            total=len(user_groups),
            desc="Preparing prompts"
        ))

    # Merge results
    for prompts, metadata in results:
        all_prompts.extend(prompts)
        all_metadata.extend(metadata)

    print(f"\nTotal predictions to make: {len(all_prompts)}")

    # Filter out already-completed predictions (resume support)
    completed_ids = load_completed_predictions(output_jsonl)
    remaining = [(p, m) for p, m in zip(all_prompts, all_metadata)
                 if m['prediction_id'] not in completed_ids]

    if not remaining:
        print("All predictions already completed!")
        return

    all_prompts, all_metadata = zip(*remaining)
    all_prompts = list(all_prompts)
    all_metadata = list(all_metadata)

    print(f"Already completed: {len(completed_ids)}")
    print(f"Remaining to process: {len(all_prompts)}")
    print(f"Processing in batches of {batch_size}")

    # Initialize vLLM engine
    print("\nInitializing vLLM engine...")
    sampling_params = SamplingParams(**gen_configs)
    llm_kwargs = {
        "model": model_id,
        "tensor_parallel_size": args.num_gpus,
        "trust_remote_code": True,
        "gpu_memory_utilization": args.gpu_memory_utilization,
        "enable_prefix_caching": True,
    }
    # Optional settings are only passed when given so vLLM's own defaults apply.
    if args.max_num_seqs is not None:
        llm_kwargs["max_num_seqs"] = args.max_num_seqs
    if args.max_model_len is not None:
        llm_kwargs["max_model_len"] = args.max_model_len
    if cache_dir:
        llm_kwargs["download_dir"] = cache_dir
    llm = LLM(**llm_kwargs)

    # Process in batches. Only a running count is kept: every record carries
    # its full prompt and response and is already persisted after each batch,
    # so holding all of them in memory would serve no purpose.
    total_processed = 0
    num_batches = (len(all_prompts) + batch_size - 1) // batch_size

    for batch_idx in range(num_batches):
        batch_start = batch_idx * batch_size
        batch_end = min(batch_start + batch_size, len(all_prompts))

        batch_prompts = all_prompts[batch_start:batch_end]
        batch_metadata = all_metadata[batch_start:batch_end]

        print(f"\n{'='*80}")
        print(f"Processing batch {batch_idx + 1}/{num_batches}")
        print(f"Items: {batch_start} to {batch_end} ({len(batch_prompts)} prompts)")
        print(f"{'='*80}")

        # Generate predictions for this batch
        try:
            outputs = llm.generate(batch_prompts, sampling_params)
            response_texts = [o.outputs[0].text.strip() for o in outputs]

            # Process results for this batch
            batch_results = process_batch(batch_metadata, response_texts)
            total_processed += len(batch_results)

            print(f"Successfully processed batch {batch_idx + 1}")
            print(f"Total results so far: {total_processed}")

            # Append results immediately after each batch
            append_results_jsonl(batch_results, output_jsonl)
            print(f"Saved {len(batch_results)} results to {output_jsonl}")

        except Exception as e:
            print(f"\nERROR processing batch {batch_idx + 1}: {str(e)}")
            print(f"Progress saved in {output_jsonl} - restart to resume")
            raise

    print(f"\n{'='*80}")
    print("All batches processed successfully!")
    print(f"{'='*80}")
    print(f"\nAll results saved to {output_jsonl}")
    print(f"Total predictions processed: {total_processed}")

    # Cleanup
    print("\nCleaning up...")
    destroy_model_parallel()
    destroy_distributed_environment()
    del llm
    with contextlib.suppress(AssertionError):
        torch.distributed.destroy_process_group()
    gc.collect()
    torch.cuda.empty_cache()

    print("\nDone!")
    # sys.exit() rather than the interactive helper exit(): the latter is
    # injected by the site module and is not guaranteed to exist.
    sys.exit(0)
Code/llama33_70b_instruct_vllm.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Knowledge Tracing inference with Llama-3.3-70B-Instruct model.
3
+
4
+ Usage:
5
+ CUDA_VISIBLE_DEVICES=0,1,2,3 python llama33_70b_instruct_vllm.py \
6
+ --data-dir foundationalktdataset/ \
7
+ --num-gpus 4 \
8
+ --batch-size 10 \
9
+ --cache-dir /data1/ \
10
+ --num-students 500 \
11
+ --bin-size 50 \
12
+ --min-history 50
13
+ """
14
+
15
+ from kt_inference_base import run_inference
16
+
17
# Model-specific settings passed to the shared `run_inference` entry point.
MODEL_CONFIG = dict(
    model_id="meta-llama/Llama-3.3-70B-Instruct",
    # Generation/sampling settings (presumably forwarded to the sampler by
    # run_inference -- confirm in kt_inference_base).
    gen_configs=dict(
        temperature=0.7,
        top_p=0.9,
        max_tokens=32768,
        repetition_penalty=1.0,
    ),
    output_prefix="llama33_70b_instruct",
    # No prefix - standard instruct model
    system_prompt_prefix="",
)


if __name__ == "__main__":
    run_inference(MODEL_CONFIG)
Code/plot_student_attempt_distribution.py ADDED
@@ -0,0 +1,357 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """Plot distribution of student attempts over elapsed time.
3
+
4
+ This script reads FoundationalASSIST `Interactions.csv`, computes elapsed time
5
+ for each attempt from the student's first attempt, groups attempts into fixed
6
+ time bins, and plots the resulting column distribution.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import argparse
12
+ import math
13
+ from pathlib import Path
14
+
15
+ import matplotlib.pyplot as plt
16
+ import pandas as pd
17
+ from matplotlib.ticker import FuncFormatter, MaxNLocator
18
+
19
+
20
# Directory two levels above this script; default inputs and outputs hang off it.
_BASE_DIR = Path(__file__).resolve().parent.parent
_RESULTS_DIR = _BASE_DIR / "Results"

# Default CSV input and default output locations used by the CLI.
DEFAULT_INTERACTIONS_PATH = _BASE_DIR / "Data" / "Interactions.csv"
DEFAULT_OUTPUT_PLOT = _RESULTS_DIR / "student_attempt_distribution.png"
DEFAULT_OUTPUT_COUNTS = _RESULTS_DIR / "student_attempt_distribution_counts.csv"
33
+
34
+
35
def parse_args() -> argparse.Namespace:
    """Build the command-line interface and parse the process arguments.

    Returns:
        The parsed namespace with ``interactions_path``, ``output_plot``,
        ``output_counts``, ``max_rows``, ``bin_time``,
        ``plot_upper_limit_minutes``, ``student_idx`` and ``log_y``.
    """
    cli = argparse.ArgumentParser(
        description=(
            "Compute distribution of attempts over elapsed time from "
            "Interactions.csv and plot binned columns."
        )
    )

    # The three path options share the same shape, so register them together.
    path_options = (
        ("--interactions-path", DEFAULT_INTERACTIONS_PATH, "Path to Interactions.csv."),
        ("--output-plot", DEFAULT_OUTPUT_PLOT, "Path to save the output figure."),
        (
            "--output-counts",
            DEFAULT_OUTPUT_COUNTS,
            "Path to save binned attempt counts as CSV.",
        ),
    )
    for flag, default_path, help_text in path_options:
        cli.add_argument(flag, type=Path, default=default_path, help=help_text)

    cli.add_argument(
        "--max-rows",
        type=int,
        default=None,
        help="Optional cap on rows after sorting (for quick debugging).",
    )
    cli.add_argument(
        "--bin-time",
        type=float,
        default=10.0,
        help=(
            "Fixed bin width in minutes. "
            "For example, --bin-time 10 creates bins [0,10), [10,20), ..."
        ),
    )
    cli.add_argument(
        "--plot-upper-limit-minutes",
        type=float,
        default=None,
        help=(
            "Optional upper limit for x-axis in minutes. "
            "If omitted, uses the full range implied by bins."
        ),
    )
    cli.add_argument(
        "--student-idx",
        type=int,
        default=None,
        help=(
            "Optional 0-based index of student to plot. Index is based on "
            "sorted unique user_id values in the loaded interactions."
        ),
    )
    cli.add_argument("--log-y", action="store_true", help="Use log scale on y-axis.")

    return cli.parse_args()
99
+
100
+
101
def load_interactions(path: Path, max_rows: int | None = None) -> pd.DataFrame:
    """Load fields required for student attempt timing analysis.

    Only the ``id``, ``user_id`` and ``end_time`` columns are read. Rows whose
    ``user_id`` is missing or whose ``end_time`` cannot be parsed are dropped,
    and the remainder is ordered by student, then timestamp, then row id.

    Args:
        path: Location of the interactions CSV.
        max_rows: Optional cap applied to the sorted rows; ``None`` keeps all.

    Returns:
        A new DataFrame with the three columns above, sorted as described.

    Raises:
        ValueError: If ``max_rows`` is given but not positive. This is checked
            before the file is touched so a bad argument fails immediately
            instead of after the whole CSV has been read and sorted.
    """
    if max_rows is not None and max_rows <= 0:
        raise ValueError("--max-rows must be a positive integer.")

    usecols = ["id", "user_id", "end_time"]
    df = pd.read_csv(path, usecols=usecols, low_memory=False)

    # Unparseable ids become -1 so the column stays usable as an integer
    # tie-breaker in the sort below.
    df["id"] = pd.to_numeric(df["id"], errors="coerce").fillna(-1).astype(int)
    df["user_id"] = df["user_id"].astype("string")
    # Unparseable timestamps become NaT and are removed by the dropna below.
    df["end_time"] = pd.to_datetime(df["end_time"], errors="coerce", utc=True)

    df = df.dropna(subset=["user_id", "end_time"])
    # mergesort is stable, so rows with identical keys keep their file order.
    df = df.sort_values(["user_id", "end_time", "id"], kind="mergesort")

    if max_rows is not None:
        df = df.head(max_rows)

    return df.copy()
120
+
121
+
122
def select_student_by_index(
    df: pd.DataFrame,
    student_idx: int,
) -> tuple[pd.DataFrame, str, int]:
    """Pick the rows of a single student, addressed by position.

    Students are numbered from 0 in order of first appearance of their
    ``user_id`` in ``df``.

    Returns:
        ``(rows_of_student, student_id, number_of_distinct_students)``.

    Raises:
        ValueError: If ``df`` has no students or ``student_idx`` is out of range.
    """
    unique_ids = df["user_id"].drop_duplicates().tolist()
    n_students = len(unique_ids)

    if n_students == 0:
        raise ValueError("No students found in loaded interactions.")
    if not 0 <= student_idx < n_students:
        raise ValueError(
            f"--student-idx must be in [0, {n_students - 1}], got {student_idx}."
        )

    chosen_id = str(unique_ids[student_idx])
    belongs_to_student = df["user_id"] == chosen_id
    return df[belongs_to_student].copy(), chosen_id, n_students
140
+
141
+
142
def append_student_id_to_output_path(path: Path, student_id: str) -> Path:
    """Return ``path`` with ``_<student id>`` inserted before the extension.

    Every character of the id that is not alphanumeric or a hyphen is written
    as an underscore, so the suffix is safe to use in a filename.
    """
    cleaned_chars = []
    for ch in student_id:
        # "_" maps to itself either way, so only "-" needs an explicit pass.
        cleaned_chars.append(ch if ch.isalnum() or ch == "-" else "_")
    suffix = "".join(cleaned_chars)
    return path.with_name(f"{path.stem}_{suffix}{path.suffix}")
148
+
149
+
150
def compute_attempt_elapsed_minutes(df: pd.DataFrame) -> pd.Series:
    """Return, per row, the minutes elapsed since that student's earliest ``end_time``.

    The result is aligned with ``df`` and named ``elapsed_minutes``.
    """
    earliest_per_student = df.groupby("user_id", sort=False)["end_time"].transform(
        "min"
    )
    offsets = df["end_time"] - earliest_per_student
    minutes = offsets.dt.total_seconds() / 60.0
    return minutes.rename("elapsed_minutes")
156
+
157
+
158
def build_fixed_width_bin_edges_minutes(
    valid_elapsed_minutes: pd.Series,
    bin_time_minutes: float,
) -> list[float]:
    """Return equally spaced bin edges that cover every observed value.

    Edges are multiples of ``bin_time_minutes``. The first edge is at or below
    the minimum, and the last edge is strictly above the maximum so that
    half-open ``[left, right)`` bins still contain it.
    """
    step = bin_time_minutes
    lowest = float(valid_elapsed_minutes.min())
    highest = float(valid_elapsed_minutes.max())

    first_edge = step * math.floor(lowest / step)
    last_edge = step * math.ceil(highest / step)

    # Normalise a (near-)zero start to exactly 0.0.
    if math.isclose(first_edge, 0.0, abs_tol=1e-12):
        first_edge = 0.0
    # Degenerate range: guarantee at least one bin.
    if math.isclose(last_edge, first_edge, abs_tol=1e-12):
        last_edge = first_edge + step

    bin_count = int(round((last_edge - first_edge) / step))
    edges = []
    for k in range(bin_count + 1):
        edges.append(first_edge + k * step)

    # A maximum sitting exactly on the last edge would be excluded by a
    # right-open bin, so extend by one more bin in that case.
    if highest >= edges[-1]:
        edges.append(edges[-1] + step)

    return edges
180
+
181
+
182
def format_minutes_tick(value: float, _pos: float) -> str:
    """Format an axis tick given in minutes as minutes, hours or days.

    Values below 60 are shown as whole minutes. Larger values are shown in
    hours (below 1440 minutes) or days. Whole hours/days are printed without
    decimals; anything else keeps one decimal so that distinct ticks such as
    90 and 120 minutes are not both rendered as "2h".

    Args:
        value: Tick position in minutes.
        _pos: Tick index supplied by matplotlib's ``FuncFormatter``; unused.

    Returns:
        The tick label, e.g. ``"30m"``, ``"1.5h"`` or ``"2d"``.
    """
    if value < 60:
        return f"{int(round(value))}m"

    if value < 1440:
        hours = value / 60.0
        if math.isclose(hours, round(hours), abs_tol=1e-9):
            return f"{int(round(hours))}h"
        return f"{hours:.1f}h"

    days = value / 1440.0
    if math.isclose(days, round(days), abs_tol=1e-9):
        return f"{int(round(days))}d"
    return f"{days:.1f}d"
188
+
189
+
190
def summarize_attempt_distribution(
    elapsed_minutes: pd.Series,
    bin_time_minutes: float,
) -> pd.DataFrame:
    """Count attempts per fixed-width elapsed-time bin.

    Missing values are ignored. Bins are half-open ``[left, right)``.

    Returns:
        One row per bin with its left/right edge, width, attempt count,
        probability (share of all attempts) and percentage.

    Raises:
        ValueError: If no non-missing elapsed times remain.
    """
    observed = elapsed_minutes.dropna().copy()
    if observed.empty:
        raise ValueError("No valid elapsed attempt times found.")

    edges = build_fixed_width_bin_edges_minutes(observed, bin_time_minutes)
    per_bin = pd.cut(
        observed, bins=edges, right=False, include_lowest=True
    ).value_counts(sort=False)

    n_attempts = int(per_bin.sum())
    share = (per_bin / n_attempts).astype(float).to_numpy(dtype=float)

    lefts = pd.Series(edges[:-1], dtype=float)
    rights = pd.Series(edges[1:], dtype=float)

    return pd.DataFrame(
        {
            "bin_left_min": lefts.to_numpy(),
            "bin_right_min": rights.to_numpy(),
            "bin_width_min": (rights - lefts).to_numpy(),
            "attempt_count": per_bin.to_numpy(),
            "probability": share,
            "percentage": share * 100.0,
        }
    )
219
+
220
+
221
def plot_distribution(
    summary_df: pd.DataFrame,
    output_path: Path,
    log_y: bool = False,
    plot_upper_limit_minutes: float | None = None,
    student_idx: int | None = None,
) -> None:
    """Draw the per-bin attempt counts as a bar chart and save it to disk.

    Args:
        summary_df: Per-bin table providing ``bin_left_min``, ``bin_width_min``,
            ``attempt_count`` and ``percentage``.
        output_path: Image file to write; its parent directory is created.
        log_y: Use a logarithmic y-axis when true.
        plot_upper_limit_minutes: Optional cap on the right x-axis limit.
        student_idx: When given, a narrower figure is used and the index is
            appended to the title.
    """
    output_path.parent.mkdir(parents=True, exist_ok=True)

    plt.style.use("seaborn-v0_8-whitegrid")
    # Single-student plots get a narrower canvas than the all-students plot.
    canvas_size = (20, 5) if student_idx is None else (10, 5)
    fig, ax = plt.subplots(figsize=canvas_size)

    bin_starts = summary_df["bin_left_min"].to_numpy(dtype=float)
    bin_widths = summary_df["bin_width_min"].to_numpy(dtype=float)
    attempt_counts = summary_df["attempt_count"].to_numpy(dtype=float)

    bars = ax.bar(
        bin_starts,
        attempt_counts,
        width=bin_widths,
        align="edge",
        color="#4C78A8",
    )

    heading = "Distribution of Student Attempts Over Elapsed Time"
    if student_idx is not None:
        heading = f"{heading} (student_idx={student_idx})"
    ax.set_title(heading)
    ax.set_xlabel("Elapsed Time Since Student's First Attempt")
    ax.set_ylabel("Number of Attempts")

    left_limit = float(bin_starts.min())
    right_limit = float((bin_starts + bin_widths).max())
    if plot_upper_limit_minutes is not None:
        right_limit = min(right_limit, float(plot_upper_limit_minutes))
    ax.set_xlim(left_limit, right_limit)

    ax.xaxis.set_major_locator(MaxNLocator(nbins=9))
    ax.xaxis.set_major_formatter(FuncFormatter(format_minutes_tick))
    ax.grid(axis="y", alpha=0.25, linewidth=0.8)
    for side in ("top", "right"):
        ax.spines[side].set_visible(False)

    if log_y:
        ax.set_yscale("log")

    # Percentage labels are only drawn when there are few enough bars to read.
    if len(summary_df) <= 40:
        for bar, pct in zip(bars, summary_df["percentage"]):
            bar_top = bar.get_height()
            # Skip bins holding under 1% of attempts and empty bars.
            if pct < 1.0 or bar_top <= 0:
                continue
            ax.annotate(
                f"{pct:.1f}%",
                xy=(bar.get_x() + bar.get_width() / 2.0, bar_top),
                xytext=(0, 3),
                textcoords="offset points",
                ha="center",
                va="bottom",
                fontsize=8,
            )

    plt.tight_layout()
    fig.savefig(output_path, dpi=400, bbox_inches="tight")
    plt.close(fig)
294
+
295
+
296
def main() -> None:
    """Run the CLI: validate arguments, bin attempts, write the CSV and the plot."""
    args = parse_args()

    interactions_path = args.interactions_path
    if not interactions_path.exists():
        raise FileNotFoundError(f"Interactions file not found: {interactions_path}")
    if args.bin_time <= 0:
        raise ValueError("--bin-time must be a positive number.")
    upper_limit = args.plot_upper_limit_minutes
    if upper_limit is not None and upper_limit <= 0:
        raise ValueError("--plot-upper-limit-minutes must be a positive number.")

    df = load_interactions(interactions_path, max_rows=args.max_rows)

    plot_path = args.output_plot
    counts_path = args.output_counts
    if args.student_idx is None:
        student_id: str | None = None
        n_students = int(df["user_id"].nunique())
    else:
        # Restrict to one student and tag both output files with that id.
        df, student_id, n_students = select_student_by_index(df, args.student_idx)
        plot_path = append_student_id_to_output_path(plot_path, student_id)
        counts_path = append_student_id_to_output_path(counts_path, student_id)

    summary = summarize_attempt_distribution(
        compute_attempt_elapsed_minutes(df),
        args.bin_time,
    )
    counts_path.parent.mkdir(parents=True, exist_ok=True)
    summary.to_csv(counts_path, index=False)

    plot_distribution(
        summary,
        plot_path,
        log_y=args.log_y,
        plot_upper_limit_minutes=upper_limit,
        student_idx=args.student_idx,
    )

    n_attempts = int(summary["attempt_count"].sum())
    print("Done.")
    print(f"Interactions loaded: {len(df):,}")
    print(f"Students in loaded data: {n_students:,}")
    if student_id is not None:
        print(f"Selected student idx: {args.student_idx}")
        print(f"Selected student id: {student_id}")
    print(f"Attempts used: {n_attempts:,}")
    print(f"Bin width (min): {args.bin_time}")
    print(f"Saved plot: {plot_path}")
    print(f"Saved bin counts: {counts_path}")


if __name__ == "__main__":
    main()
Code/plot_timegap_distribution.py ADDED
@@ -0,0 +1,484 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """Plot the distribution of time gaps between consecutive student attempts.
3
+
4
+ This script reads FoundationalASSIST `Interactions.csv`, groups interactions by
5
+ student (`user_id`), computes the time difference between each pair of
6
+ consecutive attempts (`end_time`), discretizes these differences into bins, and
7
+ plots the resulting distribution.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import argparse
13
+ import math
14
+ from pathlib import Path
15
+
16
+ import matplotlib.pyplot as plt
17
+ import pandas as pd
18
+ from matplotlib.ticker import FuncFormatter, MaxNLocator
19
+
20
+
21
# Directory two levels above this script; default inputs and outputs hang off it.
_BASE_DIR = Path(__file__).resolve().parent.parent
_RESULTS_DIR = _BASE_DIR / "Results"

# Default CSV input and default output locations used by the CLI.
DEFAULT_INTERACTIONS_PATH = _BASE_DIR / "Data" / "Interactions.csv"
DEFAULT_OUTPUT_PLOT = _RESULTS_DIR / "time_gap_distribution.png"
DEFAULT_OUTPUT_COUNTS = _RESULTS_DIR / "time_gap_distribution_counts.csv"

# Gap length (minutes) at which the cumulative probability is reported.
CDF_MARKER_MINUTES = 60.0
33
+
34
+
35
def parse_args() -> argparse.Namespace:
    """Build the command-line interface and parse the process arguments.

    Returns:
        The parsed namespace with ``interactions_path``, ``output_plot``,
        ``output_counts``, ``max_rows``, ``keep_nonpositive_gaps``, ``log_y``,
        ``plot_upper_limit_minutes``, ``bin_time`` and ``student_idx``.
    """
    cli = argparse.ArgumentParser(
        description=(
            "Compute per-student consecutive-attempt time gaps from "
            "Interactions.csv and plot their binned distribution."
        )
    )

    # The three path options share the same shape, so register them together.
    path_options = (
        ("--interactions-path", DEFAULT_INTERACTIONS_PATH, "Path to Interactions.csv."),
        ("--output-plot", DEFAULT_OUTPUT_PLOT, "Path to save the output figure."),
        ("--output-counts", DEFAULT_OUTPUT_COUNTS, "Path to save bin counts as CSV."),
    )
    for flag, default_path, help_text in path_options:
        cli.add_argument(flag, type=Path, default=default_path, help=help_text)

    cli.add_argument(
        "--max-rows",
        type=int,
        default=None,
        help="Optional cap on rows after sorting (for quick debugging).",
    )
    cli.add_argument(
        "--keep-nonpositive-gaps",
        action="store_true",
        help=(
            "Keep zero/negative gaps. By default, only strictly positive "
            "gaps are used."
        ),
    )
    cli.add_argument("--log-y", action="store_true", help="Use log scale on y-axis.")
    cli.add_argument(
        "--plot-upper-limit-minutes",
        type=float,
        default=None,
        help=(
            "Optional upper limit for x-axis in minutes. "
            "If omitted, uses the full range implied by bins."
        ),
    )
    cli.add_argument(
        "--bin-time",
        type=float,
        default=None,
        help=(
            "Optional fixed bin width in minutes. "
            "For example, --bin-time 10 creates bins [0,10), [10,20), ..."
        ),
    )
    cli.add_argument(
        "--student-idx",
        type=int,
        default=None,
        help=(
            "Optional 0-based index of student to plot. Index is based on "
            "sorted unique user_id values in the loaded interactions."
        ),
    )

    return cli.parse_args()
107
+
108
+
109
def load_interactions(path: Path, max_rows: int | None = None) -> pd.DataFrame:
    """Load the minimum columns required for time-gap analysis.

    Only the ``id``, ``user_id`` and ``end_time`` columns are read. Rows whose
    ``user_id`` is missing or whose ``end_time`` cannot be parsed are dropped,
    and the remainder is ordered by student, then timestamp, then row id.

    Args:
        path: Location of the interactions CSV.
        max_rows: Optional cap applied to the sorted rows; ``None`` keeps all.

    Returns:
        A new DataFrame with the three columns above, sorted as described.

    Raises:
        ValueError: If ``max_rows`` is given but not positive. This is checked
            before the file is touched so a bad argument fails immediately
            instead of after the whole CSV has been read and sorted.
    """
    if max_rows is not None and max_rows <= 0:
        raise ValueError("--max-rows must be a positive integer.")

    usecols = ["id", "user_id", "end_time"]
    df = pd.read_csv(path, usecols=usecols, low_memory=False)

    # Unparseable ids become -1 so the column stays usable as an integer
    # tie-breaker in the sort below.
    df["id"] = pd.to_numeric(df["id"], errors="coerce").fillna(-1).astype(int)
    df["user_id"] = df["user_id"].astype("string")
    # Unparseable timestamps become NaT and are removed by the dropna below.
    df["end_time"] = pd.to_datetime(df["end_time"], errors="coerce", utc=True)

    df = df.dropna(subset=["user_id", "end_time"])
    # mergesort is stable, so rows with identical keys keep their file order.
    df = df.sort_values(["user_id", "end_time", "id"], kind="mergesort")

    if max_rows is not None:
        df = df.head(max_rows)

    return df.copy()
128
+
129
+
130
def compute_time_gaps_minutes(df: pd.DataFrame) -> pd.Series:
    """Return the minutes between each row's ``end_time`` and the previous row of the same student.

    Rows are differenced in the order they appear in ``df``; the first row of
    every student has no predecessor and yields a missing value.
    """
    times_by_student = df.groupby("user_id", sort=False)["end_time"]
    deltas = times_by_student.diff()
    seconds = deltas.dt.total_seconds()
    return seconds / 60.0
136
+
137
+
138
def default_bin_edges_minutes() -> list[float]:
    """Return the base bin edges in minutes, from 0 up to 7 days.

    A fresh list is returned on every call, so callers may append to it.
    """
    sub_hour_edges = (0.0, 1.0, 5.0, 10.0, 30.0)
    hour_edges = tuple(hours * 60.0 for hours in (1, 3, 12))
    day_edges = tuple(days * 1440.0 for days in (1, 3, 7))
    return [*sub_hour_edges, *hour_edges, *day_edges]
153
+
154
+
155
def build_bin_edges_minutes(valid: pd.Series) -> list[float]:
    """Build finite plotting edges so bar widths are proportional on x-axis.

    Starts from the default edges and, when the data reaches or exceeds the
    last default edge, appends one finite terminal edge that contains the tail.

    Args:
        valid: Gap lengths in minutes; only its maximum is used.

    Returns:
        The default edges, optionally followed by one extra tail edge.
    """
    edges = default_bin_edges_minutes()
    base_tail_start = edges[-1]
    max_gap = float(valid.max())

    # Bins are half-open [left, right), so a gap exactly equal to the last
    # base edge is not covered by the base bins and also needs the tail bin
    # (hence ">=" rather than ">"); otherwise it would be dropped when binned.
    if max_gap >= base_tail_start:
        # Add a finite terminal edge that fully contains the data tail.
        tail_edge = max(base_tail_start + 60.0, max_gap * 1.05)
        edges.append(tail_edge)

    return edges
167
+
168
+
169
def build_fixed_width_bin_edges_minutes(
    valid: pd.Series, bin_time_minutes: float
) -> list[float]:
    """Return equally spaced bin edges that cover every observed gap.

    Edges are multiples of ``bin_time_minutes``. The first edge is at or below
    the minimum gap, and the last edge is strictly above the maximum gap so
    that half-open ``[left, right)`` bins still contain it.
    """
    smallest, largest = float(valid.min()), float(valid.max())

    origin = bin_time_minutes * math.floor(smallest / bin_time_minutes)
    stop = bin_time_minutes * math.ceil(largest / bin_time_minutes)

    # Normalise a (near-)zero origin to exactly 0.0.
    if math.isclose(origin, 0.0, abs_tol=1e-12):
        origin = 0.0
    # Degenerate range: guarantee at least one bin.
    if math.isclose(stop, origin, abs_tol=1e-12):
        stop = origin + bin_time_minutes

    steps = int(round((stop - origin) / bin_time_minutes))
    edges = [origin + k * bin_time_minutes for k in range(steps + 1)]

    # A maximum sitting exactly on the last edge would fall outside a
    # right-open bin, so add one more bin for it.
    max_on_last_edge = largest >= edges[-1]
    if max_on_last_edge:
        edges.append(edges[-1] + bin_time_minutes)

    return edges
190
+
191
+
192
def format_bin_bound(minutes: float) -> str:
    """Render a bin boundary compactly: whole numbers without decimals, otherwise up to two decimals."""
    nearest_whole = round(minutes)
    if math.isclose(minutes, nearest_whole, abs_tol=1e-9):
        return str(int(nearest_whole))
    two_decimals = f"{minutes:.2f}"
    # Trim trailing zeros, then a dangling decimal point if nothing is left after it.
    return two_decimals.rstrip("0").rstrip(".")
196
+
197
+
198
def make_bin_labels(
    edges: list[float], open_tail_from: float | None = None
) -> list[str]:
    """Return one text label per consecutive pair of edges.

    Labels read ``"[left, right) min"``. When ``open_tail_from`` is given and
    the final bin starts at or beyond it, that final bin is labelled
    ``">= left min"`` instead.
    """
    final_idx = len(edges) - 2

    def _label(idx: int, lo: float, hi: float) -> str:
        is_open_tail = (
            open_tail_from is not None and idx == final_idx and lo >= open_tail_from
        )
        if is_open_tail:
            return f">= {format_bin_bound(lo)} min"
        return f"[{format_bin_bound(lo)}, {format_bin_bound(hi)}) min"

    return [_label(idx, lo, hi) for idx, (lo, hi) in enumerate(zip(edges, edges[1:]))]
209
+
210
+
211
def format_minutes_tick(value: float, _pos: float) -> str:
    """Format an axis tick given in minutes as minutes, hours or days.

    Below 60 the value is shown as whole minutes. Otherwise it is converted to
    hours (below 1440 minutes) or days and printed without decimals when it is
    a whole number, or with one decimal when it is not. ``_pos`` is unused.
    """
    if value < 60:
        return f"{int(round(value))}m"

    if value < 1440:
        scaled, unit = value / 60.0, "h"
    else:
        scaled, unit = value / 1440.0, "d"

    nearest_whole = round(scaled)
    if math.isclose(scaled, nearest_whole, abs_tol=1e-9):
        return f"{int(nearest_whole)}{unit}"
    return f"{scaled:.1f}{unit}"
225
+
226
+
227
def summarize_binned_distribution(
    gaps_minutes: pd.Series,
    keep_nonpositive: bool,
    bin_time_minutes: float | None = None,
) -> pd.DataFrame:
    """Bin the time gaps and tabulate counts, shares and densities per bin.

    Args:
        gaps_minutes: Gap lengths in minutes (missing values allowed).
        keep_nonpositive: Passed to ``filter_valid_gaps``.
        bin_time_minutes: Fixed bin width; ``None`` selects the default
            variable-width edges.

    Returns:
        One row per bin with its label, edges, width, count, probability,
        percentage and probability density per minute.

    Raises:
        ValueError: If no gaps remain after filtering.
    """
    usable = filter_valid_gaps(gaps_minutes, keep_nonpositive)
    if usable.empty:
        raise ValueError("No valid time gaps found after filtering.")

    open_tail_from = None
    if bin_time_minutes is None:
        base_edges = default_bin_edges_minutes()
        edges = build_bin_edges_minutes(usable)
        # An extra edge beyond the defaults means the last bin is the data tail.
        if len(edges) > len(base_edges):
            open_tail_from = base_edges[-1]
    else:
        edges = build_fixed_width_bin_edges_minutes(usable, bin_time_minutes)

    labels = make_bin_labels(edges, open_tail_from=open_tail_from)
    counts = pd.cut(
        usable, bins=edges, labels=labels, right=False, include_lowest=True
    ).value_counts(sort=False)

    share = (counts / counts.sum()).astype(float).to_numpy(dtype=float)
    lefts = pd.Series(edges[:-1], dtype=float)
    rights = pd.Series(edges[1:], dtype=float)
    widths = rights - lefts

    return pd.DataFrame(
        {
            "bin": counts.index.astype(str),
            "bin_left_min": lefts.to_numpy(),
            "bin_right_min": rights.to_numpy(),
            "bin_width_min": widths.to_numpy(),
            "count": counts.values,
            "probability": share,
            "percentage": share * 100.0,
            # Share divided by width, so bars of unequal width stay comparable.
            "density_per_min": share / widths.to_numpy(dtype=float),
        }
    )
270
+
271
+
272
def filter_valid_gaps(gaps_minutes: pd.Series, keep_nonpositive: bool) -> pd.Series:
    """Drop missing gaps and, unless ``keep_nonpositive`` is set, gaps that are not strictly positive."""
    present = gaps_minutes.dropna().copy()
    if keep_nonpositive:
        return present
    return present[present > 0]
277
+
278
+
279
def cumulative_probability_at_minutes(
    gaps_minutes: pd.Series,
    threshold_minutes: float,
    keep_nonpositive: bool,
) -> float:
    """Return the fraction of retained gaps that are at most ``threshold_minutes``.

    Raises:
        ValueError: If no gaps remain after filtering.
    """
    kept = filter_valid_gaps(gaps_minutes, keep_nonpositive)
    if kept.empty:
        raise ValueError("No valid time gaps found after filtering.")
    within_threshold = kept <= threshold_minutes
    return float(within_threshold.mean())
288
+
289
+
290
def select_student_by_index(
    df: pd.DataFrame,
    student_idx: int,
) -> tuple[pd.DataFrame, str, int]:
    """Return the interactions of the student at position ``student_idx``.

    Positions count distinct ``user_id`` values from 0, in order of first
    appearance in ``df``.

    Returns:
        A copy of that student's rows, the student's id as ``str``, and the
        number of distinct students in ``df``.

    Raises:
        ValueError: If there are no students or the index is out of range.
    """
    ids_in_order = df["user_id"].drop_duplicates().tolist()
    student_count = len(ids_in_order)

    if student_count == 0:
        raise ValueError("No students found in loaded interactions.")

    index_in_range = 0 <= student_idx < student_count
    if not index_in_range:
        raise ValueError(
            f"--student-idx must be in [0, {student_count - 1}], got {student_idx}."
        )

    wanted_id = str(ids_in_order[student_idx])
    rows = df[df["user_id"] == wanted_id].copy()
    return rows, wanted_id, student_count
308
+
309
+
310
def append_student_id_to_output_path(path: Path, student_id: str) -> Path:
    """Insert a filename-safe ``_<student id>`` before the extension of ``path``.

    Characters other than alphanumerics, ``-`` and ``_`` are replaced by ``_``.
    """

    def _safe(ch: str) -> str:
        return ch if (ch.isalnum() or ch in ("-", "_")) else "_"

    suffix = "".join(map(_safe, student_id))
    new_name = f"{path.stem}_{suffix}{path.suffix}"
    return path.with_name(new_name)
316
+
317
+
318
def plot_distribution(
    summary_df: pd.DataFrame,
    output_path: Path,
    log_y: bool = False,
    plot_upper_limit_minutes: float | None = None,
    cdf_marker_minutes: float = CDF_MARKER_MINUTES,
    cdf_at_marker: float | None = None,
    student_idx: int | None = None,
) -> None:
    """Draw the time-gap density histogram with a CDF marker line and save it.

    Args:
        summary_df: Per-bin table providing ``bin_left_min``, ``bin_width_min``,
            ``density_per_min`` and ``percentage``.
        output_path: Image file to write; its parent directory is created.
        log_y: Use a logarithmic y-axis when true.
        plot_upper_limit_minutes: Optional cap on the right x-axis limit.
        cdf_marker_minutes: x position of the dashed vertical marker.
        cdf_at_marker: Cumulative probability at the marker; when given it is
            appended to the marker's legend entry as a percentage.
        student_idx: When given, a narrower figure is used and the index is
            appended to the title.
    """
    output_path.parent.mkdir(parents=True, exist_ok=True)

    plt.style.use("seaborn-v0_8-whitegrid")

    # Single-student plots get a narrower canvas than the all-students plot.
    canvas_size = (20, 5) if student_idx is None else (10, 5)
    fig, ax = plt.subplots(figsize=canvas_size)

    bin_starts = summary_df["bin_left_min"].to_numpy(dtype=float)
    bin_widths = summary_df["bin_width_min"].to_numpy(dtype=float)
    densities = summary_df["density_per_min"].to_numpy(dtype=float)

    bars = ax.bar(
        bin_starts,
        densities,
        width=bin_widths,
        align="edge",
        color="#4C78A8",
    )

    heading = "Distribution of Time Gaps Between Consecutive Attempts"
    if student_idx is not None:
        heading = f"{heading} (student_idx={student_idx})"
    ax.set_title(heading)
    ax.set_xlabel("Time Gap")
    ax.set_ylabel("Probability Density (1/min)")

    left_limit = float(bin_starts.min())
    right_limit = float((bin_starts + bin_widths).max())
    if plot_upper_limit_minutes is not None:
        right_limit = min(right_limit, float(plot_upper_limit_minutes))
    ax.set_xlim(left_limit, right_limit)

    ax.xaxis.set_major_locator(MaxNLocator(nbins=9))
    ax.xaxis.set_major_formatter(FuncFormatter(format_minutes_tick))
    ax.grid(axis="y", alpha=0.25, linewidth=0.8)
    for side in ("top", "right"):
        ax.spines[side].set_visible(False)

    if log_y:
        ax.set_yscale("log")

    legend_text = f"CDF <= {int(cdf_marker_minutes)} min"
    if cdf_at_marker is not None:
        legend_text = f"{legend_text}: {cdf_at_marker * 100:.1f}%"
    ax.axvline(
        cdf_marker_minutes,
        color="#E45756",
        linestyle="--",
        linewidth=1.6,
        label=legend_text,
    )
    ax.legend(loc="upper right", frameon=False, fontsize=9)

    # Label non-trivial bins for readability in papers.
    for bar, pct in zip(bars, summary_df["percentage"]):
        bar_top = bar.get_height()
        # Skip bins holding under 1% of gaps and empty bars.
        if pct < 1.0 or bar_top <= 0:
            continue
        ax.annotate(
            f"{pct:.1f}%",
            xy=(bar.get_x() + bar.get_width() / 2.0, bar_top),
            xytext=(0, 3),
            textcoords="offset points",
            ha="center",
            va="bottom",
            fontsize=8,
        )

    plt.tight_layout()
    fig.savefig(output_path, dpi=400, bbox_inches="tight")
    plt.close(fig)
403
+
404
+
405
def main() -> None:
    """Run the CLI: validate arguments, bin the time gaps, write the CSV and the plot.

    Raises:
        FileNotFoundError: If the interactions file does not exist.
        ValueError: If ``--plot-upper-limit-minutes`` or ``--bin-time`` is
            given but not positive, or if no usable gaps are found.
    """
    args = parse_args()

    # Validate every argument up front so a bad flag fails before the CSV is
    # read and sorted, not after.
    if not args.interactions_path.exists():
        raise FileNotFoundError(
            f"Interactions file not found: {args.interactions_path}"
        )
    if args.plot_upper_limit_minutes is not None and args.plot_upper_limit_minutes <= 0:
        raise ValueError("--plot-upper-limit-minutes must be a positive number.")
    if args.bin_time is not None and args.bin_time <= 0:
        raise ValueError("--bin-time must be a positive number.")

    df = load_interactions(args.interactions_path, max_rows=args.max_rows)
    selected_student_id: str | None = None
    total_students = int(df["user_id"].nunique())
    if args.student_idx is not None:
        df, selected_student_id, total_students = select_student_by_index(
            df,
            args.student_idx,
        )

    # Per-student runs write to files tagged with the student's id.
    output_plot_path = args.output_plot
    output_counts_path = args.output_counts
    if selected_student_id is not None:
        output_plot_path = append_student_id_to_output_path(
            output_plot_path,
            selected_student_id,
        )
        output_counts_path = append_student_id_to_output_path(
            output_counts_path,
            selected_student_id,
        )

    gaps_minutes = compute_time_gaps_minutes(df)

    summary = summarize_binned_distribution(
        gaps_minutes,
        keep_nonpositive=args.keep_nonpositive_gaps,
        bin_time_minutes=args.bin_time,
    )
    output_counts_path.parent.mkdir(parents=True, exist_ok=True)
    summary.to_csv(output_counts_path, index=False)

    cdf_at_marker = cumulative_probability_at_minutes(
        gaps_minutes=gaps_minutes,
        threshold_minutes=CDF_MARKER_MINUTES,
        keep_nonpositive=args.keep_nonpositive_gaps,
    )

    plot_distribution(
        summary,
        output_plot_path,
        log_y=args.log_y,
        plot_upper_limit_minutes=args.plot_upper_limit_minutes,
        cdf_marker_minutes=CDF_MARKER_MINUTES,
        cdf_at_marker=cdf_at_marker,
        student_idx=args.student_idx,
    )

    total_pairs = int(summary["count"].sum())
    print("Done.")
    print(f"Interactions loaded: {len(df):,}")
    print(f"Students in loaded data: {total_students:,}")
    if selected_student_id is not None:
        print(f"Selected student idx: {args.student_idx}")
        print(f"Selected student id: {selected_student_id}")
    print(f"Consecutive attempt pairs used: {total_pairs:,}")
    if args.bin_time is not None:
        print(f"Bin width (min): {args.bin_time}")
    print(
        f"Cumulative P(gap <= {int(CDF_MARKER_MINUTES)} min): "
        f"{cdf_at_marker * 100:.2f}%"
    )
    print(f"Saved plot: {output_plot_path}")
    print(f"Saved bin counts: {output_counts_path}")


if __name__ == "__main__":
    main()
Code/plot_totaltime_distribution.py ADDED
@@ -0,0 +1,433 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """Plot distribution of per-student total time from first to last attempt.
3
+
4
+ This script reads FoundationalASSIST `Interactions.csv`, groups interactions by
5
+ student (`user_id`), computes each student's total time span from first to last
6
+ recorded attempt (`end_time`), discretizes these totals into bins, and plots
7
+ the resulting distribution.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import argparse
13
+ import math
14
+ from pathlib import Path
15
+
16
+ import matplotlib.pyplot as plt
17
+ import pandas as pd
18
+ from matplotlib.ticker import FuncFormatter, MaxNLocator
19
+
20
+
21
# All default paths are resolved relative to the directory one level above
# the folder that contains this script.

# Default input CSV with the raw interaction log.
DEFAULT_INTERACTIONS_PATH = (
    Path(__file__).resolve().parent.parent / "Data" / "Interactions.csv"
)
# Default destination of the rendered histogram image.
DEFAULT_OUTPUT_PLOT = (
    Path(__file__).resolve().parent.parent / "Results" / "total_time_distribution.png"
)
# Default destination of the per-bin summary table.
DEFAULT_OUTPUT_COUNTS = (
    Path(__file__).resolve().parent.parent
    / "Results"
    / "total_time_distribution_counts.csv"
)
# Default CDF marker threshold, in minutes:
# 1,051,200 min = 730 days * 1,440 min/day.
DEFAULT_CDF_MARKER_MINUTES = 1051200.0
33
+
34
+
35
def parse_args() -> argparse.Namespace:
    """Declare the command-line options of this script and parse them.

    Returns:
        The populated ``argparse.Namespace``.
    """
    cli = argparse.ArgumentParser(
        description=(
            "Compute per-student total time (first to last attempt) from "
            "Interactions.csv and plot the binned distribution."
        )
    )
    add = cli.add_argument

    # Input and output locations.
    add(
        "--interactions-path",
        type=Path,
        default=DEFAULT_INTERACTIONS_PATH,
        help="Path to Interactions.csv.",
    )
    add(
        "--output-plot",
        type=Path,
        default=DEFAULT_OUTPUT_PLOT,
        help="Path to save the output figure.",
    )
    add(
        "--output-counts",
        type=Path,
        default=DEFAULT_OUTPUT_COUNTS,
        help="Path to save bin counts as CSV.",
    )

    # Data selection.
    add(
        "--max-rows",
        type=int,
        default=None,
        help="Optional cap on rows after sorting (for quick debugging).",
    )
    add(
        "--keep-nonpositive-total-times",
        action="store_true",
        help=(
            "Keep zero/negative total times. By default, only strictly "
            "positive total times are used."
        ),
    )

    # Plot appearance and binning.
    add("--log-y", action="store_true", help="Use log scale on y-axis.")
    add(
        "--plot-upper-limit-minutes",
        type=float,
        default=None,
        help=(
            "Optional upper limit for x-axis in minutes. "
            "If omitted, uses the full range implied by bins."
        ),
    )
    add(
        "--bin-time",
        type=float,
        default=None,
        help=(
            "Optional fixed bin width in minutes. "
            "For example, --bin-time 60 creates hourly bins."
        ),
    )
    add(
        "--cdf-marker-minutes",
        type=float,
        default=DEFAULT_CDF_MARKER_MINUTES,
        help="Threshold (in minutes) for plotting cumulative probability marker.",
    )

    return cli.parse_args()
104
+
105
+
106
def load_interactions(path: Path, max_rows: int | None = None) -> pd.DataFrame:
    """Load the interaction fields required for timing analysis.

    Only the ``id``, ``user_id`` and ``end_time`` columns are read. ``id`` is
    coerced to an integer (unparseable values become ``-1``), ``end_time`` is
    parsed as a UTC timestamp, and rows without a ``user_id`` or a parseable
    ``end_time`` are dropped. The result is sorted by
    ``(user_id, end_time, id)`` with a stable sort.

    Args:
        path: Location of ``Interactions.csv``.
        max_rows: Optional cap applied to the sorted frame (first rows kept).

    Returns:
        The cleaned, sorted interactions.

    Raises:
        ValueError: If ``max_rows`` is given and is not strictly positive.
    """
    # Reject a bad cap before reading and sorting the whole CSV, so an
    # invalid argument fails immediately instead of after the expensive load.
    if max_rows is not None and max_rows <= 0:
        raise ValueError("--max-rows must be a positive integer.")

    usecols = ["id", "user_id", "end_time"]
    df = pd.read_csv(path, usecols=usecols, low_memory=False)

    df["id"] = pd.to_numeric(df["id"], errors="coerce")
    df["id"] = df["id"].fillna(-1).astype(int)
    df["user_id"] = df["user_id"].astype("string")
    df["end_time"] = pd.to_datetime(df["end_time"], errors="coerce", utc=True)

    df = df.dropna(subset=["user_id", "end_time"]).copy()
    # Stable sort; ``id`` breaks ties between identical timestamps.
    df = df.sort_values(["user_id", "end_time", "id"], kind="mergesort")

    if max_rows is not None:
        df = df.head(max_rows).copy()

    return df
125
+
126
+
127
def compute_student_total_times_minutes(df: pd.DataFrame) -> pd.Series:
    """Return, per ``user_id``, the minutes between earliest and latest ``end_time``.

    The resulting Series is indexed by ``user_id`` (in order of first
    appearance) and is named ``total_time_minutes``.
    """
    per_student = df.groupby("user_id", sort=False)["end_time"]
    span = per_student.max() - per_student.min()
    return (span.dt.total_seconds() / 60.0).rename("total_time_minutes")
135
+
136
+
137
def default_bin_edges_minutes() -> list[float]:
    """Return a fresh list of the default histogram edges, in minutes.

    The edges run from 0 minutes up to 70 days: 10 min, 30 min, 1/3/6/12
    hours, then 1/2/3/7/14/30/70 days.
    """
    hour = 60.0
    day = 24.0 * hour
    sub_hour = [0.0, 10.0, 30.0]
    hourly = [n * hour for n in (1, 3, 6, 12)]
    daily = [n * day for n in (1, 2, 3, 7, 14, 30, 70)]
    return sub_hour + hourly + daily
155
+
156
+
157
def build_bin_edges_minutes(valid: pd.Series) -> list[float]:
    """Build finite plotting edges for proportional-width histogram bars.

    Starts from the default edges and, when the data reach or exceed the last
    default edge, appends one extra finite "tail" edge so that every
    observation falls inside some bin.

    Args:
        valid: Non-empty series of total times in minutes.

    Returns:
        The list of bin edges in minutes, in ascending order.
    """
    edges = default_bin_edges_minutes()
    base_tail_start = edges[-1]
    max_total = float(valid.max())

    # The bins are consumed as half-open [left, right) intervals, so a value
    # exactly equal to the last default edge would belong to no bin. Use >=
    # (not >) so that boundary value also gets a tail bin, matching the
    # fixed-width edge builder, which extends whenever the last edge <= max.
    if max_total >= base_tail_start:
        tail_edge = max(base_tail_start + 60.0, max_total * 1.05)
        edges.append(tail_edge)

    return edges
168
+
169
+
170
def build_fixed_width_bin_edges_minutes(
    valid: pd.Series, bin_time_minutes: float
) -> list[float]:
    """Return equally spaced bin edges that cover the observed total times.

    The first edge is the observed minimum rounded down to a multiple of
    ``bin_time_minutes``; the last edge is the observed maximum rounded up.
    One more bin is appended when the maximum sits exactly on the last edge.
    """
    step = bin_time_minutes
    lowest = float(valid.min())
    highest = float(valid.max())

    first_edge = step * math.floor(lowest / step)
    last_edge = step * math.ceil(highest / step)

    # Snap a numerically-zero start (including -0.0) to a clean 0.0.
    if math.isclose(first_edge, 0.0, abs_tol=1e-12):
        first_edge = 0.0
    # A degenerate range still needs one full-width bin.
    if math.isclose(last_edge, first_edge, abs_tol=1e-12):
        last_edge = first_edge + step

    bin_count = int(round((last_edge - first_edge) / step))
    edges: list[float] = []
    for k in range(bin_count + 1):
        edges.append(first_edge + k * step)

    # Bins are half-open, so a maximum lying on the final edge needs one more.
    if edges[-1] <= highest:
        edges.append(edges[-1] + step)

    return edges
191
+
192
+
193
def format_bin_bound(minutes: float) -> str:
    """Render a bin boundary compactly.

    Values that are (numerically) whole are printed as integers; anything
    else is printed with at most two decimals and no trailing zeros.
    """
    nearest = round(minutes)
    if not math.isclose(minutes, nearest, abs_tol=1e-9):
        two_decimals = f"{minutes:.2f}"
        return two_decimals.rstrip("0").rstrip(".")
    return str(int(nearest))
197
+
198
+
199
def make_bin_labels(
    edges: list[float], open_tail_from: float | None = None
) -> list[str]:
    """Produce one human-readable label per bin defined by ``edges``.

    Bins are labelled as half-open intervals ``[left, right) min``. When
    ``open_tail_from`` is given and the final bin starts at or beyond it,
    that final bin is labelled as an open tail (``>= left min``) instead.
    """
    intervals = list(zip(edges[:-1], edges[1:]))
    tail_position = len(edges) - 2

    def describe(position: int, lower: float, upper: float) -> str:
        is_open_tail = (
            open_tail_from is not None
            and position == tail_position
            and lower >= open_tail_from
        )
        if is_open_tail:
            return f">= {format_bin_bound(lower)} min"
        return f"[{format_bin_bound(lower)}, {format_bin_bound(upper)}) min"

    return [
        describe(position, lower, upper)
        for position, (lower, upper) in enumerate(intervals)
    ]
210
+
211
+
212
def format_minutes_tick(value: float, _pos: float) -> str:
    """Format an axis tick given in minutes as minutes, hours or days.

    Values below one hour use an ``m`` suffix, values below one day use
    ``h``, and everything else uses ``d``. The number is rounded to zero
    decimals. ``_pos`` is accepted for the matplotlib formatter signature
    and is not used.
    """
    # (exclusive upper limit in minutes, divisor, suffix)
    scales = ((60, 1, "m"), (1440, 60, "h"))
    for limit, divisor, suffix in scales:
        if value < limit:
            return f"{value / divisor:.0f}{suffix}"
    return f"{value / 1440:.0f}d"
218
+
219
+
220
def filter_valid_total_times(
    total_times_minutes: pd.Series, keep_nonpositive: bool
) -> pd.Series:
    """Drop missing totals and, unless asked otherwise, non-positive ones.

    Args:
        total_times_minutes: Per-student total times in minutes.
        keep_nonpositive: When true, zero and negative totals are retained.

    Returns:
        A new Series containing only the usable totals.
    """
    cleaned = total_times_minutes.dropna().copy()
    if keep_nonpositive:
        return cleaned
    return cleaned[cleaned > 0]
227
+
228
+
229
def summarize_binned_distribution(
    total_times_minutes: pd.Series,
    keep_nonpositive: bool,
    bin_time_minutes: float | None = None,
) -> pd.DataFrame:
    """Bin the per-student totals and tabulate counts, shares and densities.

    Args:
        total_times_minutes: Per-student total times in minutes.
        keep_nonpositive: Forwarded to ``filter_valid_total_times``.
        bin_time_minutes: Fixed bin width in minutes; when ``None`` the
            default (variable-width) edges are used.

    Returns:
        One row per bin with the label, bounds, width, count, probability,
        percentage and probability density per minute.

    Raises:
        ValueError: If no totals remain after filtering.
    """
    usable = filter_valid_total_times(total_times_minutes, keep_nonpositive)
    if usable.empty:
        raise ValueError("No valid total times found after filtering.")

    tail_start: float | None = None
    if bin_time_minutes is None:
        defaults = default_bin_edges_minutes()
        edges = build_bin_edges_minutes(usable)
        # An extra edge beyond the defaults means the last bin is an open tail.
        if len(edges) > len(defaults):
            tail_start = defaults[-1]
    else:
        edges = build_fixed_width_bin_edges_minutes(usable, bin_time_minutes)

    bin_names = make_bin_labels(edges, open_tail_from=tail_start)
    assigned = pd.cut(
        usable, bins=edges, labels=bin_names, right=False, include_lowest=True
    )

    counts = assigned.value_counts(sort=False)
    share = (counts / counts.sum()).astype(float).to_numpy(dtype=float)

    lower = pd.Series(edges[:-1], dtype=float)
    upper = pd.Series(edges[1:], dtype=float)
    span = upper - lower

    # Density normalises each bin's share by its width so that bars of
    # unequal width remain comparable.
    return pd.DataFrame(
        {
            "bin": counts.index.astype(str),
            "bin_left_min": lower.to_numpy(),
            "bin_right_min": upper.to_numpy(),
            "bin_width_min": span.to_numpy(),
            "count": counts.values,
            "probability": share,
            "percentage": share * 100.0,
            "density_per_min": share / span.to_numpy(dtype=float),
        }
    )
272
+
273
+
274
def cumulative_probability_at_minutes(
    total_times_minutes: pd.Series,
    threshold_minutes: float,
    keep_nonpositive: bool,
) -> float:
    """Return the fraction of usable totals that are at or below a threshold.

    Raises:
        ValueError: If no totals remain after filtering.
    """
    usable = filter_valid_total_times(total_times_minutes, keep_nonpositive)
    if usable.empty:
        raise ValueError("No valid total times found after filtering.")
    at_or_below = usable <= threshold_minutes
    return float(at_or_below.mean())
283
+
284
+
285
def plot_distribution(
    summary_df: pd.DataFrame,
    output_path: Path,
    log_y: bool = False,
    plot_upper_limit_minutes: float | None = None,
    cdf_marker_minutes: float = DEFAULT_CDF_MARKER_MINUTES,
    cdf_at_marker: float | None = None,
) -> None:
    """Create and save a publication-ready total-time distribution histogram.

    Args:
        summary_df: Per-bin table; the columns ``bin_left_min``,
            ``bin_width_min``, ``density_per_min`` and ``percentage`` are read.
        output_path: Image file to write; its parent directory is created.
        log_y: Use a logarithmic y-axis when true.
        plot_upper_limit_minutes: Optional cap on the right x-axis limit.
        cdf_marker_minutes: X position (minutes) of the dashed vertical marker.
        cdf_at_marker: Optional cumulative probability shown in the marker's
            legend label.
    """
    output_path.parent.mkdir(parents=True, exist_ok=True)

    plt.style.use("seaborn-v0_8-whitegrid")

    fig, ax = plt.subplots(figsize=(20, 5))

    left = summary_df["bin_left_min"].to_numpy(dtype=float)
    width = summary_df["bin_width_min"].to_numpy(dtype=float)
    height = summary_df["density_per_min"].to_numpy(dtype=float)

    # Bars start at each bin's left edge and span its full width, so bins of
    # unequal width are drawn proportionally.
    bars = ax.bar(
        left,
        height,
        width=width,
        align="edge",
        color="#4C78A8",
        # edgecolor="white",
        # linewidth=1.0,
    )

    ax.set_title("Distribution of Student Total Time (First to Last Attempt)")
    ax.set_xlabel("Total Time Per Student")
    ax.set_ylabel("Probability Density (1/min)")

    # X range covers all bins unless the caller asks for a tighter upper limit.
    x_min = float(left.min())
    x_max = float((left + width).max())
    if plot_upper_limit_minutes is not None:
        x_max = min(x_max, float(plot_upper_limit_minutes))
    ax.set_xlim(x_min, x_max)

    ax.xaxis.set_major_locator(MaxNLocator(nbins=9))
    ax.xaxis.set_major_formatter(FuncFormatter(format_minutes_tick))
    ax.grid(axis="y", alpha=0.25, linewidth=0.8)
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)

    if log_y:
        ax.set_yscale("log")

    # Dashed vertical line at the CDF threshold; the legend label includes the
    # cumulative percentage when it was supplied.
    marker_label = f"CDF <= {format_bin_bound(cdf_marker_minutes)} min"
    if cdf_at_marker is not None:
        marker_label = f"{marker_label}: {cdf_at_marker * 100:.1f}%"
    ax.axvline(
        cdf_marker_minutes,
        color="#E45756",
        linestyle="--",
        linewidth=1.6,
        label=marker_label,
    )
    ax.legend(loc="upper right", frameon=False, fontsize=9)

    # Skip dense labeling when there are many bins to keep figure readable.
    annotate_bars = len(summary_df) <= 40
    if annotate_bars:
        for bar, pct in zip(bars, summary_df["percentage"]):
            # Bins holding less than 1% of students are left unlabeled.
            if pct < 1.0:
                continue
            h = bar.get_height()
            if h <= 0:
                continue
            # Percentage text centered just above the top of the bar.
            ax.annotate(
                f"{pct:.1f}%",
                xy=(bar.get_x() + bar.get_width() / 2.0, h),
                xytext=(0, 3),
                textcoords="offset points",
                ha="center",
                va="bottom",
                fontsize=8,
            )

    plt.tight_layout()
    fig.savefig(output_path, dpi=400, bbox_inches="tight")
    # Close the figure explicitly once it has been written.
    plt.close(fig)
367
+
368
+
369
def main() -> None:
    """Run the total-time analysis end to end.

    Parses the CLI options, validates them, loads the interactions, computes
    per-student total times, writes the binned summary CSV, renders the
    histogram and prints a short report.

    Raises:
        FileNotFoundError: If the interactions file does not exist.
        ValueError: If a numeric option is not strictly positive.
    """
    args = parse_args()

    if not args.interactions_path.exists():
        raise FileNotFoundError(
            f"Interactions file not found: {args.interactions_path}"
        )

    if args.plot_upper_limit_minutes is not None and args.plot_upper_limit_minutes <= 0:
        raise ValueError("--plot-upper-limit-minutes must be a positive number.")
    if args.bin_time is not None and args.bin_time <= 0:
        raise ValueError("--bin-time must be a positive number.")
    if args.cdf_marker_minutes <= 0:
        raise ValueError("--cdf-marker-minutes must be a positive number.")

    df = load_interactions(args.interactions_path, max_rows=args.max_rows)
    total_times_minutes = compute_student_total_times_minutes(df)

    summary = summarize_binned_distribution(
        total_times_minutes,
        keep_nonpositive=args.keep_nonpositive_total_times,
        bin_time_minutes=args.bin_time,
    )
    # The counts file may live in a different directory than the plot, and
    # writing the CSV does not create missing folders, so create it here.
    args.output_counts.parent.mkdir(parents=True, exist_ok=True)
    summary.to_csv(args.output_counts, index=False)

    cdf_at_marker = cumulative_probability_at_minutes(
        total_times_minutes=total_times_minutes,
        threshold_minutes=args.cdf_marker_minutes,
        keep_nonpositive=args.keep_nonpositive_total_times,
    )

    plot_distribution(
        summary,
        args.output_plot,
        log_y=args.log_y,
        plot_upper_limit_minutes=args.plot_upper_limit_minutes,
        cdf_marker_minutes=args.cdf_marker_minutes,
        cdf_at_marker=cdf_at_marker,
    )

    total_students = int(df["user_id"].nunique())
    # Number of students that survived the same filter used for the summary.
    students_used = int(
        len(
            filter_valid_total_times(
                total_times_minutes,
                keep_nonpositive=args.keep_nonpositive_total_times,
            )
        )
    )
    print("Done.")
    print(f"Interactions loaded: {len(df):,}")
    print(f"Students observed: {total_students:,}")
    print(f"Students used in distribution: {students_used:,}")
    if args.bin_time is not None:
        print(f"Bin width (min): {args.bin_time}")
    print(
        f"Cumulative P(total_time <= {format_bin_bound(args.cdf_marker_minutes)} min): "
        f"{cdf_at_marker * 100:.2f}%"
    )
    print(f"Saved plot: {args.output_plot}")
    print(f"Saved bin counts: {args.output_counts}")
430
+
431
+
432
# Run the CLI entry point only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Code/process_to_single_file.py ADDED
@@ -0,0 +1,820 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Convert FoundationalASSIST CSV files to the CSEDM/OEKT JSON format.
4
+
5
+ Inputs (under Data/ by default):
6
+ - Interactions.csv
7
+ - Problems.csv
8
+ - Skill_Set.csv
9
+ - Skills.csv
10
+
11
+ Outputs (under src/data/FoundationalASSIST/ by default):
12
+ - dataset.json
13
+ - qmatrix.json
14
+ - trainset.json
15
+ - validset.json
16
+ - testset.json
17
+
18
+ The produced dataset JSON follows the same schema used by src/data/CSEDM.
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ import argparse
24
+ import json
25
+ import random
26
+ import re
27
+ from pathlib import Path
28
+ from typing import Literal, cast
29
+
30
+ import pandas as pd
31
+ from tqdm import tqdm
32
+ from clean_utils import clean_problem_body
33
+
34
# NOTE(review): parents[3] is three levels above this script's folder, while
# DEFAULT_DATA_DIR below is only one level above it -- confirm this matches
# the repository layout the script is expected to run in.
PROJECT_ROOT = Path(__file__).resolve().parents[3]
# Default folder holding the input CSV files ("Data", next to this script's folder).
DEFAULT_DATA_DIR = Path(__file__).resolve().parent.parent / "Data"
# Default folder the generated JSON files are written to.
DEFAULT_OUTPUT_DIR = PROJECT_ROOT / "src" / "data" / "FoundationalASSIST"
# Canonical grouping-mode names produced by parse_grouping_mode.
GroupingMode = Literal["none", "1h", "halfday", "day", "week", "month", "year"]
38
+
39
+
40
def parse_grouping_mode(value: str) -> GroupingMode:
    """Map a ``--grouping-time`` argument (or alias) to its canonical mode.

    Matching ignores surrounding whitespace and letter case.

    Raises:
        argparse.ArgumentTypeError: If ``value`` is not a known mode or alias.
    """
    canonical = ("none", "1h", "halfday", "day", "week", "month", "year")
    # Every canonical name maps to itself; the extras are accepted aliases.
    lookup: dict[str, GroupingMode] = {name: name for name in canonical}
    lookup.update(
        {
            "0": "none",
            "0.0": "none",
            "off": "none",
            "no": "none",
            "hour": "1h",
            "half-day": "halfday",
        }
    )

    key = value.strip().lower()
    if key in lookup:
        return lookup[key]

    valid_values = "1h, halfday, day, week, month, year, none"
    raise argparse.ArgumentTypeError(
        f"Invalid grouping mode '{value}'. Valid values: {valid_values}."
    )
65
+
66
+
67
def parse_args() -> argparse.Namespace:
    """Declare the converter's command-line options and parse them.

    Returns:
        The populated ``argparse.Namespace``.
    """
    cli = argparse.ArgumentParser(
        description="Convert FoundationalASSIST to CSEDM/OEKT JSON format."
    )
    add = cli.add_argument

    # Input and output folders.
    add(
        "--data-dir",
        type=Path,
        default=DEFAULT_DATA_DIR,
        help="Directory containing Interactions.csv, Problems.csv, Skills.csv.",
    )
    add(
        "--output-dir",
        type=Path,
        default=DEFAULT_OUTPUT_DIR,
        help="Directory to write dataset.json/qmatrix.json/split files.",
    )

    # Student-level split configuration.
    add(
        "--seed",
        type=int,
        default=42,
        help="Random seed used for train/valid/test student split.",
    )
    for split_name, share in (("train", 0.8), ("valid", 0.1), ("test", 0.1)):
        add(
            f"--{split_name}-ratio",
            type=float,
            default=share,
            help=f"Fraction of students in {split_name} split.",
        )

    # Sampling and grouping.
    add(
        "--max-interactions",
        type=int,
        default=None,
        help=(
            "Optional cap on number of interaction rows after sorting. "
            "Useful for quick smoke tests."
        ),
    )
    add(
        "--grouping-time",
        type=parse_grouping_mode,
        default="none",
        help=(
            "Calendar grouping mode per student: 1h, halfday, day, week, "
            "month, year, or none."
        ),
    )
    return cli.parse_args()
126
+
127
+
128
+ def _text(v: object) -> str:
129
+ if v is None:
130
+ return ""
131
+ if v is pd.NA:
132
+ return ""
133
+ if isinstance(v, float) and pd.isna(v):
134
+ return ""
135
+ return str(v)
136
+
137
+
138
def _as_int(v: object) -> int:
    """Convert ``v`` to ``int`` by way of its text form and ``float``."""
    as_number = float(_text(v))
    return int(as_number)
140
+
141
+
142
def _as_float(v: object) -> float:
    """Convert ``v`` to ``float`` by way of its text form."""
    as_string = _text(v)
    return float(as_string)
144
+
145
+
146
def label_answer_options(answer_string: object) -> dict[str, str] | None:
    """Turn a ``||``-delimited option string into a letter -> option mapping.

    Options are labelled ``A`` through ``J`` in order; any options beyond the
    tenth are not included. Returns ``None`` when the input is missing or
    blank.
    """
    raw = _text(answer_string).strip()
    if raw == "":
        return None

    labelled: dict[str, str] = {}
    # zip stops at the shorter input, which caps the result at ten options.
    for letter, option in zip("ABCDEFGHIJ", raw.split("||")):
        labelled[letter] = option.strip()
    return labelled
155
+
156
+
157
def clean_html_and_normalize(text: object) -> str:
    """Strip HTML tags and canonicalise whitespace so texts can be compared.

    Tags are removed, runs of whitespace collapse to single spaces, and
    whitespace around colons is removed.
    """
    raw = _text(text)
    if not raw:
        return ""

    without_tags = re.sub(r"<[^>]+>", "", raw)
    single_spaced = " ".join(without_tags.split())
    tight_colons = re.sub(r"\s*:\s*", ":", single_spaced)
    return tight_colons.strip()
167
+
168
+
169
def _find_option_letter(
    raw_answer: str,
    raw_options: dict[str, str],
    normalized_options: dict[str, str],
) -> str | None:
    """Return the letter of the option that best matches one student answer."""
    # 1) Exact match on the raw text. This is the only pass that can tell
    #    apart options whose visible text is empty after tag stripping
    #    (for example image-only options).
    for letter, option in raw_options.items():
        if raw_answer == option:
            return letter

    normalized_answer = clean_html_and_normalize(raw_answer)
    # Nothing comparable is left once the markup is removed.
    if not normalized_answer:
        return None

    # 2) Exact match on the normalized text.
    for letter, option in normalized_options.items():
        if normalized_answer == option:
            return letter

    # 3) Substring match in either direction. Empty options are skipped
    #    because the empty string is contained in every string and would
    #    otherwise match any answer.
    for letter, option in normalized_options.items():
        if option and (normalized_answer in option or option in normalized_answer):
            return letter

    return None


def match_student_answer_to_letters(
    student_answer_text: object,
    answer_options_dict: dict[str, str] | None,
) -> str:
    """Map student multiple-choice answer text(s) to letter labels.

    The answer text is split on ``" , "`` into individual answers and each is
    matched against the options: first on the raw text, then on the
    tag-stripped/normalized text, then by substring containment. Text that is
    empty after normalization never matches by normalization or containment.

    Args:
        student_answer_text: Raw answer text recorded for the student.
        answer_options_dict: Letter -> option text mapping, or ``None``.

    Returns:
        The matched letters, de-duplicated, sorted and joined with ``", "``.
        When there are no options, no answer text, or nothing matches, the
        answer text is returned unchanged.
    """
    answer_text = _text(student_answer_text)
    if not answer_text or not answer_options_dict:
        return answer_text

    raw_options = {
        letter: _text(text).strip() for letter, text in answer_options_dict.items()
    }
    normalized_options = {
        letter: clean_html_and_normalize(text)
        for letter, text in answer_options_dict.items()
    }

    matched_letters: list[str] = []
    for student_ans in answer_text.split(" , "):
        letter = _find_option_letter(
            student_ans.strip(), raw_options, normalized_options
        )
        if letter is not None:
            matched_letters.append(letter)

    if matched_letters:
        return ", ".join(sorted(set(matched_letters)))
    return answer_text
204
+
205
+
206
def get_correct_option_letters(
    answer_options: dict[str, str] | None,
    correct_answers: object,
) -> str:
    """Translate the ``||``-delimited correct answer text into option letters.

    Returns the sorted letters joined with ``", "``. If there are no options,
    no correct-answer text, or none of the options equals a correct answer,
    the stripped correct-answer text is returned instead.
    """
    expected = _text(correct_answers).strip()
    if not (answer_options and expected):
        return expected

    accepted = [piece.strip() for piece in expected.split("||")]
    hits = sorted(
        letter for letter, option in answer_options.items() if option in accepted
    )
    if hits:
        return ", ".join(hits)
    return expected
222
+
223
+
224
def format_answer_options_for_prompt(answer_options: dict[str, str] | None) -> str:
    """Render the options as one ``"<letter>) <text>"`` line per option.

    Returns an empty string when there are no options.
    """
    if not answer_options:
        return ""

    rendered: list[str] = []
    for letter, text in answer_options.items():
        rendered.append(f"{letter}) {text}")
    return "\n".join(rendered)
229
+
230
+
231
def load_and_preprocess_problems(problems_path: Path) -> pd.DataFrame:
    """Read Problems.csv and derive the answer-related helper columns.

    Rows without a numeric ``problem_id`` are dropped and duplicate ids keep
    their first row (after sorting by id). The returned frame gains the
    columns ``cleaned body``, ``answer_options``, ``correct_answers`` and
    ``answer_options_formatted``.
    """
    table = pd.read_csv(problems_path, low_memory=False)

    numeric_ids = pd.to_numeric(table["problem_id"], errors="coerce")
    table["problem_id"] = numeric_ids
    table = table.dropna(subset=["problem_id"]).copy()
    table["problem_id"] = table["problem_id"].astype(int)

    table = table.sort_values(["problem_id"])
    table = table.drop_duplicates(subset=["problem_id"], keep="first")

    table["cleaned body"] = table["Problem Body"].apply(clean_problem_body)
    table["answer_options"] = table["Multiple Choice Options"].apply(
        label_answer_options
    )

    mc_types = {"Multiple Choice (select 1)", "Multiple Choice (select all)"}

    def resolve_correct_answer(row: pd.Series) -> str:
        # Multiple-choice items resolve to letters; everything else uses the
        # fill-in answer text.
        if _text(row["Problem Type"]).strip() in mc_types:
            return get_correct_option_letters(
                row["answer_options"],
                row["Multiple Choice Answers"],
            )
        return _text(row.get("Fill-in Answers", ""))

    table["correct_answers"] = table.apply(resolve_correct_answer, axis=1)
    table["answer_options_formatted"] = table["answer_options"].apply(
        format_answer_options_for_prompt
    )
    return table
265
+
266
+
267
def load_skill_tables(
    skills_path: Path,
    skill_set_path: Path,
) -> tuple[list[dict], dict[int, list[int]], int]:
    """Load skills and build a problem_id -> skill_ids mapping.

    Args:
        skills_path: CSV with ``problem_id``, ``node_code`` and ``node_name``
            columns (one row per problem/skill tag).
        skill_set_path: CSV with ``index``, ``skill_code`` and
            ``full_description`` columns (the skill catalogue).

    Returns:
        skills: OEKT skill list, ordered by skill code, followed by one
            fallback "UnmappedSkill" entry.
        problem_to_skills: Mapping from original problem_id to a sorted,
            de-duplicated list of skill IDs. A skill ID is the catalogue
            ``index`` minus 1.
        fallback_skill_id: Skill ID for untagged problems (largest skill ID
            plus 1).

    Raises:
        ValueError: If a ``node_code`` in the skills file has no matching
            ``skill_code`` in the skill-set file.
    """
    usecols = ["problem_id", "node_code", "node_name"]
    skills_df = pd.read_csv(skills_path, usecols=usecols, low_memory=False)

    # Keep only rows with a numeric problem_id and a non-empty node_code.
    skills_df["problem_id"] = pd.to_numeric(skills_df["problem_id"], errors="coerce")
    skills_df = skills_df.dropna(subset=["problem_id"]).copy()
    skills_df["problem_id"] = skills_df["problem_id"].astype(int)
    skills_df["node_code"] = skills_df["node_code"].apply(lambda v: _text(v).strip())
    skills_df["node_name"] = skills_df["node_name"].apply(lambda v: _text(v).strip())
    skills_df = skills_df[skills_df["node_code"] != ""].copy()

    skill_set_df = pd.read_csv(
        skill_set_path,
        usecols=["index", "skill_code", "full_description"],
        low_memory=False,
    )
    # Keep only catalogue rows with a numeric index and a non-empty skill_code.
    skill_set_df["index"] = pd.to_numeric(skill_set_df["index"], errors="coerce")
    skill_set_df = skill_set_df.dropna(subset=["index"]).copy()
    skill_set_df["index"] = skill_set_df["index"].astype(int)
    skill_set_df["skill_code"] = skill_set_df["skill_code"].apply(
        lambda v: _text(v).strip()
    )
    skill_set_df["full_description"] = skill_set_df["full_description"].apply(
        lambda v: _text(v).strip()
    )
    skill_set_df = skill_set_df[skill_set_df["skill_code"] != ""].copy()
    # For a repeated skill_code, keep the row with the smallest index.
    skill_set_df = (
        skill_set_df.sort_values(["index", "skill_code"])
        .drop_duplicates(subset=["skill_code"], keep="first")
        .copy()
    )

    # One display name per node_code (the first after sorting by name).
    node_name_by_code = (
        skills_df.sort_values(["node_code", "node_name"])
        .drop_duplicates(subset=["node_code"], keep="first")
        .set_index("node_code")["node_name"]
        .to_dict()
    )

    # Each entry is (node_code, skill_id, name, description).
    skill_rows: list[tuple[str, int, str, str]] = []
    skill_id_map: dict[str, int] = {}
    for row in skill_set_df.itertuples(index=False):
        node_code = _text(row.skill_code).strip()
        # Skill IDs are the catalogue index shifted down by one.
        skill_id = _as_int(row.index) - 1
        skill_id_map[node_code] = skill_id

        # Fall back to the code itself when no display name is known.
        node_name = _text(node_name_by_code.get(node_code, "")).strip()
        name = node_name if node_name else node_code
        description = _text(row.full_description).strip()
        if not description:
            print(
                f"Warning: Missing description for skill '{node_code}' in Skill_Set.csv. "
                f"Using default description."
            )

            description = (
                f"Common Core State StandardS for Mathematics: Skill {node_code}"
            )

        skill_rows.append((node_code, skill_id, name, description))

    # max_skill_id = max(skill_id_map.values(), default=-1)
    # Codes tagged on problems but absent from the catalogue.
    missing_node_codes = sorted(
        set(skills_df["node_code"].tolist()) - set(skill_id_map)
    )
    # Disabled alternative: assign fresh IDs to missing codes instead of failing.
    # for node_code in missing_node_codes:
    #     max_skill_id += 1
    #     skill_id_map[node_code] = max_skill_id

    #     node_name = _text(node_name_by_code.get(node_code, "")).strip()
    #     name = node_name if node_name else node_code
    #     description = (
    #         node_name
    #         if node_name
    #         else f"Common Core State StandardS for Mathematics: Skill {node_code}"
    #     )

    #     skill_rows.append((node_code, max_skill_id, name, description))

    if missing_node_codes:
        raise ValueError(
            f"Error: Found {len(missing_node_codes)} node_code(s) in Skills.csv that are missing from Skill_Set.csv. "
            f"Please ensure all node_code values in Skills.csv have a corresponding skill_code in Skill_Set.csv. "
            f"Missing node_codes: {missing_node_codes}"
        )

    # Emit skills ordered by node_code; the "id" field still carries the
    # catalogue-derived skill ID.
    skills: list[dict] = []
    for _, skill_id, name, description in sorted(skill_rows, key=lambda r: r[0]):
        skills.append(
            {
                "id": skill_id,
                "name": name,
                "description": description,
                "prerequisites": [],
            }
        )

    # The fallback skill takes the next ID after the largest one in use.
    fallback_skill_id = max([s["id"] for s in skills], default=-1) + 1
    skills.append(
        {
            "id": fallback_skill_id,
            "name": "UnmappedSkill",
            "description": "Fallback skill for questions without explicit skill tags.",
            "prerequisites": [],
        }
    )

    problem_to_skills: dict[int, list[int]] = {}
    pairs = skills_df[["problem_id", "node_code"]].drop_duplicates()
    for row in pairs.itertuples(index=False):
        pid = _as_int(row.problem_id)
        sid = skill_id_map[_text(row.node_code).strip()]
        problem_to_skills.setdefault(pid, []).append(sid)

    # Normalize each list to sorted unique IDs. Only values are replaced,
    # so the set of keys does not change during iteration.
    for pid, sids in problem_to_skills.items():
        if len(sids) == 0:
            print(f"Warning: Problem {pid} has no valid skill mappings.")
        problem_to_skills[pid] = sorted(set(sids))

    return skills, problem_to_skills, fallback_skill_id
397
+
398
+
399
def build_question_content(problem_row: pd.Series) -> tuple[str, str]:
    """Assemble the question text and return it with the correct answer.

    The text is made of the cleaned body, a ``Problem Type:`` line and an
    ``Answer Options:`` section, each included only when non-empty and
    separated by blank lines. If all three are empty, the text falls back to
    ``"Problem <problem_id>"``.
    """

    def field(column: str) -> str:
        return _text(problem_row.get(column, "")).strip()

    body = field("cleaned body")
    problem_type = field("Problem Type")
    options_block = field("answer_options_formatted")
    correct_answer = field("correct_answers")

    candidates = (
        (body, body),
        (problem_type, f"Problem Type: {problem_type}"),
        (options_block, f"Answer Options:\n{options_block}"),
    )
    sections = [rendered for value, rendered in candidates if value]

    if sections:
        return "\n\n".join(sections), correct_answer

    problem_id = problem_row.get("problem_id", "unknown")
    return f"Problem {problem_id}", correct_answer
421
+
422
+
423
def load_questions(
    problems_df: pd.DataFrame,
    problem_to_skills: dict[int, list[int]],
    fallback_skill_id: int,
) -> tuple[list[dict], dict[int, str], int]:
    """Convert preprocessed problems into OEKT question dictionaries.

    Returns:
        The question list, a ``problem_id -> question id`` mapping, and the
        number of problems that had no skill tags and received the fallback
        skill.
    """
    questions: list[dict] = []
    problem_to_qid: dict[int, str] = {}
    unmapped_questions = 0

    for record in problems_df.to_dict(orient="records"):
        pid = _as_int(record["problem_id"])
        qid = f"q_{pid}"

        tagged = problem_to_skills.get(pid, [])
        if tagged:
            skill_ids = tagged
        else:
            # Untagged problem: attach the single fallback skill.
            skill_ids = [fallback_skill_id]
            unmapped_questions += 1

        content, correct_answer = build_question_content(pd.Series(record))
        # Each question carries exactly one rubric: match the correct answer.
        rubric = {
            "id": f"r_{pid}_0",
            "description": (f"Match the correct answer: {correct_answer}"),
            "skill_ids": skill_ids,
        }
        questions.append(
            {
                "id": qid,
                "content": content,
                "skill_ids": skill_ids,
                "rubrics": [rubric],
            }
        )
        problem_to_qid[pid] = qid

    return questions, problem_to_qid, unmapped_questions
459
+
460
+
461
def load_interactions(
    interactions_path: Path,
    problem_meta_df: pd.DataFrame,
    max_interactions: int | None = None,
) -> pd.DataFrame:
    """Load and normalize interaction logs used to build student trajectories.

    Rows lacking ``user_id``, ``problem_id`` or ``discrete_score`` are
    dropped. ``end_time`` is not part of that filter, so rows with an
    unparseable timestamp are retained. For multiple-choice problems with
    known options, ``answer_text`` is rewritten as option letters. The result
    is sorted by ``(user_id, end_time, id)`` with a stable sort.

    Args:
        interactions_path: Location of ``Interactions.csv``.
        problem_meta_df: Preprocessed problems; the columns ``problem_id``,
            ``Problem Type`` and ``answer_options`` are used.
        max_interactions: Optional cap applied to the sorted frame.

    Returns:
        The cleaned, sorted interactions.

    Raises:
        ValueError: If ``max_interactions`` is given and is not positive.
    """
    # Reject a bad cap up front: the load, merge and row-wise answer mapping
    # below are expensive and pointless if the argument is invalid.
    if max_interactions is not None and max_interactions <= 0:
        raise ValueError("--max-interactions must be a positive integer.")

    usecols = [
        "id",
        "problem_id",
        "answer_text",
        "discrete_score",
        "end_time",
        "user_id",
    ]
    df = pd.read_csv(interactions_path, usecols=usecols, low_memory=False)

    df["problem_id"] = pd.to_numeric(df["problem_id"], errors="coerce")
    df["discrete_score"] = pd.to_numeric(df["discrete_score"], errors="coerce")
    df["id"] = pd.to_numeric(df["id"], errors="coerce")
    df["end_time"] = pd.to_datetime(df["end_time"], errors="coerce", utc=True)

    df = df.dropna(subset=["user_id", "problem_id", "discrete_score"]).copy()
    df["user_id"] = df["user_id"].astype(str)
    df["problem_id"] = df["problem_id"].astype(int)
    # Unparseable ids become -1 so the column can be an integer sort key.
    df["id"] = df["id"].fillna(-1).astype(int)

    # Temporarily attach problem metadata needed to interpret the answers.
    answer_meta = problem_meta_df[
        ["problem_id", "Problem Type", "answer_options"]
    ].copy()
    df = df.merge(answer_meta, on="problem_id", how="left")

    mc_types = {"Multiple Choice (select 1)", "Multiple Choice (select all)"}
    df["answer_text"] = df.apply(
        lambda row: (
            match_student_answer_to_letters(row["answer_text"], row["answer_options"])
            if _text(row.get("Problem Type", "")).strip() in mc_types
            and isinstance(row.get("answer_options"), dict)
            else _text(row["answer_text"])
        ),
        axis=1,
    )

    df = df.drop(columns=["Problem Type", "answer_options"])

    # Stable sort; ``id`` breaks ties between identical timestamps.
    df = df.sort_values(["user_id", "end_time", "id"], kind="mergesort")
    if max_interactions is not None:
        df = df.head(max_interactions).copy()
    return df
511
+
512
+
513
def build_qmatrix(questions: list[dict], num_skills: int) -> list[list[float]]:
    """Return one 0/1 indicator row per rubric, in question-then-rubric order.

    Each row has ``num_skills`` entries; the positions named by the rubric's
    ``skill_ids`` are set to 1.0 and all others stay 0.0.
    """
    matrix: list[list[float]] = []
    all_rubrics = (rubric for entry in questions for rubric in entry["rubrics"])
    for rubric in all_rubrics:
        indicator = [0.0] * num_skills
        for skill in rubric["skill_ids"]:
            indicator[int(skill)] = 1.0
        matrix.append(indicator)
    return matrix
523
+
524
+
525
def split_student_ids(
    student_ids: list[str],
    train_ratio: float,
    valid_ratio: float,
    test_ratio: float,
    seed: int,
) -> tuple[list[str], list[str], list[str]]:
    """Partition students into reproducible train/valid/test lists.

    The ids are sorted before the seeded shuffle, so the result depends only
    on the set of ids and the seed, not on the input order. Ratios are
    normalized by their sum; train and valid sizes are truncated and the
    test split receives whatever remains.

    Raises:
        ValueError: If any ratio is negative or all ratios sum to zero.
    """
    if any(ratio < 0 for ratio in (train_ratio, valid_ratio, test_ratio)):
        raise ValueError("Split ratios must be non-negative.")

    total = train_ratio + valid_ratio + test_ratio
    if total <= 0:
        raise ValueError("At least one split ratio must be > 0.")

    shuffled = sorted(student_ids)
    random.Random(seed).shuffle(shuffled)

    population = len(shuffled)
    train_end = int(population * (train_ratio / total))
    valid_end = train_end + int(population * (valid_ratio / total))

    return (
        shuffled[:train_end],
        shuffled[train_end:valid_end],
        shuffled[valid_end:],
    )
552
+
553
+
554
def get_calendar_group_key(
    end_time: pd.Timestamp | None,
    grouping_mode: GroupingMode,
    missing_idx: int,
) -> tuple[object, ...]:
    """Return a stable calendar bucket key for an interaction timestamp.

    Args:
        end_time: Interaction timestamp; naive values are interpreted as UTC
            and aware values are converted to UTC. ``None`` or ``NaT`` means
            the timestamp is missing.
        grouping_mode: One of ``"1h"``, ``"halfday"``, ``"day"``, ``"week"``,
            ``"month"`` or ``"year"``.
        missing_idx: Index embedded in the key for a missing timestamp, so
            that different indices produce different keys.

    Returns:
        A tuple whose first element names the bucket kind, followed by the
        calendar fields that identify the bucket.

    Raises:
        ValueError: If ``grouping_mode`` is not a supported bucket kind.
    """
    # NaT is treated like None; otherwise its NaN date fields would leak
    # into the key instead of marking the timestamp as missing.
    if end_time is None or pd.isna(end_time):
        return ("missing", missing_idx)

    if end_time.tzinfo is None:
        ts = end_time.tz_localize("UTC")
    else:
        ts = end_time.tz_convert("UTC")

    if grouping_mode == "1h":
        return ("1h", ts.year, ts.month, ts.day, ts.hour)
    if grouping_mode == "halfday":
        # 0 = before noon UTC, 1 = noon UTC or later.
        return ("halfday", ts.year, ts.month, ts.day, 0 if ts.hour < 12 else 1)
    if grouping_mode == "day":
        return ("day", ts.year, ts.month, ts.day)
    if grouping_mode == "week":
        # ISO year/week, so days around New Year stay in one weekly bucket.
        iso = ts.isocalendar()
        return ("week", int(iso.year), int(iso.week))
    if grouping_mode == "month":
        return ("month", ts.year, ts.month)
    if grouping_mode == "year":
        return ("year", ts.year)

    raise ValueError(f"Unsupported grouping mode: {grouping_mode}")
584
+
585
+
586
def write_dataset_json(
    dataset_path: Path,
    skills: list[dict],
    questions: list[dict],
    interactions_df: pd.DataFrame,
    problem_to_qid: dict[int, str],
    grouping_mode: GroupingMode = "none",
    save_unmapped_skills: bool = False,
) -> tuple[list[str], int, int, int, int]:
    """Stream dataset.json to disk, one student object at a time.

    With ``grouping_mode == "none"`` every response becomes its own time
    step. Otherwise consecutive responses of a student that share a calendar
    bucket key are merged into a single time step.

    Returns:
        ``(student_ids, num_students, num_time_steps, num_questions,
        skipped_interactions)`` where ``skipped_interactions`` counts rows
        whose problem has no entry in ``problem_to_qid``.
    """
    dataset_path.parent.mkdir(parents=True, exist_ok=True)

    written_ids: list[str] = []
    step_total = 0
    response_total = 0
    skipped = 0

    with open(dataset_path, "w", encoding="utf-8") as f:
        f.write("{")
        f.write('"skills":')
        # Drop the trailing fallback skill unless the caller asked to keep it.
        drop_fallback = (
            not save_unmapped_skills
            and bool(skills)
            and skills[-1]["name"] == "UnmappedSkill"
        )
        json.dump(skills[:-1] if drop_fallback else skills, f, ensure_ascii=False)
        f.write(',"questions":')
        json.dump(questions, f, ensure_ascii=False)
        f.write(',"students":[')

        for user_id, student_df in tqdm(interactions_df.groupby("user_id", sort=False)):
            steps: list[dict] = []
            active_key: tuple[object, ...] | None = None

            for position, record in enumerate(student_df.itertuples(index=False)):
                qid = problem_to_qid.get(_as_int(record.problem_id))
                if qid is None:
                    skipped += 1
                    continue

                is_correct = _as_float(record.discrete_score) >= 1.0
                response = {
                    "question_id": qid,
                    "answer_text": _text(record.answer_text),
                    "rubric_scores": [1 if is_correct else 0],
                }
                response_total += 1

                if grouping_mode != "none":
                    raw_end = record.end_time
                    when: pd.Timestamp | None = (
                        None if pd.isna(raw_end) else cast(pd.Timestamp, raw_end)
                    )
                    bucket = get_calendar_group_key(
                        end_time=when,
                        grouping_mode=grouping_mode,
                        missing_idx=position,
                    )
                    if steps and active_key == bucket:
                        # Same bucket as the previous response: extend it.
                        steps[-1]["responses"].append(response)
                        continue
                    active_key = bucket

                steps.append({"t": len(steps), "responses": [response]})

            if not steps:
                # Students with no mappable interactions are not written.
                continue

            # A separator is needed before every student except the first.
            if written_ids:
                f.write(",")
            json.dump(
                {"student_id": user_id, "time_steps": steps},
                f,
                ensure_ascii=False,
            )

            written_ids.append(str(user_id))
            step_total += len(steps)

        f.write("]}")

    return (
        written_ids,
        len(written_ids),
        step_total,
        response_total,
        skipped,
    )
700
+
701
+
702
def save_json(path: Path, obj: object) -> None:
    """Write *obj* to *path* as 2-space-indented UTF-8 JSON.

    Missing parent directories are created and non-ASCII characters are
    written unescaped.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open("w", encoding="utf-8") as handle:
        json.dump(obj, handle, ensure_ascii=False, indent=2)
706
+
707
+
708
def main() -> None:
    """Convert the raw CSV tables into JSON files and print a summary.

    Reads the four CSV tables under ``args.data_dir`` and writes
    ``dataset.json``, ``qmatrix.json`` and the three split files under
    ``args.output_dir``.

    Raises:
        FileNotFoundError: If any required input CSV is missing.
    """
    args = parse_args()
    data_dir, output_dir = args.data_dir, args.output_dir

    interactions_path = data_dir / "Interactions.csv"
    problems_path = data_dir / "Problems.csv"
    skill_set_path = data_dir / "Skill_Set.csv"
    skills_path = data_dir / "Skills.csv"

    required_inputs = (interactions_path, problems_path, skill_set_path, skills_path)
    first_missing = next((p for p in required_inputs if not p.exists()), None)
    if first_missing is not None:
        raise FileNotFoundError(f"Required input file not found: {first_missing}")

    print("Loading skills...")
    skills, problem_to_skills, fallback_skill_id = load_skill_tables(
        skills_path=skills_path,
        skill_set_path=skill_set_path,
    )

    print("Loading and preprocessing problems...")
    problems_df = load_and_preprocess_problems(problems_path)

    print("Loading questions...")
    questions, problem_to_qid, unmapped_questions = load_questions(
        problems_df=problems_df,
        problem_to_skills=problem_to_skills,
        fallback_skill_id=fallback_skill_id,
    )

    print("Loading interactions...")
    interactions_df = load_interactions(
        interactions_path,
        problem_meta_df=problems_df,
        max_interactions=args.max_interactions,
    )

    print("Writing dataset.json...")
    written = write_dataset_json(
        dataset_path=output_dir / "dataset.json",
        skills=skills,
        questions=questions,
        interactions_df=interactions_df,
        problem_to_qid=problem_to_qid,
        grouping_mode=args.grouping_time,
        save_unmapped_skills=(unmapped_questions > 0),
    )
    student_ids, num_students, num_time_steps, num_questions, skipped_interactions = (
        written
    )

    print("Building qmatrix.json...")
    # The last skill entry is not counted when no question used the fallback.
    num_skills = len(skills) - int(unmapped_questions == 0)
    save_json(
        output_dir / "qmatrix.json",
        build_qmatrix(questions, num_skills=num_skills),
    )

    print("Building train/valid/test split files...")
    split_lists = split_student_ids(
        student_ids=student_ids,
        train_ratio=args.train_ratio,
        valid_ratio=args.valid_ratio,
        test_ratio=args.test_ratio,
        seed=args.seed,
    )
    split_files = ("trainset.json", "validset.json", "testset.json")
    for file_name, id_list in zip(split_files, split_lists):
        save_json(output_dir / file_name, id_list)

    def ratio_or_zero(numerator: float, denominator: int) -> float:
        """Divide, falling back to 0.0 when there is nothing to divide by."""
        return numerator / denominator if denominator > 0 else 0.0

    total_rubrics = sum(len(q["rubrics"]) for q in questions)
    question_skill_counts = [len(q.get("skill_ids", [])) for q in questions]
    rubric_skill_counts = [
        len(r.get("skill_ids", [])) for q in questions for r in q.get("rubrics", [])
    ]
    avg_skills_per_question = ratio_or_zero(
        sum(question_skill_counts), len(question_skill_counts)
    )
    avg_skills_per_rubric = ratio_or_zero(
        sum(rubric_skill_counts), len(rubric_skill_counts)
    )
    avg_time_steps_per_student = ratio_or_zero(num_time_steps, num_students)
    avg_questions_per_timestep = ratio_or_zero(num_questions, num_time_steps)

    summary_lines = [
        "\n=== Conversion Summary ===",
        f"Skills: {num_skills}",
        f"Questions: {len(questions)}",
        f"Rubrics: {total_rubrics}",
        f"Avg skills/question: {avg_skills_per_question:.3f}",
        f"Avg skills/rubric: {avg_skills_per_rubric:.3f}",
        f"Students: {num_students}",
        f"Time steps: {num_time_steps}",
        f"Avg timesteps/student: {avg_time_steps_per_student:.3f}",
        f"Avg questions/timestep: {avg_questions_per_timestep:.3f}",
        f"Grouping mode: {args.grouping_time}",
        f"Unmapped questions: {unmapped_questions}",
        f"Skipped interactions:{skipped_interactions}",
        f"Output directory: {output_dir}",
    ]
    for line in summary_lines:
        print(line)


if __name__ == "__main__":
    main()
Code/qwen3next80bvllm_instruct.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Knowledge Tracing inference with Qwen3-Next-80B-A3B-Instruct model.
3
+
4
+ This is the standard instruction-following model (no thinking blocks).
5
+ Recommended sampling: temperature=0.7, top_p=0.8, top_k=20, min_p=0
6
+
7
+ Usage:
8
+ CUDA_VISIBLE_DEVICES=0,1,2,3 python qwen3next80bvllm_instruct.py \
9
+ --data-dir foundationalktdataset/ \
10
+ --num-gpus 4 \
11
+ --batch-size 10 \
12
+ --cache-dir /data1/ \
13
+ --num-students 500 \
14
+ --bin-size 50 \
15
+ --min-history 50
16
+ """
17
+
18
+ from kt_inference_base import run_inference
19
+
20
# Sampling parameters; the values mirror the recommendation quoted in the
# module docstring for the instruct checkpoint.
_GENERATION_SETTINGS = {
    "temperature": 0.7,
    "top_p": 0.8,
    "top_k": 20,
    "min_p": 0.0,
    "max_tokens": 32768,
    "repetition_penalty": 1.0,
}

# Settings handed to the shared inference driver.
MODEL_CONFIG = {
    "model_id": "Qwen/Qwen3-Next-80B-A3B-Instruct",
    "gen_configs": _GENERATION_SETTINGS,
    "output_prefix": "qwen3next80binstruct",
    # Standard instruct model, so no system-prompt prefix is used.
    "system_prompt_prefix": "",
}

if __name__ == "__main__":
    run_inference(MODEL_CONFIG)
Code/qwen3next80bvllm_thinking.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Knowledge Tracing inference with Qwen3-Next-80B-A3B-Thinking model.
3
+
4
+ This model has native thinking mode - it automatically generates <think>...</think> blocks.
5
+ Recommended sampling: temperature=0.6, top_p=0.95, top_k=20, min_p=0
6
+
7
+ Usage:
8
+ CUDA_VISIBLE_DEVICES=0,1,2,3 python qwen3next80bvllm_thinking.py \
9
+ --data-dir foundationalktdataset/ \
10
+ --num-gpus 4 \
11
+ --batch-size 10 \
12
+ --cache-dir /data1/ \
13
+ --num-students 500 \
14
+ --bin-size 50 \
15
+ --min-history 50
16
+ """
17
+
18
+ from kt_inference_base import run_inference
19
+
20
# Sampling parameters; the values mirror the recommendation quoted in the
# module docstring for the thinking checkpoint.
_GENERATION_SETTINGS = {
    "temperature": 0.6,
    "top_p": 0.95,
    "top_k": 20,
    "min_p": 0.0,
    "max_tokens": 32768,
    "repetition_penalty": 1.0,
}

# Settings handed to the shared inference driver.
MODEL_CONFIG = {
    "model_id": "Qwen/Qwen3-Next-80B-A3B-Thinking",
    "gen_configs": _GENERATION_SETTINGS,
    "output_prefix": "qwen3next80bthinking",
    # The model thinks natively, so no system-prompt prefix is used.
    "system_prompt_prefix": "",
}

if __name__ == "__main__":
    run_inference(MODEL_CONFIG)
Code/scripts.sh ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# Regenerates the distribution plots and runs the preprocessing step.
# The --bin-time values look like minutes (1440 = one day, 43200 = 30 days);
# TODO confirm against the plotting scripts.

# Student Time Gap Distribution
python plot_timegap_distribution.py \
    --bin-time 5 \
    --plot-upper-limit-minutes 240

# Student Attempt Distribution
python plot_student_attempt_distribution.py \
    --bin-time 1440

python plot_student_attempt_distribution.py \
    --bin-time 10800

python plot_student_attempt_distribution.py \
    --bin-time 43200

# Total Time Distribution
python plot_totaltime_distribution.py \
    --bin-time 10800

# Preprocess
# NOTE(review): process_to_single_file.py groups by named calendar buckets
# (1h, halfday, day, week, month, year) and rejects anything else, so the
# former numeric value 10800 is replaced by the closest bucket. Confirm that
# "week" is the intended grouping.
python process_to_single_file.py \
    --grouping-time week
Data/CASE-Common Core State Standards for Math.json ADDED
The diff for this file is too large to render. See raw diff
 
Data/Interactions.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:671e97d320d0cf9b7e2bd75830d531cfbd95d307a1a9a590531934ad0d3d8ba4
3
+ size 245145636
Data/Math_Standards1.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d28ded26c7394f55525550e32bb96786da2e9a3276ccca8873e80ebcdebab11
3
+ size 1242082
Data/Problems.csv ADDED
The diff for this file is too large to render. See raw diff
 
Data/Skill_Set.csv ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ index,skill_code,full_description
2
+ 1,2.MD.B.5,"Use addition and subtraction within 100 to solve word problems involving lengths that are given in the same units, e.g., by using drawings (such as drawings of rulers) and equations with a symbol for the unknown number to represent the problem."
3
+ 2,3.MD.B.3,"Draw a scaled picture graph and a scaled bar graph to represent a data set with several categories. Solve one- and two-step ""how many more"" and ""how many less"" problems using information presented in scaled bar graphs. For example, draw a bar graph in which each square in the bar graph might represent 5 pets."
4
+ 3,3.MD.C.6,"Measure areas by counting unit squares (square cm, square m, square in, square ft, and improvised units)."
5
+ 4,3.MD.C.7a,"Find the area of a rectangle with whole-number side lengths by tiling it, and show that the area is the same as would be found by multiplying the side lengths."
6
+ 5,3.MD.C.7d,"Recognize area as additive. Find areas of rectilinear figures by decomposing them into non-overlapping rectangles and adding the areas of the non-overlapping parts, applying this technique to solve real world problems."
7
+ 6,3.MD.D.8,"Solve real world and mathematical problems involving perimeters of polygons, including finding the perimeter given the side lengths, finding an unknown side length, and exhibiting rectangles with the same perimeter and different areas or with the same area and different perimeters."
8
+ 7,3.NF.A.2a,Represent a fraction 1/b on a number line diagram by defining the interval from 0 to 1 as the whole and partitioning it into b equal parts. Recognize that each part has size 1/b and that the endpoint of the part based at 0 locates the number 1/b on the number line.
9
+ 8,3.NF.A.2b,Represent a fraction a/b on a number line diagram by marking off a lengths 1/b from 0. Recognize that the resulting interval has size a/b and that its endpoint locates the number a/b on the number line.
10
+ 9,3.NF.A.3b,"Recognize and generate simple equivalent fractions, e.g., 1/2 = 2/4, 4/6 = 2/3. Explain why the fractions are equivalent, e.g., by using a visual fraction model."
11
+ 10,3.NF.A.3d,"Compare two fractions with the same numerator or the same denominator by reasoning about their size. Recognize that comparisons are valid only when the two fractions refer to the same whole. Record the results of comparisons with the symbols >, =, or <, and justify the conclusions, e.g., by using a visual fraction model."
12
+ 11,3.OA.A.1,"Interpret products of whole numbers, e.g., interpret 5 × 7 as the total number of objects in 5 groups of 7 objects each. For example, describe a context in which a total number of objects can be expressed as 5 × 7."
13
+ 12,3.OA.A.4,"Determine the unknown whole number in a multiplication or division equation relating three whole numbers. For example, determine the unknown number that makes the equation true in each of the equations 8 × ? = 48, 5 = _ ÷ 3, 6 × 6 = ?."
14
+ 13,3.OA.B.5,Apply properties of operations as strategies to multiply and divide.
15
+ 14,4.G.A.1,"Draw points, lines, line segments, rays, angles (right, acute, obtuse), and perpendicular and parallel lines. Identify these in two-dimensional figures."
16
+ 15,4.G.A.2,"Classify two-dimensional figures based on the presence or absence of parallel or perpendicular lines, or the presence or absence of angles of a specified size. Recognize right triangles as a category, and identify right triangles."
17
+ 16,4.MD.A.1,"Know relative sizes of measurement units within one system of units including km, m, cm; kg, g; lb, oz.; l, ml; hr, min, sec. Within a single system of measurement, express measurements in a larger unit in terms of a smaller unit. Record measurement equivalents in a two column table. For example, know that 1 ft is 12 times as long as 1 in. Express the length of a 4 ft snake as 48 in. Generate a conversion table for feet and inches listing the number pairs (1, 12), (2, 24), (3, 36), …"
18
+ 17,4.MD.A.3,"Apply the area and perimeter formulas for rectangles in real world and mathematical problems. For example, find the width of a rectangular room given the area of the flooring and the length, by viewing the area formula as a multiplication equation with an unknown factor."
19
+ 18,4.NBT.A.1,"Recognize that in a multi-digit whole number, a digit in one place represents ten times what it represents in the place to its right. For example, recognize that 700 ÷ 70 = 10 by applying concepts of place value and division."
20
+ 19,4.NBT.B.6,"Find whole-number quotients and remainders with up to four-digit dividends and one-digit divisors, using strategies based on place value, the properties of operations, and/or the relationship between multiplication and division. Illustrate and explain the calculation by using equations, rectangular arrays, and/or area models."
21
+ 20,4.NF.A.1,"Explain why a fraction a/b is equivalent to a fraction (n × a)/(n × b) by using visual fraction models, with attention to how the number and size of the parts differ even though the two fractions themselves are the same size. Use this principle to recognize and generate equivalent fractions."
22
+ 21,4.NF.A.2,"Compare two fractions with different numerators and different denominators, e.g., by creating common denominators or numerators, or by comparing to a benchmark fraction such as 1/2. Recognize that comparisons are valid only when the two fractions refer to the same whole. Record the results of comparisons with symbols >, =, or <, and justify the conclusions, e.g., by using a visual fraction model."
23
+ 22,4.NF.B.4b,"Understand a multiple of a/b as a multiple of 1/b, and use this understanding to multiply a fraction by a whole number. For example, use a visual fraction model to express 3 × (2/5) as 6 × (1/5), recognizing this product as 6/5. (In general, n × (a/b) = (n × a)/b.)"
24
+ 23,4.NF.C.6,"Use decimal notation for fractions with denominators 10 or 100. For example, rewrite 0.62 as 62/100; describe a length as 0.62 meters; locate 0.62 on a number line diagram."
25
+ 24,4.OA.A.1,"Interpret a multiplication equation as a comparison, e.g., interpret 35 = 5 × 7 as a statement that 35 is 5 times as many as 7 and 7 times as many as 5. Represent verbal statements of multiplicative comparisons as multiplication equations."
26
+ 25,4.OA.B.4,Find all factor pairs for a whole number in the range 1—100. Recognize that a whole number is a multiple of each of its factors. Determine whether a given whole number in the range 1—100 is a multiple of a given one-digit number. Determine whether a given whole number in the range 1—100 is prime or composite.
27
+ 26,5.G.A.1,"Use a pair of perpendicular number lines, called axes, to define a coordinate system, with the intersection of the lines (the origin) arranged to coincide with the 0 on each line and a given point in the plane located by using an ordered pair of numbers, called its coordinates. Understand that the first number indicates how far to travel from the origin in the direction of one axis, and the second number indicates how far to travel in the direction of the second axis, with the convention that the names of the two axes and the coordinates correspond (e.g., x-axis and x-coordinate, y-axis and y-coordinate)."
28
+ 27,5.G.A.2,"Represent real world and mathematical problems by graphing points in the first quadrant of the coordinate plane, and interpret coordinate values of points in the context of the situation."
29
+ 28,5.G.B.4,Classify two-dimensional figures in a hierarchy based on properties.
30
+ 29,5.MD.A.1,"Convert among different-sized standard measurement units within a given measurement system (e.g., convert 5 cm to 0.05 m), and use these conversions in solving multi-step, real world problems."
31
+ 30,5.MD.B.2,"Make a line plot to display a data set of measurements in fractions of a unit (1/2, 1/4, 1/8). Use operations on fractions for this grade to solve problems involving information presented in line plots. For example, given different measurements of liquid in identical beakers, find the amount of liquid each beaker would contain if the total amount in all the beakers were redistributed equally."
32
+ 31,5.MD.C.3b,A solid figure which can be packed without gaps or overlaps using n unit cubes is said to have a volume of n cubic units.
33
+ 32,5.MD.C.4,"Measure volumes by counting unit cubes, using cubic cm, cubic in, cubic ft, and improvised units."
34
+ 33,5.MD.C.5a,"Find the volume of a right rectangular prism with whole-number side lengths by packing it with unit cubes, and show that the volume is the same as would be found by multiplying the edge lengths, equivalently by multiplying the height by the area of the base. Represent threefold whole-number products as volumes, e.g., to represent the associative property of multiplication."
35
+ 34,5.MD.C.5b,Apply the formulas V = l × w × h and V = b × h for rectangular prisms to find volumes of right rectangular prisms with whole-number edge lengths in the context of solving real world and mathematical problems.
36
+ 35,5.NBT.A.1,"Recognize that in a multi-digit number, a digit in one place represents 10 times as much as it represents in the place to its right and 1/10 of what it represents in the place to its left."
37
+ 36,5.NBT.A.2,"Explain patterns in the number of zeros of the product when multiplying a number by powers of 10, and explain patterns in the placement of the decimal point when a decimal is multiplied or divided by a power of 10. Use whole-number exponents to denote powers of 10."
38
+ 37,5.NBT.A.3b,"Compare two decimals to thousandths based on meanings of the digits in each place, using >, =, and < symbols to record the results of comparisons."
39
+ 38,5.NBT.B.5,Fluently multiply multi-digit whole numbers using the standard algorithm.
40
+ 39,5.NBT.B.6,"Find whole-number quotients of whole numbers with up to four-digit dividends and two-digit divisors, using strategies based on place value, the properties of operations, and/or the relationship between multiplication and division. Illustrate and explain the calculation by using equations, rectangular arrays, and/or area models."
41
+ 40,5.NBT.B.7,"Add, subtract, multiply, and divide decimals to hundredths, using concrete models or drawings and strategies based on place value, properties of operations, and/or the relationship between addition and subtraction; relate the strategy to a written method and explain the reasoning used."
42
+ 41,5.NF.A.1,"Add and subtract fractions with unlike denominators (including mixed numbers) by replacing given fractions with equivalent fractions in such a way as to produce an equivalent sum or difference of fractions with like denominators. For example, 2/3 + 5/4 = 8/12 + 15/12 = 23/12. (In general, a/b + c/d = (ad + bc)/bd.)"
43
+ 42,5.NF.A.2,"Solve word problems involving addition and subtraction of fractions referring to the same whole, including cases of unlike denominators, e.g., by using visual fraction models or equations to represent the problem. Use benchmark fractions and number sense of fractions to estimate mentally and assess the reasonableness of answers. For example, recognize an incorrect result 2/5 + 1/2 = 3/7, by observing that 3/7 < 1/2."
44
+ 43,5.NF.B.3,"Interpret a fraction as division of the numerator by the denominator (a/b = a ÷ b). Solve word problems involving division of whole numbers leading to answers in the form of fractions or mixed numbers, e.g., by using visual fraction models or equations to represent the problem. For example, interpret 3/4 as the result of dividing 3 by 4, noting that 3/4 multiplied by 4 equals 3, and that when 3 wholes are shared equally among 4 people each person has a share of size 3/4. If 9 people want to share a 50-pound sack of rice equally by weight, how many pounds of rice should each person get? Between what two whole numbers does your answer lie?"
45
+ 44,5.NF.B.4a,"Interpret the product (a/b) × q as a parts of a partition of q into b equal parts; equivalently, as the result of a sequence of operations a × q ÷ b. For example, use a visual fraction model to show (2/3) × 4 = 8/3, and create a story context for this equation. Do the same with (2/3) × (4/5) = 8/15. (In general, (a/b) × (c/d) = ac/bd.)"
46
+ 45,5.NF.B.4b,"Find the area of a rectangle with fractional side lengths by tiling it with unit squares of the appropriate unit fraction side lengths, and show that the area is the same as would be found by multiplying the side lengths. Multiply fractional side lengths to find areas of rectangles, and represent fraction products as rectangular areas."
47
+ 46,5.NF.B.5a,"Comparing the size of a product to the size of one factor on the basis of the size of the other factor, without performing the indicated multiplication."
48
+ 47,5.NF.B.5b,Explaining why multiplying a given number by a fraction greater than 1 results in a product greater than the given number (recognizing multiplication by whole numbers greater than 1 as a familiar case); explaining why multiplying a given number by a fraction less than 1 results in a product smaller than the given number; and relating the principle of fraction equivalence a/b = (n×a)/(n×b) to the effect of multiplying a/b by 1.
49
+ 48,5.NF.B.6,"Solve real world problems involving multiplication of fractions and mixed numbers, e.g., by using visual fraction models or equations to represent the problem."
50
+ 49,5.NF.B.7a,"Interpret division of a unit fraction by a non-zero whole number, and compute such quotients. For example, create a story context for (1/3) ÷ 4, and use a visual fraction model to show the quotient. Use the relationship between multiplication and division to explain that (1/3) ÷ 4 = 1/12 because (1/12) × 4 = 1/3."
51
+ 50,5.NF.B.7b,"Interpret division of a whole number by a unit fraction, and compute such quotients. For example, create a story context for 4 ÷ (1/5), and use a visual fraction model to show the quotient. Use the relationship between multiplication and division to explain that 4 ÷ (1/5) = 20 because 20 × (1/5) = 4."
52
+ 51,5.NF.B.7c,"Solve real world problems involving division of unit fractions by non-zero whole numbers and division of whole numbers by unit fractions, e.g., by using visual fraction models and equations to represent the problem. For example, how much chocolate will each person get if 3 people share 1/2 lb of chocolate equally? How many 1/3-cup servings are in 2 cups of raisins?"
53
+ 52,5.OA.A.1,"Use parentheses, brackets, or braces in numerical expressions, and evaluate expressions with these symbols."
54
+ 53,5.OA.A.2,"Write simple expressions that record calculations with numbers, and interpret numerical expressions without evaluating them. For example, express the calculation ""add 8 and 7, then multiply by 2"" as 2 × (8 + 7). Recognize that 3 × (18932 + 921) is three times as large as 18932 + 921, without having to calculate the indicated sum or product."
55
+ 54,6.EE.A.1,Write and evaluate numerical expressions involving whole-number exponents.
56
+ 55,6.EE.A.2a,"Write expressions that record operations with numbers and with letters standing for numbers. For example, express the calculation ""Subtract y from 5"" as 5 - y."
57
+ 56,6.EE.A.2b,"Identify parts of an expression using mathematical terms (sum, term, product, factor, quotient, coefficient); view one or more parts of an expression as a single entity. For example, describe the expression 2 (8 + 7) as a product of two factors; view (8 + 7) as both a single entity and a sum of two terms."
58
+ 57,6.EE.A.2c,"Evaluate expressions at specific values of their variables. Include expressions that arise from formulas used in real-world problems. Perform arithmetic operations, including those involving whole-number exponents, in the conventional order when there are no parentheses to specify a particular order (Order of Operations). For example, use the formulas V = s³ and A = 6 s² to find the volume and surface area of a cube with sides of length s = 1/2."
59
+ 58,6.EE.A.3,"Apply the properties of operations to generate equivalent expressions. For example, apply the distributive property to the expression 3 (2 + x) to produce the equivalent expression 6 + 3x; apply the distributive property to the expression 24x + 18y to produce the equivalent expression 6 (4x + 3y); apply properties of operations to y + y + y to produce the equivalent expression 3y."
60
+ 59,6.EE.A.4,"Identify when two expressions are equivalent (i.e., when the two expressions name the same number regardless of which value is substituted into them). For example, the expressions y + y + y and 3y are equivalent because they name the same number regardless of which number y stands for."
61
+ 60,6.EE.B.5,"Understand solving an equation or inequality as a process of answering a question: which values from a specified set, if any, make the equation or inequality true? Use substitution to determine whether a given number in a specified set makes an equation or inequality true."
62
+ 61,6.EE.B.6,"Use variables to represent numbers and write expressions when solving a real-world or mathematical problem; understand that a variable can represent an unknown number, or, depending on the purpose at hand, any number in a specified set."
63
+ 62,6.EE.B.7,"Solve real-world and mathematical problems by writing and solving equations of the form x + p = q and px = q for cases in which p, q and x are all nonnegative rational numbers."
64
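+ 63,6.EE.B.8,Write an inequality of the form x > c or x < c to represent a constraint or condition in a real-world or mathematical problem. Recognize that inequalities of the form x > c or x < c have infinitely many solutions; represent solutions of such inequalities on number line diagrams.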
65
+ 64,6.EE.C.9,"Use variables to represent two quantities in a real-world problem that change in relationship to one another; write an equation to express one quantity, thought of as the dependent variable, in terms of the other quantity, thought of as the independent variable. Analyze the relationship between the dependent and independent variables using graphs and tables, and relate these to the equation. For example, in a problem involving motion at constant speed, list and graph ordered pairs of distances and times, and write the equation d = 65t to represent the relationship between distance and time."
66
+ 65,6.G.A.1,"Find the area of right triangles, other triangles, special quadrilaterals, and polygons by composing into rectangles or decomposing into triangles and other shapes; apply these techniques in the context of solving real-world and mathematical problems."
67
+ 66,6.G.A.2,"Find the volume of a right rectangular prism with fractional edge lengths by packing it with unit cubes of the appropriate unit fraction edge lengths, and show that the volume is the same as would be found by multiplying the edge lengths of the prism. Apply the formulas V = l w h and V = b h to find volumes of right rectangular prisms with fractional edge lengths in the context of solving real-world and mathematical problems."
68
+ 67,6.G.A.3,Draw polygons in the coordinate plane given coordinates for the vertices; use coordinates to find the length of a side joining points with the same first coordinate or the same second coordinate. Apply these techniques in the context of solving real-world and mathematical problems.
69
+ 68,6.G.A.4,"Represent three-dimensional figures using nets made up of rectangles and triangles, and use the nets to find the surface area of these figures. Apply these techniques in the context of solving real-world and mathematical problems."
70
+ 69,6.NS.A.1,"Interpret and compute quotients of fractions, and solve word problems involving division of fractions by fractions, e.g., by using visual fraction models and equations to represent the problem. For example, create a story context for (2/3) ÷ (3/4) and use a visual fraction model to show the quotient; use the relationship between multiplication and division to explain that (2/3) ÷ (3/4) = 8/9 because 3/4 of 8/9 is 2/3. (In general, (a/b) ÷ (c/d) = ad/bc.) How much chocolate will each person get if 3 people share 1/2 lb of chocolate equally? How many 3/4-cup servings are in 2/3 of a cup of yogurt? How wide is a rectangular strip of land with length 3/4 mi and area 1/2 square mi?"
71
+ 70,6.NS.B.2,Fluently divide multi-digit numbers using the standard algorithm.
72
+ 71,6.NS.B.3,"Fluently add, subtract, multiply, and divide multi-digit decimals using the standard algorithm for each operation."
73
+ 72,6.NS.B.4,"Find the greatest common factor of two whole numbers less than or equal to 100 and the least common multiple of two whole numbers less than or equal to 12. Use the distributive property to express a sum of two whole numbers 1—100 with a common factor as a multiple of a sum of two whole numbers with no common factor. For example, express 36 + 8 as 4 (9 + 2)."
74
+ 73,6.NS.C.5,"Understand that positive and negative numbers are used together to describe quantities having opposite directions or values (e.g., temperature above/below zero, elevation above/below sea level, credits/debits, positive/negative electric charge); use positive and negative numbers to represent quantities in real-world contexts, explaining the meaning of 0 in each situation."
75
+ 74,6.NS.C.6a,"Recognize opposite signs of numbers as indicating locations on opposite sides of 0 on the number line; recognize that the opposite of the opposite of a number is the number itself, e.g., -(-3) = 3, and that 0 is its own opposite."
76
+ 75,6.NS.C.6c,Find and position integers and other rational numbers on a horizontal or vertical number line diagram; find and position pairs of integers and other rational numbers on a coordinate plane.
77
+ 76,6.NS.C.7a,"Interpret statements of inequality as statements about the relative position of two numbers on a number line diagram. For example, interpret -3 > -7 as a statement that -3 is located to the right of -7 on a number line oriented from left to right."
78
+ 77,6.NS.C.7b,"Write, interpret, and explain statements of order for rational numbers in real-world contexts. For example, write -3 °C > -7 °C to express the fact that -3 °C is warmer than -7 °C."
79
+ 78,6.NS.C.7c,"Understand the absolute value of a rational number as its distance from 0 on the number line; interpret absolute value as magnitude for a positive or negative quantity in a real-world situation. For example, for an account balance of -30 dollars, write |-30| = 30 to describe the size of the debt in dollars."
80
+ 79,6.NS.C.7d,"Distinguish comparisons of absolute value from statements about order. For example, recognize that an account balance less than -30 dollars represents a debt greater than 30 dollars."
81
+ 80,6.NS.C.8,Solve real-world and mathematical problems by graphing points in all four quadrants of the coordinate plane. Include use of coordinates and absolute value to find distances between points with the same first coordinate or the same second coordinate.
82
+ 81,6.RP.A.1,"Understand the concept of a ratio and use ratio language to describe a ratio relationship between two quantities. For example, ""The ratio of wings to beaks in the bird house at the zoo was 2:1, because for every 2 wings there was 1 beak."" ""For every vote candidate A received, candidate C received nearly three votes."""
83
+ 82,6.RP.A.2,"Understand the concept of a unit rate a/b associated with a ratio a:b with b ≠ 0, and use rate language in the context of a ratio relationship."
84
+ 83,6.RP.A.3a,"Make tables of equivalent ratios relating quantities with whole number measurements, find missing values in the tables, and plot the pairs of values on the coordinate plane. Use tables to compare ratios."
85
+ 84,6.RP.A.3b,"Solve unit rate problems including those involving unit pricing and constant speed. For example, if it took 7 hours to mow 4 lawns, then at that rate, how many lawns could be mowed in 35 hours? At what rate were lawns being mowed?"
86
+ 85,6.RP.A.3c,"Find a percent of a quantity as a rate per 100 (e.g., 30% of a quantity means 30/100 times the quantity); solve problems involving finding the whole, given a part and the percent."
87
+ 86,6.RP.A.3d,Use ratio reasoning to convert measurement units; manipulate and transform units appropriately when multiplying or dividing quantities.
88
+ 87,6.SP.A.1,"Recognize a statistical question as one that anticipates variability in the data related to the question and accounts for it in the answers. For example, ""How old am I?"" is not a statistical question, but ""How old are the students in my school?"" is a statistical question because one anticipates variability in students' ages."
89
+ 88,6.SP.A.2,"Understand that a set of data collected to answer a statistical question has a distribution which can be described by its center, spread, and overall shape."
90
+ 89,6.SP.A.3,"Recognize that a measure of center for a numerical data set summarizes all of its values with a single number, while a measure of variation describes how its values vary with a single number."
91
+ 90,6.SP.B.4,"Display numerical data in plots on a number line, including dot plots, histograms, and box plots."
92
+ 91,6.SP.B.5a,Reporting the number of observations.
93
+ 92,6.SP.B.5b,"Describing the nature of the attribute under investigation, including how it was measured and its units of measurement."
94
+ 93,6.SP.B.5c,"Giving quantitative measures of center (median and/or mean) and variability (interquartile range and/or mean absolute deviation), as well as describing any overall pattern and any striking deviations from the overall pattern with reference to the context in which the data were gathered."
95
+ 94,6.SP.B.5d,Relating the choice of measures of center and variability to the shape of the data distribution and the context in which the data were gathered.
96
+ 95,7.EE.A.1,"Apply properties of operations as strategies to add, subtract, factor, and expand linear expressions with rational coefficients."
97
+ 96,7.EE.A.2,"Understand that rewriting an expression in different forms in a problem context can shed light on the problem and how the quantities in it are related. For example, a + 0.05a = 1.05a means that ""increase by 5%"" is the same as ""multiply by 1.05."""
98
+ 97,7.EE.B.3,"Solve multi-step real-life and mathematical problems posed with positive and negative rational numbers in any form (whole numbers, fractions, and decimals), using tools strategically. Apply properties of operations to calculate with numbers in any form; convert between forms as appropriate; and assess the reasonableness of answers using mental computation and estimation strategies. For example: If a woman making $25 an hour gets a 10% raise, she will make an additional 1/10 of her salary an hour, or $2.50, for a new salary of $27.50. If you want to place a towel bar 9 3/4 inches long in the center of a door that is 27 1/2 inches wide, you will need to place the bar about 9 inches from each edge; this estimate can be used as a check on the exact computation."
99
+ 98,7.EE.B.4a,"Solve word problems leading to equations of the form px + q = r and p(x + q) = r, where p, q, and r are specific rational numbers. Solve equations of these forms fluently. Compare an algebraic solution to an arithmetic solution, identifying the sequence of the operations used in each approach. For example, the perimeter of a rectangle is 54 cm. Its length is 6 cm. What is its width?"
100
+ 99,7.EE.B.4b,"Solve word problems leading to inequalities of the form px + q > r or px + q < r, where p, q, and r are specific rational numbers. Graph the solution set of the inequality and interpret it in the context of the problem. For example: As a salesperson, you are paid $50 per week plus $3 per sale. This week you want your pay to be at least $100. Write an inequality for the number of sales you need to make, and describe the solutions."
101
+ 100,7.G.A.1,"Solve problems involving scale drawings of geometric figures, including computing actual lengths and areas from a scale drawing and reproducing a scale drawing at a different scale."
102
+ 101,7.G.A.2,"Draw (freehand, with ruler and protractor, and with technology) geometric shapes with given conditions. Focus on constructing triangles from three measures of angles or sides, noticing when the conditions determine a unique triangle, more than one triangle, or no triangle."
103
+ 102,7.G.A.3,"Describe the two-dimensional figures that result from slicing three-dimensional figures, as in plane sections of right rectangular prisms and right rectangular pyramids."
104
+ 103,7.G.B.4,Know the formulas for the area and circumference of a circle and use them to solve problems; give an informal derivation of the relationship between the circumference and area of a circle.
105
+ 104,7.G.B.5,"Use facts about supplementary, complementary, vertical, and adjacent angles in a multi-step problem to write and solve simple equations for an unknown angle in a figure."
106
+ 105,7.G.B.6,"Solve real-world and mathematical problems involving area, volume and surface area of two- and three-dimensional objects composed of triangles, quadrilaterals, polygons, cubes, and right prisms."
107
+ 106,7.NS.A.1a,"Describe situations in which opposite quantities combine to make 0. For example, a hydrogen atom has 0 charge because its two constituents are oppositely charged."
108
+ 107,7.NS.A.1b,"Understand p + q as the number located a distance |q| from p, in the positive or negative direction depending on whether q is positive or negative. Show that a number and its opposite have a sum of 0 (are additive inverses). Interpret sums of rational numbers by describing real-world contexts."
109
+ 108,7.NS.A.1c,"Understand subtraction of rational numbers as adding the additive inverse, p - q = p + (-q). Show that the distance between two rational numbers on the number line is the absolute value of their difference, and apply this principle in real-world contexts."
110
+ 109,7.NS.A.1d,Apply properties of operations as strategies to add and subtract rational numbers.
111
+ 110,7.NS.A.2a,"Understand that multiplication is extended from fractions to rational numbers by requiring that operations continue to satisfy the properties of operations, particularly the distributive property, leading to products such as (-1)(-1) = 1 and the rules for multiplying signed numbers. Interpret products of rational numbers by describing real-world contexts."
112
+ 111,7.NS.A.2b,"Understand that integers can be divided, provided that the divisor is not zero, and every quotient of integers (with non-zero divisor) is a rational number. If p and q are integers, then -(p/q) = (-p)/q = p/(-q). Interpret quotients of rational numbers by describing real-world contexts."
113
+ 112,7.NS.A.2c,Apply properties of operations as strategies to multiply and divide rational numbers.
114
+ 113,7.NS.A.2d,Convert a rational number to a decimal using long division; know that the decimal form of a rational number terminates in 0s or eventually repeats.
115
+ 114,7.NS.A.3,Solve real-world and mathematical problems involving the four operations with rational numbers.
116
+ 115,7.RP.A.1,"Compute unit rates associated with ratios of fractions, including ratios of lengths, areas and other quantities measured in like or different units. For example, if a person walks 1/2 mile in each 1/4 hour, compute the unit rate as the complex fraction (1/2)/(1/4) miles per hour, equivalently 2 miles per hour."
117
+ 116,7.RP.A.2a,"Decide whether two quantities are in a proportional relationship, e.g., by testing for equivalent ratios in a table or graphing on a coordinate plane and observing whether the graph is a straight line through the origin."
118
+ 117,7.RP.A.2b,"Identify the constant of proportionality (unit rate) in tables, graphs, equations, diagrams, and verbal descriptions of proportional relationships."
119
+ 118,7.RP.A.2c,"Represent proportional relationships by equations. For example, if total cost t is proportional to the number n of items purchased at a constant price p, the relationship between the total cost and the number of items can be expressed as t = pn."
120
+ 119,7.RP.A.2d,"Explain what a point (x, y) on the graph of a proportional relationship means in terms of the situation, with special attention to the points (0, 0) and (1, r) where r is the unit rate."
121
+ 120,7.RP.A.3,Use proportional relationships to solve multistep ratio and percent problems.
122
+ 121,7.SP.A.1,Understand that statistics can be used to gain information about a population by examining a sample of the population; generalizations about a population from a sample are valid only if the sample is representative of that population. Understand that random sampling tends to produce representative samples and support valid inferences.
123
+ 122,7.SP.A.2,"Use data from a random sample to draw inferences about a population with an unknown characteristic of interest. Generate multiple samples (or simulated samples) of the same size to gauge the variation in estimates or predictions. For example, estimate the mean word length in a book by randomly sampling words from the book; predict the winner of a school election based on randomly sampled survey data. Gauge how far off the estimate or prediction might be."
124
+ 123,7.SP.B.3,"Informally assess the degree of visual overlap of two numerical data distributions with similar variabilities, measuring the difference between the centers by expressing it as a multiple of a measure of variability. For example, the mean height of players on the basketball team is 10 cm greater than the mean height of players on the soccer team, about twice the variability (mean absolute deviation) on either team; on a dot plot, the separation between the two distributions of heights is noticeable."
125
+ 124,7.SP.B.4,"Use measures of center and measures of variability for numerical data from random samples to draw informal comparative inferences about two populations. For example, decide whether the words in a chapter of a seventh-grade science book are generally longer than the words in a chapter of a fourth-grade science book."
126
+ 125,7.SP.C.5,"Understand that the probability of a chance event is a number between 0 and 1 that expresses the likelihood of the event occurring. Larger numbers indicate greater likelihood. A probability near 0 indicates an unlikely event, a probability around 1/2 indicates an event that is neither unlikely nor likely, and a probability near 1 indicates a likely event."
127
+ 126,7.SP.C.6,"Approximate the probability of a chance event by collecting data on the chance process that produces it and observing its long-run relative frequency, and predict the approximate relative frequency given the probability. For example, when rolling a number cube 600 times, predict that a 3 or 6 would be rolled roughly 200 times, but probably not exactly 200 times."
128
+ 127,7.SP.C.7a,"Develop a uniform probability model by assigning equal probability to all outcomes, and use the model to determine probabilities of events. For example, if a student is selected at random from a class, find the probability that Jane will be selected and the probability that a girl will be selected."
129
+ 128,7.SP.C.7b,"Develop a probability model (which may not be uniform) by observing frequencies in data generated from a chance process. For example, find the approximate probability that a spinning penny will land heads up or that a tossed paper cup will land open-end down. Do the outcomes for the spinning penny appear to be equally likely based on the observed frequencies?"
130
+ 129,7.SP.C.8a,"Understand that, just as with simple events, the probability of a compound event is the fraction of outcomes in the sample space for which the compound event occurs."
131
+ 130,7.SP.C.8b,"Represent sample spaces for compound events using methods such as organized lists, tables and tree diagrams. For an event described in everyday language (e.g., ""rolling double sixes""), identify the outcomes in the sample space which compose the event."
132
+ 131,7.SP.C.8c,"Design and use a simulation to generate frequencies for compound events. For example, use random digits as a simulation tool to approximate the answer to the question: If 40% of donors have type A blood, what is the probability that it will take at least 4 donors to find one with type A blood?"
133
+ 132,8.EE.A.1,"Know and apply the properties of integer exponents to generate equivalent numerical expressions. For example, 3² × 3⁻⁵ = 3⁻³ = 1/3³ = 1/27."
134
+ 133,8.EE.A.2,"Use square root and cube root symbols to represent solutions to equations of the form x² = p and x³ = p, where p is a positive rational number. Evaluate square roots of small perfect squares and cube roots of small perfect cubes. Know that √2 is irrational."
135
+ 134,8.EE.A.3,"Use numbers expressed in the form of a single digit times an integer power of 10 to estimate very large or very small quantities, and to express how many times as much one is than the other. For example, estimate the population of the United States as 3 × 10⁸ and the population of the world as 7 × 10⁹, and determine that the world population is more than 20 times larger."
136
+ 135,8.EE.A.4,"Perform operations with numbers expressed in scientific notation, including problems where both decimal and scientific notation are used. Use scientific notation and choose units of appropriate size for measurements of very large or very small quantities (e.g., use millimeters per year for seafloor spreading). Interpret scientific notation that has been generated by technology."
137
+ 136,8.EE.B.5,"Graph proportional relationships, interpreting the unit rate as the slope of the graph. Compare two different proportional relationships represented in different ways. For example, compare a distance-time graph to a distance-time equation to determine which of two moving objects has greater speed."
138
+ 137,8.EE.B.6,Use similar triangles to explain why the slope m is the same between any two distinct points on a non-vertical line in the coordinate plane; derive the equation y = mx for a line through the origin and the equation y = mx + b for a line intercepting the vertical axis at b.
139
+ 138,8.EE.C.7a,"Give examples of linear equations in one variable with one solution, infinitely many solutions, or no solutions. Show which of these possibilities is the case by successively transforming the given equation into simpler forms, until an equivalent equation of the form x = a, a = a, or a = b results (where a and b are different numbers)."
140
+ 139,8.EE.C.7b,"Solve linear equations with rational number coefficients, including equations whose solutions require expanding expressions using the distributive property and collecting like terms."
141
+ 140,8.EE.C.8a,"Understand that solutions to a system of two linear equations in two variables correspond to points of intersection of their graphs, because points of intersection satisfy both equations simultaneously."
142
+ 141,8.EE.C.8b,"Solve systems of two linear equations in two variables algebraically, and estimate solutions by graphing the equations. Solve simple cases by inspection. For example, 3x + 2y = 5 and 3x + 2y = 6 have no solution because 3x + 2y cannot simultaneously be 5 and 6."
143
+ 142,8.EE.C.8c,"Solve real-world and mathematical problems leading to two linear equations in two variables. For example, given coordinates for two pairs of points, determine whether the line through the first pair of points intersects the line through the second pair."
144
+ 143,8.F.A.1,Understand that a function is a rule that assigns to each input exactly one output. The graph of a function is the set of ordered pairs consisting of an input and the corresponding output.
145
+ 144,8.F.A.2,"Compare properties of two functions each represented in a different way (algebraically, graphically, numerically in tables, or by verbal descriptions). For example, given a linear function represented by a table of values and a linear function represented by an algebraic expression, determine which function has the greater rate of change."
146
+ 145,8.F.A.3,"Interpret the equation y = mx + b as defining a linear function, whose graph is a straight line; give examples of functions that are not linear. For example, the function A = s² giving the area of a square as a function of its side length is not linear because its graph contains the points (1,1), (2,4) and (3,9), which are not on a straight line."
147
+ 146,8.F.B.4,"Construct a function to model a linear relationship between two quantities. Determine the rate of change and initial value of the function from a description of a relationship or from two (x, y) values, including reading these from a table or from a graph. Interpret the rate of change and initial value of a linear function in terms of the situation it models, and in terms of its graph or a table of values."
148
+ 147,8.F.B.5,"Describe qualitatively the functional relationship between two quantities by analyzing a graph (e.g., where the function is increasing or decreasing, linear or nonlinear). Sketch a graph that exhibits the qualitative features of a function that has been described verbally."
149
+ 148,8.G.A.1a,"Lines are taken to lines, and line segments to line segments of the same length."
150
+ 149,8.G.A.1b,Angles are taken to angles of the same measure.
151
+ 150,8.G.A.1c,Parallel lines are taken to parallel lines.
152
+ 151,8.G.A.2,"Understand that a two-dimensional figure is congruent to another if the second can be obtained from the first by a sequence of rotations, reflections, and translations; given two congruent figures, describe a sequence that exhibits the congruence between them."
153
+ 152,8.G.A.3,"Describe the effect of dilations, translations, rotations, and reflections on two-dimensional figures using coordinates."
154
+ 153,8.G.A.4,"Understand that a two-dimensional figure is similar to another if the second can be obtained from the first by a sequence of rotations, reflections, translations, and dilations; given two similar two-dimensional figures, describe a sequence that exhibits the similarity between them."
155
+ 154,8.G.A.5,"Use informal arguments to establish facts about the angle sum and exterior angle of triangles, about the angles created when parallel lines are cut by a transversal, and the angle-angle criterion for similarity of triangles. For example, arrange three copies of the same triangle so that the sum of the three angles appears to form a line, and give an argument in terms of transversals why this is so."
156
+ 155,8.G.B.6,Explain a proof of the Pythagorean Theorem and its converse.
157
+ 156,8.G.B.7,Apply the Pythagorean Theorem to determine unknown side lengths in right triangles in real-world and mathematical problems in two and three dimensions.
158
+ 157,8.G.B.8,Apply the Pythagorean Theorem to find the distance between two points in a coordinate system.
159
+ 158,8.G.C.9,"Know the formulas for the volumes of cones, cylinders, and spheres and use them to solve real-world and mathematical problems."
160
+ 159,8.NS.A.1,"Know that numbers that are not rational are called irrational. Understand informally that every number has a decimal expansion; for rational numbers show that the decimal expansion repeats eventually, and convert a decimal expansion which repeats eventually into a rational number."
161
+ 160,8.NS.A.2,"Use rational approximations of irrational numbers to compare the size of irrational numbers, locate them approximately on a number line diagram, and estimate the value of expressions (e.g., π²). For example, by truncating the decimal expansion of √2, show that √2 is between 1 and 2, then between 1.4 and 1.5, and explain how to continue on to get better approximations."
162
+ 161,8.SP.A.1,"Construct and interpret scatter plots for bivariate measurement data to investigate patterns of association between two quantities. Describe patterns such as clustering, outliers, positive or negative association, linear association, and nonlinear association."
163
+ 162,8.SP.A.2,"Know that straight lines are widely used to model relationships between two quantitative variables. For scatter plots that suggest a linear association, informally fit a straight line, and informally assess the model fit by judging the closeness of the data points to the line."
164
+ 163,8.SP.A.3,"Use the equation of a linear model to solve problems in the context of bivariate measurement data, interpreting the slope and intercept. For example, in a linear model for a biology experiment, interpret a slope of 1.5 cm/hr as meaning that an additional hour of sunlight each day is associated with an additional 1.5 cm in mature plant height."
165
+ 164,8.SP.A.4,"Understand that patterns of association can also be seen in bivariate categorical data by displaying frequencies and relative frequencies in a two-way table. Construct and interpret a two-way table summarizing data on two categorical variables collected from the same subjects. Use relative frequencies calculated for rows or columns to describe possible association between the two variables. For example, collect data from students in your class on whether or not they have a curfew on school nights and whether or not they have assigned chores at home. Is there evidence that those who have a curfew also tend to have chores?"
Data/Skills.csv ADDED
The diff for this file is too large to render. See raw diff
 
README.md ADDED
@@ -0,0 +1,246 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: cc-by-nc-4.0
3
+
4
+ extra_gated_prompt: "You agree to our [Responsible Use Guidelines](https://www.etrialstestbed.org/mathnet57963-guidelines)."
5
+
6
+ extra_gated_fields:
7
+ First and Last Name: text
8
+ Affiliation(university, company, etc): text
9
+ Country: country
10
+ Why are you asking to use this dataset?: text
11
+ How are you going to use this dataset?: text
12
+ How will you store and secure this data?: text
13
+ Do you have a university-affiliated email we could use to verify your request? If so, please enter it, if not, please explain why: text
14
+
15
+ I agree to use this dataset for non-commercial use ONLY: checkbox
16
+ While we think it's impossible for you to identify a student from these answers, you need to agree to not try to do so, and you also need to inform us if you find any PII in any of the images with the filenames: checkbox
17
+ Check box - I agree that this data will be stored on secured institutional systems, will not be shared with unauthorized parties and will be deleted or returned to ASSISTments when my research is complete: checkbox
18
+
19
+ configs:
20
+ - config_name: Foundational ASSIST Dataset
21
+ data_files: Data/Problems.csv
22
+ - config_name: Interactions
23
+ data_files: Data/Interactions.csv
24
+ - config_name: Skills
25
+ data_files: Data/Skills.csv
26
+
27
+ ---
28
+
29
+ **IMPORTANT UPDATE ON 3/19:** Due to an issue with git LFS, 700k rows in Interactions.csv were missing. Please redownload the dataset to use these rows.
30
+
31
+ # Overview of Foundational ASSIST
32
+ Foundational ASSIST is a dataset containing all natural text of problems and student answers as recorded by ASSISTments. The problems are from the Illustrative Mathematics 6th - 8th grade math curriculum, a Common Core-aligned curriculum popular in the United States.
33
+
34
+ The data is in the "Data" folder. The code used to generate results in the original paper by Worden et al. is in the "Code" folder, and the results (in case you do not wish to rerun inference) are in the "Results" folder.
35
+
36
+ The dataset is comprised of three files:
37
+ 1. **Interactions**, which contains students' attempts at problems,
38
+ 2. **Problems**, which includes information relevant to problems, and
39
+ 3. **Skills**, which links Problems to skills.
40
+ The dataset was curated to include 5,000 unique students who have each completed between 211 and 421 problems in ASSISTments. The dataset includes 1.7 million instances of students solving problems, complete with the answer text, problem text, distractor text, and more.
41
+
42
+ # Interactions File
43
+ Interactions consists of 1,722,169 unique instances of students solving problems. The information provided includes a) problem_id, linking to Problems, b) hint_count, the number of hints the student requested, c) answer_text, the exact text of their first answer, d) saw_answer, a boolean indicating whether the student requested to see the correct answer, e) discrete_score, the student's score (0 or 1), f) end_time, the time at which the student put in the correct answer to the problem, and g) user_xid, a unique identifier for each student.
44
+
45
+ Note that ASSISTments provides a discrete_score of 1 only if the student gets the problem correct on their first attempt, without requesting any support. If the student requests a hint, requests the answer, or has multiple tries they receive a 0. Accordingly, a student's answer can be the correct answer, but they can receive a 0 if the student requests a hint or sees the answer before entering the answer. This is shown in “table 7 cognitive accuracy when answers are incorrect” in our paper.
46
+
47
+ # Problems File
48
+ This file consists of information about 3,395 unique problems. The columns include
49
+ 1. Problem Set Id, which links problems that follow one another
50
+ 2. Problem Part, which indicates where in the Problem Set the problem occurs
51
+ 3. Problem Type, describing the type of problem
52
+ 4. Answer Type, describing the type of answer (see the table below for more information)
53
+ 5. Problem Body, the problem text with HTML or markup illustrating exactly what the student saw (code to convert to natural language is available on the github)
54
+ 6. Fill-in Options
55
+ 7. Fill-in Answers
56
+ 8. Multiple Choice Options
57
+ 9. Multiple Choice Answers
58
+ 10. problem_id
59
+
60
+ Problem Set example: PSB6N4 consists of three problems. The first is problem_id 151389 (as it has ‘Problem Part’ = 1), the second is problem_id 151533, and the third/last is 151647.
61
+
62
+
63
+ <table>
64
+ <tr>
65
+ <td>Answer Type
66
+ </td>
67
+ <td>Description
68
+ </td>
69
+ <td>Fill-in Options
70
+ </td>
71
+ <td>Fill-in Answers
72
+ </td>
73
+ <td>Multiple Choice Options
74
+ </td>
75
+ <td>Multiple Choice Answers
76
+ </td>
77
+ </tr>
78
+ <tr>
79
+ <td>Numeric
80
+ </td>
81
+ <td>The student must type in the correct number
82
+ </td>
83
+ <td>The correct answer. If there are multiple correct answers they are separated by a “,”.
84
+ </td>
85
+ <td>The correct answer. If there are multiple correct answers they are separated by a “,”.
86
+ </td>
87
+ <td>n/a
88
+ </td>
89
+ <td>n/a
90
+ </td>
91
+ </tr>
92
+ <tr>
93
+ <td>Drop Down
94
+ </td>
95
+ <td>The student must select the correct option from a drop-down menu. These are similar to multiple choice
96
+ </td>
97
+ <td>All the drop down options, separated by “&lt;/p>,”
98
+ </td>
99
+ <td>The correct dropdown option.
100
+ </td>
101
+ <td>n/a
102
+ </td>
103
+ <td>n/a
104
+ </td>
105
+ </tr>
106
+ <tr>
107
+ <td>Algebraic Expression
108
+ </td>
109
+ <td>The student must type in the correct, short, algebraic expression, or a similar equivalent expression. E.g. if the answer is a^2+b^2 then b^2+a^2 would also be correct.
110
+ </td>
111
+ <td>The correct answer. If there are multiple correct answers they are separated by a “,”.
112
+ </td>
113
+ <td>The correct answer. If there are multiple correct answers they are separated by a “,”.
114
+ </td>
115
+ <td>n/a
116
+ </td>
117
+ <td>n/a
118
+ </td>
119
+ </tr>
120
+ <tr>
121
+ <td>Ordering
122
+ </td>
123
+ <td>The student must order some values in some order. Note the initial order of how these are presented to students is randomized.
124
+ </td>
125
+ <td>The correct order of objects, separated by “,”.
126
+ </td>
127
+ <td>The correct order of objects, separated by “,”.
128
+ </td>
129
+ <td>n/a
130
+ </td>
131
+ <td>n/a
132
+ </td>
133
+ </tr>
134
+ <tr>
135
+ <td>Exact Match
136
+ </td>
137
+ <td>The student must type in exactly the correct answer. This could be a number, expression, point, list, etc.
138
+ </td>
139
+ <td>The correct answer. If there are multiple correct answers they are separated by a “,”. Note that some answers, e.g. lists, require the whole text (1,2,3) and there is only a single answer.
140
+ </td>
141
+ <td>The correct answer. If there are multiple correct answers they are separated by a “,”. Note that some answers, e.g. lists, require the whole text (1,2,3) and there is only a single answer.
142
+ </td>
143
+ <td>n/a
144
+ </td>
145
+ <td>n/a
146
+ </td>
147
+ </tr>
148
+ <tr>
149
+ <td>Exact Fraction
150
+ </td>
151
+ <td>The student must type in exactly the correct fraction.
152
+ </td>
153
+ <td>The correct answer. If there are multiple correct answers they are separated by a “,”.
154
+ </td>
155
+ <td>The correct answer. If there are multiple correct answers they are separated by a “,”.
156
+ </td>
157
+ <td>n/a
158
+ </td>
159
+ <td>n/a
160
+ </td>
161
+ </tr>
162
+ <tr>
163
+ <td>Numeric Expression
164
+ </td>
165
+ <td>The student must type in a numeric expression. Note that simplification occurs, e.g. if the answer is 11^3, 1331 is also considered correct.
166
+ </td>
167
+ <td>The correct answer. If there are multiple correct answers they are separated by a “,”.
168
+ </td>
169
+ <td>The correct answer. If there are multiple correct answers they are separated by a “,”.
170
+ </td>
171
+ <td>n/a
172
+ </td>
173
+ <td>n/a
174
+ </td>
175
+ </tr>
176
+ <tr>
177
+ <td>Multiple Choice
178
+ </td>
179
+ <td>The student must select the correct option.
180
+ </td>
181
+ <td>n/a
182
+ </td>
183
+ <td>n/a
184
+ </td>
185
+ <td>A list of options, separated by ‘||’.
186
+ </td>
187
+ <td>The correct option.
188
+ </td>
189
+ </tr>
190
+ <tr>
191
+ <td>Check all that apply
192
+ </td>
193
+ <td>The student must select all correct option(s).
194
+ </td>
195
+ <td>n/a
196
+ </td>
197
+ <td>n/a
198
+ </td>
199
+ <td>A list of options, separated by ‘||’.
200
+ </td>
201
+ <td>The correct option(s), separated by ‘||’.
202
+ </td>
203
+ </tr>
204
+ </table>
205
+
206
+ # Skills File
207
+ The skills file consists of
208
+ 1) problem_id, linking to problems in the Problems file
209
+ 2) skill_id, a unique identifier per skill
210
+ 3) node_code, which identifies the ASSISTments Skill tag Illustrative Math code for the skill and
211
+ 4) node_name, a description of the skill. In total there are 224 unique skills.
212
+
213
+ # Data Source
214
+ All data across each file are from [ASSISTments](https://new.assistments.org/), where students complete in-class work as well as homework and receive support and feedback from the platform. This work was done in conjunction with [Dr. Heffernan’s lab at WPI](https://www.neilheffernan.net/home). To ensure student privacy, our team attempted to remove all Personally Identifiable Information (PII). However, it is possible that students typed PII into fill-in problems; we aimed to detect and remove it, but a manual review of 1.7 million interaction logs is infeasible. Accordingly, we ask that anyone using this dataset who comes across PII please contact us at [etrials@assistments.org](mailto:etrials@assistments.org) so it can be removed.
215
+
216
+ # License and Sharing Agreement
217
+ This dataset is licensed under CC-BY-NC-4.0. We require that this dataset is used for research and educational purposes following these
218
+ [Responsible Use Guidelines](https://www.etrialstestbed.org/mathnet57963-guidelines).
219
+
220
+ # Citation
221
+
222
+ If you use the **FoundationalASSIST** dataset in your research, please cite the following paper:
223
+
224
+ > Worden, E., Heffernan, C., Heffernan, N., & Sonkar, S. (2026). FoundationalASSIST: An Educational Dataset for Foundational Knowledge Tracing and Pedagogical Grounding of LLMs. *arXiv preprint arXiv:2602.00070*.
225
+
226
+ ### BibTeX
227
+
228
+ ```bibtex
229
+ @article{worden2026foundationalassist,
230
+ title={FoundationalASSIST: An Educational Dataset for Foundational Knowledge Tracing and Pedagogical Grounding of LLMs},
231
+ author={Worden, Eamon and Heffernan, Cristina and Heffernan, Neil and Sonkar, Shashank},
232
+ journal={arXiv preprint arXiv:2602.00070},
233
+ year={2026}
234
+ }
235
+ ```
236
+
237
+ ### FAQs
238
+ Q: Where is the code that cleans the problem text?
239
+ A: It is in this repository, in Code/clean_utils.py and Code/cleantext.py; the two are similar.
240
+
241
+ Q: How does discrete_score work?
242
+ A: Discrete score is 1 if the student solved the problem on their first try without requesting a hint (hint_count) or an explanation/seeing the answer (saw_answer).
243
+ Note this is recorded by ASSISTments. There are 433 rows where discrete_score = 1 despite saw_answer = True or hint_count > 0. In these instances, the student correctly solved the problem, then requested hints/explanation/the answer anyway.
244
+
245
+ Q: Does the dataset track multiple attempts? (E.g. the student first incorrectly said 3, then incorrectly answered 6, then correctly answered 9)
246
+ A: No. We recognize it may be valuable to have this data. However, currently, answer_text (in Interactions.csv) is the first answer (right or wrong) the student submitted for the problem. Second/future attempts are not included in this dataset, but stay tuned.
Results/Problems_duplicated_problem_id.csv ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Problem Set Id,Problem Part,Problem Type,Answer Types,Problem Body,Fill-in Options,Fill-in Answers,Multiple Choice Options,Multiple Choice Answers,problem_id,duplicate_group_id,duplicate_problem_id_count,distinct_problem_body_count
2
+ PSBBP4J,2,Fill-in-the-blank(s),Numeric,"<p>Find the area of the triangle.</p>
3
+ <p>&nbsp;</p>
4
+ <p><ast-r type=""text"" marker=""1""></ast-r> square units.</p>
5
+ <p>&nbsp;</p>
6
+ <p><img src=""//resources.assistments.org/fetch/C/dd6e2450-6c1c-4383-af11-f5f38d8a554f.jpeg"" alt=""A triangle on a grid."" width=""400"" height=""148""></p>",11,11,,,242640,1,2,1
7
+ PSBBP4J,2,Fill-in-the-blank(s),Numeric,"<p>Find the area of the triangle.</p>
8
+ <p>&nbsp;</p>
9
+ <p><ast-r type=""text"" marker=""1""></ast-r> square units.</p>
10
+ <p>&nbsp;</p>
11
+ <p><img src=""//resources.assistments.org/fetch/C/dd6e2450-6c1c-4383-af11-f5f38d8a554f.jpeg"" alt=""A triangle on a grid."" width=""400"" height=""148""></p>",11,11,,,242640,1,2,1
12
+ PSBBPYN,1,Fill-in-the-blank(s),Numeric,"<p>Find the area of this trapezoid.</p>
13
+ <p>&nbsp;</p>
14
+ <p><img src=""//resources.assistments.org/fetch/C/de588b0b-4edc-4a7d-a1c9-ae9534107846.jpeg"" alt=""Trapezoid, bases 8 and 4 units. Height 3 units."" width=""287"" height=""158""></p>
15
+ <p>&nbsp;</p>
16
+ <p><ast-r type=""text"" marker=""1""></ast-r>square units</p>",18,18,,,242072,2,2,1
17
+ PSBBPYN,1,Fill-in-the-blank(s),Numeric,"<p>Find the area of this trapezoid.</p>
18
+ <p>&nbsp;</p>
19
+ <p><img src=""//resources.assistments.org/fetch/C/de588b0b-4edc-4a7d-a1c9-ae9534107846.jpeg"" alt=""Trapezoid, bases 8 and 4 units. Height 3 units."" width=""287"" height=""158""></p>
20
+ <p>&nbsp;</p>
21
+ <p><ast-r type=""text"" marker=""1""></ast-r>square units</p>",18,18,,,242072,2,2,1
22
+ PSBBUFM,1,Fill-in-the-blank(s),Numeric,"<p>Find the area of the shaded region in square units.</p>
23
+ <p>&nbsp;</p>
24
+ <p><img src=""//resources.assistments.org/fetch/C/ad6cb685-7371-4c50-b2d5-b0417869595c.jpeg"" width=""278"" height=""277""></p>
25
+ <p>&nbsp;</p>
26
+ <p><ast-r type=""text"" marker=""1""></ast-r> square units</p>",40,40,,,266614,3,2,1
27
+ PSBBUFM,1,Fill-in-the-blank(s),Numeric,"<p>Find the area of the shaded region in square units.</p>
28
+ <p>&nbsp;</p>
29
+ <p><img src=""//resources.assistments.org/fetch/C/ad6cb685-7371-4c50-b2d5-b0417869595c.jpeg"" width=""278"" height=""277""></p>
30
+ <p>&nbsp;</p>
31
+ <p><ast-r type=""text"" marker=""1""></ast-r> square units</p>",40,40,,,266614,3,2,1
32
+ PSBCTHE,3,Fill-in-the-blank(s),Numeric,"<p>There are 3 cats in a room and no other creatures. Each cat has 2 ears, 4 paws, and 1 tail.</p>
33
+ <p>&nbsp;</p>
34
+ <p><img src=""//resources.assistments.org/fetch/C/89240f58-bda5-4371-9d5b-35881563e526.jpeg"" width=""357"" height=""143""></p>
35
+ <p>&nbsp;</p>
36
+ <p>&nbsp;</p>
37
+ <p>&nbsp;</p>
38
+ <p>Complete each statement:</p>
39
+ <p>&nbsp;</p>
40
+ <p>There are <ast-r type=""text"" marker=""1""></ast-r> paws for every tail.<br><br></p>
41
+ <p>There are <ast-r type=""text"" marker=""2""></ast-r> paws for every ear.</p>","2, 4","2, 4",,,437233,4,4,1
42
+ PSBCTHE,3,Fill-in-the-blank(s),Numeric,"<p>There are 3 cats in a room and no other creatures. Each cat has 2 ears, 4 paws, and 1 tail.</p>
43
+ <p>&nbsp;</p>
44
+ <p><img src=""//resources.assistments.org/fetch/C/89240f58-bda5-4371-9d5b-35881563e526.jpeg"" width=""357"" height=""143""></p>
45
+ <p>&nbsp;</p>
46
+ <p>&nbsp;</p>
47
+ <p>&nbsp;</p>
48
+ <p>Complete each statement:</p>
49
+ <p>&nbsp;</p>
50
+ <p>There are <ast-r type=""text"" marker=""1""></ast-r> paws for every tail.<br><br></p>
51
+ <p>There are <ast-r type=""text"" marker=""2""></ast-r> paws for every ear.</p>","2, 4","2, 4",,,437233,4,4,1
52
+ PSBCTHE,3,Fill-in-the-blank(s),Numeric,"<p>There are 3 cats in a room and no other creatures. Each cat has 2 ears, 4 paws, and 1 tail.</p>
53
+ <p>&nbsp;</p>
54
+ <p><img src=""//resources.assistments.org/fetch/C/89240f58-bda5-4371-9d5b-35881563e526.jpeg"" width=""357"" height=""143""></p>
55
+ <p>&nbsp;</p>
56
+ <p>&nbsp;</p>
57
+ <p>&nbsp;</p>
58
+ <p>Complete each statement:</p>
59
+ <p>&nbsp;</p>
60
+ <p>There are <ast-r type=""text"" marker=""1""></ast-r> paws for every tail.<br><br></p>
61
+ <p>There are <ast-r type=""text"" marker=""2""></ast-r> paws for every ear.</p>","2, 4","2, 4",,,437233,4,4,1
62
+ PSBCTHE,3,Fill-in-the-blank(s),Numeric,"<p>There are 3 cats in a room and no other creatures. Each cat has 2 ears, 4 paws, and 1 tail.</p>
63
+ <p>&nbsp;</p>
64
+ <p><img src=""//resources.assistments.org/fetch/C/89240f58-bda5-4371-9d5b-35881563e526.jpeg"" width=""357"" height=""143""></p>
65
+ <p>&nbsp;</p>
66
+ <p>&nbsp;</p>
67
+ <p>&nbsp;</p>
68
+ <p>Complete each statement:</p>
69
+ <p>&nbsp;</p>
70
+ <p>There are <ast-r type=""text"" marker=""1""></ast-r> paws for every tail.<br><br></p>
71
+ <p>There are <ast-r type=""text"" marker=""2""></ast-r> paws for every ear.</p>","4, 2","4, 2",,,437233,4,4,1
72
+ PRABFEFN,1,Fill-in-the-blank(s),Numeric,"<p>A square has a side length of 5 feet. What is its area?</p>
73
+ <p>&nbsp;</p>
74
+ <p><ast-r type=""text"" marker=""1""></ast-r> square feet</p>",25,25,,,89104,5,4,1
75
+ PRABFEFN,1,Fill-in-the-blank(s),Numeric,"<p>A square has a side length of 5 feet. What is its area?</p>
76
+ <p>&nbsp;</p>
77
+ <p><ast-r type=""text"" marker=""1""></ast-r> square feet</p>",25,25,,,89104,5,4,1
78
+ PRABFEFN,1,Fill-in-the-blank(s),Numeric,"<p>A square has a side length of 5 feet. What is its area?</p>
79
+ <p>&nbsp;</p>
80
+ <p><ast-r type=""text"" marker=""1""></ast-r> square feet</p>",25,25,,,89104,5,4,1
81
+ PRABFEFN,1,Fill-in-the-blank(s),Numeric,"<p>A square has a side length of 5 feet. What is its area?</p>
82
+ <p>&nbsp;</p>
83
+ <p><ast-r type=""text"" marker=""1""></ast-r> square feet</p>",25,25,,,89104,5,4,1
84
+ PRABE64P,1,Fill-in-the-blank(s),Numeric,"<p>A square is 3 inches by 3 inches. What is its area?</p>
85
+ <p>&nbsp;</p>
86
+ <p><ast-r type=""text"" marker=""1""></ast-r> square inches</p>",9,9,,,88850,6,4,1
87
+ PRABE64P,1,Fill-in-the-blank(s),Numeric,"<p>A square is 3 inches by 3 inches. What is its area?</p>
88
+ <p>&nbsp;</p>
89
+ <p><ast-r type=""text"" marker=""1""></ast-r> square inches</p>",9,9,,,88850,6,4,1
90
+ PRABE64P,1,Fill-in-the-blank(s),Numeric,"<p>A square is 3 inches by 3 inches. What is its area?</p>
91
+ <p>&nbsp;</p>
92
+ <p><ast-r type=""text"" marker=""1""></ast-r> square inches</p>",9,9,,,88850,6,4,1
93
+ PRABE64P,1,Fill-in-the-blank(s),Numeric,"<p>A square is 3 inches by 3 inches. What is its area?</p>
94
+ <p>&nbsp;</p>
95
+ <p><ast-r type=""text"" marker=""1""></ast-r> square inches</p>",9,9,,,88850,6,4,1
96
+ PRABFEFP,1,Fill-in-the-blank(s),Numeric,"<p>The area of a square is 36 square centimeters. What is the length of each side of the square?</p>
97
+ <p>&nbsp;</p>
98
+ <p><ast-r type=""text"" marker=""1""></ast-r> centimeters</p>",6,6,,,89321,7,4,1
99
+ PRABFEFP,1,Fill-in-the-blank(s),Numeric,"<p>The area of a square is 36 square centimeters. What is the length of each side of the square?</p>
100
+ <p>&nbsp;</p>
101
+ <p><ast-r type=""text"" marker=""1""></ast-r> centimeters</p>",6,6,,,89321,7,4,1
102
+ PRABFEFP,1,Fill-in-the-blank(s),Numeric,"<p>The area of a square is 36 square centimeters. What is the length of each side of the square?</p>
103
+ <p>&nbsp;</p>
104
+ <p><ast-r type=""text"" marker=""1""></ast-r> centimeters</p>",6,6,,,89321,7,4,1
105
+ PRABFEFP,1,Fill-in-the-blank(s),Numeric,"<p>The area of a square is 36 square centimeters. What is the length of each side of the square?</p>
106
+ <p>&nbsp;</p>
107
+ <p><ast-r type=""text"" marker=""1""></ast-r> centimeters</p>",6,6,,,89321,7,4,1
108
+ PSBTTU,1,Fill-in-the-blank(s),Numeric,"<p>Find the area of this quadrilateral.</p>
109
+ <p>&nbsp;</p>
110
+ <p><img src=""//resources.assistments.org/fetch/C/e2e6b7c7-6b81-42ca-b6df-a45cd250d4dd.jpeg"" alt=""A blue quadrilateral in the shape of a kite.&nbsp; Two smaller sides span across 3 squares. Two longer sides span across 5 squares."" width=""303"" height=""238""></p>
111
+ <p>&nbsp;</p>
112
+ <p><ast-r type=""text"" marker=""1""></ast-r> square units</p>",24,24,,,88316,8,4,1
113
+ PSBTTU,1,Fill-in-the-blank(s),Numeric,"<p>Find the area of this quadrilateral.</p>
114
+ <p>&nbsp;</p>
115
+ <p><img src=""//resources.assistments.org/fetch/C/e2e6b7c7-6b81-42ca-b6df-a45cd250d4dd.jpeg"" alt=""A blue quadrilateral in the shape of a kite.&nbsp; Two smaller sides span across 3 squares. Two longer sides span across 5 squares."" width=""303"" height=""238""></p>
116
+ <p>&nbsp;</p>
117
+ <p><ast-r type=""text"" marker=""1""></ast-r> square units</p>",24,24,,,88316,8,4,1
118
+ PSBTTU,1,Fill-in-the-blank(s),Numeric,"<p>Find the area of this quadrilateral.</p>
119
+ <p>&nbsp;</p>
120
+ <p><img src=""//resources.assistments.org/fetch/C/e2e6b7c7-6b81-42ca-b6df-a45cd250d4dd.jpeg"" alt=""A blue quadrilateral in the shape of a kite.&nbsp; Two smaller sides span across 3 squares. Two longer sides span across 5 squares."" width=""303"" height=""238""></p>
121
+ <p>&nbsp;</p>
122
+ <p><ast-r type=""text"" marker=""1""></ast-r> square units</p>",24,24,,,88316,8,4,1
123
+ PSBTTU,1,Fill-in-the-blank(s),Numeric,"<p>Find the area of this quadrilateral.</p>
124
+ <p>&nbsp;</p>
125
+ <p><img src=""//resources.assistments.org/fetch/C/e2e6b7c7-6b81-42ca-b6df-a45cd250d4dd.jpeg"" alt=""A blue quadrilateral in the shape of a kite.&nbsp; Two smaller sides span across 3 squares. Two longer sides span across 5 squares."" width=""303"" height=""238""></p>
126
+ <p>&nbsp;</p>
127
+ <p><ast-r type=""text"" marker=""1""></ast-r> square units</p>",24,24,,,88316,8,4,1
128
+ PRABE64M,1,Multiple Choice (select all),Check All That Apply,"<p>Here is a diagram that describes the cups of green and white paint in a mixture.</p>
129
+ <p>&nbsp;</p>
130
+ <table style=""height: 46px; width: 439px;"">
131
+ <tbody>
132
+ <tr>
133
+ <td style=""width: 162.599px;"">green paint (cups)</td>
134
+ <td style=""width: 259.401px;""><img src=""/images/assistments/519919.jpg"" alt=""Four squares labeled &quot;green paint (cups)&quot;"" width=""200"" height=""43""></td>
135
+ </tr>
136
+ <tr>
137
+ <td style=""width: 162.599px;"">white paint (cups)</td>
138
+ <td style=""width: 259.401px;""><img src=""/images/assistments/519920.jpg"" alt=""Two squares labeled &quot;white paint (cups)&quot;"" width=""200"" height=""50""></td>
139
+ </tr>
140
+ </tbody>
141
+ </table>
142
+ <p>&nbsp;</p>
143
+ <p>Select&nbsp;<strong>all</strong> the statements that accurately describe this diagram.</p>",,,"The ratio of cups of white paint to cups of green paint is 2 to 4. || For every cup of green paint, there are two cups of white paint. || <p>The ratio of cups of green paint to cups of white paint is 4 : 2.</p> || For every cup of white paint, there are two cups of green paint. || <p>The ratio of cups of green paint to cups of white paint is 2 : 4.</p>","The ratio of cups of white paint to cups of green paint is 2 to 4. || <p>The ratio of cups of green paint to cups of white paint is 4 : 2.</p> || For every cup of white paint, there are two cups of green paint.",88681,9,4,1
144
+ PRABE64M,1,Multiple Choice (select all),Check All That Apply,"<p>Here is a diagram that describes the cups of green and white paint in a mixture.</p>
145
+ <p>&nbsp;</p>
146
+ <table style=""height: 46px; width: 439px;"">
147
+ <tbody>
148
+ <tr>
149
+ <td style=""width: 162.599px;"">green paint (cups)</td>
150
+ <td style=""width: 259.401px;""><img src=""/images/assistments/519919.jpg"" alt=""Four squares labeled &quot;green paint (cups)&quot;"" width=""200"" height=""43""></td>
151
+ </tr>
152
+ <tr>
153
+ <td style=""width: 162.599px;"">white paint (cups)</td>
154
+ <td style=""width: 259.401px;""><img src=""/images/assistments/519920.jpg"" alt=""Two squares labeled &quot;white paint (cups)&quot;"" width=""200"" height=""50""></td>
155
+ </tr>
156
+ </tbody>
157
+ </table>
158
+ <p>&nbsp;</p>
159
+ <p>Select&nbsp;<strong>all</strong> the statements that accurately describe this diagram.</p>",,,"The ratio of cups of white paint to cups of green paint is 2 to 4. || For every cup of green paint, there are two cups of white paint. || <p>The ratio of cups of green paint to cups of white paint is 4 : 2.</p> || For every cup of white paint, there are two cups of green paint. || <p>The ratio of cups of green paint to cups of white paint is 2 : 4.</p>","The ratio of cups of white paint to cups of green paint is 2 to 4. || <p>The ratio of cups of green paint to cups of white paint is 4 : 2.</p> || For every cup of white paint, there are two cups of green paint.",88681,9,4,1
160
+ PRABE64M,1,Multiple Choice (select all),Check All That Apply,"<p>Here is a diagram that describes the cups of green and white paint in a mixture.</p>
161
+ <p>&nbsp;</p>
162
+ <table style=""height: 46px; width: 439px;"">
163
+ <tbody>
164
+ <tr>
165
+ <td style=""width: 162.599px;"">green paint (cups)</td>
166
+ <td style=""width: 259.401px;""><img src=""/images/assistments/519919.jpg"" alt=""Four squares labeled &quot;green paint (cups)&quot;"" width=""200"" height=""43""></td>
167
+ </tr>
168
+ <tr>
169
+ <td style=""width: 162.599px;"">white paint (cups)</td>
170
+ <td style=""width: 259.401px;""><img src=""/images/assistments/519920.jpg"" alt=""Two squares labeled &quot;white paint (cups)&quot;"" width=""200"" height=""50""></td>
171
+ </tr>
172
+ </tbody>
173
+ </table>
174
+ <p>&nbsp;</p>
175
+ <p>Select&nbsp;<strong>all</strong> the statements that accurately describe this diagram.</p>",,,"The ratio of cups of white paint to cups of green paint is 2 to 4. || For every cup of green paint, there are two cups of white paint. || <p>The ratio of cups of green paint to cups of white paint is 4 : 2.</p> || For every cup of white paint, there are two cups of green paint. || <p>The ratio of cups of green paint to cups of white paint is 2 : 4.</p>","The ratio of cups of white paint to cups of green paint is 2 to 4. || <p>The ratio of cups of green paint to cups of white paint is 4 : 2.</p> || For every cup of white paint, there are two cups of green paint.",88681,9,4,1
176
+ PRABE64M,1,Multiple Choice (select all),Check All That Apply,"<p>Here is a diagram that describes the cups of green and white paint in a mixture.</p>
177
+ <p>&nbsp;</p>
178
+ <table style=""height: 46px; width: 439px;"">
179
+ <tbody>
180
+ <tr>
181
+ <td style=""width: 162.599px;"">green paint (cups)</td>
182
+ <td style=""width: 259.401px;""><img src=""/images/assistments/519919.jpg"" alt=""Four squares labeled &quot;green paint (cups)&quot;"" width=""200"" height=""43""></td>
183
+ </tr>
184
+ <tr>
185
+ <td style=""width: 162.599px;"">white paint (cups)</td>
186
+ <td style=""width: 259.401px;""><img src=""/images/assistments/519920.jpg"" alt=""Two squares labeled &quot;white paint (cups)&quot;"" width=""200"" height=""50""></td>
187
+ </tr>
188
+ </tbody>
189
+ </table>
190
+ <p>&nbsp;</p>
191
+ <p>Select&nbsp;<strong>all</strong> the statements that accurately describe this diagram.</p>",,,"The ratio of cups of white paint to cups of green paint is 2 to 4. || For every cup of green paint, there are two cups of white paint. || <p>The ratio of cups of green paint to cups of white paint is 4 : 2.</p> || For every cup of white paint, there are two cups of green paint. || <p>The ratio of cups of green paint to cups of white paint is 2 : 4.</p>","The ratio of cups of white paint to cups of green paint is 2 to 4. || <p>The ratio of cups of green paint to cups of white paint is 4 : 2.</p> || For every cup of white paint, there are two cups of green paint.",88681,9,4,1
192
+ PRABERFN,1,Multiple Choice (select 1),Multiple Choice,"<p>Which scale is equivalent to 1 cm to 1 km?</p>
193
+ <div style=""position: absolute; left: 57px; top: 51px;"">&nbsp;</div>",,,"1 to 1,000 || 10,000 to 1 || 1 to 100,000 || 100,000 to 1 || 1 to 1,000,000","1 to 100,000",146478,10,2,1
194
+ PRABERFN,1,Multiple Choice (select 1),Multiple Choice,"<p>Which scale is equivalent to 1 cm to 1 km?</p>
195
+ <div style=""position: absolute; left: 57px; top: 51px;"">&nbsp;</div>",,,"1 to 1,000 || 10,000 to 1 || 1 to 100,000 || 100,000 to 1 || 1 to 1,000,000","1 to 100,000",146478,10,2,1
196
+ PSB5BQ,1,Multiple Choice (select 1),Multiple Choice,"<p>In one version of trail mix, there are 3 cups of peanuts mixed with 2 cups of raisins. In another version of trail mix, there are 4.5 cups of peanuts mixed with 3 cups of raisins. Are the ratios equivalent for the two mixes?</p>",,,Yes || No,Yes,143888,11,2,1
197
+ PSB5BQ,1,Multiple Choice (select 1),Multiple Choice,"<p>In one version of trail mix, there are 3 cups of peanuts mixed with 2 cups of raisins. In another version of trail mix, there are 4.5 cups of peanuts mixed with 3 cups of raisins. Are the ratios equivalent for the two mixes?</p>",,,Yes || No,Yes,143888,11,2,1
198
+ PRABE55W,1,Multiple Choice (select all),Check All That Apply,<p>Select <strong>all</strong> the polygons.</p>,,,"<p><img src=""//resources.assistments.org/fetch/C/5001ccaa-f02f-4c1b-87b8-492ba85fe5be.jpeg"" alt=""Figure A"" width=""200""></p> || <p><img src=""//resources.assistments.org/fetch/C/b0f7cbc8-accd-4c14-b0f5-b23ae3461ed5.jpeg"" alt=""Figure B"" width=""200""></p> || <p><img src=""//resources.assistments.org/fetch/C/5995031e-500c-46c0-8dcb-26f5cbc80407.jpeg"" alt=""Figure C"" width=""200""></p> || <p><img src=""//resources.assistments.org/fetch/C/0444903e-1fce-4e76-ac8d-35cb8f9a6e9e.jpeg"" alt=""Figure D"" width=""200""></p> || <p><img src=""//resources.assistments.org/fetch/C/8ac8f6fa-3da0-4d32-8800-4f155bc611d7.jpeg"" alt=""Figure E"" width=""200""></p> || <p><img src=""//resources.assistments.org/fetch/C/236cc5d3-5eb9-4f13-aaca-df2a9021603a.jpeg"" alt=""Figure F"" width=""200""></p>","<p><img src=""//resources.assistments.org/fetch/C/5001ccaa-f02f-4c1b-87b8-492ba85fe5be.jpeg"" alt=""Figure A"" width=""200""></p> || <p><img src=""//resources.assistments.org/fetch/C/5995031e-500c-46c0-8dcb-26f5cbc80407.jpeg"" alt=""Figure C"" width=""200""></p>",243115,12,2,1
199
+ PRABE55W,1,Multiple Choice (select all),Check All That Apply,<p>Select <strong>all</strong> the polygons.</p>,,,"<p><img src=""//resources.assistments.org/fetch/C/5001ccaa-f02f-4c1b-87b8-492ba85fe5be.jpeg"" alt=""Figure A"" width=""200""></p> || <p><img src=""//resources.assistments.org/fetch/C/b0f7cbc8-accd-4c14-b0f5-b23ae3461ed5.jpeg"" alt=""Figure B"" width=""200""></p> || <p><img src=""//resources.assistments.org/fetch/C/5995031e-500c-46c0-8dcb-26f5cbc80407.jpeg"" alt=""Figure C"" width=""200""></p> || <p><img src=""//resources.assistments.org/fetch/C/0444903e-1fce-4e76-ac8d-35cb8f9a6e9e.jpeg"" alt=""Figure D"" width=""200""></p> || <p><img src=""//resources.assistments.org/fetch/C/8ac8f6fa-3da0-4d32-8800-4f155bc611d7.jpeg"" alt=""Figure E"" width=""200""></p> || <p><img src=""//resources.assistments.org/fetch/C/236cc5d3-5eb9-4f13-aaca-df2a9021603a.jpeg"" alt=""Figure F"" width=""200""></p>","<p><img src=""//resources.assistments.org/fetch/C/5001ccaa-f02f-4c1b-87b8-492ba85fe5be.jpeg"" alt=""Figure A"" width=""200""></p> || <p><img src=""//resources.assistments.org/fetch/C/5995031e-500c-46c0-8dcb-26f5cbc80407.jpeg"" alt=""Figure C"" width=""200""></p>",243115,12,2,1
200
+ PRABP75T,1,Multiple Choice (select all),Check All That Apply,"<p>Salt and sugar give two distinctly different tastes, one salty and the other sweet.&nbsp;In a mixture of salt and sugar, it is possible for the mixture to be salty, sweet or both.&nbsp;Will any of these mixtures taste exactly the same?</p>",,,"Mixture A: 2 cups water, 4 teaspoons salt, 0.25 cup sugar || Mixture B: 1.5 cups water, 3 teaspoons salt, 0.2 cup sugar || Mixture C: 1 cup water, 2 teaspoons salt, 0.125 cup sugar || None of these mixtures taste the same.","Mixture A: 2 cups water, 4 teaspoons salt, 0.25 cup sugar || Mixture C: 1 cup water, 2 teaspoons salt, 0.125 cup sugar",48435,13,2,1
201
+ PRABP75T,1,Multiple Choice (select all),Check All That Apply,"<p>Salt and sugar give two distinctly different tastes, one salty and the other sweet.&nbsp;In a mixture of salt and sugar, it is possible for the mixture to be salty, sweet or both.&nbsp;Will any of these mixtures taste exactly the same?</p>",,,"Mixture A: 2 cups water, 4 teaspoons salt, 0.25 cup sugar || Mixture B: 1.5 cups water, 3 teaspoons salt, 0.2 cup sugar || Mixture C: 1 cup water, 2 teaspoons salt, 0.125 cup sugar || None of these mixtures taste the same.","Mixture A: 2 cups water, 4 teaspoons salt, 0.25 cup sugar || Mixture C: 1 cup water, 2 teaspoons salt, 0.125 cup sugar",48435,13,2,1
202
+ PSBGXA,1,Multiple Choice (select 1),Multiple Choice,<p>The side lengths of Triangle B are all 5 more than the side lengths of Triangle A. Can Triangle B be a scaled copy of Triangle A?</p>,,,Yes || No,Yes,36042,14,2,1
203
+ PSBGXA,1,Multiple Choice (select 1),Multiple Choice,<p>The side lengths of Triangle B are all 5 more than the side lengths of Triangle A. Can Triangle B be a scaled copy of Triangle A?</p>,,,Yes || No,Yes,36042,14,2,1
204
+ PRABMQH8,1,Multiple Choice (select all),Check All That Apply,"<p>Triangle Z is a scale copy of Triangle M.<br><br></p>
205
+ <p><img src=""/images/assistments/274148.jpg"" alt=""Triangle M with side lengths of 4, 7, and 10."" width=""497"" height=""142""></p>
206
+ <p><br>Select&nbsp;<strong>all</strong> the sets of values that could be the side lengths of Triangle Z.&nbsp;</p>",,,"<p>8,11,14</p> || 10,17.5,25 || 6,9,11 || 6,10.5,15 || 8,14,20","10,17.5,25 || 6,10.5,15 || 8,14,20",569234,15,2,1
207
+ PRABMQH8,1,Multiple Choice (select all),Check All That Apply,"<p>Triangle Z is a scale copy of Triangle M.<br><br></p>
208
+ <p><img src=""/images/assistments/274148.jpg"" alt=""Triangle M with side lengths of 4, 7, and 10."" width=""497"" height=""142""></p>
209
+ <p><br>Select&nbsp;<strong>all</strong> the sets of values that could be the side lengths of Triangle Z.&nbsp;</p>",,,"<p>8,11,14</p> || 10,17.5,25 || 6,9,11 || 6,10.5,15 || 8,14,20","10,17.5,25 || 6,10.5,15 || 8,14,20",569234,15,2,1
Results/Problems_same_body_different_problem_id.csv ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Problem Set Id,Problem Part,Problem Type,Answer Types,Problem Body,Fill-in Options,Fill-in Answers,Multiple Choice Options,Multiple Choice Answers,problem_id,duplicate_group_id,distinct_problem_id_count,distinct_problem_ids
2
+ PSB2CK,6,Fill-in-the-blank(s),Numeric,"<p>What is its&nbsp;answer?</p>
3
+ <p>&nbsp;</p>
4
+ <p><ast-r type=""text"" marker=""1""></ast-r></p>",2.64,2.64,,,126628,1,3,126091;126362;126628
5
+ PSB2CK,2,Fill-in-the-blank(s),Numeric,"<p>What is its&nbsp;answer?</p>
6
+ <p>&nbsp;</p>
7
+ <p><ast-r type=""text"" marker=""1""></ast-r></p>",264,264,,,126091,1,3,126091;126362;126628
8
+ PSB2CK,4,Fill-in-the-blank(s),Numeric,"<p>What is its&nbsp;answer?</p>
9
+ <p>&nbsp;</p>
10
+ <p><ast-r type=""text"" marker=""1""></ast-r></p>",26.4,26.4,,,126362,1,3,126091;126362;126628
11
+ PSBBS8M,2,Fill-in-the-blank(s),Numeric,"<p>How many different triangles are there?</p>
12
+ <p>&nbsp;</p>
13
+ <p><ast-r type=""text"" marker=""1""></ast-r> different triangles</p>",3,3,,,259177,2,2,258731;259177
14
+ PSBBS6F,2,Fill-in-the-blank(s),Numeric,"<p>How many different triangles are there?</p>
15
+ <p>&nbsp;</p>
16
+ <p><ast-r type=""text"" marker=""1""></ast-r> different triangles</p>",4,4,,,258731,2,2,258731;259177
17
+ PRABFG57,1,Order / Sort,Ordering,<p>Order these numbers from least to greatest:</p>,"<p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mo>-</mo><mfrac><mn>1</mn><mn>2</mn></mfrac></math></p>, <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mfrac><mn>1</mn><mn>2</mn></mfrac></math></p>, <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mo>-</mo><mn>1</mn></math></p>, <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mo>-</mo><mn>1</mn><mfrac><mn>1</mn><mn>2</mn></mfrac></math></p>, <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mn>1</mn></math></p>, <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mn>0</mn></math></p>","<p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mo>-</mo><mfrac><mn>1</mn><mn>2</mn></mfrac></math></p>, <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mfrac><mn>1</mn><mn>2</mn></mfrac></math></p>, <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mo>-</mo><mn>1</mn></math></p>, <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mo>-</mo><mn>1</mn><mfrac><mn>1</mn><mn>2</mn></mfrac></math></p>, <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mn>1</mn></math></p>, <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mn>0</mn></math></p>",,,406337,3,2,406337;6158
18
+ PRABFKN2,1,Order / Sort,Ordering,<p>Order these numbers from least to greatest:</p>,"<p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mo>-</mo><mn>18</mn></math></p>, <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mfenced open=""|"" close=""|""><mrow><mo>-</mo><mn>17</mn></mrow></mfenced></math></p>, <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mfenced open=""|"" close=""|""><mrow><mo>-</mo><mn>18</mn></mrow></mfenced></math></p>, <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mfenced open=""|"" close=""|""><mn>19</mn></mfenced></math></p>, <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mn>20</mn></math></p>","<p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mo>-</mo><mn>18</mn></math></p>, <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mfenced open=""|"" close=""|""><mrow><mo>-</mo><mn>17</mn></mrow></mfenced></math></p>, <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mfenced open=""|"" close=""|""><mrow><mo>-</mo><mn>18</mn></mrow></mfenced></math></p>, <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mfenced open=""|"" close=""|""><mn>19</mn></mfenced></math></p>, <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mn>20</mn></math></p>",,,6158,3,2,406337;6158
19
+ PSBDASC,2,Fill-in-the-blank(s),Numeric,"<p>What is the decimal representation of that number?</p>
20
+ <p>&nbsp;</p>
21
+ <p><ast-r type=""text"" marker=""1""></ast-r></p>",0.5,0.5,,,522651,4,2,522651;523084
22
+ PSBDASC,6,Fill-in-the-blank(s),Numeric,"<p>What is the decimal representation of that number?</p>
23
+ <p>&nbsp;</p>
24
+ <p><ast-r type=""text"" marker=""1""></ast-r></p>",0.125,0.125,,,523084,4,2,522651;523084
25
+ PSBCDYX,3,Fill-in-the-blank(s),Numeric,"<p>Solve the equation you wrote.</p>
26
+ <p>&nbsp;</p>
27
+ <p><em>x</em> = <ast-r type=""text"" marker=""1""></ast-r></p>",7.8,7.8,,,366764,5,2,366764;56163
28
+ PSBMSN,3,Fill-in-the-blank(s),Numeric,"<p>Solve the equation you wrote.</p>
29
+ <p>&nbsp;</p>
30
+ <p><em>x</em> = <ast-r type=""text"" marker=""1""></ast-r></p>",11.6,11.6,,,56163,5,2,366764;56163
31
+ PSBBSYB,2,Fill-in-the-blank(s),Numeric,"<p>If not, write it in scientific notation.</p>
32
+ <p>&nbsp;</p>
33
+ <p><ast-r type=""text"" marker=""1""></ast-r><math xmlns=""http://www.w3.org/1998/Math/MathML""><mo>&#215;</mo><mn>10</mn></math><sup><ast-r type=""text"" marker=""2""></ast-r></sup></p>","6, 3.6","6, 3.6",,,257604,6,3,257097;257352;257604
34
+ PSBBSWV,2,Fill-in-the-blank(s),Numeric,"<p>If not, write it in scientific notation.</p>
35
+ <p>&nbsp;</p>
36
+ <p><ast-r type=""text"" marker=""1""></ast-r><math xmlns=""http://www.w3.org/1998/Math/MathML""><mo>&#215;</mo><mn>10</mn></math><sup><ast-r type=""text"" marker=""2""></ast-r></sup></p>","-4, 9.9","-4, 9.9",,,257352,6,3,257097;257352;257604
37
+ PSBBSVC,2,Fill-in-the-blank(s),Numeric,"<p>If not, write it in scientific notation.</p>
38
+ <p>&nbsp;</p>
39
+ <p><ast-r type=""text"" marker=""1""></ast-r><math xmlns=""http://www.w3.org/1998/Math/MathML""><mo>&#215;</mo><mn>10</mn></math><sup><ast-r type=""text"" marker=""2""></ast-r></sup></p>","4, 4.82","4, 4.82",,,257097,6,3,257097;257352;257604
40
+ PSBGBG,4,Fill-in-the-blank(s),Numeric,"<p>On which day did it occur?</p>
41
+ <p>&nbsp;</p>
42
+ <p>Day <ast-r type=""text"" marker=""1""></ast-r></p>",11,11,,,32600,7,2,32363;32600
43
+ PSBGBG,2,Fill-in-the-blank(s),Numeric,"<p>On which day did it occur?</p>
44
+ <p>&nbsp;</p>
45
+ <p>Day <ast-r type=""text"" marker=""1""></ast-r></p>",14,14,,,32363,7,2,32363;32600
46
+ PRABQM8U,1,Multiple Choice (select all),Check All That Apply,<p>Select&nbsp;<strong>all</strong>&nbsp;the true statements.</p>,,,"Given a box plot, it is always possible to calculate the mean of the data. || Given a box plot, it is always possible to calculate the median of the data. || Given a box plot, it is always possible to construct a corresponding dot plot. || Given a dot plot, it is always possible to construct a corresponding box plot. || Given a histogram, it is always possible to construct a corresponding box plot.","Given a box plot, it is always possible to calculate the median of the data. || Given a dot plot, it is always possible to construct a corresponding box plot.",193572,8,3,193572;362112;69320
47
+ PRABEVAG,1,Multiple Choice (select all),Check All That Apply,<p>Select&nbsp;<strong>all</strong>&nbsp;the true statements.</p>,,,"<p>2.3 + (-2.3) is equal to zero.</p> || <p>(-3.7) + (-4.1) is positive.</p> || <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mo>-</mo><mn>2</mn><mo>.</mo><mn>6</mn><mo>&#160;</mo><mo>-</mo><mo>&#160;</mo><mfenced><mrow><mo>-</mo><mfrac><mn>12</mn><mn>4</mn></mfrac></mrow></mfenced><mo>&#160;</mo><mi>is</mi><mo>&#160;</mo><mi>positive</mi></math>.</p> || <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mfenced><mfrac><mn>5</mn><mn>2</mn></mfrac></mfenced><mo>&#160;</mo><mo>+</mo><mo>&#160;</mo><mfenced><mrow><mo>-</mo><mn>2</mn><mo>.</mo><mn>5</mn></mrow></mfenced><mo>&#160;</mo><mi>is</mi><mo>&#160;</mo><mi>negative</mi></math>.</p> || <p>72 - (-100) is negative.</p>","<p>2.3 + (-2.3) is equal to zero.</p> || <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mo>-</mo><mn>2</mn><mo>.</mo><mn>6</mn><mo>&#160;</mo><mo>-</mo><mo>&#160;</mo><mfenced><mrow><mo>-</mo><mfrac><mn>12</mn><mn>4</mn></mfrac></mrow></mfenced><mo>&#160;</mo><mi>is</mi><mo>&#160;</mo><mi>positive</mi></math>.</p>",69320,8,3,193572;362112;69320
48
+ PRABFJP8,1,Multiple Choice (select all),Check All That Apply,<p>Select&nbsp;<strong>all</strong>&nbsp;the true statements.</p>,,,"<p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mo>-</mo><mn>5</mn><mo>&nbsp;</mo><mo>&lt;</mo><mo>&nbsp;</mo><mfenced open=""|"" close=""|""><mrow><mo>-</mo><mn>5</mn></mrow></mfenced></math></p> || <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mfenced open=""|"" close=""|""><mrow><mo>-</mo><mn>6</mn></mrow></mfenced><mo>&nbsp;</mo><mo>&lt;</mo><mo>&nbsp;</mo><mo>-</mo><mn>5</mn></math></p> || <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mfenced open=""|"" close=""|""><mrow><mo>-</mo><mn>6</mn></mrow></mfenced><mo>&nbsp;</mo><mo>&lt;</mo><mo>&nbsp;</mo><mn>3</mn></math></p> || <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mn>4</mn><mo>&nbsp;</mo><mo>&lt;</mo><mo>&nbsp;</mo><mfenced open=""|"" close=""|""><mrow><mo>-</mo><mn>7</mn></mrow></mfenced></math></p> || <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mfenced open=""|"" close=""|""><mrow><mo>-</mo><mn>7</mn></mrow></mfenced><mo>&#160;</mo><mo>&#60;</mo><mo>&#160;</mo><mfenced open=""|"" close=""|""><mrow><mo>-</mo><mn>8</mn></mrow></mfenced></math></p>","<p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mo>-</mo><mn>5</mn><mo>&nbsp;</mo><mo>&lt;</mo><mo>&nbsp;</mo><mfenced open=""|"" close=""|""><mrow><mo>-</mo><mn>5</mn></mrow></mfenced></math></p> || <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mn>4</mn><mo>&nbsp;</mo><mo>&lt;</mo><mo>&nbsp;</mo><mfenced open=""|"" close=""|""><mrow><mo>-</mo><mn>7</mn></mrow></mfenced></math></p> || <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mfenced open=""|"" close=""|""><mrow><mo>-</mo><mn>7</mn></mrow></mfenced><mo>&#160;</mo><mo>&#60;</mo><mo>&#160;</mo><mfenced open=""|"" close=""|""><mrow><mo>-</mo><mn>8</mn></mrow></mfenced></math></p>",362112,8,3,193572;362112;69320
49
+ PSBCKT6,1,Multiple Choice (select 1),Multiple Choice,"<p>For the pair of numbers below, select the number that is greater.</p>",,,"<p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mn>42</mn><mo>&nbsp;</mo><mo>·</mo><mo>&nbsp;</mo><msup><mn>10</mn><mn>7</mn></msup></math></p> || <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mn>8</mn><mo>.</mo><mn>5</mn><mo>&nbsp;</mo><mo>·</mo><mo>&nbsp;</mo><msup><mn>10</mn><mn>8</mn></msup></math></p>","<p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mn>8</mn><mo>.</mo><mn>5</mn><mo>&nbsp;</mo><mo>·</mo><mo>&nbsp;</mo><msup><mn>10</mn><mn>8</mn></msup></math></p>",399091,9,3,398365;398730;399091
50
+ PSBCKRX,1,Multiple Choice (select 1),Multiple Choice,"<p>For the pair of numbers below, select the number that is greater.</p>",,,"<p><span><span><span><span><span><math xmlns=""http://www.w3.org/1998/Math/MathML""><mn>2</mn><mo>&nbsp;</mo><mo>·</mo><mo>&nbsp;</mo><msup><mn>10</mn><mn>6</mn></msup></math></span></span></span></span></span>&nbsp;</p> || <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mn>7</mn><mo>.</mo><mn>839</mn><mo>&nbsp;</mo><mo>·</mo><mo>&nbsp;</mo><msup><mn>10</mn><mn>6</mn></msup></math><span><span><span><span></span></span></span></span>&nbsp;</p>","<p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mn>7</mn><mo>.</mo><mn>839</mn><mo>&nbsp;</mo><mo>·</mo><mo>&nbsp;</mo><msup><mn>10</mn><mn>6</mn></msup></math><span><span><span><span></span></span></span></span>&nbsp;</p>",398730,9,3,398365;398730;399091
51
+ PSBCKP4,1,Multiple Choice (select 1),Multiple Choice,"<p>For the pair of numbers below, select the number that is greater.</p>",,,"<p><span><span><span><span><span><math xmlns=""http://www.w3.org/1998/Math/MathML""><mn>17</mn><mo>·</mo><msup><mn>10</mn><mn>8</mn></msup></math></span></span></span></span></span>&nbsp; &nbsp;</p> || <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mn>4</mn><mo>·</mo><msup><mn>10</mn><mn>8</mn></msup></math></p>","<p><span><span><span><span><span><math xmlns=""http://www.w3.org/1998/Math/MathML""><mn>17</mn><mo>·</mo><msup><mn>10</mn><mn>8</mn></msup></math></span></span></span></span></span>&nbsp; &nbsp;</p>",398365,9,3,398365;398730;399091
52
+ PRABQPRF,1,Multiple Choice (select 1),Multiple Choice,<p>Which of these describes a unique polygon?</p>,,,"A quadrilateral with 4 right angles || A triangle with angles 30°, 80°, and 70° || A triangle with side lengths 7 cm and 8 cm and a 70° angle || A triangle with each side length 5 inches",A triangle with each side length 5 inches,558308,10,2,483667;558308
53
+ PRABEVDM,1,Multiple Choice (select 1),Multiple Choice,<p>Which of these describes a unique polygon?</p>,,,"A triangle with angles&nbsp;30°,&nbsp;50°, and&nbsp;100° || A quadrilateral with each side length 5 cm || A triangle with side lengths 6 cm, 7 cm, and 8 cm || A triangle with side lengths 4 cm and 5 cm and a&nbsp;50°&nbsp;angle","A triangle with side lengths 6 cm, 7 cm, and 8 cm",483667,10,2,483667;558308
54
+ PSBBD9R,2,Multiple Choice (select 1),Multiple Choice,<p>Which group shows greater variability?</p>,,,<p>Group A</p> || group B,<p>Group A</p>,193337,11,2,193337;529063
55
+ PSBDBQU,2,Multiple Choice (select 1),Multiple Choice,<p>Which group shows greater variability?</p>,,,Group A || Group B,Group A,529063,11,2,193337;529063
Results/day_student_attempt_distribution.png ADDED

Git LFS Details

  • SHA256: d9f59cee525d97c2d4002c888725b17c8e49d1881249f83dbf44582f250dc74c
  • Pointer size: 131 Bytes
  • Size of remote file: 176 kB
Results/day_student_attempt_distribution_00173df52c65a9c13b45978c5a56ea2db67b17deded674486f7af64f7c5fd9ac.png ADDED

Git LFS Details

  • SHA256: 4ca130170fdcc3e821ab21ccc262d091b088c0fa069f585d20485e1cfec0e0f1
  • Pointer size: 131 Bytes
  • Size of remote file: 134 kB
Results/day_student_attempt_distribution_001849d40d6bf1e2ed5826e4420e291e54655a2b6b77a9ab701faaeb4e899abe.png ADDED

Git LFS Details

  • SHA256: e54a86b81d0221edd7bfc2449b2ef8cca2ed2cefcfef096ff6e23447d03e3ada
  • Pointer size: 131 Bytes
  • Size of remote file: 133 kB
Results/day_student_attempt_distribution_counts.csv ADDED
The diff for this file is too large to render. See raw diff
 
Results/day_student_attempt_distribution_counts_00173df52c65a9c13b45978c5a56ea2db67b17deded674486f7af64f7c5fd9ac.csv ADDED
@@ -0,0 +1,262 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ bin_left_min,bin_right_min,bin_width_min,attempt_count,probability,percentage
2
+ 0.0,1440.0,1440.0,1,0.002617801047120419,0.2617801047120419
3
+ 1440.0,2880.0,1440.0,2,0.005235602094240838,0.5235602094240838
4
+ 2880.0,4320.0,1440.0,1,0.002617801047120419,0.2617801047120419
5
+ 4320.0,5760.0,1440.0,0,0.0,0.0
6
+ 5760.0,7200.0,1440.0,0,0.0,0.0
7
+ 7200.0,8640.0,1440.0,0,0.0,0.0
8
+ 8640.0,10080.0,1440.0,0,0.0,0.0
9
+ 10080.0,11520.0,1440.0,0,0.0,0.0
10
+ 11520.0,12960.0,1440.0,3,0.007853403141361256,0.7853403141361256
11
+ 12960.0,14400.0,1440.0,0,0.0,0.0
12
+ 14400.0,15840.0,1440.0,0,0.0,0.0
13
+ 15840.0,17280.0,1440.0,6,0.015706806282722512,1.5706806282722512
14
+ 17280.0,18720.0,1440.0,0,0.0,0.0
15
+ 18720.0,20160.0,1440.0,0,0.0,0.0
16
+ 20160.0,21600.0,1440.0,2,0.005235602094240838,0.5235602094240838
17
+ 21600.0,23040.0,1440.0,3,0.007853403141361256,0.7853403141361256
18
+ 23040.0,24480.0,1440.0,0,0.0,0.0
19
+ 24480.0,25920.0,1440.0,0,0.0,0.0
20
+ 25920.0,27360.0,1440.0,8,0.020942408376963352,2.094240837696335
21
+ 27360.0,28800.0,1440.0,0,0.0,0.0
22
+ 28800.0,30240.0,1440.0,0,0.0,0.0
23
+ 30240.0,31680.0,1440.0,0,0.0,0.0
24
+ 31680.0,33120.0,1440.0,1,0.002617801047120419,0.2617801047120419
25
+ 33120.0,34560.0,1440.0,1,0.002617801047120419,0.2617801047120419
26
+ 34560.0,36000.0,1440.0,4,0.010471204188481676,1.0471204188481675
27
+ 36000.0,37440.0,1440.0,1,0.002617801047120419,0.2617801047120419
28
+ 37440.0,38880.0,1440.0,0,0.0,0.0
29
+ 38880.0,40320.0,1440.0,0,0.0,0.0
30
+ 40320.0,41760.0,1440.0,0,0.0,0.0
31
+ 41760.0,43200.0,1440.0,0,0.0,0.0
32
+ 43200.0,44640.0,1440.0,1,0.002617801047120419,0.2617801047120419
33
+ 44640.0,46080.0,1440.0,0,0.0,0.0
34
+ 46080.0,47520.0,1440.0,0,0.0,0.0
35
+ 47520.0,48960.0,1440.0,0,0.0,0.0
36
+ 48960.0,50400.0,1440.0,0,0.0,0.0
37
+ 50400.0,51840.0,1440.0,9,0.02356020942408377,2.356020942408377
38
+ 51840.0,53280.0,1440.0,0,0.0,0.0
39
+ 53280.0,54720.0,1440.0,0,0.0,0.0
40
+ 54720.0,56160.0,1440.0,0,0.0,0.0
41
+ 56160.0,57600.0,1440.0,0,0.0,0.0
42
+ 57600.0,59040.0,1440.0,0,0.0,0.0
43
+ 59040.0,60480.0,1440.0,0,0.0,0.0
44
+ 60480.0,61920.0,1440.0,6,0.015706806282722512,1.5706806282722512
45
+ 61920.0,63360.0,1440.0,3,0.007853403141361256,0.7853403141361256
46
+ 63360.0,64800.0,1440.0,8,0.020942408376963352,2.094240837696335
47
+ 64800.0,66240.0,1440.0,8,0.020942408376963352,2.094240837696335
48
+ 66240.0,67680.0,1440.0,0,0.0,0.0
49
+ 67680.0,69120.0,1440.0,0,0.0,0.0
50
+ 69120.0,70560.0,1440.0,2,0.005235602094240838,0.5235602094240838
51
+ 70560.0,72000.0,1440.0,12,0.031413612565445025,3.1413612565445024
52
+ 72000.0,73440.0,1440.0,9,0.02356020942408377,2.356020942408377
53
+ 73440.0,74880.0,1440.0,3,0.007853403141361256,0.7853403141361256
54
+ 74880.0,76320.0,1440.0,6,0.015706806282722512,1.5706806282722512
55
+ 76320.0,77760.0,1440.0,0,0.0,0.0
56
+ 77760.0,79200.0,1440.0,0,0.0,0.0
57
+ 79200.0,80640.0,1440.0,0,0.0,0.0
58
+ 80640.0,82080.0,1440.0,0,0.0,0.0
59
+ 82080.0,83520.0,1440.0,0,0.0,0.0
60
+ 83520.0,84960.0,1440.0,0,0.0,0.0
61
+ 84960.0,86400.0,1440.0,0,0.0,0.0
62
+ 86400.0,87840.0,1440.0,0,0.0,0.0
63
+ 87840.0,89280.0,1440.0,0,0.0,0.0
64
+ 89280.0,90720.0,1440.0,0,0.0,0.0
65
+ 90720.0,92160.0,1440.0,0,0.0,0.0
66
+ 92160.0,93600.0,1440.0,1,0.002617801047120419,0.2617801047120419
67
+ 93600.0,95040.0,1440.0,1,0.002617801047120419,0.2617801047120419
68
+ 95040.0,96480.0,1440.0,0,0.0,0.0
69
+ 96480.0,97920.0,1440.0,1,0.002617801047120419,0.2617801047120419
70
+ 97920.0,99360.0,1440.0,0,0.0,0.0
71
+ 99360.0,100800.0,1440.0,0,0.0,0.0
72
+ 100800.0,102240.0,1440.0,0,0.0,0.0
73
+ 102240.0,103680.0,1440.0,3,0.007853403141361256,0.7853403141361256
74
+ 103680.0,105120.0,1440.0,8,0.020942408376963352,2.094240837696335
75
+ 105120.0,106560.0,1440.0,0,0.0,0.0
76
+ 106560.0,108000.0,1440.0,0,0.0,0.0
77
+ 108000.0,109440.0,1440.0,0,0.0,0.0
78
+ 109440.0,110880.0,1440.0,3,0.007853403141361256,0.7853403141361256
79
+ 110880.0,112320.0,1440.0,0,0.0,0.0
80
+ 112320.0,113760.0,1440.0,0,0.0,0.0
81
+ 113760.0,115200.0,1440.0,0,0.0,0.0
82
+ 115200.0,116640.0,1440.0,7,0.01832460732984293,1.832460732984293
83
+ 116640.0,118080.0,1440.0,1,0.002617801047120419,0.2617801047120419
84
+ 118080.0,119520.0,1440.0,0,0.0,0.0
85
+ 119520.0,120960.0,1440.0,0,0.0,0.0
86
+ 120960.0,122400.0,1440.0,0,0.0,0.0
87
+ 122400.0,123840.0,1440.0,0,0.0,0.0
88
+ 123840.0,125280.0,1440.0,8,0.020942408376963352,2.094240837696335
89
+ 125280.0,126720.0,1440.0,3,0.007853403141361256,0.7853403141361256
90
+ 126720.0,128160.0,1440.0,0,0.0,0.0
91
+ 128160.0,129600.0,1440.0,0,0.0,0.0
92
+ 129600.0,131040.0,1440.0,10,0.02617801047120419,2.6178010471204187
93
+ 131040.0,132480.0,1440.0,3,0.007853403141361256,0.7853403141361256
94
+ 132480.0,133920.0,1440.0,14,0.03664921465968586,3.664921465968586
95
+ 133920.0,135360.0,1440.0,0,0.0,0.0
96
+ 135360.0,136800.0,1440.0,1,0.002617801047120419,0.2617801047120419
97
+ 136800.0,138240.0,1440.0,1,0.002617801047120419,0.2617801047120419
98
+ 138240.0,139680.0,1440.0,0,0.0,0.0
99
+ 139680.0,141120.0,1440.0,1,0.002617801047120419,0.2617801047120419
100
+ 141120.0,142560.0,1440.0,0,0.0,0.0
101
+ 142560.0,144000.0,1440.0,0,0.0,0.0
102
+ 144000.0,145440.0,1440.0,1,0.002617801047120419,0.2617801047120419
103
+ 145440.0,146880.0,1440.0,0,0.0,0.0
104
+ 146880.0,148320.0,1440.0,0,0.0,0.0
105
+ 148320.0,149760.0,1440.0,0,0.0,0.0
106
+ 149760.0,151200.0,1440.0,0,0.0,0.0
107
+ 151200.0,152640.0,1440.0,0,0.0,0.0
108
+ 152640.0,154080.0,1440.0,0,0.0,0.0
109
+ 154080.0,155520.0,1440.0,0,0.0,0.0
110
+ 155520.0,156960.0,1440.0,0,0.0,0.0
111
+ 156960.0,158400.0,1440.0,0,0.0,0.0
112
+ 158400.0,159840.0,1440.0,0,0.0,0.0
113
+ 159840.0,161280.0,1440.0,0,0.0,0.0
114
+ 161280.0,162720.0,1440.0,0,0.0,0.0
115
+ 162720.0,164160.0,1440.0,7,0.01832460732984293,1.832460732984293
116
+ 164160.0,165600.0,1440.0,3,0.007853403141361256,0.7853403141361256
117
+ 165600.0,167040.0,1440.0,7,0.01832460732984293,1.832460732984293
118
+ 167040.0,168480.0,1440.0,0,0.0,0.0
119
+ 168480.0,169920.0,1440.0,0,0.0,0.0
120
+ 169920.0,171360.0,1440.0,1,0.002617801047120419,0.2617801047120419
121
+ 171360.0,172800.0,1440.0,5,0.013089005235602094,1.3089005235602094
122
+ 172800.0,174240.0,1440.0,7,0.01832460732984293,1.832460732984293
123
+ 174240.0,175680.0,1440.0,3,0.007853403141361256,0.7853403141361256
124
+ 175680.0,177120.0,1440.0,0,0.0,0.0
125
+ 177120.0,178560.0,1440.0,0,0.0,0.0
126
+ 178560.0,180000.0,1440.0,0,0.0,0.0
127
+ 180000.0,181440.0,1440.0,2,0.005235602094240838,0.5235602094240838
128
+ 181440.0,182880.0,1440.0,9,0.02356020942408377,2.356020942408377
129
+ 182880.0,184320.0,1440.0,2,0.005235602094240838,0.5235602094240838
130
+ 184320.0,185760.0,1440.0,0,0.0,0.0
131
+ 185760.0,187200.0,1440.0,0,0.0,0.0
132
+ 187200.0,188640.0,1440.0,0,0.0,0.0
133
+ 188640.0,190080.0,1440.0,0,0.0,0.0
134
+ 190080.0,191520.0,1440.0,0,0.0,0.0
135
+ 191520.0,192960.0,1440.0,0,0.0,0.0
136
+ 192960.0,194400.0,1440.0,0,0.0,0.0
137
+ 194400.0,195840.0,1440.0,0,0.0,0.0
138
+ 195840.0,197280.0,1440.0,0,0.0,0.0
139
+ 197280.0,198720.0,1440.0,0,0.0,0.0
140
+ 198720.0,200160.0,1440.0,0,0.0,0.0
141
+ 200160.0,201600.0,1440.0,0,0.0,0.0
142
+ 201600.0,203040.0,1440.0,0,0.0,0.0
143
+ 203040.0,204480.0,1440.0,0,0.0,0.0
144
+ 204480.0,205920.0,1440.0,0,0.0,0.0
145
+ 205920.0,207360.0,1440.0,0,0.0,0.0
146
+ 207360.0,208800.0,1440.0,0,0.0,0.0
147
+ 208800.0,210240.0,1440.0,0,0.0,0.0
148
+ 210240.0,211680.0,1440.0,0,0.0,0.0
149
+ 211680.0,213120.0,1440.0,0,0.0,0.0
150
+ 213120.0,214560.0,1440.0,2,0.005235602094240838,0.5235602094240838
151
+ 214560.0,216000.0,1440.0,1,0.002617801047120419,0.2617801047120419
152
+ 216000.0,217440.0,1440.0,8,0.020942408376963352,2.094240837696335
153
+ 217440.0,218880.0,1440.0,6,0.015706806282722512,1.5706806282722512
154
+ 218880.0,220320.0,1440.0,0,0.0,0.0
155
+ 220320.0,221760.0,1440.0,2,0.005235602094240838,0.5235602094240838
156
+ 221760.0,223200.0,1440.0,3,0.007853403141361256,0.7853403141361256
157
+ 223200.0,224640.0,1440.0,3,0.007853403141361256,0.7853403141361256
158
+ 224640.0,226080.0,1440.0,0,0.0,0.0
159
+ 226080.0,227520.0,1440.0,1,0.002617801047120419,0.2617801047120419
160
+ 227520.0,228960.0,1440.0,0,0.0,0.0
161
+ 228960.0,230400.0,1440.0,0,0.0,0.0
162
+ 230400.0,231840.0,1440.0,0,0.0,0.0
163
+ 231840.0,233280.0,1440.0,0,0.0,0.0
164
+ 233280.0,234720.0,1440.0,6,0.015706806282722512,1.5706806282722512
165
+ 234720.0,236160.0,1440.0,8,0.020942408376963352,2.094240837696335
166
+ 236160.0,237600.0,1440.0,1,0.002617801047120419,0.2617801047120419
167
+ 237600.0,239040.0,1440.0,4,0.010471204188481676,1.0471204188481675
168
+ 239040.0,240480.0,1440.0,0,0.0,0.0
169
+ 240480.0,241920.0,1440.0,0,0.0,0.0
170
+ 241920.0,243360.0,1440.0,2,0.005235602094240838,0.5235602094240838
171
+ 243360.0,244800.0,1440.0,3,0.007853403141361256,0.7853403141361256
172
+ 244800.0,246240.0,1440.0,4,0.010471204188481676,1.0471204188481675
173
+ 246240.0,247680.0,1440.0,2,0.005235602094240838,0.5235602094240838
174
+ 247680.0,249120.0,1440.0,1,0.002617801047120419,0.2617801047120419
175
+ 249120.0,250560.0,1440.0,0,0.0,0.0
176
+ 250560.0,252000.0,1440.0,1,0.002617801047120419,0.2617801047120419
177
+ 252000.0,253440.0,1440.0,0,0.0,0.0
178
+ 253440.0,254880.0,1440.0,0,0.0,0.0
179
+ 254880.0,256320.0,1440.0,0,0.0,0.0
180
+ 256320.0,257760.0,1440.0,4,0.010471204188481676,1.0471204188481675
181
+ 257760.0,259200.0,1440.0,4,0.010471204188481676,1.0471204188481675
182
+ 259200.0,260640.0,1440.0,0,0.0,0.0
183
+ 260640.0,262080.0,1440.0,0,0.0,0.0
184
+ 262080.0,263520.0,1440.0,5,0.013089005235602094,1.3089005235602094
185
+ 263520.0,264960.0,1440.0,2,0.005235602094240838,0.5235602094240838
186
+ 264960.0,266400.0,1440.0,2,0.005235602094240838,0.5235602094240838
187
+ 266400.0,267840.0,1440.0,0,0.0,0.0
188
+ 267840.0,269280.0,1440.0,0,0.0,0.0
189
+ 269280.0,270720.0,1440.0,0,0.0,0.0
190
+ 270720.0,272160.0,1440.0,6,0.015706806282722512,1.5706806282722512
191
+ 272160.0,273600.0,1440.0,0,0.0,0.0
192
+ 273600.0,275040.0,1440.0,4,0.010471204188481676,1.0471204188481675
193
+ 275040.0,276480.0,1440.0,0,0.0,0.0
194
+ 276480.0,277920.0,1440.0,0,0.0,0.0
195
+ 277920.0,279360.0,1440.0,0,0.0,0.0
196
+ 279360.0,280800.0,1440.0,0,0.0,0.0
197
+ 280800.0,282240.0,1440.0,0,0.0,0.0
198
+ 282240.0,283680.0,1440.0,4,0.010471204188481676,1.0471204188481675
199
+ 283680.0,285120.0,1440.0,0,0.0,0.0
200
+ 285120.0,286560.0,1440.0,0,0.0,0.0
201
+ 286560.0,288000.0,1440.0,7,0.01832460732984293,1.832460732984293
202
+ 288000.0,289440.0,1440.0,0,0.0,0.0
203
+ 289440.0,290880.0,1440.0,0,0.0,0.0
204
+ 290880.0,292320.0,1440.0,0,0.0,0.0
205
+ 292320.0,293760.0,1440.0,5,0.013089005235602094,1.3089005235602094
206
+ 293760.0,295200.0,1440.0,1,0.002617801047120419,0.2617801047120419
207
+ 295200.0,296640.0,1440.0,6,0.015706806282722512,1.5706806282722512
208
+ 296640.0,298080.0,1440.0,6,0.015706806282722512,1.5706806282722512
209
+ 298080.0,299520.0,1440.0,5,0.013089005235602094,1.3089005235602094
210
+ 299520.0,300960.0,1440.0,0,0.0,0.0
211
+ 300960.0,302400.0,1440.0,0,0.0,0.0
212
+ 302400.0,303840.0,1440.0,6,0.015706806282722512,1.5706806282722512
213
+ 303840.0,305280.0,1440.0,17,0.04450261780104712,4.450261780104712
214
+ 305280.0,306720.0,1440.0,0,0.0,0.0
215
+ 306720.0,308160.0,1440.0,8,0.020942408376963352,2.094240837696335
216
+ 308160.0,309600.0,1440.0,0,0.0,0.0
217
+ 309600.0,311040.0,1440.0,0,0.0,0.0
218
+ 311040.0,312480.0,1440.0,0,0.0,0.0
219
+ 312480.0,313920.0,1440.0,0,0.0,0.0
220
+ 313920.0,315360.0,1440.0,0,0.0,0.0
221
+ 315360.0,316800.0,1440.0,0,0.0,0.0
222
+ 316800.0,318240.0,1440.0,0,0.0,0.0
223
+ 318240.0,319680.0,1440.0,0,0.0,0.0
224
+ 319680.0,321120.0,1440.0,0,0.0,0.0
225
+ 321120.0,322560.0,1440.0,0,0.0,0.0
226
+ 322560.0,324000.0,1440.0,1,0.002617801047120419,0.2617801047120419
227
+ 324000.0,325440.0,1440.0,3,0.007853403141361256,0.7853403141361256
228
+ 325440.0,326880.0,1440.0,0,0.0,0.0
229
+ 326880.0,328320.0,1440.0,4,0.010471204188481676,1.0471204188481675
230
+ 328320.0,329760.0,1440.0,1,0.002617801047120419,0.2617801047120419
231
+ 329760.0,331200.0,1440.0,0,0.0,0.0
232
+ 331200.0,332640.0,1440.0,0,0.0,0.0
233
+ 332640.0,334080.0,1440.0,0,0.0,0.0
234
+ 334080.0,335520.0,1440.0,0,0.0,0.0
235
+ 335520.0,336960.0,1440.0,0,0.0,0.0
236
+ 336960.0,338400.0,1440.0,0,0.0,0.0
237
+ 338400.0,339840.0,1440.0,0,0.0,0.0
238
+ 339840.0,341280.0,1440.0,0,0.0,0.0
239
+ 341280.0,342720.0,1440.0,0,0.0,0.0
240
+ 342720.0,344160.0,1440.0,0,0.0,0.0
241
+ 344160.0,345600.0,1440.0,0,0.0,0.0
242
+ 345600.0,347040.0,1440.0,0,0.0,0.0
243
+ 347040.0,348480.0,1440.0,0,0.0,0.0
244
+ 348480.0,349920.0,1440.0,0,0.0,0.0
245
+ 349920.0,351360.0,1440.0,0,0.0,0.0
246
+ 351360.0,352800.0,1440.0,0,0.0,0.0
247
+ 352800.0,354240.0,1440.0,0,0.0,0.0
248
+ 354240.0,355680.0,1440.0,0,0.0,0.0
249
+ 355680.0,357120.0,1440.0,0,0.0,0.0
250
+ 357120.0,358560.0,1440.0,5,0.013089005235602094,1.3089005235602094
251
+ 358560.0,360000.0,1440.0,0,0.0,0.0
252
+ 360000.0,361440.0,1440.0,0,0.0,0.0
253
+ 361440.0,362880.0,1440.0,0,0.0,0.0
254
+ 362880.0,364320.0,1440.0,0,0.0,0.0
255
+ 364320.0,365760.0,1440.0,0,0.0,0.0
256
+ 365760.0,367200.0,1440.0,0,0.0,0.0
257
+ 367200.0,368640.0,1440.0,1,0.002617801047120419,0.2617801047120419
258
+ 368640.0,370080.0,1440.0,0,0.0,0.0
259
+ 370080.0,371520.0,1440.0,0,0.0,0.0
260
+ 371520.0,372960.0,1440.0,1,0.002617801047120419,0.2617801047120419
261
+ 372960.0,374400.0,1440.0,1,0.002617801047120419,0.2617801047120419
262
+ 374400.0,375840.0,1440.0,2,0.005235602094240838,0.5235602094240838
Results/day_student_attempt_distribution_counts_001849d40d6bf1e2ed5826e4420e291e54655a2b6b77a9ab701faaeb4e899abe.csv ADDED
@@ -0,0 +1,256 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ bin_left_min,bin_right_min,bin_width_min,attempt_count,probability,percentage
2
+ 0.0,1440.0,1440.0,4,0.010958904109589041,1.095890410958904
3
+ 1440.0,2880.0,1440.0,4,0.010958904109589041,1.095890410958904
4
+ 2880.0,4320.0,1440.0,1,0.0027397260273972603,0.273972602739726
5
+ 4320.0,5760.0,1440.0,2,0.005479452054794521,0.547945205479452
6
+ 5760.0,7200.0,1440.0,0,0.0,0.0
7
+ 7200.0,8640.0,1440.0,0,0.0,0.0
8
+ 8640.0,10080.0,1440.0,4,0.010958904109589041,1.095890410958904
9
+ 10080.0,11520.0,1440.0,5,0.0136986301369863,1.36986301369863
10
+ 11520.0,12960.0,1440.0,1,0.0027397260273972603,0.273972602739726
11
+ 12960.0,14400.0,1440.0,5,0.0136986301369863,1.36986301369863
12
+ 14400.0,15840.0,1440.0,2,0.005479452054794521,0.547945205479452
13
+ 15840.0,17280.0,1440.0,0,0.0,0.0
14
+ 17280.0,18720.0,1440.0,0,0.0,0.0
15
+ 18720.0,20160.0,1440.0,6,0.01643835616438356,1.643835616438356
16
+ 20160.0,21600.0,1440.0,3,0.00821917808219178,0.821917808219178
17
+ 21600.0,23040.0,1440.0,2,0.005479452054794521,0.547945205479452
18
+ 23040.0,24480.0,1440.0,2,0.005479452054794521,0.547945205479452
19
+ 24480.0,25920.0,1440.0,1,0.0027397260273972603,0.273972602739726
20
+ 25920.0,27360.0,1440.0,0,0.0,0.0
21
+ 27360.0,28800.0,1440.0,0,0.0,0.0
22
+ 28800.0,30240.0,1440.0,5,0.0136986301369863,1.36986301369863
23
+ 30240.0,31680.0,1440.0,0,0.0,0.0
24
+ 31680.0,33120.0,1440.0,0,0.0,0.0
25
+ 33120.0,34560.0,1440.0,1,0.0027397260273972603,0.273972602739726
26
+ 34560.0,36000.0,1440.0,16,0.043835616438356165,4.383561643835616
27
+ 36000.0,37440.0,1440.0,0,0.0,0.0
28
+ 37440.0,38880.0,1440.0,0,0.0,0.0
29
+ 38880.0,40320.0,1440.0,1,0.0027397260273972603,0.273972602739726
30
+ 40320.0,41760.0,1440.0,0,0.0,0.0
31
+ 41760.0,43200.0,1440.0,2,0.005479452054794521,0.547945205479452
32
+ 43200.0,44640.0,1440.0,0,0.0,0.0
33
+ 44640.0,46080.0,1440.0,1,0.0027397260273972603,0.273972602739726
34
+ 46080.0,47520.0,1440.0,0,0.0,0.0
35
+ 47520.0,48960.0,1440.0,0,0.0,0.0
36
+ 48960.0,50400.0,1440.0,4,0.010958904109589041,1.095890410958904
37
+ 50400.0,51840.0,1440.0,1,0.0027397260273972603,0.273972602739726
38
+ 51840.0,53280.0,1440.0,2,0.005479452054794521,0.547945205479452
39
+ 53280.0,54720.0,1440.0,2,0.005479452054794521,0.547945205479452
40
+ 54720.0,56160.0,1440.0,2,0.005479452054794521,0.547945205479452
41
+ 56160.0,57600.0,1440.0,0,0.0,0.0
42
+ 57600.0,59040.0,1440.0,0,0.0,0.0
43
+ 59040.0,60480.0,1440.0,0,0.0,0.0
44
+ 60480.0,61920.0,1440.0,2,0.005479452054794521,0.547945205479452
45
+ 61920.0,63360.0,1440.0,0,0.0,0.0
46
+ 63360.0,64800.0,1440.0,0,0.0,0.0
47
+ 64800.0,66240.0,1440.0,0,0.0,0.0
48
+ 66240.0,67680.0,1440.0,0,0.0,0.0
49
+ 67680.0,69120.0,1440.0,0,0.0,0.0
50
+ 69120.0,70560.0,1440.0,2,0.005479452054794521,0.547945205479452
51
+ 70560.0,72000.0,1440.0,3,0.00821917808219178,0.821917808219178
52
+ 72000.0,73440.0,1440.0,3,0.00821917808219178,0.821917808219178
53
+ 73440.0,74880.0,1440.0,7,0.019178082191780823,1.9178082191780823
54
+ 74880.0,76320.0,1440.0,3,0.00821917808219178,0.821917808219178
55
+ 76320.0,77760.0,1440.0,0,0.0,0.0
56
+ 77760.0,79200.0,1440.0,0,0.0,0.0
57
+ 79200.0,80640.0,1440.0,2,0.005479452054794521,0.547945205479452
58
+ 80640.0,82080.0,1440.0,8,0.021917808219178082,2.191780821917808
59
+ 82080.0,83520.0,1440.0,2,0.005479452054794521,0.547945205479452
60
+ 83520.0,84960.0,1440.0,4,0.010958904109589041,1.095890410958904
61
+ 84960.0,86400.0,1440.0,0,0.0,0.0
62
+ 86400.0,87840.0,1440.0,0,0.0,0.0
63
+ 87840.0,89280.0,1440.0,0,0.0,0.0
64
+ 89280.0,90720.0,1440.0,4,0.010958904109589041,1.095890410958904
65
+ 90720.0,92160.0,1440.0,0,0.0,0.0
66
+ 92160.0,93600.0,1440.0,6,0.01643835616438356,1.643835616438356
67
+ 93600.0,95040.0,1440.0,0,0.0,0.0
68
+ 95040.0,96480.0,1440.0,4,0.010958904109589041,1.095890410958904
69
+ 96480.0,97920.0,1440.0,0,0.0,0.0
70
+ 97920.0,99360.0,1440.0,0,0.0,0.0
71
+ 99360.0,100800.0,1440.0,0,0.0,0.0
72
+ 100800.0,102240.0,1440.0,3,0.00821917808219178,0.821917808219178
73
+ 102240.0,103680.0,1440.0,6,0.01643835616438356,1.643835616438356
74
+ 103680.0,105120.0,1440.0,3,0.00821917808219178,0.821917808219178
75
+ 105120.0,106560.0,1440.0,1,0.0027397260273972603,0.273972602739726
76
+ 106560.0,108000.0,1440.0,0,0.0,0.0
77
+ 108000.0,109440.0,1440.0,0,0.0,0.0
78
+ 109440.0,110880.0,1440.0,0,0.0,0.0
79
+ 110880.0,112320.0,1440.0,0,0.0,0.0
80
+ 112320.0,113760.0,1440.0,0,0.0,0.0
81
+ 113760.0,115200.0,1440.0,0,0.0,0.0
82
+ 115200.0,116640.0,1440.0,0,0.0,0.0
83
+ 116640.0,118080.0,1440.0,0,0.0,0.0
84
+ 118080.0,119520.0,1440.0,0,0.0,0.0
85
+ 119520.0,120960.0,1440.0,1,0.0027397260273972603,0.273972602739726
86
+ 120960.0,122400.0,1440.0,3,0.00821917808219178,0.821917808219178
87
+ 122400.0,123840.0,1440.0,2,0.005479452054794521,0.547945205479452
88
+ 123840.0,125280.0,1440.0,5,0.0136986301369863,1.36986301369863
89
+ 125280.0,126720.0,1440.0,3,0.00821917808219178,0.821917808219178
90
+ 126720.0,128160.0,1440.0,0,0.0,0.0
91
+ 128160.0,129600.0,1440.0,0,0.0,0.0
92
+ 129600.0,131040.0,1440.0,8,0.021917808219178082,2.191780821917808
93
+ 131040.0,132480.0,1440.0,5,0.0136986301369863,1.36986301369863
94
+ 132480.0,133920.0,1440.0,4,0.010958904109589041,1.095890410958904
95
+ 133920.0,135360.0,1440.0,6,0.01643835616438356,1.643835616438356
96
+ 135360.0,136800.0,1440.0,4,0.010958904109589041,1.095890410958904
97
+ 136800.0,138240.0,1440.0,0,0.0,0.0
98
+ 138240.0,139680.0,1440.0,0,0.0,0.0
99
+ 139680.0,141120.0,1440.0,0,0.0,0.0
100
+ 141120.0,142560.0,1440.0,0,0.0,0.0
101
+ 142560.0,144000.0,1440.0,0,0.0,0.0
102
+ 144000.0,145440.0,1440.0,0,0.0,0.0
103
+ 145440.0,146880.0,1440.0,0,0.0,0.0
104
+ 146880.0,148320.0,1440.0,0,0.0,0.0
105
+ 148320.0,149760.0,1440.0,0,0.0,0.0
106
+ 149760.0,151200.0,1440.0,0,0.0,0.0
107
+ 151200.0,152640.0,1440.0,0,0.0,0.0
108
+ 152640.0,154080.0,1440.0,0,0.0,0.0
109
+ 154080.0,155520.0,1440.0,0,0.0,0.0
110
+ 155520.0,156960.0,1440.0,0,0.0,0.0
111
+ 156960.0,158400.0,1440.0,0,0.0,0.0
112
+ 158400.0,159840.0,1440.0,0,0.0,0.0
113
+ 159840.0,161280.0,1440.0,0,0.0,0.0
114
+ 161280.0,162720.0,1440.0,0,0.0,0.0
115
+ 162720.0,164160.0,1440.0,0,0.0,0.0
116
+ 164160.0,165600.0,1440.0,0,0.0,0.0
117
+ 165600.0,167040.0,1440.0,0,0.0,0.0
118
+ 167040.0,168480.0,1440.0,0,0.0,0.0
119
+ 168480.0,169920.0,1440.0,0,0.0,0.0
120
+ 169920.0,171360.0,1440.0,0,0.0,0.0
121
+ 171360.0,172800.0,1440.0,0,0.0,0.0
122
+ 172800.0,174240.0,1440.0,0,0.0,0.0
123
+ 174240.0,175680.0,1440.0,0,0.0,0.0
124
+ 175680.0,177120.0,1440.0,0,0.0,0.0
125
+ 177120.0,178560.0,1440.0,0,0.0,0.0
126
+ 178560.0,180000.0,1440.0,0,0.0,0.0
127
+ 180000.0,181440.0,1440.0,3,0.00821917808219178,0.821917808219178
128
+ 181440.0,182880.0,1440.0,7,0.019178082191780823,1.9178082191780823
129
+ 182880.0,184320.0,1440.0,0,0.0,0.0
130
+ 184320.0,185760.0,1440.0,8,0.021917808219178082,2.191780821917808
131
+ 185760.0,187200.0,1440.0,5,0.0136986301369863,1.36986301369863
132
+ 187200.0,188640.0,1440.0,0,0.0,0.0
133
+ 188640.0,190080.0,1440.0,0,0.0,0.0
134
+ 190080.0,191520.0,1440.0,0,0.0,0.0
135
+ 191520.0,192960.0,1440.0,3,0.00821917808219178,0.821917808219178
136
+ 192960.0,194400.0,1440.0,0,0.0,0.0
137
+ 194400.0,195840.0,1440.0,2,0.005479452054794521,0.547945205479452
138
+ 195840.0,197280.0,1440.0,0,0.0,0.0
139
+ 197280.0,198720.0,1440.0,0,0.0,0.0
140
+ 198720.0,200160.0,1440.0,0,0.0,0.0
141
+ 200160.0,201600.0,1440.0,3,0.00821917808219178,0.821917808219178
142
+ 201600.0,203040.0,1440.0,1,0.0027397260273972603,0.273972602739726
143
+ 203040.0,204480.0,1440.0,4,0.010958904109589041,1.095890410958904
144
+ 204480.0,205920.0,1440.0,8,0.021917808219178082,2.191780821917808
145
+ 205920.0,207360.0,1440.0,0,0.0,0.0
146
+ 207360.0,208800.0,1440.0,0,0.0,0.0
147
+ 208800.0,210240.0,1440.0,0,0.0,0.0
148
+ 210240.0,211680.0,1440.0,2,0.005479452054794521,0.547945205479452
149
+ 211680.0,213120.0,1440.0,2,0.005479452054794521,0.547945205479452
150
+ 213120.0,214560.0,1440.0,0,0.0,0.0
151
+ 214560.0,216000.0,1440.0,5,0.0136986301369863,1.36986301369863
152
+ 216000.0,217440.0,1440.0,3,0.00821917808219178,0.821917808219178
153
+ 217440.0,218880.0,1440.0,0,0.0,0.0
154
+ 218880.0,220320.0,1440.0,0,0.0,0.0
155
+ 220320.0,221760.0,1440.0,4,0.010958904109589041,1.095890410958904
156
+ 221760.0,223200.0,1440.0,5,0.0136986301369863,1.36986301369863
157
+ 223200.0,224640.0,1440.0,6,0.01643835616438356,1.643835616438356
158
+ 224640.0,226080.0,1440.0,0,0.0,0.0
159
+ 226080.0,227520.0,1440.0,0,0.0,0.0
160
+ 227520.0,228960.0,1440.0,0,0.0,0.0
161
+ 228960.0,230400.0,1440.0,0,0.0,0.0
162
+ 230400.0,231840.0,1440.0,0,0.0,0.0
163
+ 231840.0,233280.0,1440.0,4,0.010958904109589041,1.095890410958904
164
+ 233280.0,234720.0,1440.0,0,0.0,0.0
165
+ 234720.0,236160.0,1440.0,1,0.0027397260273972603,0.273972602739726
166
+ 236160.0,237600.0,1440.0,0,0.0,0.0
167
+ 237600.0,239040.0,1440.0,0,0.0,0.0
168
+ 239040.0,240480.0,1440.0,0,0.0,0.0
169
+ 240480.0,241920.0,1440.0,0,0.0,0.0
170
+ 241920.0,243360.0,1440.0,0,0.0,0.0
171
+ 243360.0,244800.0,1440.0,6,0.01643835616438356,1.643835616438356
172
+ 244800.0,246240.0,1440.0,5,0.0136986301369863,1.36986301369863
173
+ 246240.0,247680.0,1440.0,0,0.0,0.0
174
+ 247680.0,249120.0,1440.0,0,0.0,0.0
175
+ 249120.0,250560.0,1440.0,0,0.0,0.0
176
+ 250560.0,252000.0,1440.0,0,0.0,0.0
177
+ 252000.0,253440.0,1440.0,0,0.0,0.0
178
+ 253440.0,254880.0,1440.0,0,0.0,0.0
179
+ 254880.0,256320.0,1440.0,0,0.0,0.0
180
+ 256320.0,257760.0,1440.0,3,0.00821917808219178,0.821917808219178
181
+ 257760.0,259200.0,1440.0,0,0.0,0.0
182
+ 259200.0,260640.0,1440.0,0,0.0,0.0
183
+ 260640.0,262080.0,1440.0,2,0.005479452054794521,0.547945205479452
184
+ 262080.0,263520.0,1440.0,0,0.0,0.0
185
+ 263520.0,264960.0,1440.0,0,0.0,0.0
186
+ 264960.0,266400.0,1440.0,2,0.005479452054794521,0.547945205479452
187
+ 266400.0,267840.0,1440.0,6,0.01643835616438356,1.643835616438356
188
+ 267840.0,269280.0,1440.0,0,0.0,0.0
189
+ 269280.0,270720.0,1440.0,0,0.0,0.0
190
+ 270720.0,272160.0,1440.0,4,0.010958904109589041,1.095890410958904
191
+ 272160.0,273600.0,1440.0,0,0.0,0.0
192
+ 273600.0,275040.0,1440.0,4,0.010958904109589041,1.095890410958904
193
+ 275040.0,276480.0,1440.0,5,0.0136986301369863,1.36986301369863
194
+ 276480.0,277920.0,1440.0,0,0.0,0.0
195
+ 277920.0,279360.0,1440.0,0,0.0,0.0
196
+ 279360.0,280800.0,1440.0,0,0.0,0.0
197
+ 280800.0,282240.0,1440.0,0,0.0,0.0
198
+ 282240.0,283680.0,1440.0,0,0.0,0.0
199
+ 283680.0,285120.0,1440.0,0,0.0,0.0
200
+ 285120.0,286560.0,1440.0,0,0.0,0.0
201
+ 286560.0,288000.0,1440.0,0,0.0,0.0
202
+ 288000.0,289440.0,1440.0,0,0.0,0.0
203
+ 289440.0,290880.0,1440.0,0,0.0,0.0
204
+ 290880.0,292320.0,1440.0,0,0.0,0.0
205
+ 292320.0,293760.0,1440.0,1,0.0027397260273972603,0.273972602739726
206
+ 293760.0,295200.0,1440.0,0,0.0,0.0
207
+ 295200.0,296640.0,1440.0,0,0.0,0.0
208
+ 296640.0,298080.0,1440.0,0,0.0,0.0
209
+ 298080.0,299520.0,1440.0,0,0.0,0.0
210
+ 299520.0,300960.0,1440.0,0,0.0,0.0
211
+ 300960.0,302400.0,1440.0,2,0.005479452054794521,0.547945205479452
212
+ 302400.0,303840.0,1440.0,2,0.005479452054794521,0.547945205479452
213
+ 303840.0,305280.0,1440.0,3,0.00821917808219178,0.821917808219178
214
+ 305280.0,306720.0,1440.0,6,0.01643835616438356,1.643835616438356
215
+ 306720.0,308160.0,1440.0,4,0.010958904109589041,1.095890410958904
216
+ 308160.0,309600.0,1440.0,0,0.0,0.0
217
+ 309600.0,311040.0,1440.0,0,0.0,0.0
218
+ 311040.0,312480.0,1440.0,4,0.010958904109589041,1.095890410958904
219
+ 312480.0,313920.0,1440.0,6,0.01643835616438356,1.643835616438356
220
+ 313920.0,315360.0,1440.0,4,0.010958904109589041,1.095890410958904
221
+ 315360.0,316800.0,1440.0,0,0.0,0.0
222
+ 316800.0,318240.0,1440.0,0,0.0,0.0
223
+ 318240.0,319680.0,1440.0,0,0.0,0.0
224
+ 319680.0,321120.0,1440.0,0,0.0,0.0
225
+ 321120.0,322560.0,1440.0,0,0.0,0.0
226
+ 322560.0,324000.0,1440.0,0,0.0,0.0
227
+ 324000.0,325440.0,1440.0,0,0.0,0.0
228
+ 325440.0,326880.0,1440.0,0,0.0,0.0
229
+ 326880.0,328320.0,1440.0,0,0.0,0.0
230
+ 328320.0,329760.0,1440.0,0,0.0,0.0
231
+ 329760.0,331200.0,1440.0,0,0.0,0.0
232
+ 331200.0,332640.0,1440.0,6,0.01643835616438356,1.643835616438356
233
+ 332640.0,334080.0,1440.0,4,0.010958904109589041,1.095890410958904
234
+ 334080.0,335520.0,1440.0,1,0.0027397260273972603,0.273972602739726
235
+ 335520.0,336960.0,1440.0,1,0.0027397260273972603,0.273972602739726
236
+ 336960.0,338400.0,1440.0,1,0.0027397260273972603,0.273972602739726
237
+ 338400.0,339840.0,1440.0,0,0.0,0.0
238
+ 339840.0,341280.0,1440.0,0,0.0,0.0
239
+ 341280.0,342720.0,1440.0,1,0.0027397260273972603,0.273972602739726
240
+ 342720.0,344160.0,1440.0,0,0.0,0.0
241
+ 344160.0,345600.0,1440.0,5,0.0136986301369863,1.36986301369863
242
+ 345600.0,347040.0,1440.0,9,0.024657534246575342,2.4657534246575343
243
+ 347040.0,348480.0,1440.0,0,0.0,0.0
244
+ 348480.0,349920.0,1440.0,0,0.0,0.0
245
+ 349920.0,351360.0,1440.0,0,0.0,0.0
246
+ 351360.0,352800.0,1440.0,0,0.0,0.0
247
+ 352800.0,354240.0,1440.0,0,0.0,0.0
248
+ 354240.0,355680.0,1440.0,0,0.0,0.0
249
+ 355680.0,357120.0,1440.0,0,0.0,0.0
250
+ 357120.0,358560.0,1440.0,0,0.0,0.0
251
+ 358560.0,360000.0,1440.0,0,0.0,0.0
252
+ 360000.0,361440.0,1440.0,0,0.0,0.0
253
+ 361440.0,362880.0,1440.0,3,0.00821917808219178,0.821917808219178
254
+ 362880.0,364320.0,1440.0,3,0.00821917808219178,0.821917808219178
255
+ 364320.0,365760.0,1440.0,2,0.005479452054794521,0.547945205479452
256
+ 365760.0,367200.0,1440.0,1,0.0027397260273972603,0.273972602739726
Results/inference_data_kt_results.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5ecb945b29f013a0da9f770266b80724f4f9d92da04e41357b69f309622d0ea
3
+ size 2080602643
Results/month_student_attempt_distribution.png ADDED

Git LFS Details

  • SHA256: 442e60a126f5498f4ee1e9ba97405427d565e6659d01abefc07f01a1e7a243a5
  • Pointer size: 131 Bytes
  • Size of remote file: 197 kB
Results/month_student_attempt_distribution_00173df52c65a9c13b45978c5a56ea2db67b17deded674486f7af64f7c5fd9ac.png ADDED

Git LFS Details

  • SHA256: 4d58fe3403e553e2ee8ae640314ea937ddf945fbb255c7ab68af92337be250d8
  • Pointer size: 131 Bytes
  • Size of remote file: 169 kB
Results/month_student_attempt_distribution_001849d40d6bf1e2ed5826e4420e291e54655a2b6b77a9ab701faaeb4e899abe.png ADDED

Git LFS Details

  • SHA256: 7fe38ee116deae89995201cf4c7ea145c0fa3da0b98b8f6677950478c6c17b48
  • Pointer size: 131 Bytes
  • Size of remote file: 160 kB
Results/month_student_attempt_distribution_counts.csv ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ bin_left_min,bin_right_min,bin_width_min,attempt_count,probability,percentage
2
+ 0.0,43200.0,43200.0,209461,0.1198159238984321,11.98159238984321
3
+ 43200.0,86400.0,43200.0,177545,0.1015593270754323,10.155932707543231
4
+ 86400.0,129600.0,43200.0,176130,0.10074991848712096,10.074991848712097
5
+ 129600.0,172800.0,43200.0,167676,0.09591405968458806,9.591405968458806
6
+ 172800.0,216000.0,43200.0,155115,0.08872891390523913,8.872891390523913
7
+ 216000.0,259200.0,43200.0,174585,0.09986614727232165,9.986614727232165
8
+ 259200.0,302400.0,43200.0,189411,0.10834691881317249,10.834691881317248
9
+ 302400.0,345600.0,43200.0,125970,0.07205738506684056,7.205738506684057
10
+ 345600.0,388800.0,43200.0,97012,0.055492824006543914,5.549282400654391
11
+ 388800.0,432000.0,43200.0,28306,0.016191603887449304,1.6191603887449304
12
+ 432000.0,475200.0,43200.0,18574,0.010624703264519303,1.0624703264519304
13
+ 475200.0,518400.0,43200.0,24530,0.014031655598075725,1.4031655598075725
14
+ 518400.0,561600.0,43200.0,29081,0.016634919545358343,1.6634919545358344
15
+ 561600.0,604800.0,43200.0,29056,0.016620619040264503,1.6620619040264504
16
+ 604800.0,648000.0,43200.0,23457,0.013417877919448115,1.3417877919448116
17
+ 648000.0,691200.0,43200.0,23176,0.013257140242193355,1.3257140242193355
18
+ 691200.0,734400.0,43200.0,21084,0.01206047397594083,1.206047397594083
19
+ 734400.0,777600.0,43200.0,21215,0.012135408622632552,1.213540862263255
20
+ 777600.0,820800.0,43200.0,20921,0.011967234682728993,1.1967234682728993
21
+ 820800.0,864000.0,43200.0,13592,0.007774898609418885,0.7774898609418884
22
+ 864000.0,907200.0,43200.0,10107,0.0057814081993376,0.57814081993376
23
+ 907200.0,950400.0,43200.0,1273,0.0007281817193783284,0.07281817193783284
24
+ 950400.0,993600.0,43200.0,1086,0.0006212139412764059,0.06212139412764059
25
+ 993600.0,1036800.0,43200.0,1155,0.000660683335335404,0.0660683335335404
26
+ 1036800.0,1080000.0,43200.0,1266,0.0007241775779520533,0.07241775779520533
27
+ 1080000.0,1123200.0,43200.0,1494,0.0008545981844078733,0.08545981844078733
28
+ 1123200.0,1166400.0,43200.0,810,0.0004633363650404132,0.04633363650404132
29
+ 1166400.0,1209600.0,43200.0,730,0.0004175747487401255,0.04175747487401255
30
+ 1209600.0,1252800.0,43200.0,362,0.00020707131375880195,0.020707131375880195
31
+ 1252800.0,1296000.0,43200.0,1075,0.0006149217190351163,0.06149217190351163
32
+ 1296000.0,1339200.0,43200.0,716,0.00040956646588757514,0.040956646588757514
33
+ 1339200.0,1382400.0,43200.0,972,0.0005560036380484959,0.05560036380484959
34
+ 1382400.0,1425600.0,43200.0,817,0.0004673405064666884,0.04673405064666884
35
+ 1425600.0,1468800.0,43200.0,258,0.00014758121256842793,0.014758121256842793
36
+ 1468800.0,1512000.0,43200.0,4,2.288080815014386e-06,0.00022880808150143862
37
+ 1512000.0,1555200.0,43200.0,5,2.8601010187679828e-06,0.0002860101018767983
38
+ 1555200.0,1598400.0,43200.0,0,0.0,0.0
39
+ 1598400.0,1641600.0,43200.0,0,0.0,0.0
40
+ 1641600.0,1684800.0,43200.0,10,5.7202020375359656e-06,0.0005720202037535966
41
+ 1684800.0,1728000.0,43200.0,1,5.720202037535965e-07,5.7202020375359656e-05
42
+ 1728000.0,1771200.0,43200.0,0,0.0,0.0
43
+ 1771200.0,1814400.0,43200.0,3,1.7160606112607897e-06,0.00017160606112607898
44
+ 1814400.0,1857600.0,43200.0,0,0.0,0.0
45
+ 1857600.0,1900800.0,43200.0,0,0.0,0.0
46
+ 1900800.0,1944000.0,43200.0,0,0.0,0.0
47
+ 1944000.0,1987200.0,43200.0,0,0.0,0.0
48
+ 1987200.0,2030400.0,43200.0,0,0.0,0.0
49
+ 2030400.0,2073600.0,43200.0,0,0.0,0.0
50
+ 2073600.0,2116800.0,43200.0,2,1.144040407507193e-06,0.00011440404075071931
51
+ 2116800.0,2160000.0,43200.0,3,1.7160606112607897e-06,0.00017160606112607898
52
+ 2160000.0,2203200.0,43200.0,5,2.8601010187679828e-06,0.0002860101018767983
53
+ 2203200.0,2246400.0,43200.0,11,6.292222241289563e-06,0.0006292222241289563
54
+ 2246400.0,2289600.0,43200.0,19,1.0868383871318335e-05,0.0010868383871318334
55
+ 2289600.0,2332800.0,43200.0,5,2.8601010187679828e-06,0.0002860101018767983
56
+ 2332800.0,2376000.0,43200.0,12,6.864242445043159e-06,0.0006864242445043159
57
+ 2376000.0,2419200.0,43200.0,11,6.292222241289563e-06,0.0006292222241289563
58
+ 2419200.0,2462400.0,43200.0,9,5.148181833782369e-06,0.000514818183378237
59
+ 2462400.0,2505600.0,43200.0,4,2.288080815014386e-06,0.00022880808150143862
60
+ 2505600.0,2548800.0,43200.0,0,0.0,0.0
61
+ 2548800.0,2592000.0,43200.0,8,4.576161630028772e-06,0.00045761616300287725
62
+ 2592000.0,2635200.0,43200.0,18,1.0296363667564739e-05,0.001029636366756474
63
+ 2635200.0,2678400.0,43200.0,13,7.436262648796756e-06,0.0007436262648796755
64
+ 2678400.0,2721600.0,43200.0,11,6.292222241289563e-06,0.0006292222241289563
65
+ 2721600.0,2764800.0,43200.0,0,0.0,0.0
66
+ 2764800.0,2808000.0,43200.0,0,0.0,0.0
67
+ 2808000.0,2851200.0,43200.0,4,2.288080815014386e-06,0.00022880808150143862
68
+ 2851200.0,2894400.0,43200.0,3,1.7160606112607897e-06,0.00017160606112607898
69
+ 2894400.0,2937600.0,43200.0,11,6.292222241289563e-06,0.0006292222241289563
Results/month_student_attempt_distribution_counts_00173df52c65a9c13b45978c5a56ea2db67b17deded674486f7af64f7c5fd9ac.csv ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ bin_left_min,bin_right_min,bin_width_min,attempt_count,probability,percentage
2
+ 0.0,43200.0,43200.0,33,0.08638743455497382,8.638743455497382
3
+ 43200.0,86400.0,43200.0,67,0.17539267015706805,17.539267015706805
4
+ 86400.0,129600.0,43200.0,36,0.09424083769633508,9.424083769633508
5
+ 129600.0,172800.0,43200.0,54,0.14136125654450263,14.136125654450263
6
+ 172800.0,216000.0,43200.0,26,0.06806282722513089,6.806282722513089
7
+ 216000.0,259200.0,43200.0,63,0.1649214659685864,16.49214659685864
8
+ 259200.0,302400.0,43200.0,53,0.1387434554973822,13.874345549738221
9
+ 302400.0,345600.0,43200.0,40,0.10471204188481675,10.471204188481675
10
+ 345600.0,388800.0,43200.0,10,0.02617801047120419,2.6178010471204187
Results/month_student_attempt_distribution_counts_001849d40d6bf1e2ed5826e4420e291e54655a2b6b77a9ab701faaeb4e899abe.csv ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ bin_left_min,bin_right_min,bin_width_min,attempt_count,probability,percentage
2
+ 0.0,43200.0,43200.0,67,0.18356164383561643,18.356164383561644
3
+ 43200.0,86400.0,43200.0,48,0.13150684931506848,13.150684931506849
4
+ 86400.0,129600.0,43200.0,41,0.11232876712328767,11.232876712328768
5
+ 129600.0,172800.0,43200.0,27,0.07397260273972603,7.397260273972603
6
+ 172800.0,216000.0,43200.0,53,0.14520547945205478,14.520547945205479
7
+ 216000.0,259200.0,43200.0,37,0.10136986301369863,10.136986301369863
8
+ 259200.0,302400.0,43200.0,26,0.07123287671232877,7.123287671232877
9
+ 302400.0,345600.0,43200.0,48,0.13150684931506848,13.150684931506849
10
+ 345600.0,388800.0,43200.0,18,0.049315068493150684,4.931506849315069
Results/pedagogical_grounding/README.md ADDED
@@ -0,0 +1,275 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Pedagogical Grounding
2
+
3
+ This module provides tools for analyzing educational assessment data and benchmarking LLMs on pedagogical reasoning tasks.
4
+
5
+ ## Overview
6
+
7
+ The pedagogical grounding work consists of two parts:
8
+
9
+ 1. **Analysis Scripts** - Compute pedagogical metrics from student response data
10
+ 2. **LLM Benchmark** - Evaluate LLMs on pedagogical reasoning tasks
11
+
12
+ ---
13
+
14
+ ## Part 1: Analysis Scripts
15
+
16
+ These scripts analyze the FoundationalKT dataset to compute pedagogical metrics.
17
+
18
+ ### IRT Parameters (`irt_parameters.py`)
19
+
20
+ Computes Item Response Theory parameters for assessment items using Bayesian inference.
21
+
22
+ ```bash
23
+ python pedagogical_grounding/irt_parameters.py \
24
+ --data-dir foundationalktdataset/ \
25
+ --output-dir pedagogical_grounding/output/
26
+ ```
27
+
28
+ **Output:** `irt_parameters.json` (~2,548 problems)
29
+
30
+ | Field | Description |
31
+ |-------|-------------|
32
+ | `problem_id` | Unique problem identifier |
33
+ | `difficulty_1pl` | 1PL (Rasch) difficulty parameter |
34
+ | `difficulty_2pl` | 2PL difficulty parameter (primary metric) |
35
+ | `discrimination_2pl` | 2PL discrimination parameter |
36
+ | `percent_correct` | Empirical accuracy rate |
37
+ | `n_responses` | Number of student responses |
38
+
39
+ **Interpretation:**
40
+ - `difficulty_2pl`: Higher = harder (range: -1.35 to 0.91)
41
+ - `discrimination_2pl`: Higher = better at distinguishing between students of different ability levels (range: 0.01 to 0.91)
42
+
43
+ ### Distractor Analysis (`distractor_analysis.py`)
44
+
45
+ Analyzes the effectiveness of wrong answer choices in multiple-choice questions.
46
+
47
+ ```bash
48
+ python pedagogical_grounding/distractor_analysis.py \
49
+ --data-dir foundationalktdataset/ \
50
+ --output-dir pedagogical_grounding/output/
51
+ ```
52
+
53
+ **Output:** `distractor_stats.json` (~236 MC problems)
54
+
55
+ | Field | Description |
56
+ |-------|-------------|
57
+ | `problem_id` | Unique problem identifier |
58
+ | `n_choices` | Number of answer options |
59
+ | `correct_rate` | Fraction choosing correct answer |
60
+ | `distractors` | Dict of wrong answers → count |
61
+ | `distractor_frequencies` | Dict of wrong answers → frequency |
62
+ | `most_common_distractor` | Most frequently chosen wrong answer |
63
+ | `least_common_distractor` | Least frequently chosen wrong answer |
64
+
65
+ ---
66
+
67
+ ## Part 2: LLM Benchmark
68
+
69
+ Benchmarks LLMs on 4 pedagogical reasoning tasks using zero-shot evaluation.
70
+
71
+ ### Tasks
72
+
73
+ | Task | Description | Data Source | Metric |
74
+ |------|-------------|-------------|--------|
75
+ | `difficulty` | Which of two questions is harder? | IRT params | Binary accuracy |
76
+ | `discrimination` | Which question better distinguishes ability? | IRT params | Binary accuracy |
77
+ | `distractor_most` | Which wrong answer is most commonly chosen? | Distractor stats | Multi-class accuracy |
78
+ | `distractor_least` | Which wrong answer is least commonly chosen? | Distractor stats | Multi-class accuracy |
79
+
80
+ ### Scripts
81
+
82
+ All benchmark scripts are located in this folder (`pedagogical_grounding/`):
83
+
84
+ | File | Description |
85
+ |------|-------------|
86
+ | `pedagogical_inference_base.py` | Base module with data loading, sampling, prompts, inference |
87
+ | `gptoss120b_pedagogical.py` | GPT-OSS-120B model config |
88
+ | `qwen3next80b_instruct_pedagogical.py` | Qwen3-Next-80B-Instruct model config |
89
+ | `qwen3next80b_thinking_pedagogical.py` | Qwen3-Next-80B-Thinking model config |
90
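+ | `evaluate_pedagogical.py` | Evaluation script for accuracy metrics |
+ | `batch_evaluate.py` | Batch evaluation: analyzes all result files in a directory and creates aggregate comparison tables |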
91
+
92
+ ### Usage
93
+
94
+ **Run all commands from the project root directory.**
95
+
96
+ #### Run Inference
97
+
98
+ ```bash
99
+ # Difficulty comparison (stratified sampling, 1000 pairs)
100
+ CUDA_VISIBLE_DEVICES=0,1,2,3 python pedagogical_grounding/gptoss120b_pedagogical.py \
101
+ --task difficulty \
102
+ --num-samples 1000 \
103
+ --sampling-mode stratified \
104
+ --num-gpus 4
105
+
106
+ # Discrimination comparison
107
+ CUDA_VISIBLE_DEVICES=0,1,2,3 python pedagogical_grounding/gptoss120b_pedagogical.py \
108
+ --task discrimination \
109
+ --num-samples 1000 \
110
+ --num-gpus 4
111
+
112
+ # Most common distractor prediction
113
+ CUDA_VISIBLE_DEVICES=0,1,2,3 python pedagogical_grounding/gptoss120b_pedagogical.py \
114
+ --task distractor_most \
115
+ --num-gpus 4
116
+
117
+ # Least common distractor prediction
118
+ CUDA_VISIBLE_DEVICES=0,1,2,3 python pedagogical_grounding/gptoss120b_pedagogical.py \
119
+ --task distractor_least \
120
+ --num-gpus 4
121
+
122
+ # All tasks at once
123
+ CUDA_VISIBLE_DEVICES=0,1,2,3 python pedagogical_grounding/gptoss120b_pedagogical.py \
124
+ --task all \
125
+ --num-samples 500 \
126
+ --num-gpus 4
127
+ ```
128
+
129
+ #### Evaluate Results
130
+
131
+ ```bash
132
+ python pedagogical_grounding/evaluate_pedagogical.py --input gptoss120b_pedagogical_difficulty_n1000_stratified.jsonl
133
+ python pedagogical_grounding/evaluate_pedagogical.py --input results.jsonl --output metrics.json
134
+ ```
135
+
136
+ ### CLI Arguments
137
+
138
+ | Argument | Description | Default |
139
+ |----------|-------------|---------|
140
+ | `--task` | Task to run: `difficulty`, `discrimination`, `distractor_most`, `distractor_least`, `all` | Required |
141
+ | `--sampling-mode` | `random` or `stratified` (ensures meaningful differences) | `stratified` |
142
+ | `--num-samples` | Number of pairs/problems to sample | 1000 |
143
+ | `--min-difference` | Minimum difference for stratified sampling | 0.2 |
144
+ | `--batch-size` | Batch size for vLLM inference | 500 |
145
+ | `--num-gpus` | Number of GPUs for tensor parallelism | 1 |
146
+ | `--data-dir` | Base directory containing data files | `.` |
147
+
148
+ ### Sampling Modes
149
+
150
+ **Random:** Any two questions are paired at random, with no minimum difference enforced (may include trivial comparisons).
151
+
152
+ **Stratified:** Ensures meaningful differences between pairs:
153
+ - Small: 0.2–0.5 difference
154
+ - Medium: 0.5–1.0 difference
155
+ - Large: >1.0 difference
156
+
157
+ ### Output Format (JSONL)
158
+
159
+ **Comparison tasks:**
160
+ ```json
161
+ {
162
+ "prediction_id": "difficulty_405080_448452",
163
+ "task": "difficulty",
164
+ "problem_id_a": 405080,
165
+ "problem_id_b": 448452,
166
+ "value_a": -0.234,
167
+ "value_b": -0.533,
168
+ "difference": 0.299,
169
+ "stratum": "small",
170
+ "ground_truth": "A",
171
+ "predicted_answer": "A",
172
+ "is_correct": true
173
+ }
174
+ ```
175
+
176
+ **Distractor tasks:**
177
+ ```json
178
+ {
179
+ "prediction_id": "distractor_most_74236",
180
+ "task": "distractor_most",
181
+ "problem_id": 74236,
182
+ "n_choices": 4,
183
+ "ground_truth_letter": "B",
184
+ "ground_truth_text": "Elena is correct",
185
+ "ground_truth_freq": 0.207,
186
+ "predicted_answer": "B",
187
+ "is_correct": true
188
+ }
189
+ ```
190
+
191
+ ### Baselines
192
+
193
+ | Task | Random Baseline |
194
+ |------|-----------------|
195
+ | Difficulty comparison | 50% |
196
+ | Discrimination comparison | 50% |
197
+ | Distractor (3 options) | 33% |
198
+ | Distractor (4 options) | 25% |
199
+
200
+ ### Evaluation Metrics
201
+
202
+ The evaluation script computes:
203
+
204
+ - **Overall accuracy** vs random baseline
205
+ - **By stratum** (for comparison tasks): small/medium/large difference
206
+ - **By difference magnitude**: very_small/small/medium/large
207
+ - **By number of distractors** (for distractor tasks)
208
+ - **By ground truth frequency**: rare/moderate/common distractors
209
+
210
+ ---
211
+
212
+ ## Data Flow
213
+
214
+ ```
215
+ foundationalktdataset/
216
+ ├── Interactions.csv # Student responses
217
+ ├── Problems.csv # Problem text and answers
218
+ └── Skills.csv # Skill metadata
219
+
220
+
221
+ ┌───────────────────────────────────────┐
222
+ │ Analysis Scripts (Part 1) │
223
+ │ irt_parameters.py, distractor_analysis.py │
224
+ └───────────────────────────────────────┘
225
+
226
+
227
+ pedagogical_grounding/output/
228
+ ├── irt_parameters.json # 2,548 problems with IRT params
229
+ └── distractor_stats.json # 236 MC problems with distractor data
230
+
231
+
232
+ ┌───────────────────────────────────────┐
233
+ │ LLM Benchmark (Part 2) │
234
+ │ pedagogical_inference_base.py │
235
+ │ + model configs │
236
+ └───────────────────────────────────────┘
237
+
238
+
239
+ Output: JSONL predictions + evaluation metrics
240
+ ```
241
+
242
+ ---
243
+
244
+ ## File Structure
245
+
246
+ ```
247
+ foundationalKT-ss/
248
+ ├── pedagogical_grounding/
249
+ │ ├── README.md # This file
250
+ │ │
251
+ │ │ # Analysis Scripts (Part 1)
252
+ │ ├── irt_parameters.py # IRT parameter computation
253
+ │ ├── distractor_analysis.py # Distractor effectiveness analysis
254
+ │ │
255
+ │ │ # LLM Benchmark (Part 2)
256
+ │ ├── pedagogical_inference_base.py # Benchmark base module
257
+ │ ├── gptoss120b_pedagogical.py # GPT-OSS-120B config
258
+ │ ├── qwen3next80b_instruct_pedagogical.py # Qwen3 Instruct config
259
+ │ ├── qwen3next80b_thinking_pedagogical.py # Qwen3 Thinking config
260
+ │ ├── evaluate_pedagogical.py # Evaluation script
261
+ │ │
262
+ │ │ # Output Data
263
+ │ └── output/
264
+ │ ├── irt_parameters.json # IRT results (2,548 problems)
265
+ │ ├── irt_parameters.png # IRT visualizations
266
+ │ ├── distractor_stats.json # Distractor results (236 problems)
267
+ │ └── distractor_analysis.png # Distractor visualizations
268
+
269
+ ├── foundationalktdataset/ # Source data
270
+ │ ├── Interactions.csv
271
+ │ ├── Problems.csv
272
+ │ └── Skills.csv
273
+
274
+ └── clean_utils.py # Text cleaning utilities
275
+ ```
Results/pedagogical_grounding/batch_evaluate.py ADDED
@@ -0,0 +1,328 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Batch evaluation script for Pedagogical Grounding LLM Benchmark.
3
+
4
+ Analyzes all result files in a directory and creates aggregate comparison tables.
5
+
6
+ Usage:
7
+ python pedagogical_grounding/batch_evaluate.py --input-dir pedagogical_grounding/results
8
+ python pedagogical_grounding/batch_evaluate.py --input-dir results --output summary.json
9
+ """
10
+
11
+ import argparse
12
+ import glob
13
+ import json
14
+ import os
15
+ import re
16
+ from collections import defaultdict
17
+ from typing import Dict, List, Tuple
18
+
19
+ from evaluate_pedagogical import (
20
+ load_results,
21
+ evaluate_comparison_task,
22
+ evaluate_distractor_task,
23
+ )
24
+
25
+
26
def extract_model_name(filename: str) -> str:
    """Derive the model identifier from a result file path.

    Result files are expected to be named ``{model}_pedagogical_{task}_...``.

    Args:
        filename: Path (or bare name) of a result file.

    Returns:
        The shortest non-empty prefix that precedes ``_pedagogical_`` in the
        base name. If the base name does not follow that pattern, the base
        name with every ``.jsonl`` occurrence removed.
    """
    leaf = os.path.basename(filename)
    found = re.match(r'^(.+?)_pedagogical_', leaf)
    if found is None:
        return leaf.replace('.jsonl', '')
    return found.group(1)
34
+
35
+
36
def evaluate_file(filepath: str) -> Tuple[str, Dict]:
    """Evaluate a single result file.

    Args:
        filepath: Path to a JSONL file of predictions.

    Returns:
        A ``(model_name, metrics)`` tuple. ``metrics`` has the keys ``file``
        (base name of ``filepath``), ``total_predictions`` (number of loaded
        records) and ``tasks`` (task name -> metrics dict). Tasks that are
        absent from the file, unknown, or whose evaluation reports an
        ``error`` are left out of ``tasks``.
    """
    model_name = extract_model_name(filepath)
    results = load_results(filepath)

    metrics = {
        'file': os.path.basename(filepath),
        'total_predictions': len(results),
        'tasks': {}
    }

    # Known tasks in a fixed order. Iterating the set of task names found in
    # the file would make the key order of ``metrics['tasks']`` (and of any
    # JSON written from it) vary from run to run.
    evaluators = {
        'difficulty': evaluate_comparison_task,
        'discrimination': evaluate_comparison_task,
        'distractor_most': evaluate_distractor_task,
        'distractor_least': evaluate_distractor_task,
    }
    present = set(r.get('task') for r in results if r.get('task'))

    for task, evaluator in evaluators.items():
        if task not in present:
            continue
        task_metrics = evaluator(results, task)
        if 'error' not in task_metrics:
            metrics['tasks'][task] = task_metrics

    return model_name, metrics
62
+
63
+
64
def print_header(title: str, width: int = 80) -> None:
    """Print ``title`` framed by two rules of ``=`` characters.

    Args:
        title: Text shown between the rules.
        width: Length of each rule in characters.
    """
    rule = "=" * width
    for line in ("", rule, f" {title}", rule):
        print(line)
70
+
71
+
72
def print_accuracy_table(all_metrics: Dict[str, Dict], task: str) -> None:
    """Print the accuracy / lift / sample-count table for one task.

    Args:
        all_metrics: Model name -> metrics dict; each metrics dict holds a
            ``tasks`` mapping of task name -> task metrics.
        task: Task whose metrics are tabulated.

    Models are listed alphabetically. A model without usable metrics for
    ``task`` gets an ``N/A`` row.
    """
    # The comparison tasks are binary choices; for the distractor tasks the
    # random baseline depends on the number of options of each item.
    if task in ['difficulty', 'discrimination']:
        baseline_label = "50.0%"
    else:
        baseline_label = "varies"

    rule = "-" * 65
    print(f"\n{'Model':<35} {'Accuracy':>10} {'Lift':>10} {'N':>8}")
    print(rule)

    for model in sorted(all_metrics):
        task_metrics = all_metrics[model]['tasks'].get(task)
        if task_metrics and 'error' not in task_metrics:
            acc = task_metrics['accuracy']
            lift = task_metrics['lift_over_random']
            n = task_metrics['total']
            # Cell widths match the header and the N/A row so columns line up.
            print(f"{model:<35} {acc:>10.1%} {lift:>+10.1%} {n:>8}")
        else:
            print(f"{model:<35} {'N/A':>10} {'N/A':>10} {'N/A':>8}")

    print(rule)
    print(f"{'Random Baseline':<35} {baseline_label:>10}")
101
+
102
+
103
def print_stratum_table(all_metrics: Dict[str, Dict], task: str) -> None:
    """Print per-stratum accuracy (small / medium / large) for one task.

    Args:
        all_metrics: Model name -> metrics dict; each metrics dict holds a
            ``tasks`` mapping of task name -> task metrics.
        task: Task whose ``by_stratum`` breakdown is tabulated.

    Models are listed alphabetically. Models without usable metrics for
    ``task`` are omitted; a stratum missing from a model's breakdown is
    shown as ``N/A``.
    """
    strata = ['small', 'medium', 'large']

    header = f"{'Model':<30}" + "".join(f" {s:>12}" for s in strata)
    print(f"\n{header}")
    print("-" * (30 + 13 * len(strata)))

    for model in sorted(all_metrics):
        task_metrics = all_metrics[model]['tasks'].get(task)
        if not task_metrics or 'error' in task_metrics:
            continue

        by_stratum = task_metrics.get('by_stratum', {})
        row = f"{model:<30}"
        for s in strata:
            if s in by_stratum:
                # 12 wide, like the header and N/A cells, so columns line up.
                row += f" {by_stratum[s]['accuracy']:>12.1%}"
            else:
                row += f" {'N/A':>12}"
        print(row)
128
+
129
+
130
def print_summary_table(all_metrics: Dict[str, Dict]) -> None:
    """Print one row per model with the accuracy on every task and their mean.

    Args:
        all_metrics: Model name -> metrics dict; each metrics dict holds a
            ``tasks`` mapping of task name -> task metrics.

    Models are listed alphabetically. Tasks without usable metrics are shown
    as ``N/A`` and excluded from the model's average. A fixed random-baseline
    row closes the table.
    """
    tasks = ['difficulty', 'discrimination', 'distractor_most', 'distractor_least']
    task_abbrev = {
        'difficulty': 'Diff',
        'discrimination': 'Disc',
        'distractor_most': 'D-Most',
        'distractor_least': 'D-Least'
    }
    rule = "-" * (30 + 11 * (len(tasks) + 1))

    header = f"{'Model':<30}"
    header += "".join(f" {task_abbrev[t]:>10}" for t in tasks)
    header += f" {'Avg':>10}"
    print(f"\n{header}")
    print(rule)

    for model in sorted(all_metrics):
        model_tasks = all_metrics[model]['tasks']
        row = f"{model:<30}"
        accs = []
        for t in tasks:
            task_metrics = model_tasks.get(t)
            if task_metrics and 'error' not in task_metrics:
                acc = task_metrics['accuracy']
                # 10 wide, like the header and N/A cells, so columns line up.
                row += f" {acc:>10.1%}"
                accs.append(acc)
            else:
                row += f" {'N/A':>10}"

        if accs:
            row += f" {sum(accs) / len(accs):>10.1%}"
        else:
            row += f" {'N/A':>10}"
        print(row)

    # Baseline cells, in column order: the two comparison tasks, the two
    # distractor tasks (approximate, hard-coded), and the average column.
    print(rule)
    baseline_cells = ['50.0%', '50.0%', '~35%', '~35%', '~43%']
    print(f"{'Random Baseline':<30}" + "".join(f" {cell:>10}" for cell in baseline_cells))
180
+
181
+
182
def print_lift_table(all_metrics: Dict[str, Dict]) -> None:
    """Print one row per model with the lift over random on every task.

    Args:
        all_metrics: Model name -> metrics dict; each metrics dict holds a
            ``tasks`` mapping of task name -> task metrics.

    Models are listed alphabetically. Tasks without usable metrics are shown
    as ``N/A`` and excluded from the model's average lift.
    """
    tasks = ['difficulty', 'discrimination', 'distractor_most', 'distractor_least']
    task_abbrev = {
        'difficulty': 'Diff',
        'discrimination': 'Disc',
        'distractor_most': 'D-Most',
        'distractor_least': 'D-Least'
    }

    header = f"{'Model':<30}"
    header += "".join(f" {task_abbrev[t]:>10}" for t in tasks)
    header += f" {'Avg Lift':>10}"
    print(f"\n{header}")
    print("-" * (30 + 11 * (len(tasks) + 1)))

    for model in sorted(all_metrics):
        model_tasks = all_metrics[model]['tasks']
        row = f"{model:<30}"
        lifts = []
        for t in tasks:
            task_metrics = model_tasks.get(t)
            if task_metrics and 'error' not in task_metrics:
                lift = task_metrics['lift_over_random']
                # 10 wide, like the header and N/A cells, so columns line up.
                row += f" {lift:>+10.1%}"
                lifts.append(lift)
            else:
                row += f" {'N/A':>10}"

        if lifts:
            row += f" {sum(lifts) / len(lifts):>+10.1%}"
        else:
            row += f" {'N/A':>10}"
        print(row)
222
+
223
+
224
def print_best_model_per_task(all_metrics: Dict[str, Dict]) -> None:
    """Print, for every task, the model with the highest accuracy.

    Args:
        all_metrics: Model name -> metrics dict; each metrics dict holds a
            ``tasks`` mapping of task name -> task metrics.

    When several models tie, the one that comes first in ``all_metrics``
    wins. A task for which no model has usable metrics gets an ``N/A`` row.
    """
    tasks = ['difficulty', 'discrimination', 'distractor_most', 'distractor_least']

    print(f"\n{'Task':<20} {'Best Model':<30} {'Accuracy':>10} {'Lift':>10}")
    print("-" * 72)

    for task in tasks:
        candidates = []
        for model, metrics in all_metrics.items():
            task_metrics = metrics['tasks'].get(task)
            if task_metrics and 'error' not in task_metrics:
                candidates.append((model, task_metrics))

        if not candidates:
            print(f"{task:<20} {'N/A':<30} {'N/A':>10} {'N/A':>10}")
            continue

        # max() keeps the first of several equal maxima.
        best_model, best = max(candidates, key=lambda item: item[1]['accuracy'])
        # 10-wide value cells match the header and the N/A row.
        print(f"{task:<20} {best_model:<30} {best['accuracy']:>10.1%} "
              f"{best['lift_over_random']:>+10.1%}")
248
+
249
+
250
def _merge_model_metrics(all_metrics: Dict[str, Dict], model_name: str, metrics: Dict) -> None:
    """Fold the metrics of one result file into the per-model aggregate.

    The first file seen for a model is stored as-is. Later files for the
    same model add their tasks and prediction counts to that entry, and the
    file names are collected under a ``files`` key.
    """
    existing = all_metrics.get(model_name)
    if existing is None:
        all_metrics[model_name] = metrics
        return

    existing.setdefault('files', [existing['file']]).append(metrics['file'])
    existing['total_predictions'] += metrics['total_predictions']
    # If the same task shows up in several files, the last file wins.
    existing['tasks'].update(metrics['tasks'])


def main():
    """Evaluate every result file in a directory and print comparison tables.

    Command-line options: ``--input-dir/-i`` (required), ``--pattern`` (glob,
    default ``*.jsonl``) and ``--output/-o`` (optional JSON file that
    receives the aggregate metrics).
    """
    parser = argparse.ArgumentParser(
        description="Batch evaluate Pedagogical Grounding Benchmark results"
    )
    parser.add_argument(
        "--input-dir", "-i",
        type=str,
        required=True,
        help="Directory containing JSONL result files"
    )
    parser.add_argument(
        "--output", "-o",
        type=str,
        default=None,
        help="Output JSON file for aggregate metrics (optional)"
    )
    parser.add_argument(
        "--pattern",
        type=str,
        default="*.jsonl",
        help="Glob pattern for result files (default: *.jsonl)"
    )
    args = parser.parse_args()

    # Find all result files
    pattern = os.path.join(args.input_dir, args.pattern)
    files = sorted(glob.glob(pattern))

    if not files:
        print(f"No files found matching: {pattern}")
        return

    print(f"Found {len(files)} result files in {args.input_dir}")

    # Evaluate all files. File names follow {model}_pedagogical_{task}_...,
    # so one model can be spread over several files; merge them instead of
    # letting the last file replace the earlier ones.
    all_metrics = {}
    for filepath in files:
        print(f" Loading: {os.path.basename(filepath)}")
        model_name, metrics = evaluate_file(filepath)
        _merge_model_metrics(all_metrics, model_name, metrics)

    # Print aggregate tables
    print_header("PEDAGOGICAL GROUNDING BENCHMARK - AGGREGATE RESULTS")

    print_header("ACCURACY BY TASK", width=72)
    print_summary_table(all_metrics)

    print_header("LIFT OVER RANDOM BASELINE", width=72)
    print_lift_table(all_metrics)

    print_header("BEST MODEL PER TASK", width=72)
    print_best_model_per_task(all_metrics)

    # Detailed stratum breakdown for the comparison tasks
    print_header("DIFFICULTY - ACCURACY BY STRATUM", width=70)
    print_stratum_table(all_metrics, 'difficulty')

    print_header("DISCRIMINATION - ACCURACY BY STRATUM", width=70)
    print_stratum_table(all_metrics, 'discrimination')

    # Individual task tables
    for task in ['difficulty', 'discrimination', 'distractor_most', 'distractor_least']:
        print_header(f"{task.upper()} - DETAILED", width=65)
        print_accuracy_table(all_metrics, task)

    # Save aggregate metrics if output specified
    if args.output:
        with open(args.output, 'w') as f:
            json.dump(all_metrics, f, indent=2)
        print(f"\nAggregate metrics saved to {args.output}")

    print()


if __name__ == "__main__":
    main()
Results/pedagogical_grounding/distractor_analysis.py ADDED
@@ -0,0 +1,299 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Distractor Analysis for Pedagogical Grounding
3
+
4
+ Computes distractor effectiveness for Multiple Choice (select 1) questions
5
+ with more than 2 choices. Identifies most/least common wrong answers.
6
+ """
7
+
8
+ import argparse
9
+ import os
10
+ import json
11
+ import pandas as pd
12
+ import numpy as np
13
+ import matplotlib.pyplot as plt
14
+ from collections import Counter
15
+
16
+ # Configuration
17
+ DEFAULT_DATA_DIR = "foundationalktdataset"
18
+ STUDENT_FILE = "Interactions.csv"
19
+ PROBLEMS_FILE = "Problems.csv"
20
+
21
+
22
def parse_args():
    """Parse the command-line options of the distractor analysis script.

    Returns:
        The parsed namespace with ``data_dir``, ``output_dir`` and
        ``no_plots`` attributes.
    """
    cli = argparse.ArgumentParser(description="Analyze distractor effectiveness")

    # Where the input CSV files live.
    cli.add_argument(
        "--data-dir", "-d",
        default=DEFAULT_DATA_DIR,
        type=str,
        help=f"Directory containing input CSV files (default: {DEFAULT_DATA_DIR})",
    )
    # Where the JSON (and optional PNG) output goes.
    cli.add_argument(
        "--output-dir", "-o",
        default="pedagogical_grounding/output",
        type=str,
        help="Directory to save output (default: pedagogical_grounding/output)",
    )
    # Opt out of figure generation.
    cli.add_argument("--no-plots", action="store_true", help="Skip generating plots")

    return cli.parse_args()
42
+
43
+
44
def load_data(data_dir):
    """Read the interaction and problem CSV files from ``data_dir``.

    Args:
        data_dir: Directory that contains ``STUDENT_FILE`` and
            ``PROBLEMS_FILE``.

    Returns:
        A ``(student_df, problems_df)`` tuple of DataFrames.
    """
    print(f"Loading data from {data_dir}...")

    interactions_path = os.path.join(data_dir, STUDENT_FILE)
    problems_path = os.path.join(data_dir, PROBLEMS_FILE)
    student_df = pd.read_csv(interactions_path)
    problems_df = pd.read_csv(problems_path)

    for frame, label in ((student_df, "student interactions"), (problems_df, "problems")):
        print(f" Loaded {len(frame):,} {label}")

    return student_df, problems_df
55
+
56
+
57
def get_answer_options(answer_string):
    """Split a ``||``-delimited string of answer options.

    Args:
        answer_string: Raw cell value; may be missing (NaN/None) or empty.

    Returns:
        The options with surrounding whitespace removed, skipping empty
        ones. An empty list for a missing or empty value.
    """
    if pd.isna(answer_string) or answer_string == '':
        return []

    options = []
    for piece in answer_string.split('||'):
        trimmed = piece.strip()
        if trimmed:
            options.append(trimmed)
    return options
62
+
63
+
64
def get_correct_answers(correct_string):
    """Split a ``||``-delimited string of correct answers.

    Args:
        correct_string: Raw cell value; may be missing (NaN/None) or empty.

    Returns:
        The answers with surrounding whitespace removed, skipping empty
        ones. An empty list for a missing or empty value.
    """
    missing = pd.isna(correct_string) or correct_string == ''
    if missing:
        return []
    trimmed_parts = (part.strip() for part in correct_string.split('||'))
    return [part for part in trimmed_parts if part]
69
+
70
+
71
def normalize_answer(text):
    """Reduce answer text to a canonical form for comparison.

    Args:
        text: Any value; it is converted with ``str()`` unless missing.

    Returns:
        The text with ``<...>`` tags removed, whitespace runs collapsed to
        single spaces, no leading/trailing whitespace, and lower-cased. An
        empty string for a missing (NaN/None) value.
    """
    if pd.isna(text):
        return ""
    import re
    without_tags = re.sub(r'<[^>]+>', '', str(text))
    # split()/join collapses whitespace runs and trims both ends in one go.
    words = without_tags.split()
    return ' '.join(words).lower()
81
+
82
+
83
def analyze_distractors(student_df, problems_df):
    """Analyze distractor effectiveness for MC (select 1) problems with >2 choices.

    Args:
        student_df: Interaction rows; the ``problem_id`` and ``answer_text``
            columns are read.
        problems_df: Problem rows; the ``problem_id``, ``Problem Type``,
            ``Multiple Choice Options`` and ``Multiple Choice Answers``
            columns are read.

    Returns:
        A list with one dict per analyzed problem, with the keys
        ``problem_id``, ``n_choices``, ``total_responses``, ``correct_count``,
        ``correct_rate``, ``distractors`` (option -> count),
        ``distractor_frequencies`` (option -> share of matched responses),
        ``most_common_distractor(_freq)`` and ``least_common_distractor(_freq)``.
        A problem is left out when it has fewer than 10 interactions, fewer
        than 10 responses that match an option, or no distractor at all.
    """
    print("\n" + "=" * 60)
    print(" DISTRACTOR ANALYSIS")
    print("=" * 60)

    # Filter to MC (select 1) problems
    mc_problems = problems_df[problems_df['Problem Type'] == 'Multiple Choice (select 1)'].copy()

    # Count choices
    mc_problems['answer_options'] = mc_problems['Multiple Choice Options'].apply(get_answer_options)
    mc_problems['n_choices'] = mc_problems['answer_options'].apply(len)

    # Filter to >2 choices
    mc_problems = mc_problems[mc_problems['n_choices'] > 2].copy()
    print(f"\nMC (select 1) problems with >2 choices: {len(mc_problems)}")

    # Get correct answers
    mc_problems['correct_answers'] = mc_problems['Multiple Choice Answers'].apply(get_correct_answers)

    # Filter student data to these problems. The interactions are not merged
    # with the problem table: every per-problem value is looked up in
    # ``mc_problems`` below, and a merge would repeat each interaction once
    # per problem row whenever a ``problem_id`` occurs more than once there,
    # inflating all counts.
    problem_ids = set(mc_problems['problem_id'])
    mc_interactions = student_df[student_df['problem_id'].isin(problem_ids)]
    print(f"Student interactions for these problems: {len(mc_interactions):,}")

    # Analyze each problem
    results = []

    for problem_id in mc_problems['problem_id'].unique():
        problem_data = mc_interactions[mc_interactions['problem_id'] == problem_id]

        if len(problem_data) < 10:  # Skip problems with too few responses
            continue

        problem_info = mc_problems[mc_problems['problem_id'] == problem_id].iloc[0]
        answer_options = problem_info['answer_options']
        # Use the parsed list built above from 'Multiple Choice Answers'
        # (not another raw column of the problem table).
        correct_answers = problem_info['correct_answers']

        # Normalize once per problem rather than once per response.
        normalized_options = [(opt, normalize_answer(opt)) for opt in answer_options]
        correct_normalized = [normalize_answer(a) for a in correct_answers]

        # Count responses for each option
        option_counts = Counter()
        total_responses = 0

        for student_answer in problem_data['answer_text'].dropna():
            student_normalized = normalize_answer(student_answer)

            # A response is credited to the first option that equals it,
            # contains it, or is contained in it.
            # NOTE(review): substring matching can credit the wrong option
            # (an empty normalized answer matches every option) - confirm
            # this heuristic is intended.
            for opt, opt_normalized in normalized_options:
                if student_normalized in opt_normalized or opt_normalized in student_normalized:
                    option_counts[opt] += 1
                    total_responses += 1
                    break

        if total_responses < 10:
            continue

        # Separate correct and incorrect options
        distractors = {}
        correct_count = 0

        for opt, opt_normalized in normalized_options:
            count = option_counts.get(opt, 0)
            is_correct = any(
                c in opt_normalized or opt_normalized in c
                for c in correct_normalized
            )
            if is_correct:
                correct_count = count
            else:
                distractors[opt] = count

        if not distractors:
            continue

        # Find most/least common distractor (stable sort: ties keep option order)
        sorted_distractors = sorted(distractors.items(), key=lambda x: x[1], reverse=True)
        most_common = sorted_distractors[0]
        least_common = sorted_distractors[-1]

        # Compute frequencies
        distractor_freqs = {opt: count / total_responses for opt, count in distractors.items()}

        results.append({
            'problem_id': int(problem_id),
            'n_choices': int(problem_info['n_choices']),
            'total_responses': int(total_responses),
            'correct_count': int(correct_count),
            'correct_rate': correct_count / total_responses,
            'distractors': {opt: int(count) for opt, count in distractors.items()},
            'distractor_frequencies': distractor_freqs,
            'most_common_distractor': most_common[0],
            'most_common_distractor_freq': most_common[1] / total_responses,
            'least_common_distractor': least_common[0],
            'least_common_distractor_freq': least_common[1] / total_responses,
        })

    print(f"Problems with sufficient data: {len(results)}")

    return results
195
+
196
+
197
def print_summary(results):
    """Print summary statistics over the per-problem distractor results.

    Args:
        results: List of dicts carrying ``correct_rate``,
            ``most_common_distractor_freq`` and
            ``least_common_distractor_freq``.

    With an empty ``results`` a short notice is printed instead of the
    statistics, which are undefined for zero problems.
    """
    print("\n--- Summary Statistics ---")

    if not results:
        # np.min/np.max raise on empty input and the percentage below would
        # divide by zero.
        print("\nNo problems with sufficient data; nothing to summarize.")
        return

    correct_rates = [r['correct_rate'] for r in results]
    most_common_freqs = [r['most_common_distractor_freq'] for r in results]
    least_common_freqs = [r['least_common_distractor_freq'] for r in results]

    print(f"\nCorrect Answer Rate:")
    print(f" Mean: {np.mean(correct_rates):.1%}")
    print(f" Median: {np.median(correct_rates):.1%}")
    print(f" Min: {np.min(correct_rates):.1%}")
    print(f" Max: {np.max(correct_rates):.1%}")

    print(f"\nMost Common Distractor Frequency:")
    print(f" Mean: {np.mean(most_common_freqs):.1%}")
    print(f" Median: {np.median(most_common_freqs):.1%}")

    print(f"\nLeast Common Distractor Frequency:")
    print(f" Mean: {np.mean(least_common_freqs):.1%}")
    print(f" Median: {np.median(least_common_freqs):.1%}")

    # Count problems where least common distractor is never chosen
    never_chosen = sum(1 for freq in least_common_freqs if freq == 0)
    print(f"\nProblems with ineffective distractor (0 selections): {never_chosen} ({100*never_chosen/len(results):.1f}%)")
222
+
223
+
224
def save_results(results, output_dir):
    """Write ``results`` to ``distractor_stats.json`` inside ``output_dir``.

    Args:
        results: JSON-serializable analysis results.
        output_dir: Target directory; created if it does not exist.
    """
    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, 'distractor_stats.json')

    serialized = json.dumps(results, indent=2)
    with open(output_path, 'w') as handle:
        handle.write(serialized)

    print(f"\nSaved: {output_path}")
233
+
234
+
235
def plot_results(results, output_dir):
    """Draw three histograms of the results and save them as one PNG.

    The panels show the correct-answer rate, the most common distractor
    frequency and the least common distractor frequency, each with a dashed
    line at the mean. The figure is written to ``distractor_analysis.png``
    inside ``output_dir`` (created if needed).

    Args:
        results: Per-problem result dicts.
        output_dir: Directory that receives the PNG.
    """
    os.makedirs(output_dir, exist_ok=True)

    fig, axes = plt.subplots(1, 3, figsize=(15, 5))

    # (result key, x-axis label, title, extra histogram keyword arguments)
    panels = [
        ('correct_rate', 'Correct Answer Rate',
         'Distribution of Correct Answer Rates', {}),
        ('most_common_distractor_freq', 'Most Common Distractor Frequency',
         'Distribution of Most Common Distractor', {'color': 'orange'}),
        ('least_common_distractor_freq', 'Least Common Distractor Frequency',
         'Distribution of Least Common Distractor', {'color': 'green'}),
    ]

    for ax, (key, xlabel, title, hist_extra) in zip(axes, panels):
        values = [r[key] for r in results]
        ax.hist(values, bins=20, edgecolor='black', alpha=0.7, **hist_extra)
        ax.set_xlabel(xlabel)
        ax.set_ylabel('Number of Problems')
        ax.set_title(title)
        ax.axvline(np.mean(values), color='red', linestyle='--', label=f'Mean: {np.mean(values):.1%}')
        ax.legend()

    plt.tight_layout()
    plot_path = os.path.join(output_dir, 'distractor_analysis.png')
    plt.savefig(plot_path, dpi=150)
    plt.close()

    print(f"Saved: {plot_path}")
274
+
275
+
276
def main():
    """Run the distractor analysis: load, analyze, summarize, save, plot."""
    options = parse_args()

    # Load the CSV data and analyze it in one step.
    findings = analyze_distractors(*load_data(options.data_dir))

    print_summary(findings)
    save_results(findings, options.output_dir)

    # Plots are produced unless --no-plots was given.
    if not options.no_plots:
        plot_results(findings, options.output_dir)

    print("\nDone!")


if __name__ == "__main__":
    main()
Results/pedagogical_grounding/evaluate_pedagogical.py ADDED
@@ -0,0 +1,285 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Evaluation script for Pedagogical Grounding LLM Benchmark.
3
+
4
+ Computes accuracy metrics for:
5
+ 1. Difficulty comparison task
6
+ 2. Discrimination comparison task
7
+ 3. Most common distractor prediction
8
+ 4. Least common distractor prediction
9
+
10
+ Usage:
11
+ python evaluate_pedagogical.py --input results.jsonl
12
+ python evaluate_pedagogical.py --input results.jsonl --output metrics.json
13
+ """
14
+
15
+ import argparse
16
+ import json
17
+ from collections import defaultdict
18
+ from typing import Dict, List, Optional
19
+
20
+
21
def load_results(jsonl_path: str) -> List[Dict]:
    """Load prediction records from a JSONL file.

    Args:
        jsonl_path: Path to a file with one JSON document per line.

    Returns:
        The decoded records in file order; blank lines are skipped.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Decode as UTF-8 explicitly so records with non-ASCII text load the
    # same way regardless of the platform's default encoding.
    with open(jsonl_path, 'r', encoding='utf-8') as f:
        return [json.loads(line) for line in f if line.strip()]
29
+
30
+
31
def evaluate_comparison_task(results: List[Dict], task_name: str) -> Dict:
    """Evaluate a comparison task (difficulty or discrimination).

    Args:
        results: Prediction records. The ``task``, ``is_correct``,
            ``stratum`` and ``difference`` fields are read.
        task_name: Only records whose ``task`` equals this name are used.

    Returns:
        ``{'error': ...}`` when no record belongs to the task. Otherwise a
        dict with ``task``, ``total``, ``correct``, ``accuracy``,
        ``baseline_random`` (0.5), ``lift_over_random``, ``by_stratum`` and
        ``by_difference``; the two breakdowns map a group name to its
        ``accuracy``/``correct``/``total``.
    """
    task_results = [r for r in results if r.get('task') == task_name]

    if not task_results:
        return {'error': f'No results found for task: {task_name}'}

    total = len(task_results)
    correct = sum(1 for r in task_results if r.get('is_correct', False))
    accuracy = correct / total

    # Bins over the size of the value difference: [low, high).
    diff_bins = [
        ('very_small', 0, 0.2),
        ('small', 0.2, 0.5),
        ('medium', 0.5, 1.0),
        ('large', 1.0, float('inf'))
    ]

    stratum_stats = defaultdict(lambda: {'correct': 0, 'total': 0})
    diff_stats = defaultdict(lambda: {'correct': 0, 'total': 0})

    for r in task_results:
        hit = 1 if r.get('is_correct', False) else 0

        stratum = r.get('stratum', 'unknown')
        stratum_stats[stratum]['total'] += 1
        stratum_stats[stratum]['correct'] += hit

        diff = r.get('difference', 0)
        if diff is None:
            # An explicit null cannot be binned; leave it out of this breakdown.
            continue
        # Bin by magnitude so a signed difference is not silently dropped.
        magnitude = abs(diff)
        for bin_name, low, high in diff_bins:
            if low <= magnitude < high:
                diff_stats[bin_name]['total'] += 1
                diff_stats[bin_name]['correct'] += hit
                break

    def summarize(stats_by_group):
        # Groups only exist once a record was counted, so total is >= 1.
        return {
            name: {
                'accuracy': stats['correct'] / stats['total'],
                'correct': stats['correct'],
                'total': stats['total']
            }
            for name, stats in stats_by_group.items()
        }

    return {
        'task': task_name,
        'total': total,
        'correct': correct,
        'accuracy': accuracy,
        'baseline_random': 0.5,  # Random guess for binary choice
        'lift_over_random': accuracy - 0.5,
        'by_stratum': summarize(stratum_stats),
        'by_difference': summarize(diff_stats)
    }
96
+
97
+
98
def evaluate_distractor_task(results: List[Dict], task_name: str) -> Dict:
    """Evaluate a distractor task (most or least common).

    Args:
        results: Prediction records. The ``task``, ``is_correct``,
            ``n_choices`` and ``ground_truth_freq`` fields are read.
        task_name: Only records whose ``task`` equals this name are used.

    Returns:
        ``{'error': ...}`` when no record belongs to the task. Otherwise a
        dict with ``task``, ``total``, ``correct``, ``accuracy``,
        ``baseline_random_avg`` (mean of 1/number-of-distractors over the
        records), ``lift_over_random``, ``by_num_distractors`` and
        ``by_ground_truth_freq``.
    """
    task_results = [r for r in results if r.get('task') == task_name]

    if not task_results:
        return {'error': f'No results found for task: {task_name}'}

    total = len(task_results)
    correct = sum(1 for r in task_results if r.get('is_correct', False))
    accuracy = correct / total

    # Frequency bins are [low, high). The last one is open-ended so that a
    # frequency of exactly 1.0 is still counted.
    freq_bins = [
        ('very_rare', 0, 0.05),
        ('rare', 0.05, 0.10),
        ('moderate', 0.10, 0.20),
        ('common', 0.20, 0.30),
        ('very_common', 0.30, float('inf'))
    ]

    choices_stats = defaultdict(lambda: {'correct': 0, 'total': 0})
    freq_stats = defaultdict(lambda: {'correct': 0, 'total': 0})
    total_weighted_baseline = 0

    for r in task_results:
        hit = 1 if r.get('is_correct', False) else 0

        n_choices = r.get('n_choices', 0)
        # Number of distractors = n_choices - 1 (excluding the correct
        # answer); records without a usable n_choices count as 1 distractor.
        n_distractors = n_choices - 1 if n_choices > 1 else 1
        choices_stats[n_distractors]['total'] += 1
        choices_stats[n_distractors]['correct'] += hit
        total_weighted_baseline += 1.0 / n_distractors

        freq = r.get('ground_truth_freq', 0)
        for bin_name, low, high in freq_bins:
            if low <= freq < high:
                freq_stats[bin_name]['total'] += 1
                freq_stats[bin_name]['correct'] += hit
                break

    choices_accuracy = {}
    for n_distractors, stats in sorted(choices_stats.items()):
        acc = stats['correct'] / stats['total']
        baseline = 1.0 / n_distractors  # uniform guess among the distractors
        choices_accuracy[f'{n_distractors}_distractors'] = {
            'accuracy': acc,
            'correct': stats['correct'],
            'total': stats['total'],
            'baseline_random': baseline,
            'lift_over_random': acc - baseline
        }

    avg_baseline = total_weighted_baseline / total

    freq_accuracy = {
        bin_name: {
            'accuracy': stats['correct'] / stats['total'],
            'correct': stats['correct'],
            'total': stats['total']
        }
        for bin_name, stats in freq_stats.items()
    }

    return {
        'task': task_name,
        'total': total,
        'correct': correct,
        'accuracy': accuracy,
        'baseline_random_avg': avg_baseline,
        'lift_over_random': accuracy - avg_baseline,
        'by_num_distractors': choices_accuracy,
        'by_ground_truth_freq': freq_accuracy
    }
181
+
182
+
183
def print_comparison_results(metrics: Dict) -> None:
    """Print the report for one comparison task.

    Args:
        metrics: Dict with ``task``, ``accuracy``, ``correct``, ``total``,
            ``baseline_random`` and ``lift_over_random``; the optional
            ``by_stratum`` and ``by_difference`` breakdowns are printed when
            present and non-empty.
    """
    banner = '=' * 60

    def show_group(name, stats):
        # One breakdown line: group name, accuracy, and raw counts.
        print(f" {name:12}: {stats['accuracy']:.1%} ({stats['correct']}/{stats['total']})")

    print(f"\n{banner}")
    print(f"Task: {metrics['task'].upper()}")
    print(f"{banner}")

    print(f"\nOverall Accuracy: {metrics['accuracy']:.1%} ({metrics['correct']}/{metrics['total']})")
    print(f"Random Baseline: {metrics['baseline_random']:.1%}")
    print(f"Lift over Random: {metrics['lift_over_random']:+.1%}")

    strata = metrics.get('by_stratum')
    if strata:
        print(f"\nBy Sampling Stratum:")
        for stratum in sorted(strata):
            show_group(stratum, strata[stratum])

    differences = metrics.get('by_difference')
    if differences:
        print(f"\nBy Value Difference:")
        for bin_name, stats in differences.items():
            show_group(bin_name, stats)
202
+
203
+
204
def print_distractor_results(metrics: Dict) -> None:
    """Print the report for one distractor task.

    Args:
        metrics: Dict with ``task``, ``accuracy``, ``correct``, ``total``,
            ``baseline_random_avg`` and ``lift_over_random``; the optional
            ``by_num_distractors`` and ``by_ground_truth_freq`` breakdowns
            are printed when present and non-empty.
    """
    banner = '=' * 60
    print(f"\n{banner}")
    print(f"Task: {metrics['task'].upper()}")
    print(f"{banner}")

    print(f"\nOverall Accuracy: {metrics['accuracy']:.1%} ({metrics['correct']}/{metrics['total']})")
    print(f"Random Baseline: {metrics['baseline_random_avg']:.1%} (weighted avg)")
    print(f"Lift over Random: {metrics['lift_over_random']:+.1%}")

    per_count = metrics.get('by_num_distractors')
    if per_count:
        print(f"\nBy Number of Distractors:")
        # Keys are ordered as strings.
        for key in sorted(per_count):
            stats = per_count[key]
            summary = f" {key:15}: {stats['accuracy']:.1%} ({stats['correct']}/{stats['total']}) "
            detail = f"[baseline: {stats['baseline_random']:.1%}, lift: {stats['lift_over_random']:+.1%}]"
            print(summary + detail)

    per_freq = metrics.get('by_ground_truth_freq')
    if per_freq:
        print(f"\nBy Ground Truth Frequency:")
        for bin_name, stats in per_freq.items():
            print(f" {bin_name:12}: {stats['accuracy']:.1%} ({stats['correct']}/{stats['total']})")
224
+
225
+
226
def main():
    """Evaluate one JSONL result file and print a report per task.

    Command-line options: ``--input/-i`` (required JSONL file) and
    ``--output/-o`` (optional JSON file that receives the metrics).
    """
    cli = argparse.ArgumentParser(
        description="Evaluate Pedagogical Grounding Benchmark Results"
    )
    cli.add_argument("--input", "-i", type=str, required=True,
                     help="Input JSONL file with predictions")
    cli.add_argument("--output", "-o", type=str, default=None,
                     help="Output JSON file for metrics (optional)")
    args = cli.parse_args()

    print(f"Loading results from {args.input}...")
    results = load_results(args.input)
    print(f"Loaded {len(results)} predictions")

    # Identify tasks in results
    tasks = set(r.get('task') for r in results if r.get('task'))
    print(f"Tasks found: {tasks}")

    # Task name -> (evaluation function, report printer)
    comparison = (evaluate_comparison_task, print_comparison_results)
    distractor = (evaluate_distractor_task, print_distractor_results)
    handlers = {
        'difficulty': comparison,
        'discrimination': comparison,
        'distractor_most': distractor,
        'distractor_least': distractor,
    }

    all_metrics = {}
    for task in sorted(tasks):
        handler = handlers.get(task)
        if handler is None:
            print(f"Unknown task: {task}")
            continue
        evaluate, report = handler
        metrics = evaluate(results, task)
        report(metrics)
        all_metrics[task] = metrics

    # Summary
    banner = '=' * 60
    print(f"\n{banner}")
    print("SUMMARY")
    print(f"{banner}")
    for task, metrics in all_metrics.items():
        if 'error' in metrics:
            continue
        print(f"{task:20}: {metrics['accuracy']:.1%} accuracy "
              f"({metrics['lift_over_random']:+.1%} vs random)")

    # Save metrics if output specified
    if args.output:
        with open(args.output, 'w') as f:
            json.dump(all_metrics, f, indent=2)
        print(f"\nMetrics saved to {args.output}")


if __name__ == "__main__":
    main()
Results/pedagogical_grounding/gptoss120b_pedagogical.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Pedagogical Grounding benchmark with GPT-OSS-120B model.
3
+
4
+ Usage:
5
+ # Run difficulty comparison task
6
+ CUDA_VISIBLE_DEVICES=0,1,2,3 python gptoss120b_pedagogical.py \
7
+ --task difficulty \
8
+ --data-dir . \
9
+ --num-gpus 4 \
10
+ --num-samples 1000 \
11
+ --sampling-mode stratified \
12
+ --cache-dir /data1/
13
+
14
+ # Run discrimination comparison task
15
+ CUDA_VISIBLE_DEVICES=0,1,2,3 python gptoss120b_pedagogical.py \
16
+ --task discrimination \
17
+ --data-dir . \
18
+ --num-gpus 4 \
19
+ --num-samples 1000
20
+
21
+ # Run most common distractor task
22
+ CUDA_VISIBLE_DEVICES=0,1,2,3 python gptoss120b_pedagogical.py \
23
+ --task distractor_most \
24
+ --data-dir . \
25
+ --num-gpus 4
26
+
27
+ # Run least common distractor task
28
+ CUDA_VISIBLE_DEVICES=0,1,2,3 python gptoss120b_pedagogical.py \
29
+ --task distractor_least \
30
+ --data-dir . \
31
+ --num-gpus 4
32
+
33
+ # Run all tasks
34
+ CUDA_VISIBLE_DEVICES=0,1,2,3 python gptoss120b_pedagogical.py \
35
+ --task all \
36
+ --data-dir . \
37
+ --num-gpus 4 \
38
+ --num-samples 500
39
+ """
40
+
41
+ from pedagogical_inference_base import run_inference
42
+
43
# Settings handed to run_inference() for the GPT-OSS-120B benchmark run.
MODEL_CONFIG = dict(
    model_id="openai/gpt-oss-120b",
    # Sampling / generation parameters.
    gen_configs=dict(
        temperature=0.7,
        top_p=0.95,
        top_k=20,
        max_tokens=1024,  # Shorter responses expected for this task
        repetition_penalty=1.0,
    ),
    output_prefix="gptoss120b",
    system_prompt_prefix="Reasoning: medium\n\n",
)

if __name__ == "__main__":
    run_inference(MODEL_CONFIG)