Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- .DS_Store +0 -0
- .gitattributes +27 -0
- Code/LICENSE +40 -0
- Code/__pycache__/clean_utils.cpython-312.pyc +0 -0
- Code/__pycache__/process_to_single_file.cpython-312.pyc +0 -0
- Code/build_skill_set.py +157 -0
- Code/clean_utils.py +148 -0
- Code/cleantext.py +102 -0
- Code/detect_similar_node_codes.py +285 -0
- Code/evaluate_kt.py +205 -0
- Code/evaluate_kt_by_context.py +339 -0
- Code/find_duplicate_problem_body.py +152 -0
- Code/find_duplicate_problem_id.py +148 -0
- Code/gptoss120bvllmmcq.py +31 -0
- Code/kt_inference_base.py +849 -0
- Code/llama33_70b_instruct_vllm.py +30 -0
- Code/plot_student_attempt_distribution.py +357 -0
- Code/plot_timegap_distribution.py +484 -0
- Code/plot_totaltime_distribution.py +433 -0
- Code/process_to_single_file.py +820 -0
- Code/qwen3next80bvllm_instruct.py +35 -0
- Code/qwen3next80bvllm_thinking.py +35 -0
- Code/scripts.sh +24 -0
- Data/CASE-Common Core State Standards for Math.json +0 -0
- Data/Interactions.csv +3 -0
- Data/Math_Standards1.pdf +3 -0
- Data/Problems.csv +0 -0
- Data/Skill_Set.csv +165 -0
- Data/Skills.csv +0 -0
- README.md +246 -0
- Results/Problems_duplicated_problem_id.csv +209 -0
- Results/Problems_same_body_different_problem_id.csv +55 -0
- Results/day_student_attempt_distribution.png +3 -0
- Results/day_student_attempt_distribution_00173df52c65a9c13b45978c5a56ea2db67b17deded674486f7af64f7c5fd9ac.png +3 -0
- Results/day_student_attempt_distribution_001849d40d6bf1e2ed5826e4420e291e54655a2b6b77a9ab701faaeb4e899abe.png +3 -0
- Results/day_student_attempt_distribution_counts.csv +0 -0
- Results/day_student_attempt_distribution_counts_00173df52c65a9c13b45978c5a56ea2db67b17deded674486f7af64f7c5fd9ac.csv +262 -0
- Results/day_student_attempt_distribution_counts_001849d40d6bf1e2ed5826e4420e291e54655a2b6b77a9ab701faaeb4e899abe.csv +256 -0
- Results/inference_data_kt_results.zip +3 -0
- Results/month_student_attempt_distribution.png +3 -0
- Results/month_student_attempt_distribution_00173df52c65a9c13b45978c5a56ea2db67b17deded674486f7af64f7c5fd9ac.png +3 -0
- Results/month_student_attempt_distribution_001849d40d6bf1e2ed5826e4420e291e54655a2b6b77a9ab701faaeb4e899abe.png +3 -0
- Results/month_student_attempt_distribution_counts.csv +69 -0
- Results/month_student_attempt_distribution_counts_00173df52c65a9c13b45978c5a56ea2db67b17deded674486f7af64f7c5fd9ac.csv +10 -0
- Results/month_student_attempt_distribution_counts_001849d40d6bf1e2ed5826e4420e291e54655a2b6b77a9ab701faaeb4e899abe.csv +10 -0
- Results/pedagogical_grounding/README.md +275 -0
- Results/pedagogical_grounding/batch_evaluate.py +328 -0
- Results/pedagogical_grounding/distractor_analysis.py +299 -0
- Results/pedagogical_grounding/evaluate_pedagogical.py +285 -0
- Results/pedagogical_grounding/gptoss120b_pedagogical.py +57 -0
.DS_Store
ADDED
|
Binary file (6.15 kB). View file
|
|
|
.gitattributes
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
*.arrow filter=lfs diff=lfs merge=lfs -text
|
|
|
|
| 3 |
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
|
@@ -8,6 +9,8 @@
|
|
| 8 |
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
| 11 |
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
|
@@ -33,3 +36,27 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.avro filter=lfs diff=lfs merge=lfs -text
|
| 4 |
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 5 |
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 6 |
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
|
|
|
| 9 |
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 10 |
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 11 |
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.lz4 filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.mds filter=lfs diff=lfs merge=lfs -text
|
| 14 |
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 15 |
*.model filter=lfs diff=lfs merge=lfs -text
|
| 16 |
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
|
|
|
| 36 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 37 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 38 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
# Audio files - uncompressed
|
| 40 |
+
*.pcm filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
*.sam filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
*.raw filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
# Audio files - compressed
|
| 44 |
+
*.aac filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
*.flac filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
*.mp3 filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
*.ogg filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
*.wav filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
# Image files - uncompressed
|
| 50 |
+
*.bmp filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
*.gif filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
*.png filter=lfs diff=lfs merge=lfs -text
|
| 53 |
+
*.tiff filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
# Image files - compressed
|
| 55 |
+
*.jpg filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
*.jpeg filter=lfs diff=lfs merge=lfs -text
|
| 57 |
+
*.webp filter=lfs diff=lfs merge=lfs -text
|
| 58 |
+
# Video files - compressed
|
| 59 |
+
*.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 60 |
+
*.webm filter=lfs diff=lfs merge=lfs -text
|
| 61 |
+
*.csv filter=lfs diff=lfs merge=lfs -text
|
| 62 |
+
Data/Math_Standards1.pdf filter=lfs diff=lfs merge=lfs -text
|
Code/LICENSE
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Creative Commons Attribution-NonCommercial 4.0 International License (CC BY-NC 4.0)
|
| 2 |
+
|
| 3 |
+
Copyright (c) 2024 FoundationalED Authors
|
| 4 |
+
|
| 5 |
+
You are free to:
|
| 6 |
+
|
| 7 |
+
Share — copy and redistribute the material in any medium or format
|
| 8 |
+
|
| 9 |
+
Adapt — remix, transform, and build upon the material
|
| 10 |
+
|
| 11 |
+
Under the following terms:
|
| 12 |
+
|
| 13 |
+
Attribution — You must give appropriate credit, provide a link to the license,
|
| 14 |
+
and indicate if changes were made. You may do so in any reasonable manner, but
|
| 15 |
+
not in any way that suggests the licensor endorses you or your use.
|
| 16 |
+
|
| 17 |
+
NonCommercial — You may not use the material for commercial purposes.
|
| 18 |
+
|
| 19 |
+
No additional restrictions — You may not apply legal terms or technological
|
| 20 |
+
measures that legally restrict others from doing anything the license permits.
|
| 21 |
+
|
| 22 |
+
Notices:
|
| 23 |
+
|
| 24 |
+
You do not have to comply with the license for elements of the material in
|
| 25 |
+
the public domain or where your use is permitted by an applicable exception
|
| 26 |
+
or limitation.
|
| 27 |
+
|
| 28 |
+
No warranties are given. The license may not give you all of the permissions
|
| 29 |
+
necessary for your intended use. For example, other rights such as publicity,
|
| 30 |
+
privacy, or moral rights may limit how you use the material.
|
| 31 |
+
|
| 32 |
+
Full license text: https://creativecommons.org/licenses/by-nc/4.0/legalcode
|
| 33 |
+
|
| 34 |
+
================================================================================
|
| 35 |
+
|
| 36 |
+
ADDITIONAL TERMS FOR FOUNDATIONALED DATASET
|
| 37 |
+
|
| 38 |
+
By accessing or using the FoundationalED dataset, you additionally agree to the
|
| 39 |
+
Data Usage Agreement specified in the README.md file, which includes restrictions
|
| 40 |
+
on re-identification, ethical use requirements, and compliance obligations.
|
Code/__pycache__/clean_utils.cpython-312.pyc
ADDED
|
Binary file (7.07 kB). View file
|
|
|
Code/__pycache__/process_to_single_file.cpython-312.pyc
ADDED
|
Binary file (33 kB). View file
|
|
|
Code/build_skill_set.py
ADDED
|
@@ -0,0 +1,157 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""Build Skill_Set.csv from Skills.csv and CASE Common Core standards JSON.
|
| 3 |
+
|
| 4 |
+
Output columns:
|
| 5 |
+
- index (1-based)
|
| 6 |
+
- skill_code
|
| 7 |
+
- full_description
|
| 8 |
+
|
| 9 |
+
The output contains only skill codes that appear in both:
|
| 10 |
+
1) Skills.csv (column: node_code)
|
| 11 |
+
2) CASE JSON CFItems (field: humanCodingScheme)
|
| 12 |
+
|
| 13 |
+
Rows are sorted by skill_code.
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
from __future__ import annotations
|
| 17 |
+
|
| 18 |
+
import argparse
|
| 19 |
+
import csv
|
| 20 |
+
import html
|
| 21 |
+
import json
|
| 22 |
+
import re
|
| 23 |
+
from pathlib import Path
|
| 24 |
+
from typing import Dict, Set
|
| 25 |
+
|
| 26 |
+
_TAG_RE = re.compile(r"<[^>]+>")
|
| 27 |
+
_WS_RE = re.compile(r"\s+")
|
| 28 |
+
_DOT_BEFORE_SUFFIX_RE = re.compile(r"(?<=\d)\.(?=[A-Za-z]$)")
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def _clean_text(text: str) -> str:
    """Return *text* as a single plain-text line.

    HTML entities are decoded first, then everything matched by ``_TAG_RE``
    is removed, and finally each whitespace run is collapsed to one space
    with leading/trailing whitespace dropped. A falsy input yields ``""``.
    """
    decoded = html.unescape(text or "")
    without_tags = _TAG_RE.sub("", decoded)
    return _WS_RE.sub(" ", without_tags).strip()
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def _normalize_skill_code(code: str) -> str:
    """Collapse equivalent spellings of a skill code into one form.

    Surrounding whitespace is stripped and the text matched by
    ``_DOT_BEFORE_SUFFIX_RE`` is removed, so both spellings below end up as
    the same key:

    - 3.MD.C.7a  -> 3.MD.C.7a
    - 3.MD.C.7.a -> 3.MD.C.7a

    A falsy *code* (``None`` or ``""``) normalizes to ``""``.
    """
    stripped = (code or "").strip()
    return _DOT_BEFORE_SUFFIX_RE.sub("", stripped)
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
def load_skill_codes(skills_csv_path: Path) -> Set[str]:
    """Read the unique, non-empty skill codes from a Skills CSV file.

    Args:
        skills_csv_path: CSV file that must have a ``node_code`` column.

    Returns:
        The set of whitespace-stripped ``node_code`` values; blank cells are
        ignored.

    Raises:
        ValueError: If the header row has no ``node_code`` column.
    """
    # "utf-8-sig" reads plain UTF-8 unchanged but also drops a leading
    # byte-order mark, so a BOM-prefixed file whose first column is node_code
    # is not rejected because its header reads as "\ufeffnode_code".
    with skills_csv_path.open("r", encoding="utf-8-sig", newline="") as f:
        reader = csv.DictReader(f)
        if "node_code" not in (reader.fieldnames or []):
            raise ValueError(
                f"Missing required column 'node_code' in {skills_csv_path}"
            )
        # Short rows give None for missing cells, hence the `or ""`.
        stripped = ((row.get("node_code") or "").strip() for row in reader)
        return {code for code in stripped if code}
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def load_case_mapping(case_json_path: Path) -> Dict[str, str]:
    """Build a ``normalized skill code -> statement`` lookup from CASE JSON.

    Only entries of the top-level ``CFItems`` list whose ``CFItemType`` is
    ``"Standard"`` or ``"Component"`` are considered (sub-codes such as
    3.MD.C.7.a are often typed as Component). Items with an empty code or an
    empty cleaned ``fullStatement`` are skipped, and when a code occurs more
    than once the first usable statement is kept.
    """
    with case_json_path.open("r", encoding="utf-8") as f:
        items = json.load(f).get("CFItems", [])

    wanted_types = {"Standard", "Component"}
    mapping: Dict[str, str] = {}
    for item in items:
        code = _normalize_skill_code((item.get("humanCodingScheme") or "").strip())
        if not code or item.get("CFItemType") not in wanted_types:
            continue

        statement = _clean_text(item.get("fullStatement") or "")
        if statement:
            # setdefault leaves an earlier definition of the same code alone.
            mapping.setdefault(code, statement)

    return mapping
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
def write_output(output_csv_path: Path, rows: list[tuple[str, str]]) -> None:
    """Write *rows* to *output_csv_path* as CSV, numbering them from 1.

    Missing parent directories are created. The file starts with the header
    ``index, skill_code, full_description``; each ``(code, description)``
    pair follows in the given order, prefixed by its 1-based position.
    """
    output_csv_path.parent.mkdir(parents=True, exist_ok=True)
    header = ["index", "skill_code", "full_description"]
    with output_csv_path.open("w", encoding="utf-8", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(header)
        writer.writerows(
            [position, code, desc]
            for position, (code, desc) in enumerate(rows, start=1)
        )
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
def main() -> None:
    """Command-line entry point: build Skill_Set.csv and print a summary.

    Loads the skill codes from Skills.csv and the code -> statement mapping
    from the CASE JSON, writes one row per skill code whose normalized form
    exists in the CASE mapping (sorted by the code as spelled in Skills.csv),
    and reports how many codes were written and how many had no match.
    """
    # Anchor the defaults to the repository layout (Code/ beside Data/) so the
    # script works from any working directory; run from inside Code/ these
    # resolve to the same files as the former "../Data/..." defaults.
    data_dir = Path(__file__).resolve().parent.parent / "Data"

    parser = argparse.ArgumentParser(
        description="Create Skill_Set.csv from Skills.csv and CASE standards JSON."
    )
    parser.add_argument(
        "--skills-csv",
        type=Path,
        default=data_dir / "Skills.csv",
        help="Path to Skills.csv",
    )
    parser.add_argument(
        "--case-json",
        type=Path,
        default=data_dir / "CASE-Common Core State Standards for Math.json",
        help="Path to CASE JSON",
    )
    parser.add_argument(
        "--output-csv",
        type=Path,
        default=data_dir / "Skill_Set.csv",
        help="Output path for Skill_Set.csv",
    )

    args = parser.parse_args()

    skills_csv = args.skills_csv.resolve()
    case_json = args.case_json.resolve()
    output_csv = args.output_csv.resolve()

    skill_codes = load_skill_codes(skills_csv)
    case_map = load_case_mapping(case_json)

    # One sorted pass splits the codes into matched rows and unmatched codes,
    # normalizing each code only once.
    rows: list[tuple[str, str]] = []
    missing: list[str] = []
    for code in sorted(skill_codes):
        description = case_map.get(_normalize_skill_code(code))
        if description is None:
            missing.append(code)
        else:
            rows.append((code, description))

    write_output(output_csv, rows)

    print(f"Wrote {len(rows)} skills to {output_csv}")
    print(f"Unique skill codes in Skills.csv: {len(skill_codes)}")
    print(f"Missing codes not found in CASE: {len(missing)}")
    if missing:
        # Show at most 20 unmatched codes to keep the console output short.
        preview = ", ".join(missing[:20])
        suffix = " ..." if len(missing) > 20 else ""
        print(f"Missing preview: {preview}{suffix}")
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
if __name__ == "__main__":
|
| 157 |
+
main()
|
Code/clean_utils.py
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
HTML cleaning utilities for problem text.
|
| 3 |
+
|
| 4 |
+
Provides functions to clean HTML content while preserving:
|
| 5 |
+
- Inline MathML (fractions, superscripts, subscripts)
|
| 6 |
+
- Wiris math images (extracted from data-mathml attribute)
|
| 7 |
+
- Table structure (formatted with | separators)
|
| 8 |
+
- Image placeholders
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
import html
|
| 12 |
+
import pandas as pd
|
| 13 |
+
from bs4 import BeautifulSoup, NavigableString
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def parse_mathml_element(elem):
    """Render one MathML node (and its subtree) as plain text.

    - text nodes (no tag name): their stripped text
    - ``mfrac`` -> ``(num/denom)``, ``msup`` -> ``base^exp``,
      ``msub`` -> ``base_sub`` (falls back to the raw text when fewer than
      two element children are present)
    - ``msqrt`` -> ``√(content)``
    - ``mo``: the operator, padded with spaces for common binary operators
    - ``mrow``/``math``/``mpadded``/``mstyle``: concatenation of the children
    - anything else (including ``mn``/``mi``/``mtext``): the stripped text
    """
    tag = elem.name
    if tag is None:
        return str(elem).strip()

    # Constructs built from their first two element children.
    two_part_formats = {"mfrac": "({}/{})", "msup": "{}^{}", "msub": "{}_{}"}
    if tag in two_part_formats:
        operands = [child for child in elem.children if child.name]
        if len(operands) < 2:
            return elem.get_text(strip=True)
        first = parse_mathml_element(operands[0])
        second = parse_mathml_element(operands[1])
        return two_part_formats[tag].format(first, second)

    if tag == "msqrt":
        radicand = parse_mathml_element_children(elem)
        return f"√({radicand})"

    if tag == "mo":
        operator = elem.get_text(strip=True)
        spaced = {"÷", "×", "·", "+", "-", "=", "<", ">", "≤", "≥", "≠"}
        return f" {operator} " if operator in spaced else operator

    if tag in ("mrow", "math", "mpadded", "mstyle"):
        return parse_mathml_element_children(elem)

    # mn / mi / mtext and every unrecognised tag: plain text content.
    return elem.get_text(strip=True)
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def parse_mathml_element_children(elem):
    """Concatenate the text rendering of every direct child of *elem*.

    String children contribute their stripped text, element children are
    rendered with ``parse_mathml_element``; the pieces are joined without a
    separator.
    """

    def render(child):
        if isinstance(child, NavigableString):
            return str(child).strip()
        if child.name:
            return parse_mathml_element(child)
        return ""

    return "".join(render(child) for child in elem.children)
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
def _replace_inline_math(soup):
    """Replace every <math> element in *soup* with its plain-text rendering."""
    for math in soup.find_all("math"):
        parsed = parse_mathml_element(math)
        math.replace_with(f" {parsed} ")


def _wiris_mathml_to_text(data_mathml):
    """Turn the value of a Wiris ``data-mathml`` attribute into plain text."""
    # The attribute uses «, » and ¨ where markup would use <, > and ".
    math_str = data_mathml.replace("«", "<").replace("»", ">").replace("¨", '"')
    msoup = BeautifulSoup(math_str, "html.parser")
    math_elem = msoup.find("math")
    if math_elem:
        mtext = parse_mathml_element(math_elem)
    else:
        mtext = msoup.get_text(separator="")
    # Known "§#NNN;" escapes get explicit characters first; any remaining ones
    # are rewritten to "&#NNN;" so html.unescape can decode them.
    for escaped, char in (
        ("§#247;", "÷"),
        ("§#215;", "×"),
        ("§#8722;", "-"),
        ("§#160;", " "),
        ("§#183;", "·"),
    ):
        mtext = mtext.replace(escaped, char)
    mtext = mtext.replace("§#", "&#")
    return html.unescape(mtext).strip()


def _replace_images(soup):
    """Replace every <img> in *soup* with a bracketed text placeholder."""
    for img in soup.find_all("img"):
        alt = img.get("alt", "")
        src = img.get("src", "")

        # The lower-cased check also covers "pluginwiris" sources.
        if "wiris" in src.lower():
            if alt and alt.strip() and alt not in ["NO ALT", "NONE"]:
                placeholder = f" [{alt.strip()}] "
            else:
                data_mathml = img.get("data-mathml", "")
                mtext = _wiris_mathml_to_text(data_mathml) if data_mathml else ""
                placeholder = f" [{mtext}] " if mtext else " [math] "
        elif alt and alt.strip():
            # Long alt texts are cut to 100 characters.
            placeholder = f" [Image: {alt.strip()[:100]}] "
        else:
            placeholder = " [image] "
        img.replace_with(placeholder)


def _replace_tables(soup):
    """Replace every <table> in *soup* with a ``[Table: ...]`` text block."""
    for table in soup.find_all("table"):
        rows = []
        for tr in table.find_all("tr"):
            cells = [td.get_text(strip=True) for td in tr.find_all(["td", "th"])]
            if any(cells):
                rows.append(" | ".join(cells))
        if rows:
            table.replace_with(f"\n[Table:\n{chr(10).join(rows)}]\n")
        else:
            # Tables without any cell text are dropped entirely.
            table.decompose()


def clean_problem_body(text):
    """
    Clean HTML problem body with full MathML handling.

    Handles:
    - Inline MathML (<math>, <mfrac>, <msup>, etc.) → (4/3), x^2
    - Wiris math images (data-mathml attribute) → [15÷12]
    - Tables → [Table: Col1 | Col2 ...]
    - Regular images → [image]
    - HTML entities → decoded properly

    Returns "" for NaN/empty input; otherwise the cleaned text with all
    whitespace runs collapsed to single spaces.
    """
    if pd.isna(text) or text == "":
        return ""
    soup = BeautifulSoup(str(text), "html.parser")

    # Order matters: math and images are turned into text before tables are
    # flattened, so table cells already contain their placeholders.
    _replace_inline_math(soup)
    _replace_images(soup)
    _replace_tables(soup)

    text = soup.get_text(separator=" ")
    text = html.unescape(text)
    # split()/join collapses whitespace and trims both ends.
    return " ".join(text.split())
|
Code/cleantext.py
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
|
| 3 |
+
def add_pi_if_missing(input_string):
    """Insert " pi" after the first "se 3.14 for" when no "pi" follows it.

    The string is returned unchanged when the marker phrase is absent or
    when the text after its first occurrence already contains "pi".
    """
    marker = "se 3.14 for"
    head, found, tail = input_string.partition(marker)
    if found and "pi" not in tail:
        return head + found + " pi" + tail
    return input_string
|
| 10 |
+
|
| 11 |
+
def convert_mathml_to_fraction(mathml_content):
    """Flatten simple MathML markup in a string into plain text.

    Applies a fixed, ordered list of literal substitutions: ``<mfrac>`` pairs
    become parentheses, adjacent ``<mn>`` elements are joined with ``/``,
    the remaining ``mn``/``mi``/``mo``/``math`` tags are removed, and the
    HTML entities ``&nbsp;``, ``&gt;``, ``&lt;`` and ``&amp;`` are decoded.
    Trailing ``/`` characters are stripped from the result.
    """
    # Order matters: "</mn><mn>" must become "/" before the individual <mn>
    # tags are removed, and "&amp;" is decoded last so "&amp;lt;" is decoded
    # once, not twice.
    replacements = (
        ('<ast-r type="text" marker="1">', "___"),
        ('<mfrac>', '('),
        ('</mfrac>', ')'),
        ('</mn><mn>', '/'),
        ('<mn>', ''),
        ('</mn>', ''),
        ('<mi>', ''),
        ('</mi>', ''),
        ('<mo>', ''),
        ('</mo>', ''),
        ('<math>', ''),
        ('</math>', ''),
        ('<mi mathvariant=¨normal¨>§#960;', ''),
        # The entity replacements were no-ops before (each entity was
        # replaced by its own decoded character); spell the entities out.
        ('&nbsp;', ' '),
        ('\u00a0', ' '),
        ('§#160;', ' '),
        ('&gt;', '>'),
        ('&lt;', '<'),
        ('&amp;', '&'),
        ('«/math', ''),
    )
    for old, new in replacements:
        mathml_content = mathml_content.replace(old, new)

    return mathml_content.rstrip('/')
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
from bs4 import BeautifulSoup
|
| 41 |
+
def alt_text(html: str) -> str:
    """Return *html* with each <img> that has alt text replaced by that text.

    Images without a (non-empty) ``alt`` attribute are left in place. The
    result is the re-serialized document, so other markup is kept.
    """
    document = BeautifulSoup(html, 'html.parser')

    for image in document.find_all('img'):
        replacement = image.get('alt')
        if not replacement:
            continue
        image.replace_with(replacement)

    return str(document)
|
| 51 |
+
|
| 52 |
+
def mathml_to_text(html):
    """Return the text of *html* with MathML rendered inline.

    For each <math> element:
    - no <mfrac>, or a first <mfrac> without exactly two <mn> descendants:
      replaced by its plain text;
    - nothing but one simple fraction: replaced by ``numerator/denominator``;
    - a simple fraction plus other content: every simple fraction is
      rewritten in place and the pieces are joined with spaces
      (e.g. ``2 1/2``). Previously the whole element collapsed to the first
      fraction and the surrounding content was silently lost.

    The remaining document is then flattened to text with single-space
    separators.
    """
    soup = BeautifulSoup(html, 'html.parser')

    for math in soup.find_all('math'):
        frac = math.find('mfrac')
        nums = frac.find_all('mn') if frac else []
        if len(nums) != 2:
            # No fraction, or not a simple numeric one: keep the raw text.
            math.replace_with(math.get_text())
            continue

        if math.get_text(strip=True) == frac.get_text(strip=True):
            # The fraction is the entire expression.
            math.replace_with(f"{nums[0].text}/{nums[1].text}")
            continue

        # Other content surrounds the fraction: rewrite each simple fraction
        # where it stands so that nothing around it is dropped.
        for inner in math.find_all('mfrac'):
            parts = inner.find_all('mn')
            if len(parts) == 2:
                inner.replace_with(f"{parts[0].text}/{parts[1].text}")
        math.replace_with(math.get_text(" ", strip=True))

    # Return clean text
    return soup.get_text(separator=" ", strip=True)
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
def clean_text(input_text):
    """Convert one raw HTML problem text into plain text.

    Returns "" for NaN or whitespace-only input. Otherwise non-breaking
    spaces are normalized, MathML and <img> alt text are flattened by
    ``mathml_to_text`` and ``alt_text``, any remaining Wiris formula images
    are replaced using their ``data-mathml`` attribute, and the result is
    passed through ``convert_mathml_to_fraction`` and ``add_pi_if_missing``.
    """
    if pd.isna(input_text) or input_text.strip() == "":
        return ""

    # Normalize non-breaking spaces in both entity and character form (the
    # previous replacement had identical search and replacement text).
    input_text = input_text.replace('&nbsp;', ' ').replace('\u00a0', ' ')

    # Flatten inline MathML first, then swap <img> tags for their alt text.
    input_text = mathml_to_text(input_text)
    input_text = alt_text(input_text)

    # Convert Wiris formula images that are still present.
    soup = BeautifulSoup(input_text, 'html.parser')
    for img in soup.find_all('img', class_='Wirisformula'):
        mathml_formula = img.get('data-mathml')
        if mathml_formula:
            # Extract inner MathML content.
            # NOTE(review): str.find returns -1 when a marker is absent, so
            # for attributes without literal '<math>'/'</math>' the slice
            # below is [5:-1] — confirm this is the intended handling of
            # «math ...» style values before changing it.
            start_index = mathml_formula.find('<math>') + len('<math>')
            end_index = mathml_formula.find('</math>')
            mathml_formula_content = mathml_formula[start_index:end_index]
            mathml_formula_content_cleaned = mathml_formula_content.replace(
                'xmlns=¨http://www.w3.org/1998/Math/MathML¨»', '')
            fraction = convert_mathml_to_fraction(mathml_formula_content_cleaned)
            img.replace_with(fraction)

    text = soup.get_text(separator=' ', strip=True)
    text = convert_mathml_to_fraction(text)
    text = add_pi_if_missing(text)
    return text
|
Code/detect_similar_node_codes.py
ADDED
|
@@ -0,0 +1,285 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""Detect skills that have similar node_code values but different skill_id values.
|
| 3 |
+
|
| 4 |
+
By default, the script detects conflicts after normalizing node_code values
|
| 5 |
+
(uppercasing and removing punctuation differences). It can also perform optional
|
| 6 |
+
fuzzy matching on normalized compact node codes.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
from __future__ import annotations
|
| 10 |
+
|
| 11 |
+
import argparse
|
| 12 |
+
import re
|
| 13 |
+
from difflib import SequenceMatcher
|
| 14 |
+
from itertools import combinations
|
| 15 |
+
from pathlib import Path
|
| 16 |
+
|
| 17 |
+
import pandas as pd
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
DEFAULT_SKILLS_PATH = Path(__file__).resolve().parent.parent / "Data" / "Skills.csv"
|
| 21 |
+
DEFAULT_OUTPUT_PATH = (
|
| 22 |
+
Path(__file__).resolve().parent.parent
|
| 23 |
+
/ "Results"
|
| 24 |
+
/ "similar_node_code_conflicts.csv"
|
| 25 |
+
)
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the node_code conflict detector.

    Options: input/output paths, a switch that enables fuzzy matching, the
    fuzzy similarity threshold, a cap on kept fuzzy pairs, and a cap on
    printed rows per section.
    """
    parser = argparse.ArgumentParser(
        description=(
            "Detect skills whose node_code values are similar but map to "
            "different skill_id values."
        )
    )

    # (flag, add_argument keyword arguments), registered in this order so the
    # --help listing keeps its layout.
    option_specs = [
        (
            "--skills-path",
            {
                "type": Path,
                "default": DEFAULT_SKILLS_PATH,
                "help": "Path to Skills.csv.",
            },
        ),
        (
            "--output-path",
            {
                "type": Path,
                "default": DEFAULT_OUTPUT_PATH,
                "help": "Path to save the detected conflicts as CSV.",
            },
        ),
        (
            "--include-fuzzy",
            {
                "action": "store_true",
                "help": "Also run fuzzy matching across compact node_code values.",
            },
        ),
        (
            "--similarity-threshold",
            {
                "type": float,
                "default": 0.9,
                "help": "Minimum SequenceMatcher ratio for fuzzy matches (0.0 to 1.0).",
            },
        ),
        (
            "--max-fuzzy-pairs",
            {
                "type": int,
                "default": 200,
                "help": "Maximum number of fuzzy match pairs to keep after sorting.",
            },
        ),
        (
            "--print-limit",
            {
                "type": int,
                "default": 20,
                "help": "Maximum number of rows to print for each conflict section.",
            },
        ),
    ]
    for flag, options in option_specs:
        parser.add_argument(flag, **options)

    return parser.parse_args()
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
def normalize_node_code(node_code: str) -> str:
    """Return the canonical, dot-separated upper-case form of *node_code*.

    The code is upper-cased and trimmed, every run of characters other than
    ASCII letters and digits becomes a single ".", and leading/trailing dots
    are removed.
    """
    upper = node_code.upper().strip()
    dotted = re.sub(r"[^A-Za-z0-9]+", ".", upper)
    collapsed = re.sub(r"\.+", ".", dotted)
    return collapsed.strip(".")
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
def compact_node_code(canonical_node_code: str) -> str:
    """Return the canonical node code with every dot removed."""
    return "".join(canonical_node_code.split("."))
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
def unique_sorted_strings(series: pd.Series) -> list[str]:
    """Return the distinct non-empty values of *series* as sorted strings.

    Missing values are dropped; each remaining value is converted with
    ``str`` and stripped of surrounding whitespace before deduplication.
    """
    stripped = (str(item).strip() for item in series.dropna())
    return sorted({text for text in stripped if text})
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
def unique_sorted_ints(series: pd.Series) -> list[int]:
    """Return the distinct non-missing values of *series* as sorted ints."""
    return sorted({int(item) for item in series.dropna()})
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
def join_pipe(values: list[str]) -> str:
    """Join *values* into one string separated by ``" | "``."""
    separator = " | "
    return separator.join(values)
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
def join_csv_ints(values: list[int]) -> str:
    """Render *values* as a comma-separated string without spaces."""
    return ",".join(map(str, values))
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
def load_skills(skills_path: Path) -> pd.DataFrame:
    """Load the skill mappings and derive normalized node-code columns.

    Only ``problem_id``, ``skill_id``, ``node_code`` and ``node_name`` are
    read. Rows whose ids are not numeric, or whose node code is missing or
    blank, are discarded.

    Args:
        skills_path: Location of the skills CSV file.

    Returns:
        Cleaned frame with integer ids plus the derived columns
        ``node_code_canonical`` and ``node_code_compact``.
    """
    wanted = ["problem_id", "skill_id", "node_code", "node_name"]
    frame = pd.read_csv(skills_path, usecols=wanted, low_memory=False)

    # Non-numeric ids become NaN so that the dropna below removes them.
    id_columns = ["problem_id", "skill_id"]
    for column in id_columns:
        frame[column] = pd.to_numeric(frame[column], errors="coerce")

    frame = frame.dropna(subset=[*id_columns, "node_code"]).copy()
    for column in id_columns:
        frame[column] = frame[column].astype(int)

    frame["node_code"] = frame["node_code"].astype(str).str.strip()
    frame["node_name"] = frame["node_name"].fillna("").astype(str).str.strip()
    frame = frame.loc[frame["node_code"] != ""].copy()

    canonical = frame["node_code"].map(normalize_node_code)
    frame["node_code_canonical"] = canonical
    frame["node_code_compact"] = canonical.map(compact_node_code)
    return frame
|
| 125 |
+
|
| 126 |
+
|
| 127 |
+
def summarize_compact_codes(df: pd.DataFrame) -> pd.DataFrame:
    """Aggregate the skill rows into one summary row per compact node code.

    Args:
        df: Frame produced by ``load_skills``.

    Returns:
        Frame keyed by ``compact_node_code`` holding the sorted unique
        canonical codes, raw codes, skill ids and node names, the number of
        distinct problems, the number of mapping rows, and ``n_skill_ids``.
    """
    # Output column -> (source column, aggregation).
    aggregations = {
        "canonical_node_codes": ("node_code_canonical", unique_sorted_strings),
        "raw_node_codes": ("node_code", unique_sorted_strings),
        "skill_ids": ("skill_id", unique_sorted_ints),
        "node_names": ("node_name", unique_sorted_strings),
        "problem_count": ("problem_id", "nunique"),
        "mapping_count": ("skill_id", "size"),
    }
    grouped = df.groupby("node_code_compact", sort=True)
    summary = grouped.agg(**aggregations).reset_index()
    summary = summary.rename(columns={"node_code_compact": "compact_node_code"})

    summary["n_skill_ids"] = summary["skill_ids"].map(len)
    return summary
|
| 144 |
+
|
| 145 |
+
|
| 146 |
+
def build_normalized_conflicts(summary: pd.DataFrame) -> pd.DataFrame:
    """Select compact node codes that map to more than one skill id.

    Args:
        summary: Frame produced by ``summarize_compact_codes``.

    Returns:
        The conflicting rows, tagged ``conflict_type="normalized_match"``,
        with list columns flattened to strings, ordered by descending
        ``n_skill_ids`` and then by compact code. When nothing conflicts the
        empty selection is returned as-is (without the extra column).
    """
    multi_skill = summary.loc[summary["n_skill_ids"] > 1].copy()
    if multi_skill.empty:
        return multi_skill

    multi_skill.insert(0, "conflict_type", "normalized_match")
    multi_skill["skill_ids"] = multi_skill["skill_ids"].apply(join_csv_ints)
    for column in ("canonical_node_codes", "raw_node_codes", "node_names"):
        multi_skill[column] = multi_skill[column].apply(join_pipe)

    return multi_skill.sort_values(
        by=["n_skill_ids", "compact_node_code"],
        ascending=[False, True],
    )
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
def build_fuzzy_conflicts(
    summary: pd.DataFrame,
    threshold: float,
    max_pairs: int,
) -> pd.DataFrame:
    """Find pairs of distinct compact node codes that are nearly identical.

    Every unordered pair of summary rows is compared with
    ``SequenceMatcher``. A pair is reported when its similarity ratio is at
    least *threshold* and the two codes do not map to the same set of skill
    ids.

    Args:
        summary: Frame produced by ``summarize_compact_codes``.
        threshold: Minimum similarity ratio for a pair to be reported.
        max_pairs: Keep at most this many pairs after sorting; values of
            zero or less disable the cap.

    Returns:
        Frame with one row per reported pair, sorted by descending
        similarity and then by the two compact codes; an empty frame when
        no pair qualifies.
    """
    rows: list[dict[str, object]] = []

    records = summary.to_dict(orient="records")
    for left, right in combinations(records, 2):
        left_code = str(left["compact_node_code"])
        right_code = str(right["compact_node_code"])

        if left_code == right_code:
            continue

        matcher = SequenceMatcher(None, left_code, right_code)
        # real_quick_ratio() and quick_ratio() are cheap upper bounds on
        # ratio(); rejecting on them skips the expensive alignment for the
        # vast majority of pairs without changing which pairs are reported.
        if (
            matcher.real_quick_ratio() < threshold
            or matcher.quick_ratio() < threshold
        ):
            continue

        similarity = matcher.ratio()
        if similarity < threshold:
            continue

        left_skills = set(left["skill_ids"])
        right_skills = set(right["skill_ids"])
        # Identical skill sets mean the near-duplicate codes do not conflict.
        if left_skills == right_skills:
            continue

        rows.append(
            {
                "conflict_type": "fuzzy_match",
                "similarity": round(similarity, 4),
                "left_compact_node_code": left_code,
                "right_compact_node_code": right_code,
                "left_canonical_node_codes": join_pipe(left["canonical_node_codes"]),
                "right_canonical_node_codes": join_pipe(right["canonical_node_codes"]),
                "left_skill_ids": join_csv_ints(left["skill_ids"]),
                "right_skill_ids": join_csv_ints(right["skill_ids"]),
                "overlap_skill_ids": join_csv_ints(
                    sorted(left_skills.intersection(right_skills))
                ),
            }
        )

    fuzzy = pd.DataFrame(rows)
    if fuzzy.empty:
        return fuzzy

    fuzzy = fuzzy.sort_values(
        ["similarity", "left_compact_node_code", "right_compact_node_code"],
        ascending=[False, True, True],
    )
    if max_pairs > 0:
        fuzzy = fuzzy.head(max_pairs).copy()
    return fuzzy
|
| 215 |
+
|
| 216 |
+
|
| 217 |
+
def print_section(title: str, df: pd.DataFrame, print_limit: int) -> None:
    """Print a titled preview of *df* limited to *print_limit* rows.

    An empty frame is reported as ``None``; when rows are left out of the
    preview, a trailing line states how many were omitted.
    """
    print(f"\n{title}")
    if df.empty:
        print(" None")
        return

    preview = df.head(print_limit)
    print(preview.to_string(index=False))

    omitted = len(df) - len(preview)
    if omitted > 0:
        print(f" ... ({omitted} more rows)")
|
| 227 |
+
|
| 228 |
+
|
| 229 |
+
def main() -> int:
    """Run conflict detection end to end and write the combined CSV report.

    Returns:
        Process exit status; always 0 when the run completes.

    Raises:
        ValueError: If ``--similarity-threshold`` lies outside [0.0, 1.0].
        FileNotFoundError: If the skills file does not exist.
    """
    args = parse_args()

    threshold = args.similarity_threshold
    if threshold > 1.0 or threshold < 0.0:
        raise ValueError("--similarity-threshold must be in [0.0, 1.0].")

    if not args.skills_path.exists():
        raise FileNotFoundError(f"Skills file not found: {args.skills_path}")

    skills_df = load_skills(args.skills_path)
    summary_df = summarize_compact_codes(skills_df)
    normalized_conflicts = build_normalized_conflicts(summary_df)

    # Fuzzy matching is optional; its frame joins the report only on request.
    report_frames = [normalized_conflicts]
    fuzzy_conflicts = pd.DataFrame()
    if args.include_fuzzy:
        fuzzy_conflicts = build_fuzzy_conflicts(
            summary_df,
            threshold=threshold,
            max_pairs=args.max_fuzzy_pairs,
        )
        report_frames.append(fuzzy_conflicts)

    combined_output = pd.concat(report_frames, ignore_index=True, sort=False)
    args.output_path.parent.mkdir(parents=True, exist_ok=True)
    combined_output.to_csv(args.output_path, index=False)

    print("Loaded rows:", len(skills_df))
    print("Unique compact node codes:", len(summary_df))
    print("Normalized conflicts:", len(normalized_conflicts))
    if args.include_fuzzy:
        print(
            f"Fuzzy conflicts (threshold {threshold:.2f}):",
            len(fuzzy_conflicts),
        )

    sections = [
        (
            "Normalized node_code conflicts (same compact code, different skill_id):",
            normalized_conflicts,
        ),
    ]
    if args.include_fuzzy:
        sections.append(
            (
                "Fuzzy node_code conflicts (near compact codes, different skill_id):",
                fuzzy_conflicts,
            )
        )
    for heading, frame in sections:
        print_section(heading, frame, args.print_limit)

    print(f"\nSaved conflicts to: {args.output_path}")
    return 0
|
| 282 |
+
|
| 283 |
+
|
| 284 |
+
if __name__ == "__main__":
|
| 285 |
+
raise SystemExit(main())
|
Code/evaluate_kt.py
ADDED
|
@@ -0,0 +1,205 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Evaluate LLM knowledge tracing predictions against FKT benchmark tasks.
|
| 4 |
+
|
| 5 |
+
Tasks evaluated:
|
| 6 |
+
- Task 1 (FKT): Foundational Knowledge Tracing - predict if student answers correctly (question-level)
|
| 7 |
+
- Task 1 Variant 2: Cognitive Student Modeling - predict the actual student response
|
| 8 |
+
|
| 9 |
+
Usage:
|
| 10 |
+
python evaluate_kt.py results.jsonl
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
import argparse
|
| 14 |
+
import json
|
| 15 |
+
import math
|
| 16 |
+
from sklearn.metrics import roc_auc_score
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def normalize_mcq_answer(answer_str: str) -> str:
    """
    Normalize a multiple-choice answer so equivalent answers compare equal.

    Examples of the normalization:
    - 'C, A' -> 'A, C' (options are sorted)
    - 'A,C' -> 'A, C' (uniform ", " separator)
    - 'a, c' -> 'A, C' (upper-cased)

    Args:
        answer_str: Answer string to normalize

    Returns:
        The normalized string when every comma-separated part is a single
        letter; otherwise the input string unchanged
    """
    letters = []
    for piece in answer_str.split(','):
        token = piece.strip().upper()
        if not token:
            # Empty parts (e.g. from a trailing comma) are ignored.
            continue
        if len(token) != 1 or not token.isalpha():
            # Any non-letter part means this is not an MCQ answer.
            return answer_str
        letters.append(token)

    if not letters:
        return answer_str
    return ', '.join(sorted(set(letters)))
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def numerical_match(answer1: str, answer2: str, atol: float = 0.01, rtol: float = 0.01) -> bool:
    """
    Report whether two answer strings are numerically close.

    Both strings are parsed as floats and compared with math.isclose using
    the given absolute and relative tolerances.

    Args:
        answer1: First answer string
        answer2: Second answer string
        atol: Absolute tolerance (default: 0.01)
        rtol: Relative tolerance (default: 0.01)

    Returns:
        True if both answers parse as numbers and are close; False otherwise
        (including when either value cannot be parsed)
    """
    try:
        first, second = (float(text.strip()) for text in (answer1, answer2))
        return math.isclose(first, second, abs_tol=atol, rel_tol=rtol)
    except (AttributeError, ValueError):
        # Non-string input or non-numeric text: not a numeric match.
        return False
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
def answers_match(pred, actual):
    """Return True when the predicted answer matches the actual answer.

    Three checks are tried in order on the stripped string forms: exact
    equality, equality after MCQ normalization (case, order, spacing), and
    numeric closeness within tolerance. A missing value on either side
    never matches.
    """
    if pred is None or actual is None:
        return False

    predicted_text, actual_text = str(pred).strip(), str(actual).strip()
    return (
        predicted_text == actual_text
        or normalize_mcq_answer(predicted_text) == normalize_mcq_answer(actual_text)
        or numerical_match(predicted_text, actual_text)
    )
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
def load_results(jsonl_path):
    """Load prediction records from a JSON Lines file.

    Args:
        jsonl_path: Path to a file holding one JSON document per line.

    Returns:
        List of decoded records in file order; blank lines are skipped.
    """
    # Decode explicitly as UTF-8 instead of the platform default encoding,
    # so non-ASCII answers load identically on every operating system.
    with open(jsonl_path, 'r', encoding='utf-8') as f:
        return [json.loads(line) for line in f if line.strip()]
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
def evaluate(results, prior_baseline=0.615):
    """Print evaluation metrics aligned with the FKT benchmark tasks.

    Reports question-level accuracy and AUC-ROC (Task 1), answer-prediction
    accuracy (Task 1 Variant 2), and breakdowns by problem type and by
    ground-truth ``prediction_type``.

    Args:
        results: List of prediction records (dicts). The keys read are
            ``actual_score``, ``predicted_question_level``,
            ``predicted_student_answer``, ``actual_answer``,
            ``problem_type`` and ``prediction_type``.
        prior_baseline: Reference correctness rate printed next to the
            majority baseline. Defaults to 0.615, the value the script
            attributes to Interactions.csv.

    Returns:
        None. All output goes to stdout.
    """
    total = len(results)

    if total == 0:
        print("No results to evaluate.")
        return

    def count_answer_hits(rows):
        """Count rows whose predicted student answer matches the actual one."""
        return sum(
            1 for r in rows
            if answers_match(r.get('predicted_student_answer'), r.get('actual_answer'))
        )

    # Compute class distribution
    # NOTE(review): records without 'actual_score' are counted as incorrect
    # here -- confirm that this is intended.
    n_correct = sum(1 for r in results if r.get('actual_score') == 1)
    n_incorrect = total - n_correct

    # Task 1: FKT - Question-level accuracy
    valid_q = [(r.get('actual_score'), r.get('predicted_question_level'))
               for r in results
               if r.get('actual_score') is not None and r.get('predicted_question_level') is not None]

    if valid_q:
        y_true, y_pred = zip(*valid_q)
        question_correct = sum(1 for t, p in valid_q if t == p)
        question_acc = question_correct / len(valid_q)
        # AUC-ROC (note: with binary predictions, this is limited)
        try:
            auc_roc = roc_auc_score(y_true, y_pred)
        except ValueError:
            auc_roc = None  # Only one class present
    else:
        question_correct = 0
        question_acc = 0.0
        auc_roc = None

    # Task 1 Variant 2: Cognitive Modeling - Answer prediction accuracy
    answer_correct = count_answer_hits(results)

    # Baselines
    majority_baseline = max(n_correct, n_incorrect) / total

    # Print results
    print(f"{'='*60}")
    print(f"Evaluation Results ({total} predictions)")
    print(f"{'='*60}")
    print()
    print(f"Class distribution: {n_correct} correct, {n_incorrect} incorrect")
    print()

    # Task 1: Foundational Knowledge Tracing (FKT) - question-level prediction
    print("Task 1: Foundational Knowledge Tracing (FKT) - Question-Level")
    print(f" Accuracy: {question_correct}/{len(valid_q)} = {question_acc:.3f}")
    if auc_roc is not None:
        print(f" AUC-ROC: {auc_roc:.3f}")
    else:
        print(" AUC-ROC: N/A (single class)")
    print(f" Baselines: Prior={prior_baseline:.3f}, Majority={majority_baseline:.3f}")
    print()

    # Task 1 Variant 2: Cognitive Student Modeling
    print("Task 1 Variant 2: Cognitive Student Modeling")
    print(f" Overall Accuracy: {answer_correct}/{total} = {answer_correct/total:.3f}")

    # Breakdown by problem type
    problem_types = ['Multiple Choice (select 1)', 'Multiple Choice (select all)', 'Fill-in-the-blank(s)']
    has_problem_type = any(r.get('problem_type') for r in results)
    if has_problem_type:
        print(" By problem type:")
        for ptype in problem_types:
            subset = [r for r in results if r.get('problem_type') == ptype]
            if subset:
                n = len(subset)
                a_acc = count_answer_hits(subset) / n
                label = ptype.replace('Multiple Choice ', 'MC ')
                print(f" {label:20s}: n={n:4d}, acc={a_acc:.3f}")
                # Breakdown by ground truth within problem type
                for gt in ['correct', 'incorrect']:
                    gt_subset = [r for r in subset if r.get('prediction_type') == gt]
                    if gt_subset:
                        gt_n = len(gt_subset)
                        gt_acc = count_answer_hits(gt_subset) / gt_n
                        print(f" {gt:18s}: n={gt_n:4d}, acc={gt_acc:.3f}")
    # NOTE(review): placed at function level, as a separator before the next
    # section -- the source listing lost its indentation; confirm.
    print()

    # Breakdown by prediction type (correct/incorrect ground truth)
    print("By ground truth (prediction_type):")
    for ptype in ['correct', 'incorrect']:
        subset = [r for r in results if r.get('prediction_type') == ptype]
        if subset:
            n = len(subset)
            q_acc = sum(1 for r in subset if r.get('predicted_question_level') == r.get('actual_score')) / n
            a_acc = count_answer_hits(subset) / n
            print(f" {ptype:10s}: n={n:4d}, FKT_acc={q_acc:.3f}, cognitive_acc={a_acc:.3f}")
|
| 193 |
+
|
| 194 |
+
|
| 195 |
+
def main():
    """Parse the JSONL path from the command line and print its evaluation."""
    cli = argparse.ArgumentParser(description="Evaluate LLM knowledge tracing predictions")
    cli.add_argument("jsonl_file", help="Path to JSONL results file")
    options = cli.parse_args()

    evaluate(load_results(options.jsonl_file))
|
| 202 |
+
|
| 203 |
+
|
| 204 |
+
if __name__ == "__main__":
|
| 205 |
+
main()
|
Code/evaluate_kt_by_context.py
ADDED
|
@@ -0,0 +1,339 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Evaluate KT performance by context length (history size).
|
| 4 |
+
|
| 5 |
+
Analyzes how KT accuracy changes as student history grows from 50 to 400 interactions.
|
| 6 |
+
Plots all models in a single figure for comparison.
|
| 7 |
+
|
| 8 |
+
Usage:
|
| 9 |
+
python evaluate_kt_by_context.py
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
import argparse
|
| 13 |
+
import json
|
| 14 |
+
import math
|
| 15 |
+
import os
|
| 16 |
+
from glob import glob
|
| 17 |
+
from collections import defaultdict
|
| 18 |
+
|
| 19 |
+
import matplotlib.pyplot as plt
|
| 20 |
+
import numpy as np
|
| 21 |
+
from sklearn.metrics import roc_auc_score
|
| 22 |
+
|
| 23 |
+
# Set publication-quality font sizes
|
| 24 |
+
plt.rcParams.update({
|
| 25 |
+
'font.size': 14,
|
| 26 |
+
'axes.titlesize': 16,
|
| 27 |
+
'axes.labelsize': 14,
|
| 28 |
+
'xtick.labelsize': 12,
|
| 29 |
+
'ytick.labelsize': 12,
|
| 30 |
+
'legend.fontsize': 12,
|
| 31 |
+
})
|
| 32 |
+
|
| 33 |
+
# Model name mapping for display
|
| 34 |
+
MODEL_NAMES = {
|
| 35 |
+
'gptoss120b': 'GPT-OSS-120B',
|
| 36 |
+
'llama33_70b_instruct': 'Llama-3.3-70B-Instruct',
|
| 37 |
+
'qwen3next80binstruct': 'Qwen3-80B-Instruct',
|
| 38 |
+
'qwen3next80bthinking': 'Qwen3-80B-Thinking',
|
| 39 |
+
}
|
| 40 |
+
|
| 41 |
+
# Colors for each model
|
| 42 |
+
MODEL_COLORS = {
|
| 43 |
+
'gptoss120b': '#1f77b4', # blue
|
| 44 |
+
'llama33_70b_instruct': '#ff7f0e', # orange
|
| 45 |
+
'qwen3next80binstruct': '#2ca02c', # green
|
| 46 |
+
'qwen3next80bthinking': '#d62728', # red
|
| 47 |
+
}
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def normalize_mcq_answer(answer_str: str) -> str:
    """Normalize an MCQ answer (case, order, spacing) for comparison.

    Returns the input unchanged unless every non-empty comma-separated part
    is a single letter.
    """
    tokens = [
        token
        for token in (piece.strip().upper() for piece in answer_str.split(','))
        if token
    ]
    is_mcq = bool(tokens) and all(len(token) == 1 and token.isalpha() for token in tokens)
    return ', '.join(sorted(set(tokens))) if is_mcq else answer_str
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def numerical_match(answer1: str, answer2: str, atol: float = 0.01, rtol: float = 0.01) -> bool:
    """Return True when both answers parse as floats and are close.

    Closeness is decided by math.isclose with the given absolute and
    relative tolerances; unparsable or non-string input yields False.
    """
    try:
        return math.isclose(
            float(answer1.strip()),
            float(answer2.strip()),
            abs_tol=atol,
            rel_tol=rtol,
        )
    except (AttributeError, ValueError):
        return False
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
def answers_match(pred, actual):
    """Return True when the predicted answer matches the actual answer.

    Tries exact string equality, then MCQ-normalized equality, then numeric
    closeness. A None on either side never matches.
    """
    if pred is None or actual is None:
        return False

    left = str(pred).strip()
    right = str(actual).strip()

    if left == right or normalize_mcq_answer(left) == normalize_mcq_answer(right):
        return True
    return numerical_match(left, right)
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
def parse_args():
    """Parse the command-line options for the context-length evaluation.

    Returns:
        Namespace with ``results_dir``, ``output_dir`` and ``no_plots``.
    """
    cli = argparse.ArgumentParser(description="Evaluate KT by context length")

    # The two directory options share the same shape; declare them as data.
    directory_options = [
        (
            ("--results-dir", "-r"),
            "inference_data_kt_results",
            "Directory containing JSONL results files",
        ),
        (
            ("--output-dir", "-o"),
            "dataset_analysis/plots",
            "Directory to save output plots",
        ),
    ]
    for flags, default_dir, help_text in directory_options:
        cli.add_argument(*flags, type=str, default=default_dir, help=help_text)

    cli.add_argument("--no-plots", action="store_true", help="Skip generating plots")
    return cli.parse_args()
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
def extract_model_name(filename):
    """Extract the model identifier from a results filename.

    Filenames are expected to look like ``modelname_n500_bin10_hist50.jsonl``.
    When the basename starts with one of the ``MODEL_NAMES`` keys, that key
    is returned; otherwise the basename without its trailing ``.jsonl``
    extension is returned.

    Args:
        filename: Path or bare name of a results file.

    Returns:
        A key of ``MODEL_NAMES`` or the extension-less basename.
    """
    basename = os.path.basename(filename)
    # Pattern: modelname_n500_bin10_hist50.jsonl
    for model_key in MODEL_NAMES:
        if basename.startswith(model_key):
            return model_key

    # Strip only a trailing extension: str.replace would also delete any
    # '.jsonl' occurring in the middle of the name.
    extension = '.jsonl'
    if basename.endswith(extension):
        return basename[:-len(extension)]
    return basename
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
def load_results(jsonl_path):
    """Load prediction records from a JSON Lines file.

    Args:
        jsonl_path: Path to a file holding one JSON document per line.

    Returns:
        List of decoded records in file order; blank lines are skipped.
    """
    # Decode explicitly as UTF-8 instead of the platform default encoding,
    # so non-ASCII answers load identically on every operating system.
    with open(jsonl_path, 'r', encoding='utf-8') as f:
        return [json.loads(line) for line in f if line.strip()]
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
def compute_metrics_by_bin(results):
    """Compute FKT and cognitive metrics for each ``history_size`` group.

    Args:
        results: Iterable of prediction records (dicts). Records without a
            ``history_size`` key are placed in the 50 group.

    Returns:
        Dict keyed by history size in ascending order. Each value holds
        ``n`` (records in the group), ``fkt_auc`` (None unless both classes
        are present), ``fkt_acc``, ``fkt_valid`` (records having both an
        actual score and a question-level prediction) and ``cognitive_acc``.
    """
    grouped = defaultdict(list)
    for record in results:
        grouped[record.get('history_size', 50)].append(record)

    metrics = {}
    for history_size in sorted(grouped):
        rows = grouped[history_size]
        n = len(rows)

        # FKT: (actual, predicted) pairs for records that carry both values.
        scored = [
            (int(row.get('actual_score')), int(row.get('predicted_question_level')))
            for row in rows
            if row.get('actual_score') is not None
            and row.get('predicted_question_level') is not None
        ]
        y_true = [truth for truth, _ in scored]
        y_pred = [guess for _, guess in scored]

        # AUC-ROC is only defined when both classes occur in the group.
        fkt_auc = None
        if len(set(y_true)) == 2:
            try:
                fkt_auc = roc_auc_score(y_true, y_pred)
            except ValueError:
                pass

        # FKT accuracy (for reference)
        fkt_correct = sum(1 for truth, guess in scored if truth == guess)
        fkt_acc = fkt_correct / len(scored) if scored else 0.0

        # Cognitive accuracy (answer match)
        cognitive_correct = sum(
            1 for row in rows
            if answers_match(row.get('predicted_student_answer'), row.get('actual_answer'))
        )

        metrics[history_size] = {
            'n': n,
            'fkt_auc': fkt_auc,
            'fkt_acc': fkt_acc,
            'fkt_valid': len(y_true),
            'cognitive_acc': cognitive_correct / n if n > 0 else 0.0,
        }

    return metrics
|
| 180 |
+
|
| 181 |
+
|
| 182 |
+
def print_table(all_metrics):
    """Print per-history-size FKT AUC-ROC and cognitive-accuracy tables.

    Args:
        all_metrics: Dict mapping model key to the per-history-size metrics
            returned by ``compute_metrics_by_bin``.
    """
    # Union of the history sizes seen by any model.
    all_history_sizes = sorted({
        hs
        for model_metrics in all_metrics.values()
        for hs in model_metrics
    })
    model_keys = list(all_metrics)
    rule = "-" * 80
    missing = f" {'N/A':>20}"

    def auc_cell(entry):
        """Format the AUC column; an undefined AUC is shown as N/A."""
        value = entry['fkt_auc']
        return missing if value is None else f" {value:>20.3f}"

    def accuracy_cell(entry):
        """Format the cognitive-accuracy column."""
        return f" {entry['cognitive_acc']:>20.3f}"

    def emit_rows(cell):
        """Print one row per history size, one column per model."""
        for hs in all_history_sizes:
            row = f"{hs:>8}"
            for model_key in model_keys:
                per_model = all_metrics[model_key]
                row += cell(per_model[hs]) if hs in per_model else missing
            print(row)

    # Header
    print("\n" + "=" * 100)
    print("KT Performance by Context Length (History Size)")
    print("=" * 100)

    # FKT AUC-ROC table
    print("\nFKT AUC-ROC (Question-Level):")
    print(rule)
    header = f"{'History':>8}" + "".join(
        f" {MODEL_NAMES.get(model_key, model_key)[:20]:>20}"
        for model_key in model_keys
    )
    print(header)
    print(rule)
    emit_rows(auc_cell)

    # Cognitive accuracy table
    print("\nCognitive Accuracy (Answer Prediction):")
    print(rule)
    print(header)
    print(rule)
    emit_rows(accuracy_cell)
|
| 232 |
+
|
| 233 |
+
|
| 234 |
+
def plot_results(all_metrics, output_dir):
    """Plot FKT AUC-ROC and cognitive accuracy against history size.

    Draws one line per model in each of two side-by-side panels and saves
    the figure as ``kt_context_scaling.png`` inside *output_dir*, which is
    created if it does not exist.

    Args:
        all_metrics: Dict mapping model key to the per-history-size metrics
            returned by ``compute_metrics_by_bin``.
        output_dir: Directory that receives the PNG file.
    """
    os.makedirs(output_dir, exist_ok=True)

    fig, axes = plt.subplots(1, 2, figsize=(14, 5))

    # Plot 1: FKT AUC-ROC
    for model_key, metrics in all_metrics.items():
        history_sizes = sorted(metrics.keys())
        # Groups without a defined AUC (single class) are left out.
        valid_hs = [hs for hs in history_sizes if metrics[hs]['fkt_auc'] is not None]
        fkt_aucs = [metrics[hs]['fkt_auc'] for hs in valid_hs]

        if valid_hs:
            axes[0].plot(
                valid_hs, fkt_aucs,
                marker='o', markersize=4,
                color=MODEL_COLORS.get(model_key, 'gray'),
                label=MODEL_NAMES.get(model_key, model_key),
                linewidth=2
            )

    # Draw the chance-level reference BEFORE building the legend: legend()
    # only lists artists that already exist, so adding the line afterwards
    # left its 'Random' label out of the legend.
    axes[0].axhline(y=0.5, color='gray', linestyle='--', alpha=0.5, label='Random')

    axes[0].set_xlabel('History Size (# prior interactions)')
    axes[0].set_ylabel('AUC-ROC')
    axes[0].set_title('FKT AUC-ROC vs Context Length')
    axes[0].legend(loc='best')
    axes[0].grid(True, alpha=0.3)
    axes[0].set_xlim(40, 410)

    # Plot 2: Cognitive Accuracy
    for model_key, metrics in all_metrics.items():
        history_sizes = sorted(metrics.keys())
        cognitive_accs = [metrics[hs]['cognitive_acc'] for hs in history_sizes]

        axes[1].plot(
            history_sizes, cognitive_accs,
            marker='o', markersize=4,
            color=MODEL_COLORS.get(model_key, 'gray'),
            label=MODEL_NAMES.get(model_key, model_key),
            linewidth=2
        )

    axes[1].set_xlabel('History Size (# prior interactions)')
    axes[1].set_ylabel('Accuracy')
    axes[1].set_title('Cognitive Modeling Accuracy vs Context Length')
    axes[1].legend(loc='best')
    axes[1].grid(True, alpha=0.3)
    axes[1].set_xlim(40, 410)

    plt.tight_layout()

    plot_path = os.path.join(output_dir, 'kt_context_scaling.png')
    plt.savefig(plot_path, dpi=150)
    plt.close()

    print(f"\nSaved: {plot_path}")
|
| 291 |
+
|
| 292 |
+
|
| 293 |
+
def main():
    """Evaluate every JSONL results file and report metrics by history size.

    Prints a per-model summary and the detailed tables, then saves the
    comparison plot unless ``--no-plots`` was given.
    """
    args = parse_args()

    # Find all JSONL files
    jsonl_files = glob(os.path.join(args.results_dir, '*.jsonl'))
    if not jsonl_files:
        print(f"No JSONL files found in {args.results_dir}")
        return

    print(f"Found {len(jsonl_files)} result files:")
    for found in jsonl_files:
        print(f" - {os.path.basename(found)}")

    # Load and analyze each model
    # NOTE(review): two files resolving to the same model key overwrite each
    # other in all_metrics -- confirm there is one file per model.
    all_metrics = {}
    for jsonl_path in sorted(jsonl_files):
        model_key = extract_model_name(jsonl_path)
        print(f"\nProcessing {MODEL_NAMES.get(model_key, model_key)}...")

        results = load_results(jsonl_path)
        print(f" Loaded {len(results):,} predictions")

        metrics = compute_metrics_by_bin(results)
        all_metrics[model_key] = metrics

        # Quick summary: averages over history sizes in ascending order.
        ordered = [metrics[hs] for hs in sorted(metrics.keys())]
        valid_aucs = [entry['fkt_auc'] for entry in ordered if entry['fkt_auc'] is not None]
        avg_auc = np.mean(valid_aucs) if valid_aucs else 0.0
        avg_cognitive = np.mean([entry['cognitive_acc'] for entry in ordered])
        print(f" Avg FKT AUC-ROC: {avg_auc:.3f}")
        print(f" Avg Cognitive accuracy: {avg_cognitive:.3f}")

    # Print detailed table
    print_table(all_metrics)

    # Generate plot
    if not args.no_plots:
        plot_results(all_metrics, args.output_dir)

    print("\nDone!")
|
| 336 |
+
|
| 337 |
+
|
| 338 |
+
if __name__ == "__main__":
|
| 339 |
+
main()
|
Code/find_duplicate_problem_body.py
ADDED
|
@@ -0,0 +1,152 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""Export rows that share the same Problem Body but have different problem_id.
|
| 3 |
+
|
| 4 |
+
Default input:
|
| 5 |
+
- ../Data/Problems.csv
|
| 6 |
+
|
| 7 |
+
Default output:
|
| 8 |
+
- ../Results/Problems_same_body_different_problem_id.csv
|
| 9 |
+
|
| 10 |
+
The output includes all original columns plus:
|
| 11 |
+
- duplicate_group_id
|
| 12 |
+
- distinct_problem_id_count
|
| 13 |
+
- distinct_problem_ids
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
from __future__ import annotations
|
| 17 |
+
|
| 18 |
+
import argparse
|
| 19 |
+
import csv
|
| 20 |
+
import html
|
| 21 |
+
import re
|
| 22 |
+
from collections import defaultdict
|
| 23 |
+
from pathlib import Path
|
| 24 |
+
from typing import Dict, List, Set
|
| 25 |
+
|
| 26 |
+
_TAG_RE = re.compile(r"<[^>]+>")
_WS_RE = re.compile(r"\s+")


def normalize_body(text: str, strip_html: bool, collapse_whitespace: bool) -> str:
    """Normalize Problem Body text for grouping.

    Args:
        text: Raw body text; ``None`` or an empty string is treated as "".
        strip_html: When true, remove ``<...>`` markup before decoding entities.
        collapse_whitespace: When true, collapse every whitespace run to a
            single space and trim both ends.

    Returns:
        The normalized text with HTML entities decoded.
    """
    value = text or ""
    if strip_html:
        # Strip tags while entities are still escaped. Decoding first would
        # turn "x &lt; 5 and y &gt; 3" into "x < 5 and y > 3", and the tag
        # pattern would then delete "< 5 and y >" as if it were markup.
        value = _TAG_RE.sub("", value)
    value = html.unescape(value)
    if collapse_whitespace:
        value = _WS_RE.sub(" ", value).strip()
    return value
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def main() -> None:
    """Export rows whose Problem Body is shared by different problem IDs.

    Reads the input CSV, groups rows by the normalized text of the body
    column (rows whose normalized body is empty are ignored), keeps the
    groups that carry more than one distinct non-empty identifier, and
    writes every row of those groups to the output CSV in first-seen group
    order. Three columns are appended to each exported row:
    ``duplicate_group_id``, ``distinct_problem_id_count`` and
    ``distinct_problem_ids`` (identifiers sorted and joined with ``;``).

    Raises:
        ValueError: If the body or id column is missing from the CSV header.
    """
    parser = argparse.ArgumentParser(
        description=(
            "Find rows in Problems.csv where the same Problem Body is mapped "
            "to different problem_id values."
        )
    )
    parser.add_argument(
        "--input-csv",
        type=Path,
        default=Path("../Data/Problems.csv"),
        help="Path to Problems.csv",
    )
    parser.add_argument(
        "--output-csv",
        type=Path,
        default=Path("../Results/Problems_same_body_different_problem_id.csv"),
        help="Output CSV path",
    )
    parser.add_argument(
        "--body-column",
        type=str,
        default="Problem Body",
        help="Column name for problem statement text",
    )
    parser.add_argument(
        "--id-column",
        type=str,
        default="problem_id",
        help="Column name for problem identifier",
    )
    parser.add_argument(
        "--strip-html",
        action="store_true",
        help="Strip HTML tags before grouping",
    )
    parser.add_argument(
        "--collapse-whitespace",
        action="store_true",
        help="Collapse runs of whitespace before grouping",
    )

    args = parser.parse_args()

    input_csv = args.input_csv.resolve()
    output_csv = args.output_csv.resolve()

    # "utf-8-sig" decodes plain UTF-8 as well, but also drops a leading BOM
    # that would otherwise be glued onto the first header name.
    with input_csv.open("r", encoding="utf-8-sig", newline="") as f:
        reader = csv.DictReader(f)
        fieldnames = list(reader.fieldnames or [])

        if args.body_column not in fieldnames:
            raise ValueError(f"Missing body column '{args.body_column}' in {input_csv}")
        if args.id_column not in fieldnames:
            raise ValueError(f"Missing id column '{args.id_column}' in {input_csv}")

        rows: List[dict] = list(reader)

    # body key -> indices of the rows sharing it / distinct ids seen for it
    groups: Dict[str, List[int]] = defaultdict(list)
    group_ids: Dict[str, Set[str]] = defaultdict(set)

    for idx, row in enumerate(rows):
        body_key = normalize_body(
            row.get(args.body_column) or "",
            strip_html=args.strip_html,
            collapse_whitespace=args.collapse_whitespace,
        )
        if not body_key:
            continue

        # DictReader fills cells missing from short rows with None; treat
        # that as "no id" rather than letting str(None) become the id "None".
        problem_id = str(row.get(args.id_column) or "").strip()
        groups[body_key].append(idx)
        if problem_id:
            group_ids[body_key].add(problem_id)

    duplicate_keys = [k for k, ids in group_ids.items() if len(ids) > 1]

    # Preserve first-seen order of duplicate groups.
    duplicate_keys.sort(key=lambda k: groups[k][0])

    output_rows: List[dict] = []
    for group_num, key in enumerate(duplicate_keys, start=1):
        ids_sorted = sorted(group_ids[key])
        ids_joined = ";".join(ids_sorted)

        for row_idx in groups[key]:
            out_row = dict(rows[row_idx])
            out_row["duplicate_group_id"] = str(group_num)
            out_row["distinct_problem_id_count"] = str(len(ids_sorted))
            out_row["distinct_problem_ids"] = ids_joined
            output_rows.append(out_row)

    output_csv.parent.mkdir(parents=True, exist_ok=True)
    output_fieldnames = fieldnames + [
        "duplicate_group_id",
        "distinct_problem_id_count",
        "distinct_problem_ids",
    ]

    with output_csv.open("w", encoding="utf-8", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=output_fieldnames)
        writer.writeheader()
        writer.writerows(output_rows)

    print(f"Input rows: {len(rows)}")
    print(f"Duplicate body groups (different problem_id): {len(duplicate_keys)}")
    print(f"Output rows: {len(output_rows)}")
    print(f"Wrote: {output_csv}")
|
| 149 |
+
|
| 150 |
+
|
| 151 |
+
if __name__ == "__main__":
|
| 152 |
+
main()
|
Code/find_duplicate_problem_id.py
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""Export rows that have duplicated problem_id values.
|
| 3 |
+
|
| 4 |
+
Default input:
|
| 5 |
+
- ../Data/Problems.csv
|
| 6 |
+
|
| 7 |
+
Default output:
|
| 8 |
+
- ../Results/Problems_duplicated_problem_id.csv
|
| 9 |
+
|
| 10 |
+
The output includes all original columns plus:
|
| 11 |
+
- duplicate_group_id
|
| 12 |
+
- duplicate_problem_id_count
|
| 13 |
+
- distinct_problem_body_count
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
from __future__ import annotations
|
| 17 |
+
|
| 18 |
+
import argparse
|
| 19 |
+
import csv
|
| 20 |
+
import html
|
| 21 |
+
import re
|
| 22 |
+
from collections import defaultdict
|
| 23 |
+
from pathlib import Path
|
| 24 |
+
from typing import Dict, List, Set
|
| 25 |
+
|
| 26 |
+
_TAG_RE = re.compile(r"<[^>]+>")
_WS_RE = re.compile(r"\s+")


def normalize_body(text: str, strip_html: bool, collapse_whitespace: bool) -> str:
    """Normalize Problem Body text for distinct-body counting.

    Args:
        text: Raw body text; ``None`` or an empty string is treated as "".
        strip_html: When true, remove ``<...>`` markup before decoding entities.
        collapse_whitespace: When true, collapse every whitespace run to a
            single space and trim both ends.

    Returns:
        The normalized text with HTML entities decoded.
    """
    value = text or ""
    if strip_html:
        # Remove markup before decoding entities: once "&lt;" / "&gt;" are
        # decoded, literal comparisons such as "x < 5 and y > 3" look like a
        # tag to the pattern and would be deleted from the text.
        value = _TAG_RE.sub("", value)
    value = html.unescape(value)
    if collapse_whitespace:
        value = _WS_RE.sub(" ", value).strip()
    return value
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def main() -> None:
    """Export rows of Problems.csv whose problem identifier occurs more than once.

    Reads the input CSV, groups rows by the stripped value of the id column
    (rows without an id are ignored), and writes every row of each group
    with two or more rows to the output CSV in first-seen group order.
    Three columns are appended to each exported row: ``duplicate_group_id``,
    ``duplicate_problem_id_count`` (rows sharing the id) and
    ``distinct_problem_body_count`` (distinct normalized bodies in the group).

    Raises:
        ValueError: If the id or body column is missing from the CSV header.
    """
    parser = argparse.ArgumentParser(
        description="Find rows in Problems.csv where problem_id is duplicated."
    )
    parser.add_argument(
        "--input-csv",
        type=Path,
        default=Path("../Data/Problems.csv"),
        help="Path to Problems.csv",
    )
    parser.add_argument(
        "--output-csv",
        type=Path,
        default=Path("../Results/Problems_duplicated_problem_id.csv"),
        help="Output CSV path",
    )
    parser.add_argument(
        "--id-column",
        type=str,
        default="problem_id",
        help="Column name for problem identifier",
    )
    parser.add_argument(
        "--body-column",
        type=str,
        default="Problem Body",
        help="Column name for problem statement text",
    )
    parser.add_argument(
        "--strip-html",
        action="store_true",
        help="Strip HTML tags before counting distinct problem bodies",
    )
    parser.add_argument(
        "--collapse-whitespace",
        action="store_true",
        help="Collapse runs of whitespace before counting distinct problem bodies",
    )

    args = parser.parse_args()

    input_csv = args.input_csv.resolve()
    output_csv = args.output_csv.resolve()

    # "utf-8-sig" decodes plain UTF-8 as well, but also drops a leading BOM
    # that would otherwise be glued onto the first header name.
    with input_csv.open("r", encoding="utf-8-sig", newline="") as f:
        reader = csv.DictReader(f)
        fieldnames = list(reader.fieldnames or [])

        if args.id_column not in fieldnames:
            raise ValueError(f"Missing id column '{args.id_column}' in {input_csv}")
        if args.body_column not in fieldnames:
            raise ValueError(f"Missing body column '{args.body_column}' in {input_csv}")

        rows: List[dict] = list(reader)

    # problem id -> indices of the rows carrying it
    groups: Dict[str, List[int]] = defaultdict(list)
    for idx, row in enumerate(rows):
        # DictReader fills cells missing from short rows with None; without
        # the "or" every such row would be grouped under the fake id "None".
        problem_id = str(row.get(args.id_column) or "").strip()
        if not problem_id:
            continue
        groups[problem_id].append(idx)

    duplicate_ids = [pid for pid, row_idxs in groups.items() if len(row_idxs) > 1]

    # Preserve first-seen order of duplicate groups.
    duplicate_ids.sort(key=lambda pid: groups[pid][0])

    output_rows: List[dict] = []
    for group_num, pid in enumerate(duplicate_ids, start=1):
        row_idxs = groups[pid]

        distinct_bodies: Set[str] = {
            normalize_body(
                rows[row_idx].get(args.body_column) or "",
                strip_html=args.strip_html,
                collapse_whitespace=args.collapse_whitespace,
            )
            for row_idx in row_idxs
        }

        for row_idx in row_idxs:
            out_row = dict(rows[row_idx])
            out_row["duplicate_group_id"] = str(group_num)
            out_row["duplicate_problem_id_count"] = str(len(row_idxs))
            out_row["distinct_problem_body_count"] = str(len(distinct_bodies))
            output_rows.append(out_row)

    output_csv.parent.mkdir(parents=True, exist_ok=True)
    output_fieldnames = fieldnames + [
        "duplicate_group_id",
        "duplicate_problem_id_count",
        "distinct_problem_body_count",
    ]

    with output_csv.open("w", encoding="utf-8", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=output_fieldnames)
        writer.writeheader()
        writer.writerows(output_rows)

    print(f"Input rows: {len(rows)}")
    print(f"Duplicated {args.id_column} groups: {len(duplicate_ids)}")
    print(f"Output rows: {len(output_rows)}")
    print(f"Wrote: {output_csv}")
|
| 145 |
+
|
| 146 |
+
|
| 147 |
+
if __name__ == "__main__":
|
| 148 |
+
main()
|
Code/gptoss120bvllmmcq.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Knowledge Tracing inference with GPT-OSS-120B model.
|
| 3 |
+
|
| 4 |
+
Usage:
|
| 5 |
+
CUDA_VISIBLE_DEVICES=0,1,2,3 python gptoss120bvllmmcq.py \
|
| 6 |
+
--data-dir foundationalktdataset/ \
|
| 7 |
+
--num-gpus 4 \
|
| 8 |
+
--batch-size 10 \
|
| 9 |
+
--cache-dir /data1/ \
|
| 10 |
+
--num-students 500 \
|
| 11 |
+
--bin-size 50 \
|
| 12 |
+
--min-history 50
|
| 13 |
+
"""
|
| 14 |
+
|
| 15 |
+
from kt_inference_base import run_inference
|
| 16 |
+
|
| 17 |
+
MODEL_CONFIG = {
|
| 18 |
+
"model_id": "openai/gpt-oss-120b",
|
| 19 |
+
"gen_configs": {
|
| 20 |
+
"temperature": 0.7,
|
| 21 |
+
"top_p": 0.95,
|
| 22 |
+
"top_k": 20,
|
| 23 |
+
"max_tokens": 32768,
|
| 24 |
+
"repetition_penalty": 1.0,
|
| 25 |
+
},
|
| 26 |
+
"output_prefix": "gptoss120b",
|
| 27 |
+
"system_prompt_prefix": "Reasoning: medium\n\n",
|
| 28 |
+
}
|
| 29 |
+
|
| 30 |
+
if __name__ == "__main__":
|
| 31 |
+
run_inference(MODEL_CONFIG)
|
Code/kt_inference_base.py
ADDED
|
@@ -0,0 +1,849 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Base module for Knowledge Tracing LLM inference.
|
| 3 |
+
|
| 4 |
+
This module contains all shared logic for running KT inference with different models.
|
| 5 |
+
Each model script imports this and provides model-specific configuration.
|
| 6 |
+
|
| 7 |
+
Usage in model scripts:
|
| 8 |
+
from kt_inference_base import run_inference
|
| 9 |
+
|
| 10 |
+
MODEL_CONFIG = {
|
| 11 |
+
"model_id": "model/name",
|
| 12 |
+
"gen_configs": {...},
|
| 13 |
+
"output_prefix": "prefix",
|
| 14 |
+
"system_prompt_prefix": "", # e.g., "Reasoning: medium\n\n"
|
| 15 |
+
}
|
| 16 |
+
|
| 17 |
+
if __name__ == "__main__":
|
| 18 |
+
run_inference(MODEL_CONFIG)
|
| 19 |
+
"""
|
| 20 |
+
|
| 21 |
+
import argparse
|
| 22 |
+
import contextlib
|
| 23 |
+
import os
|
| 24 |
+
from vllm import LLM, SamplingParams
|
| 25 |
+
import pandas as pd
|
| 26 |
+
import gc
|
| 27 |
+
import torch
|
| 28 |
+
from vllm.distributed.parallel_state import (
|
| 29 |
+
destroy_model_parallel,
|
| 30 |
+
destroy_distributed_environment,
|
| 31 |
+
)
|
| 32 |
+
import json
|
| 33 |
+
import re
|
| 34 |
+
import numpy as np
|
| 35 |
+
from tqdm import tqdm
|
| 36 |
+
from multiprocessing import Pool, cpu_count
|
| 37 |
+
from clean_utils import clean_problem_body
|
| 38 |
+
from cleantext import clean_text as clean_text_legacy
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
class NumpyEncoder(json.JSONEncoder):
    """Custom JSON encoder that handles numpy types.

    Converts numpy booleans, integers, floating-point scalars and arrays to
    their plain Python equivalents; every other unsupported object falls
    through to ``json.JSONEncoder.default``, which raises ``TypeError``.
    """

    def default(self, obj):
        """Return a JSON-serializable stand-in for ``obj``."""
        # np.bool_ is neither np.integer nor np.floating, so it needs its
        # own branch; without it json.dumps fails on numpy comparison results.
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
# Batch processing config defaults
|
| 54 |
+
DEFAULT_BATCH_SIZE = 10000
|
| 55 |
+
DEFAULT_NUM_STUDENTS = 500
|
| 56 |
+
DEFAULT_BIN_SIZE = 50
|
| 57 |
+
DEFAULT_MIN_HISTORY = 50
|
| 58 |
+
|
| 59 |
+
# Input file names
|
| 60 |
+
STUDENT_FILE = "Interactions.csv"
|
| 61 |
+
PROBLEMS_FILE = "Problems.csv"
|
| 62 |
+
SKILL_FILE = "Skills.csv"
|
| 63 |
+
|
| 64 |
+
# Base system prompt (without any prefix like "Reasoning: medium")
|
| 65 |
+
BASE_SYSTEM_PROMPT = """You are a reasoning model trained to simulate a student's evolving knowledge and response behavior in mathematics.
|
| 66 |
+
|
| 67 |
+
Your goal is to infer, from past problem–answer pairs, how this same student is likely to perform on a new problem — at multiple levels of granularity.
|
| 68 |
+
|
| 69 |
+
You must reason about the student's learning progression, skill mastery, and recurring misconceptions, then produce structured predictions for the new item.
|
| 70 |
+
|
| 71 |
+
---
|
| 72 |
+
|
| 73 |
+
Your Task:
|
| 74 |
+
|
| 75 |
+
Generate three coordinated predictions for this student:
|
| 76 |
+
|
| 77 |
+
1) **Skill-level knowledge tracing (0 or 1):** Whether the student has mastered the underlying skill involved in the new problem.
|
| 78 |
+
2) **Question-level knowledge tracing (0 or 1):** Whether the student will answer this specific problem correctly.
|
| 79 |
+
3) **Cognitive-level prediction (string):** The exact answer text or option the student would most likely produce, written in their own response style.
|
| 80 |
+
|
| 81 |
+
---
|
| 82 |
+
|
| 83 |
+
Reasoning Guidelines:
|
| 84 |
+
|
| 85 |
+
- Use the student's historical data (problems, answers, hints, timestamps) to infer learning and forgetting patterns.
|
| 86 |
+
- Consider recency and exposure: later timestamps often indicate updated knowledge.
|
| 87 |
+
- Treat `UsedHint=True` or `SawAnswer=True` as evidence that the student's recorded answer may not reflect true mastery — they might have seen or been helped toward the solution.
|
| 88 |
+
- Attend to how the student's accuracy, style, and misconceptions evolve over time.
|
| 89 |
+
- You may think step-by-step internally, but your final output must follow the format below.
|
| 90 |
+
---
|
| 91 |
+
|
| 92 |
+
Output Format:
|
| 93 |
+
|
| 94 |
+
When you are done reasoning, **finish your response with** the JSON object in this exact structure:
|
| 95 |
+
|
| 96 |
+
For Multiple Choice (select 1) problems:
|
| 97 |
+
{
|
| 98 |
+
"skill_level": 0 or 1,
|
| 99 |
+
"question_level": 0 or 1,
|
| 100 |
+
"student_answer": "A" (single letter only)
|
| 101 |
+
}
|
| 102 |
+
|
| 103 |
+
For Multiple Choice (select all) problems:
|
| 104 |
+
{
|
| 105 |
+
"skill_level": 0 or 1,
|
| 106 |
+
"question_level": 0 or 1,
|
| 107 |
+
"student_answer": "A, C" (comma-separated letters if multiple selections)
|
| 108 |
+
}
|
| 109 |
+
|
| 110 |
+
For Fill-in problems:
|
| 111 |
+
{
|
| 112 |
+
"skill_level": 0 or 1,
|
| 113 |
+
"question_level": 0 or 1,
|
| 114 |
+
"student_answer": "<string exactly as this student would write (e.g., 'x=3', '3/5', '12')>"
|
| 115 |
+
}
|
| 116 |
+
|
| 117 |
+
Predictions must be consistent. If you predict question_level to be 1, then student_answer must match the correct answer. If you predict question_level to be 0, student_answer must not match the correct answer."""
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
def parse_args(default_output_jsonl):
    """Build the command-line parser for KT inference and parse ``sys.argv``.

    Args:
        default_output_jsonl: Accepted for the caller's convenience but not
            read here; ``--output`` defaults to ``None`` regardless.

    Returns:
        The ``argparse.Namespace`` produced by ``parser.parse_args()``.
    """
    parser = argparse.ArgumentParser(description="Knowledge Tracing with LLM")
    add = parser.add_argument

    # --- batching and output locations ---
    add("--batch-size", "-b", type=int, default=DEFAULT_BATCH_SIZE,
        help=f"Batch size for LLM inference (default: {DEFAULT_BATCH_SIZE})")
    add("--output", "-o", type=str, default=None,
        help="Output JSONL file path (overrides auto-generated name)")
    add("--output-dir", type=str, default=".",
        help="Output directory for results (default: current directory)")
    add("--data-dir", "-d", type=str, default=".",
        help="Directory containing input CSV files (default: current directory)")
    add("--cache-dir", "-c", type=str, default=None,
        help="Directory for vLLM model cache (default: vLLM default)")

    # --- sampling of students and prediction bins ---
    add("--num-students", "-n", type=int, default=DEFAULT_NUM_STUDENTS,
        help=f"Number of students to sample (default: {DEFAULT_NUM_STUDENTS}, use 0 or -1 for all students)")
    add("--bin-size", type=int, default=DEFAULT_BIN_SIZE,
        help=f"Size of each prediction bin (default: {DEFAULT_BIN_SIZE})")
    add("--min-history", type=int, default=DEFAULT_MIN_HISTORY,
        help=f"Minimum history size before making predictions (default: {DEFAULT_MIN_HISTORY})")

    # --- engine / model knobs ---
    add("--num-gpus", type=int, default=1,
        help="Number of GPUs for tensor parallelism (default: 1)")
    add("--max-num-seqs", type=int, default=None,
        help="Maximum number of sequences to process in a batch (vLLM, default: 256)")
    add("--reasoning-level", type=str, choices=["none", "low", "medium", "high"], default=None,
        help="Reasoning level for GPT-OSS models only. Default: uses model config (medium for GPT-OSS, none for Qwen)")
    add("--max-model-len", type=int, default=None,
        help="Maximum sequence length in tokens (vLLM, default: model's context length)")
    add("--gpu-memory-utilization", type=float, default=0.9,
        help="Fraction of GPU memory to use (vLLM, default: 0.9, range: 0.0-1.0)")

    # --- text cleaning ---
    add("--legacy-clean", action="store_true", default=False,
        help="Use legacy text cleaner (cleantext.py) instead of clean_utils.py")

    parsed = parser.parse_args()
    return parsed
|
| 209 |
+
|
| 210 |
+
|
| 211 |
+
def label_answer_options(answer_string):
    """
    Map a ``||``-delimited option string onto letters A through J.

    Input: "Han is correct || Elena is correct || Both are correct"
    Output: {"A": "Han is correct", "B": "Elena is correct", "C": "Both are correct"}

    Returns None for a missing (NaN/None) or empty input. Each option is
    whitespace-stripped, and options beyond the tenth are dropped.
    """
    if pd.isna(answer_string):
        return None
    if answer_string == '':
        return None

    stripped_options = (piece.strip() for piece in answer_string.split('||'))
    # zip stops at the shorter input, which caps the mapping at ten letters.
    return dict(zip("ABCDEFGHIJ", stripped_options))
|
| 223 |
+
|
| 224 |
+
|
| 225 |
+
def clean_html_and_normalize(text):
    """
    Strip HTML tags and canonicalize spacing so two texts can be compared.

    Missing values (NaN/None) become "". Other inputs are converted with
    ``str``, have ``<...>`` tags removed, whitespace runs collapsed to one
    space, and any whitespace around colons dropped.
    """
    if pd.isna(text):
        return ""

    without_tags = re.sub(r'<[^>]+>', '', str(text))
    # split() with no argument also trims, so this collapses and strips.
    single_spaced = ' '.join(without_tags.split())
    tight_colons = re.sub(r'\s*:\s*', ':', single_spaced)
    return tight_colons.strip()
|
| 238 |
+
|
| 239 |
+
|
| 240 |
+
def match_student_answer_to_letters(student_answer_text, answer_options_dict):
    """
    Match student's comma-delineated answers to letter options.

    Each piece of the student's answer is normalized and compared with the
    normalized option texts: an exact match wins, otherwise the first option
    that contains the piece (or is contained in it) is used.

    Args:
        student_answer_text: String like "Answer A text , Answer C text , Answer B text"
        answer_options_dict: Dict like {"A": "Answer A text", "B": "Answer B text", ...}

    Returns:
        String like "A, B, C" (sorted, de-duplicated), or the original
        ``student_answer_text`` when it is missing, when there are no
        options, or when nothing matched.
    """
    if pd.isna(student_answer_text) or not answer_options_dict:
        return student_answer_text

    # Split by " , " (comma with spaces, which is the delimiter used in the actual_answer)
    student_answers = [ans.strip() for ans in str(student_answer_text).split(' , ')]

    # Clean and normalize all options for comparison
    normalized_options = {
        letter: clean_html_and_normalize(text)
        for letter, text in answer_options_dict.items()
    }

    matched_letters = set()
    for student_ans in student_answers:
        normalized_student = clean_html_and_normalize(student_ans)

        # Try to find exact match first
        exact = next(
            (
                letter
                for letter, normalized_option in normalized_options.items()
                if normalized_student == normalized_option
            ),
            None,
        )
        if exact is not None:
            matched_letters.add(exact)
            continue

        # An empty string is a substring of everything, so a blank answer
        # piece must not enter the substring fallback: it would always be
        # "matched" to the first option.
        if not normalized_student:
            continue

        # If no exact match, try substring match (student answer contained
        # in option or vice versa). Blank option texts are skipped for the
        # same reason as blank answers.
        partial = next(
            (
                letter
                for letter, normalized_option in normalized_options.items()
                if normalized_option
                and (
                    normalized_student in normalized_option
                    or normalized_option in normalized_student
                )
            ),
            None,
        )
        if partial is not None:
            matched_letters.add(partial)

    # Return comma-separated letters if we found matches, otherwise return original
    if matched_letters:
        return ', '.join(sorted(matched_letters))
    return student_answer_text
|
| 284 |
+
|
| 285 |
+
|
| 286 |
+
def get_correct_option_letters(answer_options, correct_answers):
    """
    Translate the correct answer text(s) into their option letter(s).

    Args:
        answer_options: Dict like {"A": "Han is correct", "B": "Elena is correct", ...}
        correct_answers: String like "Both are correct" or "Han is correct || Elena is correct"

    Returns:
        String like "C" or "A, B" (letters sorted). ``correct_answers`` is
        returned unchanged when there are no options, when it is missing,
        or when no option text equals any of the correct answers.
    """
    if not answer_options:
        return correct_answers
    if pd.isna(correct_answers):
        return correct_answers

    # Several correct answers are separated by "||".
    wanted_texts = [piece.strip() for piece in correct_answers.split('||')]

    # Only exact text equality counts as a match.
    hits = sorted(
        letter for letter, text in answer_options.items() if text in wanted_texts
    )
    if not hits:
        return correct_answers
    return ', '.join(hits)
|
| 310 |
+
|
| 311 |
+
|
| 312 |
+
def format_answer_options_for_prompt(answer_options):
    """
    Render a letter -> text mapping as one "LETTER) text" line per option.

    Input: {"A": "Han is correct", "B": "Elena is correct", ...}
    Output: "A) Han is correct\\nB) Elena is correct\\n..."

    Returns None when ``answer_options`` is empty or None.
    """
    if not answer_options:
        return None

    rendered = []
    for letter, text in answer_options.items():
        rendered.append(f"{letter}) {text}")
    return '\n'.join(rendered)
|
| 322 |
+
|
| 323 |
+
|
| 324 |
+
def create_user_prompt(student_history, new_problem, problem_df):
    """
    Build the user prompt: task description, student history, then the target problem.

    Args:
        student_history: List of dicts. Each is read through the keys
            timestamp, problem_text, problem_type, correct_answer,
            student_answer, used_hint, saw_answer, plus the optional
            answer_options_formatted and node_name.
        new_problem: Dict read through the keys timestamp, problem_text,
            problem_type, correct_answer, plus the optional
            answer_options_formatted and node_name.
        problem_df: Not used by this function; kept in the signature so
            existing callers keep working.

    Returns:
        The assembled prompt as a single string.
    """
    def skill_line(record):
        # A missing or empty skill name is rendered as "Undefined".
        if record.get('node_name'):
            return f"Skill: {record['node_name']}\n"
        return "Skill: Undefined\n"

    # Static preamble, collected as pieces and joined once at the end.
    chunks = [
        "Task Description:\n\n",
        "Your task is to model a single student's learning process and predict how they will respond to a new mathematics problem based on their prior work.\n\n",
        """You will produce three coordinated predictions:

1) **Skill-level knowledge tracing (0 or 1):** Predict whether this student has mastered the underlying skill involved in the new problem.
2) **Question-level knowledge tracing (0 or 1):** Predict whether this student will answer this specific problem correctly.
3) **Cognitive-level prediction (string):** Generate the exact answer the student would most likely produce.
- For Multiple Choice (select 1): Predict a single letter (e.g., "A" or "B")
- For Multiple Choice (select all): Predict comma-separated letters (e.g., "A, C" or "B, D")
- For Fill-in problems: Predict the exact text the student would write
""",
        """---

Provided Data:

You will receive:
- ProblemID: <id>
- Timestamp: <timestamp>
- Problem: <problem text>
- Problem Type: Multiple Choice (select 1) / Multiple Choice (select all) / Fill-in Problem
- Options: Answer choices in format "A) ...\nB) ...\nC) ..."
- Correct Answer(s): The letter(s) or text of correct answer(s)
- Student's First Answer: Letter(s) or fill-in text
- UsedHint: <True/False>
- SawAnswer: <True/False>
- Skill: <skill_name_or_id>
- A new problem (with optional answer choices), skill metadata, and context flags (`UsedHint`, `SawAnswer`).

# About the context flags:
- **UsedHint = True** → The student viewed or used a hint while solving this problem.
- **SawAnswer = True** → The student saw the correct answer before or during the attempt.
When either of these flags is True, treat the corresponding response as *less reliable evidence of mastery* — it indicates that the student has not fully learned the concept and required help solving the problem.
""",
        "**Student's Previous Problems:**\n\n",
    ]

    # One section per past attempt, in the order given.
    for past in student_history:
        chunks.append(f"Timestamp: {past['timestamp']}\n")
        chunks.append(f"Problem: {past['problem_text']}\n")
        chunks.append(f"Problem Type: {past['problem_type']}\n")
        if past.get('answer_options_formatted'):
            chunks.append(f"Options:\n{past['answer_options_formatted']}\n")
        chunks.append(f"Correct Answer: {past['correct_answer']}\n")
        chunks.append(f"Student's First Answer: {past['student_answer']}\n")
        chunks.append(f"UsedHint: {past['used_hint']}\n")
        chunks.append(f"SawAnswer: {past['saw_answer']}\n")
        chunks.append(skill_line(past))
        chunks.append("---\n\n")

    # The target problem: same layout, but without the student's answer or the flags.
    chunks.append("**New Problem to Predict:**\n\n")
    chunks.append(f"Timestamp: {new_problem['timestamp']}\n")
    chunks.append(f"Problem: {new_problem['problem_text']}\n")
    chunks.append(f"Problem Type: {new_problem['problem_type']}\n")
    if new_problem.get('answer_options_formatted'):
        chunks.append(f"Answer Options:\n{new_problem['answer_options_formatted']}\n")
    chunks.append(f"Correct Answer: {new_problem['correct_answer']}\n")
    chunks.append(skill_line(new_problem))

    return ''.join(chunks)
|
| 400 |
+
|
| 401 |
+
|
| 402 |
+
def _last_json_object(text):
    """Return ``(obj, error)``: the last JSON object decodable from *text*, and the last decode error seen."""
    decoder = json.JSONDecoder()
    found = None
    last_error = None
    start = text.find('{')
    while start != -1:
        try:
            candidate, end = decoder.raw_decode(text, start)
        except json.JSONDecodeError as exc:
            # Not a JSON object at this brace (e.g. "{1, 2}"); try the next one.
            last_error = exc
            start = text.find('{', start + 1)
        else:
            found = candidate
            # Resume after the decoded object so nested braces are not rescanned.
            start = text.find('{', end)
    return found, last_error


def extract_json_prediction(response_text):
    """
    Extract the final JSON prediction from the model's response.

    The response is scanned with a real JSON decoder, so nested objects and
    braces inside string values (e.g. ``"\\\\frac{1}{2}"``) are handled. If
    nothing decodes, the text is un-escaped once (for responses that contain
    literal ``\\n`` sequences instead of newlines) and scanned again.

    Args:
        response_text: Raw text generated by the model.

    Returns:
        The last JSON object found, as a dict, or None when the response is
        empty or contains no decodable JSON object.
    """
    if not response_text:
        return None

    prediction, error = _last_json_object(response_text)

    if prediction is None and '\\' in response_text:
        try:
            # latin-1 + backslashreplace keeps non-ASCII characters intact
            # through the unicode_escape round-trip.
            unescaped = response_text.encode('latin-1', 'backslashreplace').decode('unicode_escape')
        except UnicodeDecodeError as exc:
            print(f"Error processing JSON: {exc}")
        else:
            prediction, fallback_error = _last_json_object(unescaped)
            error = fallback_error or error

    if prediction is None and error is not None:
        print(f"JSON decode error: {error}")
    return prediction
|
| 421 |
+
|
| 422 |
+
|
| 423 |
+
def get_prediction_id(meta):
    """Build the unique prediction ID "<user_id>_<bin_number>_<prediction_type>" from *meta*."""
    return "{}_{}_{}".format(
        meta['user_id'],
        meta['bin_number'],
        meta['prediction_type'],
    )
|
| 426 |
+
|
| 427 |
+
|
| 428 |
+
def load_completed_predictions(output_jsonl):
    """
    Load already-completed prediction IDs from a JSONL results file.

    Blank lines are ignored. Lines that are not valid JSON or have no
    'prediction_id' (for example a final line truncated by a crash mid-write)
    are skipped with a warning, so an interrupted run can still be resumed.

    Args:
        output_jsonl: Path of the JSONL results file; it may not exist yet.

    Returns:
        Set of prediction IDs found in the file (empty if the file is missing).
    """
    completed = set()
    if not os.path.exists(output_jsonl):
        return completed

    skipped = 0
    with open(output_jsonl, 'r') as f:
        for line in f:
            if not line.strip():
                continue
            try:
                completed.add(json.loads(line)['prediction_id'])
            except (json.JSONDecodeError, KeyError, TypeError):
                # TypeError covers valid JSON that is not an object (list, number, ...).
                skipped += 1

    if skipped:
        print(f"Warning: skipped {skipped} malformed line(s) in {output_jsonl}")
    print(f"Loaded {len(completed)} completed predictions from {output_jsonl}")
    return completed
|
| 439 |
+
|
| 440 |
+
|
| 441 |
+
def _resolve_correct_answer(row):
    """Return the answer key for *row*: 'correct_answers' when the record has it, else 'Fill-in Answers'."""
    # run_inference stores a unified answer key in 'correct_answers' (option
    # letters for multiple choice, the fill-in text otherwise). The raw
    # 'Fill-in Answers' column is only the fallback for records without it.
    if 'correct_answers' in row:
        return row['correct_answers']
    return row['Fill-in Answers']


def _history_entry(row):
    """Convert one merged interaction record into the dict shape create_user_prompt reads."""
    return {
        'problem_id': row['problem_id'],
        'timestamp': row['end_time'],
        'problem_text': row['cleaned body'],
        'correct_answer': _resolve_correct_answer(row),
        'answer_options': row['answer_options'] if pd.notna(row['answer_options']) else None,
        'answer_options_formatted': row['answer_options_formatted'] if pd.notna(row.get('answer_options_formatted')) else None,
        'student_answer': row['answer_text'],
        'used_hint': row['hint_count'] > 0,
        'saw_answer': row['saw_answer'],
        'problem_type': row['Problem Type'],
        'node_name': row.get('node_name'),
    }


def make_process_single_user(system_prompt):
    """
    Create a process_single_user function with the given system prompt.

    Args:
        system_prompt: Text placed before every generated user prompt.

    Returns:
        A function taking one ``(user_id, user_records, min_history, bin_size)``
        tuple and returning ``(prompts, metadata)`` lists of equal length.
    """
    def process_single_user(args):
        """
        Process a single user's data and return prompts and metadata.

        The first ``min_history`` records seed the history. The remaining
        records are cut into consecutive bins of ``bin_size`` (a trailing
        partial bin is ignored). For each bin, one prompt is produced for the
        first record scored 1 and one for the first record scored 0, if present.
        The history shown for a bin is everything before that bin.
        """
        user_id, user_records, min_history, bin_size = args

        prompts = []
        metadata = []

        # Check if user has at least min_history + 1 rows
        if len(user_records) < min_history + 1:
            return prompts, metadata

        num_bins = (len(user_records) - min_history) // bin_size

        # Build initial history
        student_history = [_history_entry(user_records[i]) for i in range(min_history)]

        for bin_idx in range(num_bins):
            bin_start = min_history + bin_idx * bin_size
            bin_end = bin_start + bin_size

            # Extend history with previous bin's items
            if bin_idx > 0:
                student_history.extend(
                    _history_entry(user_records[i])
                    for i in range(bin_start - bin_size, bin_start)
                )

            # Find first correct and first incorrect in this bin
            first_correct_idx = None
            first_incorrect_idx = None
            for actual_idx, row in enumerate(user_records[bin_start:bin_end], start=bin_start):
                score = row['discrete_score']
                if score == 1 and first_correct_idx is None:
                    first_correct_idx = actual_idx
                if score == 0 and first_incorrect_idx is None:
                    first_incorrect_idx = actual_idx
                if first_correct_idx is not None and first_incorrect_idx is not None:
                    break

            # Create predictions for found cases
            targets = (
                (first_correct_idx, 'correct'),
                (first_incorrect_idx, 'incorrect'),
            )
            for target_idx, prediction_type in targets:
                if target_idx is None:
                    continue

                target_row = user_records[target_idx]
                correct_answer = _resolve_correct_answer(target_row)
                new_problem = {
                    'problem_text': target_row['cleaned body'],
                    'correct_answer': correct_answer,
                    'answer_options': target_row['answer_options'] if pd.notna(target_row['answer_options']) else None,
                    'answer_options_formatted': target_row['answer_options_formatted'] if pd.notna(target_row.get('answer_options_formatted')) else None,
                    'problem_type': target_row['Problem Type'],
                    'timestamp': target_row['end_time'],
                    'node_name': target_row.get('node_name'),
                }

                user_prompt = create_user_prompt(student_history, new_problem, None)
                full_prompt = system_prompt + "\n\n" + user_prompt

                prompts.append(full_prompt)
                metadata.append({
                    'prediction_id': f"{user_id}_{bin_idx}_{prediction_type}",
                    'row_index': target_idx,
                    'user_id': user_id,
                    'history_size': len(student_history),
                    'bin_number': bin_idx,
                    'prediction_type': prediction_type,
                    'id': target_row.get('id_x', None),
                    'problem_id': target_row.get('problem_id', None),
                    'problem_type': target_row['Problem Type'],
                    'actual_answer': target_row['answer_text'],
                    'correct_answer': correct_answer,
                    'actual_score': target_row['discrete_score'],
                    'prompt': full_prompt,
                })

        return prompts, metadata

    return process_single_user
|
| 558 |
+
|
| 559 |
+
|
| 560 |
+
def append_results_jsonl(results, output_jsonl):
    """Append each result to *output_jsonl* as one JSON document per line."""
    with open(output_jsonl, 'a') as sink:
        sink.writelines(
            json.dumps(record, cls=NumpyEncoder) + '\n'
            for record in results
        )
|
| 565 |
+
|
| 566 |
+
|
| 567 |
+
def process_batch(batch_metadata, batch_response_texts):
    """
    Pair each metadata dict with the prediction parsed from its response.

    Every result copies the metadata, adds the three predicted_* fields (None
    when no prediction could be extracted) and keeps the raw response text
    under 'full_response'.
    """
    batch_results = []

    for meta, text in zip(batch_metadata, batch_response_texts):
        # A missing or empty prediction yields None for all predicted fields.
        parsed = extract_json_prediction(text) or {}
        batch_results.append({
            **meta,
            'predicted_skill_level': parsed.get('skill_level'),
            'predicted_question_level': parsed.get('question_level'),
            'predicted_student_answer': parsed.get('student_answer'),
            'full_response': text,
        })

    return batch_results
|
| 593 |
+
|
| 594 |
+
|
| 595 |
+
# Global variable to hold process_single_user function for multiprocessing.
# make_process_single_user returns a nested function, which cannot be pickled
# and sent to a worker, so workers reach it through this module-level name
# (assigned in run_inference).
_process_single_user_func = None


def _process_single_user_wrapper(args):
    """
    Wrapper for multiprocessing that uses the global function.

    Args:
        args: The ``(user_id, user_records, min_history, bin_size)`` tuple
            forwarded unchanged to the configured function.

    Raises:
        RuntimeError: If the global function has not been set in this process.
    """
    if _process_single_user_func is None:
        # Without this check the failure is an opaque "'NoneType' object is
        # not callable" raised from inside a worker process.
        raise RuntimeError(
            "_process_single_user_func is not initialised in this process; "
            "it must be set (see run_inference) before mapping work onto the pool."
        )
    return _process_single_user_func(args)
|
| 602 |
+
|
| 603 |
+
|
| 604 |
+
def _init_prompt_worker(system_prompt):
    """Pool initializer: build the prompt-building function inside each worker process."""
    global _process_single_user_func
    _process_single_user_func = make_process_single_user(system_prompt)


def run_inference(config):
    """
    Main inference function that runs KT prediction with the given model config.

    Steps: parse CLI arguments, load and merge the student/problem/skill CSVs,
    build prompts for every selected user in a process pool, drop predictions
    already present in the output JSONL (resume support), then generate with
    vLLM batch by batch, appending results to the JSONL after each batch.

    Args:
        config: Dict with keys:
            - model_id: HuggingFace model ID
            - gen_configs: Dict of generation parameters
            - output_prefix: Prefix for output filename
            - system_prompt_prefix: Optional prefix for system prompt
              (e.g., "Reasoning: medium" followed by a blank line)

    Returns:
        None when every prediction was already completed. Otherwise the
        function does not return: it ends with ``sys.exit(0)`` after cleanup.
    """
    import sys

    global _process_single_user_func

    model_id = config["model_id"]
    gen_configs = config["gen_configs"]
    output_prefix = config["output_prefix"]

    # Parse arguments first (needed for reasoning level)
    default_output_jsonl = f"{output_prefix}.jsonl"
    args = parse_args(default_output_jsonl)

    # Determine system prompt prefix
    # CLI --reasoning-level overrides model config if provided
    if args.reasoning_level is not None:
        if args.reasoning_level == "none":
            system_prompt_prefix = ""
        else:
            system_prompt_prefix = f"Reasoning: {args.reasoning_level}\n\n"
    else:
        system_prompt_prefix = config.get("system_prompt_prefix", "")

    # Build full system prompt
    system_prompt = system_prompt_prefix + BASE_SYSTEM_PROMPT

    # Create the process_single_user function with this system prompt
    _process_single_user_func = make_process_single_user(system_prompt)

    batch_size = args.batch_size
    data_dir = args.data_dir
    cache_dir = args.cache_dir
    num_students = args.num_students
    bin_size = args.bin_size
    min_history = args.min_history

    # Generate output filename with params
    n_str = "all" if num_students <= 0 else str(num_students)
    params_suffix = f"_n{n_str}_bin{bin_size}_hist{min_history}"

    if args.output:
        # Use explicit output path
        output_jsonl = args.output
    else:
        # Auto-generate filename in output directory
        filename = f"{output_prefix}{params_suffix}.jsonl"
        output_jsonl = os.path.join(args.output_dir, filename)

    # Create the output directory up front: otherwise the first append fails
    # only after a whole batch has already been generated.
    output_parent = os.path.dirname(output_jsonl)
    if output_parent:
        os.makedirs(output_parent, exist_ok=True)

    # Build input file paths
    student_csv = os.path.join(data_dir, STUDENT_FILE)
    problems_csv = os.path.join(data_dir, PROBLEMS_FILE)
    skill_csv = os.path.join(data_dir, SKILL_FILE)

    print(f"Model: {model_id}")
    print(f"Data directory: {data_dir}")
    print(f"Batch size: {batch_size}")
    print(f"Output JSONL: {output_jsonl}")
    print(f"Num students: {num_students if num_students > 0 else 'all'}")
    print(f"Bin size: {bin_size}")
    print(f"Min history: {min_history}")
    if cache_dir:
        print(f"Model cache: {cache_dir}")
    print(f"Text cleaner: {'legacy (cleantext.py)' if args.legacy_clean else 'default (clean_utils.py)'}")

    # Load the data
    print("\nLoading data...")
    student_df = pd.read_csv(student_csv)
    student_df = student_df.sort_values(['user_id', 'id']).reset_index(drop=True)
    problems_df = pd.read_csv(problems_csv)
    clean_func = clean_text_legacy if args.legacy_clean else clean_problem_body
    problems_df['cleaned body'] = problems_df['Problem Body'].apply(clean_func)

    # Label answer options for multiple-choice items
    problems_df['answer_options'] = problems_df['Multiple Choice Options'].apply(label_answer_options)

    # Get correct answer letters for multiple-choice, keep original for fill-in
    problems_df['correct_answers'] = problems_df.apply(
        lambda row: get_correct_option_letters(row['answer_options'], row['Multiple Choice Answers'])
        if row['Problem Type'] in ['Multiple Choice (select 1)', 'Multiple Choice (select all)']
        else row['Fill-in Answers'],
        axis=1
    )

    skill_df = pd.read_csv(skill_csv)
    problems_df = pd.merge(problems_df, skill_df, on='problem_id', how='left')

    # Pre-compute formatted answer options once per problem
    problems_df['answer_options_formatted'] = problems_df['answer_options'].apply(
        lambda x: format_answer_options_for_prompt(x) if pd.notna(x) else None
    )

    # Sort student data by id (chronological order)
    student_df = student_df.sort_values('id').reset_index(drop=True)

    # Merge with problems data
    merged_df = student_df.merge(problems_df, on='problem_id', how='inner')

    # Convert student answers to letter format for multiple-choice problems
    merged_df['answer_text'] = merged_df.apply(
        lambda row: match_student_answer_to_letters(row['answer_text'], row['answer_options'])
        if row['Problem Type'] in ['Multiple Choice (select 1)', 'Multiple Choice (select all)'] and pd.notna(row['answer_options'])
        else row['answer_text'],
        axis=1
    )

    # Select users (all or random sample)
    all_users = merged_df['user_id'].unique()
    if num_students <= 0:
        # Use all students
        selected_users = all_users
        print(f"\nUsing all {len(all_users)} users")
    else:
        # Random sample
        np.random.seed(42)  # For reproducibility
        selected_users = np.random.choice(all_users, size=min(num_students, len(all_users)), replace=False)
        merged_df = merged_df[merged_df['user_id'].isin(selected_users)]
        print(f"\nSelected {len(selected_users)} random users from {len(all_users)} total users")
        print(f"Filtered data: {len(merged_df)} rows")

    # Prepare data for batch processing
    print("\nPreparing prompts in parallel...")

    # Prepare user groups for parallel processing
    print("Grouping user data...")
    user_groups = [
        (user_id, user_df.to_dict('records'), min_history, bin_size)
        for user_id, user_df in merged_df.groupby('user_id')
    ]
    print(f"Processing {len(user_groups)} users with {cpu_count()} CPU cores...")

    # Process users in parallel
    all_prompts = []
    all_metadata = []

    # The initializer rebuilds the prompt function inside every worker.
    # Relying on the global assigned above only works when workers are forked;
    # with the "spawn"/"forkserver" start methods the module is re-imported in
    # the child and the global would still be None there.
    with Pool(
        processes=cpu_count(),
        initializer=_init_prompt_worker,
        initargs=(system_prompt,),
    ) as pool:
        results = list(tqdm(
            pool.imap(_process_single_user_wrapper, user_groups),
            total=len(user_groups),
            desc="Preparing prompts"
        ))

    # Merge results
    for prompts, metadata in results:
        all_prompts.extend(prompts)
        all_metadata.extend(metadata)

    print(f"\nTotal predictions to make: {len(all_prompts)}")

    # Filter out already-completed predictions (resume support)
    completed_ids = load_completed_predictions(output_jsonl)
    remaining = [(p, m) for p, m in zip(all_prompts, all_metadata)
                 if m['prediction_id'] not in completed_ids]

    if not remaining:
        print("All predictions already completed!")
        return

    all_prompts, all_metadata = zip(*remaining)
    all_prompts = list(all_prompts)
    all_metadata = list(all_metadata)

    print(f"Already completed: {len(completed_ids)}")
    print(f"Remaining to process: {len(all_prompts)}")
    print(f"Processing in batches of {batch_size}")

    # Initialize vLLM engine
    print("\nInitializing vLLM engine...")
    sampling_params = SamplingParams(**gen_configs)
    llm_kwargs = {
        "model": model_id,
        "tensor_parallel_size": args.num_gpus,
        "trust_remote_code": True,
        "gpu_memory_utilization": args.gpu_memory_utilization,
        "enable_prefix_caching": True,
    }
    if args.max_num_seqs is not None:
        llm_kwargs["max_num_seqs"] = args.max_num_seqs
    if args.max_model_len is not None:
        llm_kwargs["max_model_len"] = args.max_model_len
    if cache_dir:
        llm_kwargs["download_dir"] = cache_dir
    llm = LLM(**llm_kwargs)

    # Process in batches
    results = []
    num_batches = (len(all_prompts) + batch_size - 1) // batch_size

    for batch_idx in range(num_batches):
        batch_start = batch_idx * batch_size
        batch_end = min(batch_start + batch_size, len(all_prompts))

        batch_prompts = all_prompts[batch_start:batch_end]
        batch_metadata = all_metadata[batch_start:batch_end]

        print(f"\n{'='*80}")
        print(f"Processing batch {batch_idx + 1}/{num_batches}")
        print(f"Items: {batch_start} to {batch_end} ({len(batch_prompts)} prompts)")
        print(f"{'='*80}")

        # Generate predictions for this batch
        try:
            outputs = llm.generate(batch_prompts, sampling_params)
            response_texts = [o.outputs[0].text.strip() for o in outputs]

            # Process results for this batch
            batch_results = process_batch(batch_metadata, response_texts)
            results.extend(batch_results)

            print(f"Successfully processed batch {batch_idx + 1}")
            print(f"Total results so far: {len(results)}")

            # Append results immediately after each batch
            append_results_jsonl(batch_results, output_jsonl)
            print(f"Saved {len(batch_results)} results to {output_jsonl}")

        except Exception as e:
            # Earlier batches are already on disk, so report and re-raise;
            # a restart resumes from the JSONL file.
            print(f"\nERROR processing batch {batch_idx + 1}: {str(e)}")
            print(f"Progress saved in {output_jsonl} - restart to resume")
            raise

    print(f"\n{'='*80}")
    print("All batches processed successfully!")
    print(f"{'='*80}")
    print(f"\nAll results saved to {output_jsonl}")
    print(f"Total predictions processed: {len(results)}")

    # Cleanup
    print("\nCleaning up...")
    destroy_model_parallel()
    destroy_distributed_environment()
    del llm
    with contextlib.suppress(AssertionError):
        torch.distributed.destroy_process_group()
    gc.collect()
    torch.cuda.empty_cache()

    print("\nDone!")
    # sys.exit rather than the interactive-only builtin exit(), which the
    # site module may not install.
    sys.exit(0)
|
Code/llama33_70b_instruct_vllm.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Knowledge Tracing inference with Llama-3.3-70B-Instruct model.
|
| 3 |
+
|
| 4 |
+
Usage:
|
| 5 |
+
CUDA_VISIBLE_DEVICES=0,1,2,3 python llama33_70b_instruct_vllm.py \
|
| 6 |
+
--data-dir foundationalktdataset/ \
|
| 7 |
+
--num-gpus 4 \
|
| 8 |
+
--batch-size 10 \
|
| 9 |
+
--cache-dir /data1/ \
|
| 10 |
+
--num-students 500 \
|
| 11 |
+
--bin-size 50 \
|
| 12 |
+
--min-history 50
|
| 13 |
+
"""
|
| 14 |
+
|
| 15 |
+
from kt_inference_base import run_inference
|
| 16 |
+
|
| 17 |
+
# Settings for this model, handed to kt_inference_base.run_inference below.
MODEL_CONFIG = {
    # HuggingFace model ID.
    "model_id": "meta-llama/Llama-3.3-70B-Instruct",
    # Generation parameters.
    "gen_configs": {
        "temperature": 0.7,
        "top_p": 0.9,
        "max_tokens": 32768,
        "repetition_penalty": 1.0,
    },
    # Prefix for the output filename.
    "output_prefix": "llama33_70b_instruct",
    "system_prompt_prefix": "",  # No prefix - standard instruct model
}

if __name__ == "__main__":
    run_inference(MODEL_CONFIG)
|
Code/plot_student_attempt_distribution.py
ADDED
|
@@ -0,0 +1,357 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""Plot distribution of student attempts over elapsed time.
|
| 3 |
+
|
| 4 |
+
This script reads FoundationalASSIST `Interactions.csv`, computes elapsed time
|
| 5 |
+
for each attempt from the student's first attempt, groups attempts into fixed
|
| 6 |
+
time bins, and plots the resulting column distribution.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
from __future__ import annotations
|
| 10 |
+
|
| 11 |
+
import argparse
|
| 12 |
+
import math
|
| 13 |
+
from pathlib import Path
|
| 14 |
+
|
| 15 |
+
import matplotlib.pyplot as plt
|
| 16 |
+
import pandas as pd
|
| 17 |
+
from matplotlib.ticker import FuncFormatter, MaxNLocator
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
# Default locations are resolved relative to the directory two levels above this file.
_PROJECT_ROOT = Path(__file__).resolve().parent.parent
_RESULTS_DIR = _PROJECT_ROOT / "Results"

DEFAULT_INTERACTIONS_PATH = _PROJECT_ROOT / "Data" / "Interactions.csv"
DEFAULT_OUTPUT_PLOT = _RESULTS_DIR / "student_attempt_distribution.png"
DEFAULT_OUTPUT_COUNTS = _RESULTS_DIR / "student_attempt_distribution_counts.csv"
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the attempt-distribution plot script."""
    parser = argparse.ArgumentParser(
        description=(
            "Compute distribution of attempts over elapsed time from "
            "Interactions.csv and plot binned columns."
        )
    )

    # (flag, add_argument keyword arguments), registered in this order.
    option_specs = (
        (
            "--interactions-path",
            {
                "type": Path,
                "default": DEFAULT_INTERACTIONS_PATH,
                "help": "Path to Interactions.csv.",
            },
        ),
        (
            "--output-plot",
            {
                "type": Path,
                "default": DEFAULT_OUTPUT_PLOT,
                "help": "Path to save the output figure.",
            },
        ),
        (
            "--output-counts",
            {
                "type": Path,
                "default": DEFAULT_OUTPUT_COUNTS,
                "help": "Path to save binned attempt counts as CSV.",
            },
        ),
        (
            "--max-rows",
            {
                "type": int,
                "default": None,
                "help": "Optional cap on rows after sorting (for quick debugging).",
            },
        ),
        (
            "--bin-time",
            {
                "type": float,
                "default": 10.0,
                "help": (
                    "Fixed bin width in minutes. "
                    "For example, --bin-time 10 creates bins [0,10), [10,20), ..."
                ),
            },
        ),
        (
            "--plot-upper-limit-minutes",
            {
                "type": float,
                "default": None,
                "help": (
                    "Optional upper limit for x-axis in minutes. "
                    "If omitted, uses the full range implied by bins."
                ),
            },
        ),
        (
            "--student-idx",
            {
                "type": int,
                "default": None,
                "help": (
                    "Optional 0-based index of student to plot. Index is based on "
                    "sorted unique user_id values in the loaded interactions."
                ),
            },
        ),
        (
            "--log-y",
            {
                "action": "store_true",
                "help": "Use log scale on y-axis.",
            },
        ),
    )
    for flag, options in option_specs:
        parser.add_argument(flag, **options)

    return parser.parse_args()
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
def load_interactions(path: Path, max_rows: int | None = None) -> pd.DataFrame:
    """Load the fields required for student attempt timing analysis.

    Reads the ``id``, ``user_id`` and ``end_time`` columns from the CSV at
    *path*, coerces them to int / string / UTC datetime, drops rows whose
    ``user_id`` or ``end_time`` is missing or unparseable, and sorts the
    result by ``(user_id, end_time, id)``.

    Args:
        path: Location of the interactions CSV.
        max_rows: Optional cap applied after sorting; ``None`` keeps all rows.

    Returns:
        The cleaned and sorted DataFrame.

    Raises:
        ValueError: If *max_rows* is given and is not positive.
    """
    # Validate first so a bad flag fails immediately rather than after a
    # potentially slow read of the whole CSV.
    if max_rows is not None and max_rows <= 0:
        raise ValueError("--max-rows must be a positive integer.")

    usecols = ["id", "user_id", "end_time"]
    df = pd.read_csv(path, usecols=usecols, low_memory=False)

    # Unparseable ids become -1 so they can still act as a sort tie-breaker.
    df["id"] = pd.to_numeric(df["id"], errors="coerce").fillna(-1).astype(int)
    df["user_id"] = df["user_id"].astype("string")
    df["end_time"] = pd.to_datetime(df["end_time"], errors="coerce", utc=True)

    df = df.dropna(subset=["user_id", "end_time"]).copy()
    # Stable sort: rows that tie on every key keep their file order.
    df = df.sort_values(["user_id", "end_time", "id"], kind="mergesort")

    if max_rows is not None:
        df = df.head(max_rows).copy()

    return df
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
def select_student_by_index(
    df: pd.DataFrame,
    student_idx: int,
) -> tuple[pd.DataFrame, str, int]:
    """Select one student's interactions by 0-based index over unique IDs.

    The index refers to the order in which distinct ``user_id`` values first
    appear in *df*.

    Args:
        df: Interactions containing a ``user_id`` column.
        student_idx: 0-based position of the student to select.

    Returns:
        A tuple ``(selected_df, selected_student_id, total_students)`` holding
        a copy of that student's rows, the student id as ``str``, and the
        number of distinct students in *df*.

    Raises:
        ValueError: If *df* has no students or *student_idx* is out of range.
    """
    student_ids = df["user_id"].drop_duplicates().tolist()
    total_students = len(student_ids)

    if total_students == 0:
        raise ValueError("No students found in loaded interactions.")
    if not 0 <= student_idx < total_students:
        raise ValueError(
            f"--student-idx must be in [0, {total_students - 1}], got {student_idx}."
        )

    selected_student_id = str(student_ids[student_idx])
    selected_df = df[df["user_id"] == selected_student_id].copy()
    return selected_df, selected_student_id, total_students
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
def append_student_id_to_output_path(path: Path, student_id: str) -> Path:
    """Append a filename-safe student-id suffix to an output path.

    Every character of *student_id* that is not alphanumeric, ``-`` or ``_``
    is replaced by ``_``; the result is inserted between the stem and the
    suffix of *path* (``plot.png`` -> ``plot_<id>.png``).

    Args:
        path: Original output path.
        student_id: Identifier to embed in the filename.

    Returns:
        A new path in the same directory with the suffixed filename.
    """
    safe_id = "".join(
        ch if ch.isalnum() or ch in ("-", "_") else "_" for ch in student_id
    )
    return path.with_name(f"{path.stem}_{safe_id}{path.suffix}")
|
| 148 |
+
|
| 149 |
+
|
| 150 |
+
def compute_attempt_elapsed_minutes(df: pd.DataFrame) -> pd.Series:
    """Compute elapsed minutes of each attempt from the student's first attempt.

    Args:
        df: Interactions with ``user_id`` and datetime ``end_time`` columns.

    Returns:
        A Series named ``elapsed_minutes``, aligned with *df*'s index, giving
        for every row the minutes between that row's ``end_time`` and the
        earliest ``end_time`` of the same ``user_id``.
    """
    # transform("min") broadcasts each student's earliest time back to rows.
    first_times = df.groupby("user_id", sort=False)["end_time"].transform("min")
    elapsed_minutes = (df["end_time"] - first_times).dt.total_seconds() / 60.0
    return elapsed_minutes.rename("elapsed_minutes")
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
def build_fixed_width_bin_edges_minutes(
    valid_elapsed_minutes: pd.Series,
    bin_time_minutes: float,
) -> list[float]:
    """Build fixed-width bin edges covering the min/max elapsed minutes.

    Edges are multiples of *bin_time_minutes*. The bins are intended to be
    half-open ``[left, right)``, so when the maximum falls exactly on the last
    edge one more edge is appended to keep that value inside a bin.

    Args:
        valid_elapsed_minutes: Non-empty Series of elapsed minutes (no NaN).
        bin_time_minutes: Positive bin width in minutes.

    Returns:
        Ascending list of at least two bin edges.

    Raises:
        ValueError: If *bin_time_minutes* is not positive, or the series is
            empty (its min/max are NaN and cannot be floored).
    """
    # `not > 0` also rejects NaN, which would otherwise poison every edge.
    if not bin_time_minutes > 0:
        raise ValueError("bin_time_minutes must be a positive number.")

    min_elapsed = float(valid_elapsed_minutes.min())
    max_elapsed = float(valid_elapsed_minutes.max())

    start = bin_time_minutes * math.floor(min_elapsed / bin_time_minutes)
    end = bin_time_minutes * math.ceil(max_elapsed / bin_time_minutes)

    # Normalise a (possibly negative or integer) zero to plain 0.0.
    if math.isclose(start, 0.0, abs_tol=1e-12):
        start = 0.0
    # All values share one edge: still emit a single full-width bin.
    if math.isclose(end, start, abs_tol=1e-12):
        end = start + bin_time_minutes

    n_bins = int(round((end - start) / bin_time_minutes))
    edges = [start + i * bin_time_minutes for i in range(n_bins + 1)]
    # Right edges are exclusive, so a max sitting on the last edge needs
    # one extra bin to be counted.
    if edges[-1] <= max_elapsed:
        edges.append(edges[-1] + bin_time_minutes)

    return edges
|
| 180 |
+
|
| 181 |
+
|
| 182 |
+
def format_minutes_tick(value: float, _pos: float) -> str:
    """Format an axis value given in minutes as a compact m/h/d label.

    Values below 60 are shown in minutes, values below 1440 in hours, and
    everything else in days. Whole amounts are printed without decimals
    (``"2h"``); fractional amounts keep one decimal (``"1.5h"``) so that
    distinct ticks such as 90 and 120 minutes do not collapse to one label.

    Args:
        value: Tick position in minutes.
        _pos: Tick index supplied by the formatter callback; unused.

    Returns:
        The label text.
    """
    if value < 60:
        amount, unit = value, "m"
    elif value < 1440:
        amount, unit = value / 60.0, "h"
    else:
        amount, unit = value / 1440.0, "d"

    if math.isclose(amount, round(amount), abs_tol=1e-9):
        return f"{int(round(amount))}{unit}"
    return f"{amount:.1f}{unit}"
|
| 188 |
+
|
| 189 |
+
|
| 190 |
+
def summarize_attempt_distribution(
    elapsed_minutes: pd.Series,
    bin_time_minutes: float,
) -> pd.DataFrame:
    """Bin elapsed attempt times into fixed-width bins and tabulate them.

    Args:
        elapsed_minutes: Elapsed minutes per attempt; NaN entries are ignored.
        bin_time_minutes: Bin width in minutes.

    Returns:
        One row per bin with columns ``bin_left_min``, ``bin_right_min``,
        ``bin_width_min``, ``attempt_count``, ``probability`` (share of all
        counted attempts) and ``percentage`` (``probability * 100``).

    Raises:
        ValueError: If no non-NaN elapsed time is available.
    """
    valid = elapsed_minutes.dropna().copy()
    if valid.empty:
        raise ValueError("No valid elapsed attempt times found.")

    edges = build_fixed_width_bin_edges_minutes(valid, bin_time_minutes)
    # Half-open [left, right) bins.
    binned = pd.cut(valid, bins=edges, right=False, include_lowest=True)
    # sort=False keeps bin order; the table below pairs counts with the
    # edges positionally, so it relies on one count per bin.
    counts = binned.value_counts(sort=False)
    total_attempts = int(counts.sum())
    probabilities = (counts / total_attempts).astype(float)

    bin_left = pd.Series(edges[:-1], dtype=float)
    bin_right = pd.Series(edges[1:], dtype=float)
    bin_width = bin_right - bin_left

    summary = pd.DataFrame(
        {
            "bin_left_min": bin_left.to_numpy(),
            "bin_right_min": bin_right.to_numpy(),
            "bin_width_min": bin_width.to_numpy(),
            "attempt_count": counts.to_numpy(),
            "probability": probabilities.to_numpy(dtype=float),
            "percentage": probabilities.to_numpy(dtype=float) * 100.0,
        }
    )
    return summary
|
| 219 |
+
|
| 220 |
+
|
| 221 |
+
def plot_distribution(
    summary_df: pd.DataFrame,
    output_path: Path,
    log_y: bool = False,
    plot_upper_limit_minutes: float | None = None,
    student_idx: int | None = None,
) -> None:
    """Create and save the student-attempt distribution bar chart.

    Args:
        summary_df: Per-bin table with ``bin_left_min``, ``bin_width_min``,
            ``attempt_count`` and ``percentage`` columns.
        output_path: Where to save the figure; parent directories are created.
        log_y: Use a logarithmic y-axis.
        plot_upper_limit_minutes: Optional cap on the x-axis upper limit.
        student_idx: When given, a narrower figure is used and the index is
            added to the title.
    """
    output_path.parent.mkdir(parents=True, exist_ok=True)

    plt.style.use("seaborn-v0_8-whitegrid")
    figsize = (10, 5) if student_idx is not None else (20, 5)
    fig, ax = plt.subplots(figsize=figsize)

    # Close the figure even if drawing or saving fails, so repeated calls do
    # not accumulate open figures.
    try:
        left = summary_df["bin_left_min"].to_numpy(dtype=float)
        width = summary_df["bin_width_min"].to_numpy(dtype=float)
        counts = summary_df["attempt_count"].to_numpy(dtype=float)

        bars = ax.bar(
            left,
            counts,
            width=width,
            align="edge",
            color="#4C78A8",
        )

        title = "Distribution of Student Attempts Over Elapsed Time"
        if student_idx is not None:
            title = f"{title} (student_idx={student_idx})"
        ax.set_title(title)
        ax.set_xlabel("Elapsed Time Since Student's First Attempt")
        ax.set_ylabel("Number of Attempts")

        x_min = float(left.min())
        x_max = float((left + width).max())
        if plot_upper_limit_minutes is not None:
            x_max = min(x_max, float(plot_upper_limit_minutes))
        ax.set_xlim(x_min, x_max)

        ax.xaxis.set_major_locator(MaxNLocator(nbins=9))
        ax.xaxis.set_major_formatter(FuncFormatter(format_minutes_tick))
        ax.grid(axis="y", alpha=0.25, linewidth=0.8)
        ax.spines["top"].set_visible(False)
        ax.spines["right"].set_visible(False)

        if log_y:
            ax.set_yscale("log")

        # Percentage labels are only drawn when there are few bins (<= 40),
        # and only on bins holding at least 1% of the attempts.
        if len(summary_df) <= 40:
            for bar, pct in zip(bars, summary_df["percentage"]):
                bar_height = bar.get_height()
                if pct < 1.0 or bar_height <= 0:
                    continue
                ax.annotate(
                    f"{pct:.1f}%",
                    xy=(bar.get_x() + bar.get_width() / 2.0, bar_height),
                    xytext=(0, 3),
                    textcoords="offset points",
                    ha="center",
                    va="bottom",
                    fontsize=8,
                )

        fig.tight_layout()
        fig.savefig(output_path, dpi=400, bbox_inches="tight")
    finally:
        plt.close(fig)
|
| 294 |
+
|
| 295 |
+
|
| 296 |
+
def main() -> None:
    """Run the attempt-distribution pipeline from the command line.

    Parses arguments, validates them, loads the interactions, optionally
    narrows them to one student, writes the per-bin counts CSV and the plot,
    and prints a short run summary.

    Raises:
        FileNotFoundError: If the interactions file does not exist.
        ValueError: If ``--bin-time`` or ``--plot-upper-limit-minutes`` is
            not positive.
    """
    args = parse_args()

    if not args.interactions_path.exists():
        raise FileNotFoundError(
            f"Interactions file not found: {args.interactions_path}"
        )
    if args.bin_time <= 0:
        raise ValueError("--bin-time must be a positive number.")
    if args.plot_upper_limit_minutes is not None and args.plot_upper_limit_minutes <= 0:
        raise ValueError("--plot-upper-limit-minutes must be a positive number.")

    df = load_interactions(args.interactions_path, max_rows=args.max_rows)

    selected_student_id: str | None = None
    total_students = int(df["user_id"].nunique())
    if args.student_idx is not None:
        df, selected_student_id, total_students = select_student_by_index(
            df,
            args.student_idx,
        )

    # Per-student runs get the student id appended to both output filenames.
    output_plot_path = args.output_plot
    output_counts_path = args.output_counts
    if selected_student_id is not None:
        output_plot_path = append_student_id_to_output_path(
            output_plot_path,
            selected_student_id,
        )
        output_counts_path = append_student_id_to_output_path(
            output_counts_path,
            selected_student_id,
        )

    elapsed_minutes = compute_attempt_elapsed_minutes(df)
    summary = summarize_attempt_distribution(elapsed_minutes, args.bin_time)
    output_counts_path.parent.mkdir(parents=True, exist_ok=True)
    summary.to_csv(output_counts_path, index=False)

    plot_distribution(
        summary,
        output_plot_path,
        log_y=args.log_y,
        plot_upper_limit_minutes=args.plot_upper_limit_minutes,
        student_idx=args.student_idx,
    )

    total_attempts = int(summary["attempt_count"].sum())
    print("Done.")
    print(f"Interactions loaded: {len(df):,}")
    print(f"Students in loaded data: {total_students:,}")
    if selected_student_id is not None:
        print(f"Selected student idx: {args.student_idx}")
        print(f"Selected student id: {selected_student_id}")
    print(f"Attempts used: {total_attempts:,}")
    print(f"Bin width (min): {args.bin_time}")
    print(f"Saved plot: {output_plot_path}")
    print(f"Saved bin counts: {output_counts_path}")
|
| 354 |
+
|
| 355 |
+
|
| 356 |
+
# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
Code/plot_timegap_distribution.py
ADDED
|
@@ -0,0 +1,484 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""Plot the distribution of time gaps between consecutive student attempts.
|
| 3 |
+
|
| 4 |
+
This script reads FoundationalASSIST `Interactions.csv`, groups interactions by
|
| 5 |
+
student (`user_id`), computes the time difference between each pair of
|
| 6 |
+
consecutive attempts (`end_time`), discretizes these differences into bins, and
|
| 7 |
+
plots the resulting distribution.
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
from __future__ import annotations
|
| 11 |
+
|
| 12 |
+
import argparse
|
| 13 |
+
import math
|
| 14 |
+
from pathlib import Path
|
| 15 |
+
|
| 16 |
+
import matplotlib.pyplot as plt
|
| 17 |
+
import pandas as pd
|
| 18 |
+
from matplotlib.ticker import FuncFormatter, MaxNLocator
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
# Directory two levels above this file; default inputs and outputs are
# resolved against its "Data" and "Results" subdirectories.
_REPO_ROOT = Path(__file__).resolve().parent.parent

DEFAULT_INTERACTIONS_PATH = _REPO_ROOT / "Data" / "Interactions.csv"
DEFAULT_OUTPUT_PLOT = _REPO_ROOT / "Results" / "time_gap_distribution.png"
DEFAULT_OUTPUT_COUNTS = _REPO_ROOT / "Results" / "time_gap_distribution_counts.csv"

# Gap length (minutes) at which the cumulative probability is reported and
# marked on the plot.
CDF_MARKER_MINUTES = 60.0
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def parse_args() -> argparse.Namespace:
    """Define and parse the command-line options of the time-gap script.

    Returns:
        The parsed namespace with ``interactions_path``, ``output_plot``,
        ``output_counts``, ``max_rows``, ``keep_nonpositive_gaps``, ``log_y``,
        ``plot_upper_limit_minutes``, ``bin_time`` and ``student_idx``.
    """
    parser = argparse.ArgumentParser(
        description=(
            "Compute per-student consecutive-attempt time gaps from "
            "Interactions.csv and plot their binned distribution."
        )
    )
    parser.add_argument(
        "--interactions-path",
        type=Path,
        default=DEFAULT_INTERACTIONS_PATH,
        help="Path to Interactions.csv.",
    )
    parser.add_argument(
        "--output-plot",
        type=Path,
        default=DEFAULT_OUTPUT_PLOT,
        help="Path to save the output figure.",
    )
    parser.add_argument(
        "--output-counts",
        type=Path,
        default=DEFAULT_OUTPUT_COUNTS,
        help="Path to save bin counts as CSV.",
    )
    parser.add_argument(
        "--max-rows",
        type=int,
        default=None,
        help="Optional cap on rows after sorting (for quick debugging).",
    )
    parser.add_argument(
        "--keep-nonpositive-gaps",
        action="store_true",
        help=(
            "Keep zero/negative gaps. By default, only strictly positive "
            "gaps are used."
        ),
    )
    parser.add_argument(
        "--log-y",
        action="store_true",
        help="Use log scale on y-axis.",
    )
    parser.add_argument(
        "--plot-upper-limit-minutes",
        type=float,
        default=None,
        help=(
            "Optional upper limit for x-axis in minutes. "
            "If omitted, uses the full range implied by bins."
        ),
    )
    parser.add_argument(
        "--bin-time",
        type=float,
        default=None,
        help=(
            "Optional fixed bin width in minutes. "
            "For example, --bin-time 10 creates bins [0,10), [10,20), ..."
        ),
    )
    parser.add_argument(
        "--student-idx",
        type=int,
        default=None,
        help=(
            "Optional 0-based index of student to plot. Index is based on "
            "sorted unique user_id values in the loaded interactions."
        ),
    )
    return parser.parse_args()
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
def load_interactions(path: Path, max_rows: int | None = None) -> pd.DataFrame:
    """Load the minimum columns required for time-gap analysis.

    Reads the ``id``, ``user_id`` and ``end_time`` columns from the CSV at
    *path*, coerces them to int / string / UTC datetime, drops rows whose
    ``user_id`` or ``end_time`` is missing or unparseable, and sorts the
    result by ``(user_id, end_time, id)``.

    Args:
        path: Location of the interactions CSV.
        max_rows: Optional cap applied after sorting; ``None`` keeps all rows.

    Returns:
        The cleaned and sorted DataFrame.

    Raises:
        ValueError: If *max_rows* is given and is not positive.
    """
    # Validate first so a bad flag fails immediately rather than after a
    # potentially slow read of the whole CSV.
    if max_rows is not None and max_rows <= 0:
        raise ValueError("--max-rows must be a positive integer.")

    usecols = ["id", "user_id", "end_time"]
    df = pd.read_csv(path, usecols=usecols, low_memory=False)

    # Unparseable ids become -1 so they can still act as a sort tie-breaker.
    df["id"] = pd.to_numeric(df["id"], errors="coerce").fillna(-1).astype(int)
    df["user_id"] = df["user_id"].astype("string")
    df["end_time"] = pd.to_datetime(df["end_time"], errors="coerce", utc=True)

    df = df.dropna(subset=["user_id", "end_time"]).copy()
    # Stable sort: rows that tie on every key keep their file order.
    df = df.sort_values(["user_id", "end_time", "id"], kind="mergesort")

    if max_rows is not None:
        df = df.head(max_rows).copy()

    return df
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
def compute_time_gaps_minutes(df: pd.DataFrame) -> pd.Series:
    """Compute consecutive attempt gaps per student in minutes.

    For every row the gap is the difference between its ``end_time`` and the
    previous row's ``end_time`` within the same ``user_id`` group, in row
    order; the first row of each student has no predecessor and yields NaN.

    Args:
        df: Interactions with ``user_id`` and datetime ``end_time`` columns.
            Rows are compared in the order given, so callers wanting
            chronological gaps must pass data sorted by time per student.

    Returns:
        Series of gaps in minutes aligned with *df*'s index.
    """
    per_student_times = df.groupby("user_id", sort=False)["end_time"]
    gaps_seconds = per_student_times.diff().dt.total_seconds()
    return gaps_seconds / 60.0
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
def default_bin_edges_minutes() -> list[float]:
    """Return the base minute-scale bin edges.

    The edges grow from one minute up to one week (10080 minutes). Callers
    may append a final edge beyond the last one to cover a longer data tail.

    Returns:
        A fresh list of ascending edges, safe for the caller to mutate.
    """
    return [
        0.0,
        1.0,
        5.0,
        10.0,
        30.0,
        60.0,  # 1 hour
        180.0,  # 3 hours
        720.0,  # 12 hours
        1440.0,  # 1 day
        4320.0,  # 3 days
        10080.0,  # 7 days
    ]
|
| 153 |
+
|
| 154 |
+
|
| 155 |
+
def build_bin_edges_minutes(valid: pd.Series) -> list[float]:
    """Build finite plotting edges so bar widths are proportional on x-axis.

    Starts from the default edges and, when the data reaches or exceeds the
    last default edge, appends one finite terminal edge strictly beyond the
    largest gap.

    Args:
        valid: Non-empty Series of gaps in minutes.

    Returns:
        Ascending list of bin edges.
    """
    edges = default_bin_edges_minutes()
    base_tail_start = edges[-1]
    max_gap = float(valid.max())

    # `>=` rather than `>`: bins are half-open [left, right), so a gap equal
    # to the last base edge would otherwise fall outside every bin and be
    # dropped from the counts.
    if max_gap >= base_tail_start:
        # Add a finite terminal edge that fully contains the data tail.
        tail_edge = max(base_tail_start + 60.0, max_gap * 1.05)
        edges.append(tail_edge)

    return edges
|
| 167 |
+
|
| 168 |
+
|
| 169 |
+
def build_fixed_width_bin_edges_minutes(
    valid: pd.Series, bin_time_minutes: float
) -> list[float]:
    """Build fixed-width bin edges covering the min/max observed gaps.

    Edges are multiples of *bin_time_minutes*. The bins are intended to be
    half-open ``[left, right)``, so when the maximum falls exactly on the last
    edge one more edge is appended to keep that value inside a bin.

    Args:
        valid: Non-empty Series of gaps in minutes (no NaN).
        bin_time_minutes: Positive bin width in minutes.

    Returns:
        Ascending list of at least two bin edges.

    Raises:
        ValueError: If *bin_time_minutes* is not positive, or the series is
            empty (its min/max are NaN and cannot be floored).
    """
    # `not > 0` also rejects NaN, which would otherwise poison every edge.
    if not bin_time_minutes > 0:
        raise ValueError("bin_time_minutes must be a positive number.")

    min_gap = float(valid.min())
    max_gap = float(valid.max())

    start = bin_time_minutes * math.floor(min_gap / bin_time_minutes)
    end = bin_time_minutes * math.ceil(max_gap / bin_time_minutes)

    # Normalise a (possibly negative or integer) zero to plain 0.0.
    if math.isclose(start, 0.0, abs_tol=1e-12):
        start = 0.0
    # All values share one edge: still emit a single full-width bin.
    if math.isclose(end, start, abs_tol=1e-12):
        end = start + bin_time_minutes

    n_bins = int(round((end - start) / bin_time_minutes))
    edges = [start + i * bin_time_minutes for i in range(n_bins + 1)]
    # Right edges are exclusive, so a max sitting on the last edge needs
    # one extra bin to be counted.
    if edges[-1] <= max_gap:
        edges.append(edges[-1] + bin_time_minutes)

    return edges
|
| 190 |
+
|
| 191 |
+
|
| 192 |
+
def format_bin_bound(minutes: float) -> str:
    """Format a bin boundary for use in a label.

    Whole numbers are printed without a decimal part; other values are
    rounded to two decimals with trailing zeros removed.

    Args:
        minutes: Boundary value in minutes.

    Returns:
        The boundary as text, e.g. ``"10"`` or ``"0.5"``.
    """
    nearest = round(minutes)
    if math.isclose(minutes, nearest, abs_tol=1e-9):
        return str(int(nearest))
    return f"{minutes:.2f}".rstrip("0").rstrip(".")
|
| 196 |
+
|
| 197 |
+
|
| 198 |
+
def make_bin_labels(
    edges: list[float], open_tail_from: float | None = None
) -> list[str]:
    """Build one text label per bin from consecutive edges.

    Bins are labelled ``"[left, right) min"``. When *open_tail_from* is given
    and the last bin starts at or beyond it, that last bin is labelled as an
    open tail, ``">= left min"``, instead.

    Args:
        edges: Ascending bin edges (``len(edges) - 1`` bins).
        open_tail_from: Threshold from which the final bin is shown as open,
            or ``None`` to label every bin as a closed-open interval.

    Returns:
        The labels in bin order.
    """
    labels: list[str] = []
    last_idx = len(edges) - 2
    for idx, (left, right) in enumerate(zip(edges[:-1], edges[1:])):
        is_open_tail = (
            open_tail_from is not None
            and idx == last_idx
            and left >= open_tail_from
        )
        if is_open_tail:
            labels.append(f">= {format_bin_bound(left)} min")
        else:
            labels.append(f"[{format_bin_bound(left)}, {format_bin_bound(right)}) min")
    return labels
|
| 209 |
+
|
| 210 |
+
|
| 211 |
+
def format_minutes_tick(value: float, _pos: float) -> str:
    """Format an axis value given in minutes as a compact m/h/d label.

    Values below 60 are shown in minutes, values below 1440 in hours, and
    everything else in days. Whole amounts are printed without decimals
    (``"2h"``); fractional amounts keep one decimal (``"2.5m"``, ``"1.5h"``)
    so that neighbouring ticks do not round to the same or a wrong label.

    Args:
        value: Tick position in minutes.
        _pos: Tick index supplied by the formatter callback; unused.

    Returns:
        The label text.
    """
    if value < 60:
        amount, unit = value, "m"
    elif value < 1440:
        amount, unit = value / 60.0, "h"
    else:
        amount, unit = value / 1440.0, "d"

    if math.isclose(amount, round(amount), abs_tol=1e-9):
        return f"{int(round(amount))}{unit}"
    return f"{amount:.1f}{unit}"
|
| 225 |
+
|
| 226 |
+
|
| 227 |
+
def summarize_binned_distribution(
    gaps_minutes: pd.Series,
    keep_nonpositive: bool,
    bin_time_minutes: float | None = None,
) -> pd.DataFrame:
    """Bin the time gaps and tabulate counts, shares and densities per bin.

    Args:
        gaps_minutes: Gaps in minutes; NaN entries are ignored.
        keep_nonpositive: Keep zero/negative gaps instead of discarding them.
        bin_time_minutes: Fixed bin width in minutes; ``None`` selects the
            default variable-width edges (with a tail bin when needed).

    Returns:
        One row per bin with columns ``bin`` (label), ``bin_left_min``,
        ``bin_right_min``, ``bin_width_min``, ``count``, ``probability``,
        ``percentage`` and ``density_per_min`` (probability divided by bin
        width).

    Raises:
        ValueError: If no gap remains after filtering.
    """
    valid = filter_valid_gaps(gaps_minutes, keep_nonpositive)

    if valid.empty:
        raise ValueError("No valid time gaps found after filtering.")

    if bin_time_minutes is not None:
        edges = build_fixed_width_bin_edges_minutes(valid, bin_time_minutes)
        open_tail_from = None
    else:
        base_edges = default_bin_edges_minutes()
        edges = build_bin_edges_minutes(valid)
        # An extra edge means a tail bin was added; label it as open-ended.
        open_tail_from = base_edges[-1] if len(edges) > len(base_edges) else None

    labels = make_bin_labels(edges, open_tail_from=open_tail_from)
    # Half-open [left, right) bins.
    binned = pd.cut(valid, bins=edges, labels=labels, right=False, include_lowest=True)

    # sort=False keeps bin order; the table below pairs counts with the
    # edges positionally, so it relies on one count per bin.
    counts = binned.value_counts(sort=False)
    probabilities = (counts / counts.sum()).astype(float)

    bin_left = pd.Series(edges[:-1], dtype=float)
    bin_right = pd.Series(edges[1:], dtype=float)
    bin_width = bin_right - bin_left
    probabilities_np = probabilities.to_numpy(dtype=float)
    # Dividing by width makes bars of unequal width comparable by area.
    density_per_min = probabilities_np / bin_width.to_numpy(dtype=float)

    summary = pd.DataFrame(
        {
            "bin": counts.index.astype(str),
            "bin_left_min": bin_left.to_numpy(),
            "bin_right_min": bin_right.to_numpy(),
            "bin_width_min": bin_width.to_numpy(),
            "count": counts.values,
            "probability": probabilities_np,
            "percentage": probabilities_np * 100.0,
            "density_per_min": density_per_min,
        }
    )
    return summary
|
| 270 |
+
|
| 271 |
+
|
| 272 |
+
def filter_valid_gaps(gaps_minutes: pd.Series, keep_nonpositive: bool) -> pd.Series:
    """Drop NaN gaps and, unless asked otherwise, non-positive gaps.

    Args:
        gaps_minutes: Gaps in minutes, possibly containing NaN.
        keep_nonpositive: When ``False``, only strictly positive gaps are kept.

    Returns:
        A new Series with the retained gaps (original index labels kept).
    """
    valid = gaps_minutes.dropna().copy()
    if keep_nonpositive:
        return valid
    return valid[valid > 0]
|
| 277 |
+
|
| 278 |
+
|
| 279 |
+
def cumulative_probability_at_minutes(
    gaps_minutes: pd.Series,
    threshold_minutes: float,
    keep_nonpositive: bool,
) -> float:
    """Return the share of valid gaps that are at most *threshold_minutes*.

    Args:
        gaps_minutes: Gaps in minutes, possibly containing NaN.
        threshold_minutes: Inclusive upper bound in minutes.
        keep_nonpositive: Passed to the gap filter; when ``False`` only
            strictly positive gaps are considered.

    Returns:
        Fraction in ``[0, 1]`` of retained gaps ``<= threshold_minutes``.

    Raises:
        ValueError: If no gap remains after filtering.
    """
    valid = filter_valid_gaps(gaps_minutes, keep_nonpositive)
    if valid.empty:
        raise ValueError("No valid time gaps found after filtering.")
    within_threshold = valid <= threshold_minutes
    return float(within_threshold.mean())
|
| 288 |
+
|
| 289 |
+
|
| 290 |
+
def select_student_by_index(
    df: pd.DataFrame,
    student_idx: int,
) -> tuple[pd.DataFrame, str, int]:
    """Select one student's interactions by 0-based index over unique IDs.

    The index refers to the order in which distinct ``user_id`` values first
    appear in *df*.

    Args:
        df: Interactions containing a ``user_id`` column.
        student_idx: 0-based position of the student to select.

    Returns:
        A tuple ``(selected_df, selected_student_id, total_students)`` holding
        a copy of that student's rows, the student id as ``str``, and the
        number of distinct students in *df*.

    Raises:
        ValueError: If *df* has no students or *student_idx* is out of range.
    """
    student_ids = df["user_id"].drop_duplicates().tolist()
    total_students = len(student_ids)

    if total_students == 0:
        raise ValueError("No students found in loaded interactions.")
    if not 0 <= student_idx < total_students:
        raise ValueError(
            f"--student-idx must be in [0, {total_students - 1}], got {student_idx}."
        )

    selected_student_id = str(student_ids[student_idx])
    selected_df = df[df["user_id"] == selected_student_id].copy()
    return selected_df, selected_student_id, total_students
|
| 308 |
+
|
| 309 |
+
|
| 310 |
+
def append_student_id_to_output_path(path: Path, student_id: str) -> Path:
    """Append a filename-safe student-id suffix to an output path.

    Every character of *student_id* that is not alphanumeric, ``-`` or ``_``
    is replaced by ``_``; the result is inserted between the stem and the
    suffix of *path* (``plot.png`` -> ``plot_<id>.png``).

    Args:
        path: Original output path.
        student_id: Identifier to embed in the filename.

    Returns:
        A new path in the same directory with the suffixed filename.
    """
    safe_id = "".join(
        ch if ch.isalnum() or ch in ("-", "_") else "_" for ch in student_id
    )
    return path.with_name(f"{path.stem}_{safe_id}{path.suffix}")
|
| 316 |
+
|
| 317 |
+
|
| 318 |
+
def plot_distribution(
    summary_df: pd.DataFrame,
    output_path: Path,
    log_y: bool = False,
    plot_upper_limit_minutes: float | None = None,
    cdf_marker_minutes: float = CDF_MARKER_MINUTES,
    cdf_at_marker: float | None = None,
    student_idx: int | None = None,
) -> None:
    """Create and save the time-gap density histogram.

    Args:
        summary_df: Per-bin table with ``bin_left_min``, ``bin_width_min``,
            ``density_per_min`` and ``percentage`` columns.
        output_path: Where to save the figure; parent directories are created.
        log_y: Use a logarithmic y-axis.
        plot_upper_limit_minutes: Optional cap on the x-axis upper limit.
        cdf_marker_minutes: x position (minutes) of the dashed marker line.
        cdf_at_marker: Cumulative probability at the marker as a fraction;
            when given it is shown as a percentage in the legend entry.
        student_idx: When given, a narrower figure is used and the index is
            added to the title.
    """
    output_path.parent.mkdir(parents=True, exist_ok=True)

    plt.style.use("seaborn-v0_8-whitegrid")

    figsize = (10, 5) if student_idx is not None else (20, 5)
    fig, ax = plt.subplots(figsize=figsize)

    # Close the figure even if drawing or saving fails, so repeated calls do
    # not accumulate open figures.
    try:
        left = summary_df["bin_left_min"].to_numpy(dtype=float)
        width = summary_df["bin_width_min"].to_numpy(dtype=float)
        density = summary_df["density_per_min"].to_numpy(dtype=float)

        bars = ax.bar(
            left,
            density,
            width=width,
            align="edge",
            color="#4C78A8",
        )

        title = "Distribution of Time Gaps Between Consecutive Attempts"
        if student_idx is not None:
            title = f"{title} (student_idx={student_idx})"
        ax.set_title(title)
        ax.set_xlabel("Time Gap")
        ax.set_ylabel("Probability Density (1/min)")

        x_min = float(left.min())
        x_max = float((left + width).max())
        if plot_upper_limit_minutes is not None:
            x_max = min(x_max, float(plot_upper_limit_minutes))
        ax.set_xlim(x_min, x_max)

        ax.xaxis.set_major_locator(MaxNLocator(nbins=9))
        ax.xaxis.set_major_formatter(FuncFormatter(format_minutes_tick))
        ax.grid(axis="y", alpha=0.25, linewidth=0.8)
        ax.spines["top"].set_visible(False)
        ax.spines["right"].set_visible(False)

        if log_y:
            ax.set_yscale("log")

        marker_label = f"CDF <= {int(cdf_marker_minutes)} min"
        if cdf_at_marker is not None:
            marker_label = f"{marker_label}: {cdf_at_marker * 100:.1f}%"
        ax.axvline(
            cdf_marker_minutes,
            color="#E45756",
            linestyle="--",
            linewidth=1.6,
            label=marker_label,
        )
        ax.legend(loc="upper right", frameon=False, fontsize=9)

        # Label non-trivial bins (>= 1% of gaps) for readability in papers.
        for bar, pct in zip(bars, summary_df["percentage"]):
            bar_height = bar.get_height()
            if pct < 1.0 or bar_height <= 0:
                continue
            ax.annotate(
                f"{pct:.1f}%",
                xy=(bar.get_x() + bar.get_width() / 2.0, bar_height),
                xytext=(0, 3),
                textcoords="offset points",
                ha="center",
                va="bottom",
                fontsize=8,
            )

        fig.tight_layout()
        fig.savefig(output_path, dpi=400, bbox_inches="tight")
    finally:
        plt.close(fig)
|
| 403 |
+
|
| 404 |
+
|
| 405 |
+
def main() -> None:
    """Run the time-gap distribution pipeline from the command line.

    Parses and validates arguments, loads the interactions, optionally
    narrows them to one student, writes the per-bin counts CSV and the plot,
    and prints a short run summary.

    Raises:
        FileNotFoundError: If the interactions file does not exist.
        ValueError: If ``--plot-upper-limit-minutes`` or ``--bin-time`` is
            given and is not positive.
    """
    args = parse_args()

    if not args.interactions_path.exists():
        raise FileNotFoundError(
            f"Interactions file not found: {args.interactions_path}"
        )
    # Check the numeric flags before the (potentially slow) CSV load so that
    # a bad value is reported immediately.
    if args.plot_upper_limit_minutes is not None and args.plot_upper_limit_minutes <= 0:
        raise ValueError("--plot-upper-limit-minutes must be a positive number.")
    if args.bin_time is not None and args.bin_time <= 0:
        raise ValueError("--bin-time must be a positive number.")

    df = load_interactions(args.interactions_path, max_rows=args.max_rows)
    selected_student_id: str | None = None
    total_students = int(df["user_id"].nunique())
    if args.student_idx is not None:
        df, selected_student_id, total_students = select_student_by_index(
            df,
            args.student_idx,
        )

    # Per-student runs get the student id appended to both output filenames.
    output_plot_path = args.output_plot
    output_counts_path = args.output_counts
    if selected_student_id is not None:
        output_plot_path = append_student_id_to_output_path(
            output_plot_path,
            selected_student_id,
        )
        output_counts_path = append_student_id_to_output_path(
            output_counts_path,
            selected_student_id,
        )

    gaps_minutes = compute_time_gaps_minutes(df)

    summary = summarize_binned_distribution(
        gaps_minutes,
        keep_nonpositive=args.keep_nonpositive_gaps,
        bin_time_minutes=args.bin_time,
    )
    output_counts_path.parent.mkdir(parents=True, exist_ok=True)
    summary.to_csv(output_counts_path, index=False)

    cdf_at_marker = cumulative_probability_at_minutes(
        gaps_minutes=gaps_minutes,
        threshold_minutes=CDF_MARKER_MINUTES,
        keep_nonpositive=args.keep_nonpositive_gaps,
    )

    plot_distribution(
        summary,
        output_plot_path,
        log_y=args.log_y,
        plot_upper_limit_minutes=args.plot_upper_limit_minutes,
        cdf_marker_minutes=CDF_MARKER_MINUTES,
        cdf_at_marker=cdf_at_marker,
        student_idx=args.student_idx,
    )

    total_pairs = int(summary["count"].sum())
    print("Done.")
    print(f"Interactions loaded: {len(df):,}")
    print(f"Students in loaded data: {total_students:,}")
    if selected_student_id is not None:
        print(f"Selected student idx: {args.student_idx}")
        print(f"Selected student id: {selected_student_id}")
    print(f"Consecutive attempt pairs used: {total_pairs:,}")
    if args.bin_time is not None:
        print(f"Bin width (min): {args.bin_time}")
    print(
        f"Cumulative P(gap <= {int(CDF_MARKER_MINUTES)} min): "
        f"{cdf_at_marker * 100:.2f}%"
    )
    print(f"Saved plot: {output_plot_path}")
    print(f"Saved bin counts: {output_counts_path}")
|
| 481 |
+
|
| 482 |
+
|
| 483 |
+
# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
Code/plot_totaltime_distribution.py
ADDED
|
@@ -0,0 +1,433 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""Plot distribution of per-student total time from first to last attempt.
|
| 3 |
+
|
| 4 |
+
This script reads FoundationalASSIST `Interactions.csv`, groups interactions by
|
| 5 |
+
student (`user_id`), computes each student's total time span from first to last
|
| 6 |
+
recorded attempt (`end_time`), discretizes these totals into bins, and plots
|
| 7 |
+
the resulting distribution.
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
from __future__ import annotations
|
| 11 |
+
|
| 12 |
+
import argparse
|
| 13 |
+
import math
|
| 14 |
+
from pathlib import Path
|
| 15 |
+
|
| 16 |
+
import matplotlib.pyplot as plt
|
| 17 |
+
import pandas as pd
|
| 18 |
+
from matplotlib.ticker import FuncFormatter, MaxNLocator
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
DEFAULT_INTERACTIONS_PATH = (
|
| 22 |
+
Path(__file__).resolve().parent.parent / "Data" / "Interactions.csv"
|
| 23 |
+
)
|
| 24 |
+
DEFAULT_OUTPUT_PLOT = (
|
| 25 |
+
Path(__file__).resolve().parent.parent / "Results" / "total_time_distribution.png"
|
| 26 |
+
)
|
| 27 |
+
DEFAULT_OUTPUT_COUNTS = (
|
| 28 |
+
Path(__file__).resolve().parent.parent
|
| 29 |
+
/ "Results"
|
| 30 |
+
/ "total_time_distribution_counts.csv"
|
| 31 |
+
)
|
| 32 |
+
DEFAULT_CDF_MARKER_MINUTES = 1051200.0
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def parse_args() -> argparse.Namespace:
    """Define the command-line options of this script and parse them.

    Returns:
        The namespace produced by ``ArgumentParser.parse_args()``.
    """
    cli = argparse.ArgumentParser(
        description=(
            "Compute per-student total time (first to last attempt) from "
            "Interactions.csv and plot the binned distribution."
        )
    )

    # (flag, keyword arguments) pairs, registered in this exact order so the
    # generated --help output keeps its layout.
    option_specs: list[tuple[str, dict[str, object]]] = [
        (
            "--interactions-path",
            {
                "type": Path,
                "default": DEFAULT_INTERACTIONS_PATH,
                "help": "Path to Interactions.csv.",
            },
        ),
        (
            "--output-plot",
            {
                "type": Path,
                "default": DEFAULT_OUTPUT_PLOT,
                "help": "Path to save the output figure.",
            },
        ),
        (
            "--output-counts",
            {
                "type": Path,
                "default": DEFAULT_OUTPUT_COUNTS,
                "help": "Path to save bin counts as CSV.",
            },
        ),
        (
            "--max-rows",
            {
                "type": int,
                "default": None,
                "help": "Optional cap on rows after sorting (for quick debugging).",
            },
        ),
        (
            "--keep-nonpositive-total-times",
            {
                "action": "store_true",
                "help": (
                    "Keep zero/negative total times. By default, only strictly "
                    "positive total times are used."
                ),
            },
        ),
        (
            "--log-y",
            {
                "action": "store_true",
                "help": "Use log scale on y-axis.",
            },
        ),
        (
            "--plot-upper-limit-minutes",
            {
                "type": float,
                "default": None,
                "help": (
                    "Optional upper limit for x-axis in minutes. "
                    "If omitted, uses the full range implied by bins."
                ),
            },
        ),
        (
            "--bin-time",
            {
                "type": float,
                "default": None,
                "help": (
                    "Optional fixed bin width in minutes. "
                    "For example, --bin-time 60 creates hourly bins."
                ),
            },
        ),
        (
            "--cdf-marker-minutes",
            {
                "type": float,
                "default": DEFAULT_CDF_MARKER_MINUTES,
                "help": "Threshold (in minutes) for plotting cumulative probability marker.",
            },
        ),
    ]
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)

    return cli.parse_args()
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
def load_interactions(path: Path, max_rows: int | None = None) -> pd.DataFrame:
    """Load the minimum interaction fields required for timing analysis.

    Only the ``id``, ``user_id`` and ``end_time`` columns are read. Rows whose
    ``user_id`` is missing or whose ``end_time`` cannot be parsed are dropped,
    and the result is sorted by ``user_id``, then ``end_time``, then ``id``.

    Args:
        path: Location of the interactions CSV file.
        max_rows: Optional cap applied to the sorted frame (first rows kept).

    Returns:
        The cleaned, sorted interactions frame.

    Raises:
        ValueError: If ``max_rows`` is given but is not strictly positive.
    """
    # Validate up front so a bad flag fails immediately instead of after the
    # whole CSV has been read, converted and sorted.
    if max_rows is not None and max_rows <= 0:
        raise ValueError("--max-rows must be a positive integer.")

    df = pd.read_csv(path, usecols=["id", "user_id", "end_time"], low_memory=False)

    # Unparseable ids become -1 so they still take part in the tie-break sort.
    df["id"] = pd.to_numeric(df["id"], errors="coerce").fillna(-1).astype(int)
    df["user_id"] = df["user_id"].astype("string")
    # Unparseable timestamps become NaT and are removed by the dropna below.
    df["end_time"] = pd.to_datetime(df["end_time"], errors="coerce", utc=True)

    df = df.dropna(subset=["user_id", "end_time"])
    # Stable sort keeps file order among rows that tie on all three keys.
    df = df.sort_values(["user_id", "end_time", "id"], kind="mergesort")

    if max_rows is not None:
        df = df.head(max_rows)

    return df.copy()
|
| 125 |
+
|
| 126 |
+
|
| 127 |
+
def compute_student_total_times_minutes(df: pd.DataFrame) -> pd.Series:
    """Return each student's span, in minutes, between first and last attempt.

    Args:
        df: Interactions with a ``user_id`` column and a datetime ``end_time``
            column.

    Returns:
        A series named ``total_time_minutes`` indexed by ``user_id``.
    """
    bounds = df.groupby("user_id", sort=False)["end_time"].agg(["min", "max"])
    span = bounds["max"] - bounds["min"]
    minutes = span.dt.total_seconds() / 60.0
    minutes.name = "total_time_minutes"
    return minutes
|
| 135 |
+
|
| 136 |
+
|
| 137 |
+
def default_bin_edges_minutes() -> list[float]:
    """Return the base bin edges (in minutes) for the total-time distribution.

    The edges grow roughly geometrically from 0 up to 100800 minutes.
    A fresh list is returned on every call, so callers may mutate it.
    """
    return [
        0.0,
        10.0,
        30.0,
        60.0,
        180.0,
        360.0,
        720.0,
        1440.0,
        2880.0,
        4320.0,
        10080.0,
        20160.0,
        43200.0,
        100800.0,
    ]


def build_bin_edges_minutes(valid: pd.Series) -> list[float]:
    """Build finite plotting edges for proportional-width histogram bars.

    Starts from the base edges and, when the data reaches the last base edge,
    appends one extra finite edge so the largest values get a tail bin.

    Args:
        valid: Non-empty series of total times in minutes.

    Returns:
        The base edges, optionally followed by one tail edge.
    """
    edges = default_bin_edges_minutes()
    base_tail_start = edges[-1]
    max_total = float(valid.max())

    # The bins are used as right-open intervals, so a value sitting exactly on
    # the last base edge would fall outside every bin. Use >= (not >) so that
    # boundary value also gets a tail bin instead of being silently dropped.
    if max_total >= base_tail_start:
        # At least 60 minutes wide, otherwise 5% headroom above the maximum.
        tail_edge = max(base_tail_start + 60.0, max_total * 1.05)
        edges.append(tail_edge)

    return edges
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
def build_fixed_width_bin_edges_minutes(
    valid: pd.Series, bin_time_minutes: float
) -> list[float]:
    """Return equally spaced bin edges that cover every observed total time.

    The first edge is the largest multiple of ``bin_time_minutes`` not above
    the minimum; edges continue until the maximum lies strictly below the
    last edge.

    Args:
        valid: Non-empty series of total times in minutes.
        bin_time_minutes: Bin width in minutes.

    Returns:
        The ordered list of edges, containing at least two values.
    """
    width = bin_time_minutes
    lowest, highest = float(valid.min()), float(valid.max())

    first_edge = width * math.floor(lowest / width)
    last_edge = width * math.ceil(highest / width)

    # Normalise a near-zero start to exactly 0.0.
    if math.isclose(first_edge, 0.0, abs_tol=1e-12):
        first_edge = 0.0
    # Degenerate range: guarantee at least one bin.
    if math.isclose(last_edge, first_edge, abs_tol=1e-12):
        last_edge = first_edge + width

    bin_count = int(round((last_edge - first_edge) / width))
    edges = [first_edge + step * width for step in range(bin_count + 1)]

    # One more edge when the maximum sits on (or past) the final edge.
    if edges[-1] <= highest:
        edges.append(edges[-1] + width)

    return edges
|
| 191 |
+
|
| 192 |
+
|
| 193 |
+
def format_bin_bound(minutes: float) -> str:
    """Format a bin boundary compactly.

    Values within 1e-9 of a whole number are printed as integers; anything
    else is printed with up to two decimals and no trailing zeros.
    """
    nearest = round(minutes)
    if not math.isclose(minutes, nearest, abs_tol=1e-9):
        return f"{minutes:.2f}".rstrip("0").rstrip(".")
    return str(int(nearest))
|
| 197 |
+
|
| 198 |
+
|
| 199 |
+
def make_bin_labels(
    edges: list[float], open_tail_from: float | None = None
) -> list[str]:
    """Create one human-readable label per bin defined by consecutive edges.

    Args:
        edges: Ordered bin edges; ``len(edges) - 1`` labels are produced.
        open_tail_from: When given, the last bin is labelled as an open
            ``>= left`` tail provided its left edge is at or above this value.

    Returns:
        Labels such as ``"[0, 10) min"`` or ``">= 100800 min"``.
    """
    tail_idx = len(edges) - 2

    def describe(idx: int, lo: float, hi: float) -> str:
        is_open_tail = (
            open_tail_from is not None and idx == tail_idx and lo >= open_tail_from
        )
        if is_open_tail:
            return f">= {format_bin_bound(lo)} min"
        return f"[{format_bin_bound(lo)}, {format_bin_bound(hi)}) min"

    return [
        describe(idx, lo, hi)
        for idx, (lo, hi) in enumerate(zip(edges[:-1], edges[1:]))
    ]
|
| 210 |
+
|
| 211 |
+
|
| 212 |
+
def format_minutes_tick(value: float, _pos: float) -> str:
    """Format an axis tick given in minutes as minutes, hours or days.

    The two-argument signature is the one ``FuncFormatter`` expects; the tick
    position is unused. Values below 60 use ``m``, below 1440 use ``h`` and
    everything else uses ``d``, each rounded to zero decimals.
    """
    for upper_bound, divisor, suffix in ((60, 1, "m"), (1440, 60, "h")):
        if value < upper_bound:
            return f"{value / divisor:.0f}{suffix}"
    return f"{value / 1440:.0f}d"
|
| 218 |
+
|
| 219 |
+
|
| 220 |
+
def filter_valid_total_times(
    total_times_minutes: pd.Series, keep_nonpositive: bool
) -> pd.Series:
    """Drop missing totals and, unless asked otherwise, non-positive ones.

    Args:
        total_times_minutes: Per-student total times in minutes.
        keep_nonpositive: When true, zero and negative totals are retained.

    Returns:
        A new series; the input is left untouched.
    """
    cleaned = total_times_minutes.dropna().copy()
    if keep_nonpositive:
        return cleaned
    return cleaned[cleaned > 0]
|
| 227 |
+
|
| 228 |
+
|
| 229 |
+
def summarize_binned_distribution(
    total_times_minutes: pd.Series,
    keep_nonpositive: bool,
    bin_time_minutes: float | None = None,
) -> pd.DataFrame:
    """Bin the total times and tabulate counts, probabilities and densities.

    Args:
        total_times_minutes: Per-student total times in minutes.
        keep_nonpositive: Forwarded to ``filter_valid_total_times``.
        bin_time_minutes: Fixed bin width; when ``None`` the default edges
            (plus an optional tail bin) are used.

    Returns:
        One row per bin with the columns ``bin``, ``bin_left_min``,
        ``bin_right_min``, ``bin_width_min``, ``count``, ``probability``,
        ``percentage`` and ``density_per_min``.

    Raises:
        ValueError: If nothing is left after filtering.
    """
    usable = filter_valid_total_times(total_times_minutes, keep_nonpositive)
    if usable.empty:
        raise ValueError("No valid total times found after filtering.")

    # Only the default edges can grow a tail bin that is labelled as open.
    open_tail_from = None
    if bin_time_minutes is None:
        base_edges = default_bin_edges_minutes()
        edges = build_bin_edges_minutes(usable)
        if len(edges) > len(base_edges):
            open_tail_from = base_edges[-1]
    else:
        edges = build_fixed_width_bin_edges_minutes(usable, bin_time_minutes)

    bin_names = make_bin_labels(edges, open_tail_from=open_tail_from)
    # Left-closed, right-open intervals.
    assigned = pd.cut(
        usable,
        bins=edges,
        labels=bin_names,
        right=False,
        include_lowest=True,
    )

    counts = assigned.value_counts(sort=False)
    share = (counts / counts.sum()).astype(float).to_numpy(dtype=float)

    lower = pd.Series(edges[:-1], dtype=float)
    upper = pd.Series(edges[1:], dtype=float)
    widths = upper - lower
    # Dividing by the width makes bars of unequal width comparable.
    density = share / widths.to_numpy(dtype=float)

    columns = {
        "bin": counts.index.astype(str),
        "bin_left_min": lower.to_numpy(),
        "bin_right_min": upper.to_numpy(),
        "bin_width_min": widths.to_numpy(),
        "count": counts.values,
        "probability": share,
        "percentage": share * 100.0,
        "density_per_min": density,
    }
    return pd.DataFrame(columns)
|
| 272 |
+
|
| 273 |
+
|
| 274 |
+
def cumulative_probability_at_minutes(
    total_times_minutes: pd.Series,
    threshold_minutes: float,
    keep_nonpositive: bool,
) -> float:
    """Return the fraction of valid total times at or below a threshold.

    Args:
        total_times_minutes: Per-student total times in minutes.
        threshold_minutes: Inclusive upper bound in minutes.
        keep_nonpositive: Forwarded to ``filter_valid_total_times``.

    Raises:
        ValueError: If nothing is left after filtering.
    """
    usable = filter_valid_total_times(total_times_minutes, keep_nonpositive)
    if usable.empty:
        raise ValueError("No valid total times found after filtering.")

    within_threshold = usable <= threshold_minutes
    return float(within_threshold.mean())
|
| 283 |
+
|
| 284 |
+
|
| 285 |
+
def plot_distribution(
    summary_df: pd.DataFrame,
    output_path: Path,
    log_y: bool = False,
    plot_upper_limit_minutes: float | None = None,
    cdf_marker_minutes: float = DEFAULT_CDF_MARKER_MINUTES,
    cdf_at_marker: float | None = None,
) -> None:
    """Draw the binned total-time distribution as a density histogram and save it.

    Args:
        summary_df: Frame with ``bin_left_min``, ``bin_width_min``,
            ``density_per_min`` and ``percentage`` columns.
        output_path: Destination image file; its parent directory is created.
        log_y: Use a logarithmic y-axis.
        plot_upper_limit_minutes: Optional cap on the right x-axis limit.
        cdf_marker_minutes: x position of the dashed vertical marker.
        cdf_at_marker: Cumulative probability shown in the marker's legend
            entry when provided.
    """
    output_path.parent.mkdir(parents=True, exist_ok=True)

    plt.style.use("seaborn-v0_8-whitegrid")
    fig, ax = plt.subplots(figsize=(20, 5))

    bin_starts = summary_df["bin_left_min"].to_numpy(dtype=float)
    bin_widths = summary_df["bin_width_min"].to_numpy(dtype=float)
    densities = summary_df["density_per_min"].to_numpy(dtype=float)

    # Bars start at the left bin edge and span the true bin width.
    bars = ax.bar(
        bin_starts,
        densities,
        width=bin_widths,
        align="edge",
        color="#4C78A8",
    )

    ax.set_title("Distribution of Student Total Time (First to Last Attempt)")
    ax.set_xlabel("Total Time Per Student")
    ax.set_ylabel("Probability Density (1/min)")

    left_limit = float(bin_starts.min())
    right_limit = float((bin_starts + bin_widths).max())
    if plot_upper_limit_minutes is not None:
        right_limit = min(right_limit, float(plot_upper_limit_minutes))
    ax.set_xlim(left_limit, right_limit)

    ax.xaxis.set_major_locator(MaxNLocator(nbins=9))
    ax.xaxis.set_major_formatter(FuncFormatter(format_minutes_tick))
    ax.grid(axis="y", alpha=0.25, linewidth=0.8)
    for side in ("top", "right"):
        ax.spines[side].set_visible(False)

    if log_y:
        ax.set_yscale("log")

    base_label = f"CDF <= {format_bin_bound(cdf_marker_minutes)} min"
    marker_label = (
        base_label
        if cdf_at_marker is None
        else f"{base_label}: {cdf_at_marker * 100:.1f}%"
    )
    ax.axvline(
        cdf_marker_minutes,
        color="#E45756",
        linestyle="--",
        linewidth=1.6,
        label=marker_label,
    )
    ax.legend(loc="upper right", frameon=False, fontsize=9)

    # Per-bar percentages are only drawn when there are few enough bins to
    # keep the figure readable.
    if len(summary_df) <= 40:
        for rect, share_pct in zip(bars, summary_df["percentage"]):
            bar_top = rect.get_height()
            if share_pct < 1.0 or bar_top <= 0:
                continue
            ax.annotate(
                f"{share_pct:.1f}%",
                xy=(rect.get_x() + rect.get_width() / 2.0, bar_top),
                xytext=(0, 3),
                textcoords="offset points",
                ha="center",
                va="bottom",
                fontsize=8,
            )

    plt.tight_layout()
    fig.savefig(output_path, dpi=400, bbox_inches="tight")
    plt.close(fig)
|
| 367 |
+
|
| 368 |
+
|
| 369 |
+
def main() -> None:
    """Run the total-time analysis end to end.

    Parses the CLI options, validates them, loads the interactions, writes
    the per-bin summary CSV and the histogram image, then prints a short
    report.

    Raises:
        FileNotFoundError: If the interactions file does not exist.
        ValueError: If a numeric option is not strictly positive.
    """
    args = parse_args()

    if not args.interactions_path.exists():
        raise FileNotFoundError(
            f"Interactions file not found: {args.interactions_path}"
        )

    if args.plot_upper_limit_minutes is not None and args.plot_upper_limit_minutes <= 0:
        raise ValueError("--plot-upper-limit-minutes must be a positive number.")
    if args.bin_time is not None and args.bin_time <= 0:
        raise ValueError("--bin-time must be a positive number.")
    if args.cdf_marker_minutes <= 0:
        raise ValueError("--cdf-marker-minutes must be a positive number.")

    df = load_interactions(args.interactions_path, max_rows=args.max_rows)
    total_times_minutes = compute_student_total_times_minutes(df)

    summary = summarize_binned_distribution(
        total_times_minutes,
        keep_nonpositive=args.keep_nonpositive_total_times,
        bin_time_minutes=args.bin_time,
    )
    # The counts CSV may target a directory that does not exist yet (for
    # example a fresh checkout without Results/); create it before writing.
    args.output_counts.parent.mkdir(parents=True, exist_ok=True)
    summary.to_csv(args.output_counts, index=False)

    cdf_at_marker = cumulative_probability_at_minutes(
        total_times_minutes=total_times_minutes,
        threshold_minutes=args.cdf_marker_minutes,
        keep_nonpositive=args.keep_nonpositive_total_times,
    )

    plot_distribution(
        summary,
        args.output_plot,
        log_y=args.log_y,
        plot_upper_limit_minutes=args.plot_upper_limit_minutes,
        cdf_marker_minutes=args.cdf_marker_minutes,
        cdf_at_marker=cdf_at_marker,
    )

    total_students = int(df["user_id"].nunique())
    # Students that survive the same filtering the summary applied.
    students_used = int(
        len(
            filter_valid_total_times(
                total_times_minutes,
                keep_nonpositive=args.keep_nonpositive_total_times,
            )
        )
    )
    print("Done.")
    print(f"Interactions loaded: {len(df):,}")
    print(f"Students observed: {total_students:,}")
    print(f"Students used in distribution: {students_used:,}")
    if args.bin_time is not None:
        print(f"Bin width (min): {args.bin_time}")
    print(
        f"Cumulative P(total_time <= {format_bin_bound(args.cdf_marker_minutes)} min): "
        f"{cdf_at_marker * 100:.2f}%"
    )
    print(f"Saved plot: {args.output_plot}")
    print(f"Saved bin counts: {args.output_counts}")
|
| 430 |
+
|
| 431 |
+
|
| 432 |
+
if __name__ == "__main__":
|
| 433 |
+
main()
|
Code/process_to_single_file.py
ADDED
|
@@ -0,0 +1,820 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Convert FoundationalASSIST CSV files to the CSEDM/OEKT JSON format.
|
| 4 |
+
|
| 5 |
+
Inputs (under Data/ by default):
|
| 6 |
+
- Interactions.csv
|
| 7 |
+
- Problems.csv
|
| 8 |
+
- Skill_Set.csv
|
| 9 |
+
- Skills.csv
|
| 10 |
+
|
| 11 |
+
Outputs (under src/data/FoundationalASSIST/ by default):
|
| 12 |
+
- dataset.json
|
| 13 |
+
- qmatrix.json
|
| 14 |
+
- trainset.json
|
| 15 |
+
- validset.json
|
| 16 |
+
- testset.json
|
| 17 |
+
|
| 18 |
+
The produced dataset JSON follows the same schema used by src/data/CSEDM.
|
| 19 |
+
"""
|
| 20 |
+
|
| 21 |
+
from __future__ import annotations
|
| 22 |
+
|
| 23 |
+
import argparse
|
| 24 |
+
import json
|
| 25 |
+
import random
|
| 26 |
+
import re
|
| 27 |
+
from pathlib import Path
|
| 28 |
+
from typing import Literal, cast
|
| 29 |
+
|
| 30 |
+
import pandas as pd
|
| 31 |
+
from tqdm import tqdm
|
| 32 |
+
from clean_utils import clean_problem_body
|
| 33 |
+
|
| 34 |
+
PROJECT_ROOT = Path(__file__).resolve().parents[3]
|
| 35 |
+
DEFAULT_DATA_DIR = Path(__file__).resolve().parent.parent / "Data"
|
| 36 |
+
DEFAULT_OUTPUT_DIR = PROJECT_ROOT / "src" / "data" / "FoundationalASSIST"
|
| 37 |
+
GroupingMode = Literal["none", "1h", "halfday", "day", "week", "month", "year"]
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def parse_grouping_mode(value: str) -> GroupingMode:
    """Map a user-supplied ``--grouping-time`` spelling to its canonical mode.

    Matching ignores surrounding whitespace and letter case.

    Raises:
        argparse.ArgumentTypeError: If the spelling is not recognised.
    """
    key = value.strip().lower()

    # Canonical mode -> every accepted spelling of it.
    spellings: dict[GroupingMode, tuple[str, ...]] = {
        "none": ("0", "0.0", "none", "off", "no"),
        "1h": ("1h", "hour"),
        "halfday": ("halfday", "half-day"),
        "day": ("day",),
        "week": ("week",),
        "month": ("month",),
        "year": ("year",),
    }
    for mode, accepted in spellings.items():
        if key in accepted:
            return mode

    valid_values = "1h, halfday, day, week, month, year, none"
    raise argparse.ArgumentTypeError(
        f"Invalid grouping mode '{value}'. Valid values: {valid_values}."
    )
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def parse_args() -> argparse.Namespace:
    """Define the converter's command-line options and parse them.

    Returns:
        The namespace produced by ``ArgumentParser.parse_args()``.
    """
    cli = argparse.ArgumentParser(
        description="Convert FoundationalASSIST to CSEDM/OEKT JSON format."
    )

    # Input and output locations.
    cli.add_argument(
        "--data-dir",
        type=Path,
        default=DEFAULT_DATA_DIR,
        help="Directory containing Interactions.csv, Problems.csv, Skills.csv.",
    )
    cli.add_argument(
        "--output-dir",
        type=Path,
        default=DEFAULT_OUTPUT_DIR,
        help="Directory to write dataset.json/qmatrix.json/split files.",
    )

    # Student split configuration.
    cli.add_argument(
        "--seed",
        type=int,
        default=42,
        help="Random seed used for train/valid/test student split.",
    )
    for split_name, default_fraction in (("train", 0.8), ("valid", 0.1), ("test", 0.1)):
        cli.add_argument(
            f"--{split_name}-ratio",
            type=float,
            default=default_fraction,
            help=f"Fraction of students in {split_name} split.",
        )

    cli.add_argument(
        "--max-interactions",
        type=int,
        default=None,
        help=(
            "Optional cap on number of interaction rows after sorting. "
            "Useful for quick smoke tests."
        ),
    )
    cli.add_argument(
        "--grouping-time",
        type=parse_grouping_mode,
        default="none",
        help=(
            "Calendar grouping mode per student: 1h, halfday, day, week, "
            "month, year, or none."
        ),
    )
    return cli.parse_args()
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
def _text(v: object) -> str:
    """Return ``str(v)``, or an empty string for ``None``, ``pd.NA`` and NaN floats."""
    is_missing = (
        v is None
        or v is pd.NA
        or (isinstance(v, float) and pd.isna(v))
    )
    return "" if is_missing else str(v)
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
def _as_int(v: object) -> int:
    """Convert a value to ``int`` by way of its text form and ``float``.

    Going through ``float`` lets strings such as ``"3.0"`` convert; the
    fractional part is truncated by ``int``.
    """
    as_number = float(_text(v))
    return int(as_number)
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
def _as_float(v: object) -> float:
    """Convert a value to ``float`` by way of its text form."""
    as_text = _text(v)
    return float(as_text)
|
| 144 |
+
|
| 145 |
+
|
| 146 |
+
def label_answer_options(answer_string: object) -> dict[str, str] | None:
    """Turn a ``||``-delimited option string into a letter -> option mapping.

    Options are stripped and labelled ``A`` through ``J`` in order; any
    option beyond the tenth is left out.

    Returns:
        The mapping, or ``None`` when the input is missing or blank.
    """
    raw = _text(answer_string).strip()
    if not raw:
        return None

    labelled: dict[str, str] = {}
    # zip stops at the shorter input, which caps the result at ten options.
    for letter, option in zip("ABCDEFGHIJ", raw.split("||")):
        labelled[letter] = option.strip()
    return labelled
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
def clean_html_and_normalize(text: object) -> str:
    """Strip HTML tags and normalise whitespace so answers compare reliably.

    Tags are removed, runs of whitespace collapse to single spaces, and any
    whitespace around a colon is removed.
    """
    raw = _text(text)
    if not raw:
        return ""

    without_tags = re.sub(r"<[^>]+>", "", raw)
    single_spaced = " ".join(without_tags.split())
    tight_colons = re.sub(r"\s*:\s*", ":", single_spaced)
    return tight_colons.strip()
|
| 167 |
+
|
| 168 |
+
|
| 169 |
+
def match_student_answer_to_letters(
    student_answer_text: object,
    answer_options_dict: dict[str, str] | None,
) -> str:
    """Map student multiple-choice answer text(s) to letter labels.

    The answer is split on ``" , "`` and each piece is matched against the
    options in three steps, first hit wins:

    1. exact match on the raw (stripped) text;
    2. exact match after ``clean_html_and_normalize``;
    3. substring match, in either direction, on the normalised text.

    Steps 2 and 3 are skipped when the normalised text is empty.

    Args:
        student_answer_text: Raw answer as recorded for the student.
        answer_options_dict: Letter -> option text mapping, or ``None``.

    Returns:
        The sorted, de-duplicated letters joined by ``", "``; the original
        answer text when nothing matched or there are no options.
    """
    answer_text = _text(student_answer_text)
    if not answer_text or not answer_options_dict:
        return answer_text

    normalized_options = {
        letter: clean_html_and_normalize(text)
        for letter, text in answer_options_dict.items()
    }

    matched_letters: set[str] = set()
    for piece in answer_text.split(" , "):
        raw_piece = piece.strip()
        normalized_piece = clean_html_and_normalize(raw_piece)

        # Raw comparison first: options that differ only inside HTML markup
        # (e.g. image-only options) all normalise to the same text and would
        # otherwise collapse onto the first option.
        letter = next(
            (
                candidate
                for candidate, option_text in answer_options_dict.items()
                if raw_piece and raw_piece == option_text.strip()
            ),
            None,
        )

        # An empty string is a substring of every string, so an empty
        # normalised answer or option must never take part in the matching
        # below; it would spuriously select the first option.
        if letter is None and normalized_piece:
            letter = next(
                (
                    candidate
                    for candidate, option in normalized_options.items()
                    if option == normalized_piece
                ),
                None,
            )
            if letter is None:
                letter = next(
                    (
                        candidate
                        for candidate, option in normalized_options.items()
                        if option
                        and (normalized_piece in option or option in normalized_piece)
                    ),
                    None,
                )

        if letter is not None:
            matched_letters.add(letter)

    if matched_letters:
        return ", ".join(sorted(matched_letters))
    return answer_text
|
| 204 |
+
|
| 205 |
+
|
| 206 |
+
def get_correct_option_letters(
    answer_options: dict[str, str] | None,
    correct_answers: object,
) -> str:
    """Translate the correct answer text(s) of an MC item into option letters.

    Args:
        answer_options: Letter -> option text mapping, or ``None``.
        correct_answers: ``||``-delimited correct answer text.

    Returns:
        The sorted matching letters joined by ``", "``; the stripped correct
        answer text when there are no options or no option matches exactly.
    """
    correct_text = _text(correct_answers).strip()
    if not (answer_options and correct_text):
        return correct_text

    wanted = [part.strip() for part in correct_text.split("||")]
    letters = sorted(
        letter for letter, option in answer_options.items() if option in wanted
    )
    if not letters:
        return correct_text
    return ", ".join(letters)
|
| 222 |
+
|
| 223 |
+
|
| 224 |
+
def format_answer_options_for_prompt(answer_options: dict[str, str] | None) -> str:
|
| 225 |
+
"""Format answer options dictionary for human-readable prompt text."""
|
| 226 |
+
if not answer_options:
|
| 227 |
+
return ""
|
| 228 |
+
return "\n".join([f"{letter}) {text}" for letter, text in answer_options.items()])
|
| 229 |
+
|
| 230 |
+
|
| 231 |
+
def load_and_preprocess_problems(problems_path: Path) -> pd.DataFrame:
    """Load and preprocess problems with the same answer handling as KT inference.

    Rows whose ``problem_id`` is not numeric are dropped, duplicate problem IDs
    are collapsed to a single row, and the derived columns ``cleaned body``,
    ``answer_options``, ``correct_answers`` and ``answer_options_formatted``
    are added.

    Args:
        problems_path: Path to the problems CSV.

    Returns:
        The preprocessed problems DataFrame, one row per ``problem_id``.
    """
    problems_df = pd.read_csv(problems_path, low_memory=False)
    problems_df["problem_id"] = pd.to_numeric(
        problems_df["problem_id"], errors="coerce"
    )
    problems_df = problems_df.dropna(subset=["problem_id"]).copy()
    problems_df["problem_id"] = problems_df["problem_id"].astype(int)

    # A stable sort keeps duplicate problem_ids in file order, so keep="first"
    # always retains the first occurrence from the CSV. The default sort
    # algorithm gives no such guarantee. The trailing copy() detaches the
    # result before new columns are assigned.
    problems_df = (
        problems_df.sort_values(["problem_id"], kind="mergesort")
        .drop_duplicates(subset=["problem_id"], keep="first")
        .copy()
    )

    problems_df["cleaned body"] = problems_df["Problem Body"].apply(clean_problem_body)
    problems_df["answer_options"] = problems_df["Multiple Choice Options"].apply(
        label_answer_options
    )

    mc_types = {"Multiple Choice (select 1)", "Multiple Choice (select all)"}

    def resolve_correct_answers(row: pd.Series) -> str:
        # Multiple-choice items are resolved to option letters; every other
        # problem type uses its fill-in answer text as-is.
        if _text(row["Problem Type"]).strip() in mc_types:
            return get_correct_option_letters(
                row["answer_options"],
                row["Multiple Choice Answers"],
            )
        return _text(row.get("Fill-in Answers", ""))

    problems_df["correct_answers"] = problems_df.apply(resolve_correct_answers, axis=1)
    problems_df["answer_options_formatted"] = problems_df["answer_options"].apply(
        format_answer_options_for_prompt
    )
    return problems_df
|
| 265 |
+
|
| 266 |
+
|
| 267 |
+
def load_skill_tables(
    skills_path: Path,
    skill_set_path: Path,
) -> tuple[list[dict], dict[int, list[int]], int]:
    """Load skills and build a problem_id -> skill_ids mapping.

    Skill IDs are taken from the skill-set table as ``index - 1``. The
    per-problem skills table supplies the problem tags and, where available,
    a display name for each skill code.

    Args:
        skills_path: CSV with ``problem_id``, ``node_code`` and ``node_name``.
        skill_set_path: CSV with ``index``, ``skill_code`` and
            ``full_description``.

    Returns:
        skills: OEKT skill list ordered by skill code, followed by one
            ``UnmappedSkill`` fallback entry.
        problem_to_skills: Mapping from original problem_id to its sorted,
            de-duplicated skill IDs.
        fallback_skill_id: Skill ID for untagged problems (largest skill ID
            plus one).

    Raises:
        ValueError: If the skills table references a node_code that has no
            matching skill_code in the skill-set table.
    """
    skills_df = pd.read_csv(
        skills_path,
        usecols=["problem_id", "node_code", "node_name"],
        low_memory=False,
    )

    skills_df["problem_id"] = pd.to_numeric(skills_df["problem_id"], errors="coerce")
    skills_df = skills_df.dropna(subset=["problem_id"]).copy()
    skills_df["problem_id"] = skills_df["problem_id"].astype(int)
    skills_df["node_code"] = skills_df["node_code"].apply(lambda v: _text(v).strip())
    skills_df["node_name"] = skills_df["node_name"].apply(lambda v: _text(v).strip())
    skills_df = skills_df[skills_df["node_code"] != ""].copy()

    skill_set_df = pd.read_csv(
        skill_set_path,
        usecols=["index", "skill_code", "full_description"],
        low_memory=False,
    )
    skill_set_df["index"] = pd.to_numeric(skill_set_df["index"], errors="coerce")
    skill_set_df = skill_set_df.dropna(subset=["index"]).copy()
    skill_set_df["index"] = skill_set_df["index"].astype(int)
    skill_set_df["skill_code"] = skill_set_df["skill_code"].apply(
        lambda v: _text(v).strip()
    )
    skill_set_df["full_description"] = skill_set_df["full_description"].apply(
        lambda v: _text(v).strip()
    )
    skill_set_df = skill_set_df[skill_set_df["skill_code"] != ""].copy()
    # If a skill_code appears more than once, keep the row with the lowest index.
    skill_set_df = (
        skill_set_df.sort_values(["index", "skill_code"])
        .drop_duplicates(subset=["skill_code"], keep="first")
        .copy()
    )

    # One display name per node_code: the alphabetically first node_name.
    node_name_by_code = (
        skills_df.sort_values(["node_code", "node_name"])
        .drop_duplicates(subset=["node_code"], keep="first")
        .set_index("node_code")["node_name"]
        .to_dict()
    )

    skill_rows: list[tuple[str, int, str, str]] = []
    skill_id_map: dict[str, int] = {}
    for row in skill_set_df.itertuples(index=False):
        node_code = _text(row.skill_code).strip()
        skill_id = _as_int(row.index) - 1
        skill_id_map[node_code] = skill_id

        node_name = _text(node_name_by_code.get(node_code, "")).strip()
        name = node_name if node_name else node_code
        description = _text(row.full_description).strip()
        if not description:
            print(
                f"Warning: Missing description for skill '{node_code}' in Skill_Set.csv. "
                "Using default description."
            )
            description = (
                f"Common Core State StandardS for Mathematics: Skill {node_code}"
            )

        skill_rows.append((node_code, skill_id, name, description))

    # Every tagged node_code must exist in the skill set; fail loudly rather
    # than inventing IDs for unknown codes.
    missing_node_codes = sorted(
        set(skills_df["node_code"].tolist()) - set(skill_id_map)
    )
    if missing_node_codes:
        raise ValueError(
            f"Error: Found {len(missing_node_codes)} node_code(s) in Skills.csv that are missing from Skill_Set.csv. "
            f"Please ensure all node_code values in Skills.csv have a corresponding skill_code in Skill_Set.csv. "
            f"Missing node_codes: {missing_node_codes}"
        )

    skills: list[dict] = [
        {
            "id": skill_id,
            "name": name,
            "description": description,
            "prerequisites": [],
        }
        for _, skill_id, name, description in sorted(skill_rows, key=lambda r: r[0])
    ]

    fallback_skill_id = max([s["id"] for s in skills], default=-1) + 1
    skills.append(
        {
            "id": fallback_skill_id,
            "name": "UnmappedSkill",
            "description": "Fallback skill for questions without explicit skill tags.",
            "prerequisites": [],
        }
    )

    grouped_skill_ids: dict[int, list[int]] = {}
    pairs = skills_df[["problem_id", "node_code"]].drop_duplicates()
    for row in pairs.itertuples(index=False):
        pid = _as_int(row.problem_id)
        sid = skill_id_map[_text(row.node_code).strip()]
        grouped_skill_ids.setdefault(pid, []).append(sid)

    # Every key was created by an append, so no problem can have an empty
    # list here; only de-duplication and ordering remain.
    problem_to_skills: dict[int, list[int]] = {
        pid: sorted(set(sids)) for pid, sids in grouped_skill_ids.items()
    }

    return skills, problem_to_skills, fallback_skill_id
|
| 397 |
+
|
| 398 |
+
|
| 399 |
+
def build_question_content(problem_row: pd.Series) -> tuple[str, str]:
    """Assemble the prompt text for a problem and return it with its answer.

    The content is the non-empty pieces among the cleaned body, the problem
    type and the formatted answer options, separated by blank lines. When all
    three are empty, a ``"Problem <id>"`` placeholder is used instead.

    Returns:
        ``(content, correct_answer)`` where ``correct_answer`` is the stripped
        ``correct_answers`` field.
    """

    def read_field(column: str) -> str:
        return _text(problem_row.get(column, "")).strip()

    statement = read_field("cleaned body")
    kind = read_field("Problem Type")
    options_block = read_field("answer_options_formatted")
    answer = read_field("correct_answers")

    candidates = [
        (statement, statement),
        (kind, f"Problem Type: {kind}"),
        (options_block, f"Answer Options:\n{options_block}"),
    ]
    sections = [rendered for raw, rendered in candidates if raw]

    if sections:
        return "\n\n".join(sections), answer

    problem_id = problem_row.get("problem_id", "unknown")
    return f"Problem {problem_id}", answer
|
| 421 |
+
|
| 422 |
+
|
| 423 |
+
def load_questions(
    problems_df: pd.DataFrame,
    problem_to_skills: dict[int, list[int]],
    fallback_skill_id: int,
) -> tuple[list[dict], dict[int, str], int]:
    """Turn each preprocessed problem row into an OEKT question object.

    Every question gets the ID ``q_<problem_id>`` and a single rubric
    ``r_<problem_id>_0`` that shares the question's skill IDs. Problems with
    no skill mapping are assigned ``fallback_skill_id``.

    Returns:
        ``(questions, problem_to_qid, unmapped_questions)`` where the last
        item counts the problems that fell back to ``fallback_skill_id``.
    """
    built_questions: list[dict] = []
    qid_by_problem: dict[int, str] = {}
    fallback_count = 0

    for record in problems_df.to_dict(orient="records"):
        pid = _as_int(record["problem_id"])
        qid = f"q_{pid}"

        mapped_skill_ids = problem_to_skills.get(pid, [])
        if mapped_skill_ids:
            skill_ids = mapped_skill_ids
        else:
            skill_ids = [fallback_skill_id]
            fallback_count += 1

        content, correct_answer = build_question_content(pd.Series(record))

        rubric = {
            "id": f"r_{pid}_0",
            "description": f"Match the correct answer: {correct_answer}",
            "skill_ids": skill_ids,
        }
        built_questions.append(
            {
                "id": qid,
                "content": content,
                "skill_ids": skill_ids,
                "rubrics": [rubric],
            }
        )
        qid_by_problem[pid] = qid

    return built_questions, qid_by_problem, fallback_count
|
| 459 |
+
|
| 460 |
+
|
| 461 |
+
def load_interactions(
|
| 462 |
+
interactions_path: Path,
|
| 463 |
+
problem_meta_df: pd.DataFrame,
|
| 464 |
+
max_interactions: int | None = None,
|
| 465 |
+
) -> pd.DataFrame:
|
| 466 |
+
"""Load and normalize interaction logs used to build student trajectories."""
|
| 467 |
+
usecols = [
|
| 468 |
+
"id",
|
| 469 |
+
"problem_id",
|
| 470 |
+
"answer_text",
|
| 471 |
+
"discrete_score",
|
| 472 |
+
"end_time",
|
| 473 |
+
"user_id",
|
| 474 |
+
]
|
| 475 |
+
df = pd.read_csv(interactions_path, usecols=usecols, low_memory=False)
|
| 476 |
+
|
| 477 |
+
df["problem_id"] = pd.to_numeric(df["problem_id"], errors="coerce")
|
| 478 |
+
df["discrete_score"] = pd.to_numeric(df["discrete_score"], errors="coerce")
|
| 479 |
+
df["id"] = pd.to_numeric(df["id"], errors="coerce")
|
| 480 |
+
df["end_time"] = pd.to_datetime(df["end_time"], errors="coerce", utc=True)
|
| 481 |
+
|
| 482 |
+
df = df.dropna(subset=["user_id", "problem_id", "discrete_score"]).copy()
|
| 483 |
+
df["user_id"] = df["user_id"].astype(str)
|
| 484 |
+
df["problem_id"] = df["problem_id"].astype(int)
|
| 485 |
+
df["id"] = df["id"].fillna(-1).astype(int)
|
| 486 |
+
|
| 487 |
+
answer_meta = problem_meta_df[
|
| 488 |
+
["problem_id", "Problem Type", "answer_options"]
|
| 489 |
+
].copy()
|
| 490 |
+
df = df.merge(answer_meta, on="problem_id", how="left")
|
| 491 |
+
|
| 492 |
+
mc_types = {"Multiple Choice (select 1)", "Multiple Choice (select all)"}
|
| 493 |
+
df["answer_text"] = df.apply(
|
| 494 |
+
lambda row: (
|
| 495 |
+
match_student_answer_to_letters(row["answer_text"], row["answer_options"])
|
| 496 |
+
if _text(row.get("Problem Type", "")).strip() in mc_types
|
| 497 |
+
and isinstance(row.get("answer_options"), dict)
|
| 498 |
+
else _text(row["answer_text"])
|
| 499 |
+
),
|
| 500 |
+
axis=1,
|
| 501 |
+
)
|
| 502 |
+
|
| 503 |
+
df = df.drop(columns=["Problem Type", "answer_options"])
|
| 504 |
+
|
| 505 |
+
df = df.sort_values(["user_id", "end_time", "id"], kind="mergesort")
|
| 506 |
+
if max_interactions is not None:
|
| 507 |
+
if max_interactions <= 0:
|
| 508 |
+
raise ValueError("--max-interactions must be a positive integer.")
|
| 509 |
+
df = df.head(max_interactions).copy()
|
| 510 |
+
return df
|
| 511 |
+
|
| 512 |
+
|
| 513 |
+
def build_qmatrix(questions: list[dict], num_skills: int) -> list[list[float]]:
    """Build the rubric x skill indicator matrix.

    Rows follow the order of questions and, within each question, the order
    of its rubrics. A cell is 1.0 when the rubric lists that skill ID and 0.0
    otherwise.
    """
    matrix: list[list[float]] = []
    all_rubrics = (
        rubric for question in questions for rubric in question["rubrics"]
    )
    for rubric in all_rubrics:
        indicator = [0.0] * num_skills
        for skill_id in rubric["skill_ids"]:
            indicator[int(skill_id)] = 1.0
        matrix.append(indicator)
    return matrix
|
| 523 |
+
|
| 524 |
+
|
| 525 |
+
def split_student_ids(
    student_ids: list[str],
    train_ratio: float,
    valid_ratio: float,
    test_ratio: float,
    seed: int,
) -> tuple[list[str], list[str], list[str]]:
    """Split students into train/valid/test lists, reproducibly for a seed.

    The IDs are sorted and then shuffled with a seeded generator, so the
    result does not depend on the input order. Ratios are normalized by their
    sum; train and valid sizes are truncated to whole students and the test
    list receives everything that remains.

    Raises:
        ValueError: If any ratio is negative or all ratios sum to zero or less.
    """
    if any(ratio < 0 for ratio in (train_ratio, valid_ratio, test_ratio)):
        raise ValueError("Split ratios must be non-negative.")

    total = train_ratio + valid_ratio + test_ratio
    if total <= 0:
        raise ValueError("At least one split ratio must be > 0.")

    shuffled = sorted(student_ids)
    random.Random(seed).shuffle(shuffled)

    population = len(shuffled)
    train_end = int(population * (train_ratio / total))
    valid_end = train_end + int(population * (valid_ratio / total))

    return shuffled[:train_end], shuffled[train_end:valid_end], shuffled[valid_end:]
|
| 552 |
+
|
| 553 |
+
|
| 554 |
+
def get_calendar_group_key(
    end_time: pd.Timestamp | None,
    grouping_mode: GroupingMode,
    missing_idx: int,
) -> tuple[object, ...]:
    """Map an interaction timestamp to the key of its calendar bucket (in UTC).

    Timestamps without a timezone are treated as UTC; all others are
    converted to UTC first. A missing timestamp gets the key
    ``("missing", missing_idx)``, so it only shares a bucket with rows given
    the same ``missing_idx``.

    Raises:
        ValueError: If ``grouping_mode`` is not one of the supported buckets.
    """
    if end_time is None:
        return ("missing", missing_idx)

    utc_ts = (
        end_time.tz_localize("UTC")
        if end_time.tzinfo is None
        else end_time.tz_convert("UTC")
    )
    year, month, day = utc_ts.year, utc_ts.month, utc_ts.day

    if grouping_mode == "1h":
        return ("1h", year, month, day, utc_ts.hour)
    if grouping_mode == "halfday":
        # 0 = morning half (before 12:00 UTC), 1 = afternoon half.
        half = 1 if utc_ts.hour >= 12 else 0
        return ("halfday", year, month, day, half)
    if grouping_mode == "day":
        return ("day", year, month, day)
    if grouping_mode == "week":
        # ISO year/week, so weeks spanning New Year stay in a single bucket.
        iso_date = utc_ts.isocalendar()
        return ("week", int(iso_date.year), int(iso_date.week))
    if grouping_mode == "month":
        return ("month", year, month)
    if grouping_mode == "year":
        return ("year", year)

    raise ValueError(f"Unsupported grouping mode: {grouping_mode}")
|
| 584 |
+
|
| 585 |
+
|
| 586 |
+
def write_dataset_json(
    dataset_path: Path,
    skills: list[dict],
    questions: list[dict],
    interactions_df: pd.DataFrame,
    problem_to_qid: dict[int, str],
    grouping_mode: GroupingMode = "none",
    save_unmapped_skills: bool = False,
) -> tuple[list[str], int, int, int, int]:
    """Stream-write dataset.json while optionally grouping by calendar buckets.

    The file is a single JSON object with the keys ``skills``, ``questions``
    and ``students``. Students are serialized one at a time as the groups of
    ``interactions_df`` are visited, so the full student list is never held
    in memory.

    Args:
        dataset_path: Output file; parent directories are created if needed.
        skills: Skill list to write under ``skills``.
        questions: Question list to write under ``questions``.
        interactions_df: Interactions, iterated per ``user_id`` in the order
            the groups first appear. Rows are consumed in their existing
            order, so consecutive rows of one bucket should be adjacent.
        problem_to_qid: Maps a problem_id to its question ID; interactions
            whose problem is not in this mapping are skipped.
        grouping_mode: ``"none"`` puts every response in its own time step;
            any other mode merges consecutive responses that share a calendar
            bucket key into one time step.
        save_unmapped_skills: When False, a trailing ``UnmappedSkill`` entry
            is left out of the written skill list.

    Returns:
        ``(student_ids, num_students, num_time_steps, num_questions,
        skipped_interactions)`` where ``num_questions`` counts the written
        responses.
    """
    dataset_path.parent.mkdir(parents=True, exist_ok=True)

    student_ids: list[str] = []
    num_students = 0
    num_time_steps = 0
    num_questions = 0
    skipped_interactions = 0

    with open(dataset_path, "w", encoding="utf-8") as f:
        # The JSON object is assembled by hand so students can be appended
        # one at a time between the opening and closing brackets.
        f.write("{")
        f.write('"skills":')
        if not save_unmapped_skills:
            # Drop the fallback skill only if it really is the last entry.
            saving_skills = (
                skills[:-1]
                if skills and skills[-1]["name"] == "UnmappedSkill"
                else skills
            )
        else:
            saving_skills = skills
        json.dump(saving_skills, f, ensure_ascii=False)
        f.write(',"questions":')
        json.dump(questions, f, ensure_ascii=False)
        f.write(',"students":[')

        # Tracks whether a comma separator is needed before the next student.
        first_student = True
        for user_id, student_df in tqdm(interactions_df.groupby("user_id", sort=False)):
            time_steps: list[dict] = []
            # Bucket key of the most recently opened time step for this student.
            current_group_key: tuple[object, ...] | None = None

            for row_idx, row in enumerate(student_df.itertuples(index=False)):
                pid = _as_int(row.problem_id)
                qid = problem_to_qid.get(pid)
                if qid is None:
                    # The problem has no question object; count and skip it.
                    skipped_interactions += 1
                    continue

                # Binarize: only a score of at least 1.0 counts as correct.
                score = 1 if _as_float(row.discrete_score) >= 1.0 else 0
                answer_text = _text(row.answer_text)
                response = {
                    "question_id": qid,
                    "answer_text": answer_text,
                    "rubric_scores": [score],
                }
                num_questions += 1

                if grouping_mode == "none":
                    # Ungrouped: one response per time step.
                    time_steps.append(
                        {
                            "t": len(time_steps),
                            "responses": [response],
                        }
                    )
                    continue

                row_end_time_raw = row.end_time
                # Normalize a missing timestamp (NaT/NaN) to None.
                row_end_time: pd.Timestamp | None = (
                    None
                    if pd.isna(row_end_time_raw)
                    else cast(pd.Timestamp, row_end_time_raw)
                )

                group_key = get_calendar_group_key(
                    end_time=row_end_time,
                    grouping_mode=grouping_mode,
                    missing_idx=row_idx,
                )
                if time_steps and current_group_key == group_key:
                    # Same bucket as the previous response: extend that step.
                    time_steps[-1]["responses"].append(response)
                    continue

                # New bucket: open a fresh time step.
                time_steps.append(
                    {
                        "t": len(time_steps),
                        "responses": [response],
                    }
                )
                current_group_key = group_key

            if not time_steps:
                # Every interaction of this student was skipped.
                continue

            student_obj = {
                "student_id": user_id,
                "time_steps": time_steps,
            }

            if not first_student:
                f.write(",")
            json.dump(student_obj, f, ensure_ascii=False)
            first_student = False

            student_ids.append(str(user_id))
            num_students += 1
            num_time_steps += len(time_steps)

        f.write("]}")

    return (
        student_ids,
        num_students,
        num_time_steps,
        num_questions,
        skipped_interactions,
    )
|
| 700 |
+
|
| 701 |
+
|
| 702 |
+
def save_json(path: Path, obj: object) -> None:
    """Write ``obj`` to ``path`` as 2-space-indented UTF-8 JSON.

    Missing parent directories are created, and non-ASCII characters are
    written as-is rather than escaped.
    """
    target_dir = path.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    with path.open("w", encoding="utf-8") as handle:
        json.dump(obj, handle, indent=2, ensure_ascii=False)
|
| 706 |
+
|
| 707 |
+
|
| 708 |
+
def main() -> None:
    """Convert the raw CSV tables into the dataset, Q-matrix and split files.

    Reads ``Interactions.csv``, ``Problems.csv``, ``Skill_Set.csv`` and
    ``Skills.csv`` from the data directory, writes ``dataset.json``,
    ``qmatrix.json`` and the train/valid/test ID lists to the output
    directory, and prints a summary of the conversion.

    Raises:
        FileNotFoundError: If any of the four input files is missing.
    """

    def ratio(numerator: float, denominator: float) -> float:
        # Guard the averages in the summary against empty inputs.
        return numerator / denominator if denominator > 0 else 0.0

    args = parse_args()
    data_dir, output_dir = args.data_dir, args.output_dir

    interactions_path = data_dir / "Interactions.csv"
    problems_path = data_dir / "Problems.csv"
    skill_set_path = data_dir / "Skill_Set.csv"
    skills_path = data_dir / "Skills.csv"

    required_inputs = (interactions_path, problems_path, skill_set_path, skills_path)
    missing_input = next(
        (candidate for candidate in required_inputs if not candidate.exists()), None
    )
    if missing_input is not None:
        raise FileNotFoundError(f"Required input file not found: {missing_input}")

    print("Loading skills...")
    skills, problem_to_skills, fallback_skill_id = load_skill_tables(
        skills_path=skills_path,
        skill_set_path=skill_set_path,
    )

    print("Loading and preprocessing problems...")
    problems_df = load_and_preprocess_problems(problems_path)

    print("Loading questions...")
    questions, problem_to_qid, unmapped_questions = load_questions(
        problems_df=problems_df,
        problem_to_skills=problem_to_skills,
        fallback_skill_id=fallback_skill_id,
    )

    print("Loading interactions...")
    interactions_df = load_interactions(
        interactions_path,
        problem_meta_df=problems_df,
        max_interactions=args.max_interactions,
    )

    print("Writing dataset.json...")
    has_unmapped = unmapped_questions > 0
    written = write_dataset_json(
        dataset_path=output_dir / "dataset.json",
        skills=skills,
        questions=questions,
        interactions_df=interactions_df,
        problem_to_qid=problem_to_qid,
        grouping_mode=args.grouping_time,
        save_unmapped_skills=has_unmapped,
    )
    student_ids, num_students, num_time_steps, num_questions, skipped_interactions = (
        written
    )

    print("Building qmatrix.json...")
    # The fallback skill only gets a column when some question actually uses it.
    num_skills = len(skills) - int(unmapped_questions == 0)
    save_json(output_dir / "qmatrix.json", build_qmatrix(questions, num_skills=num_skills))

    print("Building train/valid/test split files...")
    train_ids, valid_ids, test_ids = split_student_ids(
        student_ids=student_ids,
        train_ratio=args.train_ratio,
        valid_ratio=args.valid_ratio,
        test_ratio=args.test_ratio,
        seed=args.seed,
    )
    for file_name, id_list in (
        ("trainset.json", train_ids),
        ("validset.json", valid_ids),
        ("testset.json", test_ids),
    ):
        save_json(output_dir / file_name, id_list)

    total_rubrics = sum(len(q["rubrics"]) for q in questions)
    question_skill_counts = [len(q.get("skill_ids", [])) for q in questions]
    rubric_skill_counts = [
        len(r.get("skill_ids", [])) for q in questions for r in q.get("rubrics", [])
    ]
    avg_skills_per_question = ratio(
        sum(question_skill_counts), len(question_skill_counts)
    )
    avg_skills_per_rubric = ratio(sum(rubric_skill_counts), len(rubric_skill_counts))
    avg_time_steps_per_student = ratio(num_time_steps, num_students)
    avg_questions_per_timestep = ratio(num_questions, num_time_steps)

    summary_lines = [
        "\n=== Conversion Summary ===",
        f"Skills: {num_skills}",
        f"Questions: {len(questions)}",
        f"Rubrics: {total_rubrics}",
        f"Avg skills/question: {avg_skills_per_question:.3f}",
        f"Avg skills/rubric: {avg_skills_per_rubric:.3f}",
        f"Students: {num_students}",
        f"Time steps: {num_time_steps}",
        f"Avg timesteps/student: {avg_time_steps_per_student:.3f}",
        f"Avg questions/timestep: {avg_questions_per_timestep:.3f}",
        f"Grouping mode: {args.grouping_time}",
        f"Unmapped questions: {unmapped_questions}",
        f"Skipped interactions:{skipped_interactions}",
        f"Output directory: {output_dir}",
    ]
    for line in summary_lines:
        print(line)


if __name__ == "__main__":
    main()
|
Code/qwen3next80bvllm_instruct.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Knowledge Tracing inference with Qwen3-Next-80B-A3B-Instruct model.
|
| 3 |
+
|
| 4 |
+
This is the standard instruction-following model (no thinking blocks).
|
| 5 |
+
Recommended sampling: temperature=0.7, top_p=0.8, top_k=20, min_p=0
|
| 6 |
+
|
| 7 |
+
Usage:
|
| 8 |
+
CUDA_VISIBLE_DEVICES=0,1,2,3 python qwen3next80bvllm_instruct.py \
|
| 9 |
+
--data-dir foundationalktdataset/ \
|
| 10 |
+
--num-gpus 4 \
|
| 11 |
+
--batch-size 10 \
|
| 12 |
+
--cache-dir /data1/ \
|
| 13 |
+
--num-students 500 \
|
| 14 |
+
--bin-size 50 \
|
| 15 |
+
--min-history 50
|
| 16 |
+
"""
|
| 17 |
+
|
| 18 |
+
from kt_inference_base import run_inference
|
| 19 |
+
|
| 20 |
+
# Configuration handed to run_inference for the Instruct variant.
MODEL_CONFIG = {
    "model_id": "Qwen/Qwen3-Next-80B-A3B-Instruct",
    # Sampling values follow the "Recommended sampling" line in the module
    # docstring (temperature=0.7, top_p=0.8, top_k=20, min_p=0).
    "gen_configs": {
        "temperature": 0.7,
        "top_p": 0.8,
        "top_k": 20,
        "min_p": 0.0,
        "max_tokens": 32768,
        "repetition_penalty": 1.0,
    },
    # NOTE(review): presumably used to name the result files; confirm in
    # kt_inference_base.run_inference.
    "output_prefix": "qwen3next80binstruct",
    "system_prompt_prefix": "",  # No prefix - standard instruct model
}

# Run inference only when executed as a script, not when imported.
if __name__ == "__main__":
    run_inference(MODEL_CONFIG)
|
Code/qwen3next80bvllm_thinking.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Knowledge Tracing inference with Qwen3-Next-80B-A3B-Thinking model.
|
| 3 |
+
|
| 4 |
+
This model has native thinking mode - it automatically generates <think>...</think> blocks.
|
| 5 |
+
Recommended sampling: temperature=0.6, top_p=0.95, top_k=20, min_p=0
|
| 6 |
+
|
| 7 |
+
Usage:
|
| 8 |
+
CUDA_VISIBLE_DEVICES=0,1,2,3 python qwen3next80bvllm_thinking.py \
|
| 9 |
+
--data-dir foundationalktdataset/ \
|
| 10 |
+
--num-gpus 4 \
|
| 11 |
+
--batch-size 10 \
|
| 12 |
+
--cache-dir /data1/ \
|
| 13 |
+
--num-students 500 \
|
| 14 |
+
--bin-size 50 \
|
| 15 |
+
--min-history 50
|
| 16 |
+
"""
|
| 17 |
+
|
| 18 |
+
from kt_inference_base import run_inference
|
| 19 |
+
|
| 20 |
+
# Configuration handed to run_inference for the Thinking variant.
MODEL_CONFIG = {
    "model_id": "Qwen/Qwen3-Next-80B-A3B-Thinking",
    # Sampling values follow the "Recommended sampling" line in the module
    # docstring (temperature=0.6, top_p=0.95, top_k=20, min_p=0).
    "gen_configs": {
        "temperature": 0.6,
        "top_p": 0.95,
        "top_k": 20,
        "min_p": 0.0,
        "max_tokens": 32768,
        "repetition_penalty": 1.0,
    },
    # NOTE(review): presumably used to name the result files; confirm in
    # kt_inference_base.run_inference.
    "output_prefix": "qwen3next80bthinking",
    "system_prompt_prefix": "",  # No prefix - model has native thinking
}

# Run inference only when executed as a script, not when imported.
if __name__ == "__main__":
    run_inference(MODEL_CONFIG)
|
Code/scripts.sh
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
# Runs the distribution-plotting scripts and then the preprocessing script,
# one after another. Execute from the directory that contains these scripts.

# Student Time Gap Distribution
python plot_timegap_distribution.py \
    --bin-time 5 \
    --plot-upper-limit-minutes 240

# Student Attempt Distribution
python plot_student_attempt_distribution.py \
    --bin-time 1440

python plot_student_attempt_distribution.py \
    --bin-time 10800

python plot_student_attempt_distribution.py \
    --bin-time 43200

# Total Time Distribution
python plot_totaltime_distribution.py \
    --bin-time 10800

# Preprocess
# NOTE(review): confirm that process_to_single_file.py accepts a numeric
# --grouping-time; its calendar grouping handles named modes such as
# "1h", "day" and "week".
python process_to_single_file.py \
    --grouping-time 10800
|
Data/CASE-Common Core State Standards for Math.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Data/Interactions.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:671e97d320d0cf9b7e2bd75830d531cfbd95d307a1a9a590531934ad0d3d8ba4
|
| 3 |
+
size 245145636
|
Data/Math_Standards1.pdf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2d28ded26c7394f55525550e32bb96786da2e9a3276ccca8873e80ebcdebab11
|
| 3 |
+
size 1242082
|
Data/Problems.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Data/Skill_Set.csv
ADDED
|
@@ -0,0 +1,165 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
index,skill_code,full_description
|
| 2 |
+
1,2.MD.B.5,"Use addition and subtraction within 100 to solve word problems involving lengths that are given in the same units, e.g., by using drawings (such as drawings of rulers) and equations with a symbol for the unknown number to represent the problem."
|
| 3 |
+
2,3.MD.B.3,"Draw a scaled picture graph and a scaled bar graph to represent a data set with several categories. Solve one- and two-step ""how many more"" and ""how many less"" problems using information presented in scaled bar graphs. For example, draw a bar graph in which each square in the bar graph might represent 5 pets."
|
| 4 |
+
3,3.MD.C.6,"Measure areas by counting unit squares (square cm, square m, square in, square ft, and improvised units)."
|
| 5 |
+
4,3.MD.C.7a,"Find the area of a rectangle with whole-number side lengths by tiling it, and show that the area is the same as would be found by multiplying the side lengths."
|
| 6 |
+
5,3.MD.C.7d,"Recognize area as additive. Find areas of rectilinear figures by decomposing them into non-overlapping rectangles and adding the areas of the non-overlapping parts, applying this technique to solve real world problems."
|
| 7 |
+
6,3.MD.D.8,"Solve real world and mathematical problems involving perimeters of polygons, including finding the perimeter given the side lengths, finding an unknown side length, and exhibiting rectangles with the same perimeter and different areas or with the same area and different perimeters."
|
| 8 |
+
7,3.NF.A.2a,Represent a fraction 1/b on a number line diagram by defining the interval from 0 to 1 as the whole and partitioning it into b equal parts. Recognize that each part has size 1/b and that the endpoint of the part based at 0 locates the number 1/b on the number line.
|
| 9 |
+
8,3.NF.A.2b,Represent a fraction a/b on a number line diagram by marking off a lengths 1/b from 0. Recognize that the resulting interval has size a/b and that its endpoint locates the number a/b on the number line.
|
| 10 |
+
9,3.NF.A.3b,"Recognize and generate simple equivalent fractions, e.g., 1/2 = 2/4, 4/6 = 2/3. Explain why the fractions are equivalent, e.g., by using a visual fraction model."
|
| 11 |
+
10,3.NF.A.3d,"Compare two fractions with the same numerator or the same denominator by reasoning about their size. Recognize that comparisons are valid only when the two fractions refer to the same whole. Record the results of comparisons with the symbols >, =, or <, and justify the conclusions, e.g., by using a visual fraction model."
|
| 12 |
+
11,3.OA.A.1,"Interpret products of whole numbers, e.g., interpret 5 × 7 as the total number of objects in 5 groups of 7 objects each. For example, describe a context in which a total number of objects can be expressed as 5 × 7."
|
| 13 |
+
12,3.OA.A.4,"Determine the unknown whole number in a multiplication or division equation relating three whole numbers. For example, determine the unknown number that makes the equation true in each of the equations 8 × ? = 48, 5 = _ ÷ 3, 6 × 6 = ?."
|
| 14 |
+
13,3.OA.B.5,Apply properties of operations as strategies to multiply and divide.
|
| 15 |
+
14,4.G.A.1,"Draw points, lines, line segments, rays, angles (right, acute, obtuse), and perpendicular and parallel lines. Identify these in two-dimensional figures."
|
| 16 |
+
15,4.G.A.2,"Classify two-dimensional figures based on the presence or absence of parallel or perpendicular lines, or the presence or absence of angles of a specified size. Recognize right triangles as a category, and identify right triangles."
|
| 17 |
+
16,4.MD.A.1,"Know relative sizes of measurement units within one system of units including km, m, cm; kg, g; lb, oz.; l, ml; hr, min, sec. Within a single system of measurement, express measurements in a larger unit in terms of a smaller unit. Record measurement equivalents in a two column table. For example, know that 1 ft is 12 times as long as 1 in. Express the length of a 4 ft snake as 48 in. Generate a conversion table for feet and inches listing the number pairs (1, 12), (2, 24), (3, 36), …"
|
| 18 |
+
17,4.MD.A.3,"Apply the area and perimeter formulas for rectangles in real world and mathematical problems. For example, find the width of a rectangular room given the area of the flooring and the length, by viewing the area formula as a multiplication equation with an unknown factor."
|
| 19 |
+
18,4.NBT.A.1,"Recognize that in a multi-digit whole number, a digit in one place represents ten times what it represents in the place to its right. For example, recognize that 700 ÷ 70 = 10 by applying concepts of place value and division."
|
| 20 |
+
19,4.NBT.B.6,"Find whole-number quotients and remainders with up to four-digit dividends and one-digit divisors, using strategies based on place value, the properties of operations, and/or the relationship between multiplication and division. Illustrate and explain the calculation by using equations, rectangular arrays, and/or area models."
|
| 21 |
+
20,4.NF.A.1,"Explain why a fraction a/b is equivalent to a fraction (n × a)/(n × b) by using visual fraction models, with attention to how the number and size of the parts differ even though the two fractions themselves are the same size. Use this principle to recognize and generate equivalent fractions."
|
| 22 |
+
21,4.NF.A.2,"Compare two fractions with different numerators and different denominators, e.g., by creating common denominators or numerators, or by comparing to a benchmark fraction such as 1/2. Recognize that comparisons are valid only when the two fractions refer to the same whole. Record the results of comparisons with symbols >, =, or <, and justify the conclusions, e.g., by using a visual fraction model."
|
| 23 |
+
22,4.NF.B.4b,"Understand a multiple of a/b as a multiple of 1/b, and use this understanding to multiply a fraction by a whole number. For example, use a visual fraction model to express 3 × (2/5) as 6 × (1/5), recognizing this product as 6/5. (In general, n × (a/b) = (n × a)/b.)"
|
| 24 |
+
23,4.NF.C.6,"Use decimal notation for fractions with denominators 10 or 100. For example, rewrite 0.62 as 62/100; describe a length as 0.62 meters; locate 0.62 on a number line diagram."
|
| 25 |
+
24,4.OA.A.1,"Interpret a multiplication equation as a comparison, e.g., interpret 35 = 5 × 7 as a statement that 35 is 5 times as many as 7 and 7 times as many as 5. Represent verbal statements of multiplicative comparisons as multiplication equations."
|
| 26 |
+
25,4.OA.B.4,Find all factor pairs for a whole number in the range 1—100. Recognize that a whole number is a multiple of each of its factors. Determine whether a given whole number in the range 1—100 is a multiple of a given one-digit number. Determine whether a given whole number in the range 1—100 is prime or composite.
|
| 27 |
+
26,5.G.A.1,"Use a pair of perpendicular number lines, called axes, to define a coordinate system, with the intersection of the lines (the origin) arranged to coincide with the 0 on each line and a given point in the plane located by using an ordered pair of numbers, called its coordinates. Understand that the first number indicates how far to travel from the origin in the direction of one axis, and the second number indicates how far to travel in the direction of the second axis, with the convention that the names of the two axes and the coordinates correspond (e.g., x-axis and x-coordinate, y-axis and y-coordinate)."
|
| 28 |
+
27,5.G.A.2,"Represent real world and mathematical problems by graphing points in the first quadrant of the coordinate plane, and interpret coordinate values of points in the context of the situation."
|
| 29 |
+
28,5.G.B.4,Classify two-dimensional figures in a hierarchy based on properties.
|
| 30 |
+
29,5.MD.A.1,"Convert among different-sized standard measurement units within a given measurement system (e.g., convert 5 cm to 0.05 m), and use these conversions in solving multi-step, real world problems."
|
| 31 |
+
30,5.MD.B.2,"Make a line plot to display a data set of measurements in fractions of a unit (1/2, 1/4, 1/8). Use operations on fractions for this grade to solve problems involving information presented in line plots. For example, given different measurements of liquid in identical beakers, find the amount of liquid each beaker would contain if the total amount in all the beakers were redistributed equally."
|
| 32 |
+
31,5.MD.C.3b,A solid figure which can be packed without gaps or overlaps using n unit cubes is said to have a volume of n cubic units.
|
| 33 |
+
32,5.MD.C.4,"Measure volumes by counting unit cubes, using cubic cm, cubic in, cubic ft, and improvised units."
|
| 34 |
+
33,5.MD.C.5a,"Find the volume of a right rectangular prism with whole-number side lengths by packing it with unit cubes, and show that the volume is the same as would be found by multiplying the edge lengths, equivalently by multiplying the height by the area of the base. Represent threefold whole-number products as volumes, e.g., to represent the associative property of multiplication."
|
| 35 |
+
34,5.MD.C.5b,Apply the formulas V = l × w × h and V = b × h for rectangular prisms to find volumes of right rectangular prisms with whole-number edge lengths in the context of solving real world and mathematical problems.
|
| 36 |
+
35,5.NBT.A.1,"Recognize that in a multi-digit number, a digit in one place represents 10 times as much as it represents in the place to its right and 1/10 of what it represents in the place to its left."
|
| 37 |
+
36,5.NBT.A.2,"Explain patterns in the number of zeros of the product when multiplying a number by powers of 10, and explain patterns in the placement of the decimal point when a decimal is multiplied or divided by a power of 10. Use whole-number exponents to denote powers of 10."
|
| 38 |
+
37,5.NBT.A.3b,"Compare two decimals to thousandths based on meanings of the digits in each place, using >, =, and < symbols to record the results of comparisons."
|
| 39 |
+
38,5.NBT.B.5,Fluently multiply multi-digit whole numbers using the standard algorithm.
|
| 40 |
+
39,5.NBT.B.6,"Find whole-number quotients of whole numbers with up to four-digit dividends and two-digit divisors, using strategies based on place value, the properties of operations, and/or the relationship between multiplication and division. Illustrate and explain the calculation by using equations, rectangular arrays, and/or area models."
|
| 41 |
+
40,5.NBT.B.7,"Add, subtract, multiply, and divide decimals to hundredths, using concrete models or drawings and strategies based on place value, properties of operations, and/or the relationship between addition and subtraction; relate the strategy to a written method and explain the reasoning used."
|
| 42 |
+
41,5.NF.A.1,"Add and subtract fractions with unlike denominators (including mixed numbers) by replacing given fractions with equivalent fractions in such a way as to produce an equivalent sum or difference of fractions with like denominators. For example, 2/3 + 5/4 = 8/12 + 15/12 = 23/12. (In general, a/b + c/d = (ad + bc)/bd.)"
|
| 43 |
+
42,5.NF.A.2,"Solve word problems involving addition and subtraction of fractions referring to the same whole, including cases of unlike denominators, e.g., by using visual fraction models or equations to represent the problem. Use benchmark fractions and number sense of fractions to estimate mentally and assess the reasonableness of answers. For example, recognize an incorrect result 2/5 + 1/2 = 3/7, by observing that 3/7 < 1/2."
|
| 44 |
+
43,5.NF.B.3,"Interpret a fraction as division of the numerator by the denominator (a/b = a ÷ b). Solve word problems involving division of whole numbers leading to answers in the form of fractions or mixed numbers, e.g., by using visual fraction models or equations to represent the problem. For example, interpret 3/4 as the result of dividing 3 by 4, noting that 3/4 multiplied by 4 equals 3, and that when 3 wholes are shared equally among 4 people each person has a share of size 3/4. If 9 people want to share a 50-pound sack of rice equally by weight, how many pounds of rice should each person get? Between what two whole numbers does your answer lie?"
|
| 45 |
+
44,5.NF.B.4a,"Interpret the product (a/b) × q as a parts of a partition of q into b equal parts; equivalently, as the result of a sequence of operations a × q ÷ b. For example, use a visual fraction model to show (2/3) × 4 = 8/3, and create a story context for this equation. Do the same with (2/3) × (4/5) = 8/15. (In general, (a/b) × (c/d) = ac/bd.)"
|
| 46 |
+
45,5.NF.B.4b,"Find the area of a rectangle with fractional side lengths by tiling it with unit squares of the appropriate unit fraction side lengths, and show that the area is the same as would be found by multiplying the side lengths. Multiply fractional side lengths to find areas of rectangles, and represent fraction products as rectangular areas."
|
| 47 |
+
46,5.NF.B.5a,"Comparing the size of a product to the size of one factor on the basis of the size of the other factor, without performing the indicated multiplication."
|
| 48 |
+
47,5.NF.B.5b,Explaining why multiplying a given number by a fraction greater than 1 results in a product greater than the given number (recognizing multiplication by whole numbers greater than 1 as a familiar case); explaining why multiplying a given number by a fraction less than 1 results in a product smaller than the given number; and relating the principle of fraction equivalence a/b = (n×a)/(n×b) to the effect of multiplying a/b by 1.
|
| 49 |
+
48,5.NF.B.6,"Solve real world problems involving multiplication of fractions and mixed numbers, e.g., by using visual fraction models or equations to represent the problem."
|
| 50 |
+
49,5.NF.B.7a,"Interpret division of a unit fraction by a non-zero whole number, and compute such quotients. For example, create a story context for (1/3) ÷ 4, and use a visual fraction model to show the quotient. Use the relationship between multiplication and division to explain that (1/3) ÷ 4 = 1/12 because (1/12) × 4 = 1/3."
|
| 51 |
+
50,5.NF.B.7b,"Interpret division of a whole number by a unit fraction, and compute such quotients. For example, create a story context for 4 ÷ (1/5), and use a visual fraction model to show the quotient. Use the relationship between multiplication and division to explain that 4 ÷ (1/5) = 20 because 20 × (1/5) = 4."
|
| 52 |
+
51,5.NF.B.7c,"Solve real world problems involving division of unit fractions by non-zero whole numbers and division of whole numbers by unit fractions, e.g., by using visual fraction models and equations to represent the problem. For example, how much chocolate will each person get if 3 people share 1/2 lb of chocolate equally? How many 1/3-cup servings are in 2 cups of raisins?"
|
| 53 |
+
52,5.OA.A.1,"Use parentheses, brackets, or braces in numerical expressions, and evaluate expressions with these symbols."
|
| 54 |
+
53,5.OA.A.2,"Write simple expressions that record calculations with numbers, and interpret numerical expressions without evaluating them. For example, express the calculation ""add 8 and 7, then multiply by 2"" as 2 × (8 + 7). Recognize that 3 × (18932 + 921) is three times as large as 18932 + 921, without having to calculate the indicated sum or product."
|
| 55 |
+
54,6.EE.A.1,Write and evaluate numerical expressions involving whole-number exponents.
|
| 56 |
+
55,6.EE.A.2a,"Write expressions that record operations with numbers and with letters standing for numbers. For example, express the calculation ""Subtract y from 5"" as 5 - y."
|
| 57 |
+
56,6.EE.A.2b,"Identify parts of an expression using mathematical terms (sum, term, product, factor, quotient, coefficient); view one or more parts of an expression as a single entity. For example, describe the expression 2 (8 + 7) as a product of two factors; view (8 + 7) as both a single entity and a sum of two terms."
|
| 58 |
+
57,6.EE.A.2c,"Evaluate expressions at specific values of their variables. Include expressions that arise from formulas used in real-world problems. Perform arithmetic operations, including those involving whole-number exponents, in the conventional order when there are no parentheses to specify a particular order (Order of Operations). For example, use the formulas V = s³ and A = 6 s² to find the volume and surface area of a cube with sides of length s = 1/2."
|
| 59 |
+
58,6.EE.A.3,"Apply the properties of operations to generate equivalent expressions. For example, apply the distributive property to the expression 3 (2 + x) to produce the equivalent expression 6 + 3x; apply the distributive property to the expression 24x + 18y to produce the equivalent expression 6 (4x + 3y); apply properties of operations to y + y + y to produce the equivalent expression 3y."
|
| 60 |
+
59,6.EE.A.4,"Identify when two expressions are equivalent (i.e., when the two expressions name the same number regardless of which value is substituted into them). For example, the expressions y + y + y and 3y are equivalent because they name the same number regardless of which number y stands for."
|
| 61 |
+
60,6.EE.B.5,"Understand solving an equation or inequality as a process of answering a question: which values from a specified set, if any, make the equation or inequality true? Use substitution to determine whether a given number in a specified set makes an equation or inequality true."
|
| 62 |
+
61,6.EE.B.6,"Use variables to represent numbers and write expressions when solving a real-world or mathematical problem; understand that a variable can represent an unknown number, or, depending on the purpose at hand, any number in a specified set."
|
| 63 |
+
62,6.EE.B.7,"Solve real-world and mathematical problems by writing and solving equations of the form x + p = q and px = q for cases in which p, q and x are all nonnegative rational numbers."
|
| 64 |
+
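63,6.EE.B.8,Write an inequality of the form x > c or x < c to represent a constraint or condition in a real-world or mathematical problem. Recognize that inequalities of the form x > c or x < c have infinitely many solutions; represent solutions of such inequalities on number line diagrams.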
|
| 65 |
+
64,6.EE.C.9,"Use variables to represent two quantities in a real-world problem that change in relationship to one another; write an equation to express one quantity, thought of as the dependent variable, in terms of the other quantity, thought of as the independent variable. Analyze the relationship between the dependent and independent variables using graphs and tables, and relate these to the equation. For example, in a problem involving motion at constant speed, list and graph ordered pairs of distances and times, and write the equation d = 65t to represent the relationship between distance and time."
|
| 66 |
+
65,6.G.A.1,"Find the area of right triangles, other triangles, special quadrilaterals, and polygons by composing into rectangles or decomposing into triangles and other shapes; apply these techniques in the context of solving real-world and mathematical problems."
|
| 67 |
+
66,6.G.A.2,"Find the volume of a right rectangular prism with fractional edge lengths by packing it with unit cubes of the appropriate unit fraction edge lengths, and show that the volume is the same as would be found by multiplying the edge lengths of the prism. Apply the formulas V = l w h and V = b h to find volumes of right rectangular prisms with fractional edge lengths in the context of solving real-world and mathematical problems."
|
| 68 |
+
67,6.G.A.3,Draw polygons in the coordinate plane given coordinates for the vertices; use coordinates to find the length of a side joining points with the same first coordinate or the same second coordinate. Apply these techniques in the context of solving real-world and mathematical problems.
|
| 69 |
+
68,6.G.A.4,"Represent three-dimensional figures using nets made up of rectangles and triangles, and use the nets to find the surface area of these figures. Apply these techniques in the context of solving real-world and mathematical problems."
|
| 70 |
+
69,6.NS.A.1,"Interpret and compute quotients of fractions, and solve word problems involving division of fractions by fractions, e.g., by using visual fraction models and equations to represent the problem. For example, create a story context for (2/3) ÷ (3/4) and use a visual fraction model to show the quotient; use the relationship between multiplication and division to explain that (2/3) ÷ (3/4) = 8/9 because 3/4 of 8/9 is 2/3. (In general, (a/b) ÷ (c/d) = ad/bc.) How much chocolate will each person get if 3 people share 1/2 lb of chocolate equally? How many 3/4-cup servings are in 2/3 of a cup of yogurt? How wide is a rectangular strip of land with length 3/4 mi and area 1/2 square mi?"
|
| 71 |
+
70,6.NS.B.2,Fluently divide multi-digit numbers using the standard algorithm.
|
| 72 |
+
71,6.NS.B.3,"Fluently add, subtract, multiply, and divide multi-digit decimals using the standard algorithm for each operation."
|
| 73 |
+
72,6.NS.B.4,"Find the greatest common factor of two whole numbers less than or equal to 100 and the least common multiple of two whole numbers less than or equal to 12. Use the distributive property to express a sum of two whole numbers 1—100 with a common factor as a multiple of a sum of two whole numbers with no common factor. For example, express 36 + 8 as 4 (9 + 2)."
|
| 74 |
+
73,6.NS.C.5,"Understand that positive and negative numbers are used together to describe quantities having opposite directions or values (e.g., temperature above/below zero, elevation above/below sea level, credits/debits, positive/negative electric charge); use positive and negative numbers to represent quantities in real-world contexts, explaining the meaning of 0 in each situation."
|
| 75 |
+
74,6.NS.C.6a,"Recognize opposite signs of numbers as indicating locations on opposite sides of 0 on the number line; recognize that the opposite of the opposite of a number is the number itself, e.g., -(-3) = 3, and that 0 is its own opposite."
|
| 76 |
+
75,6.NS.C.6c,Find and position integers and other rational numbers on a horizontal or vertical number line diagram; find and position pairs of integers and other rational numbers on a coordinate plane.
|
| 77 |
+
76,6.NS.C.7a,"Interpret statements of inequality as statements about the relative position of two numbers on a number line diagram. For example, interpret -3 > -7 as a statement that -3 is located to the right of -7 on a number line oriented from left to right."
|
| 78 |
+
77,6.NS.C.7b,"Write, interpret, and explain statements of order for rational numbers in real-world contexts. For example, write -3 °C > -7 °C to express the fact that -3 °C is warmer than -7 °C."
|
| 79 |
+
78,6.NS.C.7c,"Understand the absolute value of a rational number as its distance from 0 on the number line; interpret absolute value as magnitude for a positive or negative quantity in a real-world situation. For example, for an account balance of -30 dollars, write |-30| = 30 to describe the size of the debt in dollars."
|
| 80 |
+
79,6.NS.C.7d,"Distinguish comparisons of absolute value from statements about order. For example, recognize that an account balance less than -30 dollars represents a debt greater than 30 dollars."
|
| 81 |
+
80,6.NS.C.8,Solve real-world and mathematical problems by graphing points in all four quadrants of the coordinate plane. Include use of coordinates and absolute value to find distances between points with the same first coordinate or the same second coordinate.
|
| 82 |
+
81,6.RP.A.1,"Understand the concept of a ratio and use ratio language to describe a ratio relationship between two quantities. For example, ""The ratio of wings to beaks in the bird house at the zoo was 2:1, because for every 2 wings there was 1 beak."" ""For every vote candidate A received, candidate C received nearly three votes."""
|
| 83 |
+
82,6.RP.A.2,"Understand the concept of a unit rate a/b associated with a ratio a:b with b ≠ 0, and use rate language in the context of a ratio relationship."
|
| 84 |
+
83,6.RP.A.3a,"Make tables of equivalent ratios relating quantities with whole number measurements, find missing values in the tables, and plot the pairs of values on the coordinate plane. Use tables to compare ratios."
|
| 85 |
+
84,6.RP.A.3b,"Solve unit rate problems including those involving unit pricing and constant speed. For example, if it took 7 hours to mow 4 lawns, then at that rate, how many lawns could be mowed in 35 hours? At what rate were lawns being mowed?"
|
| 86 |
+
85,6.RP.A.3c,"Find a percent of a quantity as a rate per 100 (e.g., 30% of a quantity means 30/100 times the quantity); solve problems involving finding the whole, given a part and the percent."
|
| 87 |
+
86,6.RP.A.3d,Use ratio reasoning to convert measurement units; manipulate and transform units appropriately when multiplying or dividing quantities.
|
| 88 |
+
87,6.SP.A.1,"Recognize a statistical question as one that anticipates variability in the data related to the question and accounts for it in the answers. For example, ""How old am I?"" is not a statistical question, but ""How old are the students in my school?"" is a statistical question because one anticipates variability in students' ages."
|
| 89 |
+
88,6.SP.A.2,"Understand that a set of data collected to answer a statistical question has a distribution which can be described by its center, spread, and overall shape."
|
| 90 |
+
89,6.SP.A.3,"Recognize that a measure of center for a numerical data set summarizes all of its values with a single number, while a measure of variation describes how its values vary with a single number."
|
| 91 |
+
90,6.SP.B.4,"Display numerical data in plots on a number line, including dot plots, histograms, and box plots."
|
| 92 |
+
91,6.SP.B.5a,Reporting the number of observations.
|
| 93 |
+
92,6.SP.B.5b,"Describing the nature of the attribute under investigation, including how it was measured and its units of measurement."
|
| 94 |
+
93,6.SP.B.5c,"Giving quantitative measures of center (median and/or mean) and variability (interquartile range and/or mean absolute deviation), as well as describing any overall pattern and any striking deviations from the overall pattern with reference to the context in which the data were gathered."
|
| 95 |
+
94,6.SP.B.5d,Relating the choice of measures of center and variability to the shape of the data distribution and the context in which the data were gathered.
|
| 96 |
+
95,7.EE.A.1,"Apply properties of operations as strategies to add, subtract, factor, and expand linear expressions with rational coefficients."
|
| 97 |
+
96,7.EE.A.2,"Understand that rewriting an expression in different forms in a problem context can shed light on the problem and how the quantities in it are related. For example, a + 0.05a = 1.05a means that ""increase by 5%"" is the same as ""multiply by 1.05."""
|
| 98 |
+
97,7.EE.B.3,"Solve multi-step real-life and mathematical problems posed with positive and negative rational numbers in any form (whole numbers, fractions, and decimals), using tools strategically. Apply properties of operations to calculate with numbers in any form; convert between forms as appropriate; and assess the reasonableness of answers using mental computation and estimation strategies. For example: If a woman making $25 an hour gets a 10% raise, she will make an additional 1/10 of her salary an hour, or $2.50, for a new salary of $27.50. If you want to place a towel bar 9 3/4 inches long in the center of a door that is 27 1/2 inches wide, you will need to place the bar about 9 inches from each edge; this estimate can be used as a check on the exact computation."
|
| 99 |
+
98,7.EE.B.4a,"Solve word problems leading to equations of the form px + q = r and p(x + q) = r, where p, q, and r are specific rational numbers. Solve equations of these forms fluently. Compare an algebraic solution to an arithmetic solution, identifying the sequence of the operations used in each approach. For example, the perimeter of a rectangle is 54 cm. Its length is 6 cm. What is its width?"
|
| 100 |
+
99,7.EE.B.4b,"Solve word problems leading to inequalities of the form px + q > r or px + q < r, where p, q, and r are specific rational numbers. Graph the solution set of the inequality and interpret it in the context of the problem. For example: As a salesperson, you are paid $50 per week plus $3 per sale. This week you want your pay to be at least $100. Write an inequality for the number of sales you need to make, and describe the solutions."
|
| 101 |
+
100,7.G.A.1,"Solve problems involving scale drawings of geometric figures, including computing actual lengths and areas from a scale drawing and reproducing a scale drawing at a different scale."
|
| 102 |
+
101,7.G.A.2,"Draw (freehand, with ruler and protractor, and with technology) geometric shapes with given conditions. Focus on constructing triangles from three measures of angles or sides, noticing when the conditions determine a unique triangle, more than one triangle, or no triangle."
|
| 103 |
+
102,7.G.A.3,"Describe the two-dimensional figures that result from slicing three-dimensional figures, as in plane sections of right rectangular prisms and right rectangular pyramids."
|
| 104 |
+
103,7.G.B.4,Know the formulas for the area and circumference of a circle and use them to solve problems; give an informal derivation of the relationship between the circumference and area of a circle.
|
| 105 |
+
104,7.G.B.5,"Use facts about supplementary, complementary, vertical, and adjacent angles in a multi-step problem to write and solve simple equations for an unknown angle in a figure."
|
| 106 |
+
105,7.G.B.6,"Solve real-world and mathematical problems involving area, volume and surface area of two- and three-dimensional objects composed of triangles, quadrilaterals, polygons, cubes, and right prisms."
|
| 107 |
+
106,7.NS.A.1a,"Describe situations in which opposite quantities combine to make 0. For example, a hydrogen atom has 0 charge because its two constituents are oppositely charged."
|
| 108 |
+
107,7.NS.A.1b,"Understand p + q as the number located a distance |q| from p, in the positive or negative direction depending on whether q is positive or negative. Show that a number and its opposite have a sum of 0 (are additive inverses). Interpret sums of rational numbers by describing real-world contexts."
|
| 109 |
+
108,7.NS.A.1c,"Understand subtraction of rational numbers as adding the additive inverse, p - q = p + (-q). Show that the distance between two rational numbers on the number line is the absolute value of their difference, and apply this principle in real-world contexts."
|
| 110 |
+
109,7.NS.A.1d,Apply properties of operations as strategies to add and subtract rational numbers.
|
| 111 |
+
110,7.NS.A.2a,"Understand that multiplication is extended from fractions to rational numbers by requiring that operations continue to satisfy the properties of operations, particularly the distributive property, leading to products such as (-1)(-1) = 1 and the rules for multiplying signed numbers. Interpret products of rational numbers by describing real-world contexts."
|
| 112 |
+
111,7.NS.A.2b,"Understand that integers can be divided, provided that the divisor is not zero, and every quotient of integers (with non-zero divisor) is a rational number. If p and q are integers, then -(p/q) = (-p)/q = p/(-q). Interpret quotients of rational numbers by describing real-world contexts."
|
| 113 |
+
112,7.NS.A.2c,Apply properties of operations as strategies to multiply and divide rational numbers.
|
| 114 |
+
113,7.NS.A.2d,Convert a rational number to a decimal using long division; know that the decimal form of a rational number terminates in 0s or eventually repeats.
|
| 115 |
+
114,7.NS.A.3,Solve real-world and mathematical problems involving the four operations with rational numbers.
|
| 116 |
+
115,7.RP.A.1,"Compute unit rates associated with ratios of fractions, including ratios of lengths, areas and other quantities measured in like or different units. For example, if a person walks 1/2 mile in each 1/4 hour, compute the unit rate as the complex fraction (1/2)/(1/4) miles per hour, equivalently 2 miles per hour."
|
| 117 |
+
116,7.RP.A.2a,"Decide whether two quantities are in a proportional relationship, e.g., by testing for equivalent ratios in a table or graphing on a coordinate plane and observing whether the graph is a straight line through the origin."
|
| 118 |
+
117,7.RP.A.2b,"Identify the constant of proportionality (unit rate) in tables, graphs, equations, diagrams, and verbal descriptions of proportional relationships."
|
| 119 |
+
118,7.RP.A.2c,"Represent proportional relationships by equations. For example, if total cost t is proportional to the number n of items purchased at a constant price p, the relationship between the total cost and the number of items can be expressed as t = pn."
|
| 120 |
+
119,7.RP.A.2d,"Explain what a point (x, y) on the graph of a proportional relationship means in terms of the situation, with special attention to the points (0, 0) and (1, r) where r is the unit rate."
|
| 121 |
+
120,7.RP.A.3,Use proportional relationships to solve multistep ratio and percent problems.
|
| 122 |
+
121,7.SP.A.1,Understand that statistics can be used to gain information about a population by examining a sample of the population; generalizations about a population from a sample are valid only if the sample is representative of that population. Understand that random sampling tends to produce representative samples and support valid inferences.
|
| 123 |
+
122,7.SP.A.2,"Use data from a random sample to draw inferences about a population with an unknown characteristic of interest. Generate multiple samples (or simulated samples) of the same size to gauge the variation in estimates or predictions. For example, estimate the mean word length in a book by randomly sampling words from the book; predict the winner of a school election based on randomly sampled survey data. Gauge how far off the estimate or prediction might be."
|
| 124 |
+
123,7.SP.B.3,"Informally assess the degree of visual overlap of two numerical data distributions with similar variabilities, measuring the difference between the centers by expressing it as a multiple of a measure of variability. For example, the mean height of players on the basketball team is 10 cm greater than the mean height of players on the soccer team, about twice the variability (mean absolute deviation) on either team; on a dot plot, the separation between the two distributions of heights is noticeable."
|
| 125 |
+
124,7.SP.B.4,"Use measures of center and measures of variability for numerical data from random samples to draw informal comparative inferences about two populations. For example, decide whether the words in a chapter of a seventh-grade science book are generally longer than the words in a chapter of a fourth-grade science book."
|
| 126 |
+
125,7.SP.C.5,"Understand that the probability of a chance event is a number between 0 and 1 that expresses the likelihood of the event occurring. Larger numbers indicate greater likelihood. A probability near 0 indicates an unlikely event, a probability around 1/2 indicates an event that is neither unlikely nor likely, and a probability near 1 indicates a likely event."
|
| 127 |
+
126,7.SP.C.6,"Approximate the probability of a chance event by collecting data on the chance process that produces it and observing its long-run relative frequency, and predict the approximate relative frequency given the probability. For example, when rolling a number cube 600 times, predict that a 3 or 6 would be rolled roughly 200 times, but probably not exactly 200 times."
|
| 128 |
+
127,7.SP.C.7a,"Develop a uniform probability model by assigning equal probability to all outcomes, and use the model to determine probabilities of events. For example, if a student is selected at random from a class, find the probability that Jane will be selected and the probability that a girl will be selected."
|
| 129 |
+
128,7.SP.C.7b,"Develop a probability model (which may not be uniform) by observing frequencies in data generated from a chance process. For example, find the approximate probability that a spinning penny will land heads up or that a tossed paper cup will land open-end down. Do the outcomes for the spinning penny appear to be equally likely based on the observed frequencies?"
|
| 130 |
+
129,7.SP.C.8a,"Understand that, just as with simple events, the probability of a compound event is the fraction of outcomes in the sample space for which the compound event occurs."
|
| 131 |
+
130,7.SP.C.8b,"Represent sample spaces for compound events using methods such as organized lists, tables and tree diagrams. For an event described in everyday language (e.g., ""rolling double sixes""), identify the outcomes in the sample space which compose the event."
|
| 132 |
+
131,7.SP.C.8c,"Design and use a simulation to generate frequencies for compound events. For example, use random digits as a simulation tool to approximate the answer to the question: If 40% of donors have type A blood, what is the probability that it will take at least 4 donors to find one with type A blood?"
|
| 133 |
+
132,8.EE.A.1,"Know and apply the properties of integer exponents to generate equivalent numerical expressions. For example, 3² × 3⁻⁵ = 3⁻³ = 1/3³ = 1/27."
|
| 134 |
+
133,8.EE.A.2,"Use square root and cube root symbols to represent solutions to equations of the form x² = p and x³ = p, where p is a positive rational number. Evaluate square roots of small perfect squares and cube roots of small perfect cubes. Know that √2 is irrational."
|
| 135 |
+
134,8.EE.A.3,"Use numbers expressed in the form of a single digit times an integer power of 10 to estimate very large or very small quantities, and to express how many times as much one is than the other. For example, estimate the population of the United States as 3 × 10⁸ and the population of the world as 7 × 10⁹, and determine that the world population is more than 20 times larger."
|
| 136 |
+
135,8.EE.A.4,"Perform operations with numbers expressed in scientific notation, including problems where both decimal and scientific notation are used. Use scientific notation and choose units of appropriate size for measurements of very large or very small quantities (e.g., use millimeters per year for seafloor spreading). Interpret scientific notation that has been generated by technology."
|
| 137 |
+
136,8.EE.B.5,"Graph proportional relationships, interpreting the unit rate as the slope of the graph. Compare two different proportional relationships represented in different ways. For example, compare a distance-time graph to a distance-time equation to determine which of two moving objects has greater speed."
|
| 138 |
+
137,8.EE.B.6,Use similar triangles to explain why the slope m is the same between any two distinct points on a non-vertical line in the coordinate plane; derive the equation y = mx for a line through the origin and the equation y = mx + b for a line intercepting the vertical axis at b.
|
| 139 |
+
138,8.EE.C.7a,"Give examples of linear equations in one variable with one solution, infinitely many solutions, or no solutions. Show which of these possibilities is the case by successively transforming the given equation into simpler forms, until an equivalent equation of the form x = a, a = a, or a = b results (where a and b are different numbers)."
|
| 140 |
+
139,8.EE.C.7b,"Solve linear equations with rational number coefficients, including equations whose solutions require expanding expressions using the distributive property and collecting like terms."
|
| 141 |
+
140,8.EE.C.8a,"Understand that solutions to a system of two linear equations in two variables correspond to points of intersection of their graphs, because points of intersection satisfy both equations simultaneously."
|
| 142 |
+
141,8.EE.C.8b,"Solve systems of two linear equations in two variables algebraically, and estimate solutions by graphing the equations. Solve simple cases by inspection. For example, 3x + 2y = 5 and 3x + 2y = 6 have no solution because 3x + 2y cannot simultaneously be 5 and 6."
|
| 143 |
+
142,8.EE.C.8c,"Solve real-world and mathematical problems leading to two linear equations in two variables. For example, given coordinates for two pairs of points, determine whether the line through the first pair of points intersects the line through the second pair."
|
| 144 |
+
143,8.F.A.1,Understand that a function is a rule that assigns to each input exactly one output. The graph of a function is the set of ordered pairs consisting of an input and the corresponding output.
|
| 145 |
+
144,8.F.A.2,"Compare properties of two functions each represented in a different way (algebraically, graphically, numerically in tables, or by verbal descriptions). For example, given a linear function represented by a table of values and a linear function represented by an algebraic expression, determine which function has the greater rate of change."
|
| 146 |
+
145,8.F.A.3,"Interpret the equation y = mx + b as defining a linear function, whose graph is a straight line; give examples of functions that are not linear. For example, the function A = s² giving the area of a square as a function of its side length is not linear because its graph contains the points (1,1), (2,4) and (3,9), which are not on a straight line."
|
| 147 |
+
146,8.F.B.4,"Construct a function to model a linear relationship between two quantities. Determine the rate of change and initial value of the function from a description of a relationship or from two (x, y) values, including reading these from a table or from a graph. Interpret the rate of change and initial value of a linear function in terms of the situation it models, and in terms of its graph or a table of values."
|
| 148 |
+
147,8.F.B.5,"Describe qualitatively the functional relationship between two quantities by analyzing a graph (e.g., where the function is increasing or decreasing, linear or nonlinear). Sketch a graph that exhibits the qualitative features of a function that has been described verbally."
|
| 149 |
+
148,8.G.A.1a,"Lines are taken to lines, and line segments to line segments of the same length."
|
| 150 |
+
149,8.G.A.1b,Angles are taken to angles of the same measure.
|
| 151 |
+
150,8.G.A.1c,Parallel lines are taken to parallel lines.
|
| 152 |
+
151,8.G.A.2,"Understand that a two-dimensional figure is congruent to another if the second can be obtained from the first by a sequence of rotations, reflections, and translations; given two congruent figures, describe a sequence that exhibits the congruence between them."
|
| 153 |
+
152,8.G.A.3,"Describe the effect of dilations, translations, rotations, and reflections on two-dimensional figures using coordinates."
|
| 154 |
+
153,8.G.A.4,"Understand that a two-dimensional figure is similar to another if the second can be obtained from the first by a sequence of rotations, reflections, translations, and dilations; given two similar two-dimensional figures, describe a sequence that exhibits the similarity between them."
|
| 155 |
+
154,8.G.A.5,"Use informal arguments to establish facts about the angle sum and exterior angle of triangles, about the angles created when parallel lines are cut by a transversal, and the angle-angle criterion for similarity of triangles. For example, arrange three copies of the same triangle so that the sum of the three angles appears to form a line, and give an argument in terms of transversals why this is so."
|
| 156 |
+
155,8.G.B.6,Explain a proof of the Pythagorean Theorem and its converse.
|
| 157 |
+
156,8.G.B.7,Apply the Pythagorean Theorem to determine unknown side lengths in right triangles in real-world and mathematical problems in two and three dimensions.
|
| 158 |
+
157,8.G.B.8,Apply the Pythagorean Theorem to find the distance between two points in a coordinate system.
|
| 159 |
+
158,8.G.C.9,"Know the formulas for the volumes of cones, cylinders, and spheres and use them to solve real-world and mathematical problems."
|
| 160 |
+
159,8.NS.A.1,"Know that numbers that are not rational are called irrational. Understand informally that every number has a decimal expansion; for rational numbers show that the decimal expansion repeats eventually, and convert a decimal expansion which repeats eventually into a rational number."
|
| 161 |
+
160,8.NS.A.2,"Use rational approximations of irrational numbers to compare the size of irrational numbers, locate them approximately on a number line diagram, and estimate the value of expressions (e.g., π²). For example, by truncating the decimal expansion of √2, show that √2 is between 1 and 2, then between 1.4 and 1.5, and explain how to continue on to get better approximations."
|
| 162 |
+
161,8.SP.A.1,"Construct and interpret scatter plots for bivariate measurement data to investigate patterns of association between two quantities. Describe patterns such as clustering, outliers, positive or negative association, linear association, and nonlinear association."
|
| 163 |
+
162,8.SP.A.2,"Know that straight lines are widely used to model relationships between two quantitative variables. For scatter plots that suggest a linear association, informally fit a straight line, and informally assess the model fit by judging the closeness of the data points to the line."
|
| 164 |
+
163,8.SP.A.3,"Use the equation of a linear model to solve problems in the context of bivariate measurement data, interpreting the slope and intercept. For example, in a linear model for a biology experiment, interpret a slope of 1.5 cm/hr as meaning that an additional hour of sunlight each day is associated with an additional 1.5 cm in mature plant height."
|
| 165 |
+
164,8.SP.A.4,"Understand that patterns of association can also be seen in bivariate categorical data by displaying frequencies and relative frequencies in a two-way table. Construct and interpret a two-way table summarizing data on two categorical variables collected from the same subjects. Use relative frequencies calculated for rows or columns to describe possible association between the two variables. For example, collect data from students in your class on whether or not they have a curfew on school nights and whether or not they have assigned chores at home. Is there evidence that those who have a curfew also tend to have chores?"
|
Data/Skills.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
README.md
ADDED
|
@@ -0,0 +1,246 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: cc-by-nc-4.0
|
| 3 |
+
|
| 4 |
+
extra_gated_prompt: "You agree to our [Responsible Use Guidelines](https://www.etrialstestbed.org/mathnet57963-guidelines)."
|
| 5 |
+
|
| 6 |
+
extra_gated_fields:
|
| 7 |
+
First and Last Name: text
|
| 8 |
+
Affiliation(university, company, etc): text
|
| 9 |
+
Country: country
|
| 10 |
+
Why are you asking to use this dataset?: text
|
| 11 |
+
How are you going to use this dataset?: text
|
| 12 |
+
How will you store and secure this data?: text
|
| 13 |
+
Do you have a university-affiliated email we could use to verify your request? If so, please enter it, if not, please explain why: text
|
| 14 |
+
|
| 15 |
+
I agree to use this dataset for non-commercial use ONLY: checkbox
|
| 16 |
+
While we think it's impossible for you to identify a student from these answers, you need to agree to not try to do so, and you also need to inform us if you find any PII in any of the images with the filenames: checkbox
|
| 17 |
+
Check box - I agree that this data will be stored on secured institutional systems, will not be shared with unauthorized parties and will be deleted or returned to ASSISTments when my research is complete: checkbox
|
| 18 |
+
|
| 19 |
+
configs:
|
| 20 |
+
- config_name: Foundational ASSIST Dataset
|
| 21 |
+
data_files: Data/Problems.csv
|
| 22 |
+
- config_name: Interactions
|
| 23 |
+
data_files: Data/Interactions.csv
|
| 24 |
+
- config_name: Skills
|
| 25 |
+
data_files: Data/Skills.csv
|
| 26 |
+
|
| 27 |
+
---
|
| 28 |
+
|
| 29 |
+
**IMPORTANT UPDATE ON 3/19:** Due to an issue with git LFS, 700k rows in Interactions.csv were missing. Please redownload the dataset to use these rows.
|
| 30 |
+
|
| 31 |
+
# Overview of Foundational ASSIST
|
| 32 |
+
Foundational ASSIST is a dataset containing all natural text of problems and student answers as recorded by ASSISTments. The problems are from the Illustrative Mathematics 6th - 8th grade math curriculum, a Common Core-aligned curriculum popular in the United States.
|
| 33 |
+
|
| 34 |
+
The data is in the "Data" folder. The code used to generate results in the original paper by Worden et al. is in the Code folder, and the results (in case you do not wish to reinference) are in the "Results" folder.
|
| 35 |
+
|
| 36 |
+
The dataset is comprised of three files:
|
| 37 |
+
1. **Interactions**, which contains students' attempts at problems,
|
| 38 |
+
2. **Problems**, which includes information relevant to problems, and
|
| 39 |
+
3. **Skills**, which links Problems to skills.
|
| 40 |
+
The dataset was curated to include 5,000 unique students who have each completed between 211 and 421 problems in ASSISTments. The dataset includes 1.7 million instances of students solving problems, complete with the answer text, problem text, distractor text, and more.
|
| 41 |
+
|
| 42 |
+
# Interactions File
|
| 43 |
+
Interactions consists of 1,722,169 unique instances of students solving problems. The information provided includes a) problem_id, linking to Problems b) hint_count, the number of hints the student requested c) answer_text, the exact text of their first answer d) saw_answer, a boolean indicating whether the student requested to see the correct answer e) discrete_score, the student's score (0 or 1) f) end_time, the time at which the student put in the correct answer to the problem g) user_xid, a unique identifier for each student.
|
| 44 |
+
|
| 45 |
+
Note that ASSISTments provides a discrete_score of 1 only if the student gets the problem correct on their first attempt, without requesting any support. If the student requests a hint, requests the answer, or has multiple tries they receive a 0. Accordingly, a student's answer can be the correct answer, but they can receive a 0 if the student requests a hint or sees the answer before entering the answer. This is shown in “table 7 cognitive accuracy when answers are incorrect” in our paper.
|
| 46 |
+
|
| 47 |
+
# Problems File
|
| 48 |
+
This file consists of information about 3,395 unique problems. The columns include
|
| 49 |
+
1. Problem Set Id, which links problems that follow one another
|
| 50 |
+
2. Problem Part, which indicates where in the Problem Set the problem occurs
|
| 51 |
+
3. Problem Type, describing the type of problem
|
| 52 |
+
4. Answer Type, describing the type of answer (see the table below for more information)
|
| 53 |
+
5. Problem Body, the problem text with HTML or markup illustrating exactly what the student saw (code to convert to natural language is available on the github)
|
| 54 |
+
6. Fill-in Options
|
| 55 |
+
7. Fill-in Answers
|
| 56 |
+
8. Multiple Choice Options
|
| 57 |
+
9. Multiple Choice Answers
|
| 58 |
+
10. problem_id
|
| 59 |
+
|
| 60 |
+
Problem Set example: PSB6N4 consists of three problems. The first is problem_id 151389 (as it has ‘Problem Part’ = 1), the second is problem_id 151533, and the third/last is 151647.
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
<table>
|
| 64 |
+
<tr>
|
| 65 |
+
<td>Answer Type
|
| 66 |
+
</td>
|
| 67 |
+
<td>Description
|
| 68 |
+
</td>
|
| 69 |
+
<td>Fill-in Options
|
| 70 |
+
</td>
|
| 71 |
+
<td>Fill-in Answers
|
| 72 |
+
</td>
|
| 73 |
+
<td>Multiple Choice Options
|
| 74 |
+
</td>
|
| 75 |
+
<td>Multiple Choice Answers
|
| 76 |
+
</td>
|
| 77 |
+
</tr>
|
| 78 |
+
<tr>
|
| 79 |
+
<td>Numeric
|
| 80 |
+
</td>
|
| 81 |
+
<td>The student must type in the correct number
|
| 82 |
+
</td>
|
| 83 |
+
<td>The correct answer. If there are multiple correct answers they are separated by a “,”.
|
| 84 |
+
</td>
|
| 85 |
+
<td>The correct answer. If there are multiple correct answers they are separated by a “,”.
|
| 86 |
+
</td>
|
| 87 |
+
<td>n/a
|
| 88 |
+
</td>
|
| 89 |
+
<td>n/a
|
| 90 |
+
</td>
|
| 91 |
+
</tr>
|
| 92 |
+
<tr>
|
| 93 |
+
<td>Drop Down
|
| 94 |
+
</td>
|
| 95 |
+
<td>The student must select the correct option from a drop-down menu. These are similar to multiple choice
|
| 96 |
+
</td>
|
| 97 |
+
<td>All the drop down options, separated by “&lt;/p&gt;,”
|
| 98 |
+
</td>
|
| 99 |
+
<td>The correct dropdown option.
|
| 100 |
+
</td>
|
| 101 |
+
<td>n/a
|
| 102 |
+
</td>
|
| 103 |
+
<td>n/a
|
| 104 |
+
</td>
|
| 105 |
+
</tr>
|
| 106 |
+
<tr>
|
| 107 |
+
<td>Algebraic Expression
|
| 108 |
+
</td>
|
| 109 |
+
<td>The student must type in the correct, short, algebraic expression, or a similar equivalent expression. E.g. if the answer is a^2+b^2 then b^2+a^2 would also be correct.
|
| 110 |
+
</td>
|
| 111 |
+
<td>The correct answer. If there are multiple correct answers they are separated by a “,”.
|
| 112 |
+
</td>
|
| 113 |
+
<td>The correct answer. If there are multiple correct answers they are separated by a “,”.
|
| 114 |
+
</td>
|
| 115 |
+
<td>n/a
|
| 116 |
+
</td>
|
| 117 |
+
<td>n/a
|
| 118 |
+
</td>
|
| 119 |
+
</tr>
|
| 120 |
+
<tr>
|
| 121 |
+
<td>Ordering
|
| 122 |
+
</td>
|
| 123 |
+
<td>The student must order some values in some order. Note the initial order of how these are presented to students is randomized.
|
| 124 |
+
</td>
|
| 125 |
+
<td>The correct order of objects, separated by “,”.
|
| 126 |
+
</td>
|
| 127 |
+
<td>The correct order of objects, separated by “,”.
|
| 128 |
+
</td>
|
| 129 |
+
<td>n/a
|
| 130 |
+
</td>
|
| 131 |
+
<td>n/a
|
| 132 |
+
</td>
|
| 133 |
+
</tr>
|
| 134 |
+
<tr>
|
| 135 |
+
<td>Exact Match
|
| 136 |
+
</td>
|
| 137 |
+
<td>The student must type in exactly the correct answer. This could be a number, expression, point, list, etc.
|
| 138 |
+
</td>
|
| 139 |
+
<td>The correct answer. If there are multiple correct answers they are separated by a “,”. Note that some answers, e.g. lists, require the whole text (1,2,3) and there is only a single answer.
|
| 140 |
+
</td>
|
| 141 |
+
<td>The correct answer. If there are multiple correct answers they are separated by a “,”. Note that some answers, e.g. lists, require the whole text (1,2,3) and there is only a single answer.
|
| 142 |
+
</td>
|
| 143 |
+
<td>n/a
|
| 144 |
+
</td>
|
| 145 |
+
<td>n/a
|
| 146 |
+
</td>
|
| 147 |
+
</tr>
|
| 148 |
+
<tr>
|
| 149 |
+
<td>Exact Fraction
|
| 150 |
+
</td>
|
| 151 |
+
<td>The student must type in exactly the correct fraction.
|
| 152 |
+
</td>
|
| 153 |
+
<td>The correct answer. If there are multiple correct answers they are separated by a “,”.
|
| 154 |
+
</td>
|
| 155 |
+
<td>The correct answer. If there are multiple correct answers they are separated by a “,”.
|
| 156 |
+
</td>
|
| 157 |
+
<td>n/a
|
| 158 |
+
</td>
|
| 159 |
+
<td>n/a
|
| 160 |
+
</td>
|
| 161 |
+
</tr>
|
| 162 |
+
<tr>
|
| 163 |
+
<td>Numeric Expression
|
| 164 |
+
</td>
|
| 165 |
+
<td>The student must type in a numeric expression. Note that simplification occurs, e.g. if the answer is 11^3, 1331 is also considered correct.
|
| 166 |
+
</td>
|
| 167 |
+
<td>The correct answer. If there are multiple correct answers they are separated by a “,”.
|
| 168 |
+
</td>
|
| 169 |
+
<td>The correct answer. If there are multiple correct answers they are separated by a “,”.
|
| 170 |
+
</td>
|
| 171 |
+
<td>n/a
|
| 172 |
+
</td>
|
| 173 |
+
<td>n/a
|
| 174 |
+
</td>
|
| 175 |
+
</tr>
|
| 176 |
+
<tr>
|
| 177 |
+
<td>Multiple Choice
|
| 178 |
+
</td>
|
| 179 |
+
<td>The student must select the correct option.
|
| 180 |
+
</td>
|
| 181 |
+
<td>n/a
|
| 182 |
+
</td>
|
| 183 |
+
<td>n/a
|
| 184 |
+
</td>
|
| 185 |
+
<td>A list of options, separated by ‘||’.
|
| 186 |
+
</td>
|
| 187 |
+
<td>The correct option.
|
| 188 |
+
</td>
|
| 189 |
+
</tr>
|
| 190 |
+
<tr>
|
| 191 |
+
<td>Check all that apply
|
| 192 |
+
</td>
|
| 193 |
+
<td>The student must select all correct option(s).
|
| 194 |
+
</td>
|
| 195 |
+
<td>n/a
|
| 196 |
+
</td>
|
| 197 |
+
<td>n/a
|
| 198 |
+
</td>
|
| 199 |
+
<td>A list of options, separated by ‘||’.
|
| 200 |
+
</td>
|
| 201 |
+
<td>The correct option(s), separated by ‘||’.
|
| 202 |
+
</td>
|
| 203 |
+
</tr>
|
| 204 |
+
</table>
|
| 205 |
+
|
| 206 |
+
# Skills File
|
| 207 |
+
The skills file consists of
|
| 208 |
+
1) problem_id, linking to problems in the Problems file
|
| 209 |
+
2) skill_id, a unique identifier per skill
|
| 210 |
+
3) node_code, which identifies the ASSISTments Skill tag Illustrative Math code for the skill and
|
| 211 |
+
4) node_name, a description of the skill. In total there are 224 unique skills.
|
| 212 |
+
|
| 213 |
+
# Data Source
|
| 214 |
+
All data across each file are from [ASSISTments](https://new.assistments.org/), where students complete in-class work as well as homework and receive support and feedback from the platform. This work was done in conjunction with [Dr. Heffernan’s lab at WPI](https://www.neilheffernan.net/home). To ensure student privacy, our team attempted to remove all Personally Identifiable Information (PII). However, it is possible that students typed PII into fill-in problems; we aimed to detect and remove it, but a manual review of 1.7 million interaction logs is infeasible. Accordingly, we ask that anyone using this dataset who comes across PII please contact us at [etrials@assistments.org](mailto:etrials@assistments.org) so it can be removed.
|
| 215 |
+
|
| 216 |
+
# License and Sharing Agreement
|
| 217 |
+
This dataset is licensed under CC-BY-NC-4.0. We require that this dataset is used for research and educational purposes following this
|
| 218 |
+
[Responsible Use Guidelines](https://www.etrialstestbed.org/mathnet57963-guidelines).
|
| 219 |
+
|
| 220 |
+
# Citation
|
| 221 |
+
|
| 222 |
+
If you use the **FoundationalASSIST** dataset in your research, please cite the following paper:
|
| 223 |
+
|
| 224 |
+
> Worden, E., Heffernan, C., Heffernan, N., & Sonkar, S. (2026). FoundationalASSIST: An Educational Dataset for Foundational Knowledge Tracing and Pedagogical Grounding of LLMs. *arXiv preprint arXiv:2602.00070*.
|
| 225 |
+
|
| 226 |
+
### BibTeX
|
| 227 |
+
|
| 228 |
+
```bibtex
|
| 229 |
+
@article{worden2026foundationalassist,
|
| 230 |
+
title={FoundationalASSIST: An Educational Dataset for Foundational Knowledge Tracing and Pedagogical Grounding of LLMs},
|
| 231 |
+
author={Worden, Eamon and Heffernan, Cristina and Heffernan, Neil and Sonkar, Shashank},
|
| 232 |
+
journal={arXiv preprint arXiv:2602.00070},
|
| 233 |
+
year={2026}
|
| 234 |
+
}
|
| 235 |
+
```
|
| 236 |
+
|
| 237 |
+
### FAQs
|
| 238 |
+
Q: Where is the code that cleans the problem text?
|
| 239 |
+
A: In this repository, in Code/clean_utils.py or Code/cleantext.py; the two are similar.
|
| 240 |
+
|
| 241 |
+
Q: How does discrete_score work?
|
| 242 |
+
A: Discrete score is 1 if the student solved the problem on their first try without requesting a hint (hint_count) or an explanation/seeing the answer (saw_answer).
|
| 243 |
+
Note this is recorded by ASSISTments. There are 433 rows where discrete_score = 1 despite saw_answer = True or hint_count > 0. In these instances, the student correctly solved the problem then requested hints/explanation/the answer anyways.
|
| 244 |
+
|
| 245 |
+
Q: Does the dataset track multiple attempts? (E.g. the student first incorrectly said 3, then incorrectly answered 6, then correctly answered 9)
|
| 246 |
+
A: No. We recognize it may be valuable to have this data. However, currently, answer_text (in Interactions.csv) is the first answer (right or wrong) the student submitted for the problem. Second/future attempts are not included in this dataset, but stay tuned.
|
Results/Problems_duplicated_problem_id.csv
ADDED
|
@@ -0,0 +1,209 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Problem Set Id,Problem Part,Problem Type,Answer Types,Problem Body,Fill-in Options,Fill-in Answers,Multiple Choice Options,Multiple Choice Answers,problem_id,duplicate_group_id,duplicate_problem_id_count,distinct_problem_body_count
|
| 2 |
+
PSBBP4J,2,Fill-in-the-blank(s),Numeric,"<p>Find the area of the triangle.</p>
|
| 3 |
+
<p> </p>
|
| 4 |
+
<p><ast-r type=""text"" marker=""1""></ast-r> square units.</p>
|
| 5 |
+
<p> </p>
|
| 6 |
+
<p><img src=""//resources.assistments.org/fetch/C/dd6e2450-6c1c-4383-af11-f5f38d8a554f.jpeg"" alt=""A triangle on a grid."" width=""400"" height=""148""></p>",11,11,,,242640,1,2,1
|
| 7 |
+
PSBBP4J,2,Fill-in-the-blank(s),Numeric,"<p>Find the area of the triangle.</p>
|
| 8 |
+
<p> </p>
|
| 9 |
+
<p><ast-r type=""text"" marker=""1""></ast-r> square units.</p>
|
| 10 |
+
<p> </p>
|
| 11 |
+
<p><img src=""//resources.assistments.org/fetch/C/dd6e2450-6c1c-4383-af11-f5f38d8a554f.jpeg"" alt=""A triangle on a grid."" width=""400"" height=""148""></p>",11,11,,,242640,1,2,1
|
| 12 |
+
PSBBPYN,1,Fill-in-the-blank(s),Numeric,"<p>Find the area of this trapezoid.</p>
|
| 13 |
+
<p> </p>
|
| 14 |
+
<p><img src=""//resources.assistments.org/fetch/C/de588b0b-4edc-4a7d-a1c9-ae9534107846.jpeg"" alt=""Trapezoid, bases 8 and 4 units. Height 3 units."" width=""287"" height=""158""></p>
|
| 15 |
+
<p> </p>
|
| 16 |
+
<p><ast-r type=""text"" marker=""1""></ast-r>square units</p>",18,18,,,242072,2,2,1
|
| 17 |
+
PSBBPYN,1,Fill-in-the-blank(s),Numeric,"<p>Find the area of this trapezoid.</p>
|
| 18 |
+
<p> </p>
|
| 19 |
+
<p><img src=""//resources.assistments.org/fetch/C/de588b0b-4edc-4a7d-a1c9-ae9534107846.jpeg"" alt=""Trapezoid, bases 8 and 4 units. Height 3 units."" width=""287"" height=""158""></p>
|
| 20 |
+
<p> </p>
|
| 21 |
+
<p><ast-r type=""text"" marker=""1""></ast-r>square units</p>",18,18,,,242072,2,2,1
|
| 22 |
+
PSBBUFM,1,Fill-in-the-blank(s),Numeric,"<p>Find the area of the shaded region in square units.</p>
|
| 23 |
+
<p> </p>
|
| 24 |
+
<p><img src=""//resources.assistments.org/fetch/C/ad6cb685-7371-4c50-b2d5-b0417869595c.jpeg"" width=""278"" height=""277""></p>
|
| 25 |
+
<p> </p>
|
| 26 |
+
<p><ast-r type=""text"" marker=""1""></ast-r> square units</p>",40,40,,,266614,3,2,1
|
| 27 |
+
PSBBUFM,1,Fill-in-the-blank(s),Numeric,"<p>Find the area of the shaded region in square units.</p>
|
| 28 |
+
<p> </p>
|
| 29 |
+
<p><img src=""//resources.assistments.org/fetch/C/ad6cb685-7371-4c50-b2d5-b0417869595c.jpeg"" width=""278"" height=""277""></p>
|
| 30 |
+
<p> </p>
|
| 31 |
+
<p><ast-r type=""text"" marker=""1""></ast-r> square units</p>",40,40,,,266614,3,2,1
|
| 32 |
+
PSBCTHE,3,Fill-in-the-blank(s),Numeric,"<p>There are 3 cats in a room and no other creatures. Each cat has 2 ears, 4 paws, and 1 tail.</p>
|
| 33 |
+
<p> </p>
|
| 34 |
+
<p><img src=""//resources.assistments.org/fetch/C/89240f58-bda5-4371-9d5b-35881563e526.jpeg"" width=""357"" height=""143""></p>
|
| 35 |
+
<p> </p>
|
| 36 |
+
<p> </p>
|
| 37 |
+
<p> </p>
|
| 38 |
+
<p>Complete each statement:</p>
|
| 39 |
+
<p> </p>
|
| 40 |
+
<p>There are <ast-r type=""text"" marker=""1""></ast-r> paws for every tail.<br><br></p>
|
| 41 |
+
<p>There are <ast-r type=""text"" marker=""2""></ast-r> paws for every ear.</p>","2, 4","2, 4",,,437233,4,4,1
|
| 42 |
+
PSBCTHE,3,Fill-in-the-blank(s),Numeric,"<p>There are 3 cats in a room and no other creatures. Each cat has 2 ears, 4 paws, and 1 tail.</p>
|
| 43 |
+
<p> </p>
|
| 44 |
+
<p><img src=""//resources.assistments.org/fetch/C/89240f58-bda5-4371-9d5b-35881563e526.jpeg"" width=""357"" height=""143""></p>
|
| 45 |
+
<p> </p>
|
| 46 |
+
<p> </p>
|
| 47 |
+
<p> </p>
|
| 48 |
+
<p>Complete each statement:</p>
|
| 49 |
+
<p> </p>
|
| 50 |
+
<p>There are <ast-r type=""text"" marker=""1""></ast-r> paws for every tail.<br><br></p>
|
| 51 |
+
<p>There are <ast-r type=""text"" marker=""2""></ast-r> paws for every ear.</p>","2, 4","2, 4",,,437233,4,4,1
|
| 52 |
+
PSBCTHE,3,Fill-in-the-blank(s),Numeric,"<p>There are 3 cats in a room and no other creatures. Each cat has 2 ears, 4 paws, and 1 tail.</p>
|
| 53 |
+
<p> </p>
|
| 54 |
+
<p><img src=""//resources.assistments.org/fetch/C/89240f58-bda5-4371-9d5b-35881563e526.jpeg"" width=""357"" height=""143""></p>
|
| 55 |
+
<p> </p>
|
| 56 |
+
<p> </p>
|
| 57 |
+
<p> </p>
|
| 58 |
+
<p>Complete each statement:</p>
|
| 59 |
+
<p> </p>
|
| 60 |
+
<p>There are <ast-r type=""text"" marker=""1""></ast-r> paws for every tail.<br><br></p>
|
| 61 |
+
<p>There are <ast-r type=""text"" marker=""2""></ast-r> paws for every ear.</p>","2, 4","2, 4",,,437233,4,4,1
|
| 62 |
+
PSBCTHE,3,Fill-in-the-blank(s),Numeric,"<p>There are 3 cats in a room and no other creatures. Each cat has 2 ears, 4 paws, and 1 tail.</p>
|
| 63 |
+
<p> </p>
|
| 64 |
+
<p><img src=""//resources.assistments.org/fetch/C/89240f58-bda5-4371-9d5b-35881563e526.jpeg"" width=""357"" height=""143""></p>
|
| 65 |
+
<p> </p>
|
| 66 |
+
<p> </p>
|
| 67 |
+
<p> </p>
|
| 68 |
+
<p>Complete each statement:</p>
|
| 69 |
+
<p> </p>
|
| 70 |
+
<p>There are <ast-r type=""text"" marker=""1""></ast-r> paws for every tail.<br><br></p>
|
| 71 |
+
<p>There are <ast-r type=""text"" marker=""2""></ast-r> paws for every ear.</p>","4, 2","4, 2",,,437233,4,4,1
|
| 72 |
+
PRABFEFN,1,Fill-in-the-blank(s),Numeric,"<p>A square has a side length of 5 feet. What is its area?</p>
|
| 73 |
+
<p> </p>
|
| 74 |
+
<p><ast-r type=""text"" marker=""1""></ast-r> square feet</p>",25,25,,,89104,5,4,1
|
| 75 |
+
PRABFEFN,1,Fill-in-the-blank(s),Numeric,"<p>A square has a side length of 5 feet. What is its area?</p>
|
| 76 |
+
<p> </p>
|
| 77 |
+
<p><ast-r type=""text"" marker=""1""></ast-r> square feet</p>",25,25,,,89104,5,4,1
|
| 78 |
+
PRABFEFN,1,Fill-in-the-blank(s),Numeric,"<p>A square has a side length of 5 feet. What is its area?</p>
|
| 79 |
+
<p> </p>
|
| 80 |
+
<p><ast-r type=""text"" marker=""1""></ast-r> square feet</p>",25,25,,,89104,5,4,1
|
| 81 |
+
PRABFEFN,1,Fill-in-the-blank(s),Numeric,"<p>A square has a side length of 5 feet. What is its area?</p>
|
| 82 |
+
<p> </p>
|
| 83 |
+
<p><ast-r type=""text"" marker=""1""></ast-r> square feet</p>",25,25,,,89104,5,4,1
|
| 84 |
+
PRABE64P,1,Fill-in-the-blank(s),Numeric,"<p>A square is 3 inches by 3 inches. What is its area?</p>
|
| 85 |
+
<p> </p>
|
| 86 |
+
<p><ast-r type=""text"" marker=""1""></ast-r> square inches</p>",9,9,,,88850,6,4,1
|
| 87 |
+
PRABE64P,1,Fill-in-the-blank(s),Numeric,"<p>A square is 3 inches by 3 inches. What is its area?</p>
|
| 88 |
+
<p> </p>
|
| 89 |
+
<p><ast-r type=""text"" marker=""1""></ast-r> square inches</p>",9,9,,,88850,6,4,1
|
| 90 |
+
PRABE64P,1,Fill-in-the-blank(s),Numeric,"<p>A square is 3 inches by 3 inches. What is its area?</p>
|
| 91 |
+
<p> </p>
|
| 92 |
+
<p><ast-r type=""text"" marker=""1""></ast-r> square inches</p>",9,9,,,88850,6,4,1
|
| 93 |
+
PRABE64P,1,Fill-in-the-blank(s),Numeric,"<p>A square is 3 inches by 3 inches. What is its area?</p>
|
| 94 |
+
<p> </p>
|
| 95 |
+
<p><ast-r type=""text"" marker=""1""></ast-r> square inches</p>",9,9,,,88850,6,4,1
|
| 96 |
+
PRABFEFP,1,Fill-in-the-blank(s),Numeric,"<p>The area of a square is 36 square centimeters. What is the length of each side of the square?</p>
|
| 97 |
+
<p> </p>
|
| 98 |
+
<p><ast-r type=""text"" marker=""1""></ast-r> centimeters</p>",6,6,,,89321,7,4,1
|
| 99 |
+
PRABFEFP,1,Fill-in-the-blank(s),Numeric,"<p>The area of a square is 36 square centimeters. What is the length of each side of the square?</p>
|
| 100 |
+
<p> </p>
|
| 101 |
+
<p><ast-r type=""text"" marker=""1""></ast-r> centimeters</p>",6,6,,,89321,7,4,1
|
| 102 |
+
PRABFEFP,1,Fill-in-the-blank(s),Numeric,"<p>The area of a square is 36 square centimeters. What is the length of each side of the square?</p>
|
| 103 |
+
<p> </p>
|
| 104 |
+
<p><ast-r type=""text"" marker=""1""></ast-r> centimeters</p>",6,6,,,89321,7,4,1
|
| 105 |
+
PRABFEFP,1,Fill-in-the-blank(s),Numeric,"<p>The area of a square is 36 square centimeters. What is the length of each side of the square?</p>
|
| 106 |
+
<p> </p>
|
| 107 |
+
<p><ast-r type=""text"" marker=""1""></ast-r> centimeters</p>",6,6,,,89321,7,4,1
|
| 108 |
+
PSBTTU,1,Fill-in-the-blank(s),Numeric,"<p>Find the area of this quadrilateral.</p>
|
| 109 |
+
<p> </p>
|
| 110 |
+
<p><img src=""//resources.assistments.org/fetch/C/e2e6b7c7-6b81-42ca-b6df-a45cd250d4dd.jpeg"" alt=""A blue quadrilateral in the shape of a kite. Two smaller sides span across 3 squares. Two longer sides span across 5 squares."" width=""303"" height=""238""></p>
|
| 111 |
+
<p> </p>
|
| 112 |
+
<p><ast-r type=""text"" marker=""1""></ast-r> square units</p>",24,24,,,88316,8,4,1
|
| 113 |
+
PSBTTU,1,Fill-in-the-blank(s),Numeric,"<p>Find the area of this quadrilateral.</p>
|
| 114 |
+
<p> </p>
|
| 115 |
+
<p><img src=""//resources.assistments.org/fetch/C/e2e6b7c7-6b81-42ca-b6df-a45cd250d4dd.jpeg"" alt=""A blue quadrilateral in the shape of a kite. Two smaller sides span across 3 squares. Two longer sides span across 5 squares."" width=""303"" height=""238""></p>
|
| 116 |
+
<p> </p>
|
| 117 |
+
<p><ast-r type=""text"" marker=""1""></ast-r> square units</p>",24,24,,,88316,8,4,1
|
| 118 |
+
PSBTTU,1,Fill-in-the-blank(s),Numeric,"<p>Find the area of this quadrilateral.</p>
|
| 119 |
+
<p> </p>
|
| 120 |
+
<p><img src=""//resources.assistments.org/fetch/C/e2e6b7c7-6b81-42ca-b6df-a45cd250d4dd.jpeg"" alt=""A blue quadrilateral in the shape of a kite. Two smaller sides span across 3 squares. Two longer sides span across 5 squares."" width=""303"" height=""238""></p>
|
| 121 |
+
<p> </p>
|
| 122 |
+
<p><ast-r type=""text"" marker=""1""></ast-r> square units</p>",24,24,,,88316,8,4,1
|
| 123 |
+
PSBTTU,1,Fill-in-the-blank(s),Numeric,"<p>Find the area of this quadrilateral.</p>
|
| 124 |
+
<p> </p>
|
| 125 |
+
<p><img src=""//resources.assistments.org/fetch/C/e2e6b7c7-6b81-42ca-b6df-a45cd250d4dd.jpeg"" alt=""A blue quadrilateral in the shape of a kite. Two smaller sides span across 3 squares. Two longer sides span across 5 squares."" width=""303"" height=""238""></p>
|
| 126 |
+
<p> </p>
|
| 127 |
+
<p><ast-r type=""text"" marker=""1""></ast-r> square units</p>",24,24,,,88316,8,4,1
|
| 128 |
+
PRABE64M,1,Multiple Choice (select all),Check All That Apply,"<p>Here is a diagram that describes the cups of green and white paint in a mixture.</p>
|
| 129 |
+
<p> </p>
|
| 130 |
+
<table style=""height: 46px; width: 439px;"">
|
| 131 |
+
<tbody>
|
| 132 |
+
<tr>
|
| 133 |
+
<td style=""width: 162.599px;"">green paint (cups)</td>
|
| 134 |
+
<td style=""width: 259.401px;""><img src=""/images/assistments/519919.jpg"" alt=""Four squares labeled "green paint (cups)""" width=""200"" height=""43""></td>
|
| 135 |
+
</tr>
|
| 136 |
+
<tr>
|
| 137 |
+
<td style=""width: 162.599px;"">white paint (cups)</td>
|
| 138 |
+
<td style=""width: 259.401px;""><img src=""/images/assistments/519920.jpg"" alt=""Two squares labeled "white paint (cups)""" width=""200"" height=""50""></td>
|
| 139 |
+
</tr>
|
| 140 |
+
</tbody>
|
| 141 |
+
</table>
|
| 142 |
+
<p> </p>
|
| 143 |
+
<p>Select <strong>all</strong> the statements that accurately describe this diagram.</p>",,,"The ratio of cups of white paint to cups of green paint is 2 to 4. || For every cup of green paint, there are two cups of white paint. || <p>The ratio of cups of green paint to cups of white paint is 4 : 2.</p> || For every cup of white paint, there are two cups of green paint. || <p>The ratio of cups of green paint to cups of white paint is 2 : 4.</p>","The ratio of cups of white paint to cups of green paint is 2 to 4. || <p>The ratio of cups of green paint to cups of white paint is 4 : 2.</p> || For every cup of white paint, there are two cups of green paint.",88681,9,4,1
|
| 144 |
+
PRABE64M,1,Multiple Choice (select all),Check All That Apply,"<p>Here is a diagram that describes the cups of green and white paint in a mixture.</p>
|
| 145 |
+
<p> </p>
|
| 146 |
+
<table style=""height: 46px; width: 439px;"">
|
| 147 |
+
<tbody>
|
| 148 |
+
<tr>
|
| 149 |
+
<td style=""width: 162.599px;"">green paint (cups)</td>
|
| 150 |
+
<td style=""width: 259.401px;""><img src=""/images/assistments/519919.jpg"" alt=""Four squares labeled "green paint (cups)""" width=""200"" height=""43""></td>
|
| 151 |
+
</tr>
|
| 152 |
+
<tr>
|
| 153 |
+
<td style=""width: 162.599px;"">white paint (cups)</td>
|
| 154 |
+
<td style=""width: 259.401px;""><img src=""/images/assistments/519920.jpg"" alt=""Two squares labeled "white paint (cups)""" width=""200"" height=""50""></td>
|
| 155 |
+
</tr>
|
| 156 |
+
</tbody>
|
| 157 |
+
</table>
|
| 158 |
+
<p> </p>
|
| 159 |
+
<p>Select <strong>all</strong> the statements that accurately describe this diagram.</p>",,,"The ratio of cups of white paint to cups of green paint is 2 to 4. || For every cup of green paint, there are two cups of white paint. || <p>The ratio of cups of green paint to cups of white paint is 4 : 2.</p> || For every cup of white paint, there are two cups of green paint. || <p>The ratio of cups of green paint to cups of white paint is 2 : 4.</p>","The ratio of cups of white paint to cups of green paint is 2 to 4. || <p>The ratio of cups of green paint to cups of white paint is 4 : 2.</p> || For every cup of white paint, there are two cups of green paint.",88681,9,4,1
|
| 160 |
+
PRABE64M,1,Multiple Choice (select all),Check All That Apply,"<p>Here is a diagram that describes the cups of green and white paint in a mixture.</p>
|
| 161 |
+
<p> </p>
|
| 162 |
+
<table style=""height: 46px; width: 439px;"">
|
| 163 |
+
<tbody>
|
| 164 |
+
<tr>
|
| 165 |
+
<td style=""width: 162.599px;"">green paint (cups)</td>
|
| 166 |
+
<td style=""width: 259.401px;""><img src=""/images/assistments/519919.jpg"" alt=""Four squares labeled "green paint (cups)""" width=""200"" height=""43""></td>
|
| 167 |
+
</tr>
|
| 168 |
+
<tr>
|
| 169 |
+
<td style=""width: 162.599px;"">white paint (cups)</td>
|
| 170 |
+
<td style=""width: 259.401px;""><img src=""/images/assistments/519920.jpg"" alt=""Two squares labeled "white paint (cups)""" width=""200"" height=""50""></td>
|
| 171 |
+
</tr>
|
| 172 |
+
</tbody>
|
| 173 |
+
</table>
|
| 174 |
+
<p> </p>
|
| 175 |
+
<p>Select <strong>all</strong> the statements that accurately describe this diagram.</p>",,,"The ratio of cups of white paint to cups of green paint is 2 to 4. || For every cup of green paint, there are two cups of white paint. || <p>The ratio of cups of green paint to cups of white paint is 4 : 2.</p> || For every cup of white paint, there are two cups of green paint. || <p>The ratio of cups of green paint to cups of white paint is 2 : 4.</p>","The ratio of cups of white paint to cups of green paint is 2 to 4. || <p>The ratio of cups of green paint to cups of white paint is 4 : 2.</p> || For every cup of white paint, there are two cups of green paint.",88681,9,4,1
|
| 176 |
+
PRABE64M,1,Multiple Choice (select all),Check All That Apply,"<p>Here is a diagram that describes the cups of green and white paint in a mixture.</p>
|
| 177 |
+
<p> </p>
|
| 178 |
+
<table style=""height: 46px; width: 439px;"">
|
| 179 |
+
<tbody>
|
| 180 |
+
<tr>
|
| 181 |
+
<td style=""width: 162.599px;"">green paint (cups)</td>
|
| 182 |
+
<td style=""width: 259.401px;""><img src=""/images/assistments/519919.jpg"" alt=""Four squares labeled "green paint (cups)""" width=""200"" height=""43""></td>
|
| 183 |
+
</tr>
|
| 184 |
+
<tr>
|
| 185 |
+
<td style=""width: 162.599px;"">white paint (cups)</td>
|
| 186 |
+
<td style=""width: 259.401px;""><img src=""/images/assistments/519920.jpg"" alt=""Two squares labeled "white paint (cups)""" width=""200"" height=""50""></td>
|
| 187 |
+
</tr>
|
| 188 |
+
</tbody>
|
| 189 |
+
</table>
|
| 190 |
+
<p> </p>
|
| 191 |
+
<p>Select <strong>all</strong> the statements that accurately describe this diagram.</p>",,,"The ratio of cups of white paint to cups of green paint is 2 to 4. || For every cup of green paint, there are two cups of white paint. || <p>The ratio of cups of green paint to cups of white paint is 4 : 2.</p> || For every cup of white paint, there are two cups of green paint. || <p>The ratio of cups of green paint to cups of white paint is 2 : 4.</p>","The ratio of cups of white paint to cups of green paint is 2 to 4. || <p>The ratio of cups of green paint to cups of white paint is 4 : 2.</p> || For every cup of white paint, there are two cups of green paint.",88681,9,4,1
|
| 192 |
+
PRABERFN,1,Multiple Choice (select 1),Multiple Choice,"<p>Which scale is equivalent to 1 cm to 1 km?</p>
|
| 193 |
+
<div style=""position: absolute; left: 57px; top: 51px;""> </div>",,,"1 to 1,000 || 10,000 to 1 || 1 to 100,000 || 100,000 to 1 || 1 to 1,000,000","1 to 100,000",146478,10,2,1
|
| 194 |
+
PRABERFN,1,Multiple Choice (select 1),Multiple Choice,"<p>Which scale is equivalent to 1 cm to 1 km?</p>
|
| 195 |
+
<div style=""position: absolute; left: 57px; top: 51px;""> </div>",,,"1 to 1,000 || 10,000 to 1 || 1 to 100,000 || 100,000 to 1 || 1 to 1,000,000","1 to 100,000",146478,10,2,1
|
| 196 |
+
PSB5BQ,1,Multiple Choice (select 1),Multiple Choice,"<p>In one version of trail mix, there are 3 cups of peanuts mixed with 2 cups of raisins. In another version of trail mix, there are 4.5 cups of peanuts mixed with 3 cups of raisins. Are the ratios equivalent for the two mixes?</p>",,,Yes || No,Yes,143888,11,2,1
|
| 197 |
+
PSB5BQ,1,Multiple Choice (select 1),Multiple Choice,"<p>In one version of trail mix, there are 3 cups of peanuts mixed with 2 cups of raisins. In another version of trail mix, there are 4.5 cups of peanuts mixed with 3 cups of raisins. Are the ratios equivalent for the two mixes?</p>",,,Yes || No,Yes,143888,11,2,1
|
| 198 |
+
PRABE55W,1,Multiple Choice (select all),Check All That Apply,<p>Select <strong>all</strong> the polygons.</p>,,,"<p><img src=""//resources.assistments.org/fetch/C/5001ccaa-f02f-4c1b-87b8-492ba85fe5be.jpeg"" alt=""Figure A"" width=""200""></p> || <p><img src=""//resources.assistments.org/fetch/C/b0f7cbc8-accd-4c14-b0f5-b23ae3461ed5.jpeg"" alt=""Figure B"" width=""200""></p> || <p><img src=""//resources.assistments.org/fetch/C/5995031e-500c-46c0-8dcb-26f5cbc80407.jpeg"" alt=""Figure C"" width=""200""></p> || <p><img src=""//resources.assistments.org/fetch/C/0444903e-1fce-4e76-ac8d-35cb8f9a6e9e.jpeg"" alt=""Figure D"" width=""200""></p> || <p><img src=""//resources.assistments.org/fetch/C/8ac8f6fa-3da0-4d32-8800-4f155bc611d7.jpeg"" alt=""Figure E"" width=""200""></p> || <p><img src=""//resources.assistments.org/fetch/C/236cc5d3-5eb9-4f13-aaca-df2a9021603a.jpeg"" alt=""Figure F"" width=""200""></p>","<p><img src=""//resources.assistments.org/fetch/C/5001ccaa-f02f-4c1b-87b8-492ba85fe5be.jpeg"" alt=""Figure A"" width=""200""></p> || <p><img src=""//resources.assistments.org/fetch/C/5995031e-500c-46c0-8dcb-26f5cbc80407.jpeg"" alt=""Figure C"" width=""200""></p>",243115,12,2,1
|
| 199 |
+
PRABE55W,1,Multiple Choice (select all),Check All That Apply,<p>Select <strong>all</strong> the polygons.</p>,,,"<p><img src=""//resources.assistments.org/fetch/C/5001ccaa-f02f-4c1b-87b8-492ba85fe5be.jpeg"" alt=""Figure A"" width=""200""></p> || <p><img src=""//resources.assistments.org/fetch/C/b0f7cbc8-accd-4c14-b0f5-b23ae3461ed5.jpeg"" alt=""Figure B"" width=""200""></p> || <p><img src=""//resources.assistments.org/fetch/C/5995031e-500c-46c0-8dcb-26f5cbc80407.jpeg"" alt=""Figure C"" width=""200""></p> || <p><img src=""//resources.assistments.org/fetch/C/0444903e-1fce-4e76-ac8d-35cb8f9a6e9e.jpeg"" alt=""Figure D"" width=""200""></p> || <p><img src=""//resources.assistments.org/fetch/C/8ac8f6fa-3da0-4d32-8800-4f155bc611d7.jpeg"" alt=""Figure E"" width=""200""></p> || <p><img src=""//resources.assistments.org/fetch/C/236cc5d3-5eb9-4f13-aaca-df2a9021603a.jpeg"" alt=""Figure F"" width=""200""></p>","<p><img src=""//resources.assistments.org/fetch/C/5001ccaa-f02f-4c1b-87b8-492ba85fe5be.jpeg"" alt=""Figure A"" width=""200""></p> || <p><img src=""//resources.assistments.org/fetch/C/5995031e-500c-46c0-8dcb-26f5cbc80407.jpeg"" alt=""Figure C"" width=""200""></p>",243115,12,2,1
|
| 200 |
+
PRABP75T,1,Multiple Choice (select all),Check All That Apply,"<p>Salt and sugar give two distinctly different tastes, one salty and the other sweet. In a mixture of salt and sugar, it is possible for the mixture to be salty, sweet or both. Will any of these mixtures taste exactly the same?</p>",,,"Mixture A: 2 cups water, 4 teaspoons salt, 0.25 cup sugar || Mixture B: 1.5 cups water, 3 teaspoons salt, 0.2 cup sugar || Mixture C: 1 cup water, 2 teaspoons salt, 0.125 cup sugar || None of these mixtures taste the same.","Mixture A: 2 cups water, 4 teaspoons salt, 0.25 cup sugar || Mixture C: 1 cup water, 2 teaspoons salt, 0.125 cup sugar",48435,13,2,1
|
| 201 |
+
PRABP75T,1,Multiple Choice (select all),Check All That Apply,"<p>Salt and sugar give two distinctly different tastes, one salty and the other sweet. In a mixture of salt and sugar, it is possible for the mixture to be salty, sweet or both. Will any of these mixtures taste exactly the same?</p>",,,"Mixture A: 2 cups water, 4 teaspoons salt, 0.25 cup sugar || Mixture B: 1.5 cups water, 3 teaspoons salt, 0.2 cup sugar || Mixture C: 1 cup water, 2 teaspoons salt, 0.125 cup sugar || None of these mixtures taste the same.","Mixture A: 2 cups water, 4 teaspoons salt, 0.25 cup sugar || Mixture C: 1 cup water, 2 teaspoons salt, 0.125 cup sugar",48435,13,2,1
|
| 202 |
+
PSBGXA,1,Multiple Choice (select 1),Multiple Choice,<p>The side lengths of Triangle B are all 5 more than the side lengths of Triangle A. Can Triangle B be a scaled copy of Triangle A?</p>,,,Yes || No,Yes,36042,14,2,1
|
| 203 |
+
PSBGXA,1,Multiple Choice (select 1),Multiple Choice,<p>The side lengths of Triangle B are all 5 more than the side lengths of Triangle A. Can Triangle B be a scaled copy of Triangle A?</p>,,,Yes || No,Yes,36042,14,2,1
|
| 204 |
+
PRABMQH8,1,Multiple Choice (select all),Check All That Apply,"<p>Triangle Z is a scale copy of Triangle M.<br><br></p>
|
| 205 |
+
<p><img src=""/images/assistments/274148.jpg"" alt=""Triangle M with side lengths of 4, 7, and 10."" width=""497"" height=""142""></p>
|
| 206 |
+
<p><br>Select <strong>all</strong> the sets of values that could be the side lengths of Triangle Z. </p>",,,"<p>8,11,14</p> || 10,17.5,25 || 6,9,11 || 6,10.5,15 || 8,14,20","10,17.5,25 || 6,10.5,15 || 8,14,20",569234,15,2,1
|
| 207 |
+
PRABMQH8,1,Multiple Choice (select all),Check All That Apply,"<p>Triangle Z is a scale copy of Triangle M.<br><br></p>
|
| 208 |
+
<p><img src=""/images/assistments/274148.jpg"" alt=""Triangle M with side lengths of 4, 7, and 10."" width=""497"" height=""142""></p>
|
| 209 |
+
<p><br>Select <strong>all</strong> the sets of values that could be the side lengths of Triangle Z. </p>",,,"<p>8,11,14</p> || 10,17.5,25 || 6,9,11 || 6,10.5,15 || 8,14,20","10,17.5,25 || 6,10.5,15 || 8,14,20",569234,15,2,1
|
Results/Problems_same_body_different_problem_id.csv
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Problem Set Id,Problem Part,Problem Type,Answer Types,Problem Body,Fill-in Options,Fill-in Answers,Multiple Choice Options,Multiple Choice Answers,problem_id,duplicate_group_id,distinct_problem_id_count,distinct_problem_ids
|
| 2 |
+
PSB2CK,6,Fill-in-the-blank(s),Numeric,"<p>What is its answer?</p>
|
| 3 |
+
<p> </p>
|
| 4 |
+
<p><ast-r type=""text"" marker=""1""></ast-r></p>",2.64,2.64,,,126628,1,3,126091;126362;126628
|
| 5 |
+
PSB2CK,2,Fill-in-the-blank(s),Numeric,"<p>What is its answer?</p>
|
| 6 |
+
<p> </p>
|
| 7 |
+
<p><ast-r type=""text"" marker=""1""></ast-r></p>",264,264,,,126091,1,3,126091;126362;126628
|
| 8 |
+
PSB2CK,4,Fill-in-the-blank(s),Numeric,"<p>What is its answer?</p>
|
| 9 |
+
<p> </p>
|
| 10 |
+
<p><ast-r type=""text"" marker=""1""></ast-r></p>",26.4,26.4,,,126362,1,3,126091;126362;126628
|
| 11 |
+
PSBBS8M,2,Fill-in-the-blank(s),Numeric,"<p>How many different triangles are there?</p>
|
| 12 |
+
<p> </p>
|
| 13 |
+
<p><ast-r type=""text"" marker=""1""></ast-r> different triangles</p>",3,3,,,259177,2,2,258731;259177
|
| 14 |
+
PSBBS6F,2,Fill-in-the-blank(s),Numeric,"<p>How many different triangles are there?</p>
|
| 15 |
+
<p> </p>
|
| 16 |
+
<p><ast-r type=""text"" marker=""1""></ast-r> different triangles</p>",4,4,,,258731,2,2,258731;259177
|
| 17 |
+
PRABFG57,1,Order / Sort,Ordering,<p>Order these numbers from least to greatest:</p>,"<p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mo>-</mo><mfrac><mn>1</mn><mn>2</mn></mfrac></math></p>, <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mfrac><mn>1</mn><mn>2</mn></mfrac></math></p>, <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mo>-</mo><mn>1</mn></math></p>, <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mo>-</mo><mn>1</mn><mfrac><mn>1</mn><mn>2</mn></mfrac></math></p>, <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mn>1</mn></math></p>, <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mn>0</mn></math></p>","<p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mo>-</mo><mfrac><mn>1</mn><mn>2</mn></mfrac></math></p>, <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mfrac><mn>1</mn><mn>2</mn></mfrac></math></p>, <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mo>-</mo><mn>1</mn></math></p>, <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mo>-</mo><mn>1</mn><mfrac><mn>1</mn><mn>2</mn></mfrac></math></p>, <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mn>1</mn></math></p>, <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mn>0</mn></math></p>",,,406337,3,2,406337;6158
|
| 18 |
+
PRABFKN2,1,Order / Sort,Ordering,<p>Order these numbers from least to greatest:</p>,"<p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mo>-</mo><mn>18</mn></math></p>, <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mfenced open=""|"" close=""|""><mrow><mo>-</mo><mn>17</mn></mrow></mfenced></math></p>, <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mfenced open=""|"" close=""|""><mrow><mo>-</mo><mn>18</mn></mrow></mfenced></math></p>, <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mfenced open=""|"" close=""|""><mn>19</mn></mfenced></math></p>, <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mn>20</mn></math></p>","<p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mo>-</mo><mn>18</mn></math></p>, <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mfenced open=""|"" close=""|""><mrow><mo>-</mo><mn>17</mn></mrow></mfenced></math></p>, <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mfenced open=""|"" close=""|""><mrow><mo>-</mo><mn>18</mn></mrow></mfenced></math></p>, <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mfenced open=""|"" close=""|""><mn>19</mn></mfenced></math></p>, <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mn>20</mn></math></p>",,,6158,3,2,406337;6158
|
| 19 |
+
PSBDASC,2,Fill-in-the-blank(s),Numeric,"<p>What is the decimal representation of that number?</p>
|
| 20 |
+
<p> </p>
|
| 21 |
+
<p><ast-r type=""text"" marker=""1""></ast-r></p>",0.5,0.5,,,522651,4,2,522651;523084
|
| 22 |
+
PSBDASC,6,Fill-in-the-blank(s),Numeric,"<p>What is the decimal representation of that number?</p>
|
| 23 |
+
<p> </p>
|
| 24 |
+
<p><ast-r type=""text"" marker=""1""></ast-r></p>",0.125,0.125,,,523084,4,2,522651;523084
|
| 25 |
+
PSBCDYX,3,Fill-in-the-blank(s),Numeric,"<p>Solve the equation you wrote.</p>
|
| 26 |
+
<p> </p>
|
| 27 |
+
<p><em>x</em> = <ast-r type=""text"" marker=""1""></ast-r></p>",7.8,7.8,,,366764,5,2,366764;56163
|
| 28 |
+
PSBMSN,3,Fill-in-the-blank(s),Numeric,"<p>Solve the equation you wrote.</p>
|
| 29 |
+
<p> </p>
|
| 30 |
+
<p><em>x</em> = <ast-r type=""text"" marker=""1""></ast-r></p>",11.6,11.6,,,56163,5,2,366764;56163
|
| 31 |
+
PSBBSYB,2,Fill-in-the-blank(s),Numeric,"<p>If not, write it in scientific notation.</p>
|
| 32 |
+
<p> </p>
|
| 33 |
+
<p><ast-r type=""text"" marker=""1""></ast-r><math xmlns=""http://www.w3.org/1998/Math/MathML""><mo>×</mo><mn>10</mn></math><sup><ast-r type=""text"" marker=""2""></ast-r></sup></p>","6, 3.6","6, 3.6",,,257604,6,3,257097;257352;257604
|
| 34 |
+
PSBBSWV,2,Fill-in-the-blank(s),Numeric,"<p>If not, write it in scientific notation.</p>
|
| 35 |
+
<p> </p>
|
| 36 |
+
<p><ast-r type=""text"" marker=""1""></ast-r><math xmlns=""http://www.w3.org/1998/Math/MathML""><mo>×</mo><mn>10</mn></math><sup><ast-r type=""text"" marker=""2""></ast-r></sup></p>","-4, 9.9","-4, 9.9",,,257352,6,3,257097;257352;257604
|
| 37 |
+
PSBBSVC,2,Fill-in-the-blank(s),Numeric,"<p>If not, write it in scientific notation.</p>
|
| 38 |
+
<p> </p>
|
| 39 |
+
<p><ast-r type=""text"" marker=""1""></ast-r><math xmlns=""http://www.w3.org/1998/Math/MathML""><mo>×</mo><mn>10</mn></math><sup><ast-r type=""text"" marker=""2""></ast-r></sup></p>","4, 4.82","4, 4.82",,,257097,6,3,257097;257352;257604
|
| 40 |
+
PSBGBG,4,Fill-in-the-blank(s),Numeric,"<p>On which day did it occur?</p>
|
| 41 |
+
<p> </p>
|
| 42 |
+
<p>Day <ast-r type=""text"" marker=""1""></ast-r></p>",11,11,,,32600,7,2,32363;32600
|
| 43 |
+
PSBGBG,2,Fill-in-the-blank(s),Numeric,"<p>On which day did it occur?</p>
|
| 44 |
+
<p> </p>
|
| 45 |
+
<p>Day <ast-r type=""text"" marker=""1""></ast-r></p>",14,14,,,32363,7,2,32363;32600
|
| 46 |
+
PRABQM8U,1,Multiple Choice (select all),Check All That Apply,<p>Select <strong>all</strong> the true statements.</p>,,,"Given a box plot, it is always possible to calculate the mean of the data. || Given a box plot, it is always possible to calculate the median of the data. || Given a box plot, it is always possible to construct a corresponding dot plot. || Given a dot plot, it is always possible to construct a corresponding box plot. || Given a histogram, it is always possible to construct a corresponding box plot.","Given a box plot, it is always possible to calculate the median of the data. || Given a dot plot, it is always possible to construct a corresponding box plot.",193572,8,3,193572;362112;69320
|
| 47 |
+
PRABEVAG,1,Multiple Choice (select all),Check All That Apply,<p>Select <strong>all</strong> the true statements.</p>,,,"<p>2.3 + (-2.3) is equal to zero.</p> || <p>(-3.7) + (-4.1) is positive.</p> || <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mo>-</mo><mn>2</mn><mo>.</mo><mn>6</mn><mo> </mo><mo>-</mo><mo> </mo><mfenced><mrow><mo>-</mo><mfrac><mn>12</mn><mn>4</mn></mfrac></mrow></mfenced><mo> </mo><mi>is</mi><mo> </mo><mi>positive</mi></math>.</p> || <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mfenced><mfrac><mn>5</mn><mn>2</mn></mfrac></mfenced><mo> </mo><mo>+</mo><mo> </mo><mfenced><mrow><mo>-</mo><mn>2</mn><mo>.</mo><mn>5</mn></mrow></mfenced><mo> </mo><mi>is</mi><mo> </mo><mi>negative</mi></math>.</p> || <p>72 - (-100) is negative.</p>","<p>2.3 + (-2.3) is equal to zero.</p> || <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mo>-</mo><mn>2</mn><mo>.</mo><mn>6</mn><mo> </mo><mo>-</mo><mo> </mo><mfenced><mrow><mo>-</mo><mfrac><mn>12</mn><mn>4</mn></mfrac></mrow></mfenced><mo> </mo><mi>is</mi><mo> </mo><mi>positive</mi></math>.</p>",69320,8,3,193572;362112;69320
|
| 48 |
+
PRABFJP8,1,Multiple Choice (select all),Check All That Apply,<p>Select <strong>all</strong> the true statements.</p>,,,"<p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mo>-</mo><mn>5</mn><mo> </mo><mo><</mo><mo> </mo><mfenced open=""|"" close=""|""><mrow><mo>-</mo><mn>5</mn></mrow></mfenced></math></p> || <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mfenced open=""|"" close=""|""><mrow><mo>-</mo><mn>6</mn></mrow></mfenced><mo> </mo><mo><</mo><mo> </mo><mo>-</mo><mn>5</mn></math></p> || <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mfenced open=""|"" close=""|""><mrow><mo>-</mo><mn>6</mn></mrow></mfenced><mo> </mo><mo><</mo><mo> </mo><mn>3</mn></math></p> || <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mn>4</mn><mo> </mo><mo><</mo><mo> </mo><mfenced open=""|"" close=""|""><mrow><mo>-</mo><mn>7</mn></mrow></mfenced></math></p> || <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mfenced open=""|"" close=""|""><mrow><mo>-</mo><mn>7</mn></mrow></mfenced><mo> </mo><mo><</mo><mo> </mo><mfenced open=""|"" close=""|""><mrow><mo>-</mo><mn>8</mn></mrow></mfenced></math></p>","<p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mo>-</mo><mn>5</mn><mo> </mo><mo><</mo><mo> </mo><mfenced open=""|"" close=""|""><mrow><mo>-</mo><mn>5</mn></mrow></mfenced></math></p> || <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mn>4</mn><mo> </mo><mo><</mo><mo> </mo><mfenced open=""|"" close=""|""><mrow><mo>-</mo><mn>7</mn></mrow></mfenced></math></p> || <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mfenced open=""|"" close=""|""><mrow><mo>-</mo><mn>7</mn></mrow></mfenced><mo> </mo><mo><</mo><mo> </mo><mfenced open=""|"" close=""|""><mrow><mo>-</mo><mn>8</mn></mrow></mfenced></math></p>",362112,8,3,193572;362112;69320
|
| 49 |
+
PSBCKT6,1,Multiple Choice (select 1),Multiple Choice,"<p>For the pair of numbers below, select the number that is greater.</p>",,,"<p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mn>42</mn><mo> </mo><mo>·</mo><mo> </mo><msup><mn>10</mn><mn>7</mn></msup></math></p> || <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mn>8</mn><mo>.</mo><mn>5</mn><mo> </mo><mo>·</mo><mo> </mo><msup><mn>10</mn><mn>8</mn></msup></math></p>","<p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mn>8</mn><mo>.</mo><mn>5</mn><mo> </mo><mo>·</mo><mo> </mo><msup><mn>10</mn><mn>8</mn></msup></math></p>",399091,9,3,398365;398730;399091
|
| 50 |
+
PSBCKRX,1,Multiple Choice (select 1),Multiple Choice,"<p>For the pair of numbers below, select the number that is greater.</p>",,,"<p><span><span><span><span><span><math xmlns=""http://www.w3.org/1998/Math/MathML""><mn>2</mn><mo> </mo><mo>·</mo><mo> </mo><msup><mn>10</mn><mn>6</mn></msup></math></span></span></span></span></span> </p> || <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mn>7</mn><mo>.</mo><mn>839</mn><mo> </mo><mo>·</mo><mo> </mo><msup><mn>10</mn><mn>6</mn></msup></math><span><span><span><span></span></span></span></span> </p>","<p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mn>7</mn><mo>.</mo><mn>839</mn><mo> </mo><mo>·</mo><mo> </mo><msup><mn>10</mn><mn>6</mn></msup></math><span><span><span><span></span></span></span></span> </p>",398730,9,3,398365;398730;399091
|
| 51 |
+
PSBCKP4,1,Multiple Choice (select 1),Multiple Choice,"<p>For the pair of numbers below, select the number that is greater.</p>",,,"<p><span><span><span><span><span><math xmlns=""http://www.w3.org/1998/Math/MathML""><mn>17</mn><mo>·</mo><msup><mn>10</mn><mn>8</mn></msup></math></span></span></span></span></span> </p> || <p><math xmlns=""http://www.w3.org/1998/Math/MathML""><mn>4</mn><mo>·</mo><msup><mn>10</mn><mn>8</mn></msup></math></p>","<p><span><span><span><span><span><math xmlns=""http://www.w3.org/1998/Math/MathML""><mn>17</mn><mo>·</mo><msup><mn>10</mn><mn>8</mn></msup></math></span></span></span></span></span> </p>",398365,9,3,398365;398730;399091
|
| 52 |
+
PRABQPRF,1,Multiple Choice (select 1),Multiple Choice,<p>Which of these describes a unique polygon?</p>,,,"A quadrilateral with 4 right angles || A triangle with angles 30°, 80°, and 70° || A triangle with side lengths 7 cm and 8 cm and a 70° angle || A triangle with each side length 5 inches",A triangle with each side length 5 inches,558308,10,2,483667;558308
|
| 53 |
+
PRABEVDM,1,Multiple Choice (select 1),Multiple Choice,<p>Which of these describes a unique polygon?</p>,,,"A triangle with angles 30°, 50°, and 100° || A quadrilateral with each side length 5 cm || A triangle with side lengths 6 cm, 7 cm, and 8 cm || A triangle with side lengths 4 cm and 5 cm and a 50° angle","A triangle with side lengths 6 cm, 7 cm, and 8 cm",483667,10,2,483667;558308
|
| 54 |
+
PSBBD9R,2,Multiple Choice (select 1),Multiple Choice,<p>Which group shows greater variability?</p>,,,<p>Group A</p> || group B,<p>Group A</p>,193337,11,2,193337;529063
|
| 55 |
+
PSBDBQU,2,Multiple Choice (select 1),Multiple Choice,<p>Which group shows greater variability?</p>,,,Group A || Group B,Group A,529063,11,2,193337;529063
|
Results/day_student_attempt_distribution.png
ADDED
|
Git LFS Details
|
Results/day_student_attempt_distribution_00173df52c65a9c13b45978c5a56ea2db67b17deded674486f7af64f7c5fd9ac.png
ADDED
|
Git LFS Details
|
Results/day_student_attempt_distribution_001849d40d6bf1e2ed5826e4420e291e54655a2b6b77a9ab701faaeb4e899abe.png
ADDED
|
Git LFS Details
|
Results/day_student_attempt_distribution_counts.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Results/day_student_attempt_distribution_counts_00173df52c65a9c13b45978c5a56ea2db67b17deded674486f7af64f7c5fd9ac.csv
ADDED
|
@@ -0,0 +1,262 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
bin_left_min,bin_right_min,bin_width_min,attempt_count,probability,percentage
|
| 2 |
+
0.0,1440.0,1440.0,1,0.002617801047120419,0.2617801047120419
|
| 3 |
+
1440.0,2880.0,1440.0,2,0.005235602094240838,0.5235602094240838
|
| 4 |
+
2880.0,4320.0,1440.0,1,0.002617801047120419,0.2617801047120419
|
| 5 |
+
4320.0,5760.0,1440.0,0,0.0,0.0
|
| 6 |
+
5760.0,7200.0,1440.0,0,0.0,0.0
|
| 7 |
+
7200.0,8640.0,1440.0,0,0.0,0.0
|
| 8 |
+
8640.0,10080.0,1440.0,0,0.0,0.0
|
| 9 |
+
10080.0,11520.0,1440.0,0,0.0,0.0
|
| 10 |
+
11520.0,12960.0,1440.0,3,0.007853403141361256,0.7853403141361256
|
| 11 |
+
12960.0,14400.0,1440.0,0,0.0,0.0
|
| 12 |
+
14400.0,15840.0,1440.0,0,0.0,0.0
|
| 13 |
+
15840.0,17280.0,1440.0,6,0.015706806282722512,1.5706806282722512
|
| 14 |
+
17280.0,18720.0,1440.0,0,0.0,0.0
|
| 15 |
+
18720.0,20160.0,1440.0,0,0.0,0.0
|
| 16 |
+
20160.0,21600.0,1440.0,2,0.005235602094240838,0.5235602094240838
|
| 17 |
+
21600.0,23040.0,1440.0,3,0.007853403141361256,0.7853403141361256
|
| 18 |
+
23040.0,24480.0,1440.0,0,0.0,0.0
|
| 19 |
+
24480.0,25920.0,1440.0,0,0.0,0.0
|
| 20 |
+
25920.0,27360.0,1440.0,8,0.020942408376963352,2.094240837696335
|
| 21 |
+
27360.0,28800.0,1440.0,0,0.0,0.0
|
| 22 |
+
28800.0,30240.0,1440.0,0,0.0,0.0
|
| 23 |
+
30240.0,31680.0,1440.0,0,0.0,0.0
|
| 24 |
+
31680.0,33120.0,1440.0,1,0.002617801047120419,0.2617801047120419
|
| 25 |
+
33120.0,34560.0,1440.0,1,0.002617801047120419,0.2617801047120419
|
| 26 |
+
34560.0,36000.0,1440.0,4,0.010471204188481676,1.0471204188481675
|
| 27 |
+
36000.0,37440.0,1440.0,1,0.002617801047120419,0.2617801047120419
|
| 28 |
+
37440.0,38880.0,1440.0,0,0.0,0.0
|
| 29 |
+
38880.0,40320.0,1440.0,0,0.0,0.0
|
| 30 |
+
40320.0,41760.0,1440.0,0,0.0,0.0
|
| 31 |
+
41760.0,43200.0,1440.0,0,0.0,0.0
|
| 32 |
+
43200.0,44640.0,1440.0,1,0.002617801047120419,0.2617801047120419
|
| 33 |
+
44640.0,46080.0,1440.0,0,0.0,0.0
|
| 34 |
+
46080.0,47520.0,1440.0,0,0.0,0.0
|
| 35 |
+
47520.0,48960.0,1440.0,0,0.0,0.0
|
| 36 |
+
48960.0,50400.0,1440.0,0,0.0,0.0
|
| 37 |
+
50400.0,51840.0,1440.0,9,0.02356020942408377,2.356020942408377
|
| 38 |
+
51840.0,53280.0,1440.0,0,0.0,0.0
|
| 39 |
+
53280.0,54720.0,1440.0,0,0.0,0.0
|
| 40 |
+
54720.0,56160.0,1440.0,0,0.0,0.0
|
| 41 |
+
56160.0,57600.0,1440.0,0,0.0,0.0
|
| 42 |
+
57600.0,59040.0,1440.0,0,0.0,0.0
|
| 43 |
+
59040.0,60480.0,1440.0,0,0.0,0.0
|
| 44 |
+
60480.0,61920.0,1440.0,6,0.015706806282722512,1.5706806282722512
|
| 45 |
+
61920.0,63360.0,1440.0,3,0.007853403141361256,0.7853403141361256
|
| 46 |
+
63360.0,64800.0,1440.0,8,0.020942408376963352,2.094240837696335
|
| 47 |
+
64800.0,66240.0,1440.0,8,0.020942408376963352,2.094240837696335
|
| 48 |
+
66240.0,67680.0,1440.0,0,0.0,0.0
|
| 49 |
+
67680.0,69120.0,1440.0,0,0.0,0.0
|
| 50 |
+
69120.0,70560.0,1440.0,2,0.005235602094240838,0.5235602094240838
|
| 51 |
+
70560.0,72000.0,1440.0,12,0.031413612565445025,3.1413612565445024
|
| 52 |
+
72000.0,73440.0,1440.0,9,0.02356020942408377,2.356020942408377
|
| 53 |
+
73440.0,74880.0,1440.0,3,0.007853403141361256,0.7853403141361256
|
| 54 |
+
74880.0,76320.0,1440.0,6,0.015706806282722512,1.5706806282722512
|
| 55 |
+
76320.0,77760.0,1440.0,0,0.0,0.0
|
| 56 |
+
77760.0,79200.0,1440.0,0,0.0,0.0
|
| 57 |
+
79200.0,80640.0,1440.0,0,0.0,0.0
|
| 58 |
+
80640.0,82080.0,1440.0,0,0.0,0.0
|
| 59 |
+
82080.0,83520.0,1440.0,0,0.0,0.0
|
| 60 |
+
83520.0,84960.0,1440.0,0,0.0,0.0
|
| 61 |
+
84960.0,86400.0,1440.0,0,0.0,0.0
|
| 62 |
+
86400.0,87840.0,1440.0,0,0.0,0.0
|
| 63 |
+
87840.0,89280.0,1440.0,0,0.0,0.0
|
| 64 |
+
89280.0,90720.0,1440.0,0,0.0,0.0
|
| 65 |
+
90720.0,92160.0,1440.0,0,0.0,0.0
|
| 66 |
+
92160.0,93600.0,1440.0,1,0.002617801047120419,0.2617801047120419
|
| 67 |
+
93600.0,95040.0,1440.0,1,0.002617801047120419,0.2617801047120419
|
| 68 |
+
95040.0,96480.0,1440.0,0,0.0,0.0
|
| 69 |
+
96480.0,97920.0,1440.0,1,0.002617801047120419,0.2617801047120419
|
| 70 |
+
97920.0,99360.0,1440.0,0,0.0,0.0
|
| 71 |
+
99360.0,100800.0,1440.0,0,0.0,0.0
|
| 72 |
+
100800.0,102240.0,1440.0,0,0.0,0.0
|
| 73 |
+
102240.0,103680.0,1440.0,3,0.007853403141361256,0.7853403141361256
|
| 74 |
+
103680.0,105120.0,1440.0,8,0.020942408376963352,2.094240837696335
|
| 75 |
+
105120.0,106560.0,1440.0,0,0.0,0.0
|
| 76 |
+
106560.0,108000.0,1440.0,0,0.0,0.0
|
| 77 |
+
108000.0,109440.0,1440.0,0,0.0,0.0
|
| 78 |
+
109440.0,110880.0,1440.0,3,0.007853403141361256,0.7853403141361256
|
| 79 |
+
110880.0,112320.0,1440.0,0,0.0,0.0
|
| 80 |
+
112320.0,113760.0,1440.0,0,0.0,0.0
|
| 81 |
+
113760.0,115200.0,1440.0,0,0.0,0.0
|
| 82 |
+
115200.0,116640.0,1440.0,7,0.01832460732984293,1.832460732984293
|
| 83 |
+
116640.0,118080.0,1440.0,1,0.002617801047120419,0.2617801047120419
|
| 84 |
+
118080.0,119520.0,1440.0,0,0.0,0.0
|
| 85 |
+
119520.0,120960.0,1440.0,0,0.0,0.0
|
| 86 |
+
120960.0,122400.0,1440.0,0,0.0,0.0
|
| 87 |
+
122400.0,123840.0,1440.0,0,0.0,0.0
|
| 88 |
+
123840.0,125280.0,1440.0,8,0.020942408376963352,2.094240837696335
|
| 89 |
+
125280.0,126720.0,1440.0,3,0.007853403141361256,0.7853403141361256
|
| 90 |
+
126720.0,128160.0,1440.0,0,0.0,0.0
|
| 91 |
+
128160.0,129600.0,1440.0,0,0.0,0.0
|
| 92 |
+
129600.0,131040.0,1440.0,10,0.02617801047120419,2.6178010471204187
|
| 93 |
+
131040.0,132480.0,1440.0,3,0.007853403141361256,0.7853403141361256
|
| 94 |
+
132480.0,133920.0,1440.0,14,0.03664921465968586,3.664921465968586
|
| 95 |
+
133920.0,135360.0,1440.0,0,0.0,0.0
|
| 96 |
+
135360.0,136800.0,1440.0,1,0.002617801047120419,0.2617801047120419
|
| 97 |
+
136800.0,138240.0,1440.0,1,0.002617801047120419,0.2617801047120419
|
| 98 |
+
138240.0,139680.0,1440.0,0,0.0,0.0
|
| 99 |
+
139680.0,141120.0,1440.0,1,0.002617801047120419,0.2617801047120419
|
| 100 |
+
141120.0,142560.0,1440.0,0,0.0,0.0
|
| 101 |
+
142560.0,144000.0,1440.0,0,0.0,0.0
|
| 102 |
+
144000.0,145440.0,1440.0,1,0.002617801047120419,0.2617801047120419
|
| 103 |
+
145440.0,146880.0,1440.0,0,0.0,0.0
|
| 104 |
+
146880.0,148320.0,1440.0,0,0.0,0.0
|
| 105 |
+
148320.0,149760.0,1440.0,0,0.0,0.0
|
| 106 |
+
149760.0,151200.0,1440.0,0,0.0,0.0
|
| 107 |
+
151200.0,152640.0,1440.0,0,0.0,0.0
|
| 108 |
+
152640.0,154080.0,1440.0,0,0.0,0.0
|
| 109 |
+
154080.0,155520.0,1440.0,0,0.0,0.0
|
| 110 |
+
155520.0,156960.0,1440.0,0,0.0,0.0
|
| 111 |
+
156960.0,158400.0,1440.0,0,0.0,0.0
|
| 112 |
+
158400.0,159840.0,1440.0,0,0.0,0.0
|
| 113 |
+
159840.0,161280.0,1440.0,0,0.0,0.0
|
| 114 |
+
161280.0,162720.0,1440.0,0,0.0,0.0
|
| 115 |
+
162720.0,164160.0,1440.0,7,0.01832460732984293,1.832460732984293
|
| 116 |
+
164160.0,165600.0,1440.0,3,0.007853403141361256,0.7853403141361256
|
| 117 |
+
165600.0,167040.0,1440.0,7,0.01832460732984293,1.832460732984293
|
| 118 |
+
167040.0,168480.0,1440.0,0,0.0,0.0
|
| 119 |
+
168480.0,169920.0,1440.0,0,0.0,0.0
|
| 120 |
+
169920.0,171360.0,1440.0,1,0.002617801047120419,0.2617801047120419
|
| 121 |
+
171360.0,172800.0,1440.0,5,0.013089005235602094,1.3089005235602094
|
| 122 |
+
172800.0,174240.0,1440.0,7,0.01832460732984293,1.832460732984293
|
| 123 |
+
174240.0,175680.0,1440.0,3,0.007853403141361256,0.7853403141361256
|
| 124 |
+
175680.0,177120.0,1440.0,0,0.0,0.0
|
| 125 |
+
177120.0,178560.0,1440.0,0,0.0,0.0
|
| 126 |
+
178560.0,180000.0,1440.0,0,0.0,0.0
|
| 127 |
+
180000.0,181440.0,1440.0,2,0.005235602094240838,0.5235602094240838
|
| 128 |
+
181440.0,182880.0,1440.0,9,0.02356020942408377,2.356020942408377
|
| 129 |
+
182880.0,184320.0,1440.0,2,0.005235602094240838,0.5235602094240838
|
| 130 |
+
184320.0,185760.0,1440.0,0,0.0,0.0
|
| 131 |
+
185760.0,187200.0,1440.0,0,0.0,0.0
|
| 132 |
+
187200.0,188640.0,1440.0,0,0.0,0.0
|
| 133 |
+
188640.0,190080.0,1440.0,0,0.0,0.0
|
| 134 |
+
190080.0,191520.0,1440.0,0,0.0,0.0
|
| 135 |
+
191520.0,192960.0,1440.0,0,0.0,0.0
|
| 136 |
+
192960.0,194400.0,1440.0,0,0.0,0.0
|
| 137 |
+
194400.0,195840.0,1440.0,0,0.0,0.0
|
| 138 |
+
195840.0,197280.0,1440.0,0,0.0,0.0
|
| 139 |
+
197280.0,198720.0,1440.0,0,0.0,0.0
|
| 140 |
+
198720.0,200160.0,1440.0,0,0.0,0.0
|
| 141 |
+
200160.0,201600.0,1440.0,0,0.0,0.0
|
| 142 |
+
201600.0,203040.0,1440.0,0,0.0,0.0
|
| 143 |
+
203040.0,204480.0,1440.0,0,0.0,0.0
|
| 144 |
+
204480.0,205920.0,1440.0,0,0.0,0.0
|
| 145 |
+
205920.0,207360.0,1440.0,0,0.0,0.0
|
| 146 |
+
207360.0,208800.0,1440.0,0,0.0,0.0
|
| 147 |
+
208800.0,210240.0,1440.0,0,0.0,0.0
|
| 148 |
+
210240.0,211680.0,1440.0,0,0.0,0.0
|
| 149 |
+
211680.0,213120.0,1440.0,0,0.0,0.0
|
| 150 |
+
213120.0,214560.0,1440.0,2,0.005235602094240838,0.5235602094240838
|
| 151 |
+
214560.0,216000.0,1440.0,1,0.002617801047120419,0.2617801047120419
|
| 152 |
+
216000.0,217440.0,1440.0,8,0.020942408376963352,2.094240837696335
|
| 153 |
+
217440.0,218880.0,1440.0,6,0.015706806282722512,1.5706806282722512
|
| 154 |
+
218880.0,220320.0,1440.0,0,0.0,0.0
|
| 155 |
+
220320.0,221760.0,1440.0,2,0.005235602094240838,0.5235602094240838
|
| 156 |
+
221760.0,223200.0,1440.0,3,0.007853403141361256,0.7853403141361256
|
| 157 |
+
223200.0,224640.0,1440.0,3,0.007853403141361256,0.7853403141361256
|
| 158 |
+
224640.0,226080.0,1440.0,0,0.0,0.0
|
| 159 |
+
226080.0,227520.0,1440.0,1,0.002617801047120419,0.2617801047120419
|
| 160 |
+
227520.0,228960.0,1440.0,0,0.0,0.0
|
| 161 |
+
228960.0,230400.0,1440.0,0,0.0,0.0
|
| 162 |
+
230400.0,231840.0,1440.0,0,0.0,0.0
|
| 163 |
+
231840.0,233280.0,1440.0,0,0.0,0.0
|
| 164 |
+
233280.0,234720.0,1440.0,6,0.015706806282722512,1.5706806282722512
|
| 165 |
+
234720.0,236160.0,1440.0,8,0.020942408376963352,2.094240837696335
|
| 166 |
+
236160.0,237600.0,1440.0,1,0.002617801047120419,0.2617801047120419
|
| 167 |
+
237600.0,239040.0,1440.0,4,0.010471204188481676,1.0471204188481675
|
| 168 |
+
239040.0,240480.0,1440.0,0,0.0,0.0
|
| 169 |
+
240480.0,241920.0,1440.0,0,0.0,0.0
|
| 170 |
+
241920.0,243360.0,1440.0,2,0.005235602094240838,0.5235602094240838
|
| 171 |
+
243360.0,244800.0,1440.0,3,0.007853403141361256,0.7853403141361256
|
| 172 |
+
244800.0,246240.0,1440.0,4,0.010471204188481676,1.0471204188481675
|
| 173 |
+
246240.0,247680.0,1440.0,2,0.005235602094240838,0.5235602094240838
|
| 174 |
+
247680.0,249120.0,1440.0,1,0.002617801047120419,0.2617801047120419
|
| 175 |
+
249120.0,250560.0,1440.0,0,0.0,0.0
|
| 176 |
+
250560.0,252000.0,1440.0,1,0.002617801047120419,0.2617801047120419
|
| 177 |
+
252000.0,253440.0,1440.0,0,0.0,0.0
|
| 178 |
+
253440.0,254880.0,1440.0,0,0.0,0.0
|
| 179 |
+
254880.0,256320.0,1440.0,0,0.0,0.0
|
| 180 |
+
256320.0,257760.0,1440.0,4,0.010471204188481676,1.0471204188481675
|
| 181 |
+
257760.0,259200.0,1440.0,4,0.010471204188481676,1.0471204188481675
|
| 182 |
+
259200.0,260640.0,1440.0,0,0.0,0.0
|
| 183 |
+
260640.0,262080.0,1440.0,0,0.0,0.0
|
| 184 |
+
262080.0,263520.0,1440.0,5,0.013089005235602094,1.3089005235602094
|
| 185 |
+
263520.0,264960.0,1440.0,2,0.005235602094240838,0.5235602094240838
|
| 186 |
+
264960.0,266400.0,1440.0,2,0.005235602094240838,0.5235602094240838
|
| 187 |
+
266400.0,267840.0,1440.0,0,0.0,0.0
|
| 188 |
+
267840.0,269280.0,1440.0,0,0.0,0.0
|
| 189 |
+
269280.0,270720.0,1440.0,0,0.0,0.0
|
| 190 |
+
270720.0,272160.0,1440.0,6,0.015706806282722512,1.5706806282722512
|
| 191 |
+
272160.0,273600.0,1440.0,0,0.0,0.0
|
| 192 |
+
273600.0,275040.0,1440.0,4,0.010471204188481676,1.0471204188481675
|
| 193 |
+
275040.0,276480.0,1440.0,0,0.0,0.0
|
| 194 |
+
276480.0,277920.0,1440.0,0,0.0,0.0
|
| 195 |
+
277920.0,279360.0,1440.0,0,0.0,0.0
|
| 196 |
+
279360.0,280800.0,1440.0,0,0.0,0.0
|
| 197 |
+
280800.0,282240.0,1440.0,0,0.0,0.0
|
| 198 |
+
282240.0,283680.0,1440.0,4,0.010471204188481676,1.0471204188481675
|
| 199 |
+
283680.0,285120.0,1440.0,0,0.0,0.0
|
| 200 |
+
285120.0,286560.0,1440.0,0,0.0,0.0
|
| 201 |
+
286560.0,288000.0,1440.0,7,0.01832460732984293,1.832460732984293
|
| 202 |
+
288000.0,289440.0,1440.0,0,0.0,0.0
|
| 203 |
+
289440.0,290880.0,1440.0,0,0.0,0.0
|
| 204 |
+
290880.0,292320.0,1440.0,0,0.0,0.0
|
| 205 |
+
292320.0,293760.0,1440.0,5,0.013089005235602094,1.3089005235602094
|
| 206 |
+
293760.0,295200.0,1440.0,1,0.002617801047120419,0.2617801047120419
|
| 207 |
+
295200.0,296640.0,1440.0,6,0.015706806282722512,1.5706806282722512
|
| 208 |
+
296640.0,298080.0,1440.0,6,0.015706806282722512,1.5706806282722512
|
| 209 |
+
298080.0,299520.0,1440.0,5,0.013089005235602094,1.3089005235602094
|
| 210 |
+
299520.0,300960.0,1440.0,0,0.0,0.0
|
| 211 |
+
300960.0,302400.0,1440.0,0,0.0,0.0
|
| 212 |
+
302400.0,303840.0,1440.0,6,0.015706806282722512,1.5706806282722512
|
| 213 |
+
303840.0,305280.0,1440.0,17,0.04450261780104712,4.450261780104712
|
| 214 |
+
305280.0,306720.0,1440.0,0,0.0,0.0
|
| 215 |
+
306720.0,308160.0,1440.0,8,0.020942408376963352,2.094240837696335
|
| 216 |
+
308160.0,309600.0,1440.0,0,0.0,0.0
|
| 217 |
+
309600.0,311040.0,1440.0,0,0.0,0.0
|
| 218 |
+
311040.0,312480.0,1440.0,0,0.0,0.0
|
| 219 |
+
312480.0,313920.0,1440.0,0,0.0,0.0
|
| 220 |
+
313920.0,315360.0,1440.0,0,0.0,0.0
|
| 221 |
+
315360.0,316800.0,1440.0,0,0.0,0.0
|
| 222 |
+
316800.0,318240.0,1440.0,0,0.0,0.0
|
| 223 |
+
318240.0,319680.0,1440.0,0,0.0,0.0
|
| 224 |
+
319680.0,321120.0,1440.0,0,0.0,0.0
|
| 225 |
+
321120.0,322560.0,1440.0,0,0.0,0.0
|
| 226 |
+
322560.0,324000.0,1440.0,1,0.002617801047120419,0.2617801047120419
|
| 227 |
+
324000.0,325440.0,1440.0,3,0.007853403141361256,0.7853403141361256
|
| 228 |
+
325440.0,326880.0,1440.0,0,0.0,0.0
|
| 229 |
+
326880.0,328320.0,1440.0,4,0.010471204188481676,1.0471204188481675
|
| 230 |
+
328320.0,329760.0,1440.0,1,0.002617801047120419,0.2617801047120419
|
| 231 |
+
329760.0,331200.0,1440.0,0,0.0,0.0
|
| 232 |
+
331200.0,332640.0,1440.0,0,0.0,0.0
|
| 233 |
+
332640.0,334080.0,1440.0,0,0.0,0.0
|
| 234 |
+
334080.0,335520.0,1440.0,0,0.0,0.0
|
| 235 |
+
335520.0,336960.0,1440.0,0,0.0,0.0
|
| 236 |
+
336960.0,338400.0,1440.0,0,0.0,0.0
|
| 237 |
+
338400.0,339840.0,1440.0,0,0.0,0.0
|
| 238 |
+
339840.0,341280.0,1440.0,0,0.0,0.0
|
| 239 |
+
341280.0,342720.0,1440.0,0,0.0,0.0
|
| 240 |
+
342720.0,344160.0,1440.0,0,0.0,0.0
|
| 241 |
+
344160.0,345600.0,1440.0,0,0.0,0.0
|
| 242 |
+
345600.0,347040.0,1440.0,0,0.0,0.0
|
| 243 |
+
347040.0,348480.0,1440.0,0,0.0,0.0
|
| 244 |
+
348480.0,349920.0,1440.0,0,0.0,0.0
|
| 245 |
+
349920.0,351360.0,1440.0,0,0.0,0.0
|
| 246 |
+
351360.0,352800.0,1440.0,0,0.0,0.0
|
| 247 |
+
352800.0,354240.0,1440.0,0,0.0,0.0
|
| 248 |
+
354240.0,355680.0,1440.0,0,0.0,0.0
|
| 249 |
+
355680.0,357120.0,1440.0,0,0.0,0.0
|
| 250 |
+
357120.0,358560.0,1440.0,5,0.013089005235602094,1.3089005235602094
|
| 251 |
+
358560.0,360000.0,1440.0,0,0.0,0.0
|
| 252 |
+
360000.0,361440.0,1440.0,0,0.0,0.0
|
| 253 |
+
361440.0,362880.0,1440.0,0,0.0,0.0
|
| 254 |
+
362880.0,364320.0,1440.0,0,0.0,0.0
|
| 255 |
+
364320.0,365760.0,1440.0,0,0.0,0.0
|
| 256 |
+
365760.0,367200.0,1440.0,0,0.0,0.0
|
| 257 |
+
367200.0,368640.0,1440.0,1,0.002617801047120419,0.2617801047120419
|
| 258 |
+
368640.0,370080.0,1440.0,0,0.0,0.0
|
| 259 |
+
370080.0,371520.0,1440.0,0,0.0,0.0
|
| 260 |
+
371520.0,372960.0,1440.0,1,0.002617801047120419,0.2617801047120419
|
| 261 |
+
372960.0,374400.0,1440.0,1,0.002617801047120419,0.2617801047120419
|
| 262 |
+
374400.0,375840.0,1440.0,2,0.005235602094240838,0.5235602094240838
|
Results/day_student_attempt_distribution_counts_001849d40d6bf1e2ed5826e4420e291e54655a2b6b77a9ab701faaeb4e899abe.csv
ADDED
|
@@ -0,0 +1,256 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
bin_left_min,bin_right_min,bin_width_min,attempt_count,probability,percentage
|
| 2 |
+
0.0,1440.0,1440.0,4,0.010958904109589041,1.095890410958904
|
| 3 |
+
1440.0,2880.0,1440.0,4,0.010958904109589041,1.095890410958904
|
| 4 |
+
2880.0,4320.0,1440.0,1,0.0027397260273972603,0.273972602739726
|
| 5 |
+
4320.0,5760.0,1440.0,2,0.005479452054794521,0.547945205479452
|
| 6 |
+
5760.0,7200.0,1440.0,0,0.0,0.0
|
| 7 |
+
7200.0,8640.0,1440.0,0,0.0,0.0
|
| 8 |
+
8640.0,10080.0,1440.0,4,0.010958904109589041,1.095890410958904
|
| 9 |
+
10080.0,11520.0,1440.0,5,0.0136986301369863,1.36986301369863
|
| 10 |
+
11520.0,12960.0,1440.0,1,0.0027397260273972603,0.273972602739726
|
| 11 |
+
12960.0,14400.0,1440.0,5,0.0136986301369863,1.36986301369863
|
| 12 |
+
14400.0,15840.0,1440.0,2,0.005479452054794521,0.547945205479452
|
| 13 |
+
15840.0,17280.0,1440.0,0,0.0,0.0
|
| 14 |
+
17280.0,18720.0,1440.0,0,0.0,0.0
|
| 15 |
+
18720.0,20160.0,1440.0,6,0.01643835616438356,1.643835616438356
|
| 16 |
+
20160.0,21600.0,1440.0,3,0.00821917808219178,0.821917808219178
|
| 17 |
+
21600.0,23040.0,1440.0,2,0.005479452054794521,0.547945205479452
|
| 18 |
+
23040.0,24480.0,1440.0,2,0.005479452054794521,0.547945205479452
|
| 19 |
+
24480.0,25920.0,1440.0,1,0.0027397260273972603,0.273972602739726
|
| 20 |
+
25920.0,27360.0,1440.0,0,0.0,0.0
|
| 21 |
+
27360.0,28800.0,1440.0,0,0.0,0.0
|
| 22 |
+
28800.0,30240.0,1440.0,5,0.0136986301369863,1.36986301369863
|
| 23 |
+
30240.0,31680.0,1440.0,0,0.0,0.0
|
| 24 |
+
31680.0,33120.0,1440.0,0,0.0,0.0
|
| 25 |
+
33120.0,34560.0,1440.0,1,0.0027397260273972603,0.273972602739726
|
| 26 |
+
34560.0,36000.0,1440.0,16,0.043835616438356165,4.383561643835616
|
| 27 |
+
36000.0,37440.0,1440.0,0,0.0,0.0
|
| 28 |
+
37440.0,38880.0,1440.0,0,0.0,0.0
|
| 29 |
+
38880.0,40320.0,1440.0,1,0.0027397260273972603,0.273972602739726
|
| 30 |
+
40320.0,41760.0,1440.0,0,0.0,0.0
|
| 31 |
+
41760.0,43200.0,1440.0,2,0.005479452054794521,0.547945205479452
|
| 32 |
+
43200.0,44640.0,1440.0,0,0.0,0.0
|
| 33 |
+
44640.0,46080.0,1440.0,1,0.0027397260273972603,0.273972602739726
|
| 34 |
+
46080.0,47520.0,1440.0,0,0.0,0.0
|
| 35 |
+
47520.0,48960.0,1440.0,0,0.0,0.0
|
| 36 |
+
48960.0,50400.0,1440.0,4,0.010958904109589041,1.095890410958904
|
| 37 |
+
50400.0,51840.0,1440.0,1,0.0027397260273972603,0.273972602739726
|
| 38 |
+
51840.0,53280.0,1440.0,2,0.005479452054794521,0.547945205479452
|
| 39 |
+
53280.0,54720.0,1440.0,2,0.005479452054794521,0.547945205479452
|
| 40 |
+
54720.0,56160.0,1440.0,2,0.005479452054794521,0.547945205479452
|
| 41 |
+
56160.0,57600.0,1440.0,0,0.0,0.0
|
| 42 |
+
57600.0,59040.0,1440.0,0,0.0,0.0
|
| 43 |
+
59040.0,60480.0,1440.0,0,0.0,0.0
|
| 44 |
+
60480.0,61920.0,1440.0,2,0.005479452054794521,0.547945205479452
|
| 45 |
+
61920.0,63360.0,1440.0,0,0.0,0.0
|
| 46 |
+
63360.0,64800.0,1440.0,0,0.0,0.0
|
| 47 |
+
64800.0,66240.0,1440.0,0,0.0,0.0
|
| 48 |
+
66240.0,67680.0,1440.0,0,0.0,0.0
|
| 49 |
+
67680.0,69120.0,1440.0,0,0.0,0.0
|
| 50 |
+
69120.0,70560.0,1440.0,2,0.005479452054794521,0.547945205479452
|
| 51 |
+
70560.0,72000.0,1440.0,3,0.00821917808219178,0.821917808219178
|
| 52 |
+
72000.0,73440.0,1440.0,3,0.00821917808219178,0.821917808219178
|
| 53 |
+
73440.0,74880.0,1440.0,7,0.019178082191780823,1.9178082191780823
|
| 54 |
+
74880.0,76320.0,1440.0,3,0.00821917808219178,0.821917808219178
|
| 55 |
+
76320.0,77760.0,1440.0,0,0.0,0.0
|
| 56 |
+
77760.0,79200.0,1440.0,0,0.0,0.0
|
| 57 |
+
79200.0,80640.0,1440.0,2,0.005479452054794521,0.547945205479452
|
| 58 |
+
80640.0,82080.0,1440.0,8,0.021917808219178082,2.191780821917808
|
| 59 |
+
82080.0,83520.0,1440.0,2,0.005479452054794521,0.547945205479452
|
| 60 |
+
83520.0,84960.0,1440.0,4,0.010958904109589041,1.095890410958904
|
| 61 |
+
84960.0,86400.0,1440.0,0,0.0,0.0
|
| 62 |
+
86400.0,87840.0,1440.0,0,0.0,0.0
|
| 63 |
+
87840.0,89280.0,1440.0,0,0.0,0.0
|
| 64 |
+
89280.0,90720.0,1440.0,4,0.010958904109589041,1.095890410958904
|
| 65 |
+
90720.0,92160.0,1440.0,0,0.0,0.0
|
| 66 |
+
92160.0,93600.0,1440.0,6,0.01643835616438356,1.643835616438356
|
| 67 |
+
93600.0,95040.0,1440.0,0,0.0,0.0
|
| 68 |
+
95040.0,96480.0,1440.0,4,0.010958904109589041,1.095890410958904
|
| 69 |
+
96480.0,97920.0,1440.0,0,0.0,0.0
|
| 70 |
+
97920.0,99360.0,1440.0,0,0.0,0.0
|
| 71 |
+
99360.0,100800.0,1440.0,0,0.0,0.0
|
| 72 |
+
100800.0,102240.0,1440.0,3,0.00821917808219178,0.821917808219178
|
| 73 |
+
102240.0,103680.0,1440.0,6,0.01643835616438356,1.643835616438356
|
| 74 |
+
103680.0,105120.0,1440.0,3,0.00821917808219178,0.821917808219178
|
| 75 |
+
105120.0,106560.0,1440.0,1,0.0027397260273972603,0.273972602739726
|
| 76 |
+
106560.0,108000.0,1440.0,0,0.0,0.0
|
| 77 |
+
108000.0,109440.0,1440.0,0,0.0,0.0
|
| 78 |
+
109440.0,110880.0,1440.0,0,0.0,0.0
|
| 79 |
+
110880.0,112320.0,1440.0,0,0.0,0.0
|
| 80 |
+
112320.0,113760.0,1440.0,0,0.0,0.0
|
| 81 |
+
113760.0,115200.0,1440.0,0,0.0,0.0
|
| 82 |
+
115200.0,116640.0,1440.0,0,0.0,0.0
|
| 83 |
+
116640.0,118080.0,1440.0,0,0.0,0.0
|
| 84 |
+
118080.0,119520.0,1440.0,0,0.0,0.0
|
| 85 |
+
119520.0,120960.0,1440.0,1,0.0027397260273972603,0.273972602739726
|
| 86 |
+
120960.0,122400.0,1440.0,3,0.00821917808219178,0.821917808219178
|
| 87 |
+
122400.0,123840.0,1440.0,2,0.005479452054794521,0.547945205479452
|
| 88 |
+
123840.0,125280.0,1440.0,5,0.0136986301369863,1.36986301369863
|
| 89 |
+
125280.0,126720.0,1440.0,3,0.00821917808219178,0.821917808219178
|
| 90 |
+
126720.0,128160.0,1440.0,0,0.0,0.0
|
| 91 |
+
128160.0,129600.0,1440.0,0,0.0,0.0
|
| 92 |
+
129600.0,131040.0,1440.0,8,0.021917808219178082,2.191780821917808
|
| 93 |
+
131040.0,132480.0,1440.0,5,0.0136986301369863,1.36986301369863
|
| 94 |
+
132480.0,133920.0,1440.0,4,0.010958904109589041,1.095890410958904
|
| 95 |
+
133920.0,135360.0,1440.0,6,0.01643835616438356,1.643835616438356
|
| 96 |
+
135360.0,136800.0,1440.0,4,0.010958904109589041,1.095890410958904
|
| 97 |
+
136800.0,138240.0,1440.0,0,0.0,0.0
|
| 98 |
+
138240.0,139680.0,1440.0,0,0.0,0.0
|
| 99 |
+
139680.0,141120.0,1440.0,0,0.0,0.0
|
| 100 |
+
141120.0,142560.0,1440.0,0,0.0,0.0
|
| 101 |
+
142560.0,144000.0,1440.0,0,0.0,0.0
|
| 102 |
+
144000.0,145440.0,1440.0,0,0.0,0.0
|
| 103 |
+
145440.0,146880.0,1440.0,0,0.0,0.0
|
| 104 |
+
146880.0,148320.0,1440.0,0,0.0,0.0
|
| 105 |
+
148320.0,149760.0,1440.0,0,0.0,0.0
|
| 106 |
+
149760.0,151200.0,1440.0,0,0.0,0.0
|
| 107 |
+
151200.0,152640.0,1440.0,0,0.0,0.0
|
| 108 |
+
152640.0,154080.0,1440.0,0,0.0,0.0
|
| 109 |
+
154080.0,155520.0,1440.0,0,0.0,0.0
|
| 110 |
+
155520.0,156960.0,1440.0,0,0.0,0.0
|
| 111 |
+
156960.0,158400.0,1440.0,0,0.0,0.0
|
| 112 |
+
158400.0,159840.0,1440.0,0,0.0,0.0
|
| 113 |
+
159840.0,161280.0,1440.0,0,0.0,0.0
|
| 114 |
+
161280.0,162720.0,1440.0,0,0.0,0.0
|
| 115 |
+
162720.0,164160.0,1440.0,0,0.0,0.0
|
| 116 |
+
164160.0,165600.0,1440.0,0,0.0,0.0
|
| 117 |
+
165600.0,167040.0,1440.0,0,0.0,0.0
|
| 118 |
+
167040.0,168480.0,1440.0,0,0.0,0.0
|
| 119 |
+
168480.0,169920.0,1440.0,0,0.0,0.0
|
| 120 |
+
169920.0,171360.0,1440.0,0,0.0,0.0
|
| 121 |
+
171360.0,172800.0,1440.0,0,0.0,0.0
|
| 122 |
+
172800.0,174240.0,1440.0,0,0.0,0.0
|
| 123 |
+
174240.0,175680.0,1440.0,0,0.0,0.0
|
| 124 |
+
175680.0,177120.0,1440.0,0,0.0,0.0
|
| 125 |
+
177120.0,178560.0,1440.0,0,0.0,0.0
|
| 126 |
+
178560.0,180000.0,1440.0,0,0.0,0.0
|
| 127 |
+
180000.0,181440.0,1440.0,3,0.00821917808219178,0.821917808219178
|
| 128 |
+
181440.0,182880.0,1440.0,7,0.019178082191780823,1.9178082191780823
|
| 129 |
+
182880.0,184320.0,1440.0,0,0.0,0.0
|
| 130 |
+
184320.0,185760.0,1440.0,8,0.021917808219178082,2.191780821917808
|
| 131 |
+
185760.0,187200.0,1440.0,5,0.0136986301369863,1.36986301369863
|
| 132 |
+
187200.0,188640.0,1440.0,0,0.0,0.0
|
| 133 |
+
188640.0,190080.0,1440.0,0,0.0,0.0
|
| 134 |
+
190080.0,191520.0,1440.0,0,0.0,0.0
|
| 135 |
+
191520.0,192960.0,1440.0,3,0.00821917808219178,0.821917808219178
|
| 136 |
+
192960.0,194400.0,1440.0,0,0.0,0.0
|
| 137 |
+
194400.0,195840.0,1440.0,2,0.005479452054794521,0.547945205479452
|
| 138 |
+
195840.0,197280.0,1440.0,0,0.0,0.0
|
| 139 |
+
197280.0,198720.0,1440.0,0,0.0,0.0
|
| 140 |
+
198720.0,200160.0,1440.0,0,0.0,0.0
|
| 141 |
+
200160.0,201600.0,1440.0,3,0.00821917808219178,0.821917808219178
|
| 142 |
+
201600.0,203040.0,1440.0,1,0.0027397260273972603,0.273972602739726
|
| 143 |
+
203040.0,204480.0,1440.0,4,0.010958904109589041,1.095890410958904
|
| 144 |
+
204480.0,205920.0,1440.0,8,0.021917808219178082,2.191780821917808
|
| 145 |
+
205920.0,207360.0,1440.0,0,0.0,0.0
|
| 146 |
+
207360.0,208800.0,1440.0,0,0.0,0.0
|
| 147 |
+
208800.0,210240.0,1440.0,0,0.0,0.0
|
| 148 |
+
210240.0,211680.0,1440.0,2,0.005479452054794521,0.547945205479452
|
| 149 |
+
211680.0,213120.0,1440.0,2,0.005479452054794521,0.547945205479452
|
| 150 |
+
213120.0,214560.0,1440.0,0,0.0,0.0
|
| 151 |
+
214560.0,216000.0,1440.0,5,0.0136986301369863,1.36986301369863
|
| 152 |
+
216000.0,217440.0,1440.0,3,0.00821917808219178,0.821917808219178
|
| 153 |
+
217440.0,218880.0,1440.0,0,0.0,0.0
|
| 154 |
+
218880.0,220320.0,1440.0,0,0.0,0.0
|
| 155 |
+
220320.0,221760.0,1440.0,4,0.010958904109589041,1.095890410958904
|
| 156 |
+
221760.0,223200.0,1440.0,5,0.0136986301369863,1.36986301369863
|
| 157 |
+
223200.0,224640.0,1440.0,6,0.01643835616438356,1.643835616438356
|
| 158 |
+
224640.0,226080.0,1440.0,0,0.0,0.0
|
| 159 |
+
226080.0,227520.0,1440.0,0,0.0,0.0
|
| 160 |
+
227520.0,228960.0,1440.0,0,0.0,0.0
|
| 161 |
+
228960.0,230400.0,1440.0,0,0.0,0.0
|
| 162 |
+
230400.0,231840.0,1440.0,0,0.0,0.0
|
| 163 |
+
231840.0,233280.0,1440.0,4,0.010958904109589041,1.095890410958904
|
| 164 |
+
233280.0,234720.0,1440.0,0,0.0,0.0
|
| 165 |
+
234720.0,236160.0,1440.0,1,0.0027397260273972603,0.273972602739726
|
| 166 |
+
236160.0,237600.0,1440.0,0,0.0,0.0
|
| 167 |
+
237600.0,239040.0,1440.0,0,0.0,0.0
|
| 168 |
+
239040.0,240480.0,1440.0,0,0.0,0.0
|
| 169 |
+
240480.0,241920.0,1440.0,0,0.0,0.0
|
| 170 |
+
241920.0,243360.0,1440.0,0,0.0,0.0
|
| 171 |
+
243360.0,244800.0,1440.0,6,0.01643835616438356,1.643835616438356
|
| 172 |
+
244800.0,246240.0,1440.0,5,0.0136986301369863,1.36986301369863
|
| 173 |
+
246240.0,247680.0,1440.0,0,0.0,0.0
|
| 174 |
+
247680.0,249120.0,1440.0,0,0.0,0.0
|
| 175 |
+
249120.0,250560.0,1440.0,0,0.0,0.0
|
| 176 |
+
250560.0,252000.0,1440.0,0,0.0,0.0
|
| 177 |
+
252000.0,253440.0,1440.0,0,0.0,0.0
|
| 178 |
+
253440.0,254880.0,1440.0,0,0.0,0.0
|
| 179 |
+
254880.0,256320.0,1440.0,0,0.0,0.0
|
| 180 |
+
256320.0,257760.0,1440.0,3,0.00821917808219178,0.821917808219178
|
| 181 |
+
257760.0,259200.0,1440.0,0,0.0,0.0
|
| 182 |
+
259200.0,260640.0,1440.0,0,0.0,0.0
|
| 183 |
+
260640.0,262080.0,1440.0,2,0.005479452054794521,0.547945205479452
|
| 184 |
+
262080.0,263520.0,1440.0,0,0.0,0.0
|
| 185 |
+
263520.0,264960.0,1440.0,0,0.0,0.0
|
| 186 |
+
264960.0,266400.0,1440.0,2,0.005479452054794521,0.547945205479452
|
| 187 |
+
266400.0,267840.0,1440.0,6,0.01643835616438356,1.643835616438356
|
| 188 |
+
267840.0,269280.0,1440.0,0,0.0,0.0
|
| 189 |
+
269280.0,270720.0,1440.0,0,0.0,0.0
|
| 190 |
+
270720.0,272160.0,1440.0,4,0.010958904109589041,1.095890410958904
|
| 191 |
+
272160.0,273600.0,1440.0,0,0.0,0.0
|
| 192 |
+
273600.0,275040.0,1440.0,4,0.010958904109589041,1.095890410958904
|
| 193 |
+
275040.0,276480.0,1440.0,5,0.0136986301369863,1.36986301369863
|
| 194 |
+
276480.0,277920.0,1440.0,0,0.0,0.0
|
| 195 |
+
277920.0,279360.0,1440.0,0,0.0,0.0
|
| 196 |
+
279360.0,280800.0,1440.0,0,0.0,0.0
|
| 197 |
+
280800.0,282240.0,1440.0,0,0.0,0.0
|
| 198 |
+
282240.0,283680.0,1440.0,0,0.0,0.0
|
| 199 |
+
283680.0,285120.0,1440.0,0,0.0,0.0
|
| 200 |
+
285120.0,286560.0,1440.0,0,0.0,0.0
|
| 201 |
+
286560.0,288000.0,1440.0,0,0.0,0.0
|
| 202 |
+
288000.0,289440.0,1440.0,0,0.0,0.0
|
| 203 |
+
289440.0,290880.0,1440.0,0,0.0,0.0
|
| 204 |
+
290880.0,292320.0,1440.0,0,0.0,0.0
|
| 205 |
+
292320.0,293760.0,1440.0,1,0.0027397260273972603,0.273972602739726
|
| 206 |
+
293760.0,295200.0,1440.0,0,0.0,0.0
|
| 207 |
+
295200.0,296640.0,1440.0,0,0.0,0.0
|
| 208 |
+
296640.0,298080.0,1440.0,0,0.0,0.0
|
| 209 |
+
298080.0,299520.0,1440.0,0,0.0,0.0
|
| 210 |
+
299520.0,300960.0,1440.0,0,0.0,0.0
|
| 211 |
+
300960.0,302400.0,1440.0,2,0.005479452054794521,0.547945205479452
|
| 212 |
+
302400.0,303840.0,1440.0,2,0.005479452054794521,0.547945205479452
|
| 213 |
+
303840.0,305280.0,1440.0,3,0.00821917808219178,0.821917808219178
|
| 214 |
+
305280.0,306720.0,1440.0,6,0.01643835616438356,1.643835616438356
|
| 215 |
+
306720.0,308160.0,1440.0,4,0.010958904109589041,1.095890410958904
|
| 216 |
+
308160.0,309600.0,1440.0,0,0.0,0.0
|
| 217 |
+
309600.0,311040.0,1440.0,0,0.0,0.0
|
| 218 |
+
311040.0,312480.0,1440.0,4,0.010958904109589041,1.095890410958904
|
| 219 |
+
312480.0,313920.0,1440.0,6,0.01643835616438356,1.643835616438356
|
| 220 |
+
313920.0,315360.0,1440.0,4,0.010958904109589041,1.095890410958904
|
| 221 |
+
315360.0,316800.0,1440.0,0,0.0,0.0
|
| 222 |
+
316800.0,318240.0,1440.0,0,0.0,0.0
|
| 223 |
+
318240.0,319680.0,1440.0,0,0.0,0.0
|
| 224 |
+
319680.0,321120.0,1440.0,0,0.0,0.0
|
| 225 |
+
321120.0,322560.0,1440.0,0,0.0,0.0
|
| 226 |
+
322560.0,324000.0,1440.0,0,0.0,0.0
|
| 227 |
+
324000.0,325440.0,1440.0,0,0.0,0.0
|
| 228 |
+
325440.0,326880.0,1440.0,0,0.0,0.0
|
| 229 |
+
326880.0,328320.0,1440.0,0,0.0,0.0
|
| 230 |
+
328320.0,329760.0,1440.0,0,0.0,0.0
|
| 231 |
+
329760.0,331200.0,1440.0,0,0.0,0.0
|
| 232 |
+
331200.0,332640.0,1440.0,6,0.01643835616438356,1.643835616438356
|
| 233 |
+
332640.0,334080.0,1440.0,4,0.010958904109589041,1.095890410958904
|
| 234 |
+
334080.0,335520.0,1440.0,1,0.0027397260273972603,0.273972602739726
|
| 235 |
+
335520.0,336960.0,1440.0,1,0.0027397260273972603,0.273972602739726
|
| 236 |
+
336960.0,338400.0,1440.0,1,0.0027397260273972603,0.273972602739726
|
| 237 |
+
338400.0,339840.0,1440.0,0,0.0,0.0
|
| 238 |
+
339840.0,341280.0,1440.0,0,0.0,0.0
|
| 239 |
+
341280.0,342720.0,1440.0,1,0.0027397260273972603,0.273972602739726
|
| 240 |
+
342720.0,344160.0,1440.0,0,0.0,0.0
|
| 241 |
+
344160.0,345600.0,1440.0,5,0.0136986301369863,1.36986301369863
|
| 242 |
+
345600.0,347040.0,1440.0,9,0.024657534246575342,2.4657534246575343
|
| 243 |
+
347040.0,348480.0,1440.0,0,0.0,0.0
|
| 244 |
+
348480.0,349920.0,1440.0,0,0.0,0.0
|
| 245 |
+
349920.0,351360.0,1440.0,0,0.0,0.0
|
| 246 |
+
351360.0,352800.0,1440.0,0,0.0,0.0
|
| 247 |
+
352800.0,354240.0,1440.0,0,0.0,0.0
|
| 248 |
+
354240.0,355680.0,1440.0,0,0.0,0.0
|
| 249 |
+
355680.0,357120.0,1440.0,0,0.0,0.0
|
| 250 |
+
357120.0,358560.0,1440.0,0,0.0,0.0
|
| 251 |
+
358560.0,360000.0,1440.0,0,0.0,0.0
|
| 252 |
+
360000.0,361440.0,1440.0,0,0.0,0.0
|
| 253 |
+
361440.0,362880.0,1440.0,3,0.00821917808219178,0.821917808219178
|
| 254 |
+
362880.0,364320.0,1440.0,3,0.00821917808219178,0.821917808219178
|
| 255 |
+
364320.0,365760.0,1440.0,2,0.005479452054794521,0.547945205479452
|
| 256 |
+
365760.0,367200.0,1440.0,1,0.0027397260273972603,0.273972602739726
|
Results/inference_data_kt_results.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d5ecb945b29f013a0da9f770266b80724f4f9d92da04e41357b69f309622d0ea
|
| 3 |
+
size 2080602643
|
Results/month_student_attempt_distribution.png
ADDED
|
Git LFS Details
|
Results/month_student_attempt_distribution_00173df52c65a9c13b45978c5a56ea2db67b17deded674486f7af64f7c5fd9ac.png
ADDED
|
Git LFS Details
|
Results/month_student_attempt_distribution_001849d40d6bf1e2ed5826e4420e291e54655a2b6b77a9ab701faaeb4e899abe.png
ADDED
|
Git LFS Details
|
Results/month_student_attempt_distribution_counts.csv
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
bin_left_min,bin_right_min,bin_width_min,attempt_count,probability,percentage
|
| 2 |
+
0.0,43200.0,43200.0,209461,0.1198159238984321,11.98159238984321
|
| 3 |
+
43200.0,86400.0,43200.0,177545,0.1015593270754323,10.155932707543231
|
| 4 |
+
86400.0,129600.0,43200.0,176130,0.10074991848712096,10.074991848712097
|
| 5 |
+
129600.0,172800.0,43200.0,167676,0.09591405968458806,9.591405968458806
|
| 6 |
+
172800.0,216000.0,43200.0,155115,0.08872891390523913,8.872891390523913
|
| 7 |
+
216000.0,259200.0,43200.0,174585,0.09986614727232165,9.986614727232165
|
| 8 |
+
259200.0,302400.0,43200.0,189411,0.10834691881317249,10.834691881317248
|
| 9 |
+
302400.0,345600.0,43200.0,125970,0.07205738506684056,7.205738506684057
|
| 10 |
+
345600.0,388800.0,43200.0,97012,0.055492824006543914,5.549282400654391
|
| 11 |
+
388800.0,432000.0,43200.0,28306,0.016191603887449304,1.6191603887449304
|
| 12 |
+
432000.0,475200.0,43200.0,18574,0.010624703264519303,1.0624703264519304
|
| 13 |
+
475200.0,518400.0,43200.0,24530,0.014031655598075725,1.4031655598075725
|
| 14 |
+
518400.0,561600.0,43200.0,29081,0.016634919545358343,1.6634919545358344
|
| 15 |
+
561600.0,604800.0,43200.0,29056,0.016620619040264503,1.6620619040264504
|
| 16 |
+
604800.0,648000.0,43200.0,23457,0.013417877919448115,1.3417877919448116
|
| 17 |
+
648000.0,691200.0,43200.0,23176,0.013257140242193355,1.3257140242193355
|
| 18 |
+
691200.0,734400.0,43200.0,21084,0.01206047397594083,1.206047397594083
|
| 19 |
+
734400.0,777600.0,43200.0,21215,0.012135408622632552,1.213540862263255
|
| 20 |
+
777600.0,820800.0,43200.0,20921,0.011967234682728993,1.1967234682728993
|
| 21 |
+
820800.0,864000.0,43200.0,13592,0.007774898609418885,0.7774898609418884
|
| 22 |
+
864000.0,907200.0,43200.0,10107,0.0057814081993376,0.57814081993376
|
| 23 |
+
907200.0,950400.0,43200.0,1273,0.0007281817193783284,0.07281817193783284
|
| 24 |
+
950400.0,993600.0,43200.0,1086,0.0006212139412764059,0.06212139412764059
|
| 25 |
+
993600.0,1036800.0,43200.0,1155,0.000660683335335404,0.0660683335335404
|
| 26 |
+
1036800.0,1080000.0,43200.0,1266,0.0007241775779520533,0.07241775779520533
|
| 27 |
+
1080000.0,1123200.0,43200.0,1494,0.0008545981844078733,0.08545981844078733
|
| 28 |
+
1123200.0,1166400.0,43200.0,810,0.0004633363650404132,0.04633363650404132
|
| 29 |
+
1166400.0,1209600.0,43200.0,730,0.0004175747487401255,0.04175747487401255
|
| 30 |
+
1209600.0,1252800.0,43200.0,362,0.00020707131375880195,0.020707131375880195
|
| 31 |
+
1252800.0,1296000.0,43200.0,1075,0.0006149217190351163,0.06149217190351163
|
| 32 |
+
1296000.0,1339200.0,43200.0,716,0.00040956646588757514,0.040956646588757514
|
| 33 |
+
1339200.0,1382400.0,43200.0,972,0.0005560036380484959,0.05560036380484959
|
| 34 |
+
1382400.0,1425600.0,43200.0,817,0.0004673405064666884,0.04673405064666884
|
| 35 |
+
1425600.0,1468800.0,43200.0,258,0.00014758121256842793,0.014758121256842793
|
| 36 |
+
1468800.0,1512000.0,43200.0,4,2.288080815014386e-06,0.00022880808150143862
|
| 37 |
+
1512000.0,1555200.0,43200.0,5,2.8601010187679828e-06,0.0002860101018767983
|
| 38 |
+
1555200.0,1598400.0,43200.0,0,0.0,0.0
|
| 39 |
+
1598400.0,1641600.0,43200.0,0,0.0,0.0
|
| 40 |
+
1641600.0,1684800.0,43200.0,10,5.7202020375359656e-06,0.0005720202037535966
|
| 41 |
+
1684800.0,1728000.0,43200.0,1,5.720202037535965e-07,5.7202020375359656e-05
|
| 42 |
+
1728000.0,1771200.0,43200.0,0,0.0,0.0
|
| 43 |
+
1771200.0,1814400.0,43200.0,3,1.7160606112607897e-06,0.00017160606112607898
|
| 44 |
+
1814400.0,1857600.0,43200.0,0,0.0,0.0
|
| 45 |
+
1857600.0,1900800.0,43200.0,0,0.0,0.0
|
| 46 |
+
1900800.0,1944000.0,43200.0,0,0.0,0.0
|
| 47 |
+
1944000.0,1987200.0,43200.0,0,0.0,0.0
|
| 48 |
+
1987200.0,2030400.0,43200.0,0,0.0,0.0
|
| 49 |
+
2030400.0,2073600.0,43200.0,0,0.0,0.0
|
| 50 |
+
2073600.0,2116800.0,43200.0,2,1.144040407507193e-06,0.00011440404075071931
|
| 51 |
+
2116800.0,2160000.0,43200.0,3,1.7160606112607897e-06,0.00017160606112607898
|
| 52 |
+
2160000.0,2203200.0,43200.0,5,2.8601010187679828e-06,0.0002860101018767983
|
| 53 |
+
2203200.0,2246400.0,43200.0,11,6.292222241289563e-06,0.0006292222241289563
|
| 54 |
+
2246400.0,2289600.0,43200.0,19,1.0868383871318335e-05,0.0010868383871318334
|
| 55 |
+
2289600.0,2332800.0,43200.0,5,2.8601010187679828e-06,0.0002860101018767983
|
| 56 |
+
2332800.0,2376000.0,43200.0,12,6.864242445043159e-06,0.0006864242445043159
|
| 57 |
+
2376000.0,2419200.0,43200.0,11,6.292222241289563e-06,0.0006292222241289563
|
| 58 |
+
2419200.0,2462400.0,43200.0,9,5.148181833782369e-06,0.000514818183378237
|
| 59 |
+
2462400.0,2505600.0,43200.0,4,2.288080815014386e-06,0.00022880808150143862
|
| 60 |
+
2505600.0,2548800.0,43200.0,0,0.0,0.0
|
| 61 |
+
2548800.0,2592000.0,43200.0,8,4.576161630028772e-06,0.00045761616300287725
|
| 62 |
+
2592000.0,2635200.0,43200.0,18,1.0296363667564739e-05,0.001029636366756474
|
| 63 |
+
2635200.0,2678400.0,43200.0,13,7.436262648796756e-06,0.0007436262648796755
|
| 64 |
+
2678400.0,2721600.0,43200.0,11,6.292222241289563e-06,0.0006292222241289563
|
| 65 |
+
2721600.0,2764800.0,43200.0,0,0.0,0.0
|
| 66 |
+
2764800.0,2808000.0,43200.0,0,0.0,0.0
|
| 67 |
+
2808000.0,2851200.0,43200.0,4,2.288080815014386e-06,0.00022880808150143862
|
| 68 |
+
2851200.0,2894400.0,43200.0,3,1.7160606112607897e-06,0.00017160606112607898
|
| 69 |
+
2894400.0,2937600.0,43200.0,11,6.292222241289563e-06,0.0006292222241289563
|
Results/month_student_attempt_distribution_counts_00173df52c65a9c13b45978c5a56ea2db67b17deded674486f7af64f7c5fd9ac.csv
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
bin_left_min,bin_right_min,bin_width_min,attempt_count,probability,percentage
|
| 2 |
+
0.0,43200.0,43200.0,33,0.08638743455497382,8.638743455497382
|
| 3 |
+
43200.0,86400.0,43200.0,67,0.17539267015706805,17.539267015706805
|
| 4 |
+
86400.0,129600.0,43200.0,36,0.09424083769633508,9.424083769633508
|
| 5 |
+
129600.0,172800.0,43200.0,54,0.14136125654450263,14.136125654450263
|
| 6 |
+
172800.0,216000.0,43200.0,26,0.06806282722513089,6.806282722513089
|
| 7 |
+
216000.0,259200.0,43200.0,63,0.1649214659685864,16.49214659685864
|
| 8 |
+
259200.0,302400.0,43200.0,53,0.1387434554973822,13.874345549738221
|
| 9 |
+
302400.0,345600.0,43200.0,40,0.10471204188481675,10.471204188481675
|
| 10 |
+
345600.0,388800.0,43200.0,10,0.02617801047120419,2.6178010471204187
|
Results/month_student_attempt_distribution_counts_001849d40d6bf1e2ed5826e4420e291e54655a2b6b77a9ab701faaeb4e899abe.csv
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
bin_left_min,bin_right_min,bin_width_min,attempt_count,probability,percentage
|
| 2 |
+
0.0,43200.0,43200.0,67,0.18356164383561643,18.356164383561644
|
| 3 |
+
43200.0,86400.0,43200.0,48,0.13150684931506848,13.150684931506849
|
| 4 |
+
86400.0,129600.0,43200.0,41,0.11232876712328767,11.232876712328768
|
| 5 |
+
129600.0,172800.0,43200.0,27,0.07397260273972603,7.397260273972603
|
| 6 |
+
172800.0,216000.0,43200.0,53,0.14520547945205478,14.520547945205479
|
| 7 |
+
216000.0,259200.0,43200.0,37,0.10136986301369863,10.136986301369863
|
| 8 |
+
259200.0,302400.0,43200.0,26,0.07123287671232877,7.123287671232877
|
| 9 |
+
302400.0,345600.0,43200.0,48,0.13150684931506848,13.150684931506849
|
| 10 |
+
345600.0,388800.0,43200.0,18,0.049315068493150684,4.931506849315069
|
Results/pedagogical_grounding/README.md
ADDED
|
@@ -0,0 +1,275 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Pedagogical Grounding
|
| 2 |
+
|
| 3 |
+
This module provides tools for analyzing educational assessment data and benchmarking LLMs on pedagogical reasoning tasks.
|
| 4 |
+
|
| 5 |
+
## Overview
|
| 6 |
+
|
| 7 |
+
The pedagogical grounding work consists of two parts:
|
| 8 |
+
|
| 9 |
+
1. **Analysis Scripts** - Compute pedagogical metrics from student response data
|
| 10 |
+
2. **LLM Benchmark** - Evaluate LLMs on pedagogical reasoning tasks
|
| 11 |
+
|
| 12 |
+
---
|
| 13 |
+
|
| 14 |
+
## Part 1: Analysis Scripts
|
| 15 |
+
|
| 16 |
+
These scripts analyze the FoundationalKT dataset to compute pedagogical metrics.
|
| 17 |
+
|
| 18 |
+
### IRT Parameters (`irt_parameters.py`)
|
| 19 |
+
|
| 20 |
+
Computes Item Response Theory parameters for assessment items using Bayesian inference.
|
| 21 |
+
|
| 22 |
+
```bash
|
| 23 |
+
python pedagogical_grounding/irt_parameters.py \
|
| 24 |
+
--data-dir foundationalktdataset/ \
|
| 25 |
+
--output-dir pedagogical_grounding/output/
|
| 26 |
+
```
|
| 27 |
+
|
| 28 |
+
**Output:** `irt_parameters.json` (~2,548 problems)
|
| 29 |
+
|
| 30 |
+
| Field | Description |
|
| 31 |
+
|-------|-------------|
|
| 32 |
+
| `problem_id` | Unique problem identifier |
|
| 33 |
+
| `difficulty_1pl` | 1PL (Rasch) difficulty parameter |
|
| 34 |
+
| `difficulty_2pl` | 2PL difficulty parameter (primary metric) |
|
| 35 |
+
| `discrimination_2pl` | 2PL discrimination parameter |
|
| 36 |
+
| `percent_correct` | Empirical accuracy rate |
|
| 37 |
+
| `n_responses` | Number of student responses |
|
| 38 |
+
|
| 39 |
+
**Interpretation:**
|
| 40 |
+
- `difficulty_2pl`: Higher = harder (range: -1.35 to 0.91)
|
| 41 |
+
- `discrimination_2pl`: Higher = better at distinguishing between students of different ability (range: 0.01 to 0.91)
|
| 42 |
+
|
| 43 |
+
### Distractor Analysis (`distractor_analysis.py`)
|
| 44 |
+
|
| 45 |
+
Analyzes the effectiveness of wrong answer choices in multiple-choice questions.
|
| 46 |
+
|
| 47 |
+
```bash
|
| 48 |
+
python pedagogical_grounding/distractor_analysis.py \
|
| 49 |
+
--data-dir foundationalktdataset/ \
|
| 50 |
+
--output-dir pedagogical_grounding/output/
|
| 51 |
+
```
|
| 52 |
+
|
| 53 |
+
**Output:** `distractor_stats.json` (~236 MC problems)
|
| 54 |
+
|
| 55 |
+
| Field | Description |
|
| 56 |
+
|-------|-------------|
|
| 57 |
+
| `problem_id` | Unique problem identifier |
|
| 58 |
+
| `n_choices` | Number of answer options |
|
| 59 |
+
| `correct_rate` | Fraction choosing correct answer |
|
| 60 |
+
| `distractors` | Dict of wrong answers → count |
|
| 61 |
+
| `distractor_frequencies` | Dict of wrong answers → frequency |
|
| 62 |
+
| `most_common_distractor` | Most frequently chosen wrong answer |
|
| 63 |
+
| `least_common_distractor` | Least frequently chosen wrong answer |
|
| 64 |
+
|
| 65 |
+
---
|
| 66 |
+
|
| 67 |
+
## Part 2: LLM Benchmark
|
| 68 |
+
|
| 69 |
+
Benchmarks LLMs on 4 pedagogical reasoning tasks using zero-shot evaluation.
|
| 70 |
+
|
| 71 |
+
### Tasks
|
| 72 |
+
|
| 73 |
+
| Task | Description | Data Source | Metric |
|
| 74 |
+
|------|-------------|-------------|--------|
|
| 75 |
+
| `difficulty` | Which of two questions is harder? | IRT params | Binary accuracy |
|
| 76 |
+
| `discrimination` | Which question better distinguishes ability? | IRT params | Binary accuracy |
|
| 77 |
+
| `distractor_most` | Which wrong answer is most commonly chosen? | Distractor stats | Multi-class accuracy |
|
| 78 |
+
| `distractor_least` | Which wrong answer is least commonly chosen? | Distractor stats | Multi-class accuracy |
|
| 79 |
+
|
| 80 |
+
### Scripts
|
| 81 |
+
|
| 82 |
+
All benchmark scripts are located in this folder (`pedagogical_grounding/`):
|
| 83 |
+
|
| 84 |
+
| File | Description |
|
| 85 |
+
|------|-------------|
|
| 86 |
+
| `pedagogical_inference_base.py` | Base module with data loading, sampling, prompts, inference |
|
| 87 |
+
| `gptoss120b_pedagogical.py` | GPT-OSS-120B model config |
|
| 88 |
+
| `qwen3next80b_instruct_pedagogical.py` | Qwen3-Next-80B-Instruct model config |
|
| 89 |
+
| `qwen3next80b_thinking_pedagogical.py` | Qwen3-Next-80B-Thinking model config |
|
| 90 |
+
| `evaluate_pedagogical.py` | Evaluation script for accuracy metrics |
|
| 91 |
+
|
| 92 |
+
### Usage
|
| 93 |
+
|
| 94 |
+
**Run all commands from the project root directory.**
|
| 95 |
+
|
| 96 |
+
#### Run Inference
|
| 97 |
+
|
| 98 |
+
```bash
|
| 99 |
+
# Difficulty comparison (stratified sampling, 1000 pairs)
|
| 100 |
+
CUDA_VISIBLE_DEVICES=0,1,2,3 python pedagogical_grounding/gptoss120b_pedagogical.py \
|
| 101 |
+
--task difficulty \
|
| 102 |
+
--num-samples 1000 \
|
| 103 |
+
--sampling-mode stratified \
|
| 104 |
+
--num-gpus 4
|
| 105 |
+
|
| 106 |
+
# Discrimination comparison
|
| 107 |
+
CUDA_VISIBLE_DEVICES=0,1,2,3 python pedagogical_grounding/gptoss120b_pedagogical.py \
|
| 108 |
+
--task discrimination \
|
| 109 |
+
--num-samples 1000 \
|
| 110 |
+
--num-gpus 4
|
| 111 |
+
|
| 112 |
+
# Most common distractor prediction
|
| 113 |
+
CUDA_VISIBLE_DEVICES=0,1,2,3 python pedagogical_grounding/gptoss120b_pedagogical.py \
|
| 114 |
+
--task distractor_most \
|
| 115 |
+
--num-gpus 4
|
| 116 |
+
|
| 117 |
+
# Least common distractor prediction
|
| 118 |
+
CUDA_VISIBLE_DEVICES=0,1,2,3 python pedagogical_grounding/gptoss120b_pedagogical.py \
|
| 119 |
+
--task distractor_least \
|
| 120 |
+
--num-gpus 4
|
| 121 |
+
|
| 122 |
+
# All tasks at once
|
| 123 |
+
CUDA_VISIBLE_DEVICES=0,1,2,3 python pedagogical_grounding/gptoss120b_pedagogical.py \
|
| 124 |
+
--task all \
|
| 125 |
+
--num-samples 500 \
|
| 126 |
+
--num-gpus 4
|
| 127 |
+
```
|
| 128 |
+
|
| 129 |
+
#### Evaluate Results
|
| 130 |
+
|
| 131 |
+
```bash
|
| 132 |
+
python pedagogical_grounding/evaluate_pedagogical.py --input gptoss120b_pedagogical_difficulty_n1000_stratified.jsonl
|
| 133 |
+
python pedagogical_grounding/evaluate_pedagogical.py --input results.jsonl --output metrics.json
|
| 134 |
+
```
|
| 135 |
+
|
| 136 |
+
### CLI Arguments
|
| 137 |
+
|
| 138 |
+
| Argument | Description | Default |
|
| 139 |
+
|----------|-------------|---------|
|
| 140 |
+
| `--task` | Task to run: `difficulty`, `discrimination`, `distractor_most`, `distractor_least`, `all` | Required |
|
| 141 |
+
| `--sampling-mode` | `random` or `stratified` (stratified sampling ensures meaningful differences between paired questions) | `stratified` |
|
| 142 |
+
| `--num-samples` | Number of pairs/problems to sample | 1000 |
|
| 143 |
+
| `--min-difference` | Minimum difference for stratified sampling | 0.2 |
|
| 144 |
+
| `--batch-size` | Batch size for vLLM inference | 500 |
|
| 145 |
+
| `--num-gpus` | Number of GPUs for tensor parallelism | 1 |
|
| 146 |
+
| `--data-dir` | Base directory containing data files | `.` |
|
| 147 |
+
|
| 148 |
+
### Sampling Modes
|
| 149 |
+
|
| 150 |
+
**Random:** Any two questions are paired at random (may include trivial comparisons, i.e. pairs whose values barely differ).
|
| 151 |
+
|
| 152 |
+
**Stratified:** Ensures meaningful differences between pairs:
|
| 153 |
+
- Small: 0.2–0.5 difference
|
| 154 |
+
- Medium: 0.5–1.0 difference
|
| 155 |
+
- Large: >1.0 difference
|
| 156 |
+
|
| 157 |
+
### Output Format (JSONL)
|
| 158 |
+
|
| 159 |
+
**Comparison tasks:**
|
| 160 |
+
```json
|
| 161 |
+
{
|
| 162 |
+
"prediction_id": "difficulty_405080_448452",
|
| 163 |
+
"task": "difficulty",
|
| 164 |
+
"problem_id_a": 405080,
|
| 165 |
+
"problem_id_b": 448452,
|
| 166 |
+
"value_a": -0.234,
|
| 167 |
+
"value_b": -0.533,
|
| 168 |
+
"difference": 0.299,
|
| 169 |
+
"stratum": "small",
|
| 170 |
+
"ground_truth": "A",
|
| 171 |
+
"predicted_answer": "A",
|
| 172 |
+
"is_correct": true
|
| 173 |
+
}
|
| 174 |
+
```
|
| 175 |
+
|
| 176 |
+
**Distractor tasks:**
|
| 177 |
+
```json
|
| 178 |
+
{
|
| 179 |
+
"prediction_id": "distractor_most_74236",
|
| 180 |
+
"task": "distractor_most",
|
| 181 |
+
"problem_id": 74236,
|
| 182 |
+
"n_choices": 4,
|
| 183 |
+
"ground_truth_letter": "B",
|
| 184 |
+
"ground_truth_text": "Elena is correct",
|
| 185 |
+
"ground_truth_freq": 0.207,
|
| 186 |
+
"predicted_answer": "B",
|
| 187 |
+
"is_correct": true
|
| 188 |
+
}
|
| 189 |
+
```
|
| 190 |
+
|
| 191 |
+
### Baselines
|
| 192 |
+
|
| 193 |
+
| Task | Random Baseline |
|
| 194 |
+
|------|-----------------|
|
| 195 |
+
| Difficulty comparison | 50% |
|
| 196 |
+
| Discrimination comparison | 50% |
|
| 197 |
+
| Distractor (3 options) | 33% |
|
| 198 |
+
| Distractor (4 options) | 25% |
|
| 199 |
+
|
| 200 |
+
### Evaluation Metrics
|
| 201 |
+
|
| 202 |
+
The evaluation script computes:
|
| 203 |
+
|
| 204 |
+
- **Overall accuracy** vs random baseline
|
| 205 |
+
- **By stratum** (for comparison tasks): small/medium/large difference
|
| 206 |
+
- **By difference magnitude**: very_small/small/medium/large
|
| 207 |
+
- **By number of distractors** (for distractor tasks)
|
| 208 |
+
- **By ground truth frequency**: rare/moderate/common distractors
|
| 209 |
+
|
| 210 |
+
---
|
| 211 |
+
|
| 212 |
+
## Data Flow
|
| 213 |
+
|
| 214 |
+
```
|
| 215 |
+
foundationalktdataset/
|
| 216 |
+
├── Interactions.csv # Student responses
|
| 217 |
+
├── Problems.csv # Problem text and answers
|
| 218 |
+
└── Skills.csv # Skill metadata
|
| 219 |
+
│
|
| 220 |
+
▼
|
| 221 |
+
┌───────────────────────────────────────┐
|
| 222 |
+
│ Analysis Scripts (Part 1) │
|
| 223 |
+
│ irt_parameters.py, distractor_analysis.py │
|
| 224 |
+
└───────────────────────────────────────┘
|
| 225 |
+
│
|
| 226 |
+
▼
|
| 227 |
+
pedagogical_grounding/output/
|
| 228 |
+
├── irt_parameters.json # 2,548 problems with IRT params
|
| 229 |
+
└── distractor_stats.json # 236 MC problems with distractor data
|
| 230 |
+
│
|
| 231 |
+
▼
|
| 232 |
+
┌───────────────────────────────────────┐
|
| 233 |
+
│ LLM Benchmark (Part 2) │
|
| 234 |
+
│ pedagogical_inference_base.py │
|
| 235 |
+
│ + model configs │
|
| 236 |
+
└───────────────────────────────────────┘
|
| 237 |
+
│
|
| 238 |
+
▼
|
| 239 |
+
Output: JSONL predictions + evaluation metrics
|
| 240 |
+
```
|
| 241 |
+
|
| 242 |
+
---
|
| 243 |
+
|
| 244 |
+
## File Structure
|
| 245 |
+
|
| 246 |
+
```
|
| 247 |
+
foundationalKT-ss/
|
| 248 |
+
├── pedagogical_grounding/
|
| 249 |
+
│ ├── README.md # This file
|
| 250 |
+
│ │
|
| 251 |
+
│ │ # Analysis Scripts (Part 1)
|
| 252 |
+
│ ├── irt_parameters.py # IRT parameter computation
|
| 253 |
+
│ ├── distractor_analysis.py # Distractor effectiveness analysis
|
| 254 |
+
│ │
|
| 255 |
+
│ │ # LLM Benchmark (Part 2)
|
| 256 |
+
│ ├── pedagogical_inference_base.py # Benchmark base module
|
| 257 |
+
│ ├── gptoss120b_pedagogical.py # GPT-OSS-120B config
|
| 258 |
+
│ ├── qwen3next80b_instruct_pedagogical.py # Qwen3 Instruct config
|
| 259 |
+
│ ├── qwen3next80b_thinking_pedagogical.py # Qwen3 Thinking config
|
| 260 |
+
│ ├── evaluate_pedagogical.py # Evaluation script
|
| 261 |
+
│ │
|
| 262 |
+
│ │ # Output Data
|
| 263 |
+
│ └── output/
|
| 264 |
+
│ ├── irt_parameters.json # IRT results (2,548 problems)
|
| 265 |
+
│ ├── irt_parameters.png # IRT visualizations
|
| 266 |
+
│ ├── distractor_stats.json # Distractor results (236 problems)
|
| 267 |
+
│ └── distractor_analysis.png # Distractor visualizations
|
| 268 |
+
│
|
| 269 |
+
├── foundationalktdataset/ # Source data
|
| 270 |
+
│ ├── Interactions.csv
|
| 271 |
+
│ ├── Problems.csv
|
| 272 |
+
│ └── Skills.csv
|
| 273 |
+
│
|
| 274 |
+
└── clean_utils.py # Text cleaning utilities
|
| 275 |
+
```
|
Results/pedagogical_grounding/batch_evaluate.py
ADDED
|
@@ -0,0 +1,328 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Batch evaluation script for Pedagogical Grounding LLM Benchmark.
|
| 3 |
+
|
| 4 |
+
Analyzes all result files in a directory and creates aggregate comparison tables.
|
| 5 |
+
|
| 6 |
+
Usage:
|
| 7 |
+
python pedagogical_grounding/batch_evaluate.py --input-dir pedagogical_grounding/results
|
| 8 |
+
python pedagogical_grounding/batch_evaluate.py --input-dir results --output summary.json
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
import argparse
|
| 12 |
+
import glob
|
| 13 |
+
import json
|
| 14 |
+
import os
|
| 15 |
+
import re
|
| 16 |
+
from collections import defaultdict
|
| 17 |
+
from typing import Dict, List, Tuple
|
| 18 |
+
|
| 19 |
+
from evaluate_pedagogical import (
|
| 20 |
+
load_results,
|
| 21 |
+
evaluate_comparison_task,
|
| 22 |
+
evaluate_distractor_task,
|
| 23 |
+
)
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def extract_model_name(filename: str) -> str:
    """Extract the model name from a result file name.

    Result files are expected to be named ``{model}_pedagogical_{task}_...``;
    everything before the first ``_pedagogical_`` is taken as the model
    name. Only the final path component of ``filename`` is inspected.

    Args:
        filename: Path or bare name of a result file.

    Returns:
        The model prefix when the naming pattern matches; otherwise the
        base name with a trailing ``.jsonl`` extension (if present) removed.
    """
    basename = os.path.basename(filename)
    # Pattern: {model}_pedagogical_{task}_...
    match = re.match(r'^(.+?)_pedagogical_', basename)
    if match:
        return match.group(1)
    # Strip only the extension: str.replace would also delete any '.jsonl'
    # that happens to occur in the middle of the name.
    suffix = '.jsonl'
    if basename.endswith(suffix):
        return basename[:-len(suffix)]
    return basename
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def evaluate_file(filepath: str) -> Tuple[str, Dict]:
    """Score every recognised task found in one JSONL result file.

    Returns a ``(model_name, metrics)`` pair.  ``metrics`` holds the file's
    base name, the number of loaded predictions and a ``tasks`` mapping from
    task name to that task's metrics.  Unrecognised task names are ignored,
    and task results carrying an ``'error'`` key are left out.
    """
    # Each known task name maps to the evaluator that scores it.
    evaluators = {
        'difficulty': evaluate_comparison_task,
        'discrimination': evaluate_comparison_task,
        'distractor_most': evaluate_distractor_task,
        'distractor_least': evaluate_distractor_task,
    }

    name = extract_model_name(filepath)
    records = load_results(filepath)

    summary = {
        'file': os.path.basename(filepath),
        'total_predictions': len(records),
        'tasks': {},
    }

    # Distinct, non-empty task labels present in this file.
    present = {rec.get('task') for rec in records if rec.get('task')}

    for label in present:
        scorer = evaluators.get(label)
        if scorer is None:
            continue
        scored = scorer(records, label)
        if 'error' in scored:
            continue
        summary['tasks'][label] = scored

    return name, summary
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
def print_header(title: str, width: int = 80) -> None:
    """Write a blank line, then *title* framed by two ``=`` rules of *width*."""
    rule = "=" * width
    print(f"\n{rule}\n {title}\n{rule}")
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def print_accuracy_table(all_metrics: Dict[str, Dict], task: str) -> None:
    """Print a per-model accuracy / lift / sample-count table for one task.

    Args:
        all_metrics: Mapping of model name to its metrics dict; each metrics
            dict must contain a ``'tasks'`` mapping.
        task: Task name whose metrics are shown.  Models without usable
            metrics for the task get an ``N/A`` row.
    """
    # Comparison tasks are two-way choices; for the other tasks the chance
    # level is not a single number, so only a label is printed.
    if task in ('difficulty', 'discrimination'):
        baseline_label = "50.0%"
    else:
        baseline_label = "varies"

    rule = "-" * 65

    # Header
    print(f"\n{'Model':<35} {'Accuracy':>10} {'Lift':>10} {'N':>8}")
    print(rule)

    # Data rows
    for model in sorted(all_metrics):
        task_metrics = all_metrics[model]['tasks'].get(task)
        if task_metrics and 'error' not in task_metrics:
            acc = task_metrics['accuracy']
            lift = task_metrics['lift_over_random']
            n = task_metrics['total']
            # The '%' presentation type counts the percent sign inside the
            # field width, so the width must equal the header's (10) for the
            # numeric cells to line up with the header and the N/A cells.
            print(f"{model:<35} {acc:>10.1%} {lift:>+10.1%} {n:>8}")
        else:
            print(f"{model:<35} {'N/A':>10} {'N/A':>10} {'N/A':>8}")

    print(rule)
    print(f"{'Random Baseline':<35} {baseline_label:>10}")
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
def print_stratum_table(all_metrics: Dict[str, Dict], task: str) -> None:
    """Print per-model accuracy for the ``small``/``medium``/``large`` strata.

    Args:
        all_metrics: Mapping of model name to its metrics dict; each metrics
            dict must contain a ``'tasks'`` mapping.
        task: Task whose ``'by_stratum'`` breakdown is shown.

    Models with no usable metrics for *task* produce no row at all; a
    stratum missing from a model's breakdown is shown as ``N/A``.
    """
    strata = ['small', 'medium', 'large']

    # Header
    header = f"{'Model':<30}" + "".join(f" {s:>12}" for s in strata)
    print(f"\n{header}")
    print("-" * (30 + 13 * len(strata)))

    # Data rows
    for model in sorted(all_metrics):
        task_metrics = all_metrics[model]['tasks'].get(task)
        if not task_metrics or 'error' in task_metrics:
            continue
        by_stratum = task_metrics.get('by_stratum', {})
        row = f"{model:<30}"
        for s in strata:
            if s in by_stratum:
                # Width 12 matches the header and N/A cells; the percent
                # sign is counted inside the field width.
                row += f" {by_stratum[s]['accuracy']:>12.1%}"
            else:
                row += f" {'N/A':>12}"
        print(row)
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
def print_summary_table(all_metrics: Dict[str, Dict]) -> None:
    """Print one accuracy row per model covering all four tasks plus an average.

    Args:
        all_metrics: Mapping of model name to its metrics dict; each metrics
            dict must contain a ``'tasks'`` mapping.

    The average is taken over the tasks that have usable metrics for the
    model.  A fixed "Random Baseline" row closes the table.
    """
    tasks = ['difficulty', 'discrimination', 'distractor_most', 'distractor_least']
    task_abbrev = {
        'difficulty': 'Diff',
        'discrimination': 'Disc',
        'distractor_most': 'D-Most',
        'distractor_least': 'D-Least'
    }
    rule = "-" * (30 + 11 * (len(tasks) + 1))
    missing = f"{'N/A':>10}"

    # Header
    header = f"{'Model':<30}"
    header += "".join(f" {task_abbrev[t]:>10}" for t in tasks)
    header += f" {'Avg':>10}"
    print(f"\n{header}")
    print(rule)

    # Data rows
    for model in sorted(all_metrics):
        cells = []
        accs = []
        for t in tasks:
            task_metrics = all_metrics[model]['tasks'].get(t)
            if task_metrics and 'error' not in task_metrics:
                acc = task_metrics['accuracy']
                accs.append(acc)
                # Width 10 (percent sign included) keeps numeric cells
                # aligned with the header and the N/A cells.
                cells.append(f"{acc:>10.1%}")
            else:
                cells.append(missing)

        # Average over the tasks that were actually scored
        cells.append(f"{sum(accs) / len(accs):>10.1%}" if accs else missing)

        print(f"{model:<30} " + " ".join(cells))

    # Baseline row (hard-coded reference values, not computed from the data)
    print(rule)
    baseline_row = f"{'Random Baseline':<30}"
    baseline_row += f" {'50.0%':>10}"  # difficulty
    baseline_row += f" {'50.0%':>10}"  # discrimination
    baseline_row += f" {'~35%':>10}"  # distractor_most
    baseline_row += f" {'~35%':>10}"  # distractor_least
    baseline_row += f" {'~43%':>10}"  # avg
    print(baseline_row)
|
| 180 |
+
|
| 181 |
+
|
| 182 |
+
def print_lift_table(all_metrics: Dict[str, Dict]) -> None:
    """Print each model's lift over the random baseline for all four tasks.

    Args:
        all_metrics: Mapping of model name to its metrics dict; each metrics
            dict must contain a ``'tasks'`` mapping whose entries carry
            ``'lift_over_random'``.

    The final column is the mean lift over the tasks that have usable
    metrics for the model.
    """
    tasks = ['difficulty', 'discrimination', 'distractor_most', 'distractor_least']
    task_abbrev = {
        'difficulty': 'Diff',
        'discrimination': 'Disc',
        'distractor_most': 'D-Most',
        'distractor_least': 'D-Least'
    }
    missing = f"{'N/A':>10}"

    # Header
    header = f"{'Model':<30}"
    header += "".join(f" {task_abbrev[t]:>10}" for t in tasks)
    header += f" {'Avg Lift':>10}"
    print(f"\n{header}")
    print("-" * (30 + 11 * (len(tasks) + 1)))

    # Data rows
    for model in sorted(all_metrics):
        cells = []
        lifts = []
        for t in tasks:
            task_metrics = all_metrics[model]['tasks'].get(t)
            if task_metrics and 'error' not in task_metrics:
                lift = task_metrics['lift_over_random']
                lifts.append(lift)
                # Width 10 (sign and percent sign included) keeps numeric
                # cells aligned with the header and the N/A cells.
                cells.append(f"{lift:>+10.1%}")
            else:
                cells.append(missing)

        # Average lift over the tasks that were actually scored
        cells.append(f"{sum(lifts) / len(lifts):>+10.1%}" if lifts else missing)

        print(f"{model:<30} " + " ".join(cells))
|
| 222 |
+
|
| 223 |
+
|
| 224 |
+
def print_best_model_per_task(all_metrics: Dict[str, Dict]) -> None:
    """Print, for each of the four tasks, the model with the highest accuracy.

    Args:
        all_metrics: Mapping of model name to its metrics dict; each metrics
            dict must contain a ``'tasks'`` mapping.

    When several models tie, the first one encountered in ``all_metrics``
    wins.  Tasks without any usable metrics get an ``N/A`` row.
    """
    tasks = ['difficulty', 'discrimination', 'distractor_most', 'distractor_least']

    print(f"\n{'Task':<20} {'Best Model':<30} {'Accuracy':>10} {'Lift':>10}")
    print("-" * 72)

    for task in tasks:
        best_model = None
        best_acc = -1
        best_lift = 0

        for model, metrics in all_metrics.items():
            task_metrics = metrics['tasks'].get(task)
            if not task_metrics or 'error' in task_metrics:
                continue
            # Strict '>' keeps the earliest model on ties.
            if task_metrics['accuracy'] > best_acc:
                best_acc = task_metrics['accuracy']
                best_lift = task_metrics['lift_over_random']
                best_model = model

        # Identity check so that an empty-string model name still counts.
        if best_model is not None:
            # Width 10 (percent sign included) matches the header columns.
            print(f"{task:<20} {best_model:<30} {best_acc:>10.1%} {best_lift:>+10.1%}")
        else:
            print(f"{task:<20} {'N/A':<30} {'N/A':>10} {'N/A':>10}")
|
| 248 |
+
|
| 249 |
+
|
| 250 |
+
def main():
    """Command-line entry point: evaluate every result file and print tables.

    Finds the files matching ``--pattern`` inside ``--input-dir``, evaluates
    each one, prints the aggregate comparison tables and, when ``--output``
    is given, writes the collected metrics to that path as JSON.
    """
    parser = argparse.ArgumentParser(
        description="Batch evaluate Pedagogical Grounding Benchmark results"
    )
    parser.add_argument(
        "--input-dir", "-i",
        type=str,
        required=True,
        help="Directory containing JSONL result files"
    )
    parser.add_argument(
        "--output", "-o",
        type=str,
        default=None,
        help="Output JSON file for aggregate metrics (optional)"
    )
    parser.add_argument(
        "--pattern",
        type=str,
        default="*.jsonl",
        help="Glob pattern for result files (default: *.jsonl)"
    )
    args = parser.parse_args()

    # Find all result files
    pattern = os.path.join(args.input_dir, args.pattern)
    files = sorted(glob.glob(pattern))

    if not files:
        print(f"No files found matching: {pattern}")
        return

    print(f"Found {len(files)} result files in {args.input_dir}")

    # Evaluate all files
    all_metrics = {}
    for filepath in files:
        print(f" Loading: {os.path.basename(filepath)}")
        model_name, metrics = evaluate_file(filepath)

        existing = all_metrics.get(model_name)
        if existing is None:
            all_metrics[model_name] = metrics
            continue

        # File names follow {model}_pedagogical_{task}_..., so one model can
        # be spread over several files.  Merge instead of overwriting, or
        # only the last file's tasks would survive.  'file' keeps the first
        # file name; 'files' (added only when merging) lists all of them.
        existing.setdefault('files', [existing['file']]).append(metrics['file'])
        existing['total_predictions'] += metrics['total_predictions']
        existing['tasks'].update(metrics['tasks'])

    # Print aggregate tables
    print_header("PEDAGOGICAL GROUNDING BENCHMARK - AGGREGATE RESULTS")

    # Summary table
    print_header("ACCURACY BY TASK", width=72)
    print_summary_table(all_metrics)

    # Lift table
    print_header("LIFT OVER RANDOM BASELINE", width=72)
    print_lift_table(all_metrics)

    # Best model per task
    print_header("BEST MODEL PER TASK", width=72)
    print_best_model_per_task(all_metrics)

    # Detailed stratum breakdown for comparison tasks
    print_header("DIFFICULTY - ACCURACY BY STRATUM", width=70)
    print_stratum_table(all_metrics, 'difficulty')

    print_header("DISCRIMINATION - ACCURACY BY STRATUM", width=70)
    print_stratum_table(all_metrics, 'discrimination')

    # Individual task tables
    for task in ['difficulty', 'discrimination', 'distractor_most', 'distractor_least']:
        print_header(f"{task.upper()} - DETAILED", width=65)
        print_accuracy_table(all_metrics, task)

    # Save aggregate metrics if output specified
    if args.output:
        with open(args.output, 'w') as f:
            json.dump(all_metrics, f, indent=2)
        print(f"\nAggregate metrics saved to {args.output}")

    print()
|
| 325 |
+
|
| 326 |
+
|
| 327 |
+
if __name__ == "__main__":
|
| 328 |
+
main()
|
Results/pedagogical_grounding/distractor_analysis.py
ADDED
|
@@ -0,0 +1,299 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Distractor Analysis for Pedagogical Grounding
|
| 3 |
+
|
| 4 |
+
Computes distractor effectiveness for Multiple Choice (select 1) questions
|
| 5 |
+
with more than 2 choices. Identifies most/least common wrong answers.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import argparse
|
| 9 |
+
import os
|
| 10 |
+
import json
|
| 11 |
+
import pandas as pd
|
| 12 |
+
import numpy as np
|
| 13 |
+
import matplotlib.pyplot as plt
|
| 14 |
+
from collections import Counter
|
| 15 |
+
|
| 16 |
+
# Configuration
|
| 17 |
+
DEFAULT_DATA_DIR = "foundationalktdataset"
|
| 18 |
+
STUDENT_FILE = "Interactions.csv"
|
| 19 |
+
PROBLEMS_FILE = "Problems.csv"
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def parse_args():
    """Build the command-line parser and return the parsed arguments.

    Options: ``--data-dir/-d`` (input CSV directory), ``--output-dir/-o``
    (where results are written) and the ``--no-plots`` switch.
    """
    cli = argparse.ArgumentParser(description="Analyze distractor effectiveness")

    data_dir_help = f"Directory containing input CSV files (default: {DEFAULT_DATA_DIR})"
    cli.add_argument("--data-dir", "-d", type=str, default=DEFAULT_DATA_DIR, help=data_dir_help)

    output_dir_help = "Directory to save output (default: pedagogical_grounding/output)"
    cli.add_argument(
        "--output-dir", "-o",
        type=str,
        default="pedagogical_grounding/output",
        help=output_dir_help,
    )

    cli.add_argument("--no-plots", action="store_true", help="Skip generating plots")

    return cli.parse_args()
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def load_data(data_dir):
    """Read the interactions and problems CSV files found in *data_dir*.

    Returns a ``(student_df, problems_df)`` tuple of DataFrames and prints
    the row count of each.
    """
    print(f"Loading data from {data_dir}...")

    interactions_path = os.path.join(data_dir, STUDENT_FILE)
    problems_path = os.path.join(data_dir, PROBLEMS_FILE)

    interactions = pd.read_csv(interactions_path)
    problems = pd.read_csv(problems_path)

    print(f" Loaded {len(interactions):,} student interactions")
    print(f" Loaded {len(problems):,} problems")

    return interactions, problems
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
def get_answer_options(answer_string):
    """Split a ``||``-separated option string into a list of trimmed options.

    Missing (NaN/None) or empty input yields an empty list; pieces that are
    blank after trimming are dropped.
    """
    if pd.isna(answer_string) or answer_string == '':
        return []
    trimmed = (piece.strip() for piece in answer_string.split('||'))
    return [piece for piece in trimmed if piece]
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
def get_correct_answers(correct_string):
    """Split a ``||``-separated correct-answer string into trimmed answers.

    Missing (NaN/None) or empty input yields an empty list; entries that are
    blank after trimming are dropped.
    """
    if pd.isna(correct_string) or correct_string == '':
        return []
    # filter(None, ...) discards the empty strings left after stripping.
    return list(filter(None, map(str.strip, correct_string.split('||'))))
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
def normalize_answer(text):
    """Return *text* in a canonical form suitable for equality comparison.

    Missing values become ``""``.  Otherwise the value is converted to a
    string, HTML tags are removed, whitespace runs are collapsed to single
    spaces and the result is lower-cased.
    """
    import re

    if pd.isna(text):
        return ""

    without_tags = re.sub(r'<[^>]+>', '', str(text))
    collapsed = ' '.join(without_tags.split())
    return collapsed.strip().lower()
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
def analyze_distractors(student_df, problems_df):
    """Analyze distractor effectiveness for MC (select 1) with >2 choices.

    Args:
        student_df: Interactions DataFrame; the ``problem_id`` and
            ``answer_text`` columns are read.
        problems_df: Problems DataFrame; the ``problem_id``,
            ``Problem Type``, ``Multiple Choice Options`` and
            ``Multiple Choice Answers`` columns are read.

    Returns:
        A list with one dict per analysed problem containing the response
        counts, the correct-answer rate, per-distractor counts/frequencies
        and the most and least frequently chosen distractor.  Problems are
        skipped when they have fewer than 10 interactions, fewer than 10
        responses that match an option, no option identifiable as correct,
        or no distractor.
    """
    print("\n" + "=" * 60)
    print(" DISTRACTOR ANALYSIS")
    print("=" * 60)

    # Filter to MC (select 1) problems
    mc_problems = problems_df[problems_df['Problem Type'] == 'Multiple Choice (select 1)'].copy()

    # Count choices
    mc_problems['answer_options'] = mc_problems['Multiple Choice Options'].apply(get_answer_options)
    mc_problems['n_choices'] = mc_problems['answer_options'].apply(len)

    # Filter to >2 choices
    mc_problems = mc_problems[mc_problems['n_choices'] > 2].copy()
    print(f"\nMC (select 1) problems with >2 choices: {len(mc_problems)}")

    # Get correct answers
    mc_problems['correct_answers'] = mc_problems['Multiple Choice Answers'].apply(get_correct_answers)

    # Filter student data to these problems.  The interactions are
    # deliberately NOT merged with the problem table: the merged columns
    # were never used, and a left merge duplicates every interaction row
    # whenever a problem_id occurs more than once in the problem table.
    problem_ids = set(mc_problems['problem_id'])
    mc_interactions = student_df[student_df['problem_id'].isin(problem_ids)]
    print(f"Student interactions for these problems: {len(mc_interactions):,}")

    # Row positions of each problem's interactions, computed in one pass
    # instead of re-filtering the whole frame for every problem.
    row_positions = mc_interactions.groupby('problem_id').indices

    # Analyze each problem (first row wins when a problem_id is duplicated)
    results = []

    for _, problem_info in mc_problems.drop_duplicates(subset='problem_id').iterrows():
        problem_id = problem_info['problem_id']
        positions = row_positions.get(problem_id)

        if positions is None or len(positions) < 10:  # Skip problems with too few responses
            continue

        answer_options = problem_info['answer_options']
        # Use the parsed 'Multiple Choice Answers' list computed above.
        correct_answers = problem_info['correct_answers']

        normalized_options = [normalize_answer(opt) for opt in answer_options]
        correct_normalized = {normalize_answer(a) for a in correct_answers}

        # Count responses for each option
        option_counts = Counter()
        total_responses = 0

        for student_answer in mc_interactions['answer_text'].iloc[positions].dropna():
            matched = _match_option(normalize_answer(student_answer), normalized_options)
            if matched is None:
                continue
            option_counts[answer_options[matched]] += 1
            total_responses += 1

        if total_responses < 10:
            continue

        correct_options = _find_correct_options(answer_options, normalized_options, correct_normalized)
        if not correct_options:
            # Without a known correct option every choice would be reported
            # as a distractor, which makes the statistics meaningless.
            continue

        # Separate correct and incorrect options.  Counts are keyed by option
        # text, so iterate over distinct texts to avoid double counting.
        distractors = {}
        correct_count = 0

        for opt in dict.fromkeys(answer_options):
            count = option_counts.get(opt, 0)
            if opt in correct_options:
                correct_count += count
            else:
                distractors[opt] = count

        if not distractors:
            continue

        # Find most/least common distractor
        sorted_distractors = sorted(distractors.items(), key=lambda x: x[1], reverse=True)
        most_common = sorted_distractors[0]
        least_common = sorted_distractors[-1]

        # Compute frequencies
        distractor_freqs = {opt: count / total_responses for opt, count in distractors.items()}

        results.append({
            'problem_id': int(problem_id),
            'n_choices': int(problem_info['n_choices']),
            'total_responses': int(total_responses),
            'correct_count': int(correct_count),
            'correct_rate': correct_count / total_responses,
            'distractors': {opt: int(count) for opt, count in distractors.items()},
            'distractor_frequencies': distractor_freqs,
            'most_common_distractor': most_common[0],
            'most_common_distractor_freq': most_common[1] / total_responses,
            'least_common_distractor': least_common[0],
            'least_common_distractor_freq': least_common[1] / total_responses,
        })

    print(f"Problems with sufficient data: {len(results)}")

    return results


def _match_option(answer, normalized_options):
    """Return the index of the option that *answer* refers to, or ``None``.

    Both arguments are expected to be already normalized.  An exact match is
    preferred; substring containment is only a fallback, so that an answer of
    "1" is not credited to an earlier option "10".  Empty strings never match
    by containment because ``"" in s`` is always true.
    """
    if not answer:
        return None
    for index, option in enumerate(normalized_options):
        if answer == option:
            return index
    for index, option in enumerate(normalized_options):
        if option and (answer in option or option in answer):
            return index
    return None


def _find_correct_options(answer_options, normalized_options, correct_normalized):
    """Return the set of option texts that correspond to a correct answer.

    Exact (normalized) equality is tried first; only when no option matches
    exactly is non-empty substring containment used as a fallback.
    """
    exact = {
        opt for opt, norm in zip(answer_options, normalized_options)
        if norm in correct_normalized
    }
    if exact:
        return exact
    return {
        opt for opt, norm in zip(answer_options, normalized_options)
        if norm and any(c and (c in norm or norm in c) for c in correct_normalized)
    }
|
| 195 |
+
|
| 196 |
+
|
| 197 |
+
def print_summary(results):
    """Print summary statistics for the per-problem distractor results.

    Args:
        results: List of dicts carrying ``'correct_rate'``,
            ``'most_common_distractor_freq'`` and
            ``'least_common_distractor_freq'``.

    An empty list prints a short notice instead of statistics: ``np.min`` /
    ``np.max`` raise on empty input and the final percentage would divide
    by zero.
    """
    print("\n--- Summary Statistics ---")

    if not results:
        print("\nNo problems with sufficient data; nothing to summarize.")
        return

    correct_rates = [r['correct_rate'] for r in results]
    most_common_freqs = [r['most_common_distractor_freq'] for r in results]
    least_common_freqs = [r['least_common_distractor_freq'] for r in results]

    print("\nCorrect Answer Rate:")
    print(f" Mean: {np.mean(correct_rates):.1%}")
    print(f" Median: {np.median(correct_rates):.1%}")
    print(f" Min: {np.min(correct_rates):.1%}")
    print(f" Max: {np.max(correct_rates):.1%}")

    print("\nMost Common Distractor Frequency:")
    print(f" Mean: {np.mean(most_common_freqs):.1%}")
    print(f" Median: {np.median(most_common_freqs):.1%}")

    print("\nLeast Common Distractor Frequency:")
    print(f" Mean: {np.mean(least_common_freqs):.1%}")
    print(f" Median: {np.median(least_common_freqs):.1%}")

    # Count problems where least common distractor is never chosen
    never_chosen = sum(1 for r in results if r['least_common_distractor_freq'] == 0)
    print(f"\nProblems with ineffective distractor (0 selections): {never_chosen} ({100*never_chosen/len(results):.1f}%)")
|
| 222 |
+
|
| 223 |
+
|
| 224 |
+
def save_results(results, output_dir):
    """Write *results* as indented JSON to ``distractor_stats.json``.

    The output directory is created when it does not exist yet, and the
    path of the written file is printed.
    """
    os.makedirs(output_dir, exist_ok=True)
    destination = os.path.join(output_dir, 'distractor_stats.json')

    serialized = json.dumps(results, indent=2)
    with open(destination, 'w') as handle:
        handle.write(serialized)

    print(f"\nSaved: {destination}")
|
| 233 |
+
|
| 234 |
+
|
| 235 |
+
def plot_results(results, output_dir):
    """Draw three side-by-side histograms and save them as one PNG.

    The panels show the distribution of correct-answer rates, of the most
    common distractor frequency and of the least common distractor
    frequency; each panel marks its mean with a dashed red line.  The figure
    is written to ``distractor_analysis.png`` inside *output_dir*.
    """
    os.makedirs(output_dir, exist_ok=True)

    _, axes = plt.subplots(1, 3, figsize=(15, 5))

    # (result key, x-axis label, panel title, extra histogram keyword args)
    panels = [
        ('correct_rate',
         'Correct Answer Rate',
         'Distribution of Correct Answer Rates',
         {}),
        ('most_common_distractor_freq',
         'Most Common Distractor Frequency',
         'Distribution of Most Common Distractor',
         {'color': 'orange'}),
        ('least_common_distractor_freq',
         'Least Common Distractor Frequency',
         'Distribution of Least Common Distractor',
         {'color': 'green'}),
    ]

    for axis, (key, x_label, title, hist_extra) in zip(axes, panels):
        values = [entry[key] for entry in results]
        axis.hist(values, bins=20, edgecolor='black', alpha=0.7, **hist_extra)
        axis.set_xlabel(x_label)
        axis.set_ylabel('Number of Problems')
        axis.set_title(title)
        average = np.mean(values)
        axis.axvline(average, color='red', linestyle='--', label=f'Mean: {average:.1%}')
        axis.legend()

    plt.tight_layout()
    plot_path = os.path.join(output_dir, 'distractor_analysis.png')
    plt.savefig(plot_path, dpi=150)
    plt.close()

    print(f"Saved: {plot_path}")
|
| 274 |
+
|
| 275 |
+
|
| 276 |
+
def main():
    """Run the pipeline: load data, analyze, summarize, save, then plot."""
    options = parse_args()

    # Load the two CSV files and run the analysis on them
    interactions, problems = load_data(options.data_dir)
    stats = analyze_distractors(interactions, problems)

    # Report on screen and persist to disk
    print_summary(stats)
    save_results(stats, options.output_dir)

    # Plotting can be switched off from the command line
    want_plots = not options.no_plots
    if want_plots:
        plot_results(stats, options.output_dir)

    print("\nDone!")
|
| 296 |
+
|
| 297 |
+
|
| 298 |
+
if __name__ == "__main__":
|
| 299 |
+
main()
|
Results/pedagogical_grounding/evaluate_pedagogical.py
ADDED
|
@@ -0,0 +1,285 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Evaluation script for Pedagogical Grounding LLM Benchmark.
|
| 3 |
+
|
| 4 |
+
Computes accuracy metrics for:
|
| 5 |
+
1. Difficulty comparison task
|
| 6 |
+
2. Discrimination comparison task
|
| 7 |
+
3. Most common distractor prediction
|
| 8 |
+
4. Least common distractor prediction
|
| 9 |
+
|
| 10 |
+
Usage:
|
| 11 |
+
python evaluate_pedagogical.py --input results.jsonl
|
| 12 |
+
python evaluate_pedagogical.py --input results.jsonl --output metrics.json
|
| 13 |
+
"""
|
| 14 |
+
|
| 15 |
+
import argparse
|
| 16 |
+
import json
|
| 17 |
+
from collections import defaultdict
|
| 18 |
+
from typing import Dict, List, Optional
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def load_results(jsonl_path: str) -> List[Dict]:
    """Load results from a JSONL file.

    Args:
        jsonl_path: Path of a file holding one JSON document per line.

    Returns:
        The parsed documents in file order; blank lines are skipped.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # JSON text is UTF-8; decode explicitly rather than with the platform's
    # locale encoding so non-ASCII content loads the same everywhere.
    with open(jsonl_path, 'r', encoding='utf-8') as f:
        return [json.loads(line) for line in f if line.strip()]
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def evaluate_comparison_task(results: List[Dict], task_name: str) -> Dict:
    """Score a pairwise comparison task (difficulty or discrimination).

    Only records whose ``'task'`` equals *task_name* are considered.  The
    returned dict reports overall accuracy, the 0.5 random baseline and the
    lift over it, plus accuracy broken down by each record's ``'stratum'``
    and by the bin its ``'difference'`` value falls into.  When no record
    matches, a dict with a single ``'error'`` message is returned.
    """
    selected = [rec for rec in results if rec.get('task') == task_name]
    if not selected:
        return {'error': f'No results found for task: {task_name}'}

    def new_tally():
        return defaultdict(lambda: {'correct': 0, 'total': 0})

    def to_accuracy(tally):
        # Turn raw counters into accuracy records, skipping empty groups.
        return {
            label: {
                'accuracy': counts['correct'] / counts['total'],
                'correct': counts['correct'],
                'total': counts['total'],
            }
            for label, counts in tally.items()
            if counts['total'] > 0
        }

    # Half-open [low, high) bins over the 'difference' value
    bin_edges = (
        ('very_small', 0, 0.2),
        ('small', 0.2, 0.5),
        ('medium', 0.5, 1.0),
        ('large', 1.0, float('inf')),
    )

    per_stratum = new_tally()
    per_bin = new_tally()
    n_correct = 0

    for rec in selected:
        hit = 1 if rec.get('is_correct', False) else 0
        n_correct += hit

        stratum_counts = per_stratum[rec.get('stratum', 'unknown')]
        stratum_counts['total'] += 1
        stratum_counts['correct'] += hit

        # A value outside every bin (e.g. negative) is left out of the
        # per-difference breakdown.
        gap = rec.get('difference', 0)
        bin_label = next((label for label, low, high in bin_edges if low <= gap < high), None)
        if bin_label is not None:
            bin_counts = per_bin[bin_label]
            bin_counts['total'] += 1
            bin_counts['correct'] += hit

    n_total = len(selected)
    accuracy = n_correct / n_total

    summary = {}
    summary['task'] = task_name
    summary['total'] = n_total
    summary['correct'] = n_correct
    summary['accuracy'] = accuracy
    summary['baseline_random'] = 0.5  # Random guess for binary choice
    summary['lift_over_random'] = accuracy - 0.5
    summary['by_stratum'] = to_accuracy(per_stratum)
    summary['by_difference'] = to_accuracy(per_bin)
    return summary
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
def evaluate_distractor_task(results: List[Dict], task_name: str) -> Dict:
    """Evaluate a distractor task (most or least common).

    Only records whose ``'task'`` equals ``task_name`` are scored. From each
    record this reads ``'is_correct'`` (missing counts as incorrect),
    ``'n_choices'`` and ``'ground_truth_freq'`` (missing or ``None`` are
    treated as 0).

    Args:
        results: Prediction records, one dict per prediction.
        task_name: Value of the ``'task'`` field to select.

    Returns:
        ``{'error': ...}`` when no record matches ``task_name``; otherwise a
        dict with ``'task'``, ``'total'``, ``'correct'``, ``'accuracy'``,
        ``'baseline_random_avg'``, ``'lift_over_random'``,
        ``'by_num_distractors'`` and ``'by_ground_truth_freq'``.
    """
    task_results = [r for r in results if r.get('task') == task_name]

    if not task_results:
        return {'error': f'No results found for task: {task_name}'}

    total = len(task_results)
    correct = sum(1 for r in task_results if r.get('is_correct', False))
    accuracy = correct / total  # total > 0 is guaranteed by the guard above

    freq_bins = [
        ('very_rare', 0, 0.05),
        ('rare', 0.05, 0.10),
        ('moderate', 0.10, 0.20),
        ('common', 0.20, 0.30),
        ('very_common', 0.30, 1.0),
    ]
    last_bin = len(freq_bins) - 1

    def summarize(stats: Dict) -> Dict:
        """Turn a correct/total counter into an accuracy entry."""
        return {
            'accuracy': stats['correct'] / stats['total'],
            'correct': stats['correct'],
            'total': stats['total'],
        }

    choices_stats = defaultdict(lambda: {'correct': 0, 'total': 0})
    freq_stats = defaultdict(lambda: {'correct': 0, 'total': 0})
    total_weighted_baseline = 0.0

    for r in task_results:
        is_correct = r.get('is_correct', False)

        # Number of distractors = n_choices - 1 (excluding correct answer).
        # Records with fewer than two choices are counted as one distractor,
        # which also keeps the 1/n baseline below well defined.
        n_choices = r.get('n_choices') or 0
        n_distractors = n_choices - 1 if n_choices > 1 else 1
        total_weighted_baseline += 1.0 / n_distractors

        by_choices = choices_stats[n_distractors]
        by_choices['total'] += 1
        if is_correct:
            by_choices['correct'] += 1

        freq = r.get('ground_truth_freq') or 0
        for idx, (bin_name, low, high) in enumerate(freq_bins):
            # Bins are half-open [low, high), except the last one, whose upper
            # edge is closed so that a frequency of exactly 1.0 is not dropped.
            if low <= freq < high or (idx == last_bin and freq == high):
                by_freq = freq_stats[bin_name]
                by_freq['total'] += 1
                if is_correct:
                    by_freq['correct'] += 1
                break

    # Breakdown by number of distractors, with a per-group random baseline.
    choices_accuracy = {}
    for n_distractors, stats in sorted(choices_stats.items()):
        entry = summarize(stats)
        baseline = 1.0 / n_distractors
        entry['baseline_random'] = baseline
        entry['lift_over_random'] = entry['accuracy'] - baseline
        choices_accuracy[f'{n_distractors}_distractors'] = entry

    # Average of the per-record baselines, i.e. weighted by group size.
    avg_baseline = total_weighted_baseline / total

    # Breakdown by ground truth frequency, reported in bin order.
    freq_accuracy = {
        bin_name: summarize(freq_stats[bin_name])
        for bin_name, _, _ in freq_bins
        if bin_name in freq_stats
    }

    return {
        'task': task_name,
        'total': total,
        'correct': correct,
        'accuracy': accuracy,
        'baseline_random_avg': avg_baseline,
        'lift_over_random': accuracy - avg_baseline,
        'by_num_distractors': choices_accuracy,
        'by_ground_truth_freq': freq_accuracy,
    }
|
| 181 |
+
|
| 182 |
+
|
| 183 |
+
def print_comparison_results(metrics: Dict) -> None:
    """Print comparison task results.

    Args:
        metrics: Either a metrics dict with ``'task'``, ``'accuracy'``,
            ``'correct'``, ``'total'``, ``'baseline_random'`` and
            ``'lift_over_random'`` (plus optional ``'by_stratum'`` and
            ``'by_difference'`` breakdowns), or an ``{'error': message}``
            dict, in which case only the message is printed.
    """
    # An error dict carries none of the keys used below; report it instead of
    # failing with KeyError.
    if 'error' in metrics:
        print(f"\n{metrics['error']}")
        return

    print(f"\n{'='*60}")
    print(f"Task: {metrics['task'].upper()}")
    print(f"{'='*60}")

    print(f"\nOverall Accuracy: {metrics['accuracy']:.1%} ({metrics['correct']}/{metrics['total']})")
    print(f"Random Baseline: {metrics['baseline_random']:.1%}")
    print(f"Lift over Random: {metrics['lift_over_random']:+.1%}")

    if metrics.get('by_stratum'):
        print("\nBy Sampling Stratum:")
        for stratum, stats in sorted(metrics['by_stratum'].items()):
            print(f" {stratum:12}: {stats['accuracy']:.1%} ({stats['correct']}/{stats['total']})")

    if metrics.get('by_difference'):
        print("\nBy Value Difference:")
        for bin_name, stats in metrics['by_difference'].items():
            print(f" {bin_name:12}: {stats['accuracy']:.1%} ({stats['correct']}/{stats['total']})")
|
| 202 |
+
|
| 203 |
+
|
| 204 |
+
def print_distractor_results(metrics: Dict) -> None:
    """Print distractor task results.

    Args:
        metrics: Either a metrics dict with ``'task'``, ``'accuracy'``,
            ``'correct'``, ``'total'``, ``'baseline_random_avg'`` and
            ``'lift_over_random'`` (plus optional ``'by_num_distractors'`` and
            ``'by_ground_truth_freq'`` breakdowns), or an
            ``{'error': message}`` dict, in which case only the message is
            printed.
    """
    # An error dict carries none of the keys used below; report it instead of
    # failing with KeyError.
    if 'error' in metrics:
        print(f"\n{metrics['error']}")
        return

    def distractor_order(item):
        """Sort '<n>_distractors' keys by n; other keys go last, by name."""
        key = str(item[0])
        prefix = key.partition('_')[0]
        if prefix.isdigit():
            return (0, int(prefix), key)
        return (1, 0, key)

    print(f"\n{'='*60}")
    print(f"Task: {metrics['task'].upper()}")
    print(f"{'='*60}")

    print(f"\nOverall Accuracy: {metrics['accuracy']:.1%} ({metrics['correct']}/{metrics['total']})")
    print(f"Random Baseline: {metrics['baseline_random_avg']:.1%} (weighted avg)")
    print(f"Lift over Random: {metrics['lift_over_random']:+.1%}")

    if metrics.get('by_num_distractors'):
        print("\nBy Number of Distractors:")
        # A plain string sort would list '10_distractors' before
        # '2_distractors'; order by the numeric count instead.
        for key, stats in sorted(metrics['by_num_distractors'].items(), key=distractor_order):
            print(f" {key:15}: {stats['accuracy']:.1%} ({stats['correct']}/{stats['total']}) "
                  f"[baseline: {stats['baseline_random']:.1%}, lift: {stats['lift_over_random']:+.1%}]")

    if metrics.get('by_ground_truth_freq'):
        print("\nBy Ground Truth Frequency:")
        for bin_name, stats in metrics['by_ground_truth_freq'].items():
            print(f" {bin_name:12}: {stats['accuracy']:.1%} ({stats['correct']}/{stats['total']})")
|
| 224 |
+
|
| 225 |
+
|
| 226 |
+
def main():
    """Command-line entry point: score a predictions file and report metrics.

    Reads the file given by ``--input`` through ``load_results``, evaluates
    every recognised task found in it, prints a per-task report and a summary,
    and writes all metrics as JSON to ``--output`` when that option is given.
    """
    parser = argparse.ArgumentParser(
        description="Evaluate Pedagogical Grounding Benchmark Results"
    )
    parser.add_argument(
        "--input", "-i",
        type=str,
        required=True,
        help="Input JSONL file with predictions",
    )
    parser.add_argument(
        "--output", "-o",
        type=str,
        default=None,
        help="Output JSON file for metrics (optional)",
    )
    args = parser.parse_args()

    print(f"Loading results from {args.input}...")
    results = load_results(args.input)
    print(f"Loaded {len(results)} predictions")

    # Collect the distinct task names; records without one are ignored.
    tasks = {r.get('task') for r in results if r.get('task')}
    print(f"Tasks found: {tasks}")

    # Each known task maps to its (evaluator, reporter) pair.
    comparison = (evaluate_comparison_task, print_comparison_results)
    distractor = (evaluate_distractor_task, print_distractor_results)
    handlers = {
        'difficulty': comparison,
        'discrimination': comparison,
        'distractor_most': distractor,
        'distractor_least': distractor,
    }

    all_metrics = {}
    for task in sorted(tasks):
        handler = handlers.get(task)
        if handler is None:
            print(f"Unknown task: {task}")
            continue

        evaluate, report = handler
        task_metrics = evaluate(results, task)
        report(task_metrics)
        all_metrics[task] = task_metrics

    # One-line recap per task that was evaluated without error.
    banner = f"{'='*60}"
    print(f"\n{banner}")
    print("SUMMARY")
    print(banner)
    for task, task_metrics in all_metrics.items():
        if 'error' in task_metrics:
            continue
        print(f"{task:20}: {task_metrics['accuracy']:.1%} accuracy "
              f"({task_metrics['lift_over_random']:+.1%} vs random)")

    # Persist the metrics only when an output path was supplied.
    if args.output:
        with open(args.output, 'w') as f:
            json.dump(all_metrics, f, indent=2)
        print(f"\nMetrics saved to {args.output}")


if __name__ == "__main__":
    main()
|
Results/pedagogical_grounding/gptoss120b_pedagogical.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Pedagogical Grounding benchmark with GPT-OSS-120B model.
|
| 3 |
+
|
| 4 |
+
Usage:
|
| 5 |
+
# Run difficulty comparison task
|
| 6 |
+
CUDA_VISIBLE_DEVICES=0,1,2,3 python gptoss120b_pedagogical.py \
|
| 7 |
+
--task difficulty \
|
| 8 |
+
--data-dir . \
|
| 9 |
+
--num-gpus 4 \
|
| 10 |
+
--num-samples 1000 \
|
| 11 |
+
--sampling-mode stratified \
|
| 12 |
+
--cache-dir /data1/
|
| 13 |
+
|
| 14 |
+
# Run discrimination comparison task
|
| 15 |
+
CUDA_VISIBLE_DEVICES=0,1,2,3 python gptoss120b_pedagogical.py \
|
| 16 |
+
--task discrimination \
|
| 17 |
+
--data-dir . \
|
| 18 |
+
--num-gpus 4 \
|
| 19 |
+
--num-samples 1000
|
| 20 |
+
|
| 21 |
+
# Run most common distractor task
|
| 22 |
+
CUDA_VISIBLE_DEVICES=0,1,2,3 python gptoss120b_pedagogical.py \
|
| 23 |
+
--task distractor_most \
|
| 24 |
+
--data-dir . \
|
| 25 |
+
--num-gpus 4
|
| 26 |
+
|
| 27 |
+
# Run least common distractor task
|
| 28 |
+
CUDA_VISIBLE_DEVICES=0,1,2,3 python gptoss120b_pedagogical.py \
|
| 29 |
+
--task distractor_least \
|
| 30 |
+
--data-dir . \
|
| 31 |
+
--num-gpus 4
|
| 32 |
+
|
| 33 |
+
# Run all tasks
|
| 34 |
+
CUDA_VISIBLE_DEVICES=0,1,2,3 python gptoss120b_pedagogical.py \
|
| 35 |
+
--task all \
|
| 36 |
+
--data-dir . \
|
| 37 |
+
--num-gpus 4 \
|
| 38 |
+
--num-samples 500
|
| 39 |
+
"""
|
| 40 |
+
|
| 41 |
+
from pedagogical_inference_base import run_inference
|
| 42 |
+
|
| 43 |
+
# Sampling parameters stored under MODEL_CONFIG["gen_configs"].
_GEN_CONFIGS = dict(
    temperature=0.7,
    top_p=0.95,
    top_k=20,
    max_tokens=1024,  # Shorter responses expected for this task
    repetition_penalty=1.0,
)

# Settings for the GPT-OSS-120B run, handed to run_inference below.
MODEL_CONFIG = dict(
    model_id="openai/gpt-oss-120b",
    gen_configs=_GEN_CONFIGS,
    output_prefix="gptoss120b",
    system_prompt_prefix="Reasoning: medium\n\n",
)

if __name__ == "__main__":
    run_inference(MODEL_CONFIG)
|