SpringyBon committed on
Commit
ac44a3f
·
verified ·
1 Parent(s): b7719a5

Create train.py

Browse files
Files changed (1) hide show
  1. train.py +63 -0
train.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # train.py
2
+ import pandas as pd, numpy as np, pickle
3
+ from sklearn.model_selection import train_test_split
4
+ from sklearn.ensemble import RandomForestRegressor
5
+ from sklearn.multioutput import MultiOutputRegressor
6
+ from sklearn.metrics import mean_absolute_error
7
+
8
# Location of the training data; change the path if the CSV lives elsewhere.
CSV_PATH = "student_allinone_300_padded.csv"

# Input columns used to build the training matrix, in this order.
# Kept as a list: it is used directly as a pandas column selector.
FEATURES = [
    "Attendance",
    "StudyHours",
    "ParentalSupport",
    "SleepHours",
    "ReadingHours",
    "BehaviorScore",
    "PretestScore",
    "HomeworkCompletion",
    "Participation",
]

# Output columns the model is trained to predict.
TARGETS = ["AssignmentAvg", "TestScore"]
13
+
14
def _synthesize_targets(df: pd.DataFrame, rng: np.random.Generator) -> None:
    """Overwrite both target columns in ``df`` with synthetic values.

    Each target is a fixed linear combination of feature columns plus
    integer noise drawn from ``rng`` in the range -5..5, clipped to
    [0, 100] and rounded to two decimals. Any values already present in
    the target columns are discarded.

    The noise for "AssignmentAvg" is drawn before the noise for
    "TestScore"; keep that order so a seeded ``rng`` reproduces the same
    dataset.
    """
    n_rows = len(df)

    df["AssignmentAvg"] = (
        df["PretestScore"] * 0.5
        + df["StudyHours"] * 3
        + df["HomeworkCompletion"] * 0.20
        + df["Participation"] * 2
        + rng.integers(-5, 6, size=n_rows)  # upper bound is exclusive
    ).clip(0, 100).round(2)

    df["TestScore"] = (
        df["PretestScore"] * 0.6
        + df["Attendance"] * 0.20
        + df["ParentalSupport"] * 3
        + df["SleepHours"] * 2
        + df["ReadingHours"] * 2
        + df["BehaviorScore"] * 2
        + rng.integers(-5, 6, size=n_rows)  # upper bound is exclusive
    ).clip(0, 100).round(2)


def main() -> None:
    """Train the student-score model and save it with feature bounds.

    Steps:
      1. Load ``CSV_PATH`` and check that every column in ``FEATURES``
         is present.
      2. Replace the target columns with synthetic, feature-dependent
         values (seeded, so runs are reproducible).
      3. Fit a multi-output random forest on an 80/20 train/test split
         and print the mean absolute error on the held-out part.
      4. Pickle the model, the feature/target names and the per-feature
         min/max to "student_model.pkl".

    Raises:
        KeyError: if the CSV lacks one or more columns from ``FEATURES``.
    """
    # read_csv already returns a fresh frame, so no defensive copy is needed.
    df = pd.read_csv(CSV_PATH)

    # Fail early with a readable message instead of a bare KeyError raised
    # from the middle of the target formulas.
    missing = [col for col in FEATURES if col not in df.columns]
    if missing:
        raise KeyError(
            f"{CSV_PATH} is missing required feature column(s): {missing}"
        )

    # Targets are always synthesized. Previously this ran only when both
    # target columns already existed, but the existing values were then
    # overwritten anyway, and a CSV without them crashed at df[TARGETS];
    # so the guard never let real targets through.
    rng = np.random.default_rng(42)
    _synthesize_targets(df, rng)

    X = df[FEATURES]
    y = df[TARGETS]

    Xtr, Xte, ytr, yte = train_test_split(
        X, y, test_size=0.2, random_state=42
    )
    forest = RandomForestRegressor(n_estimators=200, random_state=42)
    model = MultiOutputRegressor(forest).fit(Xtr, ytr)

    mae = mean_absolute_error(yte, model.predict(Xte))
    print("MAE:", round(mae, 3))

    # Save feature bounds so the app can clip user input. They are taken
    # over the full dataset, not just the training split.
    feature_mins = X.min().to_dict()
    feature_maxs = X.max().to_dict()

    bundle = {
        "model": model,
        "features": FEATURES,
        "targets": TARGETS,
        "feature_mins": feature_mins,
        "feature_maxs": feature_maxs,
    }
    with open("student_model.pkl", "wb") as f:
        pickle.dump(bundle, f)

    print("Saved student_model.pkl with bounds.")
61
+
62
# Run training only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()