# calendar-workload-demo / train_model.py
# ModelKiln's picture
# Upload 3 files
# 8293c58 verified
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
import joblib
def generate_synthetic_data(n_samples=5000, random_state=42):
    """Generate a synthetic calendar-workload dataset with heuristic labels.

    Each sample describes one working day through seven independently drawn
    features. A weighted "load score" is computed from those features plus
    Gaussian noise, clipped to [0, 1], and discretized into three classes.

    Args:
        n_samples: Number of days (rows) to generate.
        random_state: Seed for the generator. Anything accepted by
            ``np.random.RandomState`` (int, array-like, ``None``), or an
            existing ``np.random.RandomState`` instance. An instance is used
            directly, so its internal state is advanced by this call.

    Returns:
        A ``(df, labels)`` tuple. ``df`` is a ``pandas.DataFrame`` with the
        columns ``meetings_count``, ``total_meeting_hours``,
        ``context_switches``, ``deep_work_blocks``, ``break_minutes``,
        ``day_start_hour`` and ``day_end_hour``. ``labels`` is an integer
        array of length ``n_samples`` with 0 = low, 1 = medium, 2 = high.
    """
    # Reuse a caller-supplied generator instead of re-seeding from it;
    # np.random.RandomState(...) does not accept another RandomState as seed.
    if isinstance(random_state, np.random.RandomState):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    # NOTE: the order of the draws below determines the output for a given
    # seed; reordering them changes the generated dataset.
    meetings_count = rng.randint(0, 12, size=n_samples)       # count, 0-11
    total_meeting_hours = rng.uniform(0, 9, size=n_samples)   # hours, [0, 9)
    context_switches = rng.randint(0, 20, size=n_samples)     # count, 0-19
    deep_work_blocks = rng.randint(0, 5, size=n_samples)      # count, 0-4
    break_minutes = rng.randint(0, 120, size=n_samples)       # minutes, 0-119
    day_start_hour = rng.randint(7, 11, size=n_samples)       # hour, 7-10
    day_end_hour = rng.randint(14, 21, size=n_samples)        # hour, 14-20

    df = pd.DataFrame({
        "meetings_count": meetings_count,
        "total_meeting_hours": total_meeting_hours,
        "context_switches": context_switches,
        "deep_work_blocks": deep_work_blocks,
        "break_minutes": break_minutes,
        "day_start_hour": day_start_hour,
        "day_end_hour": day_end_hour,
    })

    # Heuristic "actual workload" in [0, 1]: meetings, meeting hours, context
    # switches and a long day push the score up; deep-work blocks and breaks
    # pull it down. Gaussian noise keeps the labels from being a purely
    # deterministic function of the features.
    day_length = day_end_hour - day_start_hour
    load_score = (
        0.3 * (meetings_count / 10)
        + 0.25 * (total_meeting_hours / 8)
        + 0.2 * (context_switches / 20)
        + 0.15 * (day_length / 12)
        - 0.15 * (deep_work_blocks / 4)
        - 0.1 * (break_minutes / 120)
        + rng.normal(0, 0.05, size=n_samples)
    )
    load_score = np.clip(load_score, 0, 1)

    # Discretize into classes: 0 = low, 1 = medium, 2 = high.
    # The second assignment overrides the first for scores above 0.66.
    labels = np.zeros(n_samples, dtype=int)
    labels[load_score > 0.33] = 1
    labels[load_score > 0.66] = 2

    return df, labels
if __name__ == "__main__":
    # Build the dataset using the generator's default size and seed.
    features, targets = generate_synthetic_data()

    # Hold out 20% of the rows for evaluation; stratifying on the labels
    # keeps the class proportions the same in both splits.
    split = train_test_split(
        features,
        targets,
        test_size=0.2,
        random_state=42,
        stratify=targets,
    )
    train_features, test_features, train_targets, test_targets = split

    # Random forest hyperparameters, fixed seed for reproducible training.
    forest_params = {
        "n_estimators": 150,
        "max_depth": 8,
        "random_state": 42,
    }
    model = RandomForestClassifier(**forest_params)
    model.fit(train_features, train_targets)

    # Evaluate on the held-out split and report the score.
    acc = model.score(test_features, test_targets)
    print(f"Test accuracy: {acc:.3f}")

    # Persist the fitted model next to the script.
    joblib.dump(model, "workload_model.joblib")
    print("Saved model to workload_model.joblib")