Spaces:
Sleeping
Sleeping
| import numpy as np | |
| import pandas as pd | |
| from sklearn.ensemble import RandomForestClassifier | |
| from sklearn.model_selection import train_test_split | |
| import joblib | |
def generate_synthetic_data(n_samples=5000, random_state=42):
    """Generate a synthetic workday dataset with three-level workload labels.

    Seven per-day features are sampled independently from a seeded
    ``np.random.RandomState``. A heuristic load score is then computed as a
    weighted combination of the features plus Gaussian noise, clipped to
    [0, 1], and bucketed into three classes.

    Args:
        n_samples: Number of rows (simulated workdays) to generate.
        random_state: Seed for the random number generator; the same seed
            reproduces the same data and labels.

    Returns:
        A ``(df, labels)`` tuple. ``df`` is a DataFrame with the columns
        ``meetings_count``, ``total_meeting_hours``, ``context_switches``,
        ``deep_work_blocks``, ``break_minutes``, ``day_start_hour`` and
        ``day_end_hour``. ``labels`` is an integer array of length
        ``n_samples`` with 0 = low, 1 = medium, 2 = high workload.
    """
    rng = np.random.RandomState(random_state)

    # The draws happen in dict-literal order (top to bottom). That order is
    # part of the function's reproducibility contract for a given seed.
    features = {
        "meetings_count": rng.randint(0, 12, size=n_samples),      # count, 0-11
        "total_meeting_hours": rng.uniform(0, 9, size=n_samples),  # hours, [0, 9)
        "context_switches": rng.randint(0, 20, size=n_samples),    # count, 0-19
        "deep_work_blocks": rng.randint(0, 5, size=n_samples),     # count, 0-4
        "break_minutes": rng.randint(0, 120, size=n_samples),      # minutes, 0-119
        "day_start_hour": rng.randint(7, 11, size=n_samples),      # hour, 7-10
        "day_end_hour": rng.randint(14, 21, size=n_samples),       # hour, 14-20
    }
    df = pd.DataFrame(features)

    # Heuristic "actual workload": meetings, meeting time, context switches
    # and a long day push the score up; deep-work blocks and breaks pull it
    # down.
    hours_at_work = features["day_end_hour"] - features["day_start_hour"]
    meetings_term = 0.3 * (features["meetings_count"] / 10)
    meeting_hours_term = 0.25 * (features["total_meeting_hours"] / 8)
    switching_term = 0.2 * (features["context_switches"] / 20)
    day_length_term = 0.15 * (hours_at_work / 12)
    deep_work_term = 0.15 * (features["deep_work_blocks"] / 4)
    breaks_term = 0.1 * (features["break_minutes"] / 120)
    noise = rng.normal(0, 0.05, size=n_samples)

    # Terms are combined strictly left to right so the floating-point result
    # is stable with respect to the thresholds below.
    raw_score = (
        meetings_term
        + meeting_hours_term
        + switching_term
        + day_length_term
        - deep_work_term
        - breaks_term
        + noise
    )
    load_score = raw_score.clip(0, 1)

    # Each threshold a score exceeds adds one to its class:
    # <= 0.33 -> 0 (low), (0.33, 0.66] -> 1 (medium), > 0.66 -> 2 (high).
    above_low = (load_score > 0.33).astype(int)
    above_medium = (load_score > 0.66).astype(int)
    labels = above_low + above_medium

    return df, labels
if __name__ == "__main__":
    # Script entry point: build the synthetic dataset, fit a random forest on
    # an 80/20 split, report held-out accuracy, and persist the fitted model.
    features, targets = generate_synthetic_data()

    # Stratify on the labels so both partitions keep the same class mix.
    partitions = train_test_split(
        features,
        targets,
        test_size=0.2,
        random_state=42,
        stratify=targets,
    )
    train_features, test_features, train_targets, test_targets = partitions

    # Fixed seed keeps the fitted forest reproducible across runs.
    model = RandomForestClassifier(n_estimators=150, max_depth=8, random_state=42)
    model.fit(train_features, train_targets)

    # Mean accuracy on the held-out 20%.
    accuracy = model.score(test_features, test_targets)
    print(f"Test accuracy: {accuracy:.3f}")

    # Serialize the fitted classifier into the current working directory.
    joblib.dump(model, "workload_model.joblib")
    print("Saved model to workload_model.joblib")