import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
import joblib

def generate_synthetic_data(n_samples=5000, random_state=42):
    """Generate a synthetic workday dataset with a three-level workload label.

    Seven per-day features are sampled independently from a seeded
    ``np.random.RandomState``. A heuristic load score is then computed as a
    weighted combination of the features plus Gaussian noise, clipped to
    [0, 1], and bucketed into three classes.

    Args:
        n_samples: Number of rows (days) to generate.
        random_state: Seed passed to ``np.random.RandomState``; the same seed
            and ``n_samples`` reproduce the same output.

    Returns:
        A ``(df, labels)`` tuple. ``df`` is a ``pandas.DataFrame`` with the
        columns ``meetings_count``, ``total_meeting_hours``,
        ``context_switches``, ``deep_work_blocks``, ``break_minutes``,
        ``day_start_hour`` and ``day_end_hour``. ``labels`` is an integer
        array of length ``n_samples`` where 0 = low, 1 = medium, 2 = high.
    """
    rng = np.random.RandomState(random_state)

    # (column name, sampler, low, high). ``randint`` excludes ``high``.
    # The draw order is significant: every sampler consumes the same random
    # stream, so reordering rows would change the data for a given seed.
    feature_specs = (
        ("meetings_count", rng.randint, 0, 12),        # count, 0-11
        ("total_meeting_hours", rng.uniform, 0, 9),    # hours
        ("context_switches", rng.randint, 0, 20),      # count, 0-19
        ("deep_work_blocks", rng.randint, 0, 5),       # count, 0-4
        ("break_minutes", rng.randint, 0, 120),        # minutes, 0-119
        ("day_start_hour", rng.randint, 7, 11),        # hour of day, 7-10
        ("day_end_hour", rng.randint, 14, 21),         # hour of day, 14-20
    )
    features = {
        column: sampler(low, high, size=n_samples)
        for column, sampler, low, high in feature_specs
    }
    df = pd.DataFrame(features)

    # Length of the working day in hours.
    span_hours = features["day_end_hour"] - features["day_start_hour"]

    # Heuristic "actual workload": meetings, meeting hours, context switches
    # and a long day push the score up; deep-work blocks and breaks pull it
    # down. Terms are accumulated in a fixed order so the floating-point
    # result is reproducible.
    score = 0.3 * (features["meetings_count"] / 10)
    score = score + 0.25 * (features["total_meeting_hours"] / 8)
    score = score + 0.2 * (features["context_switches"] / 20)
    score = score + 0.15 * (span_hours / 12)
    score = score - 0.15 * (features["deep_work_blocks"] / 4)
    score = score - 0.1 * (features["break_minutes"] / 120)
    # Gaussian noise is drawn last, after all feature draws.
    score = score + rng.normal(0, 0.05, size=n_samples)

    score = score.clip(0, 1)

    # Each threshold crossed adds one class level:
    # score <= 0.33 -> 0 (low), <= 0.66 -> 1 (medium), otherwise 2 (high).
    labels = (score > 0.33).astype(int) + (score > 0.66).astype(int)

    return df, labels

if __name__ == "__main__":
    # Script entry point: generate the synthetic dataset, fit a random forest
    # on 80% of it, report accuracy on the held-out 20%, and save the model.
    features, targets = generate_synthetic_data()

    # Stratify on the labels so the class proportions are kept in both the
    # training and the test partition.
    partitions = train_test_split(
        features,
        targets,
        test_size=0.2,
        random_state=42,
        stratify=targets,
    )
    train_features, test_features, train_targets, test_targets = partitions

    forest_params = {
        "n_estimators": 150,
        "max_depth": 8,
        "random_state": 42,
    }
    model = RandomForestClassifier(**forest_params)
    model.fit(train_features, train_targets)

    test_accuracy = model.score(test_features, test_targets)
    print(f"Test accuracy: {test_accuracy:.3f}")

    # The fitted classifier is written to the current working directory.
    joblib.dump(model, "workload_model.joblib")
    print("Saved model to workload_model.joblib")