Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| from sklearn.model_selection import train_test_split | |
| # ---------------- App Data ---------------- # | |
def load_app_data(path="data/lsapp.tsv"):
    """Read the app-usage log from a tab-separated file.

    Args:
        path: Location of the TSV file.

    Returns:
        DataFrame holding the file contents, with the ``timestamp`` column
        converted by ``pd.to_datetime``.
    """
    raw = pd.read_csv(path, sep="\t")
    # A column literally named "lsapp.tsv" is exposed as "userid"; when no
    # such column exists the rename is a no-op.
    # NOTE(review): presumably the first header cell of the data file
    # carries the file name -- confirm against the actual file.
    frame = raw.rename(columns={"lsapp.tsv": "userid"})
    frame["timestamp"] = pd.to_datetime(frame["timestamp"])
    return frame
def preprocess_app(df, churn_days=30):
    """Aggregate session rows into per-user features plus a churn label.

    Args:
        df: Session-level frame with a ``userid`` column and a datetime
            ``timestamp`` column.
        churn_days: A user is labelled as churned when their last session
            lies more than this many whole days before the newest
            timestamp in ``df``. Defaults to 30.

    Returns:
        DataFrame with one row per ``userid`` and the columns ``userid``,
        ``session_count``, ``timestamp`` (the user's last session),
        ``recency`` (whole days) and ``churn`` (0 or 1).
    """
    # feature engineering: session count, recency, churn
    user_sessions = df.groupby("userid").size().reset_index(name="session_count")
    last_session_time = df.groupby("userid")["timestamp"].max().reset_index()

    # Recency is measured against the newest event in the data set, not
    # against the wall clock, so the result is reproducible.
    reference_time = df["timestamp"].max()
    last_session_time["recency"] = (
        reference_time - last_session_time["timestamp"]
    ).dt.days

    features = pd.merge(user_sessions, last_session_time, on="userid")
    features["churn"] = (features["recency"] > churn_days).astype(int)
    return features
def split_app(features):
    """Split the app features into stratified train and test sets.

    Args:
        features: Frame with ``session_count``, ``recency`` and ``churn``
            columns.

    Returns:
        Whatever ``train_test_split`` returns for the feature frame and the
        ``churn`` labels, using a 30% test share, seed 42 and stratification
        on the label.
    """
    # NOTE(review): preprocess_app derives "churn" directly from "recency",
    # so keeping "recency" as a predictor looks like label leakage --
    # confirm whether that is intended.
    predictor_columns = ["session_count", "recency"]
    labels = features["churn"]
    predictors = features[predictor_columns]
    return train_test_split(
        predictors,
        labels,
        test_size=0.3,
        random_state=42,
        stratify=labels,
    )
| # ---------------- Fitness Data ---------------- # | |
def load_fitness_data(path="data/DadosV3.csv"):
    """Read the fitness CSV and encode its ``Gender`` column numerically.

    Args:
        path: Location of the CSV file.

    Returns:
        DataFrame holding the file contents, with ``Gender`` mapped to 1 for
        "Male" and 0 for "Female". Any other label becomes NaN, because
        ``Series.map`` leaves unmapped values missing.
    """
    gender_codes = {"Male": 1, "Female": 0}  # encode gender
    frame = pd.read_csv(path)
    frame["Gender"] = frame["Gender"].map(gender_codes)
    return frame
def preprocess_fitness(df):
    """Select the modelling columns from the fitness frame.

    Args:
        df: Frame that contains at least the columns listed below.

    Returns:
        A new DataFrame restricted to the seven predictor columns followed
        by the ``Dropout`` label, in that fixed order.
    """
    selected_columns = [
        "Age",
        "Gender",
        "EnrollmentDuration",
        "DaysWithoutFrequency",
        "AttendedClasses",
        "NumberOfActivities",
        "NumberOfRenewals",
        "Dropout",
    ]
    return df[selected_columns]
def split_fitness(features):
    """Split the fitness features into stratified train and test sets.

    Args:
        features: Frame that includes a ``Dropout`` label column; every
            other column is used as a predictor.

    Returns:
        Whatever ``train_test_split`` returns for the predictors and the
        ``Dropout`` labels, using a 30% test share, seed 42 and
        stratification on the label.
    """
    labels = features["Dropout"]
    predictors = features.drop(columns=["Dropout"])
    return train_test_split(
        predictors,
        labels,
        test_size=0.3,
        random_state=42,
        stratify=labels,
    )