Spaces:
Sleeping
Sleeping
File size: 1,623 Bytes
53b92fc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 |
import pandas as pd
from sklearn.model_selection import train_test_split
# ---------------- App Data ---------------- #
def load_app_data(path="data/lsapp.tsv"):
    """Load the tab-separated app-usage log into a DataFrame.

    Args:
        path: Location of the TSV file (anything ``pandas.read_csv`` accepts).

    Returns:
        The parsed frame, with a column headed ``lsapp.tsv`` (if present)
        renamed to ``userid`` and the ``timestamp`` column converted to
        datetimes.

    Raises:
        KeyError: If the file has no ``timestamp`` column.
    """
    df = pd.read_csv(path, sep="\t")
    # NOTE(review): the user-id column is expected to be headed "lsapp.tsv",
    # presumably an artefact of how the file was exported -- confirm against
    # the data. rename() silently ignores the mapping when that header is
    # absent, in which case no "userid" column is created here.
    df = df.rename(columns={"lsapp.tsv": "userid"})
    df["timestamp"] = pd.to_datetime(df["timestamp"])
    return df
def preprocess_app(df, churn_days=30):
    """Build one row of churn features per user from the raw event log.

    Args:
        df: Event log with a ``userid`` column and a datetime ``timestamp``
            column (one row per logged session/event).
        churn_days: A user is labelled as churned when their last activity
            is more than this many whole days before the newest timestamp
            in ``df``. Defaults to 30.

    Returns:
        DataFrame with columns ``userid``, ``session_count`` (rows per user),
        ``timestamp`` (the user's latest timestamp), ``recency`` (whole days
        between that and the newest timestamp in the log) and ``churn``
        (1 if ``recency > churn_days`` else 0).
    """
    # One pass over the groups yields both the row count and the last-seen
    # time, so no second groupby or merge is needed.
    features = (
        df.groupby("userid")
        .agg(
            session_count=("timestamp", "size"),
            timestamp=("timestamp", "max"),
        )
        .reset_index()
    )

    # Recency is measured against the newest event in the data set itself,
    # not against the wall clock.
    reference_time = df["timestamp"].max()
    features["recency"] = (reference_time - features["timestamp"]).dt.days
    features["churn"] = (features["recency"] > churn_days).astype(int)
    return features
def split_app(features, test_size=0.3, random_state=42):
    """Split the app features into stratified train and test sets.

    Args:
        features: Frame containing ``session_count``, ``recency`` and
            ``churn`` columns.
        test_size: Share of rows held out for testing. Defaults to 0.3.
        random_state: Seed passed to the splitter so the split is
            reproducible. Defaults to 42.

    Returns:
        Whatever ``train_test_split`` returns for ``(X, y)``; per the
        scikit-learn documentation that is
        ``[X_train, X_test, y_train, y_test]``.
    """
    # NOTE(review): in this file ``churn`` is computed directly from
    # ``recency`` (recency > threshold), and ``recency`` is also used as an
    # input feature here, so the label is fully determined by one of the
    # inputs. Confirm this is intended before trusting model scores.
    X = features[["session_count", "recency"]]
    y = features["churn"]
    # stratify=y keeps the churn / non-churn ratio the same in both parts.
    return train_test_split(
        X,
        y,
        test_size=test_size,
        random_state=random_state,
        stratify=y,
    )
# ---------------- Fitness Data ---------------- #
def load_fitness_data(path="data/DadosV3.csv"):
    """Load the fitness CSV and encode ``Gender`` as 1 (Male) / 0 (Female).

    Args:
        path: Location of the CSV file (anything ``pandas.read_csv`` accepts).

    Returns:
        The parsed frame with ``Gender`` numerically encoded. Text labels
        other than ``"Male"`` / ``"Female"`` become NaN. A ``Gender`` column
        that is already numeric is returned unchanged.

    Raises:
        KeyError: If the file has no ``Gender`` column.
    """
    df = pd.read_csv(path)
    gender = df["Gender"]
    # Mapping an already-numeric column through the label dict would match
    # nothing and replace every value with NaN, so only text labels are
    # mapped.
    if not pd.api.types.is_numeric_dtype(gender):
        df["Gender"] = gender.map({"Male": 1, "Female": 0})  # encode gender
    return df
def preprocess_fitness(df):
    """Select the modelling columns (inputs plus ``Dropout``) from ``df``.

    Args:
        df: Fitness frame containing at least the columns listed below.

    Returns:
        A new DataFrame with exactly these columns, in this order:
        ``Age``, ``Gender``, ``EnrollmentDuration``,
        ``DaysWithoutFrequency``, ``AttendedClasses``,
        ``NumberOfActivities``, ``NumberOfRenewals``, ``Dropout``.

    Raises:
        KeyError: If any of those columns is missing from ``df``.
    """
    selected_columns = [
        "Age",
        "Gender",
        "EnrollmentDuration",
        "DaysWithoutFrequency",
        "AttendedClasses",
        "NumberOfActivities",
        "NumberOfRenewals",
        "Dropout",
    ]
    # Explicit copy: later edits to the feature frame must not touch (or
    # raise chained-assignment warnings about) the caller's raw frame.
    return df[selected_columns].copy()
def split_fitness(features, test_size=0.3, random_state=42):
    """Split the fitness features into stratified train and test sets.

    Args:
        features: Frame whose ``Dropout`` column is the target; every other
            column is used as a model input.
        test_size: Share of rows held out for testing. Defaults to 0.3.
        random_state: Seed passed to the splitter so the split is
            reproducible. Defaults to 42.

    Returns:
        Whatever ``train_test_split`` returns for ``(X, y)``; per the
        scikit-learn documentation that is
        ``[X_train, X_test, y_train, y_test]``.

    Raises:
        KeyError: If ``features`` has no ``Dropout`` column.
    """
    X = features.drop(columns=["Dropout"])
    y = features["Dropout"]
    # stratify=y keeps the dropout / non-dropout ratio the same in both parts.
    return train_test_split(
        X,
        y,
        test_size=test_size,
        random_state=random_state,
        stratify=y,
    )
|