import pandas as pd


def load_parquet_data(split: str = "train_sample") -> pd.DataFrame:
    """Load the data split stored at ``"{split}.csv"``.

    Despite the function name, the file is read with ``pandas.read_csv``;
    no parquet reader is involved.

    Args:
        split: Path (without the ``.csv`` suffix) of the split to load.
            A bare name is resolved relative to the current directory.

    Returns:
        The contents of the CSV file as a DataFrame.
    """
    return pd.read_csv(f"{split}.csv")


def preprocess_parquet(
    df: pd.DataFrame, *, min_interactions: int = 5
) -> pd.DataFrame:
    """Keep the rating columns and drop users with too few rows.

    Args:
        df: Frame containing at least the ``user_id``, ``product_id`` and
            ``rating`` columns; any other columns are discarded.
        min_interactions: Minimum number of rows a ``user_id`` must have
            for its rows to be kept. Defaults to 5.

    Returns:
        A new DataFrame with only the three columns above, restricted to
        users having at least ``min_interactions`` rows. Original row
        order and index labels are preserved.

    Raises:
        KeyError: If any of the three required columns is missing.
    """
    # Skip filtering 'event_type' because it's already been preprocessed
    # (per the original author's note; not verified here).
    ratings = df[["user_id", "product_id", "rating"]]

    # Vectorized per-user row count: avoids invoking a Python lambda once
    # per group as ``groupby(...).filter`` would. ``value_counts`` ignores
    # NaN user ids, so those rows map to NaN and compare False below,
    # i.e. they are dropped, same as with ``groupby``'s default dropna.
    rows_per_user = ratings["user_id"].map(ratings["user_id"].value_counts())
    return ratings[rows_per_user >= min_interactions]