File size: 345 Bytes
1849db4
 
76ee1a1
 
 
 
 
 
 
 
1849db4
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
import pandas as pd

def load_parquet_data(split="train_sample"):
    """Load one data split from disk into a DataFrame.

    Despite the function name, the split is read as CSV: the file opened is
    ``"<split>.csv"``, so ``split`` may be a bare name (resolved against the
    current working directory) or a path without the ``.csv`` suffix.

    Args:
        split: Base name of the file to read, without extension.

    Returns:
        The DataFrame produced by ``pd.read_csv`` with default options.
    """
    csv_path = f"{split}.csv"
    return pd.read_csv(csv_path)

def preprocess_parquet(df, min_interactions=5):
    """Keep the rating columns and drop users with too few rows.

    The frame is narrowed to ``user_id``, ``product_id`` and ``rating``, then
    only rows belonging to users with at least ``min_interactions`` rows are
    kept. Row order and index labels of the surviving rows are preserved, and
    rows whose ``user_id`` is missing are dropped.

    Args:
        df: DataFrame containing at least the columns ``user_id``,
            ``product_id`` and ``rating``.
        min_interactions: Minimum number of rows a user must have to be
            retained. Defaults to 5.

    Returns:
        A new DataFrame with the three columns above, restricted to the
        retained users.

    Raises:
        KeyError: If any of the three required columns is missing.
    """
    # No 'event_type' filtering here: the input has already been preprocessed.
    ratings = df[["user_id", "product_id", "rating"]]

    # Broadcast each user's row count back onto that user's rows; this avoids
    # calling a Python lambda once per group as groupby().filter() would.
    rows_per_user = ratings.groupby("user_id")["user_id"].transform("size")

    # Rows with a missing user_id get no group size (NaN), so the comparison
    # is False and they are excluded.
    return ratings[rows_per_user >= min_interactions]