commerce-recommender / src /data_preprocessing.py
“vineetha00”
Cleaned up for Hugging Face deployment
76ee1a1
raw
history blame contribute delete
345 Bytes
import pandas as pd
def load_parquet_data(split="train_sample"):
    """Load one dataset split into a DataFrame.

    NOTE: despite the function name, the data is read from a CSV file
    named ``<split>.csv``, not from a Parquet file.

    Args:
        split: File name (or path prefix) without the ``.csv`` extension.
            A relative value is resolved against the current working
            directory.

    Returns:
        The contents of ``<split>.csv`` as parsed by ``pandas.read_csv``
        with its default options.
    """
    return pd.read_csv(f"{split}.csv")
def preprocess_parquet(df, min_interactions=5):
    """Reduce an interaction table to active users and the rating columns.

    Keeps only the ``user_id``, ``product_id`` and ``rating`` columns, then
    drops every user that has fewer than ``min_interactions`` rows. Row order
    and index labels of the surviving rows are preserved, and the input
    frame is not modified.

    Args:
        df: DataFrame containing at least the columns ``user_id``,
            ``product_id`` and ``rating``.
        min_interactions: Minimum number of rows a user must have to be
            kept. Defaults to 5.

    Returns:
        A DataFrame with the three columns above, restricted to users with
        at least ``min_interactions`` rows. Rows whose ``user_id`` is
        missing are dropped.

    Raises:
        KeyError: If any of the three required columns is absent.
    """
    # Skip filtering 'event_type' because it's already been preprocessed
    interactions = df[["user_id", "product_id", "rating"]]

    # Count rows per user once and keep users via a boolean mask; this avoids
    # invoking a Python callback for every user group. value_counts() ignores
    # missing user_ids, so those rows never match the mask.
    rows_per_user = interactions["user_id"].value_counts()
    active_users = rows_per_user.index[rows_per_user >= min_interactions]
    return interactions[interactions["user_id"].isin(active_users)]