# 🚀 End-to-End Sequential Recommender System 

This project implements and evaluates a series of recommender system models, culminating in a state-of-the-art **SASRec (Self-Attentive Sequential Recommendation)** model for Top-N next-item prediction. The system is trained on the [RetailRocket e-commerce dataset](https://www.kaggle.com/datasets/retailrocket/ecommerce-dataset) and includes an interactive web demo built with Gradio. 

## EDA

In [None]:
import pandas as pd
import time
from datetime import datetime

# Folder that holds the raw RetailRocket CSV files
DATA_FOLDER = 'data/'

# Read the raw interaction log into memory
print("Loading events.csv...")
events_path = DATA_FOLDER + 'events.csv'
events_df = pd.read_csv(events_path)

# --- First look at the data ---

# Preview of the first rows
preview = events_df.head()
print("Data Head:")
print(preview)

# Column names, dtypes and memory footprint (info() prints by itself)
print("\nData Info:")
events_df.info()

# Number of missing values per column
missing_per_column = events_df.isna().sum()
print("\nMissing Values:")
print(missing_per_column)

Loading events.csv...
Data Head:
 timestamp visitorid event itemid transactionid
0 1433221332117 257597 view 355908 NaN
1 1433224214164 992329 view 248676 NaN
2 1433221999827 111016 view 318965 NaN
3 1433221955914 483717 view 253185 NaN
4 1433221337106 951259 view 367447 NaN

Data Info:

RangeIndex: 2756101 entries, 0 to 2756100
Data columns (total 5 columns):
 # Column Dtype 
--- ------ ----- 
 0 timestamp int64 
 1 visitorid int64 
 2 event object 
 3 itemid int64 
 4 transactionid float64
dtypes: float64(1), int64(3), object(1)
memory usage: 105.1+ MB

Missing Values:
timestamp 0
visitorid 0
event 0
itemid 0
transactionid 2733644
dtype: int64


In [2]:
# --- Data Cleaning and Understanding ---

# 1. Build a datetime column.
# The raw values are epoch milliseconds; unit='ms' tells pandas to interpret
# them as such, so no manual scaling is required.
raw_timestamps = events_df['timestamp']
events_df['timestamp_dt'] = pd.to_datetime(raw_timestamps, unit='ms')
print(f"\nData timeframe is from {events_df['timestamp_dt'].min()} to {events_df['timestamp_dt'].max()}")


# 2. How often does each event type occur?
event_counts = events_df['event'].value_counts()
print("\nEvent Counts:")
print(event_counts)


# 3. Size of the user and item populations
n_users = events_df['visitorid'].nunique()
n_items = events_df['itemid'].nunique()

print(f"\nNumber of unique visitors: {n_users}")
print(f"Number of unique items: {n_items}")


Data timeframe is from 2015-05-03 03:00:04.384000 to 2015-09-18 02:59:47.788000

Event Counts:
event
view 2664312
addtocart 69332
transaction 22457
Name: count, dtype: int64

Number of unique visitors: 1407580
Number of unique items: 235061


## Preparing the data

In [None]:
import zipfile
import pandas as pd
from datetime import datetime, timedelta
import numpy as np
from scipy.sparse import csr_matrix
import math
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import pytorch_lightning as pl

def prepare_data(data_folder='data/', val_days=7, test_days=7):
    """
    Loads, preprocesses, and splits the events data into train, validation, and test sets.

    The split is chronological: the last `test_days` days form the test set, the
    `val_days` days before that form the validation set, and everything earlier
    is used for training.

    args:
        data_folder: str, path to the folder containing 'events.csv'
                     (with or without a trailing path separator)
        val_days: int, number of days for the validation set
        test_days: int, number of days for the test set

    Returns:
        (train_df, val_df, test_df) as DataFrames sorted by time, or
        (None, None, None) when 'events.csv' cannot be found.
    """
    import os  # local import so the function does not rely on a notebook-level import

    # os.path.join works whether or not data_folder ends with a separator;
    # plain string concatenation turned "data" into "dataevents.csv".
    events_path = os.path.join(data_folder, 'events.csv')

    # --- Load Data ---
    print(f"Loading events.csv from folder: {data_folder}")
    try:
        # Only the read can raise FileNotFoundError, so keep the try body minimal.
        events_df = pd.read_csv(events_path)
    except FileNotFoundError:
        print(f"Error: 'events.csv' not found in '{data_folder}'. Please check the path.")
        return None, None, None

    print("Successfully loaded events.csv.")
    # Raw timestamps are epoch milliseconds.
    events_df['timestamp_dt'] = pd.to_datetime(events_df['timestamp'], unit='ms')
    print("\n--- Initial Data Summary ---")
    print(f"Data shape: {events_df.shape}")
    print(f"Full timeframe: {events_df['timestamp_dt'].min()} to {events_df['timestamp_dt'].max()}")
    print("----------------------------\n")

    # --- Split Data ---
    sorted_df = events_df.sort_values('timestamp_dt').reset_index(drop=True)
    print(f"Splitting data: {test_days} days for test, {val_days} for validation.")
    end_time = sorted_df['timestamp_dt'].max()
    test_start_time = end_time - timedelta(days=test_days)
    val_start_time = test_start_time - timedelta(days=val_days)

    timestamps = sorted_df['timestamp_dt']
    test_df = sorted_df[timestamps >= test_start_time]
    val_df = sorted_df[(timestamps >= val_start_time) & (timestamps < test_start_time)]
    train_df = sorted_df[timestamps < val_start_time]

    print("--- Data Splitting Summary ---")
    print(f"Training set: {train_df.shape[0]:>8} records | from {train_df['timestamp_dt'].min()} to {train_df['timestamp_dt'].max()}")
    print(f"Validation set: {val_df.shape[0]:>8} records | from {val_df['timestamp_dt'].min()} to {val_df['timestamp_dt'].max()}")
    print(f"Test set: {test_df.shape[0]:>8} records | from {test_df['timestamp_dt'].min()} to {test_df['timestamp_dt'].max()}")
    print("------------------------------")

    return train_df, val_df, test_df


In [None]:
# Keep the trailing slash, matching DATA_FOLDER in the EDA cell and the
# default value of prepare_data's data_folder argument.
DATA_PATH = "data/"

train_set, validation_set, test_set = prepare_data(data_folder=DATA_PATH)

In [None]:
class SASRecDataset(Dataset):
    """
    SASRec Dataset.

    Every prefix of every user sequence becomes one training example: the
    prefix (left-padded with 0 to `max_len`) is the input and the item that
    follows it is the target.

    - Precomputes (sequence_id, cutoff_idx) pairs for O(1) __getitem__.
    - Supports 'last' or 'all' target modes.
    """

    _TARGET_MODES = ("last", "all")

    def __init__(self, sequences, max_len, target_mode="last"):
        """
        Args:
            sequences: list of user sequences (list of item IDs).
            max_len: maximum sequence length (padding applied); must be >= 1.
            target_mode: 'last' (only last prediction) or 'all' (predict at every step).

        Raises:
            ValueError: if `target_mode` is not supported or `max_len` < 1.
        """
        # Fail early: an unknown mode would otherwise make __getitem__ return None,
        # which only surfaces later as an obscure collate error.
        if target_mode not in self._TARGET_MODES:
            raise ValueError(
                f"target_mode must be one of {self._TARGET_MODES}, got {target_mode!r}"
            )
        if max_len < 1:
            raise ValueError(f"max_len must be >= 1, got {max_len}")

        self.sequences = sequences
        self.max_len = max_len
        self.target_mode = target_mode

        # Build index once: cutoff i means "items [0, i) are the input, item i is the target".
        self.index = [
            (seq_id, cutoff)
            for seq_id, seq in enumerate(sequences)
            for cutoff in range(1, len(seq))
        ]

    def __len__(self):
        """Number of (sequence, cutoff) examples."""
        return len(self.index)

    def __getitem__(self, idx):
        """
        Returns:
            (input_seq, target) as LongTensors. `input_seq` has length `max_len`.
            `target` has length 1 in 'last' mode and length `max_len` in 'all' mode.
        """
        seq_id, cutoff = self.index[idx]
        full_seq = self.sequences[seq_id]

        # Truncate to the most recent max_len items, then left-pad with 0.
        history = full_seq[:cutoff][-self.max_len:]
        input_seq = np.zeros(self.max_len, dtype=np.int64)
        input_seq[self.max_len - len(history):] = history

        if self.target_mode == "last":
            return torch.LongTensor(input_seq), torch.LongTensor([full_seq[cutoff]])

        # 'all' mode: the target at each position is the item that follows it.
        shifted = full_seq[1:cutoff + 1][-self.max_len:]
        target = np.zeros(self.max_len, dtype=np.int64)
        target[-len(shifted):] = shifted
        return torch.LongTensor(input_seq), torch.LongTensor(target)

class SASRecDataModule(pl.LightningDataModule):
    """
    Lightning DataModule that turns the RetailRocket event splits into SASRec inputs.

    Responsibilities:
    - Drop rarely seen items and low-activity users.
    - Build the item vocabulary (index 0 is reserved for padding).
    - Convert each user's events into a time-ordered list of item indices.
    - Hand out `DataLoader` objects for the train, validation and test splits.
    """

    def __init__(self, train_df, val_df, test_df, min_item_interactions=5,
                 min_user_interactions=5, max_len=50, batch_size=256):
        """
        Stores the raw splits and preprocessing settings; no work happens until `setup`.

        Args:
            train_df (pd.DataFrame): Training events.
            val_df (pd.DataFrame): Validation events.
            test_df (pd.DataFrame): Test events.
            min_item_interactions (int): Items with fewer training interactions are dropped.
            min_user_interactions (int): Users with fewer training interactions are dropped.
            max_len (int): Length of the padded/truncated sequences fed to the model.
            batch_size (int): Batch size used by every DataLoader.
        """
        super().__init__()
        self.train_df, self.val_df, self.test_df = train_df, val_df, test_df
        self.min_item_interactions = min_item_interactions
        self.min_user_interactions = min_user_interactions
        self.max_len = max_len
        self.batch_size = batch_size

        # Populated by setup()
        self.item_map = None
        self.inverse_item_map = None
        self.vocab_size = 0
        self.user_history = None

    def setup(self, stage=None):
        """
        Filters the splits, builds the vocabulary and creates the per-split sequences.

        Steps:
        1. Decide which items and users to keep from training-set counts only.
        2. Apply the same keep-lists to the train, validation and test frames.
        3. Number the items found in the filtered train + validation frames, starting at 1.
        4. Encode each split as per-user lists of item indices.
        """
        item_freq = self.train_df['itemid'].value_counts()
        user_freq = self.train_df['visitorid'].value_counts()
        kept_items = item_freq[item_freq >= self.min_item_interactions].index
        kept_users = user_freq[user_freq >= self.min_user_interactions].index

        def keep_frequent(frame):
            # Rows survive only if both the item and the visitor are on the keep-lists.
            mask = frame['itemid'].isin(kept_items) & frame['visitorid'].isin(kept_users)
            return frame[mask].copy()

        self.filtered_train_df = keep_frequent(self.train_df)
        self.filtered_val_df = keep_frequent(self.val_df)
        self.filtered_test_df = keep_frequent(self.test_df)

        known_items = pd.concat([self.filtered_train_df, self.filtered_val_df])['itemid'].unique()
        # Indices start at 1 because 0 is the padding token.
        self.item_map = {item_id: idx for idx, item_id in enumerate(known_items, start=1)}
        self.inverse_item_map = {idx: item_id for item_id, idx in self.item_map.items()}
        self.vocab_size = len(self.item_map) + 1  # +1 for padding token 0

        self.user_history = self.filtered_train_df.groupby('visitorid')['itemid'].apply(list)

        self.train_sequences = self._create_sequences(self.filtered_train_df)
        self.val_sequences = self._create_sequences(self.filtered_val_df)
        self.test_sequences = self._create_sequences(self.filtered_test_df)

    def _create_sequences(self, df):
        """
        Encodes a DataFrame of events as per-user item-index sequences.

        Args:
            df (pd.DataFrame): Events to encode.

        Returns:
            list[list[int]]: One list of item indices per visitor, ordered by
            `timestamp_dt`. Items missing from the vocabulary are dropped, and
            sequences with fewer than two items are discarded.
        """
        def encode(item_ids):
            return [self.item_map[item] for item in item_ids if item in self.item_map]

        ordered = df.sort_values(['visitorid', 'timestamp_dt'])
        per_user = ordered.groupby('visitorid')['itemid'].apply(encode).tolist()
        return [seq for seq in per_user if len(seq) > 1]

    def _build_loader(self, sequences, shuffle):
        """Wraps sequences in a SASRecDataset and returns its DataLoader."""
        dataset = SASRecDataset(sequences, self.max_len)
        return DataLoader(dataset, batch_size=self.batch_size, shuffle=shuffle, num_workers=0)

    def train_dataloader(self):
        """Shuffled DataLoader over the training sequences."""
        return self._build_loader(self.train_sequences, shuffle=True)

    def val_dataloader(self):
        """Unshuffled DataLoader over the validation sequences."""
        return self._build_loader(self.val_sequences, shuffle=False)

    def test_dataloader(self):
        """Unshuffled DataLoader over the test sequences."""
        return self._build_loader(self.test_sequences, shuffle=False)

In [None]:
# Settings shared by the data pipeline
BATCH_SIZE = 256
MAX_TOKEN_LEN = 50  # 50–100 is standard for SASRec

# --- Step 1: chronological train / validation / test split ---
train_set, validation_set, test_set = prepare_data(data_folder=DATA_PATH)

# --- Step 2: build the DataModule and run its preprocessing right away ---
print("Initializing DataModule...")
datamodule = SASRecDataModule(
    train_set,
    validation_set,
    test_set,
    max_len=MAX_TOKEN_LEN,
    batch_size=BATCH_SIZE,
)
datamodule.setup()

## Define, train, and evaluate the baseline models

In [None]:
import pandas as pd
from datetime import datetime, timedelta
import numpy as np
from scipy.sparse import csr_matrix
from sklearn.metrics.pairwise import cosine_similarity
import implicit
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import pytorch_lightning as pl

# --- 1. Evaluation Helper Functions ---

def prepare_ground_truth(df, mode="purchase", event_weights=None):
    """
    Prepares ground truth dictionaries for evaluation.

    Parameters
    ----------
    df : pd.DataFrame
        Test dataframe containing at least ['visitorid', 'itemid', 'event'].
    mode : str, default="purchase"
        - "purchase" : Only use transactions as ground truth.
        - "all" : Use all events. Optionally weight them.
    event_weights : dict, optional
        Example: {"view": 1, "addtocart": 3, "transaction": 5}.
        Used only if mode == "all". Event types missing from the dict get
        weight 1. Because the result is a set, a weight only decides whether
        an event counts at all: events with a weight <= 0 are ignored.

    Returns
    -------
    dict : {user_id: set of item_ids}
        In weighted mode a user whose events are all ignored maps to an
        empty set.

    Raises
    ------
    ValueError
        If `mode` is neither "purchase" nor "all".
    """
    if mode == "purchase":
        purchases = df[df["event"] == "transaction"]
        return purchases.groupby("visitorid")["itemid"].apply(set).to_dict()

    if mode != "all":
        raise ValueError("mode must be 'purchase' or 'all'")

    if event_weights is None:
        # Default: treat all events equally
        return df.groupby("visitorid")["itemid"].apply(set).to_dict()

    # Vectorized replacement for a per-row Python loop: map each event to its
    # weight (unknown events -> 1) and keep the rows that contribute.
    weights = df["event"].map(event_weights).fillna(1)
    contributing = df[weights > 0]
    items_per_user = contributing.groupby("visitorid")["itemid"].apply(set).to_dict()

    # Every user present in df gets an entry, even when nothing contributed.
    all_users = sorted(df["visitorid"].dropna().unique().tolist())
    return {uid: items_per_user.get(uid, set()) for uid in all_users}

def calculate_metrics(recommendations_dict, ground_truth_dict, k):
    """
    Averages Precision@k, Recall@k and HitRate@k over the evaluated users.

    Users are taken from `ground_truth_dict`. Those with an empty ground-truth
    set are skipped, and those without recommendations score zero.

    args:
    ----------
    recommendations_dict : {user_id: [recommended_item_ids]}
    ground_truth_dict : {user_id: set of ground truth item_ids}
    k : int

    Returns
    -------
    dict with keys "mean_precision@k", "mean_recall@k", "mean_hitrate@k"
    (all 0 when no user could be evaluated)
    """
    precisions = []
    recalls = []
    hit_flags = []

    for user_id, relevant in ground_truth_dict.items():
        if not relevant:
            # Nothing to find for this user
            continue

        top_k = recommendations_dict.get(user_id, [])[:k]
        n_hits = len(set(top_k) & relevant)

        if k > 0:
            precisions.append(n_hits / k)
        else:
            precisions.append(0)
        recalls.append(n_hits / len(relevant))
        hit_flags.append(1.0 if n_hits > 0 else 0.0)

    if precisions:
        return {
            "mean_precision@k": np.mean(precisions),
            "mean_recall@k": np.mean(recalls),
            "mean_hitrate@k": np.mean(hit_flags)
        }

    return {"mean_precision@k": 0, "mean_recall@k": 0, "mean_hitrate@k": 0}

# --- 2. Model Functions (Popularity, Item-Item, ALS) ---

def recommend_popular_items_and_evaluate(train_df, test_df, k=10, prepare_ground_truth=None, calculate_metrics=None):
    """
    Trains a non-personalized Popularity model and evaluates its performance.

    This model recommends the top-k most frequently transacted items from the training
    set to every user. It serves as a simple but strong baseline.

    Args:
        train_df (pd.DataFrame): The training dataset.
        test_df (pd.DataFrame): The test dataset for evaluation.
        k (int): The number of items to recommend.
        prepare_ground_truth (function): A function to process the test_df into a ground truth dict.
            Defaults to the module-level `prepare_ground_truth`.
        calculate_metrics (function): A function to compute ranking metrics.
            Defaults to the module-level `calculate_metrics`.

    Returns:
        dict: A dictionary containing the calculated evaluation metrics (e.g., precision, recall).
    """
    # The parameters shadow the module-level helpers of the same name, so the
    # defaults have to be fetched from globals(); calling None would raise TypeError.
    if prepare_ground_truth is None:
        prepare_ground_truth = globals()["prepare_ground_truth"]
    if calculate_metrics is None:
        calculate_metrics = globals()["calculate_metrics"]

    print(f"\n--- Evaluating Popularity Model (Top {k} items) ---")

    # 1. "Train" the model by finding the most popular items based on transactions
    purchases = train_df[train_df['event'] == 'transaction']
    popular_items = purchases['itemid'].value_counts().head(k).index.tolist()
    print(f"Top {k} popular items identified from training data.")

    # 2. Evaluate the model
    ground_truth = prepare_ground_truth(test_df)
    # Every user receives the same list of popular items
    recommendations = {user_id: popular_items for user_id in ground_truth}

    metrics = calculate_metrics(recommendations, ground_truth, k)
    print("Evaluation complete.")
    return metrics

def recommend_item_item_and_evaluate(train_df, test_df, k=10, min_item_interactions=5, min_user_interactions=5, prepare_ground_truth=None, calculate_metrics=None):
    """
    Trains an Item-Item Collaborative Filtering model and evaluates its performance.

    This model recommends items that are similar to items a user has interacted
    with in the past, based on co-occurrence patterns in the training data.

    Args:
        train_df (pd.DataFrame): The training dataset.
        test_df (pd.DataFrame): The test dataset for evaluation.
        k (int): The number of items to recommend.
        min_item_interactions (int): Minimum number of interactions for an item to be kept.
        min_user_interactions (int): Minimum number of interactions for a user to be kept.
        prepare_ground_truth (function): A function to process the test_df into a ground truth dict.
            Defaults to the module-level `prepare_ground_truth`.
        calculate_metrics (function): A function to compute ranking metrics.
            Defaults to the module-level `calculate_metrics`.

    Returns:
        dict: A dictionary containing the calculated evaluation metrics.
    """
    # The parameters shadow the module-level helpers of the same name, so the
    # defaults have to be fetched from globals(); calling None would raise TypeError.
    if prepare_ground_truth is None:
        prepare_ground_truth = globals()["prepare_ground_truth"]
    if calculate_metrics is None:
        calculate_metrics = globals()["calculate_metrics"]

    print(f"\n--- Evaluating Item-Item CF Model (Top {k} items) ---")

    # 1. Filter out infrequent users and items to reduce noise and computation
    item_counts = train_df['itemid'].value_counts()
    user_counts = train_df['visitorid'].value_counts()
    items_to_keep = item_counts[item_counts >= min_item_interactions].index
    users_to_keep = user_counts[user_counts >= min_user_interactions].index
    filtered_df = train_df[(train_df['itemid'].isin(items_to_keep)) & (train_df['visitorid'].isin(users_to_keep))].copy()
    print(f"Filtered training data from {len(train_df)} to {len(filtered_df)} records.")

    # 2. Create user-item interaction matrix and vocabulary mappings
    user_map = {uid: i for i, uid in enumerate(filtered_df['visitorid'].unique())}
    item_map = {iid: i for i, iid in enumerate(filtered_df['itemid'].unique())}
    inverse_item_map = {i: iid for iid, i in item_map.items()}
    user_indices = filtered_df['visitorid'].map(user_map)
    item_indices = filtered_df['itemid'].map(item_map)
    # Repeated (user, item) pairs are summed by csr_matrix, giving interaction counts.
    user_item_matrix = csr_matrix((np.ones(len(filtered_df)), (user_indices, item_indices)))

    # 3. Calculate the cosine similarity matrix between all items
    print("Calculating item similarity matrix...")
    item_similarity_matrix = cosine_similarity(user_item_matrix.T, dense_output=False)
    print("Similarity matrix calculated.")

    # 4. Generate recommendations and evaluate
    ground_truth = prepare_ground_truth(test_df)
    recommendations = {}
    print("Generating recommendations for users in test set...")
    # Only users seen during training can receive personalised recommendations.
    test_users = [u for u in ground_truth if u in user_map]

    for user_id in test_users:
        seen_items = user_item_matrix[user_map[user_id]].indices
        if len(seen_items) == 0:
            continue

        # Aggregate scores from items the user has interacted with
        all_scores = np.asarray(item_similarity_matrix[seen_items].sum(axis=0)).flatten()
        # Remove already interacted items from recommendations
        all_scores[seen_items] = -1
        top_indices = np.argsort(all_scores)[::-1][:k]
        recommendations[user_id] = [inverse_item_map[idx] for idx in top_indices if idx in inverse_item_map]

    metrics = calculate_metrics(recommendations, ground_truth, k)
    print("Evaluation complete.")
    return metrics

def recommend_als_and_evaluate(train_df, test_df, k=10, min_item_interactions=5, min_user_interactions=5,
                               factors=25, regularization=0.02, iterations=48, prepare_ground_truth=None, calculate_metrics=None):
    """
    Trains an Alternating Least Squares (ALS) model and evaluates its performance.

    This model uses matrix factorization to learn latent embeddings for users and
    items from implicit feedback data. Default hyperparameters are set from a
    previous Optuna tuning process.

    Args:
        train_df (pd.DataFrame): The training dataset.
        test_df (pd.DataFrame): The test dataset for evaluation.
        k (int): The number of items to recommend.
        min_item_interactions (int): Minimum number of interactions for an item to be kept.
        min_user_interactions (int): Minimum number of interactions for a user to be kept.
        factors (int): The number of latent factors to compute.
        regularization (float): The regularization factor.
        iterations (int): The number of ALS iterations to run.
        prepare_ground_truth (function): A function to process the test_df into a ground truth dict.
            Defaults to the module-level `prepare_ground_truth`.
        calculate_metrics (function): A function to compute ranking metrics.
            Defaults to the module-level `calculate_metrics`.

    Returns:
        dict: A dictionary containing the calculated evaluation metrics.
    """
    # The parameters shadow the module-level helpers of the same name, so the
    # defaults have to be fetched from globals(); calling None would raise TypeError.
    if prepare_ground_truth is None:
        prepare_ground_truth = globals()["prepare_ground_truth"]
    if calculate_metrics is None:
        calculate_metrics = globals()["calculate_metrics"]

    print(f"\n--- Evaluating ALS Model (Top {k} items) ---")

    # 1. Filter data
    item_counts = train_df['itemid'].value_counts()
    user_counts = train_df['visitorid'].value_counts()
    items_to_keep = item_counts[item_counts >= min_item_interactions].index
    users_to_keep = user_counts[user_counts >= min_user_interactions].index
    filtered_df = train_df[(train_df['itemid'].isin(items_to_keep)) & (train_df['visitorid'].isin(users_to_keep))].copy()
    print(f"Filtered training data from {len(train_df)} to {len(filtered_df)} records.")

    # 2. Create mappings and confidence matrix
    user_map = {uid: i for i, uid in enumerate(filtered_df['visitorid'].unique())}
    item_map = {iid: i for i, iid in enumerate(filtered_df['itemid'].unique())}
    inverse_item_map = {i: iid for iid, i in item_map.items()}
    user_indices = filtered_df['visitorid'].map(user_map).astype(np.int32)
    item_indices = filtered_df['itemid'].map(item_map).astype(np.int32)

    # Stronger purchase intent -> higher confidence in the implicit-feedback matrix.
    event_weights = {'view': 1, 'addtocart': 3, 'transaction': 5}
    confidence = filtered_df['event'].map(event_weights).astype(np.float32)
    user_item_matrix = csr_matrix((confidence, (user_indices, item_indices)))

    # 3. Train the ALS model
    print("Training ALS model...")
    als_model = implicit.als.AlternatingLeastSquares(factors=factors, regularization=regularization, iterations=iterations)
    als_model.fit(user_item_matrix)
    print("ALS model trained.")

    # 4. Generate recommendations and evaluate
    ground_truth = prepare_ground_truth(test_df)
    recommendations = {}
    print("Generating recommendations for users in test set...")
    # Keep user ids and matrix rows aligned so no reverse lookup is needed later
    # (the previous list(...).index(...) search was quadratic in the number of users).
    test_users = [u for u in ground_truth if u in user_map]
    test_users_indices = [user_map[u] for u in test_users]

    if test_users_indices:
        user_item_matrix_for_recs = user_item_matrix[test_users_indices]
        ids, _ = als_model.recommend(test_users_indices, user_item_matrix_for_recs, N=k)

        for row, original_user_id in enumerate(test_users):
            recommendations[original_user_id] = [
                inverse_item_map[item_idx] for item_idx in ids[row] if item_idx in inverse_item_map
            ]

    metrics = calculate_metrics(recommendations, ground_truth, k)
    print("Evaluation complete.")
    return metrics


In [None]:
train_set, validation_set, test_set = prepare_data(data_folder=DATA_PATH)
if train_set is not None:
    results = {}
    # Final test-set runs train on everything that precedes the test period.
    full_train_set = pd.concat([train_set, validation_set])

    # The evaluators take their scoring helpers as arguments; pass the
    # module-level implementations explicitly.
    eval_helpers = {
        "prepare_ground_truth": prepare_ground_truth,
        "calculate_metrics": calculate_metrics,
    }
    # Evaluate base models
    baselines = {
        'Popularity': recommend_popular_items_and_evaluate,
        'Item-Item CF': recommend_item_item_and_evaluate,
        'ALS': recommend_als_and_evaluate,
    }

    print("\n>>> Running evaluations on the VALIDATION set <<<")
    for model_name, evaluate in baselines.items():
        results[f'{model_name} (Validation)'] = evaluate(train_set, validation_set, **eval_helpers)

    print("\n>>> Running final evaluations on the TEST set <<<")
    for model_name, evaluate in baselines.items():
        results[f'{model_name} (Test)'] = evaluate(full_train_set, test_set, **eval_helpers)

    print("\n--- Final Evaluation Results ---")
    results_df = pd.DataFrame.from_dict(results, orient='index')
    print(results_df)
    print("--------------------------------")

## Use Optuna to find the best hyperparameters for the ALS model

In [None]:
import optuna

def objective_als(trial, train_df, val_df):
    """
    The objective function for Optuna to optimize.

    Samples ALS hyperparameters from the trial, trains and evaluates an ALS
    model with them, and returns the resulting mean precision.

    Args:
        trial: Optuna trial used to sample the hyperparameters.
        train_df (pd.DataFrame): Data the ALS model is trained on.
        val_df (pd.DataFrame): Data the sampled configuration is scored on.

    Returns:
        The 'mean_precision@k' value reported by the evaluation.
    """
    # 1. Define the hyperparameter search space
    params = {
        'factors': trial.suggest_int('factors', 20, 200),
        'regularization': trial.suggest_float('regularization', 1e-3, 1e-1, log=True),
        'iterations': trial.suggest_int('iterations', 10, 50)
    }

    # 2. Run an evaluation with the suggested parameters.
    # The scoring helpers are passed explicitly so the evaluator never has to
    # rely on its own default for them.
    metrics = recommend_als_and_evaluate(
        train_df,
        val_df,
        prepare_ground_truth=prepare_ground_truth,
        calculate_metrics=calculate_metrics,
        **params,
    )

    # 3. Return the metric we want to maximize (precision)
    return metrics['mean_precision@k']

def tune_als_hyperparameters(train_df, val_df, n_trials=25):
    """
    Runs an Optuna study that maximizes the ALS objective and reports the winner.

    Args:
        train_df (pd.DataFrame): Data each trial trains on.
        val_df (pd.DataFrame): Data each trial is scored on.
        n_trials (int): Number of trials to run.

    Returns:
        dict: Hyperparameters of the best trial.
    """
    def run_trial(candidate):
        # Bind the data splits so Optuna only has to supply the trial object.
        return objective_als(candidate, train_df, val_df)

    study = optuna.create_study(direction='maximize')
    study.optimize(run_trial, n_trials=n_trials)

    trial = study.best_trial

    print("\n--- Optuna Study Complete ---")
    print(f"Number of finished trials: {len(study.trials)}")
    print("Best trial:")
    print(f" Value (Precision@10): {trial.value}")
    print(" Params: ")
    for key, value in trial.params.items():
        print(f" {key}: {value}")

    return trial.params


In [None]:
# Step 1: reload the chronological train / validation / test splits
train_set, validation_set, test_set = prepare_data(data_folder=DATA_PATH)


# --- Step 2: hyperparameter search ---
print("\n>>> 1. TUNING ALS Hyperparameters on the VALIDATION set <<<")
# A larger n_trials (e.g. 50 or 100) gives a more thorough search
best_als_params = tune_als_hyperparameters(
    train_df=train_set,
    val_df=validation_set,
    n_trials=25,
)


## Define, train, and evaluate the SASRec model

In [None]:
class SASRec(pl.LightningModule):
    """
    A PyTorch Lightning implementation of the SASRec model for sequential recommendation.

    SASRec (Self-Attentive Sequential Recommendation) uses a Transformer-based
    architecture to capture the sequential patterns in a user's interaction history
    to predict the next item they are likely to interact with.

    Training and evaluation both score only the last position of each input
    sequence against the single next item.

    Attributes:
        item_embedding (nn.Embedding): Embedding layer for item IDs (index 0 is padding).
        positional_embedding (nn.Embedding): Embedding layer to encode the position of items in a sequence.
        dropout (nn.Dropout): Dropout applied to the summed embeddings.
        transformer_encoder (nn.TransformerEncoder): The core self-attention module.
        fc (nn.Linear): Final fully connected layer to produce logits over the item vocabulary.
        loss_fn (nn.CrossEntropyLoss): The loss function used for training (ignores target 0).
        validation_step_outputs (list): Per-batch predictions collected during validation.
        test_step_outputs (list): Per-batch predictions collected during testing.
    """

    # Cut-off for the ranking metrics. The logged metric names ("...@10") are
    # tied to this value, so change both together.
    EVAL_K = 10

    def __init__(self, vocab_size, max_len, hidden_dim, num_heads, num_layers,
                 dropout=0.2, learning_rate=1e-3, weight_decay=1e-6, warmup_steps=2000, max_steps=100000):
        """
        Initializes the SASRec model layers and hyperparameters.

        Args:
            vocab_size (int): The total number of unique items in the dataset (+1 for padding).
            max_len (int): The maximum length of the input sequences.
            hidden_dim (int): The dimensionality of the item and positional embeddings.
            num_heads (int): The number of attention heads in the Transformer encoder.
            num_layers (int): The number of layers in the Transformer encoder.
            dropout (float): The dropout rate to be applied.
            learning_rate (float): The learning rate for the optimizer.
            weight_decay (float): The weight decay (L2 penalty) for the optimizer.
            warmup_steps (int): The number of linear warmup steps for the learning rate scheduler.
            max_steps (int): The total number of training steps for the learning rate scheduler's decay phase.
        """
        super().__init__()
        # This saves all hyperparameters to self.hparams, making them accessible later
        self.save_hyperparameters()

        # Embedding layers
        self.item_embedding = nn.Embedding(vocab_size, hidden_dim, padding_idx=0)
        self.positional_embedding = nn.Embedding(max_len, hidden_dim)
        self.dropout = nn.Dropout(dropout)

        # Transformer Encoder
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=hidden_dim, nhead=num_heads, dim_feedforward=hidden_dim * 4,
            dropout=dropout, batch_first=True, activation='gelu'
        )
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)

        # Output layer
        self.fc = nn.Linear(hidden_dim, vocab_size)

        # Loss function, ignoring the padding token
        self.loss_fn = nn.CrossEntropyLoss(ignore_index=0)

        # Lists to store outputs from validation and test steps
        self.validation_step_outputs = []
        self.test_step_outputs = []

    def forward(self, x):
        """
        Defines the forward pass of the model.

        Args:
            x (torch.Tensor): A batch of input sequences of shape (batch_size, seq_len).

        Returns:
            torch.Tensor: The output logits of shape (batch_size, seq_len, vocab_size).
        """
        seq_len = x.size(1)
        # Build helper tensors on the input's device so the module also works
        # when it is called outside a Lightning Trainer.
        device = x.device

        # Positional indices (0, 1, 2, ..., seq_len-1)
        positions = torch.arange(seq_len, device=device).unsqueeze(0)

        # Causal mask so a position cannot attend to later positions
        causal_mask = nn.Transformer.generate_square_subsequent_mask(seq_len, device=device)

        # Combine item and positional embeddings
        hidden = self.item_embedding(x) + self.positional_embedding(positions)
        hidden = self.dropout(hidden)

        # Pass through the Transformer encoder
        hidden = self.transformer_encoder(hidden, mask=causal_mask)

        # Final logits over the vocabulary
        return self.fc(hidden)

    @staticmethod
    def _last_targets(targets):
        """
        Returns the next-item target of every example as a 1-D tensor.

        Takes the last column after reshaping to (batch, -1). Unlike a bare
        `squeeze()`, this keeps the batch dimension when the batch holds a
        single example, which CrossEntropyLoss requires.
        """
        return targets.reshape(targets.size(0), -1)[:, -1]

    def _last_step_loss(self, batch):
        """Runs the model and returns (last-position logits, 1-D targets, loss)."""
        inputs, targets = batch
        # Only the prediction made at the final input position is scored.
        last_logits = self(inputs)[:, -1, :]
        last_targets = self._last_targets(targets)
        return last_logits, last_targets, self.loss_fn(last_logits, last_targets)

    def training_step(self, batch, batch_idx):
        """
        Performs a single training step.

        Args:
            batch (tuple): A tuple containing input sequences and target items.
            batch_idx (int): The index of the current batch.

        Returns:
            torch.Tensor: The calculated loss for the batch.
        """
        _, _, loss = self._last_step_loss(batch)
        self.log('train_loss', loss, prog_bar=True, on_step=True, on_epoch=True)
        return loss

    def _eval_step(self, batch, stage, outputs):
        """
        Shared body of validation_step and test_step.

        Logs `<stage>_loss` and appends the top-k predictions and targets of the
        batch to `outputs` for the end-of-epoch metric computation.
        """
        last_logits, last_targets, loss = self._last_step_loss(batch)
        self.log(f'{stage}_loss', loss, prog_bar=True, on_epoch=True)

        # topk fails when k exceeds the vocabulary size, so cap it.
        k = min(self.EVAL_K, last_logits.size(-1))
        top_k_preds = torch.topk(last_logits, k, dim=-1).indices
        # Keep targets as (batch, 1) so they broadcast against (batch, k) predictions.
        outputs.append({'preds': top_k_preds, 'targets': last_targets.unsqueeze(1)})
        return loss

    def _log_ranking_metrics(self, stage, outputs):
        """
        Shared body of on_validation_epoch_end and on_test_epoch_end.

        Computes hit rate, precision and recall over everything collected in
        `outputs`, logs them as `<stage>_<metric>@10`, then empties `outputs`.
        """
        if not outputs:
            return

        # Concatenate all predictions and targets from the epoch
        preds = torch.cat([x['preds'] for x in outputs], dim=0)
        targets = torch.cat([x['targets'] for x in outputs], dim=0)

        k = preds.size(1)
        num_targets = targets.size(0)
        # An example is a hit when its target appears among its top-k predictions.
        num_hits = (preds == targets).any(dim=1).sum().item()

        if num_targets > 0:
            hit_rate = num_hits / num_targets
            recall = hit_rate  # For next-item prediction, recall@k is the same as hit_rate@k
            precision = num_hits / (k * num_targets)
        else:
            hit_rate, recall, precision = 0.0, 0.0, 0.0

        self.log(f'{stage}_hitrate@10', hit_rate, prog_bar=True)
        self.log(f'{stage}_precision@10', precision, prog_bar=True)
        self.log(f'{stage}_recall@10', recall, prog_bar=True)

        outputs.clear()  # Free up memory

    def validation_step(self, batch, batch_idx):
        """
        Performs a single validation step.
        Calculates loss and stores predictions for metric computation at the end of the epoch.
        """
        return self._eval_step(batch, 'val', self.validation_step_outputs)

    def on_validation_epoch_end(self):
        """
        Calculates and logs ranking metrics at the end of the validation epoch.
        """
        self._log_ranking_metrics('val', self.validation_step_outputs)

    def test_step(self, batch, batch_idx):
        """
        Performs a single test step.
        Mirrors the logic of the validation_step.
        """
        return self._eval_step(batch, 'test', self.test_step_outputs)

    def on_test_epoch_end(self):
        """
        Calculates and logs ranking metrics at the end of the test epoch.
        """
        self._log_ranking_metrics('test', self.test_step_outputs)

    def configure_optimizers(self):
        """
        Configures the optimizer and learning rate scheduler.

        Uses AdamW optimizer and a linear warmup followed by a cosine decay schedule.
        The scheduler is stepped once per training step.
        """
        optimizer = torch.optim.AdamW(
            self.parameters(),
            lr=self.hparams.learning_rate,
            weight_decay=self.hparams.weight_decay
        )

        # Learning rate scheduler: linear warmup and cosine decay
        def lr_lambda(current_step: int):
            warmup_steps = self.hparams.warmup_steps
            max_steps = self.hparams.max_steps
            if current_step < warmup_steps:
                return float(current_step) / float(max(1, warmup_steps))
            progress = float(current_step - warmup_steps) / float(max(1, max_steps - warmup_steps))
            return max(0.0, 0.5 * (1.0 + math.cos(math.pi * progress)))

        scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda)

        return {
            "optimizer": optimizer,
            "lr_scheduler": {
                "scheduler": scheduler,
                "interval": "step",  # Update the scheduler at every training step
                "frequency": 1
            }
        }

In [None]:
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger
import torch

def train_and_eval_SASRec_model(train_set, validation_set, test_set, checkpoint_dir_path='checkpoints/',
                                checkpoint_path=None, n_epochs=10, mode='train',
                                batchsize=256, max_token_len=50, learning_rate=1e-3, hidden_dim=128,
                                num_heads=2, num_layers=2, dropout=0.2, weight_decay=1e-6):
    """
    Train or evaluate a SASRec sequential recommendation model using PyTorch Lightning.

    This function wraps the entire SASRec pipeline:
    - Initializes the SASRecDataModule (handles dataset preprocessing and dataloaders).
    - Builds the SASRec Transformer-based model.
    - Configures training callbacks (checkpointing, early stopping, LR monitoring).
    - Runs either training (`mode='train'`) or evaluation on the test set (`mode='test'`).

    Parameters
    ----------
    train_set : pd.DataFrame
        Training interactions dataset.
    validation_set : pd.DataFrame
        Validation dataset with the same structure as `train_set`.
    test_set : pd.DataFrame
        Test dataset with the same structure as `train_set`.
    checkpoint_dir_path : str, optional (default='checkpoints/')
        Directory to save model checkpoints.
    checkpoint_path : str or None, optional (default=None)
        Path to a checkpoint file for resuming training or loading a pretrained model for testing.
        In `'test'` mode, leaving this as None evaluates the freshly initialized
        (untrained) model; a warning is emitted in that case.
    n_epochs : int, optional (default=10)
        Number of training epochs.
    mode : {'train', 'test'}, optional (default='train')
        - `'train'`: trains the model on the training/validation data.
        - `'test'`: evaluates the model on the test set using a checkpoint.
    batchsize : int, optional (default=256)
        Batch size for training and evaluation.
    max_token_len : int, optional (default=50)
        Maximum sequence length per user (recent interactions kept).
    learning_rate : float, optional (default=1e-3)
        Learning rate for the AdamW optimizer.
    hidden_dim : int, optional (default=128)
        Dimensionality of item and positional embeddings.
    num_heads : int, optional (default=2)
        Number of attention heads in each Transformer encoder layer.
    num_layers : int, optional (default=2)
        Number of Transformer encoder layers.
    dropout : float, optional (default=0.2)
        Dropout probability applied in embeddings and Transformer layers.
    weight_decay : float, optional (default=1e-6)
        Weight decay regularization coefficient for AdamW.

    Returns
    -------
    str or list
        In `'train'` mode, the `best_model_path` recorded by the checkpoint
        callback (an empty string if no checkpoint was saved). In `'test'`
        mode, whatever `trainer.test` returns.

    Raises
    ------
    ValueError
        If `mode` is neither `'train'` nor `'test'`.
    """
    # Fail fast on a typo in `mode` instead of building the whole pipeline
    # and then silently doing nothing.
    if mode not in ('train', 'test'):
        raise ValueError(f"mode must be 'train' or 'test', got {mode!r}")

    import warnings

    # Imported here because the surrounding cell only imports ModelCheckpoint
    # and EarlyStopping from this module.
    from pytorch_lightning.callbacks import LearningRateMonitor

    # --- 1. Initialize DataModule ---
    print("Initializing DataModule...")
    datamodule = SASRecDataModule(
        train_df=train_set,
        val_df=validation_set,
        test_df=test_set,
        batch_size=batchsize,
        max_len=max_token_len
    )
    # setup() is called eagerly because vocab_size is needed to build the model.
    datamodule.setup()

    # --- 2. Initialize Model ---
    print("Initializing SASRec model...")
    model = SASRec(
        vocab_size=datamodule.vocab_size,
        max_len=max_token_len,
        hidden_dim=hidden_dim,
        num_heads=num_heads,
        num_layers=num_layers,
        dropout=dropout,
        learning_rate=learning_rate,
        weight_decay=weight_decay
    )

    # --- 3. Configure Training Callbacks ---
    checkpoint_callback = ModelCheckpoint(
        dirpath=checkpoint_dir_path,
        filename="sasrec-{epoch:02d}-{val_hitrate@10:.4f}",
        save_top_k=1,
        verbose=True,
        monitor="val_hitrate@10",
        mode="max"
    )

    early_stopping_callback = EarlyStopping(
        monitor="val_hitrate@10",  # stop if ranking metric stagnates
        patience=5,
        mode="max"
    )

    lr_monitor = LearningRateMonitor(logging_interval="step")

    logger = TensorBoardLogger("lightning_logs", name="sasrec")

    # --- 4. Initialize Trainer ---
    print("Initializing PyTorch Lightning Trainer...")
    trainer = pl.Trainer(
        logger=logger,
        callbacks=[checkpoint_callback, early_stopping_callback, lr_monitor],
        max_epochs=n_epochs,
        accelerator='auto',
        devices=1,
        gradient_clip_val=1.0,  # helps with exploding gradients
    )

    if mode == 'train':
        # --- 5. Start Training ---
        print(f"Starting training for up to {n_epochs} epochs...")
        trainer.fit(model, datamodule, ckpt_path=checkpoint_path)
        # Hand the best checkpoint back so the caller can evaluate it later.
        return checkpoint_callback.best_model_path

    # --- 6. Test on the supplied checkpoint ---
    print("Evaluating on test set...")
    if checkpoint_path is None:
        # `model` was created a few lines above and has never been trained;
        # without a checkpoint the reported metrics describe random weights.
        warnings.warn(
            "mode='test' was requested without checkpoint_path; "
            "evaluating a freshly initialized (untrained) model.",
            RuntimeWarning,
            stacklevel=2,
        )
    return trainer.test(model, datamodule, ckpt_path=checkpoint_path)


In [None]:
# --- Configuration ---
BATCH_SIZE = 256
MAX_TOKEN_LEN = 50  # 50–100 is standard
LEARNING_RATE = 1e-3
HIDDEN_DIM = 128
NUM_HEADS = 2
NUM_LAYERS = 2
DROPOUT = 0.2
WEIGHT_DECAY = 1e-6
# The training call used to hard-code n_epochs=10 and ignore this constant
# (which said 50). It is set to 10 here so the run itself is unchanged now
# that the constant is actually passed through.
N_EPOCHS = 10
MODE = 'train'  # 'train' or 'test' (not consulted below: this cell always trains, then tests)

# Single place where the constants above are handed to the pipeline, so the
# train and test calls cannot drift apart.
sasrec_config = dict(
    batchsize=BATCH_SIZE,
    max_token_len=MAX_TOKEN_LEN,
    learning_rate=LEARNING_RATE,
    hidden_dim=HIDDEN_DIM,
    num_heads=NUM_HEADS,
    num_layers=NUM_LAYERS,
    dropout=DROPOUT,
    weight_decay=WEIGHT_DECAY,
)

# Train and evaluate SASRec model
print("\n>>> Training and evaluating SASRec model <<<")
best_checkpoint_path = train_and_eval_SASRec_model(
    train_set, validation_set, test_set,
    n_epochs=N_EPOCHS, mode='train', **sasrec_config
)

print("\n>>> Evaluating trained SASRec model on TEST set <<<")
# Forward whatever checkpoint path the training call returned. A None or
# empty value means no checkpoint is passed, so the test call evaluates the
# model it builds itself.
test_results = train_and_eval_SASRec_model(
    train_set, validation_set, test_set,
    checkpoint_path=best_checkpoint_path or None,
    mode='test', **sasrec_config
)

## Main function to run the complete Recommender System

In [None]:
def load_item_properties(data_folder='data/'):
    """
    Loads item properties and creates a mapping from item ID to its category ID.
    Handles both a single properties file or two split parts.

    Rows whose category value cannot be parsed as a number are dropped, so
    the returned mapping never contains missing values. When an item has
    several ``categoryid`` rows, the one with the greatest ``timestamp`` is
    kept (if that column is present); otherwise the last row in file order.

    Args:
        data_folder (str): The path to the folder containing item property files.
            A trailing path separator is optional.

    Returns:
        dict: A dictionary mapping {itemid: categoryid}. Empty if no
        properties file could be found.
    """
    import os

    print("Loading item properties...")
    part_paths = [
        os.path.join(data_folder, 'item_properties_part1.csv'),
        os.path.join(data_folder, 'item_properties_part2.csv'),
    ]
    try:
        # First, try to load the two separate parts and combine them.
        props_df = pd.concat([pd.read_csv(path) for path in part_paths], ignore_index=True)
        print("Successfully loaded and combined item_properties_part1.csv and item_properties_part2.csv.")

    except FileNotFoundError:
        try:
            # If the parts are not found, try to load a single combined file.
            props_df = pd.read_csv(os.path.join(data_folder, 'item_properties.csv'))
            print("Successfully loaded a single item_properties.csv.")
        except FileNotFoundError:
            print("Warning: No item properties files found. Cannot display category information.")
            return {}

    category_df = props_df[props_df['property'] == 'categoryid'].copy()
    category_df['value'] = pd.to_numeric(category_df['value'], errors='coerce')
    # Drop unparseable categories here: a pd.NA value in the map cannot be
    # used in a truthiness test by callers (bool(pd.NA) raises TypeError).
    category_df = category_df.dropna(subset=['value'])

    if 'timestamp' in category_df.columns:
        # Stable sort so rows with equal timestamps keep their file order.
        category_df = category_df.sort_values('timestamp', kind='stable')
    category_df = category_df.drop_duplicates(subset='itemid', keep='last')

    item_to_category_map = category_df.set_index('itemid')['value'].astype('Int64').to_dict()
    print("Item to category mapping created successfully.")
    return item_to_category_map

def load_category_tree(data_folder='data/'):
    """
    Loads the category tree to map categories to their parent categories.

    Args:
        data_folder (str): The path to the folder containing category_tree.csv.
            A trailing path separator is optional.

    Returns:
        dict: A dictionary mapping {categoryid: parentid}. Empty if
        category_tree.csv is not found.
    """
    import os

    print("Loading category tree...")
    # os.path.join instead of string concatenation, so 'data' and 'data/'
    # both resolve to the same file.
    tree_path = os.path.join(data_folder, 'category_tree.csv')
    try:
        tree_df = pd.read_csv(tree_path)
    except FileNotFoundError:
        print("Warning: 'category_tree.csv' not found. Cannot display parent category information.")
        return {}

    category_parent_map = tree_df.set_index('categoryid')['parentid'].to_dict()
    print("Category tree loaded successfully.")
    return category_parent_map

def get_popular_items(train_df, k=10):
    """
    Returns the item IDs of the k most frequently purchased items.

    Only rows whose 'event' is 'transaction' are counted; the result is
    ordered from most to least purchased.
    """
    is_purchase = train_df['event'] == 'transaction'
    purchased_items = train_df.loc[is_purchase, 'itemid']
    ranking = purchased_items.value_counts()
    top_k = ranking.iloc[:k]
    return top_k.index.tolist()

def show_user_recommendations(visitor_id, model, datamodule, popular_items, item_category_map, category_parent_map, k=10):
    """
    Displays recommendations for a user, including category and parent category information.

    Users without an entry in ``datamodule.user_history`` receive the first
    ``k`` entries of ``popular_items`` as a fallback. For known users, the
    history is mapped through ``datamodule.item_map``, left-padded with
    zeros to ``datamodule.max_len``, scored by ``model``, and the ``k``
    highest-scoring indices that exist in ``datamodule.inverse_item_map``
    are printed.

    Args:
        visitor_id: Key looked up in ``datamodule.user_history``.
        model: Callable returning logits indexed as ``[batch, position, item]``;
            must expose ``eval()`` and ``device``.
        datamodule: Object exposing ``user_history``, ``item_map``,
            ``inverse_item_map`` and ``max_len``.
        popular_items (list): Item IDs used for the cold-start fallback.
        item_category_map (dict): {itemid: categoryid}.
        category_parent_map (dict): {categoryid: parentid}.
        k (int): Number of items to display.

    Returns:
        None. All output is printed.
    """
    separator = "-------------------------------------------------"

    print(f"\n--- Recommendations for Visitor ID: {visitor_id} ---")
    model.eval()

    def _or_na(value):
        # Normalise None / NaN / pd.NA to the 'N/A' placeholder. pd.NA in
        # particular cannot be used in an `if` test, so it must be replaced
        # before any comparison.
        if value is None:
            return 'N/A'
        try:
            missing = bool(pd.isna(value))
        except (TypeError, ValueError):
            missing = False
        return 'N/A' if missing else value

    def format_item_with_category(item_id):
        category_id = _or_na(item_category_map.get(item_id, 'N/A'))
        parent_id = 'N/A'
        if category_id != 'N/A':
            parent_id = _or_na(category_parent_map.get(category_id, 'N/A'))
        return f"Item: {item_id} (Category: {category_id}, Parent: {parent_id})"

    user_history_ids = datamodule.user_history.get(visitor_id)

    if user_history_ids is None:
        print(f"User {visitor_id} not found in training history. Providing popularity-based recommendations.")
        print(f"\nTop {k} Popular Items (Fallback):")
        # Honour k here as well; the caller's list may be longer.
        recs_with_cats = [format_item_with_category(item_id) for item_id in popular_items[:k]]
        print(recs_with_cats)
        print(separator)
        return

    history_with_cats = [format_item_with_category(item_id) for item_id in user_history_ids]
    print("User's Historical Interactions:")
    print(history_with_cats)

    history_indices = [datamodule.item_map[i] for i in user_history_ids if i in datamodule.item_map]
    if not history_indices:
        print("None of the user's historical items are in the model's vocabulary.")
        return

    # Keep the most recent max_len interactions and left-pad with index 0.
    max_len = datamodule.max_len
    input_seq = history_indices[-max_len:]
    padded_input = [0] * (max_len - len(input_seq)) + list(input_seq)
    input_tensor = torch.tensor([padded_input], dtype=torch.long, device=model.device)

    with torch.no_grad():
        logits = model(input_tensor)
    last_item_logits = logits[0, -1, :]

    # Rank the whole vocabulary and take the first k indices that map back to
    # a real item. Selecting top-k first and filtering afterwards returns
    # fewer than k items whenever an unmapped index (e.g. the padding index 0)
    # scores highly, and torch.topk raises if k exceeds the vocabulary size.
    ranked_indices = torch.argsort(last_item_logits, descending=True).tolist()
    recommended_item_ids = []
    for idx in ranked_indices:
        if len(recommended_item_ids) >= k:
            break
        if idx in datamodule.inverse_item_map:
            recommended_item_ids.append(datamodule.inverse_item_map[idx])

    print(f"\nTop {k} Recommended Items:")
    recs_with_cats = [format_item_with_category(item_id) for item_id in recommended_item_ids]
    print(recs_with_cats)
    print(separator)


In [None]:
def main(checkpoint_path="checkpoints/sasrec-epoch=06-val_hitrate@10=0.3629.ckpt", data_folder="data/"):
    """
    Main function to run the inference and qualitative analysis pipeline.

    Loads a SASRec checkpoint, rebuilds the data module, loads category
    metadata, and prints recommendations for up to three users that appear
    both in the training history and in the test set, followed by one
    unknown user (ID -999) to exercise the popularity fallback.

    Args:
        checkpoint_path (str): Path to the SASRec checkpoint to load.
        data_folder (str): Folder containing the dataset CSV files.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    print("Loading model from checkpoint...")
    # map_location makes the target device explicit, e.g. when the
    # checkpoint was written on a GPU and is now loaded on a CPU-only machine.
    best_model = SASRec.load_from_checkpoint(checkpoint_path, map_location=device)
    best_model.to(device)

    print("Preparing data...")
    train_set, validation_set, test_set = prepare_data(data_folder=data_folder)

    # Pad/truncate inference sequences to the length the model was trained
    # with. Presumably SASRec stores its constructor arguments in hparams;
    # if 'max_len' is not there, fall back to the data module's default.
    datamodule_kwargs = {}
    trained_max_len = getattr(best_model.hparams, 'max_len', None)
    if trained_max_len is not None:
        datamodule_kwargs['max_len'] = trained_max_len
    datamodule = SASRecDataModule(train_set, validation_set, test_set, **datamodule_kwargs)
    datamodule.setup()

    item_category_map = load_item_properties(data_folder=data_folder)
    category_parent_map = load_category_tree(data_folder=data_folder)

    print("\nCalculating popular items for cold-start users...")
    popular_items_list = get_popular_items(train_set, k=10)

    users_in_train_history = set(datamodule.user_history.keys())
    users_in_test_set = set(datamodule.test_df['visitorid'].unique())
    # Sorted so the same example users are shown on every run; plain set
    # iteration order is an implementation detail.
    valid_example_users = sorted(users_in_train_history & users_in_test_set)

    print(f"\nFound {len(valid_example_users)} users for qualitative analysis.")

    for user_id in valid_example_users[:3]:
        show_user_recommendations(user_id, best_model, datamodule, popular_items_list, item_category_map, category_parent_map)

    # An ID that is not in the training history triggers the popularity fallback.
    new_user_id = -999
    show_user_recommendations(new_user_id, best_model, datamodule, popular_items_list, item_category_map, category_parent_map)


In [None]:
# Run the inference demo with main()'s default checkpoint path and data folder.
main()