File size: 2,358 Bytes
80843b0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import numpy as np
import pandas as pd
import pickle
from scipy.sparse.linalg import svds


class ALSRecommender:
    """Latent-factor recommender backed by a truncated SVD.

    Despite the class name, ``fit`` does not run alternating least squares:
    it factorises the (alpha-scaled) interaction matrix with
    ``scipy.sparse.linalg.svds`` and splits the singular values evenly
    between the user and item factor matrices.

    Attributes:
        factors: Requested number of latent dimensions.
        iterations: Stored for API compatibility; not read by any method
            of this class.
        regularization: Stored for API compatibility; not read by any
            method of this class.
        alpha: Scalar the interaction matrix is multiplied by before the
            factorisation.
        user_factors: ``(n_users, k)`` array, or ``None`` before ``fit``.
        item_factors: ``(n_items, k)`` array, or ``None`` before ``fit``.
        user_history: Maps ``user_idx`` to the set of ``item_idx`` values
            seen in the training frame.
        is_fitted: ``True`` once ``fit`` has completed.
    """

    def __init__(self, factors=50, iterations=20, regularization=0.01, alpha=40):
        self.factors        = factors
        self.iterations     = iterations
        self.regularization = regularization
        self.alpha          = alpha
        self.user_factors   = None
        self.item_factors   = None
        self.user_history   = {}
        self.is_fitted      = False

    def _check_fitted(self):
        """Raise ``RuntimeError`` if the model has not been fitted yet."""
        if not self.is_fitted:
            raise RuntimeError("Call fit() first")

    def fit(self, weighted_matrix, train_df):
        """Factorise the interaction matrix and record per-user history.

        Args:
            weighted_matrix: Sparse user-by-item matrix (anything that
                supports ``* scalar`` and ``.tocsr()``).
            train_df: DataFrame with ``user_idx`` and ``item_idx`` columns;
                used only to build ``user_history``.

        Returns:
            ``self``, to allow call chaining.

        Raises:
            ValueError: If the matrix is too small to extract even one
                singular triplet (``min(shape) < 2``).
        """
        print(f"Fitting SVD with factors={self.factors} ...")
        matrix = (weighted_matrix * self.alpha).tocsr().astype(float)
        # svds requires 1 <= k < min(matrix.shape), so clamp the request.
        k = min(self.factors, min(matrix.shape) - 1)
        if k < 1:
            raise ValueError(
                f"Cannot factorise a matrix of shape {matrix.shape} with "
                f"factors={self.factors}: need 1 <= k < min(shape)"
            )
        U, sigma, Vt = svds(matrix, k=k)
        # Reorder the triplets so the strongest component comes first.
        idx   = np.argsort(sigma)[::-1]
        sigma = sigma[idx]
        U     = U[:, idx]
        Vt    = Vt[idx, :]
        # Split each singular value evenly between the two sides so that
        # user_factors @ item_factors.T reproduces the rank-k approximation.
        sqrt_s            = np.sqrt(sigma)
        self.user_factors = U    * sqrt_s[np.newaxis, :]
        self.item_factors = Vt.T * sqrt_s[np.newaxis, :]
        self.user_history = (
            train_df.groupby("user_idx")["item_idx"].apply(set).to_dict()
        )
        self.is_fitted = True
        print(f"Fitted — user_factors: {self.user_factors.shape}, item_factors: {self.item_factors.shape}")
        return self

    def recommend(self, user_idx, k: int = 10) -> np.ndarray:
        """Return up to ``k`` unseen item indices for a user, best first.

        Items in the user's training history are excluded. When fewer than
        ``k`` unseen items exist, the result is shorter than ``k`` rather
        than being padded with already-seen items.

        Args:
            user_idx: Row index into ``user_factors``.
            k: Maximum number of items to return.

        Returns:
            1-D array of item indices ordered by descending score.

        Raises:
            RuntimeError: If called before ``fit``.
        """
        self._check_fitted()
        # The matmul yields a fresh array, so masking in place is safe.
        scores  = self.item_factors @ self.user_factors[user_idx]
        n_items = scores.shape[0]
        # History may reference items beyond the factor matrix; skip those.
        seen = [i for i in self.user_history.get(user_idx, ()) if i < n_items]
        if seen:
            scores[seen] = -np.inf
        ranked = np.argsort(scores)[::-1][:k]
        # Masked items sort last; drop any that slipped into the top-k.
        return ranked[~np.isneginf(scores[ranked])]

    def recommend_batch(self, user_indices, k: int = 10) -> dict:
        """Return ``{user_idx: recommend(user_idx, k)}`` for each user given."""
        return {u: self.recommend(u, k) for u in user_indices}

    def get_similar_items(self, item_idx, k: int = 10):
        """Return up to ``k`` items most similar to ``item_idx``.

        Similarity is the dot product between item factor rows. The query
        item itself is never returned, so at most ``n_items - 1`` results
        are produced.

        Args:
            item_idx: Row index into ``item_factors``.
            k: Maximum number of neighbours to return.

        Returns:
            Tuple ``(indices, scores)`` of equal-length 1-D arrays ordered
            by descending score.

        Raises:
            RuntimeError: If called before ``fit``.
        """
        self._check_fitted()
        scores           = self.item_factors @ self.item_factors[item_idx]
        scores[item_idx] = -np.inf
        top_k            = np.argsort(scores)[::-1][:k]
        # Drop the masked query item if k reached the end of the ranking.
        top_k            = top_k[~np.isneginf(scores[top_k])]
        return top_k, scores[top_k]

    def save(self, path):
        """Pickle the whole recommender to ``path``."""
        with open(path, "wb") as f:
            pickle.dump(self, f)
        print(f"Saved to {path}")

    @staticmethod
    def load(path):
        """Unpickle and return the object stored at ``path``.

        SECURITY: ``pickle.load`` can execute arbitrary code embedded in
        the file. Only load files from a trusted source.
        """
        with open(path, "rb") as f:
            return pickle.load(f)