Module reclab.recommenders.llorma.llorma_lib.anchor
Anchor Manager module
Expand source code
"""Anchor Manager module
"""
import random
import numpy as np
from sklearn.preprocessing import normalize
from scipy.spatial import distance_matrix
def _init_anchor_points(data, n_anchor, row_k, col_k):
""" Helper function that
Parameters
----------
data : array-like, shape [n_ratings, 3]
Rating data
Each row is of the form [user_id, item_id, rating]
n_anchor : int
Number of anchor points
row_k : array-like, shape [n_users, n_users]
Symmetric kernel matrix where entry (i,j) is
the similarity between user_i and user_j
col_k : array-like, shape [n_items, n_items]
Symmetric kernel matrix where entry (i, j) id
the similarity between item_i and item_j
Returns
-------
np.ndarray, shape (n_anchor,)
Array of anchor indices, indexed according
to their order in the rating data
"""
user_ids = data[:, 0].astype(np.int64)
item_ids = data[:, 1].astype(np.int64)
anchor_idxs = []
while len(anchor_idxs) < n_anchor:
anchor_idx = random.randint(0, data.shape[0] - 1)
if anchor_idx in anchor_idxs:
continue
anchor_row = data[anchor_idx]
uid = int(anchor_row[0])
iid = int(anchor_row[1])
k = np.multiply(row_k[uid][user_ids],
col_k[iid][item_ids])
sum_a_of_anchor = np.sum(k)
if sum_a_of_anchor < 1:
continue
#print('>> %10d\t%d' % (anchor_idx, sum_a_of_anchor))
anchor_idxs.append(anchor_idx)
return anchor_idxs
def _get_distance_matrix(latent):
"""Helper function to compute a matrix
of pairwise cosine distances between latent
factors of a pair of users of a pair of items
Parameters
----------
latent : array-like, shape (N, latent_dim)
Matrix of latent factors
Number of rows is the number of users or items
Number of columns is the latent dimension
Returns
-------
array-like, shape (N, N)
Matrix of cosine distances between every
pair of users (items)
"""
_normalized_latent = normalize(latent, axis=1)
d_mat = distance_matrix(_normalized_latent, _normalized_latent)
assert np.count_nonzero(np.isnan(d_mat)) == 0
return d_mat
def _get_k_from_distance(d_mat):
"""Helper function to compute kernel matrix from distance matrix
Parameters
----------
d_mat : array-like, shape [N, N]
Matrix of cosine distances between every
pair of users (items)
Returns
-------
np.ndarray, shape [N, N]
Kernel matrix corresponding to the distance matrix
"""
m_mat = np.zeros(d_mat.shape)
m_mat[d_mat < 0.9] = 1
k_mat = np.multiply(np.subtract(np.ones(d_mat.shape), np.square(d_mat)), m_mat)
return k_mat
def _get_rbf_k(latent, gamma=None, scaled=True):
"""Helper function to compute scaled
Gaussian Kernel matrix for latent factors
Parameters
----------
latent : array-like, shape (N, latent_dim)
Matrix of latent factors
Number of rows is the number of users or items
Number of columns is the latent dimension
gamma : float, optional
parameter for the , by default None
scaled : bool, optional
if true, the kernel is scaled by the norms of the factors
by default True
"""
if gamma is None:
gamma = 1
d_mat = _get_distance_matrix(latent)
rbf_mat = np.exp(-1*gamma*d_mat)
row_norms = np.linalg.norm(latent, axis=1)
if scaled:
norms_mat = np.outer(row_norms, row_norms)
k_mat = np.multiply(rbf_mat, norms_mat)
else: k_mat = rbf_mat
# normalize such that diagonals have value 1
row_avg = np.mean(k_mat, axis=1, keepdims=True).reshape(-1, 1)
col_avg = np.mean(k_mat, axis=0, keepdims=True).reshape(1, -1)
avg = np.mean(k_mat)
k_mat = k_mat-col_avg-row_avg+2*avg
k_diag = np.sqrt(np.diagonal(k_mat))
k_diag_outer = np.outer(k_diag, k_diag)
k_mat = np.divide(k_mat, k_diag_outer)
# return (k_mat - 1)*2
return(k_mat)
def _get_ks_from_latents(row_latent, col_latent):
"""Helper function to get kernels
Parameters
----------
row_latent : array-like, shape (N_users, rank)
Matrix of latent factors corresponding to users
col_latent : array-like, shape (N_items, rank)
Matrix of latent factors corresponding to items
Returns
-------
(row_k, col_k): array-like, (N_users, N_users), (N_items, N_items)
Returns two square matrices corresponding to similarity kernels
row_k: entry (i,j) is the similarity between user_i and user_j
col_k: entry (i,j) is the similarity between item_i and item_j
"""
# row_d = _get_distance_matrix(row_latent)
# col_d = _get_distance_matrix(col_latent)
# row_k = _get_k_from_distance(row_d)
# col_k = _get_k_from_distance(col_d)
row_k = _get_rbf_k(row_latent)
col_k = _get_rbf_k(col_latent)
return row_k, col_k
class AnchorManager:
""" AnchorManager class
Parameters
----------
n_anchor : int
number of anchor points
batch_manager : obj: BatchManager
an instance of BatchManager class
row_latent_init : array-like, shape (n_users, latent_dim)
Matrix of latent factors for users.
Typically this is set to factors pre-trained in a
pre-train Matrix Factorization step
col_latent_init : array-like, shape (n_item, latent_dim)
Matrix of latent factors for items.
Typically this is set to factors pre-trained in a
pre-train Matrix Factorization step
"""
def __init__(
self,
n_anchor,
batch_manager,
row_latent_init,
col_latent_init,
kernel_fun):
""" Instantiate an AnchorManager
"""
train_data = batch_manager.train_data
row_latent = row_latent_init
col_latent = col_latent_init
if kernel_fun is None:
row_k, col_k = _get_ks_from_latents(row_latent, col_latent)
else:
row_k = kernel_fun(row_latent)
col_k = kernel_fun(col_latent)
anchor_idxs = _init_anchor_points(train_data, n_anchor, row_k, col_k)
assert len(anchor_idxs) == n_anchor
anchor_points = train_data[anchor_idxs]
self.train_data = train_data
self.valid_data = batch_manager.valid_data
self.test_data = batch_manager.test_data
self.anchor_idxs = anchor_idxs
self.anchor_points = anchor_points
self.row_k = row_k
self.col_k = col_k
def get_k(self, anchor_idx, user_item_data):
"""Returns the Kernel similarity between the
anchor user_item pair and the user_item pairs
in the user_item data
Parameters
----------
anchor_idx : Array-like, shape (2,)
(user_id, item_id) of the anchor point
user_item_data : Array-like, shape (N_ratings, >2)
Array where first 2 columns are (user_id, item_id) pairs
Returns
-------
np.ndarray, shape (N_ratings,)
Returns an array of kernel weights corresponding to
the chosen anchor for each user_item pair in the data
"""
row_k = self.row_k
col_k = self.col_k
anchor_point = self.anchor_points[anchor_idx]
anchor_uid = int(anchor_point[0])
anchor_iid = int(anchor_point[1])
user_ids = user_item_data[:, 0].astype(np.int64)
item_ids = user_item_data[:, 1].astype(np.int64)
return np.multiply(row_k[anchor_uid][user_ids], col_k[anchor_iid][item_ids])
def get_train_k(self, anchor_idx):
""" Get Kernel matrix of the train_data of a given anchor
Parameters
----------
anchor_idx : Array-like, shape (2,)
(user_id, item_id) of the anchor point
Returns
-------
np.ndarray, shape (N_ratings,)
Returns an array of kernel weights corresponding to
the chosen anchor for each user_item pair in the train data
"""
return self.get_k(anchor_idx, self.train_data)
def get_valid_k(self, anchor_idx):
""" Get Kernel matrix of the validation_data of a given anchor
Parameters
----------
anchor_idx : Array-like, shape (2,)
(user_id, item_id) of the anchor point
Returns
-------
np.ndarray, shape (N_ratings,)
Returns an array of kernel weights corresponding to
the chosen anchor for each user_item pair in the valid data
"""
return self.get_k(anchor_idx, self.valid_data)
def get_test_k(self, anchor_idx):
""" Get Kernel matrix of the test_data of a given anchor
Parameters
----------
anchor_idx : Array-like, shape (2,)
(user_id, item_id) of the anchor point
Returns
-------
np.ndarray, shape (N_ratings,)
Returns an array of kernel weights corresponding to
the chosen anchor for each user_item pair in the test data
"""
return self.get_k(anchor_idx, self.test_data)
Classes
class AnchorManager (n_anchor, batch_manager, row_latent_init, col_latent_init, kernel_fun)
-
AnchorManager class
Parameters
n_anchor
:int
- number of anchor points
batch_manager
:obj: BatchManager
- an instance of BatchManager class
row_latent_init
:array-like, shape (n_users, latent_dim)
- Matrix of latent factors for users. Typically this is set to factors pre-trained in a pre-train Matrix Factorization step
col_latent_init
:array-like, shape (n_item, latent_dim)
- Matrix of latent factors for items. Typically this is set to factors pre-trained in a pre-train Matrix Factorization step
Instantiate an AnchorManager
Expand source code
class AnchorManager: """ AnchorManager class Parameters ---------- n_anchor : int number of anchor points batch_manager : obj: BatchManager an instance of BatchManager class row_latent_init : array-like, shape (n_users, latent_dim) Matrix of latent factors for users. Typically this is set to factors pre-trained in a pre-train Matrix Factorization step col_latent_init : array-like, shape (n_item, latent_dim) Matrix of latent factors for items. Typically this is set to factors pre-trained in a pre-train Matrix Factorization step """ def __init__( self, n_anchor, batch_manager, row_latent_init, col_latent_init, kernel_fun): """ Instantiate an AnchorManager """ train_data = batch_manager.train_data row_latent = row_latent_init col_latent = col_latent_init if kernel_fun is None: row_k, col_k = _get_ks_from_latents(row_latent, col_latent) else: row_k = kernel_fun(row_latent) col_k = kernel_fun(col_latent) anchor_idxs = _init_anchor_points(train_data, n_anchor, row_k, col_k) assert len(anchor_idxs) == n_anchor anchor_points = train_data[anchor_idxs] self.train_data = train_data self.valid_data = batch_manager.valid_data self.test_data = batch_manager.test_data self.anchor_idxs = anchor_idxs self.anchor_points = anchor_points self.row_k = row_k self.col_k = col_k def get_k(self, anchor_idx, user_item_data): """Returns the Kernel similarity between the anchor user_item pair and the user_item pairs in the user_item data Parameters ---------- anchor_idx : Array-like, shape (2,) (user_id, item_id) of the anchor point user_item_data : Array-like, shape (N_ratings, >2) Array where first 2 columns are (user_id, item_id) pairs Returns ------- np.ndarray, shape (N_ratings,) Returns an array of kernel weights corresponding to the chosen anchor for each user_item pair in the data """ row_k = self.row_k col_k = self.col_k anchor_point = self.anchor_points[anchor_idx] anchor_uid = int(anchor_point[0]) anchor_iid = int(anchor_point[1]) user_ids = user_item_data[:, 0].astype(np.int64) item_ids = user_item_data[:, 1].astype(np.int64) return np.multiply(row_k[anchor_uid][user_ids], col_k[anchor_iid][item_ids]) def get_train_k(self, anchor_idx): """ Get Kernel matrix of the train_data of a given anchor Parameters ---------- anchor_idx : Array-like, shape (2,) (user_id, item_id) of the anchor point Returns ------- np.ndarray, shape (N_ratings,) Returns an array of kernel weights corresponding to the chosen anchor for each user_item pair in the train data """ return self.get_k(anchor_idx, self.train_data) def get_valid_k(self, anchor_idx): """ Get Kernel matrix of the validation_data of a given anchor Parameters ---------- anchor_idx : Array-like, shape (2,) (user_id, item_id) of the anchor point Returns ------- np.ndarray, shape (N_ratings,) Returns an array of kernel weights corresponding to the chosen anchor for each user_item pair in the valid data """ return self.get_k(anchor_idx, self.valid_data) def get_test_k(self, anchor_idx): """ Get Kernel matrix of the test_data of a given anchor Parameters ---------- anchor_idx : Array-like, shape (2,) (user_id, item_id) of the anchor point Returns ------- np.ndarray, shape (N_ratings,) Returns an array of kernel weights corresponding to the chosen anchor for each user_item pair in the test data """ return self.get_k(anchor_idx, self.test_data)
Methods
def get_k(self, anchor_idx, user_item_data)
-
Returns the Kernel similarity between the anchor user_item pair and the user_item pairs in the user_item data
Parameters
anchor_idx
:Array-like, shape (2,)
- (user_id, item_id) of the anchor point
user_item_data
:Array-like, shape (N_ratings, >2)
- Array where first 2 columns are (user_id, item_id) pairs
Returns
np.ndarray, shape (N_ratings,)
- Returns an array of kernel weights corresponding to the chosen anchor for each user_item pair in the data
Expand source code
def get_k(self, anchor_idx, user_item_data): """Returns the Kernel similarity between the anchor user_item pair and the user_item pairs in the user_item data Parameters ---------- anchor_idx : Array-like, shape (2,) (user_id, item_id) of the anchor point user_item_data : Array-like, shape (N_ratings, >2) Array where first 2 columns are (user_id, item_id) pairs Returns ------- np.ndarray, shape (N_ratings,) Returns an array of kernel weights corresponding to the chosen anchor for each user_item pair in the data """ row_k = self.row_k col_k = self.col_k anchor_point = self.anchor_points[anchor_idx] anchor_uid = int(anchor_point[0]) anchor_iid = int(anchor_point[1]) user_ids = user_item_data[:, 0].astype(np.int64) item_ids = user_item_data[:, 1].astype(np.int64) return np.multiply(row_k[anchor_uid][user_ids], col_k[anchor_iid][item_ids])
def get_test_k(self, anchor_idx)
-
Get Kernel matrix of the test_data of a given anchor
Parameters
anchor_idx
:Array-like, shape (2,)
- (user_id, item_id) of the anchor point
Returns
np.ndarray, shape (N_ratings,)
- Returns an array of kernel weights corresponding to the chosen anchor for each user_item pair in the test data
Expand source code
def get_test_k(self, anchor_idx): """ Get Kernel matrix of the test_data of a given anchor Parameters ---------- anchor_idx : Array-like, shape (2,) (user_id, item_id) of the anchor point Returns ------- np.ndarray, shape (N_ratings,) Returns an array of kernel weights corresponding to the chosen anchor for each user_item pair in the test data """ return self.get_k(anchor_idx, self.test_data)
def get_train_k(self, anchor_idx)
-
Get Kernel matrix of the train_data of a given anchor
Parameters
anchor_idx
:Array-like, shape (2,)
- (user_id, item_id) of the anchor point
Returns
np.ndarray, shape (N_ratings,)
- Returns an array of kernel weights corresponding to the chosen anchor for each user_item pair in the train data
Expand source code
def get_train_k(self, anchor_idx): """ Get Kernel matrix of the train_data of a given anchor Parameters ---------- anchor_idx : Array-like, shape (2,) (user_id, item_id) of the anchor point Returns ------- np.ndarray, shape (N_ratings,) Returns an array of kernel weights corresponding to the chosen anchor for each user_item pair in the train data """ return self.get_k(anchor_idx, self.train_data)
def get_valid_k(self, anchor_idx)
-
Get Kernel matrix of the validation_data of a given anchor
Parameters
anchor_idx
:Array-like, shape (2,)
- (user_id, item_id) of the anchor point
Returns
np.ndarray, shape (N_ratings,)
- Returns an array of kernel weights corresponding to the chosen anchor for each user_item pair in the valid data
Expand source code
def get_valid_k(self, anchor_idx): """ Get Kernel matrix of the validation_data of a given anchor Parameters ---------- anchor_idx : Array-like, shape (2,) (user_id, item_id) of the anchor point Returns ------- np.ndarray, shape (N_ratings,) Returns an array of kernel weights corresponding to the chosen anchor for each user_item pair in the valid data """ return self.get_k(anchor_idx, self.valid_data)