Module reclab.recommenders.cfnade.cfnade

Implementation of the CF-NADE recommender using Keras.

Expand source code
"""Implementation of the CF-NADE recommender using Keras."""
from keras.layers import Input, Dropout, Lambda, add
from keras.models import Model
import keras.regularizers
from keras.optimizers import Adam
import numpy as np

from .cfnade_lib.nade import NADE
from .cfnade_lib import utils
from .. import recommender


class Cfnade(recommender.PredictRecommender):
    """
    A Neural Autoregressive Distribution Estimator (NADE) for collaborative filtering (CF) tasks.

    Every call to ``update`` restores the freshly initialised network weights and
    retrains the model on all ratings collected so far.

    Parameters
    ----------
    num_users : int
        Number of users in the environment.
    num_items : int
        Number of items in the environment.
    batch_size : int
        Batch size handed to the data generators; capped at ``num_items``.
    train_epoch : int
        Number of epochs to train for each call to ``update``.
    rating_bucket : int
        Number of rating buckets. The buckets are scored ``1, 2, ..., rating_bucket``.
    hidden_dim : int
        Hidden dimension of the NADE layer.
    learning_rate : float
        Learning rate of the Adam optimizer.
    normalized_layer : bool
        Forwarded unchanged to the ``NADE`` layer.

    """

    # Rating returned for items that have not received any rating yet.
    _UNSEEN_ITEM_RATING = 3

    def __init__(
            self, num_users, num_items,
            batch_size=64, train_epoch=10,
            rating_bucket=5, hidden_dim=500,
            learning_rate=0.001, normalized_layer=False):
        """Create new Cfnade recommender."""
        super().__init__()
        self._num_users = num_users
        self._num_items = num_items
        # A batch can never hold more items than exist.
        self._batch_size = batch_size
        if num_items <= batch_size:
            self._batch_size = num_items
        self._input_dim0 = num_users
        self._rating_bucket = rating_bucket
        self._rate_score = np.array(np.arange(1, rating_bucket+1), np.float32)
        self._hidden_dim = hidden_dim
        self._learning_rate = learning_rate
        self._train_epoch = train_epoch
        # NOTE: locals() must be captured before any additional local variable is
        # created, otherwise that variable would leak into the hyperparameters.
        self._hyperparameters.update(locals())
        self._new_items = np.zeros(num_items)

        # We only want the function arguments so remove class related objects.
        del self._hyperparameters['self']
        del self._hyperparameters['__class__']

        self._cf_nade_model = self._build_model(normalized_layer)
        # Keep the initial weights in memory so that update() can restart training
        # from them. An in-memory snapshot (instead of a fixed file in the working
        # directory) keeps several recommender instances from overwriting each other.
        self._initial_weights = self._cf_nade_model.get_weights()

    def _build_model(self, normalized_layer):
        """Assemble and compile the Keras model that wraps the NADE layer."""
        input_layer = Input(shape=(self._input_dim0, self._rating_bucket), name='input_ratings')
        output_ratings = Input(shape=(self._input_dim0, self._rating_bucket), name='output_ratings')
        input_masks = Input(shape=(self._input_dim0,), name='input_masks')
        output_masks = Input(shape=(self._input_dim0,), name='output_masks')
        nade_layer = Dropout(0.0)(input_layer)
        nade_layer = NADE(
            hidden_dim=self._hidden_dim, activation='tanh', bias=True,
            W_regularizer=keras.regularizers.l2(0.02),
            V_regularizer=keras.regularizers.l2(0.02),
            b_regularizer=keras.regularizers.l2(0.02),
            c_regularizer=keras.regularizers.l2(0.02),
            normalized_layer=normalized_layer)(nade_layer)

        predicted_ratings = Lambda(
            utils.prediction_layer,
            output_shape=utils.prediction_output_shape,
            name='predicted_ratings')(nade_layer)

        func_d = Lambda(
            utils.d_layer, output_shape=utils.d_output_shape,
            name='func_d')(input_masks)
        sum_masks = add([input_masks, output_masks])
        func_d_2 = Lambda(
            utils.D_layer, output_shape=utils.D_output_shape,
            name='func_d_2')(sum_masks)
        loss_out = Lambda(
            utils.rating_cost_lambda_func, output_shape=(1, ),
            name='nade_loss')([nade_layer, output_ratings,
                               input_masks, output_masks, func_d_2, func_d])

        model = Model(
            inputs=[input_layer, output_ratings, input_masks, output_masks],
            outputs=[loss_out, predicted_ratings])
        optimizer = Adam(self._learning_rate, 0.9, 0.999, 1e-8)
        # The 'nade_loss' output already is the loss value, so the Keras loss
        # function simply passes the prediction through.
        model.compile(
            loss={'nade_loss': lambda y_true, y_pred: y_pred},
            optimizer=optimizer)
        return model

    def _item_user_ratings(self):
        """Return the stored ratings as a dense integer matrix with one row per item."""
        ratings_matrix = np.around(self._ratings.toarray().transpose())
        return ratings_matrix.astype(int)

    def _make_dataset(self, ratings_matrix, mode):
        """Wrap a rating matrix in a ``utils.DataSet`` that runs in the given mode."""
        return utils.DataSet(ratings_matrix,
                             num_users=self._num_users,
                             num_items=self._num_items,
                             batch_size=self._batch_size,
                             rating_bucket=self._rating_bucket,
                             mode=mode)

    @property
    def name(self):  # noqa: D102
        return 'cfnade'

    def update(self, users=None, items=None, ratings=None):  # noqa: D102
        super().update(users, items, ratings)
        # Start every training run from the initial weights.
        self._cf_nade_model.set_weights(self._initial_weights)

        # Mode 0 is the mode this class uses for training; its exact semantics are
        # defined by utils.DataSet.
        train_set = self._make_dataset(self._item_user_ratings(), mode=0)
        self._cf_nade_model.fit_generator(train_set.generate(),
                                          steps_per_epoch=(self._num_items // self._batch_size),
                                          epochs=self._train_epoch,
                                          callbacks=[train_set], verbose=1)

    def _predict(self, user_item):  # noqa: D102
        ratings_matrix = self._item_user_ratings()

        # Keep track of unseen items. The matrix has one row per item, so the row
        # sums are the per-item rating totals; a total of zero means "never rated".
        self._new_items = np.where(ratings_matrix.sum(axis=1) == 0)[0]

        # Mode 2 is the mode this class uses for prediction; its exact semantics are
        # defined by utils.DataSet.
        test_set = self._make_dataset(ratings_matrix, mode=2)
        pred_rating = []
        for batch in test_set.generate():
            # The second model output holds the per-bucket predictions.
            pred_matrix = self._cf_nade_model.predict(batch[0])[1]
            # Weight each bucket by its score and sum to get the expected rating.
            pred_rating_batch = pred_matrix * self._rate_score[np.newaxis, np.newaxis, :]
            pred_rating.append(pred_rating_batch.sum(axis=2))
        pred_rating = np.concatenate(pred_rating, axis=0)

        # Build the lookup set once instead of scanning the array for every query.
        unseen_items = set(self._new_items.tolist())
        predictions = [
            self._UNSEEN_ITEM_RATING if item in unseen_items else pred_rating[item, user]
            for user, item, _ in user_item
        ]

        return np.array(predictions)

Classes

class Cfnade (num_users, num_items, batch_size=64, train_epoch=10, rating_bucket=5, hidden_dim=500, learning_rate=0.001, normalized_layer=False)

A Neural Autoregressive Distribution Estimator (NADE) for collaborative filtering (CF) tasks.

Parameters

num_users : int
Number of users in the environment.
num_items : int
Number of items in the environment.
train_set : np.matrix
Matrix of shape (num_users, num_items) populated with user ratings.
train_epoch : int
Number of epochs to train for each call.
batch_size : int
Batch size during initial training phase.
rating_bucket : int
Number of rating buckets.
rate_score : array of float
The rating score assigned to each bucket. It is derived internally as 1, 2, ..., rating_bucket and is not a constructor argument.
hidden_dim : int
Hidden dimension used to construct the NADE layer.
learning_rate : float
Learning rate of the optimizer.

Create new Cfnade recommender.

Expand source code
class Cfnade(recommender.PredictRecommender):
    """
    A Neural Autoregressive Distribution Estimator (NADE) for collaborative filtering (CF) tasks.

    Every call to ``update`` restores the freshly initialised network weights and
    retrains the model on all ratings collected so far.

    Parameters
    ----------
    num_users : int
        Number of users in the environment.
    num_items : int
        Number of items in the environment.
    batch_size : int
        Batch size handed to the data generators; capped at ``num_items``.
    train_epoch : int
        Number of epochs to train for each call to ``update``.
    rating_bucket : int
        Number of rating buckets. The buckets are scored ``1, 2, ..., rating_bucket``.
    hidden_dim : int
        Hidden dimension of the NADE layer.
    learning_rate : float
        Learning rate of the Adam optimizer.
    normalized_layer : bool
        Forwarded unchanged to the ``NADE`` layer.

    """

    # Rating returned for items that have not received any rating yet.
    _UNSEEN_ITEM_RATING = 3

    def __init__(
            self, num_users, num_items,
            batch_size=64, train_epoch=10,
            rating_bucket=5, hidden_dim=500,
            learning_rate=0.001, normalized_layer=False):
        """Create new Cfnade recommender."""
        super().__init__()
        self._num_users = num_users
        self._num_items = num_items
        # A batch can never hold more items than exist.
        self._batch_size = batch_size
        if num_items <= batch_size:
            self._batch_size = num_items
        self._input_dim0 = num_users
        self._rating_bucket = rating_bucket
        self._rate_score = np.array(np.arange(1, rating_bucket+1), np.float32)
        self._hidden_dim = hidden_dim
        self._learning_rate = learning_rate
        self._train_epoch = train_epoch
        # NOTE: locals() must be captured before any additional local variable is
        # created, otherwise that variable would leak into the hyperparameters.
        self._hyperparameters.update(locals())
        self._new_items = np.zeros(num_items)

        # We only want the function arguments so remove class related objects.
        del self._hyperparameters['self']
        del self._hyperparameters['__class__']

        self._cf_nade_model = self._build_model(normalized_layer)
        # Keep the initial weights in memory so that update() can restart training
        # from them. An in-memory snapshot (instead of a fixed file in the working
        # directory) keeps several recommender instances from overwriting each other.
        self._initial_weights = self._cf_nade_model.get_weights()

    def _build_model(self, normalized_layer):
        """Assemble and compile the Keras model that wraps the NADE layer."""
        input_layer = Input(shape=(self._input_dim0, self._rating_bucket), name='input_ratings')
        output_ratings = Input(shape=(self._input_dim0, self._rating_bucket), name='output_ratings')
        input_masks = Input(shape=(self._input_dim0,), name='input_masks')
        output_masks = Input(shape=(self._input_dim0,), name='output_masks')
        nade_layer = Dropout(0.0)(input_layer)
        nade_layer = NADE(
            hidden_dim=self._hidden_dim, activation='tanh', bias=True,
            W_regularizer=keras.regularizers.l2(0.02),
            V_regularizer=keras.regularizers.l2(0.02),
            b_regularizer=keras.regularizers.l2(0.02),
            c_regularizer=keras.regularizers.l2(0.02),
            normalized_layer=normalized_layer)(nade_layer)

        predicted_ratings = Lambda(
            utils.prediction_layer,
            output_shape=utils.prediction_output_shape,
            name='predicted_ratings')(nade_layer)

        func_d = Lambda(
            utils.d_layer, output_shape=utils.d_output_shape,
            name='func_d')(input_masks)
        sum_masks = add([input_masks, output_masks])
        func_d_2 = Lambda(
            utils.D_layer, output_shape=utils.D_output_shape,
            name='func_d_2')(sum_masks)
        loss_out = Lambda(
            utils.rating_cost_lambda_func, output_shape=(1, ),
            name='nade_loss')([nade_layer, output_ratings,
                               input_masks, output_masks, func_d_2, func_d])

        model = Model(
            inputs=[input_layer, output_ratings, input_masks, output_masks],
            outputs=[loss_out, predicted_ratings])
        optimizer = Adam(self._learning_rate, 0.9, 0.999, 1e-8)
        # The 'nade_loss' output already is the loss value, so the Keras loss
        # function simply passes the prediction through.
        model.compile(
            loss={'nade_loss': lambda y_true, y_pred: y_pred},
            optimizer=optimizer)
        return model

    def _item_user_ratings(self):
        """Return the stored ratings as a dense integer matrix with one row per item."""
        ratings_matrix = np.around(self._ratings.toarray().transpose())
        return ratings_matrix.astype(int)

    def _make_dataset(self, ratings_matrix, mode):
        """Wrap a rating matrix in a ``utils.DataSet`` that runs in the given mode."""
        return utils.DataSet(ratings_matrix,
                             num_users=self._num_users,
                             num_items=self._num_items,
                             batch_size=self._batch_size,
                             rating_bucket=self._rating_bucket,
                             mode=mode)

    @property
    def name(self):  # noqa: D102
        return 'cfnade'

    def update(self, users=None, items=None, ratings=None):  # noqa: D102
        super().update(users, items, ratings)
        # Start every training run from the initial weights.
        self._cf_nade_model.set_weights(self._initial_weights)

        # Mode 0 is the mode this class uses for training; its exact semantics are
        # defined by utils.DataSet.
        train_set = self._make_dataset(self._item_user_ratings(), mode=0)
        self._cf_nade_model.fit_generator(train_set.generate(),
                                          steps_per_epoch=(self._num_items // self._batch_size),
                                          epochs=self._train_epoch,
                                          callbacks=[train_set], verbose=1)

    def _predict(self, user_item):  # noqa: D102
        ratings_matrix = self._item_user_ratings()

        # Keep track of unseen items. The matrix has one row per item, so the row
        # sums are the per-item rating totals; a total of zero means "never rated".
        self._new_items = np.where(ratings_matrix.sum(axis=1) == 0)[0]

        # Mode 2 is the mode this class uses for prediction; its exact semantics are
        # defined by utils.DataSet.
        test_set = self._make_dataset(ratings_matrix, mode=2)
        pred_rating = []
        for batch in test_set.generate():
            # The second model output holds the per-bucket predictions.
            pred_matrix = self._cf_nade_model.predict(batch[0])[1]
            # Weight each bucket by its score and sum to get the expected rating.
            pred_rating_batch = pred_matrix * self._rate_score[np.newaxis, np.newaxis, :]
            pred_rating.append(pred_rating_batch.sum(axis=2))
        pred_rating = np.concatenate(pred_rating, axis=0)

        # Build the lookup set once instead of scanning the array for every query.
        unseen_items = set(self._new_items.tolist())
        predictions = [
            self._UNSEEN_ITEM_RATING if item in unseen_items else pred_rating[item, user]
            for user, item, _ in user_item
        ]

        return np.array(predictions)

Ancestors

Inherited members