Module `reclab.environments.environment`

Defines a set of base classes from which environments can inherit.

Environment is the interface all environments must implement. The other classes represent specific environment variants that occur often enough to be abstract classes to inherit from.

Expand source code

"""Defines a set of base classes from which environments can inherit.

Environment is the interface all environments must implement. The other classes represent
specific environment variants that occur often enough to be abstract classes to inherit from.
"""
import abc
import collections

import numpy as np
from scipy.stats import norm, lognorm, pareto


class Environment(abc.ABC):
    """The interface all environments must implement.

    Instances can be used as context managers: entering yields the environment itself and
    leaving the ``with`` block calls ``close``.
    """

    @abc.abstractmethod
    def reset(self):
        """Reset the environment to its original state. Must be called before the first step.

        Returns
        -------
        users : iterable
            New users and users whose information got updated this timestep.
        items : iterable
            New items and items whose information got updated this timestep.
        ratings : iterable
            New ratings and ratings whose information got updated this timestep.
        info : dict
            Extra information that can be used for debugging but should not be made accessible to
            the recommender.

        """
        raise NotImplementedError

    @abc.abstractmethod
    def step(self, recommendations):
        """Run one timestep of the environment.

        Parameters
        ----------
        recommendations : iterable
            The recommendations made to each user. The i-th recommendation should correspond to
            the i-th user that was online at this timestep.

        Returns
        -------
        users : iterable
            New users and users whose information got updated this timestep.
        items : iterable
            New items and items whose information got updated this timestep.
        ratings : iterable
            New ratings and ratings whose information got updated this timestep.
        info : dict
            Extra information that can be used for debugging but should not be made accessible to
            the recommender.

        """
        raise NotImplementedError

    @abc.abstractmethod
    def online_users(self):
        """Return the users that need a recommendation at the current timestep.

        Returns
        -------
        users : iterable
            The users that are online.

        """
        raise NotImplementedError

    @property
    @abc.abstractmethod
    def users(self):
        """Return all users currently in the environment.

        Returns
        -------
        users : iterable
            All users in the environment.

        """
        raise NotImplementedError

    @property
    @abc.abstractmethod
    def items(self):
        """Return all items currently in the environment.

        Returns
        -------
        items : iterable
            All items in the environment.

        """
        raise NotImplementedError

    @property
    @abc.abstractmethod
    def ratings(self):
        """Return all ratings that have been made in the environment.

        Returns
        -------
        ratings : iterable
            All ratings in the environment.

        """
        raise NotImplementedError

    @property
    @abc.abstractmethod
    def name(self):
        """Name of environment, used for saving."""
        raise NotImplementedError

    def seed(self, seed=None):
        """Set the seed for this environment's random number generator(s).

        The base implementation does nothing; subclasses that use randomness should override it.
        """

    def close(self):
        """Perform any necessary cleanup. The base implementation does nothing."""

    def __enter__(self):
        """Enter a ``with`` block and return the environment itself."""
        # Without __enter__ the existing __exit__ is unusable: `with env:` would fail.
        return self

    def __exit__(self, *args):
        """Perform any necessary cleanup when the object goes out of context.

        Returns
        -------
        suppress : bool
            Always False, so an exception raised inside the ``with`` block keeps propagating.

        """
        self.close()
        return False


class DictEnvironment(Environment):
    """An environment where data gets passed around as dictionaries.

    Environments can subclass this class by implementing name, _get_dense_ratings, _rate_item
    and _reset_state. Optionally environments can also override _update_state,
    _rating_context, _get_user_prob, and _select_online_users.

    Parameters
    ----------
    rating_frequency : float
        The proportion of users that will need a recommendation at each step.
        Must be between 0 and 1.
    num_init_ratings : int
        The number of ratings available from the start. User-item pairs are randomly selected.
    memory_length : int
        The maximum number of most recently recommended items kept in each user's history.
    user_dist_choice : str
        The distribution used to weight users when sampling online users and initial ratings.
        One of 'uniform' (the default), 'norm', 'lognorm', or 'pareto'.

    """

    def __init__(self, rating_frequency=0.02, num_init_ratings=0, memory_length=0,
                 user_dist_choice='uniform'):
        """Store the configuration; users, items and ratings stay unset until reset()."""
        # Stays at -1 until reset() advances it to 0.
        self._timestep = -1
        # The RandomState to use while initializing the environment.
        self._init_random = np.random.RandomState()
        # The RandomState to use after the environment is initialized.
        self._dynamics_random = np.random.RandomState()
        self._rating_frequency = rating_frequency
        self._num_init_ratings = num_init_ratings
        self._user_dist_choice = user_dist_choice
        self._users = None
        self._items = None
        self._ratings = None
        self._dense_ratings = None
        self._online_users = None
        self._user_prob = None
        # Maps a user id to the list of item ids most recently recommended to that user.
        self._user_histories = collections.defaultdict(list)
        self._memory_length = memory_length

    def reset(self):
        """Reset the environment to its original state. Must be called before the first step.

        Returns
        -------
        users : OrderedDict
            The initial users where the key represents the user id and the value represents
            the visible features associated with the user.
        items : OrderedDict
            The initial items where the key represents the item id and the value represents
            the visible features associated with the item.
        ratings : dict
            The initial ratings where the key is a (user id, item id) pair and the value is a
            (rating, context) pair, the context being the visible features of the setting in
            which the rating was made.

        """
        # Initialize the state of the environment.
        self._timestep = -1
        self._reset_state()
        self._user_histories = collections.defaultdict(list)
        num_users = len(self._users)
        num_items = len(self._items)
        self._user_prob = self._get_user_prob()

        # We will lazily compute dense ratings.
        self._dense_ratings = None

        # Sample the initial user-item pairs without replacement. Each pair is weighted by its
        # user's probability, spread evenly over all items.
        pair_prob = np.repeat(self._user_prob, num_items) / num_items
        idx_1d = self._init_random.choice(num_users * num_items, self._num_init_ratings,
                                          replace=False, p=pair_prob)
        user_ids = idx_1d // num_items
        item_ids = idx_1d % num_items
        self._ratings = {}
        # TODO: This is a hack, but I don't think we should necessarily put the burden
        # of having to implement a version of _rate_item that knows whether it's being called
        # in reset or not on people deriving from this class. Need to think of a better way
        # than doing this though.
        # While the initial ratings are generated the dynamics generator is temporarily
        # replaced by the initialization generator. The finally clause guarantees the real
        # dynamics generator is restored even if _rate_item or _rating_context raises.
        dynamics_random = self._dynamics_random
        self._dynamics_random = self._init_random
        try:
            for user_id, item_id in zip(user_ids, item_ids):
                self._ratings[user_id, item_id] = (self._rate_item(user_id, item_id),
                                                   self._rating_context(user_id))
        finally:
            self._dynamics_random = dynamics_random

        # Finally, set the users that will be online for the first step.
        self._online_users = self._select_online_users()

        self._timestep += 1
        return self._users.copy(), self._items.copy(), self._ratings.copy()

    def step(self, recommendations):
        """Run one timestep of the environment.

        Parameters
        ----------
        recommendations : np.ndarray
            The recommendations made to each user. recommendations[i] corresponds to the
            item id recommended to the i-th online user. This array must have the same size as
            the ordered dict returned by online_users.

        Returns
        -------
        users : OrderedDict
            The new users where the key represents the user id and the value represents
            the visible features associated with the user.
        items : OrderedDict
            The new items where the key represents the item id and the value represents
            the visible features associated with the item.
        ratings : dict
            The new ratings where the key is a (user id, item id) pair and the value is a
            (rating, context) pair, the context being the visible features of the setting in
            which the rating was made.
        info : dict
            Extra information for debugging and evaluation. info["users"] will return the dict
            of visible user states, info["items"] will return the dict of visible item states, and
            info["ratings"] gets the dict of all ratings. These are the environment's own
            objects, not copies.

        """
        assert len(recommendations) == len(self._online_users)
        new_users, new_items = self._update_state()
        # Old dense ratings are now invalid so set it to None and lazily recompute.
        self._dense_ratings = None

        # Get online users to rate the recommended items.
        ratings = {}
        for user_id, item_id in zip(self._online_users, recommendations):
            ratings[user_id, item_id] = (self._rate_item(user_id, item_id),
                                         self._rating_context(user_id))
            # Record the recommendation and drop the oldest entry once the history
            # exceeds the configured memory length.
            history = self._user_histories[user_id]
            history.append(item_id)
            if len(history) == self._memory_length + 1:
                history.pop(0)
            assert len(history) <= self._memory_length

        self._ratings.update(ratings)

        # Update the online users.
        self._online_users = self._select_online_users()

        # Create the info dict.
        info = {'users': self._users,
                'items': self._items,
                'ratings': self._ratings}

        self._timestep += 1
        return new_users, new_items, ratings, info

    def online_users(self):
        """Return the users that need a recommendation at the current timestep.

        Returns
        -------
        users_contexts : OrderedDict
            The users that are online. The key is the user id and the value is the
            features that represent the context in which the rating will be made.

        """
        user_contexts = collections.OrderedDict()
        for user_id in self._online_users:
            user_contexts[user_id] = self._rating_context(user_id)
        return user_contexts

    @property
    def users(self):
        """Return all users currently in the environment.

        Returns
        -------
        users : OrderedDict
            All users in the environment, the key represents the user id and the value is the
            visible features associated with the user.

        """
        return self._users

    @property
    def items(self):
        """Return all items currently in the environment.

        Returns
        -------
        items : OrderedDict
            All items in the environment, the key represents the item id and the value is the
            visible features associated with the item.

        """
        return self._items

    @property
    def ratings(self):
        """Return all ratings that have been made in the environment.

        Returns
        -------
        ratings : dict
            All ratings where the key is a (user id, item id) pair and the value is a
            (rating, context) pair, the context being the visible features of the setting in
            which the rating was made.

        """
        return self._ratings

    @property
    def dense_ratings(self):
        """Return all the true ratings on every user-item pair at the current timestep.

        A true rating is defined as the rating a user would make with all noise removed.
        The array is computed on first access and cached until the next reset or step.

        Returns
        -------
        dense_ratings : np.ndarray
            The array of all true ratings where dense_ratings[i, j] is the rating by user i
            on item j.

        """
        if self._dense_ratings is None:
            self._dense_ratings = self._get_dense_ratings()
        return self._dense_ratings

    def seed(self, seed=None):
        """Set the seed for this environment's random number generators.

        Parameters
        ----------
        seed : int or tuple of int
            The seed for the random number generators. If seed is an int or a tuple of length 1 all
            random number generators will be initialized with that seed. If it is a tuple of length
            2 the random number generator for the initial state of the environment will be
            initialized with seed[0] and the random number generator for the environment dynamics
            will be initialized with seed[1].

        """
        if seed is None or np.issubdtype(type(seed), np.integer):
            self._init_random.seed(seed)
            self._dynamics_random.seed(seed)
        elif len(seed) == 1:
            self._init_random.seed(seed[0])
            self._dynamics_random.seed(seed[0])
        else:
            self._init_random.seed(seed[0])
            self._dynamics_random.seed(seed[1])

    @abc.abstractmethod
    def _get_dense_ratings(self):
        """Compute all the true ratings on every user-item pair at the current timestep.

        A true rating is defined as the rating a user would make with all noise removed.

        Returns
        -------
        dense_ratings : np.ndarray
            The array of all true ratings where dense_ratings[i, j] is the rating by user i
            on item j.

        """
        raise NotImplementedError

    @abc.abstractmethod
    def _rate_item(self, user_id, item_id):
        """Get a user to rate an item and update the internal rating state.

        Parameters
        ----------
        user_id : int
            The id of the user making the rating.
        item_id : int
            The id of the item being rated.

        Returns
        -------
        rating : float
            The rating the item was given by the user.

        """
        raise NotImplementedError

    @abc.abstractmethod
    def _reset_state(self):
        """Reset the state associated with users and items.

        reset() reads self._users and self._items right after calling this, so implementations
        must set both.
        """
        raise NotImplementedError

    def _update_state(self):  # pylint: disable=no-self-use
        """Update the state associated with users and items.

        The default implementation assumes there is no state that ever gets updated and no new
        users and items ever get added to the environment after being reset. If this is untrue
        you must override this function.

        Returns
        -------
        new_users : OrderedDict
            The newly added users. The key represents the user id and the value
            represents the visible features of the user.
        new_items : OrderedDict
            The newly added items. The key represents the item id and the value
            represents the visible features of the item.

        """
        return collections.OrderedDict(), collections.OrderedDict()

    def _rating_context(self, user_id):  # pylint: disable=no-self-use, unused-argument
        """Get the visible features of the context that the user will make the rating in.

        The default implementation assumes there are no visible features associated with each
        rating. If this is untrue you must override this function.

        Parameters
        ----------
        user_id : int
            The id of the user that will be rating an item.

        Returns
        -------
        context : np.ndarray
            The vector that represents the visible features of the context in which the given user
            will consume and rate the content. The default is an empty vector.

        """
        return np.zeros(0)

    def _get_user_prob(self):
        """Get the probability distribution for choosing online users at each timestep.

        The default assumes that users are drawn at uniform. To modify, pass a different
        user_dist_choice when initializing the environment.

        Returns
        -------
        user_dist : np.ndarray
            One selection probability per user.

        Raises
        ------
        ValueError
            If the configured distribution name is not recognized.

        """
        dist_choice = self._user_dist_choice
        num_users = len(self._users)

        if dist_choice == 'uniform':
            return np.ones(num_users) / num_users
        if dist_choice not in ('norm', 'lognorm', 'pareto'):
            raise ValueError('user distribution name not recognized')

        # Assign every user a distinct random position under the density curve. The
        # permutation comes from the seedable initialization generator (not the global NumPy
        # generator) so that seed() makes the resulting distribution reproducible.
        idx = self._init_random.permutation(num_users)
        if dist_choice == 'norm':
            user_dist = norm.pdf(idx, scale=num_users / 7, loc=num_users / 2)
        elif dist_choice == 'lognorm':
            user_dist = lognorm.pdf(idx, 1, scale=num_users / 7, loc=-1)
        else:
            user_dist = pareto.pdf(idx, 1, scale=num_users / 1e4, loc=-1)

        # Normalize the densities so that they form a probability vector.
        user_dist = user_dist / user_dist.sum()
        return np.clip(user_dist, 0, 1)

    def _select_online_users(self):
        """Select the online users at this timestep.

        Returns
        -------
        online_users : np.ndarray
            The ids of all users that are online, sampled without replacement.

        """
        num_users = len(self._users)
        num_online = int(self._rating_frequency * num_users)
        return self._dynamics_random.choice(num_users, size=num_online,
                                            replace=False, p=self._user_prob)

Classes

class DictEnvironment (rating_frequency=0.02, num_init_ratings=0, memory_length=0, user_dist_choice='uniform')

An environment where data gets passed around as dictionaries.

Environments can subclass this class by implementing name, _get_dense_ratings, _rate_item and _reset_state. Optionally environments can also override _update_state, _rating_context, and _select_online_users.

Parameters

rating_frequency : float: The proportion of users that will need a recommendation at each step. Must be between 0 and 1.
num_init_ratings : int: The number of ratings available from the start. User-item pairs are randomly selected.
memory_length : int: The number of recent items a user remembers which affect the rating.
user_dist_choice : str: The choice of user distribution for selecting online users. By default, the subset of online users is chosen from a uniform distribution ('uniform'). The other supported values are 'norm', 'lognorm', and 'pareto'.

Initialize the environment's configuration and internal state.

Expand source code

class DictEnvironment(Environment):
    """An environment where data gets passed around as dictionaries.

    Environments can subclass this class by implementing name, _get_dense_ratings, _rate_item
    and _reset_state. Optionally environments can also override _update_state,
    _rating_context, _get_user_prob, and _select_online_users.

    Parameters
    ----------
    rating_frequency : float
        The proportion of users that will need a recommendation at each step.
        Must be between 0 and 1.
    num_init_ratings : int
        The number of ratings available from the start. User-item pairs are randomly selected.
    memory_length : int
        The maximum number of most recently recommended items kept in each user's history.
    user_dist_choice : str
        The distribution used to weight users when sampling online users and initial ratings.
        One of 'uniform' (the default), 'norm', 'lognorm', or 'pareto'.

    """

    def __init__(self, rating_frequency=0.02, num_init_ratings=0, memory_length=0,
                 user_dist_choice='uniform'):
        """Store the configuration; users, items and ratings stay unset until reset()."""
        # Stays at -1 until reset() advances it to 0.
        self._timestep = -1
        # The RandomState to use while initializing the environment.
        self._init_random = np.random.RandomState()
        # The RandomState to use after the environment is initialized.
        self._dynamics_random = np.random.RandomState()
        self._rating_frequency = rating_frequency
        self._num_init_ratings = num_init_ratings
        self._user_dist_choice = user_dist_choice
        self._users = None
        self._items = None
        self._ratings = None
        self._dense_ratings = None
        self._online_users = None
        self._user_prob = None
        # Maps a user id to the list of item ids most recently recommended to that user.
        self._user_histories = collections.defaultdict(list)
        self._memory_length = memory_length

    def reset(self):
        """Reset the environment to its original state. Must be called before the first step.

        Returns
        -------
        users : OrderedDict
            The initial users where the key represents the user id and the value represents
            the visible features associated with the user.
        items : OrderedDict
            The initial items where the key represents the item id and the value represents
            the visible features associated with the item.
        ratings : dict
            The initial ratings where the key is a (user id, item id) pair and the value is a
            (rating, context) pair, the context being the visible features of the setting in
            which the rating was made.

        """
        # Initialize the state of the environment.
        self._timestep = -1
        self._reset_state()
        self._user_histories = collections.defaultdict(list)
        num_users = len(self._users)
        num_items = len(self._items)
        self._user_prob = self._get_user_prob()

        # We will lazily compute dense ratings.
        self._dense_ratings = None

        # Sample the initial user-item pairs without replacement. Each pair is weighted by its
        # user's probability, spread evenly over all items.
        pair_prob = np.repeat(self._user_prob, num_items) / num_items
        idx_1d = self._init_random.choice(num_users * num_items, self._num_init_ratings,
                                          replace=False, p=pair_prob)
        user_ids = idx_1d // num_items
        item_ids = idx_1d % num_items
        self._ratings = {}
        # TODO: This is a hack, but I don't think we should necessarily put the burden
        # of having to implement a version of _rate_item that knows whether it's being called
        # in reset or not on people deriving from this class. Need to think of a better way
        # than doing this though.
        # While the initial ratings are generated the dynamics generator is temporarily
        # replaced by the initialization generator. The finally clause guarantees the real
        # dynamics generator is restored even if _rate_item or _rating_context raises.
        dynamics_random = self._dynamics_random
        self._dynamics_random = self._init_random
        try:
            for user_id, item_id in zip(user_ids, item_ids):
                self._ratings[user_id, item_id] = (self._rate_item(user_id, item_id),
                                                   self._rating_context(user_id))
        finally:
            self._dynamics_random = dynamics_random

        # Finally, set the users that will be online for the first step.
        self._online_users = self._select_online_users()

        self._timestep += 1
        return self._users.copy(), self._items.copy(), self._ratings.copy()

    def step(self, recommendations):
        """Run one timestep of the environment.

        Parameters
        ----------
        recommendations : np.ndarray
            The recommendations made to each user. recommendations[i] corresponds to the
            item id recommended to the i-th online user. This array must have the same size as
            the ordered dict returned by online_users.

        Returns
        -------
        users : OrderedDict
            The new users where the key represents the user id and the value represents
            the visible features associated with the user.
        items : OrderedDict
            The new items where the key represents the item id and the value represents
            the visible features associated with the item.
        ratings : dict
            The new ratings where the key is a (user id, item id) pair and the value is a
            (rating, context) pair, the context being the visible features of the setting in
            which the rating was made.
        info : dict
            Extra information for debugging and evaluation. info["users"] will return the dict
            of visible user states, info["items"] will return the dict of visible item states, and
            info["ratings"] gets the dict of all ratings. These are the environment's own
            objects, not copies.

        """
        assert len(recommendations) == len(self._online_users)
        new_users, new_items = self._update_state()
        # Old dense ratings are now invalid so set it to None and lazily recompute.
        self._dense_ratings = None

        # Get online users to rate the recommended items.
        ratings = {}
        for user_id, item_id in zip(self._online_users, recommendations):
            ratings[user_id, item_id] = (self._rate_item(user_id, item_id),
                                         self._rating_context(user_id))
            # Record the recommendation and drop the oldest entry once the history
            # exceeds the configured memory length.
            history = self._user_histories[user_id]
            history.append(item_id)
            if len(history) == self._memory_length + 1:
                history.pop(0)
            assert len(history) <= self._memory_length

        self._ratings.update(ratings)

        # Update the online users.
        self._online_users = self._select_online_users()

        # Create the info dict.
        info = {'users': self._users,
                'items': self._items,
                'ratings': self._ratings}

        self._timestep += 1
        return new_users, new_items, ratings, info

    def online_users(self):
        """Return the users that need a recommendation at the current timestep.

        Returns
        -------
        users_contexts : OrderedDict
            The users that are online. The key is the user id and the value is the
            features that represent the context in which the rating will be made.

        """
        user_contexts = collections.OrderedDict()
        for user_id in self._online_users:
            user_contexts[user_id] = self._rating_context(user_id)
        return user_contexts

    @property
    def users(self):
        """Return all users currently in the environment.

        Returns
        -------
        users : OrderedDict
            All users in the environment, the key represents the user id and the value is the
            visible features associated with the user.

        """
        return self._users

    @property
    def items(self):
        """Return all items currently in the environment.

        Returns
        -------
        items : OrderedDict
            All items in the environment, the key represents the item id and the value is the
            visible features associated with the item.

        """
        return self._items

    @property
    def ratings(self):
        """Return all ratings that have been made in the environment.

        Returns
        -------
        ratings : dict
            All ratings where the key is a (user id, item id) pair and the value is a
            (rating, context) pair, the context being the visible features of the setting in
            which the rating was made.

        """
        return self._ratings

    @property
    def dense_ratings(self):
        """Return all the true ratings on every user-item pair at the current timestep.

        A true rating is defined as the rating a user would make with all noise removed.
        The array is computed on first access and cached until the next reset or step.

        Returns
        -------
        dense_ratings : np.ndarray
            The array of all true ratings where dense_ratings[i, j] is the rating by user i
            on item j.

        """
        if self._dense_ratings is None:
            self._dense_ratings = self._get_dense_ratings()
        return self._dense_ratings

    def seed(self, seed=None):
        """Set the seed for this environment's random number generators.

        Parameters
        ----------
        seed : int or tuple of int
            The seed for the random number generators. If seed is an int or a tuple of length 1 all
            random number generators will be initialized with that seed. If it is a tuple of length
            2 the random number generator for the initial state of the environment will be
            initialized with seed[0] and the random number generator for the environment dynamics
            will be initialized with seed[1].

        """
        if seed is None or np.issubdtype(type(seed), np.integer):
            self._init_random.seed(seed)
            self._dynamics_random.seed(seed)
        elif len(seed) == 1:
            self._init_random.seed(seed[0])
            self._dynamics_random.seed(seed[0])
        else:
            self._init_random.seed(seed[0])
            self._dynamics_random.seed(seed[1])

    @abc.abstractmethod
    def _get_dense_ratings(self):
        """Compute all the true ratings on every user-item pair at the current timestep.

        A true rating is defined as the rating a user would make with all noise removed.

        Returns
        -------
        dense_ratings : np.ndarray
            The array of all true ratings where dense_ratings[i, j] is the rating by user i
            on item j.

        """
        raise NotImplementedError

    @abc.abstractmethod
    def _rate_item(self, user_id, item_id):
        """Get a user to rate an item and update the internal rating state.

        Parameters
        ----------
        user_id : int
            The id of the user making the rating.
        item_id : int
            The id of the item being rated.

        Returns
        -------
        rating : float
            The rating the item was given by the user.

        """
        raise NotImplementedError

    @abc.abstractmethod
    def _reset_state(self):
        """Reset the state associated with users and items.

        reset() reads self._users and self._items right after calling this, so implementations
        must set both.
        """
        raise NotImplementedError

    def _update_state(self):  # pylint: disable=no-self-use
        """Update the state associated with users and items.

        The default implementation assumes there is no state that ever gets updated and no new
        users and items ever get added to the environment after being reset. If this is untrue
        you must override this function.

        Returns
        -------
        new_users : OrderedDict
            The newly added users. The key represents the user id and the value
            represents the visible features of the user.
        new_items : OrderedDict
            The newly added items. The key represents the item id and the value
            represents the visible features of the item.

        """
        return collections.OrderedDict(), collections.OrderedDict()

    def _rating_context(self, user_id):  # pylint: disable=no-self-use, unused-argument
        """Get the visible features of the context that the user will make the rating in.

        The default implementation assumes there are no visible features associated with each
        rating. If this is untrue you must override this function.

        Parameters
        ----------
        user_id : int
            The id of the user that will be rating an item.

        Returns
        -------
        context : np.ndarray
            The vector that represents the visible features of the context in which the given user
            will consume and rate the content. The default is an empty vector.

        """
        return np.zeros(0)

    def _get_user_prob(self):
        """Get the probability distribution for choosing online users at each timestep.

        The default assumes that users are drawn at uniform. To modify, pass a different
        user_dist_choice when initializing the environment.

        Returns
        -------
        user_dist : np.ndarray
            One selection probability per user.

        Raises
        ------
        ValueError
            If the configured distribution name is not recognized.

        """
        dist_choice = self._user_dist_choice
        num_users = len(self._users)

        if dist_choice == 'uniform':
            return np.ones(num_users) / num_users
        if dist_choice not in ('norm', 'lognorm', 'pareto'):
            raise ValueError('user distribution name not recognized')

        # Assign every user a distinct random position under the density curve. The
        # permutation comes from the seedable initialization generator (not the global NumPy
        # generator) so that seed() makes the resulting distribution reproducible.
        idx = self._init_random.permutation(num_users)
        if dist_choice == 'norm':
            user_dist = norm.pdf(idx, scale=num_users / 7, loc=num_users / 2)
        elif dist_choice == 'lognorm':
            user_dist = lognorm.pdf(idx, 1, scale=num_users / 7, loc=-1)
        else:
            user_dist = pareto.pdf(idx, 1, scale=num_users / 1e4, loc=-1)

        # Normalize the densities so that they form a probability vector.
        user_dist = user_dist / user_dist.sum()
        return np.clip(user_dist, 0, 1)

    def _select_online_users(self):
        """Select the online users at this timestep.

        Returns
        -------
        online_users : np.ndarray
            The ids of all users that are online, sampled without replacement.

        """
        num_users = len(self._users)
        num_online = int(self._rating_frequency * num_users)
        return self._dynamics_random.choice(num_users, size=num_online,
                                            replace=False, p=self._user_prob)

Ancestors

Environment
abc.ABC

Subclasses

Instance variables

var dense_ratings

Return all the true ratings on every user-item pair at the current timestep.

A true rating is defined as the rating a user would make with all noise removed.

Returns

dense_ratings : np.ndarray: The array of all true ratings where dense_ratings[i, j] is the true rating by user i on item j.

Expand source code

@property
def dense_ratings(self):
    """Return the noise-free rating of every user-item pair at the current timestep.

    The array is computed on first access via _get_dense_ratings and cached in
    _dense_ratings; the cached value is reused until that attribute is set back to None.

    Returns
    -------
    dense_ratings : np.ndarray
        The array of all true ratings where dense_ratings[i, j] is the rating by user i
        on item j.

    """
    cached = self._dense_ratings
    if cached is not None:
        return cached
    # Cache miss: compute once and remember the result for later accesses.
    self._dense_ratings = self._get_dense_ratings()
    return self._dense_ratings

var items

Return all items currently in the environment.

Returns

items : OrderedDict: All items in the environment, the key represents the item id and the value is the visible features associated with the item.

Expand source code

@property
def items(self):
    """Expose the items that currently exist in the environment.

    The internal container is returned directly, not a copy.

    Returns
    -------
    items : OrderedDict
        Maps each item id to the visible features associated with that item.

    """
    return self._items

var ratings

Return all ratings that have been made in the environment.

Returns

ratings : dict: All ratings where the key is a pair (2-tuple) whose first element is the user id and the second element is the item id. The value is a tuple of the rating and the features associated with the setting in which the rating was made.

Expand source code

@property
def ratings(self):
    """Expose every rating that has been recorded in the environment.

    The internal container is returned directly, not a copy.

    Returns
    -------
    ratings : dict
        Keyed by the pair (user id, item id). Each value is a tuple whose first element
        is the rating and whose second element is the context features under which the
        rating was made.

    """
    return self._ratings

var users

Return all users currently in the environment.

Returns

users : OrderedDict: All users in the environment, the key represents the user id and the value is the visible features associated with the user.

Expand source code

@property
def users(self):
    """Expose the users that currently exist in the environment.

    The internal container is returned directly, not a copy.

    Returns
    -------
    users : OrderedDict
        Maps each user id to the visible features associated with that user.

    """
    return self._users

Methods

def online_users(self)

Return the users that need a recommendation at the current timestep.

Returns

users_contexts : OrderedDict: The users that are online. The key is the user id and the value is the features that represent the context in which the rating will be made.

Expand source code

def online_users(self):
    """Collect the users awaiting a recommendation at the current timestep.

    Returns
    -------
    users_contexts : OrderedDict
        One entry per online user, in the order stored in _online_users. The key is the
        user id and the value is the context features, as produced by _rating_context,
        in which the rating will be made.

    """
    return collections.OrderedDict(
        (uid, self._rating_context(uid)) for uid in self._online_users)

def reset(self)

Reset the environment to its original state. Must be called before the first step.

Returns

users : OrderedDict: The initial users where the key represents the user id and the value represents the visible features associated with the user.
items : OrderedDict: The initial items where the key represents the item id and the value represents the visible features associated with the item.
ratings : dict: The initial ratings where the key is a pair (2-tuple) whose first element is the user id and the second element is the item id. The value is a tuple of the rating and the features associated with the setting in which the rating was made.

Expand source code

def reset(self):
    """Reset the environment to its original state. Must be called before the first step.

    Besides resetting the underlying state, this clears the user histories, recomputes
    the per-user online probabilities, invalidates the cached dense ratings, samples the
    initial ratings, and selects the users that are online for the first step.

    Returns
    -------
    users : OrderedDict
        The initial users where the key represents the user id and the value represents
        the visible features associated with the user.
    items : OrderedDict
        The initial items where the key represents the item id and the value represents
        the visible features associated with the item.
    ratings : dict
        The initial ratings where the key is a pair whose first element is the user id
        and the second element is the item id. The value is a tuple of the rating and the
        features associated with the setting in which the rating was made.

    """
    # Initialize the state of the environment.
    self._timestep = -1
    self._reset_state()
    self._user_histories = collections.defaultdict(list)
    num_users = len(self._users)
    num_items = len(self._items)
    self._user_prob = self._get_user_prob()

    # We will lazily compute dense ratings.
    self._dense_ratings = None

    # Fill the rating dict with initial data. User-item pairs are sampled without
    # replacement from the flattened (user, item) grid; each pair's probability is the
    # user's probability spread evenly across all items.
    idx_1d = self._init_random.choice(num_users * num_items, self._num_init_ratings,
                                      replace=False,
                                      p=np.repeat(self._user_prob, num_items) / num_items)
    user_ids = idx_1d // num_items
    item_ids = idx_1d % num_items
    self._ratings = {}

    # TODO: This is a hack, but I don't think we should necessarily put the burden
    # of having to implement a version of _rate_item that knows whether it's being called
    # in reset or not on people deriving from this class. Need to think of a better way
    # than doing this though.
    # The initial ratings must draw their randomness from the init generator, so it
    # temporarily stands in for the dynamics generator. The finally clause guarantees the
    # dynamics generator is restored even if rating an item raises.
    temp_random = self._dynamics_random
    self._dynamics_random = self._init_random
    try:
        for user_id, item_id in zip(user_ids, item_ids):
            self._ratings[user_id, item_id] = (self._rate_item(user_id, item_id),
                                               self._rating_context(user_id))
    finally:
        self._dynamics_random = temp_random

    # Finally, set the users that will be online for the first step.
    self._online_users = self._select_online_users()

    self._timestep += 1
    # Shallow copies so that callers cannot add or remove entries of the internal dicts.
    return self._users.copy(), self._items.copy(), self._ratings.copy()

def seed(self, seed=None)

Set the seed for this environment's random number generator.

Parameters

seed : int or tuple of int: The seed for the random number generators. If seed is an int or a tuple of length 1 all random number generators will be initialized with that seed. If it is a tuple of length 2 the random number generator for the initial state of the environment will be initialized with seed[0] and the random number generator for the environment dynamics will be initialized with seed[1].

Expand source code

def seed(self, seed=None):
    """Seed the random number generators used by this environment.

    Parameters
    ----------
    seed : int or tuple of int
        The seed for the random number generators. With None, an int, or a tuple of
        length 1, both generators receive the same seed. Otherwise seed[0] seeds the
        generator for the initial state of the environment and seed[1] seeds the
        generator for the environment dynamics.

    """
    is_scalar = seed is None or np.issubdtype(type(seed), np.integer)
    if is_scalar:
        init_seed = dynamics_seed = seed
    else:
        shared = len(seed) == 1
        init_seed = seed[0]
        dynamics_seed = init_seed if shared else seed[1]
    self._init_random.seed(init_seed)
    self._dynamics_random.seed(dynamics_seed)

def step(self, recommendations)

Run one timestep of the environment.

Parameters

recommendations : np.ndarray: The recommendations made to each user. recommendations[i] corresponds to the item id recommended to the i-th online user. This array must have the same size as the ordered dict returned by online_users.

Returns

users : OrderedDict: The new users where the key represents the user id and the value represents the visible features associated with the user.
items : OrderedDict: The new items where the key represents the item id and the value represents the visible features associated with the item.
ratings : dict: The new ratings where the key is a pair (2-tuple) whose first element is the user id and the second element is the item id. The value is a tuple of the rating and the features associated with the setting in which the rating was made.
info : dict: Extra information for debugging and evaluation. info["users"] will return the dict of visible user states, info["items"] will return the dict of visible item states, and info["ratings"] gets the dict of all ratings.

Expand source code

def step(self, recommendations):
    """Advance the environment by a single timestep.

    Parameters
    ----------
    recommendations : np.ndarray
        The recommendations made to each user. recommendations[i] corresponds to the
        item id recommended to the i-th online user. This array must have the same size as
        the ordered dict returned by online_users.

    Returns
    -------
    users : OrderedDict
        The new users where the key represents the user id and the value represents
        the visible features associated with the user.
    items : OrderedDict
        The new items where the key represents the item id and the value represents
        the visible features associated with the item.
    ratings : dict
        The new ratings where the key is a pair whose first element is the user id
        and the second element is the item id. The value is a tuple of the rating and the
        features associated with the setting in which the rating was made.
    info : dict
        Extra information for debugging and evaluation. info["users"] will return the dict
        of visible user states, info["items"] will return the dict of visible item states, and
        info["ratings"] gets the dict of all ratings.

    """
    assert len(recommendations) == len(self._online_users)
    added_users, added_items = self._update_state()
    # The state just changed, so drop the cached dense ratings; they are rebuilt lazily.
    self._dense_ratings = None

    # Each online user rates the item that was recommended to them.
    step_ratings = {}
    for user_id, item_id in zip(self._online_users, recommendations):
        rating = self._rate_item(user_id, item_id)
        context = self._rating_context(user_id)
        step_ratings[user_id, item_id] = (rating, context)

        # Record the item in the user's history, keeping at most _memory_length entries
        # by discarding the oldest one.
        history = self._user_histories[user_id]
        history.append(item_id)
        if len(history) == self._memory_length + 1:
            del history[0]
        assert len(history) <= self._memory_length

    self._ratings.update(step_ratings)

    # Pick the users that will be online for the next step.
    self._online_users = self._select_online_users()

    # Assemble the debugging information.
    info = dict(users=self._users, items=self._items, ratings=self._ratings)

    self._timestep += 1
    return added_users, added_items, step_ratings, info

Inherited members

Environment:
- close
- name

class Environment

The interface all environments must implement.

Expand source code

class Environment(abc.ABC):
    """The interface all environments must implement.

    Subclasses must implement reset, step, online_users and the users, items, ratings and
    name properties. seed and close are optional hooks that do nothing by default. An
    environment can be used as a context manager, in which case close is called on exit.
    """

    @abc.abstractmethod
    def reset(self):
        """Reset the environment to its original state. Must be called before the first step.

        Returns
        -------
        users : iterable
            New users and users whose information got updated this timestep.
        items : iterable
            New items and items whose information got updated this timestep.
        ratings : iterable
            New ratings and ratings whose information got updated this timestep.
        info : dict
            Extra information that can be used for debugging but should not be made accessible to
            the recommender.

        """
        raise NotImplementedError

    @abc.abstractmethod
    def step(self, recommendations):
        """Run one timestep of the environment.

        Parameters
        ----------
        recommendations : iterable
            The recommendations made to each user. The i-th recommendation should correspond to
            the i-th user that was online at this timestep.

        Returns
        -------
        users : iterable
            New users and users whose information got updated this timestep.
        items : iterable
            New items and items whose information got updated this timestep.
        ratings : iterable
            New ratings and ratings whose information got updated this timestep.
        info : dict
            Extra information that can be used for debugging but should not be made accessible to
            the recommender.

        """
        raise NotImplementedError

    @abc.abstractmethod
    def online_users(self):
        """Return the users that need a recommendation at the current timestep.

        Returns
        -------
        users : iterable
            The users that are online.

        """
        raise NotImplementedError

    @property
    @abc.abstractmethod
    def users(self):
        """Return all users currently in the environment.

        Returns
        -------
        users : iterable
            All users in the environment.

        """
        raise NotImplementedError

    @property
    @abc.abstractmethod
    def items(self):
        """Return all items currently in the environment.

        Returns
        -------
        items : iterable
            All items in the environment.

        """
        raise NotImplementedError

    @property
    @abc.abstractmethod
    def ratings(self):
        """Return all ratings that have been made in the environment.

        Returns
        -------
        ratings : iterable
            All ratings in the environment.

        """
        raise NotImplementedError

    @property
    @abc.abstractmethod
    def name(self):
        """Name of environment, used for saving."""
        raise NotImplementedError

    def seed(self, seed=None):
        """Set the seed for this environment's random number generator(s).

        The default implementation does nothing; subclasses that use randomness should
        override it.
        """

    def close(self):
        """Perform any necessary cleanup.

        The default implementation does nothing.
        """

    def __enter__(self):
        """Enter the runtime context and return the environment itself.

        Without this method the `with` statement cannot be used, even though __exit__
        is defined.
        """
        return self

    def __exit__(self, *args):
        """Perform any necessary cleanup when the object goes out of context."""
        self.close()
        # Returning False lets any exception raised inside the with-block propagate.
        return False

Ancestors

abc.ABC

Subclasses

DictEnvironment

Instance variables

var items

Return all items currently in the environment.

Returns

items : iterable: All items in the environment.

Expand source code

@property
@abc.abstractmethod
def items(self):
    """Give access to every item that currently exists in the environment.

    Abstract: concrete environments must override this property.

    Returns
    -------
    items : iterable
        All items in the environment.

    """
    raise NotImplementedError

var name

Name of environment, used for saving.

Expand source code

@property
@abc.abstractmethod
def name(self):
    """Name of the environment, used for saving.

    Abstract: concrete environments must override this property.
    """
    raise NotImplementedError

var ratings

Return all ratings that have been made in the environment.

Returns

ratings : iterable: All ratings in the environment.

Expand source code

@property
@abc.abstractmethod
def ratings(self):
    """Give access to every rating that has been made in the environment.

    Abstract: concrete environments must override this property.

    Returns
    -------
    ratings : iterable
        All ratings in the environment.

    """
    raise NotImplementedError

var users

Return all users currently in the environment.

Returns

users : iterable: All users in the environment.

Expand source code

@property
@abc.abstractmethod
def users(self):
    """Give access to every user that currently exists in the environment.

    Abstract: concrete environments must override this property.

    Returns
    -------
    users : iterable
        All users in the environment.

    """
    raise NotImplementedError

Methods

def close(self)

Perform any necessary cleanup.

Expand source code

def close(self):
    """Perform any necessary cleanup.

    The base implementation does nothing and returns None.
    """

def online_users(self)

Return the users that need a recommendation at the current timestep.

Returns

users : iterable: The users that are online.

Expand source code

@abc.abstractmethod
def online_users(self):
    """Report which users need a recommendation at the current timestep.

    Abstract: concrete environments must override this method.

    Returns
    -------
    users : iterable
        The users that are online.

    """
    raise NotImplementedError

def reset(self)

Reset the environment to its original state. Must be called before the first step.

Returns

users : iterable: New users and users whose information got updated this timestep.
items : iterable: New items and items whose information got updated this timestep.
ratings : iterable: New ratings and ratings whose information got updated this timestep.
info : dict: Extra information that can be used for debugging but should not be made accessible to the recommender.

Expand source code

@abc.abstractmethod
def reset(self):
    """Put the environment back into its original state.

    Must be called before the first step. Abstract: concrete environments must override
    this method.

    Returns
    -------
    users : iterable
        New users and users whose information got updated this timestep.
    items : iterable
        New items and items whose information got updated this timestep.
    ratings : iterable
        New ratings and ratings whose information got updated this timestep.
    info : dict
        Extra information that can be used for debugging but should not be made accessible to
        the recommender.

    """
    raise NotImplementedError

def seed(self, seed=None)

Set the seed for this environment's random number generator(s).

Expand source code

def seed(self, seed=None):
    """Set the seed for this environment's random number generator(s).

    The base implementation does nothing and returns None.
    """

def step(self, recommendations)

Run one timestep of the environment.

Parameters

recommendations : iterable: The recommendations made to each user. The i-th recommendation should correspond to the i-th user that was online at this timestep.

Returns

users : iterable: New users and users whose information got updated this timestep.
items : iterable: New items and items whose information got updated this timestep.
ratings : iterable: New ratings and ratings whose information got updated this timestep.
info : dict: Extra information that can be used for debugging but should not be made accessible to the recommender.

Expand source code

@abc.abstractmethod
def step(self, recommendations):
    """Advance the environment by one timestep.

    Abstract: concrete environments must override this method.

    Parameters
    ----------
    recommendations : iterable
        The recommendations made to each user. The i-th recommendation should correspond to
        the i-th user that was online at this timestep.

    Returns
    -------
    users : iterable
        New users and users whose information got updated this timestep.
    items : iterable
        New items and items whose information got updated this timestep.
    ratings : iterable
        New ratings and ratings whose information got updated this timestep.
    info : dict
        Extra information that can be used for debugging but should not be made accessible to
        the recommender.

    """
    raise NotImplementedError