Module `reclab.environments.topics`

Contains the implementation for the Topics environment.

In this environment users have a hidden preference for each topic and each item has a hidden topic assigned to it.

Expand source code

"""Contains the implementation for the Topics environment.

In this environment users have a hidden preference for each topic and each item has a
hidden topic assigned to it.
"""
import collections
import numpy as np

from . import environment


class Topics(environment.DictEnvironment):
    """An environment where items have a single topic and users prefer certain topics.

    The user preference for any given topic is initialized as Unif(0.5, 5.5) while
    topics are uniformly assigned to items. Users will rate items as clip(p + e, 1, 5)
    where p is their preference for the item's topic (minus the boredom penalty when
    it applies) and e ~ N(0, self._noise). Users will also have a changing preference
    for topics they get recommended based on the topic_change parameter.

    Parameters
    ----------
    num_topics : int
        The number of topics items can be assigned to.
    num_users : int
        The number of users in the environment.
    num_items : int
        The number of items in the environment.
    rating_frequency : float
        The proportion of users that will need a recommendation at each step.
        Must be between 0 and 1.
    num_init_ratings : int
        The number of ratings available from the start. User-item pairs are randomly selected.
    noise : float
        The standard deviation of the noise added to ratings.
    topic_change : float
        How much the user's preference for a topic changes each time that topic is recommended
        to them. The negative of topic_change gets split across all other topics as well.
    memory_length : int
        The number of recent topics a user remembers which affect the rating.
    boredom_threshold : int
        The number of times a topic has to be seen within the memory to gain a
        penalty.
    boredom_penalty : float
        The penalty on the rating when a user is bored.
    user_dist_choice : str
        The choice of user distribution for selecting online users. By default, the subset of
        online users is chosen from a uniform distribution. Currently supports normal and lognormal.
    shift_steps : int
        The number of timesteps to wait between each user preference shift.
    shift_frequency : float
        The proportion of users whose preference we wish to change during a preference shift.
    shift_weight : float
        The weight to assign to a user's new preferences after a preference shift.
        User's old preferences get assigned a weight of 1 - shift_weight.

    """

    def __init__(self,
                 num_topics,
                 num_users,
                 num_items,
                 rating_frequency=1.0,
                 num_init_ratings=0,
                 noise=0.0,
                 topic_change=0.0,
                 memory_length=0,
                 boredom_threshold=0,
                 boredom_penalty=0.0,
                 user_dist_choice='uniform',
                 shift_steps=1,
                 shift_frequency=0.0,
                 shift_weight=0.0):
        """Create a Topics environment."""
        super().__init__(rating_frequency, num_init_ratings, memory_length, user_dist_choice)
        self._num_topics = num_topics
        self._num_users = num_users
        self._num_items = num_items
        self._topic_change = topic_change
        self._noise = noise
        # Both arrays are populated by _reset_state.
        self._user_preferences = None
        self._item_topics = None
        self._boredom_threshold = boredom_threshold
        self._boredom_penalty = boredom_penalty
        self._shift_steps = shift_steps
        self._shift_frequency = shift_frequency
        self._shift_weight = shift_weight

    @property
    def name(self):
        """Return the name of this environment, 'topics'."""
        return 'topics'

    def _get_dense_ratings(self):
        """Compute the noiseless rating of every user for every item.

        Returns
        -------
        ratings : np.ndarray
            Array of shape (num_users, num_items). Entry (u, i) is user u's preference for
            the topic of item i, minus the boredom penalty when it applies. Unlike
            _get_rating, no noise is added and the values are not clipped.

        """
        # Gather, for every item, the column of preferences matching its topic.
        # Integer-array indexing returns a fresh array, so preferences are not modified below.
        ratings = self._user_preferences[:, self._item_topics]

        # Account for boredom.
        for user_id in range(self._num_users):
            # _user_histories is provided by the base class; presumably the items each user
            # saw within their memory window.
            recent_topics = [self._item_topics[item] for item in self._user_histories[user_id]]
            recent_topics, counts = np.unique(recent_topics, return_counts=True)
            recent_topics = recent_topics[counts > self._boredom_threshold]
            for topic_id in recent_topics:
                ratings[user_id, self._item_topics == topic_id] -= self._boredom_penalty

        return ratings

    def _get_rating(self, user_id, item_id):
        """Compute the noisy rating a user gives an item, without changing any state.

        Parameters
        ----------
        user_id : int
            The id of the user giving the rating.
        item_id : int
            The id of the item being rated.

        Returns
        -------
        rating : float
            The user's preference for the item's topic, minus the boredom penalty when
            the topic appears more than boredom_threshold times in the user's history,
            plus Gaussian noise, clipped to [1, 5].

        """
        topic = self._item_topics[item_id]
        rating = self._user_preferences[user_id, topic]
        recent_topics = [self._item_topics[item] for item in self._user_histories[user_id]]
        if recent_topics.count(topic) > self._boredom_threshold:
            rating -= self._boredom_penalty
        rating = np.clip(rating + self._dynamics_random.randn() * self._noise, 1, 5)
        return rating

    def _rate_item(self, user_id, item_id):
        """Rate an item and update the user's underlying topic preferences.

        Parameters
        ----------
        user_id : int
            The id of the user giving the rating.
        item_id : int
            The id of the item being rated.

        Returns
        -------
        rating : float
            The rating computed by _get_rating before the preferences are updated.

        """
        rating = self._get_rating(user_id, item_id)
        # Updating underlying preference
        topic = self._item_topics[item_id]
        preference = self._user_preferences[user_id, topic]
        if preference <= 5:
            self._user_preferences[user_id, topic] += self._topic_change
            # With a single topic there are no other topics to take the opposite change;
            # skipping the update also avoids dividing by zero.
            if self._num_topics > 1:
                not_topic = np.arange(self._num_topics) != topic
                self._user_preferences[user_id, not_topic] -= (
                    self._topic_change / (self._num_topics - 1))
        return rating

    def _reset_state(self):
        """Draw fresh user preferences and item topics and rebuild the user and item dicts."""
        self._user_preferences = self._init_random.uniform(low=0.5, high=5.5,
                                                           size=(self._num_users, self._num_topics))
        self._item_topics = self._init_random.choice(self._num_topics, size=self._num_items)
        # Users and items expose no features, so each id maps to an empty array.
        self._users = collections.OrderedDict((user_id, np.zeros(0))
                                              for user_id in range(self._num_users))
        self._items = collections.OrderedDict((item_id, np.zeros(0))
                                              for item_id in range(self._num_items))

    def _update_state(self):
        """Apply the periodic preference shift.

        Every shift_steps timesteps, int(num_users * shift_frequency) users are drawn and
        their preferences are replaced by a blend in which freshly sampled preferences get
        weight shift_weight and the current preferences get weight 1 - shift_weight.

        Returns
        -------
        tuple of OrderedDict
            Two empty dictionaries (presumably the new users and new items; this
            environment never adds any).

        """
        if self._timestep % self._shift_steps == 0:
            # Apply the preference shift to a fraction of users.
            # NOTE: choice samples with replacement by default, so the number of distinct
            # shifted users can be smaller than requested.
            shifted_users = self._dynamics_random.choice(
                self._num_users, int(self._num_users * self._shift_frequency))

            new_preferences = self._init_random.uniform(low=0.5, high=5.5,
                                                        size=(len(shifted_users), self._num_topics))
            # shift_weight is the weight of the NEW preferences, as documented on the
            # parameter; the old preferences keep the remaining 1 - shift_weight.
            self._user_preferences[shifted_users] = (
                self._shift_weight * new_preferences +
                (1 - self._shift_weight) * self._user_preferences[shifted_users])

        return collections.OrderedDict(), collections.OrderedDict()

Classes

class Topics (num_topics, num_users, num_items, rating_frequency=1.0, num_init_ratings=0, noise=0.0, topic_change=0.0, memory_length=0, boredom_threshold=0, boredom_penalty=0.0, user_dist_choice='uniform', shift_steps=1, shift_frequency=0.0, shift_weight=0.0)

An environment where items have a single topic and users prefer certain topics.

The user preference for any given topic is initialized as Unif(0.5, 5.5) while topics are uniformly assigned to items. Users will rate items as clip(p + e, 1, 5) where p is their preference for a given topic and e ~ N(0, self._noise). Users will also have a changing preference for topics they get recommended based on the topic_change parameter.

Parameters

num_topics : int: The number of topics items can be assigned to.
num_users : int: The number of users in the environment.
num_items : int: The number of items in the environment.
rating_frequency : float: The proportion of users that will need a recommendation at each step. Must be between 0 and 1.
num_init_ratings : int: The number of ratings available from the start. User-item pairs are randomly selected.
noise : float: The standard deviation of the noise added to ratings.
topic_change : float: How much the user's preference for a topic changes each time that topic is recommended to them. The negative of topic_change gets split across all other topics as well.
memory_length : int: The number of recent topics a user remembers which affect the rating.
boredom_threshold : int: The number of times a topic has to be seen within the memory to gain a penalty.
boredom_penalty : float: The penalty on the rating when a user is bored.
user_dist_choice : str: The choice of user distribution for selecting online users. By default, the subset of online users is chosen from a uniform distribution. Currently supports normal and lognormal.
shift_steps : int: The number of timesteps to wait between each user preference shift.
shift_frequency : float: The proportion of users whose preference we wish to change during a preference shift.
shift_weight : float: The weight to assign to a user's new preferences after a preference shift. User's old preferences get assigned a weight of 1 - shift_weight.

Create a Topics environment.

Expand source code

class Topics(environment.DictEnvironment):
    """An environment where items have a single topic and users prefer certain topics.

    The user preference for any given topic is initialized as Unif(0.5, 5.5) while
    topics are uniformly assigned to items. Users will rate items as clip(p + e, 1, 5)
    where p is their preference for the item's topic (minus the boredom penalty when
    it applies) and e ~ N(0, self._noise). Users will also have a changing preference
    for topics they get recommended based on the topic_change parameter.

    Parameters
    ----------
    num_topics : int
        The number of topics items can be assigned to.
    num_users : int
        The number of users in the environment.
    num_items : int
        The number of items in the environment.
    rating_frequency : float
        The proportion of users that will need a recommendation at each step.
        Must be between 0 and 1.
    num_init_ratings : int
        The number of ratings available from the start. User-item pairs are randomly selected.
    noise : float
        The standard deviation of the noise added to ratings.
    topic_change : float
        How much the user's preference for a topic changes each time that topic is recommended
        to them. The negative of topic_change gets split across all other topics as well.
    memory_length : int
        The number of recent topics a user remembers which affect the rating.
    boredom_threshold : int
        The number of times a topic has to be seen within the memory to gain a
        penalty.
    boredom_penalty : float
        The penalty on the rating when a user is bored.
    user_dist_choice : str
        The choice of user distribution for selecting online users. By default, the subset of
        online users is chosen from a uniform distribution. Currently supports normal and lognormal.
    shift_steps : int
        The number of timesteps to wait between each user preference shift.
    shift_frequency : float
        The proportion of users whose preference we wish to change during a preference shift.
    shift_weight : float
        The weight to assign to a user's new preferences after a preference shift.
        User's old preferences get assigned a weight of 1 - shift_weight.

    """

    def __init__(self,
                 num_topics,
                 num_users,
                 num_items,
                 rating_frequency=1.0,
                 num_init_ratings=0,
                 noise=0.0,
                 topic_change=0.0,
                 memory_length=0,
                 boredom_threshold=0,
                 boredom_penalty=0.0,
                 user_dist_choice='uniform',
                 shift_steps=1,
                 shift_frequency=0.0,
                 shift_weight=0.0):
        """Create a Topics environment."""
        super().__init__(rating_frequency, num_init_ratings, memory_length, user_dist_choice)
        self._num_topics = num_topics
        self._num_users = num_users
        self._num_items = num_items
        self._topic_change = topic_change
        self._noise = noise
        # Both arrays are populated by _reset_state.
        self._user_preferences = None
        self._item_topics = None
        self._boredom_threshold = boredom_threshold
        self._boredom_penalty = boredom_penalty
        self._shift_steps = shift_steps
        self._shift_frequency = shift_frequency
        self._shift_weight = shift_weight

    @property
    def name(self):
        """Return the name of this environment, 'topics'."""
        return 'topics'

    def _get_dense_ratings(self):
        """Compute the noiseless rating of every user for every item.

        Returns
        -------
        ratings : np.ndarray
            Array of shape (num_users, num_items). Entry (u, i) is user u's preference for
            the topic of item i, minus the boredom penalty when it applies. Unlike
            _get_rating, no noise is added and the values are not clipped.

        """
        # Gather, for every item, the column of preferences matching its topic.
        # Integer-array indexing returns a fresh array, so preferences are not modified below.
        ratings = self._user_preferences[:, self._item_topics]

        # Account for boredom.
        for user_id in range(self._num_users):
            # _user_histories is provided by the base class; presumably the items each user
            # saw within their memory window.
            recent_topics = [self._item_topics[item] for item in self._user_histories[user_id]]
            recent_topics, counts = np.unique(recent_topics, return_counts=True)
            recent_topics = recent_topics[counts > self._boredom_threshold]
            for topic_id in recent_topics:
                ratings[user_id, self._item_topics == topic_id] -= self._boredom_penalty

        return ratings

    def _get_rating(self, user_id, item_id):
        """Compute the noisy rating a user gives an item, without changing any state.

        Parameters
        ----------
        user_id : int
            The id of the user giving the rating.
        item_id : int
            The id of the item being rated.

        Returns
        -------
        rating : float
            The user's preference for the item's topic, minus the boredom penalty when
            the topic appears more than boredom_threshold times in the user's history,
            plus Gaussian noise, clipped to [1, 5].

        """
        topic = self._item_topics[item_id]
        rating = self._user_preferences[user_id, topic]
        recent_topics = [self._item_topics[item] for item in self._user_histories[user_id]]
        if recent_topics.count(topic) > self._boredom_threshold:
            rating -= self._boredom_penalty
        rating = np.clip(rating + self._dynamics_random.randn() * self._noise, 1, 5)
        return rating

    def _rate_item(self, user_id, item_id):
        """Rate an item and update the user's underlying topic preferences.

        Parameters
        ----------
        user_id : int
            The id of the user giving the rating.
        item_id : int
            The id of the item being rated.

        Returns
        -------
        rating : float
            The rating computed by _get_rating before the preferences are updated.

        """
        rating = self._get_rating(user_id, item_id)
        # Updating underlying preference
        topic = self._item_topics[item_id]
        preference = self._user_preferences[user_id, topic]
        if preference <= 5:
            self._user_preferences[user_id, topic] += self._topic_change
            # With a single topic there are no other topics to take the opposite change;
            # skipping the update also avoids dividing by zero.
            if self._num_topics > 1:
                not_topic = np.arange(self._num_topics) != topic
                self._user_preferences[user_id, not_topic] -= (
                    self._topic_change / (self._num_topics - 1))
        return rating

    def _reset_state(self):
        """Draw fresh user preferences and item topics and rebuild the user and item dicts."""
        self._user_preferences = self._init_random.uniform(low=0.5, high=5.5,
                                                           size=(self._num_users, self._num_topics))
        self._item_topics = self._init_random.choice(self._num_topics, size=self._num_items)
        # Users and items expose no features, so each id maps to an empty array.
        self._users = collections.OrderedDict((user_id, np.zeros(0))
                                              for user_id in range(self._num_users))
        self._items = collections.OrderedDict((item_id, np.zeros(0))
                                              for item_id in range(self._num_items))

    def _update_state(self):
        """Apply the periodic preference shift.

        Every shift_steps timesteps, int(num_users * shift_frequency) users are drawn and
        their preferences are replaced by a blend in which freshly sampled preferences get
        weight shift_weight and the current preferences get weight 1 - shift_weight.

        Returns
        -------
        tuple of OrderedDict
            Two empty dictionaries (presumably the new users and new items; this
            environment never adds any).

        """
        if self._timestep % self._shift_steps == 0:
            # Apply the preference shift to a fraction of users.
            # NOTE: choice samples with replacement by default, so the number of distinct
            # shifted users can be smaller than requested.
            shifted_users = self._dynamics_random.choice(
                self._num_users, int(self._num_users * self._shift_frequency))

            new_preferences = self._init_random.uniform(low=0.5, high=5.5,
                                                        size=(len(shifted_users), self._num_topics))
            # shift_weight is the weight of the NEW preferences, as documented on the
            # parameter; the old preferences keep the remaining 1 - shift_weight.
            self._user_preferences[shifted_users] = (
                self._shift_weight * new_preferences +
                (1 - self._shift_weight) * self._user_preferences[shifted_users])

        return collections.OrderedDict(), collections.OrderedDict()

Ancestors

Inherited members

DictEnvironment:
- close
- dense_ratings
- items
- name
- online_users
- ratings
- reset
- seed
- step
- users