Module reclab.environments.schmit
Contains implementation for environment in "Human Interaction with Recommendation Systems".
https://arxiv.org/pdf/1703.00535.pdf
Expand source code
"""
Contains implementation for environment in "Human Interaction with Recommendation Systems".
https://arxiv.org/pdf/1703.00535.pdf
"""
import numpy as np
from . import environment
class Schmit(environment.DictEnvironment):
"""
Implementation of environment with static private user preferences and user-item interactions.
Based on "Human Interaction with Recommendation Systems" by Schmit and Riquelme (2018).
Parameters
----------
num_users : int
The number of users in the environment.
num_items : int
The number of items in the environment.
rating_frequency : float
What proportion of users will need a recommendation at each step.
num_init_ratings: : int
The number of initial ratings available when the environment is reset.
rank : int
Rank of user preferences.
sigma : float
Variance of the Gaussian noise added to determine user-item value.
user_dist_choice : str
The choice of user distribution for selecting online users. By default, the subset of
online users is chosen from a uniform distribution. Currently supports normal and lognormal.
"""
def __init__(self, num_users, num_items, rating_frequency=0.2,
num_init_ratings=0, rank=10, sigma=0.2,
user_dist_choice='uniform'):
"""Create an environment."""
super().__init__(rating_frequency, num_init_ratings, 0, user_dist_choice)
self._num_users = num_users
self._num_items = num_items
self.rank = rank
self.sigma = sigma
# constants
self.item_bias = self._init_random.randn(num_items, 1) / 1.5
self.user_bias = self._init_random.randn(num_users, 1) / 3
# unobserved by agents
self.U = self._init_random.randn(num_users, rank) / np.sqrt(self.rank)
self.V = self._init_random.randn(num_items, rank) / np.sqrt(self.rank)
# observed by agents
self.X = self._init_random.randn(num_users, rank) / np.sqrt(self.rank)
self.Y = self._init_random.randn(num_items, rank) / np.sqrt(self.rank)
@property
def name(self):
"""Name of environment, used for saving."""
return 'schmit'
def true_score(self, user, item):
"""
Calculate true score.
user : int
User id for calculating preferences.
item : int
Item id.
"""
return float(self.item_bias[item] + self.user_bias[user] + self.U[user] @ self.V[item].T)
def value(self, user, item):
"""
Add private user preferences and Gaussian noise to true score.
user : int
User id for calculating preferences.
item : int
Item id.
"""
ratings = float(self.true_score(user, item) + self.X[user] @ self.Y[item].T +
self._dynamics_random.normal(loc=0, scale=self.sigma) + 3)
return np.clip(ratings, 1, 5)
def _reset_state(self):
self._users = {user_id: np.zeros((0,))
for user_id in range(self._num_users)}
self._items = {item_id: np.zeros((0,))
for item_id in range(self._num_items)}
self.item_bias = self._init_random.randn(self._num_items, 1) / 1.5
self.user_bias = self._init_random.randn(self._num_users, 1) / 3
self.U = self._init_random.randn(self._num_users, self.rank) / np.sqrt(self.rank)
self.V = self._init_random.randn(self._num_items, self.rank) / np.sqrt(self.rank)
self.X = self._init_random.randn(self._num_users, self.rank) / np.sqrt(self.rank)
self.Y = self._init_random.randn(self._num_items, self.rank) / np.sqrt(self.rank)
def _rate_item(self, user_id, item_id):
return self.value(user_id, item_id)
def _get_dense_ratings(self):
"""Compute all the true ratings on every user-item pair at the current timestep.
A true rating is defined as the rating a user would make with all noise removed.
Returns
-------
dense_ratings : np.ndarray
The array of all true ratings where true_ratings[i, j] is the rating by user i
on item j.
"""
dense_ratings = np.zeros([self._num_users, self._num_items])
for u in range(self._num_users):
for i in range(self._num_items):
dense_ratings[u, i] = self.true_score(u, i) + self.X[u] @ self.Y[i].T + 3
return dense_ratings
Classes
class Schmit (num_users, num_items, rating_frequency=0.2, num_init_ratings=0, rank=10, sigma=0.2, user_dist_choice='uniform')
-
Implementation of environment with static private user preferences and user-item interactions.
Based on "Human Interaction with Recommendation Systems" by Schmit and Riquelme (2018).
Parameters
num_users
:int
- The number of users in the environment.
num_items
:int
- The number of items in the environment.
rating_frequency
:float
- What proportion of users will need a recommendation at each step.
num_init_ratings
:: int
- The number of initial ratings available when the environment is reset.
rank
:int
- Rank of user preferences.
sigma
:float
- Variance of the Gaussian noise added to determine user-item value.
user_dist_choice
:str
- The choice of user distribution for selecting online users. By default, the subset of online users is chosen from a uniform distribution. Currently supports normal and lognormal.
Create an environment.
Expand source code
class Schmit(environment.DictEnvironment): """ Implementation of environment with static private user preferences and user-item interactions. Based on "Human Interaction with Recommendation Systems" by Schmit and Riquelme (2018). Parameters ---------- num_users : int The number of users in the environment. num_items : int The number of items in the environment. rating_frequency : float What proportion of users will need a recommendation at each step. num_init_ratings: : int The number of initial ratings available when the environment is reset. rank : int Rank of user preferences. sigma : float Variance of the Gaussian noise added to determine user-item value. user_dist_choice : str The choice of user distribution for selecting online users. By default, the subset of online users is chosen from a uniform distribution. Currently supports normal and lognormal. """ def __init__(self, num_users, num_items, rating_frequency=0.2, num_init_ratings=0, rank=10, sigma=0.2, user_dist_choice='uniform'): """Create an environment.""" super().__init__(rating_frequency, num_init_ratings, 0, user_dist_choice) self._num_users = num_users self._num_items = num_items self.rank = rank self.sigma = sigma # constants self.item_bias = self._init_random.randn(num_items, 1) / 1.5 self.user_bias = self._init_random.randn(num_users, 1) / 3 # unobserved by agents self.U = self._init_random.randn(num_users, rank) / np.sqrt(self.rank) self.V = self._init_random.randn(num_items, rank) / np.sqrt(self.rank) # observed by agents self.X = self._init_random.randn(num_users, rank) / np.sqrt(self.rank) self.Y = self._init_random.randn(num_items, rank) / np.sqrt(self.rank) @property def name(self): """Name of environment, used for saving.""" return 'schmit' def true_score(self, user, item): """ Calculate true score. user : int User id for calculating preferences. item : int Item id. """ return float(self.item_bias[item] + self.user_bias[user] + self.U[user] @ self.V[item].T) def value(self, user, item): """ Add private user preferences and Gaussian noise to true score. user : int User id for calculating preferences. item : int Item id. """ ratings = float(self.true_score(user, item) + self.X[user] @ self.Y[item].T + self._dynamics_random.normal(loc=0, scale=self.sigma) + 3) return np.clip(ratings, 1, 5) def _reset_state(self): self._users = {user_id: np.zeros((0,)) for user_id in range(self._num_users)} self._items = {item_id: np.zeros((0,)) for item_id in range(self._num_items)} self.item_bias = self._init_random.randn(self._num_items, 1) / 1.5 self.user_bias = self._init_random.randn(self._num_users, 1) / 3 self.U = self._init_random.randn(self._num_users, self.rank) / np.sqrt(self.rank) self.V = self._init_random.randn(self._num_items, self.rank) / np.sqrt(self.rank) self.X = self._init_random.randn(self._num_users, self.rank) / np.sqrt(self.rank) self.Y = self._init_random.randn(self._num_items, self.rank) / np.sqrt(self.rank) def _rate_item(self, user_id, item_id): return self.value(user_id, item_id) def _get_dense_ratings(self): """Compute all the true ratings on every user-item pair at the current timestep. A true rating is defined as the rating a user would make with all noise removed. Returns ------- dense_ratings : np.ndarray The array of all true ratings where true_ratings[i, j] is the rating by user i on item j. """ dense_ratings = np.zeros([self._num_users, self._num_items]) for u in range(self._num_users): for i in range(self._num_items): dense_ratings[u, i] = self.true_score(u, i) + self.X[u] @ self.Y[i].T + 3 return dense_ratings
Ancestors
- DictEnvironment
- Environment
- abc.ABC
Methods
def true_score(self, user, item)
-
Calculate true score.
user : int User id for calculating preferences. item : int Item id.
Expand source code
def true_score(self, user, item): """ Calculate true score. user : int User id for calculating preferences. item : int Item id. """ return float(self.item_bias[item] + self.user_bias[user] + self.U[user] @ self.V[item].T)
def value(self, user, item)
-
Add private user preferences and Gaussian noise to true score.
user : int User id for calculating preferences. item : int Item id.
Expand source code
def value(self, user, item): """ Add private user preferences and Gaussian noise to true score. user : int User id for calculating preferences. item : int Item id. """ ratings = float(self.true_score(user, item) + self.X[user] @ self.Y[item].T + self._dynamics_random.normal(loc=0, scale=self.sigma) + 3) return np.clip(ratings, 1, 5)
Inherited members