Module reclab.recommenders.autorec.autorec
PyTorch implementation of the AutoRec recommender.
Source code
"""PyTorch implementation of the AutoRec recommender."""
import math
import numpy as np
import torch
from .autorec_lib import autorec
from .. import recommender
class Autorec(recommender.PredictRecommender):
"""The Autorec recommender.
Parameters
----------
num_users : int
Number of users in the environment.
num_items : int
Number of items in the environment.
hidden_neuron : int
Output dimension of hidden layer.
lambda_value : float
Coefficient for regularization while training layers.
train_epoch : int
Number of epochs to train for each call.
batch_size : int
Batch size during initial training phase.
optimizer_method : str
Optimizer for training model; either Adam or RMSProp.
    grad_clip : bool
        Set to true to clip the gradient norm to 5 during training.
base_lr : float
Base learning rate for optimizer.
lr_decay : float
Rate for decaying learning rate during training.
    dropout : float
        Dropout probability. Set to 0 for no dropout.
random_seed : int
Random seed to reproduce results.
"""
def __init__(self, num_users, num_items,
hidden_neuron=500, lambda_value=1,
train_epoch=1000, batch_size=1000, optimizer_method='RMSProp',
grad_clip=False, base_lr=1e-3, lr_decay=1e-2,
dropout=0.05, random_seed=0):
"""Create new Autorec recommender."""
super().__init__()
# We only want the function arguments so remove class related objects.
self._hyperparameters.update(locals())
del self._hyperparameters['self']
del self._hyperparameters['__class__']
self.model = autorec.AutoRec(num_users,
num_items,
seen_users=set(),
seen_items=set(),
hidden_neuron=hidden_neuron,
dropout=dropout,
random_seed=random_seed)
self.lambda_value = lambda_value
self.num_users = num_users
self.num_items = num_items
self.train_epoch = train_epoch
self.batch_size = batch_size
self.num_batch = int(math.ceil(self.num_items / float(self.batch_size)))
self.base_lr = base_lr
self.optimizer_method = optimizer_method
self.random_seed = random_seed
self.lr_decay = lr_decay
self.grad_clip = grad_clip
np.random.seed(self.random_seed)
self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
def train_model(self, data):
"""Train for all epochs in train_epoch."""
self.model.train()
if self.optimizer_method == 'Adam':
optimizer = torch.optim.Adam(self.model.parameters(), lr=self.base_lr)
elif self.optimizer_method == 'RMSProp':
optimizer = torch.optim.RMSprop(self.model.parameters(), lr=self.base_lr)
        else:
            raise ValueError('optimizer_method must be either Adam or RMSProp, '
                             'got {!r}.'.format(self.optimizer_method))
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=100, gamma=self.lr_decay)
self.model.to(self.device)
for epoch in range(self.train_epoch):
self.train(data, optimizer, scheduler)
    def train(self, data, optimizer, scheduler):
        """Train for a single epoch."""
        # Item-based AutoRec: shuffle item indices and train on batches of item rows.
        random_perm_doc_idx = np.random.permutation(self.num_items)
        for i in range(self.num_batch):
            if i == self.num_batch - 1:
                # The last batch takes whatever items remain.
                batch_set_idx = random_perm_doc_idx[i * self.batch_size:]
            else:
                batch_set_idx = random_perm_doc_idx[i * self.batch_size:(i + 1) * self.batch_size]
            batch = data[batch_set_idx, :].to(self.device)
            output = self.model.forward(batch)
            # Mask out unobserved ratings so the loss covers only observed entries.
            mask = self.mask_R[batch_set_idx, :].to(self.device)
            loss = self.model.loss(output,
                                   batch,
                                   mask,
                                   lambda_value=self.lambda_value)
            optimizer.zero_grad()
            loss.backward()
            if self.grad_clip:
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), 5)
            optimizer.step()
            # Note: the scheduler steps once per batch, so step_size counts batches.
            scheduler.step()
@property
def name(self): # noqa: D102
return 'autorec'
def _predict(self, user_item):
self.model = self.model.eval()
return self.model.predict(user_item, self.R.to(self.device))
def reset(self, users=None, items=None, ratings=None): # noqa: D102
self.model.prepare_model()
super().reset(users, items, ratings)
    def update(self, users=None, items=None, ratings=None):  # noqa: D102
        super().update(users, items, ratings)
        self.model.prepare_model()
        self.model = self.model.train()
        if ratings is not None:
            for user_item in ratings:
                self.model.seen_users.add(user_item[0])
                self.model.seen_items.add(user_item[1])
        ratings = self._ratings.toarray()
        # Item-based AutoRec expects rows that represent items, so transpose.
        self.R = torch.FloatTensor(ratings.T)
        # Observation mask: positive ratings clamp to 1, unobserved zeros stay 0.
        self.mask_R = torch.FloatTensor(ratings.T).clamp(0, 1)
        self.train_model(self.R)
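The loss call in train above is implemented in autorec_lib. As a minimal sketch of what a masked reconstruction loss for item-based AutoRec typically looks like (an illustrative assumption; the actual autorec_lib.autorec implementation may differ in its details):

import torch

def masked_autorec_loss(output, target, mask, model, lambda_value):
    # Squared error only over observed ratings (mask is 1 where a rating exists).
    recon = torch.sum(((output - target) * mask) ** 2)
    # L2 penalty on the weight matrices, scaled by lambda_value.
    reg = sum(torch.sum(param ** 2)
              for name, param in model.named_parameters()
              if 'weight' in name)
    return recon + lambda_value * 0.5 * reg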
Classes
class Autorec (num_users, num_items, hidden_neuron=500, lambda_value=1, train_epoch=1000, batch_size=1000, optimizer_method='RMSProp', grad_clip=False, base_lr=0.001, lr_decay=0.01, dropout=0.05, random_seed=0)
The Autorec recommender.
Parameters
num_users : int
    Number of users in the environment.
num_items : int
    Number of items in the environment.
hidden_neuron : int
    Output dimension of hidden layer.
lambda_value : float
    Coefficient for regularization while training layers.
train_epoch : int
    Number of epochs to train for each call.
batch_size : int
    Batch size during initial training phase.
optimizer_method : str
    Optimizer for training model; either Adam or RMSProp.
grad_clip : bool
    Set to true to clip the gradient norm to 5 during training.
base_lr : float
    Base learning rate for optimizer.
lr_decay : float
    Rate for decaying learning rate during training.
dropout : float
    Dropout probability. Set to 0 for no dropout.
random_seed : int
    Random seed to reproduce results.
Create new Autorec recommender.
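A minimal usage sketch, assuming reclab's standard Recommender interface (users and items map ids to feature arrays, ratings maps (user_id, item_id) pairs to (rating, context) tuples, and the base class's reset forwards the initial data to update, which trains the model); all sizes and hyperparameters below are illustrative, not tuned:

import numpy as np
from reclab.recommenders.autorec.autorec import Autorec

num_users, num_items = 20, 10
users = {user_id: np.zeros(0) for user_id in range(num_users)}
items = {item_id: np.zeros(0) for item_id in range(num_items)}
ratings = {(0, 1): (5.0, np.zeros(0)),
           (3, 2): (2.0, np.zeros(0))}

rec = Autorec(num_users, num_items, hidden_neuron=50,
              train_epoch=10, batch_size=4)
rec.reset(users, items, ratings)  # trains via update()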
Ancestors
- PredictRecommender
- Recommender
- abc.ABC
Methods
def train(self, data, optimizer, scheduler)
Train for a single epoch.
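train is normally driven by train_model, but a single epoch can be run by hand with the same optimizer and scheduler types that train_model constructs. A hypothetical sketch, assuming rec is an Autorec instance that has already been updated so that rec.R and rec.mask_R exist:

rec.model.to(rec.device)  # train() moves batches, not the model, to the device
optimizer = torch.optim.RMSprop(rec.model.parameters(), lr=rec.base_lr)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=100,
                                            gamma=rec.lr_decay)
rec.train(rec.R, optimizer, scheduler)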
def train_model(self, data)
Train for all epochs in train_epoch.
Inherited members