Module reclab.recommenders.cfnade.cfnade_lib.utils
Util functions for class Cfnade
Source code
""" Util functions for class Cfnade"""
from itertools import islice
import numpy as np
import keras
from keras import backend as K
from keras.callbacks import Callback
class DataSet(Callback):
    """
    A data generator that feeds data in batches.

    ratings_df: rating matrix, num_items x num_users, each entry is an input rating rounded to an integer
    num_users: int, number of users
    num_items: int, number of items
    batch_size: int, batch size
    rating_bucket: int, number of rating buckets (distinct rating values)
    mode: int, 0 for train, 1 for eval, 2 for test
    """
    def __init__(self, ratings_df,
                 num_users,
                 num_items,
                 batch_size,
                 rating_bucket,
                 mode):
        self.num_users = num_users
        self.num_items = num_items
        self.batch_size = batch_size
        self.ratings_df = ratings_df
        self.rating_bucket = rating_bucket
        self.mode = mode
    def generate(self, eval=False):
        """
        A generator function that yields one batch of inputs and dummy loss targets at a time.
        """
        line_pointer = 0
        while True:
            next_n_data_lines = list(islice(self.ratings_df, line_pointer, line_pointer + self.batch_size))
            if not next_n_data_lines:
                if self.mode == 0 and eval == False:
                    # In train mode, wrap around to the start of the data.
                    line_pointer = 0
                    next_n_data_lines = list(islice(self.ratings_df, line_pointer, line_pointer + self.batch_size))
                else:
                    break
            input_ranking_vectors = np.zeros((self.batch_size, self.num_users, self.rating_bucket), dtype='int8')
            output_ranking_vectors = np.zeros((self.batch_size, self.num_users, self.rating_bucket), dtype='int8')
            input_mask_vectors = np.zeros((self.batch_size, self.num_users), dtype='int8')
            output_mask_vectors = np.zeros((self.batch_size, self.num_users), dtype='int8')
            for i, line in enumerate(next_n_data_lines):
                user_ids = np.nonzero(line)[0]
                ratings_line = line[line != 0]
                if self.mode == 0 and len(user_ids) != 0:
                    # A random ordering of 0 to len(user_ids) - 1.
                    ordering = np.random.permutation(np.arange(len(user_ids)))
                    random_num = np.random.randint(0, len(ordering))
                    # Ratings before the random cut point are fed as inputs; the rest become targets.
                    flag_in = (ordering < random_num)
                    flag_out = (ordering >= random_num)
                    input_mask_vectors[i][user_ids] = flag_in
                    output_mask_vectors[i][user_ids] = flag_out
                    for j, (user_id, value) in enumerate(zip(user_ids, ratings_line)):
                        if flag_in[j]:
                            input_ranking_vectors[i, user_id, (value - 1)] = 1
                        else:
                            output_ranking_vectors[i, user_id, (value - 1)] = 1
                if self.mode == 2:
                    # In test mode, every observed rating is fed as an input.
                    for j, (user_id, value) in enumerate(zip(user_ids, ratings_line)):
                        input_ranking_vectors[i, user_id, (value - 1)] = 1
            inputs = {
                'input_ratings': input_ranking_vectors,
                'output_ratings': output_ranking_vectors,
                'input_masks': input_mask_vectors,
                'output_masks': output_mask_vectors}
            outputs = {'nade_loss': np.zeros([self.batch_size])}
            yield (inputs, outputs)
            line_pointer = line_pointer + self.batch_size
def prediction_layer(x):
    # x.shape = (?, 6040, 5)
    x_cumsum = K.cumsum(x, axis=2)
    # x_cumsum.shape = (?, 6040, 5)
    output = K.softmax(x_cumsum)
    # output.shape = (?, 6040, 5)
    return output

def prediction_output_shape(input_shape):
    return input_shape

def d_layer(x):
    return K.sum(x, axis=1)

def d_output_shape(input_shape):
    return (input_shape[0],)

def D_layer(x):
    return K.sum(x, axis=1)

def D_output_shape(input_shape):
    return (input_shape[0],)
def rating_cost_lambda_func(args):
    # The paper reports alpha = 0.01 and std = 1.0; these are the values used in the reference repo.
    alpha = 0.01
    std = 1.0
    pred_score, true_ratings, input_masks, output_masks, D, d = args
    pred_score_cum = K.cumsum(pred_score, axis=2)
    prob_item_ratings = K.softmax(pred_score_cum)
    accu_prob_1N = K.cumsum(prob_item_ratings, axis=2)
    accu_prob_N1 = K.cumsum(prob_item_ratings[:, :, ::-1], axis=2)[:, :, ::-1]
    mask1N = K.cumsum(true_ratings[:, :, ::-1], axis=2)[:, :, ::-1]
    maskN1 = K.cumsum(true_ratings, axis=2)
    cost_ordinal_1N = -K.sum((K.log(prob_item_ratings) - K.log(accu_prob_1N)) * mask1N, axis=2)
    cost_ordinal_N1 = -K.sum((K.log(prob_item_ratings) - K.log(accu_prob_N1)) * maskN1, axis=2)
    cost_ordinal = cost_ordinal_1N + cost_ordinal_N1
    nll_item_ratings = K.sum(-(true_ratings * K.log(prob_item_ratings)), axis=2)
    nll = std * K.sum(nll_item_ratings, axis=1) * 1.0 * D / (D - d + 1e-6) \
        + alpha * K.sum(cost_ordinal, axis=1) * 1.0 * D / (D - d + 1e-6)
    cost = K.mean(nll)
    cost = K.expand_dims(cost, 0)
    return cost
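
Example: a minimal sketch of pulling a single training batch from DataSet.generate(); the toy rating matrix, sizes, and batch size below are hypothetical, chosen only to show the shapes of the yielded tensors.

import numpy as np
from reclab.recommenders.cfnade.cfnade_lib.utils import DataSet

# Hypothetical toy data: 12 items x 6 users, entries 0 (unobserved) or ratings 1-5.
ratings = np.random.randint(0, 6, size=(12, 6))

train_set = DataSet(ratings, num_users=6, num_items=12, batch_size=4,
                    rating_bucket=5, mode=0)

# Pull a single batch; in train mode the generator cycles forever, so use next()
# rather than exhausting it in a loop.
inputs, outputs = next(train_set.generate())
print(inputs['input_ratings'].shape)   # (4, 6, 5)
print(inputs['input_masks'].shape)     # (4, 6)
print(outputs['nade_loss'].shape)      # (4,) dummy targets for the in-graph loss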
Functions
def D_layer(x)
-
Sums x over axis 1; applied to a 0/1 mask of shape (batch_size, num_users) this yields a per-example count.
def D_output_shape(input_shape)
-
Shape helper for a Lambda layer wrapping D_layer; maps (batch_size, num_users) to (batch_size,).
def d_layer(x)
-
Identical to D_layer: sums x over axis 1.
def d_output_shape(input_shape)
-
Shape helper for a Lambda layer wrapping d_layer; maps (batch_size, num_users) to (batch_size,).
def prediction_layer(x)
-
Takes the cumulative sum of the rating scores along the last (rating-bucket) axis and applies a softmax, producing a probability distribution over rating values for every (example, user) pair.
def prediction_output_shape(input_shape)
-
Shape helper for a Lambda layer wrapping prediction_layer; the output shape equals the input shape.
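prediction_layer and prediction_output_shape are intended to be paired inside a keras.layers.Lambda layer. A minimal sketch, assuming a hypothetical placeholder scores tensor and sizes:

from keras.layers import Input, Lambda

from reclab.recommenders.cfnade.cfnade_lib import utils

num_users = 6       # hypothetical
rating_bucket = 5   # hypothetical

# Raw per-bucket scores produced by the (not shown) NADE layers.
scores = Input(shape=(num_users, rating_bucket), name='scores')

# Cumulative-sum over the rating axis, then softmax, giving per-bucket probabilities.
probs = Lambda(utils.prediction_layer,
               utils.prediction_output_shape,
               name='rating_probabilities')(scores)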
def rating_cost_lambda_func(args)
-
Computes the CF-NADE training cost from (pred_score, true_ratings, input_masks, output_masks, D, d): the negative log-likelihood of the true ratings plus an ordinal cost term weighted by alpha = 0.01, each rescaled by D / (D - d), averaged over the batch and returned as a one-element tensor.
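rating_cost_lambda_func is designed to be wrapped in a Lambda layer whose output is the cost itself, matching the dummy zero 'nade_loss' targets yielded by DataSet.generate(). The sketch below shows one possible wiring; the Dense stand-in for the NADE scores, the layer names, and the way D and d are computed here are assumptions for illustration, not the recommender's actual architecture.

from keras.layers import Input, Lambda, Dense, add
from keras.models import Model

from reclab.recommenders.cfnade.cfnade_lib import utils

num_users = 6       # hypothetical
rating_bucket = 5   # hypothetical

# Placeholders mirroring the dict keys yielded by DataSet.generate().
input_ratings = Input(shape=(num_users, rating_bucket), name='input_ratings')
output_ratings = Input(shape=(num_users, rating_bucket), name='output_ratings')
input_masks = Input(shape=(num_users,), name='input_masks')
output_masks = Input(shape=(num_users,), name='output_masks')

# Stand-in score tensor; the real CF-NADE model computes this with its NADE layers.
pred_score = Dense(rating_bucket)(input_ratings)

# Assumed wiring: d counts the ratings fed as input, D counts all observed ratings.
d = Lambda(utils.d_layer, utils.d_output_shape, name='d')(input_masks)
D = Lambda(utils.D_layer, utils.D_output_shape, name='D')(add([input_masks, output_masks]))

# The cost is computed inside the graph and exposed as an output named 'nade_loss',
# matching the dummy zero targets produced by DataSet.generate().
nade_loss = Lambda(utils.rating_cost_lambda_func, output_shape=(1,), name='nade_loss')(
    [pred_score, output_ratings, input_masks, output_masks, D, d])

model = Model(inputs=[input_ratings, output_ratings, input_masks, output_masks],
              outputs=nade_loss)
# y_pred already is the cost, so the compiled loss simply passes it through.
model.compile(optimizer='adam', loss={'nade_loss': lambda y_true, y_pred: y_pred})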
Classes
class DataSet (ratings_df, num_users, num_items, batch_size, rating_bucket, mode)
-
A data generator that feeds data in batches.
ratings_df: rating matrix, num_items x num_users, each entry is an input rating rounded to an integer
num_users: int, number of users
num_items: int, number of items
batch_size: int, batch size
rating_bucket: int, number of rating buckets (distinct rating values)
mode: int, 0 for train, 1 for eval, 2 for test
Ancestors
- keras.callbacks.callbacks.Callback
Methods
def generate(self, eval=False)
-
A generator function that yields one batch of inputs and dummy loss targets at a time.
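The generator's termination behaviour depends on mode and eval: in train mode with eval=False it wraps around indefinitely and is meant to be handed to Keras' fit_generator with an explicit steps_per_epoch; otherwise it stops once the rating matrix is exhausted. A small sketch with a hypothetical toy matrix:

import numpy as np
from reclab.recommenders.cfnade.cfnade_lib.utils import DataSet

# Hypothetical toy ratings matrix: 20 items x 8 users, values 0 (unobserved) or 1-5.
ratings = np.random.randint(0, 6, size=(20, 8))

# mode=2 (test): even with the default eval=False the generator terminates once
# the data is exhausted, so it can be consumed with an ordinary for-loop.
test_set = DataSet(ratings, num_users=8, num_items=20, batch_size=4,
                   rating_bucket=5, mode=2)
num_batches = 0
for inputs, outputs in test_set.generate():
    num_batches += 1
print(num_batches)  # 5 batches of 4 item rows each

# mode=0 (train) with eval=False instead wraps around and never stops on its own.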