from .estimator_base import H2OEstimator
class H2OGeneralizedLowRankEstimator(H2OEstimator):
    """Builds a generalized low rank model of a H2O dataset.

    Every constructor argument is stored under its own name in ``self._parms``;
    all arguments except ``k`` are additionally exposed as read/write
    properties backed by that dictionary.

    Parameters
    ----------
    k : int
        The rank of the resulting decomposition. This must be between 1 and the
        number of columns in the training frame inclusive.
    max_iterations : int
        The maximum number of iterations to run the optimization loop. Each
        iteration consists of an update of the X matrix, followed by an update
        of the Y matrix.
    transform : str
        A character string that indicates how the training data should be
        transformed before running GLRM. Possible values are
        "NONE" for no transformation,
        "DEMEAN" for subtracting the mean of each column,
        "DESCALE" for dividing by the standard deviation of each column,
        "STANDARDIZE" for demeaning and descaling, and
        "NORMALIZE" for demeaning and dividing each column by its range
        (max - min).
    seed : int, optional
        Random seed used to initialize the X and Y matrices.
    ignore_const_cols : bool, optional
        A logical value indicating whether to ignore constant columns in the
        training frame. A column is constant if all of its non-missing values
        are the same value.
    loss : str
        A character string indicating the default loss function for numeric
        columns. Possible values are
        "Quadratic" (default), "Absolute", "Huber", "Poisson", "Hinge", and
        "Logistic".
    multi_loss : str
        A character string indicating the default loss function for enum
        columns. Possible values are "Categorical" and "Ordinal".
    loss_by_col : list of str, optional
        A list of strings indicating the loss function for specific columns by
        corresponding index in loss_by_col_idx. Will override loss for numeric
        columns and multi_loss for enum columns.
    loss_by_col_idx : list of int, optional
        A list of column indices to which the corresponding loss functions in
        loss_by_col are assigned. Must be zero indexed.
    regularization_x : str
        A character string indicating the regularization function for the X
        matrix. Possible values are
        "None" (default), "Quadratic", "L2", "L1", "NonNegative", "OneSparse",
        "UnitOneSparse", and "Simplex".
    regularization_y : str
        A character string indicating the regularization function for the Y
        matrix. Possible values are
        "None" (default), "Quadratic", "L2", "L1", "NonNegative", "OneSparse",
        "UnitOneSparse", and "Simplex".
    gamma_x : float
        The weight on the X matrix regularization term.
    gamma_y : float
        The weight on the Y matrix regularization term.
    init_step_size : float
        Initial step size. Divided by number of columns in the training frame
        when calculating the proximal gradient update. The algorithm begins at
        init_step_size and decreases the step size at each iteration until a
        termination condition is reached.
    min_step_size : float
        Minimum step size upon which the algorithm is terminated.
    init : str
        A character string indicating how to select the initial X and Y
        matrices. Possible values are
        "Random" for initialization to a random array from the standard normal
        distribution,
        "PlusPlus" for initialization using the clusters from k-means++
        initialization,
        "SVD" for initialization using the first k (approximate) right singular
        vectors, and
        "User" for user-specified initial X and Y frames (must set user_y and
        user_x arguments).
    svd_method : str
        A character string that indicates how SVD should be calculated during
        initialization. Possible values are
        "GramSVD" for distributed computation of the Gram matrix followed by a
        local SVD using the JAMA package,
        "Power" for computation of the SVD using the power iteration method, and
        "Randomized" for approximate SVD by projecting onto a random subspace.
    user_x : H2OFrame, optional
        An H2OFrame object specifying the initial X matrix. Only used when
        init = "User".
    user_y : H2OFrame, optional
        An H2OFrame object specifying the initial Y matrix. Only used when
        init = "User".
    recover_svd : bool
        A logical value indicating whether the singular values and eigenvectors
        should be recovered during post-processing of the generalized low rank
        decomposition.

    Returns
    -------
    A new H2OGeneralizedLowRankEstimator instance.
    """

    def __init__(self, k=None, max_iterations=None, transform=None, seed=None,
                 ignore_const_cols=None, loss=None, multi_loss=None,
                 loss_by_col=None, loss_by_col_idx=None, regularization_x=None,
                 regularization_y=None, gamma_x=None, gamma_y=None,
                 init_step_size=None, min_step_size=None, init=None,
                 svd_method=None, user_x=None, user_y=None, recover_svd=None):
        """Record every constructor argument in ``self._parms`` keyed by name."""
        # Keep the explicit two-argument super(): the zero-argument form adds an
        # implicit ``__class__`` cell that would show up in locals() below.
        super(H2OGeneralizedLowRankEstimator, self).__init__()
        # No new local may be introduced before this snapshot, otherwise it
        # would be recorded as a model parameter.
        self._parms = locals()
        self._parms = {name: value for name, value in self._parms.items()
                       if name != "self"}

    @property
    def max_iterations(self):
        """Maximum number of iterations of the optimization loop."""
        return self._parms["max_iterations"]

    @max_iterations.setter
    def max_iterations(self, value):
        self._parms["max_iterations"] = value

    @property
    def transform(self):
        """Transformation applied to the training data before running GLRM."""
        return self._parms["transform"]

    @transform.setter
    def transform(self, value):
        self._parms["transform"] = value

    @property
    def seed(self):
        """Random seed used to initialize the X and Y matrices."""
        return self._parms["seed"]

    @seed.setter
    def seed(self, value):
        self._parms["seed"] = value

    @property
    def ignore_const_cols(self):
        """Whether constant columns in the training frame are ignored."""
        return self._parms["ignore_const_cols"]

    @ignore_const_cols.setter
    def ignore_const_cols(self, value):
        self._parms["ignore_const_cols"] = value

    @property
    def loss(self):
        """Default loss function for numeric columns."""
        return self._parms["loss"]

    @loss.setter
    def loss(self, value):
        self._parms["loss"] = value

    @property
    def multi_loss(self):
        """Default loss function for enum columns."""
        return self._parms["multi_loss"]

    @multi_loss.setter
    def multi_loss(self, value):
        self._parms["multi_loss"] = value

    @property
    def loss_by_col(self):
        """Per-column loss functions, matched by position to loss_by_col_idx."""
        return self._parms["loss_by_col"]

    @loss_by_col.setter
    def loss_by_col(self, value):
        self._parms["loss_by_col"] = value

    @property
    def loss_by_col_idx(self):
        """Zero-indexed column indices the loss_by_col entries apply to."""
        return self._parms["loss_by_col_idx"]

    @loss_by_col_idx.setter
    def loss_by_col_idx(self, value):
        self._parms["loss_by_col_idx"] = value

    @property
    def regularization_x(self):
        """Regularization function for the X matrix."""
        return self._parms["regularization_x"]

    @regularization_x.setter
    def regularization_x(self, value):
        self._parms["regularization_x"] = value

    @property
    def regularization_y(self):
        """Regularization function for the Y matrix."""
        return self._parms["regularization_y"]

    @regularization_y.setter
    def regularization_y(self, value):
        self._parms["regularization_y"] = value

    @property
    def gamma_x(self):
        """Weight on the X matrix regularization term."""
        return self._parms["gamma_x"]

    @gamma_x.setter
    def gamma_x(self, value):
        self._parms["gamma_x"] = value

    @property
    def gamma_y(self):
        """Weight on the Y matrix regularization term."""
        return self._parms["gamma_y"]

    @gamma_y.setter
    def gamma_y(self, value):
        self._parms["gamma_y"] = value

    @property
    def init_step_size(self):
        """Initial step size of the proximal gradient update."""
        return self._parms["init_step_size"]

    @init_step_size.setter
    def init_step_size(self, value):
        self._parms["init_step_size"] = value

    @property
    def min_step_size(self):
        """Minimum step size upon which the algorithm is terminated."""
        return self._parms["min_step_size"]

    @min_step_size.setter
    def min_step_size(self, value):
        self._parms["min_step_size"] = value

    @property
    def init(self):
        """How the initial X and Y matrices are selected."""
        return self._parms["init"]

    @init.setter
    def init(self, value):
        self._parms["init"] = value

    @property
    def svd_method(self):
        """How SVD is calculated during initialization."""
        return self._parms["svd_method"]

    @svd_method.setter
    def svd_method(self, value):
        self._parms["svd_method"] = value

    @property
    def user_x(self):
        """Initial X matrix; only used when init = "User"."""
        return self._parms["user_x"]

    @user_x.setter
    def user_x(self, value):
        self._parms["user_x"] = value

    @property
    def user_y(self):
        """Initial Y matrix; only used when init = "User"."""
        return self._parms["user_y"]

    @user_y.setter
    def user_y(self, value):
        self._parms["user_y"] = value

    @property
    def recover_svd(self):
        """Whether singular values and eigenvectors are recovered afterwards."""
        return self._parms["recover_svd"]

    @recover_svd.setter
    def recover_svd(self, value):
        self._parms["recover_svd"] = value