Source code for h2o.estimators.glrm

from .estimator_base import H2OEstimator

class H2OGeneralizedLowRankEstimator(H2OEstimator):
    """Builds a generalized low rank model of a H2O dataset.

    Parameters
    ----------
    k : int
        The rank of the resulting decomposition. This must be between 1 and
        the number of columns in the training frame inclusive.
    max_iterations : int
        The maximum number of iterations to run the optimization loop. Each
        iteration consists of an update of the X matrix, followed by an
        update of the Y matrix.
    transform : str
        A character string that indicates how the training data should be
        transformed before running GLRM. Possible values are "NONE" for no
        transformation, "DEMEAN" for subtracting the mean of each column,
        "DESCALE" for dividing by the standard deviation of each column,
        "STANDARDIZE" for demeaning and descaling, and "NORMALIZE" for
        demeaning and dividing each column by its range (max - min).
    seed : int, optional
        Random seed used to initialize the X and Y matrices.
    ignore_const_cols : bool, optional
        A logical value indicating whether to ignore constant columns in the
        training frame. A column is constant if all of its non-missing values
        are the same value.
    loss : str
        A character string indicating the default loss function for numeric
        columns. Possible values are "Quadratic" (default), "Absolute",
        "Huber", "Poisson", "Hinge", and "Logistic".
    multi_loss : str
        A character string indicating the default loss function for enum
        columns. Possible values are "Categorical" and "Ordinal".
    loss_by_col : list of str, optional
        A list of strings indicating the loss function for specific columns
        by corresponding index in loss_by_col_idx. Will override loss for
        numeric columns and multi_loss for enum columns.
    loss_by_col_idx : list of int, optional
        A list of column indices to which the corresponding loss functions in
        loss_by_col are assigned. Must be zero indexed.
    regularization_x : str
        A character string indicating the regularization function for the X
        matrix. Possible values are "None" (default), "Quadratic", "L2",
        "L1", "NonNegative", "OneSparse", "UnitOneSparse", and "Simplex".
    regularization_y : str
        A character string indicating the regularization function for the Y
        matrix. Possible values are "None" (default), "Quadratic", "L2",
        "L1", "NonNegative", "OneSparse", "UnitOneSparse", and "Simplex".
    gamma_x : float
        The weight on the X matrix regularization term.
    gamma_y : float
        The weight on the Y matrix regularization term.
    init_step_size : float
        Initial step size. Divided by number of columns in the training frame
        when calculating the proximal gradient update. The algorithm begins
        at init_step_size and decreases the step size at each iteration until
        a termination condition is reached.
    min_step_size : float
        Minimum step size upon which the algorithm is terminated.
    init : str
        A character string indicating how to select the initial X and Y
        matrices. Possible values are "Random" for initialization to a random
        array from the standard normal distribution, "PlusPlus" for
        initialization using the clusters from k-means++ initialization,
        "SVD" for initialization using the first k (approximate) right
        singular vectors, "User" for user-specified initial X and Y frames
        (must set user_y and user_x arguments).
    svd_method : str
        A character string that indicates how SVD should be calculated during
        initialization. Possible values are "GramSVD" for distributed
        computation of the Gram matrix followed by a local SVD using the JAMA
        package, "Power" for computation of the SVD using the power iteration
        method, "Randomized" for approximate SVD by projecting onto a random
        subspace.
    user_x : H2OFrame, optional
        An H2OFrame object specifying the initial X matrix. Only used when
        init = "User".
    user_y : H2OFrame, optional
        An H2OFrame object specifying the initial Y matrix. Only used when
        init = "User".
    recover_svd : bool
        A logical value indicating whether the singular values and
        eigenvectors should be recovered during post-processing of the
        generalized low rank decomposition.

    Returns
    -------
    A new H2OGeneralizedLowRankEstimator instance.
    """

    def __init__(self, k=None, max_iterations=None, transform=None, seed=None,
                 ignore_const_cols=None, loss=None, multi_loss=None,
                 loss_by_col=None, loss_by_col_idx=None, regularization_x=None,
                 regularization_y=None, gamma_x=None, gamma_y=None,
                 init_step_size=None, min_step_size=None, init=None,
                 svd_method=None, user_x=None, user_y=None, recover_svd=None):
        """Store every constructor argument in ``self._parms`` keyed by name."""
        super(H2OGeneralizedLowRankEstimator, self).__init__()
        # Snapshot the arguments before any other local is bound, so the copy
        # holds exactly ``self`` plus the constructor parameters.
        params = dict(locals())
        params.pop("self")
        params["_rest_version"] = 3
        self._parms = params

    @property
    def k(self):
        """The rank of the resulting decomposition."""
        return self._parms["k"]

    @k.setter
    def k(self, value):
        self._parms["k"] = value

    @property
    def max_iterations(self):
        """The maximum number of iterations to run the optimization loop."""
        return self._parms["max_iterations"]

    @max_iterations.setter
    def max_iterations(self, value):
        self._parms["max_iterations"] = value

    @property
    def transform(self):
        """How the training data is transformed before running GLRM."""
        return self._parms["transform"]

    @transform.setter
    def transform(self, value):
        self._parms["transform"] = value

    @property
    def seed(self):
        """Random seed used to initialize the X and Y matrices."""
        return self._parms["seed"]

    @seed.setter
    def seed(self, value):
        self._parms["seed"] = value

    @property
    def ignore_const_cols(self):
        """Whether to ignore constant columns in the training frame."""
        return self._parms["ignore_const_cols"]

    @ignore_const_cols.setter
    def ignore_const_cols(self, value):
        self._parms["ignore_const_cols"] = value

    @property
    def loss(self):
        """Default loss function for numeric columns."""
        return self._parms["loss"]

    @loss.setter
    def loss(self, value):
        self._parms["loss"] = value

    @property
    def multi_loss(self):
        """Default loss function for enum columns."""
        return self._parms["multi_loss"]

    @multi_loss.setter
    def multi_loss(self, value):
        self._parms["multi_loss"] = value

    @property
    def loss_by_col(self):
        """Loss functions for the columns listed in loss_by_col_idx."""
        return self._parms["loss_by_col"]

    @loss_by_col.setter
    def loss_by_col(self, value):
        self._parms["loss_by_col"] = value

    @property
    def loss_by_col_idx(self):
        """Column indices to which the loss_by_col entries are assigned."""
        return self._parms["loss_by_col_idx"]

    @loss_by_col_idx.setter
    def loss_by_col_idx(self, value):
        self._parms["loss_by_col_idx"] = value

    @property
    def regularization_x(self):
        """Regularization function for the X matrix."""
        return self._parms["regularization_x"]

    @regularization_x.setter
    def regularization_x(self, value):
        self._parms["regularization_x"] = value

    @property
    def regularization_y(self):
        """Regularization function for the Y matrix."""
        return self._parms["regularization_y"]

    @regularization_y.setter
    def regularization_y(self, value):
        self._parms["regularization_y"] = value

    @property
    def gamma_x(self):
        """The weight on the X matrix regularization term."""
        return self._parms["gamma_x"]

    @gamma_x.setter
    def gamma_x(self, value):
        self._parms["gamma_x"] = value

    @property
    def gamma_y(self):
        """The weight on the Y matrix regularization term."""
        return self._parms["gamma_y"]

    @gamma_y.setter
    def gamma_y(self, value):
        self._parms["gamma_y"] = value

    @property
    def init_step_size(self):
        """Initial step size."""
        return self._parms["init_step_size"]

    @init_step_size.setter
    def init_step_size(self, value):
        self._parms["init_step_size"] = value

    @property
    def min_step_size(self):
        """Minimum step size upon which the algorithm is terminated."""
        return self._parms["min_step_size"]

    @min_step_size.setter
    def min_step_size(self, value):
        self._parms["min_step_size"] = value

    @property
    def init(self):
        """How the initial X and Y matrices are selected."""
        return self._parms["init"]

    @init.setter
    def init(self, value):
        self._parms["init"] = value

    @property
    def svd_method(self):
        """How SVD is calculated during initialization."""
        return self._parms["svd_method"]

    @svd_method.setter
    def svd_method(self, value):
        self._parms["svd_method"] = value

    @property
    def user_x(self):
        """The initial X matrix; only used when init = "User"."""
        return self._parms["user_x"]

    @user_x.setter
    def user_x(self, value):
        self._parms["user_x"] = value

    @property
    def user_y(self):
        """The initial Y matrix; only used when init = "User"."""
        return self._parms["user_y"]

    @user_y.setter
    def user_y(self, value):
        self._parms["user_y"] = value

    @property
    def recover_svd(self):
        """Whether singular values and eigenvectors are recovered afterwards."""
        return self._parms["recover_svd"]

    @recover_svd.setter
    def recover_svd(self, value):
        self._parms["recover_svd"] = value