# Source code for h2o.estimators.glrm

from .estimator_base import H2OEstimator


class H2OGeneralizedLowRankEstimator(H2OEstimator):
  """Builds a generalized low rank model of a H2O dataset.

  Every constructor argument is stored in the ``_parms`` dictionary under its
  own name and is exposed as a read/write property of the same name.

  Parameters
  ----------
    k : int
      The rank of the resulting decomposition. This must be between 1 and the number of
      columns in the training frame inclusive.

    max_iterations : int
      The maximum number of iterations to run the optimization loop. Each iteration
      consists of an update of the X matrix, followed by an update of the Y matrix.

    transform : str
      A character string that indicates how the training data should be transformed
      before running GLRM. Possible values are
      "NONE" for no transformation,
      "DEMEAN" for subtracting the mean of each column,
      "DESCALE" for dividing by the standard deviation of each column,
      "STANDARDIZE" for demeaning and descaling, and
      "NORMALIZE" for demeaning and dividing each column by its range (max - min).

    seed : int, optional
      Random seed used to initialize the X and Y matrices.

    ignore_const_cols : bool, optional
      A logical value indicating whether to ignore constant columns in the training
      frame. A column is constant if all of its non-missing values are the same value.

    loss : str
      A character string indicating the default loss function for numeric columns.
      Possible values are
      "Quadratic" (default), "Absolute", "Huber", "Poisson", "Hinge", and "Logistic".

    multi_loss : str
      A character string indicating the default loss function for enum columns. Possible
      values are "Categorical" and "Ordinal".

    loss_by_col : list of str, optional
      A list of strings indicating the loss function for specific columns by
      corresponding index in loss_by_col_idx. Will override loss for numeric columns
      and multi_loss for enum columns.

    loss_by_col_idx : list of int, optional
      A list of column indices to which the corresponding loss functions in loss_by_col
      are assigned. Must be zero indexed.

    regularization_x : str
      A character string indicating the regularization function for the X matrix.
      Possible values are
      "None" (default), "Quadratic", "L2", "L1", "NonNegative", "OneSparse",
      "UnitOneSparse", and "Simplex".

    regularization_y : str
      A character string indicating the regularization function for the Y matrix.
      Possible values are
      "None" (default), "Quadratic", "L2", "L1", "NonNegative", "OneSparse",
      "UnitOneSparse", and "Simplex".

    gamma_x : float
      The weight on the X matrix regularization term.

    gamma_y : float
      The weight on the Y matrix regularization term.

    init_step_size : float
      Initial step size. Divided by number of columns in the training frame when
      calculating the proximal gradient update. The algorithm begins at init_step_size
      and decreases the step size at each iteration until a termination condition is
      reached.

    min_step_size : float
      Minimum step size upon which the algorithm is terminated.

    init : str
      A character string indicating how to select the initial X and Y matrices.
      Possible values are
      "Random" for initialization to a random array from the standard normal
      distribution,
      "PlusPlus" for initialization using the clusters from k-means++ initialization,
      "SVD" for initialization using the first k (approximate) right singular vectors,
      "User" for user-specified initial X and Y frames (must set user_y and
      user_x arguments).

    svd_method : str
      A character string that indicates how SVD should be calculated during
      initialization. Possible values are
      "GramSVD" for distributed computation of the Gram matrix followed by a local SVD
      using the JAMA package,
      "Power" for computation of the SVD using the power iteration method,
      "Randomized" for approximate SVD by projecting onto a random subspace.

    user_x : H2OFrame, optional
      An H2OFrame object specifying the initial X matrix. Only used when init = "User".

    user_y : H2OFrame, optional
      An H2OFrame object specifying the initial Y matrix. Only used when init = "User".

    recover_svd : bool
      A logical value indicating whether the singular values and eigenvectors should be
      recovered during post-processing of the generalized low rank decomposition.

  Returns
  -------
    A new H2OGeneralizedLowRankEstimator instance.
  """
  def __init__(self,k=None, max_iterations=None, transform=None, seed=None,
               ignore_const_cols=None,loss=None, multi_loss=None, loss_by_col=None,
               loss_by_col_idx=None, regularization_x=None, regularization_y=None,
               gamma_x=None, gamma_y=None, init_step_size=None, min_step_size=None,
               init=None, svd_method=None, user_x=None, user_y=None, recover_svd=None):
    super(H2OGeneralizedLowRankEstimator, self).__init__()
    # Capture the constructor arguments by name. locals() must be called before
    # any other local variable is introduced so that it holds only ``self`` and
    # the parameters; the copy is then stripped of ``self``.
    self._parms = dict(locals())
    del self._parms["self"]
    self._parms['_rest_version']=3

  @property
  def k(self):
    """The rank of the resulting decomposition."""
    return self._parms["k"]

  @k.setter
  def k(self, value):
    self._parms["k"] = value

  @property
  def max_iterations(self):
    """The maximum number of iterations of the optimization loop."""
    return self._parms["max_iterations"]

  @max_iterations.setter
  def max_iterations(self, value):
    self._parms["max_iterations"] = value

  @property
  def transform(self):
    """How the training data is transformed before running GLRM."""
    return self._parms["transform"]

  @transform.setter
  def transform(self, value):
    self._parms["transform"] = value

  @property
  def seed(self):
    """Random seed used to initialize the X and Y matrices."""
    return self._parms["seed"]

  @seed.setter
  def seed(self, value):
    self._parms["seed"] = value

  @property
  def ignore_const_cols(self):
    """Whether constant columns in the training frame are ignored."""
    return self._parms["ignore_const_cols"]

  @ignore_const_cols.setter
  def ignore_const_cols(self, value):
    self._parms["ignore_const_cols"] = value

  @property
  def loss(self):
    """Default loss function for numeric columns."""
    return self._parms["loss"]

  @loss.setter
  def loss(self, value):
    self._parms["loss"] = value

  @property
  def multi_loss(self):
    """Default loss function for enum columns."""
    return self._parms["multi_loss"]

  @multi_loss.setter
  def multi_loss(self, value):
    self._parms["multi_loss"] = value

  @property
  def loss_by_col(self):
    """Loss functions for the specific columns listed in loss_by_col_idx."""
    return self._parms["loss_by_col"]

  @loss_by_col.setter
  def loss_by_col(self, value):
    self._parms["loss_by_col"] = value

  @property
  def loss_by_col_idx(self):
    """Zero-indexed column indices matching the entries of loss_by_col."""
    return self._parms["loss_by_col_idx"]

  @loss_by_col_idx.setter
  def loss_by_col_idx(self, value):
    self._parms["loss_by_col_idx"] = value

  @property
  def regularization_x(self):
    """Regularization function for the X matrix."""
    return self._parms["regularization_x"]

  @regularization_x.setter
  def regularization_x(self, value):
    self._parms["regularization_x"] = value

  @property
  def regularization_y(self):
    """Regularization function for the Y matrix."""
    return self._parms["regularization_y"]

  @regularization_y.setter
  def regularization_y(self, value):
    self._parms["regularization_y"] = value

  @property
  def gamma_x(self):
    """The weight on the X matrix regularization term."""
    return self._parms["gamma_x"]

  @gamma_x.setter
  def gamma_x(self, value):
    self._parms["gamma_x"] = value

  @property
  def gamma_y(self):
    """The weight on the Y matrix regularization term."""
    return self._parms["gamma_y"]

  @gamma_y.setter
  def gamma_y(self, value):
    self._parms["gamma_y"] = value

  @property
  def init_step_size(self):
    """Initial step size of the proximal gradient update."""
    return self._parms["init_step_size"]

  @init_step_size.setter
  def init_step_size(self, value):
    self._parms["init_step_size"] = value

  @property
  def min_step_size(self):
    """Minimum step size upon which the algorithm is terminated."""
    return self._parms["min_step_size"]

  @min_step_size.setter
  def min_step_size(self, value):
    self._parms["min_step_size"] = value

  @property
  def init(self):
    """How the initial X and Y matrices are selected."""
    return self._parms["init"]

  @init.setter
  def init(self, value):
    self._parms["init"] = value

  @property
  def svd_method(self):
    """How SVD is calculated during initialization."""
    return self._parms["svd_method"]

  @svd_method.setter
  def svd_method(self, value):
    self._parms["svd_method"] = value

  @property
  def user_x(self):
    """H2OFrame giving the initial X matrix (only used when init = "User")."""
    return self._parms["user_x"]

  @user_x.setter
  def user_x(self, value):
    self._parms["user_x"] = value

  @property
  def user_y(self):
    """H2OFrame giving the initial Y matrix (only used when init = "User")."""
    return self._parms["user_y"]

  @user_y.setter
  def user_y(self, value):
    self._parms["user_y"] = value

  @property
  def recover_svd(self):
    """Whether singular values and eigenvectors are recovered in post-processing."""
    return self._parms["recover_svd"]

  @recover_svd.setter
  def recover_svd(self, value):
    self._parms["recover_svd"] = value