Source code for h2o.estimators.glrm

#!/usr/bin/env python
# -*- encoding: utf-8 -*-
#
# This file is auto-generated by h2o-3/h2o-bindings/bin/gen_python.py
# Copyright 2016 H2O.ai;  Apache License Version 2.0 (see LICENSE for details)
#
from __future__ import absolute_import, division, print_function, unicode_literals

from h2o.base import Keyed
from h2o.frame import H2OFrame
from h2o.expr import ExprNode
from h2o.expr import ASTId
from h2o.estimators.estimator_base import H2OEstimator
from h2o.exceptions import H2OValueError
from h2o.frame import H2OFrame
from h2o.utils.typechecks import assert_is_type, Enum, numeric


[docs]class H2OGeneralizedLowRankEstimator(H2OEstimator): """ Generalized Low Rank Modeling Builds a generalized low rank model of a H2O dataset. """ algo = "glrm" supervised_learning = False def __init__(self, model_id=None, # type: Optional[Union[None, str, H2OEstimator]] training_frame=None, # type: Optional[Union[None, str, H2OFrame]] validation_frame=None, # type: Optional[Union[None, str, H2OFrame]] ignored_columns=None, # type: Optional[List[str]] ignore_const_cols=True, # type: bool score_each_iteration=False, # type: bool representation_name=None, # type: Optional[str] loading_name=None, # type: Optional[str] transform="none", # type: Literal["none", "standardize", "normalize", "demean", "descale"] k=1, # type: int loss="quadratic", # type: Literal["quadratic", "absolute", "huber", "poisson", "hinge", "logistic", "periodic"] loss_by_col=None, # type: Optional[List[Literal["quadratic", "absolute", "huber", "poisson", "hinge", "logistic", "periodic", "categorical", "ordinal"]]] loss_by_col_idx=None, # type: Optional[List[int]] multi_loss="categorical", # type: Literal["categorical", "ordinal"] period=1, # type: int regularization_x="none", # type: Literal["none", "quadratic", "l2", "l1", "non_negative", "one_sparse", "unit_one_sparse", "simplex"] regularization_y="none", # type: Literal["none", "quadratic", "l2", "l1", "non_negative", "one_sparse", "unit_one_sparse", "simplex"] gamma_x=0.0, # type: float gamma_y=0.0, # type: float max_iterations=1000, # type: int max_updates=2000, # type: int init_step_size=1.0, # type: float min_step_size=0.0001, # type: float seed=-1, # type: int init="plus_plus", # type: Literal["random", "svd", "plus_plus", "user"] svd_method="randomized", # type: Literal["gram_s_v_d", "power", "randomized"] user_y=None, # type: Optional[Union[None, str, H2OFrame]] user_x=None, # type: Optional[Union[None, str, H2OFrame]] expand_user_y=True, # type: bool impute_original=False, # type: bool recover_svd=False, # type: bool max_runtime_secs=0.0, # type: float export_checkpoints_dir=None, # type: Optional[str] ): """ :param model_id: Destination id for this model; auto-generated if not specified. Defaults to ``None``. :type model_id: Union[None, str, H2OEstimator], optional :param training_frame: Id of the training data frame. Defaults to ``None``. :type training_frame: Union[None, str, H2OFrame], optional :param validation_frame: Id of the validation data frame. Defaults to ``None``. :type validation_frame: Union[None, str, H2OFrame], optional :param ignored_columns: Names of columns to ignore for training. Defaults to ``None``. :type ignored_columns: List[str], optional :param ignore_const_cols: Ignore constant columns. Defaults to ``True``. :type ignore_const_cols: bool :param score_each_iteration: Whether to score during each iteration of model training. Defaults to ``False``. :type score_each_iteration: bool :param representation_name: Frame key to save resulting X Defaults to ``None``. :type representation_name: str, optional :param loading_name: [Deprecated] Use representation_name instead. Frame key to save resulting X. Defaults to ``None``. :type loading_name: str, optional :param transform: Transformation of training data Defaults to ``"none"``. :type transform: Literal["none", "standardize", "normalize", "demean", "descale"] :param k: Rank of matrix approximation Defaults to ``1``. :type k: int :param loss: Numeric loss function Defaults to ``"quadratic"``. :type loss: Literal["quadratic", "absolute", "huber", "poisson", "hinge", "logistic", "periodic"] :param loss_by_col: Loss function by column (override) Defaults to ``None``. :type loss_by_col: List[Literal["quadratic", "absolute", "huber", "poisson", "hinge", "logistic", "periodic", "categorical", "ordinal"]], optional :param loss_by_col_idx: Loss function by column index (override) Defaults to ``None``. :type loss_by_col_idx: List[int], optional :param multi_loss: Categorical loss function Defaults to ``"categorical"``. :type multi_loss: Literal["categorical", "ordinal"] :param period: Length of period (only used with periodic loss function) Defaults to ``1``. :type period: int :param regularization_x: Regularization function for X matrix Defaults to ``"none"``. :type regularization_x: Literal["none", "quadratic", "l2", "l1", "non_negative", "one_sparse", "unit_one_sparse", "simplex"] :param regularization_y: Regularization function for Y matrix Defaults to ``"none"``. :type regularization_y: Literal["none", "quadratic", "l2", "l1", "non_negative", "one_sparse", "unit_one_sparse", "simplex"] :param gamma_x: Regularization weight on X matrix Defaults to ``0.0``. :type gamma_x: float :param gamma_y: Regularization weight on Y matrix Defaults to ``0.0``. :type gamma_y: float :param max_iterations: Maximum number of iterations Defaults to ``1000``. :type max_iterations: int :param max_updates: Maximum number of updates, defaults to 2*max_iterations Defaults to ``2000``. :type max_updates: int :param init_step_size: Initial step size Defaults to ``1.0``. :type init_step_size: float :param min_step_size: Minimum step size Defaults to ``0.0001``. :type min_step_size: float :param seed: RNG seed for initialization Defaults to ``-1``. :type seed: int :param init: Initialization mode Defaults to ``"plus_plus"``. :type init: Literal["random", "svd", "plus_plus", "user"] :param svd_method: Method for computing SVD during initialization (Caution: Randomized is currently experimental and unstable) Defaults to ``"randomized"``. :type svd_method: Literal["gram_s_v_d", "power", "randomized"] :param user_y: User-specified initial Y Defaults to ``None``. :type user_y: Union[None, str, H2OFrame], optional :param user_x: User-specified initial X Defaults to ``None``. :type user_x: Union[None, str, H2OFrame], optional :param expand_user_y: Expand categorical columns in user-specified initial Y Defaults to ``True``. :type expand_user_y: bool :param impute_original: Reconstruct original training data by reversing transform Defaults to ``False``. :type impute_original: bool :param recover_svd: Recover singular values and eigenvectors of XY Defaults to ``False``. :type recover_svd: bool :param max_runtime_secs: Maximum allowed runtime in seconds for model training. Use 0 to disable. Defaults to ``0.0``. :type max_runtime_secs: float :param export_checkpoints_dir: Automatically export generated models to this directory. Defaults to ``None``. :type export_checkpoints_dir: str, optional """ super(H2OGeneralizedLowRankEstimator, self).__init__() self._parms = {} self._id = self._parms['model_id'] = model_id self.training_frame = training_frame self.validation_frame = validation_frame self.ignored_columns = ignored_columns self.ignore_const_cols = ignore_const_cols self.score_each_iteration = score_each_iteration self.representation_name = representation_name self.loading_name = loading_name self.transform = transform self.k = k self.loss = loss self.loss_by_col = loss_by_col self.loss_by_col_idx = loss_by_col_idx self.multi_loss = multi_loss self.period = period self.regularization_x = regularization_x self.regularization_y = regularization_y self.gamma_x = gamma_x self.gamma_y = gamma_y self.max_iterations = max_iterations self.max_updates = max_updates self.init_step_size = init_step_size self.min_step_size = min_step_size self.seed = seed self.init = init self.svd_method = svd_method self.user_y = user_y self.user_x = user_x self.expand_user_y = expand_user_y self.impute_original = impute_original self.recover_svd = recover_svd self.max_runtime_secs = max_runtime_secs self.export_checkpoints_dir = export_checkpoints_dir @property def training_frame(self): """ Id of the training data frame. Type: ``Union[None, str, H2OFrame]``. :examples: >>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/prostate/prostate_cat.csv") >>> prostate[0] = prostate[0].asnumeric() >>> prostate[4] = prostate[4].asnumeric() >>> pros_glrm = H2OGeneralizedLowRankEstimator(k=5, ... seed=1234) >>> pros_glrm.train(x=prostate.names, training_frame=prostate) >>> pros_glrm.show() """ return self._parms.get("training_frame") @training_frame.setter def training_frame(self, training_frame): self._parms["training_frame"] = H2OFrame._validate(training_frame, 'training_frame') @property def validation_frame(self): """ Id of the validation data frame. Type: ``Union[None, str, H2OFrame]``. :examples: >>> iris = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/iris/iris_wheader.csv") >>> iris_glrm = H2OGeneralizedLowRankEstimator(k=3, ... loss="quadratic", ... gamma_x=0.5, ... gamma_y=0.5, ... transform="standardize") >>> iris_glrm.train(x=iris.names, ... training_frame=iris, ... validation_frame=iris) >>> iris_glrm.show() """ return self._parms.get("validation_frame") @validation_frame.setter def validation_frame(self, validation_frame): self._parms["validation_frame"] = H2OFrame._validate(validation_frame, 'validation_frame') @property def ignored_columns(self): """ Names of columns to ignore for training. Type: ``List[str]``. """ return self._parms.get("ignored_columns") @ignored_columns.setter def ignored_columns(self, ignored_columns): assert_is_type(ignored_columns, None, [str]) self._parms["ignored_columns"] = ignored_columns @property def ignore_const_cols(self): """ Ignore constant columns. Type: ``bool``, defaults to ``True``. :examples: >>> iris = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/iris/iris_wheader.csv") >>> iris_glrm = H2OGeneralizedLowRankEstimator(k=3, ... ignore_const_cols=False, ... seed=1234) >>> iris_glrm.train(x=iris.names, training_frame=iris) >>> iris_glrm.show() """ return self._parms.get("ignore_const_cols") @ignore_const_cols.setter def ignore_const_cols(self, ignore_const_cols): assert_is_type(ignore_const_cols, None, bool) self._parms["ignore_const_cols"] = ignore_const_cols @property def score_each_iteration(self): """ Whether to score during each iteration of model training. Type: ``bool``, defaults to ``False``. :examples: >>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/prostate/prostate_cat.csv") >>> prostate[0] = prostate[0].asnumeric() >>> prostate[4] = prostate[4].asnumeric() >>> loss_all = ["Hinge", "Quadratic", "Categorical", "Categorical", ... "Hinge", "Quadratic", "Quadratic", "Quadratic"] >>> pros_glrm = H2OGeneralizedLowRankEstimator(k=5, ... loss_by_col=loss_all, ... score_each_iteration=True, ... transform="standardize", ... seed=12345) >>> pros_glrm.train(x=prostate.names, training_frame=prostate) >>> pros_glrm.show() """ return self._parms.get("score_each_iteration") @score_each_iteration.setter def score_each_iteration(self, score_each_iteration): assert_is_type(score_each_iteration, None, bool) self._parms["score_each_iteration"] = score_each_iteration @property def representation_name(self): """ Frame key to save resulting X Type: ``str``. :examples: >>> acs = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/bigdata/laptop/census/ACS_13_5YR_DP02_cleaned.zip") >>> acs_fill = acs.drop("ZCTA5") >>> acs_glrm = H2OGeneralizedLowRankEstimator(k=10, ... transform="standardize", ... loss="quadratic", ... regularization_x="quadratic", ... regularization_y="L1", ... gamma_x=0.25, ... gamma_y=0.5, ... max_iterations=1, ... representation_name="acs_full") >>> acs_glrm.train(x=acs_fill.names, training_frame=acs) >>> acs_glrm.loading_name >>> acs_glrm.show() """ return self._parms.get("representation_name") @representation_name.setter def representation_name(self, representation_name): assert_is_type(representation_name, None, str) self._parms["representation_name"] = representation_name @property def loading_name(self): """ [Deprecated] Use representation_name instead. Frame key to save resulting X. Type: ``str``. :examples: >>> # loading_name will be deprecated. Use representation_name instead. >>> acs = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/bigdata/laptop/census/ACS_13_5YR_DP02_cleaned.zip") >>> acs_fill = acs.drop("ZCTA5") >>> acs_glrm = H2OGeneralizedLowRankEstimator(k=10, ... transform="standardize", ... loss="quadratic", ... regularization_x="quadratic", ... regularization_y="L1", ... gamma_x=0.25, ... gamma_y=0.5, ... max_iterations=1, ... loading_name="acs_full") >>> acs_glrm.train(x=acs_fill.names, training_frame=acs) >>> acs_glrm.loading_name >>> acs_glrm.show() """ return self._parms.get("loading_name") @loading_name.setter def loading_name(self, loading_name): assert_is_type(loading_name, None, str) self._parms["loading_name"] = loading_name @property def transform(self): """ Transformation of training data Type: ``Literal["none", "standardize", "normalize", "demean", "descale"]``, defaults to ``"none"``. :examples: >>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/prostate/prostate_cat.csv") >>> prostate[0] = prostate[0].asnumeric() >>> prostate[4] = prostate[4].asnumeric() >>> pros_glrm = H2OGeneralizedLowRankEstimator(k=5, ... score_each_iteration=True, ... transform="standardize", ... seed=12345) >>> pros_glrm.train(x=prostate.names, training_frame=prostate) >>> pros_glrm.show() """ return self._parms.get("transform") @transform.setter def transform(self, transform): assert_is_type(transform, None, Enum("none", "standardize", "normalize", "demean", "descale")) self._parms["transform"] = transform @property def k(self): """ Rank of matrix approximation Type: ``int``, defaults to ``1``. :examples: >>> iris = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/iris/iris_wheader.csv") >>> iris_glrm = H2OGeneralizedLowRankEstimator(k=3) >>> iris_glrm.train(x=iris.names, training_frame=iris) >>> iris_glrm.show() """ return self._parms.get("k") @k.setter def k(self, k): assert_is_type(k, None, int) self._parms["k"] = k @property def loss(self): """ Numeric loss function Type: ``Literal["quadratic", "absolute", "huber", "poisson", "hinge", "logistic", "periodic"]``, defaults to ``"quadratic"``. :examples: >>> acs = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/bigdata/laptop/census/ACS_13_5YR_DP02_cleaned.zip") >>> acs_fill = acs.drop("ZCTA5") >>> acs_glrm = H2OGeneralizedLowRankEstimator(k=10, ... transform="standardize", ... loss="absolute", ... regularization_x="quadratic", ... regularization_y="L1", ... gamma_x=0.25, ... gamma_y=0.5, ... max_iterations=700) >>> acs_glrm.train(x=acs_fill.names, training_frame=acs) >>> acs_glrm.show() """ return self._parms.get("loss") @loss.setter def loss(self, loss): assert_is_type(loss, None, Enum("quadratic", "absolute", "huber", "poisson", "hinge", "logistic", "periodic")) self._parms["loss"] = loss @property def loss_by_col(self): """ Loss function by column (override) Type: ``List[Literal["quadratic", "absolute", "huber", "poisson", "hinge", "logistic", "periodic", "categorical", "ordinal"]]``. :examples: >>> arrestsH2O = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/pca_test/USArrests.csv") >>> arrests_glrm = H2OGeneralizedLowRankEstimator(k=3, ... loss="quadratic", ... loss_by_col=["absolute","huber"], ... loss_by_col_idx=[0,3], ... regularization_x="quadratic", ... regularization_y="l1") >>> arrests_glrm.train(x=arrestsH2O.names, training_frame=arrestsH2O) >>> arrests_glrm.show() """ return self._parms.get("loss_by_col") @loss_by_col.setter def loss_by_col(self, loss_by_col): assert_is_type(loss_by_col, None, [Enum("quadratic", "absolute", "huber", "poisson", "hinge", "logistic", "periodic", "categorical", "ordinal")]) self._parms["loss_by_col"] = loss_by_col @property def loss_by_col_idx(self): """ Loss function by column index (override) Type: ``List[int]``. :examples: >>> arrestsH2O = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/pca_test/USArrests.csv") >>> arrests_glrm = H2OGeneralizedLowRankEstimator(k=3, ... loss="quadratic", ... loss_by_col=["absolute","huber"], ... loss_by_col_idx=[0,3], ... regularization_x="quadratic", ... regularization_y="l1") >>> arrests_glrm.train(x=arrestsH2O.names, training_frame=arrestsH2O) >>> arrests_glrm.show() """ return self._parms.get("loss_by_col_idx") @loss_by_col_idx.setter def loss_by_col_idx(self, loss_by_col_idx): assert_is_type(loss_by_col_idx, None, [int]) self._parms["loss_by_col_idx"] = loss_by_col_idx @property def multi_loss(self): """ Categorical loss function Type: ``Literal["categorical", "ordinal"]``, defaults to ``"categorical"``. :examples: >>> arrestsH2O = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/pca_test/USArrests.csv") >>> arrests_glrm = H2OGeneralizedLowRankEstimator(k=3, ... loss="quadratic", ... loss_by_col=["absolute","huber"], ... loss_by_col_idx=[0,3], ... regularization_x="quadratic", ... regularization_y="l1" ... multi_loss="ordinal") >>> arrests_glrm.train(x=arrestsH2O.names, training_frame=arrestsH2O) >>> arrests_glrm.show() """ return self._parms.get("multi_loss") @multi_loss.setter def multi_loss(self, multi_loss): assert_is_type(multi_loss, None, Enum("categorical", "ordinal")) self._parms["multi_loss"] = multi_loss @property def period(self): """ Length of period (only used with periodic loss function) Type: ``int``, defaults to ``1``. :examples: >>> arrestsH2O = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/pca_test/USArrests.csv") >>> arrests_glrm = H2OGeneralizedLowRankEstimator(k=3, ... max_runtime_secs=15, ... max_iterations=500, ... max_updates=900, ... min_step_size=0.005, ... period=5) >>> arrests_glrm.train(x=arrestsH2O.names, training_frame=arrestsH2O) >>> arrests_glrm.show() """ return self._parms.get("period") @period.setter def period(self, period): assert_is_type(period, None, int) self._parms["period"] = period @property def regularization_x(self): """ Regularization function for X matrix Type: ``Literal["none", "quadratic", "l2", "l1", "non_negative", "one_sparse", "unit_one_sparse", "simplex"]``, defaults to ``"none"``. :examples: >>> arrestsH2O = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/pca_test/USArrests.csv") >>> arrests_glrm = H2OGeneralizedLowRankEstimator(k=3, ... loss="quadratic", ... loss_by_col=["absolute","huber"], ... loss_by_col_idx=[0,3], ... regularization_x="quadratic", ... regularization_y="l1") >>> arrests_glrm.train(x=arrestsH2O.names, training_frame=arrestsH2O) >>> arrests_glrm.show() """ return self._parms.get("regularization_x") @regularization_x.setter def regularization_x(self, regularization_x): assert_is_type(regularization_x, None, Enum("none", "quadratic", "l2", "l1", "non_negative", "one_sparse", "unit_one_sparse", "simplex")) self._parms["regularization_x"] = regularization_x @property def regularization_y(self): """ Regularization function for Y matrix Type: ``Literal["none", "quadratic", "l2", "l1", "non_negative", "one_sparse", "unit_one_sparse", "simplex"]``, defaults to ``"none"``. :examples: >>> arrestsH2O = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/pca_test/USArrests.csv") >>> arrests_glrm = H2OGeneralizedLowRankEstimator(k=3, ... loss="quadratic", ... loss_by_col=["absolute","huber"], ... loss_by_col_idx=[0,3], ... regularization_x="quadratic", ... regularization_y="l1") >>> arrests_glrm.train(x=arrestsH2O.names, training_frame=arrestsH2O) >>> arrests_glrm.show() """ return self._parms.get("regularization_y") @regularization_y.setter def regularization_y(self, regularization_y): assert_is_type(regularization_y, None, Enum("none", "quadratic", "l2", "l1", "non_negative", "one_sparse", "unit_one_sparse", "simplex")) self._parms["regularization_y"] = regularization_y @property def gamma_x(self): """ Regularization weight on X matrix Type: ``float``, defaults to ``0.0``. :examples: >>> iris = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/iris/iris_wheader.csv") >>> rank = 3 >>> gx = 0.5 >>> gy = 0.5 >>> trans = "standardize" >>> iris_glrm = H2OGeneralizedLowRankEstimator(k=rank, ... loss="Quadratic", ... gamma_x=gx, ... gamma_y=gy, ... transform=trans) >>> iris_glrm.train(x=iris.names, training_frame=iris) >>> iris_glrm.show() """ return self._parms.get("gamma_x") @gamma_x.setter def gamma_x(self, gamma_x): assert_is_type(gamma_x, None, numeric) self._parms["gamma_x"] = gamma_x @property def gamma_y(self): """ Regularization weight on Y matrix Type: ``float``, defaults to ``0.0``. :examples: >>> iris = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/iris/iris_wheader.csv") >>> rank = 3 >>> gx = 0.5 >>> gy = 0.5 >>> trans = "standardize" >>> iris_glrm = H2OGeneralizedLowRankEstimator(k=rank, ... loss="Quadratic", ... gamma_x=gx, ... gamma_y=gy, ... transform=trans) >>> iris_glrm.train(x=iris.names, training_frame=iris) >>> iris_glrm.show() """ return self._parms.get("gamma_y") @gamma_y.setter def gamma_y(self, gamma_y): assert_is_type(gamma_y, None, numeric) self._parms["gamma_y"] = gamma_y @property def max_iterations(self): """ Maximum number of iterations Type: ``int``, defaults to ``1000``. :examples: >>> acs = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/bigdata/laptop/census/ACS_13_5YR_DP02_cleaned.zip") >>> acs_fill = acs.drop("ZCTA5") >>> acs_glrm = H2OGeneralizedLowRankEstimator(k=10, ... transform="standardize", ... loss="quadratic", ... regularization_x="quadratic", ... regularization_y="L1", ... gamma_x=0.25, ... gamma_y=0.5, ... max_iterations=700) >>> acs_glrm.train(x=acs_fill.names, training_frame=acs) >>> acs_glrm.show() """ return self._parms.get("max_iterations") @max_iterations.setter def max_iterations(self, max_iterations): assert_is_type(max_iterations, None, int) self._parms["max_iterations"] = max_iterations @property def max_updates(self): """ Maximum number of updates, defaults to 2*max_iterations Type: ``int``, defaults to ``2000``. :examples: >>> arrestsH2O = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/pca_test/USArrests.csv") >>> arrests_glrm = H2OGeneralizedLowRankEstimator(k=3, ... max_runtime_secs=15, ... max_iterations=500, ... max_updates=900, ... min_step_size=0.005) >>> arrests_glrm.train(x=arrestsH2O.names, training_frame=arrestsH2O) >>> arrests_glrm.show() """ return self._parms.get("max_updates") @max_updates.setter def max_updates(self, max_updates): assert_is_type(max_updates, None, int) self._parms["max_updates"] = max_updates @property def init_step_size(self): """ Initial step size Type: ``float``, defaults to ``1.0``. :examples: >>> iris = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/iris/iris_wheader.csv") >>> iris_glrm = H2OGeneralizedLowRankEstimator(k=3, ... init_step_size=2.5, ... seed=1234) >>> iris_glrm.train(x=iris.names, training_frame=iris) >>> iris_glrm.show() """ return self._parms.get("init_step_size") @init_step_size.setter def init_step_size(self, init_step_size): assert_is_type(init_step_size, None, numeric) self._parms["init_step_size"] = init_step_size @property def min_step_size(self): """ Minimum step size Type: ``float``, defaults to ``0.0001``. :examples: >>> arrestsH2O = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/pca_test/USArrests.csv") >>> arrests_glrm = H2OGeneralizedLowRankEstimator(k=3, ... max_runtime_secs=15, ... max_iterations=500, ... max_updates=900, ... min_step_size=0.005) >>> arrests_glrm.train(x=arrestsH2O.names, training_frame=arrestsH2O) >>> arrests_glrm.show() """ return self._parms.get("min_step_size") @min_step_size.setter def min_step_size(self, min_step_size): assert_is_type(min_step_size, None, numeric) self._parms["min_step_size"] = min_step_size @property def seed(self): """ RNG seed for initialization Type: ``int``, defaults to ``-1``. :examples: >>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/prostate/prostate_cat.csv") >>> prostate[0] = prostate[0].asnumeric() >>> prostate[4] = prostate[4].asnumeric() >>> glrm_w_seed = H2OGeneralizedLowRankEstimator(k=5, seed=12345) >>> glrm_w_seed.train(x=prostate.names, training_frame=prostate) >>> glrm_wo_seed = H2OGeneralizedLowRankEstimator(k=5, >>> glrm_wo_seed.train(x=prostate.names, training_frame=prostate) >>> glrm_w_seed.show() >>> glrm_wo_seed.show() """ return self._parms.get("seed") @seed.setter def seed(self, seed): assert_is_type(seed, None, int) self._parms["seed"] = seed @property def init(self): """ Initialization mode Type: ``Literal["random", "svd", "plus_plus", "user"]``, defaults to ``"plus_plus"``. :examples: >>> iris = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/iris/iris_wheader.csv") >>> iris_glrm = H2OGeneralizedLowRankEstimator(k=3, ... init="svd", ... seed=1234) >>> iris_glrm.train(x=iris.names, training_frame=iris) >>> iris_glrm.show() """ return self._parms.get("init") @init.setter def init(self, init): assert_is_type(init, None, Enum("random", "svd", "plus_plus", "user")) self._parms["init"] = init @property def svd_method(self): """ Method for computing SVD during initialization (Caution: Randomized is currently experimental and unstable) Type: ``Literal["gram_s_v_d", "power", "randomized"]``, defaults to ``"randomized"``. :examples: >>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/prostate/prostate_cat.csv") >>> prostate[0] = prostate[0].asnumeric() >>> prostate[4] = prostate[4].asnumeric() >>> pros_glrm = H2OGeneralizedLowRankEstimator(k=5, ... svd_method="power", ... seed=1234) >>> pros_glrm.train(x=prostate.names, training_frame=prostate) >>> pros_glrm.show() """ return self._parms.get("svd_method") @svd_method.setter def svd_method(self, svd_method): assert_is_type(svd_method, None, Enum("gram_s_v_d", "power", "randomized")) self._parms["svd_method"] = svd_method @property def user_y(self): """ User-specified initial Y Type: ``Union[None, str, H2OFrame]``. :examples: >>> arrestsH2O = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/pca_test/USArrests.csv") >>> initial_y = [[5.412, 65.24, -7.54, -0.032], ... [2.212, 92.24, -17.54, 23.268], ... [0.312, 123.24, 14.46, 9.768], ... [1.012, 19.24, -15.54, -1.732]] >>> initial_y_h2o = h2o.H2OFrame(list(zip(*initial_y))) >>> arrests_glrm = H2OGeneralizedLowRankEstimator(k=4, ... transform="demean", ... loss="quadratic", ... gamma_x=0.5, ... gamma_y=0.3, ... init="user", ... user_y=initial_y_h2o, ... recover_svd=True) >>> arrests_glrm.train(x=arrestsH2O.names, training_frame=arrestsH2O) >>> arrests_glrm.show() """ return self._parms.get("user_y") @user_y.setter def user_y(self, user_y): self._parms["user_y"] = H2OFrame._validate(user_y, 'user_y') @property def user_x(self): """ User-specified initial X Type: ``Union[None, str, H2OFrame]``. :examples: >>> arrestsH2O = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/pca_test/USArrests.csv") >>> initial_x = ([[5.412, 65.24, -7.54, -0.032, 2.212, 92.24, -17.54, 23.268, 0.312, ... 123.24, 14.46, 9.768, 1.012, 19.24, -15.54, -1.732, 5.412, 65.24, ... -7.54, -0.032, 2.212, 92.24, -17.54, 23.268, 0.312, 123.24, 14.46, ... 9.76, 1.012, 19.24, -15.54, -1.732, 5.412, 65.24, -7.54, -0.032, ... 2.212, 92.24, -17.54, 23.268, 0.312, 123.24, 14.46, 9.768, 1.012, ... 19.24, -15.54, -1.732, 5.412, 65.24]]*4) >>> initial_x_h2o = h2o.H2OFrame(list(zip(*initial_x))) >>> arrests_glrm = H2OGeneralizedLowRankEstimator(k=4, ... transform="demean", ... loss="quadratic", ... gamma_x=0.5, ... gamma_y=0.3, ... init="user", ... user_x=initial_x_h2o, ... recover_svd=True) >>> arrests_glrm.train(x=arrestsH2O.names, training_frame=arrestsH2O) >>> arrests_glrm.show() """ return self._parms.get("user_x") @user_x.setter def user_x(self, user_x): self._parms["user_x"] = H2OFrame._validate(user_x, 'user_x') @property def expand_user_y(self): """ Expand categorical columns in user-specified initial Y Type: ``bool``, defaults to ``True``. :examples: >>> iris = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/iris/iris_wheader.csv") >>> rank = 3 >>> gx = 0.5 >>> gy = 0.5 >>> trans = "standardize" >>> iris_glrm = H2OGeneralizedLowRankEstimator(k=rank, ... loss="Quadratic", ... gamma_x=gx, ... gamma_y=gy, ... transform=trans, ... expand_user_y=False) >>> iris_glrm.train(x=iris.names, training_frame=iris) >>> iris_glrm.show() """ return self._parms.get("expand_user_y") @expand_user_y.setter def expand_user_y(self, expand_user_y): assert_is_type(expand_user_y, None, bool) self._parms["expand_user_y"] = expand_user_y @property def impute_original(self): """ Reconstruct original training data by reversing transform Type: ``bool``, defaults to ``False``. :examples: >>> iris = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/iris/iris_wheader.csv") >>> rank = 3 >>> gx = 0.5 >>> gy = 0.5 >>> trans = "standardize" >>> iris_glrm = H2OGeneralizedLowRankEstimator(k=rank, ... loss="Quadratic", ... gamma_x=gx, ... gamma_y=gy, ... transform=trans ... impute_original=True) >>> iris_glrm.train(x=iris.names, training_frame=iris) >>> iris_glrm.show() """ return self._parms.get("impute_original") @impute_original.setter def impute_original(self, impute_original): assert_is_type(impute_original, None, bool) self._parms["impute_original"] = impute_original @property def recover_svd(self): """ Recover singular values and eigenvectors of XY Type: ``bool``, defaults to ``False``. :examples: >>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/prostate/prostate_cat.csv") >>> prostate[0] = prostate[0].asnumeric() >>> prostate[4] = prostate[4].asnumeric() >>> loss_all = ["Hinge", "Quadratic", "Categorical", "Categorical", ... "Hinge", "Quadratic", "Quadratic", "Quadratic"] >>> pros_glrm = H2OGeneralizedLowRankEstimator(k=5, ... loss_by_col=loss_all, ... recover_svd=True, ... transform="standardize", ... seed=12345) >>> pros_glrm.train(x=prostate.names, training_frame=prostate) >>> pros_glrm.show() """ return self._parms.get("recover_svd") @recover_svd.setter def recover_svd(self, recover_svd): assert_is_type(recover_svd, None, bool) self._parms["recover_svd"] = recover_svd @property def max_runtime_secs(self): """ Maximum allowed runtime in seconds for model training. Use 0 to disable. Type: ``float``, defaults to ``0.0``. :examples: >>> arrestsH2O = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/pca_test/USArrests.csv") >>> arrests_glrm = H2OGeneralizedLowRankEstimator(k=3, ... max_runtime_secs=15, ... max_iterations=500, ... max_updates=900, ... min_step_size=0.005) >>> arrests_glrm.train(x=arrestsH2O.names, training_frame=arrestsH2O) >>> arrests_glrm.show() """ return self._parms.get("max_runtime_secs") @max_runtime_secs.setter def max_runtime_secs(self, max_runtime_secs): assert_is_type(max_runtime_secs, None, numeric) self._parms["max_runtime_secs"] = max_runtime_secs @property def export_checkpoints_dir(self): """ Automatically export generated models to this directory. Type: ``str``. :examples: >>> import tempfile >>> from os import listdir >>> iris = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/iris/iris_wheader.csv") >>> checkpoints_dir = tempfile.mkdtemp() >>> iris_glrm = H2OGeneralizedLowRankEstimator(k=3, ... export_checkpoints_dir=checkpoints_dir, ... seed=1234) >>> iris_glrm.train(x=iris.names, training_frame=iris) >>> len(listdir(checkpoints_dir)) """ return self._parms.get("export_checkpoints_dir") @export_checkpoints_dir.setter def export_checkpoints_dir(self, export_checkpoints_dir): assert_is_type(export_checkpoints_dir, None, str) self._parms["export_checkpoints_dir"] = export_checkpoints_dir
[docs] def transform_frame(self, fr): """ GLRM performs A=X*Y during training. When a new dataset is given, GLRM will perform Anew = Xnew*Y. When predict is called, Xnew*Y is returned. When transform_frame is called, Xnew is returned instead. :return: an H2OFrame that contains Xnew. """ return H2OFrame._expr(expr=ExprNode("transform", ASTId(self.key), ASTId(fr.key)))._frame(fill_cache=True)