Source code for h2o.estimators.deeplearning

#!/usr/bin/env python
# -*- encoding: utf-8 -*-
#
# This file is auto-generated by h2o-3/h2o-bindings/bin/gen_python.py
# Copyright 2016 H2O.ai;  Apache License Version 2.0 (see LICENSE for details)
#
from __future__ import absolute_import, division, print_function, unicode_literals

from h2o.estimators.estimator_base import H2OEstimator
from h2o.exceptions import H2OValueError
from h2o.frame import H2OFrame
from h2o.utils.typechecks import assert_is_type, Enum, numeric


[docs]class H2ODeepLearningEstimator(H2OEstimator): """ Deep Learning Build a Deep Neural Network model using CPUs Builds a feed-forward multilayer artificial neural network on an H2OFrame :examples: >>> from h2o.estimators.deeplearning import H2ODeepLearningEstimator >>> rows = [[1,2,3,4,0], [2,1,2,4,1], [2,1,4,2,1], ... [0,1,2,34,1], [2,3,4,1,0]] * 50 >>> fr = h2o.H2OFrame(rows) >>> fr[4] = fr[4].asfactor() >>> model = H2ODeepLearningEstimator() >>> model.train(x=range(4), y=4, training_frame=fr) >>> model.logloss() """ algo = "deeplearning" supervised_learning = True _options_ = {'model_extensions': ['h2o.model.extensions.ScoringHistoryDL', 'h2o.model.extensions.VariableImportance'], 'verbose': True} def __init__(self, model_id=None, # type: Optional[Union[None, str, H2OEstimator]] training_frame=None, # type: Optional[Union[None, str, H2OFrame]] validation_frame=None, # type: Optional[Union[None, str, H2OFrame]] nfolds=0, # type: int keep_cross_validation_models=True, # type: bool keep_cross_validation_predictions=False, # type: bool keep_cross_validation_fold_assignment=False, # type: bool fold_assignment="auto", # type: Literal["auto", "random", "modulo", "stratified"] fold_column=None, # type: Optional[str] response_column=None, # type: Optional[str] ignored_columns=None, # type: Optional[List[str]] ignore_const_cols=True, # type: bool score_each_iteration=False, # type: bool weights_column=None, # type: Optional[str] offset_column=None, # type: Optional[str] balance_classes=False, # type: bool class_sampling_factors=None, # type: Optional[List[float]] max_after_balance_size=5.0, # type: float max_confusion_matrix_size=20, # type: int checkpoint=None, # type: Optional[Union[None, str, H2OEstimator]] pretrained_autoencoder=None, # type: Optional[Union[None, str, H2OEstimator]] overwrite_with_best_model=True, # type: bool use_all_factor_levels=True, # type: bool standardize=True, # type: bool activation="rectifier", # type: Literal["tanh", "tanh_with_dropout", 
"rectifier", "rectifier_with_dropout", "maxout", "maxout_with_dropout"] hidden=[200, 200], # type: List[int] epochs=10.0, # type: float train_samples_per_iteration=-2, # type: int target_ratio_comm_to_comp=0.05, # type: float seed=-1, # type: int adaptive_rate=True, # type: bool rho=0.99, # type: float epsilon=1e-08, # type: float rate=0.005, # type: float rate_annealing=1e-06, # type: float rate_decay=1.0, # type: float momentum_start=0.0, # type: float momentum_ramp=1000000.0, # type: float momentum_stable=0.0, # type: float nesterov_accelerated_gradient=True, # type: bool input_dropout_ratio=0.0, # type: float hidden_dropout_ratios=None, # type: Optional[List[float]] l1=0.0, # type: float l2=0.0, # type: float max_w2=3.4028235e+38, # type: float initial_weight_distribution="uniform_adaptive", # type: Literal["uniform_adaptive", "uniform", "normal"] initial_weight_scale=1.0, # type: float initial_weights=None, # type: Optional[List[Union[None, str, H2OFrame]]] initial_biases=None, # type: Optional[List[Union[None, str, H2OFrame]]] loss="automatic", # type: Literal["automatic", "cross_entropy", "quadratic", "huber", "absolute", "quantile"] distribution="auto", # type: Literal["auto", "bernoulli", "multinomial", "gaussian", "poisson", "gamma", "tweedie", "laplace", "quantile", "huber"] quantile_alpha=0.5, # type: float tweedie_power=1.5, # type: float huber_alpha=0.9, # type: float score_interval=5.0, # type: float score_training_samples=10000, # type: int score_validation_samples=0, # type: int score_duty_cycle=0.1, # type: float classification_stop=0.0, # type: float regression_stop=1e-06, # type: float stopping_rounds=5, # type: int stopping_metric="auto", # type: Literal["auto", "deviance", "logloss", "mse", "rmse", "mae", "rmsle", "auc", "aucpr", "lift_top_group", "misclassification", "mean_per_class_error", "custom", "custom_increasing"] stopping_tolerance=0.0, # type: float max_runtime_secs=0.0, # type: float score_validation_sampling="uniform", # type: 
Literal["uniform", "stratified"] diagnostics=True, # type: bool fast_mode=True, # type: bool force_load_balance=True, # type: bool variable_importances=True, # type: bool replicate_training_data=True, # type: bool single_node_mode=False, # type: bool shuffle_training_data=False, # type: bool missing_values_handling="mean_imputation", # type: Literal["mean_imputation", "skip"] quiet_mode=False, # type: bool autoencoder=False, # type: bool sparse=False, # type: bool col_major=False, # type: bool average_activation=0.0, # type: float sparsity_beta=0.0, # type: float max_categorical_features=2147483647, # type: int reproducible=False, # type: bool export_weights_and_biases=False, # type: bool mini_batch_size=1, # type: int categorical_encoding="auto", # type: Literal["auto", "enum", "one_hot_internal", "one_hot_explicit", "binary", "eigen", "label_encoder", "sort_by_response", "enum_limited"] elastic_averaging=False, # type: bool elastic_averaging_moving_rate=0.9, # type: float elastic_averaging_regularization=0.001, # type: float export_checkpoints_dir=None, # type: Optional[str] auc_type="auto", # type: Literal["auto", "none", "macro_ovr", "weighted_ovr", "macro_ovo", "weighted_ovo"] ): """ :param model_id: Destination id for this model; auto-generated if not specified. Defaults to ``None``. :type model_id: Union[None, str, H2OEstimator], optional :param training_frame: Id of the training data frame. Defaults to ``None``. :type training_frame: Union[None, str, H2OFrame], optional :param validation_frame: Id of the validation data frame. Defaults to ``None``. :type validation_frame: Union[None, str, H2OFrame], optional :param nfolds: Number of folds for K-fold cross-validation (0 to disable or >= 2). Defaults to ``0``. :type nfolds: int :param keep_cross_validation_models: Whether to keep the cross-validation models. Defaults to ``True``. 
:type keep_cross_validation_models: bool :param keep_cross_validation_predictions: Whether to keep the predictions of the cross-validation models. Defaults to ``False``. :type keep_cross_validation_predictions: bool :param keep_cross_validation_fold_assignment: Whether to keep the cross-validation fold assignment. Defaults to ``False``. :type keep_cross_validation_fold_assignment: bool :param fold_assignment: Cross-validation fold assignment scheme, if fold_column is not specified. The 'Stratified' option will stratify the folds based on the response variable, for classification problems. Defaults to ``"auto"``. :type fold_assignment: Literal["auto", "random", "modulo", "stratified"] :param fold_column: Column with cross-validation fold index assignment per observation. Defaults to ``None``. :type fold_column: str, optional :param response_column: Response variable column. Defaults to ``None``. :type response_column: str, optional :param ignored_columns: Names of columns to ignore for training. Defaults to ``None``. :type ignored_columns: List[str], optional :param ignore_const_cols: Ignore constant columns. Defaults to ``True``. :type ignore_const_cols: bool :param score_each_iteration: Whether to score during each iteration of model training. Defaults to ``False``. :type score_each_iteration: bool :param weights_column: Column with observation weights. Giving some observation a weight of zero is equivalent to excluding it from the dataset; giving an observation a relative weight of 2 is equivalent to repeating that row twice. Negative weights are not allowed. Note: Weights are per-row observation weights and do not increase the size of the data frame. This is typically the number of times a row is repeated, but non-integer values are supported as well. During training, rows with higher weights matter more, due to the larger loss function pre-factor. If you set weight = 0 for a row, the returned prediction frame at that row is zero and this is incorrect. 
To get an accurate prediction, remove all rows with weight == 0. Defaults to ``None``. :type weights_column: str, optional :param offset_column: Offset column. This will be added to the combination of columns before applying the link function. Defaults to ``None``. :type offset_column: str, optional :param balance_classes: Balance training data class counts via over/under-sampling (for imbalanced data). Defaults to ``False``. :type balance_classes: bool :param class_sampling_factors: Desired over/under-sampling ratios per class (in lexicographic order). If not specified, sampling factors will be automatically computed to obtain class balance during training. Requires balance_classes. Defaults to ``None``. :type class_sampling_factors: List[float], optional :param max_after_balance_size: Maximum relative size of the training data after balancing class counts (can be less than 1.0). Requires balance_classes. Defaults to ``5.0``. :type max_after_balance_size: float :param max_confusion_matrix_size: [Deprecated] Maximum size (# classes) for confusion matrices to be printed in the Logs. Defaults to ``20``. :type max_confusion_matrix_size: int :param checkpoint: Model checkpoint to resume training with. Defaults to ``None``. :type checkpoint: Union[None, str, H2OEstimator], optional :param pretrained_autoencoder: Pretrained autoencoder model to initialize this model with. Defaults to ``None``. :type pretrained_autoencoder: Union[None, str, H2OEstimator], optional :param overwrite_with_best_model: If enabled, override the final model with the best model found during training. Defaults to ``True``. :type overwrite_with_best_model: bool :param use_all_factor_levels: Use all factor levels of categorical variables. Otherwise, the first factor level is omitted (without loss of accuracy). Useful for variable importances and auto-enabled for autoencoder. Defaults to ``True``. :type use_all_factor_levels: bool :param standardize: If enabled, automatically standardize the data. 
If disabled, the user must provide properly scaled input data. Defaults to ``True``. :type standardize: bool :param activation: Activation function. Defaults to ``"rectifier"``. :type activation: Literal["tanh", "tanh_with_dropout", "rectifier", "rectifier_with_dropout", "maxout", "maxout_with_dropout"] :param hidden: Hidden layer sizes (e.g. [100, 100]). Defaults to ``[200, 200]``. :type hidden: List[int] :param epochs: How many times the dataset should be iterated (streamed), can be fractional. Defaults to ``10.0``. :type epochs: float :param train_samples_per_iteration: Number of training samples (globally) per MapReduce iteration. Special values are 0: one epoch, -1: all available data (e.g., replicated training data), -2: automatic. Defaults to ``-2``. :type train_samples_per_iteration: int :param target_ratio_comm_to_comp: Target ratio of communication overhead to computation. Only for multi-node operation and train_samples_per_iteration = -2 (auto-tuning). Defaults to ``0.05``. :type target_ratio_comm_to_comp: float :param seed: Seed for random numbers (affects sampling) - Note: only reproducible when running single threaded. Defaults to ``-1``. :type seed: int :param adaptive_rate: Adaptive learning rate. Defaults to ``True``. :type adaptive_rate: bool :param rho: Adaptive learning rate time decay factor (similarity to prior updates). Defaults to ``0.99``. :type rho: float :param epsilon: Adaptive learning rate smoothing factor (to avoid divisions by zero and allow progress). Defaults to ``1e-08``. :type epsilon: float :param rate: Learning rate (higher => less stable, lower => slower convergence). Defaults to ``0.005``. :type rate: float :param rate_annealing: Learning rate annealing: rate / (1 + rate_annealing * samples). Defaults to ``1e-06``. :type rate_annealing: float :param rate_decay: Learning rate decay factor between layers (N-th layer: rate * rate_decay ^ (n - 1)). Defaults to ``1.0``. 
:type rate_decay: float :param momentum_start: Initial momentum at the beginning of training (try 0.5). Defaults to ``0.0``. :type momentum_start: float :param momentum_ramp: Number of training samples for which momentum increases. Defaults to ``1000000.0``. :type momentum_ramp: float :param momentum_stable: Final momentum after the ramp is over (try 0.99). Defaults to ``0.0``. :type momentum_stable: float :param nesterov_accelerated_gradient: Use Nesterov accelerated gradient (recommended). Defaults to ``True``. :type nesterov_accelerated_gradient: bool :param input_dropout_ratio: Input layer dropout ratio (can improve generalization, try 0.1 or 0.2). Defaults to ``0.0``. :type input_dropout_ratio: float :param hidden_dropout_ratios: Hidden layer dropout ratios (can improve generalization), specify one value per hidden layer, defaults to 0.5. Defaults to ``None``. :type hidden_dropout_ratios: List[float], optional :param l1: L1 regularization (can add stability and improve generalization, causes many weights to become 0). Defaults to ``0.0``. :type l1: float :param l2: L2 regularization (can add stability and improve generalization, causes many weights to be small). Defaults to ``0.0``. :type l2: float :param max_w2: Constraint for squared sum of incoming weights per unit (e.g. for Rectifier). Defaults to ``3.4028235e+38``. :type max_w2: float :param initial_weight_distribution: Initial weight distribution. Defaults to ``"uniform_adaptive"``. :type initial_weight_distribution: Literal["uniform_adaptive", "uniform", "normal"] :param initial_weight_scale: Uniform: -value...value, Normal: stddev. Defaults to ``1.0``. :type initial_weight_scale: float :param initial_weights: A list of H2OFrame ids to initialize the weight matrices of this model with. Defaults to ``None``. :type initial_weights: List[Union[None, str, H2OFrame]], optional :param initial_biases: A list of H2OFrame ids to initialize the bias vectors of this model with. Defaults to ``None``. 
:type initial_biases: List[Union[None, str, H2OFrame]], optional :param loss: Loss function. Defaults to ``"automatic"``. :type loss: Literal["automatic", "cross_entropy", "quadratic", "huber", "absolute", "quantile"] :param distribution: Distribution function Defaults to ``"auto"``. :type distribution: Literal["auto", "bernoulli", "multinomial", "gaussian", "poisson", "gamma", "tweedie", "laplace", "quantile", "huber"] :param quantile_alpha: Desired quantile for Quantile regression, must be between 0 and 1. Defaults to ``0.5``. :type quantile_alpha: float :param tweedie_power: Tweedie power for Tweedie regression, must be between 1 and 2. Defaults to ``1.5``. :type tweedie_power: float :param huber_alpha: Desired quantile for Huber/M-regression (threshold between quadratic and linear loss, must be between 0 and 1). Defaults to ``0.9``. :type huber_alpha: float :param score_interval: Shortest time interval (in seconds) between model scoring. Defaults to ``5.0``. :type score_interval: float :param score_training_samples: Number of training set samples for scoring (0 for all). Defaults to ``10000``. :type score_training_samples: int :param score_validation_samples: Number of validation set samples for scoring (0 for all). Defaults to ``0``. :type score_validation_samples: int :param score_duty_cycle: Maximum duty cycle fraction for scoring (lower: more training, higher: more scoring). Defaults to ``0.1``. :type score_duty_cycle: float :param classification_stop: Stopping criterion for classification error fraction on training data (-1 to disable). Defaults to ``0.0``. :type classification_stop: float :param regression_stop: Stopping criterion for regression error (MSE) on training data (-1 to disable). Defaults to ``1e-06``. :type regression_stop: float :param stopping_rounds: Early stopping based on convergence of stopping_metric. 
Stop if simple moving average of length k of the stopping_metric does not improve for k:=stopping_rounds scoring events (0 to disable) Defaults to ``5``. :type stopping_rounds: int :param stopping_metric: Metric to use for early stopping (AUTO: logloss for classification, deviance for regression and anomaly_score for Isolation Forest). Note that custom and custom_increasing can only be used in GBM and DRF with the Python client. Defaults to ``"auto"``. :type stopping_metric: Literal["auto", "deviance", "logloss", "mse", "rmse", "mae", "rmsle", "auc", "aucpr", "lift_top_group", "misclassification", "mean_per_class_error", "custom", "custom_increasing"] :param stopping_tolerance: Relative tolerance for metric-based stopping criterion (stop if relative improvement is not at least this much) Defaults to ``0.0``. :type stopping_tolerance: float :param max_runtime_secs: Maximum allowed runtime in seconds for model training. Use 0 to disable. Defaults to ``0.0``. :type max_runtime_secs: float :param score_validation_sampling: Method used to sample validation dataset for scoring. Defaults to ``"uniform"``. :type score_validation_sampling: Literal["uniform", "stratified"] :param diagnostics: Enable diagnostics for hidden layers. Defaults to ``True``. :type diagnostics: bool :param fast_mode: Enable fast mode (minor approximation in back-propagation). Defaults to ``True``. :type fast_mode: bool :param force_load_balance: Force extra load balancing to increase training speed for small datasets (to keep all cores busy). Defaults to ``True``. :type force_load_balance: bool :param variable_importances: Compute variable importances for input features (Gedeon method) - can be slow for large networks. Defaults to ``True``. :type variable_importances: bool :param replicate_training_data: Replicate the entire training dataset onto every node for faster training on small datasets. Defaults to ``True``. 
:type replicate_training_data: bool :param single_node_mode: Run on a single node for fine-tuning of model parameters. Defaults to ``False``. :type single_node_mode: bool :param shuffle_training_data: Enable shuffling of training data (recommended if training data is replicated and train_samples_per_iteration is close to #nodes x #rows, or if using balance_classes). Defaults to ``False``. :type shuffle_training_data: bool :param missing_values_handling: Handling of missing values. Either MeanImputation or Skip. Defaults to ``"mean_imputation"``. :type missing_values_handling: Literal["mean_imputation", "skip"] :param quiet_mode: Enable quiet mode for less output to standard output. Defaults to ``False``. :type quiet_mode: bool :param autoencoder: Auto-Encoder. Defaults to ``False``. :type autoencoder: bool :param sparse: Sparse data handling (more efficient for data with lots of 0 values). Defaults to ``False``. :type sparse: bool :param col_major: #DEPRECATED Use a column major weight matrix for input layer. Can speed up forward propagation, but might slow down backpropagation. Defaults to ``False``. :type col_major: bool :param average_activation: Average activation for sparse auto-encoder. #Experimental Defaults to ``0.0``. :type average_activation: float :param sparsity_beta: Sparsity regularization. #Experimental Defaults to ``0.0``. :type sparsity_beta: float :param max_categorical_features: Max. number of categorical features, enforced via hashing. #Experimental Defaults to ``2147483647``. :type max_categorical_features: int :param reproducible: Force reproducibility on small data (will be slow - only uses 1 thread). Defaults to ``False``. :type reproducible: bool :param export_weights_and_biases: Whether to export Neural Network weights and biases to H2O Frames. Defaults to ``False``. :type export_weights_and_biases: bool :param mini_batch_size: Mini-batch size (smaller leads to better fit, larger can speed up and generalize better). Defaults to ``1``. 
:type mini_batch_size: int :param categorical_encoding: Encoding scheme for categorical features Defaults to ``"auto"``. :type categorical_encoding: Literal["auto", "enum", "one_hot_internal", "one_hot_explicit", "binary", "eigen", "label_encoder", "sort_by_response", "enum_limited"] :param elastic_averaging: Elastic averaging between compute nodes can improve distributed model convergence. #Experimental Defaults to ``False``. :type elastic_averaging: bool :param elastic_averaging_moving_rate: Elastic averaging moving rate (only if elastic averaging is enabled). Defaults to ``0.9``. :type elastic_averaging_moving_rate: float :param elastic_averaging_regularization: Elastic averaging regularization strength (only if elastic averaging is enabled). Defaults to ``0.001``. :type elastic_averaging_regularization: float :param export_checkpoints_dir: Automatically export generated models to this directory. Defaults to ``None``. :type export_checkpoints_dir: str, optional :param auc_type: Set default multinomial AUC type. Defaults to ``"auto"``. 
:type auc_type: Literal["auto", "none", "macro_ovr", "weighted_ovr", "macro_ovo", "weighted_ovo"] """ super(H2ODeepLearningEstimator, self).__init__() self._parms = {} self._id = self._parms['model_id'] = model_id self.training_frame = training_frame self.validation_frame = validation_frame self.nfolds = nfolds self.keep_cross_validation_models = keep_cross_validation_models self.keep_cross_validation_predictions = keep_cross_validation_predictions self.keep_cross_validation_fold_assignment = keep_cross_validation_fold_assignment self.fold_assignment = fold_assignment self.fold_column = fold_column self.response_column = response_column self.ignored_columns = ignored_columns self.ignore_const_cols = ignore_const_cols self.score_each_iteration = score_each_iteration self.weights_column = weights_column self.offset_column = offset_column self.balance_classes = balance_classes self.class_sampling_factors = class_sampling_factors self.max_after_balance_size = max_after_balance_size self.max_confusion_matrix_size = max_confusion_matrix_size self.checkpoint = checkpoint self.pretrained_autoencoder = pretrained_autoencoder self.overwrite_with_best_model = overwrite_with_best_model self.use_all_factor_levels = use_all_factor_levels self.standardize = standardize self.activation = activation self.hidden = hidden self.epochs = epochs self.train_samples_per_iteration = train_samples_per_iteration self.target_ratio_comm_to_comp = target_ratio_comm_to_comp self.seed = seed self.adaptive_rate = adaptive_rate self.rho = rho self.epsilon = epsilon self.rate = rate self.rate_annealing = rate_annealing self.rate_decay = rate_decay self.momentum_start = momentum_start self.momentum_ramp = momentum_ramp self.momentum_stable = momentum_stable self.nesterov_accelerated_gradient = nesterov_accelerated_gradient self.input_dropout_ratio = input_dropout_ratio self.hidden_dropout_ratios = hidden_dropout_ratios self.l1 = l1 self.l2 = l2 self.max_w2 = max_w2 self.initial_weight_distribution = 
initial_weight_distribution self.initial_weight_scale = initial_weight_scale self.initial_weights = initial_weights self.initial_biases = initial_biases self.loss = loss self.distribution = distribution self.quantile_alpha = quantile_alpha self.tweedie_power = tweedie_power self.huber_alpha = huber_alpha self.score_interval = score_interval self.score_training_samples = score_training_samples self.score_validation_samples = score_validation_samples self.score_duty_cycle = score_duty_cycle self.classification_stop = classification_stop self.regression_stop = regression_stop self.stopping_rounds = stopping_rounds self.stopping_metric = stopping_metric self.stopping_tolerance = stopping_tolerance self.max_runtime_secs = max_runtime_secs self.score_validation_sampling = score_validation_sampling self.diagnostics = diagnostics self.fast_mode = fast_mode self.force_load_balance = force_load_balance self.variable_importances = variable_importances self.replicate_training_data = replicate_training_data self.single_node_mode = single_node_mode self.shuffle_training_data = shuffle_training_data self.missing_values_handling = missing_values_handling self.quiet_mode = quiet_mode self.autoencoder = autoencoder self.sparse = sparse self.col_major = col_major self.average_activation = average_activation self.sparsity_beta = sparsity_beta self.max_categorical_features = max_categorical_features self.reproducible = reproducible self.export_weights_and_biases = export_weights_and_biases self.mini_batch_size = mini_batch_size self.categorical_encoding = categorical_encoding self.elastic_averaging = elastic_averaging self.elastic_averaging_moving_rate = elastic_averaging_moving_rate self.elastic_averaging_regularization = elastic_averaging_regularization self.export_checkpoints_dir = export_checkpoints_dir self.auc_type = auc_type @property def training_frame(self): """ Id of the training data frame. Type: ``Union[None, str, H2OFrame]``. 
:examples: >>> airlines= h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/airlines/allyears2k_headers.zip") >>> airlines["Year"]= airlines["Year"].asfactor() >>> airlines["Month"]= airlines["Month"].asfactor() >>> airlines["DayOfWeek"] = airlines["DayOfWeek"].asfactor() >>> airlines["Cancelled"] = airlines["Cancelled"].asfactor() >>> airlines['FlightNum'] = airlines['FlightNum'].asfactor() >>> predictors = ["Origin", "Dest", "Year", "UniqueCarrier", ... "DayOfWeek", "Month", "Distance", "FlightNum"] >>> response = "IsDepDelayed" >>> train, valid= airlines.split_frame(ratios=[.8], seed=1234) >>> airlines_dl = H2ODeepLearningEstimator() >>> airlines_dl.train(x=predictors, ... y=response, ... training_frame=train, ... validation_frame=valid) >>> airlines_dl.auc() """ return self._parms.get("training_frame") @training_frame.setter def training_frame(self, training_frame): self._parms["training_frame"] = H2OFrame._validate(training_frame, 'training_frame') @property def validation_frame(self): """ Id of the validation data frame. Type: ``Union[None, str, H2OFrame]``. :examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor() >>> predictors = ["displacement","power","weight","acceleration","year"] >>> response = "economy_20mpg" >>> train, valid = cars.split_frame(ratios=[.8], seed=1234) >>> cars_dl = H2ODeepLearningEstimator(standardize=True, ... seed=1234) >>> cars_dl.train(x=predictors, ... y=response, ... training_frame=train, ... validation_frame=valid) >>> cars_dl.auc() """ return self._parms.get("validation_frame") @validation_frame.setter def validation_frame(self, validation_frame): self._parms["validation_frame"] = H2OFrame._validate(validation_frame, 'validation_frame') @property def nfolds(self): """ Number of folds for K-fold cross-validation (0 to disable or >= 2). Type: ``int``, defaults to ``0``. 
:examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor() >>> predictors = ["displacement","power","weight","acceleration","year"] >>> response = "economy_20mpg" >>> cars_dl = H2ODeepLearningEstimator(nfolds=5, seed=1234) >>> cars_dl.train(x=predictors, ... y=response, ... training_frame=cars) >>> cars_dl.auc() """ return self._parms.get("nfolds") @nfolds.setter def nfolds(self, nfolds): assert_is_type(nfolds, None, int) self._parms["nfolds"] = nfolds @property def keep_cross_validation_models(self): """ Whether to keep the cross-validation models. Type: ``bool``, defaults to ``True``. :examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor() >>> predictors = ["displacement","power","weight","acceleration","year"] >>> response = "economy_20mpg" >>> cars_dl = H2ODeepLearningEstimator(keep_cross_validation_models=True, ... nfolds=5, ... seed=1234) >>> cars_dl.train(x=predictors, ... y=response, ... training_frame=cars) >>> print(cars_dl.cross_validation_models()) """ return self._parms.get("keep_cross_validation_models") @keep_cross_validation_models.setter def keep_cross_validation_models(self, keep_cross_validation_models): assert_is_type(keep_cross_validation_models, None, bool) self._parms["keep_cross_validation_models"] = keep_cross_validation_models @property def keep_cross_validation_predictions(self): """ Whether to keep the predictions of the cross-validation models. Type: ``bool``, defaults to ``False``. 
:examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor() >>> predictors = ["displacement","power","weight","acceleration","year"] >>> response = "economy_20mpg" >>> cars_dl = H2ODeepLearningEstimator(keep_cross_validation_predictions=True, ... nfolds=5, ... seed=1234) >>> cars_dl.train(x=predictors, ... y=response, ... training_frame=cars) >>> print(cars_dl.cross_validation_predictions()) """ return self._parms.get("keep_cross_validation_predictions") @keep_cross_validation_predictions.setter def keep_cross_validation_predictions(self, keep_cross_validation_predictions): assert_is_type(keep_cross_validation_predictions, None, bool) self._parms["keep_cross_validation_predictions"] = keep_cross_validation_predictions @property def keep_cross_validation_fold_assignment(self): """ Whether to keep the cross-validation fold assignment. Type: ``bool``, defaults to ``False``. :examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor() >>> predictors = ["displacement","power","weight","acceleration","year"] >>> response = "economy_20mpg" >>> cars_dl = H2ODeepLearningEstimator(keep_cross_validation_fold_assignment=True, ... nfolds=5, ... seed=1234) >>> cars_dl.train(x=predictors, ... y=response, ... 
training_frame=cars) >>> print(cars_dl.cross_validation_fold_assignment()) """ return self._parms.get("keep_cross_validation_fold_assignment") @keep_cross_validation_fold_assignment.setter def keep_cross_validation_fold_assignment(self, keep_cross_validation_fold_assignment): assert_is_type(keep_cross_validation_fold_assignment, None, bool) self._parms["keep_cross_validation_fold_assignment"] = keep_cross_validation_fold_assignment @property def fold_assignment(self): """ Cross-validation fold assignment scheme, if fold_column is not specified. The 'Stratified' option will stratify the folds based on the response variable, for classification problems. Type: ``Literal["auto", "random", "modulo", "stratified"]``, defaults to ``"auto"``. :examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor() >>> predictors = ["displacement","power","weight","acceleration","year"] >>> response = "cylinders" >>> train, valid = cars.split_frame(ratios=[.8], seed=1234) >>> cars_dl = H2ODeepLearningEstimator(fold_assignment="Random", ... nfolds=5, ... seed=1234) >>> cars_dl.train(x=predictors, ... y=response, ... training_frame=train, ... validation_frame=valid) >>> cars_dl.mse() """ return self._parms.get("fold_assignment") @fold_assignment.setter def fold_assignment(self, fold_assignment): assert_is_type(fold_assignment, None, Enum("auto", "random", "modulo", "stratified")) self._parms["fold_assignment"] = fold_assignment @property def fold_column(self): """ Column with cross-validation fold index assignment per observation. Type: ``str``. 
:examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor() >>> predictors = ["displacement","power","weight","acceleration","year"] >>> response = "cylinders" >>> fold_numbers = cars.kfold_column(n_folds=5, seed=1234) >>> fold_numbers.set_names(["fold_numbers"]) >>> cars = cars.cbind(fold_numbers) >>> print(cars['fold_numbers']) >>> cars_dl = H2ODeepLearningEstimator(seed=1234) >>> cars_dl.train(x=predictors, ... y=response, ... training_frame=cars, ... fold_column="fold_numbers") >>> cars_dl.mse() """ return self._parms.get("fold_column") @fold_column.setter def fold_column(self, fold_column): assert_is_type(fold_column, None, str) self._parms["fold_column"] = fold_column @property def response_column(self): """ Response variable column. Type: ``str``. """ return self._parms.get("response_column") @response_column.setter def response_column(self, response_column): assert_is_type(response_column, None, str) self._parms["response_column"] = response_column @property def ignored_columns(self): """ Names of columns to ignore for training. Type: ``List[str]``. """ return self._parms.get("ignored_columns") @ignored_columns.setter def ignored_columns(self, ignored_columns): assert_is_type(ignored_columns, None, [str]) self._parms["ignored_columns"] = ignored_columns @property def ignore_const_cols(self): """ Ignore constant columns. Type: ``bool``, defaults to ``True``. :examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor() >>> predictors = ["displacement","power","weight","acceleration","year"] >>> response = "economy_20mpg" >>> cars["const_1"] = 6 >>> cars["const_2"] = 7 >>> train, valid = cars.split_frame(ratios=[.8], seed=1234) >>> cars_dl = H2ODeepLearningEstimator(seed=1234, ... 
ignore_const_cols=True) >>> cars_dl.train(x=predictors, ... y=response, ... training_frame=train, ... validation_frame=valid) >>> cars_dl.auc() """ return self._parms.get("ignore_const_cols") @ignore_const_cols.setter def ignore_const_cols(self, ignore_const_cols): assert_is_type(ignore_const_cols, None, bool) self._parms["ignore_const_cols"] = ignore_const_cols @property def score_each_iteration(self): """ Whether to score during each iteration of model training. Type: ``bool``, defaults to ``False``. :examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor() >>> predictors = ["displacement","power","weight","acceleration","year"] >>> response = "economy_20mpg" >>> cars_dl = H2ODeepLearningEstimator(score_each_iteration=True, ... seed=1234) >>> cars_dl.train(x=predictors, ... y=response, ... training_frame=cars) >>> cars_dl.auc() """ return self._parms.get("score_each_iteration") @score_each_iteration.setter def score_each_iteration(self, score_each_iteration): assert_is_type(score_each_iteration, None, bool) self._parms["score_each_iteration"] = score_each_iteration @property def weights_column(self): """ Column with observation weights. Giving some observation a weight of zero is equivalent to excluding it from the dataset; giving an observation a relative weight of 2 is equivalent to repeating that row twice. Negative weights are not allowed. Note: Weights are per-row observation weights and do not increase the size of the data frame. This is typically the number of times a row is repeated, but non-integer values are supported as well. During training, rows with higher weights matter more, due to the larger loss function pre-factor. If you set weight = 0 for a row, the returned prediction frame at that row is zero and this is incorrect. To get an accurate prediction, remove all rows with weight == 0. Type: ``str``. 
:examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor() >>> predictors = ["displacement","power","acceleration","year"] >>> response = "economy_20mpg" >>> train, valid = cars.split_frame(ratios=[.8], seed=1234) >>> cars_dl = H2ODeepLearningEstimator(weights_column="weight", ... seed=1234) >>> cars_dl.train(x=predictors, ... y=response, ... training_frame=train, ... validation_frame=valid) >>> cars_dl.auc() """ return self._parms.get("weights_column") @weights_column.setter def weights_column(self, weights_column): assert_is_type(weights_column, None, str) self._parms["weights_column"] = weights_column @property def offset_column(self): """ Offset column. This will be added to the combination of columns before applying the link function. Type: ``str``. :examples: >>> boston = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/BostonHousing.csv") >>> predictors = boston.columns[:-1] >>> response = "medv" >>> boston['chas'] = boston['chas'].asfactor() >>> boston["offset"] = boston["medv"].log() >>> train, valid = boston.split_frame(ratios=[.8], seed=1234) >>> boston_dl = H2ODeepLearningEstimator(offset_column="offset", ... seed=1234) >>> boston_dl.train(x=predictors, ... y=response, ... training_frame=train, ... validation_frame=valid) >>> boston_dl.mse() """ return self._parms.get("offset_column") @offset_column.setter def offset_column(self, offset_column): assert_is_type(offset_column, None, str) self._parms["offset_column"] = offset_column @property def balance_classes(self): """ Balance training data class counts via over/under-sampling (for imbalanced data). Type: ``bool``, defaults to ``False``. 
:examples: >>> covtype = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/covtype/covtype.20k.data") >>> covtype[54] = covtype[54].asfactor() >>> predictors = covtype.columns[0:54] >>> response = 'C55' >>> train, valid = covtype.split_frame(ratios=[.8], seed=1234) >>> cov_dl = H2ODeepLearningEstimator(balance_classes=True, ... seed=1234) >>> cov_dl.train(x=predictors, ... y=response, ... training_frame=train, ... validation_frame=valid) >>> cov_dl.mse() """ return self._parms.get("balance_classes") @balance_classes.setter def balance_classes(self, balance_classes): assert_is_type(balance_classes, None, bool) self._parms["balance_classes"] = balance_classes @property def class_sampling_factors(self): """ Desired over/under-sampling ratios per class (in lexicographic order). If not specified, sampling factors will be automatically computed to obtain class balance during training. Requires balance_classes. Type: ``List[float]``. :examples: >>> covtype = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/covtype/covtype.20k.data") >>> covtype[54] = covtype[54].asfactor() >>> predictors = covtype.columns[0:54] >>> response = 'C55' >>> train, valid = covtype.split_frame(ratios=[.8], seed=1234) >>> sample_factors = [1., 0.5, 1., 1., 1., 1., 1.] >>> cov_dl = H2ODeepLearningEstimator(balance_classes=True, ... class_sampling_factors=sample_factors, ... seed=1234) >>> cov_dl.train(x=predictors, ... y=response, ... training_frame=train, ... validation_frame=valid) >>> cov_dl.mse() """ return self._parms.get("class_sampling_factors") @class_sampling_factors.setter def class_sampling_factors(self, class_sampling_factors): assert_is_type(class_sampling_factors, None, [float]) self._parms["class_sampling_factors"] = class_sampling_factors @property def max_after_balance_size(self): """ Maximum relative size of the training data after balancing class counts (can be less than 1.0). Requires balance_classes. 
Type: ``float``, defaults to ``5.0``. :examples: >>> covtype = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/covtype/covtype.20k.data") >>> covtype[54] = covtype[54].asfactor() >>> predictors = covtype.columns[0:54] >>> response = 'C55' >>> train, valid = covtype.split_frame(ratios=[.8], seed=1234) >>> max = .85 >>> cov_dl = H2ODeepLearningEstimator(balance_classes=True, ... max_after_balance_size=max, ... seed=1234) >>> cov_dl.train(x=predictors, ... y=response, ... training_frame=train, ... validation_frame=valid) >>> cov_dl.logloss() """ return self._parms.get("max_after_balance_size") @max_after_balance_size.setter def max_after_balance_size(self, max_after_balance_size): assert_is_type(max_after_balance_size, None, float) self._parms["max_after_balance_size"] = max_after_balance_size @property def max_confusion_matrix_size(self): """ [Deprecated] Maximum size (# classes) for confusion matrices to be printed in the Logs. Type: ``int``, defaults to ``20``. """ return self._parms.get("max_confusion_matrix_size") @max_confusion_matrix_size.setter def max_confusion_matrix_size(self, max_confusion_matrix_size): assert_is_type(max_confusion_matrix_size, None, int) self._parms["max_confusion_matrix_size"] = max_confusion_matrix_size @property def checkpoint(self): """ Model checkpoint to resume training with. Type: ``Union[None, str, H2OEstimator]``. :examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor() >>> predictors = ["displacement","power","weight","acceleration","year"] >>> response = "economy_20mpg" >>> train, valid = cars.split_frame(ratios=[.8], seed=1234) >>> cars_dl = H2ODeepLearningEstimator(activation="tanh", ... autoencoder=True, ... seed=1234, ... model_id="cars_dl") >>> cars_dl.train(x=predictors, ... y=response, ... training_frame=train, ... 
validation_frame=valid) >>> cars_dl.mse() >>> cars_cont = H2ODeepLearningEstimator(checkpoint=cars_dl, ... seed=1234) >>> cars_cont.train(x=predictors, ... y=response, ... training_frame=train, ... validation_frame=valid) >>> cars_cont.mse() """ return self._parms.get("checkpoint") @checkpoint.setter def checkpoint(self, checkpoint): assert_is_type(checkpoint, None, str, H2OEstimator) self._parms["checkpoint"] = checkpoint @property def pretrained_autoencoder(self): """ Pretrained autoencoder model to initialize this model with. Type: ``Union[None, str, H2OEstimator]``. :examples: >>> from h2o.estimators.deeplearning import H2OAutoEncoderEstimator >>> resp = 784 >>> nfeatures = 20 >>> train = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/bigdata/laptop/mnist/train.csv.gz") >>> test = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/bigdata/laptop/mnist/test.csv.gz") >>> train[resp] = train[resp].asfactor() >>> test[resp] = test[resp].asfactor() >>> sid = train[0].runif(0) >>> train_unsupervised = train[sid>=0.5] >>> train_unsupervised.pop(resp) >>> train_supervised = train[sid<0.5] >>> ae_model = H2OAutoEncoderEstimator(activation="Tanh", ... hidden=[nfeatures], ... model_id="ae_model", ... epochs=1, ... ignore_const_cols=False, ... reproducible=True, ... seed=1234) >>> ae_model.train(list(range(resp)), training_frame=train_unsupervised) >>> ae_model.mse() >>> pretrained_model = H2ODeepLearningEstimator(activation="Tanh", ... hidden=[nfeatures], ... epochs=1, ... reproducible = True, ... seed=1234, ... ignore_const_cols=False, ... pretrained_autoencoder="ae_model") >>> pretrained_model.train(list(range(resp)), resp, ... training_frame=train_supervised, ... 
validation_frame=test) >>> pretrained_model.mse() """ return self._parms.get("pretrained_autoencoder") @pretrained_autoencoder.setter def pretrained_autoencoder(self, pretrained_autoencoder): assert_is_type(pretrained_autoencoder, None, str, H2OEstimator) self._parms["pretrained_autoencoder"] = pretrained_autoencoder @property def overwrite_with_best_model(self): """ If enabled, override the final model with the best model found during training. Type: ``bool``, defaults to ``True``. :examples: >>> boston = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/BostonHousing.csv") >>> predictors = boston.columns[:-1] >>> response = "medv" >>> boston['chas'] = boston['chas'].asfactor() >>> boston["offset"] = boston["medv"].log() >>> train, valid = boston.split_frame(ratios=[.8], seed=1234) >>> boston_dl = H2ODeepLearningEstimator(overwrite_with_best_model=True, ... seed=1234) >>> boston_dl.train(x=predictors, ... y=response, ... training_frame=train, ... validation_frame=valid) >>> boston_dl.mse() """ return self._parms.get("overwrite_with_best_model") @overwrite_with_best_model.setter def overwrite_with_best_model(self, overwrite_with_best_model): assert_is_type(overwrite_with_best_model, None, bool) self._parms["overwrite_with_best_model"] = overwrite_with_best_model @property def use_all_factor_levels(self): """ Use all factor levels of categorical variables. Otherwise, the first factor level is omitted (without loss of accuracy). Useful for variable importances and auto-enabled for autoencoder. Type: ``bool``, defaults to ``True``. 
:examples: >>> airlines= h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/airlines/allyears2k_headers.zip") >>> airlines["Year"]= airlines["Year"].asfactor() >>> airlines["Month"]= airlines["Month"].asfactor() >>> airlines["DayOfWeek"] = airlines["DayOfWeek"].asfactor() >>> airlines["Cancelled"] = airlines["Cancelled"].asfactor() >>> airlines['FlightNum'] = airlines['FlightNum'].asfactor() >>> predictors = ["Origin", "Dest", "Year", "UniqueCarrier", ... "DayOfWeek", "Month", "Distance", "FlightNum"] >>> response = "IsDepDelayed" >>> train, valid= airlines.split_frame(ratios=[.8], seed=1234) >>> airlines_dl = H2ODeepLearningEstimator(use_all_factor_levels=True, ... seed=1234) >>> airlines_dl.train(x=predictors, ... y=response, ... training_frame=train, ... validation_frame=valid) >>> airlines_dl.mse() """ return self._parms.get("use_all_factor_levels") @use_all_factor_levels.setter def use_all_factor_levels(self, use_all_factor_levels): assert_is_type(use_all_factor_levels, None, bool) self._parms["use_all_factor_levels"] = use_all_factor_levels @property def standardize(self): """ If enabled, automatically standardize the data. If disabled, the user must provide properly scaled input data. Type: ``bool``, defaults to ``True``. :examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor() >>> predictors = ["displacement","power","weight","acceleration","year"] >>> response = "economy_20mpg" >>> cars_dl = H2ODeepLearningEstimator(standardize=True, ... seed=1234) >>> cars_dl.train(x=predictors, ... y=response, ... training_frame=cars) >>> cars_dl.auc() """ return self._parms.get("standardize") @standardize.setter def standardize(self, standardize): assert_is_type(standardize, None, bool) self._parms["standardize"] = standardize @property def activation(self): """ Activation function. 
Type: ``Literal["tanh", "tanh_with_dropout", "rectifier", "rectifier_with_dropout", "maxout", "maxout_with_dropout"]``, defaults to ``"rectifier"``. :examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor() >>> predictors = ["displacement","power","weight","acceleration","year"] >>> response = "cylinders" >>> train, valid = cars.split_frame(ratios=[.8], seed=1234) >>> cars_dl = H2ODeepLearningEstimator(activation="tanh") >>> cars_dl.train(x=predictors, ... y=response, ... training_frame=train, ... validation_frame=valid) >>> cars_dl.mse() """ return self._parms.get("activation") @activation.setter def activation(self, activation): assert_is_type(activation, None, Enum("tanh", "tanh_with_dropout", "rectifier", "rectifier_with_dropout", "maxout", "maxout_with_dropout")) self._parms["activation"] = activation @property def hidden(self): """ Hidden layer sizes (e.g. [100, 100]). Type: ``List[int]``, defaults to ``[200, 200]``. :examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor() >>> predictors = ["displacement","power","weight","acceleration","year"] >>> response = "cylinders" >>> train, valid = cars.split_frame(ratios=[.8], seed=1234) >>> cars_dl = H2ODeepLearningEstimator(hidden=[100,100], ... seed=1234) >>> cars_dl.train(x=predictors, ... y=response, ... training_frame=train, ... validation_frame=valid) >>> cars_dl.mse() """ return self._parms.get("hidden") @hidden.setter def hidden(self, hidden): assert_is_type(hidden, None, [int]) self._parms["hidden"] = hidden @property def epochs(self): """ How many times the dataset should be iterated (streamed), can be fractional. Type: ``float``, defaults to ``10.0``. 
:examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor() >>> predictors = ["displacement","power","weight","acceleration","year"] >>> response = "cylinders" >>> train, valid = cars.split_frame(ratios=[.8], seed=1234) >>> cars_dl = H2ODeepLearningEstimator(epochs=15, ... seed=1234) >>> cars_dl.train(x=predictors, ... y=response, ... training_frame=train, ... validation_frame=valid) >>> cars_dl.mse() """ return self._parms.get("epochs") @epochs.setter def epochs(self, epochs): assert_is_type(epochs, None, numeric) self._parms["epochs"] = epochs @property def train_samples_per_iteration(self): """ Number of training samples (globally) per MapReduce iteration. Special values are 0: one epoch, -1: all available data (e.g., replicated training data), -2: automatic. Type: ``int``, defaults to ``-2``. :examples: >>> airlines= h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/airlines/allyears2k_headers.zip") >>> airlines["Year"]= airlines["Year"].asfactor() >>> airlines["Month"]= airlines["Month"].asfactor() >>> airlines["DayOfWeek"] = airlines["DayOfWeek"].asfactor() >>> airlines["Cancelled"] = airlines["Cancelled"].asfactor() >>> airlines['FlightNum'] = airlines['FlightNum'].asfactor() >>> predictors = ["Origin", "Dest", "Year", "UniqueCarrier", ... "DayOfWeek", "Month", "Distance", "FlightNum"] >>> response = "IsDepDelayed" >>> train, valid= airlines.split_frame(ratios=[.8], seed=1234) >>> airlines_dl = H2ODeepLearningEstimator(train_samples_per_iteration=-1, ... epochs=1, ... seed=1234) >>> airlines_dl.train(x=predictors, ... y=response, ... training_frame=train, ... 
validation_frame=valid) >>> airlines_dl.auc() """ return self._parms.get("train_samples_per_iteration") @train_samples_per_iteration.setter def train_samples_per_iteration(self, train_samples_per_iteration): assert_is_type(train_samples_per_iteration, None, int) self._parms["train_samples_per_iteration"] = train_samples_per_iteration @property def target_ratio_comm_to_comp(self): """ Target ratio of communication overhead to computation. Only for multi-node operation and train_samples_per_iteration = -2 (auto-tuning). Type: ``float``, defaults to ``0.05``. :examples: >>> airlines= h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/airlines/allyears2k_headers.zip") >>> airlines["Year"]= airlines["Year"].asfactor() >>> airlines["Month"]= airlines["Month"].asfactor() >>> airlines["DayOfWeek"] = airlines["DayOfWeek"].asfactor() >>> airlines["Cancelled"] = airlines["Cancelled"].asfactor() >>> airlines['FlightNum'] = airlines['FlightNum'].asfactor() >>> predictors = ["Origin", "Dest", "Year", "UniqueCarrier", ... "DayOfWeek", "Month", "Distance", "FlightNum"] >>> response = "IsDepDelayed" >>> train, valid= airlines.split_frame(ratios=[.8], seed=1234) >>> airlines_dl = H2ODeepLearningEstimator(target_ratio_comm_to_comp=0.05, ... seed=1234) >>> airlines_dl.train(x=predictors, ... y=response, ... training_frame=train, ... validation_frame=valid) >>> airlines_dl.auc() """ return self._parms.get("target_ratio_comm_to_comp") @target_ratio_comm_to_comp.setter def target_ratio_comm_to_comp(self, target_ratio_comm_to_comp): assert_is_type(target_ratio_comm_to_comp, None, numeric) self._parms["target_ratio_comm_to_comp"] = target_ratio_comm_to_comp @property def seed(self): """ Seed for random numbers (affects sampling) - Note: only reproducible when running single threaded. Type: ``int``, defaults to ``-1``. 
:examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor() >>> predictors = ["displacement","power","weight","acceleration","year"] >>> response = "economy_20mpg" >>> train, valid = cars.split_frame(ratios=[.8], seed=1234) >>> cars_dl = H2ODeepLearningEstimator(seed=1234) >>> cars_dl.train(x=predictors, ... y=response, ... training_frame=train, ... validation_frame=valid) >>> cars_dl.auc() """ return self._parms.get("seed") @seed.setter def seed(self, seed): assert_is_type(seed, None, int) self._parms["seed"] = seed @property def adaptive_rate(self): """ Adaptive learning rate. Type: ``bool``, defaults to ``True``. :examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor() >>> predictors = ["displacement","power","weight","acceleration","year"] >>> response = "cylinders" >>> train, valid = cars.split_frame(ratios=[.8], seed=1234) >>> cars_dl = H2ODeepLearningEstimator(adaptive_rate=True) >>> cars_dl.train(x=predictors, ... y=response, ... training_frame=train, ... validation_frame=valid) >>> cars_dl.mse() """ return self._parms.get("adaptive_rate") @adaptive_rate.setter def adaptive_rate(self, adaptive_rate): assert_is_type(adaptive_rate, None, bool) self._parms["adaptive_rate"] = adaptive_rate @property def rho(self): """ Adaptive learning rate time decay factor (similarity to prior updates). Type: ``float``, defaults to ``0.99``. :examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor() >>> predictors = ["displacement","power","weight","acceleration","year"] >>> response = "economy_20mpg" >>> cars_dl = H2ODeepLearningEstimator(rho=0.9, ... seed=1234) >>> cars_dl.train(x=predictors, ... y=response, ... 
training_frame=cars) >>> cars_dl.auc() """ return self._parms.get("rho") @rho.setter def rho(self, rho): assert_is_type(rho, None, numeric) self._parms["rho"] = rho @property def epsilon(self): """ Adaptive learning rate smoothing factor (to avoid divisions by zero and allow progress). Type: ``float``, defaults to ``1e-08``. :examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor() >>> predictors = ["displacement","power","weight","acceleration","year"] >>> response = "cylinders" >>> train, valid = cars.split_frame(ratios=[.8], seed=1234) >>> cars_dl = H2ODeepLearningEstimator(epsilon=1e-6, ... seed=1234) >>> cars_dl.train(x=predictors, ... y=response, ... training_frame=train, ... validation_frame=valid) >>> cars_dl.mse() """ return self._parms.get("epsilon") @epsilon.setter def epsilon(self, epsilon): assert_is_type(epsilon, None, numeric) self._parms["epsilon"] = epsilon @property def rate(self): """ Learning rate (higher => less stable, lower => slower convergence). Type: ``float``, defaults to ``0.005``. :examples: >>> train = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/bigdata/laptop/mnist/train.csv.gz") >>> test = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/bigdata/laptop/mnist/test.csv.gz") >>> predictors = list(range(0,784)) >>> resp = 784 >>> train[resp] = train[resp].asfactor() >>> test[resp] = test[resp].asfactor() >>> nclasses = train[resp].nlevels()[0] >>> model = H2ODeepLearningEstimator(activation="RectifierWithDropout", ... adaptive_rate=False, ... rate=0.01, ... rate_decay=0.9, ... rate_annealing=1e-6, ... momentum_start=0.95, ... momentum_ramp=1e5, ... momentum_stable=0.99, ... nesterov_accelerated_gradient=False, ... input_dropout_ratio=0.2, ... train_samples_per_iteration=20000, ... classification_stop=-1, ... 
l1=1e-5) >>> model.train (x=predictors,y=resp, training_frame=train, validation_frame=test) >>> model.model_performance(valid=True) """ return self._parms.get("rate") @rate.setter def rate(self, rate): assert_is_type(rate, None, numeric) self._parms["rate"] = rate @property def rate_annealing(self): """ Learning rate annealing: rate / (1 + rate_annealing * samples). Type: ``float``, defaults to ``1e-06``. :examples: >>> train = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/bigdata/laptop/mnist/train.csv.gz") >>> test = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/bigdata/laptop/mnist/test.csv.gz") >>> predictors = list(range(0,784)) >>> resp = 784 >>> train[resp] = train[resp].asfactor() >>> test[resp] = test[resp].asfactor() >>> nclasses = train[resp].nlevels()[0] >>> model = H2ODeepLearningEstimator(activation="RectifierWithDropout", ... adaptive_rate=False, ... rate=0.01, ... rate_decay=0.9, ... rate_annealing=1e-6, ... momentum_start=0.95, ... momentum_ramp=1e5, ... momentum_stable=0.99, ... nesterov_accelerated_gradient=False, ... input_dropout_ratio=0.2, ... train_samples_per_iteration=20000, ... classification_stop=-1, ... l1=1e-5) >>> model.train (x=predictors, ... y=resp, ... training_frame=train, ... validation_frame=test) >>> model.mse() """ return self._parms.get("rate_annealing") @rate_annealing.setter def rate_annealing(self, rate_annealing): assert_is_type(rate_annealing, None, numeric) self._parms["rate_annealing"] = rate_annealing @property def rate_decay(self): """ Learning rate decay factor between layers (N-th layer: rate * rate_decay ^ (n - 1)). Type: ``float``, defaults to ``1.0``. 
:examples: >>> train = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/bigdata/laptop/mnist/train.csv.gz") >>> test = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/bigdata/laptop/mnist/test.csv.gz") >>> predictors = list(range(0,784)) >>> resp = 784 >>> train[resp] = train[resp].asfactor() >>> test[resp] = test[resp].asfactor() >>> nclasses = train[resp].nlevels()[0] >>> model = H2ODeepLearningEstimator(activation="RectifierWithDropout", ... adaptive_rate=False, ... rate=0.01, ... rate_decay=0.9, ... rate_annealing=1e-6, ... momentum_start=0.95, ... momentum_ramp=1e5, ... momentum_stable=0.99, ... nesterov_accelerated_gradient=False, ... input_dropout_ratio=0.2, ... train_samples_per_iteration=20000, ... classification_stop=-1, ... l1=1e-5) >>> model.train (x=predictors, ... y=resp, ... training_frame=train, ... validation_frame=test) >>> model.model_performance() """ return self._parms.get("rate_decay") @rate_decay.setter def rate_decay(self, rate_decay): assert_is_type(rate_decay, None, numeric) self._parms["rate_decay"] = rate_decay @property def momentum_start(self): """ Initial momentum at the beginning of training (try 0.5). Type: ``float``, defaults to ``0.0``. :examples: >>> airlines= h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/airlines/allyears2k_headers.zip") >>> predictors = ["Year","Month","DayofMonth","DayOfWeek","CRSDepTime", ... "CRSArrTime","UniqueCarrier","FlightNum"] >>> response_col = "IsDepDelayed" >>> airlines_dl = H2ODeepLearningEstimator(hidden=[200,200], ... activation="Rectifier", ... input_dropout_ratio=0.0, ... momentum_start=0.9, ... momentum_stable=0.99, ... momentum_ramp=1e7, ... epochs=100, ... stopping_rounds=4, ... train_samples_per_iteration=30000, ... mini_batch_size=32, ... score_duty_cycle=0.25, ... score_interval=1) >>> airlines_dl.train(x=predictors, ... y=response_col, ... 
training_frame=airlines) >>> airlines_dl.mse() """ return self._parms.get("momentum_start") @momentum_start.setter def momentum_start(self, momentum_start): assert_is_type(momentum_start, None, numeric) self._parms["momentum_start"] = momentum_start @property def momentum_ramp(self): """ Number of training samples for which momentum increases. Type: ``float``, defaults to ``1000000.0``. :examples: >>> airlines= h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/airlines/allyears2k_headers.zip") >>> predictors = ["Year","Month","DayofMonth","DayOfWeek","CRSDepTime", ... "CRSArrTime","UniqueCarrier","FlightNum"] >>> response_col = "IsDepDelayed" >>> airlines_dl = H2ODeepLearningEstimator(hidden=[200,200], ... activation="Rectifier", ... input_dropout_ratio=0.0, ... momentum_start=0.9, ... momentum_stable=0.99, ... momentum_ramp=1e7, ... epochs=100, ... stopping_rounds=4, ... train_samples_per_iteration=30000, ... mini_batch_size=32, ... score_duty_cycle=0.25, ... score_interval=1) >>> airlines_dl.train(x=predictors, ... y=response_col, ... training_frame=airlines) >>> airlines_dl.mse() """ return self._parms.get("momentum_ramp") @momentum_ramp.setter def momentum_ramp(self, momentum_ramp): assert_is_type(momentum_ramp, None, numeric) self._parms["momentum_ramp"] = momentum_ramp @property def momentum_stable(self): """ Final momentum after the ramp is over (try 0.99). Type: ``float``, defaults to ``0.0``. :examples: >>> airlines= h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/airlines/allyears2k_headers.zip") >>> predictors = ["Year","Month","DayofMonth","DayOfWeek","CRSDepTime", ... "CRSArrTime","UniqueCarrier","FlightNum"] >>> response_col = "IsDepDelayed" >>> airlines_dl = H2ODeepLearningEstimator(hidden=[200,200], ... activation="Rectifier", ... input_dropout_ratio=0.0, ... momentum_start=0.9, ... momentum_stable=0.99, ... momentum_ramp=1e7, ... epochs=100, ... stopping_rounds=4, ... 
train_samples_per_iteration=30000, ... mini_batch_size=32, ... score_duty_cycle=0.25, ... score_interval=1) >>> airlines_dl.train(x=predictors, ... y=response_col, ... training_frame=airlines) >>> airlines_dl.mse() """ return self._parms.get("momentum_stable") @momentum_stable.setter def momentum_stable(self, momentum_stable): assert_is_type(momentum_stable, None, numeric) self._parms["momentum_stable"] = momentum_stable @property def nesterov_accelerated_gradient(self): """ Use Nesterov accelerated gradient (recommended). Type: ``bool``, defaults to ``True``. :examples: >>> train = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/bigdata/laptop/mnist/train.csv.gz") >>> test = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/bigdata/laptop/mnist/test.csv.gz") >>> predictors = list(range(0,784)) >>> resp = 784 >>> train[resp] = train[resp].asfactor() >>> test[resp] = test[resp].asfactor() >>> nclasses = train[resp].nlevels()[0] >>> model = H2ODeepLearningEstimator(activation="RectifierWithDropout", ... adaptive_rate=False, ... rate=0.01, ... rate_decay=0.9, ... rate_annealing=1e-6, ... momentum_start=0.95, ... momentum_ramp=1e5, ... momentum_stable=0.99, ... nesterov_accelerated_gradient=False, ... input_dropout_ratio=0.2, ... train_samples_per_iteration=20000, ... classification_stop=-1, ... l1=1e-5) >>> model.train (x=predictors, ... y=resp, ... training_frame=train, ... validation_frame=test) >>> model.model_performance() """ return self._parms.get("nesterov_accelerated_gradient") @nesterov_accelerated_gradient.setter def nesterov_accelerated_gradient(self, nesterov_accelerated_gradient): assert_is_type(nesterov_accelerated_gradient, None, bool) self._parms["nesterov_accelerated_gradient"] = nesterov_accelerated_gradient @property def input_dropout_ratio(self): """ Input layer dropout ratio (can improve generalization, try 0.1 or 0.2). Type: ``float``, defaults to ``0.0``. 
:examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor() >>> predictors = ["displacement","power","weight","acceleration","year"] >>> response = "economy_20mpg" >>> train, valid = cars.split_frame(ratios=[.8], seed=1234) >>> cars_dl = H2ODeepLearningEstimator(input_dropout_ratio=0.2, ... seed=1234) >>> cars_dl.train(x=predictors, ... y=response, ... training_frame=train, ... validation_frame=valid) >>> cars_dl.auc() """ return self._parms.get("input_dropout_ratio") @input_dropout_ratio.setter def input_dropout_ratio(self, input_dropout_ratio): assert_is_type(input_dropout_ratio, None, numeric) self._parms["input_dropout_ratio"] = input_dropout_ratio @property def hidden_dropout_ratios(self): """ Hidden layer dropout ratios (can improve generalization), specify one value per hidden layer, defaults to 0.5. Type: ``List[float]``. :examples: >>> train = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/bigdata/laptop/mnist/train.csv.gz") >>> valid = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/bigdata/laptop/mnist/test.csv.gz") >>> features = list(range(0,784)) >>> target = 784 >>> train[target] = train[target].asfactor() >>> valid[target] = valid[target].asfactor() >>> model = H2ODeepLearningEstimator(epochs=20, ... hidden=[200,200], ... hidden_dropout_ratios=[0.5,0.5], ... seed=1234, ... activation='tanhwithdropout') >>> model.train(x=features, ... y=target, ... training_frame=train, ... validation_frame=valid) >>> model.mse() """ return self._parms.get("hidden_dropout_ratios") @hidden_dropout_ratios.setter def hidden_dropout_ratios(self, hidden_dropout_ratios): assert_is_type(hidden_dropout_ratios, None, [numeric]) self._parms["hidden_dropout_ratios"] = hidden_dropout_ratios @property def l1(self): """ L1 regularization (can add stability and improve generalization, causes many weights to become 0). 
Type: ``float``, defaults to ``0.0``. :examples: >>> covtype = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/covtype/covtype.20k.data") >>> covtype[54] = covtype[54].asfactor() >>> hh_imbalanced = H2ODeepLearningEstimator(l1=1e-5, ... activation="Rectifier", ... loss="CrossEntropy", ... hidden=[200,200], ... epochs=1, ... balance_classes=False, ... reproducible=True, ... seed=1234) >>> hh_imbalanced.train(x=list(range(54)),y=54, training_frame=covtype) >>> hh_imbalanced.mse() """ return self._parms.get("l1") @l1.setter def l1(self, l1): assert_is_type(l1, None, numeric) self._parms["l1"] = l1 @property def l2(self): """ L2 regularization (can add stability and improve generalization, causes many weights to be small). Type: ``float``, defaults to ``0.0``. :examples: >>> covtype = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/covtype/covtype.20k.data") >>> covtype[54] = covtype[54].asfactor() >>> hh_imbalanced = H2ODeepLearningEstimator(l2=1e-5, ... activation="Rectifier", ... loss="CrossEntropy", ... hidden=[200,200], ... epochs=1, ... balance_classes=False, ... reproducible=True, ... seed=1234) >>> hh_imbalanced.train(x=list(range(54)),y=54, training_frame=covtype) >>> hh_imbalanced.mse() """ return self._parms.get("l2") @l2.setter def l2(self, l2): assert_is_type(l2, None, numeric) self._parms["l2"] = l2 @property def max_w2(self): """ Constraint for squared sum of incoming weights per unit (e.g. for Rectifier). Type: ``float``, defaults to ``3.4028235e+38``. :examples: >>> covtype = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/covtype/covtype.20k.data") >>> covtype[54] = covtype[54].asfactor() >>> predictors = covtype.columns[0:54] >>> response = 'C55' >>> train, valid = covtype.split_frame(ratios=[.8], seed=1234) >>> cov_dl = H2ODeepLearningEstimator(activation="RectifierWithDropout", ... hidden=[10,10], ... epochs=10, ... input_dropout_ratio=0.2, ... l1=1e-5, ... max_w2=10.5, ... 
stopping_rounds=0) >>> cov_dl.train(x=predictors, ... y=response, ... training_frame=train, ... validation_frame=valid) >>> cov_dl.mse() """ return self._parms.get("max_w2") @max_w2.setter def max_w2(self, max_w2): assert_is_type(max_w2, None, float) self._parms["max_w2"] = max_w2 @property def initial_weight_distribution(self): """ Initial weight distribution. Type: ``Literal["uniform_adaptive", "uniform", "normal"]``, defaults to ``"uniform_adaptive"``. :examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor() >>> predictors = ["displacement","power","weight","acceleration","year"] >>> response = "economy_20mpg" >>> train, valid = cars.split_frame(ratios=[.8], seed=1234) >>> cars_dl = H2ODeepLearningEstimator(initial_weight_distribution="Uniform", ... seed=1234) >>> cars_dl.train(x=predictors, ... y=response, ... training_frame=train, ... validation_frame=valid) >>> cars_dl.auc() """ return self._parms.get("initial_weight_distribution") @initial_weight_distribution.setter def initial_weight_distribution(self, initial_weight_distribution): assert_is_type(initial_weight_distribution, None, Enum("uniform_adaptive", "uniform", "normal")) self._parms["initial_weight_distribution"] = initial_weight_distribution @property def initial_weight_scale(self): """ Uniform: -value...value, Normal: stddev. Type: ``float``, defaults to ``1.0``. :examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor() >>> predictors = ["displacement","power","weight","acceleration","year"] >>> response = "economy_20mpg" >>> train, valid = cars.split_frame(ratios=[.8], seed=1234) >>> cars_dl = H2ODeepLearningEstimator(initial_weight_scale=1.5, ... seed=1234) >>> cars_dl.train(x=predictors, ... y=response, ... training_frame=train, ... 
validation_frame=valid) >>> cars_dl.auc() """ return self._parms.get("initial_weight_scale") @initial_weight_scale.setter def initial_weight_scale(self, initial_weight_scale): assert_is_type(initial_weight_scale, None, numeric) self._parms["initial_weight_scale"] = initial_weight_scale @property def initial_weights(self): """ A list of H2OFrame ids to initialize the weight matrices of this model with. Type: ``List[Union[None, str, H2OFrame]]``. :examples: >>> iris = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/iris/iris.csv") >>> dl1 = H2ODeepLearningEstimator(hidden=[10,10], ... export_weights_and_biases=True) >>> dl1.train(x=list(range(4)), y=4, training_frame=iris) >>> p1 = dl1.model_performance(iris).logloss() >>> ll1 = dl1.predict(iris) >>> print(p1) >>> w1 = dl1.weights(0) >>> w2 = dl1.weights(1) >>> w3 = dl1.weights(2) >>> b1 = dl1.biases(0) >>> b2 = dl1.biases(1) >>> b3 = dl1.biases(2) >>> dl2 = H2ODeepLearningEstimator(hidden=[10,10], ... initial_weights=[w1, w2, w3], ... initial_biases=[b1, b2, b3], ... epochs=0) >>> dl2.train(x=list(range(4)), y=4, training_frame=iris) >>> dl2.initial_weights """ return self._parms.get("initial_weights") @initial_weights.setter def initial_weights(self, initial_weights): assert_is_type(initial_weights, None, [None, str, H2OFrame]) self._parms["initial_weights"] = initial_weights @property def initial_biases(self): """ A list of H2OFrame ids to initialize the bias vectors of this model with. Type: ``List[Union[None, str, H2OFrame]]``. :examples: >>> iris = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/iris/iris.csv") >>> dl1 = H2ODeepLearningEstimator(hidden=[10,10], ... 
export_weights_and_biases=True) >>> dl1.train(x=list(range(4)), y=4, training_frame=iris) >>> p1 = dl1.model_performance(iris).logloss() >>> ll1 = dl1.predict(iris) >>> print(p1) >>> w1 = dl1.weights(0) >>> w2 = dl1.weights(1) >>> w3 = dl1.weights(2) >>> b1 = dl1.biases(0) >>> b2 = dl1.biases(1) >>> b3 = dl1.biases(2) >>> dl2 = H2ODeepLearningEstimator(hidden=[10,10], ... initial_weights=[w1, w2, w3], ... initial_biases=[b1, b2, b3], ... epochs=0) >>> dl2.train(x=list(range(4)), y=4, training_frame=iris) >>> dl2.initial_biases """ return self._parms.get("initial_biases") @initial_biases.setter def initial_biases(self, initial_biases): assert_is_type(initial_biases, None, [None, str, H2OFrame]) self._parms["initial_biases"] = initial_biases @property def loss(self): """ Loss function. Type: ``Literal["automatic", "cross_entropy", "quadratic", "huber", "absolute", "quantile"]``, defaults to ``"automatic"``. :examples: >>> covtype = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/covtype/covtype.20k.data") >>> covtype[54] = covtype[54].asfactor() >>> hh_imbalanced = H2ODeepLearningEstimator(l1=1e-5, ... activation="Rectifier", ... loss="CrossEntropy", ... hidden=[200,200], ... epochs=1, ... balance_classes=False, ... reproducible=True, ... seed=1234) >>> hh_imbalanced.train(x=list(range(54)),y=54, training_frame=covtype) >>> hh_imbalanced.mse() """ return self._parms.get("loss") @loss.setter def loss(self, loss): assert_is_type(loss, None, Enum("automatic", "cross_entropy", "quadratic", "huber", "absolute", "quantile")) self._parms["loss"] = loss @property def distribution(self): """ Distribution function Type: ``Literal["auto", "bernoulli", "multinomial", "gaussian", "poisson", "gamma", "tweedie", "laplace", "quantile", "huber"]``, defaults to ``"auto"``. 
:examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor() >>> predictors = ["displacement","power","weight","acceleration","year"] >>> response = "cylinders" >>> train, valid = cars.split_frame(ratios=[.8], seed=1234) >>> cars_dl = H2ODeepLearningEstimator(distribution="poisson", ... seed=1234) >>> cars_dl.train(x=predictors, ... y=response, ... training_frame=train, ... validation_frame=valid) >>> cars_dl.mse() """ return self._parms.get("distribution") @distribution.setter def distribution(self, distribution): assert_is_type(distribution, None, Enum("auto", "bernoulli", "multinomial", "gaussian", "poisson", "gamma", "tweedie", "laplace", "quantile", "huber")) self._parms["distribution"] = distribution @property def quantile_alpha(self): """ Desired quantile for Quantile regression, must be between 0 and 1. Type: ``float``, defaults to ``0.5``. :examples: >>> boston = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/BostonHousing.csv") >>> predictors = boston.columns[:-1] >>> response = "medv" >>> boston['chas'] = boston['chas'].asfactor() >>> train, valid = boston.split_frame(ratios=[.8], seed=1234) >>> boston_dl = H2ODeepLearningEstimator(distribution="quantile", ... quantile_alpha=.8, ... seed=1234) >>> boston_dl.train(x=predictors, ... y=response, ... training_frame=train, ... validation_frame=valid) >>> boston_dl.mse() """ return self._parms.get("quantile_alpha") @quantile_alpha.setter def quantile_alpha(self, quantile_alpha): assert_is_type(quantile_alpha, None, numeric) self._parms["quantile_alpha"] = quantile_alpha @property def tweedie_power(self): """ Tweedie power for Tweedie regression, must be between 1 and 2. Type: ``float``, defaults to ``1.5``. 
:examples: >>> airlines= h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/airlines/allyears2k_headers.zip") >>> airlines["Year"]= airlines["Year"].asfactor() >>> airlines["Month"]= airlines["Month"].asfactor() >>> airlines["DayOfWeek"] = airlines["DayOfWeek"].asfactor() >>> airlines["Cancelled"] = airlines["Cancelled"].asfactor() >>> airlines['FlightNum'] = airlines['FlightNum'].asfactor() >>> predictors = ["Origin", "Dest", "Year", "UniqueCarrier", ... "DayOfWeek", "Month", "Distance", "FlightNum"] >>> response = "IsDepDelayed" >>> train, valid= airlines.split_frame(ratios=[.8], seed=1234) >>> airlines_dl = H2ODeepLearningEstimator(tweedie_power=1.5, ... seed=1234) >>> airlines_dl.train(x=predictors, ... y=response, ... training_frame=train, ... validation_frame=valid) >>> airlines_dl.auc() """ return self._parms.get("tweedie_power") @tweedie_power.setter def tweedie_power(self, tweedie_power): assert_is_type(tweedie_power, None, numeric) self._parms["tweedie_power"] = tweedie_power @property def huber_alpha(self): """ Desired quantile for Huber/M-regression (threshold between quadratic and linear loss, must be between 0 and 1). Type: ``float``, defaults to ``0.9``. :examples: >>> insurance = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/glm_test/insurance.csv") >>> predictors = insurance.columns[0:4] >>> response = 'Claims' >>> insurance['Group'] = insurance['Group'].asfactor() >>> insurance['Age'] = insurance['Age'].asfactor() >>> train, valid = insurance.split_frame(ratios=[.8], seed=1234) >>> insurance_dl = H2ODeepLearningEstimator(distribution="huber", ... huber_alpha=0.9, ... seed=1234) >>> insurance_dl.train(x=predictors, ... y=response, ... training_frame=train, ... 
validation_frame=valid) >>> insurance_dl.mse() """ return self._parms.get("huber_alpha") @huber_alpha.setter def huber_alpha(self, huber_alpha): assert_is_type(huber_alpha, None, numeric) self._parms["huber_alpha"] = huber_alpha @property def score_interval(self): """ Shortest time interval (in seconds) between model scoring. Type: ``float``, defaults to ``5.0``. :examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor() >>> predictors = ["displacement","power","weight","acceleration","year"] >>> response = "economy_20mpg" >>> cars_dl = H2ODeepLearningEstimator(score_interval=3, ... seed=1234) >>> cars_dl.train(x=predictors, ... y=response, ... training_frame=cars) >>> cars_dl.auc() """ return self._parms.get("score_interval") @score_interval.setter def score_interval(self, score_interval): assert_is_type(score_interval, None, numeric) self._parms["score_interval"] = score_interval @property def score_training_samples(self): """ Number of training set samples for scoring (0 for all). Type: ``int``, defaults to ``10000``. :examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor() >>> predictors = ["displacement","power","weight","acceleration","year"] >>> response = "economy_20mpg" >>> cars_dl = H2ODeepLearningEstimator(score_training_samples=10000, ... seed=1234) >>> cars_dl.train(x=predictors, ... y=response, ... training_frame=cars) >>> cars_dl.auc() """ return self._parms.get("score_training_samples") @score_training_samples.setter def score_training_samples(self, score_training_samples): assert_is_type(score_training_samples, None, int) self._parms["score_training_samples"] = score_training_samples @property def score_validation_samples(self): """ Number of validation set samples for scoring (0 for all). 
Type: ``int``, defaults to ``0``. :examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor() >>> predictors = ["displacement","power","weight","acceleration","year"] >>> response = "economy_20mpg" >>> train, valid = cars.split_frame(ratios=[.8], seed=1234) >>> cars_dl = H2ODeepLearningEstimator(score_validation_samples=3, ... seed=1234) >>> cars_dl.train(x=predictors, ... y=response, ... training_frame=train, ... validation_frame=valid) >>> cars_dl.auc() """ return self._parms.get("score_validation_samples") @score_validation_samples.setter def score_validation_samples(self, score_validation_samples): assert_is_type(score_validation_samples, None, int) self._parms["score_validation_samples"] = score_validation_samples @property def score_duty_cycle(self): """ Maximum duty cycle fraction for scoring (lower: more training, higher: more scoring). Type: ``float``, defaults to ``0.1``. :examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor() >>> predictors = ["displacement","power","weight","acceleration","year"] >>> response = "economy_20mpg" >>> cars_dl = H2ODeepLearningEstimator(score_duty_cycle=0.2, ... seed=1234) >>> cars_dl.train(x=predictors, ... y=response, ... training_frame=cars) >>> cars_dl.auc() """ return self._parms.get("score_duty_cycle") @score_duty_cycle.setter def score_duty_cycle(self, score_duty_cycle): assert_is_type(score_duty_cycle, None, numeric) self._parms["score_duty_cycle"] = score_duty_cycle @property def classification_stop(self): """ Stopping criterion for classification error fraction on training data (-1 to disable). Type: ``float``, defaults to ``0.0``. 
:examples: >>> covtype = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/covtype/covtype.20k.data") >>> covtype[54] = covtype[54].asfactor() >>> predictors = covtype.columns[0:54] >>> response = 'C55' >>> train, valid = covtype.split_frame(ratios=[.8], seed=1234) >>> cov_dl = H2ODeepLearningEstimator(classification_stop=1.5, ... seed=1234) >>> cov_dl.train(x=predictors, ... y=response, ... training_frame=train, ... validation_frame=valid) >>> cov_dl.mse() """ return self._parms.get("classification_stop") @classification_stop.setter def classification_stop(self, classification_stop): assert_is_type(classification_stop, None, numeric) self._parms["classification_stop"] = classification_stop @property def regression_stop(self): """ Stopping criterion for regression error (MSE) on training data (-1 to disable). Type: ``float``, defaults to ``1e-06``. :examples: >>> airlines= h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/airlines/allyears2k_headers.zip") >>> airlines["Year"]= airlines["Year"].asfactor() >>> airlines["Month"]= airlines["Month"].asfactor() >>> airlines["DayOfWeek"] = airlines["DayOfWeek"].asfactor() >>> airlines["Cancelled"] = airlines["Cancelled"].asfactor() >>> airlines['FlightNum'] = airlines['FlightNum'].asfactor() >>> predictors = ["Origin", "Dest", "Year", "UniqueCarrier", ... "DayOfWeek", "Month", "Distance", "FlightNum"] >>> response = "IsDepDelayed" >>> train, valid= airlines.split_frame(ratios=[.8], seed=1234) >>> airlines_dl = H2ODeepLearningEstimator(regression_stop=1e-6, ... seed=1234) >>> airlines_dl.train(x=predictors, ... y=response, ... training_frame=train, ... 
validation_frame=valid) >>> airlines_dl.auc() """ return self._parms.get("regression_stop") @regression_stop.setter def regression_stop(self, regression_stop): assert_is_type(regression_stop, None, numeric) self._parms["regression_stop"] = regression_stop @property def stopping_rounds(self): """ Early stopping based on convergence of stopping_metric. Stop if simple moving average of length k of the stopping_metric does not improve for k:=stopping_rounds scoring events (0 to disable) Type: ``int``, defaults to ``5``. :examples: >>> airlines= h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/airlines/allyears2k_headers.zip") >>> airlines["Year"]= airlines["Year"].asfactor() >>> airlines["Month"]= airlines["Month"].asfactor() >>> airlines["DayOfWeek"] = airlines["DayOfWeek"].asfactor() >>> airlines["Cancelled"] = airlines["Cancelled"].asfactor() >>> airlines['FlightNum'] = airlines['FlightNum'].asfactor() >>> predictors = ["Origin", "Dest", "Year", "UniqueCarrier", ... "DayOfWeek", "Month", "Distance", "FlightNum"] >>> response = "IsDepDelayed" >>> train, valid= airlines.split_frame(ratios=[.8], seed=1234) >>> airlines_dl = H2ODeepLearningEstimator(stopping_metric="auc", ... stopping_rounds=3, ... stopping_tolerance=1e-2, ... seed=1234) >>> airlines_dl.train(x=predictors, ... y=response, ... training_frame=train, ... validation_frame=valid) >>> airlines_dl.auc() """ return self._parms.get("stopping_rounds") @stopping_rounds.setter def stopping_rounds(self, stopping_rounds): assert_is_type(stopping_rounds, None, int) self._parms["stopping_rounds"] = stopping_rounds @property def stopping_metric(self): """ Metric to use for early stopping (AUTO: logloss for classification, deviance for regression and anomaly_score for Isolation Forest). Note that custom and custom_increasing can only be used in GBM and DRF with the Python client. 
Type: ``Literal["auto", "deviance", "logloss", "mse", "rmse", "mae", "rmsle", "auc", "aucpr", "lift_top_group", "misclassification", "mean_per_class_error", "custom", "custom_increasing"]``, defaults to ``"auto"``. :examples: >>> airlines= h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/airlines/allyears2k_headers.zip") >>> airlines["Year"]= airlines["Year"].asfactor() >>> airlines["Month"]= airlines["Month"].asfactor() >>> airlines["DayOfWeek"] = airlines["DayOfWeek"].asfactor() >>> airlines["Cancelled"] = airlines["Cancelled"].asfactor() >>> airlines['FlightNum'] = airlines['FlightNum'].asfactor() >>> predictors = ["Origin", "Dest", "Year", "UniqueCarrier", ... "DayOfWeek", "Month", "Distance", "FlightNum"] >>> response = "IsDepDelayed" >>> train, valid= airlines.split_frame(ratios=[.8], seed=1234) >>> airlines_dl = H2ODeepLearningEstimator(stopping_metric="auc", ... stopping_rounds=3, ... stopping_tolerance=1e-2, ... seed=1234) >>> airlines_dl.train(x=predictors, ... y=response, ... training_frame=train, ... validation_frame=valid) >>> airlines_dl.auc() """ return self._parms.get("stopping_metric") @stopping_metric.setter def stopping_metric(self, stopping_metric): assert_is_type(stopping_metric, None, Enum("auto", "deviance", "logloss", "mse", "rmse", "mae", "rmsle", "auc", "aucpr", "lift_top_group", "misclassification", "mean_per_class_error", "custom", "custom_increasing")) self._parms["stopping_metric"] = stopping_metric @property def stopping_tolerance(self): """ Relative tolerance for metric-based stopping criterion (stop if relative improvement is not at least this much) Type: ``float``, defaults to ``0.0``. 
:examples: >>> airlines= h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/airlines/allyears2k_headers.zip") >>> airlines["Year"]= airlines["Year"].asfactor() >>> airlines["Month"]= airlines["Month"].asfactor() >>> airlines["DayOfWeek"] = airlines["DayOfWeek"].asfactor() >>> airlines["Cancelled"] = airlines["Cancelled"].asfactor() >>> airlines['FlightNum'] = airlines['FlightNum'].asfactor() >>> predictors = ["Origin", "Dest", "Year", "UniqueCarrier", ... "DayOfWeek", "Month", "Distance", "FlightNum"] >>> response = "IsDepDelayed" >>> train, valid= airlines.split_frame(ratios=[.8], seed=1234) >>> airlines_dl = H2ODeepLearningEstimator(stopping_metric="auc", ... stopping_rounds=3, ... stopping_tolerance=1e-2, ... seed=1234) >>> airlines_dl.train(x=predictors, ... y=response, ... training_frame=train, ... validation_frame=valid) >>> airlines_dl.auc() """ return self._parms.get("stopping_tolerance") @stopping_tolerance.setter def stopping_tolerance(self, stopping_tolerance): assert_is_type(stopping_tolerance, None, numeric) self._parms["stopping_tolerance"] = stopping_tolerance @property def max_runtime_secs(self): """ Maximum allowed runtime in seconds for model training. Use 0 to disable. Type: ``float``, defaults to ``0.0``. :examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor() >>> predictors = ["displacement","power","weight","acceleration","year"] >>> response = "economy_20mpg" >>> train, valid = cars.split_frame(ratios=[.8], seed=1234) >>> cars_dl = H2ODeepLearningEstimator(max_runtime_secs=10, ... seed=1234) >>> cars_dl.train(x=predictors, ... y=response, ... training_frame=train, ... 
validation_frame=valid) >>> cars_dl.auc() """ return self._parms.get("max_runtime_secs") @max_runtime_secs.setter def max_runtime_secs(self, max_runtime_secs): assert_is_type(max_runtime_secs, None, numeric) self._parms["max_runtime_secs"] = max_runtime_secs @property def score_validation_sampling(self): """ Method used to sample validation dataset for scoring. Type: ``Literal["uniform", "stratified"]``, defaults to ``"uniform"``. :examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor() >>> predictors = ["displacement","power","weight","acceleration","year"] >>> response = "economy_20mpg" >>> train, valid = cars.split_frame(ratios=[.8], seed=1234) >>> cars_dl = H2ODeepLearningEstimator(score_validation_sampling="uniform", ... seed=1234) >>> cars_dl.train(x=predictors, ... y=response, ... training_frame=train, ... validation_frame=valid) >>> cars_dl.auc() """ return self._parms.get("score_validation_sampling") @score_validation_sampling.setter def score_validation_sampling(self, score_validation_sampling): assert_is_type(score_validation_sampling, None, Enum("uniform", "stratified")) self._parms["score_validation_sampling"] = score_validation_sampling @property def diagnostics(self): """ Enable diagnostics for hidden layers. Type: ``bool``, defaults to ``True``. :examples: >>> covtype = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/covtype/covtype.20k.data") >>> covtype[54] = covtype[54].asfactor() >>> predictors = covtype.columns[0:54] >>> response = 'C55' >>> train, valid = covtype.split_frame(ratios=[.8], seed=1234) >>> cov_dl = H2ODeepLearningEstimator(diagnostics=True, ... seed=1234) >>> cov_dl.train(x=predictors, ... y=response, ... training_frame=train, ... 
validation_frame=valid) >>> cov_dl.mse() """ return self._parms.get("diagnostics") @diagnostics.setter def diagnostics(self, diagnostics): assert_is_type(diagnostics, None, bool) self._parms["diagnostics"] = diagnostics @property def fast_mode(self): """ Enable fast mode (minor approximation in back-propagation). Type: ``bool``, defaults to ``True``. :examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor() >>> predictors = ["displacement","power","weight","acceleration","year"] >>> response = "cylinders" >>> train, valid = cars.split_frame(ratios=[.8], seed=1234) >>> cars_dl = H2ODeepLearningEstimator(fast_mode=False, ... seed=1234) >>> cars_dl.train(x=predictors, ... y=response, ... training_frame=train, ... validation_frame=valid) >>> cars_dl.mse() """ return self._parms.get("fast_mode") @fast_mode.setter def fast_mode(self, fast_mode): assert_is_type(fast_mode, None, bool) self._parms["fast_mode"] = fast_mode @property def force_load_balance(self): """ Force extra load balancing to increase training speed for small datasets (to keep all cores busy). Type: ``bool``, defaults to ``True``. :examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor() >>> predictors = ["displacement","power","weight","acceleration","year"] >>> response = "cylinders" >>> train, valid = cars.split_frame(ratios=[.8], seed=1234) >>> cars_dl = H2ODeepLearningEstimator(force_load_balance=False, ... seed=1234) >>> cars_dl.train(x=predictors, ... y=response, ... training_frame=train, ... 
validation_frame=valid) >>> cars_dl.mse() """ return self._parms.get("force_load_balance") @force_load_balance.setter def force_load_balance(self, force_load_balance): assert_is_type(force_load_balance, None, bool) self._parms["force_load_balance"] = force_load_balance @property def variable_importances(self): """ Compute variable importances for input features (Gedeon method) - can be slow for large networks. Type: ``bool``, defaults to ``True``. :examples: >>> airlines= h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/airlines/allyears2k_headers.zip") >>> airlines["Year"]= airlines["Year"].asfactor() >>> airlines["Month"]= airlines["Month"].asfactor() >>> airlines["DayOfWeek"] = airlines["DayOfWeek"].asfactor() >>> airlines["Cancelled"] = airlines["Cancelled"].asfactor() >>> airlines['FlightNum'] = airlines['FlightNum'].asfactor() >>> predictors = ["Origin", "Dest", "Year", "UniqueCarrier", ... "DayOfWeek", "Month", "Distance", "FlightNum"] >>> response = "IsDepDelayed" >>> train, valid= airlines.split_frame(ratios=[.8], seed=1234) >>> airlines_dl = H2ODeepLearningEstimator(variable_importances=True, ... seed=1234) >>> airlines_dl.train(x=predictors, ... y=response, ... training_frame=train, ... validation_frame=valid) >>> airlines_dl.mse() """ return self._parms.get("variable_importances") @variable_importances.setter def variable_importances(self, variable_importances): assert_is_type(variable_importances, None, bool) self._parms["variable_importances"] = variable_importances @property def replicate_training_data(self): """ Replicate the entire training dataset onto every node for faster training on small datasets. Type: ``bool``, defaults to ``True``. 
:examples: >>> airlines= h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/airlines/allyears2k_headers.zip") >>> airlines["Year"]= airlines["Year"].asfactor() >>> airlines["Month"]= airlines["Month"].asfactor() >>> airlines["DayOfWeek"] = airlines["DayOfWeek"].asfactor() >>> airlines["Cancelled"] = airlines["Cancelled"].asfactor() >>> airlines['FlightNum'] = airlines['FlightNum'].asfactor() >>> predictors = ["Origin", "Dest", "Year", "UniqueCarrier", ... "DayOfWeek", "Month", "Distance", "FlightNum"] >>> response = "IsDepDelayed" >>> airlines_dl = H2ODeepLearningEstimator(replicate_training_data=False) >>> airlines_dl.train(x=predictors, ... y=response, ... training_frame=airlines) >>> airlines_dl.auc() """ return self._parms.get("replicate_training_data") @replicate_training_data.setter def replicate_training_data(self, replicate_training_data): assert_is_type(replicate_training_data, None, bool) self._parms["replicate_training_data"] = replicate_training_data @property def single_node_mode(self): """ Run on a single node for fine-tuning of model parameters. Type: ``bool``, defaults to ``False``. :examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor() >>> predictors = ["displacement","power","weight","acceleration","year"] >>> response = "economy_20mpg" >>> train, valid = cars.split_frame(ratios=[.8], seed=1234) >>> cars_dl = H2ODeepLearningEstimator(single_node_mode=True, ... seed=1234) >>> cars_dl.train(x=predictors, ... y=response, ... 
training_frame=cars) >>> cars_dl.auc() """ return self._parms.get("single_node_mode") @single_node_mode.setter def single_node_mode(self, single_node_mode): assert_is_type(single_node_mode, None, bool) self._parms["single_node_mode"] = single_node_mode @property def shuffle_training_data(self): """ Enable shuffling of training data (recommended if training data is replicated and train_samples_per_iteration is close to #nodes x #rows, or if using balance_classes). Type: ``bool``, defaults to ``False``. :examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor() >>> predictors = ["displacement","power","weight","acceleration","year"] >>> response = "economy_20mpg" >>> train, valid = cars.split_frame(ratios=[.8], seed=1234) >>> cars_dl = H2ODeepLearningEstimator(shuffle_training_data=True, ... seed=1234) >>> cars_dl.train(x=predictors, ... y=response, ... training_frame=cars) >>> cars_dl.auc() """ return self._parms.get("shuffle_training_data") @shuffle_training_data.setter def shuffle_training_data(self, shuffle_training_data): assert_is_type(shuffle_training_data, None, bool) self._parms["shuffle_training_data"] = shuffle_training_data @property def missing_values_handling(self): """ Handling of missing values. Either MeanImputation or Skip. Type: ``Literal["mean_imputation", "skip"]``, defaults to ``"mean_imputation"``. :examples: >>> boston = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/BostonHousing.csv") >>> predictors = boston.columns[:-1] >>> response = "medv" >>> boston['chas'] = boston['chas'].asfactor() >>> boston.insert_missing_values() >>> train, valid = boston.split_frame(ratios=[.8]) >>> boston_dl = H2ODeepLearningEstimator(missing_values_handling="skip") >>> boston_dl.train(x=predictors, ... y=response, ... training_frame=train, ... 
validation_frame=valid) >>> boston_dl.mse() """ return self._parms.get("missing_values_handling") @missing_values_handling.setter def missing_values_handling(self, missing_values_handling): assert_is_type(missing_values_handling, None, Enum("mean_imputation", "skip")) self._parms["missing_values_handling"] = missing_values_handling @property def quiet_mode(self): """ Enable quiet mode for less output to standard output. Type: ``bool``, defaults to ``False``. :examples: >>> titanic = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv") >>> titanic['survived'] = titanic['survived'].asfactor() >>> predictors = titanic.columns >>> del predictors[1:3] >>> response = 'survived' >>> train, valid = titanic.split_frame(ratios=[.8], seed=1234) >>> titanic_dl = H2ODeepLearningEstimator(quiet_mode=True, ... seed=1234) >>> titanic_dl.train(x=predictors, ... y=response, ... training_frame=train, ... validation_frame=valid) >>> titanic_dl.mse() """ return self._parms.get("quiet_mode") @quiet_mode.setter def quiet_mode(self, quiet_mode): assert_is_type(quiet_mode, None, bool) self._parms["quiet_mode"] = quiet_mode @property def autoencoder(self): """ Auto-Encoder. Type: ``bool``, defaults to ``False``. :examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor() >>> predictors = ["displacement","power","weight","acceleration","year"] >>> response = "cylinders" >>> train, valid = cars.split_frame(ratios=[.8], seed=1234) >>> cars_dl = H2ODeepLearningEstimator(autoencoder=True) >>> cars_dl.train(x=predictors, ... y=response, ... training_frame=train, ... 
validation_frame=valid) >>> cars_dl.mse() """ return self._parms.get("autoencoder") @autoencoder.setter def autoencoder(self, autoencoder): assert_is_type(autoencoder, bool) self._parms["autoencoder"] = autoencoder self.supervised_learning = not autoencoder @property def sparse(self): """ Sparse data handling (more efficient for data with lots of 0 values). Type: ``bool``, defaults to ``False``. :examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor() >>> predictors = ["displacement","power","weight","acceleration","year"] >>> response = "economy_20mpg" >>> train, valid = cars.split_frame(ratios=[.8], seed=1234) >>> cars_dl = H2ODeepLearningEstimator(sparse=True, ... seed=1234) >>> cars_dl.train(x=predictors, ... y=response, ... training_frame=cars) >>> cars_dl.auc() """ return self._parms.get("sparse") @sparse.setter def sparse(self, sparse): assert_is_type(sparse, None, bool) self._parms["sparse"] = sparse @property def col_major(self): """ #DEPRECATED Use a column major weight matrix for input layer. Can speed up forward propagation, but might slow down backpropagation. Type: ``bool``, defaults to ``False``. """ return self._parms.get("col_major") @col_major.setter def col_major(self, col_major): assert_is_type(col_major, None, bool) self._parms["col_major"] = col_major @property def average_activation(self): """ Average activation for sparse auto-encoder. #Experimental Type: ``float``, defaults to ``0.0``. :examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor() >>> predictors = ["displacement","power","weight","acceleration","year"] >>> response = "cylinders" >>> train, valid = cars.split_frame(ratios=[.8], seed=1234) >>> cars_dl = H2ODeepLearningEstimator(average_activation=1.5, ... seed=1234) >>> cars_dl.train(x=predictors, ... y=response, ... training_frame=train, ... 
validation_frame=valid) >>> cars_dl.mse() """ return self._parms.get("average_activation") @average_activation.setter def average_activation(self, average_activation): assert_is_type(average_activation, None, numeric) self._parms["average_activation"] = average_activation @property def sparsity_beta(self): """ Sparsity regularization. #Experimental Type: ``float``, defaults to ``0.0``. :examples: >>> from h2o.estimators import H2OAutoEncoderEstimator >>> resp = 784 >>> nfeatures = 20 >>> train = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/bigdata/laptop/mnist/train.csv.gz") >>> test = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/bigdata/laptop/mnist/test.csv.gz") >>> train[resp] = train[resp].asfactor() >>> test[resp] = test[resp].asfactor() >>> sid = train[0].runif(0) >>> train_unsupervised = train[sid>=0.5] >>> train_unsupervised.pop(resp) >>> ae_model = H2OAutoEncoderEstimator(activation="Tanh", ... hidden=[nfeatures], ... epochs=1, ... ignore_const_cols=False, ... reproducible=True, ... sparsity_beta=0.5, ... seed=1234) >>> ae_model.train(list(range(resp)), ... training_frame=train_unsupervised) >>> ae_model.mse() """ return self._parms.get("sparsity_beta") @sparsity_beta.setter def sparsity_beta(self, sparsity_beta): assert_is_type(sparsity_beta, None, numeric) self._parms["sparsity_beta"] = sparsity_beta @property def max_categorical_features(self): """ Max. number of categorical features, enforced via hashing. #Experimental Type: ``int``, defaults to ``2147483647``. :examples: >>> covtype = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/covtype/covtype.20k.data") >>> covtype[54] = covtype[54].asfactor() >>> predictors = covtype.columns[0:54] >>> response = 'C55' >>> train, valid = covtype.split_frame(ratios=[.8], seed=1234) >>> cov_dl = H2ODeepLearningEstimator(balance_classes=True, ... max_categorical_features=2147483647, ... seed=1234) >>> cov_dl.train(x=predictors, ... y=response, ... 
training_frame=train, ... validation_frame=valid) >>> cov_dl.logloss() """ return self._parms.get("max_categorical_features") @max_categorical_features.setter def max_categorical_features(self, max_categorical_features): assert_is_type(max_categorical_features, None, int) self._parms["max_categorical_features"] = max_categorical_features @property def reproducible(self): """ Force reproducibility on small data (will be slow - only uses 1 thread). Type: ``bool``, defaults to ``False``. :examples: >>> airlines= h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/airlines/allyears2k_headers.zip") >>> airlines["Year"]= airlines["Year"].asfactor() >>> airlines["Month"]= airlines["Month"].asfactor() >>> airlines["DayOfWeek"] = airlines["DayOfWeek"].asfactor() >>> airlines["Cancelled"] = airlines["Cancelled"].asfactor() >>> airlines['FlightNum'] = airlines['FlightNum'].asfactor() >>> predictors = ["Origin", "Dest", "Year", "UniqueCarrier", ... "DayOfWeek", "Month", "Distance", "FlightNum"] >>> response = "IsDepDelayed" >>> train, valid= airlines.split_frame(ratios=[.8], seed=1234) >>> airlines_dl = H2ODeepLearningEstimator(reproducible=True) >>> airlines_dl.train(x=predictors, ... y=response, ... training_frame=train, ... validation_frame=valid) >>> airlines_dl.auc() """ return self._parms.get("reproducible") @reproducible.setter def reproducible(self, reproducible): assert_is_type(reproducible, None, bool) self._parms["reproducible"] = reproducible @property def export_weights_and_biases(self): """ Whether to export Neural Network weights and biases to H2O Frames. Type: ``bool``, defaults to ``False``. 
:examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor() >>> predictors = ["displacement","power","weight","acceleration","year"] >>> response = "cylinders" >>> train, valid = cars.split_frame(ratios=[.8], seed=1234) >>> cars_dl = H2ODeepLearningEstimator(export_weights_and_biases=True, ... seed=1234) >>> cars_dl.train(x=predictors, ... y=response, ... training_frame=train, ... validation_frame=valid) >>> cars_dl.mse() """ return self._parms.get("export_weights_and_biases") @export_weights_and_biases.setter def export_weights_and_biases(self, export_weights_and_biases): assert_is_type(export_weights_and_biases, None, bool) self._parms["export_weights_and_biases"] = export_weights_and_biases @property def mini_batch_size(self): """ Mini-batch size (smaller leads to better fit, larger can speed up and generalize better). Type: ``int``, defaults to ``1``. :examples: >>> covtype = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/covtype/covtype.20k.data") >>> covtype[54] = covtype[54].asfactor() >>> predictors = covtype.columns[0:54] >>> response = 'C55' >>> train, valid = covtype.split_frame(ratios=[.8], seed=1234) >>> cov_dl = H2ODeepLearningEstimator(activation="RectifierWithDropout", ... hidden=[10,10], ... epochs=10, ... input_dropout_ratio=0.2, ... l1=1e-5, ... max_w2=10.5, ... stopping_rounds=0) ... mini_batch_size=35 >>> cov_dl.train(x=predictors, ... y=response, ... training_frame=train, ... 
validation_frame=valid) >>> cov_dl.mse() """ return self._parms.get("mini_batch_size") @mini_batch_size.setter def mini_batch_size(self, mini_batch_size): assert_is_type(mini_batch_size, None, int) self._parms["mini_batch_size"] = mini_batch_size @property def categorical_encoding(self): """ Encoding scheme for categorical features Type: ``Literal["auto", "enum", "one_hot_internal", "one_hot_explicit", "binary", "eigen", "label_encoder", "sort_by_response", "enum_limited"]``, defaults to ``"auto"``. :examples: >>> airlines= h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/airlines/allyears2k_headers.zip") >>> airlines["Year"]= airlines["Year"].asfactor() >>> airlines["Month"]= airlines["Month"].asfactor() >>> airlines["DayOfWeek"] = airlines["DayOfWeek"].asfactor() >>> airlines["Cancelled"] = airlines["Cancelled"].asfactor() >>> airlines['FlightNum'] = airlines['FlightNum'].asfactor() >>> predictors = ["Origin", "Dest", "Year", "UniqueCarrier", ... "DayOfWeek", "Month", "Distance", "FlightNum"] >>> response = "IsDepDelayed" >>> train, valid= airlines.split_frame(ratios=[.8], seed=1234) >>> encoding = "one_hot_internal" >>> airlines_dl = H2ODeepLearningEstimator(categorical_encoding=encoding, ... seed=1234) >>> airlines_dl.train(x=predictors, ... y=response, ... training_frame=train, ... validation_frame=valid) >>> airlines_dl.mse() """ return self._parms.get("categorical_encoding") @categorical_encoding.setter def categorical_encoding(self, categorical_encoding): assert_is_type(categorical_encoding, None, Enum("auto", "enum", "one_hot_internal", "one_hot_explicit", "binary", "eigen", "label_encoder", "sort_by_response", "enum_limited")) self._parms["categorical_encoding"] = categorical_encoding @property def elastic_averaging(self): """ Elastic averaging between compute nodes can improve distributed model convergence. #Experimental Type: ``bool``, defaults to ``False``. 
:examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor() >>> predictors = ["displacement","power","weight","acceleration","year"] >>> response = "cylinders" >>> train, valid = cars.split_frame(ratios=[.8], seed=1234) >>> cars_dl = H2ODeepLearningEstimator(elastic_averaging=True, ... seed=1234) >>> cars_dl.train(x=predictors, ... y=response, ... training_frame=train, ... validation_frame=valid) >>> cars_dl.mse() """ return self._parms.get("elastic_averaging") @elastic_averaging.setter def elastic_averaging(self, elastic_averaging): assert_is_type(elastic_averaging, None, bool) self._parms["elastic_averaging"] = elastic_averaging @property def elastic_averaging_moving_rate(self): """ Elastic averaging moving rate (only if elastic averaging is enabled). Type: ``float``, defaults to ``0.9``. :examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor() >>> predictors = ["displacement","power","weight","acceleration","year"] >>> response = "cylinders" >>> train, valid = cars.split_frame(ratios=[.8], seed=1234) >>> cars_dl = H2ODeepLearningEstimator(elastic_averaging_moving_rate=.8, ... seed=1234) >>> cars_dl.train(x=predictors, ... y=response, ... training_frame=train, ... validation_frame=valid) >>> cars_dl.mse() """ return self._parms.get("elastic_averaging_moving_rate") @elastic_averaging_moving_rate.setter def elastic_averaging_moving_rate(self, elastic_averaging_moving_rate): assert_is_type(elastic_averaging_moving_rate, None, numeric) self._parms["elastic_averaging_moving_rate"] = elastic_averaging_moving_rate @property def elastic_averaging_regularization(self): """ Elastic averaging regularization strength (only if elastic averaging is enabled). Type: ``float``, defaults to ``0.001``. 
:examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor() >>> predictors = ["displacement","power","weight","acceleration","year"] >>> response = "cylinders" >>> train, valid = cars.split_frame(ratios=[.8], seed=1234) >>> cars_dl = H2ODeepLearningEstimator(elastic_averaging_regularization=.008, ... seed=1234) >>> cars_dl.train(x=predictors, ... y=response, ... training_frame=train, ... validation_frame=valid) >>> cars_dl.mse() """ return self._parms.get("elastic_averaging_regularization") @elastic_averaging_regularization.setter def elastic_averaging_regularization(self, elastic_averaging_regularization): assert_is_type(elastic_averaging_regularization, None, numeric) self._parms["elastic_averaging_regularization"] = elastic_averaging_regularization @property def export_checkpoints_dir(self): """ Automatically export generated models to this directory. Type: ``str``. :examples: >>> import tempfile >>> from os import listdir >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor() >>> predictors = ["displacement","power","weight","acceleration","year"] >>> response = "cylinders" >>> train, valid = cars.split_frame(ratios=[.8], seed=1234) >>> checkpoints_dir = tempfile.mkdtemp() >>> cars_dl = H2ODeepLearningEstimator(export_checkpoints_dir=checkpoints_dir, ... seed=1234) >>> cars_dl.train(x=predictors, ... y=response, ... training_frame=train, ... validation_frame=valid) >>> len(listdir(checkpoints_dir)) """ return self._parms.get("export_checkpoints_dir") @export_checkpoints_dir.setter def export_checkpoints_dir(self, export_checkpoints_dir): assert_is_type(export_checkpoints_dir, None, str) self._parms["export_checkpoints_dir"] = export_checkpoints_dir @property def auc_type(self): """ Set default multinomial AUC type. 
Type: ``Literal["auto", "none", "macro_ovr", "weighted_ovr", "macro_ovo", "weighted_ovo"]``, defaults to ``"auto"``. """ return self._parms.get("auc_type") @auc_type.setter def auc_type(self, auc_type): assert_is_type(auc_type, None, Enum("auto", "none", "macro_ovr", "weighted_ovr", "macro_ovo", "weighted_ovo")) self._parms["auc_type"] = auc_type
class H2OAutoEncoderEstimator(H2ODeepLearningEstimator):
    """
    Deep Learning estimator whose ``autoencoder`` parameter is always switched on.

    Every keyword argument is forwarded unchanged to the
    :class:`H2ODeepLearningEstimator` constructor; ``autoencoder`` is then
    assigned ``True`` on the new instance.

    :examples:

    >>> import h2o as ml
    >>> from h2o.estimators.deeplearning import H2OAutoEncoderEstimator
    >>> ml.init()
    >>> rows = [[1,2,3,4,0]*50, [2,1,2,4,1]*50, [2,1,4,2,1]*50, [0,1,2,34,1]*50, [2,3,4,1,0]*50]
    >>> fr = ml.H2OFrame(rows)
    >>> fr[4] = fr[4].asfactor()
    >>> model = H2OAutoEncoderEstimator()
    >>> model.train(x=list(range(4)), training_frame=fr)
    """

    # Class-level default; the ``autoencoder`` assignment in ``__init__`` also
    # writes this attribute on each instance.
    supervised_learning = False

    def __init__(self, **kwargs):
        # Let the parent constructor consume all keyword arguments first ...
        super(H2OAutoEncoderEstimator, self).__init__(**kwargs)
        # ... then turn the auto-encoder flag on, after the parent has run.
        self.autoencoder = True