from .estimator_base import *
[docs]class H2OGradientBoostingEstimator(H2OEstimator):
"""
Builds gradient boosted classification trees, and gradient boosted regression trees on
a parsed data set. The default distribution function will guess the model type based on
the response column type run properly the response column must be an numeric for
"gaussian" or an enum for "bernoulli" or "multinomial".
Parameters
----------
model_id : str, optional
The unique id assigned to the resulting model. If none is given, an id will
automatically be generated.
distribution : str
The distribution function of the response. Must be "AUTO", "bernoulli",
"multinomial", "poisson", "gamma", "tweedie", "laplace", "quantile" or "gaussian"
quantile_alpha : float
Quantile (only for Quantile regression, must be between 0 and 1)
tweedie_power : float
Tweedie power (only for Tweedie distribution, must be between 1 and 2)
ntrees : int
A non-negative integer that determines the number of trees to grow.
max_depth : int
Maximum depth to grow the tree.
min_rows : int
Minimum number of rows to assign to terminal nodes.
learn_rate : float
Learning rate (from 0.0 to 1.0)
sample_rate : float
Row sample rate (from 0.0 to 1.0)
col_sample_rate : float
Column sample rate (from 0.0 to 1.0)
col_sample_rate_per_tree : float
Column sample rate per tree (from 0.0 to 1.0)
nbins : int
For numerical columns (real/int), build a histogram of (at least) this many bins, then
split at the best point.
nbins_top_level : int
For numerical columns (real/int), build a histogram of (at most) this many bins at the
root level, then decrease by factor of two per level.
nbins_cats : int
For categorical columns (factors), build a histogram of this many bins, then split at
the best point. Higher values can lead to more overfitting.
balance_classes : bool
logical, indicates whether or not to balance training data class counts via
over/under-sampling (for imbalanced data)
max_after_balance_size : float
Maximum relative size of the training data after balancing class counts
(can be less than 1.0). Ignored if balance_classes is False, which is the
default behavior.
seed : int
Seed for random numbers (affects sampling when balance_classes=T)
build_tree_one_node : bool
Run on one node only; no network overhead but fewer cpus used.
Suitable for small datasets.
nfolds : int, optional
Number of folds for cross-validation. If nfolds >= 2, then validation must
remain empty.
fold_assignment : str
Cross-validation fold assignment scheme, if fold_column is not specified.
Must be "AUTO", "Random" or "Modulo"
keep_cross_validation_predictions : bool
Whether to keep the predictions of the cross-validation models
score_each_iteration : bool
Attempts to score each tree.
score_tree_interval : int
Score the model after every so many trees. Disabled if set to 0.
stopping_rounds : int
Early stopping based on convergence of stopping_metric.
Stop if simple moving average of length k of the stopping_metric does not improve
(by stopping_tolerance) for k=stopping_rounds scoring events.
Can only trigger after at least 2k scoring events. Use 0 to disable.
stopping_metric : str
Metric to use for convergence checking, only for _stopping_rounds > 0
Can be one of "AUTO", "deviance", "logloss", "MSE", "AUC", "r2", "misclassification".
stopping_tolerance : float
Relative tolerance for metric-based stopping criterion (stop if relative improvement is not at least this much)
Returns
-------
A new H2OGradientBoostedEstimator object.
"""
def __init__(self, model_id=None, distribution=None, quantile_alpha=None, tweedie_power=None, ntrees=None,
max_depth=None, min_rows=None, learn_rate=None, nbins=None,
sample_rate=None, col_sample_rate=None, col_sample_rate_per_tree=None,
nbins_top_level=None, nbins_cats=None, balance_classes=None,
max_after_balance_size=None, seed=None, build_tree_one_node=None,
nfolds=None, fold_assignment=None, keep_cross_validation_predictions=None,
stopping_rounds=None, stopping_metric=None, stopping_tolerance=None,
score_each_iteration=None, score_tree_interval=None, checkpoint=None):
super(H2OGradientBoostingEstimator, self).__init__()
self._parms = locals()
self._parms = {k:v for k,v in self._parms.items() if k!="self"}
@property
def distribution(self):
return self._parms["distribution"]
@distribution.setter
def distribution(self, value):
self._parms["distribution"] = value
@property
def quantile_alpha(self):
return self._parms["quantile_alpha"]
@quantile_alpha.setter
def quantile_alpha(self, value):
self._parms["quantile_alpha"] = value
@property
def tweedie_power(self):
return self._parms["tweedie_power"]
@tweedie_power.setter
def tweedie_power(self, value):
self._parms["tweedie_power"] = value
@property
def ntrees(self):
return self._parms["ntrees"]
@ntrees.setter
def ntrees(self, value):
self._parms["ntrees"] = value
@property
def max_depth(self):
return self._parms["max_depth"]
@max_depth.setter
def max_depth(self, value):
self._parms["max_depth"] = value
@property
def min_rows(self):
return self._parms["min_rows"]
@min_rows.setter
def min_rows(self, value):
self._parms["min_rows"] = value
@property
def learn_rate(self):
return self._parms["learn_rate"]
@learn_rate.setter
def learn_rate(self, value):
self._parms["learn_rate"] = value
@property
def sample_rate(self):
return self._parms["sample_rate"]
@sample_rate.setter
def sample_rate(self, value):
self._parms["sample_rate"] = value
@property
def col_sample_rate(self):
return self._parms["col_sample_rate"]
@col_sample_rate.setter
def col_sample_rate(self, value):
self._parms["col_sample_rate"] = value
@property
def col_sample_rate_per_tree(self):
return self._parms["col_sample_rate_per_tree"]
@col_sample_rate_per_tree.setter
def col_sample_rate_per_tree(self, value):
self._parms["col_sample_rate_per_tree"] = value
@property
def nbins(self):
return self._parms["nbins"]
@nbins.setter
def nbins(self, value):
self._parms["nbins"] = value
@property
def nbins_top_level(self):
return self._parms["nbins_top_level"]
@nbins_top_level.setter
def nbins_top_level(self, value):
self._parms["nbins_top_level"] = value
@property
def nbins_cats(self):
return self._parms["nbins_cats"]
@nbins_cats.setter
def nbins_cats(self, value):
self._parms["nbins_cats"] = value
@property
def balance_classes(self):
return self._parms["balance_classes"]
@balance_classes.setter
def balance_classes(self, value):
self._parms["balance_classes"] = value
@property
def max_after_balance_size(self):
return self._parms["max_after_balance_size"]
@max_after_balance_size.setter
def max_after_balance_size(self, value):
self._parms["max_after_balance_size"] = value
@property
def seed(self):
return self._parms["seed"]
@seed.setter
def seed(self, value):
self._parms["seed"] = value
@property
def build_tree_one_node(self):
return self._parms["build_tree_one_node"]
@build_tree_one_node.setter
def build_tree_one_node(self, value):
self._parms["build_tree_one_node"] = value
@property
def nfolds(self):
return self._parms["nfolds"]
@nfolds.setter
def nfolds(self, value):
self._parms["nfolds"] = value
@property
def fold_assignment(self):
return self._parms["fold_assignment"]
@fold_assignment.setter
def fold_assignment(self, value):
self._parms["fold_assignment"] = value
@property
def keep_cross_validation_predictions(self):
return self._parms["keep_cross_validation_predictions"]
@keep_cross_validation_predictions.setter
def keep_cross_validation_predictions(self, value):
self._parms["keep_cross_validation_predictions"] = value
@property
def score_each_iteration(self):
return self._parms["score_each_iteration"]
@score_each_iteration.setter
def score_each_iteration(self, value):
self._parms["score_each_iteration"] = value
@property
def score_tree_interval(self):
return self._parms["score_tree_interval"]
@score_tree_interval.setter
def score_tree_interval(self, value):
self._parms["score_tree_interval"] = value
@property
def stopping_rounds(self):
return self._parms["stopping_rounds"]
@stopping_rounds.setter
def stopping_rounds(self, value):
self._parms["stopping_rounds"] = value
@property
def stopping_metric(self):
return self._parms["stopping_metric"]
@stopping_metric.setter
def stopping_metric(self, value):
self._parms["stopping_metric"] = value
@property
def stopping_tolerance(self):
return self._parms["stopping_tolerance"]
@stopping_tolerance.setter
def stopping_tolerance(self, value):
self._parms["stopping_tolerance"] = value
@property
def checkpoint(self):
return self._parms["checkpoint"]
@checkpoint.setter
def checkpoint(self, value):
self._parms["checkpoint"] = value