Source code for h2o.estimators.stackedensemble

#!/usr/bin/env python
# -*- encoding: utf-8 -*-
#
# This file is auto-generated by h2o-3/h2o-bindings/bin/gen_python.py
# Copyright 2016 H2O.ai;  Apache License Version 2.0 (see LICENSE for details)
#
from __future__ import absolute_import, division, print_function, unicode_literals

from h2o.estimators.estimator_base import H2OEstimator
from h2o.exceptions import H2OValueError
from h2o.frame import H2OFrame
from h2o.utils.typechecks import assert_is_type, Enum, numeric, is_type


[docs]class H2OStackedEnsembleEstimator(H2OEstimator): """ Stacked Ensemble Builds a stacked ensemble (aka "super learner") machine learning method that uses two or more H2O learning algorithms to improve predictive performance. It is a loss-based supervised learning method that finds the optimal combination of a collection of prediction algorithms.This method supports regression and binary classification. Examples -------- >>> import h2o >>> h2o.init() >>> from h2o.estimators.random_forest import H2ORandomForestEstimator >>> from h2o.estimators.gbm import H2OGradientBoostingEstimator >>> from h2o.estimators.stackedensemble import H2OStackedEnsembleEstimator >>> col_types = ["numeric", "numeric", "numeric", "enum", "enum", "numeric", "numeric", "numeric", "numeric"] >>> data = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/prostate/prostate.csv", col_types=col_types) >>> train, test = data.split_frame(ratios=[.8], seed=1) >>> x = ["CAPSULE","GLEASON","RACE","DPROS","DCAPS","PSA","VOL"] >>> y = "AGE" >>> nfolds = 5 >>> my_gbm = H2OGradientBoostingEstimator(nfolds=nfolds, fold_assignment="Modulo", keep_cross_validation_predictions=True) >>> my_gbm.train(x=x, y=y, training_frame=train) >>> my_rf = H2ORandomForestEstimator(nfolds=nfolds, fold_assignment="Modulo", keep_cross_validation_predictions=True) >>> my_rf.train(x=x, y=y, training_frame=train) >>> stack = H2OStackedEnsembleEstimator(model_id="my_ensemble", training_frame=train, validation_frame=test, base_models=[my_gbm.model_id, my_rf.model_id]) >>> stack.train(x=x, y=y, training_frame=train, validation_frame=test) >>> stack.model_performance() """ algo = "stackedensemble" def __init__(self, **kwargs): super(H2OStackedEnsembleEstimator, self).__init__() self._parms = {} names_list = {"model_id", "training_frame", "response_column", "validation_frame", "base_models"} if "Lambda" in kwargs: kwargs["lambda_"] = kwargs.pop("Lambda") for pname, pvalue in kwargs.items(): if pname == 'model_id': self._id = pvalue self._parms["model_id"] = pvalue elif pname in names_list: # Using setattr(...) will invoke type-checking of the arguments setattr(self, pname, pvalue) else: raise H2OValueError("Unknown parameter %s = %r" % (pname, pvalue)) self._parms["_rest_version"] = 99 @property def training_frame(self): """ Id of the training data frame (Not required, to allow initial validation of model parameters). Type: ``H2OFrame``. """ return self._parms.get("training_frame") @training_frame.setter def training_frame(self, training_frame): assert_is_type(training_frame, None, H2OFrame) self._parms["training_frame"] = training_frame @property def response_column(self): """ Response variable column. Type: ``str``. """ return self._parms.get("response_column") @response_column.setter def response_column(self, response_column): assert_is_type(response_column, None, str) self._parms["response_column"] = response_column @property def validation_frame(self): """ Id of the validation data frame. Type: ``H2OFrame``. """ return self._parms.get("validation_frame") @validation_frame.setter def validation_frame(self, validation_frame): assert_is_type(validation_frame, None, H2OFrame) self._parms["validation_frame"] = validation_frame @property def base_models(self): """ List of model ids which we can stack together. Models must have been cross-validated using nfolds > 1, and folds must be identical across models. Type: ``List[str]`` (default: ``[]``). """ return self._parms.get("base_models") @base_models.setter def base_models(self, base_models): if is_type(base_models,[H2OEstimator]): base_models = [b.model_id for b in base_models] self._parms["base_models"] = base_models else: assert_is_type(base_models, None, [str]) self._parms["base_models"] = base_models