# Source code for h2o.estimators.stackedensemble

#!/usr/bin/env python
# -*- encoding: utf-8 -*-
#
# This file is auto-generated by h2o-3/h2o-bindings/bin/gen_python.py
# Copyright 2016 H2O.ai;  Apache License Version 2.0 (see LICENSE for details)
#
from __future__ import absolute_import, division, print_function, unicode_literals

from h2o.estimators.estimator_base import H2OEstimator
from h2o.exceptions import H2OValueError
from h2o.frame import H2OFrame
from h2o.utils.typechecks import assert_is_type, Enum, numeric


class H2OStackedEnsembleEstimator(H2OEstimator):
    """
    Stacked Ensemble

    Builds a stacked ensemble (aka "super learner") machine learning method that uses two
    or more H2O learning algorithms to improve predictive performance. It is a loss-based
    supervised learning method that finds the optimal combination of a collection of prediction
    algorithms. This method supports regression and binary classification.

    Examples
    --------
      >>> import h2o
      >>> h2o.init()
      >>> from h2o.estimators.random_forest import H2ORandomForestEstimator
      >>> from h2o.estimators.gbm import H2OGradientBoostingEstimator
      >>> from h2o.estimators.stackedensemble import H2OStackedEnsembleEstimator
      >>> col_types = ["numeric", "numeric", "numeric", "enum", "enum", "numeric", "numeric", "numeric", "numeric"]
      >>> dat = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/prostate/prostate.csv",
      ...                       destination_frame="prostate_hex", col_types=col_types)
      >>> train, test = dat.split_frame(ratios=[.8], seed=1)
      >>> x = ["CAPSULE","GLEASON","RACE","DPROS","DCAPS","PSA","VOL"]
      >>> y = "AGE"
      >>> # Base models have to be cross-validated with identical folds (see ``base_models``).
      >>> my_gbm = H2OGradientBoostingEstimator(nfolds=5, fold_assignment="Modulo",
      ...                                       keep_cross_validation_predictions=True)
      >>> my_gbm.train(x=x, y=y, training_frame=train)
      >>> my_rf = H2ORandomForestEstimator(nfolds=5, fold_assignment="Modulo",
      ...                                  keep_cross_validation_predictions=True)
      >>> my_rf.train(x=x, y=y, training_frame=train)
      >>> stack = H2OStackedEnsembleEstimator(model_id="my_ensemble_gaussian",
      ...                                     training_frame=train, validation_frame=test,
      ...                                     base_models=[my_gbm.model_id, my_rf.model_id],
      ...                                     selection_strategy="choose_all")
      >>> stack.train(x=x, y=y, training_frame=train, validation_frame=test)
      >>> stack.model_performance()
    """

    algo = "stackedensemble"

    def __init__(self, **kwargs):
        """
        Create the estimator and store the supplied parameters.

        :param kwargs: any of ``model_id``, ``training_frame``, ``validation_frame``,
            ``base_models``, ``selection_strategy``.
        :raises H2OValueError: if an unrecognized parameter name is passed.
        """
        super(H2OStackedEnsembleEstimator, self).__init__()
        self._parms = {}
        accepted_params = {"model_id", "training_frame", "validation_frame", "base_models", "selection_strategy"}
        # ``Lambda`` is rewritten to ``lambda_`` before validation. This estimator does not
        # accept ``lambda_``, so the renamed key still ends in the "Unknown parameter" error
        # below; the rename is kept so that error text stays the same as before.
        if "Lambda" in kwargs:
            kwargs["lambda_"] = kwargs.pop("Lambda")
        for pname, pvalue in kwargs.items():
            if pname == 'model_id':
                # model_id is stored directly, without going through a type-checked setter.
                self._id = pvalue
                self._parms["model_id"] = pvalue
            elif pname in accepted_params:
                # Using setattr(...) will invoke type-checking of the arguments
                setattr(self, pname, pvalue)
            else:
                raise H2OValueError("Unknown parameter %s = %r" % (pname, pvalue))
        self._parms["_rest_version"] = 99

    @property
    def training_frame(self):
        """
        Training data frame (Not required, to allow initial validation of model parameters).

        Type: ``H2OFrame``.
        """
        return self._parms.get("training_frame")

    @training_frame.setter
    def training_frame(self, training_frame):
        assert_is_type(training_frame, None, H2OFrame)
        self._parms["training_frame"] = training_frame

    @property
    def validation_frame(self):
        """
        Validation data frame.

        Type: ``H2OFrame``.
        """
        return self._parms.get("validation_frame")

    @validation_frame.setter
    def validation_frame(self, validation_frame):
        assert_is_type(validation_frame, None, H2OFrame)
        self._parms["validation_frame"] = validation_frame

    @property
    def base_models(self):
        """
        List of model ids which we can stack together.  Which ones are chosen depends on the selection_strategy
        (currently, all models will be used since selection_strategy can only be set to choose_all).  Models must have
        been cross-validated using nfolds > 1, fold_assignment equal to Modulo, and
        keep_cross_validation_predictions must be set to True.

        Type: ``List[str]``  (default: ``[]``).
        """
        return self._parms.get("base_models")

    @base_models.setter
    def base_models(self, base_models):
        assert_is_type(base_models, None, [str])
        self._parms["base_models"] = base_models

    @property
    def selection_strategy(self):
        """
        Strategy for choosing which models to stack.

        One of: ``"choose_all"``.
        """
        return self._parms.get("selection_strategy")

    @selection_strategy.setter
    def selection_strategy(self, selection_strategy):
        assert_is_type(selection_strategy, None, Enum("choose_all"))
        self._parms["selection_strategy"] = selection_strategy