Source code for h2o.estimators.stackedensemble

#!/usr/bin/env python
# -*- encoding: utf-8 -*-
#
# This file is auto-generated by h2o-3/h2o-bindings/bin/gen_python.py
# Copyright 2016 H2O.ai;  Apache License Version 2.0 (see LICENSE for details)
#
from __future__ import absolute_import, division, print_function, unicode_literals

from h2o.estimators.estimator_base import H2OEstimator
from h2o.exceptions import H2OValueError
from h2o.frame import H2OFrame
from h2o.utils.typechecks import assert_is_type, Enum, numeric, is_type


[docs]class H2OStackedEnsembleEstimator(H2OEstimator):
    """
    Stacked Ensemble

    Builds a stacked ensemble (aka "super learner") machine learning method that uses two
    or more H2O learning algorithms to improve predictive performance. It is a loss-based
    supervised learning method that finds the optimal combination of a collection of prediction
    algorithms.This method supports regression and binary classification.

    Examples
    --------
      >>> import h2o
      >>> h2o.init()
      >>> from h2o.estimators.random_forest import H2ORandomForestEstimator
      >>> from h2o.estimators.gbm import H2OGradientBoostingEstimator
      >>> from h2o.estimators.stackedensemble import H2OStackedEnsembleEstimator
      >>> col_types = ["numeric", "numeric", "numeric", "enum", "enum", "numeric", "numeric", "numeric", "numeric"]
      >>> data = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/prostate/prostate.csv", col_types=col_types)
      >>> train, test = data.split_frame(ratios=[.8], seed=1)
      >>> x = ["CAPSULE","GLEASON","RACE","DPROS","DCAPS","PSA","VOL"]
      >>> y = "AGE"
      >>> nfolds = 5
      >>> my_gbm = H2OGradientBoostingEstimator(nfolds=nfolds, fold_assignment="Modulo", keep_cross_validation_predictions=True)
      >>> my_gbm.train(x=x, y=y, training_frame=train)
      >>> my_rf = H2ORandomForestEstimator(nfolds=nfolds, fold_assignment="Modulo", keep_cross_validation_predictions=True)
      >>> my_rf.train(x=x, y=y, training_frame=train)
      >>> stack = H2OStackedEnsembleEstimator(model_id="my_ensemble", training_frame=train, validation_frame=test, base_models=[my_gbm.model_id, my_rf.model_id])
      >>> stack.train(x=x, y=y, training_frame=train, validation_frame=test)
      >>> stack.model_performance()
    """

    algo = "stackedensemble"

    def __init__(self, **kwargs):
        super(H2OStackedEnsembleEstimator, self).__init__()
        self._parms = {}
        names_list = {"model_id", "training_frame", "response_column", "validation_frame", "base_models"}
        if "Lambda" in kwargs: kwargs["lambda_"] = kwargs.pop("Lambda")
        for pname, pvalue in kwargs.items():
            if pname == 'model_id':
                self._id = pvalue
                self._parms["model_id"] = pvalue
            elif pname in names_list:
                # Using setattr(...) will invoke type-checking of the arguments
                setattr(self, pname, pvalue)
            else:
                raise H2OValueError("Unknown parameter %s = %r" % (pname, pvalue))
        self._parms["_rest_version"] = 99

    @property
    def training_frame(self):
        """
        Id of the training data frame (Not required, to allow initial validation of model parameters).

        Type: ``H2OFrame``.
        """
        return self._parms.get("training_frame")

    @training_frame.setter
    def training_frame(self, training_frame):
        assert_is_type(training_frame, None, H2OFrame)
        self._parms["training_frame"] = training_frame


    @property
    def response_column(self):
        """
        Response variable column.

        Type: ``str``.
        """
        return self._parms.get("response_column")

    @response_column.setter
    def response_column(self, response_column):
        assert_is_type(response_column, None, str)
        self._parms["response_column"] = response_column


    @property
    def validation_frame(self):
        """
        Id of the validation data frame.

        Type: ``H2OFrame``.
        """
        return self._parms.get("validation_frame")

    @validation_frame.setter
    def validation_frame(self, validation_frame):
        assert_is_type(validation_frame, None, H2OFrame)
        self._parms["validation_frame"] = validation_frame


    @property
    def base_models(self):
        """
        List of model ids which we can stack together. Models must have been cross-validated using nfolds > 1, and folds
        must be identical across models.

        Type: ``List[str]``  (default: ``[]``).
        """
        return self._parms.get("base_models")

    @base_models.setter
    def base_models(self, base_models):
         if is_type(base_models,[H2OEstimator]):
            base_models = [b.model_id for b in base_models]
            self._parms["base_models"] = base_models
         else:
            assert_is_type(base_models, None, [str])
            self._parms["base_models"] = base_models