Source code for h2o.model.binomial

# -*- encoding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
from h2o.utils.compatibility import *  # NOQA

from h2o.exceptions import H2OValueError
from h2o.utils.typechecks import assert_is_type
from .extensions import has_extension
from .model_base import ModelBase
from ..utils.metaclass import deprecated_params


[docs]class H2OBinomialModel(ModelBase):

[docs]    def F1(self, thresholds=None, train=False, valid=False, xval=False):
        """
        Get the F1 value for a set of thresholds.

        If all are ``False`` (default), then return the training metric value.
        If more than one option is set to ``True``, then return a dictionary of metrics where
        the keys are "train", "valid", and "xval".

        :param thresholds: If None, then the threshold maximizing the metric will be used.
        :param bool train: If ``True``, return the F1 value for the training data.
        :param bool valid: If ``True``, return the F1 value for the validation data.
        :param bool xval: If ``True``, return the F1 value for each of the cross-validated splits.

        :returns: The F1 values for the specified key(s).

        :examples:

        >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv")
        >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor()
        >>> r = cars[0].runif()
        >>> train = cars[r > .2]
        >>> valid = cars[r <=.2] 
        >>> response_col = "economy_20mpg"
        >>> distribution = "bernoulli"
        >>> predictors = ["displacement", "power", "weight", "acceleration", "year"]
        >>> from h2o.estimators.gbm import H2OGradientBoostingEstimator
        >>> gbm = H2OGradientBoostingEstimator(nfolds=3,
        ...                                    distribution=distribution,
        ...                                    fold_assignment="Random")
        >>> gbm.train(y=response_col,
        ...           x=predictors,
        ...           validation_frame=valid,
        ...           training_frame=train)
        >>> gbm.F1()# <- Default: return training metric value
        >>> gbm.F1(train=True,  valid=True,  xval=True)
        """
        return self.metric('f1', thresholds, train, valid, xval)

[docs]    def F2(self, thresholds=None, train=False, valid=False, xval=False):
        """
        Get the F2 for a set of thresholds.

        If all are ``False`` (default), then return the training metric value.
        If more than one option is set to ``True``, then return a dictionary of metrics where the keys are "train",
        "valid", and "xval".

        :param thresholds: If None, then the threshold maximizing the metric will be used.
        :param bool train: If ``True``, return the F2 value for the training data.
        :param bool valid: If ``True``, return the F2 value for the validation data.
        :param bool xval: If ``True``, return the F2 value for each of the cross-validated splits.

        :returns: The F2 values for the specified key(s).

        :examples:

        >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv")
        >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor()
        >>> r = cars[0].runif()
        >>> train = cars[r > .2]
        >>> valid = cars[r <=.2]
        >>> response_col = "economy_20mpg"
        >>> distribution = "bernoulli"
        >>> predictors = ["displacement", "power", "weight", "acceleration", "year"]
        >>> from h2o.estimators.gbm import H2OGradientBoostingEstimator
        >>> gbm = H2OGradientBoostingEstimator(nfolds=3,
        ...                                    distribution=distribution,
        ...                                    fold_assignment="Random")
        >>> gbm.train(y=response_col,
        ...           x=predictors,
        ...           validation_frame=valid,
        ...           training_frame=train)
        >>> gbm.F2() # <- Default: return training metric value
        >>> gbm.F2(train=True, valid=True, xval=True)
        """
        return self.metric('f2', thresholds, train, valid, xval)

[docs]    def F0point5(self, thresholds=None, train=False, valid=False, xval=False):
        """
        Get the F0.5 for a set of thresholds.

        If all are ``False`` (default), then return the training metric value.
        If more than one option is set to ``True``, then return a dictionary of metrics where the keys are "train",
        "valid", and "xval".

        :param thresholds: If None, then the threshold maximizing the metric will be used.
        :param bool train: If ``True``, return the F0.5 value for the training data.
        :param bool valid: If ``True``, return the F0.5 value for the validation data.
        :param bool xval: If ``True``, return the F0.5 value for each of the cross-validated splits.

        :returns: The F0.5 values for the specified key(s).

        :examples:

        >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv")
        >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor()
        >>> r = cars[0].runif()
        >>> train = cars[r > .2]
        >>> valid = cars[r <=.2]
        >>> response_col = "economy_20mpg"
        >>> distribution = "bernoulli"
        >>> predictors = ["displacement", "power", "weight", "acceleration", "year"]
        >>> gbm = H2OGradientBoostingEstimator(nfolds=3,
        ...                                    distribution=distribution,
        ...                                    fold_assignment="Random")
        >>> gbm.train(y=response_col,
        ...           x=predictors,
        ...           validation_frame=valid,
        ...           training_frame=train)         
        >>> F0point5 = gbm.F0point5() # <- Default: return training metric value
        >>> F0point5 = gbm.F0point5(train=True,  valid=True,  xval=True)
        """
        return self.metric('f0point5', thresholds, train, valid, xval)

[docs]    def accuracy(self, thresholds=None, train=False, valid=False, xval=False):
        """
        Get the accuracy for a set of thresholds.

        If all are ``False`` (default), then return the training metric value.
        If more than one option is set to ``True``, then return a dictionary of metrics where the keys are "train",
        "valid", and "xval".

        :param thresholds: If None, then the threshold maximizing the metric will be used.
        :param bool train: If ``True``, return the accuracy value for the training data.
        :param bool valid: If ``True``, return the accuracy value for the validation data.
        :param bool xval: If ``True``, return the accuracy value for each of the cross-validated splits.

        :returns: The accuracy values for the specified key(s).

        :examples:

        >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv")
        >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor()
        >>> r = cars[0].runif()
        >>> train = cars[r > .2]
        >>> valid = cars[r <=.2]
        >>> response_col = "economy_20mpg"
        >>> distribution = "bernoulli"
        >>> predictors = ["displacement", "power", "weight", "acceleration", "year"]
        >>> from h2o.estimators.gbm import H2OGradientBoostingEstimator
        >>> gbm = H2OGradientBoostingEstimator(nfolds=3,
        ...                                    distribution=distribution,
        ...                                    fold_assignment="Random")
        >>> gbm.train(y=response_col,
        ...           x=predictors,
        ...           validation_frame=valid,
        ...           training_frame=train)
        >>> gbm.accuracy() # <- Default: return training metric value
        >>> gbm.accuracy(train=True, valid=True, xval=True)
        """
        return self.metric('accuracy', thresholds, train, valid, xval)

[docs]    def error(self, thresholds=None, train=False, valid=False, xval=False):
        """
        Get the error for a set of thresholds.

        If all are ``False`` (default), then return the training metric value.
        If more than one option is set to ``True``, then return a dictionary of metrics where the keys are "train",
        "valid", and "xval".

        :param thresholds: If None, then the threshold minimizing the error will be used.
        :param bool train: If ``True``, return the error value for the training data.
        :param bool valid: If ``True``, return the error value for the validation data.
        :param bool xval: If ``True``, return the error value for each of the cross-validated splits.

        :returns: The error values for the specified key(s).

        :examples:

        >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv")
        >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor()
        >>> r = cars[0].runif()
        >>> train = cars[r > .2]
        >>> valid = cars[r <=.2]
        >>> response_col = "economy_20mpg"
        >>> distribution = "bernoulli"
        >>> predictors = ["displacement", "power", "weight", "acceleration", "year"]
        >>> from h2o.estimators.gbm import H2OGradientBoostingEstimator
        >>> gbm = H2OGradientBoostingEstimator(nfolds=3,
        ...                                    distribution=distribution,
        ...                                    fold_assignment="Random")
        >>> gbm.train(y=response_col,
        ...           x=predictors,
        ...           validation_frame=valid,
        ...           training_frame=train)
        >>> gbm.error() # <- Default: return training metric
        >>> gbm.error(train=True, valid=True, xval=True)
        """
        return self.metric('error', thresholds, train, valid, xval)

[docs]    def precision(self, thresholds=None, train=False, valid=False, xval=False):
        """
        Get the precision for a set of thresholds.

        If all are ``False`` (default), then return the training metric value.
        If more than one option is set to ``True``, then return a dictionary of metrics where the keys are "train",
        "valid", and "xval".

        :param thresholds: If None, then the threshold maximizing the metric will be used.
        :param bool train: If ``True``, return the precision value for the training data.
        :param bool valid: If ``True``, return the precision value for the validation data.
        :param bool xval: If ``True``, return the precision value for each of the cross-validated splits.

        :returns: The precision values for the specified key(s).

        :examples:

        >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv")
        >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor()
        >>> r = cars[0].runif()
        >>> train = cars[r > .2]
        >>> valid = cars[r <=.2]
        >>> response_col = "economy_20mpg"
        >>> distribution = "bernoulli"
        >>> predictors = ["displacement", "power", "weight", "acceleration", "year"]
        >>> from h2o.estimators.gbm import H2OGradientBoostingEstimator
        >>> gbm = H2OGradientBoostingEstimator(nfolds=3,
        ...                                    distribution=distribution,
        ...                                    fold_assignment="Random")
        >>> gbm.train(y=response_col,
        ...           x=predictors,
        ...           validation_frame=valid,
        ...           training_frame=train)
        >>> gbm.precision() # <- Default: return training metric value
        >>> gbm.precision(train=True, valid=True, xval=True)
        """
        return self.metric('precision', thresholds, train, valid, xval)

[docs]    def tpr(self, thresholds=None, train=False, valid=False, xval=False):
        """
        Get the True Positive Rate for a set of thresholds.

        If all are ``False`` (default), then return the training metric value.
        If more than one option is set to ``True``, then return a dictionary of metrics where the keys are "train",
        "valid", and "xval".

        :param thresholds: If None, then the threshold maximizing the metric will be used.
        :param bool train: If ``True``, return the TPR value for the training data.
        :param bool valid: If ``True``, return the TPR value for the validation data.
        :param bool xval: If ``True``, return the TPR value for each of the cross-validated splits.

        :returns: The TPR values for the specified key(s).

        :examples:

        >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv")
        >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor()
        >>> r = cars[0].runif()
        >>> train = cars[r > .2]
        >>> valid = cars[r <=.2]
        >>> response_col = "economy_20mpg"
        >>> distribution = "bernoulli"
        >>> predictors = ["displacement", "power", "weight", "acceleration", "year"]
        >>> from h2o.estimators.gbm import H2OGradientBoostingEstimator
        >>> gbm = H2OGradientBoostingEstimator(nfolds=3,
        ...                                    distribution=distribution,
        ...                                    fold_assignment="Random")
        >>> gbm.train(y=response_col,
        ...           x=predictors,
        ...           validation_frame=valid,
        ...           training_frame=train)
        >>> gbm.tpr() # <- Default: return training metric
        >>> gbm.tpr(train=True, valid=True, xval=True)
        """
        return self.metric('tpr', thresholds, train, valid, xval)

[docs]    def tnr(self, thresholds=None, train=False, valid=False, xval=False):
        """
        Get the True Negative Rate for a set of thresholds.

        If all are ``False`` (default), then return the training metric value.
        If more than one option is set to ``True``, then return a dictionary of metrics where the keys are "train",
        "valid", and "xval".

        :param thresholds: If None, then the threshold maximizing the metric will be used.
        :param bool train: If ``True``, return the TNR value for the training data.
        :param bool valid: If ``True``, return the TNR value for the validation data.
        :param bool xval: If ``True``, return the TNR value for each of the cross-validated splits.

        :returns: The TNR values for the specified key(s).

        :examples:

        >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv")
        >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor()
        >>> r = cars[0].runif()
        >>> train = cars[r > .2]
        >>> valid = cars[r <=.2]
        >>> response_col = "economy_20mpg"
        >>> distribution = "bernoulli"
        >>> predictors = ["displacement", "power", "weight", "acceleration", "year"]
        >>> from h2o.estimators.gbm import H2OGradientBoostingEstimator
        >>> gbm = H2OGradientBoostingEstimator(nfolds=3,
        ...                                    distribution=distribution,
        ...                                    fold_assignment="Random")
        >>> gbm.train(y=response_col,
        ...           x=predictors,
        ...           validation_frame=valid,
        ...           training_frame=train)
        >>> gbm.tnr() # <- Default: return training metric
        >>> gbm.tnr(train=True, valid=True, xval=True)
        """
        return self.metric('tnr', thresholds, train, valid, xval)

[docs]    def fnr(self, thresholds=None, train=False, valid=False, xval=False):
        """
        Get the False Negative Rates for a set of thresholds.

        If all are ``False`` (default), then return the training metric value.
        If more than one option is set to ``True``, then return a dictionary of metrics where the keys are "train",
        "valid", and "xval".

        :param thresholds: If None, then the threshold maximizing the metric will be used.
        :param bool train: If ``True``, return the FNR value for the training data.
        :param bool valid: If ``True``, return the FNR value for the validation data.
        :param bool xval: If ``True``, return the FNR value for each of the cross-validated splits.

        :returns: The FNR values for the specified key(s).

        :examples:

        >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv")
        >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor()
        >>> r = cars[0].runif()
        >>> train = cars[r > .2]
        >>> valid = cars[r <= .2]
        >>> response_col = "economy_20mpg"
        >>> distribution = "bernoulli"
        >>> predictors = ["displacement","power","weight","acceleration","year"]
        >>> from h2o.estimators import H2OGradientBoostingEstimator
        >>> gbm = H2OGradientBoostingEstimator(nfolds=3,
        ...                                    distribution=distribution,
        ...                                    fold_assignment="Random")
        >>> gbm.train(y=response_col,
        ...           x=predictors,
        ...           validation_frame=valid,
        ...           training_frame=train)
        >>> gbm.fnr() # <- Default: return training metric
        >>> gbm.fnr(train=True, valid=True, xval=True)
        """
        return self.metric('fnr', thresholds, train, valid, xval)

[docs]    def fpr(self, thresholds=None, train=False, valid=False, xval=False):
        """
        Get the False Positive Rates for a set of thresholds.

        If all are ``False`` (default), then return the training metric value.
        If more than one option is set to ``True``, then return a dictionary of metrics where the keys are "train",
        "valid", and "xval".

        :param thresholds: If None, then the threshold maximizing the metric will be used.
        :param bool train: If ``True``, return the FPR value for the training data.
        :param bool valid: If ``True``, return the FPR value for the validation data.
        :param bool xval: If ``True``, return the FPR value for each of the cross-validated splits.

        :returns: The FPR values for the specified key(s).

        :examples:

        >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv")
        >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor()
        >>> r = cars[0].runif()
        >>> train = cars[r > .2]
        >>> valid = cars[r <= .2]
        >>> response_col = "economy_20mpg"
        >>> distribution = "bernoulli"
        >>> predictors = ["displacement","power","weight","acceleration","year"]
        >>> from h2o.estimators import H2OGradientBoostingEstimator
        >>> gbm = H2OGradientBoostingEstimator(nfolds=3,
        ...                                    distribution=distribution,
        ...                                    fold_assignment="Random")
        >>> gbm.train(y=response_col,
        ...           x=predictors,
        ...           validation_frame=valid,
        ...           training_frame=train)
        >>> gbm.fpr() # <- Default: return training metric
        >>> gbm.fpr(train=True, valid=True, xval=True)
        """
        return self.metric('fpr', thresholds, train, valid, xval)

[docs]    def recall(self, thresholds=None, train=False, valid=False, xval=False):
        """
        Get the recall for a set of thresholds (aka True Positive Rate).

        If all are ``False`` (default), then return the training metric value.
        If more than one option is set to ``True``, then return a dictionary of metrics where the keys are "train",
        "valid", and "xval".

        :param thresholds: If None, then the threshold maximizing the metric will be used.
        :param bool train: If ``True``, return the recall value for the training data.
        :param bool valid: If ``True``, return the recall value for the validation data.
        :param bool xval: If ``True``, return the recall value for each of the cross-validated splits.

        :returns: The recall values for the specified key(s).

        :examples:

        >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv")
        >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor()
        >>> r = cars[0].runif()
        >>> train = cars[r > .2]
        >>> valid = cars[r <= .2]
        >>> response_col = "economy_20mpg"
        >>> distribution = "bernoulli"
        >>> predictors = ["displacement","power","weight","acceleration","year"]
        >>> from h2o.estimators import H2OGradientBoostingEstimator
        >>> gbm = H2OGradientBoostingEstimator(nfolds=3,
        ...                                    distribution=distribution,
        ...                                    fold_assignment="Random")
        >>> gbm.train(y=response_col,
        ...           x=predictors,
        ...           validation_frame=valid,
        ...           training_frame=train)
        >>> gbm.recall() # <- Default: return training metric
        >>> gbm.recall(train=True, valid=True, xval=True)
        """
        return self.metric('recall', thresholds, train, valid, xval)

[docs]    def sensitivity(self, thresholds=None, train=False, valid=False, xval=False):
        """
        Get the sensitivity for a set of thresholds (aka True Positive Rate or Recall).

        If all are ``False`` (default), then return the training metric value.
        If more than one option is set to ``True``, then return a dictionary of metrics where the keys are "train",
        "valid", and "xval".

        :param thresholds: If None, then the threshold maximizing the metric will be used.
        :param bool train: If ``True``, return the sensitivity value for the training data.
        :param bool valid: If ``True``, return the sensitivity value for the validation data.
        :param bool xval: If ``True``, return the sensitivity value for each of the cross-validated splits.

        :returns: The sensitivity values for the specified key(s).

        :examples:

        >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv")
        >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor()
        >>> r = cars[0].runif()
        >>> train = cars[r > .2]
        >>> valid = cars[r <= .2]
        >>> response_col = "economy_20mpg"
        >>> distribution = "bernoulli"
        >>> predictors = ["displacement","power","weight","acceleration","year"]
        >>> from h2o.estimators import H2OGradientBoostingEstimator
        >>> gbm = H2OGradientBoostingEstimator(nfolds=3,
        ...                                    distribution=distribution,
        ...                                    fold_assignment="Random")
        >>> gbm.train(y=response_col,
        ...           x=predictors,
        ...           validation_frame=valid,
        ...           training_frame=train)
        >>> gbm.sensitivity() # <- Default: return training metric
        >>> gbm.sensitivity(train=True, valid=True, xval=True)
        """
        return self.metric('sensitivity', thresholds, train, valid, xval)

[docs]    def fallout(self, thresholds=None, train=False, valid=False, xval=False):
        """
        Get the fallout for a set of thresholds (aka False Positive Rate).

        If all are ``False`` (default), then return the training metric value.
        If more than one option is set to ``True``, then return a dictionary of metrics where the keys are "train",
        "valid", and "xval".

        :param thresholds: If None, then the threshold maximizing the metric will be used.
        :param bool train: If ``True``, return the fallout value for the training data.
        :param bool valid: If ``True``, return the fallout value for the validation data.
        :param bool xval: If ``True``, return the fallout value for each of the cross-validated splits.

        :returns: The fallout values for the specified key(s).

        :examples:

        >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv")
        >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor()
        >>> r = cars[0].runif()
        >>> train = cars[r > .2]
        >>> valid = cars[r <= .2]
        >>> response_col = "economy_20mpg"
        >>> distribution = "bernoulli"
        >>> predictors = ["displacement","power","weight","acceleration","year"]
        >>> from h2o.estimators import H2OGradientBoostingEstimator
        >>> gbm = H2OGradientBoostingEstimator(nfolds=3,
        ...                                    distribution=distribution,
        ...                                    fold_assignment="Random")
        >>> gbm.train(y=response_col,
        ...           x=predictors,
        ...           validation_frame=valid,
        ...           training_frame=train)
        >>> gbm.fallout() # <- Default: return training metric
        >>> gbm.fallout(train=True, valid=True, xval=True)
        """
        return self.metric('fallout', thresholds, train, valid, xval)

[docs]    def missrate(self, thresholds=None, train=False, valid=False, xval=False):
        """
        Get the miss rate for a set of thresholds (aka False Negative Rate).

        If all are ``False`` (default), then return the training metric value.
        If more than one option is set to ``True``, then return a dictionary of metrics where the keys are "train",
        "valid", and "xval".

        :param thresholds: If None, then the threshold maximizing the metric will be used.
        :param bool train: If ``True``, return the miss rate value for the training data.
        :param bool valid: If ``True``, return the miss rate value for the validation data.
        :param bool xval: If ``True``, return the miss rate value for each of the cross-validated splits.

        :returns: The miss rate values for the specified key(s).

        :examples:

        >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv")
        >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor()
        >>> r = cars[0].runif()
        >>> train = cars[r > .2]
        >>> valid = cars[r <= .2]
        >>> response_col = "economy_20mpg"
        >>> distribution = "bernoulli"
        >>> predictors = ["displacement","power","weight","acceleration","year"]
        >>> from h2o.estimators import H2OGradientBoostingEstimator
        >>> gbm = H2OGradientBoostingEstimator(nfolds=3,
        ...                                    distribution=distribution,
        ...                                    fold_assignment="Random")
        >>> gbm.train(y=response_col,
        ...           x=predictors,
        ...           validation_frame=valid,
        ...           training_frame=train)
        >>> gbm.missrate() # <- Default: return training metric
        >>> gbm.missrate(train=True, valid=True, xval=True)
        """
        return self.metric('missrate', thresholds, train, valid, xval)

[docs]    def specificity(self, thresholds=None, train=False, valid=False, xval=False):
        """
        Get the specificity for a set of thresholds (aka True Negative Rate).

        If all are ``False`` (default), then return the training metric value.
        If more than one option is set to ``True``, then return a dictionary of metrics where the keys are "train",
        "valid", and "xval".

        :param thresholds: If None, then the threshold maximizing the metric will be used.
        :param bool train: If ``True``, return the specificity value for the training data.
        :param bool valid: If ``True``, return the specificity value for the validation data.
        :param bool xval: If ``True``, return the specificity value for each of the cross-validated splits.

        :returns: The specificity values for the specified key(s).

        :examples:

        >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv")
        >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor()
        >>> r = cars[0].runif()
        >>> train = cars[r > .2]
        >>> valid = cars[r <=.2]
        >>> response_col = "economy_20mpg"
        >>> distribution = "bernoulli"
        >>> predictors = ["displacement", "power", "weight", "acceleration", "year"]
        >>> from h2o.estimators.gbm import H2OGradientBoostingEstimator
        >>> gbm = H2OGradientBoostingEstimator(nfolds=3,
        ...                                    distribution=distribution,
        ...                                    fold_assignment="Random")
        >>> gbm.train(y=response_col,
        ...           x=predictors,
        ...           validation_frame=valid,
        ...           training_frame=train)
        >>> gbm.specificity() # <- Default: return training metric
        >>> gbm.specificity(train=True, valid=True, xval=True)
        """
        return self.metric('specificity', thresholds, train, valid, xval)

[docs]    def mcc(self, thresholds=None, train=False, valid=False, xval=False):
        """
        Get the MCC for a set of thresholds.

        If all are ``False`` (default), then return the training metric value.
        If more than one option is set to ``True``, then return a dictionary of metrics where the keys are "train",
        "valid", and "xval".

        :param thresholds: If None, then the threshold maximizing the metric will be used.
        :param bool train: If ``True``, return the MCC value for the training data.
        :param bool valid: If ``True``, return the MCC value for the validation data.
        :param bool xval: If ``True``, return the MCC value for each of the cross-validated splits.

        :returns: The MCC values for the specified key(s).

        :examples:

        >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv")
        >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor()
        >>> r = cars[0].runif()
        >>> train = cars[r > .2]
        >>> valid = cars[r <=.2]
        >>> response_col = "economy_20mpg"
        >>> distribution = "bernoulli"
        >>> predictors = ["displacement", "power", "weight", "acceleration", "year"]
        >>> from h2o.estimators.gbm import H2OGradientBoostingEstimator
        >>> gbm = H2OGradientBoostingEstimator(nfolds=3,
        ...                                    distribution=distribution,
        ...                                    fold_assignment="Random")
        >>> gbm.train(y=response_col,
        ...           x=predictors,
        ...           validation_frame=valid,
        ...           training_frame=train)
        >>> gbm.mcc() # <- Default: return training metric value
        >>> gbm.mcc(train=True, valid=True, xval=True)
        """
        return self.metric('mcc', thresholds, train, valid, xval)

[docs]    def max_per_class_error(self, thresholds=None, train=False, valid=False, xval=False):
        """
        Get the max per class error for a set of thresholds.

        If all are ``False`` (default), then return the training metric value.
        If more than one option is set to ``True``, then return a dictionary of metrics where the keys are "train",
        "valid", and "xval".

        :param thresholds: If None, then the threshold minimizing the error will be used.
        :param bool train: If ``True``, return the max per class error value for the training data.
        :param bool valid: If ``True``, return the max per class error value for the validation data.
        :param bool xval: If ``True``, return the max per class error value for each of the cross-validated splits.

        :returns: The max per class error values for the specified key(s).

        :examples:

        >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv")
        >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor()
        >>> r = cars[0].runif()
        >>> train = cars[r > .2]
        >>> valid = cars[r <=.2]
        >>> response_col = "economy_20mpg"
        >>> distribution = "bernoulli"
        >>> predictors = ["displacement", "power", "weight", "acceleration", "year"]
        >>> from h2o.estimators.gbm import H2OGradientBoostingEstimator
        >>> gbm = H2OGradientBoostingEstimator(nfolds=3,
        ...                                    distribution=distribution,
        ...                                    fold_assignment="Random")
        >>> gbm.train(y=response_col,
        ...           x=predictors,
        ...           validation_frame=valid,
        ...           training_frame=train)
        >>> gbm.max_per_class_error() # <- Default: return training metric value
        >>> gbm.max_per_class_error(train=True, valid=True, xval=True)
        """
        return self.metric('max_per_class_error', thresholds, train, valid, xval)

[docs]    def mean_per_class_error(self, thresholds=None, train=False, valid=False, xval=False):
        """
        Get the mean per class error for a set of thresholds.

        If all are ``False`` (default), then return the training metric value.
        If more than one option is set to ``True``, then return a dictionary of metrics where the keys are "train",
        "valid", and "xval".

        :param thresholds: If None, then the threshold minimizing the error will be used.
        :param bool train: If ``True``, return the mean per class error value for the training data.
        :param bool valid: If ``True``, return the mean per class error value for the validation data.
        :param bool xval: If ``True``, return the mean per class error value for each of the cross-validated splits.

        :returns: The mean per class error values for the specified key(s).

        :examples:

        >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv")
        >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor()
        >>> r = cars[0].runif()
        >>> train = cars[r > .2]
        >>> valid = cars[r <= .2]
        >>> response_col = "economy_20mpg"
        >>> distribution = "bernoulli"
        >>> predictors = ["displacement","power","weight","acceleration","year"]
        >>> from h2o.estimators import H2OGradientBoostingEstimator
        >>> gbm = H2OGradientBoostingEstimator(nfolds=3,
        ...                                    distribution=distribution,
        ...                                    fold_assignment="Random")
        >>> gbm.train(y=response_col,
        ...           x=predictors,
        ...           validation_frame=valid,
        ...           training_frame=train)
        >>> gbm.mean_per_class_error() # <- Default: return training metric
        >>> gbm.mean_per_class_error(train=True, valid=True, xval=True)
        """
        return self.metric('mean_per_class_error', thresholds, train, valid, xval)

[docs]    def metric(self, metric, thresholds=None, train=False, valid=False, xval=False):
        """
        Get the metric value for a set of thresholds.

        If all are ``False`` (default), then return the training metric value.
        If more than one option is set to ``True``, then return a dictionary of metrics where the keys are "train",
        "valid", and "xval".

        :param str metric: name of the metric to retrieve.
        :param thresholds: If None, then the threshold maximizing the metric will be used (or minimizing it if the metric is an error).
        :param bool train: If ``True``, return the metric value for the training data.
        :param bool valid: If ``True``, return the metric value for the validation data.
        :param bool xval: If ``True``, return the metric value for each of the cross-validated splits.

        :returns: The metric values for the specified key(s).

        :examples:

        >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv")
        >>> >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor()
        >>> r = cars[0].runif()
        >>> train = cars[r > .2]
        >>> valid = cars[r <= .2]
        >>> response_col = "economy_20mpg"
        >>> distribution = "bernoulli"
        >>> predictors = ["displacement","power","weight","acceleration","year"]
        # thresholds parameter must be a list (i.e. [0.01, 0.5, 0.99])
        >>> thresholds = [0.01,0.5,0.99]
        >>> gbm = H2OGradientBoostingEstimator(nfolds=3,
        ...                                    distribution=distribution,
        ...                                    fold_assignment="Random")
        >>> gbm.train(y=response_col,
        ...           x=predictors,
        ...           validation_frame=valid,
        ...           training_frame=train)
        # allowable metrics are absolute_mcc, accuracy, precision,
        # f0point5, f1, f2, mean_per_class_accuracy, min_per_class_accuracy,
        # tns, fns, fps, tps, tnr, fnr, fpr, tpr, recall, sensitivity,
        # missrate, fallout, specificity
        >>> gbm.metric(metric='tpr', thresholds=thresholds)
        """
        tm = ModelBase._get_metrics(self, train, valid, xval)
        m = {}
        for k, v in viewitems(tm):
            if v is None:
                m[k] = None
            elif hasattr(v, metric) and callable(getattr(v, metric)):
                m[k] = getattr(v, metric)(thresholds=thresholds)
            else:
                m[k] = v.metric(metric, thresholds=thresholds)
        return list(m.values())[0] if len(m) == 1 else m

[docs]    def plot(self, timestep="AUTO", metric="AUTO", server=False, save_plot_path=None):
        """
        Plot training set (and validation set if available) scoring history for an H2OBinomialModel.

        The timestep and metric arguments are restricted to what is available in its scoring history.

        :param str timestep: A unit of measurement for the x-axis.
        :param str metric: A unit of measurement for the y-axis.
        :param bool server: if ``True``, then generate the image inline (using matplotlib's "Agg" backend).
        :param save_plot_path: a path to save the plot via using matplotlib function savefig.
        
        :returns: object that contains the resulting figure (can be accessed using ``result.figure()``)

        :examples:

        >>> from h2o.estimators import H2OGeneralizedLinearEstimator
        >>> benign = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/benign.csv")
        >>> response = 3
        >>> predictors = [0, 1, 2, 4, 5, 6, 7, 8, 9, 10]
        >>> model = H2OGeneralizedLinearEstimator(family="binomial")
        >>> model.train(x=predictors, y=response, training_frame=benign)
        >>> model.plot(timestep="AUTO", metric="objective", server=False)
        """
        if not has_extension(self, 'ScoringHistory'):
            raise H2OValueError("Scoring history plot is not available for this type of model (%s)." % self.algo)
            
        valid_metrics = self._allowed_metrics('binomial')
        if valid_metrics is not None:
            assert_is_type(metric, 'AUTO', *valid_metrics), "metric for H2OBinomialModel must be one of %s" % valid_metrics
        if metric == "AUTO":
            metric = self._default_metric('binomial') or 'AUTO'
        return self.scoring_history_plot(timestep=timestep, metric=metric, server=server, save_plot_path=save_plot_path)

[docs]    def roc(self, train=False, valid=False, xval=False):
        """
        Return the coordinates of the ROC curve for a given set of data.

        The coordinates are two-tuples containing the false positive rates as a list and true positive rates as a list.
        If all are ``False`` (default), then return is the training data. If more than one ROC
        curve is requested, the data is returned as a dictionary of two-tuples.

        :param bool train: If ``True``, return the ROC value for the training data.
        :param bool valid: If ``True``, return the ROC value for the validation data.
        :param bool xval: If ``True``, return the ROC value for each of the cross-validated splits.

        :returns: The ROC values for the specified key(s).

        :examples:

        >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv")
        >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor()
        >>> r = cars[0].runif()
        >>> train = cars[r > .2]
        >>> valid = cars[r <=.2]
        >>> response_col = "economy_20mpg"
        >>> distribution = "bernoulli"
        >>> predictors = ["displacement", "power", "weight", "acceleration", "year"]
        >>> from h2o.estimators.gbm import H2OGradientBoostingEstimator
        >>> gbm = H2OGradientBoostingEstimator(nfolds=3,
        ...                                    distribution=distribution,
        ...                                    fold_assignment="Random")
        >>> gbm.train(y=response_col,
        ...           x=predictors,
        ...           validation_frame=valid,
        ...           training_frame=train)
        >>> gbm.roc() # <- Default: return training data
        >>> gbm.roc(train=True, valid=True, xval=True)
        """
        return self._delegate_to_metrics('roc', train, valid, xval)

[docs]    def gains_lift(self, train=False, valid=False, xval=False):
        """
        Get the Gains/Lift table for the specified metrics.

        If all are ``False`` (default), then return the training metric Gains/Lift table.
        If more than one option is set to ``True``, then return a dictionary of metrics where t
        he keys are "train", "valid", and "xval".

        :param bool train: If ``True``, return the gains lift value for the training data.
        :param bool valid: If ``True``, return the gains lift value for the validation data.
        :param bool xval: If ``True``, return the gains lift value for each of the cross-validated splits.

        :returns: The gains lift values for the specified key(s).

        :examples:

        >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv")
        >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor()
        >>> r = cars[0].runif()
        >>> train = cars[r > .2]
        >>> valid = cars[r <=.2]
        >>> response_col = "economy_20mpg"
        >>> distribution = "bernoulli"
        >>> predictors = ["displacement", "power", "weight", "acceleration", "year"]
        >>> from h2o.estimators.gbm import H2OGradientBoostingEstimator
        >>> gbm = H2OGradientBoostingEstimator(nfolds=3,
        ...                                    distribution=distribution,
        ...                                    fold_assignment="Random")
        >>> gbm.train(y=response_col,
        ...           x=predictors,
        ...           validation_frame=valid,
        ...           training_frame=train)
        >>> gbm.gains_lift() # <- Default: return training metric Gain/Lift table
        >>> gbm.gains_lift(train=True, valid=True, xval=True)
        """
        return self._delegate_to_metrics('gains_lift', train, valid, xval)

[docs]    @deprecated_params({'save_to_file': 'save_plot_path'})
    def gains_lift_plot(self, type="both", xval=False, server=False, save_plot_path=None, plot=True):
        """
        Plot Gains/Lift curves.

        :param type: One of:

            - "both" (default)
            - "gains"
            - "lift"
            
        :param xval: if ``True``, use cross-validation metrics.
        :param server: if ``True``, generate plot inline using matplotlib's "Agg" backend.
        :param save_plot_path: filename to save the plot to.
        :param plot: ``True`` to plot curve, ``False`` to get a gains lift table

        :returns: Gains lift table + the resulting plot (can be accessed using ``result.figure()``)
        """
        return self._delegate_to_metrics('gains_lift_plot', type=type, xval=xval, server=server, save_plot_path=save_plot_path, plot=plot)

[docs]    def kolmogorov_smirnov(self):
        """
        Retrieves the Kolmogorov-Smirnov metric (K-S metric) for a given binomial model. The number returned is in range between 0 and 1.
        The K-S metric represents the degree of separation between the positive (1) and negative (0) cumulative distribution
        functions. Detailed metrics per each group are to be found in the gains-lift table.

        :return: Kolmogorov-Smirnov metric, a number between 0 and 1.

        :examples:

        >>> from h2o.estimators import H2OGradientBoostingEstimator
        >>> airlines = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/testng/airlines_train.csv")
        >>> model = H2OGradientBoostingEstimator(ntrees=1,
        ...                                      gainslift_bins=20)
        >>> model.train(x=["Origin", "Distance"],
        ...             y="IsDepDelayed",
        ...             training_frame=airlines)
        >>> model.kolmogorov_smirnov()
        """
        return max(self.gains_lift()["kolmogorov_smirnov"])

[docs]    def confusion_matrix(self, metrics=None, thresholds=None, train=False, valid=False, xval=False):
        """
        Get the confusion matrix for the specified metrics/thresholds.

        If all are ``False`` (default), then return the training metric value.
        If more than one option is set to ``True``, then return a dictionary of metrics where the
        keys are "train", "valid", and "xval"

        :param metrics: A string (or list of strings) among metrics listed in :const:`H2OBinomialModelMetrics.maximizing_metrics`.
            Defaults to ``'f1'``.
        :param thresholds: A value (or list of values) between 0 and 1.
            If None, then the thresholds maximizing each provided metric will be used.
        :param bool train: If ``True``, return the confusion matrix value for the training data.
        :param bool valid: If ``True``, return the confusion matrix value for the validation data.
        :param bool xval: If ``True``, return the confusion matrix value for each of the cross-validated splits.

        :returns: The confusion matrix values for the specified key(s).

        :examples:

        >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv")
        >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor()
        >>> r = cars[0].runif()
        >>> train = cars[r > .2]
        >>> valid = cars[r <=.2]
        >>> response_col = "economy_20mpg"
        >>> distribution = "bernoulli"
        >>> predictors = ["displacement", "power", "weight", "acceleration", "year"]
        >>> from h2o.estimators.gbm import H2OGradientBoostingEstimator
        >>> gbm = H2OGradientBoostingEstimator(nfolds=3,
        ...                                    distribution=distribution,
        ...                                    fold_assignment="Random")
        >>> gbm.train(y=response_col,
        ...           x=predictors,
        ...           validation_frame=valid,
        ...           training_frame=train)
        >>> gbm.confusion_matrix() # <- Default: return training metric value
        >>> gbm.confusion_matrix(train=True, valid=True, xval=True)
        """
        return self._delegate_to_metrics('confusion_matrix', train, valid, xval,
                                         metrics=metrics, thresholds=thresholds)

[docs]    def find_threshold_by_max_metric(self, metric, train=False, valid=False, xval=False):
        """
        If all are ``False`` (default), then return the training metric value.

        If more than one option is set to ``True``, then return a dictionary of metrics where the keys are "train",
        "valid", and "xval".

        :param str metric: A metric among the metrics listed in :const:`H2OBinomialModelMetrics.maximizing_metrics`.
        :param bool train: If ``True``, return the find threshold by max metric value for the training data.
        :param bool valid: If ``True``, return the find threshold by max metric value for the validation data.
        :param bool xval: If ``True``, return the find threshold by max metric value for each of the cross-validated splits.

        :returns: The find threshold by max metric values for the specified key(s).

        :examples:

        >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv")
        >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor()
        >>> r = cars[0].runif()
        >>> train = cars[r > .2]
        >>> valid = cars[r <=.2]
        >>> response_col = "economy_20mpg"
        >>> distribution = "bernoulli"
        >>> predictors = ["displacement", "power", "weight",
        ...               "acceleration", "year"]
        >>> from h2o.estimators.gbm import H2OGradientBoostingEstimator
        >>> gbm = H2OGradientBoostingEstimator(nfolds=3,
        ...                                    distribution=distribution,
        ...                                    fold_assignment="Random")
        >>> gbm.train(y=response_col,
        ...           x=predictors,
        ...           validation_frame=valid,
        ...           training_frame=train)
        >>> max_metric = gbm.find_threshold_by_max_metric(metric="f2",
        ...                                               train=True)
        >>> max_metric
        """
        return self._delegate_to_metrics('find_threshold_by_max_metric', train, valid, xval, metric=metric)

[docs]    def find_idx_by_threshold(self, threshold, train=False, valid=False, xval=False):
        """
        Retrieve the index in this metric's threshold list at which the given threshold is located.

        If all are ``False`` (default), then return the training metric value.
        If more than one option is set to ``True``, then return a dictionary of metrics where the keys are "train",
        "valid", and "xval".

        :param float threshold: Threshold value to search for in the threshold list.
        :param bool train: If ``True``, return the find idx by threshold value for the training data.
        :param bool valid: If ``True``, return the find idx by threshold value for the validation data.
        :param bool xval: If ``True``, return the find idx by threshold value for each of the cross-validated splits.

        :returns: The find idx by threshold values for the specified key(s).

        :examples:

        >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv")
        >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor()
        >>> r = cars[0].runif()
        >>> train = cars[r > .2]
        >>> valid = cars[r <=.2]
        >>> response_col = "economy_20mpg"
        >>> distribution = "bernoulli"
        >>> predictors = ["displacement", "power", "weight",
        ...               "acceleration", "year"]
        >>> from h2o.estimators.gbm import H2OGradientBoostingEstimator
        >>> gbm = H2OGradientBoostingEstimator(nfolds=3,
        ...                                    distribution=distribution,
        ...                                    fold_assignment="Random")
        >>> gbm.train(y=response_col,
        ...           x=predictors,
        ...           validation_frame=valid,
        ...           training_frame=train)
        >>> idx_threshold = gbm.find_idx_by_threshold(threshold=0.39438,
        ...                                           train=True)
        >>> idx_threshold
        """
        return self._delegate_to_metrics('find_idx_by_threshold', train, valid, xval, threshold=threshold)

    def _delegate_to_metrics(self, method, train=False, valid=False, xval=False, **kwargs):
        """
        Call ``method`` on each selected metrics object and collect the results.

        The metrics objects are obtained from ``ModelBase._get_metrics(self, train, valid, xval)``.

        :param str method: name of the method to invoke on every metrics object.
        :param bool train: selection flag passed to ``ModelBase._get_metrics``.
        :param bool valid: selection flag passed to ``ModelBase._get_metrics``.
        :param bool xval: selection flag passed to ``ModelBase._get_metrics``.
        :param kwargs: keyword arguments forwarded unchanged to ``method``.

        :returns: the bare result when exactly one metrics entry was selected, otherwise a dictionary of
            results under the same keys as the selected metrics. An entry whose metrics object is ``None``
            yields ``None``.

        :raises ValueError: if a metrics object has no callable attribute named ``method``.
        """
        results = {}
        for key, metrics_obj in viewitems(ModelBase._get_metrics(self, train, valid, xval)):
            if metrics_obj is None:
                results[key] = None
                continue
            handler = getattr(metrics_obj, method, None)
            if not callable(handler):
                raise ValueError('no method {} in {}'.format(method, type(metrics_obj)))
            results[key] = handler(**kwargs)
        if len(results) == 1:
            return list(results.values())[0]
        return results