Source code for h2o.grid.metrics

# -*- encoding: utf-8 -*-
"""
.. note::
    Classes in this module are used at runtime as mixins: their methods can (and should) be accessed directly from a trained grid.
"""
from __future__ import absolute_import, division, print_function, unicode_literals
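# Illustrative sketch (not part of this module's API): every mixin method below follows the same pattern of
# fanning a per-model call out over the grid and keying the results by model id. A hypothetical standalone
# helper, assuming ``gs`` is an already-trained H2OGridSearch and ``metric_name`` names a per-model method:
#
#     def collect_metric(gs, metric_name, **kwargs):
#         """Call ``metric_name`` on every model in the grid; return a {model_id: value} dict."""
#         return {m.model_id: getattr(m, metric_name)(**kwargs) for m in gs.models}
#
#     # e.g. collect_metric(gs, "auc", train=True) mirrors gs.auc(train=True) on a binomial grid.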


#-----------------------------------------------------------------------------------------------------------------------
# AutoEncoder Grid Search
#-----------------------------------------------------------------------------------------------------------------------

class H2OAutoEncoderGridSearch(object):
    def anomaly(self, test_data, per_feature=False):
        """
        Obtain the reconstruction error for the input ``test_data``.

        :param H2OFrame test_data: The dataset upon which the reconstruction error is computed.
        :param bool per_feature: Whether to return the square reconstruction error per feature.
            Otherwise, return the mean square error.
        :returns: the reconstruction error.

        :example:

        >>> from h2o.grid.grid_search import H2OGridSearch
        >>> from h2o.estimators import H2OAutoEncoderEstimator
        >>> rows = [[1,2,3,4,0]*50,
        ...         [2,1,2,4,1]*50,
        ...         [2,1,4,2,1]*50,
        ...         [0,1,2,34,1]*50,
        ...         [2,3,4,1,0]*50]
        >>> fr = h2o.H2OFrame(rows)
        >>> hyper_parameters = {'activation': "Tanh", 'hidden': [50,50,50]}
        >>> gs = H2OGridSearch(H2OAutoEncoderEstimator(), hyper_parameters)
        >>> gs.train(x=range(4), training_frame=fr)
        >>> gs.anomaly(fr, per_feature=True)
        """
        return {model.model_id: model.anomaly(test_data, per_feature) for model in self.models}

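# Usage sketch (illustrative): assuming ``gs`` and ``fr`` were built as in the ``anomaly`` example above,
# rows whose reconstruction error exceeds an arbitrary cut-off can be flagged for one of the grid's models.
#
#     >>> errors = gs.anomaly(fr)                  # {model_id: H2OFrame of per-row reconstruction error}
#     >>> first_id = gs.models[0].model_id
#     >>> flagged = fr[errors[first_id] > 0.1, :]  # 0.1 is an arbitrary, data-dependent threshold
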

#-----------------------------------------------------------------------------------------------------------------------
# Binomial Grid Search
#-----------------------------------------------------------------------------------------------------------------------

class H2OBinomialGridSearch(object):
    def F1(self, thresholds=None, train=False, valid=False, xval=False):
        """
        Get the F1 values for a set of thresholds for the models explored. If all are ``False`` (default), then
        return the training metric value. If more than one option is set to ``True``, then return a dictionary
        of metrics where the keys are "train", "valid", and "xval".

        :param thresholds: thresholds parameter must be a list (e.g. ``[0.01, 0.5, 0.99]``). If None, then the
            threshold maximizing the metric will be used.
        :param bool train: If ``True``, return the F1 value for the training data.
        :param bool valid: If ``True``, return the F1 value for the validation data.
        :param bool xval: If ``True``, return the F1 value for each of the cross-validated splits.
        :returns: Dictionary of model keys to F1 values.

        :examples:

        >>> from h2o.grid.grid_search import H2OGridSearch
        >>> from h2o.estimators.glm import H2OGeneralizedLinearEstimator
        >>> training_data = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/benign.csv")
        >>> hyper_parameters = {'alpha': [0.01,0.5], 'lambda': [1e-5,1e-6]}
        >>> gs = H2OGridSearch(H2OGeneralizedLinearEstimator(family='binomial'), hyper_parameters)
        >>> gs.train(x=list(range(4, 12)), y=3, training_frame=training_data)
        >>> gs.F1(train=True)
        """
        return {model.model_id: model.F1(thresholds, train, valid, xval) for model in self.models}  # dict model key -> F1 score

    def F2(self, thresholds=None, train=False, valid=False, xval=False):
        """
        Get the F2 for a set of thresholds. If all are ``False`` (default), then return the training metric value.
        If more than one option is set to ``True``, then return a dictionary of metrics where the keys are
        "train", "valid", and "xval".

        :param thresholds: thresholds parameter must be a list (e.g. ``[0.01, 0.5, 0.99]``). If None, then the
            threshold maximizing the metric will be used.
        :param bool train: If train is ``True``, then return the F2 value for the training data.
        :param bool valid: If valid is ``True``, then return the F2 value for the validation data.
        :param bool xval: If xval is ``True``, then return the F2 value for the cross validation data.
        :returns: Dictionary of model keys to F2 values.

        :examples:

        >>> from h2o.grid.grid_search import H2OGridSearch
        >>> from h2o.estimators.glm import H2OGeneralizedLinearEstimator
        >>> training_data = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/benign.csv")
        >>> hyper_parameters = {'alpha': [0.01,0.5], 'lambda': [1e-5,1e-6]}
        >>> gs = H2OGridSearch(H2OGeneralizedLinearEstimator(family='binomial'), hyper_parameters)
        >>> gs.train(x=list(range(4, 12)), y=3, training_frame=training_data)
        >>> gs.F2(train=True)
        """
        return {model.model_id: model.F2(thresholds, train, valid, xval) for model in self.models}

    def F0point5(self, thresholds=None, train=False, valid=False, xval=False):
        """
        Get the F0.5 for a set of thresholds. If all are ``False`` (default), then return the training metric value.
        If more than one option is set to ``True``, then return a dictionary of metrics where the keys are
        "train", "valid", and "xval".

        :param thresholds: thresholds parameter must be a list (e.g. ``[0.01, 0.5, 0.99]``). If None, then the
            threshold maximizing the metric will be used.
        :param bool train: If train is ``True``, then return the F0point5 value for the training data.
        :param bool valid: If valid is ``True``, then return the F0point5 value for the validation data.
        :param bool xval: If xval is ``True``, then return the F0point5 value for the cross validation data.
        :returns: The F0point5 for this binomial model.

        :examples:

        >>> from h2o.grid.grid_search import H2OGridSearch
        >>> from h2o.estimators.glm import H2OGeneralizedLinearEstimator
        >>> training_data = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/benign.csv")
        >>> hyper_parameters = {'alpha': [0.01,0.5], 'lambda': [1e-5,1e-6]}
        >>> gs = H2OGridSearch(H2OGeneralizedLinearEstimator(family='binomial'), hyper_parameters)
        >>> gs.train(x=list(range(4, 12)), y=3, training_frame=training_data)
        >>> gs.F0point5(train=True)
        """
        return {model.model_id: model.F0point5(thresholds, train, valid, xval) for model in self.models}

    def accuracy(self, thresholds=None, train=False, valid=False, xval=False):
        """
        Get the accuracy for a set of thresholds. If all are ``False`` (default), then return the training metric value.
        If more than one option is set to ``True``, then return a dictionary of metrics where the keys are
        "train", "valid", and "xval".

        :param thresholds: thresholds parameter must be a list (e.g. ``[0.01, 0.5, 0.99]``). If None, then the
            threshold maximizing the metric will be used.
        :param bool train: If train is ``True``, then return the accuracy value for the training data.
        :param bool valid: If valid is ``True``, then return the accuracy value for the validation data.
        :param bool xval: If xval is ``True``, then return the accuracy value for the cross validation data.
        :returns: The accuracy for this binomial model.

        :examples:

        >>> from h2o.grid.grid_search import H2OGridSearch
        >>> from h2o.estimators.glm import H2OGeneralizedLinearEstimator
        >>> training_data = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/benign.csv")
        >>> hyper_parameters = {'alpha': [0.01,0.5], 'lambda': [1e-5,1e-6]}
        >>> gs = H2OGridSearch(H2OGeneralizedLinearEstimator(family='binomial'), hyper_parameters)
        >>> gs.train(x=list(range(4, 12)), y=3, training_frame=training_data)
        >>> gs.accuracy(train=True)
        """
        return {model.model_id: model.accuracy(thresholds, train, valid, xval) for model in self.models}

    def error(self, thresholds=None, train=False, valid=False, xval=False):
        """
        Get the error for a set of thresholds. If all are ``False`` (default), then return the training metric value.
        If more than one option is set to ``True``, then return a dictionary of metrics where the keys are
        "train", "valid", and "xval".

        :param thresholds: thresholds parameter must be a list (e.g. ``[0.01, 0.5, 0.99]``). If None, then the
            threshold minimizing the error will be used.
        :param bool train: If train is ``True``, then return the error value for the training data.
        :param bool valid: If valid is ``True``, then return the error value for the validation data.
        :param bool xval: If xval is ``True``, then return the error value for the cross validation data.
        :returns: The error for this binomial model.

        :examples:

        >>> from h2o.grid.grid_search import H2OGridSearch
        >>> from h2o.estimators.glm import H2OGeneralizedLinearEstimator
        >>> training_data = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/benign.csv")
        >>> hyper_parameters = {'alpha': [0.01,0.5], 'lambda': [1e-5,1e-6]}
        >>> gs = H2OGridSearch(H2OGeneralizedLinearEstimator(family='binomial'), hyper_parameters)
        >>> gs.train(x=list(range(4, 12)), y=3, training_frame=training_data)
        >>> gs.error(train=True)
        """
        return {model.model_id: model.error(thresholds, train, valid, xval) for model in self.models}

    def precision(self, thresholds=None, train=False, valid=False, xval=False):
        """
        Get the precision for a set of thresholds. If all are ``False`` (default), then return the training metric value.
        If more than one option is set to ``True``, then return a dictionary of metrics where the keys are
        "train", "valid", and "xval".

        :param thresholds: thresholds parameter must be a list (e.g. ``[0.01, 0.5, 0.99]``). If None, then the
            threshold maximizing the metric will be used.
        :param bool train: If train is ``True``, then return the precision value for the training data.
        :param bool valid: If valid is ``True``, then return the precision value for the validation data.
        :param bool xval: If xval is ``True``, then return the precision value for the cross validation data.
        :returns: The precision for this binomial model.

        :examples:

        >>> from h2o.grid.grid_search import H2OGridSearch
        >>> from h2o.estimators.glm import H2OGeneralizedLinearEstimator
        >>> training_data = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/benign.csv")
        >>> hyper_parameters = {'alpha': [0.01,0.5], 'lambda': [1e-5,1e-6]}
        >>> gs = H2OGridSearch(H2OGeneralizedLinearEstimator(family='binomial'), hyper_parameters)
        >>> gs.train(x=list(range(4, 12)), y=3, training_frame=training_data)
        >>> gs.precision(train=True)
        """
        return {model.model_id: model.precision(thresholds, train, valid, xval) for model in self.models}

    def tpr(self, thresholds=None, train=False, valid=False, xval=False):
        """
        Get the True Positive Rate for a set of thresholds. If all are ``False`` (default), then return the training metric value.
        If more than one option is set to ``True``, then return a dictionary of metrics where the keys are
        "train", "valid", and "xval".

        :param thresholds: thresholds parameter must be a list (e.g. ``[0.01, 0.5, 0.99]``). If None, then the
            threshold maximizing the metric will be used.
        :param bool train: If train is ``True``, then return the TPR value for the training data.
        :param bool valid: If valid is ``True``, then return the TPR value for the validation data.
        :param bool xval: If xval is ``True``, then return the TPR value for the cross validation data.
        :returns: The TPR for this binomial model.

        :examples:

        >>> from h2o.grid.grid_search import H2OGridSearch
        >>> from h2o.estimators.glm import H2OGeneralizedLinearEstimator
        >>> training_data = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/benign.csv")
        >>> hyper_parameters = {'alpha': [0.01,0.5], 'lambda': [1e-5,1e-6]}
        >>> gs = H2OGridSearch(H2OGeneralizedLinearEstimator(family='binomial'), hyper_parameters)
        >>> gs.train(x=list(range(4, 12)), y=3, training_frame=training_data)
        >>> gs.tpr(train=True)
        """
        return {model.model_id: model.tpr(thresholds, train, valid, xval) for model in self.models}

    def tnr(self, thresholds=None, train=False, valid=False, xval=False):
        """
        Get the True Negative Rate for a set of thresholds. If all are ``False`` (default), then return the training metric value.
        If more than one option is set to ``True``, then return a dictionary of metrics where the keys are
        "train", "valid", and "xval".

        :param thresholds: thresholds parameter must be a list (e.g. ``[0.01, 0.5, 0.99]``). If None, then the
            threshold maximizing the metric will be used.
        :param bool train: If train is ``True``, then return the TNR value for the training data.
        :param bool valid: If valid is ``True``, then return the TNR value for the validation data.
        :param bool xval: If xval is ``True``, then return the TNR value for the cross validation data.
        :returns: The TNR for this binomial model.

        :examples:

        >>> from h2o.grid.grid_search import H2OGridSearch
        >>> from h2o.estimators.glm import H2OGeneralizedLinearEstimator
        >>> training_data = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/benign.csv")
        >>> hyper_parameters = {'alpha': [0.01,0.5], 'lambda': [1e-5,1e-6]}
        >>> gs = H2OGridSearch(H2OGeneralizedLinearEstimator(family='binomial'), hyper_parameters)
        >>> gs.train(x=list(range(4, 12)), y=3, training_frame=training_data)
        >>> gs.tnr(train=True)
        """
        return {model.model_id: model.tnr(thresholds, train, valid, xval) for model in self.models}

    def fnr(self, thresholds=None, train=False, valid=False, xval=False):
        """
        Get the False Negative Rates for a set of thresholds. If all are ``False`` (default), then return the training metric value.
        If more than one option is set to ``True``, then return a dictionary of metrics where the keys are
        "train", "valid", and "xval".

        :param thresholds: thresholds parameter must be a list (e.g. ``[0.01, 0.5, 0.99]``). If None, then the
            threshold maximizing the metric will be used.
        :param bool train: If train is ``True``, then return the FNR value for the training data.
        :param bool valid: If valid is ``True``, then return the FNR value for the validation data.
        :param bool xval: If xval is ``True``, then return the FNR value for the cross validation data.
        :returns: The FNR for this binomial model.

        :examples:

        >>> from h2o.grid.grid_search import H2OGridSearch
        >>> from h2o.estimators.glm import H2OGeneralizedLinearEstimator
        >>> training_data = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/benign.csv")
        >>> hyper_parameters = {'alpha': [0.01,0.5], 'lambda': [1e-5,1e-6]}
        >>> gs = H2OGridSearch(H2OGeneralizedLinearEstimator(family='binomial'), hyper_parameters)
        >>> gs.train(x=list(range(4, 12)), y=3, training_frame=training_data)
        >>> gs.fnr(train=True)
        """
        return {model.model_id: model.fnr(thresholds, train, valid, xval) for model in self.models}

    def fpr(self, thresholds=None, train=False, valid=False, xval=False):
        """
        Get the False Positive Rates for a set of thresholds. If all are ``False`` (default), then return the training metric value.
        If more than one option is set to ``True``, then return a dictionary of metrics where the keys are
        "train", "valid", and "xval".

        :param thresholds: thresholds parameter must be a list (e.g. ``[0.01, 0.5, 0.99]``). If None, then the
            threshold maximizing the metric will be used.
        :param bool train: If train is ``True``, then return the FPR value for the training data.
        :param bool valid: If valid is ``True``, then return the FPR value for the validation data.
        :param bool xval: If xval is ``True``, then return the FPR value for the cross validation data.
        :returns: The FPR for this binomial model.

        :examples:

        >>> from h2o.grid.grid_search import H2OGridSearch
        >>> from h2o.estimators.glm import H2OGeneralizedLinearEstimator
        >>> training_data = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/benign.csv")
        >>> hyper_parameters = {'alpha': [0.01,0.5], 'lambda': [1e-5,1e-6]}
        >>> gs = H2OGridSearch(H2OGeneralizedLinearEstimator(family='binomial'), hyper_parameters)
        >>> gs.train(x=list(range(4, 12)), y=3, training_frame=training_data)
        >>> gs.fpr(train=True)
        """
        return {model.model_id: model.fpr(thresholds, train, valid, xval) for model in self.models}

    def recall(self, thresholds=None, train=False, valid=False, xval=False):
        """
        Get the Recall (AKA True Positive Rate) for a set of thresholds. If all are ``False`` (default), then return the training metric value.
        If more than one option is set to ``True``, then return a dictionary of metrics where the keys are
        "train", "valid", and "xval".

        :param thresholds: thresholds parameter must be a list (e.g. ``[0.01, 0.5, 0.99]``). If None, then the
            threshold maximizing the metric will be used.
        :param bool train: If train is ``True``, then return the recall value for the training data.
        :param bool valid: If valid is ``True``, then return the recall value for the validation data.
        :param bool xval: If xval is ``True``, then return the recall value for the cross validation data.
        :returns: The recall for this binomial model.

        :examples:

        >>> from h2o.grid.grid_search import H2OGridSearch
        >>> from h2o.estimators.glm import H2OGeneralizedLinearEstimator
        >>> training_data = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/benign.csv")
        >>> hyper_parameters = {'alpha': [0.01,0.5], 'lambda': [1e-5,1e-6]}
        >>> gs = H2OGridSearch(H2OGeneralizedLinearEstimator(family='binomial'), hyper_parameters)
        >>> gs.train(x=list(range(4, 12)), y=3, training_frame=training_data)
        >>> gs.recall(train=True)
        """
        return {model.model_id: model.recall(thresholds, train, valid, xval) for model in self.models}

    def sensitivity(self, thresholds=None, train=False, valid=False, xval=False):
        """
        Get the sensitivity (AKA True Positive Rate or Recall) for a set of thresholds. If all are ``False`` (default),
        then return the training metric value. If more than one option is set to ``True``, then return a dictionary
        of metrics where the keys are "train", "valid", and "xval".

        :param thresholds: thresholds parameter must be a list (e.g. ``[0.01, 0.5, 0.99]``). If None, then the
            threshold maximizing the metric will be used.
        :param bool train: If train is ``True``, then return the sensitivity value for the training data.
        :param bool valid: If valid is ``True``, then return the sensitivity value for the validation data.
        :param bool xval: If xval is ``True``, then return the sensitivity value for the cross validation data.
        :returns: The sensitivity for this binomial model.

        :examples:

        >>> from h2o.grid.grid_search import H2OGridSearch
        >>> from h2o.estimators.glm import H2OGeneralizedLinearEstimator
        >>> training_data = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/benign.csv")
        >>> hyper_parameters = {'alpha': [0.01,0.5], 'lambda': [1e-5,1e-6]}
        >>> gs = H2OGridSearch(H2OGeneralizedLinearEstimator(family='binomial'), hyper_parameters)
        >>> gs.train(x=list(range(4, 12)), y=3, training_frame=training_data)
        >>> gs.sensitivity(train=True)
        """
        return {model.model_id: model.sensitivity(thresholds, train, valid, xval) for model in self.models}

    def fallout(self, thresholds=None, train=False, valid=False, xval=False):
        """
        Get the Fallout (AKA False Positive Rate) for a set of thresholds. If all are ``False`` (default), then return the training metric value.
        If more than one option is set to ``True``, then return a dictionary of metrics where the keys are
        "train", "valid", and "xval".

        :param thresholds: thresholds parameter must be a list (e.g. ``[0.01, 0.5, 0.99]``). If None, then the
            threshold maximizing the metric will be used.
        :param bool train: If train is ``True``, then return the fallout value for the training data.
        :param bool valid: If valid is ``True``, then return the fallout value for the validation data.
        :param bool xval: If xval is ``True``, then return the fallout value for the cross validation data.
        :returns: The fallout for this binomial model.

        :examples:

        >>> from h2o.grid.grid_search import H2OGridSearch
        >>> from h2o.estimators.glm import H2OGeneralizedLinearEstimator
        >>> training_data = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/benign.csv")
        >>> hyper_parameters = {'alpha': [0.01,0.5], 'lambda': [1e-5,1e-6]}
        >>> gs = H2OGridSearch(H2OGeneralizedLinearEstimator(family='binomial'), hyper_parameters)
        >>> gs.train(x=list(range(4, 12)), y=3, training_frame=training_data)
        >>> gs.fallout(train=True)
        """
        return {model.model_id: model.fallout(thresholds, train, valid, xval) for model in self.models}

    def missrate(self, thresholds=None, train=False, valid=False, xval=False):
        """
        Get the miss rate (AKA False Negative Rate) for a set of thresholds. If all are ``False`` (default), then return the training metric value.
        If more than one option is set to ``True``, then return a dictionary of metrics where the keys are
        "train", "valid", and "xval".

        :param thresholds: thresholds parameter must be a list (e.g. ``[0.01, 0.5, 0.99]``). If None, then the
            threshold maximizing the metric will be used.
        :param bool train: If train is ``True``, then return the missrate value for the training data.
        :param bool valid: If valid is ``True``, then return the missrate value for the validation data.
        :param bool xval: If xval is ``True``, then return the missrate value for the cross validation data.
        :returns: The missrate for this binomial model.

        :examples:

        >>> from h2o.grid.grid_search import H2OGridSearch
        >>> from h2o.estimators.glm import H2OGeneralizedLinearEstimator
        >>> training_data = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/benign.csv")
        >>> hyper_parameters = {'alpha': [0.01,0.5], 'lambda': [1e-5,1e-6]}
        >>> gs = H2OGridSearch(H2OGeneralizedLinearEstimator(family='binomial'), hyper_parameters)
        >>> gs.train(x=list(range(4, 12)), y=3, training_frame=training_data)
        >>> gs.missrate(train=True)
        """
        return {model.model_id: model.missrate(thresholds, train, valid, xval) for model in self.models}

    def specificity(self, thresholds=None, train=False, valid=False, xval=False):
        """
        Get the specificity (AKA True Negative Rate) for a set of thresholds. If all are ``False`` (default), then return the training metric value.
        If more than one option is set to ``True``, then return a dictionary of metrics where the keys are
        "train", "valid", and "xval".

        :param thresholds: thresholds parameter must be a list (e.g. ``[0.01, 0.5, 0.99]``). If None, then the
            threshold maximizing the metric will be used.
        :param bool train: If train is ``True``, then return the specificity value for the training data.
        :param bool valid: If valid is ``True``, then return the specificity value for the validation data.
        :param bool xval: If xval is ``True``, then return the specificity value for the cross validation data.
        :returns: The specificity for this binomial model.

        :examples:

        >>> from h2o.grid.grid_search import H2OGridSearch
        >>> from h2o.estimators.glm import H2OGeneralizedLinearEstimator
        >>> training_data = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/benign.csv")
        >>> hyper_parameters = {'alpha': [0.01,0.5], 'lambda': [1e-5,1e-6]}
        >>> gs = H2OGridSearch(H2OGeneralizedLinearEstimator(family='binomial'), hyper_parameters)
        >>> gs.train(x=list(range(4, 12)), y=3, training_frame=training_data)
        >>> gs.specificity(train=True)
        """
        return {model.model_id: model.specificity(thresholds, train, valid, xval) for model in self.models}

    def mcc(self, thresholds=None, train=False, valid=False, xval=False):
        """
        Get the MCC for a set of thresholds. If all are ``False`` (default), then return the training metric value.
        If more than one option is set to ``True``, then return a dictionary of metrics where the keys are
        "train", "valid", and "xval".

        :param thresholds: thresholds parameter must be a list (e.g. ``[0.01, 0.5, 0.99]``). If None, then the
            threshold maximizing the metric will be used.
        :param bool train: If train is ``True``, then return the mcc value for the training data.
        :param bool valid: If valid is ``True``, then return the mcc value for the validation data.
        :param bool xval: If xval is ``True``, then return the mcc value for the cross validation data.
        :returns: The MCC for this binomial model.

        :examples:

        >>> from h2o.grid.grid_search import H2OGridSearch
        >>> from h2o.estimators.glm import H2OGeneralizedLinearEstimator
        >>> training_data = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/benign.csv")
        >>> hyper_parameters = {'alpha': [0.01,0.5], 'lambda': [1e-5,1e-6]}
        >>> gs = H2OGridSearch(H2OGeneralizedLinearEstimator(family='binomial'), hyper_parameters)
        >>> gs.train(x=list(range(4, 12)), y=3, training_frame=training_data)
        >>> gs.mcc(train=True)
        """
        return {model.model_id: model.mcc(thresholds, train, valid, xval) for model in self.models}

    def max_per_class_error(self, thresholds=None, train=False, valid=False, xval=False):
        """
        Get the max per class error for a set of thresholds. If all are ``False`` (default), then return the training metric value.
        If more than one option is set to ``True``, then return a dictionary of metrics where the keys are
        "train", "valid", and "xval".

        :param thresholds: thresholds parameter must be a list (e.g. ``[0.01, 0.5, 0.99]``). If None, then the
            threshold minimizing the error will be used.
        :param bool train: If train is ``True``, then return the ``max_per_class_error`` value for the training data.
        :param bool valid: If valid is ``True``, then return the ``max_per_class_error`` value for the validation data.
        :param bool xval: If xval is ``True``, then return the ``max_per_class_error`` value for the cross validation data.
        :returns: The max per class error for this binomial model.

        :examples:

        >>> from h2o.grid.grid_search import H2OGridSearch
        >>> from h2o.estimators.glm import H2OGeneralizedLinearEstimator
        >>> training_data = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/benign.csv")
        >>> hyper_parameters = {'alpha': [0.01,0.5], 'lambda': [1e-5,1e-6]}
        >>> gs = H2OGridSearch(H2OGeneralizedLinearEstimator(family='binomial'), hyper_parameters)
        >>> gs.train(x=list(range(4, 12)), y=3, training_frame=training_data)
        >>> gs.max_per_class_error(train=True)
        """
        return {model.model_id: model.max_per_class_error(thresholds, train, valid, xval) for model in self.models}

    def mean_per_class_error(self, thresholds=None, train=False, valid=False, xval=False):
        """
        Get the mean per class error for a set of thresholds. If all are ``False`` (default), then return the training metric value.
        If more than one option is set to ``True``, then return a dictionary of metrics where the keys are
        "train", "valid", and "xval".

        :param thresholds: thresholds parameter must be a list (e.g. ``[0.01, 0.5, 0.99]``). If None, then the
            threshold minimizing the error will be used.
        :param bool train: If train is ``True``, then return the ``mean_per_class_error`` value for the training data.
        :param bool valid: If valid is ``True``, then return the ``mean_per_class_error`` value for the validation data.
        :param bool xval: If xval is ``True``, then return the ``mean_per_class_error`` value for the cross validation data.
        :returns: The mean per class error for this binomial model.

        :examples:

        >>> from h2o.grid.grid_search import H2OGridSearch
        >>> from h2o.estimators.glm import H2OGeneralizedLinearEstimator
        >>> training_data = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/benign.csv")
        >>> hyper_parameters = {'alpha': [0.01,0.5], 'lambda': [1e-5,1e-6]}
        >>> gs = H2OGridSearch(H2OGeneralizedLinearEstimator(family='binomial'), hyper_parameters)
        >>> gs.train(x=list(range(4, 12)), y=3, training_frame=training_data)
        >>> gs.mean_per_class_error(train=True)
        """
        return {model.model_id: model.mean_per_class_error(thresholds, train, valid, xval) for model in self.models}

    def metric(self, metric, thresholds=None, train=False, valid=False, xval=False):
        """
        Get the metric value for a set of thresholds. If all are ``False`` (default), then return the training metric value.
        If more than one option is set to ``True``, then return a dictionary of metrics where the keys are
        "train", "valid", and "xval".

        :param metric: name of the metric to compute.
        :param thresholds: thresholds parameter must be a list (e.g. ``[0.01, 0.5, 0.99]``). If None, then the
            threshold maximizing the metric will be used.
        :param bool train: If train is ``True``, then return the metrics for the training data.
        :param bool valid: If valid is ``True``, then return the metrics for the validation data.
        :param bool xval: If xval is ``True``, then return the metrics for the cross validation data.
        :returns: The metrics for this binomial model.

        :examples:

        >>> from h2o.grid.grid_search import H2OGridSearch
        >>> from h2o.estimators.glm import H2OGeneralizedLinearEstimator
        >>> training_data = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/benign.csv")
        >>> hyper_parameters = {'alpha': [0.01,0.5], 'lambda': [1e-5,1e-6]}
        >>> gs = H2OGridSearch(H2OGeneralizedLinearEstimator(family='binomial'), hyper_parameters)
        >>> gs.train(x=list(range(4, 12)), y=3, training_frame=training_data)
        >>> gs.metric("tps", train=True)
        """
        return {model.model_id: model.metric(metric, thresholds, train, valid, xval) for model in self.models}

    def roc(self, train=False, valid=False, xval=False):
        """
        Return the coordinates of the ROC curve for a given set of data, as a two-tuple containing the false
        positive rates as a list and true positive rates as a list. If all are ``False`` (default), then return
        the training data. If more than one ROC curve is requested, the data is returned as a dictionary of
        two-tuples.

        :param bool train: If train is ``True``, then return the ROC coordinates for the training data.
        :param bool valid: If valid is ``True``, then return the ROC coordinates for the validation data.
        :param bool xval: If xval is ``True``, then return the ROC coordinates for the cross validation data.
        :returns: the coordinates of the ROC curve.

        :examples:

        >>> from h2o.grid.grid_search import H2OGridSearch
        >>> from h2o.estimators.glm import H2OGeneralizedLinearEstimator
        >>> training_data = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/benign.csv")
        >>> hyper_parameters = {'alpha': [0.01,0.5], 'lambda': [1e-5,1e-6]}
        >>> gs = H2OGridSearch(H2OGeneralizedLinearEstimator(family='binomial'), hyper_parameters)
        >>> gs.train(x=list(range(4, 12)), y=3, training_frame=training_data)
        >>> gs.roc(train=True)
        """
        return {model.model_id: model.roc(train, valid, xval) for model in self.models}

    def confusion_matrix(self, metrics=None, thresholds=None, train=False, valid=False, xval=False):
        """
        Get the confusion matrix for the specified metrics/thresholds. If all are ``False`` (default), then return
        the training metric value. If more than one option is set to ``True``, then return a dictionary of metrics
        where the keys are "train", "valid", and "xval".

        :param metrics: A string (or list of strings) among metrics listed in
            :const:`H2OBinomialModelMetrics.maximizing_metrics`. Defaults to ``'f1'``.
        :param thresholds: A value (or list of values) between 0 and 1. If None, then the thresholds maximizing
            each provided metric will be used.
        :param bool train: If train is ``True``, then return the confusion matrix value for the training data.
        :param bool valid: If valid is ``True``, then return the confusion matrix value for the validation data.
        :param bool xval: If xval is ``True``, then return the confusion matrix value for the cross validation data.
        :returns: The confusion matrix for this binomial model.

        :examples:

        >>> from h2o.grid.grid_search import H2OGridSearch
        >>> from h2o.estimators.glm import H2OGeneralizedLinearEstimator
        >>> training_data = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/benign.csv")
        >>> hyper_parameters = {'alpha': [0.01,0.5], 'lambda': [1e-5,1e-6]}
        >>> gs = H2OGridSearch(H2OGeneralizedLinearEstimator(family='binomial'), hyper_parameters)
        >>> gs.train(x=list(range(4, 12)), y=3, training_frame=training_data)
        >>> gs.confusion_matrix(train=True)
        """
        return {model.model_id: model.confusion_matrix(metrics, thresholds, train, valid, xval) for model in self.models}

    def find_threshold_by_max_metric(self, metric, train=False, valid=False, xval=False):
        """
        Find the threshold that maximizes the given metric. If all are ``False`` (default), then return the training
        metric value. If more than one option is set to ``True``, then return a dictionary of metrics where the keys
        are "train", "valid", and "xval".

        :param str metric: A metric among the metrics listed in :const:`H2OBinomialModelMetrics.maximizing_metrics`.
        :param bool train: If train is ``True``, then return the ``threshold_by_max_metric`` value for the training data.
        :param bool valid: If valid is ``True``, then return the ``threshold_by_max_metric`` value for the validation data.
        :param bool xval: If xval is ``True``, then return the ``threshold_by_max_metric`` value for the cross validation data.
        :returns: The ``threshold_by_max_metric`` for this binomial model.

        :examples:

        >>> from h2o.grid.grid_search import H2OGridSearch
        >>> from h2o.estimators.glm import H2OGeneralizedLinearEstimator
        >>> training_data = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/benign.csv")
        >>> hyper_parameters = {'alpha': [0.01,0.5], 'lambda': [1e-5,1e-6]}
        >>> gs = H2OGridSearch(H2OGeneralizedLinearEstimator(family='binomial'), hyper_parameters)
        >>> gs.train(x=list(range(4, 12)), y=3, training_frame=training_data)
        >>> gs.find_threshold_by_max_metric("tps", train=True)
        """
        return {model.model_id: model.find_threshold_by_max_metric(metric, train, valid, xval) for model in self.models}

    def find_idx_by_threshold(self, threshold, train=False, valid=False, xval=False):
        """
        Retrieve the index in this metric's threshold list at which the given threshold is located. If all are
        ``False`` (default), then return the training metric value. If more than one option is set to ``True``,
        then return a dictionary of metrics where the keys are "train", "valid", and "xval".

        :param float threshold: The threshold value to search for.
        :param bool train: If train is ``True``, then return the ``idx_by_threshold`` for the training data.
        :param bool valid: If valid is ``True``, then return the ``idx_by_threshold`` for the validation data.
        :param bool xval: If xval is ``True``, then return the ``idx_by_threshold`` for the cross validation data.
        :returns: The ``idx_by_threshold`` for this binomial model.

        :examples:

        >>> from h2o.grid.grid_search import H2OGridSearch
        >>> from h2o.estimators.glm import H2OGeneralizedLinearEstimator
        >>> training_data = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/benign.csv")
        >>> hyper_parameters = {'alpha': [0.01,0.5], 'lambda': [1e-5,1e-6]}
        >>> gs = H2OGridSearch(H2OGeneralizedLinearEstimator(family='binomial'), hyper_parameters)
        >>> gs.train(x=list(range(4, 12)), y=3, training_frame=training_data)
        >>> gs.find_idx_by_threshold(0.45, train=True)
        """
        return {model.model_id: model.find_idx_by_threshold(threshold, train, valid, xval) for model in self.models}

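# Usage sketch (illustrative): assuming ``gs`` is a binomial grid trained as in the examples above, the
# F1-maximizing threshold can be looked up per model and that model's confusion matrix inspected there.
#
#     >>> best = gs.find_threshold_by_max_metric("f1", train=True)   # {model_id: threshold}
#     >>> for model in gs.models:
#     ...     print(model.confusion_matrix(thresholds=best[model.model_id], train=True))
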

#-----------------------------------------------------------------------------------------------------------------------
# Clustering Grid Search
#-----------------------------------------------------------------------------------------------------------------------

class H2OClusteringGridSearch(object):
    def size(self, train=False, valid=False, xval=False):
        """
        Get the sizes of each cluster. If all are ``False`` (default), then return the training metric value.
        If more than one option is set to ``True``, then return a dictionary of metrics where the keys are
        "train", "valid", and "xval".

        :param bool train: If ``True``, then return the cluster sizes for the training data.
        :param bool valid: If ``True``, then return the cluster sizes for the validation data.
        :param bool xval: If ``True``, then return the cluster sizes for each of the cross-validated splits.
        :returns: the cluster sizes for the specified key(s).

        :examples:

        >>> from h2o.estimators import H2OKMeansEstimator
        >>> from h2o.grid.grid_search import H2OGridSearch
        >>> iris = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/iris/iris_train.csv")
        >>> hyper_parameters = {'k': [2,3,4], 'init': "random"}
        >>> gs = H2OGridSearch(H2OKMeansEstimator(), hyper_parameters)
        >>> gs.train(x=list(range(4)), training_frame=iris)
        >>> gs.size(train=True)
        """
        return {model.model_id: model.size(train, valid, xval) for model in self.models}

    def num_iterations(self):
        """
        Get the number of iterations that it took to converge or reach max iterations.

        :examples:

        >>> from h2o.estimators import H2OKMeansEstimator
        >>> from h2o.grid.grid_search import H2OGridSearch
        >>> iris = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/iris/iris_train.csv")
        >>> hyper_parameters = {'k': [2,3,4], 'init': "random"}
        >>> gs = H2OGridSearch(H2OKMeansEstimator(), hyper_parameters)
        >>> gs.train(x=list(range(4)), training_frame=iris)
        >>> gs.num_iterations()
        """
        return {model.model_id: model.num_iterations() for model in self.models}

    def betweenss(self, train=False, valid=False, xval=False):
        """
        Get the between cluster sum of squares. If all are ``False`` (default), then return the training metric value.
        If more than one option is set to ``True``, then return a dictionary of metrics where the keys are
        "train", "valid", and "xval".

        :param bool train: If ``True``, then return the between cluster sum of squares value for the training data.
        :param bool valid: If ``True``, then return the between cluster sum of squares value for the validation data.
        :param bool xval: If ``True``, then return the between cluster sum of squares value for each of the cross-validated splits.
        :returns: the between cluster sum of squares values for the specified key(s).

        :examples:

        >>> from h2o.estimators import H2OKMeansEstimator
        >>> from h2o.grid.grid_search import H2OGridSearch
        >>> iris = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/iris/iris_train.csv")
        >>> hyper_parameters = {'k': [2,3,4], 'init': "random"}
        >>> gs = H2OGridSearch(H2OKMeansEstimator(), hyper_parameters)
        >>> gs.train(x=list(range(4)), training_frame=iris)
        >>> gs.betweenss(train=True)
        """
        return {model.model_id: model.betweenss(train, valid, xval) for model in self.models}

    def totss(self, train=False, valid=False, xval=False):
        """
        Get the total sum of squares. If all are ``False`` (default), then return the training metric value.
        If more than one option is set to ``True``, then return a dictionary of metrics where the keys are
        "train", "valid", and "xval".

        :param bool train: If ``True``, then return the total sum of squares for the training data.
        :param bool valid: If ``True``, then return the total sum of squares for the validation data.
        :param bool xval: If ``True``, then return the total sum of squares for each of the cross-validated splits.
        :returns: the total sum of squares values for the specified key(s).

        :examples:

        >>> from h2o.estimators import H2OKMeansEstimator
        >>> from h2o.grid.grid_search import H2OGridSearch
        >>> iris = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/iris/iris_train.csv")
        >>> hyper_parameters = {'k': [2,3,4], 'init': "random"}
        >>> gs = H2OGridSearch(H2OKMeansEstimator(), hyper_parameters)
        >>> gs.train(x=list(range(4)), training_frame=iris)
        >>> gs.totss(train=True)
        """
        return {model.model_id: model.totss(train, valid, xval) for model in self.models}

    def tot_withinss(self, train=False, valid=False, xval=False):
        """
        Get the total within cluster sum of squares. If all are ``False`` (default), then return the training metric value.
        If more than one option is set to ``True``, then return a dictionary of metrics where the keys are
        "train", "valid", and "xval".

        :param bool train: If ``True``, then return the total within cluster sum of squares for the training data.
        :param bool valid: If ``True``, then return the total within cluster sum of squares for the validation data.
        :param bool xval: If ``True``, then return the total within cluster sum of squares for each of the cross-validated splits.
        :returns: the total within cluster sum of squares values for the specified key(s).

        :examples:

        >>> from h2o.estimators import H2OKMeansEstimator
        >>> from h2o.grid.grid_search import H2OGridSearch
        >>> iris = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/iris/iris_train.csv")
        >>> hyper_parameters = {'k': [2,3,4], 'init': "random"}
        >>> gs = H2OGridSearch(H2OKMeansEstimator(), hyper_parameters)
        >>> gs.train(x=list(range(4)), training_frame=iris)
        >>> gs.tot_withinss(train=True)
        """
        return {model.model_id: model.tot_withinss(train, valid, xval) for model in self.models}

    def withinss(self, train=False, valid=False, xval=False):
        """
        Get the within cluster sum of squares for each cluster. If all are ``False`` (default), then return the training metric value.
        If more than one option is set to ``True``, then return a dictionary of metrics where the keys are
        "train", "valid", and "xval".

        :param bool train: If ``True``, then return the within cluster sum of squares for the training data.
        :param bool valid: If ``True``, then return the within cluster sum of squares for the validation data.
        :param bool xval: If ``True``, then return the within cluster sum of squares for each of the cross-validated splits.
        :returns: the within cluster sum of squares values for the specified key(s).

        :examples:

        >>> from h2o.estimators import H2OKMeansEstimator
        >>> from h2o.grid.grid_search import H2OGridSearch
        >>> iris = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/iris/iris_train.csv")
        >>> hyper_parameters = {'k': [2,3,4], 'init': "random"}
        >>> gs = H2OGridSearch(H2OKMeansEstimator(), hyper_parameters)
        >>> gs.train(x=list(range(4)), training_frame=iris)
        >>> gs.withinss(train=True)
        """
        return {model.model_id: model.withinss(train, valid, xval) for model in self.models}

    def centroid_stats(self, train=False, valid=False, xval=False):
        """
        Get the centroid statistics for each cluster. If all are ``False`` (default), then return the training metric value.
        If more than one option is set to ``True``, then return a dictionary of metrics where the keys are
        "train", "valid", and "xval".

        :param bool train: If ``True``, then return the centroid statistics for the training data.
        :param bool valid: If ``True``, then return the centroid statistics for the validation data.
        :param bool xval: If ``True``, then return the centroid statistics for each of the cross-validated splits.
        :returns: the centroid statistics for the specified key(s).

        :examples:

        >>> from h2o.estimators import H2OKMeansEstimator
        >>> from h2o.grid.grid_search import H2OGridSearch
        >>> iris = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/iris/iris_train.csv")
        >>> hyper_parameters = {'k': [2,3,4], 'init': "random"}
        >>> gs = H2OGridSearch(H2OKMeansEstimator(), hyper_parameters)
        >>> gs.train(x=list(range(4)), training_frame=iris)
        >>> gs.centroid_stats(train=True)
        """
        return {model.model_id: model.centroid_stats(train, valid, xval) for model in self.models}

    def centers(self):
        """
        Returns the centers for the KMeans model.

        :examples:

        >>> from h2o.estimators import H2OKMeansEstimator
        >>> from h2o.grid.grid_search import H2OGridSearch
        >>> iris = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/iris/iris_train.csv")
        >>> hyper_parameters = {'k': [2,3,4], 'init': "random"}
        >>> gs = H2OGridSearch(H2OKMeansEstimator(), hyper_parameters)
        >>> gs.train(x=list(range(4)), training_frame=iris)
        >>> gs.centers()
        """
        return {model.model_id: model.centers() for model in self.models}

    def centers_std(self):
        """
        Returns the standardized centers for the KMeans model.

        :examples:

        >>> from h2o.estimators import H2OKMeansEstimator
        >>> from h2o.grid.grid_search import H2OGridSearch
        >>> iris = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/iris/iris_train.csv")
        >>> hyper_parameters = {'k': [2,3,4], 'init': "random"}
        >>> gs = H2OGridSearch(H2OKMeansEstimator(), hyper_parameters)
        >>> gs.train(x=list(range(4)), training_frame=iris)
        >>> gs.centers_std()
        """
        return {model.model_id: model.centers_std() for model in self.models}

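# Usage sketch (illustrative): assuming ``gs`` is the k-means grid from the examples above, the ratio of
# between-cluster to total sum of squares gives a quick per-model summary of cluster separation.
#
#     >>> between = gs.betweenss(train=True)   # {model_id: between-cluster sum of squares}
#     >>> total = gs.totss(train=True)         # {model_id: total sum of squares}
#     >>> {mid: between[mid] / total[mid] for mid in between}
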

#-----------------------------------------------------------------------------------------------------------------------
# Dimensionality Reduction Grid Search
#-----------------------------------------------------------------------------------------------------------------------

class H2ODimReductionGridSearch(object):
    def num_iterations(self):
        """
        Get the number of iterations that it took to converge or reach max iterations.

        :returns: number of iterations (integer).

        :examples:

        >>> from h2o.estimators import H2OGeneralizedLowRankEstimator
        >>> from h2o.grid.grid_search import H2OGridSearch
        >>> iris = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/iris/iris_wheader.csv")
        >>> hyper_parameters = {'gamma_x': [0.05, 0.5], 'gamma_y': [0.05,0.5]}
        >>> gs = H2OGridSearch(H2OGeneralizedLowRankEstimator(), hyper_parameters)
        >>> gs.train(x=iris.names, training_frame=iris)
        >>> gs.num_iterations()
        """
        return {model.model_id: model.num_iterations for model in self.models}

    def objective(self):
        """
        Get the final value of the objective function from the GLRM model.

        :returns: final objective value (double).

        :examples:

        >>> from h2o.estimators import H2OGeneralizedLowRankEstimator
        >>> from h2o.grid.grid_search import H2OGridSearch
        >>> iris = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/iris/iris_wheader.csv")
        >>> hyper_parameters = {'gamma_x': [0.05, 0.5], 'gamma_y': [0.05,0.5]}
        >>> gs = H2OGridSearch(H2OGeneralizedLowRankEstimator(), hyper_parameters)
        >>> gs.train(x=iris.names, training_frame=iris)
        >>> gs.objective()
        """
        return {model.model_id: model.objective for model in self.models}

    def final_step(self):
        """
        Get the final step size from the GLRM model.

        :returns: final step size (double).

        :examples:

        >>> from h2o.estimators import H2OGeneralizedLowRankEstimator
        >>> from h2o.grid.grid_search import H2OGridSearch
        >>> iris = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/iris/iris_wheader.csv")
        >>> hyper_parameters = {'gamma_x': [0.05, 0.5], 'gamma_y': [0.05,0.5]}
        >>> gs = H2OGridSearch(H2OGeneralizedLowRankEstimator(), hyper_parameters)
        >>> gs.train(x=iris.names, training_frame=iris)
        >>> gs.final_step()
        """
        return {model.model_id: model.final_step for model in self.models}

    def archetypes(self):
        """
        :returns: the archetypes (Y) of the GLRM model.

        :examples:

        >>> from h2o.estimators import H2OGeneralizedLowRankEstimator
        >>> from h2o.grid.grid_search import H2OGridSearch
        >>> iris = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/iris/iris_wheader.csv")
        >>> hyper_parameters = {'gamma_x': [0.05, 0.5], 'gamma_y': [0.05,0.5]}
        >>> gs = H2OGridSearch(H2OGeneralizedLowRankEstimator(), hyper_parameters)
        >>> gs.train(x=iris.names, training_frame=iris)
        >>> gs.archetypes()
        """
        return {model.model_id: model.archetypes for model in self.models}


#-----------------------------------------------------------------------------------------------------------------------
# Multinomial Grid Search
#-----------------------------------------------------------------------------------------------------------------------

class H2OMultinomialGridSearch(object):
    def confusion_matrix(self, data):
        """
        Returns a confusion matrix based on H2O's default prediction threshold for a dataset.

        :param data: the dataset (H2OFrame) for which the confusion matrix will be calculated.

        :examples:

        >>> from h2o.estimators.glm import H2OGeneralizedLinearEstimator
        >>> from h2o.grid.grid_search import H2OGridSearch
        >>> iris = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/iris/iris.csv")
        >>> hyper_parameters = {'alpha': [0.01,0.5], 'lambda': [1e-5,1e-6]}
        >>> gs = H2OGridSearch(H2OGeneralizedLinearEstimator(family="multinomial"), hyper_parameters)
        >>> gs.train(x=[0,1,2,3], y=4, training_frame=iris)
        >>> gs.confusion_matrix(iris)
        """
        return {model.model_id: model.confusion_matrix(data) for model in self.models}

    def hit_ratio_table(self, train=False, valid=False, xval=False):
        """
        Retrieve the Hit Ratios. If all are ``False`` (default), then return the training metric value.
        If more than one option is set to ``True``, then return a dictionary of metrics where the keys are
        "train", "valid", and "xval".

        :param bool train: If train is ``True``, then return the hit ratio value for the training data.
        :param bool valid: If valid is ``True``, then return the hit ratio value for the validation data.
        :param bool xval: If xval is ``True``, then return the hit ratio value for the cross validation data.
        :returns: The hit ratio for this multinomial model.

        :examples:

        >>> from h2o.estimators.glm import H2OGeneralizedLinearEstimator
        >>> from h2o.grid.grid_search import H2OGridSearch
        >>> iris = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/iris/iris.csv")
        >>> hyper_parameters = {'alpha': [0.01,0.5], 'lambda': [1e-5,1e-6]}
        >>> gs = H2OGridSearch(H2OGeneralizedLinearEstimator(family="multinomial"), hyper_parameters)
        >>> gs.train(x=[0,1,2,3], y=4, training_frame=iris)
        >>> gs.hit_ratio_table(train=True)
        """
        return {model.model_id: model.hit_ratio_table(train, valid, xval) for model in self.models}

    def auc(self, train=False, valid=False, xval=False):
        """
        Retrieve the AUC value. If all are ``False`` (default), then return the training metric value.
        If more than one option is set to ``True``, then return a dictionary of metrics where the keys are
        "train", "valid", and "xval".

        :param bool train: If train is ``True``, then return the AUC values for the training data.
        :param bool valid: If valid is ``True``, then return the AUC values for the validation data.
        :param bool xval: If xval is ``True``, then return the AUC values for the cross validation data.
        :returns: The AUC values for this multinomial model.

        :examples:

        >>> from h2o.estimators.glm import H2OGeneralizedLinearEstimator
        >>> from h2o.grid.grid_search import H2OGridSearch
        >>> iris = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/iris/iris.csv")
        >>> hyper_parameters = {'alpha': [0.01,0.5], 'lambda': [1e-5,1e-6]}
        >>> gs = H2OGridSearch(H2OGeneralizedLinearEstimator(family="multinomial"), hyper_parameters)
        >>> gs.train(x=[0,1,2,3], y=4, training_frame=iris)
        >>> gs.auc(train=True)
        """
        return {model.model_id: model.auc(train, valid, xval) for model in self.models}

    def aucpr(self, train=False, valid=False, xval=False):
        """
        Retrieve the PR AUC value. If all are ``False`` (default), then return the training metric value.
        If more than one option is set to ``True``, then return a dictionary of metrics where the keys are
        "train", "valid", and "xval".

        :param bool train: If train is ``True``, then return the PR AUC values for the training data.
        :param bool valid: If valid is ``True``, then return the PR AUC values for the validation data.
        :param bool xval: If xval is ``True``, then return the PR AUC values for the cross validation data.
        :returns: The PR AUC values for this multinomial model.

        :examples:

        >>> from h2o.estimators.glm import H2OGeneralizedLinearEstimator
        >>> from h2o.grid.grid_search import H2OGridSearch
        >>> iris = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/iris/iris.csv")
        >>> hyper_parameters = {'alpha': [0.01,0.5], 'lambda': [1e-5,1e-6]}
        >>> gs = H2OGridSearch(H2OGeneralizedLinearEstimator(family="multinomial"), hyper_parameters)
        >>> gs.train(x=[0,1,2,3], y=4, training_frame=iris)
        >>> gs.aucpr(train=True)
        """
        return {model.model_id: model.aucpr(train, valid, xval) for model in self.models}

    def mean_per_class_error(self, train=False, valid=False, xval=False):
        """
        Get the mean per class error. If all are ``False`` (default), then return the training metric value.
        If more than one option is set to ``True``, then return a dictionary of metrics where the keys are
        "train", "valid", and "xval".

        :param bool train: If train is ``True``, then return the mean per class error value for the training data.
        :param bool valid: If valid is ``True``, then return the mean per class error value for the validation data.
        :param bool xval: If xval is ``True``, then return the mean per class error value for the cross validation data.
        :returns: The mean per class error for this multinomial model.

        :examples:

        >>> from h2o.estimators.glm import H2OGeneralizedLinearEstimator
        >>> from h2o.grid.grid_search import H2OGridSearch
        >>> iris = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/iris/iris.csv")
        >>> hyper_parameters = {'alpha': [0.01,0.5], 'lambda': [1e-5,1e-6]}
        >>> gs = H2OGridSearch(H2OGeneralizedLinearEstimator(family="multinomial"), hyper_parameters)
        >>> gs.train(x=[0,1,2,3], y=4, training_frame=iris)
        >>> gs.mean_per_class_error(train=True)
        """
        return {model.model_id: model.mean_per_class_error(train, valid, xval) for model in self.models}

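# Usage sketch (illustrative): assuming ``gs`` is the multinomial grid from the examples above, the models
# can be ranked by training mean per-class error to pick the best one.
#
#     >>> errs = gs.mean_per_class_error(train=True)            # {model_id: error}
#     >>> best_id, best_err = min(errs.items(), key=lambda kv: kv[1])
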

#-----------------------------------------------------------------------------------------------------------------------
# Ordinal Grid Search
#-----------------------------------------------------------------------------------------------------------------------

class H2OOrdinalGridSearch(object):
    def confusion_matrix(self, data):
        """
        Returns a confusion matrix based on H2O's default prediction threshold for a dataset.

        :param data: the dataset (H2OFrame) for which the confusion matrix will be calculated.

        :examples:

        >>> from h2o.estimators import H2OGeneralizedLinearEstimator
        >>> from h2o.grid.grid_search import H2OGridSearch
        >>> h2o_df = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/bigdata/laptop/glm_ordinal_logit/ordinal_multinomial_training_set.csv")
        >>> hyper_parameters = {'alpha': [0.01,0.5], 'lambda': [1e-5,1e-6]}
        >>> gs = H2OGridSearch(H2OGeneralizedLinearEstimator(family="ordinal"), hyper_parameters)
        >>> h2o_df['C11'] = h2o_df['C11'].asfactor()
        >>> gs.train(x=list(range(0,10)), y="C11", training_frame=h2o_df)
        >>> gs.confusion_matrix(h2o_df)
        """
        return {model.model_id: model.confusion_matrix(data) for model in self.models}

    def hit_ratio_table(self, train=False, valid=False, xval=False):
        """
        Retrieve the Hit Ratios. If all are ``False`` (default), then return the training metric value.
        If more than one option is set to ``True``, then return a dictionary of metrics where the keys are
        "train", "valid", and "xval".

        :param bool train: If train is ``True``, then return the hit ratio value for the training data.
        :param bool valid: If valid is ``True``, then return the hit ratio value for the validation data.
        :param bool xval: If xval is ``True``, then return the hit ratio value for the cross validation data.
        :returns: The hit ratio for this ordinal model.

        :examples:

        >>> from h2o.estimators import H2OGeneralizedLinearEstimator
        >>> from h2o.grid.grid_search import H2OGridSearch
        >>> h2o_df = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/bigdata/laptop/glm_ordinal_logit/ordinal_multinomial_training_set.csv")
        >>> hyper_parameters = {'alpha': [0.01,0.5], 'lambda': [1e-5,1e-6]}
        >>> gs = H2OGridSearch(H2OGeneralizedLinearEstimator(family="ordinal"), hyper_parameters)
        >>> h2o_df['C11'] = h2o_df['C11'].asfactor()
        >>> gs.train(x=list(range(0,10)), y="C11", training_frame=h2o_df)
        >>> gs.hit_ratio_table(train=True)
        """
        return {model.model_id: model.hit_ratio_table(train, valid, xval) for model in self.models}

    def mean_per_class_error(self, train=False, valid=False, xval=False):
        """
        Get the mean per class error. If all are ``False`` (default), then return the training metric value.
        If more than one option is set to ``True``, then return a dictionary of metrics where the keys are
        "train", "valid", and "xval".

        :param bool train: If train is ``True``, then return the mean per class error value for the training data.
        :param bool valid: If valid is ``True``, then return the mean per class error value for the validation data.
        :param bool xval: If xval is ``True``, then return the mean per class error value for the cross validation data.
        :returns: The mean per class error for this ordinal model.

        :examples:

        >>> from h2o.estimators import H2OGeneralizedLinearEstimator
        >>> from h2o.grid.grid_search import H2OGridSearch
        >>> h2o_df = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/bigdata/laptop/glm_ordinal_logit/ordinal_multinomial_training_set.csv")
        >>> hyper_parameters = {'alpha': [0.01,0.5], 'lambda': [1e-5,1e-6]}
        >>> gs = H2OGridSearch(H2OGeneralizedLinearEstimator(family="ordinal"), hyper_parameters)
        >>> h2o_df['C11'] = h2o_df['C11'].asfactor()
        >>> gs.train(x=list(range(0,10)), y="C11", training_frame=h2o_df)
        >>> gs.mean_per_class_error(train=True)
        """
        return {model.model_id: model.mean_per_class_error(train, valid, xval) for model in self.models}


#-----------------------------------------------------------------------------------------------------------------------
# Regression Grid Search
#-----------------------------------------------------------------------------------------------------------------------

class H2ORegressionGridSearch(object):
    pass