Source code for h2o.model.multinomial

# -*- encoding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals

import h2o
from h2o.utils.compatibility import *  # NOQA
from h2o.utils.typechecks import assert_is_type
from ..exceptions import H2OValueError
from ..frame import H2OFrame
from .extensions import has_extension
from .model_base import ModelBase


[docs]class H2OMultinomialModel(ModelBase): def _make_model(self): return H2OMultinomialModel()
[docs] def confusion_matrix(self, data): """ Returns a confusion matrix based of H2O's default prediction threshold for a dataset. :param H2OFrame data: the frame with the prediction results for which the confusion matrix should be extracted. :examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["cylinders"] = cars["cylinders"].asfactor() >>> r = cars[0].runif() >>> train = cars[r > .2] >>> valid = cars[r <= .2] >>> response_col = "cylinders" >>> distribution = "multinomial" >>> predictors = ["displacement","power","weight","acceleration","year"] >>> gbm = H2OGradientBoostingEstimator(nfolds=3, ... distribution=distribution) >>> gbm.train(x=predictors, ... y=response_col, ... training_frame=train, ... validation_frame=valid) >>> confusion_matrix = gbm.confusion_matrix(train) >>> confusion_matrix """ assert_is_type(data, H2OFrame) j = h2o.api("POST /3/Predictions/models/%s/frames/%s" % (self._id, data.frame_id)) return j["model_metrics"][0]["cm"]["table"]
[docs] def hit_ratio_table(self, train=False, valid=False, xval=False): """ Retrieve the Hit Ratios. If all are ``False`` (default), then return the training metric value. If more than one option is set to ``True``, then return a dictionary of metrics where the keys are "train", "valid", and "xval". :param train: If train is ``True``, then return the hit ratio value for the training data. :param valid: If valid is ``True``, then return the hit ratio value for the validation data. :param xval: If xval is ``True``, then return the hit ratio value for the cross validation data. :return: The hit ratio for this regression model. :example: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["cylinders"] = cars["cylinders"].asfactor() >>> r = cars[0].runif() >>> train = cars[r > .2] >>> valid = cars[r <= .2] >>> response_col = "cylinders" >>> distribution = "multinomial" >>> predictors = ["displacement","power","weight","acceleration","year"] >>> gbm = H2OGradientBoostingEstimator(nfolds=3, ... distribution=distribution) >>> gbm.train(x=predictors, ... y=response_col, ... training_frame=train, ... validation_frame=valid) >>> hit_ratio_table = gbm.hit_ratio_table() # <- Default: return training metrics >>> hit_ratio_table >>> hit_ratio_table1 = gbm.hit_ratio_table(train=True, ... valid=True, ... xval=True) >>> hit_ratio_table1 """ tm = ModelBase._get_metrics(self, train, valid, xval) m = {} for k, v in zip(list(tm.keys()), list(tm.values())): m[k] = None if v is None else v.hit_ratio_table() return list(m.values())[0] if len(m) == 1 else m
[docs] def mean_per_class_error(self, train=False, valid=False, xval=False): """ Retrieve the mean per class error across all classes. If all are ``False`` (default), then return the training metric value. If more than one option is set to ``True``, then return a dictionary of metrics where the keys are "train", "valid", and "xval". :param bool train: If ``True``, return the ``mean_per_class_error`` value for the training data. :param bool valid: If ``True``, return the ``mean_per_class_error`` value for the validation data. :param bool xval: If ``True``, return the ``mean_per_class_error`` value for each of the cross-validated splits. :returns: The ``mean_per_class_error`` values for the specified key(s). :examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["cylinders"] = cars["cylinders"].asfactor() >>> r = cars[0].runif() >>> train = cars[r > .2] >>> valid = cars[r <= .2] >>> response_col = "cylinders" >>> predictors = ["displacement","power","weight","acceleration","year"] >>> distribution = "multinomial" >>> gbm = H2OGradientBoostingEstimator(nfolds=3, distribution=distribution) >>> gbm.train(x=predictors, ... y=response_col, ... training_frame=train, ... validation_frame=valid) >>> mean_per_class_error = gbm.mean_per_class_error() # <- Default: return training metric >>> mean_per_class_error >>> mean_per_class_error1 = gbm.mean_per_class_error(train=True, ... valid=True, ... xval=True) >>> mean_per_class_error1 """ tm = ModelBase._get_metrics(self, train, valid, xval) m = {} for k, v in zip(list(tm.keys()), list(tm.values())): m[k] = None if v is None else v.mean_per_class_error() return list(m.values())[0] if len(m) == 1 else m
[docs] def multinomial_auc_table(self, train=False, valid=False, xval=False): """ Retrieve the multinomial AUC table. If all are ``False`` (default), then return the training metric value. If more than one option is set to ``True``, then return a dictionary of metrics where the keys are "train", "valid", and "xval". :param bool train: If ``True``, return the ``multinomial_auc_table`` for the training data. :param bool valid: If ``True``, return the ``multinomial_auc_table`` for the validation data. :param bool xval: If ``True``, return the ``multinomial_auc_table`` for each of the cross-validated splits. :returns: The ``multinomial_auc_table`` values for the specified key(s). :examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["cylinders"] = cars["cylinders"].asfactor() >>> r = cars[0].runif() >>> train = cars[r > .2] >>> valid = cars[r <= .2] >>> response_col = "cylinders" >>> predictors = ["displacement","power","weight","acceleration","year"] >>> distribution = "multinomial" >>> gbm = H2OGradientBoostingEstimator(nfolds=3, distribution=distribution) >>> gbm.train(x=predictors, ... y=response_col, ... training_frame=train, ... validation_frame=valid) >>> multinomial_auc_table = gbm.multinomial_auc_table() # <- Default: return training metric >>> multinomial_auc_table >>> multinomial_auc_table1 = gbm.multinomial_auc_table(train=True, ... valid=True, ... xval=True) >>> multinomial_auc_table1 """ tm = ModelBase._get_metrics(self, train, valid, xval) m = {} for k, v in zip(list(tm.keys()), list(tm.values())): m[k] = None if v is None else v.multinomial_auc_table() return list(m.values())[0] if len(m) == 1 else m
[docs] def multinomial_aucpr_table(self, train=False, valid=False, xval=False): """ Retrieve the multinomial PR AUC table. If all are ``False`` (default), then return the training metric value. If more than one option is set to ``True``, then return a dictionary of metrics where the keys are "train", "valid", and "xval". :param bool train: If ``True``, return the ``multinomial_aucpr_table`` for the training data. :param bool valid: If ``True``, return the ``multinomial_aucpr_table`` for the validation data. :param bool xval: If ``True``, return the ``multinomial_aucpr_table`` for each of the cross-validated splits. :returns: The ``average_pairwise_auc`` values for the specified key(s). :examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["cylinders"] = cars["cylinders"].asfactor() >>> r = cars[0].runif() >>> train = cars[r > .2] >>> valid = cars[r <= .2] >>> response_col = "cylinders" >>> predictors = ["displacement","power","weight","acceleration","year"] >>> distribution = "multinomial" >>> gbm = H2OGradientBoostingEstimator(nfolds=3, distribution=distribution) >>> gbm.train(x=predictors, ... y=response_col, ... training_frame=train, ... validation_frame=valid) >>> multinomial_aucpr_table = gbm.multinomial_aucpr_table() # <- Default: return training metric >>> multinomial_aucpr_table >>> multinomial_aucpr_table1 = gbm.multinomial_aucpr_table(train=True, ... valid=True, ... xval=True) >>> multinomial_aucpr_table1 """ tm = ModelBase._get_metrics(self, train, valid, xval) m = {} for k, v in zip(list(tm.keys()), list(tm.values())): m[k] = None if v is None else v.multinomial_aucpr_table() return list(m.values())[0] if len(m) == 1 else m
[docs] def plot(self, timestep="AUTO", metric="AUTO", save_plot_path=None, **kwargs): """ Plots training set (and validation set if available) scoring history for an H2OMultinomialModel. The timestep and metric arguments are restricted to what is available in its scoring history. :param timestep: A unit of measurement for the x-axis. One of: - 'AUTO' - 'duration' - 'number_of_trees' :param metric: A unit of measurement for the y-axis. One of: - 'AUTO' - 'logloss' - 'classification_error' - 'rmse' :returns: Object that contains the resulting scoring history plot (can be accessed using ``result.figure()``). :examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["cylinders"] = cars["cylinders"].asfactor() >>> r = cars[0].runif() >>> train = cars[r > .2] >>> valid = cars[r <= .2] >>> response_col = "cylinders" >>> predictors = ["displacement","power","weight","acceleration","year"] >>> from h2o.estimators.gbm import H2OGradientBoostingEstimator >>> distribution = "multinomial" >>> gbm = H2OGradientBoostingEstimator(nfolds=3, ... distribution=distribution) >>> gbm.train(x=predictors, ... y=response_col, ... training_frame=train, ... validation_frame=valid) >>> gbm.plot(metric="AUTO", timestep="AUTO") """ if not has_extension(self, 'ScoringHistory'): raise H2OValueError("Scoring history plot is not available for this type of model (%s)." % self.algo) valid_metrics = self._allowed_metrics('multinomial') if valid_metrics is not None: assert_is_type(metric, 'AUTO', *valid_metrics), "metric for H2OMultinomialModel must be one of %s" % valid_metrics if metric == "AUTO": metric = self._default_metric('multinomial') or 'AUTO' return self.scoring_history_plot(timestep=timestep, metric=metric, save_plot_path=save_plot_path, **kwargs)