Source code for h2o.estimators.naive_bayes

#!/usr/bin/env python
# -*- encoding: utf-8 -*-
#
# This file is auto-generated by h2o-3/h2o-bindings/bin/gen_python.py
# Copyright 2016 H2O.ai;  Apache License Version 2.0 (see LICENSE for details)
#
from __future__ import absolute_import, division, print_function, unicode_literals

from h2o.estimators.estimator_base import H2OEstimator
from h2o.exceptions import H2OValueError
from h2o.frame import H2OFrame
from h2o.utils.typechecks import assert_is_type, Enum, numeric


[docs]class H2ONaiveBayesEstimator(H2OEstimator): """ Naive Bayes The naive Bayes classifier assumes independence between predictor variables conditional on the response, and a Gaussian distribution of numeric predictors with mean and standard deviation computed from the training dataset. When building a naive Bayes classifier, every row in the training dataset that contains at least one NA will be skipped completely. If the test dataset has missing values, then those predictors are omitted in the probability calculation during prediction. """ algo = "naivebayes" supervised_learning = True def __init__(self, model_id=None, # type: Optional[Union[None, str, H2OEstimator]] nfolds=0, # type: int seed=-1, # type: int fold_assignment="auto", # type: Literal["auto", "random", "modulo", "stratified"] fold_column=None, # type: Optional[str] keep_cross_validation_models=True, # type: bool keep_cross_validation_predictions=False, # type: bool keep_cross_validation_fold_assignment=False, # type: bool training_frame=None, # type: Optional[Union[None, str, H2OFrame]] validation_frame=None, # type: Optional[Union[None, str, H2OFrame]] response_column=None, # type: Optional[str] ignored_columns=None, # type: Optional[List[str]] ignore_const_cols=True, # type: bool score_each_iteration=False, # type: bool balance_classes=False, # type: bool class_sampling_factors=None, # type: Optional[List[float]] max_after_balance_size=5.0, # type: float max_confusion_matrix_size=20, # type: int laplace=0.0, # type: float min_sdev=0.001, # type: float eps_sdev=0.0, # type: float min_prob=0.001, # type: float eps_prob=0.0, # type: float compute_metrics=True, # type: bool max_runtime_secs=0.0, # type: float export_checkpoints_dir=None, # type: Optional[str] gainslift_bins=-1, # type: int auc_type="auto", # type: Literal["auto", "none", "macro_ovr", "weighted_ovr", "macro_ovo", "weighted_ovo"] ): """ :param model_id: Destination id for this model; auto-generated if not specified. Defaults to ``None``. :type model_id: Union[None, str, H2OEstimator], optional :param nfolds: Number of folds for K-fold cross-validation (0 to disable or >= 2). Defaults to ``0``. :type nfolds: int :param seed: Seed for pseudo random number generator (only used for cross-validation and fold_assignment="Random" or "AUTO") Defaults to ``-1``. :type seed: int :param fold_assignment: Cross-validation fold assignment scheme, if fold_column is not specified. The 'Stratified' option will stratify the folds based on the response variable, for classification problems. Defaults to ``"auto"``. :type fold_assignment: Literal["auto", "random", "modulo", "stratified"] :param fold_column: Column with cross-validation fold index assignment per observation. Defaults to ``None``. :type fold_column: str, optional :param keep_cross_validation_models: Whether to keep the cross-validation models. Defaults to ``True``. :type keep_cross_validation_models: bool :param keep_cross_validation_predictions: Whether to keep the predictions of the cross-validation models. Defaults to ``False``. :type keep_cross_validation_predictions: bool :param keep_cross_validation_fold_assignment: Whether to keep the cross-validation fold assignment. Defaults to ``False``. :type keep_cross_validation_fold_assignment: bool :param training_frame: Id of the training data frame. Defaults to ``None``. :type training_frame: Union[None, str, H2OFrame], optional :param validation_frame: Id of the validation data frame. Defaults to ``None``. :type validation_frame: Union[None, str, H2OFrame], optional :param response_column: Response variable column. Defaults to ``None``. :type response_column: str, optional :param ignored_columns: Names of columns to ignore for training. Defaults to ``None``. :type ignored_columns: List[str], optional :param ignore_const_cols: Ignore constant columns. Defaults to ``True``. :type ignore_const_cols: bool :param score_each_iteration: Whether to score during each iteration of model training. Defaults to ``False``. :type score_each_iteration: bool :param balance_classes: Balance training data class counts via over/under-sampling (for imbalanced data). Defaults to ``False``. :type balance_classes: bool :param class_sampling_factors: Desired over/under-sampling ratios per class (in lexicographic order). If not specified, sampling factors will be automatically computed to obtain class balance during training. Requires balance_classes. Defaults to ``None``. :type class_sampling_factors: List[float], optional :param max_after_balance_size: Maximum relative size of the training data after balancing class counts (can be less than 1.0). Requires balance_classes. Defaults to ``5.0``. :type max_after_balance_size: float :param max_confusion_matrix_size: [Deprecated] Maximum size (# classes) for confusion matrices to be printed in the Logs Defaults to ``20``. :type max_confusion_matrix_size: int :param laplace: Laplace smoothing parameter Defaults to ``0.0``. :type laplace: float :param min_sdev: Min. standard deviation to use for observations with not enough data Defaults to ``0.001``. :type min_sdev: float :param eps_sdev: Cutoff below which standard deviation is replaced with min_sdev Defaults to ``0.0``. :type eps_sdev: float :param min_prob: Min. probability to use for observations with not enough data Defaults to ``0.001``. :type min_prob: float :param eps_prob: Cutoff below which probability is replaced with min_prob Defaults to ``0.0``. :type eps_prob: float :param compute_metrics: Compute metrics on training data Defaults to ``True``. :type compute_metrics: bool :param max_runtime_secs: Maximum allowed runtime in seconds for model training. Use 0 to disable. Defaults to ``0.0``. :type max_runtime_secs: float :param export_checkpoints_dir: Automatically export generated models to this directory. Defaults to ``None``. :type export_checkpoints_dir: str, optional :param gainslift_bins: Gains/Lift table number of bins. 0 means disabled.. Default value -1 means automatic binning. Defaults to ``-1``. :type gainslift_bins: int :param auc_type: Set default multinomial AUC type. Defaults to ``"auto"``. :type auc_type: Literal["auto", "none", "macro_ovr", "weighted_ovr", "macro_ovo", "weighted_ovo"] """ super(H2ONaiveBayesEstimator, self).__init__() self._parms = {} self._id = self._parms['model_id'] = model_id self.nfolds = nfolds self.seed = seed self.fold_assignment = fold_assignment self.fold_column = fold_column self.keep_cross_validation_models = keep_cross_validation_models self.keep_cross_validation_predictions = keep_cross_validation_predictions self.keep_cross_validation_fold_assignment = keep_cross_validation_fold_assignment self.training_frame = training_frame self.validation_frame = validation_frame self.response_column = response_column self.ignored_columns = ignored_columns self.ignore_const_cols = ignore_const_cols self.score_each_iteration = score_each_iteration self.balance_classes = balance_classes self.class_sampling_factors = class_sampling_factors self.max_after_balance_size = max_after_balance_size self.max_confusion_matrix_size = max_confusion_matrix_size self.laplace = laplace self.min_sdev = min_sdev self.eps_sdev = eps_sdev self.min_prob = min_prob self.eps_prob = eps_prob self.compute_metrics = compute_metrics self.max_runtime_secs = max_runtime_secs self.export_checkpoints_dir = export_checkpoints_dir self.gainslift_bins = gainslift_bins self.auc_type = auc_type @property def nfolds(self): """ Number of folds for K-fold cross-validation (0 to disable or >= 2). Type: ``int``, defaults to ``0``. :examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor() >>> predictors = ["displacement","power","weight","acceleration","year"] >>> response = "economy_20mpg" >>> cars_nb = H2ONaiveBayesEstimator(nfolds=5, ... seed=1234) >>> cars_nb.train(x=predictors, ... y=response, ... training_frame=cars) >>> cars_nb.auc() """ return self._parms.get("nfolds") @nfolds.setter def nfolds(self, nfolds): assert_is_type(nfolds, None, int) self._parms["nfolds"] = nfolds @property def seed(self): """ Seed for pseudo random number generator (only used for cross-validation and fold_assignment="Random" or "AUTO") Type: ``int``, defaults to ``-1``. :examples: >>> airlines= h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/airlines/allyears2k_headers.zip") >>> airlines["Year"] = airlines["Year"].asfactor() >>> airlines["Month"] = airlines["Month"].asfactor() >>> airlines["DayOfWeek"] = airlines["DayOfWeek"].asfactor() >>> airlines["Cancelled"] = airlines["Cancelled"].asfactor() >>> airlines['FlightNum'] = airlines['FlightNum'].asfactor() >>> predictors = ["Origin", "Dest", "Year", "UniqueCarrier", ... "DayOfWeek", "Month", "Distance", "FlightNum"] >>> response = "IsDepDelayed" >>> train, valid= airlines.split_frame(ratios=[.8], seed=1234) >>> nb_w_seed = H2ONaiveBayesEstimator(seed=1234) >>> nb_w_seed.train(x=predictors, ... y=response, ... training_frame=train, ... validation_frame=valid) >>> nb_wo_seed = H2ONaiveBayesEstimator() >>> nb_wo_seed.train(x=predictors, ... y=response, ... training_frame=train, ... validation_frame=valid) >>> nb_w_seed.auc() >>> nb_wo_seed.auc() """ return self._parms.get("seed") @seed.setter def seed(self, seed): assert_is_type(seed, None, int) self._parms["seed"] = seed @property def fold_assignment(self): """ Cross-validation fold assignment scheme, if fold_column is not specified. The 'Stratified' option will stratify the folds based on the response variable, for classification problems. Type: ``Literal["auto", "random", "modulo", "stratified"]``, defaults to ``"auto"``. :examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> predictors = ["displacement","power","weight","acceleration","year"] >>> response = "cylinders" >>> cars_nb = H2ONaiveBayesEstimator(fold_assignment="Random", ... nfolds=5, ... seed=1234) >>> response = "economy_20mpg" >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor() >>> cars_nb.train(x=predictors, y=response, training_frame=cars) >>> cars_nb.auc() """ return self._parms.get("fold_assignment") @fold_assignment.setter def fold_assignment(self, fold_assignment): assert_is_type(fold_assignment, None, Enum("auto", "random", "modulo", "stratified")) self._parms["fold_assignment"] = fold_assignment @property def fold_column(self): """ Column with cross-validation fold index assignment per observation. Type: ``str``. :examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor() >>> predictors = ["displacement","power","weight","acceleration","year"] >>> response = "economy_20mpg" >>> fold_numbers = cars.kfold_column(n_folds=5, seed=1234) >>> fold_numbers.set_names(["fold_numbers"]) >>> cars = cars.cbind(fold_numbers) >>> cars_nb = H2ONaiveBayesEstimator(seed=1234) >>> cars_nb.train(x=predictors, ... y=response, ... training_frame=cars, ... fold_column="fold_numbers") >>> cars_nb.auc() """ return self._parms.get("fold_column") @fold_column.setter def fold_column(self, fold_column): assert_is_type(fold_column, None, str) self._parms["fold_column"] = fold_column @property def keep_cross_validation_models(self): """ Whether to keep the cross-validation models. Type: ``bool``, defaults to ``True``. :examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor() >>> predictors = ["displacement","power","weight","acceleration","year"] >>> response = "economy_20mpg" >>> train, valid = cars.split_frame(ratios=[.8], seed=1234) >>> cars_nb = H2ONaiveBayesEstimator(keep_cross_validation_models=True, ... nfolds=5, ... seed=1234) >>> cars_nb.train(x=predictors, ... y=response, ... training_frame=train) >>> cars_nb.cross_validation_models() """ return self._parms.get("keep_cross_validation_models") @keep_cross_validation_models.setter def keep_cross_validation_models(self, keep_cross_validation_models): assert_is_type(keep_cross_validation_models, None, bool) self._parms["keep_cross_validation_models"] = keep_cross_validation_models @property def keep_cross_validation_predictions(self): """ Whether to keep the predictions of the cross-validation models. Type: ``bool``, defaults to ``False``. :examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor() >>> predictors = ["displacement","power","weight","acceleration","year"] >>> response = "economy_20mpg" >>> train, valid = cars.split_frame(ratios=[.8], seed=1234) >>> cars_nb = H2ONaiveBayesEstimator(keep_cross_validation_predictions=True, ... nfolds=5, ... seed=1234) >>> cars_nb.train(x=predictors, ... y=response, ... training_frame=train) >>> cars_nb.cross_validation_predictions() """ return self._parms.get("keep_cross_validation_predictions") @keep_cross_validation_predictions.setter def keep_cross_validation_predictions(self, keep_cross_validation_predictions): assert_is_type(keep_cross_validation_predictions, None, bool) self._parms["keep_cross_validation_predictions"] = keep_cross_validation_predictions @property def keep_cross_validation_fold_assignment(self): """ Whether to keep the cross-validation fold assignment. Type: ``bool``, defaults to ``False``. :examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor() >>> predictors = ["displacement","power","weight","acceleration","year"] >>> response = "economy_20mpg" >>> train, valid = cars.split_frame(ratios=[.8], seed=1234) >>> cars_nb = H2ONaiveBayesEstimator(keep_cross_validation_fold_assignment=True, ... nfolds=5, ... seed=1234) >>> cars_nb.train(x=predictors, ... y=response, ... training_frame=train) >>> cars_nb.cross_validation_fold_assignment() """ return self._parms.get("keep_cross_validation_fold_assignment") @keep_cross_validation_fold_assignment.setter def keep_cross_validation_fold_assignment(self, keep_cross_validation_fold_assignment): assert_is_type(keep_cross_validation_fold_assignment, None, bool) self._parms["keep_cross_validation_fold_assignment"] = keep_cross_validation_fold_assignment @property def training_frame(self): """ Id of the training data frame. Type: ``Union[None, str, H2OFrame]``. :examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor() >>> predictors = ["displacement","power","weight","acceleration","year"] >>> response = "economy_20mpg" >>> train, valid = cars.split_frame(ratios=[.8], seed=1234) >>> cars_nb = H2ONaiveBayesEstimator() >>> cars_nb.train(x=predictors, ... y=response, ... training_frame=train, ... validation_frame=valid) >>> cars_nb.auc() """ return self._parms.get("training_frame") @training_frame.setter def training_frame(self, training_frame): self._parms["training_frame"] = H2OFrame._validate(training_frame, 'training_frame') @property def validation_frame(self): """ Id of the validation data frame. Type: ``Union[None, str, H2OFrame]``. :examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor() >>> predictors = ["displacement","power","weight","acceleration","year"] >>> response = "economy_20mpg" >>> train, valid = cars.split_frame(ratios=[.8], seed=1234) >>> cars_nb = H2ONaiveBayesEstimator() >>> cars_nb.train(x=predictors, ... y=response, ... training_frame=train, ... validation_frame=valid) >>> cars_nb.auc() """ return self._parms.get("validation_frame") @validation_frame.setter def validation_frame(self, validation_frame): self._parms["validation_frame"] = H2OFrame._validate(validation_frame, 'validation_frame') @property def response_column(self): """ Response variable column. Type: ``str``. """ return self._parms.get("response_column") @response_column.setter def response_column(self, response_column): assert_is_type(response_column, None, str) self._parms["response_column"] = response_column @property def ignored_columns(self): """ Names of columns to ignore for training. Type: ``List[str]``. """ return self._parms.get("ignored_columns") @ignored_columns.setter def ignored_columns(self, ignored_columns): assert_is_type(ignored_columns, None, [str]) self._parms["ignored_columns"] = ignored_columns @property def ignore_const_cols(self): """ Ignore constant columns. Type: ``bool``, defaults to ``True``. :examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor() >>> predictors = ["displacement","power","weight","acceleration","year"] >>> response = "economy_20mpg" >>> cars["const_1"] = 6 >>> cars["const_2"] = 7 >>> train, valid = cars.split_frame(ratios=[.8], seed=1234) >>> cars_nb = H2ONaiveBayesEstimator(seed=1234, ... ignore_const_cols=True) >>> cars_nb.train(x=predictors, ... y=response, ... training_frame=train, ... validation_frame=valid) >>> cars_nb.auc() """ return self._parms.get("ignore_const_cols") @ignore_const_cols.setter def ignore_const_cols(self, ignore_const_cols): assert_is_type(ignore_const_cols, None, bool) self._parms["ignore_const_cols"] = ignore_const_cols @property def score_each_iteration(self): """ Whether to score during each iteration of model training. Type: ``bool``, defaults to ``False``. :examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor() >>> predictors = ["displacement","power","weight","acceleration","year"] >>> response = "economy_20mpg" >>> train, valid = cars.split_frame(ratios=[.8], seed=1234) >>> cars_nb = H2ONaiveBayesEstimator(score_each_iteration=True, ... seed=1234) >>> cars_nb.train(x=predictors, ... y=response, ... training_frame=train, ... validation_frame=valid) >>> cars_nb.auc() """ return self._parms.get("score_each_iteration") @score_each_iteration.setter def score_each_iteration(self, score_each_iteration): assert_is_type(score_each_iteration, None, bool) self._parms["score_each_iteration"] = score_each_iteration @property def balance_classes(self): """ Balance training data class counts via over/under-sampling (for imbalanced data). Type: ``bool``, defaults to ``False``. :examples: >>> iris = h2o.import_file("http://h2o-public-test-data.s3.amazonaws.com/smalldata/iris/iris_wheader.csv") >>> iris_nb = H2ONaiveBayesEstimator(balance_classes=False, ... nfolds=3, ... seed=1234) >>> iris_nb.train(x=list(range(4)), ... y=4, ... training_frame=iris) >>> iris_nb.mse() """ return self._parms.get("balance_classes") @balance_classes.setter def balance_classes(self, balance_classes): assert_is_type(balance_classes, None, bool) self._parms["balance_classes"] = balance_classes @property def class_sampling_factors(self): """ Desired over/under-sampling ratios per class (in lexicographic order). If not specified, sampling factors will be automatically computed to obtain class balance during training. Requires balance_classes. Type: ``List[float]``. :examples: >>> covtype = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/covtype/covtype.20k.data") >>> covtype[54] = covtype[54].asfactor() >>> sample_factors = [1., 0.5, 1., 1., 1., 1., 1.] >>> cov_nb = H2ONaiveBayesEstimator(class_sampling_factors=sample_factors, ... seed=1234) >>> predictors = covtype.columns[0:54] >>> response = 'C55' >>> cov_nb.train(x=predictors, y=response, training_frame=covtype) >>> cov_nb.logloss() """ return self._parms.get("class_sampling_factors") @class_sampling_factors.setter def class_sampling_factors(self, class_sampling_factors): assert_is_type(class_sampling_factors, None, [float]) self._parms["class_sampling_factors"] = class_sampling_factors @property def max_after_balance_size(self): """ Maximum relative size of the training data after balancing class counts (can be less than 1.0). Requires balance_classes. Type: ``float``, defaults to ``5.0``. :examples: >>> covtype = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/covtype/covtype.20k.data") >>> covtype[54] = covtype[54].asfactor() >>> predictors = covtype.columns[0:54] >>> response = 'C55' >>> train, valid = covtype.split_frame(ratios=[.8], seed=1234) >>> max = .85 >>> cov_nb = H2ONaiveBayesEstimator(max_after_balance_size=max, ... seed=1234) >>> cov_nb.train(x=predictors, ... y=response, ... training_frame=train, ... validation_frame=valid) >>> cars_nb.logloss() """ return self._parms.get("max_after_balance_size") @max_after_balance_size.setter def max_after_balance_size(self, max_after_balance_size): assert_is_type(max_after_balance_size, None, float) self._parms["max_after_balance_size"] = max_after_balance_size @property def max_confusion_matrix_size(self): """ [Deprecated] Maximum size (# classes) for confusion matrices to be printed in the Logs Type: ``int``, defaults to ``20``. """ return self._parms.get("max_confusion_matrix_size") @max_confusion_matrix_size.setter def max_confusion_matrix_size(self, max_confusion_matrix_size): assert_is_type(max_confusion_matrix_size, None, int) self._parms["max_confusion_matrix_size"] = max_confusion_matrix_size @property def laplace(self): """ Laplace smoothing parameter Type: ``float``, defaults to ``0.0``. :examples: >>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/prostate/prostate.csv.zip") >>> prostate['CAPSULE'] = prostate['CAPSULE'].asfactor() >>> prostate['RACE'] = prostate['RACE'].asfactor() >>> prostate['DCAPS'] = prostate['DCAPS'].asfactor() >>> prostate['DPROS'] = prostate['DPROS'].asfactor() >>> prostate_nb = H2ONaiveBayesEstimator(laplace=1) >>> prostate_nb.train(x=list(range(3,9)), ... y=response_col, ... training_frame=prostate) >>> prostate_nb.mse() """ return self._parms.get("laplace") @laplace.setter def laplace(self, laplace): assert_is_type(laplace, None, numeric) self._parms["laplace"] = laplace @property def min_sdev(self): """ Min. standard deviation to use for observations with not enough data Type: ``float``, defaults to ``0.001``. :examples: >>> import random >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> problem = random.sample(["binomial","multinomial"],1) >>> predictors = ["displacement","power","weight","acceleration","year"] >>> if problem == "binomial": ... response_col = "economy_20mpg" ... else: ... response_col = "cylinders" >>> cars[response_col] = cars[response_col].asfactor() >>> cars_nb = H2ONaiveBayesEstimator(min_sdev=0.1, ... eps_sdev=0.5, ... seed=1234) >>> cars_nb.train(x=predictors, ... y=response_col, ... training_frame=cars) >>> cars_nb.show() """ return self._parms.get("min_sdev") @min_sdev.setter def min_sdev(self, min_sdev): assert_is_type(min_sdev, None, numeric) self._parms["min_sdev"] = min_sdev @property def eps_sdev(self): """ Cutoff below which standard deviation is replaced with min_sdev Type: ``float``, defaults to ``0.0``. :examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> problem = random.sample(["binomial","multinomial"],1) >>> predictors = ["displacement","power","weight","acceleration","year"] >>> if problem == "binomial": ... response_col = "economy_20mpg" ... else: ... response_col = "cylinders" >>> cars[response_col] = cars[response_col].asfactor() >>> cars_nb = H2ONaiveBayesEstimator(min_sdev=0.1, ... eps_sdev=0.5, ... seed=1234) >>> cars_nb.train(x=predictors, y=response_col, training_frame=cars) >>> cars_nb.mse() """ return self._parms.get("eps_sdev") @eps_sdev.setter def eps_sdev(self, eps_sdev): assert_is_type(eps_sdev, None, numeric) self._parms["eps_sdev"] = eps_sdev @property def min_prob(self): """ Min. probability to use for observations with not enough data Type: ``float``, defaults to ``0.001``. :examples: >>> import random >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> problem = random.sample(["binomial","multinomial"],1) >>> predictors = ["displacement","power","weight","acceleration","year"] >>> if problem == "binomial": ... response_col = "economy_20mpg" ... else: ... response_col = "cylinders" >>> cars[response_col] = cars[response_col].asfactor() >>> cars_nb = H2ONaiveBayesEstimator(min_prob=0.1, ... eps_prob=0.5, ... seed=1234) >>> cars_nb.train(x=predictors, ... y=response_col, ... training_frame=cars) >>> cars_nb.show() """ return self._parms.get("min_prob") @min_prob.setter def min_prob(self, min_prob): assert_is_type(min_prob, None, numeric) self._parms["min_prob"] = min_prob @property def eps_prob(self): """ Cutoff below which probability is replaced with min_prob Type: ``float``, defaults to ``0.0``. :examples: >>> import random >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> problem = random.sample(["binomial","multinomial"],1) >>> predictors = ["displacement","power","weight","acceleration","year"] >>> if problem == "binomial": ... response_col = "economy_20mpg" ... else: ... response_col = "cylinders" >>> cars[response_col] = cars[response_col].asfactor() >>> cars_nb = H2ONaiveBayesEstimator(min_prob=0.1, ... eps_prob=0.5, ... seed=1234) >>> cars_nb.train(x=predictors, y=response_col, training_frame=cars) >>> cars_nb.mse() """ return self._parms.get("eps_prob") @eps_prob.setter def eps_prob(self, eps_prob): assert_is_type(eps_prob, None, numeric) self._parms["eps_prob"] = eps_prob @property def compute_metrics(self): """ Compute metrics on training data Type: ``bool``, defaults to ``True``. :examples: >>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/prostate/prostate.csv.zip") >>> prostate['CAPSULE'] = prostate['CAPSULE'].asfactor() >>> prostate['RACE'] = prostate['RACE'].asfactor() >>> prostate['DCAPS'] = prostate['DCAPS'].asfactor() >>> prostate['DPROS'] = prostate['DPROS'].asfactor() >>> response_col = 'CAPSULE' >>> prostate_nb = H2ONaiveBayesEstimator(laplace=0, ... compute_metrics=False) >>> prostate_nb.train(x=list(range(3,9)), ... y=response_col, ... training_frame=prostate) >>> prostate_nb.show() """ return self._parms.get("compute_metrics") @compute_metrics.setter def compute_metrics(self, compute_metrics): assert_is_type(compute_metrics, None, bool) self._parms["compute_metrics"] = compute_metrics @property def max_runtime_secs(self): """ Maximum allowed runtime in seconds for model training. Use 0 to disable. Type: ``float``, defaults to ``0.0``. :examples: >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv") >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor() >>> predictors = ["displacement","power","weight","acceleration","year"] >>> response = "economy_20mpg" >>> train, valid = cars.split_frame(ratios=[.8], seed=1234) >>> cars_nb = H2ONaiveBayesEstimator(max_runtime_secs=10, ... seed=1234) >>> cars_nb.train(x=predictors, ... y=response, ... training_frame=train, ... validation_frame=valid) >>> cars_nb.auc() """ return self._parms.get("max_runtime_secs") @max_runtime_secs.setter def max_runtime_secs(self, max_runtime_secs): assert_is_type(max_runtime_secs, None, numeric) self._parms["max_runtime_secs"] = max_runtime_secs @property def export_checkpoints_dir(self): """ Automatically export generated models to this directory. Type: ``str``. :examples: >>> import tempfile >>> from os import listdir >>> airlines = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/airlines/allyears2k_headers.zip", destination_frame="air.hex") >>> predictors = ["DayofMonth", "DayOfWeek"] >>> response = "IsDepDelayed" >>> checkpoints_dir = tempfile.mkdtemp() >>> air_nb = H2ONaiveBayesEstimator(export_checkpoints_dir=checkpoints_dir) >>> air_nb.train(x=predictors, y=response, training_frame=airlines) >>> len(listdir(checkpoints_dir)) """ return self._parms.get("export_checkpoints_dir") @export_checkpoints_dir.setter def export_checkpoints_dir(self, export_checkpoints_dir): assert_is_type(export_checkpoints_dir, None, str) self._parms["export_checkpoints_dir"] = export_checkpoints_dir @property def gainslift_bins(self): """ Gains/Lift table number of bins. 0 means disabled.. Default value -1 means automatic binning. Type: ``int``, defaults to ``-1``. :examples: >>> airlines= h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/testng/airlines_train.csv") >>> model = H2ONaiveBayesEstimator(gainslift_bins=20) >>> model.train(x=["Origin", "Distance"], ... y="IsDepDelayed", ... training_frame=airlines) >>> model.gains_lift() """ return self._parms.get("gainslift_bins") @gainslift_bins.setter def gainslift_bins(self, gainslift_bins): assert_is_type(gainslift_bins, None, int) self._parms["gainslift_bins"] = gainslift_bins @property def auc_type(self): """ Set default multinomial AUC type. Type: ``Literal["auto", "none", "macro_ovr", "weighted_ovr", "macro_ovo", "weighted_ovo"]``, defaults to ``"auto"``. """ return self._parms.get("auc_type") @auc_type.setter def auc_type(self, auc_type): assert_is_type(auc_type, None, Enum("auto", "none", "macro_ovr", "weighted_ovr", "macro_ovo", "weighted_ovo")) self._parms["auc_type"] = auc_type