# Source code for h2o.estimators.naive_bayes

#!/usr/bin/env python
# -*- encoding: utf-8 -*-
#
# This file is auto-generated by h2o-3/h2o-bindings/bin/gen_python.py
# Copyright 2016 H2O.ai;  Apache License Version 2.0 (see LICENSE for details)
#
from .estimator_base import H2OEstimator


class H2ONaiveBayesEstimator(H2OEstimator):
    """
    Naive Bayes

    The naive Bayes classifier assumes independence between predictor variables conditional
    on the response, and a Gaussian distribution of numeric predictors with mean and standard
    deviation computed from the training dataset. When building a naive Bayes classifier,
    every row in the training dataset that contains at least one NA will be skipped
    completely. If the test dataset has missing values, then those predictors are omitted in
    the probability calculation during prediction.

    Every parameter below is accepted as a keyword argument by the constructor and stored in
    the ``_parms`` dict; a parameter that is not supplied is stored as ``None``. The
    "Default" values quoted below are not applied by this class.

    Parameters
    ----------
    model_id : str
        Destination id for this model; auto-generated if not specified.
    nfolds : int
        Number of folds for N-fold cross-validation (0 to disable or ≥ 2).
        Default: 0
    seed : int
        Seed for pseudo random number generator (only used for cross-validation and
        fold_assignment="Random" or "AUTO")
        Default: -1
    fold_assignment : "AUTO" | "Random" | "Modulo" | "Stratified"
        Cross-validation fold assignment scheme, if fold_column is not specified. The
        'Stratified' option will stratify the folds based on the response variable, for
        classification problems.
        Default: "AUTO"
    fold_column : VecSpecifier
        Column with cross-validation fold index assignment per observation.
    keep_cross_validation_predictions : bool
        Whether to keep the predictions of the cross-validation models.
        Default: False
    keep_cross_validation_fold_assignment : bool
        Whether to keep the cross-validation fold assignment.
        Default: False
    training_frame : str
        Id of the training data frame (Not required, to allow initial validation of model
        parameters).
    validation_frame : str
        Id of the validation data frame.
    response_column : VecSpecifier
        Response variable column.
    ignored_columns : list(str)
        Names of columns to ignore for training.
    ignore_const_cols : bool
        Ignore constant columns.
        Default: True
    score_each_iteration : bool
        Whether to score during each iteration of model training.
        Default: False
    balance_classes : bool
        Balance training data class counts via over/under-sampling (for imbalanced data).
        Default: False
    class_sampling_factors : list(float)
        Desired over/under-sampling ratios per class (in lexicographic order). If not
        specified, sampling factors will be automatically computed to obtain class balance
        during training. Requires balance_classes.
    max_after_balance_size : float
        Maximum relative size of the training data after balancing class counts (can be less
        than 1.0). Requires balance_classes.
        Default: 5.0
    max_confusion_matrix_size : int
        Maximum size (# classes) for confusion matrices to be printed in the Logs
        Default: 20
    max_hit_ratio_k : int
        Max. number (top K) of predictions to use for hit ratio computation (for multi-class
        only, 0 to disable)
        Default: 0
    laplace : float
        Laplace smoothing parameter
        Default: 0.0
    min_sdev : float
        Min. standard deviation to use for observations with not enough data
        Default: 0.001
    eps_sdev : float
        Cutoff below which standard deviation is replaced with min_sdev
        Default: 0.0
    min_prob : float
        Min. probability to use for observations with not enough data
        Default: 0.001
    eps_prob : float
        Cutoff below which probability is replaced with min_prob
        Default: 0.0
    compute_metrics : bool
        Compute metrics on training data
        Default: True
    max_runtime_secs : float
        Maximum allowed runtime in seconds for model training. Use 0 to disable.
        Default: 0.0
    """

    def __init__(self, **kwargs):
        """
        Store the recognised keyword arguments in ``self._parms``.

        Each recognised parameter gets an entry in ``self._parms``; its value is the keyword
        argument of the same name, or ``None`` when that argument was not passed. Keyword
        arguments whose names are not in the recognised list are silently ignored.

        :param kwargs: model parameters, as described in the class docstring.
        """
        super(H2ONaiveBayesEstimator, self).__init__()
        self._parms = {}
        # NOTE(review): "model_id" is stored here but has no property in this class;
        # presumably the base class exposes it -- confirm against H2OEstimator.
        recognised = (
            "model_id", "nfolds", "seed", "fold_assignment", "fold_column",
            "keep_cross_validation_predictions", "keep_cross_validation_fold_assignment",
            "training_frame", "validation_frame", "response_column", "ignored_columns",
            "ignore_const_cols", "score_each_iteration", "balance_classes",
            "class_sampling_factors", "max_after_balance_size", "max_confusion_matrix_size",
            "max_hit_ratio_k", "laplace", "min_sdev", "eps_sdev", "min_prob", "eps_prob",
            "compute_metrics", "max_runtime_secs",
        )
        for name in recognised:
            # A single trailing underscore is dropped from the storage key. None of the
            # names above currently ends with one, so keys equal the argument names.
            pname = name[:-1] if name.endswith("_") else name
            self._parms[pname] = kwargs.get(name)

    @property
    def nfolds(self):
        """Number of folds for N-fold cross-validation."""
        return self._parms["nfolds"]

    @nfolds.setter
    def nfolds(self, value):
        self._parms["nfolds"] = value

    @property
    def seed(self):
        """Seed for the pseudo random number generator."""
        return self._parms["seed"]

    @seed.setter
    def seed(self, value):
        self._parms["seed"] = value

    @property
    def fold_assignment(self):
        """Cross-validation fold assignment scheme."""
        return self._parms["fold_assignment"]

    @fold_assignment.setter
    def fold_assignment(self, value):
        self._parms["fold_assignment"] = value

    @property
    def fold_column(self):
        """Column with cross-validation fold index assignment per observation."""
        return self._parms["fold_column"]

    @fold_column.setter
    def fold_column(self, value):
        self._parms["fold_column"] = value

    @property
    def keep_cross_validation_predictions(self):
        """Whether to keep the predictions of the cross-validation models."""
        return self._parms["keep_cross_validation_predictions"]

    @keep_cross_validation_predictions.setter
    def keep_cross_validation_predictions(self, value):
        self._parms["keep_cross_validation_predictions"] = value

    @property
    def keep_cross_validation_fold_assignment(self):
        """Whether to keep the cross-validation fold assignment."""
        return self._parms["keep_cross_validation_fold_assignment"]

    @keep_cross_validation_fold_assignment.setter
    def keep_cross_validation_fold_assignment(self, value):
        self._parms["keep_cross_validation_fold_assignment"] = value

    @property
    def training_frame(self):
        """Id of the training data frame."""
        return self._parms["training_frame"]

    @training_frame.setter
    def training_frame(self, value):
        self._parms["training_frame"] = value

    @property
    def validation_frame(self):
        """Id of the validation data frame."""
        return self._parms["validation_frame"]

    @validation_frame.setter
    def validation_frame(self, value):
        self._parms["validation_frame"] = value

    @property
    def response_column(self):
        """Response variable column."""
        return self._parms["response_column"]

    @response_column.setter
    def response_column(self, value):
        self._parms["response_column"] = value

    @property
    def ignored_columns(self):
        """Names of columns to ignore for training."""
        return self._parms["ignored_columns"]

    @ignored_columns.setter
    def ignored_columns(self, value):
        self._parms["ignored_columns"] = value

    @property
    def ignore_const_cols(self):
        """Whether to ignore constant columns."""
        return self._parms["ignore_const_cols"]

    @ignore_const_cols.setter
    def ignore_const_cols(self, value):
        self._parms["ignore_const_cols"] = value

    @property
    def score_each_iteration(self):
        """Whether to score during each iteration of model training."""
        return self._parms["score_each_iteration"]

    @score_each_iteration.setter
    def score_each_iteration(self, value):
        self._parms["score_each_iteration"] = value

    @property
    def balance_classes(self):
        """Whether to balance training data class counts via over/under-sampling."""
        return self._parms["balance_classes"]

    @balance_classes.setter
    def balance_classes(self, value):
        self._parms["balance_classes"] = value

    @property
    def class_sampling_factors(self):
        """Desired over/under-sampling ratios per class."""
        return self._parms["class_sampling_factors"]

    @class_sampling_factors.setter
    def class_sampling_factors(self, value):
        self._parms["class_sampling_factors"] = value

    @property
    def max_after_balance_size(self):
        """Maximum relative size of the training data after balancing class counts."""
        return self._parms["max_after_balance_size"]

    @max_after_balance_size.setter
    def max_after_balance_size(self, value):
        self._parms["max_after_balance_size"] = value

    @property
    def max_confusion_matrix_size(self):
        """Maximum size (# classes) for confusion matrices to be printed in the Logs."""
        return self._parms["max_confusion_matrix_size"]

    @max_confusion_matrix_size.setter
    def max_confusion_matrix_size(self, value):
        self._parms["max_confusion_matrix_size"] = value

    @property
    def max_hit_ratio_k(self):
        """Max. number (top K) of predictions to use for hit ratio computation."""
        return self._parms["max_hit_ratio_k"]

    @max_hit_ratio_k.setter
    def max_hit_ratio_k(self, value):
        self._parms["max_hit_ratio_k"] = value

    @property
    def laplace(self):
        """Laplace smoothing parameter."""
        return self._parms["laplace"]

    @laplace.setter
    def laplace(self, value):
        self._parms["laplace"] = value

    @property
    def min_sdev(self):
        """Min. standard deviation to use for observations with not enough data."""
        return self._parms["min_sdev"]

    @min_sdev.setter
    def min_sdev(self, value):
        self._parms["min_sdev"] = value

    @property
    def eps_sdev(self):
        """Cutoff below which standard deviation is replaced with min_sdev."""
        return self._parms["eps_sdev"]

    @eps_sdev.setter
    def eps_sdev(self, value):
        self._parms["eps_sdev"] = value

    @property
    def min_prob(self):
        """Min. probability to use for observations with not enough data."""
        return self._parms["min_prob"]

    @min_prob.setter
    def min_prob(self, value):
        self._parms["min_prob"] = value

    @property
    def eps_prob(self):
        """Cutoff below which probability is replaced with min_prob."""
        return self._parms["eps_prob"]

    @eps_prob.setter
    def eps_prob(self, value):
        self._parms["eps_prob"] = value

    @property
    def compute_metrics(self):
        """Whether to compute metrics on training data."""
        return self._parms["compute_metrics"]

    @compute_metrics.setter
    def compute_metrics(self, value):
        self._parms["compute_metrics"] = value

    @property
    def max_runtime_secs(self):
        """Maximum allowed runtime in seconds for model training."""
        return self._parms["max_runtime_secs"]

    @max_runtime_secs.setter
    def max_runtime_secs(self, value):
        self._parms["max_runtime_secs"] = value