Source code for h2o.estimators.glm

from .estimator_base import H2OEstimator


class H2OGeneralizedLinearEstimator(H2OEstimator):
    """Build a Generalized Linear Model.

    Fit a generalized linear model, specified by a response variable, a set of
    predictors, and a description of the error distribution.

    Every constructor argument is stored in the ``_parms`` dictionary under its
    own name, except ``Lambda``, which is stored under the key ``"lambda"``
    (``lambda`` is a reserved word in Python and cannot be used as an argument
    name). Each stored parameter is also exposed as a read/write property.

    Parameters
    ----------
    model_id : str, optional
        The unique id assigned to the resulting model. If none is given, an id
        will automatically be generated.
    max_iterations : int
        A non-negative integer specifying the maximum number of iterations.
    beta_epsilon : float
        A non-negative number specifying the magnitude of the maximum
        difference between the coefficient estimates from successive
        iterations. Defines the convergence criterion.
    solver : str
        A character string specifying the solver used: IRLSM (supports more
        features), L_BFGS (scales better for datasets with many columns).
    standardize : bool
        Indicates whether the numeric predictors should be standardized to
        have a mean of 0 and a variance of 1 prior to training the models.
    family : str
        A character string specifying the distribution of the model:
        gaussian, binomial, multinomial, poisson, gamma, tweedie.
    link : str
        A character string specifying the link function. The default is the
        canonical link for the family. The supported links for each of the
        family specifications are::

            "gaussian"    - "identity", "log", "inverse"
            "binomial"    - "logit", "log"
            "multinomial" - "multinomial"
            "poisson"     - "log", "identity"
            "gamma"       - "inverse", "log", "identity"
            "tweedie"     - "tweedie"

    tweedie_variance_power : float
        A numeric specifying the power for the variance function when
        family = "tweedie".
    tweedie_link_power : float
        A numeric specifying the power for the link function when
        family = "tweedie".
    alpha : float
        A numeric in [0, 1] specifying the elastic-net mixing parameter. The
        elastic-net penalty is defined to be::

            P(alpha, beta) = (1 - alpha)/2 * ||beta||_2^2 + alpha * ||beta||_1
                           = sum_j [(1 - alpha)/2 * beta_j^2 + alpha * |beta_j|]

        making alpha = 1 the lasso penalty and alpha = 0 the ridge penalty.
    Lambda : float
        A non-negative shrinkage parameter for the elastic-net, which
        multiplies P(alpha, beta) in the objective function. When Lambda = 0,
        no elastic-net penalty is applied and ordinary generalized linear
        models are fit.
    prior : float, optional
        A numeric specifying the prior probability of class 1 in the response
        when family = "binomial". The default prior is the observational
        frequency of class 1. Must be from (0,1) exclusive range or None
        (no prior).
    lambda_search : bool
        A logical value indicating whether to conduct a search over the space
        of lambda values starting from the lambda max; the given lambda is
        then interpreted as lambda min.
    nlambdas : int
        The number of lambda values to use when lambda_search = TRUE.
    lambda_min_ratio : float
        Smallest value for lambda as a fraction of lambda.max. By default if
        the number of observations is greater than the number of variables
        then lambda_min_ratio = 0.0001; if the number of observations is less
        than the number of variables then lambda_min_ratio = 0.01.
    beta_constraints : H2OFrame
        A data.frame or H2OParsedData object with the columns
        ["names", "lower_bounds", "upper_bounds", "beta_given"], where each
        row corresponds to a predictor in the GLM. "names" contains the
        predictor names, "lower"/"upper_bounds" are the lower and upper
        bounds of beta, and "beta_given" is some supplied starting values.
    nfolds : int, optional
        Number of folds for cross-validation. If nfolds >= 2, then validation
        must remain empty.
    fold_assignment : str
        Cross-validation fold assignment scheme, if fold_column is not
        specified. Must be "AUTO", "Random" or "Modulo".
    keep_cross_validation_predictions : bool
        Whether to keep the predictions of the cross-validation models.
    intercept : bool
        Logical, include constant term (intercept) in the model.
    max_active_predictors : int, optional
        Convergence criteria for number of predictors when using L1 penalty.
    missing_values_handling : str
        A character string specifying how to handle missing value:
        "MeanImputation", "Skip".

    Other Parameters
    ----------------
    checkpoint, objective_epsilon, gradient_epsilon, non_negative, \
compute_p_values, remove_collinear_columns
        Accepted and stored in ``_parms`` exactly like the parameters above.
        Their meaning is not described in this file; consult the H2O GLM
        documentation for their semantics.

    Returns
    -------
    A subclass of ModelBase is returned. The specific subclass depends on the
    machine learning task at hand (if it's binomial classification, then an
    H2OBinomialModel is returned, if it's regression then a H2ORegressionModel
    is returned). The default print-out of the models is shown, but further
    GLM-specific information can be queried out of the object. Upon completion
    of the GLM, the resulting object has coefficients, normalized
    coefficients, residual/null deviance, aic, and a host of model metrics
    including MSE, AUC (for logistic regression), degrees of freedom, and
    confusion matrices.
    """

    def __init__(self, model_id=None, max_iterations=None, beta_epsilon=None,
                 solver=None, standardize=None, family=None, link=None,
                 tweedie_variance_power=None, tweedie_link_power=None,
                 alpha=None, prior=None, lambda_search=None, nlambdas=None,
                 lambda_min_ratio=None, beta_constraints=None, nfolds=None,
                 fold_assignment=None, keep_cross_validation_predictions=None,
                 intercept=None, Lambda=None, max_active_predictors=None,
                 checkpoint=None, objective_epsilon=None,
                 gradient_epsilon=None, non_negative=False,
                 compute_p_values=False, remove_collinear_columns=False,
                 missing_values_handling=None):
        """Record every constructor argument in ``self._parms``.

        See the class docstring for the meaning of the individual parameters.
        """
        super(H2OGeneralizedLinearEstimator, self).__init__()
        # NOTE: no local variable may be introduced before this locals() call,
        # otherwise it would be captured as if it were a model parameter.
        self._parms = locals()
        # Drop the entries that are not model parameters: "self", and the
        # implicit "__class__" cell that Python 3 adds to the local namespace
        # of any method that references `super`.
        self._parms = {k: v for k, v in self._parms.items()
                       if k not in ("self", "__class__")}
        # "lambda" is a Python keyword, so the argument is spelled "Lambda";
        # it is stored under the key "lambda".
        self._parms["lambda"] = self._parms.pop("Lambda")

    @property
    def max_iterations(self):
        return self._parms["max_iterations"]

    @max_iterations.setter
    def max_iterations(self, value):
        self._parms["max_iterations"] = value

    @property
    def beta_epsilon(self):
        return self._parms["beta_epsilon"]

    @beta_epsilon.setter
    def beta_epsilon(self, value):
        self._parms["beta_epsilon"] = value

    @property
    def solver(self):
        return self._parms["solver"]

    @solver.setter
    def solver(self, value):
        self._parms["solver"] = value

    @property
    def standardize(self):
        return self._parms["standardize"]

    @standardize.setter
    def standardize(self, value):
        self._parms["standardize"] = value

    @property
    def family(self):
        return self._parms["family"]

    @family.setter
    def family(self, value):
        self._parms["family"] = value

    @property
    def link(self):
        return self._parms["link"]

    @link.setter
    def link(self, value):
        self._parms["link"] = value

    @property
    def tweedie_variance_power(self):
        return self._parms["tweedie_variance_power"]

    @tweedie_variance_power.setter
    def tweedie_variance_power(self, value):
        self._parms["tweedie_variance_power"] = value

    @property
    def tweedie_link_power(self):
        return self._parms["tweedie_link_power"]

    @tweedie_link_power.setter
    def tweedie_link_power(self, value):
        self._parms["tweedie_link_power"] = value

    @property
    def alpha(self):
        return self._parms["alpha"]

    @alpha.setter
    def alpha(self, value):
        self._parms["alpha"] = value

    @property
    def prior(self):
        return self._parms["prior"]

    @prior.setter
    def prior(self, value):
        self._parms["prior"] = value

    @property
    def lambda_search(self):
        return self._parms["lambda_search"]

    @lambda_search.setter
    def lambda_search(self, value):
        self._parms["lambda_search"] = value

    @property
    def nlambdas(self):
        return self._parms["nlambdas"]

    @nlambdas.setter
    def nlambdas(self, value):
        self._parms["nlambdas"] = value

    @property
    def lambda_min_ratio(self):
        return self._parms["lambda_min_ratio"]

    @lambda_min_ratio.setter
    def lambda_min_ratio(self, value):
        self._parms["lambda_min_ratio"] = value

    @property
    def beta_constraints(self):
        return self._parms["beta_constraints"]

    @beta_constraints.setter
    def beta_constraints(self, value):
        self._parms["beta_constraints"] = value

    @property
    def nfolds(self):
        return self._parms["nfolds"]

    @nfolds.setter
    def nfolds(self, value):
        self._parms["nfolds"] = value

    @property
    def fold_assignment(self):
        return self._parms["fold_assignment"]

    @fold_assignment.setter
    def fold_assignment(self, value):
        self._parms["fold_assignment"] = value

    @property
    def keep_cross_validation_predictions(self):
        return self._parms["keep_cross_validation_predictions"]

    @keep_cross_validation_predictions.setter
    def keep_cross_validation_predictions(self, value):
        self._parms["keep_cross_validation_predictions"] = value

    @property
    def intercept(self):
        return self._parms["intercept"]

    @intercept.setter
    def intercept(self, value):
        self._parms["intercept"] = value

    @property
    def Lambda(self):
        # __init__ stores this parameter under "lambda", not "Lambda";
        # the property must use the same key or it raises KeyError.
        return self._parms["lambda"]

    @Lambda.setter
    def Lambda(self, value):
        # Write to the same "lambda" key that __init__ populates so the
        # update replaces the stored value instead of adding a stray key.
        self._parms["lambda"] = value

    @property
    def max_active_predictors(self):
        return self._parms["max_active_predictors"]

    @max_active_predictors.setter
    def max_active_predictors(self, value):
        self._parms["max_active_predictors"] = value

    @property
    def checkpoint(self):
        return self._parms["checkpoint"]

    @checkpoint.setter
    def checkpoint(self, value):
        self._parms["checkpoint"] = value

    @property
    def objective_epsilon(self):
        return self._parms["objective_epsilon"]

    @objective_epsilon.setter
    def objective_epsilon(self, value):
        self._parms["objective_epsilon"] = value

    @property
    def gradient_epsilon(self):
        return self._parms["gradient_epsilon"]

    @gradient_epsilon.setter
    def gradient_epsilon(self, value):
        self._parms["gradient_epsilon"] = value

    @property
    def non_negative(self):
        return self._parms["non_negative"]

    @non_negative.setter
    def non_negative(self, value):
        self._parms["non_negative"] = value

    @property
    def compute_p_values(self):
        return self._parms["compute_p_values"]

    @compute_p_values.setter
    def compute_p_values(self, value):
        self._parms["compute_p_values"] = value

    @property
    def remove_collinear_columns(self):
        return self._parms["remove_collinear_columns"]

    @remove_collinear_columns.setter
    def remove_collinear_columns(self, value):
        self._parms["remove_collinear_columns"] = value

    @property
    def missing_values_handling(self):
        return self._parms["missing_values_handling"]

    @missing_values_handling.setter
    def missing_values_handling(self, value):
        self._parms["missing_values_handling"] = value