from .estimator_base import H2OEstimator
class H2OGeneralizedLinearEstimator(H2OEstimator):
"""Build a Generalized Linear Model
Fit a generalized linear model, specified by a response variable, a set of predictors,
and a description of the error distribution.
Parameters
----------
model_id : str, optional
The unique id assigned to the resulting model. If none is given, an id will
automatically be generated.
max_iterations : int
A non-negative integer specifying the maximum number of iterations.
beta_epsilon : float
A non-negative number specifying the magnitude of the maximum difference between
the coefficient estimates from successive iterations. Defines the convergence
criterion.
solver : str
A character string specifying the solver to use: IRLSM (supports more features)
or L_BFGS (scales better for datasets with many columns).
standardize : bool
Indicates whether the numeric predictors should be standardized to have a mean of
0 and a variance of 1 prior to training the models.
family : str
A character string specifying the distribution of the model:
gaussian, binomial, multinomial, poisson, gamma, tweedie.
link : str
A character string specifying the link function. The default is the canonical
link for the family. The supported links for each of the family specifications are
"gaussian" - "identity", "log", "inverse"
"binomial" - "logit", "log"
"multinomial" - "multinomial"
"poisson" - "log", "identity"
"gamma" - "inverse", "log", "identity"
"tweedie" - "tweedie"
tweedie_variance_power : float
A numeric specifying the power for the variance function when family = "tweedie".
tweedie_link_power : float
A numeric specifying the power for the link function when family = "tweedie".
alpha : float
A numeric in [0, 1] specifying the elastic-net mixing parameter.
The elastic-net penalty is defined to be
P(alpha, beta) = (1 - alpha)/2 * ||beta||_2^2 + alpha * ||beta||_1
= sum_j [ (1 - alpha)/2 * beta_j^2 + alpha * |beta_j| ],
making alpha = 1 the lasso penalty and alpha = 0 the ridge penalty.
Lambda : float
A non-negative shrinkage parameter for the elastic-net, which multiplies
P(alpha, beta) in the objective function.
When Lambda = 0, no elastic-net penalty is applied and ordinary generalized linear
models are fit.
prior : float, optional
A numeric specifying the prior probability of class 1 in the response when
family = "binomial". The default prior is the observational frequency of class 1.
Must be in the exclusive range (0, 1), or None (no prior).
lambda_search : bool
A logical value indicating whether to conduct a search over the space of lambda
values, starting from lambda max; the given lambda is then interpreted as lambda min.
nlambdas : int
The number of lambda values to use when lambda_search = True.
lambda_min_ratio : float
Smallest value for lambda as a fraction of lambda.max. By default, if the number of
observations is greater than the number of variables then
lambda_min_ratio = 0.0001; if the number of observations is less than the number
of variables then lambda_min_ratio = 0.01.
beta_constraints : H2OFrame
An H2OFrame with the columns
["names", "lower_bounds", "upper_bounds", "beta_given"],
where each row corresponds to a predictor in the GLM.
"names" contains the predictor names, "lower_bounds" and "upper_bounds"
are the lower and upper bounds of beta, and "beta_given" contains user-supplied
starting values.
nfolds : int, optional
Number of folds for cross-validation. If nfolds >= 2, then the validation frame
must remain empty.
fold_assignment : str
Cross-validation fold assignment scheme, used if fold_column is not specified.
Must be "AUTO", "Random" or "Modulo".
keep_cross_validation_predictions : bool
Whether to keep the predictions of the cross-validation models.
intercept : bool
Logical; include a constant term (intercept) in the model.
max_active_predictors : int, optional
Convergence criterion for the number of predictors when using the L1 penalty.
missing_values_handling : str
A character string specifying how to handle missing values:
"MeanImputation", "Skip".
Returns
-------
A subclass of ModelBase is returned. The specific subclass depends on the machine
learning task at hand (if it's binomial classification, an H2OBinomialModel is
returned; if it's regression, an H2ORegressionModel is returned). The default
print-out of the models is shown, but further GLM-specific information can be
queried out of the object. Upon completion of the GLM, the resulting object has
coefficients, normalized coefficients, residual/null deviance, AIC, and a host of
model metrics including MSE, AUC (for logistic regression), degrees of freedom, and
confusion matrices.
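Examples
--------
A minimal usage sketch, not a definitive recipe: it assumes a running H2O
cluster and an already-parsed H2OFrame `df` with a binary response column
"CAPSULE" and predictor columns "AGE" and "PSA" (the frame and column names
are illustrative, not part of this module).

>>> import h2o
>>> from h2o.estimators.glm import H2OGeneralizedLinearEstimator
>>> h2o.init()
>>> df["CAPSULE"] = df["CAPSULE"].asfactor()
>>> model = H2OGeneralizedLinearEstimator(family="binomial",
...                                       alpha=0.5, Lambda=1e-3)
>>> model.train(x=["AGE", "PSA"], y="CAPSULE", training_frame=df)

A beta_constraints frame can be built with the columns described above; the
predictor names, bounds, and starting values here are likewise assumptions:

>>> bc = h2o.H2OFrame({"names": ["AGE", "PSA"],
...                    "lower_bounds": [-1.0, -1.0],
...                    "upper_bounds": [1.0, 1.0],
...                    "beta_given": [0.0, 0.0]})
>>> constrained = H2OGeneralizedLinearEstimator(family="binomial",
...                                             beta_constraints=bc)
>>> constrained.train(x=["AGE", "PSA"], y="CAPSULE", training_frame=df)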
"""
def __init__(self, model_id=None, max_iterations=None, beta_epsilon=None, solver=None,
standardize=None, family=None, link=None, tweedie_variance_power=None,
tweedie_link_power=None, alpha=None, prior=None, lambda_search=None,
nlambdas=None, lambda_min_ratio=None, beta_constraints=None, nfolds=None,
fold_assignment=None, keep_cross_validation_predictions=None,
intercept=None, Lambda=None, max_active_predictors=None, checkpoint=None,
objective_epsilon=None, gradient_epsilon=None, non_negative=False,
compute_p_values=False, remove_collinear_columns=False, missing_values_handling=None):
super(H2OGeneralizedLinearEstimator, self).__init__()
# Capture every constructor argument as a model parameter, dropping `self`.
self._parms = locals()
self._parms = {k: v for k, v in self._parms.items() if k != "self"}
# The backend expects the key "lambda"; the Python argument is spelled
# `Lambda` only because `lambda` is a reserved word in Python.
self._parms["lambda"] = self._parms.pop("Lambda")
@property
def max_iterations(self):
return self._parms["max_iterations"]
@max_iterations.setter
def max_iterations(self, value):
self._parms["max_iterations"] = value
@property
def beta_epsilon(self):
return self._parms["beta_epsilon"]
@beta_epsilon.setter
def beta_epsilon(self, value):
self._parms["beta_epsilon"] = value
@property
def solver(self):
return self._parms["solver"]
@solver.setter
def solver(self, value):
self._parms["solver"] = value
@property
def standardize(self):
return self._parms["standardize"]
@standardize.setter
def standardize(self, value):
self._parms["standardize"] = value
@property
def family(self):
return self._parms["family"]
@family.setter
def family(self, value):
self._parms["family"] = value
@property
def link(self):
return self._parms["link"]
@link.setter
def link(self, value):
self._parms["link"] = value
@property
def tweedie_variance_power(self):
return self._parms["tweedie_variance_power"]
@tweedie_variance_power.setter
def tweedie_variance_power(self, value):
self._parms["tweedie_variance_power"] = value
@property
def tweedie_link_power(self):
return self._parms["tweedie_link_power"]
@tweedie_link_power.setter
def tweedie_link_power(self, value):
self._parms["tweedie_link_power"] = value
@property
def alpha(self):
return self._parms["alpha"]
@alpha.setter
def alpha(self, value):
self._parms["alpha"] = value
@property
def prior(self):
return self._parms["prior"]
@prior.setter
def prior(self, value):
self._parms["prior"] = value
@property
def lambda_search(self):
return self._parms["lambda_search"]
@lambda_search.setter
def lambda_search(self, value):
self._parms["lambda_search"] = value
@property
def nlambdas(self):
return self._parms["nlambdas"]
@nlambdas.setter
def nlambdas(self, value):
self._parms["nlambdas"] = value
@property
def lambda_min_ratio(self):
return self._parms["lambda_min_ratio"]
@lambda_min_ratio.setter
def lambda_min_ratio(self, value):
self._parms["lambda_min_ratio"] = value
@property
def beta_constraints(self):
return self._parms["beta_constraints"]
@beta_constraints.setter
def beta_constraints(self, value):
self._parms["beta_constraints"] = value
@property
def nfolds(self):
return self._parms["nfolds"]
@nfolds.setter
def nfolds(self, value):
self._parms["nfolds"] = value
@property
def fold_assignment(self):
return self._parms["fold_assignment"]
@fold_assignment.setter
def fold_assignment(self, value):
self._parms["fold_assignment"] = value
@property
def keep_cross_validation_predictions(self):
return self._parms["keep_cross_validation_predictions"]
@keep_cross_validation_predictions.setter
def keep_cross_validation_predictions(self, value):
self._parms["keep_cross_validation_predictions"] = value
@property
def intercept(self):
return self._parms["intercept"]
@intercept.setter
def intercept(self, value):
self._parms["intercept"] = value
@property
def Lambda(self):
# The value is stored under the key "lambda" (see __init__), so read and
# write that key here; using "Lambda" would raise a KeyError after __init__.
return self._parms["lambda"]
@Lambda.setter
def Lambda(self, value):
self._parms["lambda"] = value
@property
def max_active_predictors(self):
return self._parms["max_active_predictors"]
@max_active_predictors.setter
def max_active_predictors(self, value):
self._parms["max_active_predictors"] = value
@property
def checkpoint(self):
return self._parms["checkpoint"]
@checkpoint.setter
def checkpoint(self, value):
self._parms["checkpoint"] = value
@property
def objective_epsilon(self):
return self._parms["objective_epsilon"]
@objective_epsilon.setter
def objective_epsilon(self, value):
self._parms["objective_epsilon"] = value
@property
def gradient_epsilon(self):
return self._parms["gradient_epsilon"]
@gradient_epsilon.setter
def gradient_epsilon(self, value):
self._parms["gradient_epsilon"] = value
@property
def non_negative(self):
return self._parms["non_negative"]
@non_negative.setter
def non_negative(self, value):
self._parms["non_negative"] = value
@property
def compute_p_values(self):
return self._parms["compute_p_values"]
@compute_p_values.setter
def compute_p_values(self, value):
self._parms["compute_p_values"] = value
@property
def remove_collinear_columns(self):
return self._parms["remove_collinear_columns"]
@remove_collinear_columns.setter
def remove_collinear_columns(self, value):
self._parms["remove_collinear_columns"] = value
@property
def missing_values_handling(self):
return self._parms["missing_values_handling"]
@missing_values_handling.setter
def missing_values_handling(self, value):
self._parms["missing_values_handling"] = value