prior

  • Available in: GLM, GAM

  • Hyperparameter: no

Description

This option specifies the prior probability of class 1 in the response when family = "binomial". The default value is the observation frequency of class 1. This must be a value from (0,1) exclusive range, and defaults to -1 (no prior). This parameter is useful for logistic regression if the data has been sampled and the mean of response does not reflect reality.

Example

library(h2o)
h2o.init()

# import the cars dataset:
# this dataset is used to classify whether or not a car is economical based on
# the car's displacement, power, weight, and acceleration, and the year it was made
cars <- h2o.importFile("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv")

# convert response column to a factor
cars["economy_20mpg"] <- as.factor(cars["economy_20mpg"])

# set the predictor names and the response column name
predictors <- c("displacement","power","weight","acceleration","year")
response <- "economy_20mpg"

# split into train and validation
cars.splits <- h2o.splitFrame(data = cars, ratios = .8)
train <- cars.splits[[1]]
valid <- cars.splits[[2]]

# Build a GLM model and set a prior value of 0.5
car_glm1 <- h2o.glm(x = predictors, y = response, family = 'binomial', prior=0.5,
                    training_frame = train,
                    validation_frame = valid)

# Build a GLM model without a prior value
car_glm2 <- h2o.glm(x = predictors, y = response, family = 'binomial',
                    training_frame = train,
                    validation_frame = valid)

# Check the coefficients for both models
car_glm1@model$coefficients_table
car_glm2@model$coefficients_table
import h2o
from h2o.estimators.glm import H2OGeneralizedLinearEstimator
h2o.init()

# import the cars dataset:
# this dataset is used to classify whether or not a car is economical based on
# the car's displacement, power, weight, and acceleration, and the year it was made
cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv")

# convert response column to a factor
cars["economy_20mpg"] = cars["economy_20mpg"].asfactor()

# set the predictor names and the response column name
predictors = ["displacement","power","weight","acceleration","year"]
response = "economy_20mpg"

# split into train and validation sets
train, valid = cars.split_frame(ratios = [.8])

# Build a GLM model and set a prior value of 0.5
cars_glm1 = H2OGeneralizedLinearEstimator(family = 'binomial', prior=0.5)
cars_glm1.train(x = predictors, y = response,
                training_frame = train,
                validation_frame = valid)

# Build a GLM model and set a prior value of 0.5
cars_glm2 = H2OGeneralizedLinearEstimator(family = 'binomial')
cars_glm2.train(x = predictors, y = response,
                training_frame = train,
                validation_frame = valid)

# Check the coefficients for both models
coeff_table1 = cars_glm1._model_json['output']['coefficients_table']
coeff_table1

coeff_table2 = cars_glm2._model_json['output']['coefficients_table']
coeff_table2