export_checkpoints_dir
Available in: GBM, DRF, Deep Learning, GLM, GAM, PCA, GLRM, Naïve-Bayes, K-Means, Word2Vec, Stacked Ensembles, XGBoost, Aggregator, CoxPH, Isolation Forest, AutoML
Hyperparameter: no
Description
Use this option to specify a directory to which generated models are automatically exported.
Example
# R example: export every model built by a GBM grid search
library(h2o)
h2o.init()

# import the airlines dataset
airlines <- h2o.importFile(
  "http://s3.amazonaws.com/h2o-public-test-data/smalldata/airlines/allyears2k_headers.zip",
  destination_frame = "air.hex"
)

# set the predictors and response
predictors <- c("DayofMonth", "DayOfWeek")
response <- "IsDepDelayed"

# set hyperparameters to build one model with 5 trees and one with 10 trees
hyper_parameters <- list(ntrees = c(5, 10))

# specify the export checkpoints directory
checkpoints_dir <- tempfile()

# perform grid search using GBM; the models built by the grid are exported
# to checkpoints_dir
gbm_grid <- h2o.grid(
  "gbm",
  x = predictors,
  y = response,
  training_frame = airlines,
  distribution = "bernoulli",
  stopping_rounds = 3,
  stopping_metric = "AUTO",
  stopping_tolerance = 1e-2,
  learn_rate = 0.1,
  max_depth = 3,
  hyper_params = hyper_parameters,
  export_checkpoints_dir = checkpoints_dir,
  seed = 1234
)

# retrieve the number of files in the exported checkpoints directory
# (list the directory contents: length(checkpoints_dir) alone would only
# measure the one-element path vector and always return 1)
num_files <- length(list.files(checkpoints_dir))
num_files
[1] 2
# Python example: export every model built by a GBM grid search
import h2o
h2o.init()

# import necessary modules
import tempfile
from os import listdir
from h2o.estimators.gbm import H2OGradientBoostingEstimator
from h2o.grid.grid_search import H2OGridSearch

# import the airlines dataset
airlines = h2o.import_file(
    "http://s3.amazonaws.com/h2o-public-test-data/smalldata/airlines/allyears2k_headers.zip",
    destination_frame="air.hex",
)

# set the predictors and response
predictors = ["DayofMonth", "DayOfWeek"]
response = "IsDepDelayed"

# set hyperparameters to build one model with 5 trees and one with 10 trees
hyper_parameters = {'ntrees': [5, 10]}

# specify the grid search criteria (search strategy, seed, and the
# early-stopping settings applied to the search itself)
search_crit = {
    'strategy': "RandomDiscrete",
    'seed': 1234,
    'stopping_rounds': 3,
    'stopping_metric': "AUTO",
    'stopping_tolerance': 1e-2,
}

# create an export checkpoints directory
checkpoints_dir = tempfile.mkdtemp()

# perform grid search using GBM; the models built by the grid are exported
# to checkpoints_dir
air_grid = H2OGridSearch(
    H2OGradientBoostingEstimator,
    hyper_params=hyper_parameters,
    search_criteria=search_crit,
)
air_grid.train(
    x=predictors,
    y=response,
    training_frame=airlines,
    distribution="bernoulli",
    learn_rate=0.1,
    max_depth=3,
    export_checkpoints_dir=checkpoints_dir,
)

# retrieve the number of files in the exported checkpoints directory
num_files = len(listdir(checkpoints_dir))
num_files
2