export_checkpoints_dir
¶
- Available in: GBM, DRF, Deep Learning, GLM, PCA, GLRM, Naïve-Bayes, K-Means, Word2Vec, Stacked Ensembles, XGBoost, Aggregator, CoxPH, Isolation Forest, AutoML
- Hyperparameter: no
Description¶
This option is used to automatically export generated models to a specified directory.
Example¶
library(h2o)
h2o.init()

# import the airlines dataset
airlines <- h2o.importFile(
  "http://s3.amazonaws.com/h2o-public-test-data/smalldata/airlines/allyears2k_headers.zip",
  destination_frame = "air.hex"
)

# set the predictors and response
predictors <- c("DayofMonth", "DayOfWeek")
response <- "IsDepDelayed"

# set hyperparameters to build one model with 5 trees and one with 10 trees
hyper_parameters <- list(ntrees = c(5, 10))

# specify the export checkpoints directory
# (tempfile() only returns a unique path name; it does not create anything)
checkpoints_dir <- tempfile()

# perform grid search using GBM; each generated model is exported
# to checkpoints_dir
gbm_grid <- h2o.grid(
  "gbm",
  x = predictors,
  y = response,
  training_frame = airlines,
  distribution = "bernoulli",
  stopping_rounds = 3,
  stopping_metric = "AUTO",
  stopping_tolerance = 1e-2,
  learn_rate = 0.1,
  max_depth = 3,
  hyper_params = hyper_parameters,
  export_checkpoints_dir = checkpoints_dir,
  seed = 1234
)

# retrieve the number of files in the exported checkpoints directory
# (list.files() is required here: length(checkpoints_dir) is the length of
# the path string vector, which is always 1 regardless of the contents)
num_files <- length(list.files(checkpoints_dir))
num_files
# expected output, one exported file per model in the grid:
# [1] 2
# import necessary modules
import tempfile
from os import listdir

import h2o
from h2o.estimators.glm import H2OGeneralizedLinearEstimator
from h2o.estimators.gbm import H2OGradientBoostingEstimator
from h2o.grid.grid_search import H2OGridSearch

# connect to (or start) the H2O cluster
h2o.init()

# import the airlines dataset
airlines = h2o.import_file(
    "http://s3.amazonaws.com/h2o-public-test-data/smalldata/airlines/allyears2k_headers.zip",
    destination_frame="air.hex",
)

# columns used as predictors, and the response column
predictors = ["DayofMonth", "DayOfWeek"]
response = "IsDepDelayed"

# grid of hyperparameters: one model with 5 trees and one with 10 trees
hyper_parameters = {'ntrees': [5, 10]}

# search strategy and early-stopping options for the grid search
search_criteria = {
    'strategy': "RandomDiscrete",
    'seed': 1234,
    'stopping_rounds': 3,
    'stopping_metric': "AUTO",
    'stopping_tolerance': 1e-2,
}

# create the directory that will receive the exported checkpoints
checkpoints_dir = tempfile.mkdtemp()

# set up the GBM grid search, then train it with export enabled
gbm_grid = H2OGridSearch(
    H2OGradientBoostingEstimator,
    hyper_params=hyper_parameters,
    search_criteria=search_criteria,
)
gbm_grid.train(
    x=predictors,
    y=response,
    training_frame=airlines,
    distribution="bernoulli",
    learn_rate=0.1,
    max_depth=3,
    export_checkpoints_dir=checkpoints_dir,
)

# count the files written to the exported checkpoints directory
num_files = len(listdir(checkpoints_dir))
num_files
2