export_checkpoints_dir

  • Available in: GBM, DRF, Deep Learning, GLM, PCA, GLRM, Naïve-Bayes, K-Means, Word2Vec, Stacked Ensembles, XGBoost, Aggregator, CoxPH, Isolation Forest, AutoML
  • Hyperparameter: no

Description

This option is used to automatically export generated models to a specified directory.

Example

# R example: export every model built by a GBM grid search to a directory
library(h2o)
h2o.init()

# import the airlines dataset
airlines <- h2o.importFile(
  "http://s3.amazonaws.com/h2o-public-test-data/smalldata/airlines/allyears2k_headers.zip",
  destination_frame = "air.hex"
)

# set the predictors and response
predictors <- c("DayofMonth", "DayOfWeek")
response <- "IsDepDelayed"

# set hyperparameters to build one model with 5 trees and one with 10 trees
hyper_parameters <- list(ntrees = c(5, 10))

# specify the export checkpoints directory
# (tempfile() only generates a unique path name; it does not create anything)
checkpoints_dir <- tempfile()

# perform grid search using GBM
gbm_grid <- h2o.grid("gbm",
                     x = predictors,
                     y = response,
                     training_frame = airlines,
                     distribution = "bernoulli",
                     stopping_rounds = 3,
                     stopping_metric = "AUTO",
                     stopping_tolerance = 1e-2,
                     learn_rate = 0.1,
                     max_depth = 3,
                     hyper_params = hyper_parameters,
                     export_checkpoints_dir = checkpoints_dir,
                     seed = 1234)

# retrieve the number of files in the exported checkpoints directory
# (count the directory's entries with list.files(); length() of the path
# string itself is always 1, no matter how many models were exported)
num_files <- length(list.files(checkpoints_dir))
num_files
[1] 2
# Python example: export every model built by a GBM grid search to a directory
import h2o
h2o.init()

# modules used by the example
from h2o.estimators.glm import H2OGeneralizedLinearEstimator  # NOTE(review): not referenced in this example
import tempfile
from os import listdir
from h2o.estimators.gbm import H2OGradientBoostingEstimator
from h2o.grid.grid_search import H2OGridSearch

# load the airlines dataset into the cluster
airlines_url = "http://s3.amazonaws.com/h2o-public-test-data/smalldata/airlines/allyears2k_headers.zip"
airlines = h2o.import_file(airlines_url, destination_frame="air.hex")

# predictor columns and the response column
response = "IsDepDelayed"
predictors = ["DayofMonth", "DayOfWeek"]

# grid of hyperparameters: one model with 5 trees and one with 10 trees
hyper_parameters = dict(ntrees=[5, 10])

# search strategy and early-stopping settings for the grid
search_crit = dict(
    strategy="RandomDiscrete",
    seed=1234,
    stopping_rounds=3,
    stopping_metric="AUTO",
    stopping_tolerance=1e-2,
)

# temporary directory that will receive the exported models
checkpoints_dir = tempfile.mkdtemp()

# model settings shared by every GBM in the grid
gbm_settings = dict(distribution="bernoulli", learn_rate=0.1, max_depth=3)

# run the GBM grid search, exporting each model to checkpoints_dir
air_grid = H2OGridSearch(
    H2OGradientBoostingEstimator,
    hyper_params=hyper_parameters,
    search_criteria=search_crit,
)
air_grid.train(
    x=predictors,
    y=response,
    training_frame=airlines,
    export_checkpoints_dir=checkpoints_dir,
    **gbm_settings
)

# count the files written to the export checkpoints directory
exported_files = listdir(checkpoints_dir)
num_files = len(exported_files)
num_files
2