#!/usr/bin/env python
# -*- encoding: utf-8 -*-
#
# This file is auto-generated by h2o-3/h2o-bindings/bin/gen_python.py
# Copyright 2016 H2O.ai; Apache License Version 2.0 (see LICENSE for details)
#
from __future__ import absolute_import, division, print_function, unicode_literals
from h2o.estimators.estimator_base import H2OEstimator
from h2o.exceptions import H2OValueError
from h2o.frame import H2OFrame
from h2o.utils.typechecks import assert_is_type, Enum, numeric
import h2o
[docs]class H2OTargetEncoderEstimator(H2OEstimator):
"""
TargetEncoder
"""
algo = "targetencoder"
param_names = {"blending", "k", "f", "data_leakage_handling", "noise_level", "seed", "model_id", "ignored_columns",
"training_frame", "fold_column", "response_column"}
def __init__(self, **kwargs):
super(H2OTargetEncoderEstimator, self).__init__()
self._parms = {}
for pname, pvalue in kwargs.items():
if pname == 'model_id':
self._id = pvalue
self._parms["model_id"] = pvalue
elif pname in self.param_names:
# Using setattr(...) will invoke type-checking of the arguments
setattr(self, pname, pvalue)
else:
raise H2OValueError("Unknown parameter %s = %r" % (pname, pvalue))
@property
def blending(self):
"""
Blending enabled/disabled
Type: ``bool`` (default: ``False``).
:examples:
>>> titanic = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv")
>>> predictors = ["home.dest", "cabin", "embarked"]
>>> response = "survived"
>>> titanic["survived"] = titanic["survived"].asfactor()
>>> fold_col = "kfold_column"
>>> titanic[fold_col] = titanic.kfold_column(n_folds=5, seed=1234)
>>> titanic_te = H2OTargetEncoderEstimator(k=35,
... f=25,
... blending=True)
>>> titanic_te.train(x=predictors,
... y=response,
... training_frame=titanic)
>>> titanic_te
"""
return self._parms.get("blending")
@blending.setter
def blending(self, blending):
assert_is_type(blending, None, bool)
self._parms["blending"] = blending
@property
def k(self):
"""
Inflection point. Used for blending (if enabled). Blending is to be enabled separately using the 'blending'
parameter.
Type: ``float`` (default: ``10``).
:examples:
>>> titanic = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv")
>>> predictors = ["home.dest", "cabin", "embarked"]
>>> response = "survived"
>>> titanic["survived"] = titanic["survived"].asfactor()
>>> fold_col = "kfold_column"
>>> titanic[fold_col] = titanic.kfold_column(n_folds=5, seed=1234)
>>> titanic_te = H2OTargetEncoderEstimator(k=35,
... f=25,
... blending=True)
>>> titanic_te.train(x=predictors,
... y=response,
... training_frame=titanic)
>>> titanic_te
"""
return self._parms.get("k")
@k.setter
def k(self, k):
assert_is_type(k, None, numeric)
self._parms["k"] = k
@property
def f(self):
"""
Smoothing. Used for blending (if enabled). Blending is to be enabled separately using the 'blending' parameter.
Type: ``float`` (default: ``20``).
:examples:
>>> titanic = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv")
>>> predictors = ["home.dest", "cabin", "embarked"]
>>> response = "survived"
>>> titanic["survived"] = titanic["survived"].asfactor()
>>> fold_col = "kfold_column"
>>> titanic[fold_col] = titanic.kfold_column(n_folds=5, seed=1234)
>>> titanic_te = H2OTargetEncoderEstimator(k=35,
... f=25,
... blending=True)
>>> titanic_te.train(x=predictors,
... y=response,
... training_frame=titanic)
>>> titanic_te
"""
return self._parms.get("f")
@f.setter
def f(self, f):
assert_is_type(f, None, numeric)
self._parms["f"] = f
@property
def data_leakage_handling(self):
"""
Data leakage handling strategy.
One of: ``"none"``, ``"k_fold"``, ``"leave_one_out"`` (default: ``"none"``).
:examples:
>>> titanic = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv")
>>> predictors = ["home.dest", "cabin", "embarked"]
>>> response = "survived"
>>> titanic["survived"] = titanic["survived"].asfactor()
>>> fold_col = "kfold_column"
>>> titanic[fold_col] = titanic.kfold_column(n_folds=5, seed=1234)
>>> titanic_te = H2OTargetEncoderEstimator(k=35,
... f=25,
... data_leakage_handling="k_fold",
... blending=True)
>>> titanic_te.train(x=predictors,
... y=response,
... training_frame=titanic)
>>> titanic_te
"""
return self._parms.get("data_leakage_handling")
@data_leakage_handling.setter
def data_leakage_handling(self, data_leakage_handling):
assert_is_type(data_leakage_handling, None, Enum("none", "k_fold", "leave_one_out"))
self._parms["data_leakage_handling"] = data_leakage_handling
@property
def noise_level(self):
"""
Noise level
Type: ``float`` (default: ``0.01``).
"""
return self._parms.get("noise_level")
@noise_level.setter
def noise_level(self, noise_level):
assert_is_type(noise_level, None, numeric)
self._parms["noise_level"] = noise_level
@property
def seed(self):
"""
Seed for the specified noise level
Type: ``int`` (default: ``-1``).
"""
return self._parms.get("seed")
@seed.setter
def seed(self, seed):
assert_is_type(seed, None, int)
self._parms["seed"] = seed
@property
def ignored_columns(self):
"""
Names of columns to ignore for training.
Type: ``List[str]``.
"""
return self._parms.get("ignored_columns")
@ignored_columns.setter
def ignored_columns(self, ignored_columns):
assert_is_type(ignored_columns, None, [str])
self._parms["ignored_columns"] = ignored_columns
@property
def training_frame(self):
"""
Id of the training data frame.
Type: ``H2OFrame``.
:examples:
>>> titanic = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv")
>>> predictors = ["home.dest", "cabin", "embarked"]
>>> response = "survived"
>>> titanic["survived"] = titanic["survived"].asfactor()
>>> fold_col = "kfold_column"
>>> titanic[fold_col] = titanic.kfold_column(n_folds=5, seed=1234)
>>> titanic_te = H2OTargetEncoderEstimator(k=35,
... f=25,
... blending=True)
>>> titanic_te.train(x=predictors,
... y=response,
... training_frame=titanic)
>>> titanic_te
"""
return self._parms.get("training_frame")
@training_frame.setter
def training_frame(self, training_frame):
self._parms["training_frame"] = H2OFrame._validate(training_frame, 'training_frame')
@property
def fold_column(self):
"""
Column with cross-validation fold index assignment per observation.
Type: ``str``.
:examples:
>>> titanic = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/gbm_test/titanic.csv")
>>> predictors = ["home.dest", "cabin", "embarked"]
>>> response = "survived"
>>> titanic["survived"] = titanic["survived"].asfactor()
>>> fold_col = "kfold_column"
>>> titanic[fold_col] = titanic.kfold_column(n_folds=5, seed=1234)
>>> titanic_te = H2OTargetEncoderEstimator(k=35,
... f=25,
... blending=True)
>>> titanic_te.train(x=predictors,
... y=response,
... training_frame=titanic)
>>> titanic_te
"""
return self._parms.get("fold_column")
@fold_column.setter
def fold_column(self, fold_column):
assert_is_type(fold_column, None, str)
self._parms["fold_column"] = fold_column
@property
def response_column(self):
"""
Response variable column.
Type: ``str``.
"""
return self._parms.get("response_column")
@response_column.setter
def response_column(self, response_column):
assert_is_type(response_column, None, str)
self._parms["response_column"] = response_column