Source code for h2o.estimators.kmeans

#!/usr/bin/env python
# -*- encoding: utf-8 -*-
#
# This file is auto-generated by h2o-3/h2o-bindings/bin/gen_python.py
# Copyright 2016 H2O.ai;  Apache License Version 2.0 (see LICENSE for details)
#
from __future__ import absolute_import, division, print_function, unicode_literals

from h2o.estimators.estimator_base import H2OEstimator
from h2o.exceptions import H2OValueError
from h2o.frame import H2OFrame
from h2o.utils.typechecks import assert_is_type, Enum, numeric


[docs]class H2OKMeansEstimator(H2OEstimator): """ K-means Performs k-means clustering on an H2O dataset. """ algo = "kmeans" def __init__(self, **kwargs): super(H2OKMeansEstimator, self).__init__() self._parms = {} names_list = {"model_id", "training_frame", "validation_frame", "nfolds", "keep_cross_validation_predictions", "keep_cross_validation_fold_assignment", "fold_assignment", "fold_column", "ignored_columns", "ignore_const_cols", "score_each_iteration", "k", "estimate_k", "user_points", "max_iterations", "standardize", "seed", "init", "max_runtime_secs", "categorical_encoding"} if "Lambda" in kwargs: kwargs["lambda_"] = kwargs.pop("Lambda") for pname, pvalue in kwargs.items(): if pname == 'model_id': self._id = pvalue self._parms["model_id"] = pvalue elif pname in names_list: # Using setattr(...) will invoke type-checking of the arguments setattr(self, pname, pvalue) else: raise H2OValueError("Unknown parameter %s = %r" % (pname, pvalue)) @property def training_frame(self): """ Id of the training data frame. Type: ``H2OFrame``. """ return self._parms.get("training_frame") @training_frame.setter def training_frame(self, training_frame): assert_is_type(training_frame, None, H2OFrame) self._parms["training_frame"] = training_frame @property def validation_frame(self): """ Id of the validation data frame. Type: ``H2OFrame``. """ return self._parms.get("validation_frame") @validation_frame.setter def validation_frame(self, validation_frame): assert_is_type(validation_frame, None, H2OFrame) self._parms["validation_frame"] = validation_frame @property def nfolds(self): """ Number of folds for K-fold cross-validation (0 to disable or >= 2). Type: ``int`` (default: ``0``). """ return self._parms.get("nfolds") @nfolds.setter def nfolds(self, nfolds): assert_is_type(nfolds, None, int) self._parms["nfolds"] = nfolds @property def keep_cross_validation_predictions(self): """ Whether to keep the predictions of the cross-validation models. Type: ``bool`` (default: ``False``). """ return self._parms.get("keep_cross_validation_predictions") @keep_cross_validation_predictions.setter def keep_cross_validation_predictions(self, keep_cross_validation_predictions): assert_is_type(keep_cross_validation_predictions, None, bool) self._parms["keep_cross_validation_predictions"] = keep_cross_validation_predictions @property def keep_cross_validation_fold_assignment(self): """ Whether to keep the cross-validation fold assignment. Type: ``bool`` (default: ``False``). """ return self._parms.get("keep_cross_validation_fold_assignment") @keep_cross_validation_fold_assignment.setter def keep_cross_validation_fold_assignment(self, keep_cross_validation_fold_assignment): assert_is_type(keep_cross_validation_fold_assignment, None, bool) self._parms["keep_cross_validation_fold_assignment"] = keep_cross_validation_fold_assignment @property def fold_assignment(self): """ Cross-validation fold assignment scheme, if fold_column is not specified. The 'Stratified' option will stratify the folds based on the response variable, for classification problems. One of: ``"auto"``, ``"random"``, ``"modulo"``, ``"stratified"`` (default: ``"auto"``). """ return self._parms.get("fold_assignment") @fold_assignment.setter def fold_assignment(self, fold_assignment): assert_is_type(fold_assignment, None, Enum("auto", "random", "modulo", "stratified")) self._parms["fold_assignment"] = fold_assignment @property def fold_column(self): """ Column with cross-validation fold index assignment per observation. Type: ``str``. """ return self._parms.get("fold_column") @fold_column.setter def fold_column(self, fold_column): assert_is_type(fold_column, None, str) self._parms["fold_column"] = fold_column @property def ignored_columns(self): """ Names of columns to ignore for training. Type: ``List[str]``. """ return self._parms.get("ignored_columns") @ignored_columns.setter def ignored_columns(self, ignored_columns): assert_is_type(ignored_columns, None, [str]) self._parms["ignored_columns"] = ignored_columns @property def ignore_const_cols(self): """ Ignore constant columns. Type: ``bool`` (default: ``True``). """ return self._parms.get("ignore_const_cols") @ignore_const_cols.setter def ignore_const_cols(self, ignore_const_cols): assert_is_type(ignore_const_cols, None, bool) self._parms["ignore_const_cols"] = ignore_const_cols @property def score_each_iteration(self): """ Whether to score during each iteration of model training. Type: ``bool`` (default: ``False``). """ return self._parms.get("score_each_iteration") @score_each_iteration.setter def score_each_iteration(self, score_each_iteration): assert_is_type(score_each_iteration, None, bool) self._parms["score_each_iteration"] = score_each_iteration @property def k(self): """ The max. number of clusters. If estimate_k is disabled, the model will find k centroids, otherwise it will find up to k centroids. Type: ``int`` (default: ``1``). """ return self._parms.get("k") @k.setter def k(self, k): assert_is_type(k, None, int) self._parms["k"] = k @property def estimate_k(self): """ Whether to estimate the number of clusters (<=k) iteratively and deterministically. Type: ``bool`` (default: ``False``). """ return self._parms.get("estimate_k") @estimate_k.setter def estimate_k(self, estimate_k): assert_is_type(estimate_k, None, bool) self._parms["estimate_k"] = estimate_k @property def user_points(self): """ This option allows you to specify a dataframe, where each row represents an initial cluster center. The user- specified points must have the same number of columns as the training observations. The number of rows must equal the number of clusters Type: ``H2OFrame``. """ return self._parms.get("user_points") @user_points.setter def user_points(self, user_points): assert_is_type(user_points, None, H2OFrame) self._parms["user_points"] = user_points @property def max_iterations(self): """ Maximum training iterations (if estimate_k is enabled, then this is for each inner Lloyds iteration) Type: ``int`` (default: ``10``). """ return self._parms.get("max_iterations") @max_iterations.setter def max_iterations(self, max_iterations): assert_is_type(max_iterations, None, int) self._parms["max_iterations"] = max_iterations @property def standardize(self): """ Standardize columns before computing distances Type: ``bool`` (default: ``True``). """ return self._parms.get("standardize") @standardize.setter def standardize(self, standardize): assert_is_type(standardize, None, bool) self._parms["standardize"] = standardize @property def seed(self): """ RNG Seed Type: ``int`` (default: ``-1``). """ return self._parms.get("seed") @seed.setter def seed(self, seed): assert_is_type(seed, None, int) self._parms["seed"] = seed @property def init(self): """ Initialization mode One of: ``"random"``, ``"plus_plus"``, ``"furthest"``, ``"user"`` (default: ``"furthest"``). """ return self._parms.get("init") @init.setter def init(self, init): assert_is_type(init, None, Enum("random", "plus_plus", "furthest", "user")) self._parms["init"] = init @property def max_runtime_secs(self): """ Maximum allowed runtime in seconds for model training. Use 0 to disable. Type: ``float`` (default: ``0``). """ return self._parms.get("max_runtime_secs") @max_runtime_secs.setter def max_runtime_secs(self, max_runtime_secs): assert_is_type(max_runtime_secs, None, numeric) self._parms["max_runtime_secs"] = max_runtime_secs @property def categorical_encoding(self): """ Encoding scheme for categorical features One of: ``"auto"``, ``"enum"``, ``"one_hot_internal"``, ``"one_hot_explicit"``, ``"binary"``, ``"eigen"``, ``"label_encoder"``, ``"sort_by_response"``, ``"enum_limited"`` (default: ``"auto"``). """ return self._parms.get("categorical_encoding") @categorical_encoding.setter def categorical_encoding(self, categorical_encoding): assert_is_type(categorical_encoding, None, Enum("auto", "enum", "one_hot_internal", "one_hot_explicit", "binary", "eigen", "label_encoder", "sort_by_response", "enum_limited")) self._parms["categorical_encoding"] = categorical_encoding