Source code for h2o.estimators.kmeans

#!/usr/bin/env python
# -*- encoding: utf-8 -*-
#
# This file is auto-generated by h2o-3/h2o-bindings/bin/gen_python.py
# Copyright 2016 H2O.ai;  Apache License Version 2.0 (see LICENSE for details)
#
from __future__ import absolute_import, division, print_function, unicode_literals

from h2o.estimators.estimator_base import H2OEstimator
from h2o.exceptions import H2OValueError
from h2o.frame import H2OFrame
from h2o.utils.typechecks import assert_is_type, Enum, numeric


class H2OKMeansEstimator(H2OEstimator):
    """
    K-means estimator.

    Performs k-means clustering on an H2O dataset. Model parameters are passed to the
    constructor as keyword arguments, or assigned afterwards through the properties
    of the same name; every value is kept in the ``_parms`` dictionary.
    """

    algo = "kmeans"

    def __init__(self, **kwargs):
        """
        Create the estimator and apply the supplied parameters.

        :param kwargs: parameter names mapped to their values. ``model_id`` is stored
            directly (and also recorded as ``self._id``); every other recognised name
            is assigned through its property, which type-checks the value.
        :raises H2OValueError: if a keyword is not a recognised parameter name.
        """
        super(H2OKMeansEstimator, self).__init__()
        self._parms = {}
        accepted = {
            "model_id", "training_frame", "validation_frame", "nfolds",
            "keep_cross_validation_predictions", "keep_cross_validation_fold_assignment",
            "fold_assignment", "fold_column", "ignored_columns", "ignore_const_cols",
            "score_each_iteration", "k", "estimate_k", "user_points", "max_iterations",
            "standardize", "seed", "init", "max_runtime_secs", "categorical_encoding",
        }
        # "Lambda" is re-keyed to "lambda_". That name is not in the accepted set, so
        # such an argument is still rejected below, reported as "lambda_".
        if "Lambda" in kwargs:
            kwargs["lambda_"] = kwargs.pop("Lambda")

        for key, val in kwargs.items():
            if key == "model_id":
                # The model id bypasses the property machinery and is stored as-is.
                self._id = val
                self._parms["model_id"] = val
                continue
            if key not in accepted:
                raise H2OValueError("Unknown parameter %s = %r" % (key, val))
            # Going through setattr() routes the value to the property setter, which
            # performs the type check before storing it.
            setattr(self, key, val)

    # Every setter below validates its argument with assert_is_type(...) and then
    # stores it in self._parms; None is always accepted next to the parameter's type.
    # Every getter returns the stored value, or None when nothing has been stored.

    @property
    def training_frame(self):
        """
        H2OFrame: The training data frame. (Not required, to allow initial validation of
        model parameters.)
        """
        return self._parms.get("training_frame")

    @training_frame.setter
    def training_frame(self, training_frame):
        assert_is_type(training_frame, None, H2OFrame)
        self._parms["training_frame"] = training_frame

    @property
    def validation_frame(self):
        """H2OFrame: The validation data frame."""
        return self._parms.get("validation_frame")

    @validation_frame.setter
    def validation_frame(self, validation_frame):
        assert_is_type(validation_frame, None, H2OFrame)
        self._parms["validation_frame"] = validation_frame

    @property
    def nfolds(self):
        """int: Number of folds for N-fold cross-validation; 0 disables it, otherwise >= 2. (Default: 0)"""
        return self._parms.get("nfolds")

    @nfolds.setter
    def nfolds(self, nfolds):
        assert_is_type(nfolds, None, int)
        self._parms["nfolds"] = nfolds

    @property
    def keep_cross_validation_predictions(self):
        """bool: Whether the predictions of the cross-validation models are kept. (Default: False)"""
        return self._parms.get("keep_cross_validation_predictions")

    @keep_cross_validation_predictions.setter
    def keep_cross_validation_predictions(self, keep_cross_validation_predictions):
        assert_is_type(keep_cross_validation_predictions, None, bool)
        self._parms["keep_cross_validation_predictions"] = keep_cross_validation_predictions

    @property
    def keep_cross_validation_fold_assignment(self):
        """bool: Whether the cross-validation fold assignment is kept. (Default: False)"""
        return self._parms.get("keep_cross_validation_fold_assignment")

    @keep_cross_validation_fold_assignment.setter
    def keep_cross_validation_fold_assignment(self, keep_cross_validation_fold_assignment):
        assert_is_type(keep_cross_validation_fold_assignment, None, bool)
        self._parms["keep_cross_validation_fold_assignment"] = keep_cross_validation_fold_assignment

    @property
    def fold_assignment(self):
        """
        Enum["auto", "random", "modulo", "stratified"]: Cross-validation fold assignment scheme,
        used when fold_column is not specified. The 'Stratified' option stratifies the folds
        based on the response variable, for classification problems. (Default: "auto")
        """
        return self._parms.get("fold_assignment")

    @fold_assignment.setter
    def fold_assignment(self, fold_assignment):
        assert_is_type(fold_assignment, None, Enum("auto", "random", "modulo", "stratified"))
        self._parms["fold_assignment"] = fold_assignment

    @property
    def fold_column(self):
        """str: Column holding the cross-validation fold index assignment of each observation."""
        return self._parms.get("fold_column")

    @fold_column.setter
    def fold_column(self, fold_column):
        assert_is_type(fold_column, None, str)
        self._parms["fold_column"] = fold_column

    @property
    def ignored_columns(self):
        """List[str]: Names of the columns to ignore for training."""
        return self._parms.get("ignored_columns")

    @ignored_columns.setter
    def ignored_columns(self, ignored_columns):
        assert_is_type(ignored_columns, None, [str])
        self._parms["ignored_columns"] = ignored_columns

    @property
    def ignore_const_cols(self):
        """bool: Whether constant columns are ignored. (Default: True)"""
        return self._parms.get("ignore_const_cols")

    @ignore_const_cols.setter
    def ignore_const_cols(self, ignore_const_cols):
        assert_is_type(ignore_const_cols, None, bool)
        self._parms["ignore_const_cols"] = ignore_const_cols

    @property
    def score_each_iteration(self):
        """bool: Whether to score during each iteration of model training. (Default: False)"""
        return self._parms.get("score_each_iteration")

    @score_each_iteration.setter
    def score_each_iteration(self, score_each_iteration):
        assert_is_type(score_each_iteration, None, bool)
        self._parms["score_each_iteration"] = score_each_iteration

    @property
    def k(self):
        """
        int: The maximum number of clusters. With estimate_k disabled the model finds k
        centroids; otherwise it finds up to k centroids. (Default: 1)
        """
        return self._parms.get("k")

    @k.setter
    def k(self, k):
        assert_is_type(k, None, int)
        self._parms["k"] = k

    @property
    def estimate_k(self):
        """
        bool: Whether to estimate the number of clusters (<=k) iteratively and
        deterministically. (Default: False)
        """
        return self._parms.get("estimate_k")

    @estimate_k.setter
    def estimate_k(self, estimate_k):
        assert_is_type(estimate_k, None, bool)
        self._parms["estimate_k"] = estimate_k

    @property
    def user_points(self):
        """H2OFrame: User-specified points."""
        return self._parms.get("user_points")

    @user_points.setter
    def user_points(self, user_points):
        assert_is_type(user_points, None, H2OFrame)
        self._parms["user_points"] = user_points

    @property
    def max_iterations(self):
        """
        int: Maximum number of training iterations (when estimate_k is enabled, this applies
        to each inner Lloyds iteration). (Default: 10)
        """
        return self._parms.get("max_iterations")

    @max_iterations.setter
    def max_iterations(self, max_iterations):
        assert_is_type(max_iterations, None, int)
        self._parms["max_iterations"] = max_iterations

    @property
    def standardize(self):
        """bool: Whether columns are standardized before distances are computed. (Default: True)"""
        return self._parms.get("standardize")

    @standardize.setter
    def standardize(self, standardize):
        assert_is_type(standardize, None, bool)
        self._parms["standardize"] = standardize

    @property
    def seed(self):
        """int: Seed of the random number generator. (Default: -1)"""
        return self._parms.get("seed")

    @seed.setter
    def seed(self, seed):
        assert_is_type(seed, None, int)
        self._parms["seed"] = seed

    @property
    def init(self):
        """Enum["random", "plus_plus", "furthest", "user"]: Initialization mode. (Default: "furthest")"""
        return self._parms.get("init")

    @init.setter
    def init(self, init):
        assert_is_type(init, None, Enum("random", "plus_plus", "furthest", "user"))
        self._parms["init"] = init

    @property
    def max_runtime_secs(self):
        """float: Maximum allowed runtime, in seconds, for model training; 0 disables the limit. (Default: 0.0)"""
        return self._parms.get("max_runtime_secs")

    @max_runtime_secs.setter
    def max_runtime_secs(self, max_runtime_secs):
        assert_is_type(max_runtime_secs, None, numeric)
        self._parms["max_runtime_secs"] = max_runtime_secs

    @property
    def categorical_encoding(self):
        """
        Enum["auto", "enum", "one_hot_internal", "one_hot_explicit", "binary", "eigen"]: Encoding
        scheme for categorical features. (Default: "auto")
        """
        return self._parms.get("categorical_encoding")

    @categorical_encoding.setter
    def categorical_encoding(self, categorical_encoding):
        assert_is_type(categorical_encoding, None, Enum("auto", "enum", "one_hot_internal", "one_hot_explicit", "binary", "eigen"))
        self._parms["categorical_encoding"] = categorical_encoding