Source code for h2o.estimators.aggregator

#!/usr/bin/env python
# -*- encoding: utf-8 -*-
#
# This file is auto-generated by h2o-3/h2o-bindings/bin/gen_python.py
# Copyright 2016 H2O.ai;  Apache License Version 2.0 (see LICENSE for details)
#
from __future__ import absolute_import, division, print_function, unicode_literals

from h2o.estimators.estimator_base import H2OEstimator
from h2o.exceptions import H2OValueError
from h2o.frame import H2OFrame
from h2o.utils.typechecks import assert_is_type, Enum, numeric


class H2OAggregatorEstimator(H2OEstimator):
    """
    Aggregator

    Estimator for the ``"aggregator"`` algo. Every supported parameter is exposed as a
    property whose setter type-checks the value and stores it in ``self._parms``.
    """

    algo = "aggregator"
    param_names = {"model_id", "training_frame", "response_column", "ignored_columns", "ignore_const_cols",
                   "target_num_exemplars", "rel_tol_num_exemplars", "transform", "categorical_encoding",
                   "save_mapping_frame", "num_iteration_without_new_exemplar", "export_checkpoints_dir"}

    def __init__(self, **kwargs):
        """
        Create the estimator and record the supplied parameters.

        :param kwargs: parameter values keyed by name; every key must be ``model_id`` or
            one of the names listed in ``param_names``.
        :raises H2OValueError: if a key is not a known parameter name.
        """
        super(H2OAggregatorEstimator, self).__init__()
        self._parms = {}
        for pname, pvalue in kwargs.items():
            if pname == 'model_id':
                # model_id has no property on this class: it is stored both as the
                # estimator id and as a regular parameter.
                self._id = pvalue
                self._parms["model_id"] = pvalue
            elif pname in self.param_names:
                # Using setattr(...) will invoke type-checking of the arguments
                setattr(self, pname, pvalue)
            else:
                raise H2OValueError("Unknown parameter %s = %r" % (pname, pvalue))
        # Set after the loop so it is present regardless of the kwargs that were given.
        self._parms["_rest_version"] = 99

    @property
    def training_frame(self):
        """
        Id of the training data frame.

        Type: ``H2OFrame``.

        :examples:

        >>> df = h2o.create_frame(rows=10000,
        ...                       cols=10,
        ...                       categorical_fraction=0.6,
        ...                       integer_fraction=0,
        ...                       binary_fraction=0,
        ...                       real_range=100,
        ...                       integer_range=100,
        ...                       missing_fraction=0,
        ...                       factors=100,
        ...                       seed=1234)
        >>> params = {"target_num_exemplars": 1000,
        ...           "rel_tol_num_exemplars": 0.5,
        ...           "categorical_encoding": "eigen",
        ...           "num_iteration_without_new_exemplar": 400}
        >>> agg = H2OAggregatorEstimator(**params)
        >>> agg.train(training_frame=df)
        >>> new_df = agg.aggregated_frame
        >>> new_df
        """
        return self._parms.get("training_frame")

    @training_frame.setter
    def training_frame(self, training_frame):
        self._parms["training_frame"] = H2OFrame._validate(training_frame, 'training_frame')

    @property
    def response_column(self):
        """
        Response variable column.

        Type: ``str``.
        """
        return self._parms.get("response_column")

    @response_column.setter
    def response_column(self, response_column):
        assert_is_type(response_column, None, str)
        self._parms["response_column"] = response_column

    @property
    def ignored_columns(self):
        """
        Names of columns to ignore for training.

        Type: ``List[str]``.
        """
        return self._parms.get("ignored_columns")

    @ignored_columns.setter
    def ignored_columns(self, ignored_columns):
        assert_is_type(ignored_columns, None, [str])
        self._parms["ignored_columns"] = ignored_columns

    @property
    def ignore_const_cols(self):
        """
        Ignore constant columns.

        Type: ``bool`` (default: ``True``).

        :examples:

        >>> df = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/airlines/allyears2k_headers.zip")
        >>> params = {"ignore_const_cols": False,
        ...           "target_num_exemplars": 500,
        ...           "rel_tol_num_exemplars": 0.3,
        ...           "transform": "standardize",
        ...           "categorical_encoding": "eigen"}
        >>> model = H2OAggregatorEstimator(**params)
        >>> model.train(training_frame=df)
        >>> new_df = model.aggregated_frame
        >>> new_df
        """
        return self._parms.get("ignore_const_cols")

    @ignore_const_cols.setter
    def ignore_const_cols(self, ignore_const_cols):
        assert_is_type(ignore_const_cols, None, bool)
        self._parms["ignore_const_cols"] = ignore_const_cols

    @property
    def target_num_exemplars(self):
        """
        Targeted number of exemplars

        Type: ``int`` (default: ``5000``).

        :examples:

        >>> df = h2o.create_frame(rows=10000,
        ...                       cols=10,
        ...                       categorical_fraction=0.6,
        ...                       integer_fraction=0,
        ...                       binary_fraction=0,
        ...                       real_range=100,
        ...                       integer_range=100,
        ...                       missing_fraction=0,
        ...                       factors=100,
        ...                       seed=1234)
        >>> params = {"target_num_exemplars": 1000,
        ...           "rel_tol_num_exemplars": 0.5,
        ...           "categorical_encoding": "eigen",
        ...           "num_iteration_without_new_exemplar": 400}
        >>> agg = H2OAggregatorEstimator(**params)
        >>> agg.train(training_frame=df)
        >>> new_df = agg.aggregated_frame
        >>> new_df
        """
        return self._parms.get("target_num_exemplars")

    @target_num_exemplars.setter
    def target_num_exemplars(self, target_num_exemplars):
        assert_is_type(target_num_exemplars, None, int)
        self._parms["target_num_exemplars"] = target_num_exemplars

    @property
    def rel_tol_num_exemplars(self):
        """
        Relative tolerance for number of exemplars (e.g., 0.5 is +/- 50 percent)

        Type: ``float`` (default: ``0.5``).

        :examples:

        >>> df = h2o.create_frame(rows=10000,
        ...                       cols=10,
        ...                       categorical_fraction=0.6,
        ...                       integer_fraction=0,
        ...                       binary_fraction=0,
        ...                       real_range=100,
        ...                       integer_range=100,
        ...                       missing_fraction=0,
        ...                       factors=100,
        ...                       seed=1234)
        >>> params = {"target_num_exemplars": 1000,
        ...           "rel_tol_num_exemplars": 0.5,
        ...           "categorical_encoding": "eigen",
        ...           "num_iteration_without_new_exemplar": 400}
        >>> agg = H2OAggregatorEstimator(**params)
        >>> agg.train(training_frame=df)
        >>> new_df = agg.aggregated_frame
        >>> new_df
        """
        return self._parms.get("rel_tol_num_exemplars")

    @rel_tol_num_exemplars.setter
    def rel_tol_num_exemplars(self, rel_tol_num_exemplars):
        assert_is_type(rel_tol_num_exemplars, None, numeric)
        self._parms["rel_tol_num_exemplars"] = rel_tol_num_exemplars

    @property
    def transform(self):
        """
        Transformation of training data

        One of: ``"none"``, ``"standardize"``, ``"normalize"``, ``"demean"``, ``"descale"``  (default:
        ``"normalize"``).

        :examples:

        >>> df = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/airlines/allyears2k_headers.zip")
        >>> params = {"ignore_const_cols": False,
        ...           "target_num_exemplars": 500,
        ...           "rel_tol_num_exemplars": 0.3,
        ...           "transform": "standardize",
        ...           "categorical_encoding": "eigen"}
        >>> model = H2OAggregatorEstimator(**params)
        >>> model.train(training_frame=df)
        >>> new_df = model.aggregated_frame
        """
        return self._parms.get("transform")

    @transform.setter
    def transform(self, transform):
        assert_is_type(transform, None, Enum("none", "standardize", "normalize", "demean", "descale"))
        self._parms["transform"] = transform

    @property
    def categorical_encoding(self):
        """
        Encoding scheme for categorical features

        One of: ``"auto"``, ``"enum"``, ``"one_hot_internal"``, ``"one_hot_explicit"``, ``"binary"``, ``"eigen"``,
        ``"label_encoder"``, ``"sort_by_response"``, ``"enum_limited"``  (default: ``"auto"``).

        :examples:

        >>> df = h2o.create_frame(rows=10000,
        ...                       cols=10,
        ...                       categorical_fraction=0.6,
        ...                       integer_fraction=0,
        ...                       binary_fraction=0,
        ...                       real_range=100,
        ...                       integer_range=100,
        ...                       missing_fraction=0,
        ...                       factors=100,
        ...                       seed=1234)
        >>> params = {"target_num_exemplars": 1000,
        ...           "rel_tol_num_exemplars": 0.5,
        ...           "categorical_encoding": "eigen"}
        >>> agg = H2OAggregatorEstimator(**params)
        >>> agg.train(training_frame=df)
        >>> new_df = agg.aggregated_frame
        >>> new_df
        """
        return self._parms.get("categorical_encoding")

    @categorical_encoding.setter
    def categorical_encoding(self, categorical_encoding):
        assert_is_type(categorical_encoding, None,
                       Enum("auto", "enum", "one_hot_internal", "one_hot_explicit", "binary", "eigen",
                            "label_encoder", "sort_by_response", "enum_limited"))
        self._parms["categorical_encoding"] = categorical_encoding

    @property
    def save_mapping_frame(self):
        """
        Whether to export the mapping of the aggregated frame

        Type: ``bool`` (default: ``False``).

        :examples:

        >>> df = h2o.create_frame(rows=10000,
        ...                       cols=10,
        ...                       categorical_fraction=0.6,
        ...                       integer_fraction=0,
        ...                       binary_fraction=0,
        ...                       real_range=100,
        ...                       integer_range=100,
        ...                       missing_fraction=0,
        ...                       factors=100,
        ...                       seed=1234)
        >>> params = {"target_num_exemplars": 1000,
        ...           "rel_tol_num_exemplars": 0.5,
        ...           "categorical_encoding": "eigen",
        ...           "save_mapping_frame": True}
        >>> agg = H2OAggregatorEstimator(**params)
        >>> agg.train(training_frame=df)
        >>> new_df = agg.aggregated_frame
        >>> new_df
        """
        return self._parms.get("save_mapping_frame")

    @save_mapping_frame.setter
    def save_mapping_frame(self, save_mapping_frame):
        assert_is_type(save_mapping_frame, None, bool)
        self._parms["save_mapping_frame"] = save_mapping_frame

    @property
    def num_iteration_without_new_exemplar(self):
        """
        The number of iterations to run before aggregator exits if the number of exemplars collected didn't change

        Type: ``int`` (default: ``500``).

        :examples:

        >>> df = h2o.create_frame(rows=10000,
        ...                       cols=10,
        ...                       categorical_fraction=0.6,
        ...                       integer_fraction=0,
        ...                       binary_fraction=0,
        ...                       real_range=100,
        ...                       integer_range=100,
        ...                       missing_fraction=0,
        ...                       factors=100,
        ...                       seed=1234)
        >>> params = {"target_num_exemplars": 1000,
        ...           "rel_tol_num_exemplars": 0.5,
        ...           "categorical_encoding": "eigen",
        ...           "num_iteration_without_new_exemplar": 400}
        >>> agg = H2OAggregatorEstimator(**params)
        >>> agg.train(training_frame=df)
        >>> new_df = agg.aggregated_frame
        >>> new_df
        """
        return self._parms.get("num_iteration_without_new_exemplar")

    @num_iteration_without_new_exemplar.setter
    def num_iteration_without_new_exemplar(self, num_iteration_without_new_exemplar):
        assert_is_type(num_iteration_without_new_exemplar, None, int)
        self._parms["num_iteration_without_new_exemplar"] = num_iteration_without_new_exemplar

    @property
    def export_checkpoints_dir(self):
        """
        Automatically export generated models to this directory.

        Type: ``str``.

        :examples:

        >>> import tempfile
        >>> from os import listdir
        >>> df = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/airlines/allyears2k_headers.zip")
        >>> checkpoints_dir = tempfile.mkdtemp()
        >>> model = H2OAggregatorEstimator(target_num_exemplars=500,
        ...                                rel_tol_num_exemplars=0.3,
        ...                                export_checkpoints_dir=checkpoints_dir)
        >>> model.train(training_frame=df)
        >>> new_df = model.aggregated_frame
        >>> new_df
        >>> len(listdir(checkpoints_dir))
        """
        return self._parms.get("export_checkpoints_dir")

    @export_checkpoints_dir.setter
    def export_checkpoints_dir(self, export_checkpoints_dir):
        assert_is_type(export_checkpoints_dir, None, str)
        self._parms["export_checkpoints_dir"] = export_checkpoints_dir

    @property
    def aggregated_frame(self):
        """
        Frame referenced by the ``output_frame`` entry of the model output.

        :returns: the result of ``H2OFrame.get_frame`` for the name stored under
            ``output -> output_frame -> name`` in the model JSON, or ``None`` when there
            is no model JSON or no such name.
        """
        model_json = self._model_json
        if model_json is None:
            return None
        # ``or {}`` also covers keys that are present but hold None; the default
        # argument of dict.get only applies when the key is missing entirely.
        output = model_json.get("output") or {}
        frame_key = output.get("output_frame") or {}
        frame_name = frame_key.get("name")
        if frame_name is None:
            return None
        return H2OFrame.get_frame(frame_name)