Source code for h2o.estimators.coxph

#!/usr/bin/env python
# -*- encoding: utf-8 -*-
#
# This file is auto-generated by h2o-3/h2o-bindings/bin/gen_python.py
# Copyright 2016 H2O.ai;  Apache License Version 2.0 (see LICENSE for details)
#
from __future__ import absolute_import, division, print_function, unicode_literals

from h2o.estimators.estimator_base import H2OEstimator
from h2o.exceptions import H2OValueError
from h2o.frame import H2OFrame
from h2o.utils.typechecks import assert_is_type, Enum, numeric


[docs]class H2OCoxProportionalHazardsEstimator(H2OEstimator): """ Cox Proportional Hazards Trains a Cox Proportional Hazards Model (CoxPH) on an H2O dataset. """ algo = "coxph" param_names = {"model_id", "training_frame", "start_column", "stop_column", "response_column", "ignored_columns", "weights_column", "offset_column", "stratify_by", "ties", "init", "lre_min", "max_iterations", "interactions", "interaction_pairs", "interactions_only", "use_all_factor_levels", "export_checkpoints_dir", "single_node_mode"} def __init__(self, **kwargs): super(H2OCoxProportionalHazardsEstimator, self).__init__() self._parms = {} for pname, pvalue in kwargs.items(): if pname == 'model_id': self._id = pvalue self._parms["model_id"] = pvalue elif pname in self.param_names: # Using setattr(...) will invoke type-checking of the arguments setattr(self, pname, pvalue) else: raise H2OValueError("Unknown parameter %s = %r" % (pname, pvalue)) @property def training_frame(self): """ Id of the training data frame. Type: ``H2OFrame``. :examples: >>> heart = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/coxph_test/heart.csv") >>> predictor = "age" >>> response = "event" >>> train, valid = heart.split_frame(ratios=[.8]) >>> heart_coxph = H2OCoxProportionalHazardsEstimator(start_column="start", ... stop_column="stop") >>> heart_coxph.train(x=predictor, ... y=response, ... training_frame=train, ... validation_frame=valid) >>> heart_coxph.scoring_history() """ return self._parms.get("training_frame") @training_frame.setter def training_frame(self, training_frame): self._parms["training_frame"] = H2OFrame._validate(training_frame, 'training_frame') @property def start_column(self): """ Start Time Column. Type: ``str``. :examples: >>> heart = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/coxph_test/heart.csv") >>> predictor = "age" >>> response = "event" >>> train, valid = heart.split_frame(ratios=[.8]) >>> heart_coxph = H2OCoxProportionalHazardsEstimator(start_column="start", ... stop_column="stop") >>> heart_coxph.train(x=predictor, ... y=response, ... training_frame=train, ... validation_frame=valid) >>> heart_coxph.scoring_history() """ return self._parms.get("start_column") @start_column.setter def start_column(self, start_column): assert_is_type(start_column, None, str) self._parms["start_column"] = start_column @property def stop_column(self): """ Stop Time Column. Type: ``str``. :examples: >>> heart = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/coxph_test/heart.csv") >>> predictor = "age" >>> response = "event" >>> train, valid = heart.split_frame(ratios=[.8]) >>> heart_coxph = H2OCoxProportionalHazardsEstimator(start_column="start", ... stop_column="stop") >>> heart_coxph.train(x=predictor, ... y=response, ... training_frame=train, ... validation_frame=valid) >>> heart_coxph.scoring_history() """ return self._parms.get("stop_column") @stop_column.setter def stop_column(self, stop_column): assert_is_type(stop_column, None, str) self._parms["stop_column"] = stop_column @property def response_column(self): """ Response variable column. Type: ``str``. """ return self._parms.get("response_column") @response_column.setter def response_column(self, response_column): assert_is_type(response_column, None, str) self._parms["response_column"] = response_column @property def ignored_columns(self): """ Names of columns to ignore for training. Type: ``List[str]``. """ return self._parms.get("ignored_columns") @ignored_columns.setter def ignored_columns(self, ignored_columns): assert_is_type(ignored_columns, None, [str]) self._parms["ignored_columns"] = ignored_columns @property def weights_column(self): """ Column with observation weights. Giving some observation a weight of zero is equivalent to excluding it from the dataset; giving an observation a relative weight of 2 is equivalent to repeating that row twice. Negative weights are not allowed. Note: Weights are per-row observation weights and do not increase the size of the data frame. This is typically the number of times a row is repeated, but non-integer values are supported as well. During training, rows with higher weights matter more, due to the larger loss function pre-factor. Type: ``str``. """ return self._parms.get("weights_column") @weights_column.setter def weights_column(self, weights_column): assert_is_type(weights_column, None, str) self._parms["weights_column"] = weights_column @property def offset_column(self): """ Offset column. This will be added to the combination of columns before applying the link function. Type: ``str``. :examples: >>> heart = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/coxph_test/heart.csv") >>> predictor = "age" >>> response = "event" >>> heart_coxph = H2OCoxProportionalHazardsEstimator(start_column="start", ... stop_column="stop", ... offset_column="transplant") >>> heart_coxph.train(x=predictor, ... y=response, ... training_frame=heart) >>> heart_coxph.scoring_history() """ return self._parms.get("offset_column") @offset_column.setter def offset_column(self, offset_column): assert_is_type(offset_column, None, str) self._parms["offset_column"] = offset_column @property def stratify_by(self): """ List of columns to use for stratification. Type: ``List[str]``. """ return self._parms.get("stratify_by") @stratify_by.setter def stratify_by(self, stratify_by): assert_is_type(stratify_by, None, [str]) self._parms["stratify_by"] = stratify_by @property def ties(self): """ Method for Handling Ties. One of: ``"efron"``, ``"breslow"`` (default: ``"efron"``). :examples: >>> heart = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/coxph_test/heart.csv") >>> predictor = "age" >>> response = "event" >>> train, valid = heart.split_frame(ratios=[.8]) >>> heart_coxph = H2OCoxProportionalHazardsEstimator(start_column="start", ... stop_column="stop", ... ties="breslow") >>> heart_coxph.train(x=predictor, ... y=response, ... training_frame=train, ... validation_frame=valid) >>> heart_coxph.scoring_history() """ return self._parms.get("ties") @ties.setter def ties(self, ties): assert_is_type(ties, None, Enum("efron", "breslow")) self._parms["ties"] = ties @property def init(self): """ Coefficient starting value. Type: ``float`` (default: ``0``). :examples: >>> heart = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/coxph_test/heart.csv") >>> predictor = "age" >>> response = "event" >>> heart_coxph = H2OCoxProportionalHazardsEstimator(start_column="start", ... stop_column="stop", ... init=2.9) >>> heart_coxph.train(x=predictor, ... y=response, ... training_frame=heart) >>> heart_coxph.scoring_history() """ return self._parms.get("init") @init.setter def init(self, init): assert_is_type(init, None, numeric) self._parms["init"] = init @property def lre_min(self): """ Minimum log-relative error. Type: ``float`` (default: ``9``). :examples: >>> heart = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/coxph_test/heart.csv") >>> predictor = "age" >>> response = "event" >>> heart_coxph = H2OCoxProportionalHazardsEstimator(start_column="start", ... stop_column="stop", ... lre_min=5) >>> heart_coxph.train(x=predictor, ... y=response, ... training_frame=heart) >>> heart_coxph.scoring_history() """ return self._parms.get("lre_min") @lre_min.setter def lre_min(self, lre_min): assert_is_type(lre_min, None, numeric) self._parms["lre_min"] = lre_min @property def max_iterations(self): """ Maximum number of iterations. Type: ``int`` (default: ``20``). :examples: >>> heart = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/coxph_test/heart.csv") >>> predictor = "age" >>> response = "event" >>> heart_coxph = H2OCoxProportionalHazardsEstimator(start_column="start", ... stop_column="stop", ... max_iterations=50) >>> heart_coxph.train(x=predictor, ... y=response, ... training_frame=heart) >>> heart_coxph.scoring_history() """ return self._parms.get("max_iterations") @max_iterations.setter def max_iterations(self, max_iterations): assert_is_type(max_iterations, None, int) self._parms["max_iterations"] = max_iterations @property def interactions(self): """ A list of predictor column indices to interact. All pairwise combinations will be computed for the list. Type: ``List[str]``. :examples: >>> heart = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/coxph_test/heart.csv") >>> predictor = "age" >>> response = "event" >>> interactions = ['start','stop'] >>> heart_coxph = H2OCoxProportionalHazardsEstimator(start_column="start", ... stop_column="stop", ... interactions=interactions) >>> heart_coxph.train(x=predictor, ... y=response, ... training_frame=heart) >>> heart_coxph.scoring_history() """ return self._parms.get("interactions") @interactions.setter def interactions(self, interactions): assert_is_type(interactions, None, [str]) self._parms["interactions"] = interactions @property def interaction_pairs(self): """ A list of pairwise (first order) column interactions. Type: ``List[tuple]``. :examples: >>> heart = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/coxph_test/heart.csv") >>> predictor = "age" >>> response = "event" >>> interaction_pairs = [("start","stop")] >>> heart_coxph = H2OCoxProportionalHazardsEstimator(start_column="start", ... stop_column="stop", ... interaction_pairs=interaction_pairs) >>> heart_coxph.train(x=predictor, ... y=response, ... training_frame=heart) >>> heart_coxph.scoring_history() """ return self._parms.get("interaction_pairs") @interaction_pairs.setter def interaction_pairs(self, interaction_pairs): assert_is_type(interaction_pairs, None, [tuple]) self._parms["interaction_pairs"] = interaction_pairs @property def interactions_only(self): """ A list of columns that should only be used to create interactions but should not itself participate in model training. Type: ``List[str]``. :examples: >>> heart = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/coxph_test/heart.csv") >>> predictor = "age" >>> response = "event" >>> interactions = ['start','stop'] >>> heart_coxph = H2OCoxProportionalHazardsEstimator(start_column="start", ... stop_column="stop", ... interactions_only=interactions) >>> heart_coxph.train(x=predictor, ... y=response, ... training_frame=heart) >>> heart_coxph.scoring_history() """ return self._parms.get("interactions_only") @interactions_only.setter def interactions_only(self, interactions_only): assert_is_type(interactions_only, None, [str]) self._parms["interactions_only"] = interactions_only @property def use_all_factor_levels(self): """ (Internal. For development only!) Indicates whether to use all factor levels. Type: ``bool`` (default: ``False``). :examples: >>> heart = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/coxph_test/heart.csv") >>> predictor = "age" >>> response = "event" >>> heart_coxph = H2OCoxProportionalHazardsEstimator(start_column="start", ... stop_column="stop", ... use_all_factor_levels=True) >>> heart_coxph.train(x=predictor, ... y=response, ... training_frame=heart) >>> heart_coxph.scoring_history() """ return self._parms.get("use_all_factor_levels") @use_all_factor_levels.setter def use_all_factor_levels(self, use_all_factor_levels): assert_is_type(use_all_factor_levels, None, bool) self._parms["use_all_factor_levels"] = use_all_factor_levels @property def export_checkpoints_dir(self): """ Automatically export generated models to this directory. Type: ``str``. :examples: >>> import tempfile >>> from os import listdir >>> heart = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/coxph_test/heart.csv") >>> predictor = "age" >>> response = "event" >>> checkpoints_dir = tempfile.mkdtemp() >>> coxph = H2OCoxProportionalHazardsEstimator(start_column="start", ... stop_column="stop", ... export_checkpoints_dir=checkpoints_dir) >>> coxph.train(x=predictor, ... y=response, ... training_frame=heart) >>> len(listdir(checkpoints_dir)) """ return self._parms.get("export_checkpoints_dir") @export_checkpoints_dir.setter def export_checkpoints_dir(self, export_checkpoints_dir): assert_is_type(export_checkpoints_dir, None, str) self._parms["export_checkpoints_dir"] = export_checkpoints_dir @property def single_node_mode(self): """ Run on a single node to reduce the effect of network overhead (for smaller datasets) Type: ``bool`` (default: ``False``). """ return self._parms.get("single_node_mode") @single_node_mode.setter def single_node_mode(self, single_node_mode): assert_is_type(single_node_mode, None, bool) self._parms["single_node_mode"] = single_node_mode def _additional_used_columns(self, parms): """ :return: Start and stop column if specified. """ result = [] for col in ["start_column", "stop_column"]: if col in parms and parms[col] is not None: result.append(parms[col]) return result