#!/usr/bin/env python
# -*- encoding: utf-8 -*-
#
# This file is auto-generated by h2o-3/h2o-bindings/bin/gen_python.py
# Copyright 2016 H2O.ai; Apache License Version 2.0 (see LICENSE for details)
#
from __future__ import absolute_import, division, print_function, unicode_literals
from h2o.estimators.estimator_base import H2OEstimator
from h2o.exceptions import H2OValueError
from h2o.frame import H2OFrame
from h2o.utils.typechecks import assert_is_type, Enum, numeric
class H2OCoxProportionalHazardsEstimator(H2OEstimator):
    """
    Cox Proportional Hazards

    Trains a Cox Proportional Hazards Model (CoxPH) on an H2O dataset.
    """

    algo = "coxph"
    supervised_learning = True

    def __init__(self,
                 model_id=None,  # type: Optional[Union[None, str, H2OEstimator]]
                 training_frame=None,  # type: Optional[Union[None, str, H2OFrame]]
                 start_column=None,  # type: Optional[str]
                 stop_column=None,  # type: Optional[str]
                 response_column=None,  # type: Optional[str]
                 ignored_columns=None,  # type: Optional[List[str]]
                 weights_column=None,  # type: Optional[str]
                 offset_column=None,  # type: Optional[str]
                 stratify_by=None,  # type: Optional[List[str]]
                 ties="efron",  # type: Literal["efron", "breslow"]
                 init=0.0,  # type: float
                 lre_min=9.0,  # type: float
                 max_iterations=20,  # type: int
                 interactions=None,  # type: Optional[List[str]]
                 interaction_pairs=None,  # type: Optional[List[tuple]]
                 interactions_only=None,  # type: Optional[List[str]]
                 use_all_factor_levels=False,  # type: bool
                 export_checkpoints_dir=None,  # type: Optional[str]
                 single_node_mode=False,  # type: bool
                 ):
        """
        :param model_id: Destination id for this model; auto-generated if not specified.
               Defaults to ``None``.
        :type model_id: Union[None, str, H2OEstimator], optional
        :param training_frame: Id of the training data frame.
               Defaults to ``None``.
        :type training_frame: Union[None, str, H2OFrame], optional
        :param start_column: Start Time Column.
               Defaults to ``None``.
        :type start_column: str, optional
        :param stop_column: Stop Time Column.
               Defaults to ``None``.
        :type stop_column: str, optional
        :param response_column: Response variable column.
               Defaults to ``None``.
        :type response_column: str, optional
        :param ignored_columns: Names of columns to ignore for training.
               Defaults to ``None``.
        :type ignored_columns: List[str], optional
        :param weights_column: Column with observation weights. Giving some observation a weight of zero is equivalent
               to excluding it from the dataset; giving an observation a relative weight of 2 is equivalent to repeating
               that row twice. Negative weights are not allowed. Note: Weights are per-row observation weights and do
               not increase the size of the data frame. This is typically the number of times a row is repeated, but
               non-integer values are supported as well. During training, rows with higher weights matter more, due to
               the larger loss function pre-factor. If you set weight = 0 for a row, the returned prediction frame at
               that row is zero and this is incorrect. To get an accurate prediction, remove all rows with weight == 0.
               Defaults to ``None``.
        :type weights_column: str, optional
        :param offset_column: Offset column. This will be added to the combination of columns before applying the link
               function.
               Defaults to ``None``.
        :type offset_column: str, optional
        :param stratify_by: List of columns to use for stratification.
               Defaults to ``None``.
        :type stratify_by: List[str], optional
        :param ties: Method for Handling Ties.
               Defaults to ``"efron"``.
        :type ties: Literal["efron", "breslow"]
        :param init: Coefficient starting value.
               Defaults to ``0.0``.
        :type init: float
        :param lre_min: Minimum log-relative error.
               Defaults to ``9.0``.
        :type lre_min: float
        :param max_iterations: Maximum number of iterations.
               Defaults to ``20``.
        :type max_iterations: int
        :param interactions: A list of predictor column indices to interact. All pairwise combinations will be computed
               for the list.
               Defaults to ``None``.
        :type interactions: List[str], optional
        :param interaction_pairs: A list of pairwise (first order) column interactions.
               Defaults to ``None``.
        :type interaction_pairs: List[tuple], optional
        :param interactions_only: A list of columns that should only be used to create interactions but should not
               itself participate in model training.
               Defaults to ``None``.
        :type interactions_only: List[str], optional
        :param use_all_factor_levels: (Internal. For development only!) Indicates whether to use all factor levels.
               Defaults to ``False``.
        :type use_all_factor_levels: bool
        :param export_checkpoints_dir: Automatically export generated models to this directory.
               Defaults to ``None``.
        :type export_checkpoints_dir: str, optional
        :param single_node_mode: Run on a single node to reduce the effect of network overhead (for smaller datasets)
               Defaults to ``False``.
        :type single_node_mode: bool
        """
        super(H2OCoxProportionalHazardsEstimator, self).__init__()
        # Every parameter below is stored in ``_parms`` through its validating property setter.
        self._parms = {}
        self._id = self._parms['model_id'] = model_id
        self.training_frame = training_frame
        self.start_column = start_column
        self.stop_column = stop_column
        self.response_column = response_column
        self.ignored_columns = ignored_columns
        self.weights_column = weights_column
        self.offset_column = offset_column
        self.stratify_by = stratify_by
        self.ties = ties
        self.init = init
        self.lre_min = lre_min
        self.max_iterations = max_iterations
        self.interactions = interactions
        self.interaction_pairs = interaction_pairs
        self.interactions_only = interactions_only
        self.use_all_factor_levels = use_all_factor_levels
        self.export_checkpoints_dir = export_checkpoints_dir
        self.single_node_mode = single_node_mode

    @property
    def training_frame(self):
        """
        Id of the training data frame.

        Type: ``Union[None, str, H2OFrame]``.

        :examples:

        >>> heart = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/coxph_test/heart.csv")
        >>> predictor = "age"
        >>> response = "event"
        >>> train, valid = heart.split_frame(ratios=[.8])
        >>> heart_coxph = H2OCoxProportionalHazardsEstimator(start_column="start",
        ...                                                  stop_column="stop")
        >>> heart_coxph.train(x=predictor,
        ...                   y=response,
        ...                   training_frame=train,
        ...                   validation_frame=valid)
        >>> heart_coxph.scoring_history()
        """
        return self._parms.get("training_frame")

    @training_frame.setter
    def training_frame(self, training_frame):
        self._parms["training_frame"] = H2OFrame._validate(training_frame, 'training_frame')

    @property
    def start_column(self):
        """
        Start Time Column.

        Type: ``str``.

        :examples:

        >>> heart = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/coxph_test/heart.csv")
        >>> predictor = "age"
        >>> response = "event"
        >>> train, valid = heart.split_frame(ratios=[.8])
        >>> heart_coxph = H2OCoxProportionalHazardsEstimator(start_column="start",
        ...                                                  stop_column="stop")
        >>> heart_coxph.train(x=predictor,
        ...                   y=response,
        ...                   training_frame=train,
        ...                   validation_frame=valid)
        >>> heart_coxph.scoring_history()
        """
        return self._parms.get("start_column")

    @start_column.setter
    def start_column(self, start_column):
        assert_is_type(start_column, None, str)
        self._parms["start_column"] = start_column

    @property
    def stop_column(self):
        """
        Stop Time Column.

        Type: ``str``.

        :examples:

        >>> heart = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/coxph_test/heart.csv")
        >>> predictor = "age"
        >>> response = "event"
        >>> train, valid = heart.split_frame(ratios=[.8])
        >>> heart_coxph = H2OCoxProportionalHazardsEstimator(start_column="start",
        ...                                                  stop_column="stop")
        >>> heart_coxph.train(x=predictor,
        ...                   y=response,
        ...                   training_frame=train,
        ...                   validation_frame=valid)
        >>> heart_coxph.scoring_history()
        """
        return self._parms.get("stop_column")

    @stop_column.setter
    def stop_column(self, stop_column):
        assert_is_type(stop_column, None, str)
        self._parms["stop_column"] = stop_column

    @property
    def response_column(self):
        """
        Response variable column.

        Type: ``str``.
        """
        return self._parms.get("response_column")

    @response_column.setter
    def response_column(self, response_column):
        assert_is_type(response_column, None, str)
        self._parms["response_column"] = response_column

    @property
    def ignored_columns(self):
        """
        Names of columns to ignore for training.

        Type: ``List[str]``.
        """
        return self._parms.get("ignored_columns")

    @ignored_columns.setter
    def ignored_columns(self, ignored_columns):
        assert_is_type(ignored_columns, None, [str])
        self._parms["ignored_columns"] = ignored_columns

    @property
    def weights_column(self):
        """
        Column with observation weights. Giving some observation a weight of zero is equivalent to excluding it from the
        dataset; giving an observation a relative weight of 2 is equivalent to repeating that row twice. Negative
        weights are not allowed. Note: Weights are per-row observation weights and do not increase the size of the data
        frame. This is typically the number of times a row is repeated, but non-integer values are supported as well.
        During training, rows with higher weights matter more, due to the larger loss function pre-factor. If you set
        weight = 0 for a row, the returned prediction frame at that row is zero and this is incorrect. To get an
        accurate prediction, remove all rows with weight == 0.

        Type: ``str``.
        """
        return self._parms.get("weights_column")

    @weights_column.setter
    def weights_column(self, weights_column):
        assert_is_type(weights_column, None, str)
        self._parms["weights_column"] = weights_column

    @property
    def offset_column(self):
        """
        Offset column. This will be added to the combination of columns before applying the link function.

        Type: ``str``.

        :examples:

        >>> heart = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/coxph_test/heart.csv")
        >>> predictor = "age"
        >>> response = "event"
        >>> heart_coxph = H2OCoxProportionalHazardsEstimator(start_column="start",
        ...                                                  stop_column="stop",
        ...                                                  offset_column="transplant")
        >>> heart_coxph.train(x=predictor,
        ...                   y=response,
        ...                   training_frame=heart)
        >>> heart_coxph.scoring_history()
        """
        return self._parms.get("offset_column")

    @offset_column.setter
    def offset_column(self, offset_column):
        assert_is_type(offset_column, None, str)
        self._parms["offset_column"] = offset_column

    @property
    def stratify_by(self):
        """
        List of columns to use for stratification.

        Type: ``List[str]``.
        """
        return self._parms.get("stratify_by")

    @stratify_by.setter
    def stratify_by(self, stratify_by):
        assert_is_type(stratify_by, None, [str])
        self._parms["stratify_by"] = stratify_by

    @property
    def ties(self):
        """
        Method for Handling Ties.

        Type: ``Literal["efron", "breslow"]``, defaults to ``"efron"``.

        :examples:

        >>> heart = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/coxph_test/heart.csv")
        >>> predictor = "age"
        >>> response = "event"
        >>> train, valid = heart.split_frame(ratios=[.8])
        >>> heart_coxph = H2OCoxProportionalHazardsEstimator(start_column="start",
        ...                                                  stop_column="stop",
        ...                                                  ties="breslow")
        >>> heart_coxph.train(x=predictor,
        ...                   y=response,
        ...                   training_frame=train,
        ...                   validation_frame=valid)
        >>> heart_coxph.scoring_history()
        """
        return self._parms.get("ties")

    @ties.setter
    def ties(self, ties):
        assert_is_type(ties, None, Enum("efron", "breslow"))
        self._parms["ties"] = ties

    @property
    def init(self):
        """
        Coefficient starting value.

        Type: ``float``, defaults to ``0.0``.

        :examples:

        >>> heart = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/coxph_test/heart.csv")
        >>> predictor = "age"
        >>> response = "event"
        >>> heart_coxph = H2OCoxProportionalHazardsEstimator(start_column="start",
        ...                                                  stop_column="stop",
        ...                                                  init=2.9)
        >>> heart_coxph.train(x=predictor,
        ...                   y=response,
        ...                   training_frame=heart)
        >>> heart_coxph.scoring_history()
        """
        return self._parms.get("init")

    @init.setter
    def init(self, init):
        assert_is_type(init, None, numeric)
        self._parms["init"] = init

    @property
    def lre_min(self):
        """
        Minimum log-relative error.

        Type: ``float``, defaults to ``9.0``.

        :examples:

        >>> heart = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/coxph_test/heart.csv")
        >>> predictor = "age"
        >>> response = "event"
        >>> heart_coxph = H2OCoxProportionalHazardsEstimator(start_column="start",
        ...                                                  stop_column="stop",
        ...                                                  lre_min=5)
        >>> heart_coxph.train(x=predictor,
        ...                   y=response,
        ...                   training_frame=heart)
        >>> heart_coxph.scoring_history()
        """
        return self._parms.get("lre_min")

    @lre_min.setter
    def lre_min(self, lre_min):
        assert_is_type(lre_min, None, numeric)
        self._parms["lre_min"] = lre_min

    @property
    def max_iterations(self):
        """
        Maximum number of iterations.

        Type: ``int``, defaults to ``20``.

        :examples:

        >>> heart = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/coxph_test/heart.csv")
        >>> predictor = "age"
        >>> response = "event"
        >>> heart_coxph = H2OCoxProportionalHazardsEstimator(start_column="start",
        ...                                                  stop_column="stop",
        ...                                                  max_iterations=50)
        >>> heart_coxph.train(x=predictor,
        ...                   y=response,
        ...                   training_frame=heart)
        >>> heart_coxph.scoring_history()
        """
        return self._parms.get("max_iterations")

    @max_iterations.setter
    def max_iterations(self, max_iterations):
        assert_is_type(max_iterations, None, int)
        self._parms["max_iterations"] = max_iterations

    @property
    def interactions(self):
        """
        A list of predictor column indices to interact. All pairwise combinations will be computed for the list.

        Type: ``List[str]``.

        :examples:

        >>> heart = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/coxph_test/heart.csv")
        >>> predictor = "age"
        >>> response = "event"
        >>> interactions = ['start','stop']
        >>> heart_coxph = H2OCoxProportionalHazardsEstimator(start_column="start",
        ...                                                  stop_column="stop",
        ...                                                  interactions=interactions)
        >>> heart_coxph.train(x=predictor,
        ...                   y=response,
        ...                   training_frame=heart)
        >>> heart_coxph.scoring_history()
        """
        return self._parms.get("interactions")

    @interactions.setter
    def interactions(self, interactions):
        assert_is_type(interactions, None, [str])
        self._parms["interactions"] = interactions

    @property
    def interaction_pairs(self):
        """
        A list of pairwise (first order) column interactions.

        Type: ``List[tuple]``.

        :examples:

        >>> heart = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/coxph_test/heart.csv")
        >>> predictor = "age"
        >>> response = "event"
        >>> interaction_pairs = [("start","stop")]
        >>> heart_coxph = H2OCoxProportionalHazardsEstimator(start_column="start",
        ...                                                  stop_column="stop",
        ...                                                  interaction_pairs=interaction_pairs)
        >>> heart_coxph.train(x=predictor,
        ...                   y=response,
        ...                   training_frame=heart)
        >>> heart_coxph.scoring_history()
        """
        return self._parms.get("interaction_pairs")

    @interaction_pairs.setter
    def interaction_pairs(self, interaction_pairs):
        assert_is_type(interaction_pairs, None, [tuple])
        self._parms["interaction_pairs"] = interaction_pairs

    @property
    def interactions_only(self):
        """
        A list of columns that should only be used to create interactions but should not itself participate in model
        training.

        Type: ``List[str]``.

        :examples:

        >>> heart = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/coxph_test/heart.csv")
        >>> predictor = "age"
        >>> response = "event"
        >>> interactions = ['start','stop']
        >>> heart_coxph = H2OCoxProportionalHazardsEstimator(start_column="start",
        ...                                                  stop_column="stop",
        ...                                                  interactions_only=interactions)
        >>> heart_coxph.train(x=predictor,
        ...                   y=response,
        ...                   training_frame=heart)
        >>> heart_coxph.scoring_history()
        """
        return self._parms.get("interactions_only")

    @interactions_only.setter
    def interactions_only(self, interactions_only):
        assert_is_type(interactions_only, None, [str])
        self._parms["interactions_only"] = interactions_only

    @property
    def use_all_factor_levels(self):
        """
        (Internal. For development only!) Indicates whether to use all factor levels.

        Type: ``bool``, defaults to ``False``.

        :examples:

        >>> heart = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/coxph_test/heart.csv")
        >>> predictor = "age"
        >>> response = "event"
        >>> heart_coxph = H2OCoxProportionalHazardsEstimator(start_column="start",
        ...                                                  stop_column="stop",
        ...                                                  use_all_factor_levels=True)
        >>> heart_coxph.train(x=predictor,
        ...                   y=response,
        ...                   training_frame=heart)
        >>> heart_coxph.scoring_history()
        """
        return self._parms.get("use_all_factor_levels")

    @use_all_factor_levels.setter
    def use_all_factor_levels(self, use_all_factor_levels):
        assert_is_type(use_all_factor_levels, None, bool)
        self._parms["use_all_factor_levels"] = use_all_factor_levels

    @property
    def export_checkpoints_dir(self):
        """
        Automatically export generated models to this directory.

        Type: ``str``.

        :examples:

        >>> import tempfile
        >>> from os import listdir
        >>> heart = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/coxph_test/heart.csv")
        >>> predictor = "age"
        >>> response = "event"
        >>> checkpoints_dir = tempfile.mkdtemp()
        >>> coxph = H2OCoxProportionalHazardsEstimator(start_column="start",
        ...                                            stop_column="stop",
        ...                                            export_checkpoints_dir=checkpoints_dir)
        >>> coxph.train(x=predictor,
        ...             y=response,
        ...             training_frame=heart)
        >>> len(listdir(checkpoints_dir))
        """
        return self._parms.get("export_checkpoints_dir")

    @export_checkpoints_dir.setter
    def export_checkpoints_dir(self, export_checkpoints_dir):
        assert_is_type(export_checkpoints_dir, None, str)
        self._parms["export_checkpoints_dir"] = export_checkpoints_dir

    @property
    def single_node_mode(self):
        """
        Run on a single node to reduce the effect of network overhead (for smaller datasets)

        Type: ``bool``, defaults to ``False``.
        """
        return self._parms.get("single_node_mode")

    @single_node_mode.setter
    def single_node_mode(self, single_node_mode):
        assert_is_type(single_node_mode, None, bool)
        self._parms["single_node_mode"] = single_node_mode

    def _get_baseline_frame(self, output_key):
        """
        Look up a frame referenced from the ``output`` section of the model JSON.

        :param output_key: key under ``output`` whose value carries the frame's ``name``.
        :returns: the result of ``H2OFrame.get_frame`` for that name, or ``None`` when the model JSON, its
            ``output`` section, the requested entry or the entry's ``name`` is missing or null.
        """
        if self._model_json is None:
            return None
        # ``or {}`` instead of a ``.get`` default: the default only applies to absent keys, while a key
        # that is present with a null value would otherwise make the next ``.get`` raise AttributeError.
        output = self._model_json.get("output") or {}
        frame_ref = output.get(output_key) or {}
        frame_name = frame_ref.get("name")
        if frame_name is None:
            return None
        return H2OFrame.get_frame(frame_name)

    @property
    def baseline_hazard_frame(self):
        """
        Frame named by ``output.baseline_hazard.name`` in the model JSON.

        :returns: the frame obtained via ``H2OFrame.get_frame``, or ``None`` if the model JSON carries no such name.
        """
        return self._get_baseline_frame("baseline_hazard")

    @property
    def baseline_survival_frame(self):
        """
        Frame named by ``output.baseline_survival.name`` in the model JSON.

        :returns: the frame obtained via ``H2OFrame.get_frame``, or ``None`` if the model JSON carries no such name.
        """
        return self._get_baseline_frame("baseline_survival")