Source code for h2o.estimators.coxph

#!/usr/bin/env python
# -*- encoding: utf-8 -*-
#
# This file is auto-generated by h2o-3/h2o-bindings/bin/gen_python.py
# Copyright 2016 H2O.ai;  Apache License Version 2.0 (see LICENSE for details)
#
from __future__ import absolute_import, division, print_function, unicode_literals

from h2o.estimators.estimator_base import H2OEstimator
from h2o.exceptions import H2OValueError
from h2o.frame import H2OFrame
from h2o.utils.typechecks import assert_is_type, Enum, numeric


[docs]class H2OCoxProportionalHazardsEstimator(H2OEstimator):
    """
    Cox Proportional Hazards

    Trains a Cox Proportional Hazards Model (CoxPH) on an H2O dataset.
    """

    algo = "coxph"
    supervised_learning = True

    def __init__(self,
                 model_id=None,  # type: Optional[Union[None, str, H2OEstimator]]
                 training_frame=None,  # type: Optional[Union[None, str, H2OFrame]]
                 start_column=None,  # type: Optional[str]
                 stop_column=None,  # type: Optional[str]
                 response_column=None,  # type: Optional[str]
                 ignored_columns=None,  # type: Optional[List[str]]
                 weights_column=None,  # type: Optional[str]
                 offset_column=None,  # type: Optional[str]
                 stratify_by=None,  # type: Optional[List[str]]
                 ties="efron",  # type: Literal["efron", "breslow"]
                 init=0.0,  # type: float
                 lre_min=9.0,  # type: float
                 max_iterations=20,  # type: int
                 interactions=None,  # type: Optional[List[str]]
                 interaction_pairs=None,  # type: Optional[List[tuple]]
                 interactions_only=None,  # type: Optional[List[str]]
                 use_all_factor_levels=False,  # type: bool
                 export_checkpoints_dir=None,  # type: Optional[str]
                 single_node_mode=False,  # type: bool
                 ):
        """
        :param model_id: Destination id for this model; auto-generated if not specified.
               Defaults to ``None``.
        :type model_id: Union[None, str, H2OEstimator], optional
        :param training_frame: Id of the training data frame.
               Defaults to ``None``.
        :type training_frame: Union[None, str, H2OFrame], optional
        :param start_column: Start Time Column.
               Defaults to ``None``.
        :type start_column: str, optional
        :param stop_column: Stop Time Column.
               Defaults to ``None``.
        :type stop_column: str, optional
        :param response_column: Response variable column.
               Defaults to ``None``.
        :type response_column: str, optional
        :param ignored_columns: Names of columns to ignore for training.
               Defaults to ``None``.
        :type ignored_columns: List[str], optional
        :param weights_column: Column with observation weights. Giving some observation a weight of zero is equivalent
               to excluding it from the dataset; giving an observation a relative weight of 2 is equivalent to repeating
               that row twice. Negative weights are not allowed. Note: Weights are per-row observation weights and do
               not increase the size of the data frame. This is typically the number of times a row is repeated, but
               non-integer values are supported as well. During training, rows with higher weights matter more, due to
               the larger loss function pre-factor. If you set weight = 0 for a row, the returned prediction frame at
               that row is zero and this is incorrect. To get an accurate prediction, remove all rows with weight == 0.
               Defaults to ``None``.
        :type weights_column: str, optional
        :param offset_column: Offset column. This will be added to the combination of columns before applying the link
               function.
               Defaults to ``None``.
        :type offset_column: str, optional
        :param stratify_by: List of columns to use for stratification.
               Defaults to ``None``.
        :type stratify_by: List[str], optional
        :param ties: Method for Handling Ties.
               Defaults to ``"efron"``.
        :type ties: Literal["efron", "breslow"]
        :param init: Coefficient starting value.
               Defaults to ``0.0``.
        :type init: float
        :param lre_min: Minimum log-relative error.
               Defaults to ``9.0``.
        :type lre_min: float
        :param max_iterations: Maximum number of iterations.
               Defaults to ``20``.
        :type max_iterations: int
        :param interactions: A list of predictor column indices to interact. All pairwise combinations will be computed
               for the list.
               Defaults to ``None``.
        :type interactions: List[str], optional
        :param interaction_pairs: A list of pairwise (first order) column interactions.
               Defaults to ``None``.
        :type interaction_pairs: List[tuple], optional
        :param interactions_only: A list of columns that should only be used to create interactions but should not
               itself participate in model training.
               Defaults to ``None``.
        :type interactions_only: List[str], optional
        :param use_all_factor_levels: (Internal. For development only!) Indicates whether to use all factor levels.
               Defaults to ``False``.
        :type use_all_factor_levels: bool
        :param export_checkpoints_dir: Automatically export generated models to this directory.
               Defaults to ``None``.
        :type export_checkpoints_dir: str, optional
        :param single_node_mode: Run on a single node to reduce the effect of network overhead (for smaller datasets)
               Defaults to ``False``.
        :type single_node_mode: bool
        """
        super(H2OCoxProportionalHazardsEstimator, self).__init__()
        self._parms = {}
        self._id = self._parms['model_id'] = model_id
        self.training_frame = training_frame
        self.start_column = start_column
        self.stop_column = stop_column
        self.response_column = response_column
        self.ignored_columns = ignored_columns
        self.weights_column = weights_column
        self.offset_column = offset_column
        self.stratify_by = stratify_by
        self.ties = ties
        self.init = init
        self.lre_min = lre_min
        self.max_iterations = max_iterations
        self.interactions = interactions
        self.interaction_pairs = interaction_pairs
        self.interactions_only = interactions_only
        self.use_all_factor_levels = use_all_factor_levels
        self.export_checkpoints_dir = export_checkpoints_dir
        self.single_node_mode = single_node_mode

    @property
    def training_frame(self):
        """
        Id of the training data frame.

        Type: ``Union[None, str, H2OFrame]``.

        :examples:

        >>> heart = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/coxph_test/heart.csv")
        >>> predictor = "age"
        >>> response = "event"
        >>> train, valid = heart.split_frame(ratios=[.8])
        >>> heart_coxph = H2OCoxProportionalHazardsEstimator(start_column="start",
        ...                                                  stop_column="stop")
        >>> heart_coxph.train(x=predictor,
        ...                   y=response,
        ...                   training_frame=train,
        ...                   validation_frame=valid)
        >>> heart_coxph.scoring_history()
        """
        return self._parms.get("training_frame")

    @training_frame.setter
    def training_frame(self, training_frame):
        self._parms["training_frame"] = H2OFrame._validate(training_frame, 'training_frame')

    @property
    def start_column(self):
        """
        Start Time Column.

        Type: ``str``.

        :examples:

        >>> heart = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/coxph_test/heart.csv")
        >>> predictor = "age"
        >>> response = "event"
        >>> train, valid = heart.split_frame(ratios=[.8])
        >>> heart_coxph = H2OCoxProportionalHazardsEstimator(start_column="start",
        ...                                                  stop_column="stop")
        >>> heart_coxph.train(x=predictor,
        ...                   y=response,
        ...                   training_frame=train,
        ...                   validation_frame=valid)
        >>> heart_coxph.scoring_history()
        """
        return self._parms.get("start_column")

    @start_column.setter
    def start_column(self, start_column):
        assert_is_type(start_column, None, str)
        self._parms["start_column"] = start_column

    @property
    def stop_column(self):
        """
        Stop Time Column.

        Type: ``str``.

        :examples:

        >>> heart = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/coxph_test/heart.csv")
        >>> predictor = "age"
        >>> response = "event"
        >>> train, valid = heart.split_frame(ratios=[.8])
        >>> heart_coxph = H2OCoxProportionalHazardsEstimator(start_column="start",
        ...                                                  stop_column="stop")
        >>> heart_coxph.train(x=predictor,
        ...                   y=response,
        ...                   training_frame=train,
        ...                   validation_frame=valid)
        >>> heart_coxph.scoring_history()
        """
        return self._parms.get("stop_column")

    @stop_column.setter
    def stop_column(self, stop_column):
        assert_is_type(stop_column, None, str)
        self._parms["stop_column"] = stop_column

    @property
    def response_column(self):
        """
        Response variable column.

        Type: ``str``.
        """
        return self._parms.get("response_column")

    @response_column.setter
    def response_column(self, response_column):
        assert_is_type(response_column, None, str)
        self._parms["response_column"] = response_column

    @property
    def ignored_columns(self):
        """
        Names of columns to ignore for training.

        Type: ``List[str]``.
        """
        return self._parms.get("ignored_columns")

    @ignored_columns.setter
    def ignored_columns(self, ignored_columns):
        assert_is_type(ignored_columns, None, [str])
        self._parms["ignored_columns"] = ignored_columns

    @property
    def weights_column(self):
        """
        Column with observation weights. Giving some observation a weight of zero is equivalent to excluding it from the
        dataset; giving an observation a relative weight of 2 is equivalent to repeating that row twice. Negative
        weights are not allowed. Note: Weights are per-row observation weights and do not increase the size of the data
        frame. This is typically the number of times a row is repeated, but non-integer values are supported as well.
        During training, rows with higher weights matter more, due to the larger loss function pre-factor. If you set
        weight = 0 for a row, the returned prediction frame at that row is zero and this is incorrect. To get an
        accurate prediction, remove all rows with weight == 0.

        Type: ``str``.
        """
        return self._parms.get("weights_column")

    @weights_column.setter
    def weights_column(self, weights_column):
        assert_is_type(weights_column, None, str)
        self._parms["weights_column"] = weights_column

    @property
    def offset_column(self):
        """
        Offset column. This will be added to the combination of columns before applying the link function.

        Type: ``str``.

        :examples:

        >>> heart = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/coxph_test/heart.csv")
        >>> predictor = "age"
        >>> response = "event"
        >>> heart_coxph = H2OCoxProportionalHazardsEstimator(start_column="start",
        ...                                                  stop_column="stop",
        ...                                                  offset_column="transplant")
        >>> heart_coxph.train(x=predictor,
        ...                   y=response,
        ...                   training_frame=heart)
        >>> heart_coxph.scoring_history()
        """
        return self._parms.get("offset_column")

    @offset_column.setter
    def offset_column(self, offset_column):
        assert_is_type(offset_column, None, str)
        self._parms["offset_column"] = offset_column

    @property
    def stratify_by(self):
        """
        List of columns to use for stratification.

        Type: ``List[str]``.
        """
        return self._parms.get("stratify_by")

    @stratify_by.setter
    def stratify_by(self, stratify_by):
        assert_is_type(stratify_by, None, [str])
        self._parms["stratify_by"] = stratify_by

    @property
    def ties(self):
        """
        Method for Handling Ties.

        Type: ``Literal["efron", "breslow"]``, defaults to ``"efron"``.

        :examples:

        >>> heart = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/coxph_test/heart.csv")
        >>> predictor = "age"
        >>> response = "event"
        >>> train, valid = heart.split_frame(ratios=[.8])
        >>> heart_coxph = H2OCoxProportionalHazardsEstimator(start_column="start",
        ...                                                  stop_column="stop",
        ...                                                  ties="breslow")
        >>> heart_coxph.train(x=predictor,
        ...                   y=response,
        ...                   training_frame=train,
        ...                   validation_frame=valid)
        >>> heart_coxph.scoring_history()
        """
        return self._parms.get("ties")

    @ties.setter
    def ties(self, ties):
        assert_is_type(ties, None, Enum("efron", "breslow"))
        self._parms["ties"] = ties

    @property
    def init(self):
        """
        Coefficient starting value.

        Type: ``float``, defaults to ``0.0``.

        :examples:

        >>> heart = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/coxph_test/heart.csv")
        >>> predictor = "age"
        >>> response = "event"
        >>> heart_coxph = H2OCoxProportionalHazardsEstimator(start_column="start",
        ...                                                  stop_column="stop",
        ...                                                  init=2.9)
        >>> heart_coxph.train(x=predictor,
        ...                   y=response,
        ...                   training_frame=heart)
        >>> heart_coxph.scoring_history()
        """
        return self._parms.get("init")

    @init.setter
    def init(self, init):
        assert_is_type(init, None, numeric)
        self._parms["init"] = init

    @property
    def lre_min(self):
        """
        Minimum log-relative error.

        Type: ``float``, defaults to ``9.0``.

        :examples:

        >>> heart = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/coxph_test/heart.csv")
        >>> predictor = "age"
        >>> response = "event"
        >>> heart_coxph = H2OCoxProportionalHazardsEstimator(start_column="start",
        ...                                                  stop_column="stop",
        ...                                                  lre_min=5)
        >>> heart_coxph.train(x=predictor,
        ...                   y=response,
        ...                   training_frame=heart)
        >>> heart_coxph.scoring_history()
        """
        return self._parms.get("lre_min")

    @lre_min.setter
    def lre_min(self, lre_min):
        assert_is_type(lre_min, None, numeric)
        self._parms["lre_min"] = lre_min

    @property
    def max_iterations(self):
        """
        Maximum number of iterations.

        Type: ``int``, defaults to ``20``.

        :examples:

        >>> heart = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/coxph_test/heart.csv")
        >>> predictor = "age"
        >>> response = "event"
        >>> heart_coxph = H2OCoxProportionalHazardsEstimator(start_column="start",
        ...                                                  stop_column="stop",
        ...                                                  max_iterations=50)
        >>> heart_coxph.train(x=predictor,
        ...                   y=response,
        ...                   training_frame=heart)
        >>> heart_coxph.scoring_history()
        """
        return self._parms.get("max_iterations")

    @max_iterations.setter
    def max_iterations(self, max_iterations):
        assert_is_type(max_iterations, None, int)
        self._parms["max_iterations"] = max_iterations

    @property
    def interactions(self):
        """
        A list of predictor column indices to interact. All pairwise combinations will be computed for the list.

        Type: ``List[str]``.

        :examples:

        >>> heart = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/coxph_test/heart.csv")
        >>> predictor = "age"
        >>> response = "event"
        >>> interactions = ['start','stop']
        >>> heart_coxph = H2OCoxProportionalHazardsEstimator(start_column="start",
        ...                                                  stop_column="stop",
        ...                                                  interactions=interactions)
        >>> heart_coxph.train(x=predictor,
        ...                   y=response,
        ...                   training_frame=heart)
        >>> heart_coxph.scoring_history()
        """
        return self._parms.get("interactions")

    @interactions.setter
    def interactions(self, interactions):
        assert_is_type(interactions, None, [str])
        self._parms["interactions"] = interactions

    @property
    def interaction_pairs(self):
        """
        A list of pairwise (first order) column interactions.

        Type: ``List[tuple]``.

        :examples:

        >>> heart = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/coxph_test/heart.csv")
        >>> predictor = "age"
        >>> response = "event"
        >>> interaction_pairs = [("start","stop")]
        >>> heart_coxph = H2OCoxProportionalHazardsEstimator(start_column="start",
        ...                                                  stop_column="stop",
        ...                                                  interaction_pairs=interaction_pairs)
        >>> heart_coxph.train(x=predictor,
        ...                   y=response,
        ...                   training_frame=heart)
        >>> heart_coxph.scoring_history()
        """
        return self._parms.get("interaction_pairs")

    @interaction_pairs.setter
    def interaction_pairs(self, interaction_pairs):
        assert_is_type(interaction_pairs, None, [tuple])
        self._parms["interaction_pairs"] = interaction_pairs

    @property
    def interactions_only(self):
        """
        A list of columns that should only be used to create interactions but should not itself participate in model
        training.

        Type: ``List[str]``.

        :examples:

        >>> heart = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/coxph_test/heart.csv")
        >>> predictor = "age"
        >>> response = "event"
        >>> interactions = ['start','stop']
        >>> heart_coxph = H2OCoxProportionalHazardsEstimator(start_column="start",
        ...                                                  stop_column="stop",
        ...                                                  interactions_only=interactions)
        >>> heart_coxph.train(x=predictor,
        ...                   y=response,
        ...                   training_frame=heart)
        >>> heart_coxph.scoring_history()
        """
        return self._parms.get("interactions_only")

    @interactions_only.setter
    def interactions_only(self, interactions_only):
        assert_is_type(interactions_only, None, [str])
        self._parms["interactions_only"] = interactions_only

    @property
    def use_all_factor_levels(self):
        """
        (Internal. For development only!) Indicates whether to use all factor levels.

        Type: ``bool``, defaults to ``False``.

        :examples:

        >>> heart = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/coxph_test/heart.csv")
        >>> predictor = "age"
        >>> response = "event"
        >>> heart_coxph = H2OCoxProportionalHazardsEstimator(start_column="start",
        ...                                                  stop_column="stop",
        ...                                                  use_all_factor_levels=True)
        >>> heart_coxph.train(x=predictor,
        ...                   y=response,
        ...                   training_frame=heart)
        >>> heart_coxph.scoring_history()
        """
        return self._parms.get("use_all_factor_levels")

    @use_all_factor_levels.setter
    def use_all_factor_levels(self, use_all_factor_levels):
        assert_is_type(use_all_factor_levels, None, bool)
        self._parms["use_all_factor_levels"] = use_all_factor_levels

    @property
    def export_checkpoints_dir(self):
        """
        Automatically export generated models to this directory.

        Type: ``str``.

        :examples:

        >>> import tempfile
        >>> from os import listdir
        >>> heart = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/coxph_test/heart.csv")
        >>> predictor = "age"
        >>> response = "event"
        >>> checkpoints_dir = tempfile.mkdtemp()
        >>> coxph = H2OCoxProportionalHazardsEstimator(start_column="start",
        ...                                            stop_column="stop",
        ...                                            export_checkpoints_dir=checkpoints_dir)
        >>> coxph.train(x=predictor,
        ...             y=response,
        ...             training_frame=heart)
        >>> len(listdir(checkpoints_dir))
        """
        return self._parms.get("export_checkpoints_dir")

    @export_checkpoints_dir.setter
    def export_checkpoints_dir(self, export_checkpoints_dir):
        assert_is_type(export_checkpoints_dir, None, str)
        self._parms["export_checkpoints_dir"] = export_checkpoints_dir

    @property
    def single_node_mode(self):
        """
        Run on a single node to reduce the effect of network overhead (for smaller datasets)

        Type: ``bool``, defaults to ``False``.
        """
        return self._parms.get("single_node_mode")

    @single_node_mode.setter
    def single_node_mode(self, single_node_mode):
        assert_is_type(single_node_mode, None, bool)
        self._parms["single_node_mode"] = single_node_mode


    @property
    def baseline_hazard_frame(self):
        if (self._model_json is not None
                and self._model_json.get("output", {}).get("baseline_hazard", {}).get("name") is not None):
            baseline_hazard_name = self._model_json["output"]["baseline_hazard"]["name"]
            return H2OFrame.get_frame(baseline_hazard_name)

    @property
    def baseline_survival_frame(self):
        if (self._model_json is not None
                and self._model_json.get("output", {}).get("baseline_survival", {}).get("name") is not None):
            baseline_survival_name = self._model_json["output"]["baseline_survival"]["name"]
            return H2OFrame.get_frame(baseline_survival_name)