Source code for h2o.estimators.coxph

#!/usr/bin/env python
# -*- encoding: utf-8 -*-
#
# This file is auto-generated by h2o-3/h2o-bindings/bin/gen_python.py
# Copyright 2016 H2O.ai;  Apache License Version 2.0 (see LICENSE for details)
#
from __future__ import absolute_import, division, print_function, unicode_literals

from h2o.estimators.estimator_base import H2OEstimator
from h2o.exceptions import H2OValueError
from h2o.frame import H2OFrame
from h2o.utils.typechecks import assert_is_type, Enum, numeric


[docs]class H2OCoxProportionalHazardsEstimator(H2OEstimator):
    """
    Cox Proportional Hazards

    Trains a Cox Proportional Hazards Model (CoxPH) on an H2O dataset.
    """

    algo = "coxph"
    param_names = {"model_id", "training_frame", "start_column", "stop_column", "response_column", "ignored_columns",
                   "weights_column", "offset_column", "stratify_by", "ties", "init", "lre_min", "max_iterations",
                   "interactions", "interaction_pairs", "interactions_only", "use_all_factor_levels",
                   "export_checkpoints_dir", "single_node_mode"}

    def __init__(self, **kwargs):
        super(H2OCoxProportionalHazardsEstimator, self).__init__()
        self._parms = {}
        for pname, pvalue in kwargs.items():
            if pname == 'model_id':
                self._id = pvalue
                self._parms["model_id"] = pvalue
            elif pname in self.param_names:
                # Using setattr(...) will invoke type-checking of the arguments
                setattr(self, pname, pvalue)
            else:
                raise H2OValueError("Unknown parameter %s = %r" % (pname, pvalue))

    @property
    def training_frame(self):
        """
        Id of the training data frame.

        Type: ``H2OFrame``.

        :examples:

        >>> heart = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/coxph_test/heart.csv")
        >>> predictor = "age"
        >>> response = "event"
        >>> train, valid = heart.split_frame(ratios=[.8])
        >>> heart_coxph = H2OCoxProportionalHazardsEstimator(start_column="start",
        ...                                                  stop_column="stop")
        >>> heart_coxph.train(x=predictor,
        ...                   y=response,
        ...                   training_frame=train,
        ...                   validation_frame=valid)
        >>> heart_coxph.scoring_history()
        """
        return self._parms.get("training_frame")

    @training_frame.setter
    def training_frame(self, training_frame):
        self._parms["training_frame"] = H2OFrame._validate(training_frame, 'training_frame')


    @property
    def start_column(self):
        """
        Start Time Column.

        Type: ``str``.

        :examples:

        >>> heart = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/coxph_test/heart.csv")
        >>> predictor = "age"
        >>> response = "event"
        >>> train, valid = heart.split_frame(ratios=[.8])
        >>> heart_coxph = H2OCoxProportionalHazardsEstimator(start_column="start",
        ...                                                  stop_column="stop")
        >>> heart_coxph.train(x=predictor,
        ...                   y=response,
        ...                   training_frame=train,
        ...                   validation_frame=valid)
        >>> heart_coxph.scoring_history()
        """
        return self._parms.get("start_column")

    @start_column.setter
    def start_column(self, start_column):
        assert_is_type(start_column, None, str)
        self._parms["start_column"] = start_column


    @property
    def stop_column(self):
        """
        Stop Time Column.

        Type: ``str``.

        :examples:

        >>> heart = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/coxph_test/heart.csv")
        >>> predictor = "age"
        >>> response = "event"
        >>> train, valid = heart.split_frame(ratios=[.8])
        >>> heart_coxph = H2OCoxProportionalHazardsEstimator(start_column="start",
        ...                                                  stop_column="stop")
        >>> heart_coxph.train(x=predictor,
        ...                   y=response,
        ...                   training_frame=train,
        ...                   validation_frame=valid)
        >>> heart_coxph.scoring_history()
        """
        return self._parms.get("stop_column")

    @stop_column.setter
    def stop_column(self, stop_column):
        assert_is_type(stop_column, None, str)
        self._parms["stop_column"] = stop_column


    @property
    def response_column(self):
        """
        Response variable column.

        Type: ``str``.
        """
        return self._parms.get("response_column")

    @response_column.setter
    def response_column(self, response_column):
        assert_is_type(response_column, None, str)
        self._parms["response_column"] = response_column


    @property
    def ignored_columns(self):
        """
        Names of columns to ignore for training.

        Type: ``List[str]``.
        """
        return self._parms.get("ignored_columns")

    @ignored_columns.setter
    def ignored_columns(self, ignored_columns):
        assert_is_type(ignored_columns, None, [str])
        self._parms["ignored_columns"] = ignored_columns


    @property
    def weights_column(self):
        """
        Column with observation weights. Giving some observation a weight of zero is equivalent to excluding it from the
        dataset; giving an observation a relative weight of 2 is equivalent to repeating that row twice. Negative
        weights are not allowed. Note: Weights are per-row observation weights and do not increase the size of the data
        frame. This is typically the number of times a row is repeated, but non-integer values are supported as well.
        During training, rows with higher weights matter more, due to the larger loss function pre-factor.

        Type: ``str``.
        """
        return self._parms.get("weights_column")

    @weights_column.setter
    def weights_column(self, weights_column):
        assert_is_type(weights_column, None, str)
        self._parms["weights_column"] = weights_column


    @property
    def offset_column(self):
        """
        Offset column. This will be added to the combination of columns before applying the link function.

        Type: ``str``.

        :examples:

        >>> heart = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/coxph_test/heart.csv")
        >>> predictor = "age"
        >>> response = "event"
        >>> heart_coxph = H2OCoxProportionalHazardsEstimator(start_column="start",
        ...                                                  stop_column="stop",
        ...                                                  offset_column="transplant")
        >>> heart_coxph.train(x=predictor,
        ...                   y=response,
        ...                   training_frame=heart)
        >>> heart_coxph.scoring_history()
        """
        return self._parms.get("offset_column")

    @offset_column.setter
    def offset_column(self, offset_column):
        assert_is_type(offset_column, None, str)
        self._parms["offset_column"] = offset_column


    @property
    def stratify_by(self):
        """
        List of columns to use for stratification.

        Type: ``List[str]``.
        """
        return self._parms.get("stratify_by")

    @stratify_by.setter
    def stratify_by(self, stratify_by):
        assert_is_type(stratify_by, None, [str])
        self._parms["stratify_by"] = stratify_by


    @property
    def ties(self):
        """
        Method for Handling Ties.

        One of: ``"efron"``, ``"breslow"``  (default: ``"efron"``).

        :examples:

        >>> heart = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/coxph_test/heart.csv")
        >>> predictor = "age"
        >>> response = "event"
        >>> train, valid = heart.split_frame(ratios=[.8])
        >>> heart_coxph = H2OCoxProportionalHazardsEstimator(start_column="start",
        ...                                                  stop_column="stop",
        ...                                                  ties="breslow")
        >>> heart_coxph.train(x=predictor,
        ...                   y=response,
        ...                   training_frame=train,
        ...                   validation_frame=valid)
        >>> heart_coxph.scoring_history()
        """
        return self._parms.get("ties")

    @ties.setter
    def ties(self, ties):
        assert_is_type(ties, None, Enum("efron", "breslow"))
        self._parms["ties"] = ties


    @property
    def init(self):
        """
        Coefficient starting value.

        Type: ``float``  (default: ``0``).

        :examples:

        >>> heart = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/coxph_test/heart.csv")
        >>> predictor = "age"
        >>> response = "event"
        >>> heart_coxph = H2OCoxProportionalHazardsEstimator(start_column="start",
        ...                                                  stop_column="stop",
        ...                                                  init=2.9)
        >>> heart_coxph.train(x=predictor,
        ...                   y=response,
        ...                   training_frame=heart)
        >>> heart_coxph.scoring_history()
        """
        return self._parms.get("init")

    @init.setter
    def init(self, init):
        assert_is_type(init, None, numeric)
        self._parms["init"] = init


    @property
    def lre_min(self):
        """
        Minimum log-relative error.

        Type: ``float``  (default: ``9``).

        :examples:

        >>> heart = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/coxph_test/heart.csv")
        >>> predictor = "age"
        >>> response = "event"
        >>> heart_coxph = H2OCoxProportionalHazardsEstimator(start_column="start",
        ...                                                  stop_column="stop",
        ...                                                  lre_min=5)
        >>> heart_coxph.train(x=predictor,
        ...                   y=response,
        ...                   training_frame=heart)
        >>> heart_coxph.scoring_history()
        """
        return self._parms.get("lre_min")

    @lre_min.setter
    def lre_min(self, lre_min):
        assert_is_type(lre_min, None, numeric)
        self._parms["lre_min"] = lre_min


    @property
    def max_iterations(self):
        """
        Maximum number of iterations.

        Type: ``int``  (default: ``20``).

        :examples:

        >>> heart = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/coxph_test/heart.csv")
        >>> predictor = "age"
        >>> response = "event"
        >>> heart_coxph = H2OCoxProportionalHazardsEstimator(start_column="start",
        ...                                                  stop_column="stop",
        ...                                                  max_iterations=50)
        >>> heart_coxph.train(x=predictor,
        ...                   y=response,
        ...                   training_frame=heart)
        >>> heart_coxph.scoring_history()
        """
        return self._parms.get("max_iterations")

    @max_iterations.setter
    def max_iterations(self, max_iterations):
        assert_is_type(max_iterations, None, int)
        self._parms["max_iterations"] = max_iterations


    @property
    def interactions(self):
        """
        A list of predictor column indices to interact. All pairwise combinations will be computed for the list.

        Type: ``List[str]``.

        :examples:

        >>> heart = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/coxph_test/heart.csv")
        >>> predictor = "age"
        >>> response = "event"
        >>> interactions = ['start','stop']
        >>> heart_coxph = H2OCoxProportionalHazardsEstimator(start_column="start",
        ...                                                  stop_column="stop",
        ...                                                  interactions=interactions)
        >>> heart_coxph.train(x=predictor,
        ...                   y=response,
        ...                   training_frame=heart)
        >>> heart_coxph.scoring_history()
        """
        return self._parms.get("interactions")

    @interactions.setter
    def interactions(self, interactions):
        assert_is_type(interactions, None, [str])
        self._parms["interactions"] = interactions


    @property
    def interaction_pairs(self):
        """
        A list of pairwise (first order) column interactions.

        Type: ``List[tuple]``.

        :examples:

        >>> heart = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/coxph_test/heart.csv")
        >>> predictor = "age"
        >>> response = "event"
        >>> interaction_pairs = [("start","stop")]
        >>> heart_coxph = H2OCoxProportionalHazardsEstimator(start_column="start",
        ...                                                  stop_column="stop",
        ...                                                  interaction_pairs=interaction_pairs)
        >>> heart_coxph.train(x=predictor,
        ...                   y=response,
        ...                   training_frame=heart)
        >>> heart_coxph.scoring_history()
        """
        return self._parms.get("interaction_pairs")

    @interaction_pairs.setter
    def interaction_pairs(self, interaction_pairs):
        assert_is_type(interaction_pairs, None, [tuple])
        self._parms["interaction_pairs"] = interaction_pairs


    @property
    def interactions_only(self):
        """
        A list of columns that should only be used to create interactions but should not itself participate in model
        training.

        Type: ``List[str]``.

        :examples:

        >>> heart = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/coxph_test/heart.csv")
        >>> predictor = "age"
        >>> response = "event"
        >>> interactions = ['start','stop']
        >>> heart_coxph = H2OCoxProportionalHazardsEstimator(start_column="start",
        ...                                                  stop_column="stop",
        ...                                                  interactions_only=interactions)
        >>> heart_coxph.train(x=predictor,
        ...                   y=response,
        ...                   training_frame=heart)
        >>> heart_coxph.scoring_history()
        """
        return self._parms.get("interactions_only")

    @interactions_only.setter
    def interactions_only(self, interactions_only):
        assert_is_type(interactions_only, None, [str])
        self._parms["interactions_only"] = interactions_only


    @property
    def use_all_factor_levels(self):
        """
        (Internal. For development only!) Indicates whether to use all factor levels.

        Type: ``bool``  (default: ``False``).

        :examples:

        >>> heart = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/coxph_test/heart.csv")
        >>> predictor = "age"
        >>> response = "event"
        >>> heart_coxph = H2OCoxProportionalHazardsEstimator(start_column="start",
        ...                                                  stop_column="stop",
        ...                                                  use_all_factor_levels=True)
        >>> heart_coxph.train(x=predictor,
        ...                   y=response,
        ...                   training_frame=heart)
        >>> heart_coxph.scoring_history()
        """
        return self._parms.get("use_all_factor_levels")

    @use_all_factor_levels.setter
    def use_all_factor_levels(self, use_all_factor_levels):
        assert_is_type(use_all_factor_levels, None, bool)
        self._parms["use_all_factor_levels"] = use_all_factor_levels


    @property
    def export_checkpoints_dir(self):
        """
        Automatically export generated models to this directory.

        Type: ``str``.

        :examples:

        >>> import tempfile
        >>> from os import listdir
        >>> heart = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/coxph_test/heart.csv")
        >>> predictor = "age"
        >>> response = "event"
        >>> checkpoints_dir = tempfile.mkdtemp()
        >>> coxph = H2OCoxProportionalHazardsEstimator(start_column="start",
        ...                                            stop_column="stop",
        ...                                            export_checkpoints_dir=checkpoints_dir)
        >>> coxph.train(x=predictor,
        ...             y=response,
        ...             training_frame=heart)
        >>> len(listdir(checkpoints_dir))
        """
        return self._parms.get("export_checkpoints_dir")

    @export_checkpoints_dir.setter
    def export_checkpoints_dir(self, export_checkpoints_dir):
        assert_is_type(export_checkpoints_dir, None, str)
        self._parms["export_checkpoints_dir"] = export_checkpoints_dir


    @property
    def single_node_mode(self):
        """
        Run on a single node to reduce the effect of network overhead (for smaller datasets)

        Type: ``bool``  (default: ``False``).
        """
        return self._parms.get("single_node_mode")

    @single_node_mode.setter
    def single_node_mode(self, single_node_mode):
        assert_is_type(single_node_mode, None, bool)
        self._parms["single_node_mode"] = single_node_mode


    def _additional_used_columns(self, parms):
        """
        :return: Start and stop column if specified.
        """
        result = []
        for col in ["start_column", "stop_column"]:
            if col in parms and parms[col] is not None:
                result.append(parms[col])
        return result