Source code for h2o.model.dim_reduction

# -*- encoding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
from h2o.utils.shared_utils import can_use_pandas
from h2o.utils.compatibility import *  # NOQA

from .model_base import ModelBase
from .metrics_base import *  # NOQA
import h2o


[docs]class H2ODimReductionModel(ModelBase): """ Dimension reduction model, such as PCA or GLRM. """
[docs] def varimp(self, use_pandas=False): """ Return the Importance of components associcated with a pca model. :param bool use_pandas: If True, then the variable importances will be returned as a pandas data frame. (Default is False.) """ model = self._model_json["output"] if "importance" in list(model.keys()) and model["importance"]: vals = model["importance"].cell_values header = model["importance"].col_header if use_pandas and can_use_pandas(): import pandas return pandas.DataFrame(vals, columns=header) else: return vals else: print("Warning: This model doesn't have importances of components.")
[docs] def num_iterations(self): """Get the number of iterations that it took to converge or reach max iterations.""" o = self._model_json["output"] return o["model_summary"]["number_of_iterations"][0]
[docs] def objective(self): """Get the final value of the objective function.""" o = self._model_json["output"] return o["model_summary"]["final_objective_value"][0]
[docs] def final_step(self): """Get the final step size for the model.""" o = self._model_json["output"] return o["model_summary"]["final_step_size"][0]
[docs] def archetypes(self): """The archetypes (Y) of the GLRM model.""" o = self._model_json["output"] yvals = o["archetypes"].cell_values archetypes = [] for yidx, yval in enumerate(yvals): archetypes.append(list(yvals[yidx])[1:]) return archetypes
[docs] def reconstruct(self, test_data, reverse_transform=False): """ Reconstruct the training data from the model and impute all missing values. :param H2OFrame test_data: The dataset upon which the model was trained. :param bool reverse_transform: Whether the transformation of the training data during model-building should be reversed on the reconstructed frame. :returns: the approximate reconstruction of the training data. """ if test_data is None or test_data.nrow == 0: raise ValueError("Must specify test data") j = h2o.api("POST /3/Predictions/models/%s/frames/%s" % (self.model_id, test_data.frame_id), data={"reconstruct_train": True, "reverse_transform": reverse_transform}) return h2o.get_frame(j["model_metrics"][0]["predictions"]["frame_id"]["name"])
[docs] def proj_archetypes(self, test_data, reverse_transform=False): """ Convert archetypes of the model into original feature space. :param H2OFrame test_data: The dataset upon which the model was trained. :param bool reverse_transform: Whether the transformation of the training data during model-building should be reversed on the projected archetypes. :returns: model archetypes projected back into the original training data's feature space. """ if test_data is None or test_data.nrow == 0: raise ValueError("Must specify test data") j = h2o.api("POST /3/Predictions/models/%s/frames/%s" % (self.model_id, test_data.frame_id), data={"project_archetypes": True, "reverse_transform": reverse_transform}) return h2o.get_frame(j["model_metrics"][0]["predictions"]["frame_id"]["name"])
[docs] def screeplot(self, type="barplot", **kwargs): """ Produce the scree plot. Library ``matplotlib`` is required for this function. :param str type: either ``"barplot"`` or ``"lines"``. """ # check for matplotlib. exit if absent. is_server = kwargs.pop("server") if kwargs: raise ValueError("Unknown arguments %s to screeplot()" % ", ".join(kwargs.keys())) try: import matplotlib if is_server: matplotlib.use('Agg', warn=False) import matplotlib.pyplot as plt except ImportError: print("matplotlib is required for this function!") return variances = [s ** 2 for s in self._model_json['output']['importance'].cell_values[0][1:]] plt.xlabel('Components') plt.ylabel('Variances') plt.title('Scree Plot') plt.xticks(list(range(1, len(variances) + 1))) if type == "barplot": plt.bar(list(range(1, len(variances) + 1)), variances) elif type == "lines": plt.plot(list(range(1, len(variances) + 1)), variances, 'b--') if not is_server: plt.show()