# -*- encoding: utf-8 -*-
"""
H2O TargetEncoder.
:copyright: (c) 2016 H2O.ai
:license:   Apache License Version 2.0 (see LICENSE for details)
"""
from __future__ import absolute_import, division, print_function, unicode_literals
from h2o.expr import ExprNode
from h2o.frame import H2OFrame
from h2o.utils.typechecks import (assert_is_type)
from h2o import get_frame
import warnings
__all__ = ("TargetEncoder", )
class TargetEncoder(object):
    """
    Deprecated API. Please use H2OTargetEncoderEstimator instead.
    This is a main class that provides Python's API to the Java implementation of the target encoding.
    In general target encoding could be applied to three types of problems, namely:
    
         1) Binary classification (supported)
         2) Multi-class classification (not supported yet)
         3) Regression (not supported yet)
    :param List[str]-or-List[int] x: List of categorical column names or indices that we want to apply target encoding to.
    :param str-or-int y: the name or column index of the response variable in the data.
    :param str-or-int fold_column: the name or column index of the fold column in the data.
    :param boolean blending_avg: (deprecated) whether to perform blended average. Defaults to TRUE.
    :param boolean blended_avg: whether to perform blended average. Defaults to TRUE.
    :param double inflection_point: parameter for blending. Used to calculate `lambda`. Determines half of the minimal sample size
        for which we completely trust the estimate based on the sample in the particular level of categorical variable. Default value is 10.
    :param double smoothing: parameter for blending. Used to calculate `lambda`. Controls the rate of transition between
        the particular level's posterior probability and the prior probability. For smoothing values approaching infinity it becomes a hard
        threshold between the posterior and the prior probability. Default value is 20.
    :examples:
    >>> targetEncoder = TargetEncoder(x=te_columns, y=responseColumnName, blended_avg=True, inflection_point=10, smoothing=20)
    >>> targetEncoder.fit(trainFrame) 
    >>> encodedTrain = targetEncoder.transform(frame=trainFrame, holdout_type="kfold", seed=1234, is_train_or_valid=True)
    >>> encodedValid = targetEncoder.transform(frame=validFrame, holdout_type="none", noise=0.0, is_train_or_valid=True)
    >>> encodedTest = targetEncoder.transform(frame=testFrame, holdout_type="none", noise=0.0, is_train_or_valid=False)
    """
    #-------------------------------------------------------------------------------------------------------------------
    # Construction
    #-------------------------------------------------------------------------------------------------------------------
    def __init__(self, x=None, y=None, fold_column='', blended_avg=True, inflection_point=10, smoothing=20, **kwargs):
        """
        Deprecated API. Please use H2OTargetencoderEstimator instead.
        
        Creates instance of the TargetEncoder class and setting parameters that will be used in both `train` and `transform` methods.
        """
        if(type(x) == str or type(x) == int):
            x = [x]
        self._teColumns = x
        self._responseColumnName = y
        self._foldColumnName = fold_column
        if 'blending_avg' in kwargs:
            warnings.warn("Parameter blending_avg is deprecated; use blended_avg instead", category=DeprecationWarning, stacklevel=2)
            self._blending = kwargs.get('blending_avg')
        else:
            self._blending = blended_avg
          
        if not inflection_point > 0:
            raise ValueError("Parameter `inflection_point` should be greater than 0")
        
        if not smoothing > 0:
            raise ValueError("Parameter `smoothing` should be greater than 0")
        self._inflectionPoint = inflection_point
        self._smoothing = smoothing
[docs]    def fit(self, frame = None):
        """
        Deprecated API. Please use H2OTargetencoderEstimator instead.
        
        Returns encoding map as an object that maps 'column_name' -> 'frame_with_encoding_map_for_this_column_name'
        :param frame frame: An H2OFrame object with which to create the target encoding map
        :examples:
        >>> targetEncoder = TargetEncoder(x=te_columns, y=responseColumnName, blended_avg=True, inflection_point=10, smoothing=20)
        >>> targetEncoder.fit(trainFrame) 
        """
        self._teColumns = list(map(lambda i: frame.names[i], self._teColumns)) if all(isinstance(n, int) for n in self._teColumns) else self._teColumns
        self._responseColumnName = frame.names[self._responseColumnName] if isinstance(self._responseColumnName, int) else self._responseColumnName
        self._foldColumnName = frame.names[self._foldColumnName] if isinstance(self._foldColumnName, int) else self._foldColumnName
        
        self._encodingMap = ExprNode("target.encoder.fit", frame, self._teColumns, self._responseColumnName,
                                     self._foldColumnName)._eager_map_frame()
        return self._encodingMap 
[docs]    def encoding_map_frames(self):
        return list(map(lambda x: get_frame(x['key']['name']), self._encodingMap.frames))