Source code for h2o.assembly

# -*- encoding: utf-8 -*-
from __future__ import division, print_function, absolute_import, unicode_literals

import uuid

import h2o
from h2o.frame import H2OFrame
from h2o.utils.compatibility import *  # NOQA
from h2o.utils.shared_utils import urlopen, quoted


class H2OAssembly(object):
    """
    Extension class of Pipeline implementing additional methods:

      - to_pojo: Exports the assembly to a self-contained Java POJO used in a per-row, high-throughput environment.
      - union: Combine two H2OAssembly objects, the resulting row from each H2OAssembly are joined with simple
        concatenation. (Currently disabled: the implementation below is commented out.)

    Typical usage is to construct the assembly from a list of steps, call :meth:`fit` on an H2OFrame,
    and then optionally export the fitted assembly with :meth:`to_pojo`.
    """

    # static properties pointing to H2OFrame methods
    divide = H2OFrame.__truediv__
    plus = H2OFrame.__add__
    multiply = H2OFrame.__mul__
    minus = H2OFrame.__sub__
    less_than = H2OFrame.__lt__
    less_than_equal = H2OFrame.__le__
    equal_equal = H2OFrame.__eq__
    not_equal = H2OFrame.__ne__
    greater_than = H2OFrame.__gt__
    greater_than_equal = H2OFrame.__ge__


    def __init__(self, steps):
        """
        Build a new H2OAssembly.

        :param steps: A list of steps that sequentially transforms the input data. Each step is indexed
            as ``step[0]`` (the step's name) and ``step[1]`` (an object providing ``to_rest(name)``).
        """
        # Server-side assembly name; stays None until fit() has been called.
        self.id = None
        self.steps = steps
        # Only referenced by the (currently commented-out) union() method.
        self.fuzed = []
        self.in_colnames = None
        self.out_colnames = None


    @property
    def names(self):
        """
        Names of the assembly's steps, excluding the last step.

        :returns: a tuple with the first element of every step except the final one; an empty tuple
            when the assembly has no steps.
        """
        # NOTE(review): the final step's name is deliberately dropped here, as in the original
        # implementation -- presumably mirroring a Pipeline whose last step is special; confirm.
        return tuple(step[0] for step in self.steps)[:-1]


    def to_pojo(self, pojo_name="", path="", get_jar=True):
        """
        Export the fitted assembly as Java POJO source code.

        :param pojo_name: name used for the POJO in the request and for the output ``.java`` file. When
            empty, a name of the form ``AssemblyPOJO_<uuid4>`` is generated.
        :param path: directory to write ``<pojo_name>.java`` into. When empty, the Java source is printed
            to stdout instead and nothing is written to disk.
        :param get_jar: if True and ``path`` is non-empty, also download ``h2o-genmodel.jar`` into ``path``.

        :raises ValueError: if the assembly has not been fitted yet (there is no server-side assembly
            to export).
        """
        if self.id is None:
            # Without this guard the request below would be issued for the literal id "None".
            raise ValueError("H2OAssembly has not been fitted yet: call fit() before to_pojo().")
        if pojo_name == "":
            pojo_name = "AssemblyPOJO_" + str(uuid.uuid4())
        java = h2o.api("GET /99/Assembly.java/%s/%s" % (self.id, pojo_name))
        if path == "":
            print(java)
            return
        file_path = path + "/" + pojo_name + ".java"
        with open(file_path, 'w', encoding="utf-8") as f:
            f.write(java)  # this had better be utf-8 ?
        if get_jar:
            url = h2o.connection().make_url("h2o-genmodel.jar")
            filename = path + "/" + "h2o-genmodel.jar"
            response = urlopen()(url)
            # Always release the HTTP connection, even if writing the jar fails. try/finally is used
            # rather than `with` because not every urlopen implementation returns a context manager.
            try:
                with open(filename, "wb") as f:
                    f.write(response.read())
            finally:
                response.close()


    # def union(self, assemblies):
    #   # fuse the assemblies onto this one, each is added to the end going left -> right
    #   # assemblies must be a list of namedtuples.
    #   #   [(H2OAssembly, X, y, {params}), ..., (H2OAssembly, X, y, {params})]
    #   for i in assemblies:
    #     if not isinstance(i, namedtuple):
    #       raise ValueError("Not a namedtuple. Assembly must be of type collections.namedtuple with fields [assembly, x, params].")
    #     if i._fields != ('assembly','x','params'):
    #       raise ValueError("Assembly must be a namedtuple with fields ('assembly', 'x', 'params').")
    #     self.fuzed.append(i)


    def fit(self, fr, **fit_params):
        """
        Send the assembly's steps to the backend and apply them to a frame.

        :param fr: the frame to transform; its ``frame_id`` is sent with the request.
        :param fit_params: accepted for interface compatibility; currently ignored.

        :returns: the H2OFrame looked up from the result name in the server's response.
        """
        # Each step is serialized through its own to_rest(), given the step's name.
        rest_steps = [step[1].to_rest(step[0]) for step in self.steps]
        # Double quotes inside a step are swapped for single quotes before each step is quoted and
        # the whole list is assembled into one bracketed, comma-separated string.
        res = "[" + ",".join([quoted(r.replace('"', "'")) for r in rest_steps]) + "]"
        j = h2o.api("POST /99/Assembly", data={"steps": res, "frame": fr.frame_id})
        # Remember the server-side assembly name so to_pojo() can refer to it later.
        self.id = j["assembly"]["name"]
        return H2OFrame.get_frame(j["result"]["name"])




class H2OCol(object):
    """
    Wrapper for a column used as the left or right argument of an H2OBinaryOp step.

    Wrap an argument in H2OCol to signal that the column comes from the frame being fitted on.
    """

    def __init__(self, column):
        """
        Wrap a column reference.

        :param column: the column to wrap; stored unchanged as ``self.col``.
        """
        self.col = column

        # TODO: handle arbitrary (non H2OFrame) inputs -- sql, web, file, generated