public abstract class GenModel extends java.lang.Object implements IGenModel, IGeneratedModel, java.io.Serializable
Modifier and Type | Field and Description |
---|---|
java.lang.String[][] |
_domains
Categorical (factor/enum) mappings, per column.
|
java.lang.String[] |
_names
Column names; last is response for supervised models
|
java.lang.String |
_offsetColumn
Name of the column with offsets (used for certain types of models).
|
Constructor and Description |
---|
GenModel(java.lang.String[] names,
java.lang.String[][] domains) |
Modifier and Type | Method and Description |
---|---|
static boolean |
bitSetContains(byte[] bits,
int nbits,
int bitoff,
double dnum) |
static boolean |
bitSetIsInRange(int nbits,
int bitoff,
double dnum) |
boolean |
calibrateClassProbabilities(double[] preds)
Subclasses implement calibration of class probabilities.
|
static double[] |
correctProbabilities(double[] scored,
double[] priorClassDist,
double[] modelClassDist)
Corrects a given list of class probabilities, produced as a prediction by a model, back to the prior class distribution.
|
static java.lang.String |
createAuxKey(java.lang.String k) |
static void |
GBM_rescale(double[] preds) |
int |
getColIdx(java.lang.String name)
Returns index of a column with given name, or -1 if the column is not found.
|
java.lang.String[][] |
getDomainValues()
Returns domain values for all columns, including the response column.
|
java.lang.String[] |
getDomainValues(int i)
Returns domain values for the i-th column.
|
java.lang.String[] |
getDomainValues(java.lang.String name)
Gets domain of the given column.
|
java.lang.String |
getHeader()
???
|
java.util.EnumSet<ModelCategory> |
getModelCategories()
Override this for models that may produce results in different categories.
|
abstract ModelCategory |
getModelCategory()
Returns this model category.
|
java.lang.String[] |
getNames()
The names of all columns used, including response and offset columns.
|
int |
getNumClasses(int colIdx)
Get number of classes in the given column.
|
int |
getNumCols()
Returns number of columns used as input for training (i.e., exclude response and offset columns).
|
int |
getNumResponseClasses()
Returns the number of classes in the response column.
|
static int |
getPrediction(double[] preds,
double[] priorClassDist,
double[] data,
double threshold)
Utility function to get the best prediction from an array holding the class
prediction distribution.
|
int |
getPredsSize()
Returns the expected size of preds array which is passed to `predict(double[], double[])` function.
|
int |
getPredsSize(ModelCategory mc) |
int |
getResponseIdx()
Returns the index of the response column inside the array returned by getDomainValues().
|
java.lang.String |
getResponseName()
The name of the response column.
|
abstract java.lang.String |
getUUID()
Returns model's unique identifier.
|
static double |
GLM_identityInv(double x) |
static double |
GLM_inverseInv(double x) |
static double |
GLM_logInv(double x) |
static double |
GLM_logitInv(double x) |
static double |
GLM_tweedieInv(double x,
double tweedie_link_power) |
static void |
img2pixels(java.awt.image.BufferedImage img,
int w,
int h,
int channels,
float[] pixels,
int start,
float[] mean) |
boolean |
isAutoEncoder()
Returns true if this model represents an AutoEncoder.
|
boolean |
isClassifier()
Returns true if this model represents a classifier, else it is used for regression.
|
boolean |
isSupervised()
Returns true for supervised models.
|
static int |
KMeans_closest(double[][] centers,
double[] point,
java.lang.String[][] domains) |
static double |
KMeans_distance(double[] center,
double[] point,
java.lang.String[][] domains) |
static double |
KMeans_distance(double[] center,
float[] point,
int[] modes,
double[] colSum,
double[] colSumSq) |
static void |
Kmeans_preprocessData(double[] data,
double[] means,
double[] mults,
int[] modes) |
static double |
Kmeans_preprocessData(double d,
int i,
double[] means,
double[] mults,
int[] modes) |
static double[] |
KMeans_simplex(double[][] centers,
double[] point,
java.lang.String[][] domains) |
static double |
log_rescale(double[] preds) |
int |
mapEnum(int colIdx,
java.lang.String enumValue)
Maps given column's categorical to the integer used by this model (returns -1 if mapping not found).
|
int |
nclasses()
Returns number of output classes for classifiers, 1 for regression models, and 0 for unsupervised models.
|
int |
nfeatures()
Returns number of input features.
|
abstract double[] |
score0(double[] row,
double[] preds)
Subclasses implement the scoring logic.
|
double[] |
score0(double[] row,
double offset,
double[] preds) |
static void |
setInput(double[] from,
float[] to,
int _nums,
int _cats,
int[] _catOffsets,
double[] _normMul,
double[] _normSub,
boolean useAllFactorLevels,
boolean replaceMissingWithZero) |
public final java.lang.String[] _names
public final java.lang.String[][] _domains
public java.lang.String _offsetColumn
public boolean isSupervised()
isSupervised
in interface IGenModel
public int nfeatures()
public int nclasses()
public abstract ModelCategory getModelCategory()
getModelCategory
in interface IGenModel
ModelCategory
public java.util.EnumSet<ModelCategory> getModelCategories()
getModelCategories
in interface IGenModel
public abstract java.lang.String getUUID()
IGeneratedModel
getUUID
in interface IGeneratedModel
public int getNumCols()
getNumCols
in interface IGeneratedModel
public java.lang.String[] getNames()
getNames
in interface IGeneratedModel
public java.lang.String getResponseName()
getResponseName
in interface IGeneratedModel
public int getResponseIdx()
getResponseIdx
in interface IGeneratedModel
public int getNumClasses(int colIdx)
getNumClasses
in interface IGeneratedModel
public int getNumResponseClasses()
getNumResponseClasses
in interface IGeneratedModel
public boolean isClassifier()
isClassifier
in interface IGeneratedModel
public boolean isAutoEncoder()
isAutoEncoder
in interface IGeneratedModel
public java.lang.String[] getDomainValues(java.lang.String name)
getDomainValues
in interface IGeneratedModel
name
- column name
public java.lang.String[] getDomainValues(int i)
getDomainValues
in interface IGeneratedModel
i
- index of column
public java.lang.String[][] getDomainValues()
getDomainValues
in interface IGeneratedModel
public int getColIdx(java.lang.String name)
getColIdx
in interface IGeneratedModel
public int mapEnum(int colIdx, java.lang.String enumValue)
mapEnum
in interface IGeneratedModel
public int getPredsSize()
getPredsSize
in interface IGeneratedModel
public int getPredsSize(ModelCategory mc)
public static java.lang.String createAuxKey(java.lang.String k)
public abstract double[] score0(double[] row, double[] preds)
public double[] score0(double[] row, double offset, double[] preds)
public boolean calibrateClassProbabilities(double[] preds)
public static double[] correctProbabilities(double[] scored, double[] priorClassDist, double[] modelClassDist)
The implementation is based on Eq. (27) in the paper.
scored
- list of class probabilities beginning at index 1
priorClassDist
- original class distribution
modelClassDist
- class distribution used for model building (e.g., data was oversampled)
public static int getPrediction(double[] preds, double[] priorClassDist, double[] data, double threshold)
preds
- an array holding the prediction distribution; its length is equal to the number of classes + 1
priorClassDist
- prior class probabilities (used to break ties)
data
- test data
threshold
- threshold for binary classifier
public static boolean bitSetContains(byte[] bits, int nbits, int bitoff, double dnum)
public static boolean bitSetIsInRange(int nbits, int bitoff, double dnum)
public static void Kmeans_preprocessData(double[] data, double[] means, double[] mults, int[] modes)
public static double Kmeans_preprocessData(double d, int i, double[] means, double[] mults, int[] modes)
public static int KMeans_closest(double[][] centers, double[] point, java.lang.String[][] domains)
public static double[] KMeans_simplex(double[][] centers, double[] point, java.lang.String[][] domains)
public static double KMeans_distance(double[] center, float[] point, int[] modes, double[] colSum, double[] colSumSq)
public static double KMeans_distance(double[] center, double[] point, java.lang.String[][] domains)
public static double log_rescale(double[] preds)
public static void GBM_rescale(double[] preds)
public static double GLM_identityInv(double x)
public static double GLM_logitInv(double x)
public static double GLM_logInv(double x)
public static double GLM_inverseInv(double x)
public static double GLM_tweedieInv(double x, double tweedie_link_power)
public java.lang.String getHeader()
public static void setInput(double[] from, float[] to, int _nums, int _cats, int[] _catOffsets, double[] _normMul, double[] _normSub, boolean useAllFactorLevels, boolean replaceMissingWithZero)
public static void img2pixels(java.awt.image.BufferedImage img, int w, int h, int channels, float[] pixels, int start, float[] mean) throws java.io.IOException
java.io.IOException