public class ModelSelectionUtils
extends java.lang.Object
| Constructor and Description |
|---|
ModelSelectionUtils() |
| Modifier and Type | Method and Description |
|---|---|
static GLM[] |
buildGLMBuilders(GLMModel.GLMParameters[] trainingParams) |
static java.lang.String[] |
extractPredictorNames(ModelSelectionModel.ModelSelectionParameters parms,
DataInfo dinfo,
java.lang.String foldColumn) |
static java.util.List<java.lang.String> |
extraModelColumnNames(java.util.List<java.lang.String> coefNames,
GLMModel bestModel) |
static GLMModel |
findBestModel(GLM[] glmResults)
Given GLM run results of a fixed number of predictors, find the model with the best R2 value.
|
static int |
findBestR2Model(double lastBestR2,
GLMModel[] bestR2Models)
Given an array of GLMModels that have been built, find the one with the highest R2 value that exceeds lastBestR2.
|
static int |
findMinAbs(java.util.List<java.lang.Double> zList) |
static GLMModel.GLMParameters[] |
generateGLMParameters(water.fvec.Frame[] trainingFrames,
ModelSelectionModel.ModelSelectionParameters parms,
int nfolds,
java.lang.String foldColumn,
hex.Model.Parameters.FoldAssignmentScheme foldAssignment) |
static water.fvec.Frame[] |
generateMaxRTrainingFrames(ModelSelectionModel.ModelSelectionParameters parms,
java.lang.String[] predictorNames,
java.lang.String foldColumn,
java.util.List<java.lang.Integer> currSubsetIndices,
int newPredPos,
java.util.List<java.lang.Integer> validSubsets)
Given a predictor subset with indices stored in currSubsetIndices, an array of training frames is generated, each by
adding one predictor from predictorNames that is not already included in currSubsetIndices.
|
static water.fvec.Frame |
generateOneFrame(int[] predIndices,
ModelSelectionModel.ModelSelectionParameters parms,
java.lang.String[] predNames,
java.lang.String foldColumn)
Given a set of predictor indices, this function will generate a training frame containing the predictors with
indices in predIndices.
|
static water.fvec.Frame[] |
generateTrainingFrames(ModelSelectionModel.ModelSelectionParameters parms,
int predNum,
java.lang.String[] predNames,
int numModels,
java.lang.String foldColumn) |
static java.lang.String |
joinDouble(double[] val) |
static void |
removeTrainingFrames(water.fvec.Frame[] trainingFrames) |
static void |
setParamField(hex.Model.Parameters params,
GLMModel.GLMParameters glmParam,
boolean superClassParams,
java.lang.reflect.Field[] paramFields,
java.util.List<java.lang.String> excludeList) |
static double[][] |
shrinkDoubleArray(double[][] array,
int numModels) |
static water.Key[] |
shrinkKeyArray(water.Key[] array,
int numModels) |
static java.lang.String[][] |
shrinkStringArray(java.lang.String[][] array,
int numModels) |
static void |
updateLaterIndices(int[] currentPredIndices,
int indexUpdated,
int lastPredInd)
Given 5 predictors, and say we want combinations of 3 predictors, this function will properly reset the predictor
combination indices, say from [0, 1, 4] -> [0, 2, 3] or [0, 3, 4] -> [1, 2, 3].
|
static void |
updatePredIndices(int[] currentPredIndices,
int[] indicesBounds)
Given predictor indices stored in currentPredIndices, we need to find the next combination of predictor indices
to use.
|
static void |
updateValidSubset(java.util.List<java.lang.Integer> validSubset,
java.util.List<java.lang.Integer> originalSubset,
java.util.List<java.lang.Integer> currSubsetIndices) |
public static water.fvec.Frame[] generateTrainingFrames(ModelSelectionModel.ModelSelectionParameters parms, int predNum, java.lang.String[] predNames, int numModels, java.lang.String foldColumn)
public static void updatePredIndices(int[] currentPredIndices,
int[] indicesBounds)
Parameters: currentPredIndices, indicesBounds
public static void updateLaterIndices(int[] currentPredIndices,
int indexUpdated,
int lastPredInd)
Parameters: currentPredIndices, indexUpdated, lastPredInd
public static water.fvec.Frame generateOneFrame(int[] predIndices,
ModelSelectionModel.ModelSelectionParameters parms,
java.lang.String[] predNames,
java.lang.String foldColumn)
Parameters: predIndices, parms, predNames
public static water.fvec.Frame[] generateMaxRTrainingFrames(ModelSelectionModel.ModelSelectionParameters parms, java.lang.String[] predictorNames, java.lang.String foldColumn, java.util.List<java.lang.Integer> currSubsetIndices, int newPredPos, java.util.List<java.lang.Integer> validSubsets)
Parameters: parms, predictorNames, foldColumn, currSubsetIndices
validSubsets - List containing only valid predictor indices to choose from
public static int findMinAbs(java.util.List<java.lang.Double> zList)
public static java.lang.String[][] shrinkStringArray(java.lang.String[][] array,
int numModels)
public static double[][] shrinkDoubleArray(double[][] array,
int numModels)
public static water.Key[] shrinkKeyArray(water.Key[] array,
int numModels)
public static java.lang.String joinDouble(double[] val)
public static int findBestR2Model(double lastBestR2,
GLMModel[] bestR2Models)
Parameters: lastBestR2, bestR2Models
public static GLMModel.GLMParameters[] generateGLMParameters(water.fvec.Frame[] trainingFrames, ModelSelectionModel.ModelSelectionParameters parms, int nfolds, java.lang.String foldColumn, hex.Model.Parameters.FoldAssignmentScheme foldAssignment)
public static void setParamField(hex.Model.Parameters params,
GLMModel.GLMParameters glmParam,
boolean superClassParams,
java.lang.reflect.Field[] paramFields,
java.util.List<java.lang.String> excludeList)
public static GLM[] buildGLMBuilders(GLMModel.GLMParameters[] trainingParams)
public static void removeTrainingFrames(water.fvec.Frame[] trainingFrames)
public static GLMModel findBestModel(GLM[] glmResults)
Parameters: glmResults
public static java.lang.String[] extractPredictorNames(ModelSelectionModel.ModelSelectionParameters parms, DataInfo dinfo, java.lang.String foldColumn)
public static java.util.List<java.lang.String> extraModelColumnNames(java.util.List<java.lang.String> coefNames,
GLMModel bestModel)
public static void updateValidSubset(java.util.List<java.lang.Integer> validSubset,
java.util.List<java.lang.Integer> originalSubset,
java.util.List<java.lang.Integer> currSubsetIndices)