public class ModelSelectionUtils
extends java.lang.Object
| Constructor and Description |
|---|
ModelSelectionUtils() |
| Modifier and Type | Method and Description |
|---|---|
static GLM[] |
buildGLMBuilders(GLMModel.GLMParameters[] trainingParams) |
static int |
calculateModelNumber(int numPredictors,
int maxPredictorNumber)
Given the number of predictors in the training frame and the maximum predictor number, we are going to calculate
the number of models that we need to build in order to find:
- best model with 1 predictor;
- best model with 2 predictors;
...
|
static java.lang.String[] |
extractPredictorNames(ModelSelectionModel.ModelSelectionParameters parms,
DataInfo dinfo,
java.lang.String foldColumn) |
static java.util.List<java.lang.String> |
extraModelColumnNames(java.util.List<java.lang.String> coefNames,
GLMModel bestModel) |
static GLMModel |
findBestModel(GLM[] glmResults)
Given GLM run results of a fixed number of predictors, find the model with the best R2 value.
|
static int |
findBestR2Model(double lastBestR2,
GLMModel[] bestR2Models)
Given an array of GLMModels that have been built, find the one with the highest R2 value that exceeds lastBestR2.
|
static GLMModel.GLMParameters[] |
generateGLMParameters(water.fvec.Frame[] trainingFrames,
ModelSelectionModel.ModelSelectionParameters parms,
int nfolds,
java.lang.String foldColumn,
hex.Model.Parameters.FoldAssignmentScheme foldAssignment) |
static water.fvec.Frame[] |
generateMaxRTrainingFrames(ModelSelectionModel.ModelSelectionParameters parms,
java.lang.String[] predictorNames,
java.lang.String foldColumn,
java.util.List<java.lang.Integer> currSubsetIndices,
int newPredPos,
java.util.List<java.lang.Integer> validSubsets)
Given a predictor subset with indices stored in currSubsetIndices, an array of training frames is generated, each by
adding one predictor from predictorNames that is not already included in currSubsetIndices.
|
static water.fvec.Frame |
generateOneFrame(int[] predIndices,
ModelSelectionModel.ModelSelectionParameters parms,
java.lang.String[] predNames,
java.lang.String foldColumn)
Given a predictor indices set, this function will generate a training frame containing the predictors with
indices in predIndices.
|
static water.fvec.Frame[] |
generateTrainingFrames(ModelSelectionModel.ModelSelectionParameters parms,
int predNum,
java.lang.String[] predNames,
int numModels,
java.lang.String foldColumn) |
static void |
removeTrainingFrames(water.fvec.Frame[] trainingFrames) |
static void |
setParamField(hex.Model.Parameters params,
GLMModel.GLMParameters glmParam,
boolean superClassParams,
java.lang.reflect.Field[] paramFields,
java.util.List<java.lang.String> excludeList) |
static void |
updateLaterIndices(int[] currentPredIndices,
int indexUpdated,
int lastPredInd)
Given 5 predictors and say we want the combo of 3 predictors, this function will properly reset the predictor
combination indices, say from [0, 1, 4] -> [0, 2, 3] or [0, 3, 4] -> [1, 2, 3].
|
static void |
updatePredIndices(int[] currentPredIndices,
int[] indicesBounds)
Given predictor indices stored in currentPredIndices, we need to find the next combination of predictor indices
to use to generate the next combination.
|
static void |
updateValidSubset(java.util.List<java.lang.Integer> validSubset,
java.util.List<java.lang.Integer> originalSubset,
java.util.List<java.lang.Integer> currSubsetIndices) |
public static int calculateModelNumber(int numPredictors,
int maxPredictorNumber)
numPredictors - number of predictors in the training frame
maxPredictorNumber - maximum number of predictors of interest
public static water.fvec.Frame[] generateTrainingFrames(ModelSelectionModel.ModelSelectionParameters parms, int predNum, java.lang.String[] predNames, int numModels, java.lang.String foldColumn)
public static void updatePredIndices(int[] currentPredIndices,
int[] indicesBounds)
currentPredIndices -
indicesBounds -
public static void updateLaterIndices(int[] currentPredIndices,
int indexUpdated,
int lastPredInd)
currentPredIndices -
indexUpdated -
lastPredInd -
public static water.fvec.Frame generateOneFrame(int[] predIndices,
ModelSelectionModel.ModelSelectionParameters parms,
java.lang.String[] predNames,
java.lang.String foldColumn)
predIndices -
parms -
predNames -
public static water.fvec.Frame[] generateMaxRTrainingFrames(ModelSelectionModel.ModelSelectionParameters parms, java.lang.String[] predictorNames, java.lang.String foldColumn, java.util.List<java.lang.Integer> currSubsetIndices, int newPredPos, java.util.List<java.lang.Integer> validSubsets)
parms -
predictorNames -
foldColumn -
currSubsetIndices -
validSubsets - List containing only valid predictor indices to choose from
public static int findBestR2Model(double lastBestR2,
GLMModel[] bestR2Models)
lastBestR2 -
bestR2Models -
public static GLMModel.GLMParameters[] generateGLMParameters(water.fvec.Frame[] trainingFrames, ModelSelectionModel.ModelSelectionParameters parms, int nfolds, java.lang.String foldColumn, hex.Model.Parameters.FoldAssignmentScheme foldAssignment)
public static void setParamField(hex.Model.Parameters params,
GLMModel.GLMParameters glmParam,
boolean superClassParams,
java.lang.reflect.Field[] paramFields,
java.util.List<java.lang.String> excludeList)
public static GLM[] buildGLMBuilders(GLMModel.GLMParameters[] trainingParams)
public static void removeTrainingFrames(water.fvec.Frame[] trainingFrames)
public static GLMModel findBestModel(GLM[] glmResults)
glmResults -
public static java.lang.String[] extractPredictorNames(ModelSelectionModel.ModelSelectionParameters parms, DataInfo dinfo, java.lang.String foldColumn)
public static java.util.List<java.lang.String> extraModelColumnNames(java.util.List<java.lang.String> coefNames,
GLMModel bestModel)
public static void updateValidSubset(java.util.List<java.lang.Integer> validSubset,
java.util.List<java.lang.Integer> originalSubset,
java.util.List<java.lang.Integer> currSubsetIndices)