public class SpeeDRF extends Job.ValidatedJob
| Modifier and Type | Class and Description |
|---|---|
static class |
SpeeDRF.DRFParams
RF execution parameters.
|
static class |
SpeeDRF.DRFTask |
Job.ValidatedJob.Response2CMAdaptorJob.ChunkProgress, Job.ChunkProgressJob, Job.ColumnsJob, Job.ColumnsResJob, Job.Fail, Job.FrameJob, Job.HexJob, Job.JobCancelledException, Job.JobHandle, Job.JobState, Job.List, Job.ModelJob, Job.ModelJobWithoutClassificationField, Job.Progress, Job.ProgressMonitor, Job.ValidatedJobRequest2.ColumnSelect, Request2.Dependent, Request2.DoClassBoolean, Request2.DRFCopyDataBoolean, Request2.MultiVecSelect, Request2.MultiVecSelectType, Request2.TypeaheadKey, Request2.VecClassSelect, Request2.VecSelectRequest.API, Request.Default, Request.Filter, Request.Validator<V>RequestBuilders.ArrayBuilder, RequestBuilders.ArrayHeaderRowBuilder, RequestBuilders.ArrayRowBuilder, RequestBuilders.ArrayRowElementBuilder, RequestBuilders.ArrayRowSingleColBuilder, RequestBuilders.BooleanStringBuilder, RequestBuilders.Builder, RequestBuilders.ElementBuilder, RequestBuilders.HideBuilder, RequestBuilders.KeyCellBuilder, RequestBuilders.KeyElementBuilder, RequestBuilders.KeyLinkElementBuilder, RequestBuilders.KeyMinAvgMaxBuilder, RequestBuilders.NoCaptionObjectBuilder, RequestBuilders.ObjectBuilder, RequestBuilders.PaginatedTable, RequestBuilders.PreFormattedBuilder, RequestBuilders.Response, RequestBuilders.ResponseInfo, RequestBuilders.WarningCellBuilderRequestArguments.Argument<T>, RequestArguments.Bool, RequestArguments.CaseModeSelect, RequestArguments.CaseSelect, RequestArguments.ClassifyBool, RequestArguments.DRFCopyDataBool, RequestArguments.EnumArgument<T extends java.lang.Enum<T>>, RequestArguments.ExistingFile, RequestArguments.FrameClassVec, RequestArguments.FrameKeyMultiVec, RequestArguments.FrameKeyVec, RequestArguments.GeneralFile, RequestArguments.H2OCategoryStrata, RequestArguments.H2OCategoryWeights, RequestArguments.H2OExistingKey, RequestArguments.H2OGLMModelKey, RequestArguments.H2OHexKey, RequestArguments.H2OHexKeyCol, RequestArguments.H2OIllegalArgumentException, RequestArguments.H2OKey, RequestArguments.H2OKey2, 
RequestArguments.H2OKMeansModelKey, RequestArguments.H2OModelKey<TM extends OldModel,TK extends TypeaheadKeysRequest>, RequestArguments.HexAllColumnSelect, RequestArguments.HexColumnSelect, RequestArguments.HexKeyClassCol, RequestArguments.HexNonClassColumnSelect, RequestArguments.HexNonConstantColumnSelect, RequestArguments.HexPCAColumnSelect, RequestArguments.InputCheckBox, RequestArguments.InputSelect<T>, RequestArguments.InputText<T>, RequestArguments.Int, RequestArguments.LongInt, RequestArguments.MultipleSelect<T>, RequestArguments.MultipleText<T>, RequestArguments.NTree, RequestArguments.NumberSequence, RequestArguments.NumberSequenceFloat, RequestArguments.Real, RequestArguments.Record<T>, RequestArguments.RFModelKey, RequestArguments.RSeq, RequestArguments.RSeqFloat, RequestArguments.Str, RequestArguments.StringList, RequestArguments.TypeaheadInputText<T>
RequestStatics.RequestType
Constants.Extensions, Constants.Schemes, Constants.Suffixes
| Modifier and Type | Field and Description |
|---|---|
int |
_exclusiveSplitLimit |
Key |
_modelKey |
boolean |
_useNonLocalData |
boolean |
balance_classes
For imbalanced data, balance training data class counts via
over/under-sampling.
|
int |
bin_limit |
double[] |
class_weights |
static DocGen.FieldDoc[] |
DOC_FIELDS |
static java.lang.String |
DOC_GET |
SpeeDRF.DRFParams |
drfParams |
boolean |
importance |
float |
max_after_balance_size
When classes are balanced, limit the resulting dataset size to the
specified multiple of the original dataset size.
|
int |
max_depth |
int |
mtry |
int |
num_trees |
boolean |
oobee |
double |
sample |
Sampling.Strategy |
sampling_strategy |
boolean |
score_each_iteration |
long |
seed |
Tree.SelectStatType |
select_stat_type |
boolean |
verbose |
_cmDomain, _cv_count, _names, _responseName, _sourceResponseDomain, _train, _valid, _validResponse, _validResponseDomain, keep_cross_validation_splits, n_folds, validation, xval_modelsclassificationresponsecols, ignored_cols, ignored_cols_by_namesource_cv, _fjtask, description, destination_key, end_time, exception, job_key, LIST, start_time, state_parms, response_info_requestHelp, SUPPORTS_ONLY_V1, SUPPORTS_ONLY_V2, SUPPORTS_V1_V2ARRAY_BUILDER, ARRAY_HEADER_ROW_BUILDER, ARRAY_ROW_BUILDER, ARRAY_ROW_ELEMENT_BUILDER, ARRAY_ROW_SINGLECOL_BUILDER, ELEMENT_BUILDER, GSON_BUILDER, OBJECT_BUILDER, ROOT_OBJECT_queryHtml_argumentsALPHA, ARGUMENTS, AUC, BASE, BEST_THRESHOLD, BETA_EPS, BIN_LIMIT, BROWSE, BUCKET, BUILT_IN_KEY_JOBS, CANCELLED, CARDINALITY, CASE, CASE_MODE, CHUNK, CLASS, CLOUD_HEALTH, CLOUD_NAME, CLOUD_SIZE, CLOUD_UPTIME_MILLIS, CLUSTERS, COEFFICIENTS, COL_INDEX, COLS, COLUMN_NAME, COLUMNS_DISPLAY, CONSENSUS, CONTENTS, COUNT, DATA_KEY, DEPTH, DESCRIPTION, DEST_KEY, DTHRESHOLDS, ELAPSED, END_TIME, ENUM_DOMAIN_SIZE, ERROR, ESCAPE_NAN, EXCLUSIVE_SPLIT_LIMIT, EXPRESSION, FAILED, FAMILY, FEATURES, FILE, FILES, FILTER, FIRST_CHUNK, FJ_QUEUE_HI, FJ_QUEUE_LO, FJ_THREADS_HI, FJ_THREADS_LO, FREE_DISK, FREE_MEM, GFLOPS, HEADER, HEIGHT, HELP, IGNORE, ITEMS, ITERATIVE_CM, JOB, JOB_KEY, JOBS, JSON_H2O, KEY, KEYS, LAMBDA, LAST_CONTACT, LIMIT, LINK, LOCKED, MAX, MAX_DISK, MAX_ITER, MAX_MEM, MAX_ROWS, MEAN, MEM_BW, MIN, MODEL_KEY, MODELS, MORE, MTRY, MTRY_NODES, NAME, NEG_X, NO_CM, NODE, NODE_HEALTH, NODE_NAME, NODES, NORMALIZE, NUM_COLS, NUM_CPUS, NUM_FAILED, NUM_KEYS, NUM_MISSING_VALUES, NUM_ROWS, NUM_SUCCEEDED, NUM_TREES, OBJECT, OFFSET, OOBEE, PARALLEL, PARSER_TYPE, PATH, PREVIEW, PREVIOUS_MODEL_KEY, PRIOR, PROGRESS, PROGRESS_KEY, PROGRESS_TOTAL, REDIRECT, REDIRECT_ARGS, REPLICATION_FACTOR, REQUEST_TIME, RESPONSE, RHO, ROW, ROW_SIZE, ROWS, RPCS, SAMPLE, SAMPLING_STRATEGY, SCALE, SEED, SENT_ROWS, SEPARATOR, SIZE, SOURCE_KEY, STACK_TRACES, START_TIME, STAT_TYPE, STATUS, STEP, 
STRATA_SAMPLES, SUCCEEDED, SYSTEM_LOAD, TASK_KEY, TCPS_ACTIVE, TCPS_DUTY, TIME, TO_ENUM, TOT_MEM, TREE_COUNT, TREE_DEPTH, TREE_LEAVES, TREE_NUM, TREES, TWEEDIE_POWER, TYPE, URL, USE_NON_LOCAL_DATA, VALUE, VALUE_SIZE, VALUE_TYPE, VARIANCE, VERSION, VIEW, WARNINGS, WEIGHT, WEIGHTS, WIDTH, X, XVAL, Y
| Constructor and Description |
|---|
SpeeDRF() |
| Modifier and Type | Method and Description |
|---|---|
static void |
build(Job job,
SpeeDRF.DRFParams drfParams,
Data localData,
int ntrees,
int numSplitFeatures,
int[] rowsPerChunks)
Build random forest for data stored on this node.
|
void |
buildForest(SpeeDRFModel model) |
void |
crossValidate(Frame[] splits,
Frame[] cv_preds,
long[] offsets,
int i)
Cross-Validate a SpeeDRF model by building new models on N train/test holdout splits
|
protected void |
execImpl()
The actual implementation, which subclasses are expected to provide.
|
SpeeDRFModel |
initModel() |
protected SpeeDRFModel |
makeModel(SpeeDRFModel model,
double err,
ConfusionMatrix cm,
VarImp varimp,
AUC validAUC) |
protected void |
queryArgumentValueSet(RequestArguments.Argument arg,
java.util.Properties inputArgs)
Helper to handle arguments based on existing input values
|
protected RequestBuilders.Response |
redirect() |
Frame |
score(Frame fr) |
cv_progress, genericCrossValidation, getCMDomain, getOrigValidation, getValidAdaptor, getValidation, getVectorDomain, hasValidation, init, prepareValidationWithModel, registered, toJSON
selectFrame, selectVecs
all, cancel, cancel, cancel, checkIdx, defaultDestKey, defaultJobKey, dest, findJob, findJobByDest, fork, get, getState, gridParallelism, hygiene, invoke, isCancelledOrCrashed, isCrashed, isDone, isEnded, isRunning, isRunning, onCancelled, progress, remove, runTimeMs, self, serve, speedDescription, speedValue, start, waitUntilJobEnded, waitUntilJobEnded
create, fillResponseInfo, filterNaCols, input, logStart, makeJsonBox, serveGrid, servePublic, set, split, superServeGrid, supportedVersions, toString
addToNavbar, addToNavbar, addToNavbar, DocExampleFail, DocExampleSucc, href, href, hrefType, HTMLHelp, htmlTemplate, initializeNavBar, log, mapTypeahead, ReSTHelp, serve, serveJava, serveResponse, toDocGET, toHTML, toJava, wrap, wrap, wrap, writeJSONFields
build, buildJSONResponseBox, buildResponseHeader, name
buildQuery, checkArguments
arguments, argumentsToJson, frameColumnNameToIndex, vaCategoryNames, vaCategoryNames, vaColumnNameToIndex
checkJsonName, encodeRedirectArgs, JSON2HTML, jsonError, requestName, Str2JSON
clone, frozenType, init, newInstance, read, toDocField, write, writeJSON
public static DocGen.FieldDoc[] DOC_FIELDS
public static final java.lang.String DOC_GET
@Request.API(help="Number of trees", filter=Request.Default.class, json=true, lmin=1L, lmax=2147483647L, importance=CRITICAL) public int num_trees
@Request.API(help="Number of features to randomly select at each split.", filter=Request.Default.class, json=true, lmin=-1L, lmax=2147483647L, importance=SECONDARY) public int mtry
@Request.API(help="Max Depth", filter=Request.Default.class, json=true, lmin=0L, lmax=2147483647L, importance=CRITICAL) public int max_depth
@Request.API(help="Split Criterion Type", filter=Request.Default.class, json=true, importance=SECONDARY) public Tree.SelectStatType select_stat_type
public double[] class_weights
@Request.API(help="Sampling Strategy", filter=Request.Default.class, json=true, importance=SECONDARY) public Sampling.Strategy sampling_strategy
@Request.API(help="Sampling Rate at each split.", filter=Request.Default.class, json=true, dmin=0.0, dmax=1.0, importance=EXPERT) public double sample
@Request.API(help="Score each iteration", filter=Request.Default.class, json=true, importance=SECONDARY) public boolean score_each_iteration
@Request.API(help="Balance training data class counts via over/under-sampling (for imbalanced data)", filter=Request.Default.class, json=true, importance=EXPERT) public boolean balance_classes
@Request.API(help="Maximum relative size of the training data after balancing class counts (can be less than 1.0)", filter=Request.Default.class, json=true, dmin=0.001, importance=EXPERT) public float max_after_balance_size
@Request.API(help="Out of bag error estimate", filter=Request.Default.class, json=true, importance=SECONDARY) public boolean oobee
@Request.API(help="Variable Importance", filter=Request.Default.class, json=true) public boolean importance
public Key _modelKey
@Request.API(help="bin limit", filter=Request.Default.class, json=true, lmin=0L, lmax=65534L, importance=EXPERT) public int bin_limit
@Request.API(help="seed", filter=Request.Default.class, json=true, importance=EXPERT) public long seed
@Request.API(help="Tree splits and extra statistics printed to stdout.", filter=Request.Default.class, json=true, importance=EXPERT) public boolean verbose
@Request.API(help="split limit", importance=EXPERT) public int _exclusiveSplitLimit
@Request.API(help="use non local data", importance=EXPERT) public boolean _useNonLocalData
public SpeeDRF.DRFParams drfParams
protected SpeeDRFModel makeModel(SpeeDRFModel model, double err, ConfusionMatrix cm, VarImp varimp, AUC validAUC)
protected void queryArgumentValueSet(RequestArguments.Argument arg, java.util.Properties inputArgs)
Description copied from class: Job.ValidatedJob
Overrides: queryArgumentValueSet in class Job.ValidatedJob
protected void execImpl()
Description copied from class: Func
protected RequestBuilders.Response redirect()
public final void buildForest(SpeeDRFModel model)
public SpeeDRFModel initModel()
public static void build(Job job, SpeeDRF.DRFParams drfParams, Data localData, int ntrees, int numSplitFeatures, int[] rowsPerChunks)
public void crossValidate(Frame[] splits, Frame[] cv_preds, long[] offsets, int i)
Overrides: crossValidate in class Job.ValidatedJob
Parameters:
splits - Frames containing train/test splits
cv_preds - Array of Frames to store the predictions for each cross-validation run
offsets - Array to store the offsets of starting row indices for each cross-validation run
i - Which fold of cross-validation to perform