public class SpeeDRF extends Job.ValidatedJob
Modifier and Type | Class and Description |
---|---|
static class |
SpeeDRF.DRFParams
RF execution parameters.
|
static class |
SpeeDRF.DRFTask |
Job.ValidatedJob.Response2CMAdaptor
Job.ChunkProgress, Job.ChunkProgressJob, Job.ColumnsJob, Job.ColumnsResJob, Job.Fail, Job.FrameJob, Job.JobCancelledException, Job.JobHandle, Job.JobState, Job.List, Job.ModelJob, Job.ModelJobWithoutClassificationField, Job.Progress, Job.ProgressMonitor, Job.ValidatedJob
Request2.ColumnSelect, Request2.Dependent, Request2.DoClassBoolean, Request2.DRFCopyDataBoolean, Request2.MultiVecSelect, Request2.MultiVecSelectType, Request2.SpecialVecSelect, Request2.TypeaheadKey, Request2.VecClassSelect, Request2.VecSelect
Request.API, Request.Default, Request.Filter, Request.Validator<V>
RequestBuilders.ArrayBuilder, RequestBuilders.ArrayHeaderRowBuilder, RequestBuilders.ArrayRowBuilder, RequestBuilders.ArrayRowElementBuilder, RequestBuilders.ArrayRowSingleColBuilder, RequestBuilders.BooleanStringBuilder, RequestBuilders.Builder, RequestBuilders.ElementBuilder, RequestBuilders.HideBuilder, RequestBuilders.KeyCellBuilder, RequestBuilders.KeyElementBuilder, RequestBuilders.KeyLinkElementBuilder, RequestBuilders.KeyMinAvgMaxBuilder, RequestBuilders.NoCaptionObjectBuilder, RequestBuilders.ObjectBuilder, RequestBuilders.PaginatedTable, RequestBuilders.PreFormattedBuilder, RequestBuilders.Response, RequestBuilders.ResponseInfo, RequestBuilders.WarningCellBuilder
RequestArguments.Argument<T>, RequestArguments.Bool, RequestArguments.ClassifyBool, RequestArguments.DRFCopyDataBool, RequestArguments.EnumArgument<T extends java.lang.Enum<T>>, RequestArguments.ExistingFile, RequestArguments.FrameClassVec, RequestArguments.FrameKeyMultiVec, RequestArguments.FrameKeyVec, RequestArguments.GeneralFile, RequestArguments.H2OExistingKey, RequestArguments.H2OIllegalArgumentException, RequestArguments.H2OKey, RequestArguments.H2OKey2, RequestArguments.InputCheckBox, RequestArguments.InputSelect<T>, RequestArguments.InputText<T>, RequestArguments.Int, RequestArguments.LongInt, RequestArguments.MultipleSelect<T>, RequestArguments.MultipleText<T>, RequestArguments.NumberSequence, RequestArguments.NumberSequenceFloat, RequestArguments.Real, RequestArguments.Record<T>, RequestArguments.RSeq, RequestArguments.RSeqFloat, RequestArguments.Str, RequestArguments.StringList, RequestArguments.TypeaheadInputText<T>
RequestStatics.RequestType
Constants.Extensions, Constants.Schemes, Constants.Suffixes
Modifier and Type | Field and Description |
---|---|
int |
_exclusiveSplitLimit |
Key |
_modelKey |
boolean |
balance_classes
For imbalanced data, balance training data class counts via
over/under-sampling.
|
double[] |
class_weights |
static DocGen.FieldDoc[] |
DOC_FIELDS |
static java.lang.String |
DOC_GET |
SpeeDRF.DRFParams |
drfParams |
boolean |
importance |
float |
max_after_balance_size
When classes are balanced, limit the resulting dataset size to the
specified multiple of the original dataset size.
|
int |
max_depth |
int |
mtries |
int |
nbins |
int |
ntrees |
boolean |
oobee |
double |
sample_rate |
Sampling.Strategy |
sampling_strategy |
boolean |
score_each_iteration |
boolean |
score_pojo |
long |
seed |
Tree.SelectStatType |
select_stat_type |
boolean |
verbose |
_cmDomain, _cv_count, _names, _responseName, _sourceResponseDomain, _train, _valid, _validResponse, _validResponseDomain, holdout_fraction, keep_cross_validation_splits, n_folds, validation, xval_models
classification
response
cols, ignored_cols, ignored_cols_by_name
source
_cv, _fjtask, description, destination_key, end_time, exception, job_key, LIST, start_time, state
_parms, response_info
_requestHelp, SUPPORTS_ONLY_V1, SUPPORTS_ONLY_V2, SUPPORTS_V1_V2
ARRAY_BUILDER, ARRAY_HEADER_ROW_BUILDER, ARRAY_ROW_BUILDER, ARRAY_ROW_ELEMENT_BUILDER, ARRAY_ROW_SINGLECOL_BUILDER, ELEMENT_BUILDER, GSON_BUILDER, OBJECT_BUILDER, ROOT_OBJECT
_queryHtml
_arguments
ALPHA, ARGUMENTS, AUC, BASE, BEST_THRESHOLD, BETA_EPS, BIN_LIMIT, BROWSE, BUCKET, BUILT_IN_KEY_JOBS, CANCELLED, CARDINALITY, CASE, CASE_MODE, CHUNK, CLASS, CLOUD_HEALTH, CLOUD_NAME, CLOUD_SIZE, CLOUD_UPTIME_MILLIS, CLUSTERS, COEFFICIENTS, COL_INDEX, COLS, COLUMN_NAME, COLUMNS_DISPLAY, CONSENSUS, CONTENTS, COUNT, DATA_KEY, DEPTH, DESCRIPTION, DEST_KEY, DTHRESHOLDS, ELAPSED, END_TIME, ENUM_DOMAIN_SIZE, ERROR, ESCAPE_NAN, EXCLUSIVE_SPLIT_LIMIT, EXPRESSION, FAILED, FAMILY, FEATURES, FILE, FILES, FILTER, FIRST_CHUNK, FJ_QUEUE_HI, FJ_QUEUE_LO, FJ_THREADS_HI, FJ_THREADS_LO, FREE_DISK, FREE_MEM, GFLOPS, HEADER, HEIGHT, HELP, IGNORE, ITEMS, ITERATIVE_CM, JOB, JOB_KEY, JOBS, JSON_H2O, KEY, KEYS, LAMBDA, LAST_CONTACT, LIMIT, LINK, LOCKED, MAX, MAX_DISK, MAX_ITER, MAX_MEM, MAX_ROWS, MEAN, MEM_BW, MIN, MODEL_KEY, MODELS, MORE, MTRY, MTRY_NODES, NAME, NEG_X, NO_CM, NODE, NODE_HEALTH, NODE_NAME, NODES, NORMALIZE, NUM_COLS, NUM_CPUS, NUM_FAILED, NUM_KEYS, NUM_MISSING_VALUES, NUM_ROWS, NUM_SUCCEEDED, NUM_TREES, OBJECT, OFFSET, OOBEE, PARALLEL, PARSER_TYPE, PATH, PREVIEW, PREVIOUS_MODEL_KEY, PRIOR, PROGRESS, PROGRESS_KEY, PROGRESS_TOTAL, REDIRECT, REDIRECT_ARGS, REPLICATION_FACTOR, REQUEST_TIME, RESPONSE, RHO, ROW, ROW_SIZE, ROWS, RPCS, SAMPLE, SAMPLING_STRATEGY, SCALE, SEED, SENT_ROWS, SEPARATOR, SIZE, SOURCE_KEY, STACK_TRACES, START_TIME, STAT_TYPE, STATUS, STEP, STRATA_SAMPLES, SUCCEEDED, SYSTEM_LOAD, TASK_KEY, TCPS_ACTIVE, TCPS_DUTY, TIME, TO_ENUM, TOT_MEM, TREE_COUNT, TREE_DEPTH, TREE_LEAVES, TREE_NUM, TREES, TWEEDIE_POWER, TYPE, URL, USE_NON_LOCAL_DATA, VALUE, VALUE_SIZE, VALUE_TYPE, VARIANCE, VERSION, VIEW, WARNINGS, WEIGHT, WEIGHTS, WIDTH, X, XVAL, Y
Constructor and Description |
---|
SpeeDRF() |
Modifier and Type | Method and Description |
---|---|
static void |
build(Key jobKey,
Key modelKey,
SpeeDRF.DRFParams drfParams,
Data localData,
int ntrees,
int numSplitFeatures,
int[] rowsPerChunks)
Build random forest for data stored on this node.
|
void |
crossValidate(Frame[] splits,
Frame[] cv_preds,
long[] offsets,
int i)
Cross-Validate a SpeeDRF model by building new models on N train/test holdout splits
|
protected void |
execImpl()
The real implementation, which should be provided by subclasses.
|
protected void |
init()
Invoked before job runs.
|
SpeeDRFModel |
initModel(Frame train,
Frame test,
float[] priorDist) |
static java.lang.String |
link(Key k,
java.lang.String content)
Return the query link to this page
|
protected SpeeDRFModel |
makeModel(SpeeDRFModel model,
double err,
ConfusionMatrix cm,
VarImp varimp,
AUCData validAUC) |
protected void |
queryArgumentValueSet(RequestArguments.Argument arg,
java.util.Properties inputArgs)
Helper to handle arguments based on existing input values
|
protected RequestBuilders.Response |
redirect() |
Frame |
score(Frame fr) |
cv_progress, genericCrossValidation, getCMDomain, getOrigValidation, getValidAdaptor, getValidation, getVectorDomain, hasValidation, prepareValidationWithModel, registered, toJSON
selectFrame, selectVecs
all, cancel, cancel, cancel, checkIdx, defaultDestKey, defaultJobKey, dest, findJob, findJobByDest, fork, get, getState, gridParallelism, hygiene, hygiene, invoke, isCancelledOrCrashed, isCrashed, isDone, isEnded, isRunning, isRunning, onCancelled, progress, remove, runTimeMs, self, serve, speedDescription, speedValue, start, waitUntilJobEnded, waitUntilJobEnded
create, fillResponseInfo, filterNaCols, find, input, logStart, makeJsonBox, serveGrid, servePublic, set, split, superServeGrid, supportedVersions, toJSON, toString
addToNavbar, addToNavbar, addToNavbar, DocExampleFail, DocExampleSucc, href, href, hrefType, HTMLHelp, htmlTemplate, initializeNavBar, log, mapTypeahead, ReSTHelp, serve, serveJava, serveResponse, toDocGET, toHTML, toJava, wrap, wrap, wrap, writeJSONFields
build, buildJSONResponseBox, buildResponseHeader, name
buildQuery, checkArguments
arguments, argumentsToJson, frameColumnNameToIndex
checkJsonName, encodeRedirectArgs, JSON2HTML, jsonError, requestName, Str2JSON
clone, frozenType, init, newInstance, read, toDocField, write, writeJSON
public static DocGen.FieldDoc[] DOC_FIELDS
public static final java.lang.String DOC_GET
@Request.API(help="Number of trees", filter=Request.Default.class, json=true, lmin=1L, lmax=2147483647L, importance=CRITICAL) public int ntrees
@Request.API(help="Number of features to randomly select at each split.", filter=Request.Default.class, json=true, lmin=-1L, lmax=2147483647L, importance=SECONDARY) public int mtries
@Request.API(help="Max Depth", filter=Request.Default.class, json=true, lmin=0L, lmax=2147483647L, importance=CRITICAL) public int max_depth
@Request.API(help="Split Criterion Type", filter=Request.Default.class, json=true, importance=SECONDARY) public Tree.SelectStatType select_stat_type
public double[] class_weights
@Request.API(help="Sampling Strategy", filter=Request.Default.class, json=true, importance=SECONDARY) public Sampling.Strategy sampling_strategy
@Request.API(help="Sampling Rate at each split.", filter=Request.Default.class, json=true, dmin=0.0, dmax=1.0, importance=EXPERT) public double sample_rate
public boolean score_each_iteration
@Request.API(help="Create the Score POJO", filter=Request.Default.class, json=true, importance=EXPERT) public boolean score_pojo
@Request.API(help="Balance training data class counts via over/under-sampling (for imbalanced data)", filter=Request.Default.class, json=true, importance=EXPERT) public boolean balance_classes
@Request.API(help="Maximum relative size of the training data after balancing class counts (can be less than 1.0)", filter=Request.Default.class, json=true, dmin=0.001, importance=EXPERT) public float max_after_balance_size
@Request.API(help="Out of bag error estimate", filter=Request.Default.class, json=true, importance=SECONDARY) public boolean oobee
@Request.API(help="Variable Importance", filter=Request.Default.class, json=true) public boolean importance
public Key _modelKey
@Request.API(help="bin limit", filter=Request.Default.class, json=true, lmin=0L, lmax=65534L, importance=EXPERT) public int nbins
@Request.API(help="seed", filter=Request.Default.class, json=true, importance=EXPERT) public long seed
@Request.API(help="Tree splits and extra statistics printed to stdout.", filter=Request.Default.class, json=true, importance=EXPERT) public boolean verbose
@Request.API(help="split limit", importance=EXPERT) public int _exclusiveSplitLimit
public SpeeDRF.DRFParams drfParams
public static java.lang.String link(Key k, java.lang.String content)
protected SpeeDRFModel makeModel(SpeeDRFModel model, double err, ConfusionMatrix cm, VarImp varimp, AUCData validAUC)
protected void queryArgumentValueSet(RequestArguments.Argument arg, java.util.Properties inputArgs)
Job.ValidatedJob
Overrides:
queryArgumentValueSet
in class Job.ValidatedJob
protected void init()
Job
Overrides:
init
in class Job.ValidatedJob
protected void execImpl()
Func
protected RequestBuilders.Response redirect()
public SpeeDRFModel initModel(Frame train, Frame test, float[] priorDist)
public static void build(Key jobKey, Key modelKey, SpeeDRF.DRFParams drfParams, Data localData, int ntrees, int numSplitFeatures, int[] rowsPerChunks)
public void crossValidate(Frame[] splits, Frame[] cv_preds, long[] offsets, int i)
Overrides:
crossValidate
in class Job.ValidatedJob
Parameters:
splits
- Frames containing train/test splits
cv_preds
- Array of Frames to store the predictions for each cross-validation run
offsets
- Array to store the offsets of starting row indices for each cross-validation run
i
- Which fold of cross-validation to perform