public abstract class Model<M extends Model<M,P,O>,P extends Model.Parameters,O extends Model.Output> extends Lockable<M>
Modifier and Type | Class and Description |
---|---|
protected class |
Model.BigScore |
static interface |
Model.DeepFeatures |
static interface |
Model.ExemplarMembers |
static interface |
Model.GetMostImportantFeatures |
static interface |
Model.GLRMArchetypes |
static class |
Model.GridSortBy |
static class |
Model.InteractionPair
This class represents a pair of interacting columns plus some additional data
about specific enums to be interacted when the vecs are categorical.
|
class |
Model.JavaModelStreamWriter
Model stream writer - output Java code representation of model.
|
static interface |
Model.LeafNodeAssignment |
static class |
Model.Output
Model-specific output class.
|
static class |
Model.Parameters
Model-specific parameter class.
|
Modifier and Type | Field and Description |
---|---|
Distribution |
_dist |
O |
_output |
P |
_parms |
IcedHashMap<Key,java.lang.String> |
_toDelete |
java.lang.String[] |
_warnings |
protected ScoringInfo[] |
scoringInfo |
Constructor and Description |
---|
Model(Key<M> selfKey,
P parms,
O output)
Full constructor
|
Modifier and Type | Method and Description |
---|---|
java.lang.String[] |
adaptTestForTrain(Frame test,
boolean expensive,
boolean computeMetrics)
Adapt a Test/Validation Frame to be compatible for a Training Frame.
|
static java.lang.String[] |
adaptTestForTrain(Frame test,
java.lang.String[] origNames,
java.lang.String[][] origDomains,
java.lang.String[] names,
java.lang.String[][] domains,
Model.Parameters parms,
boolean expensive,
boolean computeMetrics,
java.lang.String[] interactions,
ToEigenVec tev,
IcedHashMap<Key,java.lang.String> toDelete,
boolean catEncoded) |
ModelMetrics |
addMetrics(ModelMetrics mm) |
ModelMetrics |
addModelMetrics(ModelMetrics mm) |
void |
addWarning(java.lang.String s) |
double |
auc() |
protected long |
checksum_impl()
High-quality 64-bit checksum of the content of the object.
|
double |
classification_error() |
protected static void |
cleanup_adapt(Frame adaptFr,
Frame fr) |
protected void |
closeBigScorePredict() |
int |
compareTo(M o) |
Frame |
computeDeviances(Frame valid,
Frame predictions,
java.lang.String outputName)
Compute the deviances for each observation
|
protected double |
data(Chunk[] chks,
int row,
int col) |
double |
defaultThreshold()
Default threshold for assigning class labels to the target class (for binomial models)
|
void |
deleteCrossValidationModels() |
double |
deviance() |
double |
deviance(double w,
double y,
double f)
Deviance of given distribution function at predicted value f
|
void |
fillScoringInfo(ScoringInfo scoringInfo)
Fill a ScoringInfo with data from the ModelMetrics for this model.
|
Model.GridSortBy |
getDefaultGridSortBy()
Identifies the default ordering method for models returned from Grid Search
|
ModelMojoWriter |
getMojo()
Override this in models that support serialization into the MOJO format.
|
ToEigenVec |
getToEigenVec() |
boolean |
isSupervised() |
ScoringInfo |
last_scored() |
double |
lift_top_group() |
double |
logloss() |
float |
loss() |
double |
mae() |
protected Model.BigScore |
makeBigScoreTask(java.lang.String[][] domains,
java.lang.String[] names,
Frame adaptFrm,
boolean computeMetrics,
boolean makePrediction,
Job j)
Allow subclasses to define their own BigScore class.
|
static InteractionWrappedVec |
makeInteraction(Frame fr,
Model.InteractionPair ip,
boolean useAllFactorLevels,
boolean skipMissing,
boolean standardize) |
static Frame |
makeInteractions(Frame fr,
boolean valid,
Model.InteractionPair[] interactions,
boolean useAllFactorLevels,
boolean skipMissing,
boolean standardize) |
static InteractionWrappedVec[] |
makeInteractions(Frame fr,
Model.InteractionPair[] interactions,
boolean useAllFactorLevels,
boolean skipMissing,
boolean standardize) |
abstract ModelMetrics.MetricBuilder |
makeMetricBuilder(java.lang.String[] domain) |
java.lang.Class<KeyV3.ModelKeyV3> |
makeSchema() |
protected java.lang.String[] |
makeScoringNames() |
double |
mean_per_class_error() |
double |
mse() |
protected Frame |
postProcessPredictions(Frame predictFr) |
protected Frame |
predictScoreImpl(Frame fr,
Frame adaptFrm,
java.lang.String destination_key,
Job j,
boolean computeMetrics)
Score an already adapted frame.
|
protected Keyed |
readAll_impl(AutoBuffer ab,
Futures fs) |
protected Futures |
remove_impl(Futures fs)
Override to remove subparts, but not self, of composite Keyed objects.
|
double |
rmsle() |
double |
score(double[] data) |
Frame |
score(Frame fr)
Bulk score the frame, and auto-name the resulting predictions frame.
|
Frame |
score(Frame fr,
java.lang.String destination_key)
Bulk score the frame fr, producing a Frame result; the 1st
Vec is the predicted class, the remaining Vecs are the probability
distributions. |
Frame |
score(Frame fr,
java.lang.String destination_key,
Job j) |
Frame |
score(Frame fr,
java.lang.String destination_key,
Job j,
boolean computeMetrics) |
double[] |
score0(Chunk[] chks,
double offset,
int row_in_chunk,
double[] tmp,
double[] preds) |
double[] |
score0(Chunk[] chks,
int row_in_chunk,
double[] tmp,
double[] preds)
Bulk scoring API for one row.
|
protected abstract double[] |
score0(double[] data,
double[] preds)
Subclasses implement the scoring logic.
|
protected double[] |
score0(double[] data,
double[] preds,
double offset)
Override scoring logic for models that handle weight/offset
|
protected ModelMetrics.MetricBuilder |
scoreMetrics(Frame adaptFrm)
Score an already adapted frame.
|
ScoringInfo[] |
scoring_history() |
protected java.lang.String[][] |
scoringDomains() |
protected void |
setupBigScorePredict() |
boolean |
testJavaScoring(Frame data,
Frame model_predictions,
double rel_epsilon) |
boolean |
testJavaScoring(Frame data,
Frame model_predictions,
double rel_epsilon,
double abs_epsilon) |
boolean |
testJavaScoring(Frame data,
Frame model_predictions,
double rel_epsilon,
double abs_epsilon,
double fraction) |
java.lang.String |
toJava(boolean preview,
boolean verboseCode)
Return a String which is a valid Java program representing a class that
implements the Model.
|
SBPrintStream |
toJava(java.io.OutputStream os,
boolean preview,
boolean verboseCode) |
protected SBPrintStream |
toJava(SBPrintStream sb,
boolean isGeneratingPreview,
boolean verboseCode) |
protected boolean |
toJavaCheckTooBig() |
protected SBPrintStream |
toJavaInit(SBPrintStream sb,
CodeGeneratorPipeline fileContext) |
protected SBPrintStream |
toJavaNCLASSES(SBPrintStream sb) |
protected void |
toJavaPredictBody(SBPrintStream body,
CodeGeneratorPipeline classCtx,
CodeGeneratorPipeline fileCtx,
boolean verboseCode) |
protected SBPrintStream |
toJavaPROB(SBPrintStream sb) |
protected SBPrintStream |
toJavaSuper(java.lang.String modelName,
SBPrintStream sb)
Generate implementation for super class.
|
java.lang.String |
toString() |
protected AutoBuffer |
writeAll_impl(AutoBuffer ab)
Write out K/V pairs, in this case model metrics.
|
delete_and_lock, delete_and_lock, delete_and_lock, delete, delete, delete, read_lock, read_lock, read_lock, unlock_all, unlock, unlock, unlock, unlock, update, update, update, write_lock, write_lock, write_lock
checksum, readAll, remove, remove, remove, remove, writeAll
asBytes, clone, copyOver, frozenType, read, readExternal, readJSON, reloadFromBytes, toJsonString, write, writeExternal, writeJSON
public P extends Model.Parameters _parms
public O extends Model.Output _output
public java.lang.String[] _warnings
public Distribution _dist
protected ScoringInfo[] scoringInfo
public IcedHashMap<Key,java.lang.String> _toDelete
public double defaultThreshold()
public final boolean isSupervised()
public Model.GridSortBy getDefaultGridSortBy()
public ToEigenVec getToEigenVec()
public ModelMetrics addModelMetrics(ModelMetrics mm)
public void addWarning(java.lang.String s)
protected java.lang.String[][] scoringDomains()
public ModelMetrics addMetrics(ModelMetrics mm)
public abstract ModelMetrics.MetricBuilder makeMetricBuilder(java.lang.String[] domain)
public double deviance(double w, double y, double f)
w - observation weight
y - (actual) response
f - (predicted) response in original response space
public ScoringInfo[] scoring_history()
public void fillScoringInfo(ScoringInfo scoringInfo)
scoringInfo -
public ScoringInfo last_scored()
public float loss()
public int compareTo(M o)
public double classification_error()
public double mse()
public double mae()
public double rmsle()
public double auc()
public double deviance()
public double logloss()
public double mean_per_class_error()
public double lift_top_group()
public java.lang.String[] adaptTestForTrain(Frame test, boolean expensive, boolean computeMetrics)
missingColumnsType
). This will issue a warning,
and if the "expensive" flag is false won't actually make the column
replacement column but instead will bail-out on the whole adaption (but
will continue looking for more warnings).
- If all columns are missing, issue an error.
- Renumber matching cat levels to match the Train levels; this might make
"holes" in the Test set cat levels, if some are not in the Test set.
- Extra Test levels are renumbered past the end of the Train set, hence
the train and test levels match up to all the train levels; there might
be extra Test levels past that.
- For all mis-matched levels, issue a warning.
The test frame is updated in-place to be compatible, by altering
the names and Vecs; make a defensive copy if you do not want it modified.
There is a fast-path cutout if the test set is already compatible. Since
the test-set is conditionally modified with extra CategoricalWrappedVec optionally
added it is recommended to use a Scope enter/exit to track Vec lifetimes.
test - Testing Frame, updated in-place
expensive - Try hard to adapt; this might involve the creation of
whole Vecs and thus get expensive. If false, then only adapt if
no warnings and errors; otherwise just the messages are produced.
Created Vecs have to be deleted by the caller (e.g. Scope.enter/exit).
IllegalArgumentException - if no columns are in common, or
if any factor column has no levels in common.
public static java.lang.String[] adaptTestForTrain(Frame test, java.lang.String[] origNames, java.lang.String[][] origDomains, java.lang.String[] names, java.lang.String[][] domains, Model.Parameters parms, boolean expensive, boolean computeMetrics, java.lang.String[] interactions, ToEigenVec tev, IcedHashMap<Key,java.lang.String> toDelete, boolean catEncoded) throws java.lang.IllegalArgumentException
test - Frame to be adapted
origNames - Training column names before categorical column encoding - can be the same as names
origDomains - Training column levels before categorical column encoding - can be the same as domains
names - Training column names
domains - Training column levels
parms - Model parameters
expensive - Whether to actually do the hard work
computeMetrics - Whether metrics can be (and should be) computed
interactions - Column names to create pairwise interactions with
catEncoded - Whether the categorical columns of the test frame were already transformed via categorical_encoding
java.lang.IllegalArgumentException
public Frame score(Frame fr) throws java.lang.IllegalArgumentException
fr - frame which should be scored
java.lang.IllegalArgumentException
score(Frame, String)
public Frame score(Frame fr, java.lang.String destination_key) throws java.lang.IllegalArgumentException
Bulk score the frame fr, producing a Frame result; the 1st
Vec is the predicted class, the remaining Vecs are the probability
distributions. For Regression (single-class) models, the 1st and only
Vec is the prediction value. The result is in the DKV; caller is
responsible for deleting.
fr - frame which should be scored
java.lang.IllegalArgumentException
public Frame score(Frame fr, java.lang.String destination_key, Job j) throws java.lang.IllegalArgumentException
java.lang.IllegalArgumentException
public Frame score(Frame fr, java.lang.String destination_key, Job j, boolean computeMetrics) throws java.lang.IllegalArgumentException
java.lang.IllegalArgumentException
public Frame computeDeviances(Frame valid, Frame predictions, java.lang.String outputName)
valid - Validation Frame (must contain the response)
predictions - Predictions made by the model
outputName - Name of the output frame
protected java.lang.String[] makeScoringNames()
protected Model.BigScore makeBigScoreTask(java.lang.String[][] domains, java.lang.String[] names, Frame adaptFrm, boolean computeMetrics, boolean makePrediction, Job j)
protected Frame predictScoreImpl(Frame fr, Frame adaptFrm, java.lang.String destination_key, Job j, boolean computeMetrics)
adaptFrm - Already adapted frame
computeMetrics -
protected ModelMetrics.MetricBuilder scoreMetrics(Frame adaptFrm)
adaptFrm - Already adapted frame
protected void setupBigScorePredict()
protected void closeBigScorePredict()
protected double data(Chunk[] chks, int row, int col)
public double[] score0(Chunk[] chks, int row_in_chunk, double[] tmp, double[] preds)
public double[] score0(Chunk[] chks, double offset, int row_in_chunk, double[] tmp, double[] preds)
protected abstract double[] score0(double[] data, double[] preds)
protected double[] score0(double[] data, double[] preds, double offset)
public double score(double[] data)
protected AutoBuffer writeAll_impl(AutoBuffer ab)
protected Keyed readAll_impl(AutoBuffer ab, Futures fs)
protected long checksum_impl()
Keyed
public ModelMojoWriter getMojo()
public final java.lang.String toJava(boolean preview, boolean verboseCode)
class UUIDxxxxModel {
  public static final String NAMES[] = { ....column names... }
  public static final String DOMAINS[][] = { ....domain names... }
  // Pass in data in a double[], pre-aligned to the Model's requirements.
  // Jam predictions into the preds[] array; preds[0] is reserved for the
  // main prediction (class for classifiers or value for regression),
  // and remaining columns hold a probability distribution for classifiers.
  double[] predict( double data[], double preds[] );
  double[] map( HashMap < String,Double > row, double data[] );
  // Does the mapping lookup for every row, no allocation
  double[] predict( HashMap < String,Double > row, double data[], double preds[] );
  // Allocates a double[] for every row
  double[] predict( HashMap < String,Double > row, double preds[] );
  // Allocates a double[] and a double[] for every row
  double[] predict( HashMap < String,Double > row );
}
public final SBPrintStream toJava(java.io.OutputStream os, boolean preview, boolean verboseCode)
protected SBPrintStream toJava(SBPrintStream sb, boolean isGeneratingPreview, boolean verboseCode)
protected SBPrintStream toJavaSuper(java.lang.String modelName, SBPrintStream sb)
protected SBPrintStream toJavaNCLASSES(SBPrintStream sb)
protected SBPrintStream toJavaPROB(SBPrintStream sb)
protected boolean toJavaCheckTooBig()
protected SBPrintStream toJavaInit(SBPrintStream sb, CodeGeneratorPipeline fileContext)
protected void toJavaPredictBody(SBPrintStream body, CodeGeneratorPipeline classCtx, CodeGeneratorPipeline fileCtx, boolean verboseCode)
public boolean testJavaScoring(Frame data, Frame model_predictions, double rel_epsilon)
public boolean testJavaScoring(Frame data, Frame model_predictions, double rel_epsilon, double abs_epsilon)
public boolean testJavaScoring(Frame data, Frame model_predictions, double rel_epsilon, double abs_epsilon, double fraction)
public void deleteCrossValidationModels()
public java.lang.String toString()
toString
in class java.lang.Object
public java.lang.Class<KeyV3.ModelKeyV3> makeSchema()
public static Frame makeInteractions(Frame fr, boolean valid, Model.InteractionPair[] interactions, boolean useAllFactorLevels, boolean skipMissing, boolean standardize)
public static InteractionWrappedVec[] makeInteractions(Frame fr, Model.InteractionPair[] interactions, boolean useAllFactorLevels, boolean skipMissing, boolean standardize)
public static InteractionWrappedVec makeInteraction(Frame fr, Model.InteractionPair ip, boolean useAllFactorLevels, boolean skipMissing, boolean standardize)