public class DataInfo extends water.Keyed<DataInfo>
Modifier and Type | Class and Description |
---|---|
class |
DataInfo.Row |
class |
DataInfo.Rows |
static class |
DataInfo.TransformType |
Modifier and Type | Field and Description |
---|---|
int[] |
_activeCols |
water.fvec.Frame |
_adaptedFrame |
int[][] |
_catLvls |
boolean[] |
_catMissing |
int[] |
_catOffsets |
int |
_cats |
java.lang.String[] |
_coefNames |
boolean |
_fold |
boolean |
_imputeMissing |
java.lang.String[] |
_interactionColumns |
hex.Model.InteractionPair[] |
_interactions |
int[] |
_interactionVecs |
boolean |
_intercept |
int[][] |
_intLvls |
double[] |
_normMul |
double[] |
_normRespMul |
double[] |
_normRespSub |
double[] |
_normSub |
double[] |
_numMeans |
int[] |
_numOffsets |
int |
_nums |
boolean |
_offset |
int |
_outpus |
int[] |
_permutation |
DataInfo.TransformType |
_predictor_transform |
DataInfo.TransformType |
_response_transform |
int |
_responses |
boolean |
_skipMissing |
boolean |
_useAllFactorLevels |
boolean |
_valid |
boolean |
_weights |
Constructor and Description |
---|
DataInfo(water.fvec.Frame train,
water.fvec.Frame valid,
boolean useAllFactorLevels,
DataInfo.TransformType predictor_transform,
boolean skipMissing,
boolean imputeMissing,
boolean missingBucket) |
DataInfo(water.fvec.Frame train,
water.fvec.Frame valid,
int nResponses,
boolean useAllFactorLevels,
DataInfo.TransformType predictor_transform,
DataInfo.TransformType response_transform,
boolean skipMissing,
boolean imputeMissing,
boolean missingBucket,
boolean weight,
boolean offset,
boolean fold) |
DataInfo(water.fvec.Frame train,
water.fvec.Frame valid,
int nResponses,
boolean useAllFactorLevels,
DataInfo.TransformType predictor_transform,
DataInfo.TransformType response_transform,
boolean skipMissing,
boolean imputeMissing,
boolean missingBucket,
boolean weight,
boolean offset,
boolean fold,
boolean intercept) |
DataInfo(water.fvec.Frame train,
water.fvec.Frame valid,
int nResponses,
boolean useAllFactorLevels,
DataInfo.TransformType predictor_transform,
DataInfo.TransformType response_transform,
boolean skipMissing,
boolean imputeMissing,
boolean missingBucket,
boolean weight,
boolean offset,
boolean fold,
java.lang.String[] interactions)
The train/valid Frame instances are sorted with the categorical columns first
(themselves sorted by cardinality, greatest to least), followed by all numerical columns.
|
Modifier and Type | Method and Description |
---|---|
int[] |
activeCols() |
void |
addOutput(java.lang.String name,
water.fvec.Vec v) |
void |
addResponse(java.lang.String[] names,
water.fvec.Vec[] vecs) |
int[] |
catNAFill() |
int |
catNAFill(int cid) |
protected long |
checksum_impl() |
java.lang.String[] |
coefNames() |
double[] |
denormalizeBeta(double[] beta) |
void |
dropInteractions() |
void |
dropWeights() |
DataInfo.Row |
extractDenseRow(water.fvec.Chunk[] chunks,
int rid,
DataInfo.Row row) |
DataInfo.Row[] |
extractSparseRows(water.fvec.Chunk[] chunks)
Extract (sparse) rows from given chunks.
|
DataInfo |
filterExpandedColumns(int[] cols)
Filter the _adaptedFrame so that it contains only the Vecs referenced by the cols
parameter.
|
int |
foldChunkId() |
protected int[] |
fullCatOffsets() |
int |
fullN()
Get the fully expanded number of predictor columns.
|
int |
getCategoricalId(int cid,
double val) |
int |
getCategoricalId(int cid,
int val)
Get the offset into the expanded categorical columns.
|
int |
getCategoricalIdFromInteraction(int cid,
int val) |
int |
getInteractionOffset(water.fvec.Chunk[] chunks,
int cid,
int rid) |
water.fvec.Vec |
getOffsetVec() |
water.fvec.Vec |
getOutputVec(int i) |
water.fvec.Vec |
getWeightsVec() |
static int |
imputeCat(water.fvec.Vec v) |
static int |
imputeCat(water.fvec.Vec v,
boolean useAllFactorLevels) |
boolean |
isInteractionVec(int colid) |
int |
largestCat() |
int[] |
mapNames(java.lang.String[] names) |
DataInfo.Row |
newDenseRow() |
DataInfo.Row |
newDenseRow(double[] numVals,
long start) |
int |
nextNumericIdx(int currentColIdx)
Get the next expanded number-column index.
|
int |
numCats() |
int |
numNums() |
int |
numStart() |
int |
offsetChunkId() |
int |
outputChunkId() |
int |
outputChunkId(int n) |
int |
responseChunkId(int n) |
DataInfo.Rows |
rows(water.fvec.Chunk[] chks) |
DataInfo.Rows |
rows(water.fvec.Chunk[] chks,
boolean sparse) |
DataInfo |
scoringInfo(java.lang.String[] names,
water.fvec.Frame adaptFrame) |
void |
setCatNAFill(int[] catNAFill) |
void |
setPredictorTransform(DataInfo.TransformType t) |
void |
setResponse(java.lang.String name,
water.fvec.Vec v) |
void |
setResponse(java.lang.String name,
water.fvec.Vec v,
int n) |
void |
setResponseTransform(DataInfo.TransformType t) |
water.fvec.Vec |
setWeights(java.lang.String name,
water.fvec.Vec vec) |
void |
unScaleNumericals(double[] in,
double[] out)
Undo the standardization/normalization of numerical columns
|
void |
updateWeightedSigmaAndMean(double[] sigmas,
double[] mean) |
void |
updateWeightedSigmaAndMeanForResponse(double[] sigmas,
double[] mean) |
DataInfo |
validDinfo(water.fvec.Frame valid) |
int |
weightChunkId() |
checksum, makeSchema, readAll_impl, readAll, remove_impl, remove, remove, remove, remove, writeAll_impl, writeAll
public int[] _activeCols
public water.fvec.Frame _adaptedFrame
public int _responses
public int _outpus
public DataInfo.TransformType _predictor_transform
public DataInfo.TransformType _response_transform
public boolean _useAllFactorLevels
public int _nums
public int _cats
public int[] _catOffsets
public boolean[] _catMissing
public int[] _permutation
public double[] _normMul
public double[] _normSub
public double[] _normRespMul
public double[] _normRespSub
public double[] _numMeans
public boolean _intercept
public boolean _offset
public boolean _weights
public boolean _fold
public hex.Model.InteractionPair[] _interactions
public java.lang.String[] _interactionColumns
public int[] _interactionVecs
public int[] _numOffsets
public final boolean _skipMissing
public final boolean _imputeMissing
public boolean _valid
public final int[][] _catLvls
public final int[][] _intLvls
public java.lang.String[] _coefNames
public DataInfo(water.fvec.Frame train, water.fvec.Frame valid, boolean useAllFactorLevels, DataInfo.TransformType predictor_transform, boolean skipMissing, boolean imputeMissing, boolean missingBucket)
public DataInfo(water.fvec.Frame train, water.fvec.Frame valid, int nResponses, boolean useAllFactorLevels, DataInfo.TransformType predictor_transform, DataInfo.TransformType response_transform, boolean skipMissing, boolean imputeMissing, boolean missingBucket, boolean weight, boolean offset, boolean fold)
public DataInfo(water.fvec.Frame train, water.fvec.Frame valid, int nResponses, boolean useAllFactorLevels, DataInfo.TransformType predictor_transform, DataInfo.TransformType response_transform, boolean skipMissing, boolean imputeMissing, boolean missingBucket, boolean weight, boolean offset, boolean fold, java.lang.String[] interactions)
public DataInfo(water.fvec.Frame train, water.fvec.Frame valid, int nResponses, boolean useAllFactorLevels, DataInfo.TransformType predictor_transform, DataInfo.TransformType response_transform, boolean skipMissing, boolean imputeMissing, boolean missingBucket, boolean weight, boolean offset, boolean fold, boolean intercept)
public water.fvec.Vec setWeights(java.lang.String name, water.fvec.Vec vec)
public void dropWeights()
public void dropInteractions()
public int[] activeCols()
public void addResponse(java.lang.String[] names, water.fvec.Vec[] vecs)
public int[] catNAFill()
public int catNAFill(int cid)
public void setCatNAFill(int[] catNAFill)
public int responseChunkId(int n)
public int foldChunkId()
public int offsetChunkId()
public int weightChunkId()
public int outputChunkId()
public int outputChunkId(int n)
public void addOutput(java.lang.String name, water.fvec.Vec v)
public water.fvec.Vec getOutputVec(int i)
public void setResponse(java.lang.String name, water.fvec.Vec v)
public void setResponse(java.lang.String name, water.fvec.Vec v, int n)
protected long checksum_impl()
Overrides: checksum_impl in class water.Keyed<DataInfo>
public DataInfo validDinfo(water.fvec.Frame valid)
public double[] denormalizeBeta(double[] beta)
protected int[] fullCatOffsets()
public static int imputeCat(water.fvec.Vec v)
public static int imputeCat(water.fvec.Vec v, boolean useAllFactorLevels)
public DataInfo filterExpandedColumns(int[] cols)
cols - Array of the expanded column indices to keep.
public void updateWeightedSigmaAndMean(double[] sigmas, double[] mean)
public void updateWeightedSigmaAndMeanForResponse(double[] sigmas, double[] mean)
public void setPredictorTransform(DataInfo.TransformType t)
public void setResponseTransform(DataInfo.TransformType t)
public boolean isInteractionVec(int colid)
public final int fullN()
public final int largestCat()
public final int numStart()
public final int numCats()
public final int numNums()
public final int nextNumericIdx(int currentColIdx)
public final java.lang.String[] coefNames()
public int[] mapNames(java.lang.String[] names)
public final void unScaleNumericals(double[] in, double[] out)
in - input values
out - output values (can be the same as input)
public final int getCategoricalId(int cid, double val)
public final int getCategoricalId(int cid, int val)
cid - the column id
val - the integer representation of the categorical level
public final int getCategoricalIdFromInteraction(int cid, int val)
public final DataInfo.Row extractDenseRow(water.fvec.Chunk[] chunks, int rid, DataInfo.Row row)
public int getInteractionOffset(water.fvec.Chunk[] chunks, int cid, int rid)
public water.fvec.Vec getWeightsVec()
public water.fvec.Vec getOffsetVec()
public DataInfo.Row newDenseRow()
public DataInfo.Row newDenseRow(double[] numVals, long start)
public DataInfo.Rows rows(water.fvec.Chunk[] chks)
public DataInfo.Rows rows(water.fvec.Chunk[] chks, boolean sparse)
public final DataInfo.Row[] extractSparseRows(water.fvec.Chunk[] chunks)
chunks - chunk of dataset
public DataInfo scoringInfo(java.lang.String[] names, water.fvec.Frame adaptFrame)