public class DataInfo
extends water.Keyed
| Modifier and Type | Class and Description |
|---|---|
| class | DataInfo.Row |
| static class | DataInfo.TransformType |
| Modifier and Type | Field and Description |
|---|---|
| int[] | _activeCols |
| water.fvec.Frame | _adaptedFrame |
| int | _bins |
| int[][] | _catLvls |
| int[] | _catMissing |
| int[] | _catModes |
| int[] | _catOffsets |
| int | _cats |
| boolean | _fold |
| boolean | _imputeMissing |
| boolean | _intercept |
| double[] | _normMul |
| double[] | _normRespMul |
| double[] | _normRespSub |
| double[] | _normSub |
| double[] | _numMeans |
| int | _nums |
| boolean | _offset |
| int[] | _permutation |
| DataInfo.TransformType | _predictor_transform |
| DataInfo.TransformType | _response_transform |
| int | _responses |
| boolean | _skipMissing |
| boolean | _useAllFactorLevels |
| boolean | _valid |
| boolean | _weights |
| Constructor and Description |
|---|
| DataInfo(water.Key selfKey, water.fvec.Frame train, water.fvec.Frame valid, boolean useAllFactorLevels, DataInfo.TransformType predictor_transform, boolean skipMissing, boolean imputeMissing, boolean missingBucket) |
| DataInfo(water.Key selfKey, water.fvec.Frame train, water.fvec.Frame valid, int nResponses, boolean useAllFactorLevels, DataInfo.TransformType predictor_transform, DataInfo.TransformType response_transform, boolean skipMissing, boolean imputeMissing, boolean missingBucket, boolean weight, boolean offset, boolean fold) |
| DataInfo(water.Key selfKey, water.fvec.Frame train, water.fvec.Frame valid, int nResponses, boolean useAllFactorLevels, DataInfo.TransformType predictor_transform, DataInfo.TransformType response_transform, boolean skipMissing, boolean imputeMissing, boolean missingBucket, boolean weight, boolean offset, boolean fold, boolean intercept) |
| Modifier and Type | Method and Description |
|---|---|
| protected long | checksum_impl() |
| java.lang.String[] | coefNames() |
| double | computeSparseOffset(double[] coefficients) |
| DataInfo | deep_clone() |
| double[] | denormalizeBeta(double[] beta) |
| DataInfo.Row | extractDenseRow(water.fvec.Chunk[] chunks, int rid, DataInfo.Row row) |
| DataInfo.Row[] | extractSparseRows(water.fvec.Chunk[] chunks, double offset) — Extract (sparse) rows from given chunks. |
| DataInfo | filterExpandedColumns(int[] cols) |
| int | foldChunkId() |
| int | fullN() |
| int | getCategoricalId(int cid, int val) |
| static int | imputeCat(water.fvec.Vec v) |
| int | largestCat() |
| static DataInfo | makeEmpty(int fullN) |
| int[] | mapNames(java.lang.String[] names) |
| DataInfo.Row | newDenseRow() |
| DataInfo.Row | newDenseRow(double[] numVals) |
| int | numStart() |
| int | offsetChunkId() |
| int | responseChunkId() |
| void | setPredictorTransform(DataInfo.TransformType t) |
| void | setResponseTransform(DataInfo.TransformType t) |
| void | unScaleNumericals(double[] in, double[] out) — Undo the standardization/normalization of numerical columns. |
| void | unScaleResponses(double[] in, double[] out) — Undo the standardization/normalization of response columns. |
| void | updateWeightedSigmaAndMean(double[] sigmas, double[] mean) |
| DataInfo | validDinfo(water.fvec.Frame valid) |
| int | weightChunkId() |
Methods inherited from class water.Keyed: checksum, getBinarySerializer, getPublishedKeys, remove_impl, remove, remove, remove, remove
public int[] _activeCols
public water.fvec.Frame _adaptedFrame
public int _responses
public DataInfo.TransformType _predictor_transform
public DataInfo.TransformType _response_transform
public boolean _useAllFactorLevels
public int _nums
public int _bins
public int _cats
public int[] _catOffsets
public int[] _catMissing
public int[] _catModes
public int[] _permutation
public double[] _normMul
public double[] _normSub
public double[] _normRespMul
public double[] _normRespSub
public double[] _numMeans
public boolean _intercept
public final boolean _offset
public final boolean _weights
public final boolean _fold
public final boolean _skipMissing
public final boolean _imputeMissing
public boolean _valid
public final int[][] _catLvls
public DataInfo(water.Key selfKey,
water.fvec.Frame train,
water.fvec.Frame valid,
boolean useAllFactorLevels,
DataInfo.TransformType predictor_transform,
boolean skipMissing,
boolean imputeMissing,
boolean missingBucket)
public DataInfo(water.Key selfKey,
water.fvec.Frame train,
water.fvec.Frame valid,
int nResponses,
boolean useAllFactorLevels,
DataInfo.TransformType predictor_transform,
DataInfo.TransformType response_transform,
boolean skipMissing,
boolean imputeMissing,
boolean missingBucket,
boolean weight,
boolean offset,
boolean fold)
public DataInfo(water.Key selfKey,
water.fvec.Frame train,
water.fvec.Frame valid,
int nResponses,
boolean useAllFactorLevels,
DataInfo.TransformType predictor_transform,
DataInfo.TransformType response_transform,
boolean skipMissing,
boolean imputeMissing,
boolean missingBucket,
boolean weight,
boolean offset,
boolean fold,
boolean intercept)
public int responseChunkId()
public int foldChunkId()
public int offsetChunkId()
public int weightChunkId()
protected long checksum_impl()
Overrides: checksum_impl in class water.Keyed
public DataInfo deep_clone()
public static DataInfo makeEmpty(int fullN)
public DataInfo validDinfo(water.fvec.Frame valid)
public double[] denormalizeBeta(double[] beta)
public static int imputeCat(water.fvec.Vec v)
public DataInfo filterExpandedColumns(int[] cols)
public void updateWeightedSigmaAndMean(double[] sigmas,
double[] mean)
public void setPredictorTransform(DataInfo.TransformType t)
public void setResponseTransform(DataInfo.TransformType t)
public final int fullN()
public final int largestCat()
public final int numStart()
public final java.lang.String[] coefNames()
public int[] mapNames(java.lang.String[] names)
public final void unScaleNumericals(double[] in,
double[] out)
Parameters: in - input values; out - output values (can be the same as input)
public final void unScaleResponses(double[] in,
double[] out)
Parameters: in - input values; out - output values (can be the same as input)
public final int getCategoricalId(int cid,
int val)
public final DataInfo.Row extractDenseRow(water.fvec.Chunk[] chunks, int rid, DataInfo.Row row)
public DataInfo.Row newDenseRow()
public DataInfo.Row newDenseRow(double[] numVals)
public double computeSparseOffset(double[] coefficients)
public final DataInfo.Row[] extractSparseRows(water.fvec.Chunk[] chunks, double offset)
Parameters: chunks - chunk of dataset; offset - adjustment for 0s if running with on-the-fly standardization (i.e. zeros are not really zeros because of centering)