public class DataInfo
extends water.Keyed
Modifier and Type | Class and Description |
---|---|
class |
DataInfo.Row |
static class |
DataInfo.TransformType |
Modifier and Type | Field and Description |
---|---|
int[] |
_activeCols |
water.fvec.Frame |
_adaptedFrame |
int |
_bins |
int[] |
_catMissing |
int[] |
_catOffsets |
int |
_cats |
boolean |
_intercept |
double[] |
_normMul |
double[] |
_normRespMul |
double[] |
_normRespSub |
double[] |
_normSub |
int |
_nums |
boolean |
_offset |
int[] |
_permutation |
DataInfo.TransformType |
_predictor_transform |
DataInfo.TransformType |
_response_transform |
int |
_responses |
boolean |
_skipMissing |
boolean |
_useAllFactorLevels |
boolean |
_valid |
boolean |
_weights |
Constructor and Description |
---|
DataInfo(water.Key selfKey,
water.fvec.Frame train,
water.fvec.Frame valid,
int nResponses,
boolean useAllFactorLevels,
DataInfo.TransformType predictor_transform,
DataInfo.TransformType response_transform,
boolean skipMissing,
boolean missingBucket,
boolean weight,
boolean offset) |
DataInfo(water.Key selfKey,
water.fvec.Frame train,
water.fvec.Frame valid,
int nResponses,
boolean useAllFactorLevels,
DataInfo.TransformType predictor_transform,
DataInfo.TransformType response_transform,
boolean skipMissing,
boolean missingBucket,
boolean weight,
boolean offset,
boolean intercept) |
Modifier and Type | Method and Description |
---|---|
protected long |
checksum_impl() |
java.lang.String[] |
coefNames() |
double |
computeSparseOffset(double[] coefficients) |
DataInfo |
deep_clone() |
double[] |
denormalizeBeta(double[] beta) |
DataInfo.Row |
extractDenseRow(water.fvec.Chunk[] chunks,
int rid,
DataInfo.Row row) |
DataInfo.Row[] |
extractSparseRows(water.fvec.Chunk[] chunks,
double offset)
Extract (sparse) rows from given chunks.
|
DataInfo |
filterExpandedColumns(int[] cols) |
int |
fullN() |
int |
getCategoricalId(int cid,
int val) |
int |
largestCat() |
static DataInfo |
makeEmpty(int fullN) |
int[] |
mapNames(java.lang.String[] names) |
DataInfo.Row |
newDenseRow() |
DataInfo.Row |
newDenseRow(double[] numVals) |
int |
numStart() |
int |
offsetChunkId() |
int |
responseChunkId() |
void |
setPredictorTransform(DataInfo.TransformType t) |
void |
setResponseTransform(DataInfo.TransformType t) |
void |
unScaleNumericals(float[] in,
float[] out)
Undo the standardization/normalization of numerical columns
|
void |
updateWeightedSigmaAndMean(double[] sigmas,
double[] mean) |
DataInfo |
validDinfo(water.fvec.Frame valid) |
int |
weightChunkId() |
checksum, getBinarySerializer, getPublishedKeys, remove_impl, remove, remove, remove, remove
public int[] _activeCols
public water.fvec.Frame _adaptedFrame
public int _responses
public DataInfo.TransformType _predictor_transform
public DataInfo.TransformType _response_transform
public boolean _useAllFactorLevels
public int _nums
public int _bins
public int _cats
public int[] _catOffsets
public int[] _catMissing
public int[] _permutation
public double[] _normMul
public double[] _normSub
public double[] _normRespMul
public double[] _normRespSub
public boolean _intercept
public final boolean _offset
public final boolean _weights
public final boolean _skipMissing
public boolean _valid
public DataInfo(water.Key selfKey, water.fvec.Frame train, water.fvec.Frame valid, int nResponses, boolean useAllFactorLevels, DataInfo.TransformType predictor_transform, DataInfo.TransformType response_transform, boolean skipMissing, boolean missingBucket, boolean weight, boolean offset)
public DataInfo(water.Key selfKey, water.fvec.Frame train, water.fvec.Frame valid, int nResponses, boolean useAllFactorLevels, DataInfo.TransformType predictor_transform, DataInfo.TransformType response_transform, boolean skipMissing, boolean missingBucket, boolean weight, boolean offset, boolean intercept)
public int responseChunkId()
public int offsetChunkId()
public int weightChunkId()
protected long checksum_impl()
Overrides: checksum_impl in class water.Keyed
public DataInfo deep_clone()
public static DataInfo makeEmpty(int fullN)
public DataInfo validDinfo(water.fvec.Frame valid)
public double[] denormalizeBeta(double[] beta)
public DataInfo filterExpandedColumns(int[] cols)
public void updateWeightedSigmaAndMean(double[] sigmas, double[] mean)
public void setPredictorTransform(DataInfo.TransformType t)
public void setResponseTransform(DataInfo.TransformType t)
public final int fullN()
public final int largestCat()
public final int numStart()
public final java.lang.String[] coefNames()
public int[] mapNames(java.lang.String[] names)
public final void unScaleNumericals(float[] in, float[] out)
Parameters:
in - input values
out - output values (can be the same as input)
public final int getCategoricalId(int cid, int val)
public final DataInfo.Row extractDenseRow(water.fvec.Chunk[] chunks, int rid, DataInfo.Row row)
public DataInfo.Row newDenseRow()
public DataInfo.Row newDenseRow(double[] numVals)
public double computeSparseOffset(double[] coefficients)
public final DataInfo.Row[] extractSparseRows(water.fvec.Chunk[] chunks, double offset)
Parameters:
chunks - chunk of dataset
offset - adjustment for 0s if running with on-the-fly standardization (i.e. zeros are not really zeros because of centering)