|
|||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | ||||||||
java.lang.Object
jsr166y.ForkJoinTask<java.lang.Void>
jsr166y.CountedCompleter
water.H2O.H2OCountedCompleter
water.DTask<T>
water.DRemoteTask<T>
water.MRTask<DParseTask>
water.parser.DParseTask
public class DParseTask
Class responsible for the actual parsing of datasets. It works in two phases: the first phase collects basic statistics and determines the column encodings of the dataset; the second phase then goes over all the data again, encodes it, and writes it to the result VA. The parser works in a distributed fashion for CSV parsing, but runs on a single node only for the XLS and XLSX formats (they are not fully our code).
| Nested Class Summary | |
|---|---|
static class |
DParseTask.AtomicUnion
|
| Field Summary | |
|---|---|
protected boolean |
_map
|
static long[] |
powers10i
|
| Fields inherited from class water.MRTask |
|---|
_hi, _lo |
| Fields inherited from class water.DRemoteTask |
|---|
_fs, _is_local, _keys |
| Fields inherited from class water.DTask |
|---|
_cls, _eFromNode, _exception, _fname, _lineNum, _msg, _mth |
| Constructor Summary | |
|---|---|
DParseTask()
|
|
| Method Summary | |
|---|---|
void |
addColumns(int ncols)
|
void |
addInvalidCol(int colIdx)
Adds invalid value to the column. |
void |
addNumCol(int colIdx,
double value)
Adds double value to the column. |
void |
addNumCol(int colIdx,
long number,
int exp)
|
void |
addStrCol(int colIdx,
ValueString str)
Adds string (enum) value to the column. |
DParseTask |
clone2()
|
protected void |
createEnums()
|
DParseTask |
createPassOne(Value dataset,
ParseDataset job,
CustomParser parser)
Creates a phase one dparse task. |
DParseTask |
createPassTwo()
Creates the second pass dparse task from a first phase one. |
protected water.parser.DParseTask.OutputStreamRecord[] |
createRecords(long firstRow,
int rowsToParse)
Returns the list of streams that should be used to store the given rows. |
protected void |
createValueArrayHeader()
Creates the value header based on the calculated columns. |
static boolean |
fitsIntoInt(double d)
|
void |
init()
|
void |
invalidLine(java.lang.String err)
|
void |
invalidValue(int line,
int col)
|
boolean |
isString(int idx)
|
protected DParseTask |
makePhase2Clone(ParseDataset.FileInfo finfo)
|
void |
map(Key key)
Map function for distributed parsing of the CSV files. |
long |
memOverheadPerChunk()
|
void |
newLine()
Advances to new line. |
void |
normalizeSigma()
|
void |
passOne()
Executes the phase one of the parser. |
void |
passTwo()
Executes the phase two of the parser task. |
void |
phaseOneInitialize()
Initialize phase one data structures with the appropriate number of columns. |
void |
phaseTwoInitialize()
Initializes the phase two data. |
static double |
pow10(int exp)
|
static long |
pow10i(int exp)
|
void |
reduce(DParseTask dpt)
|
void |
rollbackLine()
Rolls back parsed line. |
void |
setColumnNames(java.lang.String[] colNames)
Sets the column names and creates the array of the enums for each column. |
| Methods inherited from class water.MRTask |
|---|
hi, lcompute, lo, lonCompletion, onExceptionalCompletion |
| Methods inherited from class water.DRemoteTask |
|---|
alsoBlockFor, alsoBlockFor, clone, compute2, dfork, getFutures, invoke, invokeOnAllNodes, keys, onCompletion, reduceAlsoBlock |
| Methods inherited from class water.DTask |
|---|
copyOver, dinvoke, frozenType, getDException, hasException, logVerbose, newInstance, onAck, onAckAck, read, setException, toDocField, write, writeJSONFields |
| Methods inherited from class water.H2O.H2OCountedCompleter |
|---|
compute, priority |
| Methods inherited from class jsr166y.CountedCompleter |
|---|
addToPendingCount, compareAndSetPendingCount, complete, exec, getCompleter, getPendingCount, getRawResult, setCompleter, setPendingCount, setRawResult, tryComplete |
| Methods inherited from class jsr166y.ForkJoinTask |
|---|
adapt, adapt, adapt, cancel, compareAndSetForkJoinTaskTag, completeExceptionally, fork, get, get, getException, getForkJoinTaskTag, getPool, getQueuedTaskCount, getSurplusQueuedTaskCount, helpQuiesce, inForkJoinPool, invoke, invokeAll, invokeAll, invokeAll, isCancelled, isCompletedAbnormally, isCompletedNormally, isDone, join, peekNextLocalTask, pollNextLocalTask, pollTask, quietlyComplete, quietlyInvoke, quietlyJoin, reinitialize, setForkJoinTaskTag, tryUnfork |
| Methods inherited from class java.lang.Object |
|---|
equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
| Methods inherited from interface water.Freezable |
|---|
frozenType, newInstance, read, toDocField, write, writeJSONFields |
| Field Detail |
|---|
protected boolean _map
public static long[] powers10i
| Constructor Detail |
|---|
public DParseTask()
| Method Detail |
|---|
public void addColumns(int ncols)
public long memOverheadPerChunk()
memOverheadPerChunk in class MRTask<DParseTask>
public DParseTask clone2()
protected water.parser.DParseTask.OutputStreamRecord[] createRecords(long firstRow,
int rowsToParse)
public boolean isString(int idx)
isString in interface CustomParser.DataOut
protected DParseTask makePhase2Clone(ParseDataset.FileInfo finfo)
public DParseTask createPassOne(Value dataset,
ParseDataset job,
CustomParser parser)
dataset - Dataset to parse.
resultKey - VA to store results to.
parserType - Parser type to use.
public void passOne()
java.lang.Exception
public DParseTask createPassTwo()
public void passTwo()
public void normalizeSigma()
protected void createValueArrayHeader()
protected void createEnums()
public void init()
init in class MRTask<DParseTask>
public void setColumnNames(java.lang.String[] colNames)
setColumnNames in interface CustomParser.DataOut
public void phaseOneInitialize()
public void phaseTwoInitialize()
public void map(Key key)
map in class MRTask<DParseTask>
public void reduce(DParseTask dpt)
reduce in class DRemoteTask<DParseTask>
public static double pow10(int exp)
public static long pow10i(int exp)
public static final boolean fitsIntoInt(double d)
public void newLine()
newLine in interface CustomParser.DataOut
public void rollbackLine()
rollbackLine in interface CustomParser.DataOut
public void addNumCol(int colIdx,
double value)
addNumCol in interface CustomParser.DataOut
colIdx -
value -
public void addInvalidCol(int colIdx)
addInvalidCol in interface CustomParser.DataOut
public void addStrCol(int colIdx,
ValueString str)
addStrCol in interface CustomParser.DataOut
public void addNumCol(int colIdx,
long number,
int exp)
addNumCol in interface CustomParser.DataOut
public void invalidLine(java.lang.String err)
invalidLine in interface CustomParser.DataOut
public void invalidValue(int line,
int col)
invalidValue in interface CustomParser.DataOut
|
|||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | ||||||||