public class DParseTask extends MRTask<DParseTask> implements CustomParser.DataOut
Modifier and Type | Class and Description |
---|---|
static class |
DParseTask.AtomicUnion |
Modifier and Type | Field and Description |
---|---|
protected boolean |
_map |
static long[] |
powers10i |
_hi, _lo, _runSingleThreaded
_fs, _is_local, _keys, _top_level
_cls, _eFromNode, _exception, _fname, _lineNum, _msg, _mth
Constructor and Description |
---|
DParseTask() |
Modifier and Type | Method and Description |
---|---|
void |
addColumns(int ncols) |
void |
addInvalidCol(int colIdx)
Adds an invalid value to the column.
|
void |
addNumCol(int colIdx,
double value)
Adds a double value to the column.
|
void |
addNumCol(int colIdx,
long number,
int exp) |
void |
addStrCol(int colIdx,
ValueString str)
Adds a string (enum) value to the column.
|
DParseTask |
clone2() |
protected void |
createEnums() |
DParseTask |
createPassOne(Value dataset,
ParseDataset job,
CustomParser parser)
Creates a phase one dparse task.
|
DParseTask |
createPassTwo()
Creates the second-pass dparse task from a phase-one task.
|
protected water.parser.DParseTask.OutputStreamRecord[] |
createRecords(long firstRow,
int rowsToParse)
Returns the list of streams that should be used to store the given rows.
|
protected void |
createValueArrayHeader()
Creates the value header based on the calculated columns.
|
static boolean |
fitsIntoInt(double d) |
void |
init() |
void |
invalidLine(java.lang.String err) |
void |
invalidValue(int line,
int col) |
boolean |
isString(int idx) |
protected DParseTask |
makePhase2Clone(ParseDataset.FileInfo finfo) |
void |
map(Key key)
Map function for distributed parsing of the CSV files.
|
long |
memOverheadPerChunk() |
void |
newLine()
Advances to a new line.
|
void |
normalizeSigma() |
void |
passOne()
Executes phase one of the parser.
|
void |
passTwo()
Executes phase two of the parser task.
|
void |
phaseOneInitialize()
Initialize phase one data structures with the appropriate number of
columns.
|
void |
phaseTwoInitialize()
Initializes the phase two data.
|
static double |
pow10(int exp) |
static long |
pow10i(int exp) |
void |
reduce(DParseTask dpt) |
void |
rollbackLine()
Rolls back the parsed line.
|
void |
setColumnNames(java.lang.String[] colNames)
Sets the column names and creates the array of the enums for each
column.
|
hi, lcompute, lo, lonCompletion, onExceptionalCompletion
alsoBlockFor, alsoBlockFor, block, clone, compute2, dfork, getFutures, invoke, invokeOnAllNodes, isReleasable, keys, onCompletion, postGlobal, reduceAlsoBlock
copyOver, dinvoke, frozenType, getDException, hasException, logVerbose, newInstance, onAck, onAckAck, read, setException, toDocField, write, writeJSONFields
compute, priority
addToPendingCount, compareAndSetPendingCount, complete, exec, getCompleter, getPendingCount, getRawResult, setCompleter, setPendingCount, setRawResult, tryComplete
adapt, adapt, adapt, cancel, compareAndSetForkJoinTaskTag, completeExceptionally, fork, get, get, getException, getForkJoinTaskTag, getPool, getQueuedTaskCount, getSurplusQueuedTaskCount, helpQuiesce, inForkJoinPool, invoke, invokeAll, invokeAll, invokeAll, isCancelled, isCompletedAbnormally, isCompletedNormally, isDone, join, peekNextLocalTask, pollNextLocalTask, pollTask, quietlyComplete, quietlyInvoke, quietlyJoin, reinitialize, setForkJoinTaskTag, tryUnfork
equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
frozenType, newInstance, read, toDocField, write, writeJSONFields
public void addColumns(int ncols)
public long memOverheadPerChunk()
memOverheadPerChunk
in class MRTask<DParseTask>
public DParseTask clone2()
protected water.parser.DParseTask.OutputStreamRecord[] createRecords(long firstRow, int rowsToParse)
public boolean isString(int idx)
isString
in interface CustomParser.DataOut
protected DParseTask makePhase2Clone(ParseDataset.FileInfo finfo)
public DParseTask createPassOne(Value dataset, ParseDataset job, CustomParser parser)
dataset - Dataset to parse.
public void passOne()
public DParseTask createPassTwo()
public void passTwo()
public void normalizeSigma()
protected void createValueArrayHeader()
protected void createEnums()
public void init()
init
in class MRTask<DParseTask>
public void setColumnNames(java.lang.String[] colNames)
setColumnNames
in interface CustomParser.DataOut
public void phaseOneInitialize()
public void phaseTwoInitialize()
public void map(Key key)
map
in class MRTask<DParseTask>
public void reduce(DParseTask dpt)
reduce
in class DRemoteTask<DParseTask>
public static double pow10(int exp)
public static long pow10i(int exp)
public static final boolean fitsIntoInt(double d)
public void newLine()
newLine
in interface CustomParser.DataOut
public void rollbackLine()
rollbackLine
in interface CustomParser.DataOut
public void addNumCol(int colIdx, double value)
addNumCol
in interface CustomParser.DataOut
colIdx -
value -
public void addInvalidCol(int colIdx)
addInvalidCol
in interface CustomParser.DataOut
public void addStrCol(int colIdx, ValueString str)
addStrCol
in interface CustomParser.DataOut
public void addNumCol(int colIdx, long number, int exp)
addNumCol
in interface CustomParser.DataOut
public void invalidLine(java.lang.String err)
invalidLine
in interface CustomParser.DataOut
public void invalidValue(int line, int col)
invalidValue
in interface CustomParser.DataOut