public class ParseSetup extends Iced
Modifier and Type | Class and Description |
---|---|
static class |
ParseSetup.GuessSetupTsk
Try to determine the ParseSetup on a file by file basis
and merge results.
|
Modifier and Type | Field and Description |
---|---|
int |
_chunk_size |
ParseWriter.ParseErr[] |
_errs |
boolean |
disableParallelParse |
static int |
GUESS_COL_CNT |
static int |
GUESS_HEADER |
static byte |
GUESS_SEP |
static int |
HAS_HEADER |
static int |
NO_HEADER |
Constructor and Description |
---|
ParseSetup()
Create a default ParseSetup
Used by Ray's schema magic
|
ParseSetup(ParserInfo parseType,
byte sep,
boolean singleQuotes,
int checkHeader,
int ncols,
java.lang.String[][] data,
ParseWriter.ParseErr[] errs)
Create a ParseSetup without any column information
Typically used by file type parsers for returning final invalid results
|
ParseSetup(ParserInfo parseType,
byte sep,
boolean singleQuotes,
int checkHeader,
int ncols,
java.lang.String[] columnNames,
byte[] ctypes,
java.lang.String[][] domains,
java.lang.String[][] naStrings,
java.lang.String[][] data)
Create a ParseSetup with all parameters except chunk size.
|
ParseSetup(ParserInfo parseType,
byte sep,
boolean singleQuotes,
int checkHeader,
int ncols,
java.lang.String[] columnNames,
byte[] ctypes,
java.lang.String[][] domains,
java.lang.String[][] naStrings,
java.lang.String[][] data,
ParseWriter.ParseErr[] errs) |
ParseSetup(ParserInfo parse_type,
byte sep,
boolean singleQuotes,
int checkHeader,
int ncols,
java.lang.String[] columnNames,
byte[] ctypes,
java.lang.String[][] domains,
java.lang.String[][] naStrings,
java.lang.String[][] data,
ParseWriter.ParseErr[] errs,
int chunkSize) |
ParseSetup(ParseSetup ps) |
ParseSetup(ParseSetupV3 ps)
Create a ParseSetup with parameters from the client.
|
Modifier and Type | Method and Description |
---|---|
<T extends ParseSetup> |
copyTo(T setup)
Copies the common setup to another object (that is possibly an extension of the base setup).
|
static java.lang.String |
createHexName(java.lang.String n)
Cleans up the file name to make .hex name
to be used as a destination key.
|
java.lang.String[] |
getColumnNames() |
byte[] |
getColumnTypes() |
java.lang.String[] |
getColumnTypeStrings() |
java.lang.String[][] |
getData() |
ParseSetup |
getFinalSetup(Key[] inputKeys,
ParseSetup demandedSetup)
Create and return a final parser-specific setup
for this configuration.
|
ParserInfo |
getParseType() |
static ParseSetup |
guessSetup(ByteVec bv,
byte[] bits,
ParserInfo parserType,
byte sep,
int ncols,
boolean singleQuotes,
int checkHeader,
java.lang.String[] columnNames,
byte[] columnTypes,
java.lang.String[][] domains,
java.lang.String[][] naStrings) |
static ParseSetup |
guessSetup(ByteVec bv,
byte[] bits,
ParseSetup userSetup)
Guess everything from a single pile-o-bits.
|
static ParseSetup |
guessSetup(Key[] fkeys,
boolean singleQuote,
int checkHeader)
Used by test harnesses for simple parsing of test data.
|
static ParseSetup |
guessSetup(Key[] fkeys,
ParseSetup userSetup)
Discover the parse setup needed to correctly parse all files.
|
protected boolean |
isCompatible(ParseSetup setupB) |
static ParseSetup |
makeSVMLightSetup() |
protected Parser |
parser(Key jobKey)
This is a single entry-point to create a parser.
|
ParseSetup |
setCheckHeader(int check_header) |
ParseSetup |
setChunkSize(int chunk_size) |
ParseSetup |
setColumnNames(java.lang.String[] column_names) |
ParseSetup |
setColumnTypes(byte[] column_types) |
ParseSetup |
setDomains(java.lang.String[][] domains) |
void |
setFileName(java.lang.String name) |
ParseSetup |
setNAStrings(java.lang.String[][] na_strings) |
ParseSetup |
setNumberColumns(int number_columns) |
ParseSetup |
setParseType(ParserInfo parse_type) |
ParseSetup |
setSeparator(byte separator) |
ParseSetup |
setSingleQuotes(boolean single_quotes) |
static byte[] |
strToColumnTypes(java.lang.String[] strs) |
java.lang.String |
toString() |
asBytes, clone, copyOver, frozenType, read, readExternal, readJSON, reloadFromBytes, toJsonString, write, writeExternal, writeJSON
public static final byte GUESS_SEP
public static final int NO_HEADER
public static final int GUESS_HEADER
public static final int HAS_HEADER
public static final int GUESS_COL_CNT
public boolean disableParallelParse
public ParseWriter.ParseErr[] _errs
public int _chunk_size
public ParseSetup(ParseSetup ps)
public ParseSetup(ParserInfo parse_type, byte sep, boolean singleQuotes, int checkHeader, int ncols, java.lang.String[] columnNames, byte[] ctypes, java.lang.String[][] domains, java.lang.String[][] naStrings, java.lang.String[][] data, ParseWriter.ParseErr[] errs, int chunkSize)
public ParseSetup(ParseSetupV3 ps)
ps
- Parse setup settings from client
public ParseSetup(ParserInfo parseType, byte sep, boolean singleQuotes, int checkHeader, int ncols, java.lang.String[] columnNames, byte[] ctypes, java.lang.String[][] domains, java.lang.String[][] naStrings, java.lang.String[][] data)
public ParseSetup(ParserInfo parseType, byte sep, boolean singleQuotes, int checkHeader, int ncols, java.lang.String[] columnNames, byte[] ctypes, java.lang.String[][] domains, java.lang.String[][] naStrings, java.lang.String[][] data, ParseWriter.ParseErr[] errs)
public ParseSetup(ParserInfo parseType, byte sep, boolean singleQuotes, int checkHeader, int ncols, java.lang.String[][] data, ParseWriter.ParseErr[] errs)
public ParseSetup()
public void setFileName(java.lang.String name)
public static ParseSetup makeSVMLightSetup()
public java.lang.String[] getColumnNames()
public java.lang.String[][] getData()
public java.lang.String[] getColumnTypeStrings()
public byte[] getColumnTypes()
public static byte[] strToColumnTypes(java.lang.String[] strs)
protected Parser parser(Key jobKey)
public final ParseSetup getFinalSetup(Key[] inputKeys, ParseSetup demandedSetup)
inputKeys
- inputs
demandedSetup
- setup demanded by a user
public java.lang.String toString()
toString
in class java.lang.Object
public static ParseSetup guessSetup(Key[] fkeys, boolean singleQuote, int checkHeader)
fkeys
- Keys to input vectors to be parsed
singleQuote
- single quotes quote fields
checkHeader
- check for a header
public static ParseSetup guessSetup(Key[] fkeys, ParseSetup userSetup)
fkeys
- Keys to input vectors to be parsed
userSetup
- Setup guidance from user
protected boolean isCompatible(ParseSetup setupB)
public static ParseSetup guessSetup(ByteVec bv, byte[] bits, ParseSetup userSetup)
bits
- Initial bytes from a parse source
public static ParseSetup guessSetup(ByteVec bv, byte[] bits, ParserInfo parserType, byte sep, int ncols, boolean singleQuotes, int checkHeader, java.lang.String[] columnNames, byte[] columnTypes, java.lang.String[][] domains, java.lang.String[][] naStrings)
public static java.lang.String createHexName(java.lang.String n)
n
- filename to be cleaned
public <T extends ParseSetup> T copyTo(T setup)
T
- class derived from ParseSetup
setup
- target setup object
public ParserInfo getParseType()
public ParseSetup setParseType(ParserInfo parse_type)
public ParseSetup setSeparator(byte separator)
public ParseSetup setSingleQuotes(boolean single_quotes)
public ParseSetup setCheckHeader(int check_header)
public ParseSetup setNumberColumns(int number_columns)
public ParseSetup setColumnNames(java.lang.String[] column_names)
public ParseSetup setColumnTypes(byte[] column_types)
public ParseSetup setDomains(java.lang.String[][] domains)
public ParseSetup setNAStrings(java.lang.String[][] na_strings)
public ParseSetup setChunkSize(int chunk_size)