public class ParseSetup extends Iced
Modifier and Type | Class and Description |
---|---|
static class |
ParseSetup.GuessSetupTsk
Try to determine the ParseSetup on a file by file basis
and merge results.
|
Modifier and Type | Field and Description |
---|---|
int |
_chunk_size |
static byte |
DEFAULT_ESCAPE_CHAR |
boolean |
disableParallelParse |
static int |
GUESS_COL_CNT |
static int |
GUESS_HEADER |
static byte |
GUESS_SEP |
static int |
HAS_HEADER |
static int |
NO_HEADER |
Constructor and Description |
---|
ParseSetup()
Create a default ParseSetup
Used by Ray's schema magic
|
ParseSetup(ParserInfo parseType,
byte sep,
boolean singleQuotes,
int checkHeader,
int ncols,
java.lang.String[][] data,
ParseWriter.ParseErr[] errs)
Create a ParseSetup without any column information
|
ParseSetup(ParserInfo parseType,
byte sep,
boolean singleQuotes,
int checkHeader,
int ncols,
java.lang.String[] columnNames,
byte[] ctypes,
java.lang.String[][] domains,
java.lang.String[][] naStrings,
java.lang.String[][] data) |
ParseSetup(ParserInfo parseType,
byte sep,
boolean singleQuotes,
int checkHeader,
int ncols,
java.lang.String[] columnNames,
byte[] ctypes,
java.lang.String[][] domains,
java.lang.String[][] naStrings,
java.lang.String[][] data,
byte escapeChar)
Create a ParseSetup with all parameters except chunk size.
|
ParseSetup(ParserInfo parseType,
byte sep,
boolean singleQuotes,
int checkHeader,
int ncols,
java.lang.String[] columnNames,
byte[] ctypes,
java.lang.String[][] domains,
java.lang.String[][] naStrings,
java.lang.String[][] data,
byte[] nonDataLineMarkers,
byte escapeChar)
Create a ParseSetup with all parameters except chunk size.
|
ParseSetup(ParserInfo parseType,
byte sep,
boolean singleQuotes,
int checkHeader,
int ncols,
java.lang.String[] columnNames,
byte[] ctypes,
java.lang.String[][] domains,
java.lang.String[][] naStrings,
java.lang.String[][] data,
ParseWriter.ParseErr[] errs) |
ParseSetup(ParserInfo parseType,
byte sep,
boolean singleQuotes,
int checkHeader,
int ncols,
java.lang.String[] columnNames,
byte[] ctypes,
java.lang.String[][] domains,
java.lang.String[][] naStrings,
java.lang.String[][] data,
ParseWriter.ParseErr[] errs,
byte[] nonDataLineMarkers) |
ParseSetup(ParserInfo parse_type,
byte sep,
boolean singleQuotes,
int checkHeader,
int ncols,
java.lang.String[] columnNames,
byte[] ctypes,
java.lang.String[][] domains,
java.lang.String[][] naStrings,
java.lang.String[][] data,
ParseWriter.ParseErr[] errs,
int chunkSize,
byte[] nonDataLineMarkers,
byte escapeChar) |
ParseSetup(ParserInfo parse_type,
byte sep,
boolean singleQuotes,
int checkHeader,
int ncols,
java.lang.String[] columnNames,
byte[] ctypes,
java.lang.String[][] domains,
java.lang.String[][] naStrings,
java.lang.String[][] data,
ParseWriter.ParseErr[] errs,
int chunkSize,
Key<DecryptionTool> decrypt_tool,
int[] skipped_columns,
byte[] nonDataLineMarkers,
byte escapeChar) |
ParseSetup(ParseSetup ps) |
ParseSetup(ParseSetupV3 ps)
Create a ParseSetup with parameters from the client.
|
Modifier and Type | Method and Description |
---|---|
void |
addErrs(ParseWriter.ParseErr... errs) |
<T extends ParseSetup> |
copyTo(T setup)
Copies the common setup to another object (that is possibly an extension of the base setup).
|
static java.lang.String |
createHexName(java.lang.String n)
Cleans up the file name to make a .hex name
to be used as a destination key.
|
ParseWriter.ParseErr[] |
errs() |
int[] |
get_parse_columns_indices() |
java.lang.String[] |
getColumnNames() |
byte[] |
getColumnTypes() |
java.lang.String[] |
getColumnTypeStrings() |
java.lang.String[][] |
getData() |
DecryptionTool |
getDecryptionTool() |
ParseSetup |
getFinalSetup(Key[] inputKeys,
ParseSetup demandedSetup)
Creates and returns a final parser-specific setup
for this configuration.
|
int |
getNumberColumns() |
ParserInfo |
getParseType() |
int[] |
getSkippedColumns() |
static ParseSetup |
guessSetup(ByteVec bv,
byte[] bits,
ParseSetup userSetup)
Guess everything from a single pile-o-bits.
|
static ParseSetup |
guessSetup(Key[] fkeys,
boolean singleQuote,
int checkHeader)
Used by test harnesses for simple parsing of test data.
|
static ParseSetup |
guessSetup(Key[] fkeys,
ParseSetup userSetup)
Discover the parse setup needed to correctly parse all files.
|
protected boolean |
isCompatible(ParseSetup setupB) |
boolean |
isNA(int colIdx,
BufferedString str)
Tests whether a given string represents a NA in a given column.
|
static ParseSetup |
makeSVMLightSetup() |
ParserInfo.ParseMethod |
parseMethod(int nfiles,
Vec v) |
protected Parser |
parser(Key jobKey)
This is a single entry-point to create a parser.
|
ParseSetup |
setCheckHeader(int check_header) |
ParseSetup |
setChunkSize(int chunk_size) |
ParseSetup |
setColumnNames(java.lang.String[] column_names) |
ParseSetup |
setColumnTypes(byte[] column_types) |
ParseSetup |
setDecryptTool(Key<DecryptionTool> decrypt_tool) |
ParseSetup |
setDomains(java.lang.String[][] domains) |
void |
setFileName(java.lang.String name) |
ParseSetup |
setNAStrings(java.lang.String[][] na_strings) |
ParseSetup |
setNumberColumns(int number_columns) |
void |
setParseColumnIndices(int ncols,
int[] skipped_columns) |
ParseSetup |
setParseType(ParserInfo parse_type) |
ParseSetup |
setSeparator(byte separator) |
ParseSetup |
setSingleQuotes(boolean single_quotes) |
ParseSetup |
setSkippedColumns(int[] skipped_columns) |
void |
setSyntheticColumns(java.lang.String[] names,
java.lang.String[][] valueMapping,
byte synthetic_column_type) |
static byte[] |
strToColumnTypes(java.lang.String[] strs) |
java.lang.String |
toString() |
asBytes, clone, copyOver, frozenType, read, readExternal, readJSON, reloadFromBytes, toJsonBytes, toJsonString, write, writeExternal, writeJSON
public static final byte GUESS_SEP
public static final int NO_HEADER
public static final int GUESS_HEADER
public static final int HAS_HEADER
public static final int GUESS_COL_CNT
public static final byte DEFAULT_ESCAPE_CHAR
public boolean disableParallelParse
public int _chunk_size
public ParseSetup(ParseSetup ps)
public ParseSetup(ParserInfo parse_type, byte sep, boolean singleQuotes, int checkHeader, int ncols, java.lang.String[] columnNames, byte[] ctypes, java.lang.String[][] domains, java.lang.String[][] naStrings, java.lang.String[][] data, ParseWriter.ParseErr[] errs, int chunkSize, byte[] nonDataLineMarkers, byte escapeChar)
public ParseSetup(ParserInfo parse_type, byte sep, boolean singleQuotes, int checkHeader, int ncols, java.lang.String[] columnNames, byte[] ctypes, java.lang.String[][] domains, java.lang.String[][] naStrings, java.lang.String[][] data, ParseWriter.ParseErr[] errs, int chunkSize, Key<DecryptionTool> decrypt_tool, int[] skipped_columns, byte[] nonDataLineMarkers, byte escapeChar)
public ParseSetup(ParseSetupV3 ps)
ps - Parse setup settings from client
public ParseSetup(ParserInfo parseType, byte sep, boolean singleQuotes, int checkHeader, int ncols, java.lang.String[] columnNames, byte[] ctypes, java.lang.String[][] domains, java.lang.String[][] naStrings, java.lang.String[][] data, byte[] nonDataLineMarkers, byte escapeChar)
Typically used by file type parsers for returning final valid results. _chunk_size will be set later using results from all files.
public ParseSetup(ParserInfo parseType, byte sep, boolean singleQuotes, int checkHeader, int ncols, java.lang.String[] columnNames, byte[] ctypes, java.lang.String[][] domains, java.lang.String[][] naStrings, java.lang.String[][] data, byte escapeChar)
Typically used by file type parsers for returning final valid results. _chunk_size will be set later using results from all files.
public ParseSetup(ParserInfo parseType, byte sep, boolean singleQuotes, int checkHeader, int ncols, java.lang.String[] columnNames, byte[] ctypes, java.lang.String[][] domains, java.lang.String[][] naStrings, java.lang.String[][] data)
public ParseSetup(ParserInfo parseType, byte sep, boolean singleQuotes, int checkHeader, int ncols, java.lang.String[] columnNames, byte[] ctypes, java.lang.String[][] domains, java.lang.String[][] naStrings, java.lang.String[][] data, ParseWriter.ParseErr[] errs, byte[] nonDataLineMarkers)
public ParseSetup(ParserInfo parseType, byte sep, boolean singleQuotes, int checkHeader, int ncols, java.lang.String[] columnNames, byte[] ctypes, java.lang.String[][] domains, java.lang.String[][] naStrings, java.lang.String[][] data, ParseWriter.ParseErr[] errs)
public ParseSetup(ParserInfo parseType, byte sep, boolean singleQuotes, int checkHeader, int ncols, java.lang.String[][] data, ParseWriter.ParseErr[] errs)
Typically used by file type parsers for returning final invalid results
public ParseSetup()
public void setFileName(java.lang.String name)
public final ParseWriter.ParseErr[] errs()
public void addErrs(ParseWriter.ParseErr... errs)
public static ParseSetup makeSVMLightSetup()
public void setParseColumnIndices(int ncols, int[] skipped_columns)
public void setSyntheticColumns(java.lang.String[] names, java.lang.String[][] valueMapping, byte synthetic_column_type)
public java.lang.String[] getColumnNames()
public int[] getSkippedColumns()
public int[] get_parse_columns_indices()
public java.lang.String[][] getData()
public java.lang.String[] getColumnTypeStrings()
public byte[] getColumnTypes()
public static byte[] strToColumnTypes(java.lang.String[] strs)
protected Parser parser(Key jobKey)
public final ParseSetup getFinalSetup(Key[] inputKeys, ParseSetup demandedSetup)
inputKeys - inputs
demandedSetup - setup demanded by a user
public int getNumberColumns()
public final DecryptionTool getDecryptionTool()
public final ParserInfo.ParseMethod parseMethod(int nfiles, Vec v)
public java.lang.String toString()
toString
in class java.lang.Object
public static ParseSetup guessSetup(Key[] fkeys, boolean singleQuote, int checkHeader)
fkeys - Keys to input vectors to be parsed
singleQuote - single quotes quote fields
checkHeader - check for a header
public static ParseSetup guessSetup(Key[] fkeys, ParseSetup userSetup)
fkeys - Keys to input vectors to be parsed
userSetup - Setup guidance from user
protected boolean isCompatible(ParseSetup setupB)
public static ParseSetup guessSetup(ByteVec bv, byte[] bits, ParseSetup userSetup)
bits - Initial bytes from a parse source
public static java.lang.String createHexName(java.lang.String n)
n - filename to be cleaned
public <T extends ParseSetup> T copyTo(T setup)
T - class derived from ParseSetup
setup - target setup object
public boolean isNA(int colIdx, BufferedString str)
colIdx - index of the column
str - string to be tested for NA
public ParserInfo getParseType()
public ParseSetup setParseType(ParserInfo parse_type)
public ParseSetup setSeparator(byte separator)
public ParseSetup setSingleQuotes(boolean single_quotes)
public ParseSetup setCheckHeader(int check_header)
public ParseSetup setNumberColumns(int number_columns)
public ParseSetup setColumnNames(java.lang.String[] column_names)
public ParseSetup setSkippedColumns(int[] skipped_columns)
public ParseSetup setColumnTypes(byte[] column_types)
public ParseSetup setDomains(java.lang.String[][] domains)
public ParseSetup setNAStrings(java.lang.String[][] na_strings)
public ParseSetup setChunkSize(int chunk_size)
public ParseSetup setDecryptTool(Key<DecryptionTool> decrypt_tool)