Type Parameters: VALUEIN
public class DelimitedTextReader<VALUEIN> extends ImportRecordReader<VALUEIN>
Modifier and Type | Field and Description |
---|---|
protected long |
bytesRead |
protected boolean |
compressed |
protected char |
delimiter |
protected DocBuilder |
docBuilder |
static char |
encapsulator |
protected String[] |
fields
The header of the delimited text.
|
protected org.apache.hadoop.fs.FSDataInputStream |
fileIn |
protected long |
fileLen |
protected boolean |
generateId |
protected boolean |
hasNext |
protected IdGenerator |
idGen |
protected InputStreamReader |
instream |
static org.apache.commons.logging.Log |
LOG |
protected org.apache.commons.csv.CSVParser |
parser |
protected Iterator<org.apache.commons.csv.CSVRecord> |
parserIterator |
protected int |
uriId |
protected String |
uriName |
conf, encoding, file, fs, iterator, key, mode, streaming, subId, value
AGGREGATE_RECORD_ELEMENT, AGGREGATE_RECORD_NAMESPACE, AGGREGATE_URI_ID, ARCHIVE_METADATA_OPTIONAL, AUDIT_MLCPFINISH_CODE, AUDIT_MLCPFINISH_EVENT, AUDIT_MLCPSTART_CODE, AUDIT_MLCPSTART_EVENT, BATCH_MIN_VERSION, BATCH_SIZE, COLLECTION_FILTER, CONF_AGGREGATE_RECORD_ELEMENT, CONF_AGGREGATE_RECORD_NAMESPACE, CONF_AUDIT_MLCPFINISH_ENABLED, CONF_AUDIT_MLCPFINISH_MESSAGE, CONF_AUDIT_MLCPSTART_MESSAGE, CONF_COPY_PERMISSIONS, CONF_COPY_PROPERTIES, CONF_DELIMITED_DATA_TYPE, CONF_DELIMITED_ROOT_NAME, CONF_DELIMITER, CONF_INPUT_ARCHIVE_METADATA_OPTIONAL, CONF_INPUT_COMPRESSION_CODEC, CONF_INPUT_DIRECTORY, CONF_INPUT_FILE_PATTERN, CONF_INPUT_GENERATE_URI, CONF_INPUT_MODULES_DATABASE, CONF_INPUT_MODULES_ROOT, CONF_INPUT_PATH_FILTER_CLASS, CONF_INPUT_SEQUENCEFILE_KEY_CLASS, CONF_INPUT_SEQUENCEFILE_VALUE_CLASS, CONF_INPUT_SEQUENCEFILE_VALUE_TYPE, CONF_INPUT_URI_ID, CONF_MAPREDUCE_JOB_MAP_CLASS, CONF_MAPREDUCE_JOB_WORKING_DIR, CONF_MAPREDUCE_JOBTRACKER_ADDRESS, CONF_MAX_SPLIT_SIZE1, CONF_MAX_SPLIT_SIZE2, CONF_MIMETYPES, CONF_MIN_SPLIT_SIZE1, CONF_MIN_SPLIT_SIZE2, CONF_MIN_THREADS, CONF_ML_VERSION, CONF_MULTITHREADEDMAPPER_CLASS, CONF_OUTPUT_FILENAME_AS_COLLECTION, CONF_OUTPUT_FILEPATH, CONF_OUTPUT_TYPE, CONF_ROLE_MAP, CONF_SPLIT_INPUT, CONF_THREADS_PER_SPLIT, CONF_TRANSFORM_FUNCTION, CONF_TRANSFORM_MODULE, CONF_TRANSFORM_NAMESPACE, CONF_TRANSFORM_PARAM, CONTENT_ENCODING, CONTENTPUMP_BUNDLE_ARTIFACT, CONTENTPUMP_HOME_PROPERTY_NAME, CONTENTPUMP_JAR_PREFIX, COPY_COLLECTIONS, COPY_METADATA, COPY_PERMISSIONS, COPY_PROPERTIES, COPY_QUALITY, DATA_TYPE, DATABASE, DEFAULT_ARCHIVE_METADATA_OPTIONAL, DEFAULT_COPY_COLLECTIONS, DEFAULT_COPY_METADATA, DEFAULT_COPY_PERMISSIONS, DEFAULT_COPY_PROPERTIES, DEFAULT_COPY_QUALITY, DEFAULT_DELIMITER, DEFAULT_ENCODING, DEFAULT_OUTPUT_TYPE, DEFAULT_SEQUENCEFILE_VALUE_TYPE, DELIMITED_ROOT_NAME, DELIMITED_URI_ID, DELIMITER, DIRECTORY_FILTER, DOCUMENT_SELECTOR, DOCUMENT_TYPE, FAST_LOAD, GENERATE_URI, HADOOP_CONF_DIR, HADOOP_CONFDIR_ENV_NAME, HOST, 
INPUT_COMPRESSED, INPUT_COMPRESSION_CODEC, INPUT_DATABASE, INPUT_FILE_PATH, INPUT_FILE_PATTERN, INPUT_FILE_TYPE, INPUT_FILE_TYPE_DEFAULT, INPUT_HOST, INPUT_PASSWORD, INPUT_PORT, INPUT_SEQUENCEFILE_KEY_CLASS, INPUT_SEQUENCEFILE_VALUE_CLASS, INPUT_SEQUENCEFILE_VALUE_TYPE, INPUT_SSL, INPUT_SSL_PROTOCOL, INPUT_USERNAME, MAX_BATCH_SIZE, MAX_SPLIT_SIZE, MAX_TXN_SIZE, MIN_SPLIT_SIZE, MODE, MODULES, MODULES_ROOT, NAMESPACE, OPTIONS_FILE, OUTPUT_CLEANDIR, OUTPUT_COLLECTIONS, OUTPUT_COMPRESS, OUTPUT_DATABASE, OUTPUT_DIRECTORY, OUTPUT_FILE_PATH, OUTPUT_FILENAME_AS_COLLECTION, OUTPUT_GRAPH, OUTPUT_HOST, OUTPUT_IDNAME, OUTPUT_INDENTED, OUTPUT_LANGUAGE, OUTPUT_OVERRIDE_GRAPH, OUTPUT_PARTITION, OUTPUT_PASSWORD, OUTPUT_PERMISSIONS, OUTPUT_PORT, OUTPUT_QUALITY, OUTPUT_SSL, OUTPUT_SSL_PROTOCOL, OUTPUT_TYPE, OUTPUT_URI_PREFIX, OUTPUT_URI_REPLACE, OUTPUT_URI_SUFFIX, OUTPUT_USERNAME, PASSWORD, PATH_NAMESPACE, PORT, QUERY_FILTER, RDF_STREAMING_MEMORY_THRESHOLD, RDF_TRIPLES_PER_DOCUMENT, REDACTION, RESTRICT_HOSTS, RESTRICT_INPUT_HOSTS, RESTRICT_OUTPUT_HOSTS, SNAPSHOT, SPLIT_INPUT, SSL, SSL_PROTOCOL, STREAMING, TEMPORAL_COLLECTION, THREAD_COUNT, THREADS_PER_SPLIT, TOLERATE_ERRORS, TRANSACTION_SIZE, TRANSFORM_FUNCTION, TRANSFORM_MODULE, TRANSFORM_NAMESPACE, TRANSFORM_PARAM, TYPE_FILTER, URI_ID, USERNAME, XML_REPAIR_LEVEL
Constructor and Description |
---|
DelimitedTextReader() |
Modifier and Type | Method and Description |
---|---|
void |
close() |
protected String |
convertToLine(String[] values) |
protected String[] |
getLine() |
protected String[] |
getLine(org.apache.commons.csv.CSVRecord record) |
float |
getProgress() |
protected org.apache.commons.csv.CSVRecord |
getRecordLine() |
protected void |
initDelimConf() |
protected void |
initDocType() |
void |
initialize(org.apache.hadoop.mapreduce.InputSplit inSplit,
org.apache.hadoop.mapreduce.TaskAttemptContext context) |
protected void |
initParser(org.apache.hadoop.mapreduce.InputSplit inSplit) |
boolean |
nextKeyValue() |
configFileNameAsCollection, getCurrentKey, getCurrentValue, getFile, initConfig, makeURIForZipEntry, makeURIFromPath, openFile, setFile, setKey, setSkipKey
public static final org.apache.commons.logging.Log LOG
public static final char encapsulator
protected String[] fields
protected char delimiter
protected org.apache.commons.csv.CSVParser parser
protected InputStreamReader instream
protected org.apache.hadoop.fs.FSDataInputStream fileIn
protected boolean hasNext
protected String uriName
protected long fileLen
protected long bytesRead
protected boolean generateId
protected IdGenerator idGen
protected int uriId
protected boolean compressed
protected DocBuilder docBuilder
protected Iterator<org.apache.commons.csv.CSVRecord> parserIterator
public void close() throws IOException
close
in interface Closeable
close
in interface AutoCloseable
close
in class ImportRecordReader<VALUEIN>
IOException
public float getProgress() throws IOException, InterruptedException
getProgress
in class ImportRecordReader<VALUEIN>
IOException
InterruptedException
public void initialize(org.apache.hadoop.mapreduce.InputSplit inSplit, org.apache.hadoop.mapreduce.TaskAttemptContext context) throws IOException, InterruptedException
initialize
in class ImportRecordReader<VALUEIN>
IOException
InterruptedException
protected void initParser(org.apache.hadoop.mapreduce.InputSplit inSplit) throws IOException, InterruptedException
IOException
InterruptedException
protected void initDelimConf()
protected String[] getLine() throws IOException
IOException
protected String[] getLine(org.apache.commons.csv.CSVRecord record) throws IOException
IOException
protected org.apache.commons.csv.CSVRecord getRecordLine()
public boolean nextKeyValue() throws IOException, InterruptedException
nextKeyValue
in class ImportRecordReader<VALUEIN>
IOException
InterruptedException
protected void initDocType()
Copyright © 2021 MarkLogic Corporation
Complete online documentation for MarkLogic Server, XQuery, and related components may be found at developer.marklogic.com.