public class ZipDelimitedJSONReader extends DelimitedJSONReader<org.apache.hadoop.io.Text>
Modifier and Type | Field and Description |
---|---|
static org.apache.commons.logging.Log |
LOG |
bytesRead, fileIn, generateId, hasNext, idGen, instream, mapper, reader, totalBytes, uriName
conf, encoding, file, fs, iterator, key, mode, streaming, subId, value
AGGREGATE_RECORD_ELEMENT, AGGREGATE_RECORD_NAMESPACE, AGGREGATE_URI_ID, ARCHIVE_METADATA_OPTIONAL, AUDIT_MLCPFINISH_CODE, AUDIT_MLCPFINISH_EVENT, AUDIT_MLCPSTART_CODE, AUDIT_MLCPSTART_EVENT, BATCH_MIN_VERSION, BATCH_SIZE, COLLECTION_FILTER, CONF_AGGREGATE_RECORD_ELEMENT, CONF_AGGREGATE_RECORD_NAMESPACE, CONF_AUDIT_MLCPFINISH_ENABLED, CONF_AUDIT_MLCPFINISH_MESSAGE, CONF_AUDIT_MLCPSTART_MESSAGE, CONF_COPY_PERMISSIONS, CONF_COPY_PROPERTIES, CONF_DELIMITED_DATA_TYPE, CONF_DELIMITED_ROOT_NAME, CONF_DELIMITER, CONF_INPUT_ARCHIVE_METADATA_OPTIONAL, CONF_INPUT_COMPRESSION_CODEC, CONF_INPUT_DIRECTORY, CONF_INPUT_FILE_PATTERN, CONF_INPUT_GENERATE_URI, CONF_INPUT_MODULES_DATABASE, CONF_INPUT_MODULES_ROOT, CONF_INPUT_PATH_FILTER_CLASS, CONF_INPUT_SEQUENCEFILE_KEY_CLASS, CONF_INPUT_SEQUENCEFILE_VALUE_CLASS, CONF_INPUT_SEQUENCEFILE_VALUE_TYPE, CONF_INPUT_URI_ID, CONF_MAPREDUCE_JOB_MAP_CLASS, CONF_MAPREDUCE_JOB_WORKING_DIR, CONF_MAPREDUCE_JOBTRACKER_ADDRESS, CONF_MAX_SPLIT_SIZE1, CONF_MAX_SPLIT_SIZE2, CONF_MIMETYPES, CONF_MIN_SPLIT_SIZE1, CONF_MIN_SPLIT_SIZE2, CONF_MIN_THREADS, CONF_ML_VERSION, CONF_MULTITHREADEDMAPPER_CLASS, CONF_OUTPUT_FILENAME_AS_COLLECTION, CONF_OUTPUT_FILEPATH, CONF_OUTPUT_TYPE, CONF_ROLE_MAP, CONF_SPLIT_INPUT, CONF_THREADS_PER_SPLIT, CONF_TRANSFORM_FUNCTION, CONF_TRANSFORM_MODULE, CONF_TRANSFORM_NAMESPACE, CONF_TRANSFORM_PARAM, CONTENT_ENCODING, CONTENTPUMP_BUNDLE_ARTIFACT, CONTENTPUMP_HOME_PROPERTY_NAME, CONTENTPUMP_JAR_PREFIX, COPY_COLLECTIONS, COPY_METADATA, COPY_PERMISSIONS, COPY_PROPERTIES, COPY_QUALITY, DATA_TYPE, DATABASE, DEFAULT_ARCHIVE_METADATA_OPTIONAL, DEFAULT_COPY_COLLECTIONS, DEFAULT_COPY_METADATA, DEFAULT_COPY_PERMISSIONS, DEFAULT_COPY_PROPERTIES, DEFAULT_COPY_QUALITY, DEFAULT_DELIMITER, DEFAULT_ENCODING, DEFAULT_OUTPUT_TYPE, DEFAULT_SEQUENCEFILE_VALUE_TYPE, DELIMITED_ROOT_NAME, DELIMITED_URI_ID, DELIMITER, DIRECTORY_FILTER, DOCUMENT_SELECTOR, DOCUMENT_TYPE, FAST_LOAD, GENERATE_URI, HADOOP_CONF_DIR, HADOOP_CONFDIR_ENV_NAME, HOST, INPUT_COMPRESSED, INPUT_COMPRESSION_CODEC, INPUT_DATABASE, INPUT_FILE_PATH, INPUT_FILE_PATTERN, INPUT_FILE_TYPE, INPUT_FILE_TYPE_DEFAULT, INPUT_HOST, INPUT_PASSWORD, INPUT_PORT, INPUT_SEQUENCEFILE_KEY_CLASS, INPUT_SEQUENCEFILE_VALUE_CLASS, INPUT_SEQUENCEFILE_VALUE_TYPE, INPUT_SSL, INPUT_SSL_PROTOCOL, INPUT_USERNAME, MAX_BATCH_SIZE, MAX_SPLIT_SIZE, MAX_TXN_SIZE, MIN_SPLIT_SIZE, MODE, MODULES, MODULES_ROOT, NAMESPACE, OPTIONS_FILE, OUTPUT_CLEANDIR, OUTPUT_COLLECTIONS, OUTPUT_COMPRESS, OUTPUT_DATABASE, OUTPUT_DIRECTORY, OUTPUT_FILE_PATH, OUTPUT_FILENAME_AS_COLLECTION, OUTPUT_GRAPH, OUTPUT_HOST, OUTPUT_IDNAME, OUTPUT_INDENTED, OUTPUT_LANGUAGE, OUTPUT_OVERRIDE_GRAPH, OUTPUT_PARTITION, OUTPUT_PASSWORD, OUTPUT_PERMISSIONS, OUTPUT_PORT, OUTPUT_QUALITY, OUTPUT_SSL, OUTPUT_SSL_PROTOCOL, OUTPUT_TYPE, OUTPUT_URI_PREFIX, OUTPUT_URI_REPLACE, OUTPUT_URI_SUFFIX, OUTPUT_USERNAME, PASSWORD, PATH_NAMESPACE, PORT, QUERY_FILTER, RDF_STREAMING_MEMORY_THRESHOLD, RDF_TRIPLES_PER_DOCUMENT, REDACTION, RESTRICT_HOSTS, RESTRICT_INPUT_HOSTS, RESTRICT_OUTPUT_HOSTS, SNAPSHOT, SPLIT_INPUT, SSL, SSL_PROTOCOL, STREAMING, TEMPORAL_COLLECTION, THREAD_COUNT, THREADS_PER_SPLIT, TOLERATE_ERRORS, TRANSACTION_SIZE, TRANSFORM_FUNCTION, TRANSFORM_MODULE, TRANSFORM_NAMESPACE, TRANSFORM_PARAM, TYPE_FILTER, URI_ID, USERNAME, XML_REPAIR_LEVEL
Constructor and Description |
---|
ZipDelimitedJSONReader() |
Modifier and Type | Method and Description |
---|---|
void |
close() |
protected void |
configFileNameAsCollection(org.apache.hadoop.conf.Configuration conf,
org.apache.hadoop.fs.Path file) |
protected boolean |
findNextFileEntryAndInitReader() |
float |
getProgress() |
protected boolean |
hasMoreEntryInZip() |
protected void |
initFileStream(org.apache.hadoop.mapreduce.InputSplit inSplit) |
void |
initialize(org.apache.hadoop.mapreduce.InputSplit inSplit,
org.apache.hadoop.mapreduce.TaskAttemptContext context) |
findUriInJSON, nextKeyValue
getCurrentKey, getCurrentValue, getFile, initConfig, makeURIForZipEntry, makeURIFromPath, openFile, setFile, setKey, setSkipKey
public void close() throws IOException
close
in interface Closeable
close
in interface AutoCloseable
close
in class DelimitedJSONReader<org.apache.hadoop.io.Text>
IOException
protected void initFileStream(org.apache.hadoop.mapreduce.InputSplit inSplit) throws IOException, InterruptedException
initFileStream
in class DelimitedJSONReader<org.apache.hadoop.io.Text>
IOException
InterruptedException
public void initialize(org.apache.hadoop.mapreduce.InputSplit inSplit, org.apache.hadoop.mapreduce.TaskAttemptContext context) throws IOException, InterruptedException
initialize
in class DelimitedJSONReader<org.apache.hadoop.io.Text>
IOException
InterruptedException
protected boolean findNextFileEntryAndInitReader() throws InterruptedException, IOException
findNextFileEntryAndInitReader
in class DelimitedJSONReader<org.apache.hadoop.io.Text>
InterruptedException
IOException
protected boolean hasMoreEntryInZip() throws IOException
IOException
public float getProgress() throws IOException, InterruptedException
getProgress
in class DelimitedJSONReader<org.apache.hadoop.io.Text>
IOException
InterruptedException
protected void configFileNameAsCollection(org.apache.hadoop.conf.Configuration conf, org.apache.hadoop.fs.Path file)
configFileNameAsCollection
in class ImportRecordReader<org.apache.hadoop.io.Text>
Copyright © 2022 MarkLogic Corporation
Complete online documentation for MarkLogic Server, XQuery and related components may be found at developer.marklogic.com