VALUEIN
- public abstract class ImportRecordReader<VALUEIN> extends org.apache.hadoop.mapreduce.RecordReader<DocumentURIWithSourceInfo,VALUEIN> implements ConfigConstants
Modifier and Type | Field and Description |
---|---|
protected org.apache.hadoop.conf.Configuration |
conf |
protected String |
encoding |
protected org.apache.hadoop.fs.Path |
file |
protected org.apache.hadoop.fs.FileSystem |
fs |
protected Iterator<org.apache.hadoop.mapreduce.lib.input.FileSplit> |
iterator |
protected DocumentURIWithSourceInfo |
key |
static org.apache.commons.logging.Log |
LOG |
protected String |
mode |
protected boolean |
streaming |
protected String |
subId |
protected VALUEIN |
value |
AGGREGATE_RECORD_ELEMENT, AGGREGATE_RECORD_NAMESPACE, AGGREGATE_URI_ID, ARCHIVE_METADATA_OPTIONAL, AUDIT_MLCPFINISH_CODE, AUDIT_MLCPFINISH_EVENT, AUDIT_MLCPSTART_CODE, AUDIT_MLCPSTART_EVENT, BATCH_MIN_VERSION, BATCH_SIZE, COLLECTION_FILTER, CONF_AGGREGATE_RECORD_ELEMENT, CONF_AGGREGATE_RECORD_NAMESPACE, CONF_AUDIT_MLCPFINISH_ENABLED, CONF_AUDIT_MLCPFINISH_MESSAGE, CONF_AUDIT_MLCPSTART_MESSAGE, CONF_COPY_PERMISSIONS, CONF_COPY_PROPERTIES, CONF_DELIMITED_DATA_TYPE, CONF_DELIMITED_ROOT_NAME, CONF_DELIMITER, CONF_INPUT_ARCHIVE_METADATA_OPTIONAL, CONF_INPUT_COMPRESSION_CODEC, CONF_INPUT_DIRECTORY, CONF_INPUT_FILE_PATTERN, CONF_INPUT_GENERATE_URI, CONF_INPUT_MODULES_DATABASE, CONF_INPUT_MODULES_ROOT, CONF_INPUT_PATH_FILTER_CLASS, CONF_INPUT_SEQUENCEFILE_KEY_CLASS, CONF_INPUT_SEQUENCEFILE_VALUE_CLASS, CONF_INPUT_SEQUENCEFILE_VALUE_TYPE, CONF_INPUT_URI_ID, CONF_MAPREDUCE_JOB_MAP_CLASS, CONF_MAPREDUCE_JOB_WORKING_DIR, CONF_MAPREDUCE_JOBTRACKER_ADDRESS, CONF_MAX_SPLIT_SIZE1, CONF_MAX_SPLIT_SIZE2, CONF_MIMETYPES, CONF_MIN_SPLIT_SIZE1, CONF_MIN_SPLIT_SIZE2, CONF_MIN_THREADS, CONF_ML_VERSION, CONF_MULTITHREADEDMAPPER_CLASS, CONF_OUTPUT_FILENAME_AS_COLLECTION, CONF_OUTPUT_FILEPATH, CONF_OUTPUT_TYPE, CONF_ROLE_MAP, CONF_SPLIT_INPUT, CONF_THREADS_PER_SPLIT, CONF_TRANSFORM_FUNCTION, CONF_TRANSFORM_MODULE, CONF_TRANSFORM_NAMESPACE, CONF_TRANSFORM_PARAM, CONTENT_ENCODING, CONTENTPUMP_BUNDLE_ARTIFACT, CONTENTPUMP_HOME_PROPERTY_NAME, CONTENTPUMP_JAR_PREFIX, COPY_COLLECTIONS, COPY_METADATA, COPY_PERMISSIONS, COPY_PROPERTIES, COPY_QUALITY, DATA_TYPE, DATABASE, DEFAULT_ARCHIVE_METADATA_OPTIONAL, DEFAULT_COPY_COLLECTIONS, DEFAULT_COPY_METADATA, DEFAULT_COPY_PERMISSIONS, DEFAULT_COPY_PROPERTIES, DEFAULT_COPY_QUALITY, DEFAULT_DELIMITER, DEFAULT_ENCODING, DEFAULT_OUTPUT_TYPE, DEFAULT_SEQUENCEFILE_VALUE_TYPE, DEFAULT_THREAD_COUNT, DELIMITED_ROOT_NAME, DELIMITED_URI_ID, DELIMITER, DIRECTORY_FILTER, DOCUMENT_SELECTOR, DOCUMENT_TYPE, FAST_LOAD, GENERATE_URI, HADOOP_CONF_DIR, 
HADOOP_CONFDIR_ENV_NAME, HOST, INPUT_COMPRESSED, INPUT_COMPRESSION_CODEC, INPUT_DATABASE, INPUT_FILE_PATH, INPUT_FILE_PATTERN, INPUT_FILE_TYPE, INPUT_FILE_TYPE_DEFAULT, INPUT_HOST, INPUT_KEYSTORE_PASSWD, INPUT_KEYSTORE_PATH, INPUT_PASSWORD, INPUT_PORT, INPUT_SEQUENCEFILE_KEY_CLASS, INPUT_SEQUENCEFILE_VALUE_CLASS, INPUT_SEQUENCEFILE_VALUE_TYPE, INPUT_SSL, INPUT_SSL_PROTOCOL, INPUT_TRUSTSTORE_PASSWD, INPUT_TRUSTSTORE_PATH, INPUT_USERNAME, KEYSTORE_PASSWD, KEYSTORE_PATH, MAX_BATCH_SIZE, MAX_SPLIT_SIZE, MAX_THREADS, MAX_TXN_SIZE, MIN_SPLIT_SIZE, MODE, MODULES, MODULES_ROOT, NAMESPACE, OPTIONS_FILE, OUTPUT_CLEANDIR, OUTPUT_COLLECTIONS, OUTPUT_COMPRESS, OUTPUT_DATABASE, OUTPUT_DIRECTORY, OUTPUT_FILE_PATH, OUTPUT_FILENAME_AS_COLLECTION, OUTPUT_GRAPH, OUTPUT_HOST, OUTPUT_IDNAME, OUTPUT_INDENTED, OUTPUT_KEYSTORE_PASSWD, OUTPUT_KEYSTORE_PATH, OUTPUT_LANGUAGE, OUTPUT_OVERRIDE_GRAPH, OUTPUT_PARTITION, OUTPUT_PASSWORD, OUTPUT_PERMISSIONS, OUTPUT_PORT, OUTPUT_QUALITY, OUTPUT_SSL, OUTPUT_SSL_PROTOCOL, OUTPUT_TRUSTSTORE_PASSWD, OUTPUT_TRUSTSTORE_PATH, OUTPUT_TYPE, OUTPUT_URI_PREFIX, OUTPUT_URI_REPLACE, OUTPUT_URI_SUFFIX, OUTPUT_USERNAME, PASSWORD, PATH_NAMESPACE, PORT, QUERY_FILTER, RDF_STREAMING_MEMORY_THRESHOLD, RDF_TRIPLES_PER_DOCUMENT, REDACTION, RESTRICT_HOSTS, RESTRICT_INPUT_HOSTS, RESTRICT_OUTPUT_HOSTS, SNAPSHOT, SPLIT_INPUT, SSL, SSL_PROTOCOL, STREAMING, TEMPORAL_COLLECTION, THREAD_COUNT, THREADS_PER_SPLIT, TRANSACTION_SIZE, TRANSFORM_FUNCTION, TRANSFORM_MODULE, TRANSFORM_NAMESPACE, TRANSFORM_PARAM, TRUSTSTORE_PASSWD, TRUSTSTORE_PATH, TYPE_FILTER, URI_ID, USERNAME, XML_REPAIR_LEVEL
Constructor and Description |
---|
ImportRecordReader() |
Modifier and Type | Method and Description |
---|---|
abstract void |
close() |
protected void |
configFileNameAsCollection(org.apache.hadoop.conf.Configuration conf,
org.apache.hadoop.fs.Path file) |
DocumentURIWithSourceInfo |
getCurrentKey() |
VALUEIN |
getCurrentValue() |
org.apache.hadoop.fs.Path |
getFile() |
abstract float |
getProgress() |
protected void |
initConfig(org.apache.hadoop.mapreduce.TaskAttemptContext context) |
abstract void |
initialize(org.apache.hadoop.mapreduce.InputSplit arg0,
org.apache.hadoop.mapreduce.TaskAttemptContext context) |
protected String |
makeURIForZipEntry(org.apache.hadoop.fs.Path zipFile,
String val) |
protected String |
makeURIFromPath(org.apache.hadoop.fs.Path file) |
abstract boolean |
nextKeyValue() |
org.apache.hadoop.fs.FSDataInputStream |
openFile(org.apache.hadoop.mapreduce.InputSplit inSplit,
boolean configCol) |
void |
setFile(org.apache.hadoop.fs.Path file) |
protected boolean |
setKey(String uri,
int line,
int col,
boolean encode)
Apply URI replace option, encode URI if specified, apply URI prefix and
suffix configuration options and set the result as DocumentURI key.
|
protected void |
setSkipKey(int line,
int col,
String reason)
Set the result as DocumentURI key.
|
public static final org.apache.commons.logging.Log LOG
protected DocumentURIWithSourceInfo key
protected VALUEIN value
protected String mode
protected boolean streaming
protected org.apache.hadoop.conf.Configuration conf
protected String encoding
protected org.apache.hadoop.fs.Path file
protected org.apache.hadoop.fs.FileSystem fs
protected Iterator<org.apache.hadoop.mapreduce.lib.input.FileSplit> iterator
protected String subId
protected boolean setKey(String uri, int line, int col, boolean encode)
uri
- Source string of document URI.line
- Line number in the source if applicable; 0 otherwise.col
- Column number in the source if applicable; 0 otherwise.encode
- Encode uri if set to true.protected void setSkipKey(int line, int col, String reason)
line
- Line number in the source if applicable; -1 otherwise.col
- Column number in the source if applicable; -1 otherwise.reason
- Reason for skipping the record.public abstract void close() throws IOException
close
in interface Closeable
close
in interface AutoCloseable
close
in class org.apache.hadoop.mapreduce.RecordReader<DocumentURIWithSourceInfo,VALUEIN>
IOException
public DocumentURIWithSourceInfo getCurrentKey() throws IOException, InterruptedException
getCurrentKey
in class org.apache.hadoop.mapreduce.RecordReader<DocumentURIWithSourceInfo,VALUEIN>
IOException
InterruptedException
public VALUEIN getCurrentValue() throws IOException, InterruptedException
getCurrentValue
in class org.apache.hadoop.mapreduce.RecordReader<DocumentURIWithSourceInfo,VALUEIN>
IOException
InterruptedException
public abstract float getProgress() throws IOException, InterruptedException
getProgress
in class org.apache.hadoop.mapreduce.RecordReader<DocumentURIWithSourceInfo,VALUEIN>
IOException
InterruptedException
public abstract void initialize(org.apache.hadoop.mapreduce.InputSplit arg0, org.apache.hadoop.mapreduce.TaskAttemptContext context) throws IOException, InterruptedException
initialize
in class org.apache.hadoop.mapreduce.RecordReader<DocumentURIWithSourceInfo,VALUEIN>
IOException
InterruptedException
protected void initConfig(org.apache.hadoop.mapreduce.TaskAttemptContext context)
protected void configFileNameAsCollection(org.apache.hadoop.conf.Configuration conf, org.apache.hadoop.fs.Path file)
protected String makeURIFromPath(org.apache.hadoop.fs.Path file)
protected String makeURIForZipEntry(org.apache.hadoop.fs.Path zipFile, String val)
public abstract boolean nextKeyValue() throws IOException, InterruptedException
nextKeyValue
in class org.apache.hadoop.mapreduce.RecordReader<DocumentURIWithSourceInfo,VALUEIN>
IOException
InterruptedException
public org.apache.hadoop.fs.Path getFile()
public void setFile(org.apache.hadoop.fs.Path file)
public org.apache.hadoop.fs.FSDataInputStream openFile(org.apache.hadoop.mapreduce.InputSplit inSplit, boolean configCol) throws IOException
IOException
Copyright © 2020 MarkLogic Corporation
Complete online documentation for MarkLogic Server, XQuery and related components may be found at developer.marklogic.com