public class DatabaseContentReader extends MarkLogicRecordReader<DocumentURI,MarkLogicDocument> implements ConfigConstants
Modifier and Type | Field and Description |
---|---|
protected boolean | copyCollection |
protected boolean | copyMetadata |
protected boolean | copyPermission |
protected boolean | copyProperties |
protected boolean | copyQuality |
protected String | ctsQuery |
protected DocumentURI | currentKey — Current key. |
protected DatabaseDocumentWithMeta | currentValue — Current value. |
protected boolean | docDone |
static org.apache.commons.logging.Log | LOG |
protected HashMap<String,DocumentMetadata> | metadataMap |
protected int | nakedCount |
protected boolean | nakedDone |
Fields inherited from class MarkLogicRecordReader: conf, count, curForest, hostNames, length, maxRetries, maxSleepTime, mlSplit, redactionRuleCol, replicas, result, retry, session, sleepTime
Fields inherited from interface ConfigConstants: AGGREGATE_RECORD_ELEMENT, AGGREGATE_RECORD_NAMESPACE, AGGREGATE_URI_ID, ARCHIVE_METADATA_OPTIONAL, AUDIT_MLCPFINISH_CODE, AUDIT_MLCPFINISH_EVENT, AUDIT_MLCPSTART_CODE, AUDIT_MLCPSTART_EVENT, BATCH_MIN_VERSION, BATCH_SIZE, COLLECTION_FILTER, CONF_AGGREGATE_RECORD_ELEMENT, CONF_AGGREGATE_RECORD_NAMESPACE, CONF_AUDIT_MLCPFINISH_ENABLED, CONF_AUDIT_MLCPFINISH_MESSAGE, CONF_AUDIT_MLCPSTART_MESSAGE, CONF_COPY_PERMISSIONS, CONF_COPY_PROPERTIES, CONF_DELIMITED_DATA_TYPE, CONF_DELIMITED_ROOT_NAME, CONF_DELIMITER, CONF_INPUT_ARCHIVE_METADATA_OPTIONAL, CONF_INPUT_COMPRESSION_CODEC, CONF_INPUT_DIRECTORY, CONF_INPUT_FILE_PATTERN, CONF_INPUT_GENERATE_URI, CONF_INPUT_MODULES_DATABASE, CONF_INPUT_MODULES_ROOT, CONF_INPUT_PATH_FILTER_CLASS, CONF_INPUT_SEQUENCEFILE_KEY_CLASS, CONF_INPUT_SEQUENCEFILE_VALUE_CLASS, CONF_INPUT_SEQUENCEFILE_VALUE_TYPE, CONF_INPUT_URI_ID, CONF_MAPREDUCE_JOB_MAP_CLASS, CONF_MAPREDUCE_JOB_WORKING_DIR, CONF_MAPREDUCE_JOBTRACKER_ADDRESS, CONF_MAX_SPLIT_SIZE1, CONF_MAX_SPLIT_SIZE2, CONF_MIMETYPES, CONF_MIN_SPLIT_SIZE1, CONF_MIN_SPLIT_SIZE2, CONF_MIN_THREADS, CONF_ML_VERSION, CONF_MULTITHREADEDMAPPER_CLASS, CONF_OUTPUT_FILENAME_AS_COLLECTION, CONF_OUTPUT_FILEPATH, CONF_OUTPUT_TYPE, CONF_ROLE_MAP, CONF_SPLIT_INPUT, CONF_THREADS_PER_SPLIT, CONF_TRANSFORM_FUNCTION, CONF_TRANSFORM_MODULE, CONF_TRANSFORM_NAMESPACE, CONF_TRANSFORM_PARAM, CONTENT_ENCODING, CONTENTPUMP_BUNDLE_ARTIFACT, CONTENTPUMP_HOME_PROPERTY_NAME, CONTENTPUMP_JAR_PREFIX, COPY_COLLECTIONS, COPY_METADATA, COPY_PERMISSIONS, COPY_PROPERTIES, COPY_QUALITY, DATA_TYPE, DATABASE, DEFAULT_ARCHIVE_METADATA_OPTIONAL, DEFAULT_COPY_COLLECTIONS, DEFAULT_COPY_METADATA, DEFAULT_COPY_PERMISSIONS, DEFAULT_COPY_PROPERTIES, DEFAULT_COPY_QUALITY, DEFAULT_DELIMITER, DEFAULT_ENCODING, DEFAULT_OUTPUT_TYPE, DEFAULT_SEQUENCEFILE_VALUE_TYPE, DEFAULT_THREAD_COUNT, DELIMITED_ROOT_NAME, DELIMITED_URI_ID, DELIMITER, DIRECTORY_FILTER, DOCUMENT_SELECTOR, DOCUMENT_TYPE, FAST_LOAD, GENERATE_URI, HADOOP_CONF_DIR, HADOOP_CONFDIR_ENV_NAME, HOST, INPUT_COMPRESSED, INPUT_COMPRESSION_CODEC, INPUT_DATABASE, INPUT_FILE_PATH, INPUT_FILE_PATTERN, INPUT_FILE_TYPE, INPUT_FILE_TYPE_DEFAULT, INPUT_HOST, INPUT_KEYSTORE_PASSWD, INPUT_KEYSTORE_PATH, INPUT_PASSWORD, INPUT_PORT, INPUT_SEQUENCEFILE_KEY_CLASS, INPUT_SEQUENCEFILE_VALUE_CLASS, INPUT_SEQUENCEFILE_VALUE_TYPE, INPUT_SSL, INPUT_SSL_PROTOCOL, INPUT_TRUSTSTORE_PASSWD, INPUT_TRUSTSTORE_PATH, INPUT_USERNAME, KEYSTORE_PASSWD, KEYSTORE_PATH, MAX_BATCH_SIZE, MAX_SPLIT_SIZE, MAX_THREADS, MAX_TXN_SIZE, MIN_SPLIT_SIZE, MODE, MODULES, MODULES_ROOT, NAMESPACE, OPTIONS_FILE, OUTPUT_CLEANDIR, OUTPUT_COLLECTIONS, OUTPUT_COMPRESS, OUTPUT_DATABASE, OUTPUT_DIRECTORY, OUTPUT_FILE_PATH, OUTPUT_FILENAME_AS_COLLECTION, OUTPUT_GRAPH, OUTPUT_HOST, OUTPUT_IDNAME, OUTPUT_INDENTED, OUTPUT_KEYSTORE_PASSWD, OUTPUT_KEYSTORE_PATH, OUTPUT_LANGUAGE, OUTPUT_OVERRIDE_GRAPH, OUTPUT_PARTITION, OUTPUT_PASSWORD, OUTPUT_PERMISSIONS, OUTPUT_PORT, OUTPUT_QUALITY, OUTPUT_SSL, OUTPUT_SSL_PROTOCOL, OUTPUT_TRUSTSTORE_PASSWD, OUTPUT_TRUSTSTORE_PATH, OUTPUT_TYPE, OUTPUT_URI_PREFIX, OUTPUT_URI_REPLACE, OUTPUT_URI_SUFFIX, OUTPUT_USERNAME, PASSWORD, PATH_NAMESPACE, PORT, QUERY_FILTER, RDF_STREAMING_MEMORY_THRESHOLD, RDF_TRIPLES_PER_DOCUMENT, REDACTION, RESTRICT_HOSTS, RESTRICT_INPUT_HOSTS, RESTRICT_OUTPUT_HOSTS, SNAPSHOT, SPLIT_INPUT, SSL, SSL_PROTOCOL, STREAMING, TEMPORAL_COLLECTION, THREAD_COUNT, THREADS_PER_SPLIT, TRANSACTION_SIZE, TRANSFORM_FUNCTION, TRANSFORM_MODULE, TRANSFORM_NAMESPACE, TRANSFORM_PARAM, TRUSTSTORE_PASSWD, TRUSTSTORE_PATH, TYPE_FILTER, URI_ID, USERNAME, XML_REPAIR_LEVEL
Fields inherited from interface MarkLogicConstants: ADVANCED_MODE, ASSIGNMENT_POLICY, BASIC_MODE, BATCH_SIZE, BIND_SPLIT_RANGE, COLLECTION_FILTER, CONTENT_TYPE, COPY_COLLECTIONS, COPY_METADATA, COPY_QUALITY, DEFAULT_BATCH_SIZE, DEFAULT_CONTENT_TYPE, DEFAULT_LOCAL_MAX_SPLIT_SIZE, DEFAULT_MAX_SPLIT_SIZE, DEFAULT_OUTPUT_CONTENT_ENCODING, DEFAULT_OUTPUT_XML_REPAIR_LEVEL, DEFAULT_PROPERTY_OPERATION_TYPE, DEFAULT_TXN_SIZE, DIRECTORY_FILTER, DOCUMENT_SELECTOR, EXECUTION_MODE, EXTRACT_URI, INDENTED, INPUT_DATABASE_NAME, INPUT_HOST, INPUT_KEY_CLASS, INPUT_KEYSTORE_PASSWD, INPUT_KEYSTORE_PATH, INPUT_LEXICON_FUNCTION_CLASS, INPUT_MODE, INPUT_PASSWORD, INPUT_PORT, INPUT_QUERY, INPUT_QUERY_LANGUAGE, INPUT_QUERY_TIMESTAMP, INPUT_RESTRICT_HOSTS, INPUT_SSL_OPTIONS_CLASS, INPUT_SSL_PROTOCOL, INPUT_TRUSTSTORE_PASSWD, INPUT_TRUSTSTORE_PATH, INPUT_USE_SSL, INPUT_USERNAME, INPUT_VALUE_CLASS, MAX_SPLIT_SIZE, MIN_NODEUPDATE_VERSION, MODE_DISTRIBUTED, MODE_LOCAL, MR_NAMESPACE, NODE_OPERATION_TYPE, OUTPUT_CLEAN_DIR, OUTPUT_COLLECTION, OUTPUT_CONTENT_ENCODING, OUTPUT_CONTENT_LANGUAGE, OUTPUT_CONTENT_NAMESPACE, OUTPUT_DATABASE_NAME, OUTPUT_DIRECTORY, OUTPUT_FAST_LOAD, OUTPUT_FOREST_HOST, OUTPUT_GRAPH, OUTPUT_HOST, OUTPUT_KEY_TYPE, OUTPUT_KEY_VARNAME, OUTPUT_KEYSTORE_PASSWD, OUTPUT_KEYSTORE_PATH, OUTPUT_NAMESPACE, OUTPUT_OVERRIDE_GRAPH, OUTPUT_PARTITION, OUTPUT_PASSWORD, OUTPUT_PERMISSION, OUTPUT_PORT, OUTPUT_PROPERTY_ALWAYS_CREATE, OUTPUT_QUALITY, OUTPUT_QUERY, OUTPUT_QUERY_LANGUAGE, OUTPUT_RESTRICT_HOSTS, OUTPUT_SSL_OPTIONS_CLASS, OUTPUT_SSL_PROTOCOL, OUTPUT_STREAMING, OUTPUT_TRUSTSTORE_PASSWD, OUTPUT_TRUSTSTORE_PATH, OUTPUT_URI_PREFIX, OUTPUT_URI_REPLACE, OUTPUT_URI_SUFFIX, OUTPUT_USE_SSL, OUTPUT_USERNAME, OUTPUT_VALUE_TYPE, OUTPUT_VALUE_VARNAME, OUTPUT_XML_REPAIR_LEVEL, PATH_NAMESPACE, PROPERTY_OPERATION_TYPE, QUERY_FILTER, RECORD_TO_FRAGMENT_RATIO, REDACTION_RULE_COLLECTION, SERVER_THREAD_COUNT, SPLIT_END_VARNAME, SPLIT_QUERY, SPLIT_START_VARNAME, SUBDOCUMENT_EXPRESSION, TEMPORAL_COLLECTION, THREAD_MULTIPLIER, TXN_SIZE, TYPE_FILTER
Constructor and Description |
---|
DatabaseContentReader(org.apache.hadoop.conf.Configuration conf) |
Modifier and Type | Method and Description |
---|---|
protected void | endOfResult() |
DocumentURI | getCurrentKey() |
MarkLogicDocument | getCurrentValue() |
protected float | getDefaultRatio() |
void | initialize(org.apache.hadoop.mapreduce.InputSplit inSplit, org.apache.hadoop.mapreduce.TaskAttemptContext context) |
boolean | nextKeyValue() |
protected boolean | nextResult(com.marklogic.xcc.ResultItem result) |
protected void | queryNakedProperties() |
Methods inherited from class MarkLogicRecordReader: buildDocExprQuery, buildSearchQuery, buildSrcInDocExprQuery, buildSrcInSearchQuery, close, getConf, getCount, getProgress
public static final org.apache.commons.logging.Log LOG
protected boolean copyCollection
protected boolean copyPermission
protected boolean copyProperties
protected boolean copyQuality
protected boolean copyMetadata
protected HashMap<String,DocumentMetadata> metadataMap
protected String ctsQuery
protected boolean nakedDone
protected boolean docDone
protected DocumentURI currentKey
protected DatabaseDocumentWithMeta currentValue
protected int nakedCount
public DatabaseContentReader(org.apache.hadoop.conf.Configuration conf)
public void initialize(org.apache.hadoop.mapreduce.InputSplit inSplit, org.apache.hadoop.mapreduce.TaskAttemptContext context) throws IOException, InterruptedException
initialize
in class MarkLogicRecordReader<DocumentURI,MarkLogicDocument>
Throws: IOException, InterruptedException
protected void queryNakedProperties() throws com.marklogic.xcc.exceptions.RequestException
Throws: com.marklogic.xcc.exceptions.RequestException
public boolean nextKeyValue() throws IOException, InterruptedException
nextKeyValue
in class MarkLogicRecordReader<DocumentURI,MarkLogicDocument>
Throws: IOException, InterruptedException
protected boolean nextResult(com.marklogic.xcc.ResultItem result)
nextResult
in class MarkLogicRecordReader<DocumentURI,MarkLogicDocument>
protected void endOfResult()
endOfResult
in class MarkLogicRecordReader<DocumentURI,MarkLogicDocument>
protected float getDefaultRatio()
getDefaultRatio
in class MarkLogicRecordReader<DocumentURI,MarkLogicDocument>
public DocumentURI getCurrentKey() throws IOException, InterruptedException
Specified by: getCurrentKey in class org.apache.hadoop.mapreduce.RecordReader<DocumentURI,MarkLogicDocument>
Throws: IOException, InterruptedException
public MarkLogicDocument getCurrentValue() throws IOException, InterruptedException
Specified by: getCurrentValue in class org.apache.hadoop.mapreduce.RecordReader<DocumentURI,MarkLogicDocument>
Throws: IOException, InterruptedException
Copyright © 2020 MarkLogic Corporation
Complete online documentation for MarkLogic Server, XQuery and related components may be found at developer.marklogic.com