VALUEIN
- Currently only supports ForestDocument, but other types like
Text or BytesWritable are possible candidates to be added.

public class ForestReader<VALUEIN> extends org.apache.hadoop.mapreduce.RecordReader<DocumentURIWithSourceInfo,VALUEIN> implements MarkLogicConstants
Modifier and Type | Field and Description |
---|---|
protected long |
bytesRead |
protected Collection<String> |
colFilters |
protected org.apache.hadoop.conf.Configuration |
conf |
protected BiendianDataInputStream |
dataIs |
protected int |
deletedCnt |
protected Collection<String> |
dirFilters |
protected boolean |
done |
protected int |
fragCnt |
protected DocumentURIWithSourceInfo |
key |
protected org.apache.hadoop.fs.Path |
largeForestDir |
static org.apache.commons.logging.Log |
LOG |
protected int |
nascentCnt |
protected BiendianDataInputStream |
ordIs |
protected int |
position |
protected int |
prevDocid |
protected org.apache.hadoop.mapreduce.lib.input.FileSplit |
split |
protected String |
srcId |
protected BiendianDataInputStream |
tsIs |
protected Collection<String> |
typeFilters |
protected VALUEIN |
value |
protected Class<? extends org.apache.hadoop.io.Writable> |
valueClass |
ADVANCED_MODE, ASSIGNMENT_POLICY, BASIC_MODE, BATCH_SIZE, BIND_SPLIT_RANGE, COLLECTION_FILTER, CONF_OUTPUT_URI_PREFIX, CONF_OUTPUT_URI_REPLACE, CONF_OUTPUT_URI_SUFFIX, CONTENT_TYPE, DEFAULT_BATCH_SIZE, DEFAULT_CONTENT_TYPE, DEFAULT_LOCAL_MAX_SPLIT_SIZE, DEFAULT_MAX_SPLIT_SIZE, DEFAULT_OUTPUT_CONTENT_ENCODING, DEFAULT_OUTPUT_XML_REPAIR_LEVEL, DEFAULT_PROPERTY_OPERATION_TYPE, DIRECTORY_FILTER, DOCUMENT_SELECTOR, EXECUTION_MODE, EXTRACT_URI, INDENTED, INPUT_DATABASE_NAME, INPUT_HOST, INPUT_KEY_CLASS, INPUT_LEXICON_FUNCTION_CLASS, INPUT_MODE, INPUT_PASSWORD, INPUT_PORT, INPUT_QUERY, INPUT_QUERY_LANGUAGE, INPUT_QUERY_TIMESTAMP, INPUT_SSL_OPTIONS_CLASS, INPUT_USE_SSL, INPUT_USERNAME, INPUT_VALUE_CLASS, MAX_SPLIT_SIZE, MODE_DISTRIBUTED, MODE_LOCAL, MR_NAMESPACE, NODE_OPERATION_TYPE, OUTPUT_CLEAN_DIR, OUTPUT_COLLECTION, OUTPUT_CONTENT_ENCODING, OUTPUT_CONTENT_LANGUAGE, OUTPUT_CONTENT_NAMESPACE, OUTPUT_DATABASE_NAME, OUTPUT_DIRECTORY, OUTPUT_FAST_LOAD, OUTPUT_FOREST_HOST, OUTPUT_GRAPH, OUTPUT_HOST, OUTPUT_KEY_TYPE, OUTPUT_KEY_VARNAME, OUTPUT_NAMESPACE, OUTPUT_OVERRIDE_GRAPH, OUTPUT_PARTITION, OUTPUT_PASSWORD, OUTPUT_PERMISSION, OUTPUT_PORT, OUTPUT_PROPERTY_ALWAYS_CREATE, OUTPUT_QUALITY, OUTPUT_QUERY, OUTPUT_QUERY_LANGUAGE, OUTPUT_SSL_OPTIONS_CLASS, OUTPUT_STREAMING, OUTPUT_TOLERATE_ERRORS, OUTPUT_USE_SSL, OUTPUT_USERNAME, OUTPUT_VALUE_TYPE, OUTPUT_VALUE_VARNAME, OUTPUT_XML_REPAIR_LEVEL, PATH_NAMESPACE, PROPERTY_OPERATION_TYPE, QUERY_FILTER, RECORD_TO_FRAGMENT_RATIO, SPLIT_END_VARNAME, SPLIT_QUERY, SPLIT_START_VARNAME, SUBDOCUMENT_EXPRESSION, TEMPORAL_COLLECTION, TXN_SIZE, TYPE_FILTER
Constructor and Description |
---|
ForestReader() |
Modifier and Type | Method and Description |
---|---|
protected boolean |
applyFilter(String uri,
ExpandedTree tree) |
void |
close() |
DocumentURIWithSourceInfo |
getCurrentKey() |
VALUEIN |
getCurrentValue() |
float |
getProgress() |
void |
initialize(org.apache.hadoop.mapreduce.InputSplit split,
org.apache.hadoop.mapreduce.TaskAttemptContext context) |
boolean |
nextKeyValue() |
protected void |
setKey(String uri,
String sub,
int line,
int col)
Apply URI prefix and suffix configuration options and set the result as
DocumentURI key.
|
protected void |
setSkipKey(String sub,
int line,
int col,
String reason)
Set the result as
DocumentURI key.
|
public static final org.apache.commons.logging.Log LOG
protected org.apache.hadoop.mapreduce.lib.input.FileSplit split
protected long bytesRead
protected org.apache.hadoop.conf.Configuration conf
protected BiendianDataInputStream dataIs
protected BiendianDataInputStream ordIs
protected BiendianDataInputStream tsIs
protected DocumentURIWithSourceInfo key
protected VALUEIN value
protected Class<? extends org.apache.hadoop.io.Writable> valueClass
protected int position
protected int prevDocid
protected boolean done
protected org.apache.hadoop.fs.Path largeForestDir
protected int nascentCnt
protected int deletedCnt
protected int fragCnt
protected Collection<String> colFilters
protected Collection<String> dirFilters
protected Collection<String> typeFilters
protected String srcId
public void close() throws IOException
close
in interface Closeable
close
in interface AutoCloseable
close
in class org.apache.hadoop.mapreduce.RecordReader<DocumentURIWithSourceInfo,VALUEIN>
IOException
public DocumentURIWithSourceInfo getCurrentKey() throws IOException, InterruptedException
getCurrentKey
in class org.apache.hadoop.mapreduce.RecordReader<DocumentURIWithSourceInfo,VALUEIN>
IOException
InterruptedException
public VALUEIN getCurrentValue() throws IOException, InterruptedException
getCurrentValue
in class org.apache.hadoop.mapreduce.RecordReader<DocumentURIWithSourceInfo,VALUEIN>
IOException
InterruptedException
public float getProgress() throws IOException, InterruptedException
getProgress
in class org.apache.hadoop.mapreduce.RecordReader<DocumentURIWithSourceInfo,VALUEIN>
IOException
InterruptedException
public void initialize(org.apache.hadoop.mapreduce.InputSplit split, org.apache.hadoop.mapreduce.TaskAttemptContext context) throws IOException, InterruptedException
initialize
in class org.apache.hadoop.mapreduce.RecordReader<DocumentURIWithSourceInfo,VALUEIN>
IOException
InterruptedException
public boolean nextKeyValue() throws IOException, InterruptedException
nextKeyValue
in class org.apache.hadoop.mapreduce.RecordReader<DocumentURIWithSourceInfo,VALUEIN>
IOException
InterruptedException
protected void setKey(String uri, String sub, int line, int col)
uri
- Source string of document URI.
sub
- Sub-entry of the source of the document origin.
line
- Line number in the source if applicable; -1 otherwise.
col
- Column number in the source if applicable; -1 otherwise.

protected void setSkipKey(String sub, int line, int col, String reason)
sub
- Sub-entry of the source of the document origin.
line
- Line number in the source if applicable; -1 otherwise.
col
- Column number in the source if applicable; -1 otherwise.
reason
- Reason the document is skipped.

protected boolean applyFilter(String uri, ExpandedTree tree)
> Copyright © 2016 MarkLogic Corporation. All Rights Reserved.
Complete online documentation for MarkLogic Server, XQuery and related components may be found at developer.marklogic.com