VALUEIN
- Currently only ForestDocument is supported, but other types such as
Text or BytesWritable are possible candidates to be added.
public class ForestReader<VALUEIN> extends org.apache.hadoop.mapreduce.RecordReader<DocumentURI,VALUEIN> implements MarkLogicConstants
Modifier and Type | Field and Description |
---|---|
protected long |
bytesRead |
protected Collection<String> |
colFilters |
protected org.apache.hadoop.conf.Configuration |
conf |
protected BiendianDataInputStream |
dataIs |
protected int |
deletedCnt |
protected Collection<String> |
dirFilters |
protected boolean |
done |
protected int |
fragCnt |
protected DocumentURI |
key |
protected org.apache.hadoop.fs.Path |
largeForestDir |
static org.apache.commons.logging.Log |
LOG |
protected int |
nascentCnt |
protected BiendianDataInputStream |
ordIs |
protected int |
position |
protected int |
prevDocid |
protected org.apache.hadoop.mapreduce.lib.input.FileSplit |
split |
protected BiendianDataInputStream |
tsIs |
protected Collection<String> |
typeFilters |
protected VALUEIN |
value |
protected Class<? extends org.apache.hadoop.io.Writable> |
valueClass |
Fields inherited from interface MarkLogicConstants: ADVANCED_MODE, ASSIGNMENT_POLICY, BASIC_MODE, BATCH_SIZE, BIND_SPLIT_RANGE, COLLECTION_FILTER, CONTENT_TYPE, DEFAULT_BATCH_SIZE, DEFAULT_CONTENT_TYPE, DEFAULT_MAX_SPLIT_SIZE, DEFAULT_OUTPUT_CONTENT_ENCODING, DEFAULT_OUTPUT_XML_REPAIR_LEVEL, DEFAULT_PROPERTY_OPERATION_TYPE, DIRECTORY_FILTER, DOCUMENT_SELECTOR, EXECUTION_MODE, INDENTED, INPUT_DATABASE_NAME, INPUT_HOST, INPUT_KEY_CLASS, INPUT_LEXICON_FUNCTION_CLASS, INPUT_MODE, INPUT_PASSWORD, INPUT_PORT, INPUT_QUERY, INPUT_QUERY_TIMESTAMP, INPUT_SSL_OPTIONS_CLASS, INPUT_USE_SSL, INPUT_USERNAME, INPUT_VALUE_CLASS, MAX_SPLIT_SIZE, MODE_DISTRIBUTED, MODE_LOCAL, MR_NAMESPACE, NODE_OPERATION_TYPE, OUTPUT_CLEAN_DIR, OUTPUT_COLLECTION, OUTPUT_CONTENT_ENCODING, OUTPUT_CONTENT_LANGUAGE, OUTPUT_CONTENT_NAMESPACE, OUTPUT_DIRECTORY, OUTPUT_FAST_LOAD, OUTPUT_FOREST_HOST, OUTPUT_HOST, OUTPUT_KEY_TYPE, OUTPUT_KEY_VARNAME, OUTPUT_NAMESPACE, OUTPUT_PARTITION, OUTPUT_PASSWORD, OUTPUT_PERMISSION, OUTPUT_PORT, OUTPUT_PROPERTY_ALWAYS_CREATE, OUTPUT_QUALITY, OUTPUT_QUERY, OUTPUT_SSL_OPTIONS_CLASS, OUTPUT_STREAMING, OUTPUT_TOLERATE_ERRORS, OUTPUT_USE_SSL, OUTPUT_USERNAME, OUTPUT_VALUE_TYPE, OUTPUT_VALUE_VARNAME, OUTPUT_XML_REPAIR_LEVEL, PATH_NAMESPACE, PROPERTY_OPERATION_TYPE, RECORD_TO_FRAGMENT_RATIO, SPLIT_END_VARNAME, SPLIT_QUERY, SPLIT_START_VARNAME, SUBDOCUMENT_EXPRESSION, TXN_SIZE, TYPE_FILTER
Constructor and Description |
---|
ForestReader() |
Modifier and Type | Method and Description |
---|---|
protected boolean |
applyFilter(String uri,
ExpandedTree tree) |
void |
close() |
DocumentURI |
getCurrentKey() |
VALUEIN |
getCurrentValue() |
float |
getProgress() |
void |
initialize(org.apache.hadoop.mapreduce.InputSplit split,
org.apache.hadoop.mapreduce.TaskAttemptContext context) |
boolean |
nextKeyValue() |
public static final org.apache.commons.logging.Log LOG
protected org.apache.hadoop.mapreduce.lib.input.FileSplit split
protected long bytesRead
protected org.apache.hadoop.conf.Configuration conf
protected BiendianDataInputStream dataIs
protected BiendianDataInputStream ordIs
protected BiendianDataInputStream tsIs
protected DocumentURI key
protected VALUEIN value
protected Class<? extends org.apache.hadoop.io.Writable> valueClass
protected int position
protected int prevDocid
protected boolean done
protected org.apache.hadoop.fs.Path largeForestDir
protected int nascentCnt
protected int deletedCnt
protected int fragCnt
protected Collection<String> colFilters
protected Collection<String> dirFilters
protected Collection<String> typeFilters
public void close() throws IOException
Specified by: close in interface Closeable
Specified by: close in interface AutoCloseable
Specified by: close in class org.apache.hadoop.mapreduce.RecordReader<DocumentURI,VALUEIN>
Throws: IOException
public DocumentURI getCurrentKey() throws IOException, InterruptedException
Specified by: getCurrentKey in class org.apache.hadoop.mapreduce.RecordReader<DocumentURI,VALUEIN>
Throws: IOException, InterruptedException
public VALUEIN getCurrentValue() throws IOException, InterruptedException
Specified by: getCurrentValue in class org.apache.hadoop.mapreduce.RecordReader<DocumentURI,VALUEIN>
Throws: IOException, InterruptedException
public float getProgress() throws IOException, InterruptedException
Specified by: getProgress in class org.apache.hadoop.mapreduce.RecordReader<DocumentURI,VALUEIN>
Throws: IOException, InterruptedException
public void initialize(org.apache.hadoop.mapreduce.InputSplit split, org.apache.hadoop.mapreduce.TaskAttemptContext context) throws IOException, InterruptedException
Specified by: initialize in class org.apache.hadoop.mapreduce.RecordReader<DocumentURI,VALUEIN>
Throws: IOException, InterruptedException
public boolean nextKeyValue() throws IOException, InterruptedException
Specified by: nextKeyValue in class org.apache.hadoop.mapreduce.RecordReader<DocumentURI,VALUEIN>
Throws: IOException, InterruptedException
protected boolean applyFilter(String uri, ExpandedTree tree)
Copyright © 2016 MarkLogic Corporation. All Rights Reserved.
Complete online documentation for MarkLogic Server, XQuery and related components may be found at developer.marklogic.com