Type Parameters: VALUEIN
public class RDFReader<VALUEIN> extends ImportRecordReader<VALUEIN>
Modifier and Type | Class and Description |
---|---|
protected class | RDFReader.ParserErrorHandler |
protected class | RDFReader.RunnableParser |
Modifier and Type | Field and Description |
---|---|
protected StringBuilder | buffer |
protected String | collection |
protected int | collectionCount |
protected Hashtable<String,Vector> | collectionHash |
protected boolean | compressed |
protected int | countPerBatch |
protected com.hp.hpl.jena.query.Dataset | dataset |
static String | DEFAULT_GRAPH |
protected com.marklogic.xcc.ContentPermission[] | defaultPerms |
protected long | end |
protected HashMap<String,com.marklogic.xcc.ContentPermission[]> | existingMapPerms |
protected Iterator<String> | graphItr |
protected Iterator<String> | graphNameIter |
protected StringBuilder | graphQry |
protected boolean | graphSupported |
static String | HASHALGORITHM |
protected boolean | hasNext |
protected boolean | hasOutputCol |
protected IdGenerator | idGen |
protected boolean | ignoreCollectionQuad |
protected long | ingestedTriples |
protected long | INGESTIONNOTIFYSTEP |
protected long | INMEMORYTHRESHOLD |
protected String | inputFn |
static String | JENA_DEFAULT_GRAPH |
protected RDFReader.RunnableParser | jenaStreamingParser |
protected org.apache.jena.riot.Lang | lang |
static org.apache.commons.logging.Log | LOG |
protected int | MAXGRAPHSPERREQUEST |
protected int | MAXTRIPLESPERDOCUMENT |
protected long | milliSecs |
protected HashSet<String> | newGraphs |
protected String | origFn |
protected String | outputGraph |
protected String | outputOverrideGraph |
protected static Pattern[] | patterns |
protected long | pos |
protected Random | random |
protected long | randomValue |
protected org.apache.jena.riot.lang.PipedRDFStream | rdfInputStream |
protected org.apache.jena.riot.lang.PipedRDFIterator | rdfIter |
protected LinkedMapWritable | roleMap |
protected boolean | roleMapExists |
protected long | splitStart |
protected long | start |
protected com.hp.hpl.jena.rdf.model.StmtIterator | statementIter |
protected String | version |
conf, encoding, file, fs, iterator, key, mode, streaming, subId, value
AGGREGATE_RECORD_ELEMENT, AGGREGATE_RECORD_NAMESPACE, AGGREGATE_URI_ID, ARCHIVE_METADATA_OPTIONAL, AUDIT_MLCPFINISH_CODE, AUDIT_MLCPFINISH_EVENT, AUDIT_MLCPSTART_CODE, AUDIT_MLCPSTART_EVENT, BATCH_MIN_VERSION, BATCH_SIZE, COLLECTION_FILTER, CONF_AGGREGATE_RECORD_ELEMENT, CONF_AGGREGATE_RECORD_NAMESPACE, CONF_AUDIT_MLCPFINISH_ENABLED, CONF_AUDIT_MLCPFINISH_MESSAGE, CONF_AUDIT_MLCPSTART_MESSAGE, CONF_COPY_PERMISSIONS, CONF_COPY_PROPERTIES, CONF_DELIMITED_DATA_TYPE, CONF_DELIMITED_ROOT_NAME, CONF_DELIMITER, CONF_INPUT_ARCHIVE_METADATA_OPTIONAL, CONF_INPUT_COMPRESSION_CODEC, CONF_INPUT_DIRECTORY, CONF_INPUT_FILE_PATTERN, CONF_INPUT_GENERATE_URI, CONF_INPUT_MODULES_DATABASE, CONF_INPUT_MODULES_ROOT, CONF_INPUT_PATH_FILTER_CLASS, CONF_INPUT_SEQUENCEFILE_KEY_CLASS, CONF_INPUT_SEQUENCEFILE_VALUE_CLASS, CONF_INPUT_SEQUENCEFILE_VALUE_TYPE, CONF_INPUT_URI_ID, CONF_MAPREDUCE_JOB_MAP_CLASS, CONF_MAPREDUCE_JOB_WORKING_DIR, CONF_MAPREDUCE_JOBTRACKER_ADDRESS, CONF_MAX_SPLIT_SIZE1, CONF_MAX_SPLIT_SIZE2, CONF_MIMETYPES, CONF_MIN_SPLIT_SIZE1, CONF_MIN_SPLIT_SIZE2, CONF_MIN_THREADS, CONF_ML_VERSION, CONF_MULTITHREADEDMAPPER_CLASS, CONF_OUTPUT_FILENAME_AS_COLLECTION, CONF_OUTPUT_FILEPATH, CONF_OUTPUT_TYPE, CONF_ROLE_MAP, CONF_SPLIT_INPUT, CONF_THREADS_PER_SPLIT, CONF_TRANSFORM_FUNCTION, CONF_TRANSFORM_MODULE, CONF_TRANSFORM_NAMESPACE, CONF_TRANSFORM_PARAM, CONTENT_ENCODING, CONTENTPUMP_BUNDLE_ARTIFACT, CONTENTPUMP_HOME_PROPERTY_NAME, CONTENTPUMP_JAR_PREFIX, COPY_COLLECTIONS, COPY_METADATA, COPY_PERMISSIONS, COPY_PROPERTIES, COPY_QUALITY, DATA_TYPE, DATABASE, DEFAULT_ARCHIVE_METADATA_OPTIONAL, DEFAULT_COPY_COLLECTIONS, DEFAULT_COPY_METADATA, DEFAULT_COPY_PERMISSIONS, DEFAULT_COPY_PROPERTIES, DEFAULT_COPY_QUALITY, DEFAULT_DELIMITER, DEFAULT_ENCODING, DEFAULT_OUTPUT_TYPE, DEFAULT_SEQUENCEFILE_VALUE_TYPE, DEFAULT_THREAD_COUNT, DELIMITED_ROOT_NAME, DELIMITED_URI_ID, DELIMITER, DIRECTORY_FILTER, DOCUMENT_SELECTOR, DOCUMENT_TYPE, FAST_LOAD, GENERATE_URI, HADOOP_CONF_DIR, 
HADOOP_CONFDIR_ENV_NAME, HOST, INPUT_COMPRESSED, INPUT_COMPRESSION_CODEC, INPUT_DATABASE, INPUT_FILE_PATH, INPUT_FILE_PATTERN, INPUT_FILE_TYPE, INPUT_FILE_TYPE_DEFAULT, INPUT_HOST, INPUT_KEYSTORE_PASSWD, INPUT_KEYSTORE_PATH, INPUT_PASSWORD, INPUT_PORT, INPUT_SEQUENCEFILE_KEY_CLASS, INPUT_SEQUENCEFILE_VALUE_CLASS, INPUT_SEQUENCEFILE_VALUE_TYPE, INPUT_SSL, INPUT_SSL_PROTOCOL, INPUT_TRUSTSTORE_PASSWD, INPUT_TRUSTSTORE_PATH, INPUT_USERNAME, KEYSTORE_PASSWD, KEYSTORE_PATH, MAX_BATCH_SIZE, MAX_SPLIT_SIZE, MAX_THREAD_PERCENTAGE, MAX_THREADS, MAX_TXN_SIZE, MIN_SPLIT_SIZE, MODE, MODULES, MODULES_ROOT, NAMESPACE, OPTIONS_FILE, OUTPUT_CLEANDIR, OUTPUT_COLLECTIONS, OUTPUT_COMPRESS, OUTPUT_DATABASE, OUTPUT_DIRECTORY, OUTPUT_FILE_PATH, OUTPUT_FILENAME_AS_COLLECTION, OUTPUT_GRAPH, OUTPUT_HOST, OUTPUT_IDNAME, OUTPUT_INDENTED, OUTPUT_KEYSTORE_PASSWD, OUTPUT_KEYSTORE_PATH, OUTPUT_LANGUAGE, OUTPUT_OVERRIDE_GRAPH, OUTPUT_PARTITION, OUTPUT_PASSWORD, OUTPUT_PERMISSIONS, OUTPUT_PORT, OUTPUT_QUALITY, OUTPUT_SSL, OUTPUT_SSL_PROTOCOL, OUTPUT_TRUSTSTORE_PASSWD, OUTPUT_TRUSTSTORE_PATH, OUTPUT_TYPE, OUTPUT_URI_PREFIX, OUTPUT_URI_REPLACE, OUTPUT_URI_SUFFIX, OUTPUT_USERNAME, PASSWORD, PATH_NAMESPACE, POLLING_INIT_DELAY, POLLING_PERIOD, POLLING_TIME_UNIT, PORT, QUERY_FILTER, RDF_STREAMING_MEMORY_THRESHOLD, RDF_TRIPLES_PER_DOCUMENT, REDACTION, RESTRICT_HOSTS, RESTRICT_INPUT_HOSTS, RESTRICT_OUTPUT_HOSTS, SNAPSHOT, SPLIT_INPUT, SSL, SSL_PROTOCOL, STREAMING, TEMPORAL_COLLECTION, THREAD_COUNT, THREADS_PER_SPLIT, TRANSACTION_SIZE, TRANSFORM_FUNCTION, TRANSFORM_MODULE, TRANSFORM_NAMESPACE, TRANSFORM_PARAM, TRUSTSTORE_PASSWD, TRUSTSTORE_PATH, TYPE_FILTER, URI_ID, USERNAME, XML_REPAIR_LEVEL
Constructor and Description |
---|
RDFReader(String version, LinkedMapWritable roleMap) |
Modifier and Type | Method and Description |
---|---|
void | close() |
protected static String | escapeXml(String _in) |
float | getProgress() |
void | initExistingMapPerms() |
void | initialize(org.apache.hadoop.mapreduce.InputSplit inSplit, org.apache.hadoop.mapreduce.TaskAttemptContext context) |
protected void | initParser(String fsname, long size) |
protected void | initStream(org.apache.hadoop.mapreduce.InputSplit inSplit) |
com.marklogic.xcc.ContentPermission[] | insertGraphDoc(String graph) |
protected String | largestCollection() |
protected void | loadModel(String fsname, InputStream in) |
boolean | nextInMemoryKeyValue() |
boolean | nextInMemoryQuadKeyValue() |
boolean | nextInMemoryQuadKeyValueIgnoreCollections() |
boolean | nextInMemoryQuadKeyValueWithCollections() |
boolean | nextInMemoryTripleKeyValue() |
boolean | nextKeyValue() |
boolean | nextStramingQuadKeyValue() |
boolean | nextStreamingKeyValue() |
protected boolean | nextStreamingQuadKeyValueIgnoreCollections() |
boolean | nextStreamingQuadKeyValueWithCollections() |
protected boolean | nextStreamingTripleKeyValue() |
protected void | notifyUser() |
protected String | object(com.hp.hpl.jena.graph.Node node) |
protected void | parse(String fsname, org.apache.hadoop.fs.FSDataInputStream in) |
protected String | predicate(com.hp.hpl.jena.graph.Node subj) |
protected String | predicate(com.hp.hpl.jena.rdf.model.Resource subj) |
protected String | resource(com.hp.hpl.jena.graph.Node rsrc) |
protected String | resource(com.hp.hpl.jena.graph.Node rsrc, String tag) |
protected String | resource(com.hp.hpl.jena.rdf.model.Resource rsrc, String tag) |
protected void | setKey() |
protected String | subject(com.hp.hpl.jena.graph.Node subj) |
protected String | subject(com.hp.hpl.jena.rdf.model.Resource subj) |
protected void | submitGraphQuery() |
protected void | write(String str) |
void | writeValue() |
void | writeValue(String collection) |
configFileNameAsCollection, getCurrentKey, getCurrentValue, getFile, initConfig, makeURIForZipEntry, makeURIFromPath, openFile, setFile, setKey, setSkipKey
public static final org.apache.commons.logging.Log LOG
public static final String HASHALGORITHM
public static final String DEFAULT_GRAPH
public static final String JENA_DEFAULT_GRAPH
protected static Pattern[] patterns
protected int MAXTRIPLESPERDOCUMENT
protected int MAXGRAPHSPERREQUEST
protected int countPerBatch
protected long INMEMORYTHRESHOLD
protected long INGESTIONNOTIFYSTEP
protected com.hp.hpl.jena.query.Dataset dataset
protected com.hp.hpl.jena.rdf.model.StmtIterator statementIter
protected String collection
protected RDFReader.RunnableParser jenaStreamingParser
protected org.apache.jena.riot.lang.PipedRDFIterator rdfIter
protected org.apache.jena.riot.lang.PipedRDFStream rdfInputStream
protected org.apache.jena.riot.Lang lang
protected int collectionCount
protected boolean ignoreCollectionQuad
protected boolean hasOutputCol
protected String outputGraph
protected String outputOverrideGraph
protected StringBuilder buffer
protected boolean hasNext
protected IdGenerator idGen
protected Random random
protected long randomValue
protected long milliSecs
protected String origFn
protected String inputFn
protected long splitStart
protected long start
protected long pos
protected long end
protected boolean compressed
protected long ingestedTriples
protected String version
protected LinkedMapWritable roleMap
protected com.marklogic.xcc.ContentPermission[] defaultPerms
protected StringBuilder graphQry
protected boolean roleMapExists
protected boolean graphSupported
public RDFReader(String version, LinkedMapWritable roleMap)
public void close() throws IOException
close
in interface Closeable
close
in interface AutoCloseable
close
in class ImportRecordReader<VALUEIN>
IOException
protected void submitGraphQuery() throws IOException
IOException
public float getProgress() throws IOException, InterruptedException
getProgress
in class ImportRecordReader<VALUEIN>
IOException
InterruptedException
public void initialize(org.apache.hadoop.mapreduce.InputSplit inSplit, org.apache.hadoop.mapreduce.TaskAttemptContext context) throws IOException, InterruptedException
initialize
in class ImportRecordReader<VALUEIN>
IOException
InterruptedException
protected void initStream(org.apache.hadoop.mapreduce.InputSplit inSplit) throws IOException, InterruptedException
IOException
InterruptedException
protected void initParser(String fsname, long size) throws IOException
IOException
protected void parse(String fsname, org.apache.hadoop.fs.FSDataInputStream in) throws IOException
IOException
protected void loadModel(String fsname, InputStream in) throws IOException
IOException
protected void write(String str)
protected String resource(com.hp.hpl.jena.graph.Node rsrc)
protected String subject(com.hp.hpl.jena.graph.Node subj)
protected String subject(com.hp.hpl.jena.rdf.model.Resource subj)
protected String predicate(com.hp.hpl.jena.graph.Node subj)
protected String predicate(com.hp.hpl.jena.rdf.model.Resource subj)
protected String object(com.hp.hpl.jena.graph.Node node)
protected void setKey()
public boolean nextKeyValue() throws IOException, InterruptedException
nextKeyValue
in class ImportRecordReader<VALUEIN>
IOException
InterruptedException
public void initExistingMapPerms() throws IOException
IOException
public com.marklogic.xcc.ContentPermission[] insertGraphDoc(String graph) throws IOException
IOException
public boolean nextInMemoryKeyValue() throws IOException, InterruptedException
IOException
InterruptedException
public boolean nextInMemoryTripleKeyValue() throws IOException, InterruptedException
IOException
InterruptedException
public boolean nextInMemoryQuadKeyValue() throws IOException, InterruptedException
IOException
InterruptedException
public boolean nextInMemoryQuadKeyValueWithCollections() throws IOException, InterruptedException
IOException
InterruptedException
public boolean nextInMemoryQuadKeyValueIgnoreCollections() throws IOException, InterruptedException
IOException
InterruptedException
public boolean nextStreamingKeyValue() throws IOException, InterruptedException
IOException
InterruptedException
protected boolean nextStreamingTripleKeyValue() throws IOException, InterruptedException
IOException
InterruptedException
public boolean nextStramingQuadKeyValue() throws IOException, InterruptedException
IOException
InterruptedException
protected boolean nextStreamingQuadKeyValueIgnoreCollections() throws IOException, InterruptedException
IOException
InterruptedException
public boolean nextStreamingQuadKeyValueWithCollections() throws IOException, InterruptedException
IOException
InterruptedException
public void writeValue() throws IOException
IOException
public void writeValue(String collection) throws IOException
IOException
protected String largestCollection()
protected void notifyUser()
Copyright © 2021 MarkLogic Corporation
Complete online documentation for MarkLogic Server, XQuery, and related components may be found at developer.marklogic.com.