/*
 * Decompiled with CFR 0.152.
 */
package com.marklogic.contentpump;

import com.marklogic.contentpump.DelimitedTextReader;
import com.marklogic.contentpump.FileAndDirectoryInputFormat;
import com.marklogic.contentpump.SplitDelimitedTextReader;
import com.marklogic.contentpump.utilities.CSVParserFormatter;
import com.marklogic.contentpump.utilities.DelimitedSplit;
import com.marklogic.contentpump.utilities.EncodingUtil;
import com.marklogic.mapreduce.DocumentURIWithSourceInfo;
import com.marklogic.mapreduce.utilities.TextArrayWritable;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DefaultStringifier;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

/**
 * Input format for delimited text (CSV-like) files.
 *
 * <p>When the {@code mapreduce.marklogic.splitinput} flag is off, the splits
 * produced by the superclass are returned untouched and records are read by
 * {@link DelimitedTextReader}. When the flag is on, every split is wrapped in
 * a {@link DelimitedSplit} that carries the header row of the file it belongs
 * to, and records are read by {@link SplitDelimitedTextReader}.
 */
public class DelimitedTextInputFormat
extends FileAndDirectoryInputFormat<DocumentURIWithSourceInfo, Text> {
    public static final Log LOG = LogFactory.getLog(DelimitedTextInputFormat.class);

    /**
     * Chooses the record reader according to the split-input flag.
     *
     * @param split   the split to be read (not inspected here)
     * @param context task context whose configuration holds the split-input flag
     * @return a {@link SplitDelimitedTextReader} when split input is enabled,
     *         otherwise a {@link DelimitedTextReader}
     */
    public RecordReader<DocumentURIWithSourceInfo, Text> createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
        if (this.isSplitInput(context.getConfiguration())) {
            return new SplitDelimitedTextReader<Text>();
        }
        return new DelimitedTextReader<Text>();
    }

    /** Returns the value of {@code mapreduce.marklogic.splitinput} (default {@code false}). */
    private boolean isSplitInput(Configuration conf) {
        return conf.getBoolean("mapreduce.marklogic.splitinput", false);
    }

    /**
     * Computes the input splits for the job.
     *
     * <p>If split input is disabled, the superclass splits are returned as is.
     * If the number of splits reaches {@code SPLIT_COUNT_LIMIT}, split input is
     * switched off in the job configuration and the superclass splits are
     * returned as is. Otherwise each split is converted to a
     * {@link DelimitedSplit} carrying the header of its file.
     *
     * @param job the job context
     * @return the splits to process
     * @throws IOException if the superclass fails or a header cannot be read
     */
    @Override
    public List<InputSplit> getSplits(JobContext job) throws IOException {
        boolean delimSplit = this.isSplitInput(job.getConfiguration());
        List<InputSplit> splits = super.getSplits(job);
        if (!delimSplit) {
            return splits;
        }
        Configuration conf = job.getConfiguration();
        if (splits.size() >= SPLIT_COUNT_LIMIT) {
            LOG.warn("Exceeding SPLIT_COUNT_LIMIT, input_split is off:" + SPLIT_COUNT_LIMIT);
            // Write a plain boolean so that isSplitInput() (Configuration.getBoolean)
            // actually observes the flag as false afterwards.
            conf.setBoolean("mapreduce.marklogic.splitinput", false);
            return splits;
        }
        List<InputSplit> populatedSplits = new ArrayList<InputSplit>(splits.size());
        LOG.info(splits.size() + " DelimitedSplits generated");
        // Header of the file currently being walked. It is refreshed whenever a
        // split starting at offset 0 is seen and reused for the following splits.
        // NOTE(review): this relies on the first split of a file preceding the
        // other splits of that file in the superclass result - confirm.
        List<Text> header = new ArrayList<Text>();
        for (InputSplit split : splits) {
            FileSplit fsplit = (FileSplit)split;
            Path path = fsplit.getPath();
            if (fsplit.getStart() == 0L) {
                List<Text> parsed = this.readHeader(conf, path);
                // A file without any record keeps the previously known header,
                // matching the long-standing behavior of this method.
                if (parsed != null) {
                    header = parsed;
                }
            }
            Text[] headerFields = header.toArray(new Text[header.size()]);
            DelimitedSplit ds = new DelimitedSplit(new TextArrayWritable(headerFields), path, fsplit.getStart(), fsplit.getLength(), fsplit.getLocations());
            populatedSplits.add((InputSplit)ds);
        }
        return populatedSplits;
    }

    /**
     * Reads the first record of {@code path} and returns its fields as the header.
     *
     * @param conf configuration supplying the delimiter and the content encoding
     * @param path file whose first record is parsed
     * @return the header fields, or {@code null} if the file contains no record
     * @throws IOException if the file cannot be opened, decoded or parsed
     */
    private List<Text> readHeader(Configuration conf, Path path) throws IOException {
        String delimStr = conf.get("mapreduce.marklogic.delimited.delimiter", ",");
        char delimiter = '\u0000';
        if (delimStr.length() == 1) {
            delimiter = delimStr.charAt(0);
        } else {
            // Report the configured value, not the unset placeholder character.
            LOG.error("Incorrect delimitor: " + delimStr + ". Expects single character.");
        }
        String encoding = conf.get("mapreduce.marklogic.output.content.encoding", "UTF-8");
        FileSystem fs = path.getFileSystem(conf);
        FSDataInputStream fileIn = fs.open(path);
        try {
            Reader reader = new InputStreamReader((InputStream)fileIn, encoding);
            CSVParser parser = new CSVParser(reader, CSVParserFormatter.getFormat(delimiter, Character.valueOf('\"'), true, true));
            Iterator<CSVRecord> records = parser.iterator();
            if (!records.hasNext()) {
                return null;
            }
            CSVRecord record = records.next();
            String[] fields = new String[record.size()];
            for (int i = 0; i < fields.length; ++i) {
                fields[i] = record.get(i);
            }
            EncodingUtil.handleBOMUTF8(fields, 0);
            List<Text> header = new ArrayList<Text>(fields.length);
            for (String field : fields) {
                header.add(new Text(field));
            }
            return header;
        } finally {
            // Closing the underlying stream releases the file handle even when
            // the reader or parser could not be constructed or parsing failed.
            fileIn.close();
        }
    }
}

