package org.apache.solr.hadoop;

import com.google.common.base.Charsets;
import com.google.common.base.Preconditions;
import com.google.common.io.ByteStreams;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLClassLoader;
import java.nio.charset.StandardCharsets;
import java.text.NumberFormat;
import java.time.Instant;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
import java.util.Random;
import java.util.function.Function;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import net.sourceforge.argparse4j.ArgumentParsers;
import net.sourceforge.argparse4j.impl.Arguments;
import net.sourceforge.argparse4j.impl.action.HelpArgumentAction;
import net.sourceforge.argparse4j.impl.choice.RangeArgumentChoice;
import net.sourceforge.argparse4j.impl.type.FileArgumentType;
import net.sourceforge.argparse4j.inf.Argument;
import net.sourceforge.argparse4j.inf.ArgumentGroup;
import net.sourceforge.argparse4j.inf.ArgumentParser;
import net.sourceforge.argparse4j.inf.ArgumentParserException;
import net.sourceforge.argparse4j.inf.FeatureControl;
import net.sourceforge.argparse4j.inf.Namespace;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.fs.permission.AclEntry;
import org.apache.hadoop.fs.permission.AclEntryScope;
import org.apache.hadoop.fs.permission.AclEntryType;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.PropertyConfigurator;
import org.apache.lucene.util.Version;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.solr.hadoop.dedup.RetainMostRecentUpdateConflictResolver;
import org.apache.solr.hadoop.morphline.MorphlineMapRunner;
import org.apache.solr.hadoop.morphline.MorphlineMapper;
import org.noggit.CharArr;
import org.noggit.JSONUtil;
import org.noggit.JSONWriter;
import org.noggit.ObjectBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/solr/hadoop/MapReduceIndexerTool.class */
public class MapReduceIndexerTool extends Configured implements Tool {
    /** Default value of the {@code --go-live-timeout} option, in milliseconds (86,400,000 ms = 24 hours). */
    static final int GO_LIVE_DEFAULT_TIMEOUT = 86400000;
    // NOTE(review): presumably the MapReduce job currently driven by this tool; it is assigned outside this view.
    Job job;
    // NOTE(review): looks like the name of a results sub-directory; usage is outside this view - confirm.
    public static final String RESULTS_DIR = "results";
    // Blank final: the value is assigned in a static initializer that is not part of this view.
    static final String MAIN_MEMORY_RANDOMIZATION_THRESHOLD;
    // NOTE(review): presumably the file name under which the complete list of input URIs is stored - confirm.
    private static final String FULL_INPUT_LIST = "full-input-list.txt";
    // Blank final: the logger instance is assigned in a static initializer that is not part of this view.
    private static final Logger LOG;
    // Decompiler rendering of the compiler-generated flag that backs `assert` statements of this class.
    static final /* synthetic */ boolean $assertionsDisabled;

    /* loaded from: input_file:org/apache/solr/hadoop/MapReduceIndexerTool$MyArgumentParser.class */
    static final class MyArgumentParser {
        /** Command line flag that makes {@code --help} also list the options for Non-SolrCloud mode. */
        private static final String SHOW_NON_SOLR_CLOUD = "--show-non-solr-cloud";
        /**
         * Whether {@link #SHOW_NON_SOLR_CLOUD} was present in the arguments of the most recent
         * {@code parseArgs} call; consulted by the {@code nonSolrCloud(...)} helpers to hide or show help text.
         */
        private boolean showNonSolrCloud = false;
        // Decompiler rendering of the compiler-generated flag that backs the assertion checks in parseArgs;
        // initialised in this class's static initializer from MapReduceIndexerTool's desired assertion status.
        static final /* synthetic */ boolean $assertionsDisabled;

        /* loaded from: input_file:org/apache/solr/hadoop/MapReduceIndexerTool$MyArgumentParser$FoundHelpArgument.class */
        /**
         * Marker exception thrown by the {@code --help} action after the help text has been printed.
         * {@code parseArgs} catches it and turns it into exit code {@code 0}; it carries no message or cause.
         */
        private static final class FoundHelpArgument extends RuntimeException {
            /** Only the enclosing parser creates instances. */
            private FoundHelpArgument() {
                super();
            }
        }

        /**
         * Creates a parser. Non-SolrCloud help starts out hidden; the flag is re-evaluated from the
         * command line on every {@code parseArgs} call.
         */
        MyArgumentParser() {
            super();
        }

        /* JADX WARN: Type inference failed for: r1v52, types: [org.apache.solr.hadoop.MapReduceIndexerTool$MyArgumentParser$3] */
        public Integer parseArgs(String[] strArr, Configuration configuration, Options options) {
            if (!$assertionsDisabled && strArr == null) {
                throw new AssertionError();
            }
            if (!$assertionsDisabled && configuration == null) {
                throw new AssertionError();
            }
            if (!$assertionsDisabled && options == null) {
                throw new AssertionError();
            }
            if (strArr.length == 0) {
                strArr = new String[]{"--help"};
            }
            this.showNonSolrCloud = Arrays.asList(strArr).contains(SHOW_NON_SOLR_CLOUD);
            ArgumentParser description = ArgumentParsers.newArgumentParser("hadoop [GenericOptions]... jar search-mr-*-job.jar " + MapReduceIndexerTool.class.getName(), false).defaultHelp(true).description("MapReduce batch job driver that takes a morphline and creates a set of Solr index shards from a set of input files and writes the indexes into HDFS, in a flexible, scalable and fault-tolerant manner. It also supports merging the output shards into a set of live customer facing Solr servers, typically a SolrCloud. The program proceeds in several consecutive MapReduce based phases, as follows:\n\n1) Randomization phase: This (parallel) phase randomizes the list of input files in order to spread indexing load more evenly among the mappers of the subsequent phase.\n\n2) Mapper phase: This (parallel) phase takes the input files, extracts the relevant content, transforms it and hands SolrInputDocuments to a set of reducers. The ETL functionality is flexible and customizable using chains of arbitrary morphline commands that pipe records from one transformation command to another. Commands to parse and transform a set of standard data formats such as Avro, CSV, Text, HTML, XML, PDF, Word, Excel, etc. are provided out of the box, and additional custom commands and parsers for additional file or data formats can be added as morphline plugins. This is done by implementing a simple Java interface that consumes a record (e.g. a file in the form of an InputStream plus some headers plus contextual metadata) and generates as output zero or more records. Any kind of data format can be indexed and any Solr documents for any kind of Solr schema can be generated, and any custom ETL logic can be registered and executed.\nRecord fields, including MIME types, can also explicitly be passed by force from the CLI to the morphline, for example: hadoop ... 
-D morphlineField._attachment_mimetype=text/csv\n\n3) Reducer phase: This (parallel) phase loads the mapper's SolrInputDocuments into one EmbeddedSolrServer per reducer. Each such reducer and Solr server can be seen as a (micro) shard. The Solr servers store their data in HDFS.\n\n4) Mapper-only merge phase: This (parallel) phase merges the set of reducer shards into the number of solr shards expected by the user, using a mapper-only job. This phase is omitted if the number of shards is already equal to the number of shards expected by the user. \n\n5) Go-live phase: This optional (parallel) phase merges the output shards of the previous phase into a set of live customer facing Solr servers, typically a SolrCloud. If this phase is omitted you can explicitly point each Solr server to one of the HDFS output shard directories.\n\nFault Tolerance: Mapper and reducer task attempts are retried on failure per the standard MapReduce semantics. On program startup all data in the --output-dir is deleted if that output directory already exists. If the whole job fails you can retry simply by rerunning the program again using the same arguments.");
            description.addArgument(new String[]{"--help", "-help", "-h"}).help("Show this help message and exit").action(new HelpArgumentAction() { // from class: org.apache.solr.hadoop.MapReduceIndexerTool.MyArgumentParser.1
                public void run(ArgumentParser argumentParser, Argument argument, Map<String, Object> map, String str, Object obj) throws ArgumentParserException {
                    argumentParser.printHelp();
                    System.out.println();
                    System.out.print(ToolRunnerHelpFormatter.getGenericCommandUsage());
                    System.out.println("Examples: \n\n# (Re)index an Avro based Twitter tweet file:\nsudo -u hdfs hadoop \\\n  --config /etc/hadoop/conf.cloudera.mapreduce1 \\\n  jar target/search-mr-*-job.jar " + MapReduceIndexerTool.class.getName() + " \\\n  -D 'mapred.child.java.opts=-Xmx500m' \\\n  --log4j src/test/resources/log4j.properties \\\n  --morphline-file ../search-core/src/test/resources/test-morphlines/tutorialReadAvroContainer.conf \\\n  --solr-home-dir src/test/resources/solr/minimr \\\n  --output-dir hdfs://c2202.mycompany.com/user/$USER/test \\\n  --shards 1 \\\n  hdfs:///user/$USER/test-documents/sample-statuses-20120906-141433.avro\n\n# (Re)index all files that match all of the following conditions:\n# 1) File is contained in dir tree hdfs:///user/$USER/solrloadtest/twitter/tweets\n# 2) file name matches the glob pattern 'sample-statuses*.gz'\n# 3) file was last modified less than 100000 minutes ago\n# 4) file size is between 1 MB and 1 GB\n# Also include extra library jar file containing JSON tweet Java parser:\nhadoop fs \\\n  -find hdfs:///user/$USER/solrloadtest/twitter/tweets \\\n  -type f \\\n  -name 'sample-statuses*.gz' \\\n  -mmin -1000000 \\\n  -size -100000000c \\\n  -size +1000000c \\\n| sudo -u hdfs hadoop \\\n  --config /etc/hadoop/conf.cloudera.mapreduce1 \\\n  jar target/search-mr-*-job.jar " + MapReduceIndexerTool.class.getName() + " \\\n  --libjars /path/to/kite-morphlines-twitter-0.10.0.jar \\\n  -D 'mapred.child.java.opts=-Xmx500m' \\\n  --log4j src/test/resources/log4j.properties \\\n  --morphline-file ../search-core/src/test/resources/test-morphlines/tutorialReadJsonTestTweets.conf \\\n  --solr-home-dir src/test/resources/solr/minimr \\\n  --output-dir hdfs://c2202.mycompany.com/user/$USER/test \\\n  --shards 100 \\\n  --input-list -\n\n# Go live by merging resulting index shards into a live Solr cluster\n# (explicitly specify Solr URLs - for a SolrCloud cluster see next example):\nsudo -u hdfs hadoop \\\n  --config 
/etc/hadoop/conf.cloudera.mapreduce1 \\\n  jar target/search-mr-*-job.jar " + MapReduceIndexerTool.class.getName() + " \\\n  -D 'mapred.child.java.opts=-Xmx500m' \\\n  --log4j src/test/resources/log4j.properties \\\n  --morphline-file ../search-core/src/test/resources/test-morphlines/tutorialReadAvroContainer.conf \\\n  --solr-home-dir src/test/resources/solr/minimr \\\n  --output-dir hdfs://c2202.mycompany.com/user/$USER/test \\\n  --shard-url http://solr001.mycompany.com:8983/solr/collection1 \\\n  --shard-url http://solr002.mycompany.com:8983/solr/collection1 \\\n  --go-live \\\n  hdfs:///user/foo/indir\n\n# Go live by merging resulting index shards into a live SolrCloud cluster\n# (discover shards and Solr URLs through ZooKeeper):\nsudo -u hdfs hadoop \\\n  --config /etc/hadoop/conf.cloudera.mapreduce1 \\\n  jar target/search-mr-*-job.jar " + MapReduceIndexerTool.class.getName() + " \\\n  -D 'mapred.child.java.opts=-Xmx500m' \\\n  --log4j src/test/resources/log4j.properties \\\n  --morphline-file ../search-core/src/test/resources/test-morphlines/tutorialReadAvroContainer.conf \\\n  --output-dir hdfs://c2202.mycompany.com/user/$USER/test \\\n  --zk-host zk01.mycompany.com:2181/solr \\\n  --collection collection1 \\\n  --go-live \\\n  hdfs:///user/foo/indir\n\n# MapReduce on Yarn - Pass custom JVM arguments (including a custom tmp directory)\nHADOOP_CLIENT_OPTS='-DmaxConnectionsPerHost=10000 -DmaxConnections=10000 -Djava.io.tmpdir=/my/tmp/dir/'; \\\nsudo -u hdfs hadoop \\\n  --config /etc/hadoop/conf.cloudera.mapreduce1 \\\n  jar target/search-mr-*-job.jar " + MapReduceIndexerTool.class.getName() + " \\\n  -D 'mapreduce.map.java.opts=-DmaxConnectionsPerHost=10000 -DmaxConnections=10000' \\\n  -D 'mapreduce.reduce.java.opts=-DmaxConnectionsPerHost=10000 -DmaxConnections=10000' \\\n  --log4j src/test/resources/log4j.properties \\\n  --morphline-file ../search-core/src/test/resources/test-morphlines/tutorialReadAvroContainer.conf \\\n  --solr-home-dir 
src/test/resources/solr/minimr \\\n  --output-dir hdfs://c2202.mycompany.com/user/$USER/test \\\n  --shards 1 \\\n  hdfs:///user/$USER/test-documents/sample-statuses-20120906-141433.avro\n\n# MapReduce on MR1 - Pass custom JVM arguments (including a custom tmp directory)\nHADOOP_CLIENT_OPTS='-DmaxConnectionsPerHost=10000 -DmaxConnections=10000 -Djava.io.tmpdir=/my/tmp/dir/'; \\\nsudo -u hdfs hadoop \\\n  --config /etc/hadoop/conf.cloudera.mapreduce1 \\\n  jar target/search-mr-*-job.jar " + MapReduceIndexerTool.class.getName() + " \\\n  -D 'mapred.child.java.opts=-DmaxConnectionsPerHost=10000 -DmaxConnections=10000' \\\n  --log4j src/test/resources/log4j.properties \\\n  --morphline-file ../search-core/src/test/resources/test-morphlines/tutorialReadAvroContainer.conf \\\n  --solr-home-dir src/test/resources/solr/minimr \\\n  --output-dir hdfs://c2202.mycompany.com/user/$USER/test \\\n  --shards 1 \\\n  hdfs:///user/$USER/test-documents/sample-statuses-20120906-141433.avro\n\n");
                    throw new FoundHelpArgument();
                }
            });
            ArgumentGroup addArgumentGroup = description.addArgumentGroup("Required arguments");
            Argument help = addArgumentGroup.addArgument(new String[]{"--output-dir"}).metavar(new String[]{"HDFS_URI"}).type(new PathArgumentType(configuration) { // from class: org.apache.solr.hadoop.MapReduceIndexerTool.MyArgumentParser.2
                @Override // org.apache.solr.hadoop.PathArgumentType
                /* renamed from: convert */
                public Path mo7convert(ArgumentParser argumentParser, Argument argument, String str) throws ArgumentParserException {
                    Path mo7convert = super.mo7convert(argumentParser, argument, str);
                    if ("hdfs".equals(mo7convert.toUri().getScheme()) && mo7convert.toUri().getAuthority() == null) {
                        throw new ArgumentParserException("Missing authority in path URI: " + mo7convert, argumentParser);
                    }
                    return mo7convert;
                }
            }.verifyHasScheme().verifyIsAbsolute().verifyCanWriteParent()).required(true).help("HDFS directory to write Solr indexes to. Inside there one output directory per shard will be generated. Example: hdfs://c2202.mycompany.com/user/$USER/test");
            Argument help2 = description.addArgument(new String[]{"--input-list"}).action(Arguments.append()).metavar(new String[]{"URI"}).type(Path.class).help("Local URI or HDFS URI of a UTF-8 encoded file containing a list of HDFS URIs to index, one URI per line in the file. If '-' is specified, URIs are read from the standard input. Multiple --input-list arguments can be specified.");
            Argument help3 = addArgumentGroup.addArgument(new String[]{"--morphline-file"}).metavar(new String[]{"FILE"}).type(new FileArgumentType().verifyExists().verifyIsFile().verifyCanRead()).required(true).help("Relative or absolute path to a local config file that contains one or more morphlines. The file must be UTF-8 encoded. Example: /path/to/morphline.conf");
            Argument help4 = description.addArgument(new String[]{"--morphline-id"}).metavar(new String[]{"STRING"}).type(String.class).help("The identifier of the morphline that shall be executed within the morphline config file specified by --morphline-file. If the --morphline-id option is ommitted the first (i.e. top-most) morphline within the config file is used. Example: morphline1");
            Argument help5 = description.addArgument(new String[]{"--solr-home-dir"}).metavar(new String[]{"DIR"}).type(new FileArgumentType() { // from class: org.apache.solr.hadoop.MapReduceIndexerTool.MyArgumentParser.3
                /* renamed from: convert, reason: merged with bridge method [inline-methods] */
                public File m8convert(ArgumentParser argumentParser, Argument argument, String str) throws ArgumentParserException {
                    File convert = super.convert(argumentParser, argument, str);
                    new FileArgumentType().verifyExists().verifyIsFile().verifyCanRead().convert(argumentParser, argument, MapReduceIndexerTool.getSolrConfig(convert).getPath());
                    return convert;
                }
            }.verifyIsDirectory().verifyCanRead()).required(false).help("Optional relative or absolute path to a local dir containing Solr conf/ dir and in particular conf/solrconfig.xml and optionally also lib/ dir. This directory will be uploaded to each MR task. Example: src/test/resources/solr/minimr");
            Argument help6 = description.addArgument(new String[]{"--update-conflict-resolver"}).metavar(new String[]{"FQCN"}).type(String.class).setDefault(RetainMostRecentUpdateConflictResolver.class.getName()).help("Fully qualified class name of a Java class that implements the UpdateConflictResolver interface. This enables deduplication and ordering of a series of document updates for the same unique document key. For example, a MapReduce batch job might index multiple files in the same job where some of the files contain old and new versions of the very same document, using the same unique document key.\nTypically, implementations of this interface forbid collisions by throwing an exception, or ignore all but the most recent document version, or, in the general case, order colliding updates ascending from least recent to most recent (partial) update. The caller of this interface (i.e. the Hadoop Reducer) will then apply the updates to Solr in the order returned by the orderUpdates() method.\nThe default RetainMostRecentUpdateConflictResolver implementation ignores all but the most recent document version, based on a configurable numeric Solr field, which defaults to the file_last_modified timestamp");
            Argument help7 = description.addArgument(new String[]{"--mappers"}).metavar(new String[]{"INTEGER"}).type(Integer.class).choices(new RangeArgumentChoice(-1, Integer.MAX_VALUE)).setDefault(-1).help("Tuning knob that indicates the maximum number of MR mapper tasks to use. -1 indicates use all map slots available on the cluster.");
            Argument help8 = description.addArgument(new String[]{"--reducers"}).metavar(new String[]{"INTEGER"}).type(Integer.class).choices(new RangeArgumentChoice(-2, Integer.MAX_VALUE)).setDefault(-1).help("Tuning knob that indicates the number of reducers to index into. 0 is reserved for a mapper-only feature that may ship in a future release. -1 indicates use all reduce slots available on the cluster. -2 indicates use one reducer per output shard, which disables the mtree merge MR algorithm. The mtree merge MR algorithm improves scalability by spreading load (in particular CPU load) among a number of parallel reducers that can be much larger than the number of solr shards expected by the user. It can be seen as an extension of concurrent lucene merges and tiered lucene merges to the clustered case. The subsequent mapper-only phase merges the output of said large number of reducers to the number of shards expected by the user, again by utilizing more available parallelism on the cluster.");
            Argument help9 = description.addArgument(new String[]{"--fanout"}).metavar(new String[]{"INTEGER"}).type(Integer.class).choices(new RangeArgumentChoice(2, Integer.MAX_VALUE)).setDefault(Integer.MAX_VALUE).help(FeatureControl.SUPPRESS);
            Argument help10 = description.addArgument(new String[]{"--max-segments"}).metavar(new String[]{"INTEGER"}).type(Integer.class).choices(new RangeArgumentChoice(1, Integer.MAX_VALUE)).setDefault(1).help("Tuning knob that indicates the maximum number of segments to be contained on output in the index of each reducer shard. After a reducer has built its output index it applies a merge policy to merge segments until there are <= maxSegments lucene segments left in this index. Merging segments involves reading and rewriting all data in all these segment files, potentially multiple times, which is very I/O intensive and time consuming. However, an index with fewer segments can later be merged faster, and it can later be queried faster once deployed to a live Solr serving shard. Set maxSegments to 1 to optimize the index for low query latency. In a nutshell, a small maxSegments value trades indexing latency for subsequently improved query latency. This can be a reasonable trade-off for batch indexing systems.");
            Argument help11 = description.addArgument(new String[]{"--fair-scheduler-pool"}).metavar(new String[]{"STRING"}).help("Optional tuning knob that indicates the name of the fair scheduler pool to submit jobs to. The Fair Scheduler is a pluggable MapReduce scheduler that provides a way to share large clusters. Fair scheduling is a method of assigning resources to jobs such that all jobs get, on average, an equal share of resources over time. When there is a single job running, that job uses the entire cluster. When other jobs are submitted, tasks slots that free up are assigned to the new jobs, so that each job gets roughly the same amount of CPU time. Unlike the default Hadoop scheduler, which forms a queue of jobs, this lets short jobs finish in reasonable time while not starving long jobs. It is also an easy way to share a cluster between multiple of users. Fair sharing can also work with job priorities - the priorities are used as weights to determine the fraction of total compute time that each job gets.");
            Argument help12 = description.addArgument(new String[]{"--dry-run"}).action(Arguments.storeTrue()).help("Run in local mode and print documents to stdout instead of loading them into Solr. This executes the morphline in the client process (without submitting a job to MR) for quicker turnaround during early trial & debug sessions. This option cannot be used with the --use-backup-format option.");
            Argument help13 = description.addArgument(new String[]{"--log4j"}).metavar(new String[]{"FILE"}).type(new FileArgumentType().verifyExists().verifyIsFile().verifyCanRead()).help("Relative or absolute path to a log4j.properties config file on the local file system. This file will be uploaded to each MR task. Example: /path/to/log4j.properties");
            Argument help14 = description.addArgument(new String[]{"--verbose", "-v"}).action(Arguments.storeTrue()).help("Turn on verbose output.");
            description.addArgument(new String[]{SHOW_NON_SOLR_CLOUD}).action(Arguments.storeTrue()).help("Also show options for Non-SolrCloud mode as part of --help.");
            ArgumentGroup description2 = description.addArgumentGroup("Cluster arguments").description("Arguments that provide information about your Solr cluster. " + nonSolrCloud("If you are building shards for a SolrCloud cluster, pass the --zk-host argument. If you are building shards for a Non-SolrCloud cluster, pass the --shard-url argument one or more times. To build indexes for a replicated Non-SolrCloud cluster with --shard-url, pass replica urls consecutively and also pass --shards. Using --go-live requires either --zk-host or --shard-url."));
            Argument help15 = description2.addArgument(new String[]{"--zk-host"}).metavar(new String[]{"STRING"}).type(String.class).help("The address of a ZooKeeper ensemble being used by a SolrCloud cluster. This ZooKeeper ensemble will be examined to determine the number of output shards to create as well as the Solr URLs to merge the output shards into when using the --go-live option. Requires that you also pass the --collection to merge the shards into.\n\nThe --zk-host option implements the same partitioning semantics as the standard SolrCloud Near-Real-Time (NRT) API. This enables to mix batch updates from MapReduce ingestion with updates from standard Solr NRT ingestion on the same SolrCloud cluster, using identical unique document keys.\n\nFormat is: a list of comma separated host:port pairs, each corresponding to a zk server. Example: '127.0.0.1:2181,127.0.0.1:2182,127.0.0.1:2183' If the optional chroot suffix is used the example would look like: '127.0.0.1:2181/solr,127.0.0.1:2182/solr,127.0.0.1:2183/solr' where the client would be rooted at '/solr' and all paths would be relative to this root - i.e. getting/setting/etc... '/foo/bar' would result in operations being run on '/solr/foo/bar' (from the server perspective).\n\nIf --solr-home-dir is not specified, the Solr home directory for the collection may be downloaded from this ZooKeeper ensemble.");
            Argument help16 = description2.addArgument(new String[]{"--use-zk-solrconfig.xml"}).action(Arguments.storeTrue()).help(FeatureControl.SUPPRESS);
            Argument nonSolrCloud = nonSolrCloud(description2.addArgument(new String[]{"--shard-url"}).metavar(new String[]{"URL"}).type(String.class).action(Arguments.append()).help("Solr URL to merge resulting shard into if using --go-live. Example: http://solr001.mycompany.com:8983/solr/collection1. Multiple --shard-url arguments can be specified, one for each desired shard. If you are merging shards into a SolrCloud cluster, use --zk-host instead."));
            Argument nonSolrCloud2 = nonSolrCloud(description2.addArgument(new String[]{"--shards"}).metavar(new String[]{"INTEGER"}).type(Integer.class).choices(new RangeArgumentChoice(1, Integer.MAX_VALUE)).help("Number of output shards to generate."));
            ArgumentGroup description3 = description.addArgumentGroup("Go live arguments").description("Arguments for merging the shards that are built into a live Solr cluster. Also see the Cluster arguments.");
            Argument help17 = description3.addArgument(new String[]{"--go-live"}).action(Arguments.storeTrue()).help("Allows you to optionally merge the final index shards into a live Solr cluster after they are built. You can pass the ZooKeeper address with --zk-host and the relevant cluster information will be auto detected. This option cannot be used with the --use-backup-format option. " + nonSolrCloud("If you are not using a SolrCloud cluster, --shard-url arguments can be used to specify each SolrCore to merge each shard into."));
            Argument help18 = description3.addArgument(new String[]{"--go-live-timeout"}).metavar(new String[]{"INTEGER"}).type(Integer.class).choices(new RangeArgumentChoice(1, Integer.MAX_VALUE)).setDefault(Integer.valueOf(MapReduceIndexerTool.GO_LIVE_DEFAULT_TIMEOUT)).required(false).help("Timeout in ms to wait for the merge to complete before the connection times out and the tool fails.");
            ArgumentGroup description4 = description.addArgumentGroup("Backup format arguments").description("Arguments for converting the shards that are built into a backup format that can be restored into a Solr cluster. Also see the Cluster arguments.");
            Argument help19 = description4.addArgument(new String[]{"--use-backup-format"}).action(Arguments.storeTrue()).help("Allows you to optionally convert the final index shards to a Solr backup format to restore into a new collection later.This option can't be used in combination with --dry-run or --go-live options.");
            Argument help20 = description4.addArgument(new String[]{"--backup-name"}).metavar(new String[]{"STRING"}).setDefault("backup").help("Allows you to specify the name of the Solr backup that can be restored into a new collection later. To be used with the " + help19.textualName() + " argument.");
            Argument help21 = description3.addArgument(new String[]{"--collection"}).metavar(new String[]{"STRING"}).help("The SolrCloud collection to merge shards into when using --go-live and --zk-host. Example: collection1");
            Argument help22 = description3.addArgument(new String[]{"--go-live-min-replication-factor"}).metavar(new String[]{"INTEGER"}).type(Integer.class).choices(new RangeArgumentChoice(-1, Integer.MAX_VALUE)).setDefault(-1).help("The minimum number of SolrCloud replicas to successfully merge any final index shard into. The go-live job phase attempts to merge final index shards into all SolrCloud replicas. Some of these merge operations may fail, for example if some SolrCloud servers are down. This option enables indexing jobs to succeed even if some such merge operations fail on SolrCloud followers. Successful merge operations into all leaders are always required for job success, regardless of the value of --go-live-min-replication-factor. -1 indicates require successful merge operations into all replicas. 1 indicates require successful merge operations only into leader replicas.");
            Argument help23 = description3.addArgument(new String[]{"--go-live-threads"}).metavar(new String[]{"INTEGER"}).type(Integer.class).choices(new RangeArgumentChoice(1, Integer.MAX_VALUE)).setDefault(1000).help("Tuning knob that indicates the maximum number of live merges to run in parallel at one time.");
            Argument help24 = description.addArgument(new String[]{"input-files"}).metavar(new String[]{"HDFS_URI"}).type(new PathArgumentType(configuration).verifyHasScheme().verifyExists().verifyCanRead()).nargs("*").setDefault(new Object[0]).help("HDFS URI of file or directory tree to index.");
            try {
                Namespace parseArgs = description.parseArgs(strArr);
                options.log4jConfigFile = (File) parseArgs.get(help13.getDest());
                if (options.log4jConfigFile != null) {
                    PropertyConfigurator.configure(options.log4jConfigFile.getPath());
                }
                MapReduceIndexerTool.LOG.debug("Parsed command line args: {}", parseArgs);
                options.inputLists = parseArgs.getList(help2.getDest());
                if (options.inputLists == null) {
                    options.inputLists = Collections.EMPTY_LIST;
                }
                options.inputFiles = parseArgs.getList(help24.getDest());
                options.outputDir = (Path) parseArgs.get(help.getDest());
                options.mappers = parseArgs.getInt(help7.getDest()).intValue();
                options.reducers = parseArgs.getInt(help8.getDest()).intValue();
                options.updateConflictResolver = parseArgs.getString(help6.getDest());
                options.fanout = parseArgs.getInt(help9.getDest()).intValue();
                options.maxSegments = parseArgs.getInt(help10.getDest()).intValue();
                options.morphlineFile = (File) parseArgs.get(help3.getDest());
                options.morphlineId = parseArgs.getString(help4.getDest());
                options.solrHomeDir = (File) parseArgs.get(help5.getDest());
                options.fairSchedulerPool = parseArgs.getString(help11.getDest());
                options.isDryRun = parseArgs.getBoolean(help12.getDest()).booleanValue();
                options.isVerbose = parseArgs.getBoolean(help14.getDest()).booleanValue();
                options.zkHost = parseArgs.getString(help15.getDest());
                options.useZkSolrConfig = parseArgs.getBoolean(help16.getDest()).booleanValue();
                options.shards = parseArgs.getInt(nonSolrCloud2.getDest());
                options.shardUrls = MapReduceIndexerTool.buildShardUrls(parseArgs.getList(nonSolrCloud.getDest()), options.shards);
                options.useBackupFormat = parseArgs.getBoolean(help19.getDest()).booleanValue();
                options.backupName = parseArgs.getString(help20.getDest());
                options.goLive = parseArgs.getBoolean(help17.getDest()).booleanValue();
                options.goLiveMinReplicationFactor = parseArgs.getInt(help22.getDest());
                options.goLiveThreads = parseArgs.getInt(help23.getDest());
                options.collection = parseArgs.getString(help21.getDest());
                options.goLiveTimeout = parseArgs.getInt(help18.getDest()).intValue();
                try {
                    if (options.reducers == 0) {
                        throw new ArgumentParserException("--reducers must not be zero", description);
                    }
                    MapReduceIndexerTool.verifyGoLiveArgs(options, description);
                    if (!options.inputLists.isEmpty() || !options.inputFiles.isEmpty()) {
                        return null;
                    }
                    MapReduceIndexerTool.LOG.info("No input files specified - nothing to process");
                    return 0;
                } catch (ArgumentParserException e) {
                    description.handleError(e);
                    return 1;
                }
            } catch (ArgumentParserException e2) {
                MapReduceIndexerTool.LOG.warn("", e2);
                description.handleError(e2);
                return 1;
            } catch (FoundHelpArgument e3) {
                return 0;
            }
        }

        /**
         * Hides the help text of a non-SolrCloud argument unless such options are being shown.
         *
         * @param argument the argument whose help visibility is decided here
         * @return {@code argument} itself when {@code showNonSolrCloud} is set, otherwise the
         *         result of suppressing its help via {@link FeatureControl#SUPPRESS}
         */
        private Argument nonSolrCloud(Argument argument) {
            if (this.showNonSolrCloud) {
                return argument;
            }
            return argument.help(FeatureControl.SUPPRESS);
        }

        /**
         * Blanks out a piece of non-SolrCloud help text unless such options are being shown.
         *
         * @param str the help text fragment
         * @return {@code str} when {@code showNonSolrCloud} is set, otherwise the empty string
         */
        private String nonSolrCloud(String str) {
            if (this.showNonSolrCloud) {
                return str;
            }
            return "";
        }

        // Initializes the flag consulted by the `!$assertionsDisabled && ...` checks in this file:
        // it is true when MapReduceIndexerTool's desired assertion status is "disabled".
        // NOTE(review): this looks like the decompiled form of the compiler-generated support
        // for `assert` statements rather than hand-written code - confirm before editing.
        static {
            $assertionsDisabled = !MapReduceIndexerTool.class.desiredAssertionStatus();
        }
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:org/apache/solr/hadoop/MapReduceIndexerTool$Options.class */
    /**
     * Mutable bag of settings for one run of the indexer tool.
     *
     * <p>The fields are filled in by the command line parser and then read (and in a few cases
     * adjusted, e.g. {@code mappers}, {@code reducers}, {@code fanout} and {@code solrHomeDir})
     * while the job is being set up.
     */
    public static final class Options {
        // ---- input and output locations ----
        List<Path> inputLists;
        List<Path> inputFiles;
        Path outputDir;

        // ---- job sizing ----
        /** -1 means "derive from the number of mapper slots reported by the cluster". */
        int mappers;
        /** -2 means "one per shard", -1 means "derive from the cluster"; zero is rejected. */
        int reducers;
        Integer shards;
        int fanout;
        int maxSegments;
        String updateConflictResolver;
        String fairSchedulerPool;

        // ---- morphline and Solr configuration ----
        /** Required (--morphline-file). */
        File morphlineFile;
        String morphlineId;
        File solrHomeDir;
        File log4jConfigFile;

        // ---- SolrCloud / ZooKeeper ----
        String zkHost;
        String collection;
        boolean useZkSolrConfig;
        List<List<String>> shardUrls;

        // ---- go-live ----
        boolean goLive;
        Integer goLiveThreads;
        Integer goLiveMinReplicationFactor;
        int goLiveTimeout;

        // ---- backup format output ----
        /** --use-backup-format; requires backupName, zkHost and collection. */
        boolean useBackupFormat;
        String backupName;

        // ---- run mode ----
        boolean isDryRun;
        boolean isVerbose;

        Options() {
        }
    }

    /**
     * Splits a flat list of shard URLs into consecutive groups, one group per shard.
     *
     * <p>Each group holds {@code ceil(urlCount / shardCount)} URLs (the last group may be
     * shorter). The ceiling is computed on the exact quotient; the previous integer division
     * truncated first, which produced too many groups and divided by zero whenever there were
     * more shards than URLs or the list was empty.
     *
     * @param list the shard URLs, each element expected to be a {@link String}; may be null
     * @param num  the number of shards, or null to use one shard per URL
     * @return the grouped URLs in input order, an empty list for an empty input,
     *         or null when {@code list} is null
     * @throws IllegalArgumentException if {@code num} is zero or negative and there are URLs
     * @throws ClassCastException if an element of {@code list} is not a String
     */
    static List<List<String>> buildShardUrls(List<Object> list, Integer num) {
        if (list == null) {
            return null;
        }
        int urlCount = list.size();
        List<List<String>> shardUrls = new ArrayList<>(urlCount);
        if (urlCount == 0) {
            return shardUrls;
        }
        int shardCount = (num == null) ? urlCount : num.intValue();
        if (shardCount <= 0) {
            throw new IllegalArgumentException("Illegal number of shards: " + shardCount);
        }
        // Ceiling division in long arithmetic so that a huge shard count cannot overflow.
        int urlsPerShard = (int) (((long) urlCount + shardCount - 1) / shardCount);
        List<String> currentShard = null;
        for (int i = 0; i < urlCount; i++) {
            if (i % urlsPerShard == 0) {
                currentShard = new ArrayList<>();
                shardUrls.add(currentShard);
            }
            currentShard.add((String) list.get(i));
        }
        return shardUrls;
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Locates the {@code solrconfig.xml} file below a Solr home directory.
     *
     * @param file the Solr home directory
     * @return the path {@code <file>/conf/solrconfig.xml}; the file is not checked for existence
     */
    public static File getSolrConfig(File file) {
        File confDir = new File(file, "conf");
        return new File(confDir, "solrconfig.xml");
    }

    /**
     * Command line entry point: runs the tool through {@code ToolRunner} with a fresh
     * {@code Configuration} and exits the JVM with the resulting status code.
     *
     * @param strArr the raw command line arguments
     * @throws Exception whatever the tool run propagates
     */
    public static void main(String[] strArr) throws Exception {
        Configuration configuration = new Configuration();
        MapReduceIndexerTool tool = new MapReduceIndexerTool();
        int exitCode = ToolRunner.run(configuration, tool, strArr);
        System.exit(exitCode);
    }

    /**
     * Parses the command line into a fresh {@link Options} and, unless parsing already decided
     * the outcome, runs the indexing job.
     *
     * @param strArr the command line arguments
     * @return the exit code produced by argument parsing when it returns one, otherwise the
     *         result of {@link #run(Options)}
     * @throws Exception whatever parsing or the job run propagates
     */
    public int run(String[] strArr) throws Exception {
        Options options = new Options();
        Integer exitCode = new MyArgumentParser().parseArgs(strArr, getConf(), options);
        if (exitCode == null) {
            // The parser asks us to carry on with the actual job.
            return run(options);
        }
        return exitCode.intValue();
    }

    /**
     * Runs the complete indexing pipeline described by {@code options}.
     *
     * <p>Phases, in order: validate the options; configure the job; collect and randomize the
     * list of input files; run the indexing job (or a local dry run); repeatedly merge the
     * reducer outputs until their count equals the number of shards; rename the shard
     * directories; optionally rearrange the output into the backup format; move the output to
     * the results directory; optionally go live.
     *
     * @param options the settings for this run; {@code mappers}, {@code reducers},
     *                {@code fanout} and {@code solrHomeDir} may be updated as a side effect
     * @return 0 on success (including "no input files" and a completed dry run), -1 when a job
     *         or a file system operation reports failure
     * @throws ArgumentParserException if the option combination is invalid
     * @throws IllegalStateException if a derived job parameter is illegal
     * @throws Exception anything propagated from Hadoop, ZooKeeper or the file system
     */
    int run(Options options) throws Exception {
        if (getConf().getBoolean("isMR1", false) && "local".equals(getConf().get("mapred.job.tracker"))) {
            throw new IllegalStateException("Running with LocalJobRunner (i.e. all of Hadoop inside a single JVM) is not supported because LocalJobRunner does not (yet) implement the Hadoop Distributed Cache feature, which is required for passing files via --files and --libjars");
        }

        // The backup format needs a name, ZooKeeper and a collection, and excludes dry-run / go-live.
        if (options.useBackupFormat) {
            if (options.backupName == null) {
                throw new ArgumentParserException("A backup name must be specified with the --backup-name [...] parameter when the --use-backup-format option is used.", (ArgumentParser) null);
            }
            if (options.isDryRun) {
                throw new ArgumentParserException("The --dry-run parameter is not supported in a combination with the --use-backup-format option.", (ArgumentParser) null);
            }
            if (options.goLive) {
                throw new ArgumentParserException("The --go-live parameter is not supported in a combination with the --use-backup-format option.", (ArgumentParser) null);
            }
            if (options.zkHost == null) {
                throw new ArgumentParserException("The ZooKeeper host/quorum must be specified with the --zk-host [...] parameter when the --use-backup-format option is used.", (ArgumentParser) null);
            }
            if (options.collection == null) {
                throw new ArgumentParserException("A collection name must be specified with the --collection [...] parameter when the --use-backup-format option is used.", (ArgumentParser) null);
            }
        }

        // nanoTime is handed to goodbye(); `now` becomes the startTime of a backup.
        long nanoTime = System.nanoTime();
        Instant now = Instant.now();

        // ---- base job configuration ----
        if (options.fairSchedulerPool != null) {
            getConf().set("mapred.fairscheduler.pool", options.fairSchedulerPool);
        }
        getConf().setInt(SolrOutputFormat.SOLR_RECORD_WRITER_MAX_SEGMENTS, options.maxSegments);
        getConf().setBoolean("mapred.used.genericoptionsparser", true);
        if (options.log4jConfigFile != null) {
            Utils.setLogConfigFile(options.log4jConfigFile, getConf());
            addDistributedCacheFile(options.log4jConfigFile, getConf());
        }
        // Only default the setting; an explicit user choice is left untouched.
        if (getConf().get("mapreduce.job.user.classpath.first") == null) {
            getConf().setBoolean("mapreduce.job.user.classpath.first", true);
        }
        LOG.info("Using mapreduce.job.user.classpath.first=" + getConf().get("mapreduce.job.user.classpath.first"));
        this.job = Job.getInstance(getConf());
        this.job.setJarByClass(getClass());
        if (options.morphlineFile == null) {
            throw new ArgumentParserException("Argument --morphline-file is required", (ArgumentParser) null);
        }
        verifyGoLiveArgs(options, null);
        verifyZKStructure(options, null);

        // ---- number of mappers: -1 means eight times the cluster's mapper slots ----
        int maxMapTasks = new JobClient(this.job.getConfiguration()).getClusterStatus().getMaxMapTasks();
        LOG.info("Cluster reports {} mapper slots", Integer.valueOf(maxMapTasks));
        int i = options.mappers == -1 ? 8 * maxMapTasks : options.mappers;
        if (i <= 0) {
            throw new IllegalStateException("Illegal number of mappers: " + i);
        }
        options.mappers = i;

        // ---- output directory layout (an existing output directory is deleted first) ----
        FileSystem fileSystem = options.outputDir.getFileSystem(this.job.getConfiguration());
        if (fileSystem.exists(options.outputDir) && !delete(options.outputDir, true, fileSystem)) {
            return -1;
        }
        Path path = new Path(options.outputDir, RESULTS_DIR);           // final results
        Path path2 = new Path(options.outputDir, "reducers");           // reducer / merged shard output
        Path path3 = new Path(options.outputDir, "tmp1");               // holds the full input list
        Path path4 = new Path(options.outputDir, "tmp2");               // holds the randomized input list
        Path path5 = new Path(options.outputDir, "mtree-merge-output"); // output of one merge iteration
        Path path6 = new Path(path3, FULL_INPUT_LIST);

        // ---- collect the input files ----
        LOG.debug("Creating list of input files for mappers: {}", path6);
        long addInputFiles = addInputFiles(options.inputFiles, options.inputLists, path6, this.job.getConfiguration());
        if (addInputFiles == 0) {
            LOG.info("No input files found - nothing to process");
            return 0;
        }

        // ---- lines per split, effective mappers, reducers ----
        int ceilDivide = (int) ceilDivide(addInputFiles, i);
        if (ceilDivide < 0) {
            // The narrowing cast overflowed; clamp to the largest int.
            ceilDivide = Integer.MAX_VALUE;
        }
        int max = Math.max(1, ceilDivide);
        int min = Math.min(i, (int) ceilDivide(addInputFiles, max));
        calculateNumReducers(options, min);
        int i2 = options.reducers;
        LOG.info("Using these parameters: numFiles: {}, mappers: {}, realMappers: {}, reducers: {}, shards: {}, fanout: {}, maxSegments: {}", new Object[]{Long.valueOf(addInputFiles), Integer.valueOf(i), Integer.valueOf(min), Integer.valueOf(i2), options.shards, Integer.valueOf(options.fanout), Integer.valueOf(options.maxSegments)});

        // ---- randomize the input list: in memory for small lists, as a job otherwise ----
        LOG.info("Randomizing list of {} input files to spread indexing load more evenly among mappers", Long.valueOf(addInputFiles));
        long nanoTime2 = System.nanoTime();
        if (addInputFiles < this.job.getConfiguration().getInt(MAIN_MEMORY_RANDOMIZATION_THRESHOLD, 100001)) {
            randomizeFewInputFiles(fileSystem, path4, path6);
        } else if (!waitForCompletion(randomizeManyInputFiles(getConf(), path6, path4, Math.max(10000000, max)), options.isVerbose)) {
            return -1;
        }
        LOG.info("Done. Randomizing list of {} input files took {} secs", Long.valueOf(addInputFiles), Float.valueOf(((float) (System.nanoTime() - nanoTime2)) / 1.0E9f));

        // ---- wire up the indexing job; user-configured classes take precedence ----
        this.job.setInputFormatClass(NLineInputFormat.class);
        NLineInputFormat.addInputPath(this.job, path4);
        NLineInputFormat.setNumLinesPerSplit(this.job, max);
        FileOutputFormat.setOutputPath(this.job, path2);
        String str = this.job.getConfiguration().get("mapreduce.job.map.class");
        if (str == null) {
            str = MorphlineMapper.class.getName();
            this.job.setMapperClass(MorphlineMapper.class);
        }
        this.job.setJobName(getClass().getName() + "/" + Utils.getShortClassName(str));
        if (this.job.getConfiguration().get("mapreduce.job.reduce.class") == null) {
            this.job.setReducerClass(SolrReducer.class);
        }
        if (options.updateConflictResolver == null) {
            throw new IllegalArgumentException("updateConflictResolver must not be null");
        }
        this.job.getConfiguration().set(SolrReducer.UPDATE_CONFLICT_RESOLVER, options.updateConflictResolver);
        if (options.zkHost != null) {
            if (!$assertionsDisabled && options.collection == null) {
                throw new AssertionError();
            }
            if (this.job.getConfiguration().get("mapreduce.job.partitioner.class") == null) {
                this.job.setPartitionerClass(SolrCloudPartitioner.class);
            }
            this.job.getConfiguration().set(SolrCloudPartitioner.ZKHOST, options.zkHost);
            this.job.getConfiguration().set(SolrCloudPartitioner.COLLECTION, options.collection);
        }
        this.job.getConfiguration().setInt(SolrCloudPartitioner.SHARDS, options.shards.intValue());
        this.job.setOutputFormatClass(SolrOutputFormat.class);

        // ---- Solr home: either given locally or downloaded from ZooKeeper ----
        if (options.solrHomeDir != null) {
            SolrOutputFormat.setupSolrHomeCache(options.solrHomeDir, this.job);
        } else {
            if (!$assertionsDisabled && options.zkHost == null) {
                throw new AssertionError();
            }
            ZooKeeperInspector zooKeeperInspector = new ZooKeeperInspector();
            // try-with-resources closes the client exactly once, on success and on failure alike.
            try (SolrZkClient zkClient = zooKeeperInspector.getZkClient(options.zkHost)) {
                String configName = zooKeeperInspector.readConfigName(zkClient, options.collection);
                File downloadConfigDir = zooKeeperInspector.downloadConfigDir(zkClient, configName, options.useZkSolrConfig);
                SolrOutputFormat.setupSolrHomeCache(downloadConfigDir, this.job);
                options.solrHomeDir = downloadConfigDir;
            }
        }

        // ---- dry run: process the input list locally and stop ----
        MorphlineMapRunner morphlineMapRunner = setupMorphline(options);
        if (options.isDryRun && morphlineMapRunner != null) {
            LOG.info("Indexing {} files in dryrun mode", Long.valueOf(addInputFiles));
            long nanoTime3 = System.nanoTime();
            dryRun(morphlineMapRunner, fileSystem, path6);
            LOG.info("Done. Indexing {} files in dryrun mode took {} secs", Long.valueOf(addInputFiles), Float.valueOf(((float) (System.nanoTime() - nanoTime3)) / 1.0E9f));
            goodbye(null, nanoTime);
            return 0;
        }

        // ---- run the indexing job ----
        this.job.getConfiguration().set(MorphlineMapRunner.MORPHLINE_FILE_PARAM, options.morphlineFile.getName());
        this.job.setNumReduceTasks(i2);
        this.job.setOutputKeyClass(Text.class);
        this.job.setOutputValueClass(SolrInputDocumentWritable.class);
        LOG.info("Indexing {} files using {} real mappers into {} reducers", new Object[]{Long.valueOf(addInputFiles), Integer.valueOf(min), Integer.valueOf(i2)});
        long nanoTime4 = System.nanoTime();
        if (!waitForCompletion(this.job, options.isVerbose)) {
            return -1;
        }
        LOG.info("Done. Indexing {} files using {} real mappers into {} reducers took {} secs", new Object[]{Long.valueOf(addInputFiles), Integer.valueOf(min), Integer.valueOf(i2), Float.valueOf(((float) (System.nanoTime() - nanoTime4)) / 1.0E9f)});

        // ---- tree merge: each iteration divides the number of outputs by the fanout ----
        int round = i2 > options.shards.intValue() ? (int) Math.round(log(options.fanout, i2 / options.shards.intValue())) : 0;
        LOG.debug("MTree merge iterations to do: {}", Integer.valueOf(round));
        int i3 = 1;
        while (i2 > options.shards.intValue()) {
            this.job = Job.getInstance(getConf());
            this.job.setJarByClass(getClass());
            this.job.setJobName(getClass().getName() + "/" + Utils.getShortClassName(TreeMergeMapper.class));
            this.job.setMapperClass(TreeMergeMapper.class);
            this.job.setOutputFormatClass(TreeMergeOutputFormat.class);
            this.job.setNumReduceTasks(0);
            this.job.setOutputKeyClass(Text.class);
            this.job.setOutputValueClass(NullWritable.class);
            this.job.setInputFormatClass(NLineInputFormat.class);
            Path path7 = new Path(new Path(options.outputDir, "mtree-merge-input-iteration" + i3), FULL_INPUT_LIST);
            LOG.debug("MTree merge iteration {}/{}: Creating input list file for mappers {}", new Object[]{Integer.valueOf(i3), Integer.valueOf(round), path7});
            long createTreeMergeInputDirList = createTreeMergeInputDirList(path2, fileSystem, path7);
            if (createTreeMergeInputDirList != i2) {
                throw new IllegalStateException("Not same reducers: " + i2 + ", numFiles: " + createTreeMergeInputDirList);
            }
            NLineInputFormat.addInputPath(this.job, path7);
            NLineInputFormat.setNumLinesPerSplit(this.job, options.fanout);
            FileOutputFormat.setOutputPath(this.job, path5);
            LOG.info("MTree merge iteration {}/{}: Merging {} shards into {} shards using fanout {}", new Object[]{Integer.valueOf(i3), Integer.valueOf(round), Integer.valueOf(i2), Integer.valueOf(i2 / options.fanout), Integer.valueOf(options.fanout)});
            long nanoTime5 = System.nanoTime();
            if (!waitForCompletion(this.job, options.isVerbose) || !renameTreeMergeShardDirs(path5, this.job, fileSystem)) {
                return -1;
            }
            LOG.info("MTree merge iteration {}/{}: Done. Merging {} shards into {} shards using fanout {} took {} secs", new Object[]{Integer.valueOf(i3), Integer.valueOf(round), Integer.valueOf(i2), Integer.valueOf(i2 / options.fanout), Integer.valueOf(options.fanout), Float.valueOf(((float) (System.nanoTime() - nanoTime5)) / 1.0E9f)});
            // The merged output replaces the previous reducer output.
            if (!delete(path2, true, fileSystem) || !rename(path5, path2, fileSystem)) {
                return -1;
            }
            if (!$assertionsDisabled && i2 % options.fanout != 0) {
                throw new AssertionError();
            }
            i2 /= options.fanout;
            i3++;
        }
        if (!$assertionsDisabled && i2 != options.shards.intValue()) {
            throw new AssertionError();
        }

        // ---- drop the two characters that follow the output name in each shard directory name ----
        for (FileStatus fileStatus : fileSystem.listStatus(path2)) {
            String outputName = SolrOutputFormat.getOutputName(this.job);
            Path path8 = fileStatus.getPath();
            if (fileStatus.isDirectory() && path8.getName().startsWith(outputName) && !rename(path8, new Path(path8.getParent(), outputName + path8.getName().substring(outputName.length() + "-m".length())), fileSystem)) {
                return -1;
            }
        }

        // ---- optional: rearrange the shards into the backup layout ----
        if (options.useBackupFormat) {
            Path path9 = new Path(path2, options.backupName);
            fileSystem.mkdirs(path9);
            if (!moveShardsIntoBackupFormat(fileSystem, path2, path9)) {
                return -1;
            }
            ZooKeeperInspector zooKeeperInspector2 = new ZooKeeperInspector();
            String readConfigName = zooKeeperInspector2.readConfigName(options.zkHost, options.collection);
            writeBackupPropertiesFile(options, now, path9, readConfigName);
            writeCollectionState(zooKeeperInspector2, path9, options);
            System.out.println("\nCollection can be restored with the following curl command: \n > curl -i -s -L -k --negotiate -u : 'http(s)://[solr-host]:[port]/solr/admin/collections?action=RESTORE&collection=[target-collection-name]&name=" + options.backupName + "&location=" + options.outputDir + "/results&collection.configName=" + readConfigName + "'\n");
        }

        // ---- publish the results and optionally go live ----
        if (!rename(path2, path, fileSystem)) {
            return -1;
        }
        if (options.goLive) {
            modifyAclsForGoLive(options.outputDir, path, fileSystem);
            if (!new GoLive().goLive(options, listSortedOutputShardDirs(path, fileSystem))) {
                return -1;
            }
        }
        goodbye(this.job, nanoTime);
        return 0;
    }

    /**
     * Writes the collection's state, as read through the inspector, to
     * {@code <path>/zk_backup/collection_state.json}.
     *
     * <p>The state is serialized as a JSON object keyed by the collection name and passed
     * through {@link #adjustReplicaCounts(String, String)} before it is written.
     *
     * @param zooKeeperInspector used to extract the collection's {@code DocCollection}
     * @param path               the backup directory
     * @param options            supplies {@code zkHost} and {@code collection}
     * @throws IOException if the directory or the file cannot be written
     */
    private void writeCollectionState(ZooKeeperInspector zooKeeperInspector, Path path, Options options) throws IOException {
        DocCollection docCollection = zooKeeperInspector.extractDocCollection(options.zkHost, options.collection);

        // Serialize {collectionName: state} to JSON text.
        LinkedHashMap<String, Object> stateByCollection = new LinkedHashMap<>();
        stateByCollection.put(options.collection, docCollection);
        CharArr jsonBuffer = new CharArr();
        JSONWriter jsonWriter = new JSONWriter(jsonBuffer);
        jsonWriter.write(stateByCollection);
        String stateJson = adjustReplicaCounts(jsonBuffer.toString(), options.collection);

        Path zkBackupDir = new Path(path, "zk_backup");
        FileSystem backupFs = path.getFileSystem(getConf());
        backupFs.mkdirs(zkBackupDir);
        Path stateFile = new Path(zkBackupDir, "collection_state.json");
        writeHdfsFile(stateFile, stateJson);
    }

    /**
     * Writes {@code backup.properties} into the backup directory.
     *
     * <p>The file starts with two comment lines (a fixed title and the current time in RFC 1123
     * format, UTC) followed by the keys {@code backupName}, {@code index.version},
     * {@code collection.configName}, {@code startTime} and {@code collection}.
     *
     * @param options supplies {@code backupName} and {@code collection}
     * @param instant the value written as {@code startTime}
     * @param path    the backup directory
     * @param str     the value written as {@code collection.configName}
     * @throws IOException if the file cannot be written
     */
    private void writeBackupPropertiesFile(Options options, Instant instant, Path path, String str) throws IOException {
        Path propertiesFile = new Path(path, "backup.properties");
        String writtenAt = Instant.now().atOffset(ZoneOffset.UTC).format(DateTimeFormatter.RFC_1123_DATE_TIME);
        StringBuilder content = new StringBuilder();
        content.append("#Backup properties file\n#").append(writtenAt);
        content.append("\nbackupName=").append(options.backupName);
        content.append("\nindex.version=").append(Version.LATEST);
        content.append("\ncollection.configName=").append(str);
        content.append("\nstartTime=").append(instant);
        content.append("\ncollection=").append(options.collection);
        content.append("\n");
        writeHdfsFile(propertiesFile, content.toString());
    }

    /**
     * Creates the file at {@code path} on its file system and writes {@code str} to it as UTF-8.
     *
     * <p>The stream is managed by try-with-resources, so it is closed exactly once; a failure
     * while closing after a failed write is attached to the original exception as suppressed
     * instead of triggering a second close attempt.
     *
     * @param path the file to create
     * @param str  the text content
     * @throws IOException if the file cannot be created, written or closed
     */
    private void writeHdfsFile(Path path, String str) throws IOException {
        try (FSDataOutputStream out = path.getFileSystem(getConf()).create(path)) {
            out.write(str.getBytes(StandardCharsets.UTF_8));
            out.flush();
        }
    }

    /**
     * Moves the index of every {@code part-<digits>} directory below {@code path} to
     * {@code <path2>/snapshot.shard<N>}, where N is the part number plus one.
     *
     * @param fileSystem the file system holding both directories
     * @param path       the directory containing the {@code part-<digits>} shard directories
     * @param path2      the backup directory receiving the snapshots
     * @return false as soon as one rename fails, true otherwise
     * @throws IOException if listing or renaming fails
     */
    private boolean moveShardsIntoBackupFormat(FileSystem fileSystem, Path path, Path path2) throws IOException {
        Pattern partDirPattern = Pattern.compile("^.*\\/part\\-(\\d+)$");
        FileStatus[] partEntries = fileSystem.listStatus(path, p -> partDirPattern.matcher(p.toString()).matches());
        for (FileStatus partEntry : partEntries) {
            if (!partEntry.isDirectory()) {
                continue;
            }
            Path indexDir = new Path(new Path(partEntry.getPath(), "data"), "index");
            // The whole path matches the pattern, so replacing the match with group 1 yields the digits.
            String partDigits = partDirPattern.matcher(partEntry.getPath().toString()).replaceAll("$1");
            int shardNumber = Integer.parseInt(partDigits) + 1;
            Path snapshotDir = new Path(path2, "snapshot.shard" + shardNumber);
            if (!rename(indexDir, snapshotDir, fileSystem)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Resolves {@code options.reducers} to the concrete number of reducers and, when more
     * reducers than shards are used, caps {@code options.fanout} accordingly.
     *
     * <p>A request of -2 means one reducer per shard; -1 means the smaller of the cluster's
     * reduce slots and {@code i}. The result is never below the shard count; when it is above,
     * it is rounded up to the shard count times a power of the (possibly reduced) fanout.
     *
     * @param options read for {@code shards}, {@code fanout}, {@code reducers}; {@code fanout}
     *                and {@code reducers} are updated
     * @param i       the number of mappers that will really run
     * @throws IllegalStateException if shards, fanout, {@code i} or reducers are illegal
     * @throws IOException if the cluster status cannot be obtained
     */
    private void calculateNumReducers(Options options, int i) throws IOException {
        int shards = options.shards.intValue();
        if (shards <= 0) {
            throw new IllegalStateException("Illegal number of shards: " + options.shards);
        }
        if (options.fanout <= 1) {
            throw new IllegalStateException("Illegal fanout: " + options.fanout);
        }
        if (i <= 0) {
            throw new IllegalStateException("Illegal realMappers: " + i);
        }

        JobClient jobClient = new JobClient(this.job.getConfiguration());
        int reduceSlots = jobClient.getClusterStatus().getMaxReduceTasks();
        LOG.info("Cluster reports {} reduce slots", Integer.valueOf(reduceSlots));

        int requested;
        switch (options.reducers) {
            case -2:
                requested = shards;
                break;
            case -1:
                requested = Math.min(reduceSlots, i);
                break;
            case 0:
                throw new IllegalStateException("Illegal zero reducers");
            default:
                requested = options.reducers;
                break;
        }

        int reducers = Math.max(requested, shards);
        if (reducers != shards) {
            options.fanout = Math.min(options.fanout, (int) ceilDivide(reducers, shards));
            // Smallest shards * fanout^k that is not below the requested reducer count.
            int rounded = shards;
            while (rounded < reducers) {
                rounded *= options.fanout;
            }
            reducers = rounded;
            if (!$assertionsDisabled && reducers % options.fanout != 0) {
                throw new AssertionError();
            }
        }
        options.reducers = reducers;
    }

    /**
     * Rewrites the replica count properties of a collection inside a collection-state JSON
     * document.
     *
     * <p>The replicas listed under {@code shards/shard1/replicas} are counted per replica type
     * and the collection's {@code pullReplicas}, {@code nrtReplicas} and {@code tlogReplicas}
     * entries are set to those counts as strings ({@code "0"} when a type does not occur).
     *
     * @param str  the JSON document
     * @param str2 the collection name, i.e. the top-level key to adjust
     * @return the adjusted document rendered by {@code JSONUtil.toJSON} with an argument of 2
     * @throws IllegalStateException wrapping any failure while parsing or processing
     */
    private static String adjustReplicaCounts(String str, String str2) {
        try {
            Object document = ObjectBuilder.fromJSON(str);
            Map<String, Object> collectionProps = Utils.resolveMaps(document, str2);
            Map countsByType = (Map) Utils.resolveMaps(collectionProps, "shards", "shard1", "replicas").entrySet().stream()
                    .map(replicaEntry -> Utils.asMap(replicaEntry.getValue()))
                    .map(replicaProps -> Replica.Type.get(String.valueOf(replicaProps.get("type"))))
                    .collect(Collectors.groupingBy(Function.identity(), Collectors.counting()));

            String pullCount = Optional.ofNullable(countsByType.get(Replica.Type.PULL)).map(String::valueOf).orElse("0");
            collectionProps.put("pullReplicas", pullCount);
            String nrtCount = Optional.ofNullable(countsByType.get(Replica.Type.NRT)).map(String::valueOf).orElse("0");
            collectionProps.put("nrtReplicas", nrtCount);
            String tlogCount = Optional.ofNullable(countsByType.get(Replica.Type.TLOG)).map(String::valueOf).orElse("0");
            collectionProps.put("tlogReplicas", tlogCount);

            return JSONUtil.toJSON(document, 2);
        } catch (Exception e) {
            throw new IllegalStateException("Could not process replica counts", e);
        }
    }

    /**
     * Writes the full list of input files, one path per line, to {@code path}.
     *
     * <p>Every entry of {@code list} that exists is walked recursively, skipping files and
     * directories whose name starts with "." or "_". Every entry of {@code list2} is read as a
     * text file of paths and copied line by line; "-" reads standard input, a path without
     * scheme and authority is read from the local disk, anything else through its Hadoop file
     * system.
     *
     * <p>All streams are managed by try-with-resources so that each one is closed exactly once
     * and a failure while closing cannot hide the exception that caused the abort.
     *
     * @param list          files or directory trees to index
     * @param list2         files that themselves contain one input path per line
     * @param path          the list file to create
     * @param configuration used to resolve file systems
     * @return the number of lines written to {@code path}
     * @throws IOException if reading or writing fails
     */
    private long addInputFiles(List<Path> list, List<Path> list2, Path path, Configuration configuration) throws IOException {
        PathFilter visibleOnly = candidate -> {
            String name = candidate.getName();
            return !(name.startsWith(".") || name.startsWith("_"));
        };
        long numFiles = 0;
        try (FSDataOutputStream out = path.getFileSystem(configuration).create(path);
             BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out, Charsets.UTF_8))) {
            for (Path inputFile : list) {
                FileSystem inputFs = inputFile.getFileSystem(configuration);
                if (inputFs.exists(inputFile)) {
                    numFiles += addInputFilesRecursively(inputFile, writer, inputFs, visibleOnly);
                }
            }
            for (Path inputList : list2) {
                try (InputStream in = openInputListStream(inputList, configuration);
                     BufferedReader reader = new BufferedReader(new InputStreamReader(in, Charsets.UTF_8))) {
                    String line;
                    while ((line = reader.readLine()) != null) {
                        writer.write(line + "\n");
                        numFiles++;
                    }
                }
            }
        }
        return numFiles;
    }

    /** Opens an input list: "-" is standard input, a scheme-less absolute path is a local file, else a Hadoop file. */
    private static InputStream openInputListStream(Path inputList, Configuration configuration) throws IOException {
        if (inputList.toString().equals("-")) {
            return System.in;
        }
        if (inputList.isAbsoluteAndSchemeAuthorityNull()) {
            return new BufferedInputStream(new FileInputStream(inputList.toString()));
        }
        return inputList.getFileSystem(configuration).open(inputList);
    }

    /**
     * Writes the path of every file below {@code path} that passes {@code pathFilter} to
     * {@code writer}, one per line, descending into accepted directories.
     *
     * @param path       the directory (or file) to list
     * @param writer     receives one path per line
     * @param fileSystem the file system that owns {@code path}
     * @param pathFilter decides which entries are listed at every level
     * @return the number of file paths written
     * @throws IOException if listing or writing fails
     */
    private long addInputFilesRecursively(Path path, Writer writer, FileSystem fileSystem, PathFilter pathFilter) throws IOException {
        long written = 0;
        for (FileStatus entry : fileSystem.listStatus(path, pathFilter)) {
            LOG.debug("Adding path {}", entry.getPath());
            if (!entry.isDirectory()) {
                writer.write(entry.getPath().toString() + "\n");
                written++;
                continue;
            }
            written += addInputFilesRecursively(entry.getPath(), writer, fileSystem, pathFilter);
        }
        return written;
    }

    /**
     * Shuffles the lines of the input list in memory and writes them to
     * {@code <path>/<FULL_INPUT_LIST>}.
     *
     * <p>The shuffle uses a fixed seed, so the same input always yields the same order.
     * The reader stays open for the whole read loop; the previous structure closed it after the
     * first line, which made the second {@code readLine()} fail on a closed stream.
     *
     * @param fileSystem the file system holding both files
     * @param path       the directory that receives the randomized list
     * @param path2      the list file to read
     * @throws IOException if reading or writing fails
     */
    private void randomizeFewInputFiles(FileSystem fileSystem, Path path, Path path2) throws IOException {
        List<String> lines = new ArrayList<>();
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(fileSystem.open(path2), Charsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                lines.add(line);
            }
        }

        Collections.shuffle(lines, new Random(421439783L));

        Path randomizedList = new Path(path, FULL_INPUT_LIST);
        try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fileSystem.create(randomizedList), Charsets.UTF_8))) {
            for (String shuffled : lines) {
                writer.write(shuffled + "\n");
            }
        }
    }

    /**
     * Builds (but does not submit) the job that randomizes a large input list.
     *
     * @param configuration the base configuration for the job
     * @param path          the list file to read
     * @param path2         the output directory of the job
     * @param i             the number of input lines handed to each mapper
     * @return the configured job, using the line randomizer mapper and a single line
     *         randomizer reducer
     * @throws IOException if the job cannot be created or configured
     */
    private Job randomizeManyInputFiles(Configuration configuration, Path path, Path path2, int i) throws IOException {
        Job randomizer = Job.getInstance(configuration);
        randomizer.setJarByClass(getClass());
        String jobName = getClass().getName() + "/" + Utils.getShortClassName(LineRandomizerMapper.class);
        randomizer.setJobName(jobName);

        // Input: the list file, split every i lines.
        randomizer.setInputFormatClass(NLineInputFormat.class);
        NLineInputFormat.addInputPath(randomizer, path);
        NLineInputFormat.setNumLinesPerSplit(randomizer, i);

        // Processing: one reducer, so a single output file is produced.
        randomizer.setMapperClass(LineRandomizerMapper.class);
        randomizer.setReducerClass(LineRandomizerReducer.class);

        // Output: plain text below path2.
        randomizer.setOutputFormatClass(TextOutputFormat.class);
        FileOutputFormat.setOutputPath(randomizer, path2);
        randomizer.setNumReduceTasks(1);
        randomizer.setOutputKeyClass(LongWritable.class);
        randomizer.setOutputValueClass(Text.class);
        return randomizer;
    }

    /**
     * Appends a local file to the comma-separated {@code tmpfiles} entry of the configuration.
     *
     * <p>The entry for the file is produced by running {@code GenericOptionsParser} with
     * {@code --files <canonical path>} against a copy of the configuration and reading back the
     * {@code tmpfiles} value it sets.
     *
     * @param file          the local file to add
     * @param configuration the configuration whose {@code tmpfiles} entry is extended
     * @throws IOException if the canonical path cannot be resolved or option parsing fails
     */
    private void addDistributedCacheFile(File file, Configuration configuration) throws IOException {
        String existing = configuration.get("tmpfiles", "");
        String prefix = existing.length() > 0 ? existing + "," : existing;

        Configuration scratch = new Configuration(configuration);
        String[] filesOption = new String[]{"--files", file.getCanonicalPath()};
        String added = new GenericOptionsParser(scratch, filesOption).getConfiguration().get("tmpfiles");
        if (!$assertionsDisabled && added == null) {
            throw new AssertionError();
        }
        if (!$assertionsDisabled && added.length() <= 0) {
            throw new AssertionError();
        }
        configuration.set("tmpfiles", prefix + added);
    }

    /**
     * Registers the morphline file with the job and, in dry-run mode, prepares a local
     * {@code MorphlineMapRunner}.
     *
     * <p>In dry-run mode the URLs of all {@link URLClassLoader}s in the context class loader
     * chain are appended to the {@code java.class.path} system property before the runner is
     * created. Entries of one loader are joined with the path separator only between entries;
     * the previous guard was always true and put a separator in front of the first entry too,
     * which produced empty class path elements.
     *
     * @param options supplies {@code morphlineId}, {@code morphlineFile}, {@code isDryRun} and
     *                {@code solrHomeDir}
     * @return the runner to use for the dry run, or null when this is not a dry run
     * @throws IOException if the morphline file cannot be added to the distributed cache
     * @throws URISyntaxException if a class loader URL cannot be converted to a URI
     */
    private MorphlineMapRunner setupMorphline(Options options) throws IOException, URISyntaxException {
        if (options.morphlineId != null) {
            this.job.getConfiguration().set(MorphlineMapRunner.MORPHLINE_ID_PARAM, options.morphlineId);
        }
        addDistributedCacheFile(options.morphlineFile, this.job.getConfiguration());
        if (!options.isDryRun) {
            return null;
        }

        LOG.trace("dryRun: java.class.path: {}", System.getProperty("java.class.path"));
        String classPath = "";
        // Walk up the loader chain; parents are prepended so they come first in the result.
        for (ClassLoader loader = Thread.currentThread().getContextClassLoader(); loader != null; loader = loader.getParent()) {
            if (!(loader instanceof URLClassLoader)) {
                continue;
            }
            URL[] classPathPartUrls = ((URLClassLoader) loader).getURLs();
            LOG.trace("dryRun: classPathPartURLs: {}", Arrays.asList(classPathPartUrls));
            StringBuilder classPathParts = new StringBuilder();
            for (URL url : classPathPartUrls) {
                File file = new File(url.toURI());
                if (classPathParts.length() > 0) {
                    classPathParts.append(File.pathSeparator);
                }
                classPathParts.append(file.getPath());
            }
            LOG.trace("dryRun: classPathParts: {}", classPathParts);
            String separator = (classPath.length() == 0 || classPathParts.length() == 0) ? "" : File.pathSeparator;
            classPath = classPathParts + separator + classPath;
        }

        if (classPath.length() > 0) {
            if (!$assertionsDisabled && System.getProperty("java.class.path") == null) {
                throw new AssertionError();
            }
            String fullClassPath = System.getProperty("java.class.path") + File.pathSeparator + classPath;
            LOG.trace("dryRun: fullClassPath: {}", fullClassPath);
            System.setProperty("java.class.path", fullClassPath);
        }

        // The dry run executes locally, so the morphline file is referenced by its full local path.
        this.job.getConfiguration().set(MorphlineMapRunner.MORPHLINE_FILE_PARAM, options.morphlineFile.getPath());
        return new MorphlineMapRunner(this.job.getConfiguration(), new DryRunDocumentLoader(), options.solrHomeDir.getPath());
    }

    /**
     * Feeds every line of the UTF-8 text file at {@code path} to the given runner, then calls the
     * runner's {@code cleanup()}.
     *
     * <p>The reader is managed by try-with-resources so that a failure while closing it is attached
     * to the primary exception as suppressed instead of replacing it.
     *
     * @param morphlineMapRunner the runner that receives each line
     * @param fileSystem the file system used to open {@code path}
     * @param path the file to read line by line
     * @throws IOException if the file cannot be opened or read, or if the runner fails with one
     */
    private void dryRun(MorphlineMapRunner morphlineMapRunner, FileSystem fileSystem, Path path) throws IOException {
        try (BufferedReader reader =
                new BufferedReader(new InputStreamReader(fileSystem.open(path), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                morphlineMapRunner.map(line, this.job.getConfiguration(), null);
            }
            // Reached only when every line was processed without an exception.
            morphlineMapRunner.cleanup();
        }
    }

    /**
     * Writes the {@code data/index} sub-directory of every sorted output shard directory under
     * {@code path} to the file {@code path2}, one path per line (UTF-8, {@code "\n"} terminated).
     *
     * <p>Both the output stream and the writer are managed by try-with-resources, so they are closed
     * on every exit path and a failure while closing cannot mask the primary exception.
     *
     * @param path the directory whose shard directories are listed
     * @param fileSystem the file system to operate on
     * @param path2 the list file to create
     * @return the number of directories written
     * @throws IllegalStateException if a shard's {@code data/index} path is not a directory
     * @throws IOException on file system failures
     */
    private int createTreeMergeInputDirList(Path path, FileSystem fileSystem, Path path2) throws FileNotFoundException, IOException {
        FileStatus[] shardDirs = listSortedOutputShardDirs(path, fileSystem);
        int written = 0;
        try (FSDataOutputStream out = fileSystem.create(path2);
                Writer writer = new BufferedWriter(new OutputStreamWriter(out, StandardCharsets.UTF_8))) {
            for (FileStatus shardDir : shardDirs) {
                LOG.debug("Adding path {}", shardDir.getPath());
                Path indexDir = new Path(shardDir.getPath(), "data/index");
                if (!fileSystem.isDirectory(indexDir)) {
                    throw new IllegalStateException("Not a directory: " + indexDir);
                }
                writer.write(indexDir.toString() + "\n");
                written++;
            }
        }
        return written;
    }

    /**
     * Lists the entries directly under {@code path} whose name starts with the job's Solr output
     * name, verifies that each one is a directory, and returns them sorted by name using
     * {@code AlphaNumericComparator}.
     *
     * @param path the directory to list
     * @param fileSystem the file system to operate on
     * @return the matching directories, sorted in place
     * @throws IllegalStateException if a matching entry is not a directory
     * @throws IOException on file system failures
     */
    private FileStatus[] listSortedOutputShardDirs(Path path, FileSystem fileSystem) throws FileNotFoundException, IOException {
        final String shardPrefix = SolrOutputFormat.getOutputName(this.job);
        PathFilter hasShardPrefix = candidate -> candidate.getName().startsWith(shardPrefix);
        FileStatus[] shardDirs = fileSystem.listStatus(path, hasShardPrefix);

        for (int idx = 0; idx < shardDirs.length; idx++) {
            FileStatus entry = shardDirs[idx];
            if (entry.isDirectory()) {
                continue;
            }
            throw new IllegalStateException("Not a directory: " + entry.getPath());
        }

        Comparator<FileStatus> byDirName = (left, right) ->
                new AlphaNumericComparator().compare(left.getPath().getName(), right.getPath().getName());
        Arrays.sort(shardDirs, byDirName);
        return shardDirs;
    }

    /**
     * Renames the tree-merge output directories under {@code path} to
     * {@code <outputName>-m-<shard number, at least 5 digits>}.
     *
     * <p>Works in two passes. First every matching directory is renamed to {@code "_" + name}
     * (presumably so that final names cannot collide with directories that are still waiting to be
     * renamed — confirm). Then, for each directory, the shard number is read from the
     * {@code TreeMergeMapper.SOLR_SHARD_NUMBER} file inside it, that file is deleted, and the
     * directory is renamed to its final name.
     *
     * @param path the directory containing the shard directories
     * @param job the job whose Solr output name selects the directories
     * @param fileSystem the file system to operate on
     * @return {@code true} if every rename and delete succeeded, {@code false} on the first failure
     * @throws IllegalStateException if a matching entry is not a directory
     * @throws IllegalArgumentException if a shard number file is empty
     * @throws NumberFormatException if a shard number file does not contain an integer
     * @throws IOException on file system failures
     */
    private boolean renameTreeMergeShardDirs(Path path, Job job, FileSystem fileSystem) throws IOException {
        final String outputName = SolrOutputFormat.getOutputName(job);
        PathFilter hasOutputName = candidate -> candidate.getName().startsWith(outputName);
        FileStatus[] shardDirs = fileSystem.listStatus(path, hasOutputName);
        for (FileStatus shardDir : shardDirs) {
            if (!shardDir.isDirectory()) {
                throw new IllegalStateException("Not a directory: " + shardDir.getPath());
            }
        }

        // Pass 1: move every directory to a temporary "_"-prefixed name.
        for (FileStatus shardDir : shardDirs) {
            Path original = shardDir.getPath();
            if (!rename(original, new Path(original.getParent(), "_" + original.getName()), fileSystem)) {
                return false;
            }
        }

        // The format is loop-invariant, so build it once.
        NumberFormat shardNumberFormat = NumberFormat.getInstance(Locale.ENGLISH);
        shardNumberFormat.setMinimumIntegerDigits(5);
        shardNumberFormat.setGroupingUsed(false);

        // Pass 2: read the shard number, drop the marker file, move to the final name.
        for (FileStatus shardDir : shardDirs) {
            Path original = shardDir.getPath();
            Path renamed = new Path(original.getParent(), "_" + original.getName());
            Path shardNumberFile = new Path(renamed, TreeMergeMapper.SOLR_SHARD_NUMBER);

            byte[] content;
            // try-with-resources: the stream is closed even when reading fails.
            try (FSDataInputStream in = fileSystem.open(shardNumberFile)) {
                content = ByteStreams.toByteArray(in);
            }
            Preconditions.checkArgument(content.length > 0);
            int shardNumber = Integer.parseInt(new String(content, StandardCharsets.UTF_8));

            if (!delete(shardNumberFile, false, fileSystem)) {
                return false;
            }
            Path target = new Path(renamed.getParent(), outputName + "-m-" + shardNumberFormat.format(shardNumber));
            LOG.info("MTree merge renaming solr shard: {} from dir: {} to dir: {}", shardNumber, shardDir.getPath(), target);
            if (!rename(renamed, target, fileSystem)) {
                return false;
            }
        }
        return true;
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Validates the combination of {@code --zk-host}, {@code --solr-home-dir}, {@code --go-live},
     * {@code --collection}, {@code --shard-url} and {@code --shards}.
     *
     * <p>When a ZooKeeper host is given, only the presence of a collection is checked here and the
     * method returns. Otherwise either a non-empty shard URL list or a positive shard count is
     * required; when shard URLs are given, {@code options.shards} is set to their count.
     *
     * @param options the parsed options; {@code shards} may be overwritten
     * @param argumentParser the parser attached to any reported error
     * @throws ArgumentParserException if the option combination is invalid
     */
    public static void verifyGoLiveArgs(Options options, ArgumentParser argumentParser) throws ArgumentParserException {
        final boolean hasZkHost = options.zkHost != null;

        if (!hasZkHost && options.solrHomeDir == null) {
            throw new ArgumentParserException("At least one of --zk-host or --solr-home-dir is required", argumentParser);
        }
        if (options.goLive && !hasZkHost && options.shardUrls == null) {
            throw new ArgumentParserException("--go-live requires that you also pass --shard-url or --zk-host", argumentParser);
        }
        if (hasZkHost) {
            if (options.collection == null) {
                throw new ArgumentParserException("--zk-host requires that you also pass --collection", argumentParser);
            }
            // Nothing more to verify here when ZooKeeper is used.
            return;
        }

        if (options.shardUrls == null) {
            if (options.shards == null) {
                throw new ArgumentParserException("You must specify one of the following (mutually exclusive) arguments: --zk-host or --shard-url or --shards", argumentParser);
            }
            if (options.shards.intValue() <= 0) {
                throw new ArgumentParserException("--shards must be a positive number: " + options.shards, argumentParser);
            }
        } else {
            if (options.shardUrls.size() == 0) {
                throw new ArgumentParserException("--shard-url requires at least one URL", argumentParser);
            }
            // The shard count follows from the number of shard URLs.
            options.shards = Integer.valueOf(options.shardUrls.size());
        }

        // Assertion-only post-condition: a positive shard count is known at this point.
        if (!$assertionsDisabled && (options.shards == null || options.shards.intValue() <= 0)) {
            throw new AssertionError();
        }
    }

    /**
     * When a ZooKeeper host is configured, resolves the shard URLs of {@code options.collection}
     * through a {@code ZooKeeperInspector} and stores them, together with their count, in
     * {@code options}. Does nothing when {@code options.zkHost} is {@code null}.
     *
     * <p>Only the ZooKeeper lookup itself is wrapped in the {@code try}; the "no SolrCore" error
     * raised by this method is therefore reported directly instead of being logged as an extraction
     * failure and wrapped in a second {@code ArgumentParserException}.
     *
     * @param options the parsed options; {@code shardUrls} and {@code shards} are overwritten
     * @param argumentParser the parser attached to any reported error
     * @throws ArgumentParserException if the lookup fails or yields no shard URLs
     */
    private static void verifyZKStructure(Options options, ArgumentParser argumentParser) throws ArgumentParserException {
        if (options.zkHost == null) {
            return;
        }
        if (!$assertionsDisabled && options.collection == null) {
            throw new AssertionError();
        }

        try {
            options.shardUrls = new ZooKeeperInspector().extractShardUrls(options.zkHost, options.collection);
        } catch (Exception e) {
            LOG.debug("Cannot extract SolrCloud shard URLs from ZooKeeper", e);
            throw new ArgumentParserException(e, argumentParser);
        }

        if (!$assertionsDisabled && options.shardUrls == null) {
            throw new AssertionError();
        }
        // A null result is reported like an empty one so the caller still gets a parser error
        // rather than a NullPointerException when assertions are disabled.
        if (options.shardUrls == null || options.shardUrls.size() == 0) {
            throw new ArgumentParserException("--zk-host requires ZooKeeper " + options.zkHost + " to contain at least one SolrCore for collection: " + options.collection, argumentParser);
        }
        options.shards = Integer.valueOf(options.shardUrls.size());
        LOG.debug("Using SolrCloud shard URLs: {}", options.shardUrls);
    }

    /**
     * Runs {@code job} to completion, logging its name and id before it starts and again at error
     * level if it does not succeed.
     *
     * @param job the job to run
     * @param z passed through unchanged to {@code Job.waitForCompletion}
     * @return the result of {@code Job.waitForCompletion}
     */
    private boolean waitForCompletion(Job job, boolean z) throws IOException, InterruptedException, ClassNotFoundException {
        LOG.debug("Running job: " + getJobInfo(job));
        if (job.waitForCompletion(z)) {
            return true;
        }
        LOG.error("Job failed! " + getJobInfo(job));
        return false;
    }

    /**
     * Logs the closing success messages, including the elapsed time in seconds.
     *
     * @param job the job to mention in the log, or {@code null} to skip that line
     * @param j the {@link System#nanoTime()} reading the elapsed time is measured from
     */
    private void goodbye(Job job, long j) {
        final long elapsedNanos = System.nanoTime() - j;
        final float elapsedSecs = ((float) elapsedNanos) / 1.0E9f;
        if (job != null) {
            LOG.info("Succeeded with job: " + getJobInfo(job));
        }
        LOG.info("Success. Done. Program took {} secs. Goodbye.", Float.valueOf(elapsedSecs));
    }

    /**
     * Builds the {@code "jobName: ..., jobId: ..."} description used in log messages.
     *
     * @param job the job to describe
     * @return the job's name and id in one line
     */
    private String getJobInfo(Job job) {
        StringBuilder info = new StringBuilder();
        info.append("jobName: ").append(job.getJobName());
        info.append(", jobId: ").append(job.getJobID());
        return info.toString();
    }

    /**
     * Renames {@code path} to {@code path2} and logs an error when the file system reports failure.
     *
     * @param path the current location
     * @param path2 the new location
     * @param fileSystem the file system to operate on
     * @return the result reported by {@code FileSystem.rename}
     * @throws IOException propagated from the file system
     */
    private boolean rename(Path path, Path path2, FileSystem fileSystem) throws IOException {
        if (fileSystem.rename(path, path2)) {
            return true;
        }
        LOG.error("Cannot rename " + path + " to " + path2);
        return false;
    }

    /**
     * Deletes {@code path} and logs an error when the file system reports failure.
     *
     * @param path the path to delete
     * @param z passed through unchanged as the second argument of {@code FileSystem.delete}
     * @param fileSystem the file system to operate on
     * @return the result reported by {@code FileSystem.delete}
     * @throws IOException propagated from the file system
     */
    private boolean delete(Path path, boolean z, FileSystem fileSystem) throws IOException {
        if (fileSystem.delete(path, z)) {
            return true;
        }
        LOG.error("Cannot delete " + path);
        return false;
    }

    /**
     * Best-effort: adds ACL entries for the Solr superuser to {@code path} itself and to
     * {@code path2} together with everything below it.
     *
     * <p>Skipped entirely when the system property {@code mrit.skipAddHdfsAcls} parses as
     * {@code true}. The user name comes from the system property
     * {@code solr.authorization.superuser} and defaults to {@code "solr"}. Failures are logged as
     * a warning and never propagated; this includes the {@link UnsupportedOperationException} that
     * file systems without ACL support raise from {@code modifyAclEntries}.
     *
     * @param path the path modified non-recursively
     * @param path2 the root of the tree modified recursively
     * @param fileSystem the file system to operate on
     */
    private void modifyAclsForGoLive(Path path, Path path2, FileSystem fileSystem) {
        if (Boolean.parseBoolean(System.getProperty("mrit.skipAddHdfsAcls", "false"))) {
            return;
        }
        String superuser = System.getProperty("solr.authorization.superuser", "solr");
        try {
            modifyAclsForGoLive(path, fileSystem, superuser);
            recursiveModifyAclsForGoLive(path2, fileSystem, superuser);
        } catch (IOException | UnsupportedOperationException e) {
            LOG.warn("Unable to set acl for user: " + superuser + ", --go-live may fail", e);
        }
    }

    /**
     * Applies the go-live ACL entries to {@code path} and to every entry below it.
     *
     * <p>Sub-directories are handled by the recursive call alone, whose first step modifies the
     * directory itself, so each directory is modified exactly once.
     *
     * @param path the root of the tree to modify
     * @param fileSystem the file system to operate on
     * @param str the user name the ACL entry is created for
     * @throws IOException propagated from listing or modifying an entry
     */
    private void recursiveModifyAclsForGoLive(Path path, FileSystem fileSystem, String str) throws IOException {
        modifyAclsForGoLive(path, fileSystem, str);
        for (FileStatus child : fileSystem.listStatus(path)) {
            if (child.isDirectory()) {
                recursiveModifyAclsForGoLive(child.getPath(), fileSystem, str);
            } else {
                modifyAclsForGoLive(child.getPath(), fileSystem, str);
            }
        }
    }

    /**
     * Adds two access-scope ACL entries to {@code path}: a {@code READ_EXECUTE} entry for user
     * {@code str} and an unnamed {@code MASK} entry with {@code ALL}.
     *
     * @param path the path whose ACL is modified
     * @param fileSystem the file system to operate on
     * @param str the user name for the user entry
     * @throws IOException propagated from {@code FileSystem.modifyAclEntries}
     */
    private void modifyAclsForGoLive(Path path, FileSystem fileSystem, String str) throws IOException {
        AclEntry userEntry = buildAcl(AclEntryType.USER, str, FsAction.READ_EXECUTE, AclEntryScope.ACCESS);
        AclEntry maskEntry = buildAcl(AclEntryType.MASK, null, FsAction.ALL, AclEntryScope.ACCESS);
        List<AclEntry> entries = Arrays.asList(userEntry, maskEntry);
        fileSystem.modifyAclEntries(path, entries);
    }

    /**
     * Creates an {@code AclEntry} from the given type, name, permission and scope.
     *
     * @param aclEntryType the entry type
     * @param str the entry name; the caller in this file passes {@code null} for the mask entry
     * @param fsAction the permission to grant
     * @param aclEntryScope the entry scope
     * @return the built entry
     */
    private AclEntry buildAcl(AclEntryType aclEntryType, String str, FsAction fsAction, AclEntryScope aclEntryScope) {
        return new AclEntry.Builder()
                .setType(aclEntryType)
                .setName(str)
                .setPermission(fsAction)
                .setScope(aclEntryScope)
                .build();
    }

    /**
     * Divides {@code j} by {@code j2}, rounding the result up towards positive infinity.
     *
     * <p>Java's {@code /} truncates towards zero, which already is the ceiling when the operands
     * have opposite signs; the quotient is therefore bumped only for an inexact division of
     * operands with the same sign.
     *
     * @param j the dividend
     * @param j2 the divisor
     * @return the smallest {@code long} that is not less than the exact quotient
     * @throws ArithmeticException if {@code j2} is zero
     */
    private long ceilDivide(long j, long j2) {
        long quotient = j / j2;
        boolean inexact = j % j2 != 0;
        // XOR of two longs is non-negative exactly when their sign bits agree.
        boolean sameSign = (j ^ j2) >= 0;
        if (inexact && sameSign) {
            quotient++;
        }
        return quotient;
    }

    /**
     * Computes the logarithm of {@code d2} to the base {@code d} as the ratio of their natural
     * logarithms.
     *
     * @param d the base
     * @param d2 the value whose logarithm is taken
     * @return {@code ln(d2) / ln(d)}
     */
    private double log(double d, double d2) {
        final double lnValue = Math.log(d2);
        final double lnBase = Math.log(d);
        return lnValue / lnBase;
    }

    // Static initializer (decompiler output): assigns the class-level fields in this order.
    static {
        // Cache whether assertions are disabled for this class; the guarded
        // "throw new AssertionError()" checks throughout the class read this flag.
        $assertionsDisabled = !MapReduceIndexerTool.class.desiredAssertionStatus();
        // String constant made of the fully-qualified class name plus
        // ".mainMemoryRandomizationThreshold"; presumably a configuration key — confirm at use sites.
        MAIN_MEMORY_RANDOMIZATION_THRESHOLD = MapReduceIndexerTool.class.getName() + ".mainMemoryRandomizationThreshold";
        // SLF4J logger shared by all methods of this class.
        LOG = LoggerFactory.getLogger(MapReduceIndexerTool.class);
    }
}
