/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.tools;

import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import org.apache.commons.lang3.time.StopWatch;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.crawl.Generator;
import org.apache.nutch.crawl.URLPartitioner;
import org.apache.nutch.net.URLFilters;
import org.apache.nutch.net.URLNormalizers;
import org.apache.nutch.scoring.ScoringFilters;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.nutch.util.NutchJob;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class FreeGenerator
extends Configured
implements Tool {
    /** Logger shared by the tool and its nested mapper/reducer classes. */
    private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
    /** Job configuration key; {@code run} sets it from {@code -filter}, the mapper reads it to decide whether to create URLFilters. */
    private static final String FILTER_KEY = "free.generator.filter";
    /** Job configuration key; {@code run} sets it from {@code -normalize}, the mapper reads it to decide whether to create URLNormalizers. */
    private static final String NORMALIZE_KEY = "free.generator.normalize";

    /**
     * Parses the command line, configures a MapReduce job that reads plain-text
     * URL lists from the input directory and writes a {@code crawl_generate}
     * directory inside a newly named segment, then waits for the job to finish.
     *
     * @param args {@code <inputDir> <segmentsDir> [-filter] [-normalize] [-numFetchers <n>]}
     * @return {@code 0} when the job succeeded; {@code -1} on bad usage, an
     *         unknown/malformed option, or when running the job raised an
     *         {@link IOException}, {@link ClassNotFoundException} or
     *         {@link InterruptedException}
     * @throws RuntimeException if the job ran to completion but reported failure
     * @throws Exception        for any other error raised while setting up the job
     */
    @Override
    public int run(String[] args) throws Exception {
        if (args.length < 2) {
            System.err.println("Usage: FreeGenerator <inputDir> <segmentsDir> [-filter] [-normalize] [-numFetchers <n>]");
            System.err.println("\tinputDir\tinput directory containing one or more input files.");
            System.err.println("\t        \tEach text file contains a list of URLs, one URL per line");
            System.err.println("\tsegmentsDir\toutput directory, where new segment will be created");
            System.err.println("\t-filter   \trun current URLFilters on input URLs");
            System.err.println("\t-normalize\trun current URLNormalizers on input URLs");
            System.err.println("\t-numFetchers <n>\tnumber of generated fetch lists, determines number of fetcher tasks");
            return -1;
        }

        boolean filter = false;
        boolean normalize = false;
        // -1 means "not given on the command line"; resolved from the job configuration below.
        int numFetchers = -1;
        for (int i = 2; i < args.length; ++i) {
            String arg = args[i];
            if ("-filter".equals(arg)) {
                filter = true;
            } else if ("-normalize".equals(arg)) {
                normalize = true;
            } else if ("-numFetchers".equals(arg)) {
                // The option needs a value; fail like any other bad argument
                // instead of running off the end of the array.
                if (i + 1 >= args.length) {
                    LOG.error("Missing value for -numFetchers, exiting ...");
                    return -1;
                }
                String value = args[++i];
                try {
                    numFetchers = Integer.parseInt(value);
                } catch (NumberFormatException e) {
                    LOG.error("Invalid value for -numFetchers: {}, exiting ...", value);
                    return -1;
                }
            } else {
                LOG.error("Unknown argument: {}, exiting ...", arg);
                return -1;
            }
        }

        StopWatch stopWatch = new StopWatch();
        stopWatch.start();
        LOG.info("FreeGenerator: starting");

        Job job = Job.getInstance(this.getConf(), "Nutch FreeGenerator: " + args[0]);
        Configuration conf = job.getConfiguration();
        // Hand the command-line switches to the map tasks through the job configuration.
        conf.setBoolean(FILTER_KEY, filter);
        conf.setBoolean(NORMALIZE_KEY, normalize);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        job.setInputFormatClass(TextInputFormat.class);
        job.setJarByClass(FG.class);
        job.setMapperClass(FG.FGMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Generator.SelectorEntry.class);
        job.setPartitionerClass(URLPartitioner.class);
        job.setReducerClass(FG.FGReducer.class);

        String segName = Generator.generateSegmentName();

        if (numFetchers == -1) {
            // Fall back to the configured number of map tasks; use a single
            // partition if the property is not set at all.
            numFetchers = conf.getInt("mapreduce.job.maps", 1);
        }
        if ("local".equals(conf.get("mapreduce.framework.name")) && numFetchers != 1) {
            LOG.info("FreeGenerator: running in local mode, generating exactly one partition.");
            numFetchers = 1;
        }
        // One reduce task per fetch list.
        job.setNumReduceTasks(numFetchers);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(CrawlDatum.class);
        job.setSortComparatorClass(Generator.HashComparator.class);
        FileOutputFormat.setOutputPath(job, new Path(args[1], new Path(segName, "crawl_generate")));

        try {
            boolean success = job.waitForCompletion(true);
            if (!success) {
                String message = NutchJob.getJobFailureLogMessage("FreeGenerator", job);
                LOG.error(message);
                throw new RuntimeException(message);
            }
        } catch (IOException | ClassNotFoundException | InterruptedException e) {
            if (e instanceof InterruptedException) {
                // Restore the interrupt flag so callers further up can still observe it.
                Thread.currentThread().interrupt();
            }
            LOG.error("FAILED: {}", StringUtils.stringifyException(e));
            return -1;
        }

        stopWatch.stop();
        LOG.info("FreeGenerator: finished, elapsed: {} ms", stopWatch.getTime(TimeUnit.MILLISECONDS));
        return 0;
    }

    /**
     * Command-line entry point: runs this tool through {@link ToolRunner} with a
     * configuration from {@link NutchConfiguration#create()} and terminates the
     * JVM with the tool's return code.
     *
     * @param args command-line arguments, passed on to {@link #run(String[])}
     * @throws Exception if the tool run fails with an exception
     */
    public static void main(String[] args) throws Exception {
        Configuration nutchConf = NutchConfiguration.create();
        FreeGenerator tool = new FreeGenerator();
        System.exit(ToolRunner.run(nutchConf, tool, args));
    }

    public static class FG {

        public static class FGReducer
        extends Reducer<Text, Generator.SelectorEntry, Text, CrawlDatum> {
            public void reduce(Text key, Iterable<Generator.SelectorEntry> values, Reducer.Context context) throws IOException, InterruptedException {
                HashMap<Text, CrawlDatum> unique = new HashMap<Text, CrawlDatum>();
                for (Generator.SelectorEntry selectorEntry : values) {
                    unique.put(selectorEntry.url, selectorEntry.datum);
                }
                for (Map.Entry entry : unique.entrySet()) {
                    context.write((Object)((Text)entry.getKey()), (Object)((CrawlDatum)entry.getValue()));
                }
            }
        }

        public static class FGMapper
        extends Mapper<WritableComparable<?>, Text, Text, Generator.SelectorEntry> {
            private URLNormalizers normalizers = null;
            private URLFilters filters = null;
            private ScoringFilters scfilters;
            private CrawlDatum datum = new CrawlDatum();
            private Text url = new Text();
            private int defaultInterval = 0;
            Generator.SelectorEntry entry = new Generator.SelectorEntry();

            public void setup(Mapper.Context context) {
                Configuration conf = context.getConfiguration();
                this.defaultInterval = conf.getInt("db.fetch.interval.default", 0);
                this.scfilters = new ScoringFilters(conf);
                if (conf.getBoolean(FreeGenerator.FILTER_KEY, false)) {
                    this.filters = new URLFilters(conf);
                }
                if (conf.getBoolean(FreeGenerator.NORMALIZE_KEY, false)) {
                    this.normalizers = new URLNormalizers(conf, "inject");
                }
            }

            public void map(WritableComparable<?> key, Text value, Mapper.Context context) throws IOException, InterruptedException {
                String urlString = value.toString();
                try {
                    if (this.normalizers != null) {
                        urlString = this.normalizers.normalize(urlString, "inject");
                    }
                    if (urlString != null && this.filters != null) {
                        urlString = this.filters.filter(urlString);
                    }
                    if (urlString != null) {
                        this.url.set(urlString);
                        this.scfilters.injectedScore(this.url, this.datum);
                    }
                }
                catch (Exception e) {
                    LOG.warn("Error adding url '{}', skipping: {}", (Object)value, (Object)StringUtils.stringifyException((Throwable)e));
                    return;
                }
                if (urlString == null) {
                    LOG.debug("- skipping {}", (Object)value);
                    return;
                }
                this.entry.datum = this.datum;
                this.entry.url = this.url;
                this.entry.datum.setFetchInterval(this.defaultInterval);
                context.write((Object)this.url, (Object)this.entry);
            }
        }
    }
}

