/*
 * Decompiled with CFR 0.152.
 */
package src.main.scala;

import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.mllib.feature.HashingTF;
import org.apache.spark.mllib.feature.IDF;
import org.apache.spark.mllib.feature.IDFModel;
import org.apache.spark.mllib.linalg.Vector;
import org.apache.spark.mllib.regression.LabeledPoint;
import org.apache.spark.rdd.RDD;
import org.apache.spark.storage.StorageLevel$;
import scala.Function1;
import scala.MatchError;
import scala.Predef$;
import scala.Serializable;
import scala.Tuple2;
import scala.collection.Seq;
import scala.collection.immutable.StringOps;
import scala.reflect.ClassTag$;
import scala.runtime.BoxesRunTime;
import scala.util.Random;

/**
 * Spark driver that turns a text corpus into TF-IDF feature vectors and writes them out
 * as {@code LabeledPoint} records with randomly assigned binary labels.
 *
 * <p>Each input line is expected to look like {@code <category>::::<text>}. Lines that do
 * not contain the {@code ::::} separator are dropped. The text part is tokenised on single
 * spaces, hashed into term-frequency vectors and rescaled by an IDF model.
 */
public final class DocToTFIDF$ {
    /** Singleton instance; the {@code MODULE$} name follows the Scala-object convention. */
    public static final DocToTFIDF$ MODULE$ = new DocToTFIDF$();

    private DocToTFIDF$() {
    }

    /**
     * Runs the TF-IDF job.
     *
     * @param args exactly three values: input path, output path, and the number of
     *             partitions passed to {@code textFile}
     * @throws NumberFormatException if the third argument is not an integer
     */
    public void main(String[] args) {
        if (args.length != 3) {
            Predef$.MODULE$.println((Object)"usage: <input> <output>  <numPar>");
            // A usage error is a failure: report it with a non-zero status.
            System.exit(1);
        }
        // Read and validate the arguments before any Spark resources are started, so a
        // malformed partition count cannot leave a running context behind.
        String input = args[0];
        String output = args[1];
        int numPar = new StringOps(Predef$.MODULE$.augmentString(args[2])).toInt();

        Logger.getLogger((String)"org.apache.spark").setLevel(Level.WARN);
        Logger.getLogger((String)"org.eclipse.jetty.server").setLevel(Level.OFF);
        SparkConf conf = new SparkConf();
        conf.setAppName("Spark Tf-idf Application");
        SparkContext sc = new SparkContext(conf);
        try {
            // Keep only well-formed lines, then split each into (hashed category, tokens).
            RDD parsedData = sc.textFile(input, numPar).filter((Function1)new Serializable(){
                public static final long serialVersionUID = 0L;

                public final boolean apply(String x$1) {
                    return x$1.split("::::", 2).length == 2;
                }
            }).map((Function1)new Serializable(){
                public static final long serialVersionUID = 0L;

                public final Tuple2<Object, Seq<String>> apply(String line) {
                    String[] data = line.split("::::", 2);
                    double cate = data[0].hashCode();
                    Seq doc = Predef$.MODULE$.refArrayOps((Object[])data[1].split(" ")).toSeq();
                    return new Tuple2((Object)BoxesRunTime.boxToDouble((double)cate), (Object)doc);
                }
            }, ClassTag$.MODULE$.apply(Tuple2.class));
            parsedData.persist(StorageLevel$.MODULE$.MEMORY_AND_DISK());

            // Only the token sequences feed the TF-IDF pipeline; the category value is not used below.
            RDD documents = parsedData.map((Function1)new Serializable(){
                public static final long serialVersionUID = 0L;

                public final Seq<String> apply(Tuple2<Object, Seq<String>> x0$1) {
                    if (x0$1 == null) {
                        throw new MatchError(x0$1);
                    }
                    return (Seq)x0$1._2();
                }
            }, ClassTag$.MODULE$.apply(Seq.class));

            HashingTF hashingTF = new HashingTF();
            RDD tf = hashingTF.transform(documents);
            // tf is consumed twice (IDF fit and IDF transform), hence the cache.
            tf.cache();
            IDFModel idf = new IDF(2).fit(tf);
            RDD tfidf = idf.transform(tf);

            // Attach a random 0.0/1.0 label to every vector.
            RDD results = tfidf.map((Function1)new Serializable(){
                public static final long serialVersionUID = 0L;

                public final LabeledPoint apply(Vector point) {
                    // Do not seed with the wall clock: every record handled within the same
                    // millisecond would get the same seed and therefore the same label.
                    // The no-arg constructor picks a distinct seed per instance.
                    Random rnd = new Random();
                    double y = rnd.nextGaussian() < 0.0 ? 0.0 : 1.0;
                    return new LabeledPoint(y, point);
                }
            }, ClassTag$.MODULE$.apply(LabeledPoint.class));
            results.saveAsTextFile(output);
        } finally {
            // Always release the context, even when a stage fails.
            sc.stop();
        }
    }
}

