/*
 * Decompiled with CFR 0.152.
 */
package src.main.java;

import edu.jhu.nlp.wikipedia.PageCallbackHandler;
import edu.jhu.nlp.wikipedia.WikiPage;
import edu.jhu.nlp.wikipedia.WikiXMLParser;
import edu.jhu.nlp.wikipedia.WikiXMLParserFactory;
import java.io.BufferedWriter;
import java.io.Closeable;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.util.HashSet;

public class WikiParser {
    long pagenum = 0L;
    long bigPageNum = 0L;
    BufferedWriter bw = null;
    FileOutputStream fos = null;
    BufferedWriter bw_cat = null;
    FileOutputStream fos_cat = null;

    WikiParser() {
    }

    public void openfile(String outputFilePath) {
        try {
            this.fos = new FileOutputStream(new File(outputFilePath));
            this.bw = new BufferedWriter(new OutputStreamWriter(this.fos));
            this.fos_cat = new FileOutputStream(new File(outputFilePath + "_cat"));
            this.bw_cat = new BufferedWriter(new OutputStreamWriter(this.fos_cat));
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }

    public void closefile() {
        try {
            this.bw.close();
            this.fos.close();
            this.bw_cat.close();
            this.fos_cat.close();
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }

    public static void main(String[] args) {
        try {
            if (args.length < 1) {
                System.out.println("usage: inputfile output");
            }
            final WikiParser wikip = new WikiParser();
            String inputFilePath = args[0];
            String outputFilePath = args[1];
            wikip.openfile(outputFilePath);
            WikiXMLParser wxsp = WikiXMLParserFactory.getSAXParser((String)inputFilePath);
            wxsp.setPageCallback(new PageCallbackHandler(){

                public void process(WikiPage page) {
                    ++wikip.pagenum;
                    String pageText = page.getText().replace("\r", " ").replace("\n", " ");
                    if (pageText.length() < 100) {
                        return;
                    }
                    ++wikip.bigPageNum;
                    HashSet cate = page.getCategories();
                    String category = "";
                    for (String s : cate) {
                        category = category + ":" + s;
                    }
                    String doc = category + " :::: " + page.getTitle().replace("\r", " ").replace("\n", " ") + " " + pageText + "\n";
                    try {
                        if (!category.isEmpty()) {
                            wikip.bw_cat.write(category + "\n");
                        }
                        wikip.bw.write(doc);
                    }
                    catch (Exception e) {
                        wikip.closefile();
                        e.printStackTrace();
                    }
                }
            });
            wxsp.parse();
            System.out.println("page " + wikip.pagenum + " bigpage " + wikip.bigPageNum);
            wikip.closefile();
        }
        catch (Exception e) {
            e.printStackTrace();
            System.exit(1);
        }
    }
}

