/*
 * Decompiled with CFR 0.152.
 */
package EngStats;

import EngStats.NumericParser;
import EngStats.TarFileIterator;
import EngStats.URLParser;
import EngStats.ValueComp;
import EngStats.ValueComparator;
import Sanikumar_file_extractor.Dset;
import com.google.common.base.Charsets;
import com.google.common.io.Files;
import com.google.common.net.InternetDomainName;
import com.google.gson.Gson;
import com.google.gson.stream.JsonReader;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.regex.Pattern;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Gathers statistics over a directory of archived table datasets.
 *
 * <p>{@link #readDir(File)} walks every archive in a directory with {@link TarFileIterator},
 * parses each line of each entry as a JSON {@link Dset}, and for every dataset
 * <ul>
 *   <li>counts its orientation and the dominant data type of each column
 *       ({@link #otherStats(Dset)}, {@link #predictDatatype(String[][])}), and</li>
 *   <li>tallies the private domain, public suffix and top-level domain of its URL
 *       ({@link #getTopPrivateDomain(String)}).</li>
 * </ul>
 *
 * <p>All counters are plain public fields and the class performs no synchronization.
 */
public class TLDsExtract {
    /** Host written as four dot-separated digit groups (IPv4 literal form). */
    private static final Pattern IPV4_HOST = Pattern.compile("[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+");
    /** Cell of the form <code>{a|b}</code>: braces around at least two pipe-separated parts. */
    private static final Pattern LIST_CELL = Pattern.compile("^\\{.+\\|.+\\}$");

    /** Number of archive entries processed so far by {@link #readDir(File)}. */
    int fileCounter = 0;

    // The following counters are declared for external consumers; apart from
    // `horizontal` and `vertical` none of them is updated by a method of this class.
    public double timeStampBefore = 0.0;
    public double timeStampAfter = 0.0;
    /** Number of datasets whose orientation is "horizontal" (case-insensitive). */
    public double horizontal = 0.0;
    /** Number of datasets with any other orientation. */
    public double vertical = 0.0;
    public double headers = 0.0;
    public double keyColumns = 0.0;
    public double lastModofied = 0.0;
    public double relationT = 0.0;
    public double othersT = 0.0;
    public double minColHorizontal = 0.0;
    public double minRowHorizontal = 0.0;
    public double maxColHorizontal = 0.0;
    public double maxRowHorizontal = 0.0;
    public double minColVertical = 0.0;
    public double minRowVertical = 0.0;
    public double maxColVertical = 0.0;
    public double maxRowVertical = 0.0;
    public double avgColHori = 0.0;
    public double avgColVerti = 0.0;
    public double avgRowHori = 0.0;
    public double avgRowVerti = 0.0;
    public double all = 0.0;
    public double Header = 0.0;
    int val = 0;

    /** Known top-level domains, each stored exactly as it appears in the TLD table (see {@link #setTLDSet()}). */
    public Set<String> tldSet = new HashSet<>();
    /** URL -> occurrences, for hosts that are not under a public suffix. */
    public HashMap<String, Integer> notTopPrivateDomainHM = new HashMap<>();
    /** Top private domain -> occurrences. */
    public HashMap<String, Integer> topPrivateDomainHM = new HashMap<>();
    /** URL -> occurrences, for hosts without a public suffix. */
    public HashMap<String, Integer> notPublicSuffixHM = new HashMap<>();
    /** Public suffix -> occurrences. */
    public HashMap<String, Integer> publicSuffixHM = new HashMap<>();
    /** URL -> occurrences, for hosts whose last label is not in {@link #tldSet}. */
    public HashMap<String, Integer> notTopLevelDomainHM = new HashMap<>();
    /** Top-level domain (with leading dot) -> occurrences. */
    public HashMap<String, Integer> topLevelDomainHM = new HashMap<>();
    /** Written out by {@link #main(String[])}; not populated by any method of this class. */
    public HashMap<String, Integer> headerCount = new HashMap<>();
    /** Scratch map of per-column type counts, overwritten for every column by {@link #predictDatatype}. */
    public HashMap<String, Integer> temp = new HashMap<>();
    public HashMap<Integer, Integer> rowsperHTable = new HashMap<>();
    public HashMap<Integer, Integer> colsperHTable = new HashMap<>();
    public HashMap<Integer, Integer> rowsperVTable = new HashMap<>();
    public HashMap<Integer, Integer> colsperVTable = new HashMap<>();
    /** Data type name -> number of columns for which that type was selected. */
    public HashMap<String, Integer> columnDatatype = new HashMap<>();
    /** URL -> occurrences, for URLs whose host is missing, an IPv4 literal, or not a valid domain name. */
    public HashMap<String, Integer> notDomainHM = new HashMap<>();
    public Map<String, Integer> rowDistriHori = new HashMap<>();
    public Map<String, Integer> rowDistriVerti = new HashMap<>();
    public Map<String, Integer> colDistriHori = new HashMap<>();
    public Map<String, Integer> colDistriVerti = new HashMap<>();

    /** Date regular expression (applied to lower-cased input) -> the date format it stands for. */
    private static final Map<String, String> DATE_FORMAT_REGEXPS = buildDateFormatRegexps();
    /** {@link #DATE_FORMAT_REGEXPS} keys compiled once, so cells are not re-compiling regexes. */
    private static final List<Pattern> DATE_PATTERNS = compileAll(DATE_FORMAT_REGEXPS.keySet());

    /** Builds the regex -> format table used for date detection. */
    private static Map<String, String> buildDateFormatRegexps() {
        Map<String, String> formats = new HashMap<>();
        // Separator classes keep accepting a literal '|' (the original classes contained it),
        // and now also accept '-' in the month-name formats, where "|-|" used to be parsed
        // as a character range and silently dropped the hyphen.
        formats.put("^\\d{1,2}[.|/\\-]\\d{1,2}[.|/\\-]\\d{4}$", "dd.MM.yyyy");
        formats.put("^\\d{1,2}[.|/\\-]\\d{1,2}[.|/\\-]\\d{2}$", "dd.MM.yy");
        formats.put("^\\d{4}-\\d{1,2}-\\d{1,2}$", "yyyy-MM-dd");
        formats.put("^\\d{1,2}/\\d{1,2}/\\d{4}$", "MM/dd/yyyy");
        formats.put("^\\d{4}/\\d{1,2}/\\d{1,2}$", "yyyy/MM/dd");
        formats.put("^\\d{1,2}[/|\\-.][a-z]{2,}[/|\\-.]\\d{4}$", "dd/MMMM/yyyy");
        formats.put("^\\d{1,2}[/|\\-.][a-z]{2,}[/|\\-.]\\d{2}$", "dd/MMMM/yy");
        return Collections.unmodifiableMap(formats);
    }

    /** Compiles every regular expression in {@code regexps}. */
    private static List<Pattern> compileAll(Set<String> regexps) {
        List<Pattern> compiled = new ArrayList<>(regexps.size());
        for (String regexp : regexps) {
            compiled.add(Pattern.compile(regexp));
        }
        return Collections.unmodifiableList(compiled);
    }

    /** Adds one to the count stored under {@code key}, starting at 1 for an unseen key. */
    private static void increment(Map<String, Integer> counts, String key) {
        Integer current = counts.get(key);
        counts.put(key, current == null ? 1 : current + 1);
    }

    /**
     * Processes every archive found directly inside {@code dir}.
     *
     * <p>Each archive entry is read line by line; every non-empty line is parsed as a JSON
     * {@link Dset} and passed to {@link #otherStats(Dset)} and {@link #getTopPrivateDomain(String)}.
     * Entries whose file name is {@code "0"} are skipped (their name is printed). A progress line
     * is printed after every 10000 processed entries.
     *
     * @param dir directory whose children are opened with {@link TarFileIterator}
     * @throws IOException if {@code dir} cannot be listed or an entry cannot be read
     * @throws URISyntaxException if a dataset URL cannot be parsed
     */
    public void readDir(File dir) throws FileNotFoundException, IOException, URISyntaxException {
        File[] archives = dir.listFiles();
        if (archives == null) {
            // listFiles() returns null for non-directories and on I/O errors.
            throw new IOException("Not a readable directory: " + dir);
        }
        Gson gson = new Gson();
        for (File archive : archives) {
            TarFileIterator it = new TarFileIterator(archive.getAbsolutePath(), true, true);
            InputStream is;
            while ((is = it.getNext()) != null) {
                File inFile = it.getCurrentFile();
                if (inFile.getName().equals("0")) {
                    System.out.println(inFile.getName());
                    is.close();
                    continue;
                }
                ++this.fileCounter;
                // Closing the reader also closes the entry stream, even when parsing fails.
                // JSON is decoded as UTF-8 instead of the platform default charset.
                try (BufferedReader br = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8))) {
                    String content;
                    while ((content = br.readLine()) != null) {
                        Dset dataset = gson.fromJson(content, Dset.class);
                        if (dataset == null) {
                            // Gson yields null for an empty line.
                            continue;
                        }
                        this.otherStats(dataset);
                        this.getTopPrivateDomain(dataset.getUrl());
                    }
                }
                if (this.fileCounter % 10000 == 0) {
                    System.out.println("File " + this.fileCounter + " finished processing...");
                }
            }
        }
    }

    /**
     * Loads {@link #tldSet} from {@code table_of_tlds.html} in the working directory.
     *
     * @throws IOException if the file cannot be read
     */
    public void setTLDSet() throws IOException {
        this.setTLDSet(new File("table_of_tlds.html"));
    }

    /**
     * Loads {@link #tldSet} from an HTML document: the text of the first {@code td} of every
     * table row that contains a {@code td} is added to the set.
     *
     * @param htmlFile UTF-8 encoded HTML file containing the TLD tables
     * @throws IOException if the file cannot be read
     */
    public void setTLDSet(File htmlFile) throws IOException {
        Document doc = Jsoup.parse(htmlFile, "UTF-8");
        for (Element table : doc.select("table")) {
            for (Element row : table.select("tr:has(td)")) {
                Elements cells = row.select("td");
                this.tldSet.add(cells.get(0).text());
            }
        }
    }

    /**
     * Classifies the host of {@code url} and updates the domain tallies.
     *
     * <p>URLs without a host, with an IPv4-literal host, or with a host that is not a valid
     * domain name are counted in {@link #notDomainHM}. For every other host three independent
     * tallies are updated: top private domain, public suffix, and top-level domain (last label
     * with a leading dot, looked up in {@link #tldSet}); when a category does not apply, the
     * URL itself is counted in the matching {@code not...HM} map. A {@code null} URL is ignored.
     *
     * @param url dataset URL, may be {@code null}
     * @throws URISyntaxException if {@code url} is not a parsable URI
     */
    public void getTopPrivateDomain(String url) throws URISyntaxException {
        if (url == null) {
            // Nothing to classify and nothing meaningful to use as a map key.
            return;
        }
        String host = new URI(url).getHost();
        // getHost() is null for URIs without a server-based authority; InternetDomainName.from
        // would throw on hosts such as bracketed IPv6 literals, so validate first.
        if (host == null || IPV4_HOST.matcher(host).matches() || !InternetDomainName.isValid(host)) {
            increment(this.notDomainHM, url);
            return;
        }
        InternetDomainName domain = InternetDomainName.from(host);

        if (domain.isUnderPublicSuffix()) {
            increment(this.topPrivateDomainHM, domain.topPrivateDomain().toString());
        } else {
            increment(this.notTopPrivateDomainHM, url);
        }

        if (domain.hasPublicSuffix()) {
            increment(this.publicSuffixHM, domain.publicSuffix().toString());
        } else {
            increment(this.notPublicSuffixHM, url);
        }

        String name = domain.toString();
        // Last label with a leading dot; a name without dots yields "." + name.
        String tld = "." + name.substring(name.lastIndexOf('.') + 1);
        if (this.tldSet.contains(tld)) {
            increment(this.topLevelDomainHM, tld);
        } else {
            increment(this.notTopLevelDomainHM, url);
        }
    }

    /**
     * Appends every entry of {@code map} to {@code file} as {@code "Key : k Value : v , "}.
     *
     * @throws IOException if the file cannot be written
     */
    public static void writeMap(Map<String, Integer> map, File file) throws IOException {
        appendEntries(map, file);
    }

    /**
     * Copies {@code map} into a {@link TreeMap} ordered by a {@link ValueComp} built from it.
     * The resulting order is whatever {@code ValueComp} defines.
     */
    public static TreeMap<String, Integer> SortByValueString(Map<String, Integer> map) {
        ValueComp vc = new ValueComp(map);
        TreeMap<String, Integer> sortedMap = new TreeMap<String, Integer>(vc);
        sortedMap.putAll(map);
        return sortedMap;
    }

    /**
     * Counts the orientation of {@code dataset} and feeds its relation to
     * {@link #predictDatatype(String[][])}; non-horizontal relations are transposed first.
     * A {@code null} dataset is ignored, and a dataset without a relation is only counted.
     */
    public void otherStats(Dset dataset) {
        if (dataset == null) {
            return;
        }
        // equalsIgnoreCase is locale-independent and tolerates a missing orientation.
        boolean isHorizontal = "horizontal".equalsIgnoreCase(dataset.getTableOrientation());
        if (isHorizontal) {
            this.horizontal += 1.0;
        } else {
            this.vertical += 1.0;
        }
        String[][] relation = dataset.getRelation();
        if (relation == null) {
            return;
        }
        this.predictDatatype(isHorizontal ? relation : this.transpose(relation));
    }

    /**
     * Returns the transpose of {@code array}: element {@code [i][j]} moves to {@code [j][i]}.
     *
     * <p>The result has as many rows as the widest input row; cells missing from shorter or
     * {@code null} input rows are left {@code null}. A {@code null} or empty input yields an
     * empty array.
     */
    public String[][] transpose(String[][] array) {
        if (array == null || array.length == 0) {
            return new String[0][0];
        }
        int width = 0;
        for (String[] row : array) {
            if (row != null && row.length > width) {
                width = row.length;
            }
        }
        String[][] transposed = new String[width][array.length];
        for (int i = 0; i < array.length; ++i) {
            String[] row = array[i];
            if (row == null) {
                continue;
            }
            for (int j = 0; j < row.length; ++j) {
                transposed[j][i] = row[j];
            }
        }
        return transposed;
    }

    /**
     * Tells whether {@code dateString}, lower-cased, fully matches one of the known date patterns.
     *
     * @param dateString candidate text, may be {@code null}
     * @return {@code true} on a match; {@code false} otherwise, including for {@code null}
     */
    public static boolean isValidDate(String dateString) {
        if (dateString == null) {
            return false;
        }
        // Locale.ROOT keeps ASCII month names matching [a-z] under every default locale.
        String lowered = dateString.toLowerCase(Locale.ROOT);
        for (Pattern pattern : DATE_PATTERNS) {
            if (pattern.matcher(lowered).matches()) {
                return true;
            }
        }
        return false;
    }

    /** Returns whether {@code text} is {@code "true"} or {@code "false"}, ignoring case. */
    private static boolean isBooleanLiteral(String text) {
        return "true".equalsIgnoreCase(text) || "false".equalsIgnoreCase(text);
    }

    /**
     * Determines a data type for every element of {@code array} (each treated as one column)
     * and increments that type in {@link #columnDatatype}.
     *
     * <p>Each non-null cell is classified by the first rule that applies: Date (shorter than 15
     * characters and {@link #isValidDate}), Boolean (shorter than 10 characters and a boolean
     * literal), Numeric (shorter than 50 characters and accepted by {@code NumericParser}),
     * Link (accepted by {@code URLParser}), List (<code>{a|b}</code> form), otherwise String.
     * The per-type counts are stored in {@link #temp}, and the first key of
     * {@link #SortByValue(HashMap)} over them is the type recorded for the column.
     * {@code null} columns are skipped.
     */
    public void predictDatatype(String[][] array) {
        for (String[] column : array) {
            if (column == null) {
                continue;
            }
            int lists = 0;
            int strings = 0;
            int links = 0;
            int numerics = 0;
            int booleans = 0;
            int dates = 0;
            for (String cell : column) {
                if (cell == null) {
                    continue;
                }
                if (cell.length() < 15 && TLDsExtract.isValidDate(cell)) {
                    ++dates;
                } else if (cell.length() < 10 && isBooleanLiteral(cell)) {
                    // Boolean.parseBoolean only recognises "true", so "false" was never counted.
                    ++booleans;
                } else if (cell.length() < 50 && NumericParser.parseNumeric(cell)) {
                    ++numerics;
                } else if (URLParser.parseURL(cell)) {
                    ++links;
                } else if (LIST_CELL.matcher(cell).matches()) {
                    ++lists;
                } else {
                    ++strings;
                }
            }
            this.temp.put("Boolean", booleans);
            this.temp.put("String", strings);
            this.temp.put("Numeric", numerics);
            this.temp.put("Link", links);
            this.temp.put("List", lists);
            this.temp.put("Date", dates);
            // NOTE(review): firstKey() is taken as the dominant type, which presumes that
            // ValueComparator orders by descending count - confirm against that class.
            TreeMap<String, Integer> ranked = TLDsExtract.SortByValue(this.temp);
            increment(this.columnDatatype, ranked.firstKey());
        }
    }

    /**
     * Copies {@code map} into a {@link TreeMap} ordered by a {@link ValueComparator} built from it.
     * The resulting order is whatever {@code ValueComparator} defines.
     */
    public static TreeMap<String, Integer> SortByValue(HashMap<String, Integer> map) {
        ValueComparator vc = new ValueComparator(map);
        TreeMap<String, Integer> sortedMap = new TreeMap<String, Integer>(vc);
        sortedMap.putAll(map);
        return sortedMap;
    }

    /**
     * Appends every entry of {@code map} to {@code file} as {@code "Key : k Value : v , "}.
     *
     * @throws IOException if the file cannot be written
     */
    public static void writeMapString(Map<String, Integer> map, File file) throws IOException {
        appendEntries(map, file);
    }

    /**
     * Appends every entry of {@code map} to {@code file} as {@code "Key : k Value : v , "}.
     *
     * @throws IOException if the file cannot be written
     */
    public static void writeMapInt(Map<Integer, Integer> map, File file) throws IOException {
        appendEntries(map, file);
    }

    /**
     * Shared implementation of the {@code writeMap*} methods: formats all entries in iteration
     * order and appends them to {@code file} in a single UTF-8 write. An empty map leaves the
     * file untouched.
     */
    private static void appendEntries(Map<?, ?> map, File file) throws IOException {
        if (map.isEmpty()) {
            return;
        }
        StringBuilder text = new StringBuilder();
        for (Map.Entry<?, ?> entry : map.entrySet()) {
            text.append("Key : ").append(entry.getKey())
                .append(" Value : ").append(entry.getValue())
                .append(" , ");
        }
        Files.append(text, file, Charsets.UTF_8);
    }

    /**
     * Command-line entry point.
     *
     * <p>Loads the TLD table, processes the input directory and appends the column data type
     * counts to {@code datatypes} and the header counts to {@code header} inside the output
     * directory, which is created when missing.
     *
     * @param args {@code args[0]} = input directory, {@code args[1]} = output directory
     * @throws IllegalArgumentException if fewer than two arguments are given
     * @throws IOException if the output directory cannot be created or any file access fails
     */
    public static void main(String[] args) throws FileNotFoundException, IOException, URISyntaxException {
        if (args.length < 2) {
            throw new IllegalArgumentException("Usage: TLDsExtract <inputDir> <outputDir>");
        }
        File outDir = new File(args[1]);
        if (!outDir.isDirectory() && !outDir.mkdirs()) {
            throw new IOException("Cannot create output directory: " + outDir);
        }
        File mainDirectory = new File(args[0]);
        TLDsExtract extraction = new TLDsExtract();
        extraction.setTLDSet();
        extraction.readDir(mainDirectory);
        // Files are resolved against the output directory, so a missing trailing separator
        // in args[1] no longer places them next to the directory instead of inside it.
        // NOTE(review): stats.txt is created/truncated but nothing is written to it, as before.
        try (BufferedWriter write = new BufferedWriter(new FileWriter(new File(outDir, "stats.txt")))) {
            write.flush();
        }
        TreeMap<String, Integer> datatypeCounts = TLDsExtract.SortByValue(extraction.columnDatatype);
        TreeMap<String, Integer> headerCounts = TLDsExtract.SortByValue(extraction.headerCount);
        TLDsExtract.writeMapString(datatypeCounts, new File(outDir, "datatypes"));
        TLDsExtract.writeMapString(headerCounts, new File(outDir, "header"));
        System.out.println("End of operation...");
    }

    /**
     * Returns the median of the map's keys, each parsed as an integer. The mapped counts are
     * not used, so every distinct key carries the same weight.
     *
     * @return the middle key of the sorted keys, the mean of the two middle keys when their
     *         number is even, or {@code NaN} for an empty map
     * @throws NumberFormatException if a key is not an integer
     */
    private static double median(Map<String, Integer> map) {
        List<Integer> keys = new ArrayList<>(map.size());
        for (String key : map.keySet()) {
            keys.add(Integer.parseInt(key));
        }
        if (keys.isEmpty()) {
            return Double.NaN;
        }
        // A median is only defined on ordered data; map key order is arbitrary.
        Collections.sort(keys);
        int mid = keys.size() / 2;
        if (keys.size() % 2 == 0) {
            return ((double) keys.get(mid - 1) + (double) keys.get(mid)) / 2.0;
        }
        return keys.get(mid);
    }
}

