package EngStats;

import Sanikumar_file_extractor.Dset;
import com.google.common.base.Charsets;
import com.google.common.io.Files;
import com.google.common.net.InternetDomainName;
import com.google.gson.Gson;
import com.google.gson.stream.JsonReader;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.regex.Pattern;
import org.apache.commons.compress.utils.CharsetNames;
import org.apache.commons.io.IOUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;

/* loaded from: input_file:EngStats/TLDsExtract.class */
/**
 * Collects statistics over a directory of tar archives whose entries contain one JSON-encoded
 * {@link Dset} per line: table shape/orientation counters and a breakdown of the tables' source
 * URLs by top private domain, public suffix and top-level domain.
 *
 * <p>All counters are plain public fields that are updated in place; the class is not
 * thread-safe.
 */
public class TLDsExtract {
    /** HTML page listing the known TLDs; resolved against the current working directory. */
    private static final String TLD_TABLE_FILE = "table_of_tlds.html";

    /** Dotted-quad host pattern, compiled once instead of once per URL. */
    private static final Pattern IPV4_HOST = Pattern.compile("[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+");

    /** Number of archive entries processed so far (entries named "0" are not counted). */
    int fileCounter = 0;
    /** Tables with a non-null "timestamp before table" context value. */
    public double timeStampBefore = 0.0d;
    /** Tables with a non-null "timestamp after table" context value. */
    public double timeStampAfter = 0.0d;
    /** Tables whose orientation is "horizontal" (case-insensitive). */
    public double horizontal = 0.0d;
    /** Tables with any other orientation. */
    public double vertical = 0.0d;
    /** Tables flagged as having a header. */
    public double headers = 0.0d;
    /** Tables flagged as having a key column. */
    public double keyColumns = 0.0d;
    /** Tables with a non-null last-modified value (misspelled name kept for compatibility). */
    public double lastModofied = 0.0d;
    /** Not updated anywhere in this class. */
    public double relationT = 0.0d;
    /** Not updated anywhere in this class. */
    public double othersT = 0.0d;
    // Min/max trackers: 0.0 means "nothing observed yet".
    public double minColHorizontal = 0.0d;
    public double minRowHorizontal = 0.0d;
    public double maxColHorizontal = 0.0d;
    public double maxRowHorizontal = 0.0d;
    public double minColVertical = 0.0d;
    public double minRowVertical = 0.0d;
    public double maxColVertical = 0.0d;
    public double maxRowVertical = 0.0d;
    // Running sums; main() divides them by the matching table count to obtain averages.
    public double avgColHori = 0.0d;
    public double avgColVerti = 0.0d;
    public double avgRowHori = 0.0d;
    public double avgRowVerti = 0.0d;
    /** Not updated anywhere in this class. */
    public double all = 0.0d;
    /** First-cell texts of the rows in the TLD table, filled by {@link #setTLDSet()}. */
    public Set<String> tldSet = new HashSet<>();
    // URL/domain occurrence counters filled by getTopPrivateDomain(String).
    public HashMap<String, Integer> notTopPrivateDomainHM = new HashMap<>();
    public HashMap<String, Integer> topPrivateDomainHM = new HashMap<>();
    public HashMap<String, Integer> notPublicSuffixHM = new HashMap<>();
    public HashMap<String, Integer> publicSuffixHM = new HashMap<>();
    public HashMap<String, Integer> notTopLevelDomainHM = new HashMap<>();
    public HashMap<String, Integer> topLevelDomainHM = new HashMap<>();
    public HashMap<String, Integer> notDomainHM = new HashMap<>();
    // Histograms: decimal length (as string) -> number of tables with that length.
    public Map<String, Integer> rowDistriHori = new HashMap<>();
    public Map<String, Integer> rowDistriVerti = new HashMap<>();
    public Map<String, Integer> colDistriHori = new HashMap<>();
    public Map<String, Integer> colDistriVerti = new HashMap<>();

    /**
     * Processes every archive found directly inside {@code file}, feeding each JSON line of each
     * archive entry to {@link #otherStats(Dset)} and {@link #getTopPrivateDomain(String)}.
     *
     * @param file directory containing the archives
     * @throws FileNotFoundException if {@code file} cannot be listed as a directory
     * @throws IOException if reading an archive entry fails
     * @throws URISyntaxException if a table URL cannot be parsed
     */
    public void readDir(File file) throws FileNotFoundException, IOException, URISyntaxException {
        File[] archives = file.listFiles();
        if (archives == null) {
            // listFiles() returns null for non-directories and on I/O errors.
            throw new FileNotFoundException("Not a readable directory: " + file);
        }
        Gson gson = new Gson();
        for (File archive : archives) {
            TarFileIterator entries = new TarFileIterator(archive.getAbsolutePath(), true, true);
            InputStream entryStream;
            // A null stream marks the end of the archive; without this exit the loop never ends.
            while ((entryStream = entries.getNext()) != null) {
                File currentFile = entries.getCurrentFile();
                if (currentFile.getName().equals("0")) {
                    System.out.println(currentFile.getName());
                    entryStream.close();
                    continue;
                }
                this.fileCounter++;
                // Closing the reader also closes the underlying entry stream, even on failure.
                try (BufferedReader reader =
                        new BufferedReader(new InputStreamReader(entryStream, Charsets.UTF_8))) {
                    String line;
                    while ((line = reader.readLine()) != null) {
                        Dset dset = gson.fromJson(line, Dset.class);
                        if (dset == null) {
                            // Gson yields null for an empty line; there is nothing to count.
                            continue;
                        }
                        otherStats(dset);
                        getTopPrivateDomain(dset.getUrl());
                    }
                }
                if (this.fileCounter % 10000 == 0) {
                    System.out.println("File " + this.fileCounter + " finished processing...");
                }
            }
        }
    }

    /**
     * Fills {@link #tldSet} with the text of the first {@code td} of every table row that
     * contains a {@code td}, read from {@code table_of_tlds.html} in the working directory.
     *
     * @throws IOException if the HTML file cannot be read
     */
    public void setTLDSet() throws IOException {
        for (Element table : Jsoup.parse(new File(TLD_TABLE_FILE), CharsetNames.UTF_8).select("table")) {
            for (Element row : table.select("tr:has(td)")) {
                this.tldSet.add(row.select("td").get(0).text());
            }
        }
    }

    /**
     * Classifies the host of {@code str} and bumps the matching counters.
     *
     * <p>URLs without a host, with an IPv4 host, or with a host that is not a syntactically valid
     * domain name are counted in {@link #notDomainHM} (keyed by the full URL) and nothing else.
     * Otherwise the top private domain, the public suffix and the last label (checked against
     * {@link #tldSet}) are each counted in the "detected" map, or the full URL is counted in the
     * corresponding "not ..." map.
     *
     * @param str URL of the page a table was extracted from
     * @throws URISyntaxException if {@code str} is not a valid URI
     */
    public void getTopPrivateDomain(String str) throws URISyntaxException {
        String host = new URI(str).getHost();
        if (host == null || IPV4_HOST.matcher(host).matches()) {
            increment(this.notDomainHM, str);
            return;
        }
        InternetDomainName domain;
        try {
            domain = InternetDomainName.from(host);
        } catch (IllegalArgumentException invalidHost) {
            // e.g. IPv6 literals; one malformed host must not abort the whole run.
            increment(this.notDomainHM, str);
            return;
        }

        if (domain.isUnderPublicSuffix()) {
            increment(this.topPrivateDomainHM, domain.topPrivateDomain().toString());
        } else {
            increment(this.notTopPrivateDomainHM, str);
        }

        if (domain.hasPublicSuffix()) {
            increment(this.publicSuffixHM, domain.publicSuffix().toString());
        } else {
            increment(this.notPublicSuffixHM, str);
        }

        // Last label of the host, prefixed with '.', which is how tldSet is looked up.
        String name = domain.toString();
        String tld = "." + name.substring(name.lastIndexOf('.') + 1);
        if (this.tldSet.contains(tld)) {
            increment(this.topLevelDomainHM, tld);
        } else {
            increment(this.notTopLevelDomainHM, str);
        }
    }

    /** Adds one to the counter stored under {@code key}, starting at 1 for a new key. */
    private static void increment(Map<String, Integer> counts, String key) {
        Integer current = counts.get(key);
        counts.put(key, current == null ? 1 : current + 1);
    }

    /**
     * Appends every entry of {@code map} to {@code file} as {@code "Key : k Value : v , "}, in the
     * map's iteration order. Existing file content is kept; an empty map leaves the file
     * untouched.
     *
     * @param map entries to write
     * @param file destination file, appended to in UTF-8
     * @throws IOException if the file cannot be written
     */
    public static void writeMap(Map<String, Integer> map, File file) throws IOException {
        StringBuilder text = new StringBuilder();
        for (Map.Entry<String, Integer> entry : map.entrySet()) {
            text.append("Key : ").append(entry.getKey())
                    .append(" Value : ").append(entry.getValue())
                    .append(" , ");
        }
        if (text.length() > 0) {
            // One append for the whole map instead of reopening the file for every entry.
            Files.append(text, file, Charsets.UTF_8);
        }
    }

    /**
     * Copies {@code map} into a {@link TreeMap} ordered by a {@code ValueComp} built over
     * {@code map}.
     *
     * @param map source entries
     * @return a new tree map containing the entries of {@code map}
     */
    public static TreeMap<String, Integer> SortByValueString(Map<String, Integer> map) {
        // NOTE(review): ordering and key-equality depend on ValueComp, which is not shown here.
        TreeMap<String, Integer> sorted = new TreeMap<>(new ValueComp(map));
        sorted.putAll(map);
        return sorted;
    }

    /**
     * Updates the orientation, metadata and size statistics with one table.
     *
     * <p>"Row length" is {@code relation[1].length} and "column length" is
     * {@code relation.length}; a relation with fewer than two first-level elements therefore
     * fails with {@link ArrayIndexOutOfBoundsException}.
     *
     * @param dset the table to account for
     */
    public void otherStats(Dset dset) {
        String[][] relation = dset.getRelation();
        // equalsIgnoreCase is locale-independent, unlike toLowerCase() under e.g. a Turkish locale.
        boolean isHorizontal = dset.getTableOrientation().equalsIgnoreCase("horizontal");
        if (isHorizontal) {
            this.horizontal += 1.0d;
        } else {
            this.vertical += 1.0d;
        }
        countMetadata(dset);

        int rowLength = relation[1].length;
        int colLength = relation.length;
        if (isHorizontal) {
            this.avgRowHori += rowLength;
            this.avgColHori += colLength;
            tallyLength(this.rowDistriHori, rowLength);
            tallyLength(this.colDistriHori, colLength);
            this.minRowHorizontal = smallerOrFirst(this.minRowHorizontal, rowLength);
            this.minColHorizontal = smallerOrFirst(this.minColHorizontal, colLength);
            this.maxRowHorizontal = Math.max(this.maxRowHorizontal, rowLength);
            this.maxColHorizontal = Math.max(this.maxColHorizontal, colLength);
        } else {
            this.avgRowVerti += rowLength;
            this.avgColVerti += colLength;
            tallyLength(this.rowDistriVerti, rowLength);
            tallyLength(this.colDistriVerti, colLength);
            // NOTE(review): for vertical tables the min/max trackers swap rows and columns
            // relative to the sums and histograms above. Kept as found; confirm which is intended.
            this.minRowVertical = smallerOrFirst(this.minRowVertical, colLength);
            this.minColVertical = smallerOrFirst(this.minColVertical, rowLength);
            this.maxRowVertical = Math.max(this.maxRowVertical, colLength);
            this.maxColVertical = Math.max(this.maxColVertical, rowLength);
        }
    }

    /** Bumps the orientation-independent counters (timestamps, header, key column, version). */
    private void countMetadata(Dset dset) {
        if (dset.getLastModified() != null) {
            this.lastModofied += 1.0d;
        }
        if (dset.getTableContextTimeStampAfterTable() != null) {
            this.timeStampAfter += 1.0d;
        }
        if (dset.getTableContextTimeStampBeforeTable() != null) {
            this.timeStampBefore += 1.0d;
        }
        // Null-safe: a record without the header flag counts as "no header".
        if (Boolean.TRUE.equals(dset.getHasHeader())) {
            this.headers += 1.0d;
        }
        if (dset.getHasKeyColumn()) {
            this.keyColumns += 1.0d;
        }
    }

    /** Adds one occurrence of {@code length} to a length histogram keyed by decimal string. */
    private static void tallyLength(Map<String, Integer> histogram, int length) {
        String key = Integer.toString(length);
        Integer current = histogram.get(key);
        histogram.put(key, current == null ? 1 : current + 1);
    }

    /** Returns the new minimum, treating a current value of 0.0 as "nothing observed yet". */
    private static double smallerOrFirst(double currentMin, int candidate) {
        return (currentMin == 0.0d || currentMin > candidate) ? candidate : currentMin;
    }

    /**
     * Command-line entry point: {@code TLDsExtract <input-directory> <output-directory>}.
     *
     * <p>Reads all archives in the input directory and writes {@code stats.txt} plus one file per
     * counter map into the output directory (created if missing). The per-map files are appended
     * to, so re-running into the same directory accumulates content.
     *
     * @param strArr {@code [0]} input directory, {@code [1]} output directory
     * @throws FileNotFoundException if the input directory or an output file cannot be opened
     * @throws IOException on any other read/write failure
     * @throws URISyntaxException if a table URL cannot be parsed
     */
    public static void main(String[] strArr) throws FileNotFoundException, IOException, URISyntaxException {
        if (strArr.length < 2) {
            System.err.println("Usage: TLDsExtract <input-directory> <output-directory>");
            return;
        }
        // Resolve outputs against the directory so a missing trailing separator still works.
        File outputDir = new File(strArr[1]);
        if (!outputDir.exists()) {
            outputDir.mkdirs();
        }
        TLDsExtract stats = new TLDsExtract();
        stats.setTLDSet();
        stats.readDir(new File(strArr[0]));

        TreeMap<String, Integer> topPrivate = SortByValueString(stats.topPrivateDomainHM);
        TreeMap<String, Integer> notTopPrivate = SortByValueString(stats.notTopPrivateDomainHM);
        TreeMap<String, Integer> topLevel = SortByValueString(stats.topLevelDomainHM);
        TreeMap<String, Integer> notTopLevel = SortByValueString(stats.notTopLevelDomainHM);
        TreeMap<String, Integer> publicSuffix = SortByValueString(stats.publicSuffixHM);
        TreeMap<String, Integer> notPublicSuffix = SortByValueString(stats.notPublicSuffixHM);
        TreeMap<String, Integer> notDomain = SortByValueString(stats.notDomainHM);
        TreeMap<String, Integer> rowHori = SortByValueString(stats.rowDistriHori);
        TreeMap<String, Integer> colHori = SortByValueString(stats.colDistriHori);
        TreeMap<String, Integer> rowVerti = SortByValueString(stats.rowDistriVerti);
        TreeMap<String, Integer> colVerti = SortByValueString(stats.colDistriVerti);

        // try-with-resources guarantees the summary is flushed and closed, even on failure.
        try (BufferedWriter out = new BufferedWriter(new FileWriter(new File(outputDir, "stats.txt")))) {
            out.write("Other Statistics:-----------------------------------------------------------\n");
            writeStat(out, "Timestamp After Table: ", stats.timeStampAfter);
            writeStat(out, "Timestamp Before Table: ", stats.timeStampBefore);
            writeStat(out, "Horizontal Tables (Only Relation): ", stats.horizontal);
            writeStat(out, "Vertical Tables (Only Relation): ", stats.vertical);
            writeStat(out, "Header Count: ", stats.headers);
            writeStat(out, "Key-Column Count: ", stats.keyColumns);
            writeStat(out, "Last Version Count: ", stats.lastModofied);
            writeStat(out, "Minimum Row Length for Horizontal Tables: ", stats.minRowHorizontal);
            writeStat(out, "Minimum Column Length for Horizontal Tables: ", stats.minColHorizontal);
            writeStat(out, "Maximum Row Length for Horizontal Tables: ", stats.maxRowHorizontal);
            writeStat(out, "Maximum Column Length for Horizontal Tables: ", stats.maxColHorizontal);
            writeStat(out, "Minimum Row Length for Vertical Tables: ", stats.minRowVertical);
            writeStat(out, "Minimum Column Length for Vertical Tables: ", stats.minColVertical);
            writeStat(out, "Maximum Row Length for Vertical Tables: ", stats.maxRowVertical);
            writeStat(out, "Maximum Column Length for Vertical Tables: ", stats.maxColVertical);
            // Averages are NaN when no table of the given orientation was seen (0.0 / 0.0).
            // Labels now name the value actually printed; two of them were swapped before.
            writeStat(out, "AVG Column Length for Horizontal Tables: ", stats.avgColHori / stats.horizontal);
            writeStat(out, "AVG Row Length for Horizontal Tables: ", stats.avgRowHori / stats.horizontal);
            writeStat(out, "AVG Column Length for Vertical Tables: ", stats.avgColVerti / stats.vertical);
            writeStat(out, "AVG Row Length for Vertical Tables: ", stats.avgRowVerti / stats.vertical);
            out.write("TLDs Statistics:------------------------------------------------------------\n");
            out.write("Detected\n");
            writeStat(out, "Top Private Domain: ", stats.topPrivateDomainHM.size());
            writeStat(out, "Top Level Domain: ", stats.topLevelDomainHM.size());
            writeStat(out, "Public Siffixes: ", stats.publicSuffixHM.size());
            out.write("Not Detected\n");
            writeStat(out, "Not Top Private Domain: ", stats.notTopPrivateDomainHM.size());
            writeStat(out, "Not Top Level Domain: ", stats.notTopLevelDomainHM.size());
            writeStat(out, "Not Public Siffixes: ", stats.notPublicSuffixHM.size());
            writeStat(out, "Not Domain: ", stats.notDomainHM.size());
            // Median labels likewise corrected to match the histogram each line reads.
            writeStat(out, "Median Row Length for Horizontal Tables: ", median(rowHori));
            writeStat(out, "Median Column Length for Horizontal Tables: ", median(colHori));
            writeStat(out, "Median Column Length for Vertical Tables: ", median(colVerti));
            writeStat(out, "Median Row Length for Vertical Tables: ", median(rowVerti));
        }

        writeMap(topPrivate, new File(outputDir, "Top_Private_Domain.txt"));
        writeMap(notTopPrivate, new File(outputDir, "Not_Top_Private_Domain.txt"));
        writeMap(topLevel, new File(outputDir, "Top_Level_Domain.txt"));
        writeMap(notTopLevel, new File(outputDir, "Not_Top_Level_Domain.txt"));
        writeMap(publicSuffix, new File(outputDir, "Public_Suffix.txt"));
        writeMap(notPublicSuffix, new File(outputDir, "Not_Public_Suffix.txt"));
        writeMap(notDomain, new File(outputDir, "Not_Domain.txt"));
        writeMap(rowHori, new File(outputDir, "rowDistriHori"));
        writeMap(colHori, new File(outputDir, "colDistriHori"));
        writeMap(rowVerti, new File(outputDir, "rowDistriVerti"));
        writeMap(colVerti, new File(outputDir, "colDistriVerti"));
        System.out.println("End of operation...");
    }

    /** Writes {@code label} immediately followed by {@code value} and a Unix line break. */
    private static void writeStat(BufferedWriter out, String label, Object value) throws IOException {
        out.write(label + value + IOUtils.LINE_SEPARATOR_UNIX);
    }

    /**
     * Computes the median of a length histogram.
     *
     * <p>Keys are decimal lengths and values are how many tables have that length. Lengths are
     * ordered numerically and weighted by their counts, so the result does not depend on the
     * iteration order of {@code map}. With an even number of observations the two middle lengths
     * are averaged.
     *
     * @param map histogram mapping length (as decimal string) to occurrence count
     * @return the median length, or {@link Double#NaN} if the histogram holds no observations
     * @throws NumberFormatException if a key is not a decimal integer
     */
    private static double median(Map<String, Integer> map) {
        TreeMap<Integer, Long> histogram = new TreeMap<>();
        long total = 0L;
        // Iterate entries rather than calling map.get(): lookups on a map sorted by a
        // value-based comparator are not reliable.
        for (Map.Entry<String, Integer> entry : map.entrySet()) {
            int length = Integer.parseInt(entry.getKey());
            long count = entry.getValue();
            Long previous = histogram.get(length);
            histogram.put(length, previous == null ? count : previous + count);
            total += count;
        }
        if (total <= 0L) {
            return Double.NaN;
        }

        // Zero-based ranks of the middle observation(s); identical when total is odd.
        long lowerRank = (total - 1L) / 2L;
        long upperRank = total / 2L;
        Integer lowerValue = null;
        long seen = 0L;
        for (Map.Entry<Integer, Long> bucket : histogram.entrySet()) {
            seen += bucket.getValue();
            if (lowerValue == null && seen > lowerRank) {
                lowerValue = bucket.getKey();
            }
            if (seen > upperRank) {
                return (lowerValue + bucket.getKey()) / 2.0d;
            }
        }
        // Unreachable for positive counts; kept so the method always returns a value.
        return Double.NaN;
    }
}
