package temp_stats;

import Sanikumar_file_extractor.Dset;
import com.google.common.base.Charsets;
import com.google.common.io.Files;
import com.google.common.net.HttpHeaders;
import com.google.gson.Gson;
import com.google.gson.stream.JsonReader;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.TreeMap;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;
import org.apache.commons.compress.compressors.CompressorStreamFactory;

/* loaded from: input_file:temp_stats/ColDatatype.class */
public class ColDatatype {
    private static final Map<String, String> DATE_FORMAT_REGEXPS = new HashMap<String, String>() { // from class: temp_stats.ColDatatype.1
        {
            put("^\\d{1,2}[\\.|\\|/|-]\\d{1,2}[\\.|\\|/|-]\\d{4}$", "dd.MM.yyyy");
            put("^\\d{1,2}[\\.|\\|/|-]\\d{1,2}[\\.|\\|/|-]\\d{2}$", "dd.MM.yy");
            put("^\\d{4}-\\d{1,2}-\\d{1,2}$", "yyyy-MM-dd");
            put("^\\d{1,2}/\\d{1,2}/\\d{4}$", "MM/dd/yyyy");
            put("^\\d{4}/\\d{1,2}/\\d{1,2}$", "yyyy/MM/dd");
            put("^\\d{1,2}[/|-|\\.][a-z]{2,}[/|-|\\.]\\d{4}$", "dd/MMMM/yyyy");
            put("^\\d{1,2}[/|-|\\.][a-z]{2,}[/|-|\\.]\\d{2}$", "dd/MMMM/yy");
        }
    };
    public double twohundredwordsBefore = 0.0d;
    public double twohundredwordsAfter = 0.0d;
    public double h = 0.0d;
    public double v = 0.0d;
    public double k = 0.0d;
    public int fileCounter = 0;
    public HashMap<String, Integer> columnDatatype = new HashMap<>();
    public HashMap<String, Integer> temp = new HashMap<>();

    public void readDir(File file) throws FileNotFoundException, IOException {
        Gson gson = new Gson();
        for (File file2 : file.listFiles()) {
            if (file2.isFile()) {
                this.fileCounter++;
                String[] split = file2.getName().split("\\.");
                if (split[split.length - 1].equals(CompressorStreamFactory.GZIP)) {
                    GZIPInputStream gZIPInputStream = new GZIPInputStream(new FileInputStream(file2.getAbsolutePath()));
                    BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(gZIPInputStream));
                    bufferedReader.readLine();
                    while (true) {
                        String readLine = bufferedReader.readLine();
                        if (readLine == null) {
                            break;
                        }
                        new JsonReader(new StringReader(readLine)).setLenient(true);
                        Dset dset = (Dset) gson.fromJson(readLine, Dset.class);
                        String[][] relation = dset.getRelation();
                        if (dset.getTableType().toLowerCase().equals("relation") && dset.hasKeyColumn) {
                            if (dset.getTableOrientation().toLowerCase().equals("horizontal")) {
                                predictDatatype(relation);
                            } else {
                                predictDatatype(transpose(relation));
                            }
                        }
                    }
                    gZIPInputStream.close();
                    bufferedReader.close();
                }
                System.out.println("File " + this.fileCounter + " finished processing...");
            } else if (file2.isDirectory()) {
                readDir(file2);
            }
        }
    }

    public void predictDatatype(String[][] strArr) {
        for (String[] strArr2 : strArr) {
            int i = 0;
            int i2 = 0;
            int i3 = 0;
            int i4 = 0;
            int i5 = 0;
            int i6 = 0;
            for (int i7 = 0; i7 < strArr2.length; i7++) {
                if (strArr2[i7] != null) {
                    if (strArr2[i7].length() < 15 && isValidDate(strArr2[i7])) {
                        i6++;
                    } else if (strArr2[i7].length() < 10 && Boolean.parseBoolean(strArr2[i7])) {
                        i5++;
                    } else if (strArr2[i7].length() < 50 && NumericParser.parseNumeric(strArr2[i7])) {
                        i4++;
                    } else if (URLParser.parseURL(strArr2[i7])) {
                        i3++;
                    } else if (strArr2[i7].matches("^\\{.+\\|.+\\}$")) {
                        i++;
                    } else {
                        i2++;
                    }
                }
            }
            this.temp.put("Boolean", Integer.valueOf(i5));
            this.temp.put("String", Integer.valueOf(i2));
            this.temp.put("Numeric", Integer.valueOf(i4));
            this.temp.put(HttpHeaders.LINK, Integer.valueOf(i3));
            this.temp.put("List", Integer.valueOf(i));
            this.temp.put(HttpHeaders.DATE, Integer.valueOf(i6));
            TreeMap<String, Integer> SortByValue = SortByValue(this.temp);
            if (this.columnDatatype.containsKey(SortByValue.firstKey())) {
                this.columnDatatype.put(SortByValue.firstKey(), Integer.valueOf(this.columnDatatype.get(SortByValue.firstKey()).intValue() + 1));
            } else {
                this.columnDatatype.put(SortByValue.firstKey(), 1);
            }
        }
    }

    public static boolean isValidDate(String str) {
        for (String str2 : DATE_FORMAT_REGEXPS.keySet()) {
            if (str.toLowerCase().matches(str2) || str.matches(str2)) {
                return true;
            }
        }
        return false;
    }

    public String[][] transpose(String[][] strArr) {
        String[][] strArr2 = new String[strArr[1].length][strArr.length];
        for (int i = 0; i < strArr.length; i++) {
            for (int i2 = 0; i2 < strArr[i].length; i2++) {
                strArr2[i2][i] = strArr[i][i2];
            }
        }
        return strArr2;
    }

    public static void writeMapString(Map<String, Integer> map, File file) throws IOException {
        for (Map.Entry<String, Integer> entry : map.entrySet()) {
            Files.append("Key : " + entry.getKey() + " Value : " + entry.getValue() + " , ", file, Charsets.UTF_8);
        }
    }

    public static TreeMap<String, Integer> SortByValue(HashMap<String, Integer> hashMap) {
        TreeMap<String, Integer> treeMap = new TreeMap<>((Comparator<? super String>) new ValueComparator(hashMap));
        treeMap.putAll(hashMap);
        return treeMap;
    }

    public static void main(String[] strArr) throws FileNotFoundException, IOException {
        File file = new File("/data2/common-crawl/crawl-data/CC-MAIN-2015-32/segments");
        ColDatatype colDatatype = new ColDatatype();
        colDatatype.readDir(file);
        System.out.println("Statistics:");
        System.out.println();
        System.out.println("Column Datatype: " + colDatatype.columnDatatype.size());
        System.out.println();
        writeMapString(SortByValue(colDatatype.columnDatatype), new File("/data2/szope/ColDatatypeResult/k.txt"));
        System.out.println("End of operation...");
    }
}
