/*
 * Decompiled with CFR 0.152.
 */
package EngStats;

import EngStats.NumericParser;
import EngStats.URLParser;
import EngStats.ValueComparator;
import Sanikumar_file_extractor.Dset;
import com.google.common.base.Charsets;
import com.google.common.io.Files;
import com.google.gson.Gson;
import com.google.gson.stream.JsonReader;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.util.HashMap;
import java.util.Map;
import java.util.TreeMap;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;

/**
 * Walks a directory tree of gzip-compressed, line-delimited JSON table dumps and counts,
 * per table column, which data type (Date, Boolean, Numeric, Link, List or String) is
 * selected for that column.
 *
 * <p>The aggregated result is kept in {@link #columnDatatype}; {@link #main(String[])}
 * writes it to a text file.
 */
public class ColDatatype {
    /** Keys used in {@link #temp} and {@link #columnDatatype}. */
    private static final String TYPE_BOOLEAN = "Boolean";
    private static final String TYPE_STRING = "String";
    private static final String TYPE_NUMERIC = "Numeric";
    private static final String TYPE_LINK = "Link";
    private static final String TYPE_LIST = "List";
    private static final String TYPE_DATE = "Date";

    /** Input directory used by {@link #main(String[])} when no argument is given. */
    private static final String DEFAULT_INPUT_DIR = "/data2/common-crawl/crawl-data/CC-MAIN-2015-32/segments";
    /** Output file used by {@link #main(String[])} when no second argument is given. */
    private static final String DEFAULT_OUTPUT_FILE = "/data2/szope/ColDatatypeResult/k.txt";

    /**
     * Date regular expressions (keys) and the date format each one stands for (values).
     * The expressions are written in lower case and are matched case-insensitively.
     */
    private static final Map<String, String> DATE_FORMAT_REGEXPS = ColDatatype.buildDateFormatRegexps();

    /** The keys of {@link #DATE_FORMAT_REGEXPS}, compiled once instead of once per cell. */
    private static final Pattern[] DATE_PATTERNS = ColDatatype.compileDatePatterns();

    /** Matches list-like cells of the form <code>{a|b}</code>. */
    private static final Pattern LIST_PATTERN = Pattern.compile("^\\{.+\\|.+\\}$");

    public double twohundredwordsBefore = 0.0;
    public double twohundredwordsAfter = 0.0;
    public double h = 0.0;
    public double v = 0.0;
    public double k = 0.0;
    /** Number of regular files visited so far by {@link #readDir(File)}. */
    public int fileCounter = 0;
    /** Data type name mapped to the number of columns for which that type was selected. */
    public HashMap<String, Integer> columnDatatype = new HashMap<String, Integer>();
    /** Scratch map holding the per-type cell counts of the column currently being examined. */
    public HashMap<String, Integer> temp = new HashMap<String, Integer>();

    /** Reused for every line; creating a new instance per directory is unnecessary. */
    private final Gson gson = new Gson();

    /**
     * Builds the regex-to-format table.
     *
     * <p>Separators are written as the character class {@code [./-]}. Inside a character
     * class {@code |} is a literal and {@code |-|} is a range, so a class such as
     * {@code [/|-|\.]} rejects the hyphen and accepts the pipe character instead.
     */
    private static Map<String, String> buildDateFormatRegexps() {
        Map<String, String> formats = new HashMap<String, String>();
        formats.put("^\\d{1,2}[./-]\\d{1,2}[./-]\\d{4}$", "dd.MM.yyyy");
        formats.put("^\\d{1,2}[./-]\\d{1,2}[./-]\\d{2}$", "dd.MM.yy");
        formats.put("^\\d{4}-\\d{1,2}-\\d{1,2}$", "yyyy-MM-dd");
        formats.put("^\\d{1,2}/\\d{1,2}/\\d{4}$", "MM/dd/yyyy");
        formats.put("^\\d{4}/\\d{1,2}/\\d{1,2}$", "yyyy/MM/dd");
        formats.put("^\\d{1,2}[./-][a-z]{2,}[./-]\\d{4}$", "dd/MMMM/yyyy");
        formats.put("^\\d{1,2}[./-][a-z]{2,}[./-]\\d{2}$", "dd/MMMM/yy");
        return formats;
    }

    /** Compiles every key of {@link #DATE_FORMAT_REGEXPS} as a case-insensitive pattern. */
    private static Pattern[] compileDatePatterns() {
        Pattern[] patterns = new Pattern[DATE_FORMAT_REGEXPS.size()];
        int index = 0;
        for (String regexp : DATE_FORMAT_REGEXPS.keySet()) {
            patterns[index++] = Pattern.compile(regexp, Pattern.CASE_INSENSITIVE);
        }
        return patterns;
    }

    /**
     * Recursively processes {@code f}: every regular file increments {@link #fileCounter},
     * and every file whose last dot-separated name component is {@code gz} is read and its
     * tables are fed to {@link #predictDatatype(String[][])}.
     *
     * @param f directory to scan; if it cannot be listed (not a directory, or an I/O
     *          error) the call does nothing
     * @throws FileNotFoundException if a file disappears or cannot be opened
     * @throws IOException if reading or decompressing a file fails
     */
    public void readDir(File f) throws FileNotFoundException, IOException {
        File[] entries = f.listFiles();
        if (entries == null) {
            // listFiles() returns null for non-directories and on I/O errors.
            return;
        }
        for (File entry : entries) {
            if (entry.isFile()) {
                ++this.fileCounter;
                if (ColDatatype.isGzipFile(entry)) {
                    this.processGzipFile(entry);
                }
                System.out.println("File " + this.fileCounter + " finished processing...");
            } else if (entry.isDirectory()) {
                this.readDir(entry);
            }
        }
    }

    /** Returns true when the last dot-separated component of the file name is {@code gz}. */
    private static boolean isGzipFile(File file) {
        String[] parts = file.getName().split("\\.");
        // A name consisting only of dots splits into an empty array.
        return parts.length > 0 && "gz".equals(parts[parts.length - 1]);
    }

    /**
     * Reads one gzip file line by line, parses each line as a {@code Dset} and analyses the
     * tables whose type is "relation" and that have a key column.
     */
    private void processGzipFile(File file) throws IOException {
        // try-with-resources closes all three streams even when parsing throws.
        try (FileInputStream fileIn = new FileInputStream(file);
                GZIPInputStream gzip = new GZIPInputStream(fileIn);
                BufferedReader br = new BufferedReader(new InputStreamReader(gzip, Charsets.UTF_8))) {
            // NOTE(review): the first line of every file is read and discarded, exactly as
            // the previous implementation did. Confirm that this line really is a header
            // and not a data record.
            br.readLine();
            String content;
            while ((content = br.readLine()) != null) {
                Dset dataset = this.gson.fromJson(content, Dset.class);
                if (dataset == null) {
                    // Gson returns null for blank input or a literal "null" line.
                    continue;
                }
                if (!"relation".equalsIgnoreCase(dataset.getTableType()) || !dataset.hasKeyColumn) {
                    continue;
                }
                String[][] arrayContent = dataset.getRelation();
                if (arrayContent == null) {
                    continue;
                }
                if ("horizontal".equalsIgnoreCase(dataset.getTableOrientation())) {
                    this.predictDatatype(arrayContent);
                } else {
                    this.predictDatatype(this.transpose(arrayContent));
                }
            }
        }
    }

    /**
     * Classifies every non-null cell of each {@code array[i]} (treated as one column),
     * stores the per-type counts in {@link #temp}, and increments in
     * {@link #columnDatatype} the counter of the type that comes first in
     * {@link #SortByValue(HashMap)}.
     *
     * <p>NOTE(review): this presumably selects the most frequent type; that depends on the
     * ordering implemented by {@code ValueComparator}, which is not visible here.
     *
     * @param array columns to analyse; {@code null} and {@code null} columns are skipped
     */
    public void predictDatatype(String[][] array) {
        if (array == null) {
            return;
        }
        for (String[] col : array) {
            if (col == null) {
                continue;
            }
            // Reset all six counters, always in the same insertion order.
            this.temp.put(TYPE_BOOLEAN, 0);
            this.temp.put(TYPE_STRING, 0);
            this.temp.put(TYPE_NUMERIC, 0);
            this.temp.put(TYPE_LINK, 0);
            this.temp.put(TYPE_LIST, 0);
            this.temp.put(TYPE_DATE, 0);
            for (String cell : col) {
                if (cell == null) {
                    continue;
                }
                String type = ColDatatype.classifyCell(cell);
                this.temp.put(type, this.temp.get(type) + 1);
            }
            String selected = ColDatatype.SortByValue(this.temp).firstKey();
            Integer seen = this.columnDatatype.get(selected);
            this.columnDatatype.put(selected, seen == null ? 1 : seen + 1);
        }
    }

    /**
     * Returns the type name of a single non-null cell. Checks run in a fixed order: date,
     * boolean, numeric, link, list; anything else is a string.
     */
    private static String classifyCell(String cell) {
        int length = cell.length();
        if (length < 15 && ColDatatype.isValidDate(cell)) {
            return TYPE_DATE;
        }
        // Boolean.parseBoolean is true only for "true", which would drop every "false" cell.
        if (length < 10 && (cell.equalsIgnoreCase("true") || cell.equalsIgnoreCase("false"))) {
            return TYPE_BOOLEAN;
        }
        if (length < 50 && NumericParser.parseNumeric(cell)) {
            return TYPE_NUMERIC;
        }
        if (URLParser.parseURL(cell)) {
            return TYPE_LINK;
        }
        if (LIST_PATTERN.matcher(cell).matches()) {
            return TYPE_LIST;
        }
        return TYPE_STRING;
    }

    /**
     * Tells whether {@code dateString} matches one of the known date layouts, ignoring
     * letter case.
     *
     * @param dateString candidate text; {@code null} yields {@code false}
     * @return {@code true} if the whole string matches any known date pattern
     */
    public static boolean isValidDate(String dateString) {
        if (dateString == null) {
            return false;
        }
        for (Pattern pattern : DATE_PATTERNS) {
            if (pattern.matcher(dateString).matches()) {
                return true;
            }
        }
        return false;
    }

    /**
     * Swaps rows and columns of {@code array}.
     *
     * <p>The result has as many rows as the longest input row, so ragged input is padded
     * with {@code null} cells rather than causing an index error.
     *
     * @param array matrix to transpose; may be {@code null}, empty or ragged
     * @return the transposed matrix, or an empty matrix for {@code null} or empty input
     */
    public String[][] transpose(String[][] array) {
        if (array == null || array.length == 0) {
            return new String[0][0];
        }
        int width = 0;
        for (String[] row : array) {
            if (row != null && row.length > width) {
                width = row.length;
            }
        }
        String[][] transposed = new String[width][array.length];
        for (int i = 0; i < array.length; ++i) {
            String[] row = array[i];
            if (row == null) {
                continue;
            }
            for (int j = 0; j < row.length; ++j) {
                transposed[j][i] = row[j];
            }
        }
        return transposed;
    }

    /**
     * Appends every entry of {@code map} to {@code file} as
     * {@code "Key : <key> Value : <value> , "}, in the map's iteration order.
     *
     * <p>The text is assembled first and appended with a single call. An empty or
     * {@code null} map leaves the file untouched.
     *
     * @param map entries to write
     * @param file destination file, encoded as UTF-8
     * @throws IOException if the file cannot be written
     */
    public static void writeMapString(Map<String, Integer> map, File file) throws IOException {
        if (map == null || map.isEmpty()) {
            return;
        }
        StringBuilder text = new StringBuilder();
        for (Map.Entry<String, Integer> entry : map.entrySet()) {
            text.append("Key : ").append(entry.getKey())
                    .append(" Value : ").append(entry.getValue()).append(" , ");
        }
        Files.append(text, file, Charsets.UTF_8);
    }

    /**
     * Copies {@code map} into a {@link TreeMap} ordered by a {@code ValueComparator} built
     * from that same map.
     *
     * @param map map whose entries are to be ordered
     * @return a new tree map containing the entries of {@code map}
     */
    public static TreeMap<String, Integer> SortByValue(HashMap<String, Integer> map) {
        ValueComparator vc = new ValueComparator(map);
        TreeMap<String, Integer> sortedMap = new TreeMap<String, Integer>(vc);
        sortedMap.putAll(map);
        return sortedMap;
    }

    /**
     * Runs the analysis and writes the result file.
     *
     * @param argbs optional arguments: {@code argbs[0]} is the input directory and
     *              {@code argbs[1]} the output file; missing arguments fall back to the
     *              built-in default paths
     * @throws FileNotFoundException if an input file cannot be opened
     * @throws IOException if reading input or writing the result fails
     */
    public static void main(String[] argbs) throws FileNotFoundException, IOException {
        String inputDir = argbs != null && argbs.length > 0 ? argbs[0] : DEFAULT_INPUT_DIR;
        String outputFile = argbs != null && argbs.length > 1 ? argbs[1] : DEFAULT_OUTPUT_FILE;
        File MainDirectory = new File(inputDir);
        ColDatatype colDatatype = new ColDatatype();
        colDatatype.readDir(MainDirectory);
        System.out.println("Statistics:");
        System.out.println();
        System.out.println("Column Datatype: " + colDatatype.columnDatatype.size());
        System.out.println();
        TreeMap<String, Integer> treeMap1 = ColDatatype.SortByValue(colDatatype.columnDatatype);
        File file1 = new File(outputFile);
        ColDatatype.writeMapString(treeMap1, file1);
        System.out.println("End of operation...");
    }
}

