package io.transwarp.table.comparison;

import io.transwarp.common.GlobalArgs;
import io.transwarp.common.bean.table.OrcTableBean;
import io.transwarp.common.util.UtilTools;
import io.transwarp.report.comparison.Comparison;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

/**
 * Checks every ORC table of one service against the configured warning rules
 * and files the result into the shared {@link ComparisonResultOfTable}.
 *
 * <p>For each table the following checks are run, in this order:
 * <ol>
 *   <li>bucket (data file) size and bucket data skew,</li>
 *   <li>partition count and partition data skew,</li>
 *   <li>small file statistics,</li>
 *   <li>delta directory count (transactional tables only).</li>
 * </ol>
 * Tables without any warning are stored in {@code orcTrueTables}, the others in
 * {@code orcErrorTables}; one row per detected problem is appended to
 * {@code orcErrorInfos}.
 *
 * <p>The warnings of the table currently being checked are collected in an
 * unsynchronized instance field, so one instance must not run
 * {@link #comparison()} from several threads at once.
 */
public class OrcComparisonImpl implements Comparison {
	
	/** Warning text stored on a table for which no problem was detected. */
	private static final String NO_WARNING = "null";
	/** Size limits are configured in MB and compared against sizes in bytes. */
	private static final double BYTES_PER_MB = 1024 * 1024;
	
	private final String servicename;
	private final Map<String, OrcTableBean> tables;
	private final ComparisonResultOfTable comparisonResult;
	
	/** Warnings of the table currently being checked; cleared at the start of every check. */
	private final Set<String> warning;
	
	private double orcBucketSizeLimit;
	private double orcPartitionNumberLimit;
	private double deltaNumberLimit;
	private double bucketBeginCheckLimit;
	private double bucketDataSkewLimit;
	private double bucketSizeDiffLimit;
	private double partitionBeginCheckLimit;
	private double partitionDataSkewLimit;
	private double partitionSizeDiffLimit;
	private double smallFileSizeLimit;
	
	/**
	 * @param comparisonResult result holder that receives the sorted tables and the error rows
	 * @param servicename name of the service the tables belong to; used as result key and
	 *        as first part of the table name in error rows
	 * @param tables tables to check
	 * @throws IllegalArgumentException if one of the rule properties cannot be parsed
	 */
	public OrcComparisonImpl(final ComparisonResultOfTable comparisonResult, 
			final String servicename,
			final Map<String, OrcTableBean> tables) {
		this.comparisonResult = comparisonResult;
		this.servicename = servicename;
		this.tables = tables;
		this.warning = new HashSet<String>();
		getWarnLimit();
	}
	
	/**
	 * Checks all tables, stores the warning text on every table and publishes the
	 * tables, split into "true" (no warning) and "error" tables, under the service name.
	 *
	 * <p>A failure while checking one table does not abort the run: it is reported on
	 * stderr and the table keeps the warnings that were collected before the failure.
	 */
	@Override
	public void comparison() throws Exception {
		List<OrcTableBean> trueTables = new ArrayList<OrcTableBean>();
		List<OrcTableBean> errorTables = new ArrayList<OrcTableBean>();
		for (Entry<String, OrcTableBean> entry : tables.entrySet()) {
			OrcTableBean table = entry.getValue();
			String tableWarning;
			try {
				tableWarning = check(table);
			} catch (Exception e) {
				System.err.println("failed to check orc table " + entry.getKey());
				e.printStackTrace();
				// Warnings found before the failure already produced rows in orcErrorInfos;
				// keep them so the table is not filed as healthy.
				tableWarning = joinWarnings();
			}
			table.setWarning(tableWarning);
			if (NO_WARNING.equals(tableWarning)) {
				trueTables.add(table);
			} else {
				errorTables.add(table);
			}
		}
		comparisonResult.orcTrueTables.put(servicename, sortTable(trueTables));
		comparisonResult.orcErrorTables.put(servicename, sortTable(errorTables));
	}
	
	/**
	 * Returns the tables as an array sorted by small file number, largest first.
	 *
	 * @return the sorted array, or {@code null} when the list is empty
	 */
	private OrcTableBean[] sortTable(final List<OrcTableBean> tables) {
		if (tables.isEmpty()) {
			return null;
		}
		OrcTableBean[] orderTables = tables.toArray(new OrcTableBean[tables.size()]);
		Arrays.sort(orderTables, new Comparator<OrcTableBean>() {
			@Override
			public int compare(OrcTableBean table1, OrcTableBean table2) {
				int smallFile1 = table1.getSmallFileNumber();
				int smallFile2 = table2.getSmallFileNumber();
				if (smallFile1 == smallFile2) {
					return 0;
				}
				return smallFile1 > smallFile2 ? -1 : 1;
			}
		});
		return orderTables;
	}
	
	/**
	 * Loads all limits from {@code GlobalArgs.prop_rules}, falling back to the built-in
	 * defaults. Sizes configured in MB are converted to bytes, percentages to fractions.
	 */
	private void getWarnLimit() {
		orcBucketSizeLimit = doubleRule("table.orc.bucket.size.limit", "200") * BYTES_PER_MB;
		orcPartitionNumberLimit = doubleRule("table.orc.partition.number.limit", "200");
		deltaNumberLimit = doubleRule("table.torc.delta.number.limit", "100");
		double[] bucketDataSkew = skewRule("table.orc.bucket.dataSkew.limit", "50|20|3");
		bucketBeginCheckLimit = bucketDataSkew[0];
		bucketDataSkewLimit = bucketDataSkew[1];
		bucketSizeDiffLimit = bucketDataSkew[2];
		double[] partitionDataSkew = skewRule("table.orc.partition.dataSkew.limit", "10240|20|3");
		partitionBeginCheckLimit = partitionDataSkew[0];
		partitionDataSkewLimit = partitionDataSkew[1];
		partitionSizeDiffLimit = partitionDataSkew[2];
		smallFileSizeLimit = doubleRule("cluster.smallFile.size.limit", "1") * BYTES_PER_MB;
	}
	
	/** Reads one numeric rule property. */
	private static double doubleRule(final String key, final String defaultValue) {
		return Double.parseDouble(GlobalArgs.prop_rules.getProperty(key, defaultValue));
	}
	
	/**
	 * Reads a data skew rule of the form {@code beginCheckMB|topPercent|sizeRatio}.
	 *
	 * @return {@code {begin check size in bytes, top share as fraction, size ratio}}
	 * @throws IllegalArgumentException if the value has fewer than three parts or a part
	 *         is not a number
	 */
	private static double[] skewRule(final String key, final String defaultValue) {
		String raw = GlobalArgs.prop_rules.getProperty(key, defaultValue);
		String[] parts = raw.split("\\|");
		if (parts.length < 3) {
			throw new IllegalArgumentException(String.format(
					"rule %s must have the form a|b|c but was \"%s\"", key, raw));
		}
		return new double[] {
				Double.parseDouble(parts[0]) * BYTES_PER_MB,
				Double.parseDouble(parts[1]) / 100,
				Double.parseDouble(parts[2])
		};
	}
	
	/**
	 * Runs all checks for one table.
	 *
	 * @return the detected warnings joined with {@code " && "}, or {@code "null"} if none
	 */
	private String check(final OrcTableBean table) throws Exception {
		warning.clear();
		List<Long> partitionSizes = analysisDataFileByPartition(table);
		analysisPartition(table, partitionSizes);
		analysisSmallFile(table);
		analysisDeltaNumber(table);
		return joinWarnings();
	}
	
	/** Joins the currently collected warnings, or returns {@code "null"} when there are none. */
	private String joinWarnings() {
		if (warning.isEmpty()) {
			return NO_WARNING;
		}
		StringBuilder buffer = new StringBuilder();
		for (String item : warning) {
			if (buffer.length() > 0) {
				buffer.append(" && ");
			}
			buffer.append(item);
		}
		return buffer.toString();
	}
	
	/**
	 * Sums the data file sizes per partition and runs the bucket checks on all file sizes.
	 *
	 * <p>File sizes are looked up under the key {@code <partition>-<file>}. Files without a
	 * recorded size are skipped instead of failing the whole table.
	 *
	 * @return one total size per partition; empty when the table has no recorded file sizes
	 */
	private List<Long> analysisDataFileByPartition(final OrcTableBean table) throws Exception {
		Map<String, Long> fileNameAndSizeMap = table.getFileNameAndSizeMap();
		Map<String, Set<String>> partitionListMap = table.getPartitionListMap();
		List<Long> partitionSizes = new ArrayList<Long>();
		if (fileNameAndSizeMap.isEmpty()) {
			return partitionSizes;
		}
		// Collected in a list because the number of files listed per partition is not
		// guaranteed to match the number of entries in the size map.
		List<Long> fileSizes = new ArrayList<Long>(fileNameAndSizeMap.size());
		for (Entry<String, Set<String>> entry : partitionListMap.entrySet()) {
			String partitionName = entry.getKey();
			long partitionSize = 0L;
			for (String filename : entry.getValue()) {
				Long filesize = fileNameAndSizeMap.get(String.format("%s-%s", partitionName, filename));
				if (filesize == null) {
					continue;
				}
				fileSizes.add(filesize);
				partitionSize += filesize;
			}
			partitionSizes.add(partitionSize);
		}
		if (!fileSizes.isEmpty()) {
			Long[] orderFiles = fileSizes.toArray(new Long[fileSizes.size()]);
			Arrays.sort(orderFiles);
			addFileComparison(table, orderFiles);
		}
		return partitionSizes;
	}
	
	/**
	 * Checks partition count and partition data skew and stores the partition summary
	 * ({@code max|median|min|count}) on the table. Does nothing without partition sizes.
	 */
	private void analysisPartition(final OrcTableBean table, final List<Long> partitionSizes) throws Exception {
		String partitionKey = table.getPartitionKey();
		int number = partitionSizes.size();
		if (number == 0) {
			return;
		}
		boolean hasPartitionKey = partitionKey != null && !partitionKey.equals("");
		// The partition count limit is not applied to range partitioned tables.
		if (hasPartitionKey && !table.isPartitionRange() && number > orcPartitionNumberLimit) {
			warning.add("partition number too many");
			addErrorInfo(table, "partition number too many", 
					String.format("partition number is %s", number));
		}
		Long[] orderSizes = partitionSizes.toArray(new Long[number]);
		Arrays.sort(orderSizes);
		// Skew is only evaluated once the largest partition exceeds the begin-check size.
		if (orderSizes[number - 1] > partitionBeginCheckLimit) {
			boolean isDataBias = checkDataSkew("partition", orderSizes, 0, this.partitionDataSkewLimit, this.partitionSizeDiffLimit);
			if (isDataBias) {
				addErrorInfo(table, "partition is data skew", "");
			}
		}
		table.setPartitionComparison(sizeSummary(orderSizes));
	}
	
	/**
	 * Counts the data files not larger than the small file limit and stores the count and
	 * its percentage on the table ({@code 0} and {@code "null"} when there are no files).
	 */
	private void analysisSmallFile(final OrcTableBean table) throws Exception {
		List<Long> allFiles = table.getDataFileSizes();
		int number = allFiles.size();
		if (number == 0) {
			table.setSmallFileNumber(0);
			table.setSmallFilePercent("null");
			return;
		}
		int totalSmallFile = 0;
		for (Long size : allFiles) {
			if (size <= smallFileSizeLimit) {
				totalSmallFile += 1;
			}
		}
		table.setSmallFileNumber(totalSmallFile);
		table.setSmallFilePercent(String.format("%.2f %%", totalSmallFile*100.0/number));
	}
	
	/**
	 * For transactional tables, counts the entries whose delta number exceeds the limit and
	 * stores the summary {@code max|min|exceeding} on the table.
	 */
	private void analysisDeltaNumber(final OrcTableBean table) throws Exception {
		Map<String, Integer> deltaMenus = table.getDeltaMenuNumbers();
		if (!table.isTransactional()) {
			return;
		}
		boolean hasDelta = deltaMenus != null && !deltaMenus.isEmpty();
		int maxNumber = 0;
		// Without any entry the minimum is reported as 0 rather than Integer.MAX_VALUE.
		int minNumber = hasDelta ? Integer.MAX_VALUE : 0;
		int errorPartitionNumber = 0;
		if (hasDelta) {
			for (Integer number : deltaMenus.values()) {
				maxNumber = Math.max(number, maxNumber);
				minNumber = Math.min(number, minNumber);
				if (number > this.deltaNumberLimit) {
					errorPartitionNumber += 1;
				}
			}
		}
		if (errorPartitionNumber > 0) {
			warning.add("delta dir too many");
			// %d only accepts integral arguments; the limit is held as a double, so it is
			// passed as a long to avoid an IllegalFormatConversionException.
			addErrorInfo(table, "delta dir too many", 
					String.format("there are %s partitions is more than %d delta menu", errorPartitionNumber, (long) deltaNumberLimit));
		}
		table.setDeltaComparison(String.format("%s|%s|%s", maxNumber, minNumber, errorPartitionNumber));
	}
	
	/**
	 * Checks bucket size and bucket data skew for bucketed tables and stores the file size
	 * summary ({@code max|median|min|count}) on the table.
	 *
	 * @param fileSizes all data file sizes, sorted ascending
	 */
	private void addFileComparison(final OrcTableBean table, final Long[] fileSizes) throws Exception {
		int number = fileSizes.length;
		if (number == 0) {
			return;
		}
		String bucketColumn = table.getBucketColumn();
		Long maxSize = fileSizes[number - 1];
		if (bucketColumn != null && !bucketColumn.equals("")) {
			if (maxSize > orcBucketSizeLimit) {
				warning.add("bucket over size");
				addErrorInfo(table, "bucket over size", 
						String.format("bucket max size is %s", UtilTools.getCarrySize(maxSize)));
			}
			// Skew is only evaluated once the largest file exceeds the begin-check size.
			if (maxSize > bucketBeginCheckLimit) {
				// Read the bucket number only here so that tables without buckets never
				// depend on it being parsable.
				int bucketNumber = Integer.valueOf(table.getBucketNumber());
				boolean isDataSkew = checkDataSkew("bucket", fileSizes, bucketNumber, this.bucketDataSkewLimit, this.bucketSizeDiffLimit);
				if (isDataSkew) {
					addErrorInfo(table, "bucket is data skew", "");
				}
			}
		}
		table.setBucketComparison(sizeSummary(fileSizes));
	}
	
	/** Formats {@code max|median|min|count} for a non-empty, ascending sorted size array. */
	private static String sizeSummary(final Long[] sortedSizes) {
		int number = sortedSizes.length;
		return String.format("%s|%s|%s|%s", UtilTools.getCarrySize(sortedSizes[number - 1]),
				UtilTools.getCarrySize(sortedSizes[number/2]),
				UtilTools.getCarrySize(sortedSizes[0]),
				number);
	}
	
	/** Appends one row {@code {service.database.table, problem, detail}} to the error infos. */
	private void addErrorInfo(final OrcTableBean table, final String problem, final String detail) {
		comparisonResult.orcErrorInfos.add(new String[]{
				String.format("%s.%s.%s", servicename, table.getDatabase(), table.getTablename()), 
				problem, 
				detail});
	}
	
	/**
	 * Compares the average size of the largest entries with the average size of the rest
	 * and records the warning {@code "<type> is data skew"} when the data is skewed.
	 *
	 * @param type label used in the warning text
	 * @param sizes sizes sorted ascending
	 * @param calcNumber base for the size of the "top" group; {@code 0} means
	 *        {@code sizes.length}
	 * @param dataSkewLimit share (fraction) of the base that forms the "top" group
	 * @param sizeDiff factor by which the top average must exceed the average of the rest
	 * @return {@code true} if the data is skewed
	 */
	private boolean checkDataSkew(final String type, 
			final Long[] sizes, 
			final int calcNumber,
			final double dataSkewLimit, 
			final double sizeDiff) throws Exception {
		int number = sizes.length;
		int base = calcNumber == 0 ? number : calcNumber;
		// Never take more entries than exist; otherwise the "rest" group would get a
		// negative size.
		int topNumber = Math.min((int)Math.ceil(base*dataSkewLimit), number);
		int firstTopIndex = number - topNumber;
		long totalSize = 0L;
		long topFileSize = 0L;
		for (int i = 0; i < number; i++) {
			long size = sizes[i];
			totalSize += size;
			if (i >= firstTopIndex) {
				topFileSize += size;
			}
		}
		if (isDataSkew(totalSize, number, topFileSize, topNumber, sizeDiff)) {
			warning.add(type + " is data skew");
			return true;
		}
		return false;
	}
	
	/**
	 * @return {@code true} if the average of the top group is more than {@code sizeDiff}
	 *         times the average of the remaining entries; {@code false} if either group
	 *         is empty
	 */
	private boolean isDataSkew(final long totalSize,
			final int number,
			final long topFileSize,
			final int topNumber,
			final double sizeDiff) {
		int otherNumber = number - topNumber;
		if (topNumber <= 0 || otherNumber <= 0) {
			return false;
		}
		double avgTopFile = (double) topFileSize / topNumber;
		double avgOtherFile = (double) (totalSize - topFileSize) / otherNumber;
		return avgTopFile > avgOtherFile*sizeDiff;
	}
}
