/*
 * Decompiled with CFR 0.152.
 */
package edu.umd.marbl.mhap.main;

import edu.umd.marbl.mhap.impl.FastaData;
import edu.umd.marbl.mhap.sketch.BottomOverlapSketch;
import edu.umd.marbl.mhap.sketch.BottomSketch;
import edu.umd.marbl.mhap.sketch.MinHashSketch;
import edu.umd.marbl.mhap.sketch.ZeroNGramsFoundException;
import edu.umd.marbl.mhap.utils.Utils;
import java.io.BufferedReader;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.ListIterator;
import java.util.Random;

/**
 * Command-line simulator that draws pairs of error-containing reads and reports how many
 * k-mers they share.
 *
 * <p>For every trial two comparisons are recorded: an "overlapping" pair (two reads taken
 * from the same source sequence at positions {@code overlap} bases apart) and a "random"
 * pair (the same first read against an unrelated read). For each pair the exact k-mer set
 * ratio from {@link #compareKmers(String, String)} and the sketch-based estimate from
 * {@link #compareMinHash(String, String)} are stored and finally printed together with
 * their mean and sample standard deviation.
 *
 * <p>When no k-mer size is configured ({@code kmer < 0}, i.e. "usage 2" of
 * {@link #main(String[])}) the simulator only emits the simulated reads in FASTA format.
 *
 * <p>All randomness comes from one static {@link Random} that is re-created from
 * {@link #seed} every time an instance is constructed.
 */
public class KmerStatSimulator {
    private boolean verbose = false;
    /** K-mer size; a negative value switches {@link #simulate} to FASTA-only output. */
    private int kmer = -1;
    /** Number of bases by which the two reads of the overlapping pair are offset-overlapped. */
    private int overlap = 100;
    private ArrayList<Double> randomJaccard = new ArrayList<>();
    private ArrayList<Double> randomMinHash = new ArrayList<>();
    private ArrayList<Double> randomMerCounts = new ArrayList<>();
    /** Path of the reference FASTA file, or {@code null} to use random sequences. */
    private String reference = null;
    private double requestedLength = 5000.0;
    /** Number of shared k-mers found by the most recent {@link #compareKmers} call. */
    private double sharedCount = 0.0;
    private ArrayList<Double> sharedJaccard = new ArrayList<>();
    private ArrayList<Double> sharedMinHash = new ArrayList<>();
    private ArrayList<Double> sharedMerCounts = new ArrayList<>();
    /** K-mers (with their counts from the input file) that never count as shared. */
    private HashMap<String, Integer> skipMers = new HashMap<>();
    private int totalTrials = 10000;
    /** When set, the second read of every pair is generated without errors. */
    private boolean halfError = false;
    private static Random generator = null;
    public static int seed = 0;

    /**
     * Entry point. Seven or more arguments select the full k-mer statistics mode, five or
     * six arguments select the FASTA-only mode; anything shorter prints the usage text and
     * exits with status 1.
     *
     * @param args command-line arguments as described by {@link #printUsage()}
     * @throws Exception if an argument cannot be parsed or the simulation fails
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 5) {
            KmerStatSimulator.printUsage();
            System.exit(1);
            return;
        }
        boolean fullUsage = args.length >= 7;

        KmerStatSimulator simulator = new KmerStatSimulator();
        simulator.totalTrials = Integer.parseInt(args[0]);
        if (fullUsage) {
            simulator.requestedLength = Double.parseDouble(args[2]);
            simulator.kmer = Integer.parseInt(args[1]);
            simulator.overlap = Integer.parseInt(args[3]);
            if (args.length > 7) {
                simulator.halfError = Boolean.parseBoolean(args[7]);
            }
            if (args.length > 8) {
                simulator.reference = args[8];
            }
            if ((double) simulator.overlap > simulator.requestedLength) {
                System.err.println("Cannot have overlap > sequence length");
                System.exit(1);
            }
            if (args.length > 9) {
                simulator.loadSkipMers(args[9]);
            }
            simulator.simulate(Double.parseDouble(args[4]), Double.parseDouble(args[5]), Double.parseDouble(args[6]));
        } else {
            simulator.requestedLength = Double.parseDouble(args[1]);
            if (args.length > 5) {
                simulator.reference = args[5];
            }
            simulator.simulate(Double.parseDouble(args[2]), Double.parseDouble(args[3]), Double.parseDouble(args[4]));
        }
    }

    /** Prints both supported argument layouts to standard error. */
    public static void printUsage() {
        System.err.println("Example usage: simulateSharedKmers <#trials> <kmer size> <seq length> <overlap length> <insertion> <del> <subst> [only one sequence error] [reference genome] [kmers to ignore]");
        System.err.println("Usage 2: simulateSharedKmers <#trials> <seq length> <insertion> <del> <subst> [reference genome]");
    }

    /**
     * Creates a simulator with default settings and re-seeds the shared random generator
     * from the current value of {@link #seed}.
     */
    public KmerStatSimulator() {
        generator = new Random(seed);
    }

    /**
     * Reads the k-mers to ignore from a whitespace-separated {@code <kmer> <count>} file
     * into {@link #skipMers}. Blank lines are skipped.
     *
     * @param file path handed to {@code Utils.getFile}
     * @throws IllegalArgumentException if a non-blank line has fewer than two fields
     * @throws Exception if the file cannot be opened or read, or a count is not an integer
     */
    private void loadSkipMers(String file) throws Exception {
        // try-with-resources so the reader is released even when a line fails to parse.
        try (BufferedReader reader = Utils.getFile(file, null)) {
            String line;
            while ((line = reader.readLine()) != null) {
                String trimmed = line.trim();
                if (trimmed.isEmpty()) {
                    continue;
                }
                String[] fields = trimmed.split("\\s+");
                if (fields.length < 2) {
                    throw new IllegalArgumentException("Malformed skip-mer line (expected \"<kmer> <count>\"): " + line);
                }
                this.skipMers.put(fields[0].trim(), Integer.parseInt(fields[1]));
            }
        }
    }

    /**
     * Builds a sequence of {@code length} bases drawn independently from A, C, G and T.
     *
     * @param length number of bases to generate
     * @return the generated sequence
     */
    private String buildRandomSequence(int length) {
        StringBuilder bases = new StringBuilder();
        for (int i = 0; i < length; ++i) {
            bases.append(this.getRandomBase(null));
        }
        return bases.toString();
    }

    /**
     * Computes the ratio of distinct shared k-mers to distinct total k-mers of two
     * sequences and stores the shared count in {@link #sharedCount}.
     *
     * <p>K-mers of {@code first} that appear in {@link #skipMers} are never counted as
     * shared, but they still contribute to the total. If neither sequence is long enough
     * to contain a k-mer the result is {@code NaN} (0/0).
     *
     * @param first first sequence
     * @param second second sequence
     * @return number of distinct shared k-mers divided by the number of distinct k-mers
     *         seen in either sequence
     */
    public double compareKmers(String first, String second) {
        HashSet<String> matchable = new HashSet<String>(first.length());
        HashSet<String> union = new HashSet<String>(first.length() + second.length());
        HashSet<String> shared = new HashSet<String>(first.length());

        for (int start = 0; start <= first.length() - this.kmer; ++start) {
            String mer = first.substring(start, start + this.kmer);
            if (!this.skipMers.containsKey(mer)) {
                matchable.add(mer);
            }
            union.add(mer);
        }
        for (int start = 0; start <= second.length() - this.kmer; ++start) {
            String mer = second.substring(start, start + this.kmer);
            if (matchable.contains(mer)) {
                // Already part of the union through the first pass.
                shared.add(mer);
            } else {
                union.add(mer);
            }
        }
        this.sharedCount = shared.size();
        return (double) shared.size() / (double) union.size();
    }

    /**
     * Estimates the similarity of two sequences by comparing their {@code BottomSketch}es.
     *
     * @param first first sequence
     * @param second second sequence
     * @return the value of {@code BottomSketch.jaccard} for the two sketches
     */
    public double compareMinHash(String first, String second) {
        BottomSketch firstSketch = new BottomSketch(first, this.kmer, 1256, true);
        BottomSketch secondSketch = new BottomSketch(second, this.kmer, 1256, true);
        return firstSketch.jaccard(secondSketch);
    }

    /**
     * Estimates the similarity of two sequences by comparing their {@code MinHashSketch}es.
     *
     * @param first first sequence
     * @param second second sequence
     * @return the value of {@code MinHashSketch.jaccard} for the two sketches
     * @throws ZeroNGramsFoundException propagated from the sketch constructor
     */
    public double compareMinHash2(String first, String second) throws ZeroNGramsFoundException {
        MinHashSketch firstSketch = new MinHashSketch(first, this.kmer, 1256, null, true, 1.0);
        MinHashSketch secondSketch = new MinHashSketch(second, this.kmer, 1256, null, true, 1.0);
        return firstSketch.jaccard(secondSketch);
    }

    /**
     * Draws a uniformly random base, redrawing until it differs from {@code toExclude}.
     *
     * @param toExclude base that must not be returned, or {@code null} for no restriction
     * @return one of {@code 'A'}, {@code 'C'}, {@code 'G'}, {@code 'T'}
     */
    private char getRandomBase(Character toExclude) {
        while (true) {
            double draw = generator.nextDouble();
            char base;
            if (draw < 0.25) {
                base = 'A';
            } else if (draw < 0.5) {
                base = 'C';
            } else if (draw < 0.75) {
                base = 'G';
            } else {
                base = 'T';
            }
            if (toExclude == null || toExclude.charValue() != base) {
                return base;
            }
        }
    }

    /**
     * Convenience overload of the full {@code getSequence} that uses fixed error-type
     * fractions (0.792 insertion, 0.122 deletion, 0.086 substitution) and keeps the left
     * end of the mutated window.
     */
    private String getSequence(int firstLen, int firstPos, String sequence, double errorRate, StringBuilder profile, StringBuilder realErrorStr) {
        return this.getSequence(firstLen, firstPos, sequence, errorRate, profile, realErrorStr, 0.792, 0.122, 0.086, true);
    }

    /**
     * Cuts a window of up to {@code 2 * seqLength} bases out of {@code sequence} (wrapping
     * around to the start when the end is reached), applies random errors to it and returns
     * {@code seqLength} bases from one end of the result.
     *
     * <p>Each visited base is hit by an error with probability {@code errorRate}; the error
     * type is then chosen as substitution if the draw is below {@code substitutionRate},
     * insertion if it is below {@code insertionRate + substitutionRate}, and deletion
     * otherwise ({@code deletionRate} itself is not consulted).
     *
     * @param seqLength number of bases to return
     * @param firstPos start of the window inside {@code sequence}
     * @param sequence source sequence
     * @param errorRate per-visit probability of introducing an error
     * @param profile not used by this method
     * @param realErrorStr receives the number of introduced errors divided by
     *        {@code seqLength}, appended as text
     * @param insertionRate fraction of errors that are insertions
     * @param deletionRate fraction of errors that are deletions (implied, see above)
     * @param substitutionRate fraction of errors that are substitutions
     * @param trimRight {@code true} to return the first {@code seqLength} bases of the
     *        mutated window, {@code false} to return the last {@code seqLength} bases
     * @return the simulated read
     * @throws StringIndexOutOfBoundsException if fewer than {@code seqLength} bases remain
     *         after the errors were applied
     */
    private String getSequence(int seqLength, int firstPos, String sequence, double errorRate, StringBuilder profile, StringBuilder realErrorStr, double insertionRate, double deletionRate, double substitutionRate, boolean trimRight) {
        StringBuilder window = new StringBuilder();
        window.append(sequence.substring(firstPos, Math.min(sequence.length(), firstPos + 2 * seqLength)));
        if (window.length() < 2 * seqLength) {
            // Ran off the end of the source: continue from its beginning.
            window.append(sequence.substring(0, Math.min(sequence.length(), 2 * seqLength - window.length())));
        }

        LinkedList<Character> bases = new LinkedList<Character>();
        for (char base : window.toString().toCharArray()) {
            bases.add(Character.valueOf(base));
        }

        int realError = 0;
        ListIterator<Character> cursor = bases.listIterator();
        while (cursor.hasNext()) {
            char current = cursor.next().charValue();
            // Written as a negated '<' so that a NaN errorRate never triggers an error.
            if (!(generator.nextDouble() < errorRate)) {
                continue;
            }
            double errorType = generator.nextDouble();
            if (errorType < substitutionRate) {
                cursor.set(Character.valueOf(this.getRandomBase(Character.valueOf(current))));
            } else if (errorType < insertionRate + substitutionRate) {
                // Step back so the new base lands in front of the current one. The cursor
                // then sits before the current base again, so that base is examined once
                // more on the next iteration.
                // NOTE(review): with errorRate == 1 and insertions only this never
                // terminates - confirm whether the re-examination is intended.
                cursor.previous();
                cursor.add(Character.valueOf(this.getRandomBase(null)));
            } else {
                cursor.remove();
            }
            ++realError;
        }

        StringBuilder mutated = new StringBuilder(bases.size());
        for (Character base : bases) {
            mutated.append(base.charValue());
        }
        // NOTE(review): errors are counted over the whole window (up to 2 * seqLength
        // bases) but normalised by seqLength - confirm this is the intended rate.
        realErrorStr.append((double) realError / (double) seqLength);
        if (trimRight) {
            return mutated.substring(0, seqLength);
        }
        return mutated.substring(mutated.length() - seqLength, mutated.length());
    }

    /**
     * Prints the mean and the sample standard deviation (divisor {@code N - 1}) of
     * {@code values}, separated by a tab and without a trailing newline. Fewer than two
     * values yield {@code NaN} for the standard deviation.
     *
     * @param values samples to summarise
     * @param out destination stream
     */
    private void outputStats(ArrayList<Double> values, PrintStream out) {
        int count = values.size();
        double mean = 0.0;
        for (double value : values) {
            mean += value;
        }
        mean /= (double) count;

        double squaredDeviations = 0.0;
        for (double value : values) {
            squaredDeviations += (value - mean) * (value - mean);
        }
        double stdev = Math.sqrt(squaredDeviations / (double) (count - 1));
        out.print(mean + "\t" + stdev);
    }

    /** Prints {@code label}, the statistics of {@code values} and a newline to standard output. */
    private void printStats(String label, ArrayList<Double> values) {
        System.out.print(label);
        this.outputStats(values, System.out);
        System.out.println();
    }

    /** Returns whether any non-null entry of {@code sequences} has at least {@code minLength} characters. */
    private static boolean hasSequenceOfLength(String[] sequences, int minLength) {
        for (String candidate : sequences) {
            if (candidate != null && candidate.length() >= minLength) {
                return true;
            }
        }
        return false;
    }

    /**
     * Runs {@code totalTrials} simulations and writes the results to standard output
     * (progress and diagnostics go to standard error).
     *
     * <p>In k-mer mode each trial contributes one overlapping pair and one random pair; a
     * tab-separated line per trial is printed at the end, followed by mean and standard
     * deviation of every collected series. In FASTA-only mode ({@code kmer < 0}) just the
     * first simulated read of each trial is printed.
     *
     * <p>Invalid rates terminate the JVM with status 1, as do reads of unexpected length.
     *
     * @param insertionRate probability of an insertion per base
     * @param delRate probability of a deletion per base
     * @param subRate probability of a substitution per base
     * @throws IllegalArgumentException if a reference is configured but none of its
     *         sequences is at least four times the requested read length
     * @throws Exception if the reference cannot be loaded
     */
    public void simulate(double insertionRate, double delRate, double subRate) throws Exception {
        // Negated '>=' comparisons also reject NaN.
        if (!(insertionRate >= 0.0) || !(delRate >= 0.0) || !(subRate >= 0.0)) {
            System.err.println("Insertion, deletion and substitution rates must be non-negative");
            System.exit(1);
        }
        double errorRate = insertionRate + delRate + subRate;
        if (!(errorRate >= 0.0 && errorRate <= 1.0)) {
            System.err.println("Error rate must be between 0 and 1");
            System.exit(1);
        }
        // For errorRate == 0 these are NaN, which is harmless: no error is ever drawn.
        double insertionPercentage = insertionRate / errorRate;
        double deletionPercentage = delRate / errorRate;
        double subPercentage = subRate / errorRate;

        System.err.println("Started...");
        String[] sequences = null;
        if (this.reference != null) {
            FastaData data = new FastaData(this.reference, 0L);
            data.enqueueFullFile();
            sequences = new String[data.getNumberProcessed()];
            int loaded = 0;
            while (!data.isEmpty()) {
                sequences[loaded++] = data.dequeue().getSquenceString().toUpperCase().replace("N", "");
            }
        }
        System.err.println("Loaded reference");

        int sequenceLength = (int) this.requestedLength;
        // The per-trial selection below retries until it finds a long enough sequence, so
        // make sure one exists instead of spinning forever.
        if (sequences != null && this.totalTrials > 0 && !hasSequenceOfLength(sequences, 4 * sequenceLength)) {
            throw new IllegalArgumentException("Reference " + this.reference + " has no sequence of at least " + (4 * sequenceLength) + " bases after removing N");
        }

        // Error settings for the second read of each pair.
        double secondErrorRate = this.halfError ? 0.0 : errorRate;
        double secondInsertion = this.halfError ? 0.0 : insertionPercentage;
        double secondDeletion = this.halfError ? 0.0 : deletionPercentage;
        double secondSubstitution = this.halfError ? 0.0 : subPercentage;

        for (int trial = 0; trial < this.totalTrials; ++trial) {
            if (trial % 100 == 0) {
                System.err.println("Done " + trial + "/" + this.totalTrials);
            }

            int firstPos = 0;
            int seqID = 0;
            String sequence;
            if (sequences != null) {
                do {
                    seqID = generator.nextInt(sequences.length);
                    sequence = sequences[seqID];
                } while (sequence == null || sequence.length() < 4 * sequenceLength);
                firstPos = generator.nextInt(sequence.length());
            } else {
                sequence = this.buildRandomSequence(sequenceLength * 4);
            }

            StringBuilder firstAdj = new StringBuilder();
            StringBuilder errors = new StringBuilder();
            String firstSeq = this.getSequence(sequenceLength, firstPos, sequence, errorRate, firstAdj, errors, insertionPercentage, deletionPercentage, subPercentage, false);
            if (this.kmer < 0) {
                System.out.println(">s" + trial + " " + seqID + " " + (firstPos + sequenceLength));
                System.out.println(Utils.convertToFasta(firstSeq));
                continue;
            }

            // Overlapping pair: the first read is the right end of its 2x window, so the
            // second read starts 'overlap' bases before that window ends.
            int offset = (int) (this.requestedLength * 2.0 - (double) this.overlap);
            int secondPos = (firstPos + offset) % sequence.length();
            String secondSeq = this.getSequence(sequenceLength, secondPos, sequence, secondErrorRate, firstAdj, errors, secondInsertion, secondDeletion, secondSubstitution, true);
            if (this.verbose) {
                System.err.println("Given seq " + firstPos + " of len " + sequence.length() + " and offset " + secondPos + " due to offset " + offset);
                System.err.println(">" + seqID + "_" + firstPos + "\n" + firstSeq);
                System.err.println(">" + seqID + "_" + secondPos + "\n" + secondSeq);
            }
            if (firstSeq.length() != secondSeq.length() || (double) firstSeq.length() != this.requestedLength) {
                System.err.println("Error wrong length first: " + firstSeq.length() + " second: " + secondSeq.length() + " requested " + this.requestedLength);
                System.exit(1);
            }
            this.sharedJaccard.add(this.compareKmers(firstSeq, secondSeq));
            this.sharedMinHash.add(this.compareMinHash(firstSeq, secondSeq));
            this.sharedMerCounts.add(this.sharedCount);

            // Random pair: same first read against an unrelated second read.
            if (sequences != null) {
                int secondSeqID;
                do {
                    secondSeqID = generator.nextInt(sequences.length);
                    sequence = sequences[secondSeqID];
                } while (sequence == null || sequence.length() < 2 * sequenceLength);
                secondPos = generator.nextInt(sequence.length());
                // NOTE(review): the first read is cut from the right half of its window
                // (around firstPos + sequenceLength); confirm that checking the range
                // starting at firstPos is the intended exclusion zone.
                while (seqID == secondSeqID && Utils.getRangeOverlap(firstPos, firstPos + sequenceLength, secondPos, secondPos + sequenceLength) > 0) {
                    secondPos = generator.nextInt(sequence.length());
                }
                secondSeq = this.getSequence(sequenceLength, secondPos, sequence, secondErrorRate, firstAdj, errors, secondInsertion, secondDeletion, secondSubstitution, true);
            } else {
                secondSeq = this.buildRandomSequence(sequenceLength);
            }
            if (firstSeq.length() != secondSeq.length() || (double) firstSeq.length() != this.requestedLength) {
                System.err.println("Error wrong length " + firstSeq.length());
                System.exit(1);
            }
            this.randomJaccard.add(this.compareKmers(firstSeq, secondSeq));
            this.randomMinHash.add(this.compareMinHash(firstSeq, secondSeq));
            this.randomMerCounts.add(this.sharedCount);
        }

        if (this.randomJaccard.size() != this.randomMerCounts.size() || this.sharedJaccard.size() != this.sharedMerCounts.size() || this.sharedJaccard.size() != this.randomJaccard.size()) {
            System.err.println("Error trial number not consistent!");
        }
        if (this.sharedMerCounts.size() == 0) {
            // FASTA-only mode (or zero trials): nothing to summarise.
            return;
        }

        for (int trial = 0; trial < this.totalTrials; ++trial) {
            System.out.println(this.sharedMerCounts.get(trial) + "\t" + this.sharedJaccard.get(trial) + "\t" + this.sharedMinHash.get(trial) + "\t" + BottomOverlapSketch.jaccardToIdentity(this.sharedMinHash.get(trial), this.kmer) + "\t" + this.randomMerCounts.get(trial) + "\t" + this.randomJaccard.get(trial) + "\t" + this.randomMinHash.get(trial));
        }
        this.printStats("Shared mer counts stats: ", this.sharedMerCounts);
        this.printStats("Shared jaccard stats: ", this.sharedJaccard);
        this.printStats("Shared MinHash jaccard stats: ", this.sharedMinHash);
        this.printStats("Random mer counts stats: ", this.randomMerCounts);
        this.printStats("Random jaccard stats: ", this.randomJaccard);
        this.printStats("Random MinHash jaccard stats: ", this.randomMinHash);
    }
}

