/* 
 * File:   main.cpp
 * Author: Hendrik
 *
 * Created on 11. Januar 2013, 13:37
 */

//#define ERRORHANDLER

#include <memory>
#include <iostream>
#include <sstream>
#include <iomanip>
#include <vector>
#include <iterator>
#include <fstream>

#include <cstdlib>
#include <stdexcept>
#ifdef ERRORHANDLER
#include <execinfo.h>
#endif

#include <boost/algorithm/string.hpp>
#include "../Source/cputime.h"

#include "../Source/DatastreamCoreset.hpp"
#include "../Source/EuclideanMetric.hpp"
#include "../Source/EuclideanNorm.hpp"
#include "../Source/FastCoreset.hpp"
#include "../Source/PKMedian.hpp"
#include "../Source/LloydMedian.hpp"
#include "../Source/LloydProbMedian.hpp"
#include "../Source/Weiszfeld.hpp"
#include "../Source/WeightedPoint.hpp"
#include "../Source/Point.hpp"
#include "../Source/ProbabilisticPoint.hpp"

using namespace std;

#ifdef ERRORHANDLER
/**
 * Terminate handler (installed via std::set_terminate in main when
 * ERRORHANDLER is defined).
 *
 * Prints the current call stack (at most 20 frames) to standard output, one
 * frame per line, and then ends the process with exit status 1.
 */
void handler()
{
    void *trace_elems[20];
    int trace_elem_count(backtrace(trace_elems, 20));
    // backtrace_symbols() returns NULL if it cannot build the symbol table
    // (e.g. out of memory); in that case fall back to the raw frame addresses
    // instead of dereferencing a null pointer.
    char **stack_syms(backtrace_symbols(trace_elems, trace_elem_count));
    for (int i = 0; i < trace_elem_count; ++i)
    {
        if (stack_syms)
            std::cout << stack_syms[i] << "\n";
        else
            std::cout << trace_elems[i] << "\n";
    }
    // Only the array itself must be released; free(NULL) is a no-op.
    free(stack_syms);

    exit(1);
}
#endif

/**
 * Reads points line by line from a text stream, groups them and hands every
 * complete group to a callback as one ProbabilisticPoint.
 *
 * Each non-empty line is split at every character contained in splitSeq; the
 * resulting tokens are converted to double (a token that is not a number
 * yields 0.0) and form the coordinates of one WeightedPoint constructed with
 * the value prob. The dimension of the first point read is the reference
 * dimension: later points with a different dimension are skipped and do not
 * count towards a group.
 *
 * After every numOfPointsToMerge accepted points a ProbabilisticPoint is built
 * from the buffered points and passed to processProbPoint. Points left over at
 * the end of the stream that do not fill a complete group are discarded.
 *
 * @param fs                 Input stream, read from its current position.
 * @param processProbPoint   Callback invoked per complete group; returning
 *                           false stops the processing immediately.
 * @param splitSeq           Set of separator characters.
 * @param numOfPointsToMerge Number of points per group; must be positive.
 * @param prob               Value passed as second constructor argument to
 *                           every WeightedPoint.
 * @throws std::invalid_argument if numOfPointsToMerge is not positive.
 */
void processStream(std::fstream & fs, std::function<bool(ProbabilisticPoint) > processProbPoint, std::string splitSeq, int numOfPointsToMerge, double prob)
{
    // A group size of zero would make the modulo below undefined behaviour and
    // a negative one would be converted to a huge reserve() request.
    if (numOfPointsToMerge <= 0)
        throw std::invalid_argument("processStream: numOfPointsToMerge must be positive");

    std::vector<WeightedPoint> pointBuffer;
    pointBuffer.reserve(numOfPointsToMerge);
    std::string line;
    int pos = 0;
    int dim = 0;
    while (std::getline(fs, line))
    {
        // An empty line would otherwise be parsed as the one-dimensional point
        // (0) and, if it is the first line, fix the reference dimension to 1.
        if (line.empty())
            continue;

        std::vector<std::string> stringcoords;
        boost::split(stringcoords, line, boost::is_any_of(splitSeq));

        std::vector<double> coords;
        coords.reserve(stringcoords.size());
        for (size_t i = 0; i < stringcoords.size(); ++i)
            coords.push_back(std::strtod(stringcoords[i].c_str(), nullptr));
        WeightedPoint p(coords, prob);

        // The first point read defines the expected dimension.
        if (dim == 0)
            dim = p.getDimension();

        // Silently drop malformed lines (wrong number of coordinates).
        if (p.getDimension() != dim)
            continue;

        pointBuffer.push_back(p);
        ++pos;

        // pos counts accepted points only, so every group is complete.
        if (pos % numOfPointsToMerge == 0)
        {
            ProbabilisticPoint pp(pointBuffer);
            if (!processProbPoint(pp))
                return;
            pointBuffer.clear();
        }
    }
}

int main(int argc, char** argv)
{
#ifdef ERRORHANDLER
    std::set_terminate(handler);
#endif

    if (argc <= 4)
    {
        std::cout << "Usage: runOnline runOffline runFull [k [splitChar [fastCoresetSampleSize [numOfPointsToMerge [bucketSize]]]]] file" << std::endl;
        return 1;
    }

    // Algorithm switches
    bool runOnline = *argv[1] == '1';
    bool runOffline = *argv[2] == '1';
    bool runFull = *argv[3] == '1';
    // k argument
    int k = 2;
    if (argc > 4)
        k = atoi(argv[4]);
    // SplitChar argument
    char splitChar = ',';
    if (argc > 5)
        splitChar = *argv[5];
    if (splitChar == 'S')
        splitChar = ' ';
    std::stringstream splitStream;
    splitStream << splitChar;
    std::string splitSeq(splitStream.str());
    // FastCoresetSampleSize argument
    int fastCoresetSampleSize = 200 * k;
    if (argc > 6)
        fastCoresetSampleSize = atoi(argv[6]);
    // NumOfPointsToMerge  argument
    int numOfPointsToMerge = 10;
    if (argc > 7)
        numOfPointsToMerge = atoi(argv[7]);
    // BucketSize argument
    int bucketSize = 1000;
    if (argc > 8)
        bucketSize = atoi(argv[8]);
    // Input file
    std::fstream filestr(argv[argc - 1], std::fstream::in);
    // Primitive check
    if (!(runOnline || runOffline || runFull))
    {
        std::cout << "Nothing to do" << std::endl;
        return 1;
    }
    // Is online mode possible?
    bool const onlineMode = !runOffline && !runFull;

    LloydProbMedian lloydprob([]()
    {
        return new EuclideanMetric();
    }, []()
    {
        return new EuclideanNorm();
    });
    PKMedian pkmed([]()
    {
        return new EuclideanMetric();
    });
    FastCoreset fastCoreset([]()
    {
        return new EuclideanMetric();
    }, []()
    {
        return new EuclideanNorm();
    });
    fastCoreset.setK(k);
    fastCoreset.setAllSamplesSize(fastCoresetSampleSize);
    DatastreamCoreset dsCoreset(&fastCoreset, bucketSize);

    std::vector<ProbabilisticPoint> ppoints;
    double prob = 1 / double(numOfPointsToMerge);

    // Data dimension
    int dim = 0;
    std::function<bool(ProbabilisticPoint) > determineDimension = [&dim] (ProbabilisticPoint pp)
    {
        dim = pp[0].getDimension();
        return false;
    };
    filestr.clear();
    filestr.seekg(0);
    processStream(filestr, determineDimension, splitSeq, numOfPointsToMerge, prob);

    // Coreset construction
    int n = 0;
    double ds_onlineMode_time = 0;
    std::function<bool(ProbabilisticPoint) > processProbPoint;
    if (onlineMode)
    {
        processProbPoint = [&ds_onlineMode_time, &dsCoreset, &n] (ProbabilisticPoint pp)
        {
            double tmp_start_time = getCPUTime();
            dsCoreset << pp;
            double tmp_end_time = getCPUTime();
            ds_onlineMode_time += tmp_end_time - tmp_start_time;
            ++n;
            return true;
        };
    }
    else
    {
        processProbPoint = [&ppoints, &n] (ProbabilisticPoint pp)
        {
            ppoints.push_back(pp);
            ++n;
            return true;
        };
    }
    filestr.clear();
    filestr.seekg(0);
    processStream(filestr, processProbPoint, splitSeq, numOfPointsToMerge, prob);

    //std::cout << "FastCoreset Offline..." << std::endl;
    std::vector<ProbabilisticPoint> off_coreset;
    double off_start_time = getCPUTime();
    if (runOffline)
        fastCoreset.computeCoreset(ppoints.begin(), ppoints.end(), std::back_inserter(off_coreset), n);
    double off_end_time = getCPUTime();

    //std::cout << "FastCoreset Online..." << std::endl;
    std::vector<ProbabilisticPoint> ds_coreset;
    double ds_start_time = getCPUTime();
    if (runOnline && !onlineMode)
        for (size_t i = 0; i < n; ++i)
            dsCoreset << ppoints[i];
    ds_coreset = *dsCoreset.assemble();
    double ds_end_time = getCPUTime();

    //std::cout << "LloydProb offline..." << std::endl;
    std::vector<Point> off_centers;
    double off_lloyd_start_time = getCPUTime();
    if (runOffline)
        lloydprob.computeCenterSet(off_coreset.begin(), off_coreset.end(), std::back_inserter(off_centers), k, 10, off_coreset.size());
    double off_lloyd_end_time = getCPUTime();

    //std::cout << "LloydProb online..." << std::endl;
    std::vector<Point> ds_centers;
    double ds_lloyd_start_time = getCPUTime();
    if (runOnline)
        lloydprob.computeCenterSet(ds_coreset.begin(), ds_coreset.end(), std::back_inserter(ds_centers), k, 10, ds_coreset.size());
    double ds_lloyd_end_time = getCPUTime();

    //std::cout << "LloydProb full..." << std::endl;
    std::vector<Point> full_centers;
    double full_lloyd_start_time = getCPUTime();
    if (runFull)
        lloydprob.computeCenterSet(ppoints.begin(), ppoints.end(), std::back_inserter(full_centers), k, 10, ppoints.size());
    double full_lloyd_end_time = getCPUTime();

    std::cout << "MODE: " << (onlineMode ? "online" : "offline") << std::endl;
    std::cout << "k =   " << k << std::endl;
    std::cout << "Points per node =   " << numOfPointsToMerge << std::endl;
    if (runOnline || runOffline)
        std::cout << "Sample size fast coreset =   " << fastCoresetSampleSize << std::endl;
    if (runOnline)
        std::cout << "M&R bucket size = " << bucketSize << std::endl;

    std::cout << "DATA" << std::endl;
    std::cout << "Dimension                 : " << dim << std::endl;
    std::cout << "Data size                 : " << n << std::endl;
    if (runOffline)
        std::cout << "FastCoreset (offline) size: " << off_coreset.size() << std::endl;
    if (runOnline)
        std::cout << "FastCoreset (online)  size: " << ds_coreset.size() << std::endl;

    std::cout << "TIME" << std::endl;
    if (runFull)
    {
        std::cout << "Lloyd on all data    : " << full_lloyd_end_time - full_lloyd_start_time << std::endl;
    }
    if (runOffline)
    {
        std::cout << "FastCoreset (offline): " << off_end_time - off_start_time << std::endl;
        std::cout << "  plus Lloyd         : " << off_lloyd_end_time - off_lloyd_start_time << std::endl;
    }
    if (runOnline)
    {
        std::cout << "FastCoreset (online) : " << ds_end_time - ds_start_time << std::endl;
        std::cout << "  plus Lloyd         : " << ds_lloyd_end_time - ds_lloyd_start_time + ds_onlineMode_time << std::endl;
    }

    std::cout << "CENTERS" << std::endl;
    if (runFull)
        std::cout << "Lloyd on all data    : " << full_centers.size() << std::endl;
    if (runOffline)
        std::cout << "FastCoreset (offline): " << off_centers.size() << std::endl;
    if (runOnline)
        std::cout << "FastCoreset (online) : " << ds_centers.size() << std::endl;

    std::cout << "COSTS" << std::endl;
    if (onlineMode)
    {
        if (runOnline)
        {
            double ds_cost = 0;
            std::function<bool(ProbabilisticPoint) > costFunction = [&pkmed, &ds_centers, &ds_cost] (ProbabilisticPoint pp)
            {
                ds_cost += pkmed.weightedCost(pp, ds_centers.begin(), ds_centers.end());
                return true;
            };
            filestr.clear();
            filestr.seekg(0);
            processStream(filestr, costFunction, splitSeq, numOfPointsToMerge, prob);
            std::cout << "FastCoreset (online) : " << ds_cost << std::endl;
            std::cout << "  on coreset only    : " << pkmed.weightedCost(ds_coreset.begin(), ds_coreset.end(), ds_centers.begin(), ds_centers.end()) << std::endl;
        }
    }
    else
    {
        if (runFull)
            std::cout << "Lloyd on all data    : " << pkmed.weightedCost(ppoints.begin(), ppoints.end(), full_centers.begin(), full_centers.end()) << std::endl;
        if (runOffline)
        {
            std::cout << "FastCoreset (offline): " << pkmed.weightedCost(ppoints.begin(), ppoints.end(), off_centers.begin(), off_centers.end()) << std::endl;
            std::cout << "  on coreset only    : " << pkmed.weightedCost(off_coreset.begin(), off_coreset.end(), off_centers.begin(), off_centers.end()) << std::endl;
        }
        if (runOnline)
        {
            std::cout << "FastCoreset (online) : " << pkmed.weightedCost(ppoints.begin(), ppoints.end(), ds_centers.begin(), ds_centers.end()) << std::endl;
            std::cout << "  on coreset only    : " << pkmed.weightedCost(ds_coreset.begin(), ds_coreset.end(), ds_centers.begin(), ds_centers.end()) << std::endl;
        }
    }

    return 0;
}

