Statistics / Writing results¶
Statistics are functors that are called at the end of each iteration of the Bayesian optimizer. Their job is to:
- write the results to files;
- write the current state of the optimization;
- write the data that are useful for your own analyses.
All the statistics are written to a directory whose name combines the hostname, the date and time, and the PID of the process (hostname_date_pid). For instance: wallepro-perso.loria.fr_2016-05-13_16_16_09_72226
Limbo provides a few classes for common uses (see Statistics (stats) for details):
- ConsoleSummary: writes a summary to std::cout at each iteration of the algorithm
- AggregatedObservations: records the value of each evaluation of the function (after aggregation) [filename: aggregated_observations.dat]
- BestAggregatedObservations: records the best aggregated value observed so far after each iteration [filename: best_aggregated_observations.dat]
- Observations: records the value of each evaluation of the function (before aggregation) [filename: observations.dat]
- Samples: records the position in the search space of each evaluated point [filename: samples.dat]
- BestObservations: records the best observation after each iteration [filename: best_observations.dat]
- BestSamples: records the position in the search space of the best observation after each iteration [filename: best_samples.dat]
These statistics are for “advanced users”:
- GPAcquisitions
- GPKernelHParams
- GPLikelihood
- GPMeanHParams
The default statistics list is:
boost::fusion::vector<stat::Samples<Params>, stat::AggregatedObservations<Params>,
  stat::ConsoleSummary<Params>>
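Writing of the result files can also be disabled globally through the parameters. Here is a minimal sketch, based on the stats_enabled parameter of bayes_opt_bobase that also appears in the full example at the end of this tutorial:
struct Params {
    // set stats_enabled to false to disable the writing of the result files
    struct bayes_opt_bobase : public defaults::bayes_opt_bobase {
        BO_PARAM(int, stats_enabled, false);
    };
    // ... (other parameters as usual)
};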
Writing your own statistics class¶
Limbo only provides generic statistics classes. However, it is often useful to add user-defined statistics classes that are specific to a particular experiment.
All the statistics functors follow the same template:
template <typename Params>
struct Samples : public limbo::stat::StatBase<Params> {
    template <typename BO, typename AggregatorFunction>
    void operator()(const BO& bo, const AggregatorFunction&)
    {
      // code
    }
};
In a few words, a statistics functor receives the BO object (the instance of the Bayesian optimizer) and is free to extract and record whatever it needs from it (e.g., bo.samples(), bo.observations(), bo.total_iterations()).
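For example, a statistics functor does not have to write to a file at all. Here is a minimal sketch (IterationPrinter is a hypothetical name, not a class provided by Limbo) that simply prints the current iteration to the console; it assumes the same context as the full example at the end of this tutorial (#include <limbo/limbo.hpp>):
template <typename Params>
struct IterationPrinter : public limbo::stat::StatBase<Params> {
    template <typename BO, typename AggregatorFunction>
    void operator()(const BO& bo, const AggregatorFunction&)
    {
        // respect the global switch that enables / disables statistics
        if (!bo.stats_enabled())
            return;
        std::cout << "iteration " << bo.total_iterations() << ": "
                  << bo.observations().size() << " observations so far" << std::endl;
    }
};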
For instance, we could add a statistics class that writes the worst observation at each iteration. Here is how to write this functor:
template <typename Params>
struct WorstObservation : public stat::StatBase<Params> {
    template <typename BO, typename AggregatorFunction>
    void operator()(const BO& bo, const AggregatorFunction& afun)
    {
        // [optional] if statistics have been disabled or if there are no observations, we do not do anything
        if (!bo.stats_enabled() || bo.observations().empty())
            return;
        // [optional] we create the log file / you can manage your own file instead, but remember that this method is called at each iteration (so open your own file in the constructor, not here)
        this->_create_log_file(bo, "worst_observations.dat");
        // [optional] we add a header to the file to make it easier to read later
        if (bo.total_iterations() == 0)
            (*this->_log_file) << "#iteration worst_observation sample" << std::endl;
        // ----- search for the worst observation ----
        // 1. get the aggregated observations
        auto rewards = std::vector<double>(bo.observations().size());
        std::transform(bo.observations().begin(), bo.observations().end(), rewards.begin(), afun);
        // 2. search for the worst element
        auto min_e = std::min_element(rewards.begin(), rewards.end());
        auto min_obs = bo.observations()[std::distance(rewards.begin(), min_e)];
        auto min_sample = bo.samples()[std::distance(rewards.begin(), min_e)];
        // ----- write what we have found ------
        // the file is (*this->_log_file)
        (*this->_log_file) << bo.total_iterations() << " " << min_obs.transpose() << " " << min_sample.transpose() << std::endl;
    }
};
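The same pattern is easy to adapt to other quantities. As a further illustration (a hypothetical sketch, not part of the tutorial source; MeanObservation and mean_observations.dat are made-up names), here is a variant that records the mean of the aggregated observations at each iteration:
template <typename Params>
struct MeanObservation : public stat::StatBase<Params> {
    template <typename BO, typename AggregatorFunction>
    void operator()(const BO& bo, const AggregatorFunction& afun)
    {
        if (!bo.stats_enabled() || bo.observations().empty())
            return;
        this->_create_log_file(bo, "mean_observations.dat");
        if (bo.total_iterations() == 0)
            (*this->_log_file) << "#iteration mean_observation" << std::endl;
        // afun reduces each (possibly multi-dimensional) observation to a scalar
        double sum = 0.0;
        for (const auto& obs : bo.observations())
            sum += afun(obs);
        (*this->_log_file) << bo.total_iterations() << " "
                           << sum / bo.observations().size() << std::endl;
    }
};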
In order to configure the Bayesian optimizer to use our new statistics class, we first need to define a new statistics list which includes our new WorstObservation:
// define a special list of statistics which includes our new statistics class
    using stat_t = boost::fusion::vector<stat::ConsoleSummary<Params>,
        stat::Samples<Params>,
        stat::Observations<Params>,
        WorstObservation<Params>>;
Then, we use it to define the optimizer:
bayes_opt::BOptimizer<Params, statsfun<stat_t>> boptimizer;
The full source code is available in src/tutorials/statistics.cpp and reproduced here:
#include <iostream>
#include <limbo/limbo.hpp>
using namespace limbo;
struct Params {
    struct bayes_opt_boptimizer : public defaults::bayes_opt_boptimizer {
    };
// depending on which internal optimizer we use, we need to import different parameters
#ifdef USE_NLOPT
    struct opt_nloptnograd : public defaults::opt_nloptnograd {
    };
#elif defined(USE_LIBCMAES)
    struct opt_cmaes : public defaults::opt_cmaes {
    };
#else
    struct opt_gridsearch : public defaults::opt_gridsearch {
    };
#endif
    // enable / disable the writing of the result files
    struct bayes_opt_bobase : public defaults::bayes_opt_bobase {
        BO_PARAM(int, stats_enabled, true);
    };
    // no noise
    struct kernel : public defaults::kernel {
        BO_PARAM(double, noise, 1e-10);
    };
    struct kernel_maternfivehalves : public defaults::kernel_maternfivehalves {
    };
    // we use 10 random samples to initialize the algorithm
    struct init_randomsampling {
        BO_PARAM(int, samples, 10);
    };
    // we stop after 40 iterations
    struct stop_maxiterations {
        BO_PARAM(int, iterations, 40);
    };
    // we use the default parameters for acqui_ucb
    struct acqui_ucb : public defaults::acqui_ucb {
    };
};
struct Eval {
    // number of input dimensions (x.size())
    BO_PARAM(size_t, dim_in, 1);
    // number of dimensions of the result (res.size())
    BO_PARAM(size_t, dim_out, 1);
    // the function to be optimized
    Eigen::VectorXd operator()(const Eigen::VectorXd& x) const
    {
        double y = -((5 * x(0) - 2.5) * (5 * x(0) - 2.5)) + 5;
        // we return a 1-dimensional vector
        return tools::make_vector(y);
    }
};
template <typename Params>
struct WorstObservation : public stat::StatBase<Params> {
    template <typename BO, typename AggregatorFunction>
    void operator()(const BO& bo, const AggregatorFunction& afun)
    {
        // [optional] if statistics have been disabled or if there are no observations, we do not do anything
        if (!bo.stats_enabled() || bo.observations().empty())
            return;
        // [optional] we create the log file / you can manage your own file instead, but remember that this method is called at each iteration (so open your own file in the constructor, not here)
        this->_create_log_file(bo, "worst_observations.dat");
        // [optional] we add a header to the file to make it easier to read later
        if (bo.total_iterations() == 0)
            (*this->_log_file) << "#iteration worst_observation sample" << std::endl;
        // ----- search for the worst observation ----
        // 1. get the aggregated observations
        auto rewards = std::vector<double>(bo.observations().size());
        std::transform(bo.observations().begin(), bo.observations().end(), rewards.begin(), afun);
        // 2. search for the worst element
        auto min_e = std::min_element(rewards.begin(), rewards.end());
        auto min_obs = bo.observations()[std::distance(rewards.begin(), min_e)];
        auto min_sample = bo.samples()[std::distance(rewards.begin(), min_e)];
        // ----- write what we have found ------
        // the file is (*this->_log_file)
        (*this->_log_file) << bo.total_iterations() << " " << min_obs.transpose() << " " << min_sample.transpose() << std::endl;
    }
};
int main()
{
    // we use the default acquisition function / model / stat / etc.
    // define a special list of statistics which includes our new statistics class
    using stat_t = boost::fusion::vector<stat::ConsoleSummary<Params>,
        stat::Samples<Params>,
        stat::Observations<Params>,
        WorstObservation<Params>>;
    // remember to use the new statistics vector via statsfun<>!
    bayes_opt::BOptimizer<Params, statsfun<stat_t>> boptimizer;
    // run the evaluation
    boptimizer.optimize(Eval());
    // the best sample found
    std::cout << "Best sample: " << boptimizer.best_sample()(0) << " - Best observation: " << boptimizer.best_observation()(0) << std::endl;
    return 0;
}
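To use the recorded data in your own analyses, you can simply parse the text files afterwards. Here is a minimal, standalone sketch (not part of the tutorial) that reads back worst_observations.dat; it assumes it is run from inside the results directory and that the samples are 1-dimensional, as in this tutorial (dim_in = 1):
#include <fstream>
#include <iostream>
#include <string>

int main()
{
    std::ifstream ifs("worst_observations.dat");
    std::string header;
    std::getline(ifs, header); // skip the "#iteration worst_observation sample" header
    int iteration;
    double observation, sample;
    while (ifs >> iteration >> observation >> sample)
        std::cout << "iteration " << iteration << ": worst observation = " << observation << std::endl;
    return 0;
}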