Bitcoin Core 24.99.0
nanobench.h
1 // __ _ _______ __ _ _____ ______ _______ __ _ _______ _ _
2 // | \ | |_____| | \ | | | |_____] |______ | \ | | |_____|
3 // | \_| | | | \_| |_____| |_____] |______ | \_| |_____ | |
4 //
5 // Microbenchmark framework for C++11/14/17/20
6 // https://github.com/martinus/nanobench
7 //
8 // Licensed under the MIT License <http://opensource.org/licenses/MIT>.
9 // SPDX-License-Identifier: MIT
10 // Copyright (c) 2019-2023 Martin Leitner-Ankerl <martin.ankerl@gmail.com>
11 //
12 // Permission is hereby granted, free of charge, to any person obtaining a copy
13 // of this software and associated documentation files (the "Software"), to deal
14 // in the Software without restriction, including without limitation the rights
15 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
16 // copies of the Software, and to permit persons to whom the Software is
17 // furnished to do so, subject to the following conditions:
18 //
19 // The above copyright notice and this permission notice shall be included in all
20 // copies or substantial portions of the Software.
21 //
22 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28 // SOFTWARE.
29 
30 #ifndef ANKERL_NANOBENCH_H_INCLUDED
31 #define ANKERL_NANOBENCH_H_INCLUDED
32 
33 // see https://semver.org/
34 #define ANKERL_NANOBENCH_VERSION_MAJOR 4 // incompatible API changes
35 #define ANKERL_NANOBENCH_VERSION_MINOR 3 // backwards-compatible changes
36 #define ANKERL_NANOBENCH_VERSION_PATCH 10 // backwards-compatible bug fixes
37 
39 // public facing API - as minimal as possible
41 
42 #include <chrono> // high_resolution_clock
43 #include <cstring> // memcpy
44 #include <iosfwd> // for std::ostream* custom output target in Config
45 #include <string> // all names
46 #include <unordered_map> // holds context information of results
47 #include <vector> // holds all results
48 
49 #define ANKERL_NANOBENCH(x) ANKERL_NANOBENCH_PRIVATE_##x()
50 
51 #define ANKERL_NANOBENCH_PRIVATE_CXX() __cplusplus
52 #define ANKERL_NANOBENCH_PRIVATE_CXX98() 199711L
53 #define ANKERL_NANOBENCH_PRIVATE_CXX11() 201103L
54 #define ANKERL_NANOBENCH_PRIVATE_CXX14() 201402L
55 #define ANKERL_NANOBENCH_PRIVATE_CXX17() 201703L
56 
57 #if ANKERL_NANOBENCH(CXX) >= ANKERL_NANOBENCH(CXX17)
58 # define ANKERL_NANOBENCH_PRIVATE_NODISCARD() [[nodiscard]]
59 #else
60 # define ANKERL_NANOBENCH_PRIVATE_NODISCARD()
61 #endif
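// A quick illustration of the ANKERL_NANOBENCH(x) dispatch above, assuming a C++17
// compiler: ANKERL_NANOBENCH(NODISCARD) expands to ANKERL_NANOBENCH_PRIVATE_NODISCARD(),
// which in turn expands to [[nodiscard]], and ANKERL_NANOBENCH(CXX) >= ANKERL_NANOBENCH(CXX17)
// becomes __cplusplus >= 201703L.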
62 
63 #if defined(__clang__)
64 # define ANKERL_NANOBENCH_PRIVATE_IGNORE_PADDED_PUSH() \
65  _Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wpadded\"")
66 # define ANKERL_NANOBENCH_PRIVATE_IGNORE_PADDED_POP() _Pragma("clang diagnostic pop")
67 #else
68 # define ANKERL_NANOBENCH_PRIVATE_IGNORE_PADDED_PUSH()
69 # define ANKERL_NANOBENCH_PRIVATE_IGNORE_PADDED_POP()
70 #endif
71 
72 #if defined(__GNUC__)
73 # define ANKERL_NANOBENCH_PRIVATE_IGNORE_EFFCPP_PUSH() _Pragma("GCC diagnostic push") _Pragma("GCC diagnostic ignored \"-Weffc++\"")
74 # define ANKERL_NANOBENCH_PRIVATE_IGNORE_EFFCPP_POP() _Pragma("GCC diagnostic pop")
75 #else
76 # define ANKERL_NANOBENCH_PRIVATE_IGNORE_EFFCPP_PUSH()
77 # define ANKERL_NANOBENCH_PRIVATE_IGNORE_EFFCPP_POP()
78 #endif
79 
80 #if defined(ANKERL_NANOBENCH_LOG_ENABLED)
81 # include <iostream>
82 # define ANKERL_NANOBENCH_LOG(x) \
83  do { \
84  std::cout << __FUNCTION__ << "@" << __LINE__ << ": " << x << std::endl; \
85  } while (0)
86 #else
87 # define ANKERL_NANOBENCH_LOG(x) \
88  do { \
89  } while (0)
90 #endif
91 
92 #define ANKERL_NANOBENCH_PRIVATE_PERF_COUNTERS() 0
93 #if defined(__linux__) && !defined(ANKERL_NANOBENCH_DISABLE_PERF_COUNTERS)
94 # include <linux/version.h>
95 # if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 3, 0)
96 // PERF_COUNT_HW_REF_CPU_CYCLES only available since kernel 3.3
97 // PERF_FLAG_FD_CLOEXEC since kernel 3.14
98 # undef ANKERL_NANOBENCH_PRIVATE_PERF_COUNTERS
99 # define ANKERL_NANOBENCH_PRIVATE_PERF_COUNTERS() 1
100 # endif
101 #endif
102 
103 #if defined(__clang__)
104 # define ANKERL_NANOBENCH_NO_SANITIZE(...) __attribute__((no_sanitize(__VA_ARGS__)))
105 #else
106 # define ANKERL_NANOBENCH_NO_SANITIZE(...)
107 #endif
108 
109 #if defined(_MSC_VER)
110 # define ANKERL_NANOBENCH_PRIVATE_NOINLINE() __declspec(noinline)
111 #else
112 # define ANKERL_NANOBENCH_PRIVATE_NOINLINE() __attribute__((noinline))
113 #endif
114 
115 // workaround missing "is_trivially_copyable" in g++ < 5.0
116 // See https://stackoverflow.com/a/31798726/48181
117 #if defined(__GNUC__) && __GNUC__ < 5
118 # define ANKERL_NANOBENCH_IS_TRIVIALLY_COPYABLE(...) __has_trivial_copy(__VA_ARGS__)
119 #else
120 # define ANKERL_NANOBENCH_IS_TRIVIALLY_COPYABLE(...) std::is_trivially_copyable<__VA_ARGS__>::value
121 #endif
122 
123 // declarations ///////////////////////////////////////////////////////////////////////////////////
124 
125 namespace ankerl {
126 namespace nanobench {
127 
128 using Clock = std::conditional<std::chrono::high_resolution_clock::is_steady, std::chrono::high_resolution_clock,
129  std::chrono::steady_clock>::type;
130 class Bench;
131 struct Config;
132 class Result;
133 class Rng;
134 class BigO;
135 
286 void render(char const* mustacheTemplate, Bench const& bench, std::ostream& out);
287 void render(std::string const& mustacheTemplate, Bench const& bench, std::ostream& out);
288 
297 void render(char const* mustacheTemplate, std::vector<Result> const& results, std::ostream& out);
298 void render(std::string const& mustacheTemplate, std::vector<Result> const& results, std::ostream& out);
299 
300 // Contains mustache-like templates
301 namespace templates {
302 
312 char const* csv() noexcept;
313 
324 char const* htmlBoxplot() noexcept;
325 
332 char const* pyperf() noexcept;
333 
343 char const* json() noexcept;
344 
345 } // namespace templates
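// Illustrative sketch of how the templates above are typically combined with render()
// (assumes a Bench object named `bench` that has already produced results, and that the
// caller includes <fstream> and <iostream>):
//
//     std::ofstream fout("results.json");
//     ankerl::nanobench::render(ankerl::nanobench::templates::json(), bench, fout);
//
//     // or write CSV straight to stdout:
//     ankerl::nanobench::render(ankerl::nanobench::templates::csv(), bench, std::cout);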
346 
347 namespace detail {
348 
349 template <typename T>
350 struct PerfCountSet;
351 
352 class IterationLogic;
353 class PerformanceCounters;
354 
355 #if ANKERL_NANOBENCH(PERF_COUNTERS)
356 class LinuxPerformanceCounters;
357 #endif
358 
359 } // namespace detail
360 } // namespace nanobench
361 } // namespace ankerl
362 
363 // definitions ////////////////////////////////////////////////////////////////////////////////////
364 
365 namespace ankerl {
366 namespace nanobench {
367 namespace detail {
368 
369 template <typename T>
370 struct PerfCountSet {
377 };
378 
379 } // namespace detail
380 
381 ANKERL_NANOBENCH(IGNORE_PADDED_PUSH)
382 struct Config {
383  // actual benchmark config
384  std::string mBenchmarkTitle = "benchmark"; // NOLINT(misc-non-private-member-variables-in-classes)
385  std::string mBenchmarkName = "noname"; // NOLINT(misc-non-private-member-variables-in-classes)
386  std::string mUnit = "op"; // NOLINT(misc-non-private-member-variables-in-classes)
387  double mBatch = 1.0; // NOLINT(misc-non-private-member-variables-in-classes)
388  double mComplexityN = -1.0; // NOLINT(misc-non-private-member-variables-in-classes)
389  size_t mNumEpochs = 11; // NOLINT(misc-non-private-member-variables-in-classes)
390  size_t mClockResolutionMultiple = static_cast<size_t>(1000); // NOLINT(misc-non-private-member-variables-in-classes)
391  std::chrono::nanoseconds mMaxEpochTime = std::chrono::milliseconds(100); // NOLINT(misc-non-private-member-variables-in-classes)
392  std::chrono::nanoseconds mMinEpochTime = std::chrono::milliseconds(1); // NOLINT(misc-non-private-member-variables-in-classes)
393  uint64_t mMinEpochIterations{1}; // NOLINT(misc-non-private-member-variables-in-classes)
394  // If not 0, run *exactly* this number of iterations per epoch.
395  uint64_t mEpochIterations{0}; // NOLINT(misc-non-private-member-variables-in-classes)
396  uint64_t mWarmup = 0; // NOLINT(misc-non-private-member-variables-in-classes)
397  std::ostream* mOut = nullptr; // NOLINT(misc-non-private-member-variables-in-classes)
398  std::chrono::duration<double> mTimeUnit = std::chrono::nanoseconds{1}; // NOLINT(misc-non-private-member-variables-in-classes)
399  std::string mTimeUnitName = "ns"; // NOLINT(misc-non-private-member-variables-in-classes)
400  bool mShowPerformanceCounters = true; // NOLINT(misc-non-private-member-variables-in-classes)
401  bool mIsRelative = false; // NOLINT(misc-non-private-member-variables-in-classes)
402  std::unordered_map<std::string, std::string> mContext{}; // NOLINT(misc-non-private-member-variables-in-classes)
403 
406  Config& operator=(Config const& other);
407  Config& operator=(Config&& other) noexcept;
408  Config(Config const& other);
409  Config(Config&& other) noexcept;
410 };
411 ANKERL_NANOBENCH(IGNORE_PADDED_POP)
412 
413 // Result returned after a benchmark has finished. Can be used as a baseline for relative().
414 ANKERL_NANOBENCH(IGNORE_PADDED_PUSH)
415 class Result {
416 public:
417  enum class Measure : size_t {
418  elapsed,
419  iterations,
420  pagefaults,
421  cpucycles,
422  contextswitches,
423  instructions,
424  branchinstructions,
425  branchmisses,
426  _size
427  };
428 
429  explicit Result(Config benchmarkConfig);
430 
432  Result& operator=(Result const& other);
433  Result& operator=(Result&& other) noexcept;
434  Result(Result const& other);
435  Result(Result&& other) noexcept;
436 
437  // adds new measurement results
438  // all values are scaled by iters (except iters...)
439  void add(Clock::duration totalElapsed, uint64_t iters, detail::PerformanceCounters const& pc);
440 
441  ANKERL_NANOBENCH(NODISCARD) Config const& config() const noexcept;
442 
443  ANKERL_NANOBENCH(NODISCARD) double median(Measure m) const;
444  ANKERL_NANOBENCH(NODISCARD) double medianAbsolutePercentError(Measure m) const;
445  ANKERL_NANOBENCH(NODISCARD) double average(Measure m) const;
446  ANKERL_NANOBENCH(NODISCARD) double sum(Measure m) const noexcept;
447  ANKERL_NANOBENCH(NODISCARD) double sumProduct(Measure m1, Measure m2) const noexcept;
448  ANKERL_NANOBENCH(NODISCARD) double minimum(Measure m) const noexcept;
449  ANKERL_NANOBENCH(NODISCARD) double maximum(Measure m) const noexcept;
450  ANKERL_NANOBENCH(NODISCARD) std::string const& context(char const* variableName) const;
451  ANKERL_NANOBENCH(NODISCARD) std::string const& context(std::string const& variableName) const;
452 
453  ANKERL_NANOBENCH(NODISCARD) bool has(Measure m) const noexcept;
454  ANKERL_NANOBENCH(NODISCARD) double get(size_t idx, Measure m) const;
455  ANKERL_NANOBENCH(NODISCARD) bool empty() const noexcept;
456  ANKERL_NANOBENCH(NODISCARD) size_t size() const noexcept;
457 
458  // Parses the given string; if not found, returns _size.
459  static Measure fromString(std::string const& str);
460 
461 private:
462  Config mConfig{};
463  std::vector<std::vector<double>> mNameToMeasurements{};
464 };
465 ANKERL_NANOBENCH(IGNORE_PADDED_POP)
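// Sketch of how a Result is typically queried after a run (assumes a Bench object named
// `bench` whose results() vector is non-empty; all names used here are declared in this header):
//
//     ankerl::nanobench::Result const& r = bench.results().front();
//     double elapsedMedian = r.median(ankerl::nanobench::Result::Measure::elapsed);
//     double errPct = r.medianAbsolutePercentError(ankerl::nanobench::Result::Measure::elapsed);
//     bool hasInstr = r.has(ankerl::nanobench::Result::Measure::instructions);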
466 
467 
484 class Rng final {
485 public:
489  using result_type = uint64_t;
490 
491  static constexpr uint64_t(min)();
492  static constexpr uint64_t(max)();
493 
499  Rng(Rng const&) = delete;
500 
504  Rng& operator=(Rng const&) = delete;
505 
506  // moving is ok
507  Rng(Rng&&) noexcept = default;
508  Rng& operator=(Rng&&) noexcept = default;
509  ~Rng() noexcept = default;
510 
518  Rng();
519 
536  explicit Rng(uint64_t seed) noexcept;
537  Rng(uint64_t x, uint64_t y) noexcept;
538  explicit Rng(std::vector<uint64_t> const& data);
539 
543  ANKERL_NANOBENCH(NODISCARD) Rng copy() const noexcept;
544 
552  inline uint64_t operator()() noexcept;
553 
554  // This is slightly biased (the bounded() implementation below maps without a rejection step).
555 
570  inline uint32_t bounded(uint32_t range) noexcept;
571 
572  // random double in range [0, 1)
573  // see http://prng.di.unimi.it/
574 
581  inline double uniform01() noexcept;
582 
590  template <typename Container>
591  void shuffle(Container& container) noexcept;
592 
599  std::vector<uint64_t> state() const;
600 
601 private:
602  static constexpr uint64_t rotl(uint64_t x, unsigned k) noexcept;
603 
604  uint64_t mX;
605  uint64_t mY;
606 };
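// Minimal usage sketch for Rng (deterministic for a fixed seed, but not suitable for
// cryptography; <vector> is already included by this header):
//
//     ankerl::nanobench::Rng rng(12345);
//     uint64_t r = rng();             // full 64 bit random value
//     uint32_t die = rng.bounded(6);  // value in [0, 6)
//     double p = rng.uniform01();     // value in [0, 1)
//     std::vector<uint64_t> v = {1, 2, 3, 4};
//     rng.shuffle(v);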
607 
622 ANKERL_NANOBENCH(IGNORE_PADDED_PUSH)
623 class Bench {
624 public:
628  Bench();
629 
630  Bench(Bench&& other) noexcept;
631  Bench& operator=(Bench&& other) noexcept;
632  Bench(Bench const& other);
633  Bench& operator=(Bench const& other);
634  ~Bench() noexcept;
635 
654  template <typename Op>
655  ANKERL_NANOBENCH(NOINLINE)
656  Bench& run(char const* benchmarkName, Op&& op);
657 
658  template <typename Op>
659  ANKERL_NANOBENCH(NOINLINE)
660  Bench& run(std::string const& benchmarkName, Op&& op);
661 
666  template <typename Op>
667  ANKERL_NANOBENCH(NOINLINE)
668  Bench& run(Op&& op);
669 
675  Bench& title(char const* benchmarkTitle);
676  Bench& title(std::string const& benchmarkTitle);
677 
681  ANKERL_NANOBENCH(NODISCARD) std::string const& title() const noexcept;
682 
684  Bench& name(char const* benchmarkName);
685  Bench& name(std::string const& benchmarkName);
686  ANKERL_NANOBENCH(NODISCARD) std::string const& name() const noexcept;
687 
700  Bench& context(char const* variableName, char const* variableValue);
701  Bench& context(std::string const& variableName, std::string const& variableValue);
702 
711  Bench& clearContext();
712 
722  template <typename T>
723  Bench& batch(T b) noexcept;
724  ANKERL_NANOBENCH(NODISCARD) double batch() const noexcept;
725 
734  Bench& unit(char const* unit);
735  Bench& unit(std::string const& unit);
736  ANKERL_NANOBENCH(NODISCARD) std::string const& unit() const noexcept;
737 
747  Bench& timeUnit(std::chrono::duration<double> const& tu, std::string const& tuName);
748  ANKERL_NANOBENCH(NODISCARD) std::string const& timeUnitName() const noexcept;
749  ANKERL_NANOBENCH(NODISCARD) std::chrono::duration<double> const& timeUnit() const noexcept;
750 
758  Bench& output(std::ostream* outstream) noexcept;
759  ANKERL_NANOBENCH(NODISCARD) std::ostream* output() const noexcept;
760 
781  Bench& clockResolutionMultiple(size_t multiple) noexcept;
782  ANKERL_NANOBENCH(NODISCARD) size_t clockResolutionMultiple() const noexcept;
783 
799  Bench& epochs(size_t numEpochs) noexcept;
800  ANKERL_NANOBENCH(NODISCARD) size_t epochs() const noexcept;
801 
812  Bench& maxEpochTime(std::chrono::nanoseconds t) noexcept;
813  ANKERL_NANOBENCH(NODISCARD) std::chrono::nanoseconds maxEpochTime() const noexcept;
814 
825  Bench& minEpochTime(std::chrono::nanoseconds t) noexcept;
826  ANKERL_NANOBENCH(NODISCARD) std::chrono::nanoseconds minEpochTime() const noexcept;
827 
838  Bench& minEpochIterations(uint64_t numIters) noexcept;
839  ANKERL_NANOBENCH(NODISCARD) uint64_t minEpochIterations() const noexcept;
840 
847  Bench& epochIterations(uint64_t numIters) noexcept;
848  ANKERL_NANOBENCH(NODISCARD) uint64_t epochIterations() const noexcept;
849 
859  Bench& warmup(uint64_t numWarmupIters) noexcept;
860  ANKERL_NANOBENCH(NODISCARD) uint64_t warmup() const noexcept;
861 
879  Bench& relative(bool isRelativeEnabled) noexcept;
880  ANKERL_NANOBENCH(NODISCARD) bool relative() const noexcept;
881 
890  Bench& performanceCounters(bool showPerformanceCounters) noexcept;
891  ANKERL_NANOBENCH(NODISCARD) bool performanceCounters() const noexcept;
892 
901  ANKERL_NANOBENCH(NODISCARD) std::vector<Result> const& results() const noexcept;
902 
910  template <typename Arg>
911  Bench& doNotOptimizeAway(Arg&& arg);
912 
927  template <typename T>
928  Bench& complexityN(T n) noexcept;
929  ANKERL_NANOBENCH(NODISCARD) double complexityN() const noexcept;
930 
962  std::vector<BigO> complexityBigO() const;
963 
987  template <typename Op>
988  BigO complexityBigO(char const* name, Op op) const;
989 
990  template <typename Op>
991  BigO complexityBigO(std::string const& name, Op op) const;
992 
1000  Bench& render(char const* templateContent, std::ostream& os);
1001  Bench& render(std::string const& templateContent, std::ostream& os);
1002 
1003  Bench& config(Config const& benchmarkConfig);
1004  ANKERL_NANOBENCH(NODISCARD) Config const& config() const noexcept;
1005 
1006 private:
1007  Config mConfig{};
1008  std::vector<Result> mResults{};
1009 };
1010 ANKERL_NANOBENCH(IGNORE_PADDED_POP)
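// A minimal end-to-end sketch of the Bench API declared above (assumes that exactly one
// translation unit defines ANKERL_NANOBENCH_IMPLEMENT before including this header, and
// that <atomic> is included by the caller):
//
//     int main() {
//         std::atomic<int> x{0};
//         ankerl::nanobench::Bench()
//             .title("atomic operations")
//             .unit("op")
//             .warmup(100)
//             .minEpochIterations(1000)
//             .run("compare_exchange_strong", [&] {
//                 int expected = 0;
//                 x.compare_exchange_strong(expected, 0);
//             });
//     }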
1011 
1012 
1018 template <typename Arg>
1019 void doNotOptimizeAway(Arg&& arg);
1020 
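// Typical use of doNotOptimizeAway inside a benchmarked lambda, so the compiler cannot
// elide the computation (sketch only; assumes <cmath> is included by the caller):
//
//     bench.run("sqrt", [&] {
//         ankerl::nanobench::doNotOptimizeAway(std::sqrt(3.14159));
//     });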
1021 namespace detail {
1022 
1023 #if defined(_MSC_VER)
1024 void doNotOptimizeAwaySink(void const*);
1025 
1026 template <typename T>
1027 void doNotOptimizeAway(T const& val);
1028 
1029 #else
1030 
1031 // This assembly magic is taken directly from what Google Benchmark does. I previously used what Facebook's folly does, but
1032 // that caused compilation problems in some cases. Google Benchmark seems to be the best tested anyway.
1033 // see https://github.com/google/benchmark/blob/master/include/benchmark/benchmark.h#L307
1034 template <typename T>
1035 void doNotOptimizeAway(T const& val) {
1036  // NOLINTNEXTLINE(hicpp-no-assembler)
1037  asm volatile("" : : "r,m"(val) : "memory");
1038 }
1039 
1040 template <typename T>
1041 void doNotOptimizeAway(T& val) {
1042 # if defined(__clang__)
1043  // NOLINTNEXTLINE(hicpp-no-assembler)
1044  asm volatile("" : "+r,m"(val) : : "memory");
1045 # else
1046  // NOLINTNEXTLINE(hicpp-no-assembler)
1047  asm volatile("" : "+m,r"(val) : : "memory");
1048 # endif
1049 }
1050 #endif
1051 
1052 // Internally used, but visible because run() is templated.
1053 // Not movable/copyable, so we simply use a raw pointer instead of unique_ptr. This saves us from
1054 // having to include <memory>, and avoids the template instantiation overhead of unique_ptr, which is unfortunately quite significant.
1055 ANKERL_NANOBENCH(IGNORE_EFFCPP_PUSH)
1056 class IterationLogic {
1057 public:
1058  explicit IterationLogic(Bench const& bench);
1061  IterationLogic(IterationLogic const&) = delete;
1064 
1065  ANKERL_NANOBENCH(NODISCARD) uint64_t numIters() const noexcept;
1066  void add(std::chrono::nanoseconds elapsed, PerformanceCounters const& pc) noexcept;
1067  void moveResultTo(std::vector<Result>& results) noexcept;
1068 
1069 private:
1070  struct Impl;
1071  Impl* mPimpl;
1072 };
1073 ANKERL_NANOBENCH(IGNORE_EFFCPP_POP)
1074 
1075 ANKERL_NANOBENCH(IGNORE_PADDED_PUSH)
1076 class PerformanceCounters {
1077 public:
1082 
1085 
1086  void beginMeasure();
1087  void endMeasure();
1088  void updateResults(uint64_t numIters);
1089 
1090  ANKERL_NANOBENCH(NODISCARD) PerfCountSet<uint64_t> const& val() const noexcept;
1091  ANKERL_NANOBENCH(NODISCARD) PerfCountSet<bool> const& has() const noexcept;
1092 
1093 private:
1094 #if ANKERL_NANOBENCH(PERF_COUNTERS)
1095  LinuxPerformanceCounters* mPc = nullptr;
1096 #endif
1099 };
1100 ANKERL_NANOBENCH(IGNORE_PADDED_POP)
1101 
1102 // Gets the singleton
1103 PerformanceCounters& performanceCounters();
1104 
1105 } // namespace detail
1106 
1107 class BigO {
1108 public:
1109  using RangeMeasure = std::vector<std::pair<double, double>>;
1110 
1111  template <typename Op>
1112  static RangeMeasure mapRangeMeasure(RangeMeasure data, Op op) {
1113  for (auto& rangeMeasure : data) {
1114  rangeMeasure.first = op(rangeMeasure.first);
1115  }
1116  return data;
1117  }
1118 
1119  static RangeMeasure collectRangeMeasure(std::vector<Result> const& results);
1120 
1121  template <typename Op>
1122  BigO(char const* bigOName, RangeMeasure const& rangeMeasure, Op rangeToN)
1123  : BigO(bigOName, mapRangeMeasure(rangeMeasure, rangeToN)) {}
1124 
1125  template <typename Op>
1126  BigO(std::string bigOName, RangeMeasure const& rangeMeasure, Op rangeToN)
1127  : BigO(std::move(bigOName), mapRangeMeasure(rangeMeasure, rangeToN)) {}
1128 
1129  BigO(char const* bigOName, RangeMeasure const& scaledRangeMeasure);
1130  BigO(std::string bigOName, RangeMeasure const& scaledRangeMeasure);
1131  ANKERL_NANOBENCH(NODISCARD) std::string const& name() const noexcept;
1132  ANKERL_NANOBENCH(NODISCARD) double constant() const noexcept;
1133  ANKERL_NANOBENCH(NODISCARD) double normalizedRootMeanSquare() const noexcept;
1134  ANKERL_NANOBENCH(NODISCARD) bool operator<(BigO const& other) const noexcept;
1135 
1136 private:
1137  std::string mName{};
1138  double mConstant{};
1139  double mNormalizedRootMeanSquare{};
1140 };
1141 std::ostream& operator<<(std::ostream& os, BigO const& bigO);
1142 std::ostream& operator<<(std::ostream& os, std::vector<ankerl::nanobench::BigO> const& bigOs);
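// Asymptotic-complexity sketch using the declarations above: run the same benchmark for
// several problem sizes, record each size with complexityN(), then fit Big-O curves.
// The container setup is purely illustrative (assumes <algorithm> and <iostream>):
//
//     ankerl::nanobench::Bench bench;
//     for (size_t n = 10; n <= 100000; n *= 10) {
//         std::vector<uint64_t> v(n);
//         bench.complexityN(n).run("fill", [&] {
//             std::fill(v.begin(), v.end(), 123);
//         });
//     }
//     std::cout << bench.complexityBigO() << std::endl;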
1143 
1144 } // namespace nanobench
1145 } // namespace ankerl
1146 
1147 // implementation /////////////////////////////////////////////////////////////////////////////////
1148 
1149 namespace ankerl {
1150 namespace nanobench {
1151 
1152 constexpr uint64_t(Rng::min)() {
1153  return 0;
1154 }
1155 
1156 constexpr uint64_t(Rng::max)() {
1157  return (std::numeric_limits<uint64_t>::max)();
1158 }
1159 
1160 ANKERL_NANOBENCH_NO_SANITIZE("integer", "undefined")
1161 uint64_t Rng::operator()() noexcept {
1162  auto x = mX;
1163 
1164  mX = UINT64_C(15241094284759029579) * mY;
1165  mY = rotl(mY - x, 27);
1166 
1167  return x;
1168 }
1169 
1170 ANKERL_NANOBENCH_NO_SANITIZE("integer", "undefined")
1171 uint32_t Rng::bounded(uint32_t range) noexcept {
1172  uint64_t const r32 = static_cast<uint32_t>(operator()());
1173  auto multiresult = r32 * range;
1174  return static_cast<uint32_t>(multiresult >> 32U);
1175 }
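// Why the multiply-shift above works: r32 * range is a 64 bit product of a 32 bit random
// value and range, so its upper 32 bits are an (almost) uniformly distributed value in
// [0, range). Skipping the usual rejection step is what makes the result slightly biased.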
1176 
1177 double Rng::uniform01() noexcept {
1178  auto i = (UINT64_C(0x3ff) << 52U) | (operator()() >> 12U);
1179  // can't use a union in C++ here for type punning, it's undefined behavior.
1180  // std::memcpy is well optimized anyway.
1181  double d{};
1182  std::memcpy(&d, &i, sizeof(double));
1183  return d - 1.0;
1184 }
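// How the bit trick above works: (0x3ff << 52) sets the IEEE-754 exponent so that the
// assembled double lies in [1.0, 2.0); the top 52 random bits fill the mantissa, and
// subtracting 1.0 maps the result into [0, 1).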
1185 
1186 template <typename Container>
1187 void Rng::shuffle(Container& container) noexcept {
1188  auto i = container.size();
1189  while (i > 1U) {
1190  using std::swap;
1191  auto n = operator()();
1192  // using decltype(i) instead of size_t to be compatible with containers that use a 32-bit index (see #80)
1193  auto b1 = static_cast<decltype(i)>((static_cast<uint32_t>(n) * static_cast<uint64_t>(i)) >> 32U);
1194  swap(container[--i], container[b1]);
1195 
1196  auto b2 = static_cast<decltype(i)>(((n >> 32U) * static_cast<uint64_t>(i)) >> 32U);
1197  swap(container[--i], container[b2]);
1198  }
1199 }
1200 
1201 ANKERL_NANOBENCH_NO_SANITIZE("integer", "undefined")
1202 constexpr uint64_t Rng::rotl(uint64_t x, unsigned k) noexcept {
1203  return (x << k) | (x >> (64U - k));
1204 }
1205 
1206 template <typename Op>
1208 Bench& Bench::run(Op&& op) {
1209  // It is important that this method is kept short so the compiler can do better optimization and inlining of op()
1210  detail::IterationLogic iterationLogic(*this);
1211  auto& pc = detail::performanceCounters();
1212 
1213  while (auto n = iterationLogic.numIters()) {
1214  pc.beginMeasure();
1215  Clock::time_point const before = Clock::now();
1216  while (n-- > 0) {
1217  op();
1218  }
1219  Clock::time_point const after = Clock::now();
1220  pc.endMeasure();
1221  pc.updateResults(iterationLogic.numIters());
1222  iterationLogic.add(after - before, pc);
1223  }
1224  iterationLogic.moveResultTo(mResults);
1225  return *this;
1226 }
1227 
1228 // Performs all evaluations.
1229 template <typename Op>
1230 Bench& Bench::run(char const* benchmarkName, Op&& op) {
1231  name(benchmarkName);
1232  return run(std::forward<Op>(op));
1233 }
1234 
1235 template <typename Op>
1236 Bench& Bench::run(std::string const& benchmarkName, Op&& op) {
1237  name(benchmarkName);
1238  return run(std::forward<Op>(op));
1239 }
1240 
1241 template <typename Op>
1242 BigO Bench::complexityBigO(char const* benchmarkName, Op op) const {
1243  return BigO(benchmarkName, BigO::collectRangeMeasure(mResults), op);
1244 }
1245 
1246 template <typename Op>
1247 BigO Bench::complexityBigO(std::string const& benchmarkName, Op op) const {
1248  return BigO(benchmarkName, BigO::collectRangeMeasure(mResults), op);
1249 }
1250 
1251 // Set the batch size, e.g. number of processed bytes, or some other metric for the size of the processed data in each iteration.
1252 // Any argument is cast to double.
1253 template <typename T>
1254 Bench& Bench::batch(T b) noexcept {
1255  mConfig.mBatch = static_cast<double>(b);
1256  return *this;
1257 }
1258 
1259 // Sets the computation complexity of the next run. Any argument is cast to double.
1260 template <typename T>
1261 Bench& Bench::complexityN(T n) noexcept {
1262  mConfig.mComplexityN = static_cast<double>(n);
1263  return *this;
1264 }
1265 
1266 // Convenience: makes sure none of the given arguments are optimized away by the compiler.
1267 template <typename Arg>
1268 Bench& Bench::doNotOptimizeAway(Arg&& arg) {
1269  detail::doNotOptimizeAway(std::forward<Arg>(arg));
1270  return *this;
1271 }
1272 
1273 // Makes sure none of the given arguments are optimized away by the compiler.
1274 template <typename Arg>
1275 void doNotOptimizeAway(Arg&& arg) {
1276  detail::doNotOptimizeAway(std::forward<Arg>(arg));
1277 }
1278 
1279 namespace detail {
1280 
1281 #if defined(_MSC_VER)
1282 template <typename T>
1283 void doNotOptimizeAway(T const& val) {
1284  doNotOptimizeAwaySink(&val);
1285 }
1286 
1287 #endif
1288 
1289 } // namespace detail
1290 } // namespace nanobench
1291 } // namespace ankerl
1292 
1293 #if defined(ANKERL_NANOBENCH_IMPLEMENT)
1294 
1296 // implementation part - only visible in .cpp
1298 
1299 # include <algorithm> // sort, reverse
1300 # include <atomic> // compare_exchange_strong in loop overhead
1301 # include <cstdlib> // getenv
1302 # include <cstring> // strstr, strncmp
1303 # include <fstream> // ifstream to parse proc files
1304 # include <iomanip> // setw, setprecision
1305 # include <iostream> // cout
1306 # include <numeric> // accumulate
1307 # include <random> // random_device
1308 # include <sstream> // to_s in Number
1309 # include <stdexcept> // throw for rendering templates
1310 # include <tuple> // std::tie
1311 # if defined(__linux__)
1312 # include <unistd.h> //sysconf
1313 # endif
1314 # if ANKERL_NANOBENCH(PERF_COUNTERS)
1315 # include <map> // map
1316 
1317 # include <linux/perf_event.h>
1318 # include <sys/ioctl.h>
1319 # include <sys/syscall.h>
1320 # endif
1321 
1322 // declarations ///////////////////////////////////////////////////////////////////////////////////
1323 
1324 namespace ankerl {
1325 namespace nanobench {
1326 
1327 // helper stuff that is only intended to be used internally
1328 namespace detail {
1329 
1330 struct TableInfo;
1331 
1332 // formatting utilities
1333 namespace fmt {
1334 
1335 class NumSep;
1336 class StreamStateRestorer;
1337 class Number;
1338 class MarkDownColumn;
1339 class MarkDownCode;
1340 
1341 } // namespace fmt
1342 } // namespace detail
1343 } // namespace nanobench
1344 } // namespace ankerl
1345 
1346 // definitions ////////////////////////////////////////////////////////////////////////////////////
1347 
1348 namespace ankerl {
1349 namespace nanobench {
1350 
1351 uint64_t splitMix64(uint64_t& state) noexcept;
1352 
1353 namespace detail {
1354 
1355 // helpers to get double values
1356 template <typename T>
1357 inline double d(T t) noexcept {
1358  return static_cast<double>(t);
1359 }
1360 inline double d(Clock::duration duration) noexcept {
1361  return std::chrono::duration_cast<std::chrono::duration<double>>(duration).count();
1362 }
1363 
1364 // Calculates clock resolution once, and remembers the result
1365 inline Clock::duration clockResolution() noexcept;
1366 
1367 } // namespace detail
1368 
1369 namespace templates {
1370 
1371 char const* csv() noexcept {
1372  return R"DELIM("title";"name";"unit";"batch";"elapsed";"error %";"instructions";"branches";"branch misses";"total"
1373 {{#result}}"{{title}}";"{{name}}";"{{unit}}";{{batch}};{{median(elapsed)}};{{medianAbsolutePercentError(elapsed)}};{{median(instructions)}};{{median(branchinstructions)}};{{median(branchmisses)}};{{sumProduct(iterations, elapsed)}}
1374 {{/result}})DELIM";
1375 }
1376 
1377 char const* htmlBoxplot() noexcept {
1378  return R"DELIM(<html>
1379 
1380 <head>
1381  <script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
1382 </head>
1383 
1384 <body>
1385  <div id="myDiv"></div>
1386  <script>
1387  var data = [
1388  {{#result}}{
1389  name: '{{name}}',
1390  y: [{{#measurement}}{{elapsed}}{{^-last}}, {{/last}}{{/measurement}}],
1391  },
1392  {{/result}}
1393  ];
1394  var title = '{{title}}';
1395 
1396  data = data.map(a => Object.assign(a, { boxpoints: 'all', pointpos: 0, type: 'box' }));
1397  var layout = { title: { text: title }, showlegend: false, yaxis: { title: 'time per unit', rangemode: 'tozero', autorange: true } }; Plotly.newPlot('myDiv', data, layout, {responsive: true});
1398  </script>
1399 </body>
1400 
1401 </html>)DELIM";
1402 }
1403 
1404 char const* pyperf() noexcept {
1405  return R"DELIM({
1406  "benchmarks": [
1407  {
1408  "runs": [
1409  {
1410  "values": [
1411 {{#measurement}} {{elapsed}}{{^-last}},
1412 {{/last}}{{/measurement}}
1413  ]
1414  }
1415  ]
1416  }
1417  ],
1418  "metadata": {
1419  "loops": {{sum(iterations)}},
1420  "inner_loops": {{batch}},
1421  "name": "{{title}}",
1422  "unit": "second"
1423  },
1424  "version": "1.0"
1425 })DELIM";
1426 }
1427 
1428 char const* json() noexcept {
1429  return R"DELIM({
1430  "results": [
1431 {{#result}} {
1432  "title": "{{title}}",
1433  "name": "{{name}}",
1434  "unit": "{{unit}}",
1435  "batch": {{batch}},
1436  "complexityN": {{complexityN}},
1437  "epochs": {{epochs}},
1438  "clockResolution": {{clockResolution}},
1439  "clockResolutionMultiple": {{clockResolutionMultiple}},
1440  "maxEpochTime": {{maxEpochTime}},
1441  "minEpochTime": {{minEpochTime}},
1442  "minEpochIterations": {{minEpochIterations}},
1443  "epochIterations": {{epochIterations}},
1444  "warmup": {{warmup}},
1445  "relative": {{relative}},
1446  "median(elapsed)": {{median(elapsed)}},
1447  "medianAbsolutePercentError(elapsed)": {{medianAbsolutePercentError(elapsed)}},
1448  "median(instructions)": {{median(instructions)}},
1449  "medianAbsolutePercentError(instructions)": {{medianAbsolutePercentError(instructions)}},
1450  "median(cpucycles)": {{median(cpucycles)}},
1451  "median(contextswitches)": {{median(contextswitches)}},
1452  "median(pagefaults)": {{median(pagefaults)}},
1453  "median(branchinstructions)": {{median(branchinstructions)}},
1454  "median(branchmisses)": {{median(branchmisses)}},
1455  "totalTime": {{sumProduct(iterations, elapsed)}},
1456  "measurements": [
1457 {{#measurement}} {
1458  "iterations": {{iterations}},
1459  "elapsed": {{elapsed}},
1460  "pagefaults": {{pagefaults}},
1461  "cpucycles": {{cpucycles}},
1462  "contextswitches": {{contextswitches}},
1463  "instructions": {{instructions}},
1464  "branchinstructions": {{branchinstructions}},
1465  "branchmisses": {{branchmisses}}
1466  }{{^-last}},{{/-last}}
1467 {{/measurement}} ]
1468  }{{^-last}},{{/-last}}
1469 {{/result}} ]
1470 })DELIM";
1471 }
1472 
1473 ANKERL_NANOBENCH(IGNORE_PADDED_PUSH)
1474 struct Node {
1475  enum class Type { tag, content, section, inverted_section };
1476 
1477  char const* begin;
1478  char const* end;
1479  std::vector<Node> children;
1480  Type type;
1481 
1482  template <size_t N>
1483  // NOLINTNEXTLINE(hicpp-avoid-c-arrays,modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays)
1484  bool operator==(char const (&str)[N]) const noexcept {
1485  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-array-to-pointer-decay)
1486  return static_cast<size_t>(std::distance(begin, end) + 1) == N && 0 == strncmp(str, begin, N - 1);
1487  }
1488 };
1489 ANKERL_NANOBENCH(IGNORE_PADDED_POP)
1490 
1491 // NOLINTNEXTLINE(misc-no-recursion)
1492 static std::vector<Node> parseMustacheTemplate(char const** tpl) {
1493  std::vector<Node> nodes;
1494 
1495  while (true) {
1496  auto const* begin = std::strstr(*tpl, "{{");
1497  auto const* end = begin;
1498  if (begin != nullptr) {
1499  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
1500  begin += 2;
1501  end = std::strstr(begin, "}}");
1502  }
1503 
1504  if (begin == nullptr || end == nullptr) {
1505  // nothing found, finish node
1506  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
1507  nodes.emplace_back(Node{*tpl, *tpl + std::strlen(*tpl), std::vector<Node>{}, Node::Type::content});
1508  return nodes;
1509  }
1510 
1511  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
1512  nodes.emplace_back(Node{*tpl, begin - 2, std::vector<Node>{}, Node::Type::content});
1513 
1514  // we found a tag
1515  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
1516  *tpl = end + 2;
1517  switch (*begin) {
1518  case '/':
1519  // finished! bail out
1520  return nodes;
1521 
1522  case '#':
1523  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
1524  nodes.emplace_back(Node{begin + 1, end, parseMustacheTemplate(tpl), Node::Type::section});
1525  break;
1526 
1527  case '^':
1528  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
1529  nodes.emplace_back(Node{begin + 1, end, parseMustacheTemplate(tpl), Node::Type::inverted_section});
1530  break;
1531 
1532  default:
1533  nodes.emplace_back(Node{begin, end, std::vector<Node>{}, Node::Type::tag});
1534  break;
1535  }
1536  }
1537 }
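// Example of what parseMustacheTemplate() produces for the template
// "x{{#result}}{{name}}{{/result}}y": a content node ("x"), a section node ("result")
// whose children are a tag node ("name") surrounded by two empty content nodes, and a
// trailing content node ("y").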
1538 
1539 static bool generateFirstLast(Node const& n, size_t idx, size_t size, std::ostream& out) {
1540  ANKERL_NANOBENCH_LOG("n.type=" << static_cast<int>(n.type));
1541  bool const matchFirst = n == "-first";
1542  bool const matchLast = n == "-last";
1543  if (!matchFirst && !matchLast) {
1544  return false;
1545  }
1546 
1547  bool doWrite = false;
1548  if (n.type == Node::Type::section) {
1549  doWrite = (matchFirst && idx == 0) || (matchLast && idx == size - 1);
1550  } else if (n.type == Node::Type::inverted_section) {
1551  doWrite = (matchFirst && idx != 0) || (matchLast && idx != size - 1);
1552  }
1553 
1554  if (doWrite) {
1555  for (auto const& child : n.children) {
1556  if (child.type == Node::Type::content) {
1557  out.write(child.begin, std::distance(child.begin, child.end));
1558  }
1559  }
1560  }
1561  return true;
1562 }
1563 
1564 static bool matchCmdArgs(std::string const& str, std::vector<std::string>& matchResult) {
1565  matchResult.clear();
1566  auto idxOpen = str.find('(');
1567  auto idxClose = str.find(')', idxOpen);
1568  if (idxClose == std::string::npos) {
1569  return false;
1570  }
1571 
1572  matchResult.emplace_back(str.substr(0, idxOpen));
1573 
1574  // split by comma
1575  matchResult.emplace_back();
1576  for (size_t i = idxOpen + 1; i != idxClose; ++i) {
1577  if (str[i] == ' ' || str[i] == '\t') {
1578  // skip whitespace
1579  continue;
1580  }
1581  if (str[i] == ',') {
1582  // got a comma => new string
1583  matchResult.emplace_back();
1584  continue;
1585  }
1586  // no whitespace no comma, append
1587  matchResult.back() += str[i];
1588  }
1589  return true;
1590 }
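// For example, matchCmdArgs("median(elapsed)", m) sets m to {"median", "elapsed"}, and
// matchCmdArgs("sumProduct(iterations, elapsed)", m) sets m to
// {"sumProduct", "iterations", "elapsed"}. This is what generateResultTag() below uses
// instead of std::regex.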
1591 
1592 static bool generateConfigTag(Node const& n, Config const& config, std::ostream& out) {
1593  using detail::d;
1594 
1595  if (n == "title") {
1596  out << config.mBenchmarkTitle;
1597  return true;
1598  }
1599  if (n == "name") {
1600  out << config.mBenchmarkName;
1601  return true;
1602  }
1603  if (n == "unit") {
1604  out << config.mUnit;
1605  return true;
1606  }
1607  if (n == "batch") {
1608  out << config.mBatch;
1609  return true;
1610  }
1611  if (n == "complexityN") {
1612  out << config.mComplexityN;
1613  return true;
1614  }
1615  if (n == "epochs") {
1616  out << config.mNumEpochs;
1617  return true;
1618  }
1619  if (n == "clockResolution") {
1620  out << d(detail::clockResolution());
1621  return true;
1622  }
1623  if (n == "clockResolutionMultiple") {
1624  out << config.mClockResolutionMultiple;
1625  return true;
1626  }
1627  if (n == "maxEpochTime") {
1628  out << d(config.mMaxEpochTime);
1629  return true;
1630  }
1631  if (n == "minEpochTime") {
1632  out << d(config.mMinEpochTime);
1633  return true;
1634  }
1635  if (n == "minEpochIterations") {
1636  out << config.mMinEpochIterations;
1637  return true;
1638  }
1639  if (n == "epochIterations") {
1640  out << config.mEpochIterations;
1641  return true;
1642  }
1643  if (n == "warmup") {
1644  out << config.mWarmup;
1645  return true;
1646  }
1647  if (n == "relative") {
1648  out << config.mIsRelative;
1649  return true;
1650  }
1651  return false;
1652 }
1653 
1654 // NOLINTNEXTLINE(readability-function-cognitive-complexity)
1655 static std::ostream& generateResultTag(Node const& n, Result const& r, std::ostream& out) {
1656  if (generateConfigTag(n, r.config(), out)) {
1657  return out;
1658  }
1659  // match e.g. "median(elapsed)"
1660  // g++ 4.8 doesn't implement std::regex :(
1661  // static std::regex const regOpArg1("^([a-zA-Z]+)\\(([a-zA-Z]*)\\)$");
1662  // std::cmatch matchResult;
1663  // if (std::regex_match(n.begin, n.end, matchResult, regOpArg1)) {
1664  std::vector<std::string> matchResult;
1665  if (matchCmdArgs(std::string(n.begin, n.end), matchResult)) {
1666  if (matchResult.size() == 2) {
1667  if (matchResult[0] == "context") {
1668  return out << r.context(matchResult[1]);
1669  }
1670 
1671  auto m = Result::fromString(matchResult[1]);
1672  if (m == Result::Measure::_size) {
1673  return out << 0.0;
1674  }
1675 
1676  if (matchResult[0] == "median") {
1677  return out << r.median(m);
1678  }
1679  if (matchResult[0] == "average") {
1680  return out << r.average(m);
1681  }
1682  if (matchResult[0] == "medianAbsolutePercentError") {
1683  return out << r.medianAbsolutePercentError(m);
1684  }
1685  if (matchResult[0] == "sum") {
1686  return out << r.sum(m);
1687  }
1688  if (matchResult[0] == "minimum") {
1689  return out << r.minimum(m);
1690  }
1691  if (matchResult[0] == "maximum") {
1692  return out << r.maximum(m);
1693  }
1694  } else if (matchResult.size() == 3) {
1695  auto m1 = Result::fromString(matchResult[1]);
1696  auto m2 = Result::fromString(matchResult[2]);
1697  if (m1 == Result::Measure::_size || m2 == Result::Measure::_size) {
1698  return out << 0.0;
1699  }
1700 
1701  if (matchResult[0] == "sumProduct") {
1702  return out << r.sumProduct(m1, m2);
1703  }
1704  }
1705  }
1706 
1707  // match e.g. "sumProduct(elapsed, iterations)"
1708  // static std::regex const regOpArg2("^([a-zA-Z]+)\\(([a-zA-Z]*)\\s*,\\s+([a-zA-Z]*)\\)$");
1709 
1710  // nothing matches :(
1711  throw std::runtime_error("command '" + std::string(n.begin, n.end) + "' not understood");
1712 }
1713 
1714 static void generateResultMeasurement(std::vector<Node> const& nodes, size_t idx, Result const& r, std::ostream& out) {
1715  for (auto const& n : nodes) {
1716  if (!generateFirstLast(n, idx, r.size(), out)) {
1717  ANKERL_NANOBENCH_LOG("n.type=" << static_cast<int>(n.type));
1718  switch (n.type) {
1719  case Node::Type::content:
1720  out.write(n.begin, std::distance(n.begin, n.end));
1721  break;
1722 
1723  case Node::Type::inverted_section:
1724  throw std::runtime_error("got an inverted section inside measurement");
1725 
1726  case Node::Type::section:
1727  throw std::runtime_error("got a section inside measurement");
1728 
1729  case Node::Type::tag: {
1730  auto m = Result::fromString(std::string(n.begin, n.end));
1731  if (m == Result::Measure::_size || !r.has(m)) {
1732  out << 0.0;
1733  } else {
1734  out << r.get(idx, m);
1735  }
1736  break;
1737  }
1738  }
1739  }
1740  }
1741 }
1742 
1743 static void generateResult(std::vector<Node> const& nodes, size_t idx, std::vector<Result> const& results, std::ostream& out) {
1744  auto const& r = results[idx];
1745  for (auto const& n : nodes) {
1746  if (!generateFirstLast(n, idx, results.size(), out)) {
1747  ANKERL_NANOBENCH_LOG("n.type=" << static_cast<int>(n.type));
1748  switch (n.type) {
1749  case Node::Type::content:
1750  out.write(n.begin, std::distance(n.begin, n.end));
1751  break;
1752 
1753  case Node::Type::inverted_section:
1754  throw std::runtime_error("got an inverted section inside result");
1755 
1756  case Node::Type::section:
1757  if (n == "measurement") {
1758  for (size_t i = 0; i < r.size(); ++i) {
1759  generateResultMeasurement(n.children, i, r, out);
1760  }
1761  } else {
1762  throw std::runtime_error("got a section inside result");
1763  }
1764  break;
1765 
1766  case Node::Type::tag:
1767  generateResultTag(n, r, out);
1768  break;
1769  }
1770  }
1771  }
1772 }
1773 
1774 } // namespace templates
1775 
1776 // helper stuff that is only intended to be used internally
1777 namespace detail {
1778 
1779 char const* getEnv(char const* name);
1780 bool isEndlessRunning(std::string const& name);
1781 bool isWarningsEnabled();
1782 
1783 template <typename T>
1784 T parseFile(std::string const& filename);
1785 
1786 void gatherStabilityInformation(std::vector<std::string>& warnings, std::vector<std::string>& recommendations);
1787 void printStabilityInformationOnce(std::ostream* outStream);
1788 
1789 // remembers the last table settings used. When it changes, a new table header is automatically written for the new entry.
1790 uint64_t& singletonHeaderHash() noexcept;
1791 
1792 // determines resolution of the given clock. This is done by measuring multiple times and returning the minimum time difference.
1793 Clock::duration calcClockResolution(size_t numEvaluations) noexcept;
1794 
1795 // formatting utilities
1796 namespace fmt {
1797 
1798 // adds thousands separator to numbers
1799 ANKERL_NANOBENCH(IGNORE_PADDED_PUSH)
1800 class NumSep : public std::numpunct<char> {
1801 public:
1802  explicit NumSep(char sep);
1803  char do_thousands_sep() const override;
1804  std::string do_grouping() const override;
1805 
1806 private:
1807  char mSep;
1808 };
1809 ANKERL_NANOBENCH(IGNORE_PADDED_POP)
1810 
1811 // RAII to save & restore a stream's state
1812 ANKERL_NANOBENCH(IGNORE_PADDED_PUSH)
1813 class StreamStateRestorer {
1814 public:
1815  explicit StreamStateRestorer(std::ostream& s);
1816  ~StreamStateRestorer();
1817 
1818  // sets back all stream info that we remembered at construction
1819  void restore();
1820 
1821  // don't allow copying / moving
1822  StreamStateRestorer(StreamStateRestorer const&) = delete;
1823  StreamStateRestorer& operator=(StreamStateRestorer const&) = delete;
1824  StreamStateRestorer(StreamStateRestorer&&) = delete;
1825  StreamStateRestorer& operator=(StreamStateRestorer&&) = delete;
1826 
1827 private:
1828  std::ostream& mStream;
1829  std::locale mLocale;
1830  std::streamsize const mPrecision;
1831  std::streamsize const mWidth;
1832  std::ostream::char_type const mFill;
1833  std::ostream::fmtflags const mFmtFlags;
1834 };
1835 ANKERL_NANOBENCH(IGNORE_PADDED_POP)
1836 
1837 // Number formatter
1838 class Number {
1839 public:
1840  Number(int width, int precision, double value);
1841  Number(int width, int precision, int64_t value);
1842  std::string to_s() const;
1843 
1844 private:
1845  friend std::ostream& operator<<(std::ostream& os, Number const& n);
1846  std::ostream& write(std::ostream& os) const;
1847 
1848  int mWidth;
1849  int mPrecision;
1850  double mValue;
1851 };
1852 
1853 // helper replacement for std::to_string of signed/unsigned numbers so we are locale independent
1854 std::string to_s(uint64_t n);
1855 
1856 std::ostream& operator<<(std::ostream& os, Number const& n);
1857 
1858 class MarkDownColumn {
1859 public:
1860  MarkDownColumn(int w, int prec, std::string tit, std::string suff, double val);
1861  std::string title() const;
1862  std::string separator() const;
1863  std::string invalid() const;
1864  std::string value() const;
1865 
1866 private:
1867  int mWidth;
1868  int mPrecision;
1869  std::string mTitle;
1870  std::string mSuffix;
1871  double mValue;
1872 };
1873 
1874 // Formats any text as markdown code, escaping backticks.
1875 class MarkDownCode {
1876 public:
1877  explicit MarkDownCode(std::string const& what);
1878 
1879 private:
1880  friend std::ostream& operator<<(std::ostream& os, MarkDownCode const& mdCode);
1881  std::ostream& write(std::ostream& os) const;
1882 
1883  std::string mWhat{};
1884 };
1885 
1886 std::ostream& operator<<(std::ostream& os, MarkDownCode const& mdCode);
1887 
1888 } // namespace fmt
1889 } // namespace detail
1890 } // namespace nanobench
1891 } // namespace ankerl
1892 
1893 // implementation /////////////////////////////////////////////////////////////////////////////////
1894 
1895 namespace ankerl {
1896 namespace nanobench {
1897 
1898 // NOLINTNEXTLINE(readability-function-cognitive-complexity)
1899 void render(char const* mustacheTemplate, std::vector<Result> const& results, std::ostream& out) {
1900  detail::fmt::StreamStateRestorer const restorer(out);
1901 
1902  out.precision(std::numeric_limits<double>::digits10);
1903  auto nodes = templates::parseMustacheTemplate(&mustacheTemplate);
1904 
1905  for (auto const& n : nodes) {
1906  ANKERL_NANOBENCH_LOG("n.type=" << static_cast<int>(n.type));
1907  switch (n.type) {
1908  case templates::Node::Type::content:
1909  out.write(n.begin, std::distance(n.begin, n.end));
1910  break;
1911 
1912  case templates::Node::Type::inverted_section:
1913  throw std::runtime_error("unknown list '" + std::string(n.begin, n.end) + "'");
1914 
1915  case templates::Node::Type::section:
1916  if (n == "result") {
1917  const size_t nbResults = results.size();
1918  for (size_t i = 0; i < nbResults; ++i) {
1919  generateResult(n.children, i, results, out);
1920  }
1921  } else if (n == "measurement") {
1922  if (results.size() != 1) {
1923  throw std::runtime_error(
1924  "render: can only use section 'measurement' here if there is a single result, but there are " +
1925  detail::fmt::to_s(results.size()));
1926  }
1927  // when we only have a single result, we can immediately go into its measurement.
1928  auto const& r = results.front();
1929  for (size_t i = 0; i < r.size(); ++i) {
1930  generateResultMeasurement(n.children, i, r, out);
1931  }
1932  } else {
1933  throw std::runtime_error("render: unknown section '" + std::string(n.begin, n.end) + "'");
1934  }
1935  break;
1936 
1937  case templates::Node::Type::tag:
1938  if (results.size() == 1) {
1939  // result & config are both supported here
1940  generateResultTag(n, results.front(), out);
1941  } else {
1942  // This just uses the last result's config.
1943  if (!generateConfigTag(n, results.back().config(), out)) {
1944  throw std::runtime_error("unknown tag '" + std::string(n.begin, n.end) + "'");
1945  }
1946  }
1947  break;
1948  }
1949  }
1950 }
1951 
1952 void render(std::string const& mustacheTemplate, std::vector<Result> const& results, std::ostream& out) {
1953  render(mustacheTemplate.c_str(), results, out);
1954 }
1955 
1956 void render(char const* mustacheTemplate, const Bench& bench, std::ostream& out) {
1957  render(mustacheTemplate, bench.results(), out);
1958 }
1959 
1960 void render(std::string const& mustacheTemplate, const Bench& bench, std::ostream& out) {
1961  render(mustacheTemplate.c_str(), bench.results(), out);
1962 }
1963 
1964 namespace detail {
1965 
1966 PerformanceCounters& performanceCounters() {
1967 # if defined(__clang__)
1968 # pragma clang diagnostic push
1969 # pragma clang diagnostic ignored "-Wexit-time-destructors"
1970 # endif
1971  static PerformanceCounters pc;
1972 # if defined(__clang__)
1973 # pragma clang diagnostic pop
1974 # endif
1975  return pc;
1976 }
1977 
1978 // Windows version of doNotOptimizeAway
1979 // see https://github.com/google/benchmark/blob/master/include/benchmark/benchmark.h#L307
1980 // see https://github.com/facebook/folly/blob/master/folly/Benchmark.h#L280
1981 // see https://docs.microsoft.com/en-us/cpp/preprocessor/optimize
1982 # if defined(_MSC_VER)
1983 # pragma optimize("", off)
1984 void doNotOptimizeAwaySink(void const*) {}
1985 # pragma optimize("", on)
1986 # endif
1987 
1988 template <typename T>
1989 T parseFile(std::string const& filename) {
1990  std::ifstream fin(filename); // NOLINT(misc-const-correctness)
1991  T num{};
1992  fin >> num;
1993  return num;
1994 }
1995 
1996 char const* getEnv(char const* name) {
1997 # if defined(_MSC_VER)
1998 # pragma warning(push)
1999 # pragma warning(disable : 4996) // 'getenv': This function or variable may be unsafe.
2000 # endif
2001  return std::getenv(name); // NOLINT(concurrency-mt-unsafe)
2002 # if defined(_MSC_VER)
2003 # pragma warning(pop)
2004 # endif
2005 }
2006 
2007 bool isEndlessRunning(std::string const& name) {
2008  auto const* const endless = getEnv("NANOBENCH_ENDLESS");
2009  return nullptr != endless && endless == name;
2010 }
2011 
2012 // True when environment variable NANOBENCH_SUPPRESS_WARNINGS is either not set at all, or set to "0"
2013 bool isWarningsEnabled() {
2014  auto const* const suppression = getEnv("NANOBENCH_SUPPRESS_WARNINGS");
2015  return nullptr == suppression || suppression == std::string("0");
2016 }
2017 
2018 void gatherStabilityInformation(std::vector<std::string>& warnings, std::vector<std::string>& recommendations) {
2019  warnings.clear();
2020  recommendations.clear();
2021 
2022 # if defined(DEBUG)
2023  warnings.emplace_back("DEBUG defined");
2024  bool const recommendCheckFlags = true;
2025 # else
2026  bool const recommendCheckFlags = false;
2027 # endif
2028 
2029  bool recommendPyPerf = false;
2030 # if defined(__linux__)
2031  auto nprocs = sysconf(_SC_NPROCESSORS_CONF);
2032  if (nprocs <= 0) {
2033  warnings.emplace_back("couldn't figure out the number of processors - no governor or turbo check possible");
2034  } else {
2035 
2036  // check frequency scaling
2037  for (long id = 0; id < nprocs; ++id) {
2038  auto idStr = detail::fmt::to_s(static_cast<uint64_t>(id));
2039  auto sysCpu = "/sys/devices/system/cpu/cpu" + idStr;
2040  auto minFreq = parseFile<int64_t>(sysCpu + "/cpufreq/scaling_min_freq");
2041  auto maxFreq = parseFile<int64_t>(sysCpu + "/cpufreq/scaling_max_freq");
2042  if (minFreq != maxFreq) {
2043  auto minMHz = static_cast<double>(minFreq) / 1000.0;
2044  auto maxMHz = static_cast<double>(maxFreq) / 1000.0;
2045  warnings.emplace_back("CPU frequency scaling enabled: CPU " + idStr + " between " +
2046  detail::fmt::Number(1, 1, minMHz).to_s() + " and " + detail::fmt::Number(1, 1, maxMHz).to_s() +
2047  " MHz");
2048  recommendPyPerf = true;
2049  break;
2050  }
2051  }
2052 
2053  auto currentGovernor = parseFile<std::string>("/sys/devices/system/cpu/cpu0/cpufreq/scaling_governor");
2054  if ("performance" != currentGovernor) {
2055  warnings.emplace_back("CPU governor is '" + currentGovernor + "' but should be 'performance'");
2056  recommendPyPerf = true;
2057  }
2058 
2059  if (0 == parseFile<int>("/sys/devices/system/cpu/intel_pstate/no_turbo")) {
2060  warnings.emplace_back("Turbo is enabled, CPU frequency will fluctuate");
2061  recommendPyPerf = true;
2062  }
2063  }
2064 # endif
2065 
2066  if (recommendCheckFlags) {
2067  recommendations.emplace_back("Make sure you compile for Release");
2068  }
2069  if (recommendPyPerf) {
2070  recommendations.emplace_back("Use 'pyperf system tune' before benchmarking. See https://github.com/psf/pyperf");
2071  }
2072 }
2073 
2074 void printStabilityInformationOnce(std::ostream* outStream) {
2075  static bool shouldPrint = true;
2076  if (shouldPrint && (nullptr != outStream) && isWarningsEnabled()) {
2077  auto& os = *outStream;
2078  shouldPrint = false;
2079  std::vector<std::string> warnings;
2080  std::vector<std::string> recommendations;
2081  gatherStabilityInformation(warnings, recommendations);
2082  if (warnings.empty()) {
2083  return;
2084  }
2085 
2086  os << "Warning, results might be unstable:" << std::endl;
2087  for (auto const& w : warnings) {
2088  os << "* " << w << std::endl;
2089  }
2090 
2091  os << std::endl << "Recommendations" << std::endl;
2092  for (auto const& r : recommendations) {
2093  os << "* " << r << std::endl;
2094  }
2095  }
2096 }
2097 
2098 // remembers the last table settings used. When it changes, a new table header is automatically written for the new entry.
2099 uint64_t& singletonHeaderHash() noexcept {
2100  static uint64_t sHeaderHash{};
2101  return sHeaderHash;
2102 }
2103 
2104 ANKERL_NANOBENCH_NO_SANITIZE("integer", "undefined")
2105 inline uint64_t hash_combine(uint64_t seed, uint64_t val) {
2106  return seed ^ (val + UINT64_C(0x9e3779b9) + (seed << 6U) + (seed >> 2U));
2107 }
2108 
2109 // determines resolution of the given clock. This is done by measuring multiple times and returning the minimum time difference.
2110 Clock::duration calcClockResolution(size_t numEvaluations) noexcept {
2111  auto bestDuration = Clock::duration::max();
2112  Clock::time_point tBegin;
2113  Clock::time_point tEnd;
2114  for (size_t i = 0; i < numEvaluations; ++i) {
2115  tBegin = Clock::now();
2116  do {
2117  tEnd = Clock::now();
2118  } while (tBegin == tEnd);
2119  bestDuration = (std::min)(bestDuration, tEnd - tBegin);
2120  }
2121  return bestDuration;
2122 }
2123 
2124 // Calculates clock resolution once, and remembers the result
2125 Clock::duration clockResolution() noexcept {
2126  static Clock::duration const sResolution = calcClockResolution(20);
2127  return sResolution;
2128 }
2129 
2130 ANKERL_NANOBENCH(IGNORE_PADDED_PUSH)
2131 struct IterationLogic::Impl {
2132  enum class State { warmup, upscaling_runtime, measuring, endless };
2133 
2134  explicit Impl(Bench const& bench)
2135  : mBench(bench)
2136  , mResult(bench.config()) {
2137  printStabilityInformationOnce(mBench.output());
2138 
2139  // determine target runtime per epoch
2140  mTargetRuntimePerEpoch = detail::clockResolution() * mBench.clockResolutionMultiple();
2141  if (mTargetRuntimePerEpoch > mBench.maxEpochTime()) {
2142  mTargetRuntimePerEpoch = mBench.maxEpochTime();
2143  }
2144  if (mTargetRuntimePerEpoch < mBench.minEpochTime()) {
2145  mTargetRuntimePerEpoch = mBench.minEpochTime();
2146  }
2147 
2148  if (isEndlessRunning(mBench.name())) {
2149  std::cerr << "NANOBENCH_ENDLESS set: running '" << mBench.name() << "' endlessly" << std::endl;
2150  mNumIters = (std::numeric_limits<uint64_t>::max)();
2151  mState = State::endless;
2152  } else if (0 != mBench.warmup()) {
2153  mNumIters = mBench.warmup();
2154  mState = State::warmup;
2155  } else if (0 != mBench.epochIterations()) {
2156  // exact number of iterations
2157  mNumIters = mBench.epochIterations();
2158  mState = State::measuring;
2159  } else {
2160  mNumIters = mBench.minEpochIterations();
2161  mState = State::upscaling_runtime;
2162  }
2163  }
2164 
2165  // directly calculates new iters based on elapsed & iters, and adds up to 20% noise (10% on average). Makes sure we don't underflow.
2166  ANKERL_NANOBENCH(NODISCARD) uint64_t calcBestNumIters(std::chrono::nanoseconds elapsed, uint64_t iters) noexcept {
2167  auto doubleElapsed = d(elapsed);
2168  auto doubleTargetRuntimePerEpoch = d(mTargetRuntimePerEpoch);
2169  auto doubleNewIters = doubleTargetRuntimePerEpoch / doubleElapsed * d(iters);
2170 
2171  auto doubleMinEpochIters = d(mBench.minEpochIterations());
2172  if (doubleNewIters < doubleMinEpochIters) {
2173  doubleNewIters = doubleMinEpochIters;
2174  }
2175  doubleNewIters *= 1.0 + 0.2 * mRng.uniform01();
2176 
2177  // +0.5 for correct rounding when casting
2178  // NOLINTNEXTLINE(bugprone-incorrect-roundings)
2179  return static_cast<uint64_t>(doubleNewIters + 0.5);
2180  }
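// Worked example for calcBestNumIters(): if an epoch of 1000 iterations took 2 ms and the
// target runtime per epoch is 10 ms, the scaled estimate is 5000 iterations, and the added
// noise places the next epoch somewhere in roughly [5000, 6000] iterations.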
2181 
2182  ANKERL_NANOBENCH_NO_SANITIZE("integer", "undefined") void upscale(std::chrono::nanoseconds elapsed) {
2183  if (elapsed * 10 < mTargetRuntimePerEpoch) {
2184  // we are far below the target runtime. Multiply iterations by 10 (with overflow check)
2185  if (mNumIters * 10 < mNumIters) {
2186  // overflow :-(
2187  showResult("iterations overflow. Maybe your code got optimized away?");
2188  mNumIters = 0;
2189  return;
2190  }
2191  mNumIters *= 10;
2192  } else {
2193  mNumIters = calcBestNumIters(elapsed, mNumIters);
2194  }
2195  }
2196 
2197  void add(std::chrono::nanoseconds elapsed, PerformanceCounters const& pc) noexcept {
2198 # if defined(ANKERL_NANOBENCH_LOG_ENABLED)
2199  auto oldIters = mNumIters;
2200 # endif
2201 
2202  switch (mState) {
2203  case State::warmup:
2204  if (isCloseEnoughForMeasurements(elapsed)) {
2205  // if elapsed is close enough, we can skip upscaling and go right to measurements
2206  // still, we don't add the result to the measurements.
2207  mState = State::measuring;
2208  mNumIters = calcBestNumIters(elapsed, mNumIters);
2209  } else {
2210  // not close enough: switch to upscaling
2211  mState = State::upscaling_runtime;
2212  upscale(elapsed);
2213  }
2214  break;
2215 
2216  case State::upscaling_runtime:
2217  if (isCloseEnoughForMeasurements(elapsed)) {
2218  // if we are close enough, add measurement and switch to always measuring
2219  mState = State::measuring;
2220  mTotalElapsed += elapsed;
2221  mTotalNumIters += mNumIters;
2222  mResult.add(elapsed, mNumIters, pc);
2223  mNumIters = calcBestNumIters(mTotalElapsed, mTotalNumIters);
2224  } else {
2225  upscale(elapsed);
2226  }
2227  break;
2228 
2229  case State::measuring:
 2230  // just add the measurement, no questions asked, even when the runtime is low. Skipping
 2231  // such low-runtime epochs here would bias the result.
2232  mTotalElapsed += elapsed;
2233  mTotalNumIters += mNumIters;
2234  mResult.add(elapsed, mNumIters, pc);
2235  if (0 != mBench.epochIterations()) {
2236  mNumIters = mBench.epochIterations();
2237  } else {
2238  mNumIters = calcBestNumIters(mTotalElapsed, mTotalNumIters);
2239  }
2240  break;
2241 
2242  case State::endless:
2243  mNumIters = (std::numeric_limits<uint64_t>::max)();
2244  break;
2245  }
2246 
2247  if (static_cast<uint64_t>(mResult.size()) == mBench.epochs()) {
2248  // we got all the results that we need, finish it
2249  showResult("");
2250  mNumIters = 0;
2251  }
2252 
2253  ANKERL_NANOBENCH_LOG(mBench.name() << ": " << detail::fmt::Number(20, 3, static_cast<double>(elapsed.count())) << " elapsed, "
2254  << detail::fmt::Number(20, 3, static_cast<double>(mTargetRuntimePerEpoch.count()))
2255  << " target. oldIters=" << oldIters << ", mNumIters=" << mNumIters
2256  << ", mState=" << static_cast<int>(mState));
2257  }
2258 
2259  // NOLINTNEXTLINE(readability-function-cognitive-complexity)
2260  void showResult(std::string const& errorMessage) const {
2261  ANKERL_NANOBENCH_LOG(errorMessage);
2262 
2263  if (mBench.output() != nullptr) {
2264  // prepare column data ///////
2265  std::vector<fmt::MarkDownColumn> columns;
2266 
2267  auto rMedian = mResult.median(Result::Measure::elapsed);
2268 
2269  if (mBench.relative()) {
2270  double d = 100.0;
2271  if (!mBench.results().empty()) {
2272  d = rMedian <= 0.0 ? 0.0 : mBench.results().front().median(Result::Measure::elapsed) / rMedian * 100.0;
2273  }
2274  columns.emplace_back(11, 1, "relative", "%", d);
2275  }
2276 
2277  if (mBench.complexityN() > 0) {
2278  columns.emplace_back(14, 0, "complexityN", "", mBench.complexityN());
2279  }
2280 
2281  columns.emplace_back(22, 2, mBench.timeUnitName() + "/" + mBench.unit(), "",
2282  rMedian / (mBench.timeUnit().count() * mBench.batch()));
2283  columns.emplace_back(22, 2, mBench.unit() + "/s", "", rMedian <= 0.0 ? 0.0 : mBench.batch() / rMedian);
2284 
2285  double const rErrorMedian = mResult.medianAbsolutePercentError(Result::Measure::elapsed);
2286  columns.emplace_back(10, 1, "err%", "%", rErrorMedian * 100.0);
2287 
2288  double rInsMedian = -1.0;
2289  if (mBench.performanceCounters() && mResult.has(Result::Measure::instructions)) {
2290  rInsMedian = mResult.median(Result::Measure::instructions);
2291  columns.emplace_back(18, 2, "ins/" + mBench.unit(), "", rInsMedian / mBench.batch());
2292  }
2293 
2294  double rCycMedian = -1.0;
2295  if (mBench.performanceCounters() && mResult.has(Result::Measure::cpucycles)) {
2296  rCycMedian = mResult.median(Result::Measure::cpucycles);
2297  columns.emplace_back(18, 2, "cyc/" + mBench.unit(), "", rCycMedian / mBench.batch());
2298  }
2299  if (rInsMedian > 0.0 && rCycMedian > 0.0) {
2300  columns.emplace_back(9, 3, "IPC", "", rCycMedian <= 0.0 ? 0.0 : rInsMedian / rCycMedian);
2301  }
2302  if (mBench.performanceCounters() && mResult.has(Result::Measure::branchinstructions)) {
2303  double const rBraMedian = mResult.median(Result::Measure::branchinstructions);
2304  columns.emplace_back(17, 2, "bra/" + mBench.unit(), "", rBraMedian / mBench.batch());
2305  if (mResult.has(Result::Measure::branchmisses)) {
2306  double p = 0.0;
2307  if (rBraMedian >= 1e-9) {
2308  p = 100.0 * mResult.median(Result::Measure::branchmisses) / rBraMedian;
2309  }
2310  columns.emplace_back(10, 1, "miss%", "%", p);
2311  }
2312  }
2313 
2314  columns.emplace_back(12, 2, "total", "", mResult.sumProduct(Result::Measure::iterations, Result::Measure::elapsed));
2315 
2316  // write everything
2317  auto& os = *mBench.output();
2318 
2319  // combine all elements that are relevant for printing the header
2320  uint64_t hash = 0;
2321  hash = hash_combine(std::hash<std::string>{}(mBench.unit()), hash);
2322  hash = hash_combine(std::hash<std::string>{}(mBench.title()), hash);
2323  hash = hash_combine(std::hash<std::string>{}(mBench.timeUnitName()), hash);
2324  hash = hash_combine(std::hash<double>{}(mBench.timeUnit().count()), hash);
2325  hash = hash_combine(std::hash<bool>{}(mBench.relative()), hash);
2326  hash = hash_combine(std::hash<bool>{}(mBench.performanceCounters()), hash);
2327 
2328  if (hash != singletonHeaderHash()) {
2329  singletonHeaderHash() = hash;
2330 
2331  // no result yet, print header
2332  os << std::endl;
2333  for (auto const& col : columns) {
2334  os << col.title();
2335  }
2336  os << "| " << mBench.title() << std::endl;
2337 
2338  for (auto const& col : columns) {
2339  os << col.separator();
2340  }
2341  os << "|:" << std::string(mBench.title().size() + 1U, '-') << std::endl;
2342  }
2343 
2344  if (!errorMessage.empty()) {
2345  for (auto const& col : columns) {
2346  os << col.invalid();
2347  }
2348  os << "| :boom: " << fmt::MarkDownCode(mBench.name()) << " (" << errorMessage << ')' << std::endl;
2349  } else {
2350  for (auto const& col : columns) {
2351  os << col.value();
2352  }
2353  os << "| ";
2354  auto showUnstable = isWarningsEnabled() && rErrorMedian >= 0.05;
2355  if (showUnstable) {
2356  os << ":wavy_dash: ";
2357  }
2358  os << fmt::MarkDownCode(mBench.name());
2359  if (showUnstable) {
2360  auto avgIters = static_cast<double>(mTotalNumIters) / static_cast<double>(mBench.epochs());
2361  // NOLINTNEXTLINE(bugprone-incorrect-roundings)
2362  auto suggestedIters = static_cast<uint64_t>(avgIters * 10 + 0.5);
2363 
2364  os << " (Unstable with ~" << detail::fmt::Number(1, 1, avgIters)
2365  << " iters. Increase `minEpochIterations` to e.g. " << suggestedIters << ")";
2366  }
2367  os << std::endl;
2368  }
2369  }
2370  }
2371 
2372  ANKERL_NANOBENCH(NODISCARD) bool isCloseEnoughForMeasurements(std::chrono::nanoseconds elapsed) const noexcept {
2373  return elapsed * 3 >= mTargetRuntimePerEpoch * 2;
2374  }
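// Equivalent to elapsed >= (2/3) * mTargetRuntimePerEpoch, written with multiplications so the
// comparison stays in exact integer nanoseconds.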
2375 
2376  uint64_t mNumIters = 1; // NOLINT(misc-non-private-member-variables-in-classes)
2377  Bench const& mBench; // NOLINT(misc-non-private-member-variables-in-classes)
2378  std::chrono::nanoseconds mTargetRuntimePerEpoch{}; // NOLINT(misc-non-private-member-variables-in-classes)
2379  Result mResult; // NOLINT(misc-non-private-member-variables-in-classes)
2380  Rng mRng{123}; // NOLINT(misc-non-private-member-variables-in-classes)
2381  std::chrono::nanoseconds mTotalElapsed{}; // NOLINT(misc-non-private-member-variables-in-classes)
2382  uint64_t mTotalNumIters = 0; // NOLINT(misc-non-private-member-variables-in-classes)
2383  State mState = State::upscaling_runtime; // NOLINT(misc-non-private-member-variables-in-classes)
2384 };
2385 ANKERL_NANOBENCH(IGNORE_PADDED_POP)
2386 
2387 IterationLogic::IterationLogic(Bench const& bench)
2388  : mPimpl(new Impl(bench)) {}
2389 
2390 IterationLogic::~IterationLogic() {
2391  delete mPimpl;
2392 }
2393 
2394 uint64_t IterationLogic::numIters() const noexcept {
2395  ANKERL_NANOBENCH_LOG(mPimpl->mBench.name() << ": mNumIters=" << mPimpl->mNumIters);
2396  return mPimpl->mNumIters;
2397 }
2398 
2399 void IterationLogic::add(std::chrono::nanoseconds elapsed, PerformanceCounters const& pc) noexcept {
2400  mPimpl->add(elapsed, pc);
2401 }
2402 
2403 void IterationLogic::moveResultTo(std::vector<Result>& results) noexcept {
2404  results.emplace_back(std::move(mPimpl->mResult));
2405 }
2406 
2407 # if ANKERL_NANOBENCH(PERF_COUNTERS)
2408 
2409 ANKERL_NANOBENCH(IGNORE_PADDED_PUSH)
2410 class LinuxPerformanceCounters {
2411 public:
2412  struct Target {
2413  Target(uint64_t* targetValue_, bool correctMeasuringOverhead_, bool correctLoopOverhead_)
2414  : targetValue(targetValue_)
2415  , correctMeasuringOverhead(correctMeasuringOverhead_)
2416  , correctLoopOverhead(correctLoopOverhead_) {}
2417 
2418  uint64_t* targetValue{}; // NOLINT(misc-non-private-member-variables-in-classes)
2419  bool correctMeasuringOverhead{}; // NOLINT(misc-non-private-member-variables-in-classes)
2420  bool correctLoopOverhead{}; // NOLINT(misc-non-private-member-variables-in-classes)
2421  };
2422 
2423  LinuxPerformanceCounters() = default;
2424  LinuxPerformanceCounters(LinuxPerformanceCounters const&) = delete;
2425  LinuxPerformanceCounters(LinuxPerformanceCounters&&) = delete;
2426  LinuxPerformanceCounters& operator=(LinuxPerformanceCounters const&) = delete;
2427  LinuxPerformanceCounters& operator=(LinuxPerformanceCounters&&) = delete;
2428  ~LinuxPerformanceCounters();
2429 
2430  // quick operation
2431  inline void start() {}
2432 
2433  inline void stop() {}
2434 
2435  bool monitor(perf_sw_ids swId, Target target);
2436  bool monitor(perf_hw_id hwId, Target target);
2437 
2438  bool hasError() const noexcept {
2439  return mHasError;
2440  }
2441 
 2442  // Just reading the data is faster than enabling & disabling the counters;
 2443  // we subtract the measurement overhead ourselves.
2444  inline void beginMeasure() {
2445  if (mHasError) {
2446  return;
2447  }
2448 
2449  // NOLINTNEXTLINE(hicpp-signed-bitwise,cppcoreguidelines-pro-type-vararg)
2450  mHasError = -1 == ioctl(mFd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP);
2451  if (mHasError) {
2452  return;
2453  }
2454 
2455  // NOLINTNEXTLINE(hicpp-signed-bitwise,cppcoreguidelines-pro-type-vararg)
2456  mHasError = -1 == ioctl(mFd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP);
2457  }
2458 
2459  inline void endMeasure() {
2460  if (mHasError) {
2461  return;
2462  }
2463 
2464  // NOLINTNEXTLINE(hicpp-signed-bitwise,cppcoreguidelines-pro-type-vararg)
2465  mHasError = (-1 == ioctl(mFd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP));
2466  if (mHasError) {
2467  return;
2468  }
2469 
2470  auto const numBytes = sizeof(uint64_t) * mCounters.size();
2471  auto ret = read(mFd, mCounters.data(), numBytes);
2472  mHasError = ret != static_cast<ssize_t>(numBytes);
2473  }
2474 
2475  void updateResults(uint64_t numIters);
2476 
2477  // rounded integer division
2478  template <typename T>
2479  static inline T divRounded(T a, T divisor) {
2480  return (a + divisor / 2) / divisor;
2481  }
2482 
2483  ANKERL_NANOBENCH_NO_SANITIZE("integer", "undefined")
2484  static inline uint32_t mix(uint32_t x) noexcept {
2485  x ^= x << 13U;
2486  x ^= x >> 17U;
2487  x ^= x << 5U;
2488  return x;
2489  }
2490 
2491  template <typename Op>
2492  ANKERL_NANOBENCH_NO_SANITIZE("integer", "undefined")
2493  void calibrate(Op&& op) {
2494  // clear current calibration data,
2495  for (auto& v : mCalibratedOverhead) {
2496  v = UINT64_C(0);
2497  }
2498 
2499  // create new calibration data
2500  auto newCalibration = mCalibratedOverhead;
2501  for (auto& v : newCalibration) {
2502  v = (std::numeric_limits<uint64_t>::max)();
2503  }
2504  for (size_t iter = 0; iter < 100; ++iter) {
2505  beginMeasure();
2506  op();
2507  endMeasure();
2508  if (mHasError) {
2509  return;
2510  }
2511 
2512  for (size_t i = 0; i < newCalibration.size(); ++i) {
2513  auto diff = mCounters[i];
2514  if (newCalibration[i] > diff) {
2515  newCalibration[i] = diff;
2516  }
2517  }
2518  }
2519 
2520  mCalibratedOverhead = std::move(newCalibration);
2521 
2522  {
2523  // calibrate loop overhead. For branches & instructions this makes sense, not so much for everything else like cycles.
 2524  // Marsaglia's xorshift: mov, sal/shr, xor - repeated three times.
2525  // This has the nice property that the compiler doesn't seem to be able to optimize multiple calls any further.
2526  // see https://godbolt.org/z/49RVQ5
2527  uint64_t const numIters = 100000U + (std::random_device{}() & 3U);
2528  uint64_t n = numIters;
2529  uint32_t x = 1234567;
2530 
2531  beginMeasure();
2532  while (n-- > 0) {
2533  x = mix(x);
2534  }
 2535  endMeasure();
 2536  detail::doNotOptimizeAway(x); // keep x observable so the calibration loop is not elided
 2537  auto measure1 = mCounters;
2538 
2539  n = numIters;
2540  beginMeasure();
2541  while (n-- > 0) {
2542  // we now run *twice* so we can easily calculate the overhead
2543  x = mix(x);
2544  x = mix(x);
2545  }
 2546  endMeasure();
 2547  detail::doNotOptimizeAway(x);
 2548  auto measure2 = mCounters;
2549 
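// Per iteration the first run measures roughly (loop overhead + 1 mix) and the second run
// (loop overhead + 2 mixes), so 2*m1 - m2 isolates the loop overhead alone; the clamping
// below guards against noisy counters driving the subtraction negative.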
2550  for (size_t i = 0; i < mCounters.size(); ++i) {
 2551  // factor 2 because the second loop calls mix() twice per iteration
2552  auto m1 = measure1[i] > mCalibratedOverhead[i] ? measure1[i] - mCalibratedOverhead[i] : 0;
2553  auto m2 = measure2[i] > mCalibratedOverhead[i] ? measure2[i] - mCalibratedOverhead[i] : 0;
2554  auto overhead = m1 * 2 > m2 ? m1 * 2 - m2 : 0;
2555 
2556  mLoopOverhead[i] = divRounded(overhead, numIters);
2557  }
2558  }
2559  }
2560 
2561 private:
2562  bool monitor(uint32_t type, uint64_t eventid, Target target);
2563 
2564  std::map<uint64_t, Target> mIdToTarget{};
2565 
2566  // start with minimum size of 3 for read_format
2567  std::vector<uint64_t> mCounters{3};
2568  std::vector<uint64_t> mCalibratedOverhead{3};
2569  std::vector<uint64_t> mLoopOverhead{3};
2570 
2571  uint64_t mTimeEnabledNanos = 0;
2572  uint64_t mTimeRunningNanos = 0;
2573  int mFd = -1;
2574  bool mHasError = false;
2575 };
2576 ANKERL_NANOBENCH(IGNORE_PADDED_POP)
2577 
2578 LinuxPerformanceCounters::~LinuxPerformanceCounters() {
2579  if (-1 != mFd) {
2580  close(mFd);
2581  }
2582 }
2583 
2584 bool LinuxPerformanceCounters::monitor(perf_sw_ids swId, LinuxPerformanceCounters::Target target) {
2585  return monitor(PERF_TYPE_SOFTWARE, swId, target);
2586 }
2587 
2588 bool LinuxPerformanceCounters::monitor(perf_hw_id hwId, LinuxPerformanceCounters::Target target) {
2589  return monitor(PERF_TYPE_HARDWARE, hwId, target);
2590 }
2591 
2592 // overflow is ok, it's checked
2593 ANKERL_NANOBENCH_NO_SANITIZE("integer", "undefined")
2594 void LinuxPerformanceCounters::updateResults(uint64_t numIters) {
2595  // clear old data
2596  for (auto& id_value : mIdToTarget) {
2597  *id_value.second.targetValue = UINT64_C(0);
2598  }
2599 
2600  if (mHasError) {
2601  return;
2602  }
2603 
2604  mTimeEnabledNanos = mCounters[1] - mCalibratedOverhead[1];
2605  mTimeRunningNanos = mCounters[2] - mCalibratedOverhead[2];
2606 
2607  for (uint64_t i = 0; i < mCounters[0]; ++i) {
2608  auto idx = static_cast<size_t>(3 + i * 2 + 0);
2609  auto id = mCounters[idx + 1U];
2610 
2611  auto it = mIdToTarget.find(id);
2612  if (it != mIdToTarget.end()) {
2613 
2614  auto& tgt = it->second;
2615  *tgt.targetValue = mCounters[idx];
2616  if (tgt.correctMeasuringOverhead) {
2617  if (*tgt.targetValue >= mCalibratedOverhead[idx]) {
2618  *tgt.targetValue -= mCalibratedOverhead[idx];
2619  } else {
2620  *tgt.targetValue = 0U;
2621  }
2622  }
2623  if (tgt.correctLoopOverhead) {
2624  auto correctionVal = mLoopOverhead[idx] * numIters;
2625  if (*tgt.targetValue >= correctionVal) {
2626  *tgt.targetValue -= correctionVal;
2627  } else {
2628  *tgt.targetValue = 0U;
2629  }
2630  }
2631  }
2632  }
2633 }
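// With PERF_FORMAT_GROUP | PERF_FORMAT_ID | PERF_FORMAT_TOTAL_TIME_ENABLED |
// PERF_FORMAT_TOTAL_TIME_RUNNING, the read(2) buffer is laid out as
// { nr, time_enabled, time_running, value0, id0, value1, id1, ... }, which is why the loop
// above starts at index 3 and walks the counters in (value, id) pairs.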
2634 
2635 bool LinuxPerformanceCounters::monitor(uint32_t type, uint64_t eventid, Target target) {
2636  *target.targetValue = (std::numeric_limits<uint64_t>::max)();
2637  if (mHasError) {
2638  return false;
2639  }
2640 
2641  auto pea = perf_event_attr();
2642  std::memset(&pea, 0, sizeof(perf_event_attr));
2643  pea.type = type;
2644  pea.size = sizeof(perf_event_attr);
2645  pea.config = eventid;
2646  pea.disabled = 1; // start counter as disabled
2647  pea.exclude_kernel = 1;
2648  pea.exclude_hv = 1;
2649 
2650  // NOLINTNEXTLINE(hicpp-signed-bitwise)
2651  pea.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID | PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING;
2652 
2653  const int pid = 0; // the current process
2654  const int cpu = -1; // all CPUs
2655 # if defined(PERF_FLAG_FD_CLOEXEC) // since Linux 3.14
2656  const unsigned long flags = PERF_FLAG_FD_CLOEXEC;
2657 # else
2658  const unsigned long flags = 0;
2659 # endif
2660 
2661  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
2662  auto fd = static_cast<int>(syscall(__NR_perf_event_open, &pea, pid, cpu, mFd, flags));
2663  if (-1 == fd) {
2664  return false;
2665  }
2666  if (-1 == mFd) {
2667  // first call: set to fd, and use this from now on
2668  mFd = fd;
2669  }
2670  uint64_t id = 0;
2671  // NOLINTNEXTLINE(hicpp-signed-bitwise,cppcoreguidelines-pro-type-vararg)
2672  if (-1 == ioctl(fd, PERF_EVENT_IOC_ID, &id)) {
2673  // couldn't get id
2674  return false;
2675  }
2676 
 2677  // insert into the map; references into a std::map stay valid across later insertions.
2678  mIdToTarget.emplace(id, target);
2679 
2680  // prepare readformat with the correct size (after the insert)
2681  auto size = 3 + 2 * mIdToTarget.size();
2682  mCounters.resize(size);
2683  mCalibratedOverhead.resize(size);
2684  mLoopOverhead.resize(size);
2685 
2686  return true;
2687 }
2688 
2689 PerformanceCounters::PerformanceCounters()
2690  : mPc(new LinuxPerformanceCounters())
2691  , mVal()
2692  , mHas() {
2693 
2694  mHas.pageFaults = mPc->monitor(PERF_COUNT_SW_PAGE_FAULTS, LinuxPerformanceCounters::Target(&mVal.pageFaults, true, false));
2695  mHas.cpuCycles = mPc->monitor(PERF_COUNT_HW_REF_CPU_CYCLES, LinuxPerformanceCounters::Target(&mVal.cpuCycles, true, false));
2696  mHas.contextSwitches =
2697  mPc->monitor(PERF_COUNT_SW_CONTEXT_SWITCHES, LinuxPerformanceCounters::Target(&mVal.contextSwitches, true, false));
2698  mHas.instructions = mPc->monitor(PERF_COUNT_HW_INSTRUCTIONS, LinuxPerformanceCounters::Target(&mVal.instructions, true, true));
2699  mHas.branchInstructions =
2700  mPc->monitor(PERF_COUNT_HW_BRANCH_INSTRUCTIONS, LinuxPerformanceCounters::Target(&mVal.branchInstructions, true, false));
2701  mHas.branchMisses = mPc->monitor(PERF_COUNT_HW_BRANCH_MISSES, LinuxPerformanceCounters::Target(&mVal.branchMisses, true, false));
2702  // mHas.branchMisses = false;
2703 
2704  mPc->start();
2705  mPc->calibrate([] {
2706  auto before = ankerl::nanobench::Clock::now();
2707  auto after = ankerl::nanobench::Clock::now();
2708  (void)before;
2709  (void)after;
2710  });
2711 
2712  if (mPc->hasError()) {
2713  // something failed, don't monitor anything.
2714  mHas = PerfCountSet<bool>{};
2715  }
2716 }
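// The calibration payload above is just two Clock::now() calls, so mCalibratedOverhead ends up
// holding roughly what an empty measurement (the counter ioctls plus the clock reads) costs;
// counters created with correctMeasuringOverhead == true get this subtracted in updateResults().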
2717 
2718 PerformanceCounters::~PerformanceCounters() {
2719  // no need to check for nullptr, delete nullptr has no effect
2720  delete mPc;
2721 }
2722 
2723 void PerformanceCounters::beginMeasure() {
2724  mPc->beginMeasure();
2725 }
2726 
2727 void PerformanceCounters::endMeasure() {
2728  mPc->endMeasure();
2729 }
2730 
2731 void PerformanceCounters::updateResults(uint64_t numIters) {
2732  mPc->updateResults(numIters);
2733 }
2734 
2735 # else
2736 
2737 PerformanceCounters::PerformanceCounters() = default;
2738 PerformanceCounters::~PerformanceCounters() = default;
2739 void PerformanceCounters::beginMeasure() {}
2740 void PerformanceCounters::endMeasure() {}
2741 void PerformanceCounters::updateResults(uint64_t) {}
2742 
2743 # endif
2744 
2745 ANKERL_NANOBENCH(NODISCARD) PerfCountSet<uint64_t> const& PerformanceCounters::val() const noexcept {
2746  return mVal;
2747 }
2748 ANKERL_NANOBENCH(NODISCARD) PerfCountSet<bool> const& PerformanceCounters::has() const noexcept {
2749  return mHas;
2750 }
2751 
2752 // formatting utilities
2753 namespace fmt {
2754 
2755 // adds thousands separator to numbers
2756 NumSep::NumSep(char sep)
2757  : mSep(sep) {}
2758 
2759 char NumSep::do_thousands_sep() const {
2760  return mSep;
2761 }
2762 
2763 std::string NumSep::do_grouping() const {
2764  return "\003";
2765 }
2766 
2767 // RAII to save & restore a stream's state
2768 StreamStateRestorer::StreamStateRestorer(std::ostream& s)
2769  : mStream(s)
2770  , mLocale(s.getloc())
2771  , mPrecision(s.precision())
2772  , mWidth(s.width())
2773  , mFill(s.fill())
2774  , mFmtFlags(s.flags()) {}
2775 
2776 StreamStateRestorer::~StreamStateRestorer() {
2777  restore();
2778 }
2779 
2780 // sets back all stream info that we remembered at construction
2781 void StreamStateRestorer::restore() {
2782  mStream.imbue(mLocale);
2783  mStream.precision(mPrecision);
2784  mStream.width(mWidth);
2785  mStream.fill(mFill);
2786  mStream.flags(mFmtFlags);
2787 }
2788 
2789 Number::Number(int width, int precision, int64_t value)
2790  : mWidth(width)
2791  , mPrecision(precision)
2792  , mValue(static_cast<double>(value)) {}
2793 
2794 Number::Number(int width, int precision, double value)
2795  : mWidth(width)
2796  , mPrecision(precision)
2797  , mValue(value) {}
2798 
2799 std::ostream& Number::write(std::ostream& os) const {
2800  StreamStateRestorer const restorer(os);
2801  os.imbue(std::locale(os.getloc(), new NumSep(',')));
2802  os << std::setw(mWidth) << std::setprecision(mPrecision) << std::fixed << mValue;
2803  return os;
2804 }
2805 
2806 std::string Number::to_s() const {
2807  std::stringstream ss;
2808  write(ss);
2809  return ss.str();
2810 }
2811 
2812 std::string to_s(uint64_t n) {
2813  std::string str;
2814  do {
2815  str += static_cast<char>('0' + static_cast<char>(n % 10));
2816  n /= 10;
2817  } while (n != 0);
2818  std::reverse(str.begin(), str.end());
2819  return str;
2820 }
2821 
2822 std::ostream& operator<<(std::ostream& os, Number const& n) {
2823  return n.write(os);
2824 }
2825 
2826 MarkDownColumn::MarkDownColumn(int w, int prec, std::string tit, std::string suff, double val)
2827  : mWidth(w)
2828  , mPrecision(prec)
2829  , mTitle(std::move(tit))
2830  , mSuffix(std::move(suff))
2831  , mValue(val) {}
2832 
2833 std::string MarkDownColumn::title() const {
2834  std::stringstream ss;
2835  ss << '|' << std::setw(mWidth - 2) << std::right << mTitle << ' ';
2836  return ss.str();
2837 }
2838 
2839 std::string MarkDownColumn::separator() const {
2840  std::string sep(static_cast<size_t>(mWidth), '-');
2841  sep.front() = '|';
2842  sep.back() = ':';
2843  return sep;
2844 }
2845 
2846 std::string MarkDownColumn::invalid() const {
2847  std::string sep(static_cast<size_t>(mWidth), ' ');
2848  sep.front() = '|';
2849  sep[sep.size() - 2] = '-';
2850  return sep;
2851 }
2852 
2853 std::string MarkDownColumn::value() const {
2854  std::stringstream ss;
2855  auto width = mWidth - 2 - static_cast<int>(mSuffix.size());
2856  ss << '|' << Number(width, mPrecision, mValue) << mSuffix << ' ';
2857  return ss.str();
2858 }
2859 
2860 // Formats any text as markdown code, escaping backticks.
2861 MarkDownCode::MarkDownCode(std::string const& what) {
2862  mWhat.reserve(what.size() + 2);
2863  mWhat.push_back('`');
2864  for (char const c : what) {
2865  mWhat.push_back(c);
2866  if ('`' == c) {
2867  mWhat.push_back('`');
2868  }
2869  }
2870  mWhat.push_back('`');
2871 }
2872 
2873 std::ostream& MarkDownCode::write(std::ostream& os) const {
2874  return os << mWhat;
2875 }
2876 
2877 std::ostream& operator<<(std::ostream& os, MarkDownCode const& mdCode) {
2878  return mdCode.write(os);
2879 }
2880 } // namespace fmt
2881 } // namespace detail
2882 
2883 // provide implementation here so it's only generated once
2884 Config::Config() = default;
2885 Config::~Config() = default;
2886 Config& Config::operator=(Config const&) = default;
2887 Config& Config::operator=(Config&&) noexcept = default;
2888 Config::Config(Config const&) = default;
2889 Config::Config(Config&&) noexcept = default;
2890 
2891 // provide implementation here so it's only generated once
2892 Result::~Result() = default;
2893 Result& Result::operator=(Result const&) = default;
2894 Result& Result::operator=(Result&&) noexcept = default;
2895 Result::Result(Result const&) = default;
2896 Result::Result(Result&&) noexcept = default;
2897 
2898 namespace detail {
2899 template <typename T>
2900 inline constexpr typename std::underlying_type<T>::type u(T val) noexcept {
2901  return static_cast<typename std::underlying_type<T>::type>(val);
2902 }
2903 } // namespace detail
2904 
2905 // Result returned after a benchmark has finished. Can be used as a baseline for relative().
2906 Result::Result(Config benchmarkConfig)
2907  : mConfig(std::move(benchmarkConfig))
2908  , mNameToMeasurements{detail::u(Result::Measure::_size)} {}
2909 
2910 void Result::add(Clock::duration totalElapsed, uint64_t iters, detail::PerformanceCounters const& pc) {
2911  using detail::d;
2912  using detail::u;
2913 
2914  double const dIters = d(iters);
2915  mNameToMeasurements[u(Result::Measure::iterations)].push_back(dIters);
2916 
2917  mNameToMeasurements[u(Result::Measure::elapsed)].push_back(d(totalElapsed) / dIters);
2918  if (pc.has().pageFaults) {
2919  mNameToMeasurements[u(Result::Measure::pagefaults)].push_back(d(pc.val().pageFaults) / dIters);
2920  }
2921  if (pc.has().cpuCycles) {
2922  mNameToMeasurements[u(Result::Measure::cpucycles)].push_back(d(pc.val().cpuCycles) / dIters);
2923  }
2924  if (pc.has().contextSwitches) {
2925  mNameToMeasurements[u(Result::Measure::contextswitches)].push_back(d(pc.val().contextSwitches) / dIters);
2926  }
2927  if (pc.has().instructions) {
2928  mNameToMeasurements[u(Result::Measure::instructions)].push_back(d(pc.val().instructions) / dIters);
2929  }
2930  if (pc.has().branchInstructions) {
2931  double branchInstructions = 0.0;
2932  // correcting branches: remove branch introduced by the while (...) loop for each iteration.
2933  if (pc.val().branchInstructions > iters + 1U) {
2934  branchInstructions = d(pc.val().branchInstructions - (iters + 1U));
2935  }
2936  mNameToMeasurements[u(Result::Measure::branchinstructions)].push_back(branchInstructions / dIters);
2937 
2938  if (pc.has().branchMisses) {
2939  // correcting branch misses
2940  double branchMisses = d(pc.val().branchMisses);
2941  if (branchMisses > branchInstructions) {
 2942  // can't have more branch misses than branch instructions...
2943  branchMisses = branchInstructions;
2944  }
2945 
2946  // assuming at least one missed branch for the loop
2947  branchMisses -= 1.0;
2948  if (branchMisses < 1.0) {
2949  branchMisses = 1.0;
2950  }
2951  mNameToMeasurements[u(Result::Measure::branchmisses)].push_back(branchMisses / dIters);
2952  }
2953  }
2954 }
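// Note: every measure is stored per iteration (divided by dIters), so medians and averages can
// be compared across epochs even when the epochs ran different iteration counts.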
2955 
2956 Config const& Result::config() const noexcept {
2957  return mConfig;
2958 }
2959 
2960 inline double calcMedian(std::vector<double>& data) {
2961  if (data.empty()) {
2962  return 0.0;
2963  }
2964  std::sort(data.begin(), data.end());
2965 
2966  auto midIdx = data.size() / 2U;
2967  if (1U == (data.size() & 1U)) {
2968  return data[midIdx];
2969  }
2970  return (data[midIdx - 1U] + data[midIdx]) / 2U;
2971 }
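// e.g. {3, 1, 2} sorts to {1, 2, 3} -> median 2; an even count such as {1, 2, 3, 4} averages
// the two middle elements -> 2.5.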
2972 
2973 double Result::median(Measure m) const {
2974  // create a copy so we can sort
2975  auto data = mNameToMeasurements[detail::u(m)];
2976  return calcMedian(data);
2977 }
2978 
2979 double Result::average(Measure m) const {
2980  using detail::d;
2981  auto const& data = mNameToMeasurements[detail::u(m)];
2982  if (data.empty()) {
2983  return 0.0;
2984  }
2985 
 2986  // arithmetic mean over all recorded epochs
2987  return sum(m) / d(data.size());
2988 }
2989 
2990 double Result::medianAbsolutePercentError(Measure m) const {
2991  // create copy
2992  auto data = mNameToMeasurements[detail::u(m)];
2993 
2994  // calculates MdAPE which is the median of percentage error
2995  // see https://www.spiderfinancial.com/support/documentation/numxl/reference-manual/forecasting-performance/mdape
2996  auto med = calcMedian(data);
2997 
 2998  // transform the data to absolute percentage error
2999  for (auto& x : data) {
3000  x = (x - med) / x;
3001  if (x < 0) {
3002  x = -x;
3003  }
3004  }
3005  return calcMedian(data);
3006 }
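// In formula form: MdAPE = median_i( |x_i - median(x)| / x_i ), i.e. the median of the
// per-sample absolute percentage deviations from the median.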
3007 
3008 double Result::sum(Measure m) const noexcept {
3009  auto const& data = mNameToMeasurements[detail::u(m)];
3010  return std::accumulate(data.begin(), data.end(), 0.0);
3011 }
3012 
3013 double Result::sumProduct(Measure m1, Measure m2) const noexcept {
3014  auto const& data1 = mNameToMeasurements[detail::u(m1)];
3015  auto const& data2 = mNameToMeasurements[detail::u(m2)];
3016 
3017  if (data1.size() != data2.size()) {
3018  return 0.0;
3019  }
3020 
3021  double result = 0.0;
3022  for (size_t i = 0, s = data1.size(); i != s; ++i) {
3023  result += data1[i] * data2[i];
3024  }
3025  return result;
3026 }
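// Used e.g. as sumProduct(Measure::iterations, Measure::elapsed) for the "total" column: since
// elapsed is stored per iteration, the dot product recovers the total measured wall time.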
3027 
3028 bool Result::has(Measure m) const noexcept {
3029  return !mNameToMeasurements[detail::u(m)].empty();
3030 }
3031 
3032 double Result::get(size_t idx, Measure m) const {
3033  auto const& data = mNameToMeasurements[detail::u(m)];
3034  return data.at(idx);
3035 }
3036 
3037 bool Result::empty() const noexcept {
3038  return 0U == size();
3039 }
3040 
3041 size_t Result::size() const noexcept {
3042  auto const& data = mNameToMeasurements[detail::u(Measure::elapsed)];
3043  return data.size();
3044 }
3045 
3046 double Result::minimum(Measure m) const noexcept {
3047  auto const& data = mNameToMeasurements[detail::u(m)];
3048  if (data.empty()) {
3049  return 0.0;
3050  }
3051 
 3052  // here it's safe to assume that at least one element is there
3053  return *std::min_element(data.begin(), data.end());
3054 }
3055 
3056 double Result::maximum(Measure m) const noexcept {
3057  auto const& data = mNameToMeasurements[detail::u(m)];
3058  if (data.empty()) {
3059  return 0.0;
3060  }
3061 
 3062  // here it's safe to assume that at least one element is there
3063  return *std::max_element(data.begin(), data.end());
3064 }
3065 
3066 std::string const& Result::context(char const* variableName) const {
3067  return mConfig.mContext.at(variableName);
3068 }
3069 
3070 std::string const& Result::context(std::string const& variableName) const {
3071  return mConfig.mContext.at(variableName);
3072 }
3073 
3074 Result::Measure Result::fromString(std::string const& str) {
3075  if (str == "elapsed") {
3076  return Measure::elapsed;
3077  }
3078  if (str == "iterations") {
3079  return Measure::iterations;
3080  }
3081  if (str == "pagefaults") {
3082  return Measure::pagefaults;
3083  }
3084  if (str == "cpucycles") {
3085  return Measure::cpucycles;
3086  }
3087  if (str == "contextswitches") {
3088  return Measure::contextswitches;
3089  }
3090  if (str == "instructions") {
3091  return Measure::instructions;
3092  }
3093  if (str == "branchinstructions") {
3094  return Measure::branchinstructions;
3095  }
3096  if (str == "branchmisses") {
3097  return Measure::branchmisses;
3098  }
3099  // not found, return _size
3100  return Measure::_size;
3101 }
3102 
3103 // Configuration of a microbenchmark.
3104 Bench::Bench() {
3105  mConfig.mOut = &std::cout;
3106 }
3107 
3108 Bench::Bench(Bench&&) noexcept = default;
3109 Bench& Bench::operator=(Bench&&) noexcept = default;
3110 Bench::Bench(Bench const&) = default;
3111 Bench& Bench::operator=(Bench const&) = default;
3112 Bench::~Bench() noexcept = default;
3113 
3114 double Bench::batch() const noexcept {
3115  return mConfig.mBatch;
3116 }
3117 
3118 double Bench::complexityN() const noexcept {
3119  return mConfig.mComplexityN;
3120 }
3121 
3122 // Set a baseline to compare against. 100% means it is exactly as fast as the baseline, >100% means it is faster than the
3123 // baseline, <100% means it is slower than the baseline.
3124 Bench& Bench::relative(bool isRelativeEnabled) noexcept {
3125  mConfig.mIsRelative = isRelativeEnabled;
3126  return *this;
3127 }
3128 bool Bench::relative() const noexcept {
3129  return mConfig.mIsRelative;
3130 }
3131 
3132 Bench& Bench::performanceCounters(bool showPerformanceCounters) noexcept {
3133  mConfig.mShowPerformanceCounters = showPerformanceCounters;
3134  return *this;
3135 }
3136 bool Bench::performanceCounters() const noexcept {
3137  return mConfig.mShowPerformanceCounters;
3138 }
3139 
3140 // Operation unit. Defaults to "op", could be e.g. "byte" for string processing.
3141 // If u differs from currently set unit, the stored results will be cleared.
3142 // Use singular (byte, not bytes).
3143 Bench& Bench::unit(char const* u) {
3144  if (u != mConfig.mUnit) {
3145  mResults.clear();
3146  }
3147  mConfig.mUnit = u;
3148  return *this;
3149 }
3150 
3151 Bench& Bench::unit(std::string const& u) {
3152  return unit(u.c_str());
3153 }
3154 
3155 std::string const& Bench::unit() const noexcept {
3156  return mConfig.mUnit;
3157 }
3158 
3159 Bench& Bench::timeUnit(std::chrono::duration<double> const& tu, std::string const& tuName) {
3160  mConfig.mTimeUnit = tu;
3161  mConfig.mTimeUnitName = tuName;
3162  return *this;
3163 }
3164 
3165 std::string const& Bench::timeUnitName() const noexcept {
3166  return mConfig.mTimeUnitName;
3167 }
3168 
3169 std::chrono::duration<double> const& Bench::timeUnit() const noexcept {
3170  return mConfig.mTimeUnit;
3171 }
3172 
3173 // If benchmarkTitle differs from currently set title, the stored results will be cleared.
3174 Bench& Bench::title(const char* benchmarkTitle) {
3175  if (benchmarkTitle != mConfig.mBenchmarkTitle) {
3176  mResults.clear();
3177  }
3178  mConfig.mBenchmarkTitle = benchmarkTitle;
3179  return *this;
3180 }
3181 Bench& Bench::title(std::string const& benchmarkTitle) {
3182  if (benchmarkTitle != mConfig.mBenchmarkTitle) {
3183  mResults.clear();
3184  }
3185  mConfig.mBenchmarkTitle = benchmarkTitle;
3186  return *this;
3187 }
3188 
3189 std::string const& Bench::title() const noexcept {
3190  return mConfig.mBenchmarkTitle;
3191 }
3192 
3193 Bench& Bench::name(const char* benchmarkName) {
3194  mConfig.mBenchmarkName = benchmarkName;
3195  return *this;
3196 }
3197 
3198 Bench& Bench::name(std::string const& benchmarkName) {
3199  mConfig.mBenchmarkName = benchmarkName;
3200  return *this;
3201 }
3202 
3203 std::string const& Bench::name() const noexcept {
3204  return mConfig.mBenchmarkName;
3205 }
3206 
3207 Bench& Bench::context(char const* variableName, char const* variableValue) {
3208  mConfig.mContext[variableName] = variableValue;
3209  return *this;
3210 }
3211 
3212 Bench& Bench::context(std::string const& variableName, std::string const& variableValue) {
3213  mConfig.mContext[variableName] = variableValue;
3214  return *this;
3215 }
3216 
3217 Bench& Bench::clearContext() {
3218  mConfig.mContext.clear();
3219  return *this;
3220 }
3221 
3222 // Number of epochs to evaluate. The reported result is the median over all epochs.
3223 Bench& Bench::epochs(size_t numEpochs) noexcept {
3224  mConfig.mNumEpochs = numEpochs;
3225  return *this;
3226 }
3227 size_t Bench::epochs() const noexcept {
3228  return mConfig.mNumEpochs;
3229 }
3230 
3231 // The desired evaluation time per epoch is a multiple of the clock resolution; the default is 1000 times the measured clock resolution.
3232 Bench& Bench::clockResolutionMultiple(size_t multiple) noexcept {
3233  mConfig.mClockResolutionMultiple = multiple;
3234  return *this;
3235 }
3236 size_t Bench::clockResolutionMultiple() const noexcept {
3237  return mConfig.mClockResolutionMultiple;
3238 }
3239 
3240 // Sets the maximum time each epoch should take. Default is 100ms.
3241 Bench& Bench::maxEpochTime(std::chrono::nanoseconds t) noexcept {
3242  mConfig.mMaxEpochTime = t;
3243  return *this;
3244 }
3245 std::chrono::nanoseconds Bench::maxEpochTime() const noexcept {
3246  return mConfig.mMaxEpochTime;
3247 }
3248 
3249 // Sets the minimum time each epoch should take.
3250 Bench& Bench::minEpochTime(std::chrono::nanoseconds t) noexcept {
3251  mConfig.mMinEpochTime = t;
3252  return *this;
3253 }
3254 std::chrono::nanoseconds Bench::minEpochTime() const noexcept {
3255  return mConfig.mMinEpochTime;
3256 }
3257 
3258 Bench& Bench::minEpochIterations(uint64_t numIters) noexcept {
3259  mConfig.mMinEpochIterations = (numIters == 0) ? 1 : numIters;
3260  return *this;
3261 }
3262 uint64_t Bench::minEpochIterations() const noexcept {
3263  return mConfig.mMinEpochIterations;
3264 }
3265 
3266 Bench& Bench::epochIterations(uint64_t numIters) noexcept {
3267  mConfig.mEpochIterations = numIters;
3268  return *this;
3269 }
3270 uint64_t Bench::epochIterations() const noexcept {
3271  return mConfig.mEpochIterations;
3272 }
3273 
3274 Bench& Bench::warmup(uint64_t numWarmupIters) noexcept {
3275  mConfig.mWarmup = numWarmupIters;
3276  return *this;
3277 }
3278 uint64_t Bench::warmup() const noexcept {
3279  return mConfig.mWarmup;
3280 }
3281 
3282 Bench& Bench::config(Config const& benchmarkConfig) {
3283  mConfig = benchmarkConfig;
3284  return *this;
3285 }
3286 Config const& Bench::config() const noexcept {
3287  return mConfig;
3288 }
3289 
3290 Bench& Bench::output(std::ostream* outstream) noexcept {
3291  mConfig.mOut = outstream;
3292  return *this;
3293 }
3294 
3295 ANKERL_NANOBENCH(NODISCARD) std::ostream* Bench::output() const noexcept {
3296  return mConfig.mOut;
3297 }
3298 
3299 std::vector<Result> const& Bench::results() const noexcept {
3300  return mResults;
3301 }
3302 
3303 Bench& Bench::render(char const* templateContent, std::ostream& os) {
3304  ::ankerl::nanobench::render(templateContent, *this, os);
3305  return *this;
3306 }
3307 
3308 Bench& Bench::render(std::string const& templateContent, std::ostream& os) {
3309  ::ankerl::nanobench::render(templateContent, *this, os);
3310  return *this;
3311 }
3312 
3313 std::vector<BigO> Bench::complexityBigO() const {
3314  std::vector<BigO> bigOs;
3315  auto rangeMeasure = BigO::collectRangeMeasure(mResults);
3316  bigOs.emplace_back("O(1)", rangeMeasure, [](double) {
3317  return 1.0;
3318  });
3319  bigOs.emplace_back("O(n)", rangeMeasure, [](double n) {
3320  return n;
3321  });
3322  bigOs.emplace_back("O(log n)", rangeMeasure, [](double n) {
3323  return std::log2(n);
3324  });
3325  bigOs.emplace_back("O(n log n)", rangeMeasure, [](double n) {
3326  return n * std::log2(n);
3327  });
3328  bigOs.emplace_back("O(n^2)", rangeMeasure, [](double n) {
3329  return n * n;
3330  });
3331  bigOs.emplace_back("O(n^3)", rangeMeasure, [](double n) {
3332  return n * n * n;
3333  });
3334  std::sort(bigOs.begin(), bigOs.end());
3335  return bigOs;
3336 }
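// Rough usage sketch (illustrative only, not part of this header; workOn() stands in for the
// code under test): run the same benchmark for several problem sizes with complexityN(n) set,
// then ask for the best-fitting complexity; after std::sort, the first entry has the lowest
// normalized RMS error.
//
//   ankerl::nanobench::Bench bench;
//   for (size_t n = 16; n <= 4096; n *= 2) {
//       bench.complexityN(n).run("my algorithm", [&] { workOn(n); });
//   }
//   std::cout << bench.complexityBigO().front() << '\n';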
3337 
3338 Rng::Rng()
3339  : mX(0)
3340  , mY(0) {
3341  std::random_device rd;
3342  std::uniform_int_distribution<uint64_t> dist;
3343  do {
3344  mX = dist(rd);
3345  mY = dist(rd);
3346  } while (mX == 0 && mY == 0);
3347 }
3348 
3349 ANKERL_NANOBENCH_NO_SANITIZE("integer", "undefined")
3350 uint64_t splitMix64(uint64_t& state) noexcept {
3351  uint64_t z = (state += UINT64_C(0x9e3779b97f4a7c15));
3352  z = (z ^ (z >> 30U)) * UINT64_C(0xbf58476d1ce4e5b9);
3353  z = (z ^ (z >> 27U)) * UINT64_C(0x94d049bb133111eb);
3354  return z ^ (z >> 31U);
3355 }
3356 
3357 // Seeded as described in the Romu paper (update April 2020)
3358 Rng::Rng(uint64_t seed) noexcept
3359  : mX(splitMix64(seed))
3360  , mY(splitMix64(seed)) {
3361  for (size_t i = 0; i < 10; ++i) {
3362  operator()();
3363  }
3364 }
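// splitMix64 advances the by-value seed twice, producing two well-mixed, distinct state words;
// the ten discarded draws further decorrelate the first outputs from the raw seed.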
3365 
3366 // only internally used to copy the RNG.
3367 Rng::Rng(uint64_t x, uint64_t y) noexcept
3368  : mX(x)
3369  , mY(y) {}
3370 
3371 Rng Rng::copy() const noexcept {
3372  return Rng{mX, mY};
3373 }
3374 
3375 Rng::Rng(std::vector<uint64_t> const& data)
3376  : mX(0)
3377  , mY(0) {
3378  if (data.size() != 2) {
3379  throw std::runtime_error("ankerl::nanobench::Rng::Rng: needed exactly 2 entries in data, but got " +
3380  detail::fmt::to_s(data.size()));
3381  }
3382  mX = data[0];
3383  mY = data[1];
3384 }
3385 
3386 std::vector<uint64_t> Rng::state() const {
3387  std::vector<uint64_t> data(2);
3388  data[0] = mX;
3389  data[1] = mY;
3390  return data;
3391 }
3392 
3393 BigO::RangeMeasure BigO::collectRangeMeasure(std::vector<Result> const& results) {
3394  BigO::RangeMeasure rangeMeasure;
3395  for (auto const& result : results) {
3396  if (result.config().mComplexityN > 0.0) {
3397  rangeMeasure.emplace_back(result.config().mComplexityN, result.median(Result::Measure::elapsed));
3398  }
3399  }
3400  return rangeMeasure;
3401 }
3402 
3403 BigO::BigO(std::string bigOName, RangeMeasure const& rangeMeasure)
3404  : mName(std::move(bigOName)) {
3405 
3406  // estimate the constant factor
3407  double sumRangeMeasure = 0.0;
3408  double sumRangeRange = 0.0;
3409 
3410  for (const auto& rm : rangeMeasure) {
3411  sumRangeMeasure += rm.first * rm.second;
3412  sumRangeRange += rm.first * rm.first;
3413  }
3414  mConstant = sumRangeMeasure / sumRangeRange;
3415 
3416  // calculate root mean square
3417  double err = 0.0;
3418  double sumMeasure = 0.0;
3419  for (const auto& rm : rangeMeasure) {
3420  auto diff = mConstant * rm.first - rm.second;
3421  err += diff * diff;
3422 
3423  sumMeasure += rm.second;
3424  }
3425 
3426  auto n = static_cast<double>(rangeMeasure.size());
3427  auto mean = sumMeasure / n;
3428  mNormalizedRootMeanSquare = std::sqrt(err / n) / mean;
3429 }
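// mConstant is the least-squares fit of measurement ~= c * mappedN: minimizing
// sum_i (c * n_i - t_i)^2 over c gives c = sum(n_i * t_i) / sum(n_i * n_i). The error metric is
// the root-mean-square residual divided by the mean measurement, making it unitless so that
// different complexity candidates can be ranked against each other.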
3430 
3431 BigO::BigO(const char* bigOName, RangeMeasure const& rangeMeasure)
3432  : BigO(std::string(bigOName), rangeMeasure) {}
3433 
3434 std::string const& BigO::name() const noexcept {
3435  return mName;
3436 }
3437 
3438 double BigO::constant() const noexcept {
3439  return mConstant;
3440 }
3441 
3442 double BigO::normalizedRootMeanSquare() const noexcept {
3443  return mNormalizedRootMeanSquare;
3444 }
3445 
3446 bool BigO::operator<(BigO const& other) const noexcept {
3447  return std::tie(mNormalizedRootMeanSquare, mName) < std::tie(other.mNormalizedRootMeanSquare, other.mName);
3448 }
3449 
3450 std::ostream& operator<<(std::ostream& os, BigO const& bigO) {
3451  return os << bigO.constant() << " * " << bigO.name() << ", rms=" << bigO.normalizedRootMeanSquare();
3452 }
3453 
3454 std::ostream& operator<<(std::ostream& os, std::vector<ankerl::nanobench::BigO> const& bigOs) {
3455  detail::fmt::StreamStateRestorer const restorer(os);
3456  os << std::endl << "| coefficient | err% | complexity" << std::endl << "|--------------:|-------:|------------" << std::endl;
3457  for (auto const& bigO : bigOs) {
3458  os << "|" << std::setw(14) << std::setprecision(7) << std::scientific << bigO.constant() << " ";
3459  os << "|" << detail::fmt::Number(6, 1, bigO.normalizedRootMeanSquare() * 100.0) << "% ";
3460  os << "| " << bigO.name();
3461  os << std::endl;
3462  }
3463  return os;
3464 }
3465 
3466 } // namespace nanobench
3467 } // namespace ankerl
3468 
3469 #endif // ANKERL_NANOBENCH_IMPLEMENT
3470 #endif // ANKERL_NANOBENCH_H_INCLUDED