Bitcoin Core 27.99.0
nanobench.h
1 // __ _ _______ __ _ _____ ______ _______ __ _ _______ _ _
2 // | \ | |_____| | \ | | | |_____] |______ | \ | | |_____|
3 // | \_| | | | \_| |_____| |_____] |______ | \_| |_____ | |
4 //
5 // Microbenchmark framework for C++11/14/17/20
6 // https://github.com/martinus/nanobench
7 //
8 // Licensed under the MIT License <http://opensource.org/licenses/MIT>.
9 // SPDX-License-Identifier: MIT
10 // Copyright (c) 2019-2023 Martin Leitner-Ankerl <martin.ankerl@gmail.com>
11 //
12 // Permission is hereby granted, free of charge, to any person obtaining a copy
13 // of this software and associated documentation files (the "Software"), to deal
14 // in the Software without restriction, including without limitation the rights
15 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
16 // copies of the Software, and to permit persons to whom the Software is
17 // furnished to do so, subject to the following conditions:
18 //
19 // The above copyright notice and this permission notice shall be included in all
20 // copies or substantial portions of the Software.
21 //
22 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28 // SOFTWARE.
29 
30 #ifndef ANKERL_NANOBENCH_H_INCLUDED
31 #define ANKERL_NANOBENCH_H_INCLUDED
32 
33 // see https://semver.org/
34 #define ANKERL_NANOBENCH_VERSION_MAJOR 4 // incompatible API changes
35 #define ANKERL_NANOBENCH_VERSION_MINOR 3 // backwards-compatible changes
36 #define ANKERL_NANOBENCH_VERSION_PATCH 11 // backwards-compatible bug fixes
37 
39 // public-facing API - as minimal as possible
41 
42 #include <chrono> // high_resolution_clock
43 #include <cstring> // memcpy
44 #include <iosfwd> // for std::ostream* custom output target in Config
45 #include <string> // all names
46 #include <unordered_map> // holds context information of results
47 #include <vector> // holds all results
48 
49 #define ANKERL_NANOBENCH(x) ANKERL_NANOBENCH_PRIVATE_##x()
50 
51 #define ANKERL_NANOBENCH_PRIVATE_CXX() __cplusplus
52 #define ANKERL_NANOBENCH_PRIVATE_CXX98() 199711L
53 #define ANKERL_NANOBENCH_PRIVATE_CXX11() 201103L
54 #define ANKERL_NANOBENCH_PRIVATE_CXX14() 201402L
55 #define ANKERL_NANOBENCH_PRIVATE_CXX17() 201703L
56 
57 #if ANKERL_NANOBENCH(CXX) >= ANKERL_NANOBENCH(CXX17)
58 # define ANKERL_NANOBENCH_PRIVATE_NODISCARD() [[nodiscard]]
59 #else
60 # define ANKERL_NANOBENCH_PRIVATE_NODISCARD()
61 #endif
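// [Editor's example, not part of nanobench.h] ANKERL_NANOBENCH(x) token-pastes to the matching
// ANKERL_NANOBENCH_PRIVATE_##x() macro above, keeping feature checks terse at the call site.
// A minimal sketch (the function name is hypothetical):
//
//   ANKERL_NANOBENCH(NODISCARD) int exampleAnswer();  // C++17 and later: [[nodiscard]] int exampleAnswer();
//                                                     // C++11/14: the attribute expands to nothing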
62 
63 #if defined(__clang__)
64 # define ANKERL_NANOBENCH_PRIVATE_IGNORE_PADDED_PUSH() \
65  _Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wpadded\"")
66 # define ANKERL_NANOBENCH_PRIVATE_IGNORE_PADDED_POP() _Pragma("clang diagnostic pop")
67 #else
68 # define ANKERL_NANOBENCH_PRIVATE_IGNORE_PADDED_PUSH()
69 # define ANKERL_NANOBENCH_PRIVATE_IGNORE_PADDED_POP()
70 #endif
71 
72 #if defined(__GNUC__)
73 # define ANKERL_NANOBENCH_PRIVATE_IGNORE_EFFCPP_PUSH() _Pragma("GCC diagnostic push") _Pragma("GCC diagnostic ignored \"-Weffc++\"")
74 # define ANKERL_NANOBENCH_PRIVATE_IGNORE_EFFCPP_POP() _Pragma("GCC diagnostic pop")
75 #else
76 # define ANKERL_NANOBENCH_PRIVATE_IGNORE_EFFCPP_PUSH()
77 # define ANKERL_NANOBENCH_PRIVATE_IGNORE_EFFCPP_POP()
78 #endif
79 
80 #if defined(ANKERL_NANOBENCH_LOG_ENABLED)
81 # include <iostream>
82 # define ANKERL_NANOBENCH_LOG(x) \
83  do { \
84  std::cout << __FUNCTION__ << "@" << __LINE__ << ": " << x << std::endl; \
85  } while (0)
86 #else
87 # define ANKERL_NANOBENCH_LOG(x) \
88  do { \
89  } while (0)
90 #endif
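// [Editor's example, not part of nanobench.h] The log macro is active only when
// ANKERL_NANOBENCH_LOG_ENABLED is defined before this header is included, e.g. in a user's .cpp:
//
//   #define ANKERL_NANOBENCH_LOG_ENABLED
//   #include <nanobench.h>
//   void trace() { ANKERL_NANOBENCH_LOG("numIters=" << 42); }  // prints "trace@<line>: numIters=42"
//
// Without the define the macro expands to an empty do/while, so the argument is not evaluated at all.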
91 
92 #define ANKERL_NANOBENCH_PRIVATE_PERF_COUNTERS() 0
93 #if defined(__linux__) && !defined(ANKERL_NANOBENCH_DISABLE_PERF_COUNTERS)
94 # include <linux/version.h>
95 # if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 3, 0)
96 // PERF_COUNT_HW_REF_CPU_CYCLES only available since kernel 3.3
97 // PERF_FLAG_FD_CLOEXEC since kernel 3.14
98 # undef ANKERL_NANOBENCH_PRIVATE_PERF_COUNTERS
99 # define ANKERL_NANOBENCH_PRIVATE_PERF_COUNTERS() 1
100 # endif
101 #endif
102 
103 #if defined(__clang__)
104 # define ANKERL_NANOBENCH_NO_SANITIZE(...) __attribute__((no_sanitize(__VA_ARGS__)))
105 #else
106 # define ANKERL_NANOBENCH_NO_SANITIZE(...)
107 #endif
108 
109 #if defined(_MSC_VER)
110 # define ANKERL_NANOBENCH_PRIVATE_NOINLINE() __declspec(noinline)
111 #else
112 # define ANKERL_NANOBENCH_PRIVATE_NOINLINE() __attribute__((noinline))
113 #endif
114 
115 // workaround missing "is_trivially_copyable" in g++ < 5.0
116 // See https://stackoverflow.com/a/31798726/48181
117 #if defined(__GNUC__) && __GNUC__ < 5
118 # define ANKERL_NANOBENCH_IS_TRIVIALLY_COPYABLE(...) __has_trivial_copy(__VA_ARGS__)
119 #else
120 # define ANKERL_NANOBENCH_IS_TRIVIALLY_COPYABLE(...) std::is_trivially_copyable<__VA_ARGS__>::value
121 #endif
122 
123 // noexcept may be missing for std::string.
124 // See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58265
125 #define ANKERL_NANOBENCH_PRIVATE_NOEXCEPT_STRING_MOVE() std::is_nothrow_move_assignable<std::string>::value
126 
127 // declarations ///////////////////////////////////////////////////////////////////////////////////
128 
129 namespace ankerl {
130 namespace nanobench {
131 
132 using Clock = std::conditional<std::chrono::high_resolution_clock::is_steady, std::chrono::high_resolution_clock,
133  std::chrono::steady_clock>::type;
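// [Editor's sketch, not part of nanobench.h] Clock is high_resolution_clock if it is steady, otherwise
// steady_clock; all measurements below use it. A hypothetical helper showing the timing pattern that
// Bench::run() uses further down:
inline Clock::duration exampleMeasureOnce() {
    Clock::time_point const before = Clock::now();
    // ... the code under test would run here ...
    Clock::time_point const after = Clock::now();
    return after - before;
}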
134 class Bench;
135 struct Config;
136 class Result;
137 class Rng;
138 class BigO;
139 
290 void render(char const* mustacheTemplate, Bench const& bench, std::ostream& out);
291 void render(std::string const& mustacheTemplate, Bench const& bench, std::ostream& out);
292 
301 void render(char const* mustacheTemplate, std::vector<Result> const& results, std::ostream& out);
302 void render(std::string const& mustacheTemplate, std::vector<Result> const& results, std::ostream& out);
303 
304 // Contains mustache-like templates
305 namespace templates {
306 
316 char const* csv() noexcept;
317 
328 char const* htmlBoxplot() noexcept;
329 
336 char const* pyperf() noexcept;
337 
347 char const* json() noexcept;
348 
349 } // namespace templates
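// [Editor's usage sketch, not part of nanobench.h] Rendering results with one of the bundled templates,
// as it would appear in user code after a benchmark has run (`bench` and `fout` are hypothetical names;
// <fstream> is assumed to be included there):
//
//   std::ofstream fout("results.json");
//   ankerl::nanobench::render(ankerl::nanobench::templates::json(), bench, fout);
//
// Any mustache-like string works in place of templates::json(); see the render() overloads above.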
350 
351 namespace detail {
352 
353 template <typename T>
354 struct PerfCountSet;
355 
356 class IterationLogic;
357 class PerformanceCounters;
358 
359 #if ANKERL_NANOBENCH(PERF_COUNTERS)
360 class LinuxPerformanceCounters;
361 #endif
362 
363 } // namespace detail
364 } // namespace nanobench
365 } // namespace ankerl
366 
367 // definitions ////////////////////////////////////////////////////////////////////////////////////
368 
369 namespace ankerl {
370 namespace nanobench {
371 namespace detail {
372 
373 template <typename T>
374 struct PerfCountSet {
381 };
382 
383 } // namespace detail
384 
385 ANKERL_NANOBENCH(IGNORE_PADDED_PUSH)
386 struct Config {
387  // actual benchmark config
388  std::string mBenchmarkTitle = "benchmark"; // NOLINT(misc-non-private-member-variables-in-classes)
389  std::string mBenchmarkName = "noname"; // NOLINT(misc-non-private-member-variables-in-classes)
390  std::string mUnit = "op"; // NOLINT(misc-non-private-member-variables-in-classes)
391  double mBatch = 1.0; // NOLINT(misc-non-private-member-variables-in-classes)
392  double mComplexityN = -1.0; // NOLINT(misc-non-private-member-variables-in-classes)
393  size_t mNumEpochs = 11; // NOLINT(misc-non-private-member-variables-in-classes)
394  size_t mClockResolutionMultiple = static_cast<size_t>(1000); // NOLINT(misc-non-private-member-variables-in-classes)
395  std::chrono::nanoseconds mMaxEpochTime = std::chrono::milliseconds(100); // NOLINT(misc-non-private-member-variables-in-classes)
396  std::chrono::nanoseconds mMinEpochTime = std::chrono::milliseconds(1); // NOLINT(misc-non-private-member-variables-in-classes)
397  uint64_t mMinEpochIterations{1}; // NOLINT(misc-non-private-member-variables-in-classes)
398  // If not 0, run *exactly* this number of iterations per epoch.
399  uint64_t mEpochIterations{0}; // NOLINT(misc-non-private-member-variables-in-classes)
400  uint64_t mWarmup = 0; // NOLINT(misc-non-private-member-variables-in-classes)
401  std::ostream* mOut = nullptr; // NOLINT(misc-non-private-member-variables-in-classes)
402  std::chrono::duration<double> mTimeUnit = std::chrono::nanoseconds{1}; // NOLINT(misc-non-private-member-variables-in-classes)
403  std::string mTimeUnitName = "ns"; // NOLINT(misc-non-private-member-variables-in-classes)
404  bool mShowPerformanceCounters = true; // NOLINT(misc-non-private-member-variables-in-classes)
405  bool mIsRelative = false; // NOLINT(misc-non-private-member-variables-in-classes)
406  std::unordered_map<std::string, std::string> mContext{}; // NOLINT(misc-non-private-member-variables-in-classes)
407 
410  Config& operator=(Config const& other);
411  Config& operator=(Config&& other) noexcept(ANKERL_NANOBENCH(NOEXCEPT_STRING_MOVE));
412  Config(Config const& other);
413  Config(Config&& other) noexcept;
414 };
415 ANKERL_NANOBENCH(IGNORE_PADDED_POP)
416 
417 // Result returned after a benchmark has finished. Can be used as a baseline for relative().
418 ANKERL_NANOBENCH(IGNORE_PADDED_PUSH)
419 class Result {
420 public:
421  enum class Measure : size_t {
422  elapsed,
423  iterations,
424  pagefaults,
425  cpucycles,
426  contextswitches,
427  instructions,
428  branchinstructions,
429  branchmisses,
430  _size
431  };
432 
433  explicit Result(Config benchmarkConfig);
434 
436  Result& operator=(Result const& other);
437  Result& operator=(Result&& other) noexcept(ANKERL_NANOBENCH(NOEXCEPT_STRING_MOVE));
438  Result(Result const& other);
439  Result(Result&& other) noexcept;
440 
441  // adds new measurement results
442  // all values are scaled by iters (except iters...)
443  void add(Clock::duration totalElapsed, uint64_t iters, detail::PerformanceCounters const& pc);
444 
445  ANKERL_NANOBENCH(NODISCARD) Config const& config() const noexcept;
446 
447  ANKERL_NANOBENCH(NODISCARD) double median(Measure m) const;
448  ANKERL_NANOBENCH(NODISCARD) double medianAbsolutePercentError(Measure m) const;
449  ANKERL_NANOBENCH(NODISCARD) double average(Measure m) const;
450  ANKERL_NANOBENCH(NODISCARD) double sum(Measure m) const noexcept;
451  ANKERL_NANOBENCH(NODISCARD) double sumProduct(Measure m1, Measure m2) const noexcept;
452  ANKERL_NANOBENCH(NODISCARD) double minimum(Measure m) const noexcept;
453  ANKERL_NANOBENCH(NODISCARD) double maximum(Measure m) const noexcept;
454  ANKERL_NANOBENCH(NODISCARD) std::string const& context(char const* variableName) const;
455  ANKERL_NANOBENCH(NODISCARD) std::string const& context(std::string const& variableName) const;
456 
457  ANKERL_NANOBENCH(NODISCARD) bool has(Measure m) const noexcept;
458  ANKERL_NANOBENCH(NODISCARD) double get(size_t idx, Measure m) const;
459  ANKERL_NANOBENCH(NODISCARD) bool empty() const noexcept;
460  ANKERL_NANOBENCH(NODISCARD) size_t size() const noexcept;
461 
462  // Finds the Measure matching the given string; returns _size if not found.
463  static Measure fromString(std::string const& str);
464 
465 private:
466  Config mConfig{};
467  std::vector<std::vector<double>> mNameToMeasurements{};
468 };
469 ANKERL_NANOBENCH(IGNORE_PADDED_POP)
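// [Editor's usage sketch, not part of nanobench.h] Querying a finished Result; `printSummary` is a
// hypothetical helper and assumes <iostream> is available in the including translation unit:
//
//   void printSummary(ankerl::nanobench::Result const& r) {
//       using M = ankerl::nanobench::Result::Measure;
//       std::cout << r.config().mBenchmarkName
//                 << ": median(elapsed)=" << r.median(M::elapsed)
//                 << ", err%=" << r.medianAbsolutePercentError(M::elapsed) * 100.0
//                 << ", epochs=" << r.size() << '\n';
//       if (r.has(M::instructions)) {
//           std::cout << "  median(instructions)=" << r.median(M::instructions) << '\n';
//       }
//   }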
470 
471 
488 class Rng final {
489 public:
493  using result_type = uint64_t;
494 
495  static constexpr uint64_t(min)();
496  static constexpr uint64_t(max)();
497 
503  Rng(Rng const&) = delete;
504 
508  Rng& operator=(Rng const&) = delete;
509 
510  // moving is ok
511  Rng(Rng&&) noexcept = default;
512  Rng& operator=(Rng&&) noexcept = default;
513  ~Rng() noexcept = default;
514 
522  Rng();
523 
540  explicit Rng(uint64_t seed) noexcept;
541  Rng(uint64_t x, uint64_t y) noexcept;
542  explicit Rng(std::vector<uint64_t> const& data);
543 
547  ANKERL_NANOBENCH(NODISCARD) Rng copy() const noexcept;
548 
556  inline uint64_t operator()() noexcept;
557 
558  // Note: the mapping used here is slightly biased.
559 
574  inline uint32_t bounded(uint32_t range) noexcept;
575 
576  // random double in range [0, 1)
577  // see http://prng.di.unimi.it/
578 
585  inline double uniform01() noexcept;
586 
594  template <typename Container>
595  void shuffle(Container& container) noexcept;
596 
603  ANKERL_NANOBENCH(NODISCARD) std::vector<uint64_t> state() const;
604 
605 private:
606  static constexpr uint64_t rotl(uint64_t x, unsigned k) noexcept;
607 
608  uint64_t mX;
609  uint64_t mY;
610 };
611 
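// [Editor's usage sketch, not part of nanobench.h] Typical Rng usage; the function name is hypothetical.
inline std::vector<uint64_t> exampleRngUsage() {
    Rng rng(12345);                       // fixed seed for reproducibility; Rng() would self-seed instead
    std::vector<uint64_t> v = {1, 2, 3, 4, 5};
    rng.shuffle(v);                       // in-place shuffle driven by rng
    v.push_back(rng());                   // full 64 bit random value
    v.push_back(rng.bounded(10));         // value in [0, 10)
    return v;                             // rng.state() / Rng(state) can save and restore the generator
}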
626 ANKERL_NANOBENCH(IGNORE_PADDED_PUSH)
627 class Bench {
628 public:
632  Bench();
633 
634  Bench(Bench&& other) noexcept;
635  Bench& operator=(Bench&& other) noexcept(ANKERL_NANOBENCH(NOEXCEPT_STRING_MOVE));
636  Bench(Bench const& other);
637  Bench& operator=(Bench const& other);
638  ~Bench() noexcept;
639 
658  template <typename Op>
659  ANKERL_NANOBENCH(NOINLINE)
660  Bench& run(char const* benchmarkName, Op&& op);
661 
662  template <typename Op>
663  ANKERL_NANOBENCH(NOINLINE)
664  Bench& run(std::string const& benchmarkName, Op&& op);
665 
670  template <typename Op>
671  ANKERL_NANOBENCH(NOINLINE)
672  Bench& run(Op&& op);
673 
679  Bench& title(char const* benchmarkTitle);
680  Bench& title(std::string const& benchmarkTitle);
681 
685  ANKERL_NANOBENCH(NODISCARD) std::string const& title() const noexcept;
686 
688  Bench& name(char const* benchmarkName);
689  Bench& name(std::string const& benchmarkName);
690  ANKERL_NANOBENCH(NODISCARD) std::string const& name() const noexcept;
691 
704  Bench& context(char const* variableName, char const* variableValue);
705  Bench& context(std::string const& variableName, std::string const& variableValue);
706 
715  Bench& clearContext();
716 
726  template <typename T>
727  Bench& batch(T b) noexcept;
728  ANKERL_NANOBENCH(NODISCARD) double batch() const noexcept;
729 
738  Bench& unit(char const* unit);
739  Bench& unit(std::string const& unit);
740  ANKERL_NANOBENCH(NODISCARD) std::string const& unit() const noexcept;
741 
751  Bench& timeUnit(std::chrono::duration<double> const& tu, std::string const& tuName);
752  ANKERL_NANOBENCH(NODISCARD) std::string const& timeUnitName() const noexcept;
753  ANKERL_NANOBENCH(NODISCARD) std::chrono::duration<double> const& timeUnit() const noexcept;
754 
762  Bench& output(std::ostream* outstream) noexcept;
763  ANKERL_NANOBENCH(NODISCARD) std::ostream* output() const noexcept;
764 
785  Bench& clockResolutionMultiple(size_t multiple) noexcept;
786  ANKERL_NANOBENCH(NODISCARD) size_t clockResolutionMultiple() const noexcept;
787 
803  Bench& epochs(size_t numEpochs) noexcept;
804  ANKERL_NANOBENCH(NODISCARD) size_t epochs() const noexcept;
805 
816  Bench& maxEpochTime(std::chrono::nanoseconds t) noexcept;
817  ANKERL_NANOBENCH(NODISCARD) std::chrono::nanoseconds maxEpochTime() const noexcept;
818 
829  Bench& minEpochTime(std::chrono::nanoseconds t) noexcept;
830  ANKERL_NANOBENCH(NODISCARD) std::chrono::nanoseconds minEpochTime() const noexcept;
831 
842  Bench& minEpochIterations(uint64_t numIters) noexcept;
843  ANKERL_NANOBENCH(NODISCARD) uint64_t minEpochIterations() const noexcept;
844 
851  Bench& epochIterations(uint64_t numIters) noexcept;
852  ANKERL_NANOBENCH(NODISCARD) uint64_t epochIterations() const noexcept;
853 
863  Bench& warmup(uint64_t numWarmupIters) noexcept;
864  ANKERL_NANOBENCH(NODISCARD) uint64_t warmup() const noexcept;
865 
883  Bench& relative(bool isRelativeEnabled) noexcept;
884  ANKERL_NANOBENCH(NODISCARD) bool relative() const noexcept;
885 
894  Bench& performanceCounters(bool showPerformanceCounters) noexcept;
895  ANKERL_NANOBENCH(NODISCARD) bool performanceCounters() const noexcept;
896 
905  ANKERL_NANOBENCH(NODISCARD) std::vector<Result> const& results() const noexcept;
906 
914  template <typename Arg>
915  Bench& doNotOptimizeAway(Arg&& arg);
916 
931  template <typename T>
932  Bench& complexityN(T n) noexcept;
933  ANKERL_NANOBENCH(NODISCARD) double complexityN() const noexcept;
934 
966  std::vector<BigO> complexityBigO() const;
967 
991  template <typename Op>
992  BigO complexityBigO(char const* name, Op op) const;
993 
994  template <typename Op>
995  BigO complexityBigO(std::string const& name, Op op) const;
996 
1004  Bench& render(char const* templateContent, std::ostream& os);
1005  Bench& render(std::string const& templateContent, std::ostream& os);
1006 
1007  Bench& config(Config const& benchmarkConfig);
1008  ANKERL_NANOBENCH(NODISCARD) Config const& config() const noexcept;
1009 
1010 private:
1011  Config mConfig{};
1012  std::vector<Result> mResults{};
1013 };
1014 ANKERL_NANOBENCH(IGNORE_PADDED_POP)
1015 
1016 
1022 template <typename Arg>
1023 void doNotOptimizeAway(Arg&& arg);
1024 
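// [Editor's usage sketch, not part of nanobench.h] A minimal, complete benchmark as it would appear in
// user code (exactly one .cpp must also define ANKERL_NANOBENCH_IMPLEMENT before including this header).
// The function name is hypothetical.
inline void exampleShiftBenchmark() {
    uint64_t x = 1;
    Bench()
        .title("example")
        .unit("shift")
        .warmup(100)
        .minEpochIterations(1000)
        .relative(true)
        .run("x += x << 1", [&]() {
            x += x << 1U;
            doNotOptimizeAway(x);
        });
}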
1025 namespace detail {
1026 
1027 #if defined(_MSC_VER)
1028 void doNotOptimizeAwaySink(void const*);
1029 
1030 template <typename T>
1031 void doNotOptimizeAway(T const& val);
1032 
1033 #else
1034 
1035 // This assembly magic is taken directly from what Google Benchmark does. I previously used Facebook Folly's approach, but
1036 // that seemed to cause compilation problems in some cases. Google Benchmark's version seems to be the best tested anyway.
1037 // see https://github.com/google/benchmark/blob/v1.7.1/include/benchmark/benchmark.h#L443-L446
1038 template <typename T>
1039 void doNotOptimizeAway(T const& val) {
1040  // NOLINTNEXTLINE(hicpp-no-assembler)
1041  asm volatile("" : : "r,m"(val) : "memory");
1042 }
1043 
1044 template <typename T>
1045 void doNotOptimizeAway(T& val) {
1046 # if defined(__clang__)
1047  // NOLINTNEXTLINE(hicpp-no-assembler)
1048  asm volatile("" : "+r,m"(val) : : "memory");
1049 # else
1050  // NOLINTNEXTLINE(hicpp-no-assembler)
1051  asm volatile("" : "+m,r"(val) : : "memory");
1052 # endif
1053 }
1054 #endif
1055 
1056 // internally used, but visible because run() is templated.
1057 // Not movable/copyable, so we simply use a raw pointer instead of unique_ptr. This saves us from
1058 // having to include <memory>, and avoids the template instantiation overhead of unique_ptr, which is unfortunately quite significant.
1059 ANKERL_NANOBENCH(IGNORE_EFFCPP_PUSH)
1060 class IterationLogic {
1061 public:
1062  explicit IterationLogic(Bench const& bench);
1065  IterationLogic(IterationLogic const&) = delete;
1068 
1069  ANKERL_NANOBENCH(NODISCARD) uint64_t numIters() const noexcept;
1070  void add(std::chrono::nanoseconds elapsed, PerformanceCounters const& pc) noexcept;
1071  void moveResultTo(std::vector<Result>& results) noexcept;
1072 
1073 private:
1074  struct Impl;
1075  Impl* mPimpl;
1076 };
1077 ANKERL_NANOBENCH(IGNORE_EFFCPP_POP)
1078 
1079 ANKERL_NANOBENCH(IGNORE_PADDED_PUSH)
1080 class PerformanceCounters {
1081 public:
1086 
1089 
1090  void beginMeasure();
1091  void endMeasure();
1092  void updateResults(uint64_t numIters);
1093 
1094  ANKERL_NANOBENCH(NODISCARD) PerfCountSet<uint64_t> const& val() const noexcept;
1095  ANKERL_NANOBENCH(NODISCARD) PerfCountSet<bool> const& has() const noexcept;
1096 
1097 private:
1098 #if ANKERL_NANOBENCH(PERF_COUNTERS)
1099  LinuxPerformanceCounters* mPc = nullptr;
1100 #endif
1101  PerfCountSet<uint64_t> mVal{};
1102  PerfCountSet<bool> mHas{};
1103 };
1104 ANKERL_NANOBENCH(IGNORE_PADDED_POP)
1105 
1106 // Gets the singleton
1107 PerformanceCounters& performanceCounters();
1108 
1109 } // namespace detail
1110 
1111 class BigO {
1112 public:
1113  using RangeMeasure = std::vector<std::pair<double, double>>;
1114 
1115  template <typename Op>
1116  static RangeMeasure mapRangeMeasure(RangeMeasure data, Op op) {
1117  for (auto& rangeMeasure : data) {
1118  rangeMeasure.first = op(rangeMeasure.first);
1119  }
1120  return data;
1121  }
1122 
1123  static RangeMeasure collectRangeMeasure(std::vector<Result> const& results);
1124 
1125  template <typename Op>
1126  BigO(char const* bigOName, RangeMeasure const& rangeMeasure, Op rangeToN)
1127  : BigO(bigOName, mapRangeMeasure(rangeMeasure, rangeToN)) {}
1128 
1129  template <typename Op>
1130  BigO(std::string bigOName, RangeMeasure const& rangeMeasure, Op rangeToN)
1131  : BigO(std::move(bigOName), mapRangeMeasure(rangeMeasure, rangeToN)) {}
1132 
1133  BigO(char const* bigOName, RangeMeasure const& scaledRangeMeasure);
1134  BigO(std::string bigOName, RangeMeasure const& scaledRangeMeasure);
1135  ANKERL_NANOBENCH(NODISCARD) std::string const& name() const noexcept;
1136  ANKERL_NANOBENCH(NODISCARD) double constant() const noexcept;
1137  ANKERL_NANOBENCH(NODISCARD) double normalizedRootMeanSquare() const noexcept;
1138  ANKERL_NANOBENCH(NODISCARD) bool operator<(BigO const& other) const noexcept;
1139 
1140 private:
1141  std::string mName{};
1142  double mConstant{};
1143  double mNormalizedRootMeanSquare{};
1144 };
1145 std::ostream& operator<<(std::ostream& os, BigO const& bigO);
1146 std::ostream& operator<<(std::ostream& os, std::vector<ankerl::nanobench::BigO> const& bigOs);
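// [Editor's usage sketch, not part of nanobench.h] Estimating asymptotic complexity in user code; the
// loop variables are hypothetical and <iostream> is assumed to be included there:
//
//   ankerl::nanobench::Bench bench;
//   for (size_t n = 1000; n <= 128000; n *= 2) {
//       std::vector<uint64_t> v(n, 1);
//       bench.complexityN(n).run("sum vector", [&] {
//           uint64_t sum = 0;
//           for (auto val : v) { sum += val; }
//           ankerl::nanobench::doNotOptimizeAway(sum);
//       });
//   }
//   std::cout << bench.complexityBigO() << '\n';  // prints the fitted O(1), O(n), O(n log n), ... candidates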
1147 
1148 } // namespace nanobench
1149 } // namespace ankerl
1150 
1151 // implementation /////////////////////////////////////////////////////////////////////////////////
1152 
1153 namespace ankerl {
1154 namespace nanobench {
1155 
1156 constexpr uint64_t(Rng::min)() {
1157  return 0;
1158 }
1159 
1160 constexpr uint64_t(Rng::max)() {
1161  return (std::numeric_limits<uint64_t>::max)();
1162 }
1163 
1164 ANKERL_NANOBENCH_NO_SANITIZE("integer", "undefined")
1165 uint64_t Rng::operator()() noexcept {
1166  auto x = mX;
1167 
1168  mX = UINT64_C(15241094284759029579) * mY;
1169  mY = rotl(mY - x, 27);
1170 
1171  return x;
1172 }
1173 
1174 ANKERL_NANOBENCH_NO_SANITIZE("integer", "undefined")
1175 uint32_t Rng::bounded(uint32_t range) noexcept {
1176  uint64_t const r32 = static_cast<uint32_t>(operator()());
1177  auto multiresult = r32 * range;
1178  return static_cast<uint32_t>(multiresult >> 32U);
1179 }
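// [Editor's note, not part of nanobench.h] A worked example of the widening-multiply mapping above:
// with range == 6 and r32 == 0x80000000 (2^31), multiresult == 6 * 2^31 == 3 * 2^32, so the result is 3.
// Each of the `range` output buckets receives either floor(2^32 / range) or ceil(2^32 / range) of the
// 2^32 possible inputs, which is the slight bias mentioned where bounded() is declared.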
1180 
1181 double Rng::uniform01() noexcept {
1182  auto i = (UINT64_C(0x3ff) << 52U) | (operator()() >> 12U);
1183  // can't use a union in C++ here for type punning, it's undefined behavior.
1184  // std::memcpy is optimized well anyway.
1185  double d{};
1186  std::memcpy(&d, &i, sizeof(double));
1187  return d - 1.0;
1188 }
1189 
1190 template <typename Container>
1191 void Rng::shuffle(Container& container) noexcept {
1192  auto i = container.size();
1193  while (i > 1U) {
1194  using std::swap;
1195  auto n = operator()();
1196  // using decltype(i) instead of size_t to be compatible with containers that use a 32-bit index (see #80)
1197  auto b1 = static_cast<decltype(i)>((static_cast<uint32_t>(n) * static_cast<uint64_t>(i)) >> 32U);
1198  swap(container[--i], container[b1]);
1199 
1200  auto b2 = static_cast<decltype(i)>(((n >> 32U) * static_cast<uint64_t>(i)) >> 32U);
1201  swap(container[--i], container[b2]);
1202  }
1203 }
1204 
1205 ANKERL_NANOBENCH_NO_SANITIZE("integer", "undefined")
1206 constexpr uint64_t Rng::rotl(uint64_t x, unsigned k) noexcept {
1207  return (x << k) | (x >> (64U - k));
1208 }
1209 
1210 template <typename Op>
1212 Bench& Bench::run(Op&& op) {
1213  // It is important that this method is kept short so the compiler can do better optimizations / inlining of op()
1214  detail::IterationLogic iterationLogic(*this);
1215  auto& pc = detail::performanceCounters();
1216 
1217  while (auto n = iterationLogic.numIters()) {
1218  pc.beginMeasure();
1219  Clock::time_point const before = Clock::now();
1220  while (n-- > 0) {
1221  op();
1222  }
1223  Clock::time_point const after = Clock::now();
1224  pc.endMeasure();
1225  pc.updateResults(iterationLogic.numIters());
1226  iterationLogic.add(after - before, pc);
1227  }
1228  iterationLogic.moveResultTo(mResults);
1229  return *this;
1230 }
1231 
1232 // Performs all evaluations.
1233 template <typename Op>
1234 Bench& Bench::run(char const* benchmarkName, Op&& op) {
1235  name(benchmarkName);
1236  return run(std::forward<Op>(op));
1237 }
1238 
1239 template <typename Op>
1240 Bench& Bench::run(std::string const& benchmarkName, Op&& op) {
1241  name(benchmarkName);
1242  return run(std::forward<Op>(op));
1243 }
1244 
1245 template <typename Op>
1246 BigO Bench::complexityBigO(char const* benchmarkName, Op op) const {
1247  return BigO(benchmarkName, BigO::collectRangeMeasure(mResults), op);
1248 }
1249 
1250 template <typename Op>
1251 BigO Bench::complexityBigO(std::string const& benchmarkName, Op op) const {
1252  return BigO(benchmarkName, BigO::collectRangeMeasure(mResults), op);
1253 }
1254 
1255 // Set the batch size, e.g. number of processed bytes, or some other metric for the size of the processed data in each iteration.
1256 // Any argument is cast to double.
1257 template <typename T>
1258 Bench& Bench::batch(T b) noexcept {
1259  mConfig.mBatch = static_cast<double>(b);
1260  return *this;
1261 }
1262 
1263 // Sets the computation complexity of the next run. Any argument is cast to double.
1264 template <typename T>
1265 Bench& Bench::complexityN(T n) noexcept {
1266  mConfig.mComplexityN = static_cast<double>(n);
1267  return *this;
1268 }
1269 
1270 // Convenience: makes sure none of the given arguments are optimized away by the compiler.
1271 template <typename Arg>
1272 Bench& Bench::doNotOptimizeAway(Arg&& arg) {
1273  detail::doNotOptimizeAway(std::forward<Arg>(arg));
1274  return *this;
1275 }
1276 
1277 // Makes sure none of the given arguments are optimized away by the compiler.
1278 template <typename Arg>
1279 void doNotOptimizeAway(Arg&& arg) {
1280  detail::doNotOptimizeAway(std::forward<Arg>(arg));
1281 }
1282 
1283 namespace detail {
1284 
1285 #if defined(_MSC_VER)
1286 template <typename T>
1287 void doNotOptimizeAway(T const& val) {
1288  doNotOptimizeAwaySink(&val);
1289 }
1290 
1291 #endif
1292 
1293 } // namespace detail
1294 } // namespace nanobench
1295 } // namespace ankerl
1296 
1297 #if defined(ANKERL_NANOBENCH_IMPLEMENT)
1298 
1300 // implementation part - only visible in .cpp
1302 
1303 # include <algorithm> // sort, reverse
1304 # include <atomic> // compare_exchange_strong in loop overhead
1305 # include <cstdlib> // getenv
1306 # include <cstring> // strstr, strncmp
1307 # include <fstream> // ifstream to parse proc files
1308 # include <iomanip> // setw, setprecision
1309 # include <iostream> // cout
1310 # include <numeric> // accumulate
1311 # include <random> // random_device
1312 # include <sstream> // to_s in Number
1313 # include <stdexcept> // throw for rendering templates
1314 # include <tuple> // std::tie
1315 # if defined(__linux__)
1316 # include <unistd.h> //sysconf
1317 # endif
1318 # if ANKERL_NANOBENCH(PERF_COUNTERS)
1319 # include <map> // map
1320 
1321 # include <linux/perf_event.h>
1322 # include <sys/ioctl.h>
1323 # include <sys/syscall.h>
1324 # endif
1325 
1326 // declarations ///////////////////////////////////////////////////////////////////////////////////
1327 
1328 namespace ankerl {
1329 namespace nanobench {
1330 
1331 // helper stuff that is only intended to be used internally
1332 namespace detail {
1333 
1334 struct TableInfo;
1335 
1336 // formatting utilities
1337 namespace fmt {
1338 
1339 class NumSep;
1340 class StreamStateRestorer;
1341 class Number;
1342 class MarkDownColumn;
1343 class MarkDownCode;
1344 
1345 } // namespace fmt
1346 } // namespace detail
1347 } // namespace nanobench
1348 } // namespace ankerl
1349 
1350 // definitions ////////////////////////////////////////////////////////////////////////////////////
1351 
1352 namespace ankerl {
1353 namespace nanobench {
1354 
1355 uint64_t splitMix64(uint64_t& state) noexcept;
1356 
1357 namespace detail {
1358 
1359 // helpers to get double values
1360 template <typename T>
1361 inline double d(T t) noexcept {
1362  return static_cast<double>(t);
1363 }
1364 inline double d(Clock::duration duration) noexcept {
1365  return std::chrono::duration_cast<std::chrono::duration<double>>(duration).count();
1366 }
1367 
1368 // Calculates clock resolution once, and remembers the result
1369 inline Clock::duration clockResolution() noexcept;
1370 
1371 } // namespace detail
1372 
1373 namespace templates {
1374 
1375 char const* csv() noexcept {
1376  return R"DELIM("title";"name";"unit";"batch";"elapsed";"error %";"instructions";"branches";"branch misses";"total"
1377 {{#result}}"{{title}}";"{{name}}";"{{unit}}";{{batch}};{{median(elapsed)}};{{medianAbsolutePercentError(elapsed)}};{{median(instructions)}};{{median(branchinstructions)}};{{median(branchmisses)}};{{sumProduct(iterations, elapsed)}}
1378 {{/result}})DELIM";
1379 }
1380 
1381 char const* htmlBoxplot() noexcept {
1382  return R"DELIM(<html>
1383 
1384 <head>
1385  <script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
1386 </head>
1387 
1388 <body>
1389  <div id="myDiv"></div>
1390  <script>
1391  var data = [
1392  {{#result}}{
1393  name: '{{name}}',
1394  y: [{{#measurement}}{{elapsed}}{{^-last}}, {{/last}}{{/measurement}}],
1395  },
1396  {{/result}}
1397  ];
1398  var title = '{{title}}';
1399 
1400  data = data.map(a => Object.assign(a, { boxpoints: 'all', pointpos: 0, type: 'box' }));
1401  var layout = { title: { text: title }, showlegend: false, yaxis: { title: 'time per unit', rangemode: 'tozero', autorange: true } }; Plotly.newPlot('myDiv', data, layout, {responsive: true});
1402  </script>
1403 </body>
1404 
1405 </html>)DELIM";
1406 }
1407 
1408 char const* pyperf() noexcept {
1409  return R"DELIM({
1410  "benchmarks": [
1411  {
1412  "runs": [
1413  {
1414  "values": [
1415 {{#measurement}} {{elapsed}}{{^-last}},
1416 {{/last}}{{/measurement}}
1417  ]
1418  }
1419  ]
1420  }
1421  ],
1422  "metadata": {
1423  "loops": {{sum(iterations)}},
1424  "inner_loops": {{batch}},
1425  "name": "{{title}}",
1426  "unit": "second"
1427  },
1428  "version": "1.0"
1429 })DELIM";
1430 }
1431 
1432 char const* json() noexcept {
1433  return R"DELIM({
1434  "results": [
1435 {{#result}} {
1436  "title": "{{title}}",
1437  "name": "{{name}}",
1438  "unit": "{{unit}}",
1439  "batch": {{batch}},
1440  "complexityN": {{complexityN}},
1441  "epochs": {{epochs}},
1442  "clockResolution": {{clockResolution}},
1443  "clockResolutionMultiple": {{clockResolutionMultiple}},
1444  "maxEpochTime": {{maxEpochTime}},
1445  "minEpochTime": {{minEpochTime}},
1446  "minEpochIterations": {{minEpochIterations}},
1447  "epochIterations": {{epochIterations}},
1448  "warmup": {{warmup}},
1449  "relative": {{relative}},
1450  "median(elapsed)": {{median(elapsed)}},
1451  "medianAbsolutePercentError(elapsed)": {{medianAbsolutePercentError(elapsed)}},
1452  "median(instructions)": {{median(instructions)}},
1453  "medianAbsolutePercentError(instructions)": {{medianAbsolutePercentError(instructions)}},
1454  "median(cpucycles)": {{median(cpucycles)}},
1455  "median(contextswitches)": {{median(contextswitches)}},
1456  "median(pagefaults)": {{median(pagefaults)}},
1457  "median(branchinstructions)": {{median(branchinstructions)}},
1458  "median(branchmisses)": {{median(branchmisses)}},
1459  "totalTime": {{sumProduct(iterations, elapsed)}},
1460  "measurements": [
1461 {{#measurement}} {
1462  "iterations": {{iterations}},
1463  "elapsed": {{elapsed}},
1464  "pagefaults": {{pagefaults}},
1465  "cpucycles": {{cpucycles}},
1466  "contextswitches": {{contextswitches}},
1467  "instructions": {{instructions}},
1468  "branchinstructions": {{branchinstructions}},
1469  "branchmisses": {{branchmisses}}
1470  }{{^-last}},{{/-last}}
1471 {{/measurement}} ]
1472  }{{^-last}},{{/-last}}
1473 {{/result}} ]
1474 })DELIM";
1475 }
1476 
1477 ANKERL_NANOBENCH(IGNORE_PADDED_PUSH)
1478 struct Node {
1479  enum class Type { tag, content, section, inverted_section };
1480 
1481  char const* begin;
1482  char const* end;
1483  std::vector<Node> children;
1484  Type type;
1485 
1486  template <size_t N>
1487  // NOLINTNEXTLINE(hicpp-avoid-c-arrays,modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays)
1488  bool operator==(char const (&str)[N]) const noexcept {
1489  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-array-to-pointer-decay)
1490  return static_cast<size_t>(std::distance(begin, end) + 1) == N && 0 == strncmp(str, begin, N - 1);
1491  }
1492 };
1493 ANKERL_NANOBENCH(IGNORE_PADDED_POP)
1494 
1495 // NOLINTNEXTLINE(misc-no-recursion)
1496 static std::vector<Node> parseMustacheTemplate(char const** tpl) {
1497  std::vector<Node> nodes;
1498 
1499  while (true) {
1500  auto const* begin = std::strstr(*tpl, "{{");
1501  auto const* end = begin;
1502  if (begin != nullptr) {
1503  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
1504  begin += 2;
1505  end = std::strstr(begin, "}}");
1506  }
1507 
1508  if (begin == nullptr || end == nullptr) {
1509  // nothing found, finish node
1510  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
1511  nodes.emplace_back(Node{*tpl, *tpl + std::strlen(*tpl), std::vector<Node>{}, Node::Type::content});
1512  return nodes;
1513  }
1514 
1515  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
1516  nodes.emplace_back(Node{*tpl, begin - 2, std::vector<Node>{}, Node::Type::content});
1517 
1518  // we found a tag
1519  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
1520  *tpl = end + 2;
1521  switch (*begin) {
1522  case '/':
1523  // finished! bail out
1524  return nodes;
1525 
1526  case '#':
1527  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
1528  nodes.emplace_back(Node{begin + 1, end, parseMustacheTemplate(tpl), Node::Type::section});
1529  break;
1530 
1531  case '^':
1532  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
1533  nodes.emplace_back(Node{begin + 1, end, parseMustacheTemplate(tpl), Node::Type::inverted_section});
1534  break;
1535 
1536  default:
1537  nodes.emplace_back(Node{begin, end, std::vector<Node>{}, Node::Type::tag});
1538  break;
1539  }
1540  }
1541 }
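// [Editor's example, not part of nanobench.h] Parsing the template "rows:{{#result}}{{name}};{{/result}}"
// produces a content node "rows:", a section node "result" (whose children are an empty content node, a
// tag node "name", and a content node ";"), and a trailing empty content node. A '/' tag only ends the
// current (inverted) section; its name is not checked against the opening tag.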
1542 
1543 static bool generateFirstLast(Node const& n, size_t idx, size_t size, std::ostream& out) {
1544  ANKERL_NANOBENCH_LOG("n.type=" << static_cast<int>(n.type));
1545  bool const matchFirst = n == "-first";
1546  bool const matchLast = n == "-last";
1547  if (!matchFirst && !matchLast) {
1548  return false;
1549  }
1550 
1551  bool doWrite = false;
1552  if (n.type == Node::Type::section) {
1553  doWrite = (matchFirst && idx == 0) || (matchLast && idx == size - 1);
1554  } else if (n.type == Node::Type::inverted_section) {
1555  doWrite = (matchFirst && idx != 0) || (matchLast && idx != size - 1);
1556  }
1557 
1558  if (doWrite) {
1559  for (auto const& child : n.children) {
1560  if (child.type == Node::Type::content) {
1561  out.write(child.begin, std::distance(child.begin, child.end));
1562  }
1563  }
1564  }
1565  return true;
1566 }
1567 
1568 static bool matchCmdArgs(std::string const& str, std::vector<std::string>& matchResult) {
1569  matchResult.clear();
1570  auto idxOpen = str.find('(');
1571  auto idxClose = str.find(')', idxOpen);
1572  if (idxClose == std::string::npos) {
1573  return false;
1574  }
1575 
1576  matchResult.emplace_back(str.substr(0, idxOpen));
1577 
1578  // split by comma
1579  matchResult.emplace_back();
1580  for (size_t i = idxOpen + 1; i != idxClose; ++i) {
1581  if (str[i] == ' ' || str[i] == '\t') {
1582  // skip whitespace
1583  continue;
1584  }
1585  if (str[i] == ',') {
1586  // got a comma => new string
1587  matchResult.emplace_back();
1588  continue;
1589  }
1590  // no whitespace no comma, append
1591  matchResult.back() += str[i];
1592  }
1593  return true;
1594 }
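// [Editor's example, not part of nanobench.h] For the tag "sumProduct(iterations, elapsed)" this fills
// matchResult with {"sumProduct", "iterations", "elapsed"}; for "median(elapsed)" it yields
// {"median", "elapsed"}. A tag without parentheses does not match and is handled by generateConfigTag()
// or the per-measurement tag lookup instead.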
1595 
1596 static bool generateConfigTag(Node const& n, Config const& config, std::ostream& out) {
1597  using detail::d;
1598 
1599  if (n == "title") {
1600  out << config.mBenchmarkTitle;
1601  return true;
1602  }
1603  if (n == "name") {
1604  out << config.mBenchmarkName;
1605  return true;
1606  }
1607  if (n == "unit") {
1608  out << config.mUnit;
1609  return true;
1610  }
1611  if (n == "batch") {
1612  out << config.mBatch;
1613  return true;
1614  }
1615  if (n == "complexityN") {
1616  out << config.mComplexityN;
1617  return true;
1618  }
1619  if (n == "epochs") {
1620  out << config.mNumEpochs;
1621  return true;
1622  }
1623  if (n == "clockResolution") {
1624  out << d(detail::clockResolution());
1625  return true;
1626  }
1627  if (n == "clockResolutionMultiple") {
1628  out << config.mClockResolutionMultiple;
1629  return true;
1630  }
1631  if (n == "maxEpochTime") {
1632  out << d(config.mMaxEpochTime);
1633  return true;
1634  }
1635  if (n == "minEpochTime") {
1636  out << d(config.mMinEpochTime);
1637  return true;
1638  }
1639  if (n == "minEpochIterations") {
1640  out << config.mMinEpochIterations;
1641  return true;
1642  }
1643  if (n == "epochIterations") {
1644  out << config.mEpochIterations;
1645  return true;
1646  }
1647  if (n == "warmup") {
1648  out << config.mWarmup;
1649  return true;
1650  }
1651  if (n == "relative") {
1652  out << config.mIsRelative;
1653  return true;
1654  }
1655  return false;
1656 }
1657 
1658 // NOLINTNEXTLINE(readability-function-cognitive-complexity)
1659 static std::ostream& generateResultTag(Node const& n, Result const& r, std::ostream& out) {
1660  if (generateConfigTag(n, r.config(), out)) {
1661  return out;
1662  }
1663  // match e.g. "median(elapsed)"
1664  // g++ 4.8 doesn't implement std::regex :(
1665  // static std::regex const regOpArg1("^([a-zA-Z]+)\\(([a-zA-Z]*)\\)$");
1666  // std::cmatch matchResult;
1667  // if (std::regex_match(n.begin, n.end, matchResult, regOpArg1)) {
1668  std::vector<std::string> matchResult;
1669  if (matchCmdArgs(std::string(n.begin, n.end), matchResult)) {
1670  if (matchResult.size() == 2) {
1671  if (matchResult[0] == "context") {
1672  return out << r.context(matchResult[1]);
1673  }
1674 
1675  auto m = Result::fromString(matchResult[1]);
1676  if (m == Result::Measure::_size) {
1677  return out << 0.0;
1678  }
1679 
1680  if (matchResult[0] == "median") {
1681  return out << r.median(m);
1682  }
1683  if (matchResult[0] == "average") {
1684  return out << r.average(m);
1685  }
1686  if (matchResult[0] == "medianAbsolutePercentError") {
1687  return out << r.medianAbsolutePercentError(m);
1688  }
1689  if (matchResult[0] == "sum") {
1690  return out << r.sum(m);
1691  }
1692  if (matchResult[0] == "minimum") {
1693  return out << r.minimum(m);
1694  }
1695  if (matchResult[0] == "maximum") {
1696  return out << r.maximum(m);
1697  }
1698  } else if (matchResult.size() == 3) {
1699  auto m1 = Result::fromString(matchResult[1]);
1700  auto m2 = Result::fromString(matchResult[2]);
1701  if (m1 == Result::Measure::_size || m2 == Result::Measure::_size) {
1702  return out << 0.0;
1703  }
1704 
1705  if (matchResult[0] == "sumProduct") {
1706  return out << r.sumProduct(m1, m2);
1707  }
1708  }
1709  }
1710 
1711  // match e.g. "sumProduct(elapsed, iterations)"
1712  // static std::regex const regOpArg2("^([a-zA-Z]+)\\(([a-zA-Z]*)\\s*,\\s+([a-zA-Z]*)\\)$");
1713 
1714  // nothing matches :(
1715  throw std::runtime_error("command '" + std::string(n.begin, n.end) + "' not understood");
1716 }
1717 
1718 static void generateResultMeasurement(std::vector<Node> const& nodes, size_t idx, Result const& r, std::ostream& out) {
1719  for (auto const& n : nodes) {
1720  if (!generateFirstLast(n, idx, r.size(), out)) {
1721  ANKERL_NANOBENCH_LOG("n.type=" << static_cast<int>(n.type));
1722  switch (n.type) {
1723  case Node::Type::content:
1724  out.write(n.begin, std::distance(n.begin, n.end));
1725  break;
1726 
1727  case Node::Type::inverted_section:
1728  throw std::runtime_error("got a inverted section inside measurement");
1729 
1730  case Node::Type::section:
1731  throw std::runtime_error("got a section inside measurement");
1732 
1733  case Node::Type::tag: {
1734  auto m = Result::fromString(std::string(n.begin, n.end));
1735  if (m == Result::Measure::_size || !r.has(m)) {
1736  out << 0.0;
1737  } else {
1738  out << r.get(idx, m);
1739  }
1740  break;
1741  }
1742  }
1743  }
1744  }
1745 }
1746 
1747 static void generateResult(std::vector<Node> const& nodes, size_t idx, std::vector<Result> const& results, std::ostream& out) {
1748  auto const& r = results[idx];
1749  for (auto const& n : nodes) {
1750  if (!generateFirstLast(n, idx, results.size(), out)) {
1751  ANKERL_NANOBENCH_LOG("n.type=" << static_cast<int>(n.type));
1752  switch (n.type) {
1753  case Node::Type::content:
1754  out.write(n.begin, std::distance(n.begin, n.end));
1755  break;
1756 
1757  case Node::Type::inverted_section:
1758  throw std::runtime_error("got a inverted section inside result");
1759 
1760  case Node::Type::section:
1761  if (n == "measurement") {
1762  for (size_t i = 0; i < r.size(); ++i) {
1763  generateResultMeasurement(n.children, i, r, out);
1764  }
1765  } else {
1766  throw std::runtime_error("got a section inside result");
1767  }
1768  break;
1769 
1770  case Node::Type::tag:
1771  generateResultTag(n, r, out);
1772  break;
1773  }
1774  }
1775  }
1776 }
1777 
1778 } // namespace templates
1779 
1780 // helper stuff that is only intended to be used internally
1781 namespace detail {
1782 
1783 char const* getEnv(char const* name);
1784 bool isEndlessRunning(std::string const& name);
1785 bool isWarningsEnabled();
1786 
1787 template <typename T>
1788 T parseFile(std::string const& filename, bool* fail);
1789 
1790 void gatherStabilityInformation(std::vector<std::string>& warnings, std::vector<std::string>& recommendations);
1791 void printStabilityInformationOnce(std::ostream* outStream);
1792 
1793 // remembers the last table settings used. When it changes, a new table header is automatically written for the new entry.
1794 uint64_t& singletonHeaderHash() noexcept;
1795 
1796 // determines resolution of the given clock. This is done by measuring multiple times and returning the minimum time difference.
1797 Clock::duration calcClockResolution(size_t numEvaluations) noexcept;
1798 
1799 // formatting utilities
1800 namespace fmt {
1801 
1802 // adds thousands separator to numbers
1803 ANKERL_NANOBENCH(IGNORE_PADDED_PUSH)
1804 class NumSep : public std::numpunct<char> {
1805 public:
1806  explicit NumSep(char sep);
1807  char do_thousands_sep() const override;
1808  std::string do_grouping() const override;
1809 
1810 private:
1811  char mSep;
1812 };
1813 ANKERL_NANOBENCH(IGNORE_PADDED_POP)
1814 
1815 // RAII to save & restore a stream's state
1816 ANKERL_NANOBENCH(IGNORE_PADDED_PUSH)
1817 class StreamStateRestorer {
1818 public:
1819  explicit StreamStateRestorer(std::ostream& s);
1820  ~StreamStateRestorer();
1821 
1822  // sets back all stream info that we remembered at construction
1823  void restore();
1824 
1825  // don't allow copying / moving
1826  StreamStateRestorer(StreamStateRestorer const&) = delete;
1827  StreamStateRestorer& operator=(StreamStateRestorer const&) = delete;
1828  StreamStateRestorer(StreamStateRestorer&&) = delete;
1829  StreamStateRestorer& operator=(StreamStateRestorer&&) = delete;
1830 
1831 private:
1832  std::ostream& mStream;
1833  std::locale mLocale;
1834  std::streamsize const mPrecision;
1835  std::streamsize const mWidth;
1836  std::ostream::char_type const mFill;
1837  std::ostream::fmtflags const mFmtFlags;
1838 };
1839 ANKERL_NANOBENCH(IGNORE_PADDED_POP)
1840 
1841 // Number formatter
1842 class Number {
1843 public:
1844  Number(int width, int precision, double value);
1845  Number(int width, int precision, int64_t value);
1846  ANKERL_NANOBENCH(NODISCARD) std::string to_s() const;
1847 
1848 private:
1849  friend std::ostream& operator<<(std::ostream& os, Number const& n);
1850  std::ostream& write(std::ostream& os) const;
1851 
1852  int mWidth;
1853  int mPrecision;
1854  double mValue;
1855 };
1856 
1857 // helper replacement for std::to_string of signed/unsigned numbers so we are locale independent
1858 std::string to_s(uint64_t n);
1859 
1860 std::ostream& operator<<(std::ostream& os, Number const& n);
1861 
1862 class MarkDownColumn {
1863 public:
1864  MarkDownColumn(int w, int prec, std::string tit, std::string suff, double val) noexcept;
1865  ANKERL_NANOBENCH(NODISCARD) std::string title() const;
1866  ANKERL_NANOBENCH(NODISCARD) std::string separator() const;
1867  ANKERL_NANOBENCH(NODISCARD) std::string invalid() const;
1868  ANKERL_NANOBENCH(NODISCARD) std::string value() const;
1869 
1870 private:
1871  int mWidth;
1872  int mPrecision;
1873  std::string mTitle;
1874  std::string mSuffix;
1875  double mValue;
1876 };
1877 
1878 // Formats any text as markdown code, escaping backticks.
1879 class MarkDownCode {
1880 public:
1881  explicit MarkDownCode(std::string const& what);
1882 
1883 private:
1884  friend std::ostream& operator<<(std::ostream& os, MarkDownCode const& mdCode);
1885  std::ostream& write(std::ostream& os) const;
1886 
1887  std::string mWhat{};
1888 };
1889 
1890 std::ostream& operator<<(std::ostream& os, MarkDownCode const& mdCode);
1891 
1892 } // namespace fmt
1893 } // namespace detail
1894 } // namespace nanobench
1895 } // namespace ankerl
1896 
1897 // implementation /////////////////////////////////////////////////////////////////////////////////
1898 
1899 namespace ankerl {
1900 namespace nanobench {
1901 
1902 // NOLINTNEXTLINE(readability-function-cognitive-complexity)
1903 void render(char const* mustacheTemplate, std::vector<Result> const& results, std::ostream& out) {
1904  detail::fmt::StreamStateRestorer const restorer(out);
1905 
1906  out.precision(std::numeric_limits<double>::digits10);
1907  auto nodes = templates::parseMustacheTemplate(&mustacheTemplate);
1908 
1909  for (auto const& n : nodes) {
1910  ANKERL_NANOBENCH_LOG("n.type=" << static_cast<int>(n.type));
1911  switch (n.type) {
1912  case templates::Node::Type::content:
1913  out.write(n.begin, std::distance(n.begin, n.end));
1914  break;
1915 
1916  case templates::Node::Type::inverted_section:
1917  throw std::runtime_error("unknown list '" + std::string(n.begin, n.end) + "'");
1918 
1919  case templates::Node::Type::section:
1920  if (n == "result") {
1921  const size_t nbResults = results.size();
1922  for (size_t i = 0; i < nbResults; ++i) {
1923  generateResult(n.children, i, results, out);
1924  }
1925  } else if (n == "measurement") {
1926  if (results.size() != 1) {
1927  throw std::runtime_error(
1928  "render: can only use section 'measurement' here if there is a single result, but there are " +
1929  detail::fmt::to_s(results.size()));
1930  }
1931  // when we only have a single result, we can immediately go into its measurement.
1932  auto const& r = results.front();
1933  for (size_t i = 0; i < r.size(); ++i) {
1934  generateResultMeasurement(n.children, i, r, out);
1935  }
1936  } else {
1937  throw std::runtime_error("render: unknown section '" + std::string(n.begin, n.end) + "'");
1938  }
1939  break;
1940 
1941  case templates::Node::Type::tag:
1942  if (results.size() == 1) {
1943  // result & config are both supported there
1944  generateResultTag(n, results.front(), out);
1945  } else {
1946  // This just uses the last result's config.
1947  if (!generateConfigTag(n, results.back().config(), out)) {
1948  throw std::runtime_error("unknown tag '" + std::string(n.begin, n.end) + "'");
1949  }
1950  }
1951  break;
1952  }
1953  }
1954 }
1955 
1956 void render(std::string const& mustacheTemplate, std::vector<Result> const& results, std::ostream& out) {
1957  render(mustacheTemplate.c_str(), results, out);
1958 }
1959 
1960 void render(char const* mustacheTemplate, const Bench& bench, std::ostream& out) {
1961  render(mustacheTemplate, bench.results(), out);
1962 }
1963 
1964 void render(std::string const& mustacheTemplate, const Bench& bench, std::ostream& out) {
1965  render(mustacheTemplate.c_str(), bench.results(), out);
1966 }
1967 
1968 namespace detail {
1969 
1970 PerformanceCounters& performanceCounters() {
1971 # if defined(__clang__)
1972 # pragma clang diagnostic push
1973 # pragma clang diagnostic ignored "-Wexit-time-destructors"
1974 # endif
1975  static PerformanceCounters pc;
1976 # if defined(__clang__)
1977 # pragma clang diagnostic pop
1978 # endif
1979  return pc;
1980 }
1981 
1982 // Windows version of doNotOptimizeAway
1983 // see https://github.com/google/benchmark/blob/v1.7.1/include/benchmark/benchmark.h#L514
1984 // see https://github.com/facebook/folly/blob/v2023.01.30.00/folly/lang/Hint-inl.h#L54-L58
1985 // see https://learn.microsoft.com/en-us/cpp/preprocessor/optimize
1986 # if defined(_MSC_VER)
1987 # pragma optimize("", off)
1988 void doNotOptimizeAwaySink(void const*) {}
1989 # pragma optimize("", on)
1990 # endif
1991 
1992 template <typename T>
1993 T parseFile(std::string const& filename, bool* fail) {
1994  std::ifstream fin(filename); // NOLINT(misc-const-correctness)
1995  T num{};
1996  fin >> num;
1997  if (fail != nullptr) {
1998  *fail = fin.fail();
1999  }
2000  return num;
2001 }
2002 
2003 char const* getEnv(char const* name) {
2004 # if defined(_MSC_VER)
2005 # pragma warning(push)
2006 # pragma warning(disable : 4996) // 'getenv': This function or variable may be unsafe.
2007 # endif
2008  return std::getenv(name); // NOLINT(concurrency-mt-unsafe)
2009 # if defined(_MSC_VER)
2010 # pragma warning(pop)
2011 # endif
2012 }
2013 
2014 bool isEndlessRunning(std::string const& name) {
2015  auto const* const endless = getEnv("NANOBENCH_ENDLESS");
2016  return nullptr != endless && endless == name;
2017 }
2018 
2019 // True when environment variable NANOBENCH_SUPPRESS_WARNINGS is either not set at all, or set to "0"
2020 bool isWarningsEnabled() {
2021  auto const* const suppression = getEnv("NANOBENCH_SUPPRESS_WARNINGS");
2022  return nullptr == suppression || suppression == std::string("0");
2023 }
2024 
2025 void gatherStabilityInformation(std::vector<std::string>& warnings, std::vector<std::string>& recommendations) {
2026  warnings.clear();
2027  recommendations.clear();
2028 
2029 # if defined(DEBUG)
2030  warnings.emplace_back("DEBUG defined");
2031  bool const recommendCheckFlags = true;
2032 # else
2033  bool const recommendCheckFlags = false;
2034 # endif
2035 
2036  bool recommendPyPerf = false;
2037 # if defined(__linux__)
2038  auto nprocs = sysconf(_SC_NPROCESSORS_CONF);
2039  if (nprocs <= 0) {
2040  warnings.emplace_back("couldn't figure out number of processors - no governor, turbo check possible");
2041  } else {
2042  // check frequency scaling
2043  for (long id = 0; id < nprocs; ++id) {
2044  auto idStr = detail::fmt::to_s(static_cast<uint64_t>(id));
2045  auto sysCpu = "/sys/devices/system/cpu/cpu" + idStr;
2046  auto minFreq = parseFile<int64_t>(sysCpu + "/cpufreq/scaling_min_freq", nullptr);
2047  auto maxFreq = parseFile<int64_t>(sysCpu + "/cpufreq/scaling_max_freq", nullptr);
2048  if (minFreq != maxFreq) {
2049  auto minMHz = d(minFreq) / 1000.0;
2050  auto maxMHz = d(maxFreq) / 1000.0;
2051  warnings.emplace_back("CPU frequency scaling enabled: CPU " + idStr + " between " +
2052  detail::fmt::Number(1, 1, minMHz).to_s() + " and " + detail::fmt::Number(1, 1, maxMHz).to_s() +
2053  " MHz");
2054  recommendPyPerf = true;
2055  break;
2056  }
2057  }
2058 
2059  auto fail = false;
2060  auto currentGovernor = parseFile<std::string>("/sys/devices/system/cpu/cpu0/cpufreq/scaling_governor", &fail);
2061  if (!fail && "performance" != currentGovernor) {
2062  warnings.emplace_back("CPU governor is '" + currentGovernor + "' but should be 'performance'");
2063  recommendPyPerf = true;
2064  }
2065 
2066  auto noTurbo = parseFile<int>("/sys/devices/system/cpu/intel_pstate/no_turbo", &fail);
2067  if (!fail && noTurbo == 0) {
2068  warnings.emplace_back("Turbo is enabled, CPU frequency will fluctuate");
2069  recommendPyPerf = true;
2070  }
2071  }
2072 # endif
2073 
2074  if (recommendCheckFlags) {
2075  recommendations.emplace_back("Make sure you compile for Release");
2076  }
2077  if (recommendPyPerf) {
2078  recommendations.emplace_back("Use 'pyperf system tune' before benchmarking. See https://github.com/psf/pyperf");
2079  }
2080 }
2081 
2082 void printStabilityInformationOnce(std::ostream* outStream) {
2083  static bool shouldPrint = true;
2084  if (shouldPrint && (nullptr != outStream) && isWarningsEnabled()) {
2085  auto& os = *outStream;
2086  shouldPrint = false;
2087  std::vector<std::string> warnings;
2088  std::vector<std::string> recommendations;
2089  gatherStabilityInformation(warnings, recommendations);
2090  if (warnings.empty()) {
2091  return;
2092  }
2093 
2094  os << "Warning, results might be unstable:" << std::endl;
2095  for (auto const& w : warnings) {
2096  os << "* " << w << std::endl;
2097  }
2098 
2099  os << std::endl << "Recommendations" << std::endl;
2100  for (auto const& r : recommendations) {
2101  os << "* " << r << std::endl;
2102  }
2103  }
2104 }
2105 
2106 // remembers the last table settings used. When it changes, a new table header is automatically written for the new entry.
2107 uint64_t& singletonHeaderHash() noexcept {
2108  static uint64_t sHeaderHash{};
2109  return sHeaderHash;
2110 }
2111 
2112 ANKERL_NANOBENCH_NO_SANITIZE("integer", "undefined")
2113 inline uint64_t hash_combine(uint64_t seed, uint64_t val) {
2114  return seed ^ (val + UINT64_C(0x9e3779b9) + (seed << 6U) + (seed >> 2U));
2115 }
2116 
2117 // determines resolution of the given clock. This is done by measuring multiple times and returning the minimum time difference.
2118 Clock::duration calcClockResolution(size_t numEvaluations) noexcept {
2119  auto bestDuration = Clock::duration::max();
2120  Clock::time_point tBegin;
2121  Clock::time_point tEnd;
2122  for (size_t i = 0; i < numEvaluations; ++i) {
2123  tBegin = Clock::now();
2124  do {
2125  tEnd = Clock::now();
2126  } while (tBegin == tEnd);
2127  bestDuration = (std::min)(bestDuration, tEnd - tBegin);
2128  }
2129  return bestDuration;
2130 }
2131 
2132 // Calculates clock resolution once, and remembers the result
2133 Clock::duration clockResolution() noexcept {
2134  static Clock::duration const sResolution = calcClockResolution(20);
2135  return sResolution;
2136 }
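// [Editor's note, not part of nanobench.h] IterationLogic::Impl below derives its per-epoch target
// runtime as clockResolution() * clockResolutionMultiple(), clamped to [minEpochTime(), maxEpochTime()].
// With a typical 20 ns resolution and the default multiple of 1000 that gives 20 us, which the default
// minEpochTime of 1 ms then raises to 1 ms.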
2137 
2138 ANKERL_NANOBENCH(IGNORE_PADDED_PUSH)
2139 struct IterationLogic::Impl {
2140  enum class State { warmup, upscaling_runtime, measuring, endless };
2141 
2142  explicit Impl(Bench const& bench)
2143  : mBench(bench)
2144  , mResult(bench.config()) {
2145  printStabilityInformationOnce(mBench.output());
2146 
2147  // determine target runtime per epoch
2148  mTargetRuntimePerEpoch = detail::clockResolution() * mBench.clockResolutionMultiple();
2149  if (mTargetRuntimePerEpoch > mBench.maxEpochTime()) {
2150  mTargetRuntimePerEpoch = mBench.maxEpochTime();
2151  }
2152  if (mTargetRuntimePerEpoch < mBench.minEpochTime()) {
2153  mTargetRuntimePerEpoch = mBench.minEpochTime();
2154  }
2155 
2156  if (isEndlessRunning(mBench.name())) {
2157  std::cerr << "NANOBENCH_ENDLESS set: running '" << mBench.name() << "' endlessly" << std::endl;
2158  mNumIters = (std::numeric_limits<uint64_t>::max)();
2159  mState = State::endless;
2160  } else if (0 != mBench.warmup()) {
2161  mNumIters = mBench.warmup();
2162  mState = State::warmup;
2163  } else if (0 != mBench.epochIterations()) {
2164  // exact number of iterations
2165  mNumIters = mBench.epochIterations();
2166  mState = State::measuring;
2167  } else {
2168  mNumIters = mBench.minEpochIterations();
2169  mState = State::upscaling_runtime;
2170  }
2171  }
2172 
2173  // directly calculates new iters based on elapsed & iters, and adds up to 20% random noise (10% on average). Makes sure we don't underflow.
2174  ANKERL_NANOBENCH(NODISCARD) uint64_t calcBestNumIters(std::chrono::nanoseconds elapsed, uint64_t iters) noexcept {
2175  auto doubleElapsed = d(elapsed);
2176  auto doubleTargetRuntimePerEpoch = d(mTargetRuntimePerEpoch);
2177  auto doubleNewIters = doubleTargetRuntimePerEpoch / doubleElapsed * d(iters);
2178 
2179  auto doubleMinEpochIters = d(mBench.minEpochIterations());
2180  if (doubleNewIters < doubleMinEpochIters) {
2181  doubleNewIters = doubleMinEpochIters;
2182  }
2183  doubleNewIters *= 1.0 + 0.2 * mRng.uniform01();
2184 
2185  // +0.5 for correct rounding when casting
2186  // NOLINTNEXTLINE(bugprone-incorrect-roundings)
2187  return static_cast<uint64_t>(doubleNewIters + 0.5);
2188  }
2189 
2190  ANKERL_NANOBENCH_NO_SANITIZE("integer", "undefined") void upscale(std::chrono::nanoseconds elapsed) {
2191  if (elapsed * 10 < mTargetRuntimePerEpoch) {
2192  // we are far below the target runtime. Multiply iterations by 10 (with overflow check)
2193  if (mNumIters * 10 < mNumIters) {
2194  // overflow :-(
2195  showResult("iterations overflow. Maybe your code got optimized away?");
2196  mNumIters = 0;
2197  return;
2198  }
2199  mNumIters *= 10;
2200  } else {
2201  mNumIters = calcBestNumIters(elapsed, mNumIters);
2202  }
2203  }
2204 
2205  void add(std::chrono::nanoseconds elapsed, PerformanceCounters const& pc) noexcept {
2206 # if defined(ANKERL_NANOBENCH_LOG_ENABLED)
2207  auto oldIters = mNumIters;
2208 # endif
2209 
2210  switch (mState) {
2211  case State::warmup:
2212  if (isCloseEnoughForMeasurements(elapsed)) {
2213  // if elapsed is close enough, we can skip upscaling and go right to measurements
2214  // still, we don't add the result to the measurements.
2215  mState = State::measuring;
2216  mNumIters = calcBestNumIters(elapsed, mNumIters);
2217  } else {
2218  // not close enough: switch to upscaling
2219  mState = State::upscaling_runtime;
2220  upscale(elapsed);
2221  }
2222  break;
2223 
2224  case State::upscaling_runtime:
2225  if (isCloseEnoughForMeasurements(elapsed)) {
2226  // if we are close enough, add measurement and switch to always measuring
2227  mState = State::measuring;
2228  mTotalElapsed += elapsed;
2229  mTotalNumIters += mNumIters;
2230  mResult.add(elapsed, mNumIters, pc);
2231  mNumIters = calcBestNumIters(mTotalElapsed, mTotalNumIters);
2232  } else {
2233  upscale(elapsed);
2234  }
2235  break;
2236 
2237  case State::measuring:
2238  // just add measurements, no questions asked - even when the runtime is low. We can't ignore
2239  // that fluctuation, or else we would bias the result
2240  mTotalElapsed += elapsed;
2241  mTotalNumIters += mNumIters;
2242  mResult.add(elapsed, mNumIters, pc);
2243  if (0 != mBench.epochIterations()) {
2244  mNumIters = mBench.epochIterations();
2245  } else {
2246  mNumIters = calcBestNumIters(mTotalElapsed, mTotalNumIters);
2247  }
2248  break;
2249 
2250  case State::endless:
2251  mNumIters = (std::numeric_limits<uint64_t>::max)();
2252  break;
2253  }
2254 
2255  if (static_cast<uint64_t>(mResult.size()) == mBench.epochs()) {
2256  // we got all the results that we need, finish it
2257  showResult("");
2258  mNumIters = 0;
2259  }
2260 
2261  ANKERL_NANOBENCH_LOG(mBench.name() << ": " << detail::fmt::Number(20, 3, d(elapsed.count())) << " elapsed, "
2262  << detail::fmt::Number(20, 3, d(mTargetRuntimePerEpoch.count())) << " target. oldIters="
2263  << oldIters << ", mNumIters=" << mNumIters << ", mState=" << static_cast<int>(mState));
2264  }
2265 
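// Summary of the state machine driven by add() above: an optional 'warmup' epoch runs
// first (its result is never recorded), then 'upscaling_runtime' grows the iteration
// count until an epoch takes at least ~2/3 of the target runtime (see
// isCloseEnoughForMeasurements() below), after which 'measuring' records every epoch
// until mBench.epochs() results have been collected. 'endless' (enabled via
// NANOBENCH_ENDLESS) simply keeps the iteration count at its maximum.
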
2266  // NOLINTNEXTLINE(readability-function-cognitive-complexity)
2267  void showResult(std::string const& errorMessage) const {
2268  ANKERL_NANOBENCH_LOG(errorMessage);
2269 
2270  if (mBench.output() != nullptr) {
2271  // prepare column data ///////
2272  std::vector<fmt::MarkDownColumn> columns;
2273 
2274  auto rMedian = mResult.median(Result::Measure::elapsed);
2275 
2276  if (mBench.relative()) {
2277  double d = 100.0;
2278  if (!mBench.results().empty()) {
2279  d = rMedian <= 0.0 ? 0.0 : mBench.results().front().median(Result::Measure::elapsed) / rMedian * 100.0;
2280  }
2281  columns.emplace_back(11, 1, "relative", "%", d);
2282  }
2283 
2284  if (mBench.complexityN() > 0) {
2285  columns.emplace_back(14, 0, "complexityN", "", mBench.complexityN());
2286  }
2287 
2288  columns.emplace_back(22, 2, mBench.timeUnitName() + "/" + mBench.unit(), "",
2289  rMedian / (mBench.timeUnit().count() * mBench.batch()));
2290  columns.emplace_back(22, 2, mBench.unit() + "/s", "", rMedian <= 0.0 ? 0.0 : mBench.batch() / rMedian);
2291 
2292  double const rErrorMedian = mResult.medianAbsolutePercentError(Result::Measure::elapsed);
2293  columns.emplace_back(10, 1, "err%", "%", rErrorMedian * 100.0);
2294 
2295  double rInsMedian = -1.0;
2296  if (mBench.performanceCounters() && mResult.has(Result::Measure::instructions)) {
2297  rInsMedian = mResult.median(Result::Measure::instructions);
2298  columns.emplace_back(18, 2, "ins/" + mBench.unit(), "", rInsMedian / mBench.batch());
2299  }
2300 
2301  double rCycMedian = -1.0;
2302  if (mBench.performanceCounters() && mResult.has(Result::Measure::cpucycles)) {
2303  rCycMedian = mResult.median(Result::Measure::cpucycles);
2304  columns.emplace_back(18, 2, "cyc/" + mBench.unit(), "", rCycMedian / mBench.batch());
2305  }
2306  if (rInsMedian > 0.0 && rCycMedian > 0.0) {
2307  columns.emplace_back(9, 3, "IPC", "", rCycMedian <= 0.0 ? 0.0 : rInsMedian / rCycMedian);
2308  }
2309  if (mBench.performanceCounters() && mResult.has(Result::Measure::branchinstructions)) {
2310  double const rBraMedian = mResult.median(Result::Measure::branchinstructions);
2311  columns.emplace_back(17, 2, "bra/" + mBench.unit(), "", rBraMedian / mBench.batch());
2312  if (mResult.has(Result::Measure::branchmisses)) {
2313  double p = 0.0;
2314  if (rBraMedian >= 1e-9) {
2315  p = 100.0 * mResult.median(Result::Measure::branchmisses) / rBraMedian;
2316  }
2317  columns.emplace_back(10, 1, "miss%", "%", p);
2318  }
2319  }
2320 
2321  columns.emplace_back(12, 2, "total", "", mResult.sumProduct(Result::Measure::iterations, Result::Measure::elapsed));
2322 
2323  // write everything
2324  auto& os = *mBench.output();
2325 
2326  // combine all elements that are relevant for printing the header
2327  uint64_t hash = 0;
2328  hash = hash_combine(std::hash<std::string>{}(mBench.unit()), hash);
2329  hash = hash_combine(std::hash<std::string>{}(mBench.title()), hash);
2330  hash = hash_combine(std::hash<std::string>{}(mBench.timeUnitName()), hash);
2331  hash = hash_combine(std::hash<double>{}(mBench.timeUnit().count()), hash);
2332  hash = hash_combine(std::hash<bool>{}(mBench.relative()), hash);
2333  hash = hash_combine(std::hash<bool>{}(mBench.performanceCounters()), hash);
2334 
2335  if (hash != singletonHeaderHash()) {
2336  singletonHeaderHash() = hash;
2337 
2338  // header changed (or nothing printed yet): print the header
2339  os << std::endl;
2340  for (auto const& col : columns) {
2341  os << col.title();
2342  }
2343  os << "| " << mBench.title() << std::endl;
2344 
2345  for (auto const& col : columns) {
2346  os << col.separator();
2347  }
2348  os << "|:" << std::string(mBench.title().size() + 1U, '-') << std::endl;
2349  }
2350 
2351  if (!errorMessage.empty()) {
2352  for (auto const& col : columns) {
2353  os << col.invalid();
2354  }
2355  os << "| :boom: " << fmt::MarkDownCode(mBench.name()) << " (" << errorMessage << ')' << std::endl;
2356  } else {
2357  for (auto const& col : columns) {
2358  os << col.value();
2359  }
2360  os << "| ";
2361  auto showUnstable = isWarningsEnabled() && rErrorMedian >= 0.05;
2362  if (showUnstable) {
2363  os << ":wavy_dash: ";
2364  }
2365  os << fmt::MarkDownCode(mBench.name());
2366  if (showUnstable) {
2367  auto avgIters = d(mTotalNumIters) / d(mBench.epochs());
2368  // NOLINTNEXTLINE(bugprone-incorrect-roundings)
2369  auto suggestedIters = static_cast<uint64_t>(avgIters * 10 + 0.5);
2370 
2371  os << " (Unstable with ~" << detail::fmt::Number(1, 1, avgIters)
2372  << " iters. Increase `minEpochIterations` to e.g. " << suggestedIters << ")";
2373  }
2374  os << std::endl;
2375  }
2376  }
2377  }
2378 
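// Illustrative output produced by showResult() above (a sketch: the numbers are made up,
// and the exact columns and widths depend on the configuration and on which performance
// counters are available):
//
//     |               ns/op |                op/s |    err% |     total | benchmark
//     |--------------------:|--------------------:|--------:|----------:|:----------
//     |               20.83 |       48,003,361.21 |    0.6% |      0.01 | `some_op()`
//
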
2379  ANKERL_NANOBENCH(NODISCARD) bool isCloseEnoughForMeasurements(std::chrono::nanoseconds elapsed) const noexcept {
2380  return elapsed * 3 >= mTargetRuntimePerEpoch * 2;
2381  }
2382 
2383  uint64_t mNumIters = 1; // NOLINT(misc-non-private-member-variables-in-classes)
2384  Bench const& mBench; // NOLINT(misc-non-private-member-variables-in-classes)
2385  std::chrono::nanoseconds mTargetRuntimePerEpoch{}; // NOLINT(misc-non-private-member-variables-in-classes)
2386  Result mResult; // NOLINT(misc-non-private-member-variables-in-classes)
2387  Rng mRng{123}; // NOLINT(misc-non-private-member-variables-in-classes)
2388  std::chrono::nanoseconds mTotalElapsed{}; // NOLINT(misc-non-private-member-variables-in-classes)
2389  uint64_t mTotalNumIters = 0; // NOLINT(misc-non-private-member-variables-in-classes)
2390  State mState = State::upscaling_runtime; // NOLINT(misc-non-private-member-variables-in-classes)
2391 };
2392 ANKERL_NANOBENCH(IGNORE_PADDED_POP)
2393 
2394 IterationLogic::IterationLogic(Bench const& bench)
2395  : mPimpl(new Impl(bench)) {}
2396 
2397 IterationLogic::~IterationLogic() {
2398  delete mPimpl;
2399 }
2400 
2401 uint64_t IterationLogic::numIters() const noexcept {
2402  ANKERL_NANOBENCH_LOG(mPimpl->mBench.name() << ": mNumIters=" << mPimpl->mNumIters);
2403  return mPimpl->mNumIters;
2404 }
2405 
2406 void IterationLogic::add(std::chrono::nanoseconds elapsed, PerformanceCounters const& pc) noexcept {
2407  mPimpl->add(elapsed, pc);
2408 }
2409 
2410 void IterationLogic::moveResultTo(std::vector<Result>& results) noexcept {
2411  results.emplace_back(std::move(mPimpl->mResult));
2412 }
2413 
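// Sketch of how IterationLogic is typically driven (the actual driver is Bench::run(),
// defined earlier in this header; this is a simplified approximation, not the verbatim
// implementation):
//
//     IterationLogic logic(bench);
//     while (auto n = logic.numIters()) {          // 0 means "done"
//         pc.beginMeasure();
//         auto before = Clock::now();
//         while (n-- > 0) { op(); }                // run the benchmark body n times
//         auto after = Clock::now();
//         pc.endMeasure();
//         pc.updateResults(logic.numIters());
//         logic.add(std::chrono::duration_cast<std::chrono::nanoseconds>(after - before), pc);
//     }
//     logic.moveResultTo(results);
//
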
2414 # if ANKERL_NANOBENCH(PERF_COUNTERS)
2415 
2416 ANKERL_NANOBENCH(IGNORE_PADDED_PUSH)
2417 class LinuxPerformanceCounters {
2418 public:
2419  struct Target {
2420  Target(uint64_t* targetValue_, bool correctMeasuringOverhead_, bool correctLoopOverhead_)
2421  : targetValue(targetValue_)
2422  , correctMeasuringOverhead(correctMeasuringOverhead_)
2423  , correctLoopOverhead(correctLoopOverhead_) {}
2424 
2425  uint64_t* targetValue{}; // NOLINT(misc-non-private-member-variables-in-classes)
2426  bool correctMeasuringOverhead{}; // NOLINT(misc-non-private-member-variables-in-classes)
2427  bool correctLoopOverhead{}; // NOLINT(misc-non-private-member-variables-in-classes)
2428  };
2429 
2430  LinuxPerformanceCounters() = default;
2431  LinuxPerformanceCounters(LinuxPerformanceCounters const&) = delete;
2432  LinuxPerformanceCounters(LinuxPerformanceCounters&&) = delete;
2433  LinuxPerformanceCounters& operator=(LinuxPerformanceCounters const&) = delete;
2434  LinuxPerformanceCounters& operator=(LinuxPerformanceCounters&&) = delete;
2435  ~LinuxPerformanceCounters();
2436 
2437  // quick operation
2438  inline void start() {}
2439 
2440  inline void stop() {}
2441 
2442  bool monitor(perf_sw_ids swId, Target target);
2443  bool monitor(perf_hw_id hwId, Target target);
2444 
2445  ANKERL_NANOBENCH(NODISCARD) bool hasError() const noexcept {
2446  return mHasError;
2447  }
2448 
2449  // Just reading the data is faster than enabling & disabling the counters;
2450  // we subtract the overhead ourselves.
2451  inline void beginMeasure() {
2452  if (mHasError) {
2453  return;
2454  }
2455 
2456  // NOLINTNEXTLINE(hicpp-signed-bitwise,cppcoreguidelines-pro-type-vararg)
2457  mHasError = -1 == ioctl(mFd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP);
2458  if (mHasError) {
2459  return;
2460  }
2461 
2462  // NOLINTNEXTLINE(hicpp-signed-bitwise,cppcoreguidelines-pro-type-vararg)
2463  mHasError = -1 == ioctl(mFd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP);
2464  }
2465 
2466  inline void endMeasure() {
2467  if (mHasError) {
2468  return;
2469  }
2470 
2471  // NOLINTNEXTLINE(hicpp-signed-bitwise,cppcoreguidelines-pro-type-vararg)
2472  mHasError = (-1 == ioctl(mFd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP));
2473  if (mHasError) {
2474  return;
2475  }
2476 
2477  auto const numBytes = sizeof(uint64_t) * mCounters.size();
2478  auto ret = read(mFd, mCounters.data(), numBytes);
2479  mHasError = ret != static_cast<ssize_t>(numBytes);
2480  }
2481 
2482  void updateResults(uint64_t numIters);
2483 
2484  // rounded integer division
2485  template <typename T>
2486  static inline T divRounded(T a, T divisor) {
2487  return (a + divisor / 2) / divisor;
2488  }
2489 
2490  ANKERL_NANOBENCH_NO_SANITIZE("integer", "undefined")
2491  static inline uint32_t mix(uint32_t x) noexcept {
2492  x ^= x << 13U;
2493  x ^= x >> 17U;
2494  x ^= x << 5U;
2495  return x;
2496  }
2497 
2498  template <typename Op>
2499  ANKERL_NANOBENCH_NO_SANITIZE("integer", "undefined")
2500  void calibrate(Op&& op) {
2501  // clear current calibration data
2502  for (auto& v : mCalibratedOverhead) {
2503  v = UINT64_C(0);
2504  }
2505 
2506  // create new calibration data
2507  auto newCalibration = mCalibratedOverhead;
2508  for (auto& v : newCalibration) {
2509  v = (std::numeric_limits<uint64_t>::max)();
2510  }
2511  for (size_t iter = 0; iter < 100; ++iter) {
2512  beginMeasure();
2513  op();
2514  endMeasure();
2515  if (mHasError) {
2516  return;
2517  }
2518 
2519  for (size_t i = 0; i < newCalibration.size(); ++i) {
2520  auto diff = mCounters[i];
2521  if (newCalibration[i] > diff) {
2522  newCalibration[i] = diff;
2523  }
2524  }
2525  }
2526 
2527  mCalibratedOverhead = std::move(newCalibration);
2528 
2529  {
2530  // calibrate loop overhead. For branches & instructions this makes sense, not so much for everything else like cycles.
2531  // Marsaglia's xorshift: mov, sal/shr, xor. Times 3.
2532  // This has the nice property that the compiler doesn't seem to be able to optimize multiple calls any further.
2533  // see https://godbolt.org/z/49RVQ5
2534  uint64_t const numIters = 100000U + (std::random_device{}() & 3U);
2535  uint64_t n = numIters;
2536  uint32_t x = 1234567;
2537 
2538  beginMeasure();
2539  while (n-- > 0) {
2540  x = mix(x);
2541  }
2542  endMeasure();
2544  auto measure1 = mCounters;
2545 
2546  n = numIters;
2547  beginMeasure();
2548  while (n-- > 0) {
2549  // we now run *twice* so we can easily calculate the overhead
2550  x = mix(x);
2551  x = mix(x);
2552  }
2553  endMeasure();
2555  auto measure2 = mCounters;
2556 
2557  for (size_t i = 0; i < mCounters.size(); ++i) {
2558  // factor 2 so the cost of mix() cancels out: the second loop calls mix() twice per iteration
2559  auto m1 = measure1[i] > mCalibratedOverhead[i] ? measure1[i] - mCalibratedOverhead[i] : 0;
2560  auto m2 = measure2[i] > mCalibratedOverhead[i] ? measure2[i] - mCalibratedOverhead[i] : 0;
2561  auto overhead = m1 * 2 > m2 ? m1 * 2 - m2 : 0;
2562 
2563  mLoopOverhead[i] = divRounded(overhead, numIters);
2564  }
2565  }
2566  }
2567 
2568 private:
2569  bool monitor(uint32_t type, uint64_t eventid, Target target);
2570 
2571  std::map<uint64_t, Target> mIdToTarget{};
2572 
2573  // start with minimum size of 3 for read_format
2574  std::vector<uint64_t> mCounters{3};
2575  std::vector<uint64_t> mCalibratedOverhead{3};
2576  std::vector<uint64_t> mLoopOverhead{3};
2577 
2578  uint64_t mTimeEnabledNanos = 0;
2579  uint64_t mTimeRunningNanos = 0;
2580  int mFd = -1;
2581  bool mHasError = false;
2582 };
2583 ANKERL_NANOBENCH(IGNORE_PADDED_POP)
2584 
2585 LinuxPerformanceCounters::~LinuxPerformanceCounters() {
2586  if (-1 != mFd) {
2587  close(mFd);
2588  }
2589 }
2590 
2591 bool LinuxPerformanceCounters::monitor(perf_sw_ids swId, LinuxPerformanceCounters::Target target) {
2592  return monitor(PERF_TYPE_SOFTWARE, swId, target);
2593 }
2594 
2595 bool LinuxPerformanceCounters::monitor(perf_hw_id hwId, LinuxPerformanceCounters::Target target) {
2596  return monitor(PERF_TYPE_HARDWARE, hwId, target);
2597 }
2598 
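// Note on the buffer layout used below: with the read_format configured in monitor()
// (PERF_FORMAT_GROUP | PERF_FORMAT_ID | PERF_FORMAT_TOTAL_TIME_ENABLED |
// PERF_FORMAT_TOTAL_TIME_RUNNING), a single read() on the group leader fd returns, as
// uint64_t values (see perf_event_open(2)):
//
//     [0] nr              number of counters in the group
//     [1] time_enabled    total time the group was enabled
//     [2] time_running    total time the group was actually running
//     [3 + 2*i]           value of counter i
//     [3 + 2*i + 1]       id of counter i (matched against mIdToTarget)
//
// which is why mCounters starts with a minimum size of 3 and is resized to
// 3 + 2 * mIdToTarget.size() in monitor().
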
2599 // overflow is ok, it's checked
2600 ANKERL_NANOBENCH_NO_SANITIZE("integer", "undefined")
2601 void LinuxPerformanceCounters::updateResults(uint64_t numIters) {
2602  // clear old data
2603  for (auto& id_value : mIdToTarget) {
2604  *id_value.second.targetValue = UINT64_C(0);
2605  }
2606 
2607  if (mHasError) {
2608  return;
2609  }
2610 
2611  mTimeEnabledNanos = mCounters[1] - mCalibratedOverhead[1];
2612  mTimeRunningNanos = mCounters[2] - mCalibratedOverhead[2];
2613 
2614  for (uint64_t i = 0; i < mCounters[0]; ++i) {
2615  auto idx = static_cast<size_t>(3 + i * 2 + 0);
2616  auto id = mCounters[idx + 1U];
2617 
2618  auto it = mIdToTarget.find(id);
2619  if (it != mIdToTarget.end()) {
2620 
2621  auto& tgt = it->second;
2622  *tgt.targetValue = mCounters[idx];
2623  if (tgt.correctMeasuringOverhead) {
2624  if (*tgt.targetValue >= mCalibratedOverhead[idx]) {
2625  *tgt.targetValue -= mCalibratedOverhead[idx];
2626  } else {
2627  *tgt.targetValue = 0U;
2628  }
2629  }
2630  if (tgt.correctLoopOverhead) {
2631  auto correctionVal = mLoopOverhead[idx] * numIters;
2632  if (*tgt.targetValue >= correctionVal) {
2633  *tgt.targetValue -= correctionVal;
2634  } else {
2635  *tgt.targetValue = 0U;
2636  }
2637  }
2638  }
2639  }
2640 }
2641 
2642 bool LinuxPerformanceCounters::monitor(uint32_t type, uint64_t eventid, Target target) {
2643  *target.targetValue = (std::numeric_limits<uint64_t>::max)();
2644  if (mHasError) {
2645  return false;
2646  }
2647 
2648  auto pea = perf_event_attr();
2649  std::memset(&pea, 0, sizeof(perf_event_attr));
2650  pea.type = type;
2651  pea.size = sizeof(perf_event_attr);
2652  pea.config = eventid;
2653  pea.disabled = 1; // start counter as disabled
2654  pea.exclude_kernel = 1;
2655  pea.exclude_hv = 1;
2656 
2657  // NOLINTNEXTLINE(hicpp-signed-bitwise)
2658  pea.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID | PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING;
2659 
2660  const int pid = 0; // the current process
2661  const int cpu = -1; // all CPUs
2662 # if defined(PERF_FLAG_FD_CLOEXEC) // since Linux 3.14
2663  const unsigned long flags = PERF_FLAG_FD_CLOEXEC;
2664 # else
2665  const unsigned long flags = 0;
2666 # endif
2667 
2668  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
2669  auto fd = static_cast<int>(syscall(__NR_perf_event_open, &pea, pid, cpu, mFd, flags));
2670  if (-1 == fd) {
2671  return false;
2672  }
2673  if (-1 == mFd) {
2674  // first call: set to fd, and use this from now on
2675  mFd = fd;
2676  }
2677  uint64_t id = 0;
2678  // NOLINTNEXTLINE(hicpp-signed-bitwise,cppcoreguidelines-pro-type-vararg)
2679  if (-1 == ioctl(fd, PERF_EVENT_IOC_ID, &id)) {
2680  // couldn't get id
2681  return false;
2682  }
2683 
2684  // insert into map; rely on the fact that references into a std::map stay valid when other elements are inserted.
2685  mIdToTarget.emplace(id, target);
2686 
2687  // prepare the read_format buffers with the correct size (after the insert)
2688  auto size = 3 + 2 * mIdToTarget.size();
2689  mCounters.resize(size);
2690  mCalibratedOverhead.resize(size);
2691  mLoopOverhead.resize(size);
2692 
2693  return true;
2694 }
2695 
2696 PerformanceCounters::PerformanceCounters()
2697  : mPc(new LinuxPerformanceCounters())
2698  , mVal()
2699  , mHas() {
2700 
2701  // HW events
2702  mHas.cpuCycles = mPc->monitor(PERF_COUNT_HW_REF_CPU_CYCLES, LinuxPerformanceCounters::Target(&mVal.cpuCycles, true, false));
2703  if (!mHas.cpuCycles) {
2704  // Fall back to the cycles counter; reference cycles are not available on many systems.
2705  mHas.cpuCycles = mPc->monitor(PERF_COUNT_HW_CPU_CYCLES, LinuxPerformanceCounters::Target(&mVal.cpuCycles, true, false));
2706  }
2707  mHas.instructions = mPc->monitor(PERF_COUNT_HW_INSTRUCTIONS, LinuxPerformanceCounters::Target(&mVal.instructions, true, true));
2708  mHas.branchInstructions =
2709  mPc->monitor(PERF_COUNT_HW_BRANCH_INSTRUCTIONS, LinuxPerformanceCounters::Target(&mVal.branchInstructions, true, false));
2710  mHas.branchMisses = mPc->monitor(PERF_COUNT_HW_BRANCH_MISSES, LinuxPerformanceCounters::Target(&mVal.branchMisses, true, false));
2711  // mHas.branchMisses = false;
2712 
2713  // SW events
2714  mHas.pageFaults = mPc->monitor(PERF_COUNT_SW_PAGE_FAULTS, LinuxPerformanceCounters::Target(&mVal.pageFaults, true, false));
2715  mHas.contextSwitches =
2716  mPc->monitor(PERF_COUNT_SW_CONTEXT_SWITCHES, LinuxPerformanceCounters::Target(&mVal.contextSwitches, true, false));
2717 
2718  mPc->start();
2719  mPc->calibrate([] {
2720  auto before = ankerl::nanobench::Clock::now();
2721  auto after = ankerl::nanobench::Clock::now();
2722  (void)before;
2723  (void)after;
2724  });
2725 
2726  if (mPc->hasError()) {
2727  // something failed, don't monitor anything.
2728  mHas = PerfCountSet<bool>{};
2729  }
2730 }
2731 
2732 PerformanceCounters::~PerformanceCounters() {
2733  // no need to check for nullptr, delete nullptr has no effect
2734  delete mPc;
2735 }
2736 
2737 void PerformanceCounters::beginMeasure() {
2738  mPc->beginMeasure();
2739 }
2740 
2741 void PerformanceCounters::endMeasure() {
2742  mPc->endMeasure();
2743 }
2744 
2745 void PerformanceCounters::updateResults(uint64_t numIters) {
2746  mPc->updateResults(numIters);
2747 }
2748 
2749 # else
2750 
2751 PerformanceCounters::PerformanceCounters() = default;
2752 PerformanceCounters::~PerformanceCounters() = default;
2753 void PerformanceCounters::beginMeasure() {}
2754 void PerformanceCounters::endMeasure() {}
2755 void PerformanceCounters::updateResults(uint64_t) {}
2756 
2757 # endif
2758 
2759 ANKERL_NANOBENCH(NODISCARD) PerfCountSet<uint64_t> const& PerformanceCounters::val() const noexcept {
2760  return mVal;
2761 }
2762 ANKERL_NANOBENCH(NODISCARD) PerfCountSet<bool> const& PerformanceCounters::has() const noexcept {
2763  return mHas;
2764 }
2765 
2766 // formatting utilities
2767 namespace fmt {
2768 
2769 // adds thousands separator to numbers
2770 NumSep::NumSep(char sep)
2771  : mSep(sep) {}
2772 
2773 char NumSep::do_thousands_sep() const {
2774  return mSep;
2775 }
2776 
2777 std::string NumSep::do_grouping() const {
2778  return "\003";
2779 }
2780 
2781 // RAII to save & restore a stream's state
2782 StreamStateRestorer::StreamStateRestorer(std::ostream& s)
2783  : mStream(s)
2784  , mLocale(s.getloc())
2785  , mPrecision(s.precision())
2786  , mWidth(s.width())
2787  , mFill(s.fill())
2788  , mFmtFlags(s.flags()) {}
2789 
2790 StreamStateRestorer::~StreamStateRestorer() {
2791  restore();
2792 }
2793 
2794 // sets back all stream info that we remembered at construction
2795 void StreamStateRestorer::restore() {
2796  mStream.imbue(mLocale);
2797  mStream.precision(mPrecision);
2798  mStream.width(mWidth);
2799  mStream.fill(mFill);
2800  mStream.flags(mFmtFlags);
2801 }
2802 
2803 Number::Number(int width, int precision, int64_t value)
2804  : mWidth(width)
2805  , mPrecision(precision)
2806  , mValue(d(value)) {}
2807 
2808 Number::Number(int width, int precision, double value)
2809  : mWidth(width)
2810  , mPrecision(precision)
2811  , mValue(value) {}
2812 
2813 std::ostream& Number::write(std::ostream& os) const {
2814  StreamStateRestorer const restorer(os);
2815  os.imbue(std::locale(os.getloc(), new NumSep(',')));
2816  os << std::setw(mWidth) << std::setprecision(mPrecision) << std::fixed << mValue;
2817  return os;
2818 }
2819 
2820 std::string Number::to_s() const {
2821  std::stringstream ss;
2822  write(ss);
2823  return ss.str();
2824 }
2825 
2826 std::string to_s(uint64_t n) {
2827  std::string str;
2828  do {
2829  str += static_cast<char>('0' + static_cast<char>(n % 10));
2830  n /= 10;
2831  } while (n != 0);
2832  std::reverse(str.begin(), str.end());
2833  return str;
2834 }
2835 
2836 std::ostream& operator<<(std::ostream& os, Number const& n) {
2837  return n.write(os);
2838 }
2839 
2840 MarkDownColumn::MarkDownColumn(int w, int prec, std::string tit, std::string suff, double val) noexcept
2841  : mWidth(w)
2842  , mPrecision(prec)
2843  , mTitle(std::move(tit))
2844  , mSuffix(std::move(suff))
2845  , mValue(val) {}
2846 
2847 std::string MarkDownColumn::title() const {
2848  std::stringstream ss;
2849  ss << '|' << std::setw(mWidth - 2) << std::right << mTitle << ' ';
2850  return ss.str();
2851 }
2852 
2853 std::string MarkDownColumn::separator() const {
2854  std::string sep(static_cast<size_t>(mWidth), '-');
2855  sep.front() = '|';
2856  sep.back() = ':';
2857  return sep;
2858 }
2859 
2860 std::string MarkDownColumn::invalid() const {
2861  std::string sep(static_cast<size_t>(mWidth), ' ');
2862  sep.front() = '|';
2863  sep[sep.size() - 2] = '-';
2864  return sep;
2865 }
2866 
2867 std::string MarkDownColumn::value() const {
2868  std::stringstream ss;
2869  auto width = mWidth - 2 - static_cast<int>(mSuffix.size());
2870  ss << '|' << Number(width, mPrecision, mValue) << mSuffix << ' ';
2871  return ss.str();
2872 }
2873 
2874 // Formats any text as markdown code, escaping backticks.
2875 MarkDownCode::MarkDownCode(std::string const& what) {
2876  mWhat.reserve(what.size() + 2);
2877  mWhat.push_back('`');
2878  for (char const c : what) {
2879  mWhat.push_back(c);
2880  if ('`' == c) {
2881  mWhat.push_back('`');
2882  }
2883  }
2884  mWhat.push_back('`');
2885 }
2886 
2887 std::ostream& MarkDownCode::write(std::ostream& os) const {
2888  return os << mWhat;
2889 }
2890 
2891 std::ostream& operator<<(std::ostream& os, MarkDownCode const& mdCode) {
2892  return mdCode.write(os);
2893 }
2894 } // namespace fmt
2895 } // namespace detail
2896 
2897 // provide implementation here so it's only generated once
2898 Config::Config() = default;
2899 Config::~Config() = default;
2900 Config& Config::operator=(Config const&) = default;
2901 Config& Config::operator=(Config&&) noexcept(ANKERL_NANOBENCH(NOEXCEPT_STRING_MOVE)) = default;
2902 Config::Config(Config const&) = default;
2903 Config::Config(Config&&) noexcept = default;
2904 
2905 // provide implementation here so it's only generated once
2906 Result::~Result() = default;
2907 Result& Result::operator=(Result const&) = default;
2908 Result& Result::operator=(Result&&) noexcept(ANKERL_NANOBENCH(NOEXCEPT_STRING_MOVE)) = default;
2909 Result::Result(Result const&) = default;
2910 Result::Result(Result&&) noexcept = default;
2911 
2912 namespace detail {
2913 template <typename T>
2914 inline constexpr typename std::underlying_type<T>::type u(T val) noexcept {
2915  return static_cast<typename std::underlying_type<T>::type>(val);
2916 }
2917 } // namespace detail
2918 
2919 // Result returned after a benchmark has finished. Can be used as a baseline for relative().
2920 Result::Result(Config benchmarkConfig)
2921  : mConfig(std::move(benchmarkConfig))
2922  , mNameToMeasurements{detail::u(Result::Measure::_size)} {}
2923 
2924 void Result::add(Clock::duration totalElapsed, uint64_t iters, detail::PerformanceCounters const& pc) {
2925  using detail::d;
2926  using detail::u;
2927 
2928  double const dIters = d(iters);
2929  mNameToMeasurements[u(Result::Measure::iterations)].push_back(dIters);
2930 
2931  mNameToMeasurements[u(Result::Measure::elapsed)].push_back(d(totalElapsed) / dIters);
2932  if (pc.has().pageFaults) {
2933  mNameToMeasurements[u(Result::Measure::pagefaults)].push_back(d(pc.val().pageFaults) / dIters);
2934  }
2935  if (pc.has().cpuCycles) {
2936  mNameToMeasurements[u(Result::Measure::cpucycles)].push_back(d(pc.val().cpuCycles) / dIters);
2937  }
2938  if (pc.has().contextSwitches) {
2939  mNameToMeasurements[u(Result::Measure::contextswitches)].push_back(d(pc.val().contextSwitches) / dIters);
2940  }
2941  if (pc.has().instructions) {
2942  mNameToMeasurements[u(Result::Measure::instructions)].push_back(d(pc.val().instructions) / dIters);
2943  }
2944  if (pc.has().branchInstructions) {
2945  double branchInstructions = 0.0;
2946  // correcting branches: remove branch introduced by the while (...) loop for each iteration.
2947  if (pc.val().branchInstructions > iters + 1U) {
2948  branchInstructions = d(pc.val().branchInstructions - (iters + 1U));
2949  }
2950  mNameToMeasurements[u(Result::Measure::branchinstructions)].push_back(branchInstructions / dIters);
2951 
2952  if (pc.has().branchMisses) {
2953  // correcting branch misses
2954  double branchMisses = d(pc.val().branchMisses);
2955  if (branchMisses > branchInstructions) {
2956  // can't have branch misses when there were branches...
2957  branchMisses = branchInstructions;
2958  }
2959 
2960  // assuming at least one missed branch for the loop
2961  branchMisses -= 1.0;
2962  if (branchMisses < 1.0) {
2963  branchMisses = 1.0;
2964  }
2965  mNameToMeasurements[u(Result::Measure::branchmisses)].push_back(branchMisses / dIters);
2966  }
2967  }
2968 }
2969 
2970 Config const& Result::config() const noexcept {
2971  return mConfig;
2972 }
2973 
2974 inline double calcMedian(std::vector<double>& data) {
2975  if (data.empty()) {
2976  return 0.0;
2977  }
2978  std::sort(data.begin(), data.end());
2979 
2980  auto midIdx = data.size() / 2U;
2981  if (1U == (data.size() & 1U)) {
2982  return data[midIdx];
2983  }
2984  return (data[midIdx - 1U] + data[midIdx]) / 2U;
2985 }
2986 
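// Worked example for calcMedian() above: {3, 1, 2} sorts to {1, 2, 3} and yields 2
// (odd count, middle element); {1, 2, 3, 4} yields (2 + 3) / 2 = 2.5 (even count,
// mean of the two middle elements).
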
2987 double Result::median(Measure m) const {
2988  // create a copy so we can sort
2989  auto data = mNameToMeasurements[detail::u(m)];
2990  return calcMedian(data);
2991 }
2992 
2993 double Result::average(Measure m) const {
2994  using detail::d;
2995  auto const& data = mNameToMeasurements[detail::u(m)];
2996  if (data.empty()) {
2997  return 0.0;
2998  }
2999 
3000  // arithmetic mean over all measurements
3001  return sum(m) / d(data.size());
3002 }
3003 
3004 double Result::medianAbsolutePercentError(Measure m) const {
3005  // create copy
3006  auto data = mNameToMeasurements[detail::u(m)];
3007 
3008  // calculates MdAPE which is the median of percentage error
3009  // see https://support.numxl.com/hc/en-us/articles/115001223503-MdAPE-Median-Absolute-Percentage-Error
3010  auto med = calcMedian(data);
3011 
3012  // transform the data to absolute percentage error
3013  for (auto& x : data) {
3014  x = (x - med) / x;
3015  if (x < 0) {
3016  x = -x;
3017  }
3018  }
3019  return calcMedian(data);
3020 }
3021 
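// Worked example for medianAbsolutePercentError() above: for measurements {10, 11, 13}
// the median is 11, the absolute percentage errors |x - 11| / x are
// {0.1, 0.0, 2/13 ~= 0.154}, and MdAPE is their median, 0.1 (i.e. 10%).
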
3022 double Result::sum(Measure m) const noexcept {
3023  auto const& data = mNameToMeasurements[detail::u(m)];
3024  return std::accumulate(data.begin(), data.end(), 0.0);
3025 }
3026 
3027 double Result::sumProduct(Measure m1, Measure m2) const noexcept {
3028  auto const& data1 = mNameToMeasurements[detail::u(m1)];
3029  auto const& data2 = mNameToMeasurements[detail::u(m2)];
3030 
3031  if (data1.size() != data2.size()) {
3032  return 0.0;
3033  }
3034 
3035  double result = 0.0;
3036  for (size_t i = 0, s = data1.size(); i != s; ++i) {
3037  result += data1[i] * data2[i];
3038  }
3039  return result;
3040 }
3041 
3042 bool Result::has(Measure m) const noexcept {
3043  return !mNameToMeasurements[detail::u(m)].empty();
3044 }
3045 
3046 double Result::get(size_t idx, Measure m) const {
3047  auto const& data = mNameToMeasurements[detail::u(m)];
3048  return data.at(idx);
3049 }
3050 
3051 bool Result::empty() const noexcept {
3052  return 0U == size();
3053 }
3054 
3055 size_t Result::size() const noexcept {
3056  auto const& data = mNameToMeasurements[detail::u(Measure::elapsed)];
3057  return data.size();
3058 }
3059 
3060 double Result::minimum(Measure m) const noexcept {
3061  auto const& data = mNameToMeasurements[detail::u(m)];
3062  if (data.empty()) {
3063  return 0.0;
3064  }
3065 
3066  // here it's safe to assume that at least one element is there
3067  return *std::min_element(data.begin(), data.end());
3068 }
3069 
3070 double Result::maximum(Measure m) const noexcept {
3071  auto const& data = mNameToMeasurements[detail::u(m)];
3072  if (data.empty()) {
3073  return 0.0;
3074  }
3075 
3076  // here it's safe to assume that at least one element is there
3077  return *std::max_element(data.begin(), data.end());
3078 }
3079 
3080 std::string const& Result::context(char const* variableName) const {
3081  return mConfig.mContext.at(variableName);
3082 }
3083 
3084 std::string const& Result::context(std::string const& variableName) const {
3085  return mConfig.mContext.at(variableName);
3086 }
3087 
3088 Result::Measure Result::fromString(std::string const& str) {
3089  if (str == "elapsed") {
3090  return Measure::elapsed;
3091  }
3092  if (str == "iterations") {
3093  return Measure::iterations;
3094  }
3095  if (str == "pagefaults") {
3096  return Measure::pagefaults;
3097  }
3098  if (str == "cpucycles") {
3099  return Measure::cpucycles;
3100  }
3101  if (str == "contextswitches") {
3102  return Measure::contextswitches;
3103  }
3104  if (str == "instructions") {
3105  return Measure::instructions;
3106  }
3107  if (str == "branchinstructions") {
3108  return Measure::branchinstructions;
3109  }
3110  if (str == "branchmisses") {
3111  return Measure::branchmisses;
3112  }
3113  // not found, return _size
3114  return Measure::_size;
3115 }
3116 
3117 // Configuration of a microbenchmark.
3118 Bench::Bench() {
3119  mConfig.mOut = &std::cout;
3120 }
3121 
3122 Bench::Bench(Bench&&) noexcept = default;
3123 Bench& Bench::operator=(Bench&&) noexcept(ANKERL_NANOBENCH(NOEXCEPT_STRING_MOVE)) = default;
3124 Bench::Bench(Bench const&) = default;
3125 Bench& Bench::operator=(Bench const&) = default;
3126 Bench::~Bench() noexcept = default;
3127 
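// Minimal usage sketch for the Bench API implemented below (assumes this header is
// included and ANKERL_NANOBENCH_IMPLEMENT is defined in exactly one translation unit;
// the benchmarked expression is just an example):
//
//     uint64_t x = 1;
//     ankerl::nanobench::Bench()
//         .title("example")
//         .unit("op")
//         .minEpochIterations(1000)
//         .run("x += x", [&] { x += x; ankerl::nanobench::doNotOptimizeAway(x); });
//
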
3128 double Bench::batch() const noexcept {
3129  return mConfig.mBatch;
3130 }
3131 
3132 double Bench::complexityN() const noexcept {
3133  return mConfig.mComplexityN;
3134 }
3135 
3136 // Set a baseline to compare against. At 100% it is exactly as fast as the baseline, >100% means it is faster than the baseline,
3137 // and <100% means it is slower than the baseline.
3138 Bench& Bench::relative(bool isRelativeEnabled) noexcept {
3139  mConfig.mIsRelative = isRelativeEnabled;
3140  return *this;
3141 }
3142 bool Bench::relative() const noexcept {
3143  return mConfig.mIsRelative;
3144 }
3145 
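// Worked example for relative(): with relative(true), the first recorded result acts as
// the baseline. If the baseline's median elapsed time is 100ns and a later benchmark's
// is 50ns, the "relative" column shows 100ns / 50ns * 100 = 200%, i.e. twice as fast as
// the baseline (the numbers are assumptions for illustration).
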
3146 Bench& Bench::performanceCounters(bool showPerformanceCounters) noexcept {
3147  mConfig.mShowPerformanceCounters = showPerformanceCounters;
3148  return *this;
3149 }
3150 bool Bench::performanceCounters() const noexcept {
3151  return mConfig.mShowPerformanceCounters;
3152 }
3153 
3154 // Operation unit. Defaults to "op", could be e.g. "byte" for string processing.
3155 // If u differs from currently set unit, the stored results will be cleared.
3156 // Use singular (byte, not bytes).
3157 Bench& Bench::unit(char const* u) {
3158  if (u != mConfig.mUnit) {
3159  mResults.clear();
3160  }
3161  mConfig.mUnit = u;
3162  return *this;
3163 }
3164 
3165 Bench& Bench::unit(std::string const& u) {
3166  return unit(u.c_str());
3167 }
3168 
3169 std::string const& Bench::unit() const noexcept {
3170  return mConfig.mUnit;
3171 }
3172 
3173 Bench& Bench::timeUnit(std::chrono::duration<double> const& tu, std::string const& tuName) {
3174  mConfig.mTimeUnit = tu;
3175  mConfig.mTimeUnitName = tuName;
3176  return *this;
3177 }
3178 
3179 std::string const& Bench::timeUnitName() const noexcept {
3180  return mConfig.mTimeUnitName;
3181 }
3182 
3183 std::chrono::duration<double> const& Bench::timeUnit() const noexcept {
3184  return mConfig.mTimeUnit;
3185 }
3186 
3187 // If benchmarkTitle differs from currently set title, the stored results will be cleared.
3188 Bench& Bench::title(const char* benchmarkTitle) {
3189  if (benchmarkTitle != mConfig.mBenchmarkTitle) {
3190  mResults.clear();
3191  }
3192  mConfig.mBenchmarkTitle = benchmarkTitle;
3193  return *this;
3194 }
3195 Bench& Bench::title(std::string const& benchmarkTitle) {
3196  if (benchmarkTitle != mConfig.mBenchmarkTitle) {
3197  mResults.clear();
3198  }
3199  mConfig.mBenchmarkTitle = benchmarkTitle;
3200  return *this;
3201 }
3202 
3203 std::string const& Bench::title() const noexcept {
3204  return mConfig.mBenchmarkTitle;
3205 }
3206 
3207 Bench& Bench::name(const char* benchmarkName) {
3208  mConfig.mBenchmarkName = benchmarkName;
3209  return *this;
3210 }
3211 
3212 Bench& Bench::name(std::string const& benchmarkName) {
3213  mConfig.mBenchmarkName = benchmarkName;
3214  return *this;
3215 }
3216 
3217 std::string const& Bench::name() const noexcept {
3218  return mConfig.mBenchmarkName;
3219 }
3220 
3221 Bench& Bench::context(char const* variableName, char const* variableValue) {
3222  mConfig.mContext[variableName] = variableValue;
3223  return *this;
3224 }
3225 
3226 Bench& Bench::context(std::string const& variableName, std::string const& variableValue) {
3227  mConfig.mContext[variableName] = variableValue;
3228  return *this;
3229 }
3230 
3231 Bench& Bench::clearContext() {
3232  mConfig.mContext.clear();
3233  return *this;
3234 }
3235 
3236 // Number of epochs to evaluate. The reported result is the median over all epochs.
3237 Bench& Bench::epochs(size_t numEpochs) noexcept {
3238  mConfig.mNumEpochs = numEpochs;
3239  return *this;
3240 }
3241 size_t Bench::epochs() const noexcept {
3242  return mConfig.mNumEpochs;
3243 }
3244 
3245 // Desired evaluation time per epoch is a multiple of the clock resolution. Default is 1000 times the measured clock resolution.
3246 Bench& Bench::clockResolutionMultiple(size_t multiple) noexcept {
3247  mConfig.mClockResolutionMultiple = multiple;
3248  return *this;
3249 }
3250 size_t Bench::clockResolutionMultiple() const noexcept {
3251  return mConfig.mClockResolutionMultiple;
3252 }
3253 
3254 // Sets the maximum time each epoch should take. Default is 100ms.
3255 Bench& Bench::maxEpochTime(std::chrono::nanoseconds t) noexcept {
3256  mConfig.mMaxEpochTime = t;
3257  return *this;
3258 }
3259 std::chrono::nanoseconds Bench::maxEpochTime() const noexcept {
3260  return mConfig.mMaxEpochTime;
3261 }
3262 
3263 // Sets the minimum time each epoch should take.
3264 Bench& Bench::minEpochTime(std::chrono::nanoseconds t) noexcept {
3265  mConfig.mMinEpochTime = t;
3266  return *this;
3267 }
3268 std::chrono::nanoseconds Bench::minEpochTime() const noexcept {
3269  return mConfig.mMinEpochTime;
3270 }
3271 
3272 Bench& Bench::minEpochIterations(uint64_t numIters) noexcept {
3273  mConfig.mMinEpochIterations = (numIters == 0) ? 1 : numIters;
3274  return *this;
3275 }
3276 uint64_t Bench::minEpochIterations() const noexcept {
3277  return mConfig.mMinEpochIterations;
3278 }
3279 
3280 Bench& Bench::epochIterations(uint64_t numIters) noexcept {
3281  mConfig.mEpochIterations = numIters;
3282  return *this;
3283 }
3284 uint64_t Bench::epochIterations() const noexcept {
3285  return mConfig.mEpochIterations;
3286 }
3287 
3288 Bench& Bench::warmup(uint64_t numWarmupIters) noexcept {
3289  mConfig.mWarmup = numWarmupIters;
3290  return *this;
3291 }
3292 uint64_t Bench::warmup() const noexcept {
3293  return mConfig.mWarmup;
3294 }
3295 
3296 Bench& Bench::config(Config const& benchmarkConfig) {
3297  mConfig = benchmarkConfig;
3298  return *this;
3299 }
3300 Config const& Bench::config() const noexcept {
3301  return mConfig;
3302 }
3303 
3304 Bench& Bench::output(std::ostream* outstream) noexcept {
3305  mConfig.mOut = outstream;
3306  return *this;
3307 }
3308 
3309 ANKERL_NANOBENCH(NODISCARD) std::ostream* Bench::output() const noexcept {
3310  return mConfig.mOut;
3311 }
3312 
3313 std::vector<Result> const& Bench::results() const noexcept {
3314  return mResults;
3315 }
3316 
3317 Bench& Bench::render(char const* templateContent, std::ostream& os) {
3318  ::ankerl::nanobench::render(templateContent, *this, os);
3319  return *this;
3320 }
3321 
3322 Bench& Bench::render(std::string const& templateContent, std::ostream& os) {
3323  ::ankerl::nanobench::render(templateContent, *this, os);
3324  return *this;
3325 }
3326 
3327 std::vector<BigO> Bench::complexityBigO() const {
3328  std::vector<BigO> bigOs;
3329  auto rangeMeasure = BigO::collectRangeMeasure(mResults);
3330  bigOs.emplace_back("O(1)", rangeMeasure, [](double) {
3331  return 1.0;
3332  });
3333  bigOs.emplace_back("O(n)", rangeMeasure, [](double n) {
3334  return n;
3335  });
3336  bigOs.emplace_back("O(log n)", rangeMeasure, [](double n) {
3337  return std::log2(n);
3338  });
3339  bigOs.emplace_back("O(n log n)", rangeMeasure, [](double n) {
3340  return n * std::log2(n);
3341  });
3342  bigOs.emplace_back("O(n^2)", rangeMeasure, [](double n) {
3343  return n * n;
3344  });
3345  bigOs.emplace_back("O(n^3)", rangeMeasure, [](double n) {
3346  return n * n * n;
3347  });
3348  std::sort(bigOs.begin(), bigOs.end());
3349  return bigOs;
3350 }
3351 
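// Note on complexityBigO() above: each candidate (O(1), O(n), O(log n), ...) is fitted
// against the collected (complexityN, median elapsed) pairs, and the vector is sorted by
// normalized root mean square error (see BigO::operator< below), so the best-fitting
// complexity ends up at the front of the returned vector.
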
3352 Rng::Rng()
3353  : mX(0)
3354  , mY(0) {
3355  std::random_device rd;
3356  std::uniform_int_distribution<uint64_t> dist;
3357  do {
3358  mX = dist(rd);
3359  mY = dist(rd);
3360  } while (mX == 0 && mY == 0);
3361 }
3362 
3363 ANKERL_NANOBENCH_NO_SANITIZE("integer", "undefined")
3364 uint64_t splitMix64(uint64_t& state) noexcept {
3365  uint64_t z = (state += UINT64_C(0x9e3779b97f4a7c15));
3366  z = (z ^ (z >> 30U)) * UINT64_C(0xbf58476d1ce4e5b9);
3367  z = (z ^ (z >> 27U)) * UINT64_C(0x94d049bb133111eb);
3368  return z ^ (z >> 31U);
3369 }
3370 
3371 // Seeded as described in the Romu paper (April 2020 update)
3372 Rng::Rng(uint64_t seed) noexcept
3373  : mX(splitMix64(seed))
3374  , mY(splitMix64(seed)) {
3375  for (size_t i = 0; i < 10; ++i) {
3376  operator()();
3377  }
3378 }
3379 
3380 // only internally used to copy the RNG.
3381 Rng::Rng(uint64_t x, uint64_t y) noexcept
3382  : mX(x)
3383  , mY(y) {}
3384 
3385 Rng Rng::copy() const noexcept {
3386  return Rng{mX, mY};
3387 }
3388 
3389 Rng::Rng(std::vector<uint64_t> const& data)
3390  : mX(0)
3391  , mY(0) {
3392  if (data.size() != 2) {
3393  throw std::runtime_error("ankerl::nanobench::Rng::Rng: needed exactly 2 entries in data, but got " +
3394  detail::fmt::to_s(data.size()));
3395  }
3396  mX = data[0];
3397  mY = data[1];
3398 }
3399 
3400 std::vector<uint64_t> Rng::state() const {
3401  std::vector<uint64_t> data(2);
3402  data[0] = mX;
3403  data[1] = mY;
3404  return data;
3405 }
3406 
3407 BigO::RangeMeasure BigO::collectRangeMeasure(std::vector<Result> const& results) {
3408  BigO::RangeMeasure rangeMeasure;
3409  for (auto const& result : results) {
3410  if (result.config().mComplexityN > 0.0) {
3411  rangeMeasure.emplace_back(result.config().mComplexityN, result.median(Result::Measure::elapsed));
3412  }
3413  }
3414  return rangeMeasure;
3415 }
3416 
3417 BigO::BigO(std::string bigOName, RangeMeasure const& rangeMeasure)
3418  : mName(std::move(bigOName)) {
3419 
3420  // estimate the constant factor
3421  double sumRangeMeasure = 0.0;
3422  double sumRangeRange = 0.0;
3423 
3424  for (const auto& rm : rangeMeasure) {
3425  sumRangeMeasure += rm.first * rm.second;
3426  sumRangeRange += rm.first * rm.first;
3427  }
3428  mConstant = sumRangeMeasure / sumRangeRange;
3429 
3430  // calculate the normalized root mean square error
3431  double err = 0.0;
3432  double sumMeasure = 0.0;
3433  for (const auto& rm : rangeMeasure) {
3434  auto diff = mConstant * rm.first - rm.second;
3435  err += diff * diff;
3436 
3437  sumMeasure += rm.second;
3438  }
3439 
3440  auto n = detail::d(rangeMeasure.size());
3441  auto mean = sumMeasure / n;
3442  mNormalizedRootMeanSquare = std::sqrt(err / n) / mean;
3443 }
3444 
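// Note on the fit above: mConstant = sum(n_i * t_i) / sum(n_i * n_i) is the least-squares
// solution with zero intercept, i.e. it minimizes sum((c * n_i - t_i)^2) over the scaled
// range measure; mNormalizedRootMeanSquare is the RMS of the residuals divided by the mean
// measurement, which makes the error comparable across the candidate complexities.
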
3445 BigO::BigO(const char* bigOName, RangeMeasure const& rangeMeasure)
3446  : BigO(std::string(bigOName), rangeMeasure) {}
3447 
3448 std::string const& BigO::name() const noexcept {
3449  return mName;
3450 }
3451 
3452 double BigO::constant() const noexcept {
3453  return mConstant;
3454 }
3455 
3456 double BigO::normalizedRootMeanSquare() const noexcept {
3457  return mNormalizedRootMeanSquare;
3458 }
3459 
3460 bool BigO::operator<(BigO const& other) const noexcept {
3461  return std::tie(mNormalizedRootMeanSquare, mName) < std::tie(other.mNormalizedRootMeanSquare, other.mName);
3462 }
3463 
3464 std::ostream& operator<<(std::ostream& os, BigO const& bigO) {
3465  return os << bigO.constant() << " * " << bigO.name() << ", rms=" << bigO.normalizedRootMeanSquare();
3466 }
3467 
3468 std::ostream& operator<<(std::ostream& os, std::vector<ankerl::nanobench::BigO> const& bigOs) {
3469  detail::fmt::StreamStateRestorer const restorer(os);
3470  os << std::endl << "| coefficient | err% | complexity" << std::endl << "|--------------:|-------:|------------" << std::endl;
3471  for (auto const& bigO : bigOs) {
3472  os << "|" << std::setw(14) << std::setprecision(7) << std::scientific << bigO.constant() << " ";
3473  os << "|" << detail::fmt::Number(6, 1, bigO.normalizedRootMeanSquare() * 100.0) << "% ";
3474  os << "| " << bigO.name();
3475  os << std::endl;
3476  }
3477  return os;
3478 }
3479 
3480 } // namespace nanobench
3481 } // namespace ankerl
3482 
3483 #endif // ANKERL_NANOBENCH_IMPLEMENT
3484 #endif // ANKERL_NANOBENCH_H_INCLUDED