#ifndef ANKERL_NANOBENCH_H_INCLUDED
#define ANKERL_NANOBENCH_H_INCLUDED

#define ANKERL_NANOBENCH_VERSION_MAJOR 4
#define ANKERL_NANOBENCH_VERSION_MINOR 3
#define ANKERL_NANOBENCH_VERSION_PATCH 6

#define ANKERL_NANOBENCH(x) ANKERL_NANOBENCH_PRIVATE_##x()

#define ANKERL_NANOBENCH_PRIVATE_CXX() __cplusplus
#define ANKERL_NANOBENCH_PRIVATE_CXX98() 199711L
#define ANKERL_NANOBENCH_PRIVATE_CXX11() 201103L
#define ANKERL_NANOBENCH_PRIVATE_CXX14() 201402L
#define ANKERL_NANOBENCH_PRIVATE_CXX17() 201703L

#if ANKERL_NANOBENCH(CXX) >= ANKERL_NANOBENCH(CXX17)
#    define ANKERL_NANOBENCH_PRIVATE_NODISCARD() [[nodiscard]]
#else
#    define ANKERL_NANOBENCH_PRIVATE_NODISCARD()
#endif

#if defined(__clang__)
#    define ANKERL_NANOBENCH_PRIVATE_IGNORE_PADDED_PUSH() \
        _Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wpadded\"")
#    define ANKERL_NANOBENCH_PRIVATE_IGNORE_PADDED_POP() _Pragma("clang diagnostic pop")
#else
#    define ANKERL_NANOBENCH_PRIVATE_IGNORE_PADDED_PUSH()
#    define ANKERL_NANOBENCH_PRIVATE_IGNORE_PADDED_POP()
#endif

#if defined(__GNUC__)
#    define ANKERL_NANOBENCH_PRIVATE_IGNORE_EFFCPP_PUSH() _Pragma("GCC diagnostic push") _Pragma("GCC diagnostic ignored \"-Weffc++\"")
#    define ANKERL_NANOBENCH_PRIVATE_IGNORE_EFFCPP_POP() _Pragma("GCC diagnostic pop")
#else
#    define ANKERL_NANOBENCH_PRIVATE_IGNORE_EFFCPP_PUSH()
#    define ANKERL_NANOBENCH_PRIVATE_IGNORE_EFFCPP_POP()
#endif

#if defined(ANKERL_NANOBENCH_LOG_ENABLED)
#    include <iostream>
#    define ANKERL_NANOBENCH_LOG(x)                                                 \
        do {                                                                        \
            std::cout << __FUNCTION__ << "@" << __LINE__ << ": " << x << std::endl; \
        } while (0)
#else
#    define ANKERL_NANOBENCH_LOG(x) \
        do {                        \
        } while (0)
#endif

// perf counters are only used on Linux with a sufficiently recent kernel
#define ANKERL_NANOBENCH_PRIVATE_PERF_COUNTERS() 0
#if defined(__linux__) && !defined(ANKERL_NANOBENCH_DISABLE_PERF_COUNTERS)
#    include <linux/version.h>
#    if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 14, 0)
#        undef ANKERL_NANOBENCH_PRIVATE_PERF_COUNTERS
#        define ANKERL_NANOBENCH_PRIVATE_PERF_COUNTERS() 1
#    endif
#endif

#if defined(__clang__)
#    define ANKERL_NANOBENCH_NO_SANITIZE(...) __attribute__((no_sanitize(__VA_ARGS__)))
#else
#    define ANKERL_NANOBENCH_NO_SANITIZE(...)
#endif

#if defined(_MSC_VER)
#    define ANKERL_NANOBENCH_PRIVATE_NOINLINE() __declspec(noinline)
#else
#    define ANKERL_NANOBENCH_PRIVATE_NOINLINE() __attribute__((noinline))
#endif

// workaround missing "is_trivially_copyable" in g++ < 5.0
#if defined(__GNUC__) && __GNUC__ < 5
#    define ANKERL_NANOBENCH_IS_TRIVIALLY_COPYABLE(...) __has_trivial_copy(__VA_ARGS__)
#else
#    define ANKERL_NANOBENCH_IS_TRIVIALLY_COPYABLE(...) std::is_trivially_copyable<__VA_ARGS__>::value
#endif

// use the high resolution clock if it is steady, otherwise fall back to the steady clock
using Clock = std::conditional<std::chrono::high_resolution_clock::is_steady, std::chrono::high_resolution_clock,
                               std::chrono::steady_clock>::type;
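// Usage sketch (not part of the original header): Clock is high_resolution_clock when that
// clock is steady, and steady_clock otherwise, so a single measurement looks like this:
//
//     auto before = ankerl::nanobench::Clock::now();
//     // ... code under test ...
//     auto elapsed = ankerl::nanobench::Clock::now() - before; // Clock::duration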
char const* csv() noexcept;
#if ANKERL_NANOBENCH(PERF_COUNTERS)
    std::string mBenchmarkTitle = "benchmark";
    std::string mBenchmarkName = "noname";
    std::string mUnit = "op";
    double mComplexityN = -1.0;
    size_t mNumEpochs = 11;
    size_t mClockResolutionMultiple = static_cast<size_t>(1000);
    std::chrono::nanoseconds mMaxEpochTime = std::chrono::milliseconds(100);
    std::chrono::nanoseconds mMinEpochTime{};
    std::ostream* mOut = nullptr;
    std::chrono::duration<double> mTimeUnit = std::chrono::nanoseconds{1};
    std::string mTimeUnitName = "ns";
    bool mShowPerformanceCounters = true;
    bool mIsRelative = false;
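// Usage sketch (not part of the original header): each of these Config defaults is exposed
// through a chainable Bench setter of the corresponding name, e.g.
//
//     ankerl::nanobench::Bench bench;
//     bench.title("my benchmarks").unit("op").epochs(11).maxEpochTime(std::chrono::milliseconds(100));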
    std::vector<std::vector<double>> mNameToMeasurements{};
    inline double uniform01() noexcept;
    Bench& complexityN(T b) noexcept;
    std::vector<Result> mResults{};
// Makes sure none of the given arguments are optimized away by the compiler.
template <typename Arg>
void doNotOptimizeAway(Arg&& arg);

// GCC/clang variants: an empty asm statement that takes the value as an operand forces the
// compiler to actually materialize it.
template <typename T>
void doNotOptimizeAway(T const& val) {
    asm volatile("" : : "r,m"(val) : "memory");
}

template <typename T>
void doNotOptimizeAway(T& val) {
#    if defined(__clang__)
    asm volatile("" : "+r,m"(val) : : "memory");
#    else
    asm volatile("" : "+m,r"(val) : : "memory");
#    endif
}
#if ANKERL_NANOBENCH(PERF_COUNTERS)

    template <typename Op>
    static RangeMeasure mapRangeMeasure(RangeMeasure data, Op op);

    template <typename Op>
    BigO(char const* bigOName, RangeMeasure const& rangeMeasure, Op rangeToN);

    template <typename Op>
    BigO(std::string const& bigOName, RangeMeasure const& rangeMeasure, Op rangeToN);

    double mNormalizedRootMeanSquare{};

std::ostream& operator<<(std::ostream& os, std::vector<ankerl::nanobench::BigO> const& bigOs);
namespace nanobench {

constexpr uint64_t Rng::max() {
    return (std::numeric_limits<uint64_t>::max)();
}

uint64_t Rng::operator()() noexcept {
    auto x = mX;
    // RomuDuoJr state update: multiply one state word, rotate the other.
    mX = UINT64_C(15241094284759029579) * mY;
    mY = rotl(mY - x, 27);
    return x;
}

double Rng::uniform01() noexcept {
    // Build a double in [1, 2) from the top 52 random bits, then subtract 1.
    auto i = (UINT64_C(0x3ff) << 52U) | (operator()() >> 12U);
    double d;
    std::memcpy(&d, &i, sizeof(double));
    return d - 1.0;
}

template <typename Container>
void Rng::shuffle(Container& container) noexcept {
    auto size = static_cast<uint32_t>(container.size());
    for (auto i = size; i > 1U; --i) {
        using std::swap;
        auto p = bounded(i); // number in [0, i)
        swap(container[i - 1], container[p]);
    }
}

constexpr uint64_t Rng::rotl(uint64_t x, unsigned k) noexcept {
    return (x << k) | (x >> (64U - k));
}
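// Usage sketch (not part of the original header): Rng can feed benchmarks with cheap,
// deterministic pseudo-random input.
//
//     ankerl::nanobench::Rng rng(123);    // fixed seed for reproducible runs
//     uint64_t bits = rng();              // 64 random bits
//     double x = rng.uniform01();         // uniformly distributed in [0, 1)
//     std::vector<int> v = {1, 2, 3, 4};
//     rng.shuffle(v);                     // Fisher-Yates shuffle driven by bounded()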
// Performs all evaluations.
template <typename Op>
Bench& Bench::run(Op&& op) {
    // Keep this method short so the compiler can better optimize/inline op().
    detail::IterationLogic iterationLogic(*this);
    auto& pc = detail::performanceCounters();

    while (auto n = iterationLogic.numIters()) {
        pc.beginMeasure();
        Clock::time_point before = Clock::now();
        while (n-- > 0) {
            op();
        }
        Clock::time_point after = Clock::now();
        pc.endMeasure();
        pc.updateResults(iterationLogic.numIters());
        iterationLogic.add(after - before, pc);
    }
    iterationLogic.moveResultTo(mResults);
    return *this;
}

template <typename Op>
Bench& Bench::run(char const* benchmarkName, Op&& op) {
    name(benchmarkName);
    return run(std::forward<Op>(op));
}

template <typename Op>
Bench& Bench::run(std::string const& benchmarkName, Op&& op) {
    name(benchmarkName);
    return run(std::forward<Op>(op));
}

template <typename Op>
BigO Bench::complexityBigO(char const* benchmarkName, Op op) const {
    return BigO(benchmarkName, BigO::collectRangeMeasure(mResults), op);
}

template <typename Op>
BigO Bench::complexityBigO(std::string const& benchmarkName, Op op) const {
    return BigO(benchmarkName, BigO::collectRangeMeasure(mResults), op);
}

template <typename T>
Bench& Bench::batch(T b) noexcept {
    mConfig.mBatch = static_cast<double>(b);
    return *this;
}

template <typename T>
Bench& Bench::complexityN(T n) noexcept {
    mConfig.mComplexityN = static_cast<double>(n);
    return *this;
}

template <typename Arg>
Bench& Bench::doNotOptimizeAway(Arg&& arg) {
    detail::doNotOptimizeAway(std::forward<Arg>(arg));
    return *this;
}

template <typename Arg>
void doNotOptimizeAway(Arg&& arg) {
    detail::doNotOptimizeAway(std::forward<Arg>(arg));
}

#if defined(_MSC_VER)
// MSVC cannot use the GCC-style inline assembly above, so the value's address is routed
// through a separately compiled sink function instead.
template <typename T>
void doNotOptimizeAway(T const& val) {
    doNotOptimizeAwaySink(&val);
}
#endif
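// Usage sketch (not part of the original header): a minimal benchmark. doNotOptimizeAway()
// keeps the compiler from discarding the computed value as dead code.
//
//     #define ANKERL_NANOBENCH_IMPLEMENT
//     #include <nanobench.h>
//
//     int main() {
//         ankerl::nanobench::Bench().run("to_string(42)", [&] {
//             ankerl::nanobench::doNotOptimizeAway(std::to_string(42));
//         });
//     }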
#if defined(ANKERL_NANOBENCH_IMPLEMENT)

#    include <algorithm>
#    include <stdexcept>

#    if defined(__linux__)
#        include <unistd.h> // sysconf
#    endif
#    if ANKERL_NANOBENCH(PERF_COUNTERS)
#        include <linux/perf_event.h>
#        include <sys/ioctl.h>
#        include <sys/syscall.h>
#    endif

namespace nanobench {
namespace nanobench {

template <typename T>
inline double d(T t) noexcept {
    return static_cast<double>(t);
}
inline double d(Clock::duration duration) noexcept {
    return std::chrono::duration_cast<std::chrono::duration<double>>(duration).count();
}

char const* csv() noexcept {
    return R"DELIM("title";"name";"unit";"batch";"elapsed";"error %";"instructions";"branches";"branch misses";"total"
{{#result}}"{{title}}";"{{name}}";"{{unit}}";{{batch}};{{median(elapsed)}};{{medianAbsolutePercentError(elapsed)}};{{median(instructions)}};{{median(branchinstructions)}};{{median(branchmisses)}};{{sumProduct(iterations, elapsed)}}
{{/result}})DELIM";
}
char const* htmlBoxplot() noexcept {
    return R"DELIM(<html>
    <script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
    <div id="myDiv"></div>
                y: [{{#measurement}}{{elapsed}}{{^-last}}, {{/last}}{{/measurement}}],
        var title = '{{title}}';
        data = data.map(a => Object.assign(a, { boxpoints: 'all', pointpos: 0, type: 'box' }));
        var layout = { title: { text: title }, showlegend: false, yaxis: { title: 'time per unit', rangemode: 'tozero', autorange: true } }; Plotly.newPlot('myDiv', data, layout, {responsive: true});
</html>)DELIM";
}
char const* pyperf() noexcept {
{{#measurement}}        {{elapsed}}{{^-last}},
{{/last}}{{/measurement}}
        "loops": {{sum(iterations)}},
        "inner_loops": {{batch}},
        "name": "{{title}}",
char const* json() noexcept {
    "title": "{{title}}",
    "complexityN": {{complexityN}},
    "epochs": {{epochs}},
    "clockResolution": {{clockResolution}},
    "clockResolutionMultiple": {{clockResolutionMultiple}},
    "maxEpochTime": {{maxEpochTime}},
    "minEpochTime": {{minEpochTime}},
    "minEpochIterations": {{minEpochIterations}},
    "epochIterations": {{epochIterations}},
    "warmup": {{warmup}},
    "relative": {{relative}},
    "median(elapsed)": {{median(elapsed)}},
    "medianAbsolutePercentError(elapsed)": {{medianAbsolutePercentError(elapsed)}},
    "median(instructions)": {{median(instructions)}},
    "medianAbsolutePercentError(instructions)": {{medianAbsolutePercentError(instructions)}},
    "median(cpucycles)": {{median(cpucycles)}},
    "median(contextswitches)": {{median(contextswitches)}},
    "median(pagefaults)": {{median(pagefaults)}},
    "median(branchinstructions)": {{median(branchinstructions)}},
    "median(branchmisses)": {{median(branchmisses)}},
    "totalTime": {{sumProduct(iterations, elapsed)}},
        "iterations": {{iterations}},
        "elapsed": {{elapsed}},
        "pagefaults": {{pagefaults}},
        "cpucycles": {{cpucycles}},
        "contextswitches": {{contextswitches}},
        "instructions": {{instructions}},
        "branchinstructions": {{branchinstructions}},
        "branchmisses": {{branchmisses}}
       }{{^-last}},{{/-last}}
   }{{^-last}},{{/-last}}
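// Usage sketch (not part of the original header): results can be rendered through any of the
// built-in mustache-like templates (csv(), htmlBoxplot(), pyperf(), json()), either via the
// member Bench::render() or the free render() function:
//
//     ankerl::nanobench::Bench bench;
//     bench.run("op", [&] { /* ... */ });
//     bench.render(ankerl::nanobench::templates::json(), std::cout);
//     ankerl::nanobench::render(ankerl::nanobench::templates::csv(), bench, std::cout);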
    std::vector<Node> children;

    template <size_t N>
    bool operator==(char const (&str)[N]) const noexcept {
        return static_cast<size_t>(std::distance(begin, end) + 1) == N && 0 == strncmp(str, begin, N - 1);
    }
    std::vector<Node> nodes;

        auto begin = std::strstr(*tpl, "{{");
        if (begin != nullptr) {
            end = std::strstr(begin, "}}");
        }
        if (begin == nullptr || end == nullptr) {
            nodes.emplace_back(Node{*tpl, *tpl + std::strlen(*tpl), std::vector<Node>{}, Node::Type::content});
        }

        nodes.emplace_back(Node{*tpl, begin - 2, std::vector<Node>{}, Node::Type::content});
        nodes.emplace_back(Node{begin, end, std::vector<Node>{}, Node::Type::tag});
static bool generateFirstLast(Node const& n, size_t idx, size_t size, std::ostream& out) {
    if (n.type == Node::Type::section) {
    } else if (n.type == Node::Type::inverted_section) {

    for (auto const& child : n.children) {
        if (child.type == Node::Type::content) {

    if (idxClose == std::string::npos) {

        if (str[i] == ' ' || str[i] == '\t') {
        if (str[i] == ',') {
        out << config.mBenchmarkTitle;
    } else if (n == "name") {
        out << config.mBenchmarkName;
    } else if (n == "unit") {
        out << config.mUnit;
    } else if (n == "batch") {
        out << config.mBatch;
    } else if (n == "complexityN") {
        out << config.mComplexityN;
    } else if (n == "epochs") {
        out << config.mNumEpochs;
    } else if (n == "clockResolution") {
        out << d(detail::clockResolution());
    } else if (n == "clockResolutionMultiple") {
        out << config.mClockResolutionMultiple;
    } else if (n == "maxEpochTime") {
        out << d(config.mMaxEpochTime);
    } else if (n == "minEpochTime") {
        out << d(config.mMinEpochTime);
    } else if (n == "minEpochIterations") {
        out << config.mMinEpochIterations;
    } else if (n == "epochIterations") {
        out << config.mEpochIterations;
    } else if (n == "warmup") {
        out << config.mWarmup;
    } else if (n == "relative") {
        out << config.mIsRelative;
static std::ostream& generateResultTag(Node const& n, Result const& r, std::ostream& out) {
    if (m == Result::Measure::_size) {
        return out << r.median(m);
        return out << r.average(m);
    if (matchResult[0] == "medianAbsolutePercentError") {
        return out << r.medianAbsolutePercentError(m);
        return out << r.sum(m);
        return out << r.minimum(m);
        return out << r.maximum(m);
    if (m1 == Result::Measure::_size || m2 == Result::Measure::_size) {
        return out << r.sumProduct(m1, m2);
    throw std::runtime_error("command '" + std::string(n.begin, n.end) + "' not understood");
    for (auto const& n : nodes) {
        case Node::Type::content:
            out.write(n.begin, std::distance(n.begin, n.end));
        case Node::Type::inverted_section:
            throw std::runtime_error("got an inverted section inside measurement");
        case Node::Type::section:
            throw std::runtime_error("got a section inside measurement");
        case Node::Type::tag: {
            auto m = Result::fromString(std::string(n.begin, n.end));
            if (m == Result::Measure::_size || !r.has(m)) {
            out << r.get(idx, m);

static void generateResult(std::vector<Node> const& nodes, size_t idx, std::vector<Result> const& results, std::ostream& out) {
    for (auto const& n : nodes) {
        case Node::Type::content:
            out.write(n.begin, std::distance(n.begin, n.end));
        case Node::Type::inverted_section:
            throw std::runtime_error("got an inverted section inside result");
        case Node::Type::section:
            if (n == "measurement") {
                for (size_t i = 0; i < r.size(); ++i) {
            throw std::runtime_error("got a section inside result");
        case Node::Type::tag:
template <typename T>
T parseFile(std::string const& filename);

class NumSep : public std::numpunct<char> {

    std::streamsize const mWidth;
    std::ostream::char_type const mFill;

    std::string to_s() const;
    std::ostream& write(std::ostream& os) const;

    std::string title() const;
    std::string invalid() const;
    std::string value() const;

    std::ostream& write(std::ostream& os) const;
    std::string mWhat{};
namespace nanobench {

    detail::fmt::StreamStateRestorer restorer(out);
    out.precision(std::numeric_limits<double>::digits10);

    for (auto const& n : nodes) {
        case templates::Node::Type::content:
            out.write(n.begin, std::distance(n.begin, n.end));
        case templates::Node::Type::inverted_section:
            throw std::runtime_error("unknown list '" + std::string(n.begin, n.end) + "'");
        case templates::Node::Type::section:
            if (n == "result") {
                for (size_t i = 0; i < nbResults; ++i) {
            } else if (n == "measurement") {
                    throw std::runtime_error(
                        "render: can only use section 'measurement' here if there is a single result, but there are " +
                        detail::fmt::to_s(results.size()));
                auto const& r = results.front();
                for (size_t i = 0; i < r.size(); ++i) {
                throw std::runtime_error("render: unknown section '" + std::string(n.begin, n.end) + "'");
        case templates::Node::Type::tag:
            throw std::runtime_error("unknown tag '" + std::string(n.begin, n.end) + "'");
PerformanceCounters& performanceCounters() {
#    if defined(__clang__)
#        pragma clang diagnostic push
#        pragma clang diagnostic ignored "-Wexit-time-destructors"
#    endif
    static PerformanceCounters pc;
#    if defined(__clang__)
#        pragma clang diagnostic pop
#    endif
    return pc;
}

#    if defined(_MSC_VER)
#        pragma optimize("", off)
void doNotOptimizeAwaySink(void const*) {}
#        pragma optimize("", on)
#    endif

template <typename T>
T parseFile(std::string const& filename) {
    std::ifstream fin(filename);
    T num{};
    fin >> num;
    return num;
}

char const* getEnv(char const* name) {
#    if defined(_MSC_VER)
#        pragma warning(push)
#        pragma warning(disable : 4996) // getenv deprecation warning
#    endif
    return std::getenv(name);
#    if defined(_MSC_VER)
#        pragma warning(pop)
#    endif
}
        warnings.emplace_back("DEBUG defined");

#    if defined(__linux__)
            warnings.emplace_back("couldn't figure out number of processors - no governor, turbo check possible");

        for (long id = 0; id < nprocs; ++id) {
            auto sysCpu = "/sys/devices/system/cpu/cpu" + idStr;
                warnings.emplace_back("CPU frequency scaling enabled: CPU " + idStr + " between " +
                                      detail::fmt::Number(1, 1, minMHz).to_s() + " and " + detail::fmt::Number(1, 1, maxMHz).to_s() +

        if (0 == parseFile<int>("/sys/devices/system/cpu/intel_pstate/no_turbo")) {
            warnings.emplace_back("Turbo is enabled, CPU frequency will fluctuate");

        recommendations.emplace_back("Use 'pyperf system tune' before benchmarking. See https://github.com/psf/pyperf");

        os << "Warning, results might be unstable:" << std::endl;
            os << "* " << w << std::endl;
        os << std::endl << "Recommendations" << std::endl;
            os << "* " << r << std::endl;
    return seed ^ (val + UINT64_C(0x9e3779b9) + (seed << 6U) + (seed >> 2U));

    Clock::time_point tBegin;
    Clock::time_point tEnd;
        tEnd = Clock::now();
struct IterationLogic::Impl {

            std::cerr << "NANOBENCH_ENDLESS set: running '" << mBench.name() << "' endlessly" << std::endl;
            mNumIters = (std::numeric_limits<uint64_t>::max)();
        } else if (0 != mBench.warmup()) {
        } else if (0 != mBench.epochIterations()) {
            mState = State::measuring;
            mState = State::upscaling_runtime;

            showResult("iterations overflow. Maybe your code got optimized away?");

    void add(std::chrono::nanoseconds elapsed, PerformanceCounters const& pc) noexcept {
#    if defined(ANKERL_NANOBENCH_LOG_ENABLED)
            mState = State::measuring;
            mState = State::upscaling_runtime;
        case State::upscaling_runtime:
            mState = State::measuring;
        case State::measuring:
            if (0 != mBench.epochIterations()) {
        case State::endless:
            mNumIters = (std::numeric_limits<uint64_t>::max)();

        ANKERL_NANOBENCH_LOG(mBench.name() << ": " << detail::fmt::Number(20, 3, static_cast<double>(elapsed.count())) << " elapsed, "
                             << ", mState=" << static_cast<int>(mState));
        if (mBench.output() != nullptr) {
            std::vector<fmt::MarkDownColumn> columns;

            if (!mBench.results().empty()) {
                d = rMedian <= 0.0 ? 0.0 : mBench.results().front().median(Result::Measure::elapsed) / rMedian * 100.0;
                columns.emplace_back(11, 1, "relative", "%", d);

            if (mBench.complexityN() > 0) {
                columns.emplace_back(14, 0, "complexityN", "", mBench.complexityN());

            columns.emplace_back(22, 2, mBench.timeUnitName() + "/" + mBench.unit(), "",
            columns.emplace_back(10, 1, "err%", "%", rErrorMedian * 100.0);

            if (mBench.performanceCounters() && mResult.has(Result::Measure::instructions)) {
            if (mBench.performanceCounters() && mResult.has(Result::Measure::cpucycles)) {
            if (mBench.performanceCounters() && mResult.has(Result::Measure::branchinstructions)) {
                if (mResult.has(Result::Measure::branchmisses)) {
                    columns.emplace_back(10, 1, "miss%", "%", p);

            columns.emplace_back(12, 2, "total", "", mResult.sumProduct(Result::Measure::iterations, Result::Measure::elapsed));

            auto& os = *mBench.output();

            for (auto const& col : columns) {
            os << "| " << mBench.title() << std::endl;
            for (auto const& col : columns) {
                os << col.separator();
            os << "|:" << std::string(mBench.title().size() + 1U, '-') << std::endl;

            for (auto const& col : columns) {
                os << col.invalid();
            os << "| :boom: " << fmt::MarkDownCode(mBench.name()) << " (" << errorMessage << ')' << std::endl;

            for (auto const& col : columns) {
                os << ":wavy_dash: ";
            os << fmt::MarkDownCode(mBench.name());
            os << " (Unstable with ~" << detail::fmt::Number(1, 1, avgIters)
               << " iters. Increase `minEpochIterations` to e.g. " << suggestedIters << ")";

    State mState = State::upscaling_runtime;
IterationLogic::IterationLogic(Bench const& bench) noexcept
    : mPimpl(new Impl(bench)) {}

IterationLogic::~IterationLogic() {
    delete mPimpl;
}

uint64_t IterationLogic::numIters() const noexcept {
    return mPimpl->mNumIters;
}

void IterationLogic::add(std::chrono::nanoseconds elapsed, PerformanceCounters const& pc) noexcept {
    mPimpl->add(elapsed, pc);
}

void IterationLogic::moveResultTo(std::vector<Result>& results) noexcept {
    results.emplace_back(std::move(mPimpl->mResult));
}
#    if ANKERL_NANOBENCH(PERF_COUNTERS)

    inline void start() {}
    inline void stop() {}

    inline void beginMeasure() {
    inline void endMeasure() {

    template <typename T>
    template <typename Op>

            v = (std::numeric_limits<uint64_t>::max)();
        detail::doNotOptimizeAway(x);
        detail::doNotOptimizeAway(x);
    for (size_t i = 0; i < mCounters.size(); ++i) {
LinuxPerformanceCounters::~LinuxPerformanceCounters() {

bool LinuxPerformanceCounters::monitor(perf_hw_id hwId, LinuxPerformanceCounters::Target target) {

        auto idx = static_cast<size_t>(3 + i * 2 + 0);
        auto& tgt = it->second;
        if (tgt.correctMeasuringOverhead) {
                *tgt.targetValue = 0U;
        if (tgt.correctLoopOverhead) {
                *tgt.targetValue = 0U;

    *target.targetValue = (std::numeric_limits<uint64_t>::max)();
    pea.exclude_kernel = 1;
#    if defined(PERF_FLAG_FD_CLOEXEC)
    const unsigned long flags = 0;
PerformanceCounters::PerformanceCounters()
    mHas.contextSwitches =
    mHas.branchInstructions =

    auto before = ankerl::nanobench::Clock::now();
    auto after = ankerl::nanobench::Clock::now();
    if (mPc->hasError()) {
        mHas = PerfCountSet<bool>{};

PerformanceCounters::~PerformanceCounters() {
    if (nullptr != mPc) {

void PerformanceCounters::beginMeasure() {
    mPc->beginMeasure();
}

void PerformanceCounters::endMeasure() {

#    else

// Without perf counter support these are all no-ops.
PerformanceCounters::PerformanceCounters() = default;
PerformanceCounters::~PerformanceCounters() = default;
void PerformanceCounters::beginMeasure() {}
void PerformanceCounters::endMeasure() {}
void PerformanceCounters::updateResults(uint64_t) {}

#    endif
NumSep::NumSep(char sep)

char NumSep::do_thousands_sep() const {

std::string NumSep::do_grouping() const {

StreamStateRestorer::StreamStateRestorer(std::ostream& s)

StreamStateRestorer::~StreamStateRestorer() {

void StreamStateRestorer::restore() {

std::ostream& Number::write(std::ostream& os) const {
    os.imbue(std::locale(os.getloc(), new NumSep(',')));

std::string Number::to_s() const {
    std::stringstream ss;

        str += static_cast<char>('0' + static_cast<char>(n % 10));
    std::reverse(str.begin(), str.end());

MarkDownColumn::MarkDownColumn(int w, int prec, std::string const& tit, std::string const& suff, double val)

std::string MarkDownColumn::title() const {
    std::stringstream ss;
    ss << '|' << std::setw(mWidth - 2) << std::right << mTitle << ' ';

std::string MarkDownColumn::separator() const {
    std::string sep(static_cast<size_t>(mWidth), '-');

std::string MarkDownColumn::invalid() const {
    std::string sep(static_cast<size_t>(mWidth), ' ');
    sep[sep.size() - 2] = '-';

std::string MarkDownColumn::value() const {
    std::stringstream ss;

MarkDownCode::MarkDownCode(std::string const& what) {
    mWhat.reserve(what.size() + 2);
    mWhat.push_back('`');
    for (char c : what) {
        mWhat.push_back('`');
    mWhat.push_back('`');

std::ostream& MarkDownCode::write(std::ostream& os) const {
Config::~Config() = default;

Result::Result(Result const&) = default;
Result::Result(Result&&) noexcept = default;

template <typename T>
inline constexpr typename std::underlying_type<T>::type u(T val) noexcept {
    return static_cast<typename std::underlying_type<T>::type>(val);
}

    , mNameToMeasurements{detail::u(Result::Measure::_size)} {}
    mNameToMeasurements[u(Result::Measure::iterations)].push_back(dIters);
    mNameToMeasurements[u(Result::Measure::elapsed)].push_back(d(totalElapsed) / dIters);
    if (pc.has().pageFaults) {
        mNameToMeasurements[u(Result::Measure::pagefaults)].push_back(d(pc.val().pageFaults) / dIters);
    if (pc.has().cpuCycles) {
        mNameToMeasurements[u(Result::Measure::cpucycles)].push_back(d(pc.val().cpuCycles) / dIters);
    if (pc.has().contextSwitches) {
        mNameToMeasurements[u(Result::Measure::contextswitches)].push_back(d(pc.val().contextSwitches) / dIters);
    if (pc.has().instructions) {
        mNameToMeasurements[u(Result::Measure::instructions)].push_back(d(pc.val().instructions) / dIters);
    if (pc.has().branchInstructions) {
        double branchInstructions = 0.0;
        if (pc.val().branchInstructions > iters + 1U) {
            branchInstructions = d(pc.val().branchInstructions - (iters + 1U));
        mNameToMeasurements[u(Result::Measure::branchinstructions)].push_back(branchInstructions / dIters);
    if (pc.has().branchMisses) {
        double branchMisses = d(pc.val().branchMisses);
        if (branchMisses > branchInstructions) {
            branchMisses = branchInstructions;
        branchMisses -= 1.0;
        if (branchMisses < 1.0) {
        mNameToMeasurements[u(Result::Measure::branchmisses)].push_back(branchMisses / dIters);
Config const& Result::config() const noexcept {

inline double calcMedian(std::vector<double>& data) {
    std::sort(data.begin(), data.end());
    auto midIdx = data.size() / 2U;
    if (1U == (data.size() & 1U)) {

double Result::median(Measure m) const {
    auto data = mNameToMeasurements[detail::u(m)];

double Result::average(Measure m) const {
    auto const& data = mNameToMeasurements[detail::u(m)];
    return sum(m) / d(data.size());
}

double Result::medianAbsolutePercentError(Measure m) const {
    auto data = mNameToMeasurements[detail::u(m)];
    for (auto& x : data) {

double Result::sum(Measure m) const noexcept {
    auto const& data = mNameToMeasurements[detail::u(m)];
    return std::accumulate(data.begin(), data.end(), 0.0);
}

double Result::sumProduct(Measure m1, Measure m2) const noexcept {
    auto const& data1 = mNameToMeasurements[detail::u(m1)];
    auto const& data2 = mNameToMeasurements[detail::u(m2)];
    double result = 0.0;
    for (size_t i = 0, s = data1.size(); i != s; ++i) {

bool Result::has(Measure m) const noexcept {
    return !mNameToMeasurements[detail::u(m)].empty();
}

double Result::get(size_t idx, Measure m) const {
    auto const& data = mNameToMeasurements[detail::u(m)];
    return data.at(idx);
}

bool Result::empty() const noexcept {
    return 0U == size();
}

size_t Result::size() const noexcept {
    auto const& data = mNameToMeasurements[detail::u(Measure::elapsed)];

double Result::minimum(Measure m) const noexcept {
    auto const& data = mNameToMeasurements[detail::u(m)];
    return *std::min_element(data.begin(), data.end());
}

double Result::maximum(Measure m) const noexcept {
    auto const& data = mNameToMeasurements[detail::u(m)];
    return *std::max_element(data.begin(), data.end());
}
Result::Measure Result::fromString(std::string const& str) {
    if (str == "elapsed") {
        return Measure::elapsed;
    } else if (str == "iterations") {
        return Measure::iterations;
    } else if (str == "pagefaults") {
        return Measure::pagefaults;
    } else if (str == "cpucycles") {
        return Measure::cpucycles;
    } else if (str == "contextswitches") {
        return Measure::contextswitches;
    } else if (str == "instructions") {
        return Measure::instructions;
    } else if (str == "branchinstructions") {
        return Measure::branchinstructions;
    } else if (str == "branchmisses") {
        return Measure::branchmisses;
    }
    return Measure::_size;
}
Bench::Bench() {
    mConfig.mOut = &std::cout;
}

Bench::Bench(Bench&&) = default;
Bench& Bench::operator=(Bench&&) = default;
Bench::Bench(Bench const&) = default;
Bench& Bench::operator=(Bench const&) = default;
Bench::~Bench() noexcept = default;

double Bench::batch() const noexcept {
    return mConfig.mBatch;
}

double Bench::complexityN() const noexcept {
    return mConfig.mComplexityN;
}

bool Bench::relative() const noexcept {
    return mConfig.mIsRelative;
}

bool Bench::performanceCounters() const noexcept {
    return mConfig.mShowPerformanceCounters;
}

Bench& Bench::unit(char const* u) {
    if (u != mConfig.mUnit) {
        mConfig.mUnit = u;
    }
    return *this;
}

Bench& Bench::unit(std::string const& u) {
    return unit(u.c_str());
}

std::string const& Bench::unit() const noexcept {
    return mConfig.mUnit;
}

Bench& Bench::timeUnit(std::chrono::duration<double> const& tu, std::string const& tuName) {
    mConfig.mTimeUnit = tu;
    mConfig.mTimeUnitName = tuName;
    return *this;
}

std::string const& Bench::timeUnitName() const noexcept {
    return mConfig.mTimeUnitName;
}

std::chrono::duration<double> const& Bench::timeUnit() const noexcept {
    return mConfig.mTimeUnit;
}

std::string const& Bench::title() const noexcept {
    return mConfig.mBenchmarkTitle;
}

std::string const& Bench::name() const noexcept {
    return mConfig.mBenchmarkName;
}

Bench& Bench::epochs(size_t numEpochs) noexcept {
    mConfig.mNumEpochs = numEpochs;
    return *this;
}

size_t Bench::epochs() const noexcept {
    return mConfig.mNumEpochs;
}

Bench& Bench::clockResolutionMultiple(size_t multiple) noexcept {
    mConfig.mClockResolutionMultiple = multiple;
    return *this;
}

size_t Bench::clockResolutionMultiple() const noexcept {
    return mConfig.mClockResolutionMultiple;
}

Bench& Bench::maxEpochTime(std::chrono::nanoseconds t) noexcept {
    mConfig.mMaxEpochTime = t;
    return *this;
}

std::chrono::nanoseconds Bench::maxEpochTime() const noexcept {
    return mConfig.mMaxEpochTime;
}

Bench& Bench::minEpochTime(std::chrono::nanoseconds t) noexcept {
    mConfig.mMinEpochTime = t;
    return *this;
}

std::chrono::nanoseconds Bench::minEpochTime() const noexcept {
    return mConfig.mMinEpochTime;
}

uint64_t Bench::minEpochIterations() const noexcept {
    return mConfig.mMinEpochIterations;
}

Bench& Bench::epochIterations(uint64_t numIters) noexcept {
    mConfig.mEpochIterations = numIters;
    return *this;
}

uint64_t Bench::epochIterations() const noexcept {
    return mConfig.mEpochIterations;
}

uint64_t Bench::warmup() const noexcept {
    return mConfig.mWarmup;
}

Config const& Bench::config() const noexcept {
    return mConfig;
}

Bench& Bench::output(std::ostream* outstream) noexcept {
    mConfig.mOut = outstream;
    return *this;
}

std::ostream* Bench::output() const noexcept {
    return mConfig.mOut;
}

std::vector<Result> const& Bench::results() const noexcept {
    return mResults;
}

Bench& Bench::render(std::string const& templateContent, std::ostream& os) {
std::vector<BigO> Bench::complexityBigO() const {
    std::vector<BigO> bigOs;
    auto rangeMeasure = BigO::collectRangeMeasure(mResults);
        return std::log2(n);
        return n * std::log2(n);

    std::random_device rd;
    std::uniform_int_distribution<uint64_t> dist;
    } while (mX == 0 && mY == 0);

    z = (z ^ (z >> 30U)) * UINT64_C(0xbf58476d1ce4e5b9);
    z = (z ^ (z >> 27U)) * UINT64_C(0x94d049bb133111eb);
    return z ^ (z >> 31U);
    for (size_t i = 0; i < 10; ++i) {

Rng Rng::copy() const noexcept {

Rng::Rng(std::vector<uint64_t> const& data)
    if (data.size() != 2) {
        throw std::runtime_error("ankerl::nanobench::Rng::Rng: needed exactly 2 entries in data, but got " +
                                 detail::fmt::to_s(data.size()));

std::vector<uint64_t> Rng::state() const {
    std::vector<uint64_t> data(2);
BigO::RangeMeasure BigO::collectRangeMeasure(std::vector<Result> const& results) {
    for (auto const& result : results) {
        if (result.config().mComplexityN > 0.0) {
            rangeMeasure.emplace_back(result.config().mComplexityN, result.median(Result::Measure::elapsed));

    mNormalizedRootMeanSquare = std::sqrt(err / n) / mean;

std::string const& BigO::name() const noexcept {

double BigO::constant() const noexcept {

double BigO::normalizedRootMeanSquare() const noexcept {
    return mNormalizedRootMeanSquare;
}

bool BigO::operator<(BigO const& other) const noexcept {
    return std::tie(mNormalizedRootMeanSquare, mName) < std::tie(other.mNormalizedRootMeanSquare, other.mName);
}

    return os << bigO.constant() << " * " << bigO.name() << ", rms=" << bigO.normalizedRootMeanSquare();

std::ostream& operator<<(std::ostream& os, std::vector<ankerl::nanobench::BigO> const& bigOs) {
    detail::fmt::StreamStateRestorer restorer(os);
    os << std::endl
       << "| coefficient | err% | complexity" << std::endl
       << "|--------------:|-------:|------------" << std::endl;
        os << "|" << std::setw(14) << std::setprecision(7) << std::scientific << bigO.constant() << " ";
        os << "|" << detail::fmt::Number(6, 1, bigO.normalizedRootMeanSquare() * 100.0) << "% ";
        os << "| " << bigO.name();
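// Usage sketch (not part of the original header): asymptotic complexity estimation. Tag each
// run with complexityN(), then let complexityBigO() fit the collected (N, elapsed) pairs:
//
//     ankerl::nanobench::Bench bench;
//     for (size_t n = 10; n <= 100000; n *= 10) {
//         std::vector<uint64_t> v(n);
//         bench.complexityN(n).run("std::sort", [&] { std::sort(v.begin(), v.end()); });
//     }
//     std::cout << bench.complexityBigO() << std::endl;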