Line data Source code
1 : #ifndef HASHBROWNS_STATS_ANALYZER_H
2 : #define HASHBROWNS_STATS_ANALYZER_H
3 :
4 : #include <vector>
5 : #include <numeric>
6 : #include <cmath>
7 : #include <algorithm>
8 : #include <random>
9 :
namespace hashbrowns {

/// Descriptive statistics for one set of samples, as filled in by summarize().
/// Every field is zero for an empty input.
struct StatsSummary {
    double mean{0.0};     ///< Arithmetic mean of the samples.
    double stddev{0.0};   ///< Population standard deviation (variance divides by N, not N-1).
    double median{0.0};   ///< Middle value; mean of the two middle values for an even count.
    double p95{0.0};      ///< 95th percentile, linearly interpolated between adjacent ranks.
    double ci_low{0.0};   ///< 95% CI lower bound (mean, bootstrap)
    double ci_high{0.0};  ///< 95% CI upper bound (mean, bootstrap)
    int samples{0};       ///< Number of input samples.
};

namespace detail {

/// Linearly interpolated quantile of data that is ALREADY sorted ascending.
///
/// @param sorted  Values in ascending order.
/// @param p       Quantile as a fraction; values outside [0, 1] are clamped.
/// @return 0.0 for empty input, `p` itself when `p` is NaN, otherwise the
///         value at fractional rank `p * (size - 1)`.
inline double percentile_sorted(const std::vector<double>& sorted, double p) {
    if (sorted.empty()) return 0.0;
    if (std::isnan(p)) return p;  // no meaningful rank exists; propagate the NaN
    // Clamp so the rank below can never index outside [0, size - 1].
    if (p < 0.0) p = 0.0;
    if (p > 1.0) p = 1.0;

    const double rank = p * static_cast<double>(sorted.size() - 1);
    const auto lo = static_cast<std::size_t>(std::floor(rank));
    const auto hi = static_cast<std::size_t>(std::ceil(rank));
    if (lo == hi) return sorted[lo];  // rank is an exact index, nothing to interpolate

    const double weight = rank - static_cast<double>(lo);
    return sorted[lo] * (1.0 - weight) + sorted[hi] * weight;
}

}  // namespace detail

/// Linearly interpolated percentile of an unsorted sample set.
///
/// The vector is taken by value and sorted locally, so the caller's data is
/// left untouched. Plain `inline` (not `static`) keeps a single entity across
/// translation units, so other inline functions in this header can call it
/// without violating the one-definition rule.
///
/// @param v  Sample values in any order. std::sort requires a strict weak
///           ordering, so the values must not contain NaN.
/// @param p  Quantile as a fraction (0.95 for the 95th percentile); values
///           outside [0, 1] are clamped, NaN yields NaN.
/// @return 0.0 when `v` is empty, otherwise the interpolated percentile.
inline double percentile(std::vector<double> v, double p) {
    if (v.empty()) return 0.0;
    std::sort(v.begin(), v.end());
    return detail::percentile_sorted(v, p);
}

/// Computes mean, population standard deviation, median, p95 and a 95%
/// confidence interval for the mean.
///
/// @param samples          Input values; not modified. Must not contain NaN
///                         (they are sorted with std::sort).
/// @param bootstrap_iters  Number of bootstrap resamples used for the CI. When
///                         it is <= 0, or there are fewer than two samples,
///                         the CI collapses to [mean, mean].
/// @return A StatsSummary; all-zero when `samples` is empty.
inline StatsSummary summarize(const std::vector<double>& samples, int bootstrap_iters = 0) {
    StatsSummary s;
    s.samples = static_cast<int>(samples.size());
    if (samples.empty()) return s;

    const double count = static_cast<double>(samples.size());
    s.mean = std::accumulate(samples.begin(), samples.end(), 0.0) / count;

    // Two-pass variance around the already-computed mean.
    double sq_dev = 0.0;
    for (double v : samples) sq_dev += (v - s.mean) * (v - s.mean);
    s.stddev = std::sqrt(sq_dev / count);

    // Sort one copy and derive both order statistics from it.
    std::vector<double> sorted = samples;
    std::sort(sorted.begin(), sorted.end());
    const std::size_t mid = sorted.size() / 2;
    if (sorted.size() % 2 == 1) {
        s.median = sorted[mid];
    } else {
        s.median = 0.5 * (sorted[mid - 1] + sorted[mid]);
    }
    s.p95 = detail::percentile_sorted(sorted, 0.95);

    if (bootstrap_iters > 0 && samples.size() > 1) {
        std::vector<double> means;
        means.reserve(static_cast<std::size_t>(bootstrap_iters));
        std::mt19937_64 rng(123456789ULL);  // deterministic bootstrap seed
        std::uniform_int_distribution<std::size_t> dist(0, samples.size() - 1);
        for (int i = 0; i < bootstrap_iters; ++i) {
            // One resample: draw N values with replacement and record their mean.
            double bsum = 0.0;
            for (std::size_t k = 0; k < samples.size(); ++k) bsum += samples[dist(rng)];
            means.push_back(bsum / count);
        }
        std::sort(means.begin(), means.end());
        // Percentile bootstrap: the 2.5% and 97.5% ranks of the resampled means.
        const auto idx_lo = static_cast<std::size_t>(0.025 * (means.size() - 1));
        const auto idx_hi = static_cast<std::size_t>(0.975 * (means.size() - 1));
        s.ci_low = means[idx_lo];
        s.ci_high = means[idx_hi];
    } else {
        // Degenerate CI: no resampling requested or possible.
        s.ci_low = s.mean;
        s.ci_high = s.mean;
    }
    return s;
}

}  // namespace hashbrowns
73 :
74 : #endif // HASHBROWNS_STATS_ANALYZER_H
|