Line data Source code
1 : // benchmark_baseline.cpp
2 : // Baseline loading, comparison, and reporting helpers for BenchmarkSuite.
3 : // Extracted from benchmark_suite.cpp to isolate schema-aware JSON parsing
4 : // and regression-detection logic from the timing run loop.
5 :
6 : #include "benchmark_suite.h"
7 :
8 : #include <algorithm>
9 : #include <cctype>
10 : #include <cmath>
11 : #include <fstream>
12 : #include <iostream>
13 : #include <map>
14 : #include <optional>
15 : #include <set>
16 : #include <string>
17 : #include <vector>
18 :
19 : namespace hashbrowns {
20 :
21 : namespace {
22 :
// Returns a copy of `s` with leading and trailing whitespace removed.
// Whitespace is whatever std::isspace reports for the character.
static std::string trim_ws(const std::string& s) {
    const auto is_blank = [](char c) { return std::isspace(static_cast<unsigned char>(c)) != 0; };
    // First character that is not whitespace (or end() when the string is all blank).
    const auto first = std::find_if_not(s.begin(), s.end(), is_blank);
    // Walk backwards, but never past `first`, to find one past the last kept character.
    const auto last = std::find_if_not(s.rbegin(), std::string::const_reverse_iterator(first), is_blank).base();
    return std::string(first, last);
}
32 :
// Percentage change of `current` relative to `baseline` (positive when `current` is larger).
// A baseline of exactly zero yields 0.0 instead of dividing by zero.
static double pct_delta(double baseline, double current) {
    const bool has_reference = (baseline != 0.0);
    return has_reference ? (current - baseline) * 100.0 / baseline : 0.0;
}
38 :
// Locates the value belonging to the JSON member named `key` inside `text`.
//
// Returns the index of the first non-whitespace character after the member's colon
// (which may equal text.size() when nothing follows), or std::nullopt when no such
// member exists.
//
// An occurrence of "key" only counts as a member name when, ignoring whitespace, a ':'
// follows it directly. This keeps a string *value* that happens to equal the key name
// (e.g. {"a": "seed", "b": 1, "seed": 42}) from being mistaken for the key, which would
// otherwise make the lookup return an unrelated member's value.
//
// This is still a textual scan: nesting is not tracked, so the first matching member at
// any depth wins.
static std::optional<std::size_t> find_key_value_start(const std::string& text, const std::string& key) {
    const std::string pattern = "\"" + key + "\"";
    const auto skip_spaces = [&text](std::size_t at) {
        while (at < text.size() && std::isspace(static_cast<unsigned char>(text[at])))
            ++at;
        return at;
    };

    std::size_t search_from = 0;
    while (true) {
        const auto match = text.find(pattern, search_from);
        if (match == std::string::npos)
            return std::nullopt;

        const std::size_t after_name = skip_spaces(match + pattern.size());
        if (after_name < text.size() && text[after_name] == ':')
            return skip_spaces(after_name + 1);

        // The quoted text was a value (or otherwise not a member name); try the next one.
        search_from = match + 1;
    }
}
52 :
// Finds the delimiter that closes the `open` character located at text[start].
//
// Nested open/close pairs are balanced, and everything inside double-quoted strings
// (including backslash-escaped characters) is ignored. Returns the index of the matching
// `close`, or std::nullopt when text[start] is not `open` or the text ends first.
static std::optional<std::size_t> find_matching_delim(const std::string& text, std::size_t start, char open, char close) {
    const std::size_t length = text.size();
    if (!(start < length) || text[start] != open)
        return std::nullopt;

    enum class Mode { Plain, InString, AfterBackslash };
    Mode mode = Mode::Plain;
    int nesting = 0;

    for (std::size_t idx = start; idx < length; ++idx) {
        const char c = text[idx];
        switch (mode) {
        case Mode::AfterBackslash:
            // The escaped character is consumed verbatim, whatever it is.
            mode = Mode::InString;
            break;
        case Mode::InString:
            if (c == '\\')
                mode = Mode::AfterBackslash;
            else if (c == '"')
                mode = Mode::Plain;
            break;
        case Mode::Plain:
            if (c == '"') {
                mode = Mode::InString;
            } else if (c == open) {
                ++nesting;
            } else if (c == close) {
                --nesting;
                if (nesting == 0)
                    return idx;
            }
            break;
        }
    }
    return std::nullopt;
}
85 :
86 4 : static std::optional<std::string> extract_object_by_key(const std::string& text, const std::string& key) {
87 4 : auto start = find_key_value_start(text, key);
88 4 : if (!start || *start >= text.size() || text[*start] != '{')
89 0 : return std::nullopt;
90 4 : auto end = find_matching_delim(text, *start, '{', '}');
91 4 : if (!end)
92 0 : return std::nullopt;
93 4 : return text.substr(*start + 1, *end - *start - 1);
94 : }
95 :
96 2 : static std::optional<std::string> extract_array_by_key(const std::string& text, const std::string& key) {
97 2 : auto start = find_key_value_start(text, key);
98 2 : if (!start || *start >= text.size() || text[*start] != '[')
99 0 : return std::nullopt;
100 2 : auto end = find_matching_delim(text, *start, '[', ']');
101 2 : if (!end)
102 0 : return std::nullopt;
103 2 : return text.substr(*start + 1, *end - *start - 1);
104 : }
105 :
106 26 : static std::optional<std::string> extract_string_field(const std::string& obj, const std::string& key) {
107 26 : auto start = find_key_value_start(obj, key);
108 26 : if (!start || *start >= obj.size() || obj[*start] != '"')
109 0 : return std::nullopt;
110 26 : std::string out;
111 26 : bool escaped = false;
112 218 : for (std::size_t i = *start + 1; i < obj.size(); ++i) {
113 218 : char ch = obj[i];
114 218 : if (escaped) {
115 0 : out.push_back(ch);
116 0 : escaped = false;
117 0 : continue;
118 : }
119 218 : if (ch == '\\') {
120 0 : escaped = true;
121 0 : continue;
122 : }
123 218 : if (ch == '"')
124 26 : return out;
125 192 : out.push_back(ch);
126 : }
127 0 : return std::nullopt;
128 26 : }
129 :
130 44 : static std::optional<std::string> extract_scalar_field(const std::string& obj, const std::string& key) {
131 44 : auto start = find_key_value_start(obj, key);
132 44 : if (!start)
133 0 : return std::nullopt;
134 44 : std::size_t end = *start;
135 178 : while (end < obj.size() && obj[end] != ',' && obj[end] != '}' && obj[end] != ']' && obj[end] != '\n')
136 134 : ++end;
137 44 : auto value = trim_ws(obj.substr(*start, end - *start));
138 44 : if (value.empty())
139 0 : return std::nullopt;
140 44 : return value;
141 44 : }
142 :
143 12 : static std::optional<int> extract_int_field(const std::string& obj, const std::string& key) {
144 12 : auto value = extract_scalar_field(obj, key);
145 12 : if (!value)
146 0 : return std::nullopt;
147 : try {
148 12 : return std::stoi(*value);
149 0 : } catch (...) {
150 0 : return std::nullopt;
151 0 : }
152 12 : }
153 :
154 2 : static std::optional<unsigned int> extract_uint_field(const std::string& obj, const std::string& key) {
155 2 : auto value = extract_scalar_field(obj, key);
156 2 : if (!value)
157 0 : return std::nullopt;
158 : try {
159 2 : return static_cast<unsigned int>(std::stoul(*value));
160 0 : } catch (...) {
161 0 : return std::nullopt;
162 0 : }
163 2 : }
164 :
165 6 : static std::optional<std::size_t> extract_size_t_field(const std::string& obj, const std::string& key) {
166 6 : auto value = extract_scalar_field(obj, key);
167 6 : if (!value)
168 0 : return std::nullopt;
169 : try {
170 6 : return static_cast<std::size_t>(std::stoull(*value));
171 0 : } catch (...) {
172 0 : return std::nullopt;
173 0 : }
174 6 : }
175 :
176 4 : static std::optional<unsigned long long> extract_ull_field(const std::string& obj, const std::string& key) {
177 4 : auto value = extract_scalar_field(obj, key);
178 4 : if (!value)
179 0 : return std::nullopt;
180 : try {
181 4 : return static_cast<unsigned long long>(std::stoull(*value));
182 0 : } catch (...) {
183 0 : return std::nullopt;
184 0 : }
185 4 : }
186 :
187 20 : static std::optional<double> extract_double_field(const std::string& obj, const std::string& key) {
188 20 : auto value = extract_scalar_field(obj, key);
189 20 : if (!value)
190 0 : return std::nullopt;
191 : try {
192 20 : return std::stod(*value);
193 0 : } catch (...) {
194 0 : return std::nullopt;
195 0 : }
196 20 : }
197 :
198 6 : static std::vector<std::string> extract_string_array_field(const std::string& obj, const std::string& key) {
199 6 : std::vector<std::string> out;
200 6 : auto start = find_key_value_start(obj, key);
201 6 : if (!start || *start >= obj.size() || obj[*start] != '[')
202 0 : return out;
203 6 : auto end = find_matching_delim(obj, *start, '[', ']');
204 6 : if (!end)
205 0 : return out;
206 6 : std::string array_body = obj.substr(*start + 1, *end - *start - 1);
207 6 : std::size_t pos = 0;
208 18 : while (pos < array_body.size()) {
209 12 : auto quote = array_body.find('"', pos);
210 12 : if (quote == std::string::npos)
211 0 : break;
212 12 : auto close = array_body.find('"', quote + 1);
213 12 : if (close == std::string::npos)
214 0 : break;
215 12 : out.push_back(array_body.substr(quote + 1, close - quote - 1));
216 12 : pos = close + 1;
217 : }
218 6 : return out;
219 6 : }
220 :
// True when both optionals are empty, or both hold equal strings.
static bool optional_string_equal(const std::optional<std::string>& a, const std::optional<std::string>& b) {
    // std::optional's operator== already implements exactly this rule.
    return a == b;
}
228 :
// Renders two differing string lists for use in a mismatch message.
//
// Returns an empty vector when `a` and `b` are equal; otherwise returns exactly two
// strings, "[a0,a1,...]" for `a` followed by the same rendering of `b`.
static std::vector<std::string> compare_string_vectors(const std::vector<std::string>& a, const std::vector<std::string>& b) {
    std::vector<std::string> rendered;
    if (a != b) {
        const auto bracketed = [](const std::vector<std::string>& items) {
            std::string text(1, '[');
            const char* separator = "";
            for (const auto& item : items) {
                text += separator;
                text += item;
                separator = ",";
            }
            text += "]";
            return text;
        };
        rendered.push_back(bracketed(a));
        rendered.push_back(bracketed(b));
    }
    return rendered;
}
247 :
// True when both optionals are empty, or both hold the same value.
static bool optional_size_t_equal(const std::optional<std::size_t>& a, const std::optional<std::size_t>& b) {
    if (a && b)
        return *a == *b;
    // At least one side is empty: equal only when both are.
    return !a && !b;
}
255 :
// True when both optionals are empty, or both hold the same value.
static bool optional_ull_equal(const std::optional<unsigned long long>& a, const std::optional<unsigned long long>& b) {
    const bool both_set = a.has_value() && b.has_value();
    const bool both_unset = !a.has_value() && !b.has_value();
    return both_set ? (*a == *b) : both_unset;
}
263 :
// True when both optionals are empty, or both hold values whose absolute difference
// is below 1e-9.
static bool optional_double_equal(const std::optional<double>& a, const std::optional<double>& b) {
    if (a && b)
        return std::fabs(*a - *b) < 1e-9;
    // At least one side is empty: equal only when both are.
    return !a && !b;
}
271 :
272 : } // namespace
273 :
274 4 : BenchmarkData load_benchmark_data_json(const std::string& path) {
275 4 : std::ifstream in(path);
276 4 : if (!in)
277 2 : return {};
278 :
279 6 : std::string json((std::istreambuf_iterator<char>(in)), std::istreambuf_iterator<char>());
280 2 : BenchmarkData data;
281 :
282 6 : if (auto meta_obj = extract_object_by_key(json, "meta")) {
283 2 : auto& meta = data.meta;
284 6 : if (auto v = extract_int_field(*meta_obj, "schema_version"))
285 2 : meta.schema_version = *v;
286 6 : if (auto v = extract_size_t_field(*meta_obj, "size"))
287 2 : meta.size = *v;
288 6 : if (auto v = extract_int_field(*meta_obj, "runs"))
289 2 : meta.runs = *v;
290 6 : if (auto v = extract_int_field(*meta_obj, "warmup_runs"))
291 2 : meta.warmup_runs = *v;
292 6 : if (auto v = extract_int_field(*meta_obj, "bootstrap_iters"))
293 2 : meta.bootstrap_iters = *v;
294 6 : meta.structures = extract_string_array_field(*meta_obj, "structures");
295 6 : if (auto v = extract_string_field(*meta_obj, "pattern"))
296 2 : meta.pattern = *v;
297 6 : meta.seed = extract_ull_field(*meta_obj, "seed");
298 6 : if (auto v = extract_string_field(*meta_obj, "timestamp"))
299 2 : meta.timestamp = *v;
300 6 : if (auto v = extract_string_field(*meta_obj, "cpu_governor"))
301 2 : meta.cpu_governor = *v;
302 6 : if (auto v = extract_string_field(*meta_obj, "git_commit"))
303 2 : meta.git_commit = *v;
304 6 : if (auto v = extract_string_field(*meta_obj, "compiler"))
305 2 : meta.compiler = *v;
306 6 : if (auto v = extract_string_field(*meta_obj, "cpp_standard"))
307 2 : meta.cpp_standard = *v;
308 6 : if (auto v = extract_string_field(*meta_obj, "build_type"))
309 2 : meta.build_type = *v;
310 6 : if (auto v = extract_string_field(*meta_obj, "cpu_model"))
311 2 : meta.cpu_model = *v;
312 6 : if (auto v = extract_string_field(*meta_obj, "profile"))
313 2 : meta.profile = *v;
314 6 : if (auto profile_manifest = extract_object_by_key(*meta_obj, "profile_manifest")) {
315 6 : if (auto v = extract_string_field(*profile_manifest, "selected_profile"))
316 2 : meta.profile_selected = *v;
317 6 : meta.profile_applied_defaults = extract_string_array_field(*profile_manifest, "applied_defaults");
318 6 : meta.profile_explicit_overrides = extract_string_array_field(*profile_manifest, "explicit_overrides");
319 2 : }
320 6 : if (auto v = extract_uint_field(*meta_obj, "cores"))
321 2 : meta.cores = *v;
322 6 : if (auto v = extract_ull_field(*meta_obj, "total_ram_bytes"))
323 2 : meta.total_ram_bytes = *v;
324 6 : if (auto v = extract_string_field(*meta_obj, "kernel"))
325 2 : meta.kernel = *v;
326 6 : if (auto v = extract_string_field(*meta_obj, "hash_strategy"))
327 2 : meta.hash_strategy = *v;
328 6 : meta.hash_capacity = extract_size_t_field(*meta_obj, "hash_capacity");
329 6 : meta.hash_load = extract_double_field(*meta_obj, "hash_load");
330 6 : if (auto v = extract_int_field(*meta_obj, "pinned_cpu"))
331 2 : meta.pinned_cpu = *v;
332 6 : if (auto v = extract_int_field(*meta_obj, "turbo_disabled"))
333 2 : meta.turbo_disabled = (*v != 0);
334 2 : }
335 :
336 6 : auto results_array = extract_array_by_key(json, "results");
337 2 : if (!results_array)
338 0 : return data;
339 :
340 2 : std::size_t pos = 0;
341 4 : while (pos < results_array->size()) {
342 4 : auto obj_start = results_array->find('{', pos);
343 4 : if (obj_start == std::string::npos)
344 2 : break;
345 2 : auto obj_end = find_matching_delim(*results_array, obj_start, '{', '}');
346 2 : if (!obj_end)
347 0 : break;
348 2 : std::string obj = results_array->substr(obj_start + 1, *obj_end - obj_start - 1);
349 2 : pos = *obj_end + 1;
350 :
351 2 : BenchmarkResult r;
352 6 : if (auto v = extract_string_field(obj, "structure"))
353 2 : r.structure = *v;
354 6 : if (auto v = extract_double_field(obj, "insert_ms_mean"))
355 2 : r.insert_ms_mean = *v;
356 6 : if (auto v = extract_double_field(obj, "search_ms_mean"))
357 2 : r.search_ms_mean = *v;
358 6 : if (auto v = extract_double_field(obj, "remove_ms_mean"))
359 2 : r.remove_ms_mean = *v;
360 6 : if (auto v = extract_double_field(obj, "insert_ms_p95"))
361 2 : r.insert_ms_p95 = *v;
362 6 : if (auto v = extract_double_field(obj, "search_ms_p95"))
363 2 : r.search_ms_p95 = *v;
364 6 : if (auto v = extract_double_field(obj, "remove_ms_p95"))
365 2 : r.remove_ms_p95 = *v;
366 6 : if (auto v = extract_double_field(obj, "insert_ci_high"))
367 2 : r.insert_ci_high = *v;
368 6 : if (auto v = extract_double_field(obj, "search_ci_high"))
369 2 : r.search_ci_high = *v;
370 6 : if (auto v = extract_double_field(obj, "remove_ci_high"))
371 2 : r.remove_ci_high = *v;
372 6 : if (auto v = extract_size_t_field(obj, "memory_bytes"))
373 2 : r.memory_bytes = *v;
374 :
375 2 : if (!r.structure.empty())
376 2 : data.results.push_back(r);
377 2 : }
378 :
379 2 : return data;
380 52 : }
381 :
382 2 : std::vector<BenchmarkResult> load_benchmark_results_json(const std::string& path) {
383 2 : return load_benchmark_data_json(path).results;
384 : }
385 :
386 4 : BaselineMetadataReport compare_benchmark_metadata(const BenchmarkMeta& baseline, const BenchmarkMeta& current,
387 : const BaselineConfig& cfg) {
388 4 : BaselineMetadataReport report;
389 :
390 4 : auto require_string_equal = [&report](const std::string& field, const std::string& a, const std::string& b) {
391 20 : if (a != b)
392 2 : report.errors.push_back(field + " mismatch: baseline='" + a + "' current='" + b + "'");
393 20 : };
394 32 : auto require_int_equal = [&report](const std::string& field, auto a, auto b) {
395 28 : if (a != b)
396 0 : report.errors.push_back(field + " mismatch: baseline='" + std::to_string(a) + "' current='" +
397 : std::to_string(b) + "'");
398 32 : };
399 4 : auto require_optional_ull_equal = [&report](const std::string& field, const std::optional<unsigned long long>& a,
400 : const std::optional<unsigned long long>& b) {
401 4 : if (!optional_ull_equal(a, b)) {
402 4 : report.errors.push_back(field + " mismatch: baseline='" + (a ? std::to_string(*a) : std::string("unset")) +
403 6 : "' current='" + (b ? std::to_string(*b) : std::string("unset")) + "'");
404 : }
405 4 : };
406 4 : auto require_optional_size_t_equal = [&report](const std::string& field, const std::optional<std::size_t>& a,
407 : const std::optional<std::size_t>& b) {
408 4 : if (!optional_size_t_equal(a, b)) {
409 0 : report.errors.push_back(field + " mismatch: baseline='" + (a ? std::to_string(*a) : std::string("unset")) +
410 0 : "' current='" + (b ? std::to_string(*b) : std::string("unset")) + "'");
411 : }
412 4 : };
413 4 : auto require_optional_double_equal = [&report](const std::string& field, const std::optional<double>& a,
414 : const std::optional<double>& b) {
415 4 : if (!optional_double_equal(a, b)) {
416 0 : report.errors.push_back(field + " mismatch: baseline='" + (a ? std::to_string(*a) : std::string("unset")) +
417 0 : "' current='" + (b ? std::to_string(*b) : std::string("unset")) + "'");
418 : }
419 4 : };
420 4 : auto warn_string_equal = [&report](const std::string& field, const std::string& a, const std::string& b) {
421 20 : if (a != b)
422 2 : report.warnings.push_back(field + " changed: baseline='" + a + "' current='" + b + "'");
423 20 : };
424 12 : auto warn_int_equal = [&report](const std::string& field, auto a, auto b) {
425 8 : if (a != b)
426 0 : report.warnings.push_back(field + " changed: baseline='" + std::to_string(a) + "' current='" +
427 : std::to_string(b) + "'");
428 12 : };
429 :
430 12 : require_int_equal("schema_version", baseline.schema_version, current.schema_version);
431 12 : require_int_equal("size", baseline.size, current.size);
432 12 : require_int_equal("runs", baseline.runs, current.runs);
433 12 : require_int_equal("warmup_runs", baseline.warmup_runs, current.warmup_runs);
434 12 : require_int_equal("bootstrap_iters", baseline.bootstrap_iters, current.bootstrap_iters);
435 12 : require_string_equal("profile", baseline.profile, current.profile);
436 4 : if (baseline.structures != current.structures) {
437 0 : auto rendered = compare_string_vectors(baseline.structures, current.structures);
438 0 : report.errors.push_back("structures mismatch: baseline='" + rendered[0] + "' current='" + rendered[1] + "'");
439 0 : }
440 12 : require_string_equal("pattern", baseline.pattern, current.pattern);
441 12 : require_optional_ull_equal("seed", baseline.seed, current.seed);
442 12 : require_string_equal("build_type", baseline.build_type, current.build_type);
443 12 : require_string_equal("hash_strategy", baseline.hash_strategy, current.hash_strategy);
444 12 : require_optional_size_t_equal("hash_capacity", baseline.hash_capacity, current.hash_capacity);
445 12 : require_optional_double_equal("hash_load", baseline.hash_load, current.hash_load);
446 12 : require_int_equal("pinned_cpu", baseline.pinned_cpu, current.pinned_cpu);
447 12 : require_int_equal("turbo_disabled", baseline.turbo_disabled ? 1 : 0, current.turbo_disabled ? 1 : 0);
448 :
449 4 : if (cfg.strict_profile_intent) {
450 4 : auto manifest_present = [](const BenchmarkMeta& meta) {
451 8 : return !meta.profile_applied_defaults.empty() || !meta.profile_explicit_overrides.empty() || meta.profile_selected != "custom";
452 : };
453 4 : const bool baseline_has_manifest = manifest_present(baseline);
454 4 : const bool current_has_manifest = manifest_present(current);
455 4 : if (baseline_has_manifest != current_has_manifest) {
456 0 : report.errors.push_back("profile_manifest mismatch: baseline='" + std::string(baseline_has_manifest ? "present" : "missing") +
457 0 : "' current='" + std::string(current_has_manifest ? "present" : "missing") + "'");
458 4 : } else if (baseline_has_manifest && current_has_manifest) {
459 12 : require_string_equal("profile_selected", baseline.profile_selected, current.profile_selected);
460 4 : if (baseline.profile_applied_defaults != current.profile_applied_defaults) {
461 0 : auto rendered = compare_string_vectors(baseline.profile_applied_defaults, current.profile_applied_defaults);
462 0 : report.errors.push_back("profile_applied_defaults mismatch: baseline='" + rendered[0] + "' current='" + rendered[1] + "'");
463 0 : }
464 4 : if (baseline.profile_explicit_overrides != current.profile_explicit_overrides) {
465 1 : auto rendered = compare_string_vectors(baseline.profile_explicit_overrides, current.profile_explicit_overrides);
466 1 : report.errors.push_back("profile_explicit_overrides mismatch: baseline='" + rendered[0] + "' current='" + rendered[1] + "'");
467 1 : }
468 : }
469 : }
470 :
471 12 : warn_string_equal("cpu_model", baseline.cpu_model, current.cpu_model);
472 12 : warn_string_equal("compiler", baseline.compiler, current.compiler);
473 12 : warn_string_equal("cpp_standard", baseline.cpp_standard, current.cpp_standard);
474 12 : warn_string_equal("cpu_governor", baseline.cpu_governor, current.cpu_governor);
475 12 : warn_int_equal("cores", baseline.cores, current.cores);
476 12 : warn_int_equal("total_ram_bytes", baseline.total_ram_bytes, current.total_ram_bytes);
477 12 : warn_string_equal("kernel", baseline.kernel, current.kernel);
478 :
479 4 : report.ok = report.errors.empty();
480 8 : return report;
481 0 : }
482 :
483 7 : BaselineComparison compare_against_baseline(const std::vector<BenchmarkResult>& baseline,
484 : const std::vector<BenchmarkResult>& current, const BaselineConfig& cfg) {
485 7 : BaselineComparison out;
486 :
487 7 : auto scope_name = [&cfg]() {
488 7 : switch (cfg.scope) {
489 5 : case BaselineConfig::MetricScope::MEAN:
490 15 : return std::string("mean");
491 0 : case BaselineConfig::MetricScope::P95:
492 0 : return std::string("p95");
493 0 : case BaselineConfig::MetricScope::CI_HIGH:
494 0 : return std::string("ci_high");
495 2 : case BaselineConfig::MetricScope::ANY:
496 : default:
497 6 : return std::string("any");
498 : }
499 7 : };
500 7 : out.scope = scope_name();
501 :
502 7 : if (baseline.empty() || current.empty())
503 1 : return out;
504 :
505 6 : std::map<std::string, BenchmarkResult> base_map;
506 6 : std::set<std::string> duplicate_baseline_names;
507 16 : for (const auto& b : baseline) {
508 10 : if (base_map.find(b.structure) != base_map.end())
509 2 : duplicate_baseline_names.insert(b.structure);
510 10 : base_map[b.structure] = b;
511 : }
512 :
513 6 : std::set<std::string> current_names;
514 6 : std::set<std::string> duplicate_current_names;
515 16 : for (const auto& cur : current) {
516 10 : if (current_names.find(cur.structure) != current_names.end())
517 2 : duplicate_current_names.insert(cur.structure);
518 10 : current_names.insert(cur.structure);
519 : }
520 :
521 6 : out.coverage.baseline_structure_count = base_map.size();
522 6 : out.coverage.current_structure_count = current_names.size();
523 6 : out.coverage.duplicate_baseline_structures.assign(duplicate_baseline_names.begin(), duplicate_baseline_names.end());
524 6 : out.coverage.duplicate_current_structures.assign(duplicate_current_names.begin(), duplicate_current_names.end());
525 14 : for (const auto& [name, _] : base_map) {
526 8 : if (current_names.find(name) == current_names.end())
527 2 : out.coverage.baseline_only_structures.push_back(name);
528 : }
529 :
530 6 : auto within = [&cfg](double delta) {
531 96 : double absd = std::fabs(delta);
532 96 : if (absd <= cfg.noise_floor_pct)
533 0 : return true;
534 96 : return delta <= cfg.threshold_pct;
535 6 : };
536 :
537 6 : auto select_metric = [&cfg, &within](double mean_delta, double p95_delta, double ci_high_delta) {
538 : struct MetricDecision {
539 : double delta_pct{0.0};
540 : bool ok{true};
541 : std::string basis{"mean"};
542 : };
543 :
544 21 : MetricDecision decision;
545 21 : switch (cfg.scope) {
546 15 : case BaselineConfig::MetricScope::MEAN:
547 15 : decision.delta_pct = mean_delta;
548 15 : decision.ok = within(mean_delta);
549 15 : decision.basis = "mean";
550 15 : return decision;
551 0 : case BaselineConfig::MetricScope::P95:
552 0 : decision.delta_pct = p95_delta;
553 0 : decision.ok = within(p95_delta);
554 0 : decision.basis = "p95";
555 0 : return decision;
556 0 : case BaselineConfig::MetricScope::CI_HIGH:
557 0 : decision.delta_pct = ci_high_delta;
558 0 : decision.ok = within(ci_high_delta);
559 0 : decision.basis = "ci_high";
560 0 : return decision;
561 6 : case BaselineConfig::MetricScope::ANY:
562 : default: {
563 6 : const bool mean_ok = within(mean_delta);
564 6 : const bool p95_ok = within(p95_delta);
565 6 : const bool ci_high_ok = within(ci_high_delta);
566 6 : decision.delta_pct = mean_delta;
567 6 : decision.ok = mean_ok || p95_ok || ci_high_ok;
568 36 : decision.basis = "any(mean=" + std::string(mean_ok ? "ok" : "fail") + ",p95=" +
569 60 : std::string(p95_ok ? "ok" : "fail") + ",ci_high=" +
570 42 : std::string(ci_high_ok ? "ok" : "fail") + ")";
571 6 : return decision;
572 : }
573 : }
574 0 : };
575 :
576 16 : for (const auto& cur : current) {
577 10 : auto it = base_map.find(cur.structure);
578 10 : if (it == base_map.end()) {
579 6 : if (std::find(out.coverage.current_only_structures.begin(), out.coverage.current_only_structures.end(), cur.structure) ==
580 6 : out.coverage.current_only_structures.end()) {
581 2 : out.coverage.current_only_structures.push_back(cur.structure);
582 : }
583 3 : continue;
584 : }
585 7 : ++out.coverage.comparable_structure_count;
586 7 : const auto& b = it->second;
587 7 : BaselineComparison::Entry e;
588 7 : e.structure = cur.structure;
589 :
590 7 : const double insert_mean_delta = pct_delta(b.insert_ms_mean, cur.insert_ms_mean);
591 7 : const double insert_p95_delta = pct_delta(b.insert_ms_p95, cur.insert_ms_p95);
592 7 : const double insert_ci_high_delta = pct_delta(b.insert_ci_high, cur.insert_ci_high);
593 7 : e.insert_mean_delta_pct = insert_mean_delta;
594 7 : e.insert_p95_delta_pct = insert_p95_delta;
595 7 : e.insert_ci_high_delta_pct = insert_ci_high_delta;
596 7 : e.insert_mean_ok = within(insert_mean_delta);
597 7 : e.insert_p95_ok = within(insert_p95_delta);
598 7 : e.insert_ci_high_ok = within(insert_ci_high_delta);
599 7 : const auto insert_decision = select_metric(insert_mean_delta, insert_p95_delta, insert_ci_high_delta);
600 7 : e.insert_delta_pct = insert_decision.delta_pct;
601 7 : e.insert_ok = insert_decision.ok;
602 7 : e.insert_basis = insert_decision.basis;
603 :
604 7 : const double search_mean_delta = pct_delta(b.search_ms_mean, cur.search_ms_mean);
605 7 : const double search_p95_delta = pct_delta(b.search_ms_p95, cur.search_ms_p95);
606 7 : const double search_ci_high_delta = pct_delta(b.search_ci_high, cur.search_ci_high);
607 7 : e.search_mean_delta_pct = search_mean_delta;
608 7 : e.search_p95_delta_pct = search_p95_delta;
609 7 : e.search_ci_high_delta_pct = search_ci_high_delta;
610 7 : e.search_mean_ok = within(search_mean_delta);
611 7 : e.search_p95_ok = within(search_p95_delta);
612 7 : e.search_ci_high_ok = within(search_ci_high_delta);
613 7 : const auto search_decision = select_metric(search_mean_delta, search_p95_delta, search_ci_high_delta);
614 7 : e.search_delta_pct = search_decision.delta_pct;
615 7 : e.search_ok = search_decision.ok;
616 7 : e.search_basis = search_decision.basis;
617 :
618 7 : const double remove_mean_delta = pct_delta(b.remove_ms_mean, cur.remove_ms_mean);
619 7 : const double remove_p95_delta = pct_delta(b.remove_ms_p95, cur.remove_ms_p95);
620 7 : const double remove_ci_high_delta = pct_delta(b.remove_ci_high, cur.remove_ci_high);
621 7 : e.remove_mean_delta_pct = remove_mean_delta;
622 7 : e.remove_p95_delta_pct = remove_p95_delta;
623 7 : e.remove_ci_high_delta_pct = remove_ci_high_delta;
624 7 : e.remove_mean_ok = within(remove_mean_delta);
625 7 : e.remove_p95_ok = within(remove_p95_delta);
626 7 : e.remove_ci_high_ok = within(remove_ci_high_delta);
627 7 : const auto remove_decision = select_metric(remove_mean_delta, remove_p95_delta, remove_ci_high_delta);
628 7 : e.remove_delta_pct = remove_decision.delta_pct;
629 7 : e.remove_ok = remove_decision.ok;
630 7 : e.remove_basis = remove_decision.basis;
631 :
632 7 : auto append_failure = [&out, &cfg, &e](const std::string& operation, bool op_ok, const std::string& basis,
633 : double chosen_delta, bool mean_ok, bool p95_ok, bool ci_high_ok) {
634 21 : if (op_ok)
635 20 : return;
636 1 : BaselineComparison::Failure failure;
637 1 : failure.structure = e.structure;
638 1 : failure.operation = operation;
639 1 : failure.chosen_basis = basis;
640 1 : failure.chosen_delta_pct = chosen_delta;
641 1 : failure.threshold_pct = cfg.threshold_pct;
642 1 : if (!mean_ok)
643 3 : failure.failed_metric_families.push_back("mean");
644 1 : if (!p95_ok)
645 3 : failure.failed_metric_families.push_back("p95");
646 1 : if (!ci_high_ok)
647 3 : failure.failed_metric_families.push_back("ci_high");
648 1 : out.failures.push_back(failure);
649 1 : };
650 :
651 21 : append_failure("insert", e.insert_ok, e.insert_basis, e.insert_delta_pct,
652 7 : e.insert_mean_ok, e.insert_p95_ok, e.insert_ci_high_ok);
653 21 : append_failure("search", e.search_ok, e.search_basis, e.search_delta_pct,
654 7 : e.search_mean_ok, e.search_p95_ok, e.search_ci_high_ok);
655 21 : append_failure("remove", e.remove_ok, e.remove_basis, e.remove_delta_pct,
656 7 : e.remove_mean_ok, e.remove_p95_ok, e.remove_ci_high_ok);
657 :
658 7 : if (!e.insert_ok || !e.search_ok || !e.remove_ok)
659 1 : out.all_ok = false;
660 7 : out.entries.push_back(e);
661 7 : }
662 :
663 6 : const bool has_partial_coverage = !out.coverage.baseline_only_structures.empty() || !out.coverage.current_only_structures.empty();
664 6 : const bool has_duplicates = !out.coverage.duplicate_baseline_structures.empty() || !out.coverage.duplicate_current_structures.empty();
665 6 : if (has_partial_coverage && has_duplicates)
666 1 : out.health = "partial_coverage_with_duplicates";
667 5 : else if (has_partial_coverage)
668 1 : out.health = "partial_coverage";
669 4 : else if (has_duplicates)
670 1 : out.health = "duplicate_inputs";
671 : else
672 3 : out.health = "clean";
673 :
674 6 : if (has_duplicates)
675 2 : out.actionability = "not_actionable";
676 4 : else if (has_partial_coverage)
677 1 : out.actionability = "actionable_with_hygiene_warnings";
678 : else
679 3 : out.actionability = "fully_actionable";
680 :
681 6 : if (out.actionability == "fully_actionable")
682 3 : out.recommended_disposition = "accept";
683 3 : else if (out.actionability == "actionable_with_hygiene_warnings")
684 1 : out.recommended_disposition = "review_with_warnings";
685 : else
686 2 : out.recommended_disposition = "reject_input_hygiene";
687 :
688 6 : if (has_partial_coverage)
689 6 : out.disposition_reasons.push_back("missing_structures");
690 6 : if (!out.coverage.duplicate_baseline_structures.empty())
691 6 : out.disposition_reasons.push_back("duplicate_baseline_structures");
692 6 : if (!out.coverage.duplicate_current_structures.empty())
693 6 : out.disposition_reasons.push_back("duplicate_current_structures");
694 :
695 6 : out.summary.missing_structure_count = out.coverage.baseline_only_structures.size() + out.coverage.current_only_structures.size();
696 6 : out.summary.duplicate_baseline_structure_count = out.coverage.duplicate_baseline_structures.size();
697 6 : out.summary.duplicate_current_structure_count = out.coverage.duplicate_current_structures.size();
698 6 : out.hygiene_issue_count = out.disposition_reasons.size();
699 6 : out.has_hygiene_issues = out.hygiene_issue_count > 0;
700 6 : if (!out.has_hygiene_issues)
701 3 : out.hygiene_gate = "clean";
702 3 : else if (out.actionability == "actionable_with_hygiene_warnings")
703 1 : out.hygiene_gate = "warn";
704 : else
705 2 : out.hygiene_gate = "block";
706 :
707 6 : if (out.entries.empty())
708 0 : out.perf_signal_strength = "none";
709 6 : else if (out.health == "clean")
710 3 : out.perf_signal_strength = "strong";
711 : else
712 3 : out.perf_signal_strength = "limited";
713 :
714 6 : return out;
715 6 : }
716 :
717 1 : void print_baseline_report(const BaselineComparison& report, double threshold_pct, double noise_floor_pct) {
718 1 : if (report.entries.empty()) {
719 0 : std::cout << "[baseline] No comparable structures between baseline and current results.\n";
720 0 : return;
721 : }
722 1 : std::cout << "[baseline] Threshold=" << threshold_pct << "% (noise floor=" << noise_floor_pct << "%)\n";
723 2 : for (const auto& e : report.entries) {
724 1 : auto status = (e.insert_ok && e.search_ok && e.remove_ok) ? "OK" : "FAIL";
725 1 : std::cout << " " << status << " " << e.structure << " insert=" << e.insert_delta_pct << "%"
726 1 : << " search=" << e.search_delta_pct << "%"
727 1 : << " remove=" << e.remove_delta_pct << "%"
728 1 : << "\n";
729 : }
730 1 : if (report.all_ok) {
731 1 : std::cout << "[baseline] All metrics within tolerance." << std::endl;
732 : } else {
733 0 : std::cout << "[baseline] Performance regression detected." << std::endl;
734 : }
735 : }
736 :
737 1 : void print_baseline_metadata_report(const BaselineMetadataReport& report) {
738 1 : if (report.errors.empty() && report.warnings.empty()) {
739 0 : std::cout << "[baseline-meta] Benchmark metadata is compatible." << std::endl;
740 0 : return;
741 : }
742 1 : for (const auto& warning : report.warnings)
743 0 : std::cout << "[baseline-meta] WARN " << warning << std::endl;
744 4 : for (const auto& error : report.errors)
745 3 : std::cout << "[baseline-meta] ERROR " << error << std::endl;
746 1 : if (report.ok)
747 0 : std::cout << "[baseline-meta] Metadata is comparable with warnings." << std::endl;
748 : else
749 1 : std::cout << "[baseline-meta] Metadata mismatch invalidates comparison." << std::endl;
750 : }
751 :
752 1 : void write_baseline_report_json(const std::string& path, const BaselineReport& report) {
753 1 : std::ofstream out(path);
754 1 : if (!out)
755 0 : return;
756 :
757 1 : auto write_string_array = [&out](const std::vector<std::string>& values) {
758 7 : out << "[";
759 14 : for (std::size_t i = 0; i < values.size(); ++i) {
760 7 : out << "\"" << values[i] << "\"";
761 7 : if (i + 1 < values.size())
762 2 : out << ",";
763 : }
764 7 : out << "]";
765 8 : };
766 :
767 1 : out << "{\n";
768 1 : out << " \"baseline_path\": \"" << report.baseline_path << "\",\n";
769 1 : out << " \"scope\": \"" << report.scope << "\",\n";
770 1 : out << " \"threshold_pct\": " << report.threshold_pct << ",\n";
771 1 : out << " \"noise_floor_pct\": " << report.noise_floor_pct << ",\n";
772 1 : out << " \"strict_profile_intent\": " << (report.strict_profile_intent ? "true" : "false") << ",\n";
773 1 : out << " \"exit_code\": " << report.exit_code << ",\n";
774 1 : out << " \"metadata\": {\n";
775 1 : out << " \"ok\": " << (report.metadata.ok ? "true" : "false") << ",\n";
776 1 : out << " \"errors\": ";
777 1 : write_string_array(report.metadata.errors);
778 1 : out << ",\n";
779 1 : out << " \"warnings\": ";
780 1 : write_string_array(report.metadata.warnings);
781 1 : out << "\n },\n";
782 1 : out << " \"comparison\": {\n";
783 1 : out << " \"all_ok\": " << (report.comparison.all_ok ? "true" : "false") << ",\n";
784 1 : out << " \"decision_basis\": \"" << report.comparison.scope << "\",\n";
785 1 : out << " \"health\": \"" << report.comparison.health << "\",\n";
786 1 : out << " \"actionability\": \"" << report.comparison.actionability << "\",\n";
787 1 : out << " \"recommended_disposition\": \"" << report.comparison.recommended_disposition << "\",\n";
788 1 : out << " \"disposition_reasons\": ";
789 1 : write_string_array(report.comparison.disposition_reasons);
790 1 : out << ",\n";
791 1 : out << " \"summary\": {\"missing_structure_count\": " << report.comparison.summary.missing_structure_count
792 1 : << ", \"duplicate_baseline_structure_count\": " << report.comparison.summary.duplicate_baseline_structure_count
793 1 : << ", \"duplicate_current_structure_count\": " << report.comparison.summary.duplicate_current_structure_count << "},\n";
794 1 : out << " \"has_hygiene_issues\": " << (report.comparison.has_hygiene_issues ? "true" : "false") << ",\n";
795 1 : out << " \"hygiene_issue_count\": " << report.comparison.hygiene_issue_count << ",\n";
796 1 : out << " \"hygiene_gate\": \"" << report.comparison.hygiene_gate << "\",\n";
797 1 : out << " \"perf_signal_strength\": \"" << report.comparison.perf_signal_strength << "\",\n";
798 1 : out << " \"coverage\": {\n";
799 1 : out << " \"baseline_structure_count\": " << report.comparison.coverage.baseline_structure_count << ",\n";
800 1 : out << " \"current_structure_count\": " << report.comparison.coverage.current_structure_count << ",\n";
801 1 : out << " \"comparable_structure_count\": " << report.comparison.coverage.comparable_structure_count << ",\n";
802 1 : out << " \"baseline_only_structures\": ";
803 1 : write_string_array(report.comparison.coverage.baseline_only_structures);
804 1 : out << ",\n";
805 1 : out << " \"current_only_structures\": ";
806 1 : write_string_array(report.comparison.coverage.current_only_structures);
807 1 : out << ",\n";
808 1 : out << " \"duplicate_baseline_structures\": ";
809 1 : write_string_array(report.comparison.coverage.duplicate_baseline_structures);
810 1 : out << ",\n";
811 1 : out << " \"duplicate_current_structures\": ";
812 1 : write_string_array(report.comparison.coverage.duplicate_current_structures);
813 1 : out << "\n },\n";
814 1 : out << " \"failures\": [\n";
815 1 : for (std::size_t i = 0; i < report.comparison.failures.size(); ++i) {
816 0 : const auto& f = report.comparison.failures[i];
817 0 : out << " {\"structure\": \"" << f.structure << "\", "
818 0 : << "\"operation\": \"" << f.operation << "\", "
819 0 : << "\"chosen_basis\": \"" << f.chosen_basis << "\", "
820 0 : << "\"chosen_delta_pct\": " << f.chosen_delta_pct << ", "
821 0 : << "\"threshold_pct\": " << f.threshold_pct << ", "
822 0 : << "\"failed_metric_families\": ";
823 0 : write_string_array(f.failed_metric_families);
824 0 : out << "}" << (i + 1 < report.comparison.failures.size() ? "," : "") << "\n";
825 : }
826 1 : out << " ],\n";
827 1 : out << " \"entries\": [\n";
828 2 : for (std::size_t i = 0; i < report.comparison.entries.size(); ++i) {
829 1 : const auto& e = report.comparison.entries[i];
830 1 : out << " {\"structure\": \"" << e.structure << "\", "
831 2 : << "\"insert_delta_pct\": " << e.insert_delta_pct << ", "
832 1 : << "\"search_delta_pct\": " << e.search_delta_pct << ", "
833 1 : << "\"remove_delta_pct\": " << e.remove_delta_pct << ", "
834 1 : << "\"insert_ok\": " << (e.insert_ok ? "true" : "false") << ", "
835 1 : << "\"search_ok\": " << (e.search_ok ? "true" : "false") << ", "
836 1 : << "\"remove_ok\": " << (e.remove_ok ? "true" : "false") << ", "
837 1 : << "\"insert_basis\": \"" << e.insert_basis << "\", "
838 1 : << "\"search_basis\": \"" << e.search_basis << "\", "
839 1 : << "\"remove_basis\": \"" << e.remove_basis << "\", "
840 7 : << "\"insert_mean_delta_pct\": " << e.insert_mean_delta_pct << ", "
841 1 : << "\"insert_p95_delta_pct\": " << e.insert_p95_delta_pct << ", "
842 1 : << "\"insert_ci_high_delta_pct\": " << e.insert_ci_high_delta_pct << ", "
843 1 : << "\"insert_mean_ok\": " << (e.insert_mean_ok ? "true" : "false") << ", "
844 1 : << "\"insert_p95_ok\": " << (e.insert_p95_ok ? "true" : "false") << ", "
845 1 : << "\"insert_ci_high_ok\": " << (e.insert_ci_high_ok ? "true" : "false") << ", "
846 4 : << "\"search_mean_delta_pct\": " << e.search_mean_delta_pct << ", "
847 1 : << "\"search_p95_delta_pct\": " << e.search_p95_delta_pct << ", "
848 1 : << "\"search_ci_high_delta_pct\": " << e.search_ci_high_delta_pct << ", "
849 1 : << "\"search_mean_ok\": " << (e.search_mean_ok ? "true" : "false") << ", "
850 1 : << "\"search_p95_ok\": " << (e.search_p95_ok ? "true" : "false") << ", "
851 1 : << "\"search_ci_high_ok\": " << (e.search_ci_high_ok ? "true" : "false") << ", "
852 4 : << "\"remove_mean_delta_pct\": " << e.remove_mean_delta_pct << ", "
853 1 : << "\"remove_p95_delta_pct\": " << e.remove_p95_delta_pct << ", "
854 1 : << "\"remove_ci_high_delta_pct\": " << e.remove_ci_high_delta_pct << ", "
855 1 : << "\"remove_mean_ok\": " << (e.remove_mean_ok ? "true" : "false") << ", "
856 1 : << "\"remove_p95_ok\": " << (e.remove_p95_ok ? "true" : "false") << ", "
857 1 : << "\"remove_ci_high_ok\": " << (e.remove_ci_high_ok ? "true" : "false") << "}"
858 4 : << (i + 1 < report.comparison.entries.size() ? "," : "") << "\n";
859 : }
860 1 : out << " ]\n }\n}\n";
861 1 : }
862 :
863 : } // namespace hashbrowns
|