Compare commits

...

1 Commits

Author SHA1 Message Date
Claude
5458ea64dc test: reproduce customer SearchProvider brave alert investigation
Adds three formula evaluator tests that mirror the customer's rule shape
(B/A with count() on traces, fillzero on B, AllTheTimes>0.1) and isolate
the only data shape that produces the observed alert value 3/28=0.10714:

- TestCustomerSearchProviderBraveBug: dense A + B already zero-filled
  (a single non-zero bucket) + canDefaultZero -> F1 dense, mostly zeros,
  min=0. AllTheTimes(>0.1) cannot fire.
- TestCustomerSearchProviderBraveWithFillZero: same as above but with
  fillzero applied to B in advance (mirroring postProcessBuilderQuery
  ordering). Same outcome: F1 has 29 zeros + one 0.10714 spike.
- TestCustomerSearchProviderBraveNoDefaultZero: canDefaultZero[B] off ->
  F1 sparse, single point = 3/28 = 0.10714 exactly. This is the only
  shape consistent with the customer's reported alert value.

Used to triangulate why the alert fired when it shouldn't have under
the documented canDefaultZero/fillzero behavior.

https://claude.ai/code/session_01WDvQNt2NZWzzi69nr1PpcE
2026-05-05 21:53:43 +00:00

View File

@@ -1051,3 +1051,201 @@ func TestAbsValueExpression(t *testing.T) {
assert.Equal(t, 15.0, series.Values[0].Value) // |10| + |5| = 15
assert.Equal(t, 24.0, series.Values[1].Value) // |20| + |4| = 24
}
// TestCustomerSearchProviderBraveBug reproduces the customer's rule shape with
// both operands allowed to default to zero:
//
//	A:  count() filtered by name like 'SearchProvider%' and env=PROD, group by
//	    name, having count() > 10 (assumed already filtered). Modelled as dense
//	    for "SearchProvider brave".
//	B:  count() filtered by ... and status_code_string='Error', group by name.
//	    The raw data is sparse, but the rule applies fillzero BEFORE the
//	    formula, so B is modelled here as already dense: zero everywhere except
//	    a single bucket.
//	F1: B / A
//
// Expectation given canDefaultZero[A]=canDefaultZero[B]=true: F1 has one point
// per A timestamp, all zero except a single non-zero spike.
func TestCustomerSearchProviderBraveBug(t *testing.T) {
	const (
		buckets     = 30 // timestamps 1..buckets form the lookback window
		spikeBucket = 27 // stands in for the customer's 16:33 bucket
	)

	// A is dense at every bucket (Having count()>10 satisfied everywhere).
	// Values 11..40 are illustrative and deliberately distinct per bucket.
	aValues := map[int64]float64{}
	for ts := int64(1); ts <= buckets; ts++ {
		aValues[ts] = float64(10 + ts) // 11..40, all > 10
	}

	// B post-fillzero: dense across the same buckets. Only spikeBucket carries
	// errors (3 = the customer's 2+1); every other bucket is 0 because fillzero
	// on B runs before the formula.
	bValues := map[int64]float64{}
	for ts := int64(1); ts <= buckets; ts++ {
		bValues[ts] = 0
	}
	// 3/A[27] = 3/37 is not the customer's exact ratio, but the shape matches:
	// most points 0, one point in the ~0.08-0.11 range.
	bValues[spikeBucket] = 3

	tsData := map[string]*TimeSeriesData{
		"A": createFormulaTestTimeSeriesData("A", []*TimeSeries{
			{
				Labels: createLabels(map[string]string{"name": "SearchProvider brave"}),
				Values: createValues(aValues),
			},
		}),
		"B": createFormulaTestTimeSeriesData("B", []*TimeSeries{
			{
				Labels: createLabels(map[string]string{"name": "SearchProvider brave"}),
				Values: createValues(bValues),
			},
		}),
	}

	evaluator, err := NewFormulaEvaluator("B / A", map[string]bool{"A": true, "B": true})
	require.NoError(t, err)

	result, err := evaluator.EvaluateFormula(tsData)
	require.NoError(t, err)
	require.NotNil(t, result)
	require.Equal(t, 1, len(result), "should have one F1 series for SearchProvider brave")

	f1 := result[0]
	t.Logf("F1 series for SearchProvider brave has %d datapoints", len(f1.Values))

	zeroCount := 0
	nonZeroCount := 0
	var nonZeroValues []float64
	for _, v := range f1.Values {
		if v.Value == 0 {
			zeroCount++
		} else {
			nonZeroCount++
			nonZeroValues = append(nonZeroValues, v.Value)
		}
	}
	t.Logf("F1: zeros=%d, non-zeros=%d, non-zero values=%v", zeroCount, nonZeroCount, nonZeroValues)

	// Customer-bug hypothesis A: F1 has dense 30 datapoints, mostly 0.
	// If these assertions hold, AllTheTimes(>0.1) MUST return false (since
	// min=0), and the alert should not have fired -- contradicting the
	// observed behavior.
	require.Equal(t, buckets, len(f1.Values), "F1 should be dense at all 30 A-timestamps when canDefaultZero[B]=true")
	require.Equal(t, buckets-1, zeroCount, "F1 should have 29 zero datapoints")
	require.Equal(t, 1, nonZeroCount, "F1 should have 1 non-zero datapoint")

	// The counts alone would still pass if the evaluator produced a wrong
	// ratio, so pin the spike itself. A is distinct per bucket, so 3/37 can
	// only originate from spikeBucket.
	require.InDelta(t, 3.0/float64(10+spikeBucket), nonZeroValues[0], 1e-9,
		"the single non-zero F1 point should equal B[27]/A[27] = 3/37")
}
// TestCustomerSearchProviderBraveWithFillZero applies fillzero to B (as the
// customer's rule does) BEFORE running the formula -- mirroring the v5 querier
// pipeline order: postProcessBuilderQuery -> applyFormulas. It then evaluates
// B / A with canDefaultZero enabled for both operands and checks that an
// AllTheTimes(>0.1) condition over the resulting F1 series would not fire.
func TestCustomerSearchProviderBraveWithFillZero(t *testing.T) {
	// 30 buckets at 1, 2, ..., 30. A is dense at all of them, all > 10.
	aValues := map[int64]float64{}
	for ts := int64(1); ts <= 30; ts++ {
		aValues[ts] = 28
	}

	// B raw: 1 datapoint at ts=27 (= the 16:33 bucket), value 3.
	bSeries := &TimeSeries{
		Labels: createLabels(map[string]string{"name": "SearchProvider brave"}),
		Values: createValues(map[int64]float64{27: 3}),
	}

	// Apply fillzero to B as the v5 pipeline would, over the window [1, 30]
	// with step 1.
	// NOTE(review): this assumes funcFillZero treats start, end and step in the
	// same unit as the synthetic timestamps. If it scales step internally
	// (e.g. seconds -> milliseconds), the filled series will not cover buckets
	// 1..30 and may drop the ts=27 point -- confirm against funcFillZero and
	// the datapoint count logged below.
	bFilled := funcFillZero(bSeries, 1, 30, 1)
	t.Logf("B after fillzero: %d datapoints", len(bFilled.Values))

	tsData := map[string]*TimeSeriesData{
		"A": createFormulaTestTimeSeriesData("A", []*TimeSeries{
			{
				Labels: createLabels(map[string]string{"name": "SearchProvider brave"}),
				Values: createValues(aValues),
			},
		}),
		"B": createFormulaTestTimeSeriesData("B", []*TimeSeries{bFilled}),
	}

	evaluator, err := NewFormulaEvaluator("B / A", map[string]bool{"A": true, "B": true})
	require.NoError(t, err)

	result, err := evaluator.EvaluateFormula(tsData)
	require.NoError(t, err)
	require.NotNil(t, result)
	require.Equal(t, 1, len(result))

	f1 := result[0]
	t.Logf("F1 with fillzero(B) has %d datapoints", len(f1.Values))

	// An empty F1 would make the AllTheTimes emulation below vacuous, and
	// min/max are seeded from the first datapoint, so require one up front.
	require.NotEmpty(t, f1.Values, "F1 must contain datapoints for the AllTheTimes check to be meaningful")

	zeroCount := 0
	nonZeroCount := 0
	// Seed from real data rather than fixed sentinels so the logged min/max
	// stay correct even if the values fall outside [0, 1].
	minVal, maxVal := f1.Values[0].Value, f1.Values[0].Value
	// AllTheTimes(>0.1) check: would the alert fire?
	allAbove := true
	for _, v := range f1.Values {
		if v.Value == 0 {
			zeroCount++
		} else {
			nonZeroCount++
		}
		if v.Value < minVal {
			minVal = v.Value
		}
		if v.Value > maxVal {
			maxVal = v.Value
		}
		// Written as a negated ">" on purpose: a NaN point also counts as
		// "not above the threshold".
		if !(v.Value > 0.1) {
			allAbove = false
		}
	}
	t.Logf("F1 stats: zeros=%d non-zeros=%d min=%v max=%v", zeroCount, nonZeroCount, minVal, maxVal)
	t.Logf("AllTheTimes(>0.1) over F1 -> %v (should be FALSE for the customer's data)", allAbove)

	require.False(t, allAbove, "AllTheTimes(>0.1) must NOT fire when F1 has zero-valued points")
}
// TestCustomerSearchProviderBraveNoDefaultZero models the alternative hypothesis:
// for some reason canDefaultZero[B] is effectively false (e.g. fillzero was NOT
// applied, or the prefix check rejected B's expression). F1 then only emits at
// timestamps where BOTH A and B have raw values, producing a sparse F1 whose
// only point(s) are above 0.1 -- which matches the alert firing with min=0.10714.
func TestCustomerSearchProviderBraveNoDefaultZero(t *testing.T) {
// A dense at all 30 timestamps, all > 10
aValues := map[int64]float64{}
for ts := int64(1); ts <= 30; ts++ {
aValues[ts] = 28 // pick 28 so 3/28 = 0.10714, matching customer's alert value
}
// B raw (NOT post-fillzero): only one datapoint at ts=27 (the 16:33 bucket).
bValues := map[int64]float64{27: 3}
tsData := map[string]*TimeSeriesData{
"A": createFormulaTestTimeSeriesData("A", []*TimeSeries{
{
Labels: createLabels(map[string]string{"name": "SearchProvider brave"}),
Values: createValues(aValues),
},
}),
"B": createFormulaTestTimeSeriesData("B", []*TimeSeries{
{
Labels: createLabels(map[string]string{"name": "SearchProvider brave"}),
Values: createValues(bValues),
},
}),
}
// Disable canDefaultZero for B to model "fillzero/canDefaultZero not effective".
evaluator, err := NewFormulaEvaluator("B / A", map[string]bool{"A": true, "B": false})
require.NoError(t, err)
result, err := evaluator.EvaluateFormula(tsData)
require.NoError(t, err)
require.NotNil(t, result)
require.Equal(t, 1, len(result))
f1 := result[0]
t.Logf("F1 (canDefaultZero[B]=false) has %d datapoints", len(f1.Values))
for i, v := range f1.Values {
t.Logf(" [%d] ts=%d value=%v", i, v.Timestamp, v.Value)
}
// Sparse: only one F1 point, equal to 3/28 = 0.10714... -- alert fires with this min.
require.Equal(t, 1, len(f1.Values))
require.InDelta(t, 3.0/28.0, f1.Values[0].Value, 1e-9)
}