mirror of
https://github.com/SigNoz/signoz.git
synced 2026-02-03 08:33:26 +00:00
fix: add missing data for promql and anomaly rules (#10097)
This commit is contained in:
committed by
GitHub
parent
858cd287fa
commit
c79373314a
@@ -50,7 +50,7 @@ type GetAnomaliesResponse struct {
|
||||
//
|
||||
// ^ ^
|
||||
// | |
|
||||
// (rounded value for past peiod) + (seasonal growth)
|
||||
// (rounded value for past period) + (seasonal growth)
|
||||
//
|
||||
// score = abs(value - prediction) / stddev (current_season_query)
|
||||
type anomalyQueryParams struct {
|
||||
@@ -74,12 +74,12 @@ type anomalyQueryParams struct {
|
||||
// : For daily seasonality, this is the query range params for the (now-2d-5m, now-1d)
|
||||
// : For hourly seasonality, this is the query range params for the (now-2h-5m, now-1h)
|
||||
PastSeasonQuery *v3.QueryRangeParamsV3
|
||||
// Past2SeasonQuery is the query range params for past 2 seasonal period to the current season
|
||||
// Past2SeasonQuery is the query range params for past 2 seasonal periods to the current season
|
||||
// Example: For weekly seasonality, this is the query range params for the (now-3w-5m, now-2w)
|
||||
// : For daily seasonality, this is the query range params for the (now-3d-5m, now-2d)
|
||||
// : For hourly seasonality, this is the query range params for the (now-3h-5m, now-2h)
|
||||
Past2SeasonQuery *v3.QueryRangeParamsV3
|
||||
// Past3SeasonQuery is the query range params for past 3 seasonal period to the current season
|
||||
// Past3SeasonQuery is the query range params for past 3 seasonal periods to the current season
|
||||
// Example: For weekly seasonality, this is the query range params for the (now-4w-5m, now-3w)
|
||||
// : For daily seasonality, this is the query range params for the (now-4d-5m, now-3d)
|
||||
// : For hourly seasonality, this is the query range params for the (now-4h-5m, now-3h)
|
||||
|
||||
@@ -234,6 +234,11 @@ func (r *AnomalyRule) buildAndRunQuery(ctx context.Context, orgID valuer.UUID, t
|
||||
}
|
||||
}
|
||||
|
||||
hasData := len(queryResult.AnomalyScores) > 0
|
||||
if missingDataAlert := r.HandleMissingDataAlert(ctx, ts, hasData); missingDataAlert != nil {
|
||||
return ruletypes.Vector{*missingDataAlert}, nil
|
||||
}
|
||||
|
||||
var resultVector ruletypes.Vector
|
||||
|
||||
scoresJSON, _ := json.Marshal(queryResult.AnomalyScores)
|
||||
@@ -285,6 +290,11 @@ func (r *AnomalyRule) buildAndRunQueryV5(ctx context.Context, orgID valuer.UUID,
|
||||
|
||||
queryResult := transition.ConvertV5TimeSeriesDataToV4Result(qbResult)
|
||||
|
||||
hasData := len(queryResult.AnomalyScores) > 0
|
||||
if missingDataAlert := r.HandleMissingDataAlert(ctx, ts, hasData); missingDataAlert != nil {
|
||||
return ruletypes.Vector{*missingDataAlert}, nil
|
||||
}
|
||||
|
||||
var resultVector ruletypes.Vector
|
||||
|
||||
scoresJSON, _ := json.Marshal(queryResult.AnomalyScores)
|
||||
|
||||
268
ee/query-service/rules/anomaly_test.go
Normal file
268
ee/query-service/rules/anomaly_test.go
Normal file
@@ -0,0 +1,268 @@
|
||||
package rules
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/SigNoz/signoz/ee/query-service/anomaly"
|
||||
"github.com/SigNoz/signoz/pkg/instrumentation/instrumentationtest"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/app/clickhouseReader"
|
||||
v3 "github.com/SigNoz/signoz/pkg/query-service/model/v3"
|
||||
"github.com/SigNoz/signoz/pkg/telemetrystore"
|
||||
"github.com/SigNoz/signoz/pkg/telemetrystore/telemetrystoretest"
|
||||
"github.com/SigNoz/signoz/pkg/types/ruletypes"
|
||||
"github.com/SigNoz/signoz/pkg/valuer"
|
||||
)
|
||||
|
||||
// mockAnomalyProvider is a mock implementation of anomaly.Provider for testing.
|
||||
// We need this because the anomaly provider makes 6 different queries for various
|
||||
// time periods (current, past period, current season, past season, past 2 seasons,
|
||||
// past 3 seasons), making it cumbersome to create mock data.
|
||||
type mockAnomalyProvider struct {
|
||||
responses []*anomaly.GetAnomaliesResponse
|
||||
callCount int
|
||||
}
|
||||
|
||||
func (m *mockAnomalyProvider) GetAnomalies(ctx context.Context, orgID valuer.UUID, req *anomaly.GetAnomaliesRequest) (*anomaly.GetAnomaliesResponse, error) {
|
||||
if m.callCount >= len(m.responses) {
|
||||
return &anomaly.GetAnomaliesResponse{Results: []*v3.Result{}}, nil
|
||||
}
|
||||
resp := m.responses[m.callCount]
|
||||
m.callCount++
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
func TestAnomalyRule_NoData_AlertOnAbsent(t *testing.T) {
|
||||
// Test basic AlertOnAbsent functionality (without AbsentFor grace period)
|
||||
|
||||
baseTime := time.Unix(1700000000, 0)
|
||||
evalWindow := 5 * time.Minute
|
||||
evalTime := baseTime.Add(5 * time.Minute)
|
||||
|
||||
target := 500.0
|
||||
|
||||
postableRule := ruletypes.PostableRule{
|
||||
AlertName: "Test anomaly no data",
|
||||
AlertType: ruletypes.AlertTypeMetric,
|
||||
RuleType: RuleTypeAnomaly,
|
||||
Evaluation: &ruletypes.EvaluationEnvelope{Kind: ruletypes.RollingEvaluation, Spec: ruletypes.RollingWindow{
|
||||
EvalWindow: ruletypes.Duration(evalWindow),
|
||||
Frequency: ruletypes.Duration(1 * time.Minute),
|
||||
}},
|
||||
RuleCondition: &ruletypes.RuleCondition{
|
||||
CompareOp: ruletypes.ValueIsAbove,
|
||||
MatchType: ruletypes.AtleastOnce,
|
||||
Target: &target,
|
||||
CompositeQuery: &v3.CompositeQuery{
|
||||
QueryType: v3.QueryTypeBuilder,
|
||||
BuilderQueries: map[string]*v3.BuilderQuery{
|
||||
"A": {
|
||||
QueryName: "A",
|
||||
Expression: "A",
|
||||
DataSource: v3.DataSourceMetrics,
|
||||
Temporality: v3.Unspecified,
|
||||
},
|
||||
},
|
||||
},
|
||||
SelectedQuery: "A",
|
||||
Seasonality: "daily",
|
||||
Thresholds: &ruletypes.RuleThresholdData{
|
||||
Kind: ruletypes.BasicThresholdKind,
|
||||
Spec: ruletypes.BasicRuleThresholds{{
|
||||
Name: "Test anomaly no data",
|
||||
TargetValue: &target,
|
||||
MatchType: ruletypes.AtleastOnce,
|
||||
CompareOp: ruletypes.ValueIsAbove,
|
||||
}},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
responseNoData := &anomaly.GetAnomaliesResponse{
|
||||
Results: []*v3.Result{
|
||||
{
|
||||
QueryName: "A",
|
||||
AnomalyScores: []*v3.Series{},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
cases := []struct {
|
||||
description string
|
||||
alertOnAbsent bool
|
||||
expectAlerts int
|
||||
}{
|
||||
{
|
||||
description: "AlertOnAbsent=false",
|
||||
alertOnAbsent: false,
|
||||
expectAlerts: 0,
|
||||
},
|
||||
{
|
||||
description: "AlertOnAbsent=true",
|
||||
alertOnAbsent: true,
|
||||
expectAlerts: 1,
|
||||
},
|
||||
}
|
||||
|
||||
logger := instrumentationtest.New().Logger()
|
||||
|
||||
for _, c := range cases {
|
||||
t.Run(c.description, func(t *testing.T) {
|
||||
postableRule.RuleCondition.AlertOnAbsent = c.alertOnAbsent
|
||||
|
||||
telemetryStore := telemetrystoretest.New(telemetrystore.Config{}, nil)
|
||||
options := clickhouseReader.NewOptions("primaryNamespace")
|
||||
reader := clickhouseReader.NewReader(nil, telemetryStore, nil, "", time.Second, nil, nil, options)
|
||||
|
||||
rule, err := NewAnomalyRule(
|
||||
"test-anomaly-rule",
|
||||
valuer.GenerateUUID(),
|
||||
&postableRule,
|
||||
reader,
|
||||
nil,
|
||||
logger,
|
||||
nil,
|
||||
)
|
||||
require.NoError(t, err)
|
||||
|
||||
rule.provider = &mockAnomalyProvider{
|
||||
responses: []*anomaly.GetAnomaliesResponse{responseNoData},
|
||||
}
|
||||
|
||||
alertsFound, err := rule.Eval(context.Background(), evalTime)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, c.expectAlerts, alertsFound)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestAnomalyRule_NoData_AbsentFor(t *testing.T) {
|
||||
// Test missing data alert with AbsentFor grace period
|
||||
// 1. Call Eval with data at time t1, to populate lastTimestampWithDatapoints
|
||||
// 2. Call Eval without data at time t2
|
||||
// 3. Alert fires only if t2 - t1 > AbsentFor
|
||||
|
||||
baseTime := time.Unix(1700000000, 0)
|
||||
evalWindow := 5 * time.Minute
|
||||
|
||||
// Set target higher than test data so regular threshold alerts don't fire
|
||||
target := 500.0
|
||||
|
||||
postableRule := ruletypes.PostableRule{
|
||||
AlertName: "Test anomaly no data with AbsentFor",
|
||||
AlertType: ruletypes.AlertTypeMetric,
|
||||
RuleType: RuleTypeAnomaly,
|
||||
Evaluation: &ruletypes.EvaluationEnvelope{Kind: ruletypes.RollingEvaluation, Spec: ruletypes.RollingWindow{
|
||||
EvalWindow: ruletypes.Duration(evalWindow),
|
||||
Frequency: ruletypes.Duration(time.Minute),
|
||||
}},
|
||||
RuleCondition: &ruletypes.RuleCondition{
|
||||
CompareOp: ruletypes.ValueIsAbove,
|
||||
MatchType: ruletypes.AtleastOnce,
|
||||
AlertOnAbsent: true,
|
||||
Target: &target,
|
||||
CompositeQuery: &v3.CompositeQuery{
|
||||
QueryType: v3.QueryTypeBuilder,
|
||||
BuilderQueries: map[string]*v3.BuilderQuery{
|
||||
"A": {
|
||||
QueryName: "A",
|
||||
Expression: "A",
|
||||
DataSource: v3.DataSourceMetrics,
|
||||
Temporality: v3.Unspecified,
|
||||
},
|
||||
},
|
||||
},
|
||||
SelectedQuery: "A",
|
||||
Seasonality: "daily",
|
||||
Thresholds: &ruletypes.RuleThresholdData{
|
||||
Kind: ruletypes.BasicThresholdKind,
|
||||
Spec: ruletypes.BasicRuleThresholds{{
|
||||
Name: "Test anomaly no data with AbsentFor",
|
||||
TargetValue: &target,
|
||||
MatchType: ruletypes.AtleastOnce,
|
||||
CompareOp: ruletypes.ValueIsAbove,
|
||||
}},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
responseNoData := &anomaly.GetAnomaliesResponse{
|
||||
Results: []*v3.Result{
|
||||
{
|
||||
QueryName: "A",
|
||||
AnomalyScores: []*v3.Series{},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
cases := []struct {
|
||||
description string
|
||||
absentFor uint64
|
||||
timeBetweenEvals time.Duration
|
||||
expectAlertOnEval2 int
|
||||
}{
|
||||
{
|
||||
description: "WithinGracePeriod",
|
||||
absentFor: 5,
|
||||
timeBetweenEvals: 4 * time.Minute,
|
||||
expectAlertOnEval2: 0,
|
||||
},
|
||||
{
|
||||
description: "AfterGracePeriod",
|
||||
absentFor: 5,
|
||||
timeBetweenEvals: 6 * time.Minute,
|
||||
expectAlertOnEval2: 1,
|
||||
},
|
||||
}
|
||||
|
||||
logger := instrumentationtest.New().Logger()
|
||||
|
||||
for _, c := range cases {
|
||||
t.Run(c.description, func(t *testing.T) {
|
||||
postableRule.RuleCondition.AbsentFor = c.absentFor
|
||||
|
||||
t1 := baseTime.Add(5 * time.Minute)
|
||||
t2 := t1.Add(c.timeBetweenEvals)
|
||||
|
||||
responseWithData := &anomaly.GetAnomaliesResponse{
|
||||
Results: []*v3.Result{
|
||||
{
|
||||
QueryName: "A",
|
||||
AnomalyScores: []*v3.Series{
|
||||
{
|
||||
Labels: map[string]string{"test": "label"},
|
||||
Points: []v3.Point{
|
||||
{Timestamp: baseTime.UnixMilli(), Value: 1.0},
|
||||
{Timestamp: baseTime.Add(time.Minute).UnixMilli(), Value: 1.5},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
telemetryStore := telemetrystoretest.New(telemetrystore.Config{}, nil)
|
||||
options := clickhouseReader.NewOptions("primaryNamespace")
|
||||
reader := clickhouseReader.NewReader(nil, telemetryStore, nil, "", time.Second, nil, nil, options)
|
||||
|
||||
rule, err := NewAnomalyRule("test-anomaly-rule", valuer.GenerateUUID(), &postableRule, reader, nil, logger, nil)
|
||||
require.NoError(t, err)
|
||||
|
||||
rule.provider = &mockAnomalyProvider{
|
||||
responses: []*anomaly.GetAnomaliesResponse{responseWithData, responseNoData},
|
||||
}
|
||||
|
||||
alertsFound1, err := rule.Eval(context.Background(), t1)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, 0, alertsFound1, "First eval with data should not alert")
|
||||
|
||||
alertsFound2, err := rule.Eval(context.Background(), t2)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, c.expectAlertOnEval2, alertsFound2)
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -8,9 +8,11 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/errors"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/constants"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/interfaces"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/model"
|
||||
v3 "github.com/SigNoz/signoz/pkg/query-service/model/v3"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/utils/labels"
|
||||
qslabels "github.com/SigNoz/signoz/pkg/query-service/utils/labels"
|
||||
"github.com/SigNoz/signoz/pkg/queryparser"
|
||||
"github.com/SigNoz/signoz/pkg/sqlstore"
|
||||
@@ -741,3 +743,26 @@ func (r *BaseRule) FilterNewSeries(ctx context.Context, ts time.Time, series []*
|
||||
|
||||
return filteredSeries, nil
|
||||
}
|
||||
|
||||
// HandleMissingDataAlert handles missing data alert logic by tracking the last timestamp
|
||||
// with data points and checking if a missing data alert should be sent based on the
|
||||
// [ruletypes.RuleCondition.AlertOnAbsent] and [ruletypes.RuleCondition.AbsentFor] conditions.
|
||||
//
|
||||
// Returns a pointer to the missing data alert if conditions are met, nil otherwise.
|
||||
func (r *BaseRule) HandleMissingDataAlert(ctx context.Context, ts time.Time, hasData bool) *ruletypes.Sample {
|
||||
// Track the last timestamp with data points for missing data alerts
|
||||
if hasData {
|
||||
r.lastTimestampWithDatapoints = ts
|
||||
}
|
||||
|
||||
if !r.ruleCondition.AlertOnAbsent || ts.Before(r.lastTimestampWithDatapoints.Add(time.Duration(r.ruleCondition.AbsentFor)*time.Minute)) {
|
||||
return nil
|
||||
}
|
||||
|
||||
r.logger.InfoContext(ctx, "no data found for rule condition", "rule_id", r.ID())
|
||||
lbls := labels.NewBuilder(labels.Labels{})
|
||||
if !r.lastTimestampWithDatapoints.IsZero() {
|
||||
lbls.Set(ruletypes.LabelLastSeen, r.lastTimestampWithDatapoints.Format(constants.AlertTimeFormat))
|
||||
}
|
||||
return &ruletypes.Sample{Metric: lbls.Labels(), IsMissing: true}
|
||||
}
|
||||
|
||||
@@ -17,7 +17,7 @@ import (
|
||||
"github.com/SigNoz/signoz/pkg/query-service/utils/times"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/utils/timestamp"
|
||||
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
|
||||
ruletypes "github.com/SigNoz/signoz/pkg/types/ruletypes"
|
||||
"github.com/SigNoz/signoz/pkg/types/ruletypes"
|
||||
"github.com/SigNoz/signoz/pkg/valuer"
|
||||
"github.com/prometheus/prometheus/promql"
|
||||
)
|
||||
@@ -142,6 +142,12 @@ func (r *PromRule) buildAndRunQuery(ctx context.Context, ts time.Time) (ruletype
|
||||
}
|
||||
|
||||
matrixToProcess := r.matrixToV3Series(res)
|
||||
|
||||
hasData := len(matrixToProcess) > 0
|
||||
if missingDataAlert := r.HandleMissingDataAlert(ctx, ts, hasData); missingDataAlert != nil {
|
||||
return ruletypes.Vector{*missingDataAlert}, nil
|
||||
}
|
||||
|
||||
// Filter out new series if newGroupEvalDelay is configured
|
||||
if r.ShouldSkipNewGroups() {
|
||||
filteredSeries, filterErr := r.BaseRule.FilterNewSeries(ctx, ts, matrixToProcess)
|
||||
@@ -154,6 +160,7 @@ func (r *PromRule) buildAndRunQuery(ctx context.Context, ts time.Time) (ruletype
|
||||
}
|
||||
|
||||
var resultVector ruletypes.Vector
|
||||
|
||||
for _, series := range matrixToProcess {
|
||||
if !r.Condition().ShouldEval(series) {
|
||||
r.logger.InfoContext(
|
||||
@@ -243,6 +250,10 @@ func (r *PromRule) Eval(ctx context.Context, ts time.Time) (int, error) {
|
||||
for name, value := range r.annotations.Map() {
|
||||
annotations = append(annotations, qslabels.Label{Name: name, Value: expand(value)})
|
||||
}
|
||||
if result.IsMissing {
|
||||
lb.Set(qslabels.AlertNameLabel, "[No data] "+r.Name())
|
||||
lb.Set(qslabels.NoDataLabel, "true")
|
||||
}
|
||||
|
||||
lbs := lb.Labels()
|
||||
h := lbs.Hash()
|
||||
@@ -265,6 +276,7 @@ func (r *PromRule) Eval(ctx context.Context, ts time.Time) (int, error) {
|
||||
Value: result.V,
|
||||
GeneratorURL: r.GeneratorURL(),
|
||||
Receivers: ruleReceiverMap[lbs.Map()[ruletypes.LabelThresholdName]],
|
||||
Missing: result.IsMissing,
|
||||
IsRecovering: result.IsRecovering,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1345,6 +1345,275 @@ func TestMultipleThresholdPromRule(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestPromRule_NoData(t *testing.T) {
|
||||
evalTime := time.Now()
|
||||
|
||||
postableRule := ruletypes.PostableRule{
|
||||
AlertName: "Test no data",
|
||||
AlertType: ruletypes.AlertTypeMetric,
|
||||
RuleType: ruletypes.RuleTypeProm,
|
||||
Evaluation: &ruletypes.EvaluationEnvelope{Kind: ruletypes.RollingEvaluation, Spec: ruletypes.RollingWindow{
|
||||
EvalWindow: ruletypes.Duration(5 * time.Minute),
|
||||
Frequency: ruletypes.Duration(1 * time.Minute),
|
||||
}},
|
||||
RuleCondition: &ruletypes.RuleCondition{
|
||||
CompareOp: ruletypes.ValueIsAbove,
|
||||
MatchType: ruletypes.AtleastOnce,
|
||||
CompositeQuery: &v3.CompositeQuery{
|
||||
QueryType: v3.QueryTypePromQL,
|
||||
PromQueries: map[string]*v3.PromQuery{
|
||||
"A": {Query: "test_metric"},
|
||||
},
|
||||
},
|
||||
Thresholds: &ruletypes.RuleThresholdData{
|
||||
Kind: ruletypes.BasicThresholdKind,
|
||||
Spec: ruletypes.BasicRuleThresholds{{Name: "Test no data"}},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
// time_series_v4 cols of interest
|
||||
fingerprintCols := []cmock.ColumnType{
|
||||
{Name: "fingerprint", Type: "UInt64"},
|
||||
{Name: "any(labels)", Type: "String"},
|
||||
}
|
||||
|
||||
// samples_v4 columns
|
||||
samplesCols := []cmock.ColumnType{
|
||||
{Name: "metric_name", Type: "String"},
|
||||
{Name: "fingerprint", Type: "UInt64"},
|
||||
{Name: "unix_milli", Type: "Int64"},
|
||||
{Name: "value", Type: "Float64"},
|
||||
{Name: "flags", Type: "UInt32"},
|
||||
}
|
||||
|
||||
// see Timestamps on base_rule
|
||||
evalWindowMs := int64(5 * 60 * 1000) // 5 minutes in ms
|
||||
evalTimeMs := evalTime.UnixMilli()
|
||||
queryStart := ((evalTimeMs-2*evalWindowMs)/60000)*60000 + 1 // truncate to minute + 1ms
|
||||
queryEnd := (evalTimeMs / 60000) * 60000 // truncate to minute
|
||||
|
||||
cases := []struct {
|
||||
description string
|
||||
alertOnAbsent bool
|
||||
values []any
|
||||
target float64
|
||||
expectAlerts int
|
||||
}{
|
||||
{
|
||||
description: "AlertOnAbsent=false",
|
||||
alertOnAbsent: false,
|
||||
values: []any{},
|
||||
target: 200,
|
||||
expectAlerts: 0,
|
||||
},
|
||||
{
|
||||
description: "AlertOnAbsent=true",
|
||||
alertOnAbsent: true,
|
||||
values: []any{},
|
||||
target: 200,
|
||||
expectAlerts: 1,
|
||||
},
|
||||
}
|
||||
|
||||
logger := instrumentationtest.New().Logger()
|
||||
|
||||
for _, c := range cases {
|
||||
t.Run(c.description, func(t *testing.T) {
|
||||
postableRule.RuleCondition.AlertOnAbsent = c.alertOnAbsent
|
||||
|
||||
telemetryStore := telemetrystoretest.New(telemetrystore.Config{}, &queryMatcherAny{})
|
||||
|
||||
// single fingerprint with labels JSON
|
||||
fingerprint := uint64(12345)
|
||||
labelsJSON := `{"__name__":"test_metric"}`
|
||||
telemetryStore.Mock().
|
||||
ExpectQuery("SELECT fingerprint, any").
|
||||
WithArgs("test_metric", "__name__", "test_metric").
|
||||
WillReturnRows(cmock.NewRows(fingerprintCols, [][]any{{fingerprint, labelsJSON}}))
|
||||
|
||||
telemetryStore.Mock().
|
||||
ExpectQuery("SELECT metric_name, fingerprint, unix_milli").
|
||||
WithArgs("test_metric", "test_metric", "__name__", "test_metric", queryStart, queryEnd).
|
||||
WillReturnRows(cmock.NewRows(samplesCols, [][]any{}))
|
||||
|
||||
promProvider := prometheustest.New(
|
||||
context.Background(),
|
||||
instrumentationtest.New().ToProviderSettings(),
|
||||
prometheus.Config{},
|
||||
telemetryStore,
|
||||
)
|
||||
defer func() {
|
||||
_ = promProvider.Close()
|
||||
}()
|
||||
|
||||
options := clickhouseReader.NewOptions("primaryNamespace")
|
||||
reader := clickhouseReader.NewReader(nil, telemetryStore, promProvider, "", time.Second, nil, nil, options)
|
||||
rule, err := NewPromRule("some-id", valuer.GenerateUUID(), &postableRule, logger, reader, promProvider)
|
||||
require.NoError(t, err)
|
||||
|
||||
alertsFound, err := rule.Eval(context.Background(), evalTime)
|
||||
require.NoError(t, err)
|
||||
|
||||
assert.Equal(t, c.expectAlerts, alertsFound)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestPromRule_NoData_AbsentFor(t *testing.T) {
|
||||
// 1. Call Eval with data at time t1, to populate lastTimestampWithDatapoints
|
||||
// 2. Call Eval without data at time t2
|
||||
// 3. Alert fires only if t2 - t1 > AbsentFor
|
||||
|
||||
baseTime := time.Unix(1700000000, 0)
|
||||
evalWindow := 5 * time.Minute
|
||||
|
||||
// Set target higher than test data (100.0) so regular threshold alerts don't fire
|
||||
target := 500.0
|
||||
|
||||
postableRule := ruletypes.PostableRule{
|
||||
AlertName: "Test no data with AbsentFor",
|
||||
AlertType: ruletypes.AlertTypeMetric,
|
||||
RuleType: ruletypes.RuleTypeProm,
|
||||
Evaluation: &ruletypes.EvaluationEnvelope{Kind: ruletypes.RollingEvaluation, Spec: ruletypes.RollingWindow{
|
||||
EvalWindow: ruletypes.Duration(evalWindow),
|
||||
Frequency: ruletypes.Duration(1 * time.Minute),
|
||||
}},
|
||||
RuleCondition: &ruletypes.RuleCondition{
|
||||
CompareOp: ruletypes.ValueIsAbove,
|
||||
MatchType: ruletypes.AtleastOnce,
|
||||
AlertOnAbsent: true,
|
||||
Target: &target,
|
||||
CompositeQuery: &v3.CompositeQuery{
|
||||
QueryType: v3.QueryTypePromQL,
|
||||
PromQueries: map[string]*v3.PromQuery{
|
||||
"A": {Query: "test_metric"},
|
||||
},
|
||||
},
|
||||
Thresholds: &ruletypes.RuleThresholdData{
|
||||
Kind: ruletypes.BasicThresholdKind,
|
||||
Spec: ruletypes.BasicRuleThresholds{{
|
||||
Name: "Test no data with AbsentFor",
|
||||
TargetValue: &target,
|
||||
MatchType: ruletypes.AtleastOnce,
|
||||
CompareOp: ruletypes.ValueIsAbove,
|
||||
}},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
fingerprintCols := []cmock.ColumnType{
|
||||
{Name: "fingerprint", Type: "UInt64"},
|
||||
{Name: "any(labels)", Type: "String"},
|
||||
}
|
||||
|
||||
samplesCols := []cmock.ColumnType{
|
||||
{Name: "metric_name", Type: "String"},
|
||||
{Name: "fingerprint", Type: "UInt64"},
|
||||
{Name: "unix_milli", Type: "Int64"},
|
||||
{Name: "value", Type: "Float64"},
|
||||
{Name: "flags", Type: "UInt32"},
|
||||
}
|
||||
|
||||
cases := []struct {
|
||||
description string
|
||||
absentFor uint64 // grace period in minutes
|
||||
timeBetweenEvals time.Duration // time between first eval (with data) and second eval (no data)
|
||||
expectAlertOnEval2 int
|
||||
}{
|
||||
{
|
||||
description: "WithinGracePeriod",
|
||||
absentFor: 5,
|
||||
timeBetweenEvals: 4 * time.Minute,
|
||||
expectAlertOnEval2: 0,
|
||||
},
|
||||
{
|
||||
description: "AfterGracePeriod",
|
||||
absentFor: 5,
|
||||
timeBetweenEvals: 6 * time.Minute,
|
||||
expectAlertOnEval2: 1,
|
||||
},
|
||||
}
|
||||
|
||||
logger := instrumentationtest.New().Logger()
|
||||
|
||||
for _, c := range cases {
|
||||
t.Run(c.description, func(t *testing.T) {
|
||||
postableRule.RuleCondition.AbsentFor = c.absentFor
|
||||
|
||||
// Timestamps for two evaluations
|
||||
// t1 is the eval time for first eval, data points are in the past
|
||||
t1 := baseTime.Add(5 * time.Minute) // first eval with data
|
||||
t2 := t1.Add(c.timeBetweenEvals) // second eval without data
|
||||
|
||||
telemetryStore := telemetrystoretest.New(telemetrystore.Config{}, &queryMatcherAny{})
|
||||
|
||||
fingerprint := uint64(12345)
|
||||
labelsJSON := `{"__name__":"test_metric"}`
|
||||
|
||||
// Helper to calculate query time range for an eval time
|
||||
calcQueryRange := func(evalTime time.Time) (int64, int64) {
|
||||
evalTimeMs := evalTime.UnixMilli()
|
||||
queryStart := ((evalTimeMs-2*evalWindow.Milliseconds())/60000)*60000 + 1
|
||||
queryEnd := (evalTimeMs / 60000) * 60000
|
||||
return queryStart, queryEnd
|
||||
}
|
||||
|
||||
// First eval (t1) - with data
|
||||
queryStart1, queryEnd1 := calcQueryRange(t1)
|
||||
telemetryStore.Mock().
|
||||
ExpectQuery("SELECT fingerprint, any").
|
||||
WithArgs("test_metric", "__name__", "test_metric").
|
||||
WillReturnRows(cmock.NewRows(fingerprintCols, [][]any{{fingerprint, labelsJSON}}))
|
||||
telemetryStore.Mock().
|
||||
ExpectQuery("SELECT metric_name, fingerprint, unix_milli").
|
||||
WithArgs("test_metric", "test_metric", "__name__", "test_metric", queryStart1, queryEnd1).
|
||||
WillReturnRows(cmock.NewRows(samplesCols, [][]any{
|
||||
// Data points in the past relative to t1
|
||||
{"test_metric", fingerprint, baseTime.UnixMilli(), 100.0, uint32(0)},
|
||||
{"test_metric", fingerprint, baseTime.Add(1 * time.Minute).UnixMilli(), 100.0, uint32(0)},
|
||||
{"test_metric", fingerprint, baseTime.Add(2 * time.Minute).UnixMilli(), 100.0, uint32(0)},
|
||||
}))
|
||||
|
||||
// Second eval (t2) - no data
|
||||
queryStart2, queryEnd2 := calcQueryRange(t2)
|
||||
telemetryStore.Mock().
|
||||
ExpectQuery("SELECT fingerprint, any").
|
||||
WithArgs("test_metric", "__name__", "test_metric").
|
||||
WillReturnRows(cmock.NewRows(fingerprintCols, [][]any{{fingerprint, labelsJSON}}))
|
||||
telemetryStore.Mock().
|
||||
ExpectQuery("SELECT metric_name, fingerprint, unix_milli").
|
||||
WithArgs("test_metric", "test_metric", "__name__", "test_metric", queryStart2, queryEnd2).
|
||||
WillReturnRows(cmock.NewRows(samplesCols, [][]any{})) // empty - no data
|
||||
|
||||
promProvider := prometheustest.New(
|
||||
context.Background(),
|
||||
instrumentationtest.New().ToProviderSettings(),
|
||||
prometheus.Config{},
|
||||
telemetryStore,
|
||||
)
|
||||
defer func() {
|
||||
_ = promProvider.Close()
|
||||
}()
|
||||
|
||||
options := clickhouseReader.NewOptions("primaryNamespace")
|
||||
reader := clickhouseReader.NewReader(nil, telemetryStore, promProvider, "", time.Second, nil, nil, options)
|
||||
rule, err := NewPromRule("some-id", valuer.GenerateUUID(), &postableRule, logger, reader, promProvider)
|
||||
require.NoError(t, err)
|
||||
|
||||
// First eval with data - should NOT alert, but populates lastTimestampWithDatapoints
|
||||
alertsFound1, err := rule.Eval(context.Background(), t1)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, 0, alertsFound1, "First eval with data should not alert")
|
||||
|
||||
// Second eval without data - should alert based on AbsentFor
|
||||
alertsFound2, err := rule.Eval(context.Background(), t2)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, c.expectAlertOnEval2, alertsFound2)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestPromRuleEval_RequireMinPoints(t *testing.T) {
|
||||
// fixed base time for deterministic tests
|
||||
baseTime := time.Unix(1700000000, 0)
|
||||
|
||||
@@ -24,7 +24,6 @@ import (
|
||||
"github.com/SigNoz/signoz/pkg/query-service/app/querier"
|
||||
querierV2 "github.com/SigNoz/signoz/pkg/query-service/app/querier/v2"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/app/queryBuilder"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/constants"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/interfaces"
|
||||
v3 "github.com/SigNoz/signoz/pkg/query-service/model/v3"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/utils/labels"
|
||||
@@ -462,26 +461,13 @@ func (r *ThresholdRule) buildAndRunQuery(ctx context.Context, orgID valuer.UUID,
|
||||
}
|
||||
}
|
||||
|
||||
if queryResult != nil && len(queryResult.Series) > 0 {
|
||||
r.lastTimestampWithDatapoints = time.Now()
|
||||
hasData := queryResult != nil && len(queryResult.Series) > 0
|
||||
if missingDataAlert := r.HandleMissingDataAlert(ctx, ts, hasData); missingDataAlert != nil {
|
||||
return ruletypes.Vector{*missingDataAlert}, nil
|
||||
}
|
||||
|
||||
var resultVector ruletypes.Vector
|
||||
|
||||
// if the data is missing for `For` duration then we should send alert
|
||||
if r.ruleCondition.AlertOnAbsent && r.lastTimestampWithDatapoints.Add(time.Duration(r.Condition().AbsentFor)*time.Minute).Before(time.Now()) {
|
||||
r.logger.InfoContext(ctx, "no data found for rule condition", "rule_id", r.ID())
|
||||
lbls := labels.NewBuilder(labels.Labels{})
|
||||
if !r.lastTimestampWithDatapoints.IsZero() {
|
||||
lbls.Set(ruletypes.LabelLastSeen, r.lastTimestampWithDatapoints.Format(constants.AlertTimeFormat))
|
||||
}
|
||||
resultVector = append(resultVector, ruletypes.Sample{
|
||||
Metric: lbls.Labels(),
|
||||
IsMissing: true,
|
||||
})
|
||||
return resultVector, nil
|
||||
}
|
||||
|
||||
if queryResult == nil {
|
||||
r.logger.WarnContext(ctx, "query result is nil", "rule_name", r.Name(), "query_name", selectedQuery)
|
||||
return resultVector, nil
|
||||
@@ -538,26 +524,13 @@ func (r *ThresholdRule) buildAndRunQueryV5(ctx context.Context, orgID valuer.UUI
|
||||
}
|
||||
}
|
||||
|
||||
if queryResult != nil && len(queryResult.Series) > 0 {
|
||||
r.lastTimestampWithDatapoints = time.Now()
|
||||
hasData := queryResult != nil && len(queryResult.Series) > 0
|
||||
if missingDataAlert := r.HandleMissingDataAlert(ctx, ts, hasData); missingDataAlert != nil {
|
||||
return ruletypes.Vector{*missingDataAlert}, nil
|
||||
}
|
||||
|
||||
var resultVector ruletypes.Vector
|
||||
|
||||
// if the data is missing for `For` duration then we should send alert
|
||||
if r.ruleCondition.AlertOnAbsent && r.lastTimestampWithDatapoints.Add(time.Duration(r.Condition().AbsentFor)*time.Minute).Before(time.Now()) {
|
||||
r.logger.InfoContext(ctx, "no data found for rule condition", "rule_id", r.ID())
|
||||
lbls := labels.NewBuilder(labels.Labels{})
|
||||
if !r.lastTimestampWithDatapoints.IsZero() {
|
||||
lbls.Set(ruletypes.LabelLastSeen, r.lastTimestampWithDatapoints.Format(constants.AlertTimeFormat))
|
||||
}
|
||||
resultVector = append(resultVector, ruletypes.Sample{
|
||||
Metric: lbls.Labels(),
|
||||
IsMissing: true,
|
||||
})
|
||||
return resultVector, nil
|
||||
}
|
||||
|
||||
if queryResult == nil {
|
||||
r.logger.WarnContext(ctx, "query result is nil", "rule_name", r.Name(), "query_name", selectedQuery)
|
||||
return resultVector, nil
|
||||
|
||||
Reference in New Issue
Block a user