mirror of
https://github.com/SigNoz/signoz.git
synced 2026-02-03 08:33:26 +00:00
fix: add missing data for promql and anomaly rules (#10097)
This commit is contained in:
committed by
GitHub
parent
858cd287fa
commit
c79373314a
@@ -50,7 +50,7 @@ type GetAnomaliesResponse struct {
|
|||||||
//
|
//
|
||||||
// ^ ^
|
// ^ ^
|
||||||
// | |
|
// | |
|
||||||
// (rounded value for past peiod) + (seasonal growth)
|
// (rounded value for past period) + (seasonal growth)
|
||||||
//
|
//
|
||||||
// score = abs(value - prediction) / stddev (current_season_query)
|
// score = abs(value - prediction) / stddev (current_season_query)
|
||||||
type anomalyQueryParams struct {
|
type anomalyQueryParams struct {
|
||||||
@@ -74,12 +74,12 @@ type anomalyQueryParams struct {
|
|||||||
// : For daily seasonality, this is the query range params for the (now-2d-5m, now-1d)
|
// : For daily seasonality, this is the query range params for the (now-2d-5m, now-1d)
|
||||||
// : For hourly seasonality, this is the query range params for the (now-2h-5m, now-1h)
|
// : For hourly seasonality, this is the query range params for the (now-2h-5m, now-1h)
|
||||||
PastSeasonQuery *v3.QueryRangeParamsV3
|
PastSeasonQuery *v3.QueryRangeParamsV3
|
||||||
// Past2SeasonQuery is the query range params for past 2 seasonal period to the current season
|
// Past2SeasonQuery is the query range params for past 2 seasonal periods to the current season
|
||||||
// Example: For weekly seasonality, this is the query range params for the (now-3w-5m, now-2w)
|
// Example: For weekly seasonality, this is the query range params for the (now-3w-5m, now-2w)
|
||||||
// : For daily seasonality, this is the query range params for the (now-3d-5m, now-2d)
|
// : For daily seasonality, this is the query range params for the (now-3d-5m, now-2d)
|
||||||
// : For hourly seasonality, this is the query range params for the (now-3h-5m, now-2h)
|
// : For hourly seasonality, this is the query range params for the (now-3h-5m, now-2h)
|
||||||
Past2SeasonQuery *v3.QueryRangeParamsV3
|
Past2SeasonQuery *v3.QueryRangeParamsV3
|
||||||
// Past3SeasonQuery is the query range params for past 3 seasonal period to the current season
|
// Past3SeasonQuery is the query range params for past 3 seasonal periods to the current season
|
||||||
// Example: For weekly seasonality, this is the query range params for the (now-4w-5m, now-3w)
|
// Example: For weekly seasonality, this is the query range params for the (now-4w-5m, now-3w)
|
||||||
// : For daily seasonality, this is the query range params for the (now-4d-5m, now-3d)
|
// : For daily seasonality, this is the query range params for the (now-4d-5m, now-3d)
|
||||||
// : For hourly seasonality, this is the query range params for the (now-4h-5m, now-3h)
|
// : For hourly seasonality, this is the query range params for the (now-4h-5m, now-3h)
|
||||||
|
|||||||
@@ -234,6 +234,11 @@ func (r *AnomalyRule) buildAndRunQuery(ctx context.Context, orgID valuer.UUID, t
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
hasData := len(queryResult.AnomalyScores) > 0
|
||||||
|
if missingDataAlert := r.HandleMissingDataAlert(ctx, ts, hasData); missingDataAlert != nil {
|
||||||
|
return ruletypes.Vector{*missingDataAlert}, nil
|
||||||
|
}
|
||||||
|
|
||||||
var resultVector ruletypes.Vector
|
var resultVector ruletypes.Vector
|
||||||
|
|
||||||
scoresJSON, _ := json.Marshal(queryResult.AnomalyScores)
|
scoresJSON, _ := json.Marshal(queryResult.AnomalyScores)
|
||||||
@@ -285,6 +290,11 @@ func (r *AnomalyRule) buildAndRunQueryV5(ctx context.Context, orgID valuer.UUID,
|
|||||||
|
|
||||||
queryResult := transition.ConvertV5TimeSeriesDataToV4Result(qbResult)
|
queryResult := transition.ConvertV5TimeSeriesDataToV4Result(qbResult)
|
||||||
|
|
||||||
|
hasData := len(queryResult.AnomalyScores) > 0
|
||||||
|
if missingDataAlert := r.HandleMissingDataAlert(ctx, ts, hasData); missingDataAlert != nil {
|
||||||
|
return ruletypes.Vector{*missingDataAlert}, nil
|
||||||
|
}
|
||||||
|
|
||||||
var resultVector ruletypes.Vector
|
var resultVector ruletypes.Vector
|
||||||
|
|
||||||
scoresJSON, _ := json.Marshal(queryResult.AnomalyScores)
|
scoresJSON, _ := json.Marshal(queryResult.AnomalyScores)
|
||||||
|
|||||||
268
ee/query-service/rules/anomaly_test.go
Normal file
268
ee/query-service/rules/anomaly_test.go
Normal file
@@ -0,0 +1,268 @@
|
|||||||
|
package rules
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
|
"github.com/SigNoz/signoz/ee/query-service/anomaly"
|
||||||
|
"github.com/SigNoz/signoz/pkg/instrumentation/instrumentationtest"
|
||||||
|
"github.com/SigNoz/signoz/pkg/query-service/app/clickhouseReader"
|
||||||
|
v3 "github.com/SigNoz/signoz/pkg/query-service/model/v3"
|
||||||
|
"github.com/SigNoz/signoz/pkg/telemetrystore"
|
||||||
|
"github.com/SigNoz/signoz/pkg/telemetrystore/telemetrystoretest"
|
||||||
|
"github.com/SigNoz/signoz/pkg/types/ruletypes"
|
||||||
|
"github.com/SigNoz/signoz/pkg/valuer"
|
||||||
|
)
|
||||||
|
|
||||||
|
// mockAnomalyProvider is a mock implementation of anomaly.Provider for testing.
|
||||||
|
// We need this because the anomaly provider makes 6 different queries for various
|
||||||
|
// time periods (current, past period, current season, past season, past 2 seasons,
|
||||||
|
// past 3 seasons), making it cumbersome to create mock data.
|
||||||
|
type mockAnomalyProvider struct {
|
||||||
|
responses []*anomaly.GetAnomaliesResponse
|
||||||
|
callCount int
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *mockAnomalyProvider) GetAnomalies(ctx context.Context, orgID valuer.UUID, req *anomaly.GetAnomaliesRequest) (*anomaly.GetAnomaliesResponse, error) {
|
||||||
|
if m.callCount >= len(m.responses) {
|
||||||
|
return &anomaly.GetAnomaliesResponse{Results: []*v3.Result{}}, nil
|
||||||
|
}
|
||||||
|
resp := m.responses[m.callCount]
|
||||||
|
m.callCount++
|
||||||
|
return resp, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestAnomalyRule_NoData_AlertOnAbsent(t *testing.T) {
|
||||||
|
// Test basic AlertOnAbsent functionality (without AbsentFor grace period)
|
||||||
|
|
||||||
|
baseTime := time.Unix(1700000000, 0)
|
||||||
|
evalWindow := 5 * time.Minute
|
||||||
|
evalTime := baseTime.Add(5 * time.Minute)
|
||||||
|
|
||||||
|
target := 500.0
|
||||||
|
|
||||||
|
postableRule := ruletypes.PostableRule{
|
||||||
|
AlertName: "Test anomaly no data",
|
||||||
|
AlertType: ruletypes.AlertTypeMetric,
|
||||||
|
RuleType: RuleTypeAnomaly,
|
||||||
|
Evaluation: &ruletypes.EvaluationEnvelope{Kind: ruletypes.RollingEvaluation, Spec: ruletypes.RollingWindow{
|
||||||
|
EvalWindow: ruletypes.Duration(evalWindow),
|
||||||
|
Frequency: ruletypes.Duration(1 * time.Minute),
|
||||||
|
}},
|
||||||
|
RuleCondition: &ruletypes.RuleCondition{
|
||||||
|
CompareOp: ruletypes.ValueIsAbove,
|
||||||
|
MatchType: ruletypes.AtleastOnce,
|
||||||
|
Target: &target,
|
||||||
|
CompositeQuery: &v3.CompositeQuery{
|
||||||
|
QueryType: v3.QueryTypeBuilder,
|
||||||
|
BuilderQueries: map[string]*v3.BuilderQuery{
|
||||||
|
"A": {
|
||||||
|
QueryName: "A",
|
||||||
|
Expression: "A",
|
||||||
|
DataSource: v3.DataSourceMetrics,
|
||||||
|
Temporality: v3.Unspecified,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
SelectedQuery: "A",
|
||||||
|
Seasonality: "daily",
|
||||||
|
Thresholds: &ruletypes.RuleThresholdData{
|
||||||
|
Kind: ruletypes.BasicThresholdKind,
|
||||||
|
Spec: ruletypes.BasicRuleThresholds{{
|
||||||
|
Name: "Test anomaly no data",
|
||||||
|
TargetValue: &target,
|
||||||
|
MatchType: ruletypes.AtleastOnce,
|
||||||
|
CompareOp: ruletypes.ValueIsAbove,
|
||||||
|
}},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
responseNoData := &anomaly.GetAnomaliesResponse{
|
||||||
|
Results: []*v3.Result{
|
||||||
|
{
|
||||||
|
QueryName: "A",
|
||||||
|
AnomalyScores: []*v3.Series{},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
cases := []struct {
|
||||||
|
description string
|
||||||
|
alertOnAbsent bool
|
||||||
|
expectAlerts int
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
description: "AlertOnAbsent=false",
|
||||||
|
alertOnAbsent: false,
|
||||||
|
expectAlerts: 0,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "AlertOnAbsent=true",
|
||||||
|
alertOnAbsent: true,
|
||||||
|
expectAlerts: 1,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
logger := instrumentationtest.New().Logger()
|
||||||
|
|
||||||
|
for _, c := range cases {
|
||||||
|
t.Run(c.description, func(t *testing.T) {
|
||||||
|
postableRule.RuleCondition.AlertOnAbsent = c.alertOnAbsent
|
||||||
|
|
||||||
|
telemetryStore := telemetrystoretest.New(telemetrystore.Config{}, nil)
|
||||||
|
options := clickhouseReader.NewOptions("primaryNamespace")
|
||||||
|
reader := clickhouseReader.NewReader(nil, telemetryStore, nil, "", time.Second, nil, nil, options)
|
||||||
|
|
||||||
|
rule, err := NewAnomalyRule(
|
||||||
|
"test-anomaly-rule",
|
||||||
|
valuer.GenerateUUID(),
|
||||||
|
&postableRule,
|
||||||
|
reader,
|
||||||
|
nil,
|
||||||
|
logger,
|
||||||
|
nil,
|
||||||
|
)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
rule.provider = &mockAnomalyProvider{
|
||||||
|
responses: []*anomaly.GetAnomaliesResponse{responseNoData},
|
||||||
|
}
|
||||||
|
|
||||||
|
alertsFound, err := rule.Eval(context.Background(), evalTime)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, c.expectAlerts, alertsFound)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestAnomalyRule_NoData_AbsentFor(t *testing.T) {
|
||||||
|
// Test missing data alert with AbsentFor grace period
|
||||||
|
// 1. Call Eval with data at time t1, to populate lastTimestampWithDatapoints
|
||||||
|
// 2. Call Eval without data at time t2
|
||||||
|
// 3. Alert fires only if t2 - t1 > AbsentFor
|
||||||
|
|
||||||
|
baseTime := time.Unix(1700000000, 0)
|
||||||
|
evalWindow := 5 * time.Minute
|
||||||
|
|
||||||
|
// Set target higher than test data so regular threshold alerts don't fire
|
||||||
|
target := 500.0
|
||||||
|
|
||||||
|
postableRule := ruletypes.PostableRule{
|
||||||
|
AlertName: "Test anomaly no data with AbsentFor",
|
||||||
|
AlertType: ruletypes.AlertTypeMetric,
|
||||||
|
RuleType: RuleTypeAnomaly,
|
||||||
|
Evaluation: &ruletypes.EvaluationEnvelope{Kind: ruletypes.RollingEvaluation, Spec: ruletypes.RollingWindow{
|
||||||
|
EvalWindow: ruletypes.Duration(evalWindow),
|
||||||
|
Frequency: ruletypes.Duration(time.Minute),
|
||||||
|
}},
|
||||||
|
RuleCondition: &ruletypes.RuleCondition{
|
||||||
|
CompareOp: ruletypes.ValueIsAbove,
|
||||||
|
MatchType: ruletypes.AtleastOnce,
|
||||||
|
AlertOnAbsent: true,
|
||||||
|
Target: &target,
|
||||||
|
CompositeQuery: &v3.CompositeQuery{
|
||||||
|
QueryType: v3.QueryTypeBuilder,
|
||||||
|
BuilderQueries: map[string]*v3.BuilderQuery{
|
||||||
|
"A": {
|
||||||
|
QueryName: "A",
|
||||||
|
Expression: "A",
|
||||||
|
DataSource: v3.DataSourceMetrics,
|
||||||
|
Temporality: v3.Unspecified,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
SelectedQuery: "A",
|
||||||
|
Seasonality: "daily",
|
||||||
|
Thresholds: &ruletypes.RuleThresholdData{
|
||||||
|
Kind: ruletypes.BasicThresholdKind,
|
||||||
|
Spec: ruletypes.BasicRuleThresholds{{
|
||||||
|
Name: "Test anomaly no data with AbsentFor",
|
||||||
|
TargetValue: &target,
|
||||||
|
MatchType: ruletypes.AtleastOnce,
|
||||||
|
CompareOp: ruletypes.ValueIsAbove,
|
||||||
|
}},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
responseNoData := &anomaly.GetAnomaliesResponse{
|
||||||
|
Results: []*v3.Result{
|
||||||
|
{
|
||||||
|
QueryName: "A",
|
||||||
|
AnomalyScores: []*v3.Series{},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
cases := []struct {
|
||||||
|
description string
|
||||||
|
absentFor uint64
|
||||||
|
timeBetweenEvals time.Duration
|
||||||
|
expectAlertOnEval2 int
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
description: "WithinGracePeriod",
|
||||||
|
absentFor: 5,
|
||||||
|
timeBetweenEvals: 4 * time.Minute,
|
||||||
|
expectAlertOnEval2: 0,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "AfterGracePeriod",
|
||||||
|
absentFor: 5,
|
||||||
|
timeBetweenEvals: 6 * time.Minute,
|
||||||
|
expectAlertOnEval2: 1,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
logger := instrumentationtest.New().Logger()
|
||||||
|
|
||||||
|
for _, c := range cases {
|
||||||
|
t.Run(c.description, func(t *testing.T) {
|
||||||
|
postableRule.RuleCondition.AbsentFor = c.absentFor
|
||||||
|
|
||||||
|
t1 := baseTime.Add(5 * time.Minute)
|
||||||
|
t2 := t1.Add(c.timeBetweenEvals)
|
||||||
|
|
||||||
|
responseWithData := &anomaly.GetAnomaliesResponse{
|
||||||
|
Results: []*v3.Result{
|
||||||
|
{
|
||||||
|
QueryName: "A",
|
||||||
|
AnomalyScores: []*v3.Series{
|
||||||
|
{
|
||||||
|
Labels: map[string]string{"test": "label"},
|
||||||
|
Points: []v3.Point{
|
||||||
|
{Timestamp: baseTime.UnixMilli(), Value: 1.0},
|
||||||
|
{Timestamp: baseTime.Add(time.Minute).UnixMilli(), Value: 1.5},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
telemetryStore := telemetrystoretest.New(telemetrystore.Config{}, nil)
|
||||||
|
options := clickhouseReader.NewOptions("primaryNamespace")
|
||||||
|
reader := clickhouseReader.NewReader(nil, telemetryStore, nil, "", time.Second, nil, nil, options)
|
||||||
|
|
||||||
|
rule, err := NewAnomalyRule("test-anomaly-rule", valuer.GenerateUUID(), &postableRule, reader, nil, logger, nil)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
rule.provider = &mockAnomalyProvider{
|
||||||
|
responses: []*anomaly.GetAnomaliesResponse{responseWithData, responseNoData},
|
||||||
|
}
|
||||||
|
|
||||||
|
alertsFound1, err := rule.Eval(context.Background(), t1)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, 0, alertsFound1, "First eval with data should not alert")
|
||||||
|
|
||||||
|
alertsFound2, err := rule.Eval(context.Background(), t2)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, c.expectAlertOnEval2, alertsFound2)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -8,9 +8,11 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/SigNoz/signoz/pkg/errors"
|
"github.com/SigNoz/signoz/pkg/errors"
|
||||||
|
"github.com/SigNoz/signoz/pkg/query-service/constants"
|
||||||
"github.com/SigNoz/signoz/pkg/query-service/interfaces"
|
"github.com/SigNoz/signoz/pkg/query-service/interfaces"
|
||||||
"github.com/SigNoz/signoz/pkg/query-service/model"
|
"github.com/SigNoz/signoz/pkg/query-service/model"
|
||||||
v3 "github.com/SigNoz/signoz/pkg/query-service/model/v3"
|
v3 "github.com/SigNoz/signoz/pkg/query-service/model/v3"
|
||||||
|
"github.com/SigNoz/signoz/pkg/query-service/utils/labels"
|
||||||
qslabels "github.com/SigNoz/signoz/pkg/query-service/utils/labels"
|
qslabels "github.com/SigNoz/signoz/pkg/query-service/utils/labels"
|
||||||
"github.com/SigNoz/signoz/pkg/queryparser"
|
"github.com/SigNoz/signoz/pkg/queryparser"
|
||||||
"github.com/SigNoz/signoz/pkg/sqlstore"
|
"github.com/SigNoz/signoz/pkg/sqlstore"
|
||||||
@@ -741,3 +743,26 @@ func (r *BaseRule) FilterNewSeries(ctx context.Context, ts time.Time, series []*
|
|||||||
|
|
||||||
return filteredSeries, nil
|
return filteredSeries, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// HandleMissingDataAlert handles missing data alert logic by tracking the last timestamp
|
||||||
|
// with data points and checking if a missing data alert should be sent based on the
|
||||||
|
// [ruletypes.RuleCondition.AlertOnAbsent] and [ruletypes.RuleCondition.AbsentFor] conditions.
|
||||||
|
//
|
||||||
|
// Returns a pointer to the missing data alert if conditions are met, nil otherwise.
|
||||||
|
func (r *BaseRule) HandleMissingDataAlert(ctx context.Context, ts time.Time, hasData bool) *ruletypes.Sample {
|
||||||
|
// Track the last timestamp with data points for missing data alerts
|
||||||
|
if hasData {
|
||||||
|
r.lastTimestampWithDatapoints = ts
|
||||||
|
}
|
||||||
|
|
||||||
|
if !r.ruleCondition.AlertOnAbsent || ts.Before(r.lastTimestampWithDatapoints.Add(time.Duration(r.ruleCondition.AbsentFor)*time.Minute)) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
r.logger.InfoContext(ctx, "no data found for rule condition", "rule_id", r.ID())
|
||||||
|
lbls := labels.NewBuilder(labels.Labels{})
|
||||||
|
if !r.lastTimestampWithDatapoints.IsZero() {
|
||||||
|
lbls.Set(ruletypes.LabelLastSeen, r.lastTimestampWithDatapoints.Format(constants.AlertTimeFormat))
|
||||||
|
}
|
||||||
|
return &ruletypes.Sample{Metric: lbls.Labels(), IsMissing: true}
|
||||||
|
}
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ import (
|
|||||||
"github.com/SigNoz/signoz/pkg/query-service/utils/times"
|
"github.com/SigNoz/signoz/pkg/query-service/utils/times"
|
||||||
"github.com/SigNoz/signoz/pkg/query-service/utils/timestamp"
|
"github.com/SigNoz/signoz/pkg/query-service/utils/timestamp"
|
||||||
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
|
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
|
||||||
ruletypes "github.com/SigNoz/signoz/pkg/types/ruletypes"
|
"github.com/SigNoz/signoz/pkg/types/ruletypes"
|
||||||
"github.com/SigNoz/signoz/pkg/valuer"
|
"github.com/SigNoz/signoz/pkg/valuer"
|
||||||
"github.com/prometheus/prometheus/promql"
|
"github.com/prometheus/prometheus/promql"
|
||||||
)
|
)
|
||||||
@@ -142,6 +142,12 @@ func (r *PromRule) buildAndRunQuery(ctx context.Context, ts time.Time) (ruletype
|
|||||||
}
|
}
|
||||||
|
|
||||||
matrixToProcess := r.matrixToV3Series(res)
|
matrixToProcess := r.matrixToV3Series(res)
|
||||||
|
|
||||||
|
hasData := len(matrixToProcess) > 0
|
||||||
|
if missingDataAlert := r.HandleMissingDataAlert(ctx, ts, hasData); missingDataAlert != nil {
|
||||||
|
return ruletypes.Vector{*missingDataAlert}, nil
|
||||||
|
}
|
||||||
|
|
||||||
// Filter out new series if newGroupEvalDelay is configured
|
// Filter out new series if newGroupEvalDelay is configured
|
||||||
if r.ShouldSkipNewGroups() {
|
if r.ShouldSkipNewGroups() {
|
||||||
filteredSeries, filterErr := r.BaseRule.FilterNewSeries(ctx, ts, matrixToProcess)
|
filteredSeries, filterErr := r.BaseRule.FilterNewSeries(ctx, ts, matrixToProcess)
|
||||||
@@ -154,6 +160,7 @@ func (r *PromRule) buildAndRunQuery(ctx context.Context, ts time.Time) (ruletype
|
|||||||
}
|
}
|
||||||
|
|
||||||
var resultVector ruletypes.Vector
|
var resultVector ruletypes.Vector
|
||||||
|
|
||||||
for _, series := range matrixToProcess {
|
for _, series := range matrixToProcess {
|
||||||
if !r.Condition().ShouldEval(series) {
|
if !r.Condition().ShouldEval(series) {
|
||||||
r.logger.InfoContext(
|
r.logger.InfoContext(
|
||||||
@@ -243,6 +250,10 @@ func (r *PromRule) Eval(ctx context.Context, ts time.Time) (int, error) {
|
|||||||
for name, value := range r.annotations.Map() {
|
for name, value := range r.annotations.Map() {
|
||||||
annotations = append(annotations, qslabels.Label{Name: name, Value: expand(value)})
|
annotations = append(annotations, qslabels.Label{Name: name, Value: expand(value)})
|
||||||
}
|
}
|
||||||
|
if result.IsMissing {
|
||||||
|
lb.Set(qslabels.AlertNameLabel, "[No data] "+r.Name())
|
||||||
|
lb.Set(qslabels.NoDataLabel, "true")
|
||||||
|
}
|
||||||
|
|
||||||
lbs := lb.Labels()
|
lbs := lb.Labels()
|
||||||
h := lbs.Hash()
|
h := lbs.Hash()
|
||||||
@@ -265,6 +276,7 @@ func (r *PromRule) Eval(ctx context.Context, ts time.Time) (int, error) {
|
|||||||
Value: result.V,
|
Value: result.V,
|
||||||
GeneratorURL: r.GeneratorURL(),
|
GeneratorURL: r.GeneratorURL(),
|
||||||
Receivers: ruleReceiverMap[lbs.Map()[ruletypes.LabelThresholdName]],
|
Receivers: ruleReceiverMap[lbs.Map()[ruletypes.LabelThresholdName]],
|
||||||
|
Missing: result.IsMissing,
|
||||||
IsRecovering: result.IsRecovering,
|
IsRecovering: result.IsRecovering,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1345,6 +1345,275 @@ func TestMultipleThresholdPromRule(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestPromRule_NoData(t *testing.T) {
|
||||||
|
evalTime := time.Now()
|
||||||
|
|
||||||
|
postableRule := ruletypes.PostableRule{
|
||||||
|
AlertName: "Test no data",
|
||||||
|
AlertType: ruletypes.AlertTypeMetric,
|
||||||
|
RuleType: ruletypes.RuleTypeProm,
|
||||||
|
Evaluation: &ruletypes.EvaluationEnvelope{Kind: ruletypes.RollingEvaluation, Spec: ruletypes.RollingWindow{
|
||||||
|
EvalWindow: ruletypes.Duration(5 * time.Minute),
|
||||||
|
Frequency: ruletypes.Duration(1 * time.Minute),
|
||||||
|
}},
|
||||||
|
RuleCondition: &ruletypes.RuleCondition{
|
||||||
|
CompareOp: ruletypes.ValueIsAbove,
|
||||||
|
MatchType: ruletypes.AtleastOnce,
|
||||||
|
CompositeQuery: &v3.CompositeQuery{
|
||||||
|
QueryType: v3.QueryTypePromQL,
|
||||||
|
PromQueries: map[string]*v3.PromQuery{
|
||||||
|
"A": {Query: "test_metric"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Thresholds: &ruletypes.RuleThresholdData{
|
||||||
|
Kind: ruletypes.BasicThresholdKind,
|
||||||
|
Spec: ruletypes.BasicRuleThresholds{{Name: "Test no data"}},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
// time_series_v4 cols of interest
|
||||||
|
fingerprintCols := []cmock.ColumnType{
|
||||||
|
{Name: "fingerprint", Type: "UInt64"},
|
||||||
|
{Name: "any(labels)", Type: "String"},
|
||||||
|
}
|
||||||
|
|
||||||
|
// samples_v4 columns
|
||||||
|
samplesCols := []cmock.ColumnType{
|
||||||
|
{Name: "metric_name", Type: "String"},
|
||||||
|
{Name: "fingerprint", Type: "UInt64"},
|
||||||
|
{Name: "unix_milli", Type: "Int64"},
|
||||||
|
{Name: "value", Type: "Float64"},
|
||||||
|
{Name: "flags", Type: "UInt32"},
|
||||||
|
}
|
||||||
|
|
||||||
|
// see Timestamps on base_rule
|
||||||
|
evalWindowMs := int64(5 * 60 * 1000) // 5 minutes in ms
|
||||||
|
evalTimeMs := evalTime.UnixMilli()
|
||||||
|
queryStart := ((evalTimeMs-2*evalWindowMs)/60000)*60000 + 1 // truncate to minute + 1ms
|
||||||
|
queryEnd := (evalTimeMs / 60000) * 60000 // truncate to minute
|
||||||
|
|
||||||
|
cases := []struct {
|
||||||
|
description string
|
||||||
|
alertOnAbsent bool
|
||||||
|
values []any
|
||||||
|
target float64
|
||||||
|
expectAlerts int
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
description: "AlertOnAbsent=false",
|
||||||
|
alertOnAbsent: false,
|
||||||
|
values: []any{},
|
||||||
|
target: 200,
|
||||||
|
expectAlerts: 0,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "AlertOnAbsent=true",
|
||||||
|
alertOnAbsent: true,
|
||||||
|
values: []any{},
|
||||||
|
target: 200,
|
||||||
|
expectAlerts: 1,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
logger := instrumentationtest.New().Logger()
|
||||||
|
|
||||||
|
for _, c := range cases {
|
||||||
|
t.Run(c.description, func(t *testing.T) {
|
||||||
|
postableRule.RuleCondition.AlertOnAbsent = c.alertOnAbsent
|
||||||
|
|
||||||
|
telemetryStore := telemetrystoretest.New(telemetrystore.Config{}, &queryMatcherAny{})
|
||||||
|
|
||||||
|
// single fingerprint with labels JSON
|
||||||
|
fingerprint := uint64(12345)
|
||||||
|
labelsJSON := `{"__name__":"test_metric"}`
|
||||||
|
telemetryStore.Mock().
|
||||||
|
ExpectQuery("SELECT fingerprint, any").
|
||||||
|
WithArgs("test_metric", "__name__", "test_metric").
|
||||||
|
WillReturnRows(cmock.NewRows(fingerprintCols, [][]any{{fingerprint, labelsJSON}}))
|
||||||
|
|
||||||
|
telemetryStore.Mock().
|
||||||
|
ExpectQuery("SELECT metric_name, fingerprint, unix_milli").
|
||||||
|
WithArgs("test_metric", "test_metric", "__name__", "test_metric", queryStart, queryEnd).
|
||||||
|
WillReturnRows(cmock.NewRows(samplesCols, [][]any{}))
|
||||||
|
|
||||||
|
promProvider := prometheustest.New(
|
||||||
|
context.Background(),
|
||||||
|
instrumentationtest.New().ToProviderSettings(),
|
||||||
|
prometheus.Config{},
|
||||||
|
telemetryStore,
|
||||||
|
)
|
||||||
|
defer func() {
|
||||||
|
_ = promProvider.Close()
|
||||||
|
}()
|
||||||
|
|
||||||
|
options := clickhouseReader.NewOptions("primaryNamespace")
|
||||||
|
reader := clickhouseReader.NewReader(nil, telemetryStore, promProvider, "", time.Second, nil, nil, options)
|
||||||
|
rule, err := NewPromRule("some-id", valuer.GenerateUUID(), &postableRule, logger, reader, promProvider)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
alertsFound, err := rule.Eval(context.Background(), evalTime)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
assert.Equal(t, c.expectAlerts, alertsFound)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestPromRule_NoData_AbsentFor(t *testing.T) {
|
||||||
|
// 1. Call Eval with data at time t1, to populate lastTimestampWithDatapoints
|
||||||
|
// 2. Call Eval without data at time t2
|
||||||
|
// 3. Alert fires only if t2 - t1 > AbsentFor
|
||||||
|
|
||||||
|
baseTime := time.Unix(1700000000, 0)
|
||||||
|
evalWindow := 5 * time.Minute
|
||||||
|
|
||||||
|
// Set target higher than test data (100.0) so regular threshold alerts don't fire
|
||||||
|
target := 500.0
|
||||||
|
|
||||||
|
postableRule := ruletypes.PostableRule{
|
||||||
|
AlertName: "Test no data with AbsentFor",
|
||||||
|
AlertType: ruletypes.AlertTypeMetric,
|
||||||
|
RuleType: ruletypes.RuleTypeProm,
|
||||||
|
Evaluation: &ruletypes.EvaluationEnvelope{Kind: ruletypes.RollingEvaluation, Spec: ruletypes.RollingWindow{
|
||||||
|
EvalWindow: ruletypes.Duration(evalWindow),
|
||||||
|
Frequency: ruletypes.Duration(1 * time.Minute),
|
||||||
|
}},
|
||||||
|
RuleCondition: &ruletypes.RuleCondition{
|
||||||
|
CompareOp: ruletypes.ValueIsAbove,
|
||||||
|
MatchType: ruletypes.AtleastOnce,
|
||||||
|
AlertOnAbsent: true,
|
||||||
|
Target: &target,
|
||||||
|
CompositeQuery: &v3.CompositeQuery{
|
||||||
|
QueryType: v3.QueryTypePromQL,
|
||||||
|
PromQueries: map[string]*v3.PromQuery{
|
||||||
|
"A": {Query: "test_metric"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Thresholds: &ruletypes.RuleThresholdData{
|
||||||
|
Kind: ruletypes.BasicThresholdKind,
|
||||||
|
Spec: ruletypes.BasicRuleThresholds{{
|
||||||
|
Name: "Test no data with AbsentFor",
|
||||||
|
TargetValue: &target,
|
||||||
|
MatchType: ruletypes.AtleastOnce,
|
||||||
|
CompareOp: ruletypes.ValueIsAbove,
|
||||||
|
}},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
fingerprintCols := []cmock.ColumnType{
|
||||||
|
{Name: "fingerprint", Type: "UInt64"},
|
||||||
|
{Name: "any(labels)", Type: "String"},
|
||||||
|
}
|
||||||
|
|
||||||
|
samplesCols := []cmock.ColumnType{
|
||||||
|
{Name: "metric_name", Type: "String"},
|
||||||
|
{Name: "fingerprint", Type: "UInt64"},
|
||||||
|
{Name: "unix_milli", Type: "Int64"},
|
||||||
|
{Name: "value", Type: "Float64"},
|
||||||
|
{Name: "flags", Type: "UInt32"},
|
||||||
|
}
|
||||||
|
|
||||||
|
cases := []struct {
|
||||||
|
description string
|
||||||
|
absentFor uint64 // grace period in minutes
|
||||||
|
timeBetweenEvals time.Duration // time between first eval (with data) and second eval (no data)
|
||||||
|
expectAlertOnEval2 int
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
description: "WithinGracePeriod",
|
||||||
|
absentFor: 5,
|
||||||
|
timeBetweenEvals: 4 * time.Minute,
|
||||||
|
expectAlertOnEval2: 0,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "AfterGracePeriod",
|
||||||
|
absentFor: 5,
|
||||||
|
timeBetweenEvals: 6 * time.Minute,
|
||||||
|
expectAlertOnEval2: 1,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
logger := instrumentationtest.New().Logger()
|
||||||
|
|
||||||
|
for _, c := range cases {
|
||||||
|
t.Run(c.description, func(t *testing.T) {
|
||||||
|
postableRule.RuleCondition.AbsentFor = c.absentFor
|
||||||
|
|
||||||
|
// Timestamps for two evaluations
|
||||||
|
// t1 is the eval time for first eval, data points are in the past
|
||||||
|
t1 := baseTime.Add(5 * time.Minute) // first eval with data
|
||||||
|
t2 := t1.Add(c.timeBetweenEvals) // second eval without data
|
||||||
|
|
||||||
|
telemetryStore := telemetrystoretest.New(telemetrystore.Config{}, &queryMatcherAny{})
|
||||||
|
|
||||||
|
fingerprint := uint64(12345)
|
||||||
|
labelsJSON := `{"__name__":"test_metric"}`
|
||||||
|
|
||||||
|
// Helper to calculate query time range for an eval time
|
||||||
|
calcQueryRange := func(evalTime time.Time) (int64, int64) {
|
||||||
|
evalTimeMs := evalTime.UnixMilli()
|
||||||
|
queryStart := ((evalTimeMs-2*evalWindow.Milliseconds())/60000)*60000 + 1
|
||||||
|
queryEnd := (evalTimeMs / 60000) * 60000
|
||||||
|
return queryStart, queryEnd
|
||||||
|
}
|
||||||
|
|
||||||
|
// First eval (t1) - with data
|
||||||
|
queryStart1, queryEnd1 := calcQueryRange(t1)
|
||||||
|
telemetryStore.Mock().
|
||||||
|
ExpectQuery("SELECT fingerprint, any").
|
||||||
|
WithArgs("test_metric", "__name__", "test_metric").
|
||||||
|
WillReturnRows(cmock.NewRows(fingerprintCols, [][]any{{fingerprint, labelsJSON}}))
|
||||||
|
telemetryStore.Mock().
|
||||||
|
ExpectQuery("SELECT metric_name, fingerprint, unix_milli").
|
||||||
|
WithArgs("test_metric", "test_metric", "__name__", "test_metric", queryStart1, queryEnd1).
|
||||||
|
WillReturnRows(cmock.NewRows(samplesCols, [][]any{
|
||||||
|
// Data points in the past relative to t1
|
||||||
|
{"test_metric", fingerprint, baseTime.UnixMilli(), 100.0, uint32(0)},
|
||||||
|
{"test_metric", fingerprint, baseTime.Add(1 * time.Minute).UnixMilli(), 100.0, uint32(0)},
|
||||||
|
{"test_metric", fingerprint, baseTime.Add(2 * time.Minute).UnixMilli(), 100.0, uint32(0)},
|
||||||
|
}))
|
||||||
|
|
||||||
|
// Second eval (t2) - no data
|
||||||
|
queryStart2, queryEnd2 := calcQueryRange(t2)
|
||||||
|
telemetryStore.Mock().
|
||||||
|
ExpectQuery("SELECT fingerprint, any").
|
||||||
|
WithArgs("test_metric", "__name__", "test_metric").
|
||||||
|
WillReturnRows(cmock.NewRows(fingerprintCols, [][]any{{fingerprint, labelsJSON}}))
|
||||||
|
telemetryStore.Mock().
|
||||||
|
ExpectQuery("SELECT metric_name, fingerprint, unix_milli").
|
||||||
|
WithArgs("test_metric", "test_metric", "__name__", "test_metric", queryStart2, queryEnd2).
|
||||||
|
WillReturnRows(cmock.NewRows(samplesCols, [][]any{})) // empty - no data
|
||||||
|
|
||||||
|
promProvider := prometheustest.New(
|
||||||
|
context.Background(),
|
||||||
|
instrumentationtest.New().ToProviderSettings(),
|
||||||
|
prometheus.Config{},
|
||||||
|
telemetryStore,
|
||||||
|
)
|
||||||
|
defer func() {
|
||||||
|
_ = promProvider.Close()
|
||||||
|
}()
|
||||||
|
|
||||||
|
options := clickhouseReader.NewOptions("primaryNamespace")
|
||||||
|
reader := clickhouseReader.NewReader(nil, telemetryStore, promProvider, "", time.Second, nil, nil, options)
|
||||||
|
rule, err := NewPromRule("some-id", valuer.GenerateUUID(), &postableRule, logger, reader, promProvider)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
// First eval with data - should NOT alert, but populates lastTimestampWithDatapoints
|
||||||
|
alertsFound1, err := rule.Eval(context.Background(), t1)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, 0, alertsFound1, "First eval with data should not alert")
|
||||||
|
|
||||||
|
// Second eval without data - should alert based on AbsentFor
|
||||||
|
alertsFound2, err := rule.Eval(context.Background(), t2)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, c.expectAlertOnEval2, alertsFound2)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestPromRuleEval_RequireMinPoints(t *testing.T) {
|
func TestPromRuleEval_RequireMinPoints(t *testing.T) {
|
||||||
// fixed base time for deterministic tests
|
// fixed base time for deterministic tests
|
||||||
baseTime := time.Unix(1700000000, 0)
|
baseTime := time.Unix(1700000000, 0)
|
||||||
|
|||||||
@@ -24,7 +24,6 @@ import (
|
|||||||
"github.com/SigNoz/signoz/pkg/query-service/app/querier"
|
"github.com/SigNoz/signoz/pkg/query-service/app/querier"
|
||||||
querierV2 "github.com/SigNoz/signoz/pkg/query-service/app/querier/v2"
|
querierV2 "github.com/SigNoz/signoz/pkg/query-service/app/querier/v2"
|
||||||
"github.com/SigNoz/signoz/pkg/query-service/app/queryBuilder"
|
"github.com/SigNoz/signoz/pkg/query-service/app/queryBuilder"
|
||||||
"github.com/SigNoz/signoz/pkg/query-service/constants"
|
|
||||||
"github.com/SigNoz/signoz/pkg/query-service/interfaces"
|
"github.com/SigNoz/signoz/pkg/query-service/interfaces"
|
||||||
v3 "github.com/SigNoz/signoz/pkg/query-service/model/v3"
|
v3 "github.com/SigNoz/signoz/pkg/query-service/model/v3"
|
||||||
"github.com/SigNoz/signoz/pkg/query-service/utils/labels"
|
"github.com/SigNoz/signoz/pkg/query-service/utils/labels"
|
||||||
@@ -462,26 +461,13 @@ func (r *ThresholdRule) buildAndRunQuery(ctx context.Context, orgID valuer.UUID,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if queryResult != nil && len(queryResult.Series) > 0 {
|
hasData := queryResult != nil && len(queryResult.Series) > 0
|
||||||
r.lastTimestampWithDatapoints = time.Now()
|
if missingDataAlert := r.HandleMissingDataAlert(ctx, ts, hasData); missingDataAlert != nil {
|
||||||
|
return ruletypes.Vector{*missingDataAlert}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
var resultVector ruletypes.Vector
|
var resultVector ruletypes.Vector
|
||||||
|
|
||||||
// if the data is missing for `For` duration then we should send alert
|
|
||||||
if r.ruleCondition.AlertOnAbsent && r.lastTimestampWithDatapoints.Add(time.Duration(r.Condition().AbsentFor)*time.Minute).Before(time.Now()) {
|
|
||||||
r.logger.InfoContext(ctx, "no data found for rule condition", "rule_id", r.ID())
|
|
||||||
lbls := labels.NewBuilder(labels.Labels{})
|
|
||||||
if !r.lastTimestampWithDatapoints.IsZero() {
|
|
||||||
lbls.Set(ruletypes.LabelLastSeen, r.lastTimestampWithDatapoints.Format(constants.AlertTimeFormat))
|
|
||||||
}
|
|
||||||
resultVector = append(resultVector, ruletypes.Sample{
|
|
||||||
Metric: lbls.Labels(),
|
|
||||||
IsMissing: true,
|
|
||||||
})
|
|
||||||
return resultVector, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
if queryResult == nil {
|
if queryResult == nil {
|
||||||
r.logger.WarnContext(ctx, "query result is nil", "rule_name", r.Name(), "query_name", selectedQuery)
|
r.logger.WarnContext(ctx, "query result is nil", "rule_name", r.Name(), "query_name", selectedQuery)
|
||||||
return resultVector, nil
|
return resultVector, nil
|
||||||
@@ -538,26 +524,13 @@ func (r *ThresholdRule) buildAndRunQueryV5(ctx context.Context, orgID valuer.UUI
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if queryResult != nil && len(queryResult.Series) > 0 {
|
hasData := queryResult != nil && len(queryResult.Series) > 0
|
||||||
r.lastTimestampWithDatapoints = time.Now()
|
if missingDataAlert := r.HandleMissingDataAlert(ctx, ts, hasData); missingDataAlert != nil {
|
||||||
|
return ruletypes.Vector{*missingDataAlert}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
var resultVector ruletypes.Vector
|
var resultVector ruletypes.Vector
|
||||||
|
|
||||||
// if the data is missing for `For` duration then we should send alert
|
|
||||||
if r.ruleCondition.AlertOnAbsent && r.lastTimestampWithDatapoints.Add(time.Duration(r.Condition().AbsentFor)*time.Minute).Before(time.Now()) {
|
|
||||||
r.logger.InfoContext(ctx, "no data found for rule condition", "rule_id", r.ID())
|
|
||||||
lbls := labels.NewBuilder(labels.Labels{})
|
|
||||||
if !r.lastTimestampWithDatapoints.IsZero() {
|
|
||||||
lbls.Set(ruletypes.LabelLastSeen, r.lastTimestampWithDatapoints.Format(constants.AlertTimeFormat))
|
|
||||||
}
|
|
||||||
resultVector = append(resultVector, ruletypes.Sample{
|
|
||||||
Metric: lbls.Labels(),
|
|
||||||
IsMissing: true,
|
|
||||||
})
|
|
||||||
return resultVector, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
if queryResult == nil {
|
if queryResult == nil {
|
||||||
r.logger.WarnContext(ctx, "query result is nil", "rule_name", r.Name(), "query_name", selectedQuery)
|
r.logger.WarnContext(ctx, "query result is nil", "rule_name", r.Name(), "query_name", selectedQuery)
|
||||||
return resultVector, nil
|
return resultVector, nil
|
||||||
|
|||||||
Reference in New Issue
Block a user