mirror of
https://github.com/SigNoz/signoz.git
synced 2026-03-31 17:40:25 +01:00
Compare commits
4 Commits
main
...
remove-sup
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b02be1c764 | ||
|
|
abfaf3f890 | ||
|
|
eb724b9451 | ||
|
|
640bc19ffe |
@@ -49,7 +49,6 @@ import (
|
||||
opAmpModel "github.com/SigNoz/signoz/pkg/query-service/app/opamp/model"
|
||||
baseconst "github.com/SigNoz/signoz/pkg/query-service/constants"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/healthcheck"
|
||||
baseint "github.com/SigNoz/signoz/pkg/query-service/interfaces"
|
||||
baserules "github.com/SigNoz/signoz/pkg/query-service/rules"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/utils"
|
||||
)
|
||||
@@ -99,7 +98,6 @@ func NewServer(config signoz.Config, signoz *signoz.SigNoz) (*Server, error) {
|
||||
)
|
||||
|
||||
rm, err := makeRulesManager(
|
||||
reader,
|
||||
signoz.Cache,
|
||||
signoz.Alertmanager,
|
||||
signoz.SQLStore,
|
||||
@@ -345,7 +343,7 @@ func (s *Server) Stop(ctx context.Context) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func makeRulesManager(ch baseint.Reader, cache cache.Cache, alertmanager alertmanager.Alertmanager, sqlstore sqlstore.SQLStore, telemetryStore telemetrystore.TelemetryStore, metadataStore telemetrytypes.MetadataStore, prometheus prometheus.Prometheus, orgGetter organization.Getter, ruleStateHistoryModule rulestatehistory.Module, querier querier.Querier, providerSettings factory.ProviderSettings, queryParser queryparser.QueryParser) (*baserules.Manager, error) {
|
||||
func makeRulesManager(cache cache.Cache, alertmanager alertmanager.Alertmanager, sqlstore sqlstore.SQLStore, telemetryStore telemetrystore.TelemetryStore, metadataStore telemetrytypes.MetadataStore, prometheus prometheus.Prometheus, orgGetter organization.Getter, ruleStateHistoryModule rulestatehistory.Module, querier querier.Querier, providerSettings factory.ProviderSettings, queryParser queryparser.QueryParser) (*baserules.Manager, error) {
|
||||
ruleStore := sqlrulestore.NewRuleStore(sqlstore, queryParser, providerSettings)
|
||||
maintenanceStore := sqlrulestore.NewMaintenanceStore(sqlstore)
|
||||
// create manager opts
|
||||
@@ -354,7 +352,6 @@ func makeRulesManager(ch baseint.Reader, cache cache.Cache, alertmanager alertma
|
||||
MetadataStore: metadataStore,
|
||||
Prometheus: prometheus,
|
||||
Context: context.Background(),
|
||||
Reader: ch,
|
||||
Querier: querier,
|
||||
Logger: providerSettings.Logger,
|
||||
Cache: cache,
|
||||
@@ -365,7 +362,7 @@ func makeRulesManager(ch baseint.Reader, cache cache.Cache, alertmanager alertma
|
||||
OrgGetter: orgGetter,
|
||||
RuleStore: ruleStore,
|
||||
MaintenanceStore: maintenanceStore,
|
||||
SqlStore: sqlstore,
|
||||
SQLStore: sqlstore,
|
||||
QueryParser: queryParser,
|
||||
RuleStateHistoryModule: ruleStateHistoryModule,
|
||||
}
|
||||
|
||||
@@ -5,41 +5,30 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"math"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/SigNoz/signoz/ee/query-service/anomaly"
|
||||
"github.com/SigNoz/signoz/pkg/cache"
|
||||
"github.com/SigNoz/signoz/pkg/errors"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/common"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/model"
|
||||
"github.com/SigNoz/signoz/pkg/transition"
|
||||
"github.com/SigNoz/signoz/pkg/querier"
|
||||
"github.com/SigNoz/signoz/pkg/types/rulestatehistorytypes"
|
||||
"github.com/SigNoz/signoz/pkg/types/ruletypes"
|
||||
"github.com/SigNoz/signoz/pkg/valuer"
|
||||
|
||||
querierV2 "github.com/SigNoz/signoz/pkg/query-service/app/querier/v2"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/app/queryBuilder"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/interfaces"
|
||||
v3 "github.com/SigNoz/signoz/pkg/query-service/model/v3"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/utils/labels"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/utils/times"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/utils/timestamp"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/units"
|
||||
|
||||
baserules "github.com/SigNoz/signoz/pkg/query-service/rules"
|
||||
|
||||
querierV5 "github.com/SigNoz/signoz/pkg/querier"
|
||||
|
||||
anomalyV2 "github.com/SigNoz/signoz/ee/anomaly"
|
||||
"github.com/SigNoz/signoz/ee/anomaly"
|
||||
|
||||
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
|
||||
)
|
||||
|
||||
const (
|
||||
RuleTypeAnomaly = "anomaly_rule"
|
||||
var (
|
||||
RuleTypeAnomaly = ruletypes.RuleType{String: valuer.NewString("anomaly_rule")}
|
||||
)
|
||||
|
||||
type AnomalyRule struct {
|
||||
@@ -47,16 +36,10 @@ type AnomalyRule struct {
|
||||
|
||||
mtx sync.Mutex
|
||||
|
||||
reader interfaces.Reader
|
||||
|
||||
// querierV2 is used for alerts created after the introduction of new metrics query builder
|
||||
querierV2 interfaces.Querier
|
||||
|
||||
// querierV5 is used for alerts migrated after the introduction of new query builder
|
||||
querierV5 querierV5.Querier
|
||||
querier querierV5.Querier
|
||||
|
||||
provider anomaly.Provider
|
||||
providerV2 anomalyV2.Provider
|
||||
provider anomaly.Provider
|
||||
|
||||
version string
|
||||
logger *slog.Logger
|
||||
@@ -70,8 +53,7 @@ func NewAnomalyRule(
|
||||
id string,
|
||||
orgID valuer.UUID,
|
||||
p *ruletypes.PostableRule,
|
||||
reader interfaces.Reader,
|
||||
querierV5 querierV5.Querier,
|
||||
querier querier.Querier,
|
||||
logger *slog.Logger,
|
||||
cache cache.Cache,
|
||||
opts ...baserules.RuleOption,
|
||||
@@ -81,7 +63,7 @@ func NewAnomalyRule(
|
||||
|
||||
opts = append(opts, baserules.WithLogger(logger))
|
||||
|
||||
baseRule, err := baserules.NewBaseRule(id, orgID, p, reader, opts...)
|
||||
baseRule, err := baserules.NewBaseRule(id, orgID, p, opts...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -101,54 +83,26 @@ func NewAnomalyRule(
|
||||
t.seasonality = anomaly.SeasonalityDaily
|
||||
}
|
||||
|
||||
logger.Info("using seasonality", "seasonality", t.seasonality.String())
|
||||
logger.Info("using seasonality", "seasonality", t.seasonality)
|
||||
|
||||
querierOptsV2 := querierV2.QuerierOptions{
|
||||
Reader: reader,
|
||||
Cache: cache,
|
||||
KeyGenerator: queryBuilder.NewKeyGenerator(),
|
||||
}
|
||||
|
||||
t.querierV2 = querierV2.NewQuerier(querierOptsV2)
|
||||
t.reader = reader
|
||||
if t.seasonality == anomaly.SeasonalityHourly {
|
||||
t.provider = anomaly.NewHourlyProvider(
|
||||
anomaly.WithCache[*anomaly.HourlyProvider](cache),
|
||||
anomaly.WithKeyGenerator[*anomaly.HourlyProvider](queryBuilder.NewKeyGenerator()),
|
||||
anomaly.WithReader[*anomaly.HourlyProvider](reader),
|
||||
anomaly.WithQuerier[*anomaly.HourlyProvider](querier),
|
||||
anomaly.WithLogger[*anomaly.HourlyProvider](logger),
|
||||
)
|
||||
} else if t.seasonality == anomaly.SeasonalityDaily {
|
||||
t.provider = anomaly.NewDailyProvider(
|
||||
anomaly.WithCache[*anomaly.DailyProvider](cache),
|
||||
anomaly.WithKeyGenerator[*anomaly.DailyProvider](queryBuilder.NewKeyGenerator()),
|
||||
anomaly.WithReader[*anomaly.DailyProvider](reader),
|
||||
anomaly.WithQuerier[*anomaly.DailyProvider](querier),
|
||||
anomaly.WithLogger[*anomaly.DailyProvider](logger),
|
||||
)
|
||||
} else if t.seasonality == anomaly.SeasonalityWeekly {
|
||||
t.provider = anomaly.NewWeeklyProvider(
|
||||
anomaly.WithCache[*anomaly.WeeklyProvider](cache),
|
||||
anomaly.WithKeyGenerator[*anomaly.WeeklyProvider](queryBuilder.NewKeyGenerator()),
|
||||
anomaly.WithReader[*anomaly.WeeklyProvider](reader),
|
||||
anomaly.WithQuerier[*anomaly.WeeklyProvider](querier),
|
||||
anomaly.WithLogger[*anomaly.WeeklyProvider](logger),
|
||||
)
|
||||
}
|
||||
|
||||
if t.seasonality == anomaly.SeasonalityHourly {
|
||||
t.providerV2 = anomalyV2.NewHourlyProvider(
|
||||
anomalyV2.WithQuerier[*anomalyV2.HourlyProvider](querierV5),
|
||||
anomalyV2.WithLogger[*anomalyV2.HourlyProvider](logger),
|
||||
)
|
||||
} else if t.seasonality == anomaly.SeasonalityDaily {
|
||||
t.providerV2 = anomalyV2.NewDailyProvider(
|
||||
anomalyV2.WithQuerier[*anomalyV2.DailyProvider](querierV5),
|
||||
anomalyV2.WithLogger[*anomalyV2.DailyProvider](logger),
|
||||
)
|
||||
} else if t.seasonality == anomaly.SeasonalityWeekly {
|
||||
t.providerV2 = anomalyV2.NewWeeklyProvider(
|
||||
anomalyV2.WithQuerier[*anomalyV2.WeeklyProvider](querierV5),
|
||||
anomalyV2.WithLogger[*anomalyV2.WeeklyProvider](logger),
|
||||
)
|
||||
}
|
||||
|
||||
t.querierV5 = querierV5
|
||||
t.querier = querier
|
||||
t.version = p.Version
|
||||
t.logger = logger
|
||||
return &t, nil
|
||||
@@ -158,34 +112,7 @@ func (r *AnomalyRule) Type() ruletypes.RuleType {
|
||||
return RuleTypeAnomaly
|
||||
}
|
||||
|
||||
func (r *AnomalyRule) prepareQueryRange(ctx context.Context, ts time.Time) (*v3.QueryRangeParamsV3, error) {
|
||||
|
||||
r.logger.InfoContext(
|
||||
ctx, "prepare query range request v4", "ts", ts.UnixMilli(), "eval_window", r.EvalWindow().Milliseconds(), "eval_delay", r.EvalDelay().Milliseconds(),
|
||||
)
|
||||
|
||||
st, en := r.Timestamps(ts)
|
||||
start := st.UnixMilli()
|
||||
end := en.UnixMilli()
|
||||
|
||||
compositeQuery := r.Condition().CompositeQuery
|
||||
|
||||
if compositeQuery.PanelType != v3.PanelTypeGraph {
|
||||
compositeQuery.PanelType = v3.PanelTypeGraph
|
||||
}
|
||||
|
||||
// default mode
|
||||
return &v3.QueryRangeParamsV3{
|
||||
Start: start,
|
||||
End: end,
|
||||
Step: int64(math.Max(float64(common.MinAllowedStepInterval(start, end)), 60)),
|
||||
CompositeQuery: compositeQuery,
|
||||
Variables: make(map[string]interface{}, 0),
|
||||
NoCache: false,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (r *AnomalyRule) prepareQueryRangeV5(ctx context.Context, ts time.Time) (*qbtypes.QueryRangeRequest, error) {
|
||||
func (r *AnomalyRule) prepareQueryRange(ctx context.Context, ts time.Time) (*qbtypes.QueryRangeRequest, error) {
|
||||
|
||||
r.logger.InfoContext(ctx, "prepare query range request v5", "ts", ts.UnixMilli(), "eval_window", r.EvalWindow().Milliseconds(), "eval_delay", r.EvalDelay().Milliseconds())
|
||||
|
||||
@@ -207,7 +134,7 @@ func (r *AnomalyRule) prepareQueryRangeV5(ctx context.Context, ts time.Time) (*q
|
||||
}
|
||||
|
||||
func (r *AnomalyRule) GetSelectedQuery() string {
|
||||
return r.Condition().GetSelectedQueryName()
|
||||
return r.Condition().SelectedQueryName()
|
||||
}
|
||||
|
||||
func (r *AnomalyRule) buildAndRunQuery(ctx context.Context, orgID valuer.UUID, ts time.Time) (ruletypes.Vector, error) {
|
||||
@@ -216,20 +143,16 @@ func (r *AnomalyRule) buildAndRunQuery(ctx context.Context, orgID valuer.UUID, t
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
err = r.PopulateTemporality(ctx, orgID, params)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("internal error while setting temporality")
|
||||
}
|
||||
|
||||
anomalies, err := r.provider.GetAnomalies(ctx, orgID, &anomaly.GetAnomaliesRequest{
|
||||
Params: params,
|
||||
anomalies, err := r.provider.GetAnomalies(ctx, orgID, &anomaly.AnomaliesRequest{
|
||||
Params: *params,
|
||||
Seasonality: r.seasonality,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var queryResult *v3.Result
|
||||
var queryResult *qbtypes.TimeSeriesData
|
||||
for _, result := range anomalies.Results {
|
||||
if result.QueryName == r.GetSelectedQuery() {
|
||||
queryResult = result
|
||||
@@ -237,74 +160,26 @@ func (r *AnomalyRule) buildAndRunQuery(ctx context.Context, orgID valuer.UUID, t
|
||||
}
|
||||
}
|
||||
|
||||
hasData := len(queryResult.AnomalyScores) > 0
|
||||
if missingDataAlert := r.HandleMissingDataAlert(ctx, ts, hasData); missingDataAlert != nil {
|
||||
return ruletypes.Vector{*missingDataAlert}, nil
|
||||
}
|
||||
|
||||
var resultVector ruletypes.Vector
|
||||
|
||||
scoresJSON, _ := json.Marshal(queryResult.AnomalyScores)
|
||||
r.logger.InfoContext(ctx, "anomaly scores", "scores", string(scoresJSON))
|
||||
|
||||
for _, series := range queryResult.AnomalyScores {
|
||||
if !r.Condition().ShouldEval(series) {
|
||||
r.logger.InfoContext(ctx, "not enough data points to evaluate series, skipping", "ruleid", r.ID(), "numPoints", len(series.Points), "requiredPoints", r.Condition().RequiredNumPoints)
|
||||
continue
|
||||
}
|
||||
results, err := r.Threshold.Eval(*series, r.Unit(), ruletypes.EvalData{
|
||||
ActiveAlerts: r.ActiveAlertsLabelFP(),
|
||||
SendUnmatched: r.ShouldSendUnmatched(),
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
resultVector = append(resultVector, results...)
|
||||
}
|
||||
return resultVector, nil
|
||||
}
|
||||
|
||||
func (r *AnomalyRule) buildAndRunQueryV5(ctx context.Context, orgID valuer.UUID, ts time.Time) (ruletypes.Vector, error) {
|
||||
|
||||
params, err := r.prepareQueryRangeV5(ctx, ts)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
anomalies, err := r.providerV2.GetAnomalies(ctx, orgID, &anomalyV2.AnomaliesRequest{
|
||||
Params: *params,
|
||||
Seasonality: anomalyV2.Seasonality{String: valuer.NewString(r.seasonality.String())},
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var qbResult *qbtypes.TimeSeriesData
|
||||
for _, result := range anomalies.Results {
|
||||
if result.QueryName == r.GetSelectedQuery() {
|
||||
qbResult = result
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if qbResult == nil {
|
||||
if queryResult == nil {
|
||||
r.logger.WarnContext(ctx, "nil qb result", "ts", ts.UnixMilli())
|
||||
return ruletypes.Vector{}, nil
|
||||
}
|
||||
|
||||
queryResult := transition.ConvertV5TimeSeriesDataToV4Result(qbResult)
|
||||
hasData := len(queryResult.Aggregations) > 0 &&
|
||||
queryResult.Aggregations[0] != nil &&
|
||||
len(queryResult.Aggregations[0].AnomalyScores) > 0
|
||||
|
||||
hasData := len(queryResult.AnomalyScores) > 0
|
||||
if missingDataAlert := r.HandleMissingDataAlert(ctx, ts, hasData); missingDataAlert != nil {
|
||||
return ruletypes.Vector{*missingDataAlert}, nil
|
||||
}
|
||||
|
||||
var resultVector ruletypes.Vector
|
||||
|
||||
scoresJSON, _ := json.Marshal(queryResult.AnomalyScores)
|
||||
scoresJSON, _ := json.Marshal(queryResult.Aggregations[0].AnomalyScores)
|
||||
r.logger.InfoContext(ctx, "anomaly scores", "scores", string(scoresJSON))
|
||||
|
||||
// Filter out new series if newGroupEvalDelay is configured
|
||||
seriesToProcess := queryResult.AnomalyScores
|
||||
seriesToProcess := queryResult.Aggregations[0].AnomalyScores
|
||||
if r.ShouldSkipNewGroups() {
|
||||
filteredSeries, filterErr := r.BaseRule.FilterNewSeries(ctx, ts, seriesToProcess)
|
||||
// In case of error we log the error and continue with the original series
|
||||
@@ -317,10 +192,10 @@ func (r *AnomalyRule) buildAndRunQueryV5(ctx context.Context, orgID valuer.UUID,
|
||||
|
||||
for _, series := range seriesToProcess {
|
||||
if !r.Condition().ShouldEval(series) {
|
||||
r.logger.InfoContext(ctx, "not enough data points to evaluate series, skipping", "ruleid", r.ID(), "numPoints", len(series.Points), "requiredPoints", r.Condition().RequiredNumPoints)
|
||||
r.logger.InfoContext(ctx, "not enough data points to evaluate series, skipping", "ruleid", r.ID(), "numPoints", len(series.Values), "requiredPoints", r.Condition().RequiredNumPoints)
|
||||
continue
|
||||
}
|
||||
results, err := r.Threshold.Eval(*series, r.Unit(), ruletypes.EvalData{
|
||||
results, err := r.Threshold.Eval(series, r.Unit(), ruletypes.EvalData{
|
||||
ActiveAlerts: r.ActiveAlertsLabelFP(),
|
||||
SendUnmatched: r.ShouldSendUnmatched(),
|
||||
})
|
||||
@@ -341,13 +216,9 @@ func (r *AnomalyRule) Eval(ctx context.Context, ts time.Time) (int, error) {
|
||||
var res ruletypes.Vector
|
||||
var err error
|
||||
|
||||
if r.version == "v5" {
|
||||
r.logger.InfoContext(ctx, "running v5 query")
|
||||
res, err = r.buildAndRunQueryV5(ctx, r.OrgID(), ts)
|
||||
} else {
|
||||
r.logger.InfoContext(ctx, "running v4 query")
|
||||
res, err = r.buildAndRunQuery(ctx, r.OrgID(), ts)
|
||||
}
|
||||
r.logger.InfoContext(ctx, "running v5 query")
|
||||
res, err = r.buildAndRunQuery(ctx, r.OrgID(), ts)
|
||||
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
@@ -386,7 +257,6 @@ func (r *AnomalyRule) Eval(ctx context.Context, ts time.Time) (int, error) {
|
||||
defs+text,
|
||||
"__alert_"+r.Name(),
|
||||
tmplData,
|
||||
times.Time(timestamp.FromTime(ts)),
|
||||
nil,
|
||||
)
|
||||
result, err := tmpl.Expand()
|
||||
@@ -397,24 +267,24 @@ func (r *AnomalyRule) Eval(ctx context.Context, ts time.Time) (int, error) {
|
||||
return result
|
||||
}
|
||||
|
||||
lb := labels.NewBuilder(smpl.Metric).Del(labels.MetricNameLabel).Del(labels.TemporalityLabel)
|
||||
resultLabels := labels.NewBuilder(smpl.Metric).Del(labels.MetricNameLabel).Del(labels.TemporalityLabel).Labels()
|
||||
lb := ruletypes.NewBuilder(smpl.Metric...).Del(ruletypes.MetricNameLabel).Del(ruletypes.TemporalityLabel)
|
||||
resultLabels := ruletypes.NewBuilder(smpl.Metric...).Del(ruletypes.MetricNameLabel).Del(ruletypes.TemporalityLabel).Labels()
|
||||
|
||||
for name, value := range r.Labels().Map() {
|
||||
lb.Set(name, expand(value))
|
||||
}
|
||||
|
||||
lb.Set(labels.AlertNameLabel, r.Name())
|
||||
lb.Set(labels.AlertRuleIdLabel, r.ID())
|
||||
lb.Set(labels.RuleSourceLabel, r.GeneratorURL())
|
||||
lb.Set(ruletypes.AlertNameLabel, r.Name())
|
||||
lb.Set(ruletypes.AlertRuleIDLabel, r.ID())
|
||||
lb.Set(ruletypes.RuleSourceLabel, r.GeneratorURL())
|
||||
|
||||
annotations := make(labels.Labels, 0, len(r.Annotations().Map()))
|
||||
annotations := make(ruletypes.Labels, 0, len(r.Annotations().Map()))
|
||||
for name, value := range r.Annotations().Map() {
|
||||
annotations = append(annotations, labels.Label{Name: name, Value: expand(value)})
|
||||
annotations = append(annotations, ruletypes.Label{Name: name, Value: expand(value)})
|
||||
}
|
||||
if smpl.IsMissing {
|
||||
lb.Set(labels.AlertNameLabel, "[No data] "+r.Name())
|
||||
lb.Set(labels.NoDataLabel, "true")
|
||||
lb.Set(ruletypes.AlertNameLabel, "[No data] "+r.Name())
|
||||
lb.Set(ruletypes.NoDataLabel, "true")
|
||||
}
|
||||
|
||||
lbs := lb.Labels()
|
||||
@@ -423,16 +293,16 @@ func (r *AnomalyRule) Eval(ctx context.Context, ts time.Time) (int, error) {
|
||||
|
||||
if _, ok := alerts[h]; ok {
|
||||
r.logger.ErrorContext(ctx, "the alert query returns duplicate records", "rule_id", r.ID(), "alert", alerts[h])
|
||||
err = fmt.Errorf("duplicate alert found, vector contains metrics with the same labelset after applying alert labels")
|
||||
err = errors.NewInternalf(errors.CodeInternal, "duplicate alert found, vector contains metrics with the same labelset after applying alert labels")
|
||||
return 0, err
|
||||
}
|
||||
|
||||
alerts[h] = &ruletypes.Alert{
|
||||
Labels: lbs,
|
||||
QueryResultLables: resultLabels,
|
||||
QueryResultLabels: resultLabels,
|
||||
Annotations: annotations,
|
||||
ActiveAt: ts,
|
||||
State: model.StatePending,
|
||||
State: ruletypes.StatePending,
|
||||
Value: smpl.V,
|
||||
GeneratorURL: r.GeneratorURL(),
|
||||
Receivers: ruleReceiverMap[lbs.Map()[ruletypes.LabelThresholdName]],
|
||||
@@ -446,7 +316,7 @@ func (r *AnomalyRule) Eval(ctx context.Context, ts time.Time) (int, error) {
|
||||
for h, a := range alerts {
|
||||
// Check whether we already have alerting state for the identifying label set.
|
||||
// Update the last value and annotations if so, create a new alert entry otherwise.
|
||||
if alert, ok := r.Active[h]; ok && alert.State != model.StateInactive {
|
||||
if alert, ok := r.Active[h]; ok && alert.State != ruletypes.StateInactive {
|
||||
|
||||
alert.Value = a.Value
|
||||
alert.Annotations = a.Annotations
|
||||
@@ -462,76 +332,76 @@ func (r *AnomalyRule) Eval(ctx context.Context, ts time.Time) (int, error) {
|
||||
r.Active[h] = a
|
||||
}
|
||||
|
||||
itemsToAdd := []model.RuleStateHistory{}
|
||||
itemsToAdd := []rulestatehistorytypes.RuleStateHistory{}
|
||||
|
||||
// Check if any pending alerts should be removed or fire now. Write out alert timeseries.
|
||||
for fp, a := range r.Active {
|
||||
labelsJSON, err := json.Marshal(a.QueryResultLables)
|
||||
labelsJSON, err := json.Marshal(a.QueryResultLabels)
|
||||
if err != nil {
|
||||
r.logger.ErrorContext(ctx, "error marshaling labels", errors.Attr(err), "labels", a.Labels)
|
||||
}
|
||||
if _, ok := resultFPs[fp]; !ok {
|
||||
// If the alert was previously firing, keep it around for a given
|
||||
// retention time so it is reported as resolved to the AlertManager.
|
||||
if a.State == model.StatePending || (!a.ResolvedAt.IsZero() && ts.Sub(a.ResolvedAt) > ruletypes.ResolvedRetention) {
|
||||
if a.State == ruletypes.StatePending || (!a.ResolvedAt.IsZero() && ts.Sub(a.ResolvedAt) > ruletypes.ResolvedRetention) {
|
||||
delete(r.Active, fp)
|
||||
}
|
||||
if a.State != model.StateInactive {
|
||||
a.State = model.StateInactive
|
||||
if a.State != ruletypes.StateInactive {
|
||||
a.State = ruletypes.StateInactive
|
||||
a.ResolvedAt = ts
|
||||
itemsToAdd = append(itemsToAdd, model.RuleStateHistory{
|
||||
itemsToAdd = append(itemsToAdd, rulestatehistorytypes.RuleStateHistory{
|
||||
RuleID: r.ID(),
|
||||
RuleName: r.Name(),
|
||||
State: model.StateInactive,
|
||||
State: ruletypes.StateInactive,
|
||||
StateChanged: true,
|
||||
UnixMilli: ts.UnixMilli(),
|
||||
Labels: model.LabelsString(labelsJSON),
|
||||
Fingerprint: a.QueryResultLables.Hash(),
|
||||
Labels: rulestatehistorytypes.LabelsString(labelsJSON),
|
||||
Fingerprint: a.QueryResultLabels.Hash(),
|
||||
Value: a.Value,
|
||||
})
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if a.State == model.StatePending && ts.Sub(a.ActiveAt) >= r.HoldDuration().Duration() {
|
||||
a.State = model.StateFiring
|
||||
if a.State == ruletypes.StatePending && ts.Sub(a.ActiveAt) >= r.HoldDuration().Duration() {
|
||||
a.State = ruletypes.StateFiring
|
||||
a.FiredAt = ts
|
||||
state := model.StateFiring
|
||||
state := ruletypes.StateFiring
|
||||
if a.Missing {
|
||||
state = model.StateNoData
|
||||
state = ruletypes.StateNoData
|
||||
}
|
||||
itemsToAdd = append(itemsToAdd, model.RuleStateHistory{
|
||||
itemsToAdd = append(itemsToAdd, rulestatehistorytypes.RuleStateHistory{
|
||||
RuleID: r.ID(),
|
||||
RuleName: r.Name(),
|
||||
State: state,
|
||||
StateChanged: true,
|
||||
UnixMilli: ts.UnixMilli(),
|
||||
Labels: model.LabelsString(labelsJSON),
|
||||
Fingerprint: a.QueryResultLables.Hash(),
|
||||
Labels: rulestatehistorytypes.LabelsString(labelsJSON),
|
||||
Fingerprint: a.QueryResultLabels.Hash(),
|
||||
Value: a.Value,
|
||||
})
|
||||
}
|
||||
|
||||
// We need to change firing alert to recovering if the returned sample meets recovery threshold
|
||||
changeFiringToRecovering := a.State == model.StateFiring && a.IsRecovering
|
||||
changeFiringToRecovering := a.State == ruletypes.StateFiring && a.IsRecovering
|
||||
// We need to change recovering alerts to firing if the returned sample meets target threshold
|
||||
changeRecoveringToFiring := a.State == model.StateRecovering && !a.IsRecovering && !a.Missing
|
||||
changeRecoveringToFiring := a.State == ruletypes.StateRecovering && !a.IsRecovering && !a.Missing
|
||||
// in any of the above case we need to update the status of alert
|
||||
if changeFiringToRecovering || changeRecoveringToFiring {
|
||||
state := model.StateRecovering
|
||||
state := ruletypes.StateRecovering
|
||||
if changeRecoveringToFiring {
|
||||
state = model.StateFiring
|
||||
state = ruletypes.StateFiring
|
||||
}
|
||||
a.State = state
|
||||
r.logger.DebugContext(ctx, "converting alert state", "name", r.Name(), "state", state)
|
||||
itemsToAdd = append(itemsToAdd, model.RuleStateHistory{
|
||||
itemsToAdd = append(itemsToAdd, rulestatehistorytypes.RuleStateHistory{
|
||||
RuleID: r.ID(),
|
||||
RuleName: r.Name(),
|
||||
State: state,
|
||||
StateChanged: true,
|
||||
UnixMilli: ts.UnixMilli(),
|
||||
Labels: model.LabelsString(labelsJSON),
|
||||
Fingerprint: a.QueryResultLables.Hash(),
|
||||
Labels: rulestatehistorytypes.LabelsString(labelsJSON),
|
||||
Fingerprint: a.QueryResultLabels.Hash(),
|
||||
Value: a.Value,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -2,21 +2,19 @@ package rules
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log/slog"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/SigNoz/signoz/ee/query-service/anomaly"
|
||||
"github.com/SigNoz/signoz/pkg/instrumentation/instrumentationtest"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/app/clickhouseReader"
|
||||
v3 "github.com/SigNoz/signoz/pkg/query-service/model/v3"
|
||||
"github.com/SigNoz/signoz/pkg/telemetrystore"
|
||||
"github.com/SigNoz/signoz/pkg/telemetrystore/telemetrystoretest"
|
||||
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
|
||||
"github.com/SigNoz/signoz/pkg/types/ruletypes"
|
||||
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
|
||||
"github.com/SigNoz/signoz/pkg/valuer"
|
||||
|
||||
"github.com/SigNoz/signoz/ee/anomaly"
|
||||
)
|
||||
|
||||
// mockAnomalyProvider is a mock implementation of anomaly.Provider for testing.
|
||||
@@ -24,13 +22,13 @@ import (
|
||||
// time periods (current, past period, current season, past season, past 2 seasons,
|
||||
// past 3 seasons), making it cumbersome to create mock data.
|
||||
type mockAnomalyProvider struct {
|
||||
responses []*anomaly.GetAnomaliesResponse
|
||||
responses []*anomaly.AnomaliesResponse
|
||||
callCount int
|
||||
}
|
||||
|
||||
func (m *mockAnomalyProvider) GetAnomalies(ctx context.Context, orgID valuer.UUID, req *anomaly.GetAnomaliesRequest) (*anomaly.GetAnomaliesResponse, error) {
|
||||
func (m *mockAnomalyProvider) GetAnomalies(ctx context.Context, orgID valuer.UUID, req *anomaly.AnomaliesRequest) (*anomaly.AnomaliesResponse, error) {
|
||||
if m.callCount >= len(m.responses) {
|
||||
return &anomaly.GetAnomaliesResponse{Results: []*v3.Result{}}, nil
|
||||
return &anomaly.AnomaliesResponse{Results: []*qbtypes.TimeSeriesData{}}, nil
|
||||
}
|
||||
resp := m.responses[m.callCount]
|
||||
m.callCount++
|
||||
@@ -55,39 +53,40 @@ func TestAnomalyRule_NoData_AlertOnAbsent(t *testing.T) {
|
||||
Frequency: valuer.MustParseTextDuration("1m"),
|
||||
}},
|
||||
RuleCondition: &ruletypes.RuleCondition{
|
||||
CompareOp: ruletypes.ValueIsAbove,
|
||||
MatchType: ruletypes.AtleastOnce,
|
||||
Target: &target,
|
||||
CompositeQuery: &v3.CompositeQuery{
|
||||
QueryType: v3.QueryTypeBuilder,
|
||||
BuilderQueries: map[string]*v3.BuilderQuery{
|
||||
"A": {
|
||||
QueryName: "A",
|
||||
Expression: "A",
|
||||
DataSource: v3.DataSourceMetrics,
|
||||
Temporality: v3.Unspecified,
|
||||
CompareOperator: ruletypes.ValueIsAbove,
|
||||
MatchType: ruletypes.AtleastOnce,
|
||||
Target: &target,
|
||||
CompositeQuery: &ruletypes.AlertCompositeQuery{
|
||||
QueryType: ruletypes.QueryTypeBuilder,
|
||||
Queries: []qbtypes.QueryEnvelope{{
|
||||
Type: qbtypes.QueryTypeBuilder,
|
||||
Spec: qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation]{
|
||||
Name: "A",
|
||||
Signal: telemetrytypes.SignalMetrics,
|
||||
},
|
||||
},
|
||||
}},
|
||||
},
|
||||
SelectedQuery: "A",
|
||||
Seasonality: "daily",
|
||||
Thresholds: &ruletypes.RuleThresholdData{
|
||||
Kind: ruletypes.BasicThresholdKind,
|
||||
Spec: ruletypes.BasicRuleThresholds{{
|
||||
Name: "Test anomaly no data",
|
||||
TargetValue: &target,
|
||||
MatchType: ruletypes.AtleastOnce,
|
||||
CompareOp: ruletypes.ValueIsAbove,
|
||||
Name: "Test anomaly no data",
|
||||
TargetValue: &target,
|
||||
MatchType: ruletypes.AtleastOnce,
|
||||
CompareOperator: ruletypes.ValueIsAbove,
|
||||
}},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
responseNoData := &anomaly.GetAnomaliesResponse{
|
||||
Results: []*v3.Result{
|
||||
responseNoData := &anomaly.AnomaliesResponse{
|
||||
Results: []*qbtypes.TimeSeriesData{
|
||||
{
|
||||
QueryName: "A",
|
||||
AnomalyScores: []*v3.Series{},
|
||||
QueryName: "A",
|
||||
Aggregations: []*qbtypes.AggregationBucket{{
|
||||
AnomalyScores: []*qbtypes.TimeSeries{},
|
||||
}},
|
||||
},
|
||||
},
|
||||
}
|
||||
@@ -115,15 +114,10 @@ func TestAnomalyRule_NoData_AlertOnAbsent(t *testing.T) {
|
||||
t.Run(c.description, func(t *testing.T) {
|
||||
postableRule.RuleCondition.AlertOnAbsent = c.alertOnAbsent
|
||||
|
||||
telemetryStore := telemetrystoretest.New(telemetrystore.Config{}, nil)
|
||||
options := clickhouseReader.NewOptions("primaryNamespace")
|
||||
reader := clickhouseReader.NewReader(slog.Default(), nil, telemetryStore, nil, "", time.Second, nil, nil, options)
|
||||
|
||||
rule, err := NewAnomalyRule(
|
||||
"test-anomaly-rule",
|
||||
valuer.GenerateUUID(),
|
||||
&postableRule,
|
||||
reader,
|
||||
nil,
|
||||
logger,
|
||||
nil,
|
||||
@@ -131,7 +125,7 @@ func TestAnomalyRule_NoData_AlertOnAbsent(t *testing.T) {
|
||||
require.NoError(t, err)
|
||||
|
||||
rule.provider = &mockAnomalyProvider{
|
||||
responses: []*anomaly.GetAnomaliesResponse{responseNoData},
|
||||
responses: []*anomaly.AnomaliesResponse{responseNoData},
|
||||
}
|
||||
|
||||
alertsFound, err := rule.Eval(context.Background(), evalTime)
|
||||
@@ -162,40 +156,41 @@ func TestAnomalyRule_NoData_AbsentFor(t *testing.T) {
|
||||
Frequency: valuer.MustParseTextDuration("1m"),
|
||||
}},
|
||||
RuleCondition: &ruletypes.RuleCondition{
|
||||
CompareOp: ruletypes.ValueIsAbove,
|
||||
MatchType: ruletypes.AtleastOnce,
|
||||
AlertOnAbsent: true,
|
||||
Target: &target,
|
||||
CompositeQuery: &v3.CompositeQuery{
|
||||
QueryType: v3.QueryTypeBuilder,
|
||||
BuilderQueries: map[string]*v3.BuilderQuery{
|
||||
"A": {
|
||||
QueryName: "A",
|
||||
Expression: "A",
|
||||
DataSource: v3.DataSourceMetrics,
|
||||
Temporality: v3.Unspecified,
|
||||
CompareOperator: ruletypes.ValueIsAbove,
|
||||
MatchType: ruletypes.AtleastOnce,
|
||||
AlertOnAbsent: true,
|
||||
Target: &target,
|
||||
CompositeQuery: &ruletypes.AlertCompositeQuery{
|
||||
QueryType: ruletypes.QueryTypeBuilder,
|
||||
Queries: []qbtypes.QueryEnvelope{{
|
||||
Type: qbtypes.QueryTypeBuilder,
|
||||
Spec: qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation]{
|
||||
Name: "A",
|
||||
Signal: telemetrytypes.SignalMetrics,
|
||||
},
|
||||
},
|
||||
}},
|
||||
},
|
||||
SelectedQuery: "A",
|
||||
Seasonality: "daily",
|
||||
Thresholds: &ruletypes.RuleThresholdData{
|
||||
Kind: ruletypes.BasicThresholdKind,
|
||||
Spec: ruletypes.BasicRuleThresholds{{
|
||||
Name: "Test anomaly no data with AbsentFor",
|
||||
TargetValue: &target,
|
||||
MatchType: ruletypes.AtleastOnce,
|
||||
CompareOp: ruletypes.ValueIsAbove,
|
||||
Name: "Test anomaly no data with AbsentFor",
|
||||
TargetValue: &target,
|
||||
MatchType: ruletypes.AtleastOnce,
|
||||
CompareOperator: ruletypes.ValueIsAbove,
|
||||
}},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
responseNoData := &anomaly.GetAnomaliesResponse{
|
||||
Results: []*v3.Result{
|
||||
responseNoData := &anomaly.AnomaliesResponse{
|
||||
Results: []*qbtypes.TimeSeriesData{
|
||||
{
|
||||
QueryName: "A",
|
||||
AnomalyScores: []*v3.Series{},
|
||||
QueryName: "A",
|
||||
Aggregations: []*qbtypes.AggregationBucket{{
|
||||
AnomalyScores: []*qbtypes.TimeSeries{},
|
||||
}},
|
||||
},
|
||||
},
|
||||
}
|
||||
@@ -229,32 +224,35 @@ func TestAnomalyRule_NoData_AbsentFor(t *testing.T) {
|
||||
t1 := baseTime.Add(5 * time.Minute)
|
||||
t2 := t1.Add(c.timeBetweenEvals)
|
||||
|
||||
responseWithData := &anomaly.GetAnomaliesResponse{
|
||||
Results: []*v3.Result{
|
||||
responseWithData := &anomaly.AnomaliesResponse{
|
||||
Results: []*qbtypes.TimeSeriesData{
|
||||
{
|
||||
QueryName: "A",
|
||||
AnomalyScores: []*v3.Series{
|
||||
{
|
||||
Labels: map[string]string{"test": "label"},
|
||||
Points: []v3.Point{
|
||||
{Timestamp: baseTime.UnixMilli(), Value: 1.0},
|
||||
{Timestamp: baseTime.Add(time.Minute).UnixMilli(), Value: 1.5},
|
||||
Aggregations: []*qbtypes.AggregationBucket{{
|
||||
AnomalyScores: []*qbtypes.TimeSeries{
|
||||
{
|
||||
Labels: []*qbtypes.Label{
|
||||
{
|
||||
Key: telemetrytypes.TelemetryFieldKey{Name: "Test"},
|
||||
Value: "labels",
|
||||
},
|
||||
},
|
||||
Values: []*qbtypes.TimeSeriesValue{
|
||||
{Timestamp: baseTime.UnixMilli(), Value: 1.0},
|
||||
{Timestamp: baseTime.Add(time.Minute).UnixMilli(), Value: 1.5},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
telemetryStore := telemetrystoretest.New(telemetrystore.Config{}, nil)
|
||||
options := clickhouseReader.NewOptions("primaryNamespace")
|
||||
reader := clickhouseReader.NewReader(slog.Default(), nil, telemetryStore, nil, "", time.Second, nil, nil, options)
|
||||
|
||||
rule, err := NewAnomalyRule("test-anomaly-rule", valuer.GenerateUUID(), &postableRule, reader, nil, logger, nil)
|
||||
rule, err := NewAnomalyRule("test-anomaly-rule", valuer.GenerateUUID(), &postableRule, nil, logger, nil)
|
||||
require.NoError(t, err)
|
||||
|
||||
rule.provider = &mockAnomalyProvider{
|
||||
responses: []*anomaly.GetAnomaliesResponse{responseWithData, responseNoData},
|
||||
responses: []*anomaly.AnomaliesResponse{responseWithData, responseNoData},
|
||||
}
|
||||
|
||||
alertsFound1, err := rule.Eval(context.Background(), t1)
|
||||
|
||||
@@ -11,9 +11,7 @@ import (
|
||||
"github.com/google/uuid"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/errors"
|
||||
basemodel "github.com/SigNoz/signoz/pkg/query-service/model"
|
||||
baserules "github.com/SigNoz/signoz/pkg/query-service/rules"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/utils/labels"
|
||||
"github.com/SigNoz/signoz/pkg/types/ruletypes"
|
||||
"github.com/SigNoz/signoz/pkg/valuer"
|
||||
)
|
||||
@@ -23,7 +21,7 @@ func PrepareTaskFunc(opts baserules.PrepareTaskOptions) (baserules.Task, error)
|
||||
rules := make([]baserules.Rule, 0)
|
||||
var task baserules.Task
|
||||
|
||||
ruleId := baserules.RuleIdFromTaskName(opts.TaskName)
|
||||
ruleID := baserules.RuleIDFromTaskName(opts.TaskName)
|
||||
evaluation, err := opts.Rule.Evaluation.GetEvaluation()
|
||||
if err != nil {
|
||||
return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "evaluation is invalid: %v", err)
|
||||
@@ -32,10 +30,9 @@ func PrepareTaskFunc(opts baserules.PrepareTaskOptions) (baserules.Task, error)
|
||||
if opts.Rule.RuleType == ruletypes.RuleTypeThreshold {
|
||||
// create a threshold rule
|
||||
tr, err := baserules.NewThresholdRule(
|
||||
ruleId,
|
||||
ruleID,
|
||||
opts.OrgID,
|
||||
opts.Rule,
|
||||
opts.Reader,
|
||||
opts.Querier,
|
||||
opts.Logger,
|
||||
baserules.WithEvalDelay(opts.ManagerOpts.EvalDelay),
|
||||
@@ -58,11 +55,10 @@ func PrepareTaskFunc(opts baserules.PrepareTaskOptions) (baserules.Task, error)
|
||||
|
||||
// create promql rule
|
||||
pr, err := baserules.NewPromRule(
|
||||
ruleId,
|
||||
ruleID,
|
||||
opts.OrgID,
|
||||
opts.Rule,
|
||||
opts.Logger,
|
||||
opts.Reader,
|
||||
opts.ManagerOpts.Prometheus,
|
||||
baserules.WithSQLStore(opts.SQLStore),
|
||||
baserules.WithQueryParser(opts.ManagerOpts.QueryParser),
|
||||
@@ -82,10 +78,9 @@ func PrepareTaskFunc(opts baserules.PrepareTaskOptions) (baserules.Task, error)
|
||||
} else if opts.Rule.RuleType == ruletypes.RuleTypeAnomaly {
|
||||
// create anomaly rule
|
||||
ar, err := NewAnomalyRule(
|
||||
ruleId,
|
||||
ruleID,
|
||||
opts.OrgID,
|
||||
opts.Rule,
|
||||
opts.Reader,
|
||||
opts.Querier,
|
||||
opts.Logger,
|
||||
opts.Cache,
|
||||
@@ -105,7 +100,7 @@ func PrepareTaskFunc(opts baserules.PrepareTaskOptions) (baserules.Task, error)
|
||||
task = newTask(baserules.TaskTypeCh, opts.TaskName, evaluation.GetFrequency().Duration(), rules, opts.ManagerOpts, opts.NotifyFunc, opts.MaintenanceStore, opts.OrgID)
|
||||
|
||||
} else {
|
||||
return nil, fmt.Errorf("unsupported rule type %s. Supported types: %s, %s", opts.Rule.RuleType, ruletypes.RuleTypeProm, ruletypes.RuleTypeThreshold)
|
||||
return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "unsupported rule type %s. Supported types: %s, %s", opts.Rule.RuleType, ruletypes.RuleTypeProm, ruletypes.RuleTypeThreshold)
|
||||
}
|
||||
|
||||
return task, nil
|
||||
@@ -113,12 +108,12 @@ func PrepareTaskFunc(opts baserules.PrepareTaskOptions) (baserules.Task, error)
|
||||
|
||||
// TestNotification prepares a dummy rule for given rule parameters and
|
||||
// sends a test notification. returns alert count and error (if any)
|
||||
func TestNotification(opts baserules.PrepareTestRuleOptions) (int, *basemodel.ApiError) {
|
||||
func TestNotification(opts baserules.PrepareTestRuleOptions) (int, error) {
|
||||
|
||||
ctx := context.Background()
|
||||
|
||||
if opts.Rule == nil {
|
||||
return 0, basemodel.BadRequest(fmt.Errorf("rule is required"))
|
||||
return 0, errors.NewInvalidInputf(errors.CodeInvalidInput, "rule is required")
|
||||
}
|
||||
|
||||
parsedRule := opts.Rule
|
||||
@@ -138,15 +133,14 @@ func TestNotification(opts baserules.PrepareTestRuleOptions) (int, *basemodel.Ap
|
||||
if parsedRule.RuleType == ruletypes.RuleTypeThreshold {
|
||||
|
||||
// add special labels for test alerts
|
||||
parsedRule.Labels[labels.RuleSourceLabel] = ""
|
||||
parsedRule.Labels[labels.AlertRuleIdLabel] = ""
|
||||
parsedRule.Labels[ruletypes.RuleSourceLabel] = ""
|
||||
parsedRule.Labels[ruletypes.AlertRuleIDLabel] = ""
|
||||
|
||||
// create a threshold rule
|
||||
rule, err = baserules.NewThresholdRule(
|
||||
alertname,
|
||||
opts.OrgID,
|
||||
parsedRule,
|
||||
opts.Reader,
|
||||
opts.Querier,
|
||||
opts.Logger,
|
||||
baserules.WithSendAlways(),
|
||||
@@ -158,7 +152,7 @@ func TestNotification(opts baserules.PrepareTestRuleOptions) (int, *basemodel.Ap
|
||||
|
||||
if err != nil {
|
||||
slog.Error("failed to prepare a new threshold rule for test", "name", alertname, errors.Attr(err))
|
||||
return 0, basemodel.BadRequest(err)
|
||||
return 0, err
|
||||
}
|
||||
|
||||
} else if parsedRule.RuleType == ruletypes.RuleTypeProm {
|
||||
@@ -169,7 +163,6 @@ func TestNotification(opts baserules.PrepareTestRuleOptions) (int, *basemodel.Ap
|
||||
opts.OrgID,
|
||||
parsedRule,
|
||||
opts.Logger,
|
||||
opts.Reader,
|
||||
opts.ManagerOpts.Prometheus,
|
||||
baserules.WithSendAlways(),
|
||||
baserules.WithSendUnmatched(),
|
||||
@@ -180,7 +173,7 @@ func TestNotification(opts baserules.PrepareTestRuleOptions) (int, *basemodel.Ap
|
||||
|
||||
if err != nil {
|
||||
slog.Error("failed to prepare a new promql rule for test", "name", alertname, errors.Attr(err))
|
||||
return 0, basemodel.BadRequest(err)
|
||||
return 0, err
|
||||
}
|
||||
} else if parsedRule.RuleType == ruletypes.RuleTypeAnomaly {
|
||||
// create anomaly rule
|
||||
@@ -188,7 +181,6 @@ func TestNotification(opts baserules.PrepareTestRuleOptions) (int, *basemodel.Ap
|
||||
alertname,
|
||||
opts.OrgID,
|
||||
parsedRule,
|
||||
opts.Reader,
|
||||
opts.Querier,
|
||||
opts.Logger,
|
||||
opts.Cache,
|
||||
@@ -200,10 +192,10 @@ func TestNotification(opts baserules.PrepareTestRuleOptions) (int, *basemodel.Ap
|
||||
)
|
||||
if err != nil {
|
||||
slog.Error("failed to prepare a new anomaly rule for test", "name", alertname, errors.Attr(err))
|
||||
return 0, basemodel.BadRequest(err)
|
||||
return 0, err
|
||||
}
|
||||
} else {
|
||||
return 0, basemodel.BadRequest(fmt.Errorf("failed to derive ruletype with given information"))
|
||||
return 0, errors.NewInvalidInputf(errors.CodeInvalidInput, "failed to derive ruletype with given information")
|
||||
}
|
||||
|
||||
// set timestamp to current utc time
|
||||
@@ -212,7 +204,7 @@ func TestNotification(opts baserules.PrepareTestRuleOptions) (int, *basemodel.Ap
|
||||
alertsFound, err := rule.Eval(ctx, ts)
|
||||
if err != nil {
|
||||
slog.Error("evaluating rule failed", "rule", rule.Name(), errors.Attr(err))
|
||||
return 0, basemodel.InternalError(fmt.Errorf("rule evaluation failed"))
|
||||
return 0, err
|
||||
}
|
||||
rule.SendAlerts(ctx, ts, 0, time.Minute, opts.NotifyFunc)
|
||||
|
||||
|
||||
@@ -114,11 +114,8 @@ func TestManager_TestNotification_SendUnmatched_ThresholdRule(t *testing.T) {
|
||||
},
|
||||
})
|
||||
|
||||
count, apiErr := mgr.TestNotification(context.Background(), orgID, string(ruleBytes))
|
||||
if apiErr != nil {
|
||||
t.Logf("TestNotification error: %v, type: %s", apiErr.Err, apiErr.Typ)
|
||||
}
|
||||
require.Nil(t, apiErr)
|
||||
count, err := mgr.TestNotification(context.Background(), orgID, string(ruleBytes))
|
||||
require.Nil(t, err)
|
||||
assert.Equal(t, tc.ExpectAlerts, count)
|
||||
|
||||
if tc.ExpectAlerts > 0 {
|
||||
@@ -268,11 +265,8 @@ func TestManager_TestNotification_SendUnmatched_PromRule(t *testing.T) {
|
||||
},
|
||||
})
|
||||
|
||||
count, apiErr := mgr.TestNotification(context.Background(), orgID, string(ruleBytes))
|
||||
if apiErr != nil {
|
||||
t.Logf("TestNotification error: %v, type: %s", apiErr.Err, apiErr.Typ)
|
||||
}
|
||||
require.Nil(t, apiErr)
|
||||
count, err := mgr.TestNotification(context.Background(), orgID, string(ruleBytes))
|
||||
require.Nil(t, err)
|
||||
assert.Equal(t, tc.ExpectAlerts, count)
|
||||
|
||||
if tc.ExpectAlerts > 0 {
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/modules/rulestatehistory"
|
||||
"github.com/SigNoz/signoz/pkg/types/rulestatehistorytypes"
|
||||
"github.com/SigNoz/signoz/pkg/types/ruletypes"
|
||||
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
|
||||
)
|
||||
|
||||
@@ -123,8 +124,8 @@ func (m *module) RecordRuleStateHistory(ctx context.Context, ruleID string, hand
|
||||
for _, item := range lastSavedState {
|
||||
currentState, ok := currentItemsByFingerprint[item.Fingerprint]
|
||||
if !ok {
|
||||
if item.State == rulestatehistorytypes.StateFiring || item.State == rulestatehistorytypes.StateNoData {
|
||||
item.State = rulestatehistorytypes.StateInactive
|
||||
if item.State == ruletypes.StateFiring || item.State == ruletypes.StateNoData {
|
||||
item.State = ruletypes.StateInactive
|
||||
item.StateChanged = true
|
||||
item.UnixMilli = time.Now().UnixMilli()
|
||||
revisedItemsToAdd[item.Fingerprint] = item
|
||||
@@ -145,10 +146,10 @@ func (m *module) RecordRuleStateHistory(ctx context.Context, ruleID string, hand
|
||||
}
|
||||
}
|
||||
|
||||
newState := rulestatehistorytypes.StateInactive
|
||||
newState := ruletypes.StateInactive
|
||||
for _, item := range revisedItemsToAdd {
|
||||
if item.State == rulestatehistorytypes.StateFiring || item.State == rulestatehistorytypes.StateNoData {
|
||||
newState = rulestatehistorytypes.StateFiring
|
||||
if item.State == ruletypes.StateFiring || item.State == ruletypes.StateNoData {
|
||||
newState = ruletypes.StateFiring
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11,6 +11,7 @@ import (
|
||||
"github.com/SigNoz/signoz/pkg/telemetrystore"
|
||||
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
|
||||
"github.com/SigNoz/signoz/pkg/types/rulestatehistorytypes"
|
||||
"github.com/SigNoz/signoz/pkg/types/ruletypes"
|
||||
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
|
||||
sqlbuilder "github.com/huandu/go-sqlbuilder"
|
||||
)
|
||||
@@ -300,7 +301,7 @@ func (s *store) ReadRuleStateHistoryTopContributorsByRuleID(ctx context.Context,
|
||||
sb.From(historyTable())
|
||||
sb.Where(sb.E("rule_id", ruleID))
|
||||
sb.Where(sb.E("state_changed", true))
|
||||
sb.Where(sb.E("state", rulestatehistorytypes.StateFiring.StringValue()))
|
||||
sb.Where(sb.E("state", ruletypes.StateFiring.StringValue()))
|
||||
sb.Where(sb.GE("unix_milli", query.Start))
|
||||
sb.Where(sb.LT("unix_milli", query.End))
|
||||
|
||||
@@ -341,7 +342,7 @@ WHERE rule_id = %s
|
||||
AND unix_milli < %s
|
||||
GROUP BY unix_milli`,
|
||||
innerSB.Var(query.Start),
|
||||
innerSB.Var(rulestatehistorytypes.StateInactive.StringValue()),
|
||||
innerSB.Var(ruletypes.StateInactive.StringValue()),
|
||||
historyTable(),
|
||||
innerSB.Var(ruleID),
|
||||
innerSB.Var(query.Start),
|
||||
@@ -411,7 +412,7 @@ func (s *store) GetTotalTriggers(ctx context.Context, ruleID string, query *rule
|
||||
sb.From(historyTable())
|
||||
sb.Where(sb.E("rule_id", ruleID))
|
||||
sb.Where(sb.E("state_changed", true))
|
||||
sb.Where(sb.E("state", rulestatehistorytypes.StateFiring.StringValue()))
|
||||
sb.Where(sb.E("state", ruletypes.StateFiring.StringValue()))
|
||||
sb.Where(sb.GE("unix_milli", query.Start))
|
||||
sb.Where(sb.LT("unix_milli", query.End))
|
||||
selectQuery, args := sb.BuildWithFlavor(sqlbuilder.ClickHouse)
|
||||
@@ -432,7 +433,7 @@ func (s *store) GetTriggersByInterval(ctx context.Context, ruleID string, query
|
||||
sb.From(historyTable())
|
||||
sb.Where(sb.E("rule_id", ruleID))
|
||||
sb.Where(sb.E("state_changed", true))
|
||||
sb.Where(sb.E("state", rulestatehistorytypes.StateFiring.StringValue()))
|
||||
sb.Where(sb.E("state", ruletypes.StateFiring.StringValue()))
|
||||
sb.Where(sb.GE("unix_milli", query.Start))
|
||||
sb.Where(sb.LT("unix_milli", query.End))
|
||||
sb.GroupBy("ts")
|
||||
@@ -528,7 +529,7 @@ func (s *store) buildMatchedEventsCTE(ruleID string, query *rulestatehistorytype
|
||||
firingSB := sqlbuilder.NewSelectBuilder()
|
||||
firingSB.Select("rule_id", "unix_milli AS firing_time")
|
||||
firingSB.From(historyTable())
|
||||
firingSB.Where(firingSB.E("overall_state", rulestatehistorytypes.StateFiring.StringValue()))
|
||||
firingSB.Where(firingSB.E("overall_state", ruletypes.StateFiring.StringValue()))
|
||||
firingSB.Where(firingSB.E("overall_state_changed", true))
|
||||
firingSB.Where(firingSB.E("rule_id", ruleID))
|
||||
firingSB.Where(firingSB.GE("unix_milli", query.Start))
|
||||
@@ -537,7 +538,7 @@ func (s *store) buildMatchedEventsCTE(ruleID string, query *rulestatehistorytype
|
||||
resolutionSB := sqlbuilder.NewSelectBuilder()
|
||||
resolutionSB.Select("rule_id", "unix_milli AS resolution_time")
|
||||
resolutionSB.From(historyTable())
|
||||
resolutionSB.Where(resolutionSB.E("overall_state", rulestatehistorytypes.StateInactive.StringValue()))
|
||||
resolutionSB.Where(resolutionSB.E("overall_state", ruletypes.StateInactive.StringValue()))
|
||||
resolutionSB.Where(resolutionSB.E("overall_state_changed", true))
|
||||
resolutionSB.Where(resolutionSB.E("rule_id", ruleID))
|
||||
resolutionSB.Where(resolutionSB.GE("unix_milli", query.Start))
|
||||
|
||||
@@ -299,6 +299,36 @@ type ApiResponse struct {
|
||||
Error string `json:"error,omitempty"`
|
||||
}
|
||||
|
||||
// toApiError translates a pkg/errors typed error into the legacy
|
||||
// model.ApiError to preserve the v1 JSON response shape.
|
||||
func toApiError(err error) *model.ApiError {
|
||||
t, _, _, _, _, _ := errors.Unwrapb(err)
|
||||
|
||||
var typ model.ErrorType
|
||||
switch t {
|
||||
case errors.TypeInvalidInput:
|
||||
typ = model.ErrorBadData
|
||||
case errors.TypeNotFound:
|
||||
typ = model.ErrorNotFound
|
||||
case errors.TypeAlreadyExists:
|
||||
typ = model.ErrorConflict
|
||||
case errors.TypeUnauthenticated:
|
||||
typ = model.ErrorUnauthorized
|
||||
case errors.TypeForbidden:
|
||||
typ = model.ErrorForbidden
|
||||
case errors.TypeUnsupported:
|
||||
typ = model.ErrorNotImplemented
|
||||
case errors.TypeTimeout:
|
||||
typ = model.ErrorTimeout
|
||||
case errors.TypeCanceled:
|
||||
typ = model.ErrorCanceled
|
||||
default:
|
||||
typ = model.ErrorInternal
|
||||
}
|
||||
|
||||
return &model.ApiError{Typ: typ, Err: err}
|
||||
}
|
||||
|
||||
// todo(remove): Implemented at render package (github.com/SigNoz/signoz/pkg/http/render) with the new error structure
|
||||
func RespondError(w http.ResponseWriter, apiErr model.BaseApiError, data interface{}) {
|
||||
json := jsoniter.ConfigCompatibleWithStandardLibrary
|
||||
@@ -891,48 +921,6 @@ func (aH *APIHandler) getOverallStateTransitions(w http.ResponseWriter, r *http.
|
||||
aH.Respond(w, stateItems)
|
||||
}
|
||||
|
||||
func (aH *APIHandler) metaForLinks(ctx context.Context, rule *ruletypes.GettableRule) ([]v3.FilterItem, []v3.AttributeKey, map[string]v3.AttributeKey) {
|
||||
filterItems := []v3.FilterItem{}
|
||||
groupBy := []v3.AttributeKey{}
|
||||
keys := make(map[string]v3.AttributeKey)
|
||||
|
||||
if rule.AlertType == ruletypes.AlertTypeLogs {
|
||||
logFields, apiErr := aH.reader.GetLogFieldsFromNames(ctx, logsv3.GetFieldNames(rule.PostableRule.RuleCondition.CompositeQuery))
|
||||
if apiErr == nil {
|
||||
params := &v3.QueryRangeParamsV3{
|
||||
CompositeQuery: rule.RuleCondition.CompositeQuery,
|
||||
}
|
||||
keys = model.GetLogFieldsV3(ctx, params, logFields)
|
||||
} else {
|
||||
aH.logger.ErrorContext(ctx, "failed to get log fields using empty keys", errors.Attr(apiErr))
|
||||
}
|
||||
} else if rule.AlertType == ruletypes.AlertTypeTraces {
|
||||
traceFields, err := aH.reader.GetSpanAttributeKeysByNames(ctx, logsv3.GetFieldNames(rule.PostableRule.RuleCondition.CompositeQuery))
|
||||
if err == nil {
|
||||
keys = traceFields
|
||||
} else {
|
||||
aH.logger.ErrorContext(ctx, "failed to get span attributes using empty keys", errors.Attr(err))
|
||||
}
|
||||
}
|
||||
|
||||
if rule.AlertType == ruletypes.AlertTypeLogs || rule.AlertType == ruletypes.AlertTypeTraces {
|
||||
if rule.RuleCondition.CompositeQuery != nil {
|
||||
if rule.RuleCondition.QueryType() == v3.QueryTypeBuilder {
|
||||
selectedQuery := rule.RuleCondition.GetSelectedQueryName()
|
||||
if rule.RuleCondition.CompositeQuery.BuilderQueries[selectedQuery] != nil &&
|
||||
rule.RuleCondition.CompositeQuery.BuilderQueries[selectedQuery].Filters != nil {
|
||||
filterItems = rule.RuleCondition.CompositeQuery.BuilderQueries[selectedQuery].Filters.Items
|
||||
}
|
||||
if rule.RuleCondition.CompositeQuery.BuilderQueries[selectedQuery] != nil &&
|
||||
rule.RuleCondition.CompositeQuery.BuilderQueries[selectedQuery].GroupBy != nil {
|
||||
groupBy = rule.RuleCondition.CompositeQuery.BuilderQueries[selectedQuery].GroupBy
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return filterItems, groupBy, keys
|
||||
}
|
||||
|
||||
func (aH *APIHandler) getRuleStateHistory(w http.ResponseWriter, r *http.Request) {
|
||||
idStr := mux.Vars(r)["id"]
|
||||
id, err := valuer.NewUUID(idStr)
|
||||
@@ -966,8 +954,6 @@ func (aH *APIHandler) getRuleStateHistory(w http.ResponseWriter, r *http.Request
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
filterItems, groupBy, keys := aH.metaForLinks(r.Context(), rule)
|
||||
newFilters := contextlinks.PrepareFilters(lbls, filterItems, groupBy, keys)
|
||||
end := time.Unix(res.Items[idx].UnixMilli/1000, 0)
|
||||
// why are we subtracting 3 minutes?
|
||||
// the query range is calculated based on the rule's evalWindow and evalDelay
|
||||
@@ -975,54 +961,46 @@ func (aH *APIHandler) getRuleStateHistory(w http.ResponseWriter, r *http.Request
|
||||
// to get the correct query range
|
||||
start := end.Add(-rule.EvalWindow.Duration() - 3*time.Minute)
|
||||
if rule.AlertType == ruletypes.AlertTypeLogs {
|
||||
if rule.Version != "v5" {
|
||||
res.Items[idx].RelatedLogsLink = contextlinks.PrepareLinksToLogs(start, end, newFilters)
|
||||
} else {
|
||||
// TODO(srikanthccv): re-visit this and support multiple queries
|
||||
var q qbtypes.QueryBuilderQuery[qbtypes.LogAggregation]
|
||||
// TODO(srikanthccv): re-visit this and support multiple queries
|
||||
var q qbtypes.QueryBuilderQuery[qbtypes.LogAggregation]
|
||||
|
||||
for _, query := range rule.RuleCondition.CompositeQuery.Queries {
|
||||
if query.Type == qbtypes.QueryTypeBuilder {
|
||||
switch spec := query.Spec.(type) {
|
||||
case qbtypes.QueryBuilderQuery[qbtypes.LogAggregation]:
|
||||
q = spec
|
||||
}
|
||||
for _, query := range rule.RuleCondition.CompositeQuery.Queries {
|
||||
if query.Type == qbtypes.QueryTypeBuilder {
|
||||
switch spec := query.Spec.(type) {
|
||||
case qbtypes.QueryBuilderQuery[qbtypes.LogAggregation]:
|
||||
q = spec
|
||||
}
|
||||
}
|
||||
|
||||
filterExpr := ""
|
||||
if q.Filter != nil && q.Filter.Expression != "" {
|
||||
filterExpr = q.Filter.Expression
|
||||
}
|
||||
|
||||
whereClause := contextlinks.PrepareFilterExpression(lbls, filterExpr, q.GroupBy)
|
||||
|
||||
res.Items[idx].RelatedLogsLink = contextlinks.PrepareLinksToLogsV5(start, end, whereClause)
|
||||
}
|
||||
|
||||
filterExpr := ""
|
||||
if q.Filter != nil && q.Filter.Expression != "" {
|
||||
filterExpr = q.Filter.Expression
|
||||
}
|
||||
|
||||
whereClause := contextlinks.PrepareFilterExpression(lbls, filterExpr, q.GroupBy)
|
||||
|
||||
res.Items[idx].RelatedLogsLink = contextlinks.PrepareLinksToLogsV5(start, end, whereClause)
|
||||
} else if rule.AlertType == ruletypes.AlertTypeTraces {
|
||||
if rule.Version != "v5" {
|
||||
res.Items[idx].RelatedTracesLink = contextlinks.PrepareLinksToTraces(start, end, newFilters)
|
||||
} else {
|
||||
// TODO(srikanthccv): re-visit this and support multiple queries
|
||||
var q qbtypes.QueryBuilderQuery[qbtypes.TraceAggregation]
|
||||
// TODO(srikanthccv): re-visit this and support multiple queries
|
||||
var q qbtypes.QueryBuilderQuery[qbtypes.TraceAggregation]
|
||||
|
||||
for _, query := range rule.RuleCondition.CompositeQuery.Queries {
|
||||
if query.Type == qbtypes.QueryTypeBuilder {
|
||||
switch spec := query.Spec.(type) {
|
||||
case qbtypes.QueryBuilderQuery[qbtypes.TraceAggregation]:
|
||||
q = spec
|
||||
}
|
||||
for _, query := range rule.RuleCondition.CompositeQuery.Queries {
|
||||
if query.Type == qbtypes.QueryTypeBuilder {
|
||||
switch spec := query.Spec.(type) {
|
||||
case qbtypes.QueryBuilderQuery[qbtypes.TraceAggregation]:
|
||||
q = spec
|
||||
}
|
||||
}
|
||||
|
||||
filterExpr := ""
|
||||
if q.Filter != nil && q.Filter.Expression != "" {
|
||||
filterExpr = q.Filter.Expression
|
||||
}
|
||||
|
||||
whereClause := contextlinks.PrepareFilterExpression(lbls, filterExpr, q.GroupBy)
|
||||
res.Items[idx].RelatedTracesLink = contextlinks.PrepareLinksToTracesV5(start, end, whereClause)
|
||||
}
|
||||
|
||||
filterExpr := ""
|
||||
if q.Filter != nil && q.Filter.Expression != "" {
|
||||
filterExpr = q.Filter.Expression
|
||||
}
|
||||
|
||||
whereClause := contextlinks.PrepareFilterExpression(lbls, filterExpr, q.GroupBy)
|
||||
res.Items[idx].RelatedTracesLink = contextlinks.PrepareLinksToTracesV5(start, end, whereClause)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1051,26 +1029,6 @@ func (aH *APIHandler) getRuleStateHistoryTopContributors(w http.ResponseWriter,
|
||||
return
|
||||
}
|
||||
|
||||
rule, err := aH.ruleManager.GetRule(r.Context(), id)
|
||||
if err == nil {
|
||||
for idx := range res {
|
||||
lbls := make(map[string]string)
|
||||
err := json.Unmarshal([]byte(res[idx].Labels), &lbls)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
filterItems, groupBy, keys := aH.metaForLinks(r.Context(), rule)
|
||||
newFilters := contextlinks.PrepareFilters(lbls, filterItems, groupBy, keys)
|
||||
end := time.Unix(params.End/1000, 0)
|
||||
start := time.Unix(params.Start/1000, 0)
|
||||
if rule.AlertType == ruletypes.AlertTypeLogs {
|
||||
res[idx].RelatedLogsLink = contextlinks.PrepareLinksToLogs(start, end, newFilters)
|
||||
} else if rule.AlertType == ruletypes.AlertTypeTraces {
|
||||
res[idx].RelatedTracesLink = contextlinks.PrepareLinksToTraces(start, end, newFilters)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
aH.Respond(w, res)
|
||||
}
|
||||
|
||||
@@ -1301,9 +1259,9 @@ func (aH *APIHandler) testRule(w http.ResponseWriter, r *http.Request) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute)
|
||||
defer cancel()
|
||||
|
||||
alertCount, apiRrr := aH.ruleManager.TestNotification(ctx, orgID, string(body))
|
||||
if apiRrr != nil {
|
||||
RespondError(w, apiRrr, nil)
|
||||
alertCount, err := aH.ruleManager.TestNotification(ctx, orgID, string(body))
|
||||
if err != nil {
|
||||
RespondError(w, toApiError(err), nil)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -1325,7 +1283,7 @@ func (aH *APIHandler) deleteRule(w http.ResponseWriter, r *http.Request) {
|
||||
RespondError(w, &model.ApiError{Typ: model.ErrorNotFound, Err: fmt.Errorf("rule not found")}, nil)
|
||||
return
|
||||
}
|
||||
RespondError(w, &model.ApiError{Typ: model.ErrorInternal, Err: err}, nil)
|
||||
RespondError(w, toApiError(err), nil)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -1357,7 +1315,7 @@ func (aH *APIHandler) patchRule(w http.ResponseWriter, r *http.Request) {
|
||||
RespondError(w, &model.ApiError{Typ: model.ErrorNotFound, Err: fmt.Errorf("rule not found")}, nil)
|
||||
return
|
||||
}
|
||||
RespondError(w, &model.ApiError{Typ: model.ErrorInternal, Err: err}, nil)
|
||||
RespondError(w, toApiError(err), nil)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -1387,7 +1345,7 @@ func (aH *APIHandler) editRule(w http.ResponseWriter, r *http.Request) {
|
||||
RespondError(w, &model.ApiError{Typ: model.ErrorNotFound, Err: fmt.Errorf("rule not found")}, nil)
|
||||
return
|
||||
}
|
||||
RespondError(w, &model.ApiError{Typ: model.ErrorInternal, Err: err}, nil)
|
||||
RespondError(w, toApiError(err), nil)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -1407,7 +1365,7 @@ func (aH *APIHandler) createRule(w http.ResponseWriter, r *http.Request) {
|
||||
|
||||
rule, err := aH.ruleManager.CreateRule(r.Context(), string(body))
|
||||
if err != nil {
|
||||
RespondError(w, &model.ApiError{Typ: model.ErrorBadData, Err: err}, nil)
|
||||
RespondError(w, toApiError(err), nil)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
@@ -345,7 +345,6 @@ func makeRulesManager(
|
||||
MetadataStore: metadataStore,
|
||||
Prometheus: prometheus,
|
||||
Context: context.Background(),
|
||||
Reader: ch,
|
||||
Querier: querier,
|
||||
Logger: providerSettings.Logger,
|
||||
Cache: cache,
|
||||
@@ -354,7 +353,7 @@ func makeRulesManager(
|
||||
Alertmanager: alertmanager,
|
||||
RuleStore: ruleStore,
|
||||
MaintenanceStore: maintenanceStore,
|
||||
SqlStore: sqlstore,
|
||||
SQLStore: sqlstore,
|
||||
QueryParser: queryParser,
|
||||
RuleStateHistoryModule: ruleStateHistoryModule,
|
||||
}
|
||||
|
||||
@@ -2,10 +2,8 @@ package common
|
||||
|
||||
import (
|
||||
"math"
|
||||
"regexp"
|
||||
"sort"
|
||||
"time"
|
||||
"unicode"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/query-service/constants"
|
||||
v3 "github.com/SigNoz/signoz/pkg/query-service/model/v3"
|
||||
@@ -76,23 +74,6 @@ func LCMList(nums []int64) int64 {
|
||||
return result
|
||||
}
|
||||
|
||||
func NormalizeLabelName(name string) string {
|
||||
// See https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels
|
||||
|
||||
// Regular expression to match non-alphanumeric characters except underscores
|
||||
reg := regexp.MustCompile(`[^a-zA-Z0-9_]`)
|
||||
|
||||
// Replace all non-alphanumeric characters except underscores with underscores
|
||||
normalized := reg.ReplaceAllString(name, "_")
|
||||
|
||||
// If the first character is not a letter or an underscore, prepend an underscore
|
||||
if len(normalized) > 0 && !unicode.IsLetter(rune(normalized[0])) && normalized[0] != '_' {
|
||||
normalized = "_" + normalized
|
||||
}
|
||||
|
||||
return normalized
|
||||
}
|
||||
|
||||
func GetSeriesFromCachedData(data []querycache.CachedSeriesData, start, end int64) []*v3.Series {
|
||||
series := make(map[uint64]*v3.Series)
|
||||
|
||||
|
||||
@@ -9,12 +9,6 @@ import (
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/errors"
|
||||
"github.com/SigNoz/signoz/pkg/modules/rulestatehistory"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/constants"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/interfaces"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/model"
|
||||
v3 "github.com/SigNoz/signoz/pkg/query-service/model/v3"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/utils/labels"
|
||||
qslabels "github.com/SigNoz/signoz/pkg/query-service/utils/labels"
|
||||
"github.com/SigNoz/signoz/pkg/queryparser"
|
||||
"github.com/SigNoz/signoz/pkg/sqlstore"
|
||||
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
|
||||
@@ -51,8 +45,8 @@ type BaseRule struct {
|
||||
|
||||
// holds the static set of labels and annotations for the rule
|
||||
// these are the same for all alerts created for this rule
|
||||
labels qslabels.BaseLabels
|
||||
annotations qslabels.BaseLabels
|
||||
labels ruletypes.Labels
|
||||
annotations ruletypes.Labels
|
||||
// preferredChannels is the list of channels to send the alert to
|
||||
// if the rule is triggered
|
||||
preferredChannels []string
|
||||
@@ -71,8 +65,6 @@ type BaseRule struct {
|
||||
// This is used for missing data alerts.
|
||||
lastTimestampWithDatapoints time.Time
|
||||
|
||||
reader interfaces.Reader
|
||||
|
||||
logger *slog.Logger
|
||||
|
||||
// sendUnmatched sends observed metric values even if they don't match the
|
||||
@@ -82,12 +74,6 @@ type BaseRule struct {
|
||||
// sendAlways will send alert irrespective of resendDelay or other params
|
||||
sendAlways bool
|
||||
|
||||
// TemporalityMap is a map of metric name to temporality to avoid fetching
|
||||
// temporality for the same metric multiple times.
|
||||
// Querying the v4 table on low cardinal temporality column should be fast,
|
||||
// but we can still avoid the query if we have the data in memory.
|
||||
TemporalityMap map[string]map[v3.Temporality]bool
|
||||
|
||||
sqlstore sqlstore.SQLStore
|
||||
|
||||
metadataStore telemetrytypes.MetadataStore
|
||||
@@ -152,9 +138,9 @@ func WithRuleStateHistoryModule(module rulestatehistory.Module) RuleOption {
|
||||
}
|
||||
}
|
||||
|
||||
func NewBaseRule(id string, orgID valuer.UUID, p *ruletypes.PostableRule, reader interfaces.Reader, opts ...RuleOption) (*BaseRule, error) {
|
||||
func NewBaseRule(id string, orgID valuer.UUID, p *ruletypes.PostableRule, opts ...RuleOption) (*BaseRule, error) {
|
||||
if p.RuleCondition == nil || !p.RuleCondition.IsValid() {
|
||||
return nil, fmt.Errorf("invalid rule condition")
|
||||
return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid rule condition")
|
||||
}
|
||||
threshold, err := p.RuleCondition.Thresholds.GetRuleThreshold()
|
||||
if err != nil {
|
||||
@@ -173,13 +159,11 @@ func NewBaseRule(id string, orgID valuer.UUID, p *ruletypes.PostableRule, reader
|
||||
typ: p.AlertType,
|
||||
ruleCondition: p.RuleCondition,
|
||||
evalWindow: p.EvalWindow,
|
||||
labels: qslabels.FromMap(p.Labels),
|
||||
annotations: qslabels.FromMap(p.Annotations),
|
||||
labels: ruletypes.FromMap(p.Labels),
|
||||
annotations: ruletypes.FromMap(p.Annotations),
|
||||
preferredChannels: p.PreferredChannels,
|
||||
health: ruletypes.HealthUnknown,
|
||||
Active: map[uint64]*ruletypes.Alert{},
|
||||
reader: reader,
|
||||
TemporalityMap: make(map[string]map[v3.Temporality]bool),
|
||||
Threshold: threshold,
|
||||
evaluation: evaluation,
|
||||
}
|
||||
@@ -200,20 +184,6 @@ func NewBaseRule(id string, orgID valuer.UUID, p *ruletypes.PostableRule, reader
|
||||
return baseRule, nil
|
||||
}
|
||||
|
||||
func (r *BaseRule) matchType() ruletypes.MatchType {
|
||||
if r.ruleCondition == nil {
|
||||
return ruletypes.AtleastOnce
|
||||
}
|
||||
return r.ruleCondition.MatchType
|
||||
}
|
||||
|
||||
func (r *BaseRule) compareOp() ruletypes.CompareOp {
|
||||
if r.ruleCondition == nil {
|
||||
return ruletypes.ValueIsEq
|
||||
}
|
||||
return r.ruleCondition.CompareOp
|
||||
}
|
||||
|
||||
func (r *BaseRule) currentAlerts() []*ruletypes.Alert {
|
||||
r.mtx.Lock()
|
||||
defer r.mtx.Unlock()
|
||||
@@ -245,10 +215,10 @@ func (r *BaseRule) ActiveAlertsLabelFP() map[uint64]struct{} {
|
||||
|
||||
activeAlerts := make(map[uint64]struct{}, len(r.Active))
|
||||
for _, alert := range r.Active {
|
||||
if alert == nil || alert.QueryResultLables == nil {
|
||||
if alert == nil || alert.QueryResultLabels == nil {
|
||||
continue
|
||||
}
|
||||
activeAlerts[alert.QueryResultLables.Hash()] = struct{}{}
|
||||
activeAlerts[alert.QueryResultLabels.Hash()] = struct{}{}
|
||||
}
|
||||
return activeAlerts
|
||||
}
|
||||
@@ -269,8 +239,8 @@ func (r *BaseRule) ID() string { return r.id }
|
||||
func (r *BaseRule) OrgID() valuer.UUID { return r.orgID }
|
||||
func (r *BaseRule) Name() string { return r.name }
|
||||
func (r *BaseRule) Condition() *ruletypes.RuleCondition { return r.ruleCondition }
|
||||
func (r *BaseRule) Labels() qslabels.BaseLabels { return r.labels }
|
||||
func (r *BaseRule) Annotations() qslabels.BaseLabels { return r.annotations }
|
||||
func (r *BaseRule) Labels() ruletypes.Labels { return r.labels }
|
||||
func (r *BaseRule) Annotations() ruletypes.Labels { return r.annotations }
|
||||
func (r *BaseRule) PreferredChannels() []string { return r.preferredChannels }
|
||||
|
||||
func (r *BaseRule) GeneratorURL() string {
|
||||
@@ -278,10 +248,7 @@ func (r *BaseRule) GeneratorURL() string {
|
||||
}
|
||||
|
||||
func (r *BaseRule) Unit() string {
|
||||
if r.ruleCondition != nil && r.ruleCondition.CompositeQuery != nil {
|
||||
return r.ruleCondition.CompositeQuery.Unit
|
||||
}
|
||||
return ""
|
||||
return r.ruleCondition.CompositeQuery.Unit
|
||||
}
|
||||
|
||||
func (r *BaseRule) Timestamps(ts time.Time) (time.Time, time.Time) {
|
||||
@@ -348,10 +315,10 @@ func (r *BaseRule) GetEvaluationTimestamp() time.Time {
|
||||
return r.evaluationTimestamp
|
||||
}
|
||||
|
||||
func (r *BaseRule) State() model.AlertState {
|
||||
maxState := model.StateInactive
|
||||
func (r *BaseRule) State() ruletypes.AlertState {
|
||||
maxState := ruletypes.StateInactive
|
||||
for _, a := range r.Active {
|
||||
if a.State > maxState {
|
||||
if a.State.Severity() > maxState.Severity() {
|
||||
maxState = a.State
|
||||
}
|
||||
}
|
||||
@@ -408,12 +375,12 @@ func (r *BaseRule) ForEachActiveAlert(f func(*ruletypes.Alert)) {
|
||||
}
|
||||
}
|
||||
|
||||
func (r *BaseRule) RecordRuleStateHistory(ctx context.Context, prevState, currentState model.AlertState, itemsToAdd []model.RuleStateHistory) error {
|
||||
func (r *BaseRule) RecordRuleStateHistory(ctx context.Context, prevState, currentState ruletypes.AlertState, itemsToAdd []rulestatehistorytypes.RuleStateHistory) error {
|
||||
if r.ruleStateHistoryModule == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
if err := r.ruleStateHistoryModule.RecordRuleStateHistory(ctx, r.ID(), r.handledRestart, toRuleStateHistoryTypes(itemsToAdd)); err != nil {
|
||||
if err := r.ruleStateHistoryModule.RecordRuleStateHistory(ctx, r.ID(), r.handledRestart, itemsToAdd); err != nil {
|
||||
r.logger.ErrorContext(ctx, "error while recording rule state history", errors.Attr(err), slog.Any("itemsToAdd", itemsToAdd))
|
||||
return err
|
||||
}
|
||||
@@ -422,100 +389,6 @@ func (r *BaseRule) RecordRuleStateHistory(ctx context.Context, prevState, curren
|
||||
return nil
|
||||
}
|
||||
|
||||
// TODO(srikanthccv): remove these when v3 is cleaned up
|
||||
func toRuleStateHistoryTypes(entries []model.RuleStateHistory) []rulestatehistorytypes.RuleStateHistory {
|
||||
converted := make([]rulestatehistorytypes.RuleStateHistory, 0, len(entries))
|
||||
for _, entry := range entries {
|
||||
converted = append(converted, rulestatehistorytypes.RuleStateHistory{
|
||||
RuleID: entry.RuleID,
|
||||
RuleName: entry.RuleName,
|
||||
OverallState: toRuleStateHistoryAlertState(entry.OverallState),
|
||||
OverallStateChanged: entry.OverallStateChanged,
|
||||
State: toRuleStateHistoryAlertState(entry.State),
|
||||
StateChanged: entry.StateChanged,
|
||||
UnixMilli: entry.UnixMilli,
|
||||
Labels: rulestatehistorytypes.LabelsString(entry.Labels),
|
||||
Fingerprint: entry.Fingerprint,
|
||||
Value: entry.Value,
|
||||
})
|
||||
}
|
||||
return converted
|
||||
}
|
||||
|
||||
func toRuleStateHistoryAlertState(state model.AlertState) rulestatehistorytypes.AlertState {
|
||||
switch state {
|
||||
case model.StateInactive:
|
||||
return rulestatehistorytypes.StateInactive
|
||||
case model.StatePending:
|
||||
return rulestatehistorytypes.StatePending
|
||||
case model.StateRecovering:
|
||||
return rulestatehistorytypes.StateRecovering
|
||||
case model.StateFiring:
|
||||
return rulestatehistorytypes.StateFiring
|
||||
case model.StateNoData:
|
||||
return rulestatehistorytypes.StateNoData
|
||||
case model.StateDisabled:
|
||||
return rulestatehistorytypes.StateDisabled
|
||||
default:
|
||||
return rulestatehistorytypes.StateInactive
|
||||
}
|
||||
}
|
||||
|
||||
func (r *BaseRule) PopulateTemporality(ctx context.Context, orgID valuer.UUID, qp *v3.QueryRangeParamsV3) error {
|
||||
missingTemporality := make([]string, 0)
|
||||
metricNameToTemporality := make(map[string]map[v3.Temporality]bool)
|
||||
if qp.CompositeQuery != nil && len(qp.CompositeQuery.BuilderQueries) > 0 {
|
||||
for _, query := range qp.CompositeQuery.BuilderQueries {
|
||||
// if there is no temporality specified in the query but we have it in the map
|
||||
// then use the value from the map
|
||||
if query.Temporality == "" && r.TemporalityMap[query.AggregateAttribute.Key] != nil {
|
||||
// We prefer delta if it is available
|
||||
if r.TemporalityMap[query.AggregateAttribute.Key][v3.Delta] {
|
||||
query.Temporality = v3.Delta
|
||||
} else if r.TemporalityMap[query.AggregateAttribute.Key][v3.Cumulative] {
|
||||
query.Temporality = v3.Cumulative
|
||||
} else {
|
||||
query.Temporality = v3.Unspecified
|
||||
}
|
||||
}
|
||||
// we don't have temporality for this metric
|
||||
if query.DataSource == v3.DataSourceMetrics && query.Temporality == "" {
|
||||
missingTemporality = append(missingTemporality, query.AggregateAttribute.Key)
|
||||
}
|
||||
if _, ok := metricNameToTemporality[query.AggregateAttribute.Key]; !ok {
|
||||
metricNameToTemporality[query.AggregateAttribute.Key] = make(map[v3.Temporality]bool)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var nameToTemporality map[string]map[v3.Temporality]bool
|
||||
var err error
|
||||
|
||||
if len(missingTemporality) > 0 {
|
||||
nameToTemporality, err = r.reader.FetchTemporality(ctx, orgID, missingTemporality)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if qp.CompositeQuery != nil && len(qp.CompositeQuery.BuilderQueries) > 0 {
|
||||
for name := range qp.CompositeQuery.BuilderQueries {
|
||||
query := qp.CompositeQuery.BuilderQueries[name]
|
||||
if query.DataSource == v3.DataSourceMetrics && query.Temporality == "" {
|
||||
if nameToTemporality[query.AggregateAttribute.Key][v3.Delta] {
|
||||
query.Temporality = v3.Delta
|
||||
} else if nameToTemporality[query.AggregateAttribute.Key][v3.Cumulative] {
|
||||
query.Temporality = v3.Cumulative
|
||||
} else {
|
||||
query.Temporality = v3.Unspecified
|
||||
}
|
||||
r.TemporalityMap[query.AggregateAttribute.Key] = nameToTemporality[query.AggregateAttribute.Key]
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// ShouldSkipNewGroups returns true if new group filtering should be applied
|
||||
func (r *BaseRule) ShouldSkipNewGroups() bool {
|
||||
return r.newGroupEvalDelay.IsPositive()
|
||||
@@ -523,7 +396,7 @@ func (r *BaseRule) ShouldSkipNewGroups() bool {
|
||||
|
||||
// isFilterNewSeriesSupported checks if the query is supported for new series filtering
|
||||
func (r *BaseRule) isFilterNewSeriesSupported() bool {
|
||||
if r.ruleCondition.CompositeQuery.QueryType == v3.QueryTypeBuilder {
|
||||
if r.ruleCondition.CompositeQuery.QueryType == ruletypes.QueryTypeBuilder {
|
||||
for _, query := range r.ruleCondition.CompositeQuery.Queries {
|
||||
if query.Type != qbtypes.QueryTypeBuilder {
|
||||
continue
|
||||
@@ -592,7 +465,7 @@ func (r *BaseRule) extractMetricAndGroupBys(ctx context.Context) (map[string][]s
|
||||
|
||||
// FilterNewSeries filters out items that are too new based on metadata first_seen timestamps.
|
||||
// Returns the filtered series (old ones) excluding new series that are still within the grace period.
|
||||
func (r *BaseRule) FilterNewSeries(ctx context.Context, ts time.Time, series []*v3.Series) ([]*v3.Series, error) {
|
||||
func (r *BaseRule) FilterNewSeries(ctx context.Context, ts time.Time, series []*qbtypes.TimeSeries) ([]*qbtypes.TimeSeries, error) {
|
||||
// Extract metric names and groupBy keys
|
||||
metricToGroupedFields, err := r.extractMetricAndGroupBys(ctx)
|
||||
if err != nil {
|
||||
@@ -609,14 +482,22 @@ func (r *BaseRule) FilterNewSeries(ctx context.Context, ts time.Time, series []*
|
||||
seriesIdxToLookupKeys := make(map[int][]telemetrytypes.MetricMetadataLookupKey) // series index -> lookup keys
|
||||
|
||||
for i := 0; i < len(series); i++ {
|
||||
metricLabelMap := series[i].Labels
|
||||
|
||||
valueForKey := func(key string) (string, bool) {
|
||||
for _, item := range series[i].Labels {
|
||||
if item.Key.Name == key {
|
||||
return fmt.Sprint(item.Value), true
|
||||
}
|
||||
}
|
||||
return "", false
|
||||
}
|
||||
|
||||
// Collect groupBy attribute-value pairs for this series
|
||||
seriesKeys := make([]telemetrytypes.MetricMetadataLookupKey, 0)
|
||||
|
||||
for metricName, groupedFields := range metricToGroupedFields {
|
||||
for _, groupByKey := range groupedFields {
|
||||
if attrValue, ok := metricLabelMap[groupByKey]; ok {
|
||||
if attrValue, ok := valueForKey(groupByKey); ok {
|
||||
lookupKey := telemetrytypes.MetricMetadataLookupKey{
|
||||
MetricName: metricName,
|
||||
AttributeName: groupByKey,
|
||||
@@ -656,7 +537,7 @@ func (r *BaseRule) FilterNewSeries(ctx context.Context, ts time.Time, series []*
|
||||
}
|
||||
|
||||
// Filter series based on first_seen + delay
|
||||
filteredSeries := make([]*v3.Series, 0, len(series))
|
||||
filteredSeries := make([]*qbtypes.TimeSeries, 0, len(series))
|
||||
evalTimeMs := ts.UnixMilli()
|
||||
newGroupEvalDelayMs := r.newGroupEvalDelay.Milliseconds()
|
||||
|
||||
@@ -726,9 +607,9 @@ func (r *BaseRule) HandleMissingDataAlert(ctx context.Context, ts time.Time, has
|
||||
}
|
||||
|
||||
r.logger.InfoContext(ctx, "no data found for rule condition", "rule_id", r.ID())
|
||||
lbls := labels.NewBuilder(labels.Labels{})
|
||||
lbls := ruletypes.NewBuilder()
|
||||
if !r.lastTimestampWithDatapoints.IsZero() {
|
||||
lbls.Set(ruletypes.LabelLastSeen, r.lastTimestampWithDatapoints.Format(constants.AlertTimeFormat))
|
||||
lbls.Set(ruletypes.LabelLastSeen, r.lastTimestampWithDatapoints.Format(ruletypes.AlertTimeFormat))
|
||||
}
|
||||
return &ruletypes.Sample{Metric: lbls.Labels(), IsMissing: true}
|
||||
}
|
||||
|
||||
@@ -2,23 +2,14 @@ package rules
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log/slog"
|
||||
"fmt"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/cache"
|
||||
"github.com/SigNoz/signoz/pkg/cache/cachetest"
|
||||
"github.com/SigNoz/signoz/pkg/instrumentation/instrumentationtest"
|
||||
"github.com/SigNoz/signoz/pkg/prometheus"
|
||||
"github.com/SigNoz/signoz/pkg/prometheus/prometheustest"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/app/clickhouseReader"
|
||||
v3 "github.com/SigNoz/signoz/pkg/query-service/model/v3"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/utils/labels"
|
||||
"github.com/SigNoz/signoz/pkg/queryparser"
|
||||
"github.com/SigNoz/signoz/pkg/telemetrystore"
|
||||
"github.com/SigNoz/signoz/pkg/telemetrystore/telemetrystoretest"
|
||||
"github.com/SigNoz/signoz/pkg/types/metrictypes"
|
||||
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
|
||||
"github.com/SigNoz/signoz/pkg/types/ruletypes"
|
||||
@@ -27,22 +18,29 @@ import (
|
||||
"github.com/SigNoz/signoz/pkg/valuer"
|
||||
)
|
||||
|
||||
// createTestSeries creates a *v3.Series with the given labels and optional points
|
||||
// createTestSeries creates a *qbtypes.TimeSeries with the given labels and optional points
|
||||
// so we don't exactly need the points in the series because the labels are used to determine if the series is new or old
|
||||
// we use the labels to create a lookup key for the series and then check the first_seen timestamp for the series in the metadata table
|
||||
func createTestSeries(labels map[string]string, points []v3.Point) *v3.Series {
|
||||
if points == nil {
|
||||
points = []v3.Point{}
|
||||
func createTestSeries(kvMap map[string]string, values []*qbtypes.TimeSeriesValue) *qbtypes.TimeSeries {
|
||||
if values == nil {
|
||||
values = []*qbtypes.TimeSeriesValue{}
|
||||
}
|
||||
return &v3.Series{
|
||||
Labels: labels,
|
||||
Points: points,
|
||||
lbls := make([]*qbtypes.Label, 0)
|
||||
for k, v := range kvMap {
|
||||
lbls = append(lbls, &qbtypes.Label{
|
||||
Key: telemetrytypes.TelemetryFieldKey{Name: k},
|
||||
Value: v,
|
||||
})
|
||||
}
|
||||
return &qbtypes.TimeSeries{
|
||||
Labels: lbls,
|
||||
Values: values,
|
||||
}
|
||||
}
|
||||
|
||||
// seriesEqual compares two v3.Series by their labels
|
||||
// Returns true if the series have the same labels (order doesn't matter)
|
||||
func seriesEqual(s1, s2 *v3.Series) bool {
|
||||
func seriesEqual(s1, s2 *qbtypes.TimeSeries) bool {
|
||||
if s1 == nil && s2 == nil {
|
||||
return true
|
||||
}
|
||||
@@ -117,7 +115,7 @@ func mergeFirstSeenMaps(maps ...map[telemetrytypes.MetricMetadataLookupKey]int64
|
||||
}
|
||||
|
||||
// createPostableRule creates a PostableRule with the given CompositeQuery
|
||||
func createPostableRule(compositeQuery *v3.CompositeQuery) ruletypes.PostableRule {
|
||||
func createPostableRule(compositeQuery *ruletypes.AlertCompositeQuery) ruletypes.PostableRule {
|
||||
return ruletypes.PostableRule{
|
||||
AlertName: "Test Rule",
|
||||
AlertType: ruletypes.AlertTypeMetric,
|
||||
@@ -135,10 +133,10 @@ func createPostableRule(compositeQuery *v3.CompositeQuery) ruletypes.PostableRul
|
||||
Kind: ruletypes.BasicThresholdKind,
|
||||
Spec: ruletypes.BasicRuleThresholds{
|
||||
{
|
||||
Name: "test-threshold",
|
||||
TargetValue: func() *float64 { v := 1.0; return &v }(),
|
||||
CompareOp: ruletypes.ValueIsAbove,
|
||||
MatchType: ruletypes.AtleastOnce,
|
||||
Name: "test-threshold",
|
||||
TargetValue: func() *float64 { v := 1.0; return &v }(),
|
||||
CompareOperator: ruletypes.ValueIsAbove,
|
||||
MatchType: ruletypes.AtleastOnce,
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -149,12 +147,12 @@ func createPostableRule(compositeQuery *v3.CompositeQuery) ruletypes.PostableRul
|
||||
// filterNewSeriesTestCase represents a test case for FilterNewSeries
|
||||
type filterNewSeriesTestCase struct {
|
||||
name string
|
||||
compositeQuery *v3.CompositeQuery
|
||||
series []*v3.Series
|
||||
compositeQuery *ruletypes.AlertCompositeQuery
|
||||
series []*qbtypes.TimeSeries
|
||||
firstSeenMap map[telemetrytypes.MetricMetadataLookupKey]int64
|
||||
newGroupEvalDelay valuer.TextDuration
|
||||
evalTime time.Time
|
||||
expectedFiltered []*v3.Series // series that should be in the final filtered result (old enough)
|
||||
expectedFiltered []*qbtypes.TimeSeries // series that should be in the final filtered result (old enough)
|
||||
expectError bool
|
||||
}
|
||||
|
||||
@@ -170,8 +168,8 @@ func TestBaseRule_FilterNewSeries(t *testing.T) {
|
||||
tests := []filterNewSeriesTestCase{
|
||||
{
|
||||
name: "mixed old and new series - Builder query",
|
||||
compositeQuery: &v3.CompositeQuery{
|
||||
QueryType: v3.QueryTypeBuilder,
|
||||
compositeQuery: &ruletypes.AlertCompositeQuery{
|
||||
QueryType: ruletypes.QueryTypeBuilder,
|
||||
Queries: []qbtypes.QueryEnvelope{
|
||||
{
|
||||
Type: qbtypes.QueryTypeBuilder,
|
||||
@@ -194,7 +192,7 @@ func TestBaseRule_FilterNewSeries(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
series: []*v3.Series{
|
||||
series: []*qbtypes.TimeSeries{
|
||||
createTestSeries(map[string]string{"service_name": "svc-old", "env": "prod"}, nil),
|
||||
createTestSeries(map[string]string{"service_name": "svc-new", "env": "prod"}, nil),
|
||||
createTestSeries(map[string]string{"service_name": "svc-missing", "env": "stage"}, nil),
|
||||
@@ -206,15 +204,15 @@ func TestBaseRule_FilterNewSeries(t *testing.T) {
|
||||
),
|
||||
newGroupEvalDelay: defaultNewGroupEvalDelay,
|
||||
evalTime: defaultEvalTime,
|
||||
expectedFiltered: []*v3.Series{
|
||||
expectedFiltered: []*qbtypes.TimeSeries{
|
||||
createTestSeries(map[string]string{"service_name": "svc-old", "env": "prod"}, nil),
|
||||
createTestSeries(map[string]string{"service_name": "svc-missing", "env": "stage"}, nil),
|
||||
}, // svc-old and svc-missing should be included; svc-new is filtered out
|
||||
},
|
||||
{
|
||||
name: "all new series - PromQL query",
|
||||
compositeQuery: &v3.CompositeQuery{
|
||||
QueryType: v3.QueryTypePromQL,
|
||||
compositeQuery: &ruletypes.AlertCompositeQuery{
|
||||
QueryType: ruletypes.QueryTypePromQL,
|
||||
Queries: []qbtypes.QueryEnvelope{
|
||||
{
|
||||
Type: qbtypes.QueryTypePromQL,
|
||||
@@ -228,7 +226,7 @@ func TestBaseRule_FilterNewSeries(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
series: []*v3.Series{
|
||||
series: []*qbtypes.TimeSeries{
|
||||
createTestSeries(map[string]string{"service_name": "svc-new1", "env": "prod"}, nil),
|
||||
createTestSeries(map[string]string{"service_name": "svc-new2", "env": "stage"}, nil),
|
||||
},
|
||||
@@ -238,12 +236,12 @@ func TestBaseRule_FilterNewSeries(t *testing.T) {
|
||||
),
|
||||
newGroupEvalDelay: defaultNewGroupEvalDelay,
|
||||
evalTime: defaultEvalTime,
|
||||
expectedFiltered: []*v3.Series{}, // all should be filtered out (new series)
|
||||
expectedFiltered: []*qbtypes.TimeSeries{}, // all should be filtered out (new series)
|
||||
},
|
||||
{
|
||||
name: "all old series - ClickHouse query",
|
||||
compositeQuery: &v3.CompositeQuery{
|
||||
QueryType: v3.QueryTypeClickHouseSQL,
|
||||
compositeQuery: &ruletypes.AlertCompositeQuery{
|
||||
QueryType: ruletypes.QueryTypeClickHouseSQL,
|
||||
Queries: []qbtypes.QueryEnvelope{
|
||||
{
|
||||
Type: qbtypes.QueryTypeClickHouseSQL,
|
||||
@@ -255,7 +253,7 @@ func TestBaseRule_FilterNewSeries(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
series: []*v3.Series{
|
||||
series: []*qbtypes.TimeSeries{
|
||||
createTestSeries(map[string]string{"service_name": "svc-old1", "env": "prod"}, nil),
|
||||
createTestSeries(map[string]string{"service_name": "svc-old2", "env": "stage"}, nil),
|
||||
},
|
||||
@@ -265,15 +263,15 @@ func TestBaseRule_FilterNewSeries(t *testing.T) {
|
||||
),
|
||||
newGroupEvalDelay: defaultNewGroupEvalDelay,
|
||||
evalTime: defaultEvalTime,
|
||||
expectedFiltered: []*v3.Series{
|
||||
expectedFiltered: []*qbtypes.TimeSeries{
|
||||
createTestSeries(map[string]string{"service_name": "svc-old1", "env": "prod"}, nil),
|
||||
createTestSeries(map[string]string{"service_name": "svc-old2", "env": "stage"}, nil),
|
||||
}, // all should be included (old series)
|
||||
},
|
||||
{
|
||||
name: "no grouping in query - Builder",
|
||||
compositeQuery: &v3.CompositeQuery{
|
||||
QueryType: v3.QueryTypeBuilder,
|
||||
compositeQuery: &ruletypes.AlertCompositeQuery{
|
||||
QueryType: ruletypes.QueryTypeBuilder,
|
||||
Queries: []qbtypes.QueryEnvelope{
|
||||
{
|
||||
Type: qbtypes.QueryTypeBuilder,
|
||||
@@ -293,20 +291,20 @@ func TestBaseRule_FilterNewSeries(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
series: []*v3.Series{
|
||||
series: []*qbtypes.TimeSeries{
|
||||
createTestSeries(map[string]string{"service_name": "svc1", "env": "prod"}, nil),
|
||||
},
|
||||
firstSeenMap: make(map[telemetrytypes.MetricMetadataLookupKey]int64),
|
||||
newGroupEvalDelay: defaultNewGroupEvalDelay,
|
||||
evalTime: defaultEvalTime,
|
||||
expectedFiltered: []*v3.Series{
|
||||
expectedFiltered: []*qbtypes.TimeSeries{
|
||||
createTestSeries(map[string]string{"service_name": "svc1", "env": "prod"}, nil),
|
||||
}, // early return, no filtering - all series included
|
||||
},
|
||||
{
|
||||
name: "no metric names - Builder",
|
||||
compositeQuery: &v3.CompositeQuery{
|
||||
QueryType: v3.QueryTypeBuilder,
|
||||
compositeQuery: &ruletypes.AlertCompositeQuery{
|
||||
QueryType: ruletypes.QueryTypeBuilder,
|
||||
Queries: []qbtypes.QueryEnvelope{
|
||||
{
|
||||
Type: qbtypes.QueryTypeBuilder,
|
||||
@@ -323,20 +321,20 @@ func TestBaseRule_FilterNewSeries(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
series: []*v3.Series{
|
||||
series: []*qbtypes.TimeSeries{
|
||||
createTestSeries(map[string]string{"service_name": "svc1", "env": "prod"}, nil),
|
||||
},
|
||||
firstSeenMap: make(map[telemetrytypes.MetricMetadataLookupKey]int64),
|
||||
newGroupEvalDelay: defaultNewGroupEvalDelay,
|
||||
evalTime: defaultEvalTime,
|
||||
expectedFiltered: []*v3.Series{
|
||||
expectedFiltered: []*qbtypes.TimeSeries{
|
||||
createTestSeries(map[string]string{"service_name": "svc1", "env": "prod"}, nil),
|
||||
}, // early return, no filtering - all series included
|
||||
},
|
||||
{
|
||||
name: "series with no matching labels - Builder",
|
||||
compositeQuery: &v3.CompositeQuery{
|
||||
QueryType: v3.QueryTypeBuilder,
|
||||
compositeQuery: &ruletypes.AlertCompositeQuery{
|
||||
QueryType: ruletypes.QueryTypeBuilder,
|
||||
Queries: []qbtypes.QueryEnvelope{
|
||||
{
|
||||
Type: qbtypes.QueryTypeBuilder,
|
||||
@@ -359,20 +357,20 @@ func TestBaseRule_FilterNewSeries(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
series: []*v3.Series{
|
||||
series: []*qbtypes.TimeSeries{
|
||||
createTestSeries(map[string]string{"status": "200"}, nil), // no service_name or env
|
||||
},
|
||||
firstSeenMap: make(map[telemetrytypes.MetricMetadataLookupKey]int64),
|
||||
newGroupEvalDelay: defaultNewGroupEvalDelay,
|
||||
evalTime: defaultEvalTime,
|
||||
expectedFiltered: []*v3.Series{
|
||||
expectedFiltered: []*qbtypes.TimeSeries{
|
||||
createTestSeries(map[string]string{"status": "200"}, nil),
|
||||
}, // series included as we can't decide if it's new or old
|
||||
},
|
||||
{
|
||||
name: "series with missing metadata - PromQL",
|
||||
compositeQuery: &v3.CompositeQuery{
|
||||
QueryType: v3.QueryTypePromQL,
|
||||
compositeQuery: &ruletypes.AlertCompositeQuery{
|
||||
QueryType: ruletypes.QueryTypePromQL,
|
||||
Queries: []qbtypes.QueryEnvelope{
|
||||
{
|
||||
Type: qbtypes.QueryTypePromQL,
|
||||
@@ -386,7 +384,7 @@ func TestBaseRule_FilterNewSeries(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
series: []*v3.Series{
|
||||
series: []*qbtypes.TimeSeries{
|
||||
createTestSeries(map[string]string{"service_name": "svc-old", "env": "prod"}, nil),
|
||||
createTestSeries(map[string]string{"service_name": "svc-no-metadata", "env": "prod"}, nil),
|
||||
},
|
||||
@@ -394,15 +392,15 @@ func TestBaseRule_FilterNewSeries(t *testing.T) {
|
||||
// svc-no-metadata has no entry in firstSeenMap
|
||||
newGroupEvalDelay: defaultNewGroupEvalDelay,
|
||||
evalTime: defaultEvalTime,
|
||||
expectedFiltered: []*v3.Series{
|
||||
expectedFiltered: []*qbtypes.TimeSeries{
|
||||
createTestSeries(map[string]string{"service_name": "svc-old", "env": "prod"}, nil),
|
||||
createTestSeries(map[string]string{"service_name": "svc-no-metadata", "env": "prod"}, nil),
|
||||
}, // both should be included - svc-old is old, svc-no-metadata can't be decided
|
||||
},
|
||||
{
|
||||
name: "series with partial metadata - ClickHouse",
|
||||
compositeQuery: &v3.CompositeQuery{
|
||||
QueryType: v3.QueryTypeClickHouseSQL,
|
||||
compositeQuery: &ruletypes.AlertCompositeQuery{
|
||||
QueryType: ruletypes.QueryTypeClickHouseSQL,
|
||||
Queries: []qbtypes.QueryEnvelope{
|
||||
{
|
||||
Type: qbtypes.QueryTypeClickHouseSQL,
|
||||
@@ -414,7 +412,7 @@ func TestBaseRule_FilterNewSeries(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
series: []*v3.Series{
|
||||
series: []*qbtypes.TimeSeries{
|
||||
createTestSeries(map[string]string{"service_name": "svc-partial", "env": "prod"}, nil),
|
||||
},
|
||||
// Only provide metadata for service_name, not env
|
||||
@@ -424,14 +422,14 @@ func TestBaseRule_FilterNewSeries(t *testing.T) {
|
||||
},
|
||||
newGroupEvalDelay: defaultNewGroupEvalDelay,
|
||||
evalTime: defaultEvalTime,
|
||||
expectedFiltered: []*v3.Series{
|
||||
expectedFiltered: []*qbtypes.TimeSeries{
|
||||
createTestSeries(map[string]string{"service_name": "svc-partial", "env": "prod"}, nil),
|
||||
}, // has some metadata, uses max first_seen which is old
|
||||
},
|
||||
{
|
||||
name: "empty series array - Builder",
|
||||
compositeQuery: &v3.CompositeQuery{
|
||||
QueryType: v3.QueryTypeBuilder,
|
||||
compositeQuery: &ruletypes.AlertCompositeQuery{
|
||||
QueryType: ruletypes.QueryTypeBuilder,
|
||||
Queries: []qbtypes.QueryEnvelope{
|
||||
{
|
||||
Type: qbtypes.QueryTypeBuilder,
|
||||
@@ -454,16 +452,16 @@ func TestBaseRule_FilterNewSeries(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
series: []*v3.Series{},
|
||||
series: []*qbtypes.TimeSeries{},
|
||||
firstSeenMap: make(map[telemetrytypes.MetricMetadataLookupKey]int64),
|
||||
newGroupEvalDelay: defaultNewGroupEvalDelay,
|
||||
evalTime: defaultEvalTime,
|
||||
expectedFiltered: []*v3.Series{},
|
||||
expectedFiltered: []*qbtypes.TimeSeries{},
|
||||
},
|
||||
{
|
||||
name: "zero delay - Builder",
|
||||
compositeQuery: &v3.CompositeQuery{
|
||||
QueryType: v3.QueryTypeBuilder,
|
||||
compositeQuery: &ruletypes.AlertCompositeQuery{
|
||||
QueryType: ruletypes.QueryTypeBuilder,
|
||||
Queries: []qbtypes.QueryEnvelope{
|
||||
{
|
||||
Type: qbtypes.QueryTypeBuilder,
|
||||
@@ -486,20 +484,20 @@ func TestBaseRule_FilterNewSeries(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
series: []*v3.Series{
|
||||
series: []*qbtypes.TimeSeries{
|
||||
createTestSeries(map[string]string{"service_name": "svc1", "env": "prod"}, nil),
|
||||
},
|
||||
firstSeenMap: createFirstSeenMap("request_total", defaultGroupByFields, defaultEvalTime, defaultDelay, true, "svc1", "prod"),
|
||||
newGroupEvalDelay: valuer.TextDuration{}, // zero delay
|
||||
evalTime: defaultEvalTime,
|
||||
expectedFiltered: []*v3.Series{
|
||||
expectedFiltered: []*qbtypes.TimeSeries{
|
||||
createTestSeries(map[string]string{"service_name": "svc1", "env": "prod"}, nil),
|
||||
}, // with zero delay, all series pass
|
||||
},
|
||||
{
|
||||
name: "multiple metrics with same groupBy keys - Builder",
|
||||
compositeQuery: &v3.CompositeQuery{
|
||||
QueryType: v3.QueryTypeBuilder,
|
||||
compositeQuery: &ruletypes.AlertCompositeQuery{
|
||||
QueryType: ruletypes.QueryTypeBuilder,
|
||||
Queries: []qbtypes.QueryEnvelope{
|
||||
{
|
||||
Type: qbtypes.QueryTypeBuilder,
|
||||
@@ -527,7 +525,7 @@ func TestBaseRule_FilterNewSeries(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
series: []*v3.Series{
|
||||
series: []*qbtypes.TimeSeries{
|
||||
createTestSeries(map[string]string{"service_name": "svc1", "env": "prod"}, nil),
|
||||
},
|
||||
firstSeenMap: mergeFirstSeenMaps(
|
||||
@@ -536,14 +534,14 @@ func TestBaseRule_FilterNewSeries(t *testing.T) {
|
||||
),
|
||||
newGroupEvalDelay: defaultNewGroupEvalDelay,
|
||||
evalTime: defaultEvalTime,
|
||||
expectedFiltered: []*v3.Series{
|
||||
expectedFiltered: []*qbtypes.TimeSeries{
|
||||
createTestSeries(map[string]string{"service_name": "svc1", "env": "prod"}, nil),
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "series with multiple groupBy attributes where one is new and one is old - Builder",
|
||||
compositeQuery: &v3.CompositeQuery{
|
||||
QueryType: v3.QueryTypeBuilder,
|
||||
compositeQuery: &ruletypes.AlertCompositeQuery{
|
||||
QueryType: ruletypes.QueryTypeBuilder,
|
||||
Queries: []qbtypes.QueryEnvelope{
|
||||
{
|
||||
Type: qbtypes.QueryTypeBuilder,
|
||||
@@ -566,7 +564,7 @@ func TestBaseRule_FilterNewSeries(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
series: []*v3.Series{
|
||||
series: []*qbtypes.TimeSeries{
|
||||
createTestSeries(map[string]string{"service_name": "svc1", "env": "prod"}, nil),
|
||||
},
|
||||
// service_name is old, env is new - should use max (new)
|
||||
@@ -576,12 +574,12 @@ func TestBaseRule_FilterNewSeries(t *testing.T) {
|
||||
),
|
||||
newGroupEvalDelay: defaultNewGroupEvalDelay,
|
||||
evalTime: defaultEvalTime,
|
||||
expectedFiltered: []*v3.Series{}, // max first_seen is new, so should be filtered out
|
||||
expectedFiltered: []*qbtypes.TimeSeries{}, // max first_seen is new, so should be filtered out
|
||||
},
|
||||
{
|
||||
name: "Logs query - should skip filtering and return empty skip indexes",
|
||||
compositeQuery: &v3.CompositeQuery{
|
||||
QueryType: v3.QueryTypeBuilder,
|
||||
compositeQuery: &ruletypes.AlertCompositeQuery{
|
||||
QueryType: ruletypes.QueryTypeBuilder,
|
||||
Queries: []qbtypes.QueryEnvelope{
|
||||
{
|
||||
Type: qbtypes.QueryTypeBuilder,
|
||||
@@ -601,22 +599,22 @@ func TestBaseRule_FilterNewSeries(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
series: []*v3.Series{
|
||||
series: []*qbtypes.TimeSeries{
|
||||
createTestSeries(map[string]string{"service_name": "svc1"}, nil),
|
||||
createTestSeries(map[string]string{"service_name": "svc2"}, nil),
|
||||
},
|
||||
firstSeenMap: make(map[telemetrytypes.MetricMetadataLookupKey]int64),
|
||||
newGroupEvalDelay: defaultNewGroupEvalDelay,
|
||||
evalTime: defaultEvalTime,
|
||||
expectedFiltered: []*v3.Series{
|
||||
expectedFiltered: []*qbtypes.TimeSeries{
|
||||
createTestSeries(map[string]string{"service_name": "svc1"}, nil),
|
||||
createTestSeries(map[string]string{"service_name": "svc2"}, nil),
|
||||
}, // Logs queries should return early, no filtering - all included
|
||||
},
|
||||
{
|
||||
name: "Traces query - should skip filtering and return empty skip indexes",
|
||||
compositeQuery: &v3.CompositeQuery{
|
||||
QueryType: v3.QueryTypeBuilder,
|
||||
compositeQuery: &ruletypes.AlertCompositeQuery{
|
||||
QueryType: ruletypes.QueryTypeBuilder,
|
||||
Queries: []qbtypes.QueryEnvelope{
|
||||
{
|
||||
Type: qbtypes.QueryTypeBuilder,
|
||||
@@ -636,14 +634,14 @@ func TestBaseRule_FilterNewSeries(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
series: []*v3.Series{
|
||||
series: []*qbtypes.TimeSeries{
|
||||
createTestSeries(map[string]string{"service_name": "svc1"}, nil),
|
||||
createTestSeries(map[string]string{"service_name": "svc2"}, nil),
|
||||
},
|
||||
firstSeenMap: make(map[telemetrytypes.MetricMetadataLookupKey]int64),
|
||||
newGroupEvalDelay: defaultNewGroupEvalDelay,
|
||||
evalTime: defaultEvalTime,
|
||||
expectedFiltered: []*v3.Series{
|
||||
expectedFiltered: []*qbtypes.TimeSeries{
|
||||
createTestSeries(map[string]string{"service_name": "svc1"}, nil),
|
||||
createTestSeries(map[string]string{"service_name": "svc2"}, nil),
|
||||
}, // Traces queries should return early, no filtering - all included
|
||||
@@ -655,9 +653,6 @@ func TestBaseRule_FilterNewSeries(t *testing.T) {
|
||||
// Create postableRule from compositeQuery
|
||||
postableRule := createPostableRule(tt.compositeQuery)
|
||||
|
||||
// Setup telemetry store mock
|
||||
telemetryStore := telemetrystoretest.New(telemetrystore.Config{}, &queryMatcherAny{})
|
||||
|
||||
// Setup mock metadata store
|
||||
mockMetadataStore := telemetrytypestest.NewMockMetadataStore()
|
||||
|
||||
@@ -681,37 +676,12 @@ func TestBaseRule_FilterNewSeries(t *testing.T) {
|
||||
// Setup metadata query mock
|
||||
mockMetadataStore.SetFirstSeenFromMetricMetadata(tt.firstSeenMap)
|
||||
|
||||
// Create reader with mocked telemetry store
|
||||
readerCache, err := cachetest.New(
|
||||
cache.Config{
|
||||
Provider: "memory",
|
||||
Memory: cache.Memory{
|
||||
NumCounters: 10 * 1000,
|
||||
MaxCost: 1 << 26,
|
||||
},
|
||||
},
|
||||
)
|
||||
require.NoError(t, err)
|
||||
|
||||
options := clickhouseReader.NewOptions("", "", "archiveNamespace")
|
||||
reader := clickhouseReader.NewReader(
|
||||
slog.Default(),
|
||||
nil,
|
||||
telemetryStore,
|
||||
prometheustest.New(context.Background(), settings, prometheus.Config{Timeout: 2 * time.Minute}, telemetryStore),
|
||||
"",
|
||||
time.Second,
|
||||
nil,
|
||||
readerCache,
|
||||
options,
|
||||
)
|
||||
|
||||
postableRule.NotificationSettings = &ruletypes.NotificationSettings{
|
||||
NewGroupEvalDelay: tt.newGroupEvalDelay,
|
||||
}
|
||||
|
||||
// Create BaseRule using NewBaseRule
|
||||
rule, err := NewBaseRule("test-rule", valuer.GenerateUUID(), &postableRule, reader, WithQueryParser(queryParser), WithLogger(logger), WithMetadataStore(mockMetadataStore))
|
||||
rule, err := NewBaseRule("test-rule", valuer.GenerateUUID(), &postableRule, WithQueryParser(queryParser), WithLogger(logger), WithMetadataStore(mockMetadataStore))
|
||||
require.NoError(t, err)
|
||||
|
||||
filteredSeries, err := rule.FilterNewSeries(context.Background(), tt.evalTime, tt.series)
|
||||
@@ -755,9 +725,13 @@ func TestBaseRule_FilterNewSeries(t *testing.T) {
|
||||
|
||||
// labelsKey creates a deterministic string key from a labels map
|
||||
// This is used to group series by their unique label combinations
|
||||
func labelsKey(lbls map[string]string) string {
|
||||
func labelsKey(lbls []*qbtypes.Label) string {
|
||||
if len(lbls) == 0 {
|
||||
return ""
|
||||
}
|
||||
return labels.FromMap(lbls).String()
|
||||
temp := ruletypes.NewBuilder()
|
||||
for _, item := range lbls {
|
||||
temp.Set(item.Key.Name, fmt.Sprint(item.Value))
|
||||
}
|
||||
return temp.Labels().String()
|
||||
}
|
||||
|
||||
@@ -10,7 +10,7 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/query-service/utils/labels"
|
||||
"github.com/SigNoz/signoz/pkg/querier"
|
||||
"github.com/SigNoz/signoz/pkg/queryparser"
|
||||
|
||||
"github.com/go-openapi/strfmt"
|
||||
@@ -21,9 +21,6 @@ import (
|
||||
"github.com/SigNoz/signoz/pkg/modules/organization"
|
||||
"github.com/SigNoz/signoz/pkg/modules/rulestatehistory"
|
||||
"github.com/SigNoz/signoz/pkg/prometheus"
|
||||
querierV5 "github.com/SigNoz/signoz/pkg/querier"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/interfaces"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/model"
|
||||
"github.com/SigNoz/signoz/pkg/sqlstore"
|
||||
"github.com/SigNoz/signoz/pkg/telemetrystore"
|
||||
"github.com/SigNoz/signoz/pkg/types"
|
||||
@@ -39,8 +36,7 @@ type PrepareTaskOptions struct {
|
||||
TaskName string
|
||||
RuleStore ruletypes.RuleStore
|
||||
MaintenanceStore ruletypes.MaintenanceStore
|
||||
Reader interfaces.Reader
|
||||
Querier querierV5.Querier
|
||||
Querier querier.Querier
|
||||
Logger *slog.Logger
|
||||
Cache cache.Cache
|
||||
ManagerOpts *ManagerOptions
|
||||
@@ -53,8 +49,7 @@ type PrepareTestRuleOptions struct {
|
||||
Rule *ruletypes.PostableRule
|
||||
RuleStore ruletypes.RuleStore
|
||||
MaintenanceStore ruletypes.MaintenanceStore
|
||||
Reader interfaces.Reader
|
||||
Querier querierV5.Querier
|
||||
Querier querier.Querier
|
||||
Logger *slog.Logger
|
||||
Cache cache.Cache
|
||||
ManagerOpts *ManagerOptions
|
||||
@@ -65,19 +60,12 @@ type PrepareTestRuleOptions struct {
|
||||
|
||||
const taskNameSuffix = "webAppEditor"
|
||||
|
||||
// RuleIDFromTaskName extracts the rule ID from a task name produced by
// prepareTaskName, i.e. everything before the first "-groupname" marker.
// If the marker is absent, the name is returned unchanged.
func RuleIDFromTaskName(n string) string {
	// Cut returns the whole input as "before" when the separator is missing,
	// which matches the previous strings.Split(n, sep)[0] behavior.
	ruleID, _, _ := strings.Cut(n, "-groupname")
	return ruleID
}
|
||||
|
||||
// prepareTaskName derives the task name for a rule by appending the
// "-groupname" suffix to its ID. RuleIDFromTaskName is the inverse.
func prepareTaskName(ruleID string) string {
	return fmt.Sprintf("%s-groupname", ruleID)
}
|
||||
|
||||
// ManagerOptions bundles options for the Manager.
|
||||
@@ -88,8 +76,7 @@ type ManagerOptions struct {
|
||||
|
||||
Context context.Context
|
||||
ResendDelay time.Duration
|
||||
Reader interfaces.Reader
|
||||
Querier querierV5.Querier
|
||||
Querier querier.Querier
|
||||
Logger *slog.Logger
|
||||
Cache cache.Cache
|
||||
|
||||
@@ -98,12 +85,12 @@ type ManagerOptions struct {
|
||||
RuleStateHistoryModule rulestatehistory.Module
|
||||
|
||||
PrepareTaskFunc func(opts PrepareTaskOptions) (Task, error)
|
||||
PrepareTestRuleFunc func(opts PrepareTestRuleOptions) (int, *model.ApiError)
|
||||
PrepareTestRuleFunc func(opts PrepareTestRuleOptions) (int, error)
|
||||
Alertmanager alertmanager.Alertmanager
|
||||
OrgGetter organization.Getter
|
||||
RuleStore ruletypes.RuleStore
|
||||
MaintenanceStore ruletypes.MaintenanceStore
|
||||
SqlStore sqlstore.SQLStore
|
||||
SQLStore sqlstore.SQLStore
|
||||
QueryParser queryparser.QueryParser
|
||||
}
|
||||
|
||||
@@ -119,10 +106,9 @@ type Manager struct {
|
||||
maintenanceStore ruletypes.MaintenanceStore
|
||||
|
||||
logger *slog.Logger
|
||||
reader interfaces.Reader
|
||||
cache cache.Cache
|
||||
prepareTaskFunc func(opts PrepareTaskOptions) (Task, error)
|
||||
prepareTestRuleFunc func(opts PrepareTestRuleOptions) (int, *model.ApiError)
|
||||
prepareTestRuleFunc func(opts PrepareTestRuleOptions) (int, error)
|
||||
|
||||
alertmanager alertmanager.Alertmanager
|
||||
sqlstore sqlstore.SQLStore
|
||||
@@ -152,7 +138,7 @@ func defaultPrepareTaskFunc(opts PrepareTaskOptions) (Task, error) {
|
||||
rules := make([]Rule, 0)
|
||||
var task Task
|
||||
|
||||
ruleId := RuleIdFromTaskName(opts.TaskName)
|
||||
ruleID := RuleIDFromTaskName(opts.TaskName)
|
||||
|
||||
evaluation, err := opts.Rule.Evaluation.GetEvaluation()
|
||||
if err != nil {
|
||||
@@ -162,10 +148,9 @@ func defaultPrepareTaskFunc(opts PrepareTaskOptions) (Task, error) {
|
||||
if opts.Rule.RuleType == ruletypes.RuleTypeThreshold {
|
||||
// create a threshold rule
|
||||
tr, err := NewThresholdRule(
|
||||
ruleId,
|
||||
ruleID,
|
||||
opts.OrgID,
|
||||
opts.Rule,
|
||||
opts.Reader,
|
||||
opts.Querier,
|
||||
opts.Logger,
|
||||
WithEvalDelay(opts.ManagerOpts.EvalDelay),
|
||||
@@ -188,11 +173,10 @@ func defaultPrepareTaskFunc(opts PrepareTaskOptions) (Task, error) {
|
||||
|
||||
// create promql rule
|
||||
pr, err := NewPromRule(
|
||||
ruleId,
|
||||
ruleID,
|
||||
opts.OrgID,
|
||||
opts.Rule,
|
||||
opts.Logger,
|
||||
opts.Reader,
|
||||
opts.ManagerOpts.Prometheus,
|
||||
WithSQLStore(opts.SQLStore),
|
||||
WithQueryParser(opts.ManagerOpts.QueryParser),
|
||||
@@ -210,7 +194,7 @@ func defaultPrepareTaskFunc(opts PrepareTaskOptions) (Task, error) {
|
||||
task = newTask(TaskTypeProm, opts.TaskName, taskNameSuffix, evaluation.GetFrequency().Duration(), rules, opts.ManagerOpts, opts.NotifyFunc, opts.MaintenanceStore, opts.OrgID)
|
||||
|
||||
} else {
|
||||
return nil, fmt.Errorf("unsupported rule type %s. Supported types: %s, %s", opts.Rule.RuleType, ruletypes.RuleTypeProm, ruletypes.RuleTypeThreshold)
|
||||
return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "unsupported rule type %s. Supported types: %s, %s", opts.Rule.RuleType, ruletypes.RuleTypeProm, ruletypes.RuleTypeThreshold)
|
||||
}
|
||||
|
||||
return task, nil
|
||||
@@ -229,13 +213,12 @@ func NewManager(o *ManagerOptions) (*Manager, error) {
|
||||
opts: o,
|
||||
block: make(chan struct{}),
|
||||
logger: o.Logger,
|
||||
reader: o.Reader,
|
||||
cache: o.Cache,
|
||||
prepareTaskFunc: o.PrepareTaskFunc,
|
||||
prepareTestRuleFunc: o.PrepareTestRuleFunc,
|
||||
alertmanager: o.Alertmanager,
|
||||
orgGetter: o.OrgGetter,
|
||||
sqlstore: o.SqlStore,
|
||||
sqlstore: o.SQLStore,
|
||||
queryParser: o.QueryParser,
|
||||
}
|
||||
|
||||
@@ -292,6 +275,7 @@ func (m *Manager) initiate(ctx context.Context) error {
|
||||
loadErrors = append(loadErrors, err)
|
||||
continue
|
||||
}
|
||||
|
||||
if parsedRule.NotificationSettings != nil {
|
||||
config := parsedRule.NotificationSettings.GetAlertManagerNotificationConfig()
|
||||
err = m.alertmanager.SetNotificationConfig(ctx, org.ID, rec.ID.StringValue(), &config)
|
||||
@@ -413,7 +397,6 @@ func (m *Manager) editTask(_ context.Context, orgID valuer.UUID, rule *ruletypes
|
||||
TaskName: taskName,
|
||||
RuleStore: m.ruleStore,
|
||||
MaintenanceStore: m.maintenanceStore,
|
||||
Reader: m.reader,
|
||||
Querier: m.opts.Querier,
|
||||
Logger: m.opts.Logger,
|
||||
Cache: m.cache,
|
||||
@@ -461,7 +444,7 @@ func (m *Manager) DeleteRule(ctx context.Context, idStr string) error {
|
||||
id, err := valuer.NewUUID(idStr)
|
||||
if err != nil {
|
||||
m.logger.Error("delete rule received a rule id in invalid format, must be a valid uuid-v7", "id", idStr, errors.Attr(err))
|
||||
return fmt.Errorf("delete rule received an rule id in invalid format, must be a valid uuid-v7")
|
||||
return errors.NewInvalidInputf(errors.CodeInvalidInput, "delete rule received an rule id in invalid format, must be a valid uuid-v7")
|
||||
}
|
||||
|
||||
claims, err := authtypes.ClaimsFromContext(ctx)
|
||||
@@ -526,7 +509,7 @@ func (m *Manager) deleteTask(taskName string) {
|
||||
if ok {
|
||||
oldg.Stop()
|
||||
delete(m.tasks, taskName)
|
||||
delete(m.rules, RuleIdFromTaskName(taskName))
|
||||
delete(m.rules, RuleIDFromTaskName(taskName))
|
||||
m.logger.Debug("rule task deleted", "name", taskName)
|
||||
} else {
|
||||
m.logger.Info("rule not found for deletion", "name", taskName)
|
||||
@@ -622,7 +605,6 @@ func (m *Manager) addTask(_ context.Context, orgID valuer.UUID, rule *ruletypes.
|
||||
TaskName: taskName,
|
||||
RuleStore: m.ruleStore,
|
||||
MaintenanceStore: m.maintenanceStore,
|
||||
Reader: m.reader,
|
||||
Querier: m.opts.Querier,
|
||||
Logger: m.opts.Logger,
|
||||
Cache: m.cache,
|
||||
@@ -644,7 +626,7 @@ func (m *Manager) addTask(_ context.Context, orgID valuer.UUID, rule *ruletypes.
|
||||
// If there is another task with the same identifier, raise an error
|
||||
_, ok := m.tasks[taskName]
|
||||
if ok {
|
||||
return fmt.Errorf("a rule with the same name already exists")
|
||||
return errors.NewInvalidInputf(errors.CodeInvalidInput, "a rule with the same name already exists")
|
||||
}
|
||||
|
||||
go func() {
|
||||
@@ -766,7 +748,7 @@ func (m *Manager) prepareTestNotifyFunc() NotifyFunc {
|
||||
if len(alerts) == 0 {
|
||||
return
|
||||
}
|
||||
ruleID := alerts[0].Labels.Map()[labels.AlertRuleIdLabel]
|
||||
ruleID := alerts[0].Labels.Map()[ruletypes.AlertRuleIDLabel]
|
||||
receiverMap := make(map[*alertmanagertypes.PostableAlert][]string)
|
||||
for _, alert := range alerts {
|
||||
generatorURL := alert.GeneratorURL
|
||||
@@ -775,7 +757,7 @@ func (m *Manager) prepareTestNotifyFunc() NotifyFunc {
|
||||
a.Annotations = alert.Annotations.Map()
|
||||
a.StartsAt = strfmt.DateTime(alert.FiredAt)
|
||||
labelsMap := alert.Labels.Map()
|
||||
labelsMap[labels.TestAlertLabel] = "true"
|
||||
labelsMap[ruletypes.TestAlertLabel] = "true"
|
||||
a.Alert = alertmanagertypes.AlertModel{
|
||||
Labels: labelsMap,
|
||||
GeneratorURL: strfmt.URI(generatorURL),
|
||||
@@ -832,7 +814,7 @@ func (m *Manager) ListRuleStates(ctx context.Context) (*ruletypes.GettableRules,
|
||||
|
||||
// fetch state of rule from memory
|
||||
if rm, ok := m.rules[ruleResponse.Id]; !ok {
|
||||
ruleResponse.State = model.StateDisabled
|
||||
ruleResponse.State = ruletypes.StateDisabled
|
||||
ruleResponse.Disabled = true
|
||||
} else {
|
||||
ruleResponse.State = rm.State()
|
||||
@@ -861,7 +843,7 @@ func (m *Manager) GetRule(ctx context.Context, id valuer.UUID) (*ruletypes.Getta
|
||||
r.Id = id.StringValue()
|
||||
// fetch state of rule from memory
|
||||
if rm, ok := m.rules[r.Id]; !ok {
|
||||
r.State = model.StateDisabled
|
||||
r.State = ruletypes.StateDisabled
|
||||
r.Disabled = true
|
||||
} else {
|
||||
r.State = rm.State()
|
||||
@@ -972,7 +954,7 @@ func (m *Manager) PatchRule(ctx context.Context, ruleStr string, id valuer.UUID)
|
||||
|
||||
// fetch state of rule from memory
|
||||
if rm, ok := m.rules[id.StringValue()]; !ok {
|
||||
response.State = model.StateDisabled
|
||||
response.State = ruletypes.StateDisabled
|
||||
response.Disabled = true
|
||||
} else {
|
||||
response.State = rm.State()
|
||||
@@ -983,11 +965,11 @@ func (m *Manager) PatchRule(ctx context.Context, ruleStr string, id valuer.UUID)
|
||||
|
||||
// TestNotification prepares a dummy rule for given rule parameters and
|
||||
// sends a test notification. returns alert count and error (if any)
|
||||
func (m *Manager) TestNotification(ctx context.Context, orgID valuer.UUID, ruleStr string) (int, *model.ApiError) {
|
||||
func (m *Manager) TestNotification(ctx context.Context, orgID valuer.UUID, ruleStr string) (int, error) {
|
||||
parsedRule := ruletypes.PostableRule{}
|
||||
err := json.Unmarshal([]byte(ruleStr), &parsedRule)
|
||||
if err != nil {
|
||||
return 0, model.BadRequest(err)
|
||||
return 0, errors.WrapInvalidInputf(err, errors.CodeInvalidInput, "failed to unmarshal rule")
|
||||
}
|
||||
if !parsedRule.NotificationSettings.UsePolicy {
|
||||
parsedRule.NotificationSettings.GroupBy = append(parsedRule.NotificationSettings.GroupBy, ruletypes.LabelThresholdName)
|
||||
@@ -995,17 +977,13 @@ func (m *Manager) TestNotification(ctx context.Context, orgID valuer.UUID, ruleS
|
||||
config := parsedRule.NotificationSettings.GetAlertManagerNotificationConfig()
|
||||
err = m.alertmanager.SetNotificationConfig(ctx, orgID, parsedRule.AlertName, &config)
|
||||
if err != nil {
|
||||
return 0, &model.ApiError{
|
||||
Typ: model.ErrorBadData,
|
||||
Err: err,
|
||||
}
|
||||
return 0, err
|
||||
}
|
||||
|
||||
alertCount, apiErr := m.prepareTestRuleFunc(PrepareTestRuleOptions{
|
||||
alertCount, err := m.prepareTestRuleFunc(PrepareTestRuleOptions{
|
||||
Rule: &parsedRule,
|
||||
RuleStore: m.ruleStore,
|
||||
MaintenanceStore: m.maintenanceStore,
|
||||
Reader: m.reader,
|
||||
Querier: m.opts.Querier,
|
||||
Logger: m.opts.Logger,
|
||||
Cache: m.cache,
|
||||
@@ -1015,83 +993,5 @@ func (m *Manager) TestNotification(ctx context.Context, orgID valuer.UUID, ruleS
|
||||
OrgID: orgID,
|
||||
})
|
||||
|
||||
return alertCount, apiErr
|
||||
}
|
||||
|
||||
func (m *Manager) GetAlertDetailsForMetricNames(ctx context.Context, metricNames []string) (map[string][]ruletypes.GettableRule, *model.ApiError) {
|
||||
claims, err := authtypes.ClaimsFromContext(ctx)
|
||||
if err != nil {
|
||||
return nil, &model.ApiError{Typ: model.ErrorExec, Err: err}
|
||||
}
|
||||
|
||||
result := make(map[string][]ruletypes.GettableRule)
|
||||
rules, err := m.ruleStore.GetStoredRules(ctx, claims.OrgID)
|
||||
if err != nil {
|
||||
m.logger.ErrorContext(ctx, "error getting stored rules", errors.Attr(err))
|
||||
return nil, &model.ApiError{Typ: model.ErrorExec, Err: err}
|
||||
}
|
||||
|
||||
metricRulesMap := make(map[string][]ruletypes.GettableRule)
|
||||
|
||||
for _, storedRule := range rules {
|
||||
var rule ruletypes.GettableRule
|
||||
err = json.Unmarshal([]byte(storedRule.Data), &rule)
|
||||
if err != nil {
|
||||
m.logger.ErrorContext(ctx, "failed to unmarshal rule from db", "id", storedRule.ID.StringValue(), errors.Attr(err))
|
||||
continue
|
||||
}
|
||||
|
||||
if rule.AlertType != ruletypes.AlertTypeMetric || rule.RuleCondition == nil || rule.RuleCondition.CompositeQuery == nil {
|
||||
continue
|
||||
}
|
||||
rule.Id = storedRule.ID.StringValue()
|
||||
rule.CreatedAt = &storedRule.CreatedAt
|
||||
rule.CreatedBy = &storedRule.CreatedBy
|
||||
rule.UpdatedAt = &storedRule.UpdatedAt
|
||||
rule.UpdatedBy = &storedRule.UpdatedBy
|
||||
|
||||
for _, query := range rule.RuleCondition.CompositeQuery.BuilderQueries {
|
||||
if query.AggregateAttribute.Key != "" {
|
||||
metricRulesMap[query.AggregateAttribute.Key] = append(metricRulesMap[query.AggregateAttribute.Key], rule)
|
||||
}
|
||||
}
|
||||
|
||||
for _, query := range rule.RuleCondition.CompositeQuery.PromQueries {
|
||||
if query.Query != "" {
|
||||
for _, metricName := range metricNames {
|
||||
if strings.Contains(query.Query, metricName) {
|
||||
metricRulesMap[metricName] = append(metricRulesMap[metricName], rule)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for _, query := range rule.RuleCondition.CompositeQuery.ClickHouseQueries {
|
||||
if query.Query != "" {
|
||||
for _, metricName := range metricNames {
|
||||
if strings.Contains(query.Query, metricName) {
|
||||
metricRulesMap[metricName] = append(metricRulesMap[metricName], rule)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for _, metricName := range metricNames {
|
||||
if rules, exists := metricRulesMap[metricName]; exists {
|
||||
seen := make(map[string]bool)
|
||||
uniqueRules := make([]ruletypes.GettableRule, 0)
|
||||
|
||||
for _, rule := range rules {
|
||||
if !seen[rule.Id] {
|
||||
seen[rule.Id] = true
|
||||
uniqueRules = append(uniqueRules, rule)
|
||||
}
|
||||
}
|
||||
|
||||
result[metricName] = uniqueRules
|
||||
}
|
||||
}
|
||||
|
||||
return result, nil
|
||||
return alertCount, err
|
||||
}
|
||||
|
||||
@@ -110,11 +110,8 @@ func TestManager_TestNotification_SendUnmatched_ThresholdRule(t *testing.T) {
|
||||
},
|
||||
})
|
||||
|
||||
count, apiErr := mgr.TestNotification(context.Background(), orgID, string(ruleBytes))
|
||||
if apiErr != nil {
|
||||
t.Logf("TestNotification error: %v, type: %s", apiErr.Err, apiErr.Typ)
|
||||
}
|
||||
require.Nil(t, apiErr)
|
||||
count, err := mgr.TestNotification(context.Background(), orgID, string(ruleBytes))
|
||||
require.Nil(t, err)
|
||||
assert.Equal(t, tc.ExpectAlerts, count)
|
||||
|
||||
if tc.ExpectAlerts > 0 {
|
||||
@@ -209,13 +206,13 @@ func TestManager_TestNotification_SendUnmatched_PromRule(t *testing.T) {
|
||||
// Create fingerprint data
|
||||
fingerprint := uint64(12345)
|
||||
labelsJSON := `{"__name__":"test_metric"}`
|
||||
fingerprintData := [][]interface{}{
|
||||
fingerprintData := [][]any{
|
||||
{fingerprint, labelsJSON},
|
||||
}
|
||||
fingerprintRows := cmock.NewRows(fingerprintCols, fingerprintData)
|
||||
|
||||
// Create samples data from test case values, calculating timestamps relative to baseTime
|
||||
validSamplesData := make([][]interface{}, 0)
|
||||
validSamplesData := make([][]any, 0)
|
||||
for _, v := range tc.Values {
|
||||
// Skip NaN and Inf values in the samples data
|
||||
if math.IsNaN(v.Value) || math.IsInf(v.Value, 0) {
|
||||
@@ -223,7 +220,7 @@ func TestManager_TestNotification_SendUnmatched_PromRule(t *testing.T) {
|
||||
}
|
||||
// Calculate timestamp relative to baseTime
|
||||
sampleTimestamp := baseTime.Add(v.Offset).UnixMilli()
|
||||
validSamplesData = append(validSamplesData, []interface{}{
|
||||
validSamplesData = append(validSamplesData, []any{
|
||||
"test_metric",
|
||||
fingerprint,
|
||||
sampleTimestamp,
|
||||
@@ -263,11 +260,8 @@ func TestManager_TestNotification_SendUnmatched_PromRule(t *testing.T) {
|
||||
},
|
||||
})
|
||||
|
||||
count, apiErr := mgr.TestNotification(context.Background(), orgID, string(ruleBytes))
|
||||
if apiErr != nil {
|
||||
t.Logf("TestNotification error: %v, type: %s", apiErr.Err, apiErr.Typ)
|
||||
}
|
||||
require.Nil(t, apiErr)
|
||||
count, err := mgr.TestNotification(context.Background(), orgID, string(ruleBytes))
|
||||
require.Nil(t, err)
|
||||
assert.Equal(t, tc.ExpectAlerts, count)
|
||||
|
||||
if tc.ExpectAlerts > 0 {
|
||||
|
||||
@@ -4,7 +4,6 @@ import (
|
||||
"math"
|
||||
"time"
|
||||
|
||||
v3 "github.com/SigNoz/signoz/pkg/query-service/model/v3"
|
||||
"github.com/SigNoz/signoz/pkg/types/metrictypes"
|
||||
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
|
||||
ruletypes "github.com/SigNoz/signoz/pkg/types/ruletypes"
|
||||
@@ -15,7 +14,7 @@ import (
|
||||
// ThresholdRuleTestCase defines test case structure for threshold rule test notifications
|
||||
type ThresholdRuleTestCase struct {
|
||||
Name string
|
||||
Values [][]interface{}
|
||||
Values [][]any
|
||||
ExpectAlerts int
|
||||
ExpectValue float64
|
||||
}
|
||||
@@ -52,11 +51,11 @@ func ThresholdRuleAtLeastOnceValueAbove(target float64, recovery *float64) rulet
|
||||
},
|
||||
Version: "v5",
|
||||
RuleCondition: &ruletypes.RuleCondition{
|
||||
MatchType: ruletypes.AtleastOnce,
|
||||
CompareOp: ruletypes.ValueIsAbove,
|
||||
Target: &target,
|
||||
CompositeQuery: &v3.CompositeQuery{
|
||||
QueryType: v3.QueryTypeBuilder,
|
||||
MatchType: ruletypes.AtleastOnce,
|
||||
CompareOperator: ruletypes.ValueIsAbove,
|
||||
Target: &target,
|
||||
CompositeQuery: &ruletypes.AlertCompositeQuery{
|
||||
QueryType: ruletypes.QueryTypeBuilder,
|
||||
Queries: []qbtypes.QueryEnvelope{
|
||||
{
|
||||
Type: qbtypes.QueryTypeBuilder,
|
||||
@@ -80,11 +79,11 @@ func ThresholdRuleAtLeastOnceValueAbove(target float64, recovery *float64) rulet
|
||||
Kind: ruletypes.BasicThresholdKind,
|
||||
Spec: ruletypes.BasicRuleThresholds{
|
||||
{
|
||||
Name: "primary",
|
||||
TargetValue: &target,
|
||||
RecoveryTarget: recovery,
|
||||
MatchType: ruletypes.AtleastOnce,
|
||||
CompareOp: ruletypes.ValueIsAbove,
|
||||
Name: "primary",
|
||||
TargetValue: &target,
|
||||
RecoveryTarget: recovery,
|
||||
MatchType: ruletypes.AtleastOnce,
|
||||
CompareOperator: ruletypes.ValueIsAbove,
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -111,13 +110,13 @@ func BuildPromAtLeastOnceValueAbove(target float64, recovery *float64) ruletypes
|
||||
},
|
||||
Version: "v5",
|
||||
RuleCondition: &ruletypes.RuleCondition{
|
||||
MatchType: ruletypes.AtleastOnce,
|
||||
SelectedQuery: "A",
|
||||
CompareOp: ruletypes.ValueIsAbove,
|
||||
Target: &target,
|
||||
CompositeQuery: &v3.CompositeQuery{
|
||||
QueryType: v3.QueryTypePromQL,
|
||||
PanelType: v3.PanelTypeGraph,
|
||||
MatchType: ruletypes.AtleastOnce,
|
||||
SelectedQuery: "A",
|
||||
CompareOperator: ruletypes.ValueIsAbove,
|
||||
Target: &target,
|
||||
CompositeQuery: &ruletypes.AlertCompositeQuery{
|
||||
QueryType: ruletypes.QueryTypePromQL,
|
||||
PanelType: ruletypes.PanelTypeGraph,
|
||||
Queries: []qbtypes.QueryEnvelope{
|
||||
{
|
||||
Type: qbtypes.QueryTypePromQL,
|
||||
@@ -134,12 +133,12 @@ func BuildPromAtLeastOnceValueAbove(target float64, recovery *float64) ruletypes
|
||||
Kind: ruletypes.BasicThresholdKind,
|
||||
Spec: ruletypes.BasicRuleThresholds{
|
||||
{
|
||||
Name: "primary",
|
||||
TargetValue: &target,
|
||||
RecoveryTarget: recovery,
|
||||
MatchType: ruletypes.AtleastOnce,
|
||||
CompareOp: ruletypes.ValueIsAbove,
|
||||
Channels: []string{"slack"},
|
||||
Name: "primary",
|
||||
TargetValue: &target,
|
||||
RecoveryTarget: recovery,
|
||||
MatchType: ruletypes.AtleastOnce,
|
||||
CompareOperator: ruletypes.ValueIsAbove,
|
||||
Channels: []string{"slack"},
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -153,7 +152,7 @@ var (
|
||||
TcTestNotiSendUnmatchedThresholdRule = []ThresholdRuleTestCase{
|
||||
{
|
||||
Name: "return first valid point in case of test notification",
|
||||
Values: [][]interface{}{
|
||||
Values: [][]any{
|
||||
{float64(3), "attr", time.Now()},
|
||||
{float64(4), "attr", time.Now().Add(1 * time.Minute)},
|
||||
},
|
||||
@@ -162,12 +161,12 @@ var (
|
||||
},
|
||||
{
|
||||
Name: "No data in DB so no alerts fired",
|
||||
Values: [][]interface{}{},
|
||||
Values: [][]any{},
|
||||
ExpectAlerts: 0,
|
||||
},
|
||||
{
|
||||
Name: "return first valid point in case of test notification skips NaN and Inf",
|
||||
Values: [][]interface{}{
|
||||
Values: [][]any{
|
||||
{math.NaN(), "attr", time.Now()},
|
||||
{math.Inf(1), "attr", time.Now().Add(1 * time.Minute)},
|
||||
{float64(7), "attr", time.Now().Add(2 * time.Minute)},
|
||||
@@ -177,7 +176,7 @@ var (
|
||||
},
|
||||
{
|
||||
Name: "If found matching alert with given target value, return the alerting value rather than first valid point",
|
||||
Values: [][]interface{}{
|
||||
Values: [][]any{
|
||||
{float64(1), "attr", time.Now()},
|
||||
{float64(2), "attr", time.Now().Add(1 * time.Minute)},
|
||||
{float64(3), "attr", time.Now().Add(2 * time.Minute)},
|
||||
|
||||
@@ -16,7 +16,6 @@ import (
|
||||
"github.com/SigNoz/signoz/pkg/prometheus/prometheustest"
|
||||
"github.com/SigNoz/signoz/pkg/querier"
|
||||
"github.com/SigNoz/signoz/pkg/querier/signozquerier"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/app/clickhouseReader"
|
||||
"github.com/SigNoz/signoz/pkg/sqlstore"
|
||||
"github.com/SigNoz/signoz/pkg/sqlstore/sqlstoretest"
|
||||
"github.com/SigNoz/signoz/pkg/telemetrystore"
|
||||
@@ -88,7 +87,7 @@ func NewTestManager(t *testing.T, testOpts *TestManagerOptions) *Manager {
|
||||
}
|
||||
|
||||
// Create the in-memory cache used by the querier
|
||||
readerCache, err := cachetest.New(cache.Config{
|
||||
cache, err := cachetest.New(cache.Config{
|
||||
Provider: "memory",
|
||||
Memory: cache.Memory{
|
||||
NumCounters: 10 * 1000,
|
||||
@@ -97,28 +96,16 @@ func NewTestManager(t *testing.T, testOpts *TestManagerOptions) *Manager {
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
options := clickhouseReader.NewOptions("", "", "archiveNamespace")
|
||||
providerSettings := instrumentationtest.New().ToProviderSettings()
|
||||
prometheus := prometheustest.New(context.Background(), providerSettings, prometheus.Config{Timeout: 2 * time.Minute}, telemetryStore)
|
||||
reader := clickhouseReader.NewReader(
|
||||
instrumentationtest.New().Logger(),
|
||||
nil,
|
||||
telemetryStore,
|
||||
prometheus,
|
||||
"",
|
||||
time.Duration(time.Second),
|
||||
nil,
|
||||
readerCache,
|
||||
options,
|
||||
)
|
||||
|
||||
flagger, err := flagger.New(context.Background(), instrumentationtest.New().ToProviderSettings(), flagger.Config{}, flagger.MustNewRegistry())
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create flagger: %v", err)
|
||||
}
|
||||
|
||||
// Create mock querierV5 with test values
|
||||
providerFactory := signozquerier.NewFactory(telemetryStore, prometheus, readerCache, flagger)
|
||||
// Create querier with test values
|
||||
providerFactory := signozquerier.NewFactory(telemetryStore, prometheus, cache, flagger)
|
||||
mockQuerier, err := providerFactory.New(context.Background(), providerSettings, querier.Config{})
|
||||
require.NoError(t, err)
|
||||
|
||||
@@ -128,8 +115,7 @@ func NewTestManager(t *testing.T, testOpts *TestManagerOptions) *Manager {
|
||||
Alertmanager: fAlert,
|
||||
Querier: mockQuerier,
|
||||
TelemetryStore: telemetryStore,
|
||||
Reader: reader,
|
||||
SqlStore: sqlStore, // SQLStore needed for SendAlerts to query organizations
|
||||
SQLStore: sqlStore, // SQLStore needed for SendAlerts to query organizations
|
||||
}
|
||||
|
||||
// Call the ManagerOptions hook if provided to allow customization
|
||||
|
||||
@@ -12,14 +12,10 @@ import (
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/errors"
|
||||
"github.com/SigNoz/signoz/pkg/prometheus"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/interfaces"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/model"
|
||||
v3 "github.com/SigNoz/signoz/pkg/query-service/model/v3"
|
||||
qslabels "github.com/SigNoz/signoz/pkg/query-service/utils/labels"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/utils/times"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/utils/timestamp"
|
||||
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
|
||||
"github.com/SigNoz/signoz/pkg/types/rulestatehistorytypes"
|
||||
"github.com/SigNoz/signoz/pkg/types/ruletypes"
|
||||
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
|
||||
"github.com/SigNoz/signoz/pkg/units"
|
||||
"github.com/SigNoz/signoz/pkg/valuer"
|
||||
)
|
||||
@@ -37,13 +33,12 @@ func NewPromRule(
|
||||
orgID valuer.UUID,
|
||||
postableRule *ruletypes.PostableRule,
|
||||
logger *slog.Logger,
|
||||
reader interfaces.Reader,
|
||||
prometheus prometheus.Prometheus,
|
||||
opts ...RuleOption,
|
||||
) (*PromRule, error) {
|
||||
opts = append(opts, WithLogger(logger))
|
||||
|
||||
baseRule, err := NewBaseRule(id, orgID, postableRule, reader, opts...)
|
||||
baseRule, err := NewBaseRule(id, orgID, postableRule, opts...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -55,7 +50,7 @@ func NewPromRule(
|
||||
}
|
||||
p.logger = logger
|
||||
|
||||
query, err := p.getPqlQuery()
|
||||
query, err := p.getPqlQuery(context.Background())
|
||||
if err != nil {
|
||||
// can not generate a valid prom QL query
|
||||
return nil, err
|
||||
@@ -68,72 +63,45 @@ func (r *PromRule) Type() ruletypes.RuleType {
|
||||
return ruletypes.RuleTypeProm
|
||||
}
|
||||
|
||||
func (r *PromRule) GetSelectedQuery() string {
|
||||
if r.ruleCondition != nil {
|
||||
// If the user has explicitly set the selected query, we return that.
|
||||
if r.ruleCondition.SelectedQuery != "" {
|
||||
return r.ruleCondition.SelectedQuery
|
||||
}
|
||||
// Historically, we used to have only one query in the alerts for promql.
|
||||
// So, if there is only one query, we return that.
|
||||
// This is to maintain backward compatibility.
|
||||
// For new rules, we will have to explicitly set the selected query.
|
||||
return "A"
|
||||
func (r *PromRule) GetSelectedQuery(ctx context.Context) string {
|
||||
if r.ruleCondition.SelectedQuery != "" {
|
||||
return r.ruleCondition.SelectedQuery
|
||||
}
|
||||
// This should never happen.
|
||||
return ""
|
||||
r.logger.WarnContext(ctx, "missing selected query", "rule_name", r.Name())
|
||||
return r.ruleCondition.SelectedQueryName()
|
||||
}
|
||||
|
||||
func (r *PromRule) getPqlQuery() (string, error) {
|
||||
if r.version == "v5" {
|
||||
if len(r.ruleCondition.CompositeQuery.Queries) > 0 {
|
||||
selectedQuery := r.GetSelectedQuery()
|
||||
for _, item := range r.ruleCondition.CompositeQuery.Queries {
|
||||
switch item.Type {
|
||||
case qbtypes.QueryTypePromQL:
|
||||
promQuery, ok := item.Spec.(qbtypes.PromQuery)
|
||||
if !ok {
|
||||
return "", errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid promql query spec %T", item.Spec)
|
||||
}
|
||||
if promQuery.Name == selectedQuery {
|
||||
return promQuery.Query, nil
|
||||
}
|
||||
}
|
||||
func (r *PromRule) getPqlQuery(ctx context.Context) (string, error) {
|
||||
selectedQuery := r.GetSelectedQuery(ctx)
|
||||
for _, item := range r.ruleCondition.CompositeQuery.Queries {
|
||||
switch item.Type {
|
||||
case qbtypes.QueryTypePromQL:
|
||||
promQuery, ok := item.Spec.(qbtypes.PromQuery)
|
||||
if !ok {
|
||||
return "", errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid promql query spec %T", item.Spec)
|
||||
}
|
||||
}
|
||||
return "", fmt.Errorf("invalid promql rule setup")
|
||||
}
|
||||
|
||||
if r.ruleCondition.CompositeQuery.QueryType == v3.QueryTypePromQL {
|
||||
if len(r.ruleCondition.CompositeQuery.PromQueries) > 0 {
|
||||
selectedQuery := r.GetSelectedQuery()
|
||||
if promQuery, ok := r.ruleCondition.CompositeQuery.PromQueries[selectedQuery]; ok {
|
||||
query := promQuery.Query
|
||||
if query == "" {
|
||||
return query, fmt.Errorf("a promquery needs to be set for this rule to function")
|
||||
}
|
||||
return query, nil
|
||||
if promQuery.Name == selectedQuery {
|
||||
return promQuery.Query, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return "", fmt.Errorf("invalid promql rule query")
|
||||
return "", errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid promql rule setup")
|
||||
}
|
||||
|
||||
func (r *PromRule) matrixToV3Series(res promql.Matrix) []*v3.Series {
|
||||
v3Series := make([]*v3.Series, 0, len(res))
|
||||
func (r *PromRule) matrixToCommonSeries(res promql.Matrix) []*qbtypes.TimeSeries {
|
||||
seriesSlice := make([]*qbtypes.TimeSeries, 0, len(res))
|
||||
for _, series := range res {
|
||||
commonSeries := toCommonSeries(series)
|
||||
v3Series = append(v3Series, &commonSeries)
|
||||
seriesSlice = append(seriesSlice, commonSeries)
|
||||
}
|
||||
return v3Series
|
||||
return seriesSlice
|
||||
}
|
||||
|
||||
func (r *PromRule) buildAndRunQuery(ctx context.Context, ts time.Time) (ruletypes.Vector, error) {
|
||||
start, end := r.Timestamps(ts)
|
||||
interval := 60 * time.Second // TODO(srikanthccv): this should be configurable
|
||||
|
||||
q, err := r.getPqlQuery()
|
||||
q, err := r.getPqlQuery(ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -145,7 +113,7 @@ func (r *PromRule) buildAndRunQuery(ctx context.Context, ts time.Time) (ruletype
|
||||
return nil, err
|
||||
}
|
||||
|
||||
matrixToProcess := r.matrixToV3Series(res)
|
||||
matrixToProcess := r.matrixToCommonSeries(res)
|
||||
|
||||
hasData := len(matrixToProcess) > 0
|
||||
if missingDataAlert := r.HandleMissingDataAlert(ctx, ts, hasData); missingDataAlert != nil {
|
||||
@@ -169,11 +137,11 @@ func (r *PromRule) buildAndRunQuery(ctx context.Context, ts time.Time) (ruletype
|
||||
if !r.Condition().ShouldEval(series) {
|
||||
r.logger.InfoContext(
|
||||
ctx, "not enough data points to evaluate series, skipping",
|
||||
"rule_id", r.ID(), "num_points", len(series.Points), "required_points", r.Condition().RequiredNumPoints,
|
||||
"rule_id", r.ID(), "num_points", len(series.Values), "required_points", r.Condition().RequiredNumPoints,
|
||||
)
|
||||
continue
|
||||
}
|
||||
resultSeries, err := r.Threshold.Eval(*series, r.Unit(), ruletypes.EvalData{
|
||||
resultSeries, err := r.Threshold.Eval(series, r.Unit(), ruletypes.EvalData{
|
||||
ActiveAlerts: r.ActiveAlertsLabelFP(),
|
||||
SendUnmatched: r.ShouldSendUnmatched(),
|
||||
})
|
||||
@@ -228,7 +196,6 @@ func (r *PromRule) Eval(ctx context.Context, ts time.Time) (int, error) {
|
||||
defs+text,
|
||||
"__alert_"+r.Name(),
|
||||
tmplData,
|
||||
times.Time(timestamp.FromTime(ts)),
|
||||
nil,
|
||||
)
|
||||
result, err := tmpl.Expand()
|
||||
@@ -239,24 +206,24 @@ func (r *PromRule) Eval(ctx context.Context, ts time.Time) (int, error) {
|
||||
return result
|
||||
}
|
||||
|
||||
lb := qslabels.NewBuilder(result.Metric).Del(qslabels.MetricNameLabel)
|
||||
resultLabels := qslabels.NewBuilder(result.Metric).Del(qslabels.MetricNameLabel).Labels()
|
||||
lb := ruletypes.NewBuilder(result.Metric...).Del(ruletypes.MetricNameLabel)
|
||||
resultLabels := ruletypes.NewBuilder(result.Metric...).Del(ruletypes.MetricNameLabel).Labels()
|
||||
|
||||
for name, value := range r.labels.Map() {
|
||||
lb.Set(name, expand(value))
|
||||
}
|
||||
|
||||
lb.Set(qslabels.AlertNameLabel, r.Name())
|
||||
lb.Set(qslabels.AlertRuleIdLabel, r.ID())
|
||||
lb.Set(qslabels.RuleSourceLabel, r.GeneratorURL())
|
||||
lb.Set(ruletypes.AlertNameLabel, r.Name())
|
||||
lb.Set(ruletypes.AlertRuleIDLabel, r.ID())
|
||||
lb.Set(ruletypes.RuleSourceLabel, r.GeneratorURL())
|
||||
|
||||
annotations := make(qslabels.Labels, 0, len(r.annotations.Map()))
|
||||
annotations := make(ruletypes.Labels, 0, len(r.annotations.Map()))
|
||||
for name, value := range r.annotations.Map() {
|
||||
annotations = append(annotations, qslabels.Label{Name: name, Value: expand(value)})
|
||||
annotations = append(annotations, ruletypes.Label{Name: name, Value: expand(value)})
|
||||
}
|
||||
if result.IsMissing {
|
||||
lb.Set(qslabels.AlertNameLabel, "[No data] "+r.Name())
|
||||
lb.Set(qslabels.NoDataLabel, "true")
|
||||
lb.Set(ruletypes.AlertNameLabel, "[No data] "+r.Name())
|
||||
lb.Set(ruletypes.NoDataLabel, "true")
|
||||
}
|
||||
|
||||
lbs := lb.Labels()
|
||||
@@ -264,7 +231,7 @@ func (r *PromRule) Eval(ctx context.Context, ts time.Time) (int, error) {
|
||||
resultFPs[h] = struct{}{}
|
||||
|
||||
if _, ok := alerts[h]; ok {
|
||||
err = fmt.Errorf("vector contains metrics with the same labelset after applying alert labels")
|
||||
err = errors.NewInternalf(errors.CodeInternal, "vector contains metrics with the same labelset after applying alert labels")
|
||||
// We have already acquired the lock above hence using SetHealth and
|
||||
// SetLastError will deadlock.
|
||||
r.health = ruletypes.HealthBad
|
||||
@@ -273,10 +240,10 @@ func (r *PromRule) Eval(ctx context.Context, ts time.Time) (int, error) {
|
||||
}
|
||||
alerts[h] = &ruletypes.Alert{
|
||||
Labels: lbs,
|
||||
QueryResultLables: resultLabels,
|
||||
QueryResultLabels: resultLabels,
|
||||
Annotations: annotations,
|
||||
ActiveAt: ts,
|
||||
State: model.StatePending,
|
||||
State: ruletypes.StatePending,
|
||||
Value: result.V,
|
||||
GeneratorURL: r.GeneratorURL(),
|
||||
Receivers: ruleReceiverMap[lbs.Map()[ruletypes.LabelThresholdName]],
|
||||
@@ -290,7 +257,7 @@ func (r *PromRule) Eval(ctx context.Context, ts time.Time) (int, error) {
|
||||
for h, a := range alerts {
|
||||
// Check whether we already have alerting state for the identifying label set.
|
||||
// Update the last value and annotations if so, create a new alert entry otherwise.
|
||||
if alert, ok := r.Active[h]; ok && alert.State != model.StateInactive {
|
||||
if alert, ok := r.Active[h]; ok && alert.State != ruletypes.StateInactive {
|
||||
alert.Value = a.Value
|
||||
alert.Annotations = a.Annotations
|
||||
// Update the recovering and missing state of existing alert
|
||||
@@ -306,75 +273,75 @@ func (r *PromRule) Eval(ctx context.Context, ts time.Time) (int, error) {
|
||||
|
||||
}
|
||||
|
||||
itemsToAdd := []model.RuleStateHistory{}
|
||||
itemsToAdd := []rulestatehistorytypes.RuleStateHistory{}
|
||||
|
||||
// Check if any pending alerts should be removed or fire now. Write out alert timeseries.
|
||||
for fp, a := range r.Active {
|
||||
labelsJSON, err := json.Marshal(a.QueryResultLables)
|
||||
labelsJSON, err := json.Marshal(a.QueryResultLabels)
|
||||
if err != nil {
|
||||
r.logger.ErrorContext(ctx, "error marshaling labels", errors.Attr(err), "rule_name", r.Name())
|
||||
}
|
||||
if _, ok := resultFPs[fp]; !ok {
|
||||
// If the alert was previously firing, keep it around for a given
|
||||
// retention time so it is reported as resolved to the AlertManager.
|
||||
if a.State == model.StatePending || (!a.ResolvedAt.IsZero() && ts.Sub(a.ResolvedAt) > ruletypes.ResolvedRetention) {
|
||||
if a.State == ruletypes.StatePending || (!a.ResolvedAt.IsZero() && ts.Sub(a.ResolvedAt) > ruletypes.ResolvedRetention) {
|
||||
delete(r.Active, fp)
|
||||
}
|
||||
if a.State != model.StateInactive {
|
||||
a.State = model.StateInactive
|
||||
if a.State != ruletypes.StateInactive {
|
||||
a.State = ruletypes.StateInactive
|
||||
a.ResolvedAt = ts
|
||||
itemsToAdd = append(itemsToAdd, model.RuleStateHistory{
|
||||
itemsToAdd = append(itemsToAdd, rulestatehistorytypes.RuleStateHistory{
|
||||
RuleID: r.ID(),
|
||||
RuleName: r.Name(),
|
||||
State: model.StateInactive,
|
||||
State: ruletypes.StateInactive,
|
||||
StateChanged: true,
|
||||
UnixMilli: ts.UnixMilli(),
|
||||
Labels: model.LabelsString(labelsJSON),
|
||||
Fingerprint: a.QueryResultLables.Hash(),
|
||||
Labels: rulestatehistorytypes.LabelsString(labelsJSON),
|
||||
Fingerprint: a.QueryResultLabels.Hash(),
|
||||
})
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if a.State == model.StatePending && ts.Sub(a.ActiveAt) >= r.holdDuration.Duration() {
|
||||
a.State = model.StateFiring
|
||||
if a.State == ruletypes.StatePending && ts.Sub(a.ActiveAt) >= r.holdDuration.Duration() {
|
||||
a.State = ruletypes.StateFiring
|
||||
a.FiredAt = ts
|
||||
state := model.StateFiring
|
||||
state := ruletypes.StateFiring
|
||||
if a.Missing {
|
||||
state = model.StateNoData
|
||||
state = ruletypes.StateNoData
|
||||
}
|
||||
itemsToAdd = append(itemsToAdd, model.RuleStateHistory{
|
||||
itemsToAdd = append(itemsToAdd, rulestatehistorytypes.RuleStateHistory{
|
||||
RuleID: r.ID(),
|
||||
RuleName: r.Name(),
|
||||
State: state,
|
||||
StateChanged: true,
|
||||
UnixMilli: ts.UnixMilli(),
|
||||
Labels: model.LabelsString(labelsJSON),
|
||||
Fingerprint: a.QueryResultLables.Hash(),
|
||||
Labels: rulestatehistorytypes.LabelsString(labelsJSON),
|
||||
Fingerprint: a.QueryResultLabels.Hash(),
|
||||
Value: a.Value,
|
||||
})
|
||||
}
|
||||
|
||||
// We need to change firing alert to recovering if the returned sample meets recovery threshold
|
||||
changeAlertingToRecovering := a.State == model.StateFiring && a.IsRecovering
|
||||
changeAlertingToRecovering := a.State == ruletypes.StateFiring && a.IsRecovering
|
||||
// We need to change recovering alerts to firing if the returned sample meets target threshold
|
||||
changeRecoveringToFiring := a.State == model.StateRecovering && !a.IsRecovering && !a.Missing
|
||||
changeRecoveringToFiring := a.State == ruletypes.StateRecovering && !a.IsRecovering && !a.Missing
|
||||
// in any of the above case we need to update the status of alert
|
||||
if changeAlertingToRecovering || changeRecoveringToFiring {
|
||||
state := model.StateRecovering
|
||||
state := ruletypes.StateRecovering
|
||||
if changeRecoveringToFiring {
|
||||
state = model.StateFiring
|
||||
state = ruletypes.StateFiring
|
||||
}
|
||||
a.State = state
|
||||
r.logger.DebugContext(ctx, "converting alert state", "name", r.Name(), "state", state)
|
||||
itemsToAdd = append(itemsToAdd, model.RuleStateHistory{
|
||||
itemsToAdd = append(itemsToAdd, rulestatehistorytypes.RuleStateHistory{
|
||||
RuleID: r.ID(),
|
||||
RuleName: r.Name(),
|
||||
State: state,
|
||||
StateChanged: true,
|
||||
UnixMilli: ts.UnixMilli(),
|
||||
Labels: model.LabelsString(labelsJSON),
|
||||
Fingerprint: a.QueryResultLables.Hash(),
|
||||
Labels: rulestatehistorytypes.LabelsString(labelsJSON),
|
||||
Fingerprint: a.QueryResultLabels.Hash(),
|
||||
Value: a.Value,
|
||||
})
|
||||
}
|
||||
@@ -452,26 +419,25 @@ func (r *PromRule) RunAlertQuery(ctx context.Context, qs string, start, end time
|
||||
case promql.Matrix:
|
||||
return res.Value.(promql.Matrix), nil
|
||||
default:
|
||||
return nil, fmt.Errorf("rule result is not a vector or scalar")
|
||||
return nil, errors.NewInternalf(errors.CodeInternal, "rule result is not a vector or scalar")
|
||||
}
|
||||
}
|
||||
|
||||
func toCommonSeries(series promql.Series) v3.Series {
|
||||
commonSeries := v3.Series{
|
||||
Labels: make(map[string]string),
|
||||
LabelsArray: make([]map[string]string, 0),
|
||||
Points: make([]v3.Point, 0),
|
||||
func toCommonSeries(series promql.Series) *qbtypes.TimeSeries {
|
||||
commonSeries := &qbtypes.TimeSeries{
|
||||
Labels: make([]*qbtypes.Label, 0),
|
||||
Values: make([]*qbtypes.TimeSeriesValue, 0),
|
||||
}
|
||||
|
||||
series.Metric.Range(func(lbl labels.Label) {
|
||||
commonSeries.Labels[lbl.Name] = lbl.Value
|
||||
commonSeries.LabelsArray = append(commonSeries.LabelsArray, map[string]string{
|
||||
lbl.Name: lbl.Value,
|
||||
commonSeries.Labels = append(commonSeries.Labels, &qbtypes.Label{
|
||||
Key: telemetrytypes.TelemetryFieldKey{Name: lbl.Name},
|
||||
Value: lbl.Value,
|
||||
})
|
||||
})
|
||||
|
||||
for _, f := range series.Floats {
|
||||
commonSeries.Points = append(commonSeries.Points, v3.Point{
|
||||
commonSeries.Values = append(commonSeries.Values, &qbtypes.TimeSeriesValue{
|
||||
Timestamp: f.T,
|
||||
Value: f.F,
|
||||
})
|
||||
|
||||
@@ -2,7 +2,6 @@ package rules
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"sort"
|
||||
"sync"
|
||||
"time"
|
||||
@@ -187,7 +186,7 @@ func (g *PromRuleTask) PromRules() []*PromRule {
|
||||
}
|
||||
}
|
||||
sort.Slice(alerts, func(i, j int) bool {
|
||||
return alerts[i].State() > alerts[j].State() ||
|
||||
return alerts[i].State().Severity() > alerts[j].State().Severity() ||
|
||||
(alerts[i].State() == alerts[j].State() &&
|
||||
alerts[i].Name() < alerts[j].Name())
|
||||
})
|
||||
@@ -268,7 +267,7 @@ func (g *PromRuleTask) CopyState(fromTask Task) error {
|
||||
|
||||
from, ok := fromTask.(*PromRuleTask)
|
||||
if !ok {
|
||||
return fmt.Errorf("you can only copy rule groups with same type")
|
||||
return errors.NewInternalf(errors.CodeInternal, "you can only copy rule groups with same type")
|
||||
}
|
||||
|
||||
g.evaluationTime = from.evaluationTime
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1 +0,0 @@
|
||||
package rules
|
||||
@@ -4,8 +4,7 @@ import (
|
||||
"context"
|
||||
"time"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/query-service/model"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/utils/labels"
|
||||
"github.com/SigNoz/signoz/pkg/types/rulestatehistorytypes"
|
||||
ruletypes "github.com/SigNoz/signoz/pkg/types/ruletypes"
|
||||
"github.com/SigNoz/signoz/pkg/valuer"
|
||||
)
|
||||
@@ -17,13 +16,13 @@ type Rule interface {
|
||||
Name() string
|
||||
Type() ruletypes.RuleType
|
||||
|
||||
Labels() labels.BaseLabels
|
||||
Annotations() labels.BaseLabels
|
||||
Labels() ruletypes.Labels
|
||||
Annotations() ruletypes.Labels
|
||||
Condition() *ruletypes.RuleCondition
|
||||
EvalDelay() valuer.TextDuration
|
||||
EvalWindow() valuer.TextDuration
|
||||
HoldDuration() valuer.TextDuration
|
||||
State() model.AlertState
|
||||
State() ruletypes.AlertState
|
||||
ActiveAlerts() []*ruletypes.Alert
|
||||
// ActiveAlertsLabelFP returns a map of active alert labels fingerprint
|
||||
ActiveAlertsLabelFP() map[uint64]struct{}
|
||||
@@ -42,7 +41,17 @@ type Rule interface {
|
||||
SetEvaluationTimestamp(time.Time)
|
||||
GetEvaluationTimestamp() time.Time
|
||||
|
||||
RecordRuleStateHistory(ctx context.Context, prevState, currentState model.AlertState, itemsToAdd []model.RuleStateHistory) error
|
||||
RecordRuleStateHistory(
|
||||
ctx context.Context,
|
||||
prevState, currentState ruletypes.AlertState,
|
||||
itemsToAdd []rulestatehistorytypes.RuleStateHistory,
|
||||
) error
|
||||
|
||||
SendAlerts(ctx context.Context, ts time.Time, resendDelay time.Duration, interval time.Duration, notifyFunc NotifyFunc)
|
||||
SendAlerts(
|
||||
ctx context.Context,
|
||||
ts time.Time,
|
||||
resendDelay time.Duration,
|
||||
interval time.Duration,
|
||||
notifyFunc NotifyFunc,
|
||||
)
|
||||
}
|
||||
|
||||
@@ -2,7 +2,6 @@ package rules
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"sort"
|
||||
"sync"
|
||||
"time"
|
||||
@@ -12,10 +11,9 @@ import (
|
||||
opentracing "github.com/opentracing/opentracing-go"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/errors"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/utils/labels"
|
||||
"github.com/SigNoz/signoz/pkg/types/authtypes"
|
||||
"github.com/SigNoz/signoz/pkg/types/ctxtypes"
|
||||
ruletypes "github.com/SigNoz/signoz/pkg/types/ruletypes"
|
||||
"github.com/SigNoz/signoz/pkg/types/ruletypes"
|
||||
"github.com/SigNoz/signoz/pkg/valuer"
|
||||
)
|
||||
|
||||
@@ -95,7 +93,7 @@ func (g *RuleTask) Pause(b bool) {
|
||||
|
||||
type QueryOrigin struct{}
|
||||
|
||||
func NewQueryOriginContext(ctx context.Context, data map[string]interface{}) context.Context {
|
||||
func NewQueryOriginContext(ctx context.Context, data map[string]any) context.Context {
|
||||
return context.WithValue(ctx, QueryOrigin{}, data)
|
||||
}
|
||||
|
||||
@@ -111,7 +109,7 @@ func (g *RuleTask) Run(ctx context.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
ctx = NewQueryOriginContext(ctx, map[string]interface{}{
|
||||
ctx = NewQueryOriginContext(ctx, map[string]any{
|
||||
"ruleRuleTask": map[string]string{
|
||||
"name": g.Name(),
|
||||
},
|
||||
@@ -163,8 +161,8 @@ func (g *RuleTask) Stop() {
|
||||
}
|
||||
|
||||
func (g *RuleTask) hash() uint64 {
|
||||
l := labels.New(
|
||||
labels.Label{Name: "name", Value: g.name},
|
||||
l := ruletypes.New(
|
||||
ruletypes.Label{Name: "name", Value: g.name},
|
||||
)
|
||||
return l.Hash()
|
||||
}
|
||||
@@ -180,7 +178,7 @@ func (g *RuleTask) ThresholdRules() []*ThresholdRule {
|
||||
}
|
||||
}
|
||||
sort.Slice(alerts, func(i, j int) bool {
|
||||
return alerts[i].State() > alerts[j].State() ||
|
||||
return alerts[i].State().Severity() > alerts[j].State().Severity() ||
|
||||
(alerts[i].State() == alerts[j].State() &&
|
||||
alerts[i].Name() < alerts[j].Name())
|
||||
})
|
||||
@@ -265,7 +263,7 @@ func (g *RuleTask) CopyState(fromTask Task) error {
|
||||
|
||||
from, ok := fromTask.(*RuleTask)
|
||||
if !ok {
|
||||
return fmt.Errorf("invalid from task for copy")
|
||||
return errors.NewInternalf(errors.CodeInternal, "invalid from task for copy")
|
||||
}
|
||||
g.evaluationTime = from.evaluationTime
|
||||
g.lastEvaluation = from.lastEvaluation
|
||||
|
||||
162
pkg/query-service/rules/setups_test.go
Normal file
162
pkg/query-service/rules/setups_test.go
Normal file
@@ -0,0 +1,162 @@
|
||||
package rules
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/flagger"
|
||||
"github.com/SigNoz/signoz/pkg/instrumentation/instrumentationtest"
|
||||
"github.com/SigNoz/signoz/pkg/querier"
|
||||
"github.com/SigNoz/signoz/pkg/querybuilder"
|
||||
"github.com/SigNoz/signoz/pkg/querybuilder/resourcefilter"
|
||||
"github.com/SigNoz/signoz/pkg/telemetrylogs"
|
||||
"github.com/SigNoz/signoz/pkg/telemetrymetrics"
|
||||
"github.com/SigNoz/signoz/pkg/telemetrystore"
|
||||
"github.com/SigNoz/signoz/pkg/telemetrytraces"
|
||||
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
|
||||
"github.com/SigNoz/signoz/pkg/types/telemetrytypes/telemetrytypestest"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func prepareQuerierForMetrics(t *testing.T, telemetryStore telemetrystore.TelemetryStore) querier.Querier {
|
||||
providerSettings := instrumentationtest.New().ToProviderSettings()
|
||||
metadataStore := telemetrytypestest.NewMockMetadataStore()
|
||||
|
||||
flagger, err := flagger.New(
|
||||
context.Background(),
|
||||
instrumentationtest.New().ToProviderSettings(),
|
||||
flagger.Config{},
|
||||
flagger.MustNewRegistry(),
|
||||
)
|
||||
require.NoError(t, err)
|
||||
|
||||
metricFieldMapper := telemetrymetrics.NewFieldMapper()
|
||||
metricConditionBuilder := telemetrymetrics.NewConditionBuilder(metricFieldMapper)
|
||||
metricStmtBuilder := telemetrymetrics.NewMetricQueryStatementBuilder(
|
||||
providerSettings,
|
||||
metadataStore,
|
||||
metricFieldMapper,
|
||||
metricConditionBuilder,
|
||||
flagger,
|
||||
)
|
||||
|
||||
return querier.New(
|
||||
providerSettings,
|
||||
telemetryStore,
|
||||
metadataStore,
|
||||
nil, // prometheus
|
||||
nil, // traceStmtBuilder
|
||||
nil, // logStmtBuilder
|
||||
metricStmtBuilder,
|
||||
nil, // meterStmtBuilder
|
||||
nil, // traceOperatorStmtBuilder
|
||||
nil, // bucketCache
|
||||
)
|
||||
}
|
||||
|
||||
func prepareQuerierForLogs(telemetryStore telemetrystore.TelemetryStore, keysMap map[string][]*telemetrytypes.TelemetryFieldKey) querier.Querier {
|
||||
|
||||
providerSettings := instrumentationtest.New().ToProviderSettings()
|
||||
metadataStore := telemetrytypestest.NewMockMetadataStore()
|
||||
|
||||
for _, keys := range keysMap {
|
||||
for _, key := range keys {
|
||||
key.Signal = telemetrytypes.SignalLogs
|
||||
}
|
||||
}
|
||||
metadataStore.KeysMap = keysMap
|
||||
|
||||
resourceFilterFieldMapper := resourcefilter.NewFieldMapper()
|
||||
resourceFilterConditionBuilder := resourcefilter.NewConditionBuilder(resourceFilterFieldMapper)
|
||||
|
||||
logFieldMapper := telemetrylogs.NewFieldMapper()
|
||||
logConditionBuilder := telemetrylogs.NewConditionBuilder(logFieldMapper)
|
||||
logResourceFilterStmtBuilder := resourcefilter.NewLogResourceFilterStatementBuilder(
|
||||
providerSettings,
|
||||
resourceFilterFieldMapper,
|
||||
resourceFilterConditionBuilder,
|
||||
metadataStore,
|
||||
telemetrylogs.DefaultFullTextColumn,
|
||||
telemetrylogs.GetBodyJSONKey,
|
||||
)
|
||||
logAggExprRewriter := querybuilder.NewAggExprRewriter(
|
||||
providerSettings,
|
||||
telemetrylogs.DefaultFullTextColumn,
|
||||
logFieldMapper,
|
||||
logConditionBuilder,
|
||||
telemetrylogs.GetBodyJSONKey,
|
||||
)
|
||||
logStmtBuilder := telemetrylogs.NewLogQueryStatementBuilder(
|
||||
providerSettings,
|
||||
metadataStore,
|
||||
logFieldMapper,
|
||||
logConditionBuilder,
|
||||
logResourceFilterStmtBuilder,
|
||||
logAggExprRewriter,
|
||||
telemetrylogs.DefaultFullTextColumn,
|
||||
telemetrylogs.GetBodyJSONKey,
|
||||
)
|
||||
|
||||
return querier.New(
|
||||
providerSettings,
|
||||
telemetryStore,
|
||||
metadataStore,
|
||||
nil, // prometheus
|
||||
nil, // traceStmtBuilder
|
||||
logStmtBuilder, // logStmtBuilder
|
||||
nil, // metricStmtBuilder
|
||||
nil, // meterStmtBuilder
|
||||
nil, // traceOperatorStmtBuilder
|
||||
nil, // bucketCache
|
||||
)
|
||||
}
|
||||
|
||||
func prepareQuerierForTraces(telemetryStore telemetrystore.TelemetryStore, keysMap map[string][]*telemetrytypes.TelemetryFieldKey) querier.Querier {
|
||||
|
||||
providerSettings := instrumentationtest.New().ToProviderSettings()
|
||||
metadataStore := telemetrytypestest.NewMockMetadataStore()
|
||||
|
||||
for _, keys := range keysMap {
|
||||
for _, key := range keys {
|
||||
key.Signal = telemetrytypes.SignalTraces
|
||||
}
|
||||
}
|
||||
metadataStore.KeysMap = keysMap
|
||||
|
||||
// Create trace statement builder
|
||||
traceFieldMapper := telemetrytraces.NewFieldMapper()
|
||||
traceConditionBuilder := telemetrytraces.NewConditionBuilder(traceFieldMapper)
|
||||
|
||||
resourceFilterFieldMapper := resourcefilter.NewFieldMapper()
|
||||
resourceFilterConditionBuilder := resourcefilter.NewConditionBuilder(resourceFilterFieldMapper)
|
||||
resourceFilterStmtBuilder := resourcefilter.NewTraceResourceFilterStatementBuilder(
|
||||
providerSettings,
|
||||
resourceFilterFieldMapper,
|
||||
resourceFilterConditionBuilder,
|
||||
metadataStore,
|
||||
)
|
||||
|
||||
traceAggExprRewriter := querybuilder.NewAggExprRewriter(providerSettings, nil, traceFieldMapper, traceConditionBuilder, nil)
|
||||
traceStmtBuilder := telemetrytraces.NewTraceQueryStatementBuilder(
|
||||
providerSettings,
|
||||
metadataStore,
|
||||
traceFieldMapper,
|
||||
traceConditionBuilder,
|
||||
resourceFilterStmtBuilder,
|
||||
traceAggExprRewriter,
|
||||
telemetryStore,
|
||||
)
|
||||
|
||||
return querier.New(
|
||||
providerSettings,
|
||||
telemetryStore,
|
||||
metadataStore,
|
||||
nil, // prometheus
|
||||
traceStmtBuilder, // traceStmtBuilder
|
||||
nil, // logStmtBuilder
|
||||
nil, // metricStmtBuilder
|
||||
nil, // meterStmtBuilder
|
||||
nil, // traceOperatorStmtBuilder
|
||||
nil, // bucketCache
|
||||
)
|
||||
}
|
||||
@@ -4,7 +4,7 @@ import (
|
||||
"context"
|
||||
"time"
|
||||
|
||||
ruletypes "github.com/SigNoz/signoz/pkg/types/ruletypes"
|
||||
"github.com/SigNoz/signoz/pkg/types/ruletypes"
|
||||
"github.com/SigNoz/signoz/pkg/valuer"
|
||||
)
|
||||
|
||||
|
||||
@@ -10,18 +10,16 @@ import (
|
||||
"github.com/google/uuid"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/errors"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/model"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/utils/labels"
|
||||
ruletypes "github.com/SigNoz/signoz/pkg/types/ruletypes"
|
||||
)
|
||||
|
||||
// TestNotification prepares a dummy rule for given rule parameters and
|
||||
// sends a test notification. returns alert count and error (if any)
|
||||
func defaultTestNotification(opts PrepareTestRuleOptions) (int, *model.ApiError) {
|
||||
func defaultTestNotification(opts PrepareTestRuleOptions) (int, error) {
|
||||
ctx := context.Background()
|
||||
|
||||
if opts.Rule == nil {
|
||||
return 0, model.BadRequest(fmt.Errorf("rule is required"))
|
||||
return 0, errors.NewInvalidInputf(errors.CodeInvalidInput, "rule is required")
|
||||
}
|
||||
|
||||
parsedRule := opts.Rule
|
||||
@@ -41,15 +39,14 @@ func defaultTestNotification(opts PrepareTestRuleOptions) (int, *model.ApiError)
|
||||
if parsedRule.RuleType == ruletypes.RuleTypeThreshold {
|
||||
|
||||
// add special labels for test alerts
|
||||
parsedRule.Labels[labels.RuleSourceLabel] = ""
|
||||
parsedRule.Labels[labels.AlertRuleIdLabel] = ""
|
||||
parsedRule.Labels[ruletypes.RuleSourceLabel] = ""
|
||||
parsedRule.Labels[ruletypes.AlertRuleIDLabel] = ""
|
||||
|
||||
// create a threshold rule
|
||||
rule, err = NewThresholdRule(
|
||||
alertname,
|
||||
opts.OrgID,
|
||||
parsedRule,
|
||||
opts.Reader,
|
||||
opts.Querier,
|
||||
opts.Logger,
|
||||
WithSendAlways(),
|
||||
@@ -61,7 +58,7 @@ func defaultTestNotification(opts PrepareTestRuleOptions) (int, *model.ApiError)
|
||||
|
||||
if err != nil {
|
||||
slog.Error("failed to prepare a new threshold rule for test", errors.Attr(err))
|
||||
return 0, model.BadRequest(err)
|
||||
return 0, err
|
||||
}
|
||||
|
||||
} else if parsedRule.RuleType == ruletypes.RuleTypeProm {
|
||||
@@ -72,7 +69,6 @@ func defaultTestNotification(opts PrepareTestRuleOptions) (int, *model.ApiError)
|
||||
opts.OrgID,
|
||||
parsedRule,
|
||||
opts.Logger,
|
||||
opts.Reader,
|
||||
opts.ManagerOpts.Prometheus,
|
||||
WithSendAlways(),
|
||||
WithSendUnmatched(),
|
||||
@@ -83,10 +79,10 @@ func defaultTestNotification(opts PrepareTestRuleOptions) (int, *model.ApiError)
|
||||
|
||||
if err != nil {
|
||||
slog.Error("failed to prepare a new promql rule for test", errors.Attr(err))
|
||||
return 0, model.BadRequest(err)
|
||||
return 0, err
|
||||
}
|
||||
} else {
|
||||
return 0, model.BadRequest(fmt.Errorf("failed to derive ruletype with given information"))
|
||||
return 0, errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid rule type")
|
||||
}
|
||||
|
||||
// set timestamp to current utc time
|
||||
@@ -94,8 +90,8 @@ func defaultTestNotification(opts PrepareTestRuleOptions) (int, *model.ApiError)
|
||||
|
||||
alertsFound, err := rule.Eval(ctx, ts)
|
||||
if err != nil {
|
||||
slog.Error("evaluating rule failed", "rule", rule.Name(), errors.Attr(err))
|
||||
return 0, model.InternalError(fmt.Errorf("rule evaluation failed"))
|
||||
slog.Error("evaluating rule failed", "rule_name", rule.Name(), errors.Attr(err))
|
||||
return 0, err
|
||||
}
|
||||
rule.SendAlerts(ctx, ts, 0, time.Duration(1*time.Minute), opts.NotifyFunc)
|
||||
|
||||
|
||||
@@ -1,67 +1,33 @@
|
||||
package rules
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"math"
|
||||
"net/url"
|
||||
"reflect"
|
||||
"text/template"
|
||||
"time"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/contextlinks"
|
||||
"github.com/SigNoz/signoz/pkg/errors"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/common"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/model"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/postprocess"
|
||||
"github.com/SigNoz/signoz/pkg/transition"
|
||||
"github.com/SigNoz/signoz/pkg/querier"
|
||||
"github.com/SigNoz/signoz/pkg/types/ctxtypes"
|
||||
"github.com/SigNoz/signoz/pkg/types/instrumentationtypes"
|
||||
"github.com/SigNoz/signoz/pkg/types/rulestatehistorytypes"
|
||||
"github.com/SigNoz/signoz/pkg/types/ruletypes"
|
||||
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
|
||||
"github.com/SigNoz/signoz/pkg/valuer"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/query-service/app/querier"
|
||||
querierV2 "github.com/SigNoz/signoz/pkg/query-service/app/querier/v2"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/app/queryBuilder"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/interfaces"
|
||||
v3 "github.com/SigNoz/signoz/pkg/query-service/model/v3"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/utils/labels"
|
||||
querytemplate "github.com/SigNoz/signoz/pkg/query-service/utils/queryTemplate"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/utils/times"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/utils/timestamp"
|
||||
|
||||
logsv3 "github.com/SigNoz/signoz/pkg/query-service/app/logs/v3"
|
||||
tracesV4 "github.com/SigNoz/signoz/pkg/query-service/app/traces/v4"
|
||||
"github.com/SigNoz/signoz/pkg/units"
|
||||
|
||||
querierV5 "github.com/SigNoz/signoz/pkg/querier"
|
||||
|
||||
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
|
||||
)
|
||||
|
||||
type ThresholdRule struct {
|
||||
*BaseRule
|
||||
// Ever since we introduced the new metrics query builder, the version is "v4"
|
||||
// for all the rules
|
||||
// if the version is "v3", then we use the old querier
|
||||
// if the version is "v4", then we use the new querierV2
|
||||
version string
|
||||
|
||||
// querier is used for alerts created before the introduction of new metrics query builder
|
||||
querier interfaces.Querier
|
||||
// querierV2 is used for alerts created after the introduction of new metrics query builder
|
||||
querierV2 interfaces.Querier
|
||||
|
||||
// querierV5 is used for alerts migrated after the introduction of new query builder
|
||||
querierV5 querierV5.Querier
|
||||
|
||||
// used for attribute metadata enrichment for logs and traces
|
||||
logsKeys map[string]v3.AttributeKey
|
||||
spansKeys map[string]v3.AttributeKey
|
||||
querier querier.Querier
|
||||
}
|
||||
|
||||
var _ Rule = (*ThresholdRule)(nil)
|
||||
@@ -70,8 +36,7 @@ func NewThresholdRule(
|
||||
id string,
|
||||
orgID valuer.UUID,
|
||||
p *ruletypes.PostableRule,
|
||||
reader interfaces.Reader,
|
||||
querierV5 querierV5.Querier,
|
||||
querier querier.Querier,
|
||||
logger *slog.Logger,
|
||||
opts ...RuleOption,
|
||||
) (*ThresholdRule, error) {
|
||||
@@ -79,211 +44,33 @@ func NewThresholdRule(
|
||||
|
||||
opts = append(opts, WithLogger(logger))
|
||||
|
||||
baseRule, err := NewBaseRule(id, orgID, p, reader, opts...)
|
||||
baseRule, err := NewBaseRule(id, orgID, p, opts...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
t := ThresholdRule{
|
||||
return &ThresholdRule{
|
||||
BaseRule: baseRule,
|
||||
version: p.Version,
|
||||
}
|
||||
|
||||
querierOption := querier.QuerierOptions{
|
||||
Reader: reader,
|
||||
Cache: nil,
|
||||
KeyGenerator: queryBuilder.NewKeyGenerator(),
|
||||
}
|
||||
|
||||
querierOptsV2 := querierV2.QuerierOptions{
|
||||
Reader: reader,
|
||||
Cache: nil,
|
||||
KeyGenerator: queryBuilder.NewKeyGenerator(),
|
||||
}
|
||||
|
||||
t.querier = querier.NewQuerier(querierOption)
|
||||
t.querierV2 = querierV2.NewQuerier(querierOptsV2)
|
||||
t.querierV5 = querierV5
|
||||
t.reader = reader
|
||||
return &t, nil
|
||||
querier: querier,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (r *ThresholdRule) hostFromSource() string {
|
||||
parsedUrl, err := url.Parse(r.source)
|
||||
parsedURL, err := url.Parse(r.source)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
if parsedUrl.Port() != "" {
|
||||
return fmt.Sprintf("%s://%s:%s", parsedUrl.Scheme, parsedUrl.Hostname(), parsedUrl.Port())
|
||||
if parsedURL.Port() != "" {
|
||||
return fmt.Sprintf("%s://%s:%s", parsedURL.Scheme, parsedURL.Hostname(), parsedURL.Port())
|
||||
}
|
||||
return fmt.Sprintf("%s://%s", parsedUrl.Scheme, parsedUrl.Hostname())
|
||||
return fmt.Sprintf("%s://%s", parsedURL.Scheme, parsedURL.Hostname())
|
||||
}
|
||||
|
||||
// Type identifies this rule's kind; it always returns ruletypes.RuleTypeThreshold.
func (r *ThresholdRule) Type() ruletypes.RuleType {
|
||||
return ruletypes.RuleTypeThreshold
|
||||
}
|
||||
|
||||
func (r *ThresholdRule) prepareQueryRange(ctx context.Context, ts time.Time) (*v3.QueryRangeParamsV3, error) {
|
||||
r.logger.InfoContext(
|
||||
ctx, "prepare query range request v4", "ts", ts.UnixMilli(), "eval_window", r.evalWindow.Milliseconds(), "eval_delay", r.evalDelay.Milliseconds(),
|
||||
)
|
||||
|
||||
startTs, endTs := r.Timestamps(ts)
|
||||
start, end := startTs.UnixMilli(), endTs.UnixMilli()
|
||||
|
||||
if r.ruleCondition.QueryType() == v3.QueryTypeClickHouseSQL {
|
||||
params := &v3.QueryRangeParamsV3{
|
||||
Start: start,
|
||||
End: end,
|
||||
Step: int64(math.Max(float64(common.MinAllowedStepInterval(start, end)), 60)),
|
||||
CompositeQuery: &v3.CompositeQuery{
|
||||
QueryType: r.ruleCondition.CompositeQuery.QueryType,
|
||||
PanelType: r.ruleCondition.CompositeQuery.PanelType,
|
||||
BuilderQueries: make(map[string]*v3.BuilderQuery),
|
||||
ClickHouseQueries: make(map[string]*v3.ClickHouseQuery),
|
||||
PromQueries: make(map[string]*v3.PromQuery),
|
||||
Unit: r.ruleCondition.CompositeQuery.Unit,
|
||||
},
|
||||
Variables: make(map[string]interface{}),
|
||||
NoCache: true,
|
||||
}
|
||||
querytemplate.AssignReservedVarsV3(params)
|
||||
for name, chQuery := range r.ruleCondition.CompositeQuery.ClickHouseQueries {
|
||||
if chQuery.Disabled {
|
||||
continue
|
||||
}
|
||||
tmpl := template.New("clickhouse-query")
|
||||
tmpl, err := tmpl.Parse(chQuery.Query)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var query bytes.Buffer
|
||||
err = tmpl.Execute(&query, params.Variables)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
params.CompositeQuery.ClickHouseQueries[name] = &v3.ClickHouseQuery{
|
||||
Query: query.String(),
|
||||
Disabled: chQuery.Disabled,
|
||||
Legend: chQuery.Legend,
|
||||
}
|
||||
}
|
||||
return params, nil
|
||||
}
|
||||
|
||||
if r.ruleCondition.CompositeQuery != nil && r.ruleCondition.CompositeQuery.BuilderQueries != nil {
|
||||
for _, q := range r.ruleCondition.CompositeQuery.BuilderQueries {
|
||||
// If the step interval is less than the minimum allowed step interval, set it to the minimum allowed step interval
|
||||
if minStep := common.MinAllowedStepInterval(start, end); q.StepInterval < minStep {
|
||||
q.StepInterval = minStep
|
||||
}
|
||||
|
||||
q.SetShiftByFromFunc()
|
||||
|
||||
if q.DataSource == v3.DataSourceMetrics {
|
||||
// if the time range is greater than 1 day, and less than 1 week set the step interval to be multiple of 5 minutes
|
||||
// if the time range is greater than 1 week, set the step interval to be multiple of 30 mins
|
||||
if end-start >= 24*time.Hour.Milliseconds() && end-start < 7*24*time.Hour.Milliseconds() {
|
||||
q.StepInterval = int64(math.Round(float64(q.StepInterval)/300)) * 300
|
||||
} else if end-start >= 7*24*time.Hour.Milliseconds() {
|
||||
q.StepInterval = int64(math.Round(float64(q.StepInterval)/1800)) * 1800
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if r.ruleCondition.CompositeQuery.PanelType != v3.PanelTypeGraph {
|
||||
r.ruleCondition.CompositeQuery.PanelType = v3.PanelTypeGraph
|
||||
}
|
||||
|
||||
// default mode
|
||||
return &v3.QueryRangeParamsV3{
|
||||
Start: start,
|
||||
End: end,
|
||||
Step: int64(math.Max(float64(common.MinAllowedStepInterval(start, end)), 60)),
|
||||
CompositeQuery: r.ruleCondition.CompositeQuery,
|
||||
Variables: make(map[string]interface{}),
|
||||
NoCache: true,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (r *ThresholdRule) prepareLinksToLogs(ctx context.Context, ts time.Time, lbls labels.Labels) string {
|
||||
if r.version == "v5" {
|
||||
return r.prepareLinksToLogsV5(ctx, ts, lbls)
|
||||
}
|
||||
|
||||
selectedQuery := r.GetSelectedQuery()
|
||||
|
||||
qr, err := r.prepareQueryRange(ctx, ts)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
start := time.UnixMilli(qr.Start)
|
||||
end := time.UnixMilli(qr.End)
|
||||
|
||||
// TODO(srikanthccv): handle formula queries
|
||||
if selectedQuery < "A" || selectedQuery > "Z" {
|
||||
return ""
|
||||
}
|
||||
|
||||
q := r.ruleCondition.CompositeQuery.BuilderQueries[selectedQuery]
|
||||
if q == nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
if q.DataSource != v3.DataSourceLogs {
|
||||
return ""
|
||||
}
|
||||
|
||||
queryFilter := []v3.FilterItem{}
|
||||
if q.Filters != nil {
|
||||
queryFilter = q.Filters.Items
|
||||
}
|
||||
|
||||
filterItems := contextlinks.PrepareFilters(lbls.Map(), queryFilter, q.GroupBy, r.logsKeys)
|
||||
|
||||
return contextlinks.PrepareLinksToLogs(start, end, filterItems)
|
||||
}
|
||||
|
||||
func (r *ThresholdRule) prepareLinksToTraces(ctx context.Context, ts time.Time, lbls labels.Labels) string {
|
||||
if r.version == "v5" {
|
||||
return r.prepareLinksToTracesV5(ctx, ts, lbls)
|
||||
}
|
||||
|
||||
selectedQuery := r.GetSelectedQuery()
|
||||
|
||||
qr, err := r.prepareQueryRange(ctx, ts)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
start := time.UnixMilli(qr.Start)
|
||||
end := time.UnixMilli(qr.End)
|
||||
|
||||
// TODO(srikanthccv): handle formula queries
|
||||
if selectedQuery < "A" || selectedQuery > "Z" {
|
||||
return ""
|
||||
}
|
||||
|
||||
q := r.ruleCondition.CompositeQuery.BuilderQueries[selectedQuery]
|
||||
if q == nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
if q.DataSource != v3.DataSourceTraces {
|
||||
return ""
|
||||
}
|
||||
|
||||
queryFilter := []v3.FilterItem{}
|
||||
if q.Filters != nil {
|
||||
queryFilter = q.Filters.Items
|
||||
}
|
||||
|
||||
filterItems := contextlinks.PrepareFilters(lbls.Map(), queryFilter, q.GroupBy, r.spansKeys)
|
||||
|
||||
return contextlinks.PrepareLinksToTraces(start, end, filterItems)
|
||||
}
|
||||
|
||||
func (r *ThresholdRule) prepareQueryRangeV5(ctx context.Context, ts time.Time) (*qbtypes.QueryRangeRequest, error) {
|
||||
func (r *ThresholdRule) prepareQueryRange(ctx context.Context, ts time.Time) (*qbtypes.QueryRangeRequest, error) {
|
||||
r.logger.InfoContext(
|
||||
ctx, "prepare query range request v5", "ts", ts.UnixMilli(), "eval_window", r.evalWindow.Milliseconds(), "eval_delay", r.evalDelay.Milliseconds(),
|
||||
)
|
||||
@@ -305,10 +92,10 @@ func (r *ThresholdRule) prepareQueryRangeV5(ctx context.Context, ts time.Time) (
|
||||
return req, nil
|
||||
}
|
||||
|
||||
func (r *ThresholdRule) prepareLinksToLogsV5(ctx context.Context, ts time.Time, lbls labels.Labels) string {
|
||||
selectedQuery := r.GetSelectedQuery()
|
||||
func (r *ThresholdRule) prepareLinksToLogs(ctx context.Context, ts time.Time, lbls ruletypes.Labels) string {
|
||||
selectedQuery := r.GetSelectedQuery(ctx)
|
||||
|
||||
qr, err := r.prepareQueryRangeV5(ctx, ts)
|
||||
qr, err := r.prepareQueryRange(ctx, ts)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
@@ -345,10 +132,10 @@ func (r *ThresholdRule) prepareLinksToLogsV5(ctx context.Context, ts time.Time,
|
||||
return contextlinks.PrepareLinksToLogsV5(start, end, whereClause)
|
||||
}
|
||||
|
||||
func (r *ThresholdRule) prepareLinksToTracesV5(ctx context.Context, ts time.Time, lbls labels.Labels) string {
|
||||
selectedQuery := r.GetSelectedQuery()
|
||||
func (r *ThresholdRule) prepareLinksToTraces(ctx context.Context, ts time.Time, lbls ruletypes.Labels) string {
|
||||
selectedQuery := r.GetSelectedQuery(ctx)
|
||||
|
||||
qr, err := r.prepareQueryRangeV5(ctx, ts)
|
||||
qr, err := r.prepareQueryRange(ctx, ts)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
@@ -385,8 +172,12 @@ func (r *ThresholdRule) prepareLinksToTracesV5(ctx context.Context, ts time.Time
|
||||
return contextlinks.PrepareLinksToTracesV5(start, end, whereClause)
|
||||
}
|
||||
|
||||
func (r *ThresholdRule) GetSelectedQuery() string {
|
||||
return r.ruleCondition.GetSelectedQueryName()
|
||||
func (r *ThresholdRule) GetSelectedQuery(ctx context.Context) string {
|
||||
if r.ruleCondition.SelectedQuery != "" {
|
||||
return r.ruleCondition.SelectedQuery
|
||||
}
|
||||
r.logger.WarnContext(ctx, "missing selected query", "rule_name", r.Name())
|
||||
return r.ruleCondition.SelectedQueryName()
|
||||
}
|
||||
|
||||
func (r *ThresholdRule) buildAndRunQuery(ctx context.Context, orgID valuer.UUID, ts time.Time) (ruletypes.Vector, error) {
|
||||
@@ -394,142 +185,31 @@ func (r *ThresholdRule) buildAndRunQuery(ctx context.Context, orgID valuer.UUID,
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
err = r.PopulateTemporality(ctx, orgID, params)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("internal error while setting temporality")
|
||||
}
|
||||
|
||||
if params.CompositeQuery.QueryType == v3.QueryTypeBuilder {
|
||||
hasLogsQuery := false
|
||||
hasTracesQuery := false
|
||||
for _, query := range params.CompositeQuery.BuilderQueries {
|
||||
if query.DataSource == v3.DataSourceLogs {
|
||||
hasLogsQuery = true
|
||||
}
|
||||
if query.DataSource == v3.DataSourceTraces {
|
||||
hasTracesQuery = true
|
||||
}
|
||||
}
|
||||
var results []*qbtypes.TimeSeriesData
|
||||
|
||||
if hasLogsQuery {
|
||||
// check if any enrichment is required for logs if yes then enrich them
|
||||
if logsv3.EnrichmentRequired(params) {
|
||||
logsFields, apiErr := r.reader.GetLogFieldsFromNames(ctx, logsv3.GetFieldNames(params.CompositeQuery))
|
||||
if apiErr != nil {
|
||||
return nil, apiErr.ToError()
|
||||
}
|
||||
logsKeys := model.GetLogFieldsV3(ctx, params, logsFields)
|
||||
r.logsKeys = logsKeys
|
||||
logsv3.Enrich(params, logsKeys)
|
||||
}
|
||||
}
|
||||
|
||||
if hasTracesQuery {
|
||||
spanKeys, err := r.reader.GetSpanAttributeKeysByNames(ctx, logsv3.GetFieldNames(params.CompositeQuery))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
r.spansKeys = spanKeys
|
||||
tracesV4.Enrich(params, spanKeys)
|
||||
}
|
||||
}
|
||||
|
||||
var results []*v3.Result
|
||||
var queryErrors map[string]error
|
||||
ctx = ctxtypes.NewContextWithCommentVals(ctx, map[string]string{
|
||||
instrumentationtypes.CodeNamespace: "rules",
|
||||
instrumentationtypes.CodeFunctionName: "buildAndRunQuery",
|
||||
})
|
||||
if r.version == "v4" {
|
||||
results, queryErrors, err = r.querierV2.QueryRange(ctx, orgID, params)
|
||||
} else {
|
||||
results, queryErrors, err = r.querier.QueryRange(ctx, orgID, params)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
r.logger.ErrorContext(ctx, "failed to get alert query range result", "rule_name", r.Name(), errors.Attr(err), "query_errors", queryErrors)
|
||||
return nil, fmt.Errorf("internal error while querying")
|
||||
}
|
||||
|
||||
if params.CompositeQuery.QueryType == v3.QueryTypeBuilder {
|
||||
results, err = postprocess.PostProcessResult(results, params)
|
||||
if err != nil {
|
||||
r.logger.ErrorContext(ctx, "failed to post process result", "rule_name", r.Name(), errors.Attr(err))
|
||||
return nil, fmt.Errorf("internal error while post processing")
|
||||
}
|
||||
}
|
||||
|
||||
selectedQuery := r.GetSelectedQuery()
|
||||
|
||||
var queryResult *v3.Result
|
||||
for _, res := range results {
|
||||
if res.QueryName == selectedQuery {
|
||||
queryResult = res
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
hasData := queryResult != nil && len(queryResult.Series) > 0
|
||||
if missingDataAlert := r.HandleMissingDataAlert(ctx, ts, hasData); missingDataAlert != nil {
|
||||
return ruletypes.Vector{*missingDataAlert}, nil
|
||||
}
|
||||
|
||||
var resultVector ruletypes.Vector
|
||||
|
||||
if queryResult == nil {
|
||||
r.logger.WarnContext(ctx, "query result is nil", "rule_name", r.Name(), "query_name", selectedQuery)
|
||||
return resultVector, nil
|
||||
}
|
||||
|
||||
for _, series := range queryResult.Series {
|
||||
if !r.Condition().ShouldEval(series) {
|
||||
r.logger.InfoContext(ctx, "not enough data points to evaluate series, skipping", "ruleid", r.ID(), "numPoints", len(series.Points), "requiredPoints", r.Condition().RequiredNumPoints)
|
||||
continue
|
||||
}
|
||||
resultSeries, err := r.Threshold.Eval(*series, r.Unit(), ruletypes.EvalData{
|
||||
ActiveAlerts: r.ActiveAlertsLabelFP(),
|
||||
SendUnmatched: r.ShouldSendUnmatched(),
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
resultVector = append(resultVector, resultSeries...)
|
||||
}
|
||||
|
||||
return resultVector, nil
|
||||
}
|
||||
|
||||
func (r *ThresholdRule) buildAndRunQueryV5(ctx context.Context, orgID valuer.UUID, ts time.Time) (ruletypes.Vector, error) {
|
||||
params, err := r.prepareQueryRangeV5(ctx, ts)
|
||||
v5Result, err := r.querier.QueryRange(ctx, orgID, params)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var results []*v3.Result
|
||||
|
||||
ctx = ctxtypes.NewContextWithCommentVals(ctx, map[string]string{
|
||||
instrumentationtypes.CodeNamespace: "rules",
|
||||
instrumentationtypes.CodeFunctionName: "buildAndRunQueryV5",
|
||||
})
|
||||
|
||||
v5Result, err := r.querierV5.QueryRange(ctx, orgID, params)
|
||||
if err != nil {
|
||||
r.logger.ErrorContext(ctx, "failed to get alert query result", "rule_name", r.Name(), errors.Attr(err))
|
||||
return nil, fmt.Errorf("internal error while querying")
|
||||
}
|
||||
|
||||
for _, item := range v5Result.Data.Results {
|
||||
if tsData, ok := item.(*qbtypes.TimeSeriesData); ok {
|
||||
results = append(results, transition.ConvertV5TimeSeriesDataToV4Result(tsData))
|
||||
results = append(results, tsData)
|
||||
} else {
|
||||
// NOTE: should not happen but just to ensure we don't miss it if it happens for some reason
|
||||
r.logger.WarnContext(ctx, "expected qbtypes.TimeSeriesData but got", "item_type", reflect.TypeOf(item))
|
||||
}
|
||||
}
|
||||
|
||||
selectedQuery := r.GetSelectedQuery()
|
||||
selectedQuery := r.GetSelectedQuery(ctx)
|
||||
|
||||
var queryResult *v3.Result
|
||||
var queryResult *qbtypes.TimeSeriesData
|
||||
for _, res := range results {
|
||||
if res.QueryName == selectedQuery {
|
||||
queryResult = res
|
||||
@@ -537,20 +217,24 @@ func (r *ThresholdRule) buildAndRunQueryV5(ctx context.Context, orgID valuer.UUI
|
||||
}
|
||||
}
|
||||
|
||||
hasData := queryResult != nil && len(queryResult.Series) > 0
|
||||
hasData := queryResult != nil &&
|
||||
len(queryResult.Aggregations) > 0 &&
|
||||
queryResult.Aggregations[0] != nil &&
|
||||
len(queryResult.Aggregations[0].Series) > 0
|
||||
|
||||
if missingDataAlert := r.HandleMissingDataAlert(ctx, ts, hasData); missingDataAlert != nil {
|
||||
return ruletypes.Vector{*missingDataAlert}, nil
|
||||
}
|
||||
|
||||
var resultVector ruletypes.Vector
|
||||
|
||||
if queryResult == nil {
|
||||
if queryResult == nil || len(queryResult.Aggregations) == 0 || queryResult.Aggregations[0] == nil {
|
||||
r.logger.WarnContext(ctx, "query result is nil", "rule_name", r.Name(), "query_name", selectedQuery)
|
||||
return resultVector, nil
|
||||
}
|
||||
|
||||
// Filter out new series if newGroupEvalDelay is configured
|
||||
seriesToProcess := queryResult.Series
|
||||
seriesToProcess := queryResult.Aggregations[0].Series
|
||||
if r.ShouldSkipNewGroups() {
|
||||
filteredSeries, filterErr := r.BaseRule.FilterNewSeries(ctx, ts, seriesToProcess)
|
||||
// In case of error we log the error and continue with the original series
|
||||
@@ -563,10 +247,10 @@ func (r *ThresholdRule) buildAndRunQueryV5(ctx context.Context, orgID valuer.UUI
|
||||
|
||||
for _, series := range seriesToProcess {
|
||||
if !r.Condition().ShouldEval(series) {
|
||||
r.logger.InfoContext(ctx, "not enough data points to evaluate series, skipping", "ruleid", r.ID(), "numPoints", len(series.Points), "requiredPoints", r.Condition().RequiredNumPoints)
|
||||
r.logger.InfoContext(ctx, "not enough data points to evaluate series, skipping", "rule_id", r.ID(), "num_points", len(series.Values), "required_points", r.Condition().RequiredNumPoints)
|
||||
continue
|
||||
}
|
||||
resultSeries, err := r.Threshold.Eval(*series, r.Unit(), ruletypes.EvalData{
|
||||
resultSeries, err := r.Threshold.Eval(series, r.Unit(), ruletypes.EvalData{
|
||||
ActiveAlerts: r.ActiveAlertsLabelFP(),
|
||||
SendUnmatched: r.ShouldSendUnmatched(),
|
||||
})
|
||||
@@ -587,13 +271,7 @@ func (r *ThresholdRule) Eval(ctx context.Context, ts time.Time) (int, error) {
|
||||
var res ruletypes.Vector
|
||||
var err error
|
||||
|
||||
if r.version == "v5" {
|
||||
r.logger.InfoContext(ctx, "running v5 query")
|
||||
res, err = r.buildAndRunQueryV5(ctx, r.orgID, ts)
|
||||
} else {
|
||||
r.logger.InfoContext(ctx, "running v4 query")
|
||||
res, err = r.buildAndRunQuery(ctx, r.orgID, ts)
|
||||
}
|
||||
res, err = r.buildAndRunQuery(ctx, r.orgID, ts)
|
||||
|
||||
if err != nil {
|
||||
return 0, err
|
||||
@@ -634,7 +312,6 @@ func (r *ThresholdRule) Eval(ctx context.Context, ts time.Time) (int, error) {
|
||||
defs+text,
|
||||
"__alert_"+r.Name(),
|
||||
tmplData,
|
||||
times.Time(timestamp.FromTime(ts)),
|
||||
nil,
|
||||
)
|
||||
result, err := tmpl.Expand()
|
||||
@@ -645,24 +322,24 @@ func (r *ThresholdRule) Eval(ctx context.Context, ts time.Time) (int, error) {
|
||||
return result
|
||||
}
|
||||
|
||||
lb := labels.NewBuilder(smpl.Metric).Del(labels.MetricNameLabel).Del(labels.TemporalityLabel)
|
||||
resultLabels := labels.NewBuilder(smpl.Metric).Del(labels.MetricNameLabel).Del(labels.TemporalityLabel).Labels()
|
||||
lb := ruletypes.NewBuilder(smpl.Metric...).Del(ruletypes.MetricNameLabel).Del(ruletypes.TemporalityLabel)
|
||||
resultLabels := ruletypes.NewBuilder(smpl.Metric...).Del(ruletypes.MetricNameLabel).Del(ruletypes.TemporalityLabel).Labels()
|
||||
|
||||
for name, value := range r.labels.Map() {
|
||||
lb.Set(name, expand(value))
|
||||
}
|
||||
|
||||
lb.Set(labels.AlertNameLabel, r.Name())
|
||||
lb.Set(labels.AlertRuleIdLabel, r.ID())
|
||||
lb.Set(labels.RuleSourceLabel, r.GeneratorURL())
|
||||
lb.Set(ruletypes.AlertNameLabel, r.Name())
|
||||
lb.Set(ruletypes.AlertRuleIDLabel, r.ID())
|
||||
lb.Set(ruletypes.RuleSourceLabel, r.GeneratorURL())
|
||||
|
||||
annotations := make(labels.Labels, 0, len(r.annotations.Map()))
|
||||
annotations := make(ruletypes.Labels, 0, len(r.annotations.Map()))
|
||||
for name, value := range r.annotations.Map() {
|
||||
annotations = append(annotations, labels.Label{Name: name, Value: expand(value)})
|
||||
annotations = append(annotations, ruletypes.Label{Name: name, Value: expand(value)})
|
||||
}
|
||||
if smpl.IsMissing {
|
||||
lb.Set(labels.AlertNameLabel, "[No data] "+r.Name())
|
||||
lb.Set(labels.NoDataLabel, "true")
|
||||
lb.Set(ruletypes.AlertNameLabel, "[No data] "+r.Name())
|
||||
lb.Set(ruletypes.NoDataLabel, "true")
|
||||
}
|
||||
|
||||
// Links with timestamps should go in annotations since labels
|
||||
@@ -673,13 +350,13 @@ func (r *ThresholdRule) Eval(ctx context.Context, ts time.Time) (int, error) {
|
||||
link := r.prepareLinksToTraces(ctx, ts, smpl.Metric)
|
||||
if link != "" && r.hostFromSource() != "" {
|
||||
r.logger.InfoContext(ctx, "adding traces link to annotations", "link", fmt.Sprintf("%s/traces-explorer?%s", r.hostFromSource(), link))
|
||||
annotations = append(annotations, labels.Label{Name: "related_traces", Value: fmt.Sprintf("%s/traces-explorer?%s", r.hostFromSource(), link)})
|
||||
annotations = append(annotations, ruletypes.Label{Name: "related_traces", Value: fmt.Sprintf("%s/traces-explorer?%s", r.hostFromSource(), link)})
|
||||
}
|
||||
case ruletypes.AlertTypeLogs:
|
||||
link := r.prepareLinksToLogs(ctx, ts, smpl.Metric)
|
||||
if link != "" && r.hostFromSource() != "" {
|
||||
r.logger.InfoContext(ctx, "adding logs link to annotations", "link", fmt.Sprintf("%s/logs/logs-explorer?%s", r.hostFromSource(), link))
|
||||
annotations = append(annotations, labels.Label{Name: "related_logs", Value: fmt.Sprintf("%s/logs/logs-explorer?%s", r.hostFromSource(), link)})
|
||||
annotations = append(annotations, ruletypes.Label{Name: "related_logs", Value: fmt.Sprintf("%s/logs/logs-explorer?%s", r.hostFromSource(), link)})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -688,15 +365,15 @@ func (r *ThresholdRule) Eval(ctx context.Context, ts time.Time) (int, error) {
|
||||
resultFPs[h] = struct{}{}
|
||||
|
||||
if _, ok := alerts[h]; ok {
|
||||
return 0, fmt.Errorf("duplicate alert found, vector contains metrics with the same labelset after applying alert labels")
|
||||
return 0, errors.NewInternalf(errors.CodeInternal, "duplicate alert found, vector contains metrics with the same labelset after applying alert labels")
|
||||
}
|
||||
|
||||
alerts[h] = &ruletypes.Alert{
|
||||
Labels: lbs,
|
||||
QueryResultLables: resultLabels,
|
||||
QueryResultLabels: resultLabels,
|
||||
Annotations: annotations,
|
||||
ActiveAt: ts,
|
||||
State: model.StatePending,
|
||||
State: ruletypes.StatePending,
|
||||
Value: smpl.V,
|
||||
GeneratorURL: r.GeneratorURL(),
|
||||
Receivers: ruleReceiverMap[lbs.Map()[ruletypes.LabelThresholdName]],
|
||||
@@ -711,7 +388,7 @@ func (r *ThresholdRule) Eval(ctx context.Context, ts time.Time) (int, error) {
|
||||
for h, a := range alerts {
|
||||
// Check whether we already have alerting state for the identifying label set.
|
||||
// Update the last value and annotations if so, create a new alert entry otherwise.
|
||||
if alert, ok := r.Active[h]; ok && alert.State != model.StateInactive {
|
||||
if alert, ok := r.Active[h]; ok && alert.State != ruletypes.StateInactive {
|
||||
|
||||
alert.Value = a.Value
|
||||
alert.Annotations = a.Annotations
|
||||
@@ -727,78 +404,78 @@ func (r *ThresholdRule) Eval(ctx context.Context, ts time.Time) (int, error) {
|
||||
r.Active[h] = a
|
||||
}
|
||||
|
||||
itemsToAdd := []model.RuleStateHistory{}
|
||||
itemsToAdd := []rulestatehistorytypes.RuleStateHistory{}
|
||||
|
||||
// Check if any pending alerts should be removed or fire now. Write out alert timeseries.
|
||||
for fp, a := range r.Active {
|
||||
labelsJSON, err := json.Marshal(a.QueryResultLables)
|
||||
labelsJSON, err := json.Marshal(a.QueryResultLabels)
|
||||
if err != nil {
|
||||
r.logger.ErrorContext(ctx, "error marshaling labels", errors.Attr(err), "labels", a.Labels)
|
||||
}
|
||||
if _, ok := resultFPs[fp]; !ok {
|
||||
// If the alert was previously firing, keep it around for a given
|
||||
// retention time so it is reported as resolved to the AlertManager.
|
||||
if a.State == model.StatePending || (!a.ResolvedAt.IsZero() && ts.Sub(a.ResolvedAt) > ruletypes.ResolvedRetention) {
|
||||
if a.State == ruletypes.StatePending || (!a.ResolvedAt.IsZero() && ts.Sub(a.ResolvedAt) > ruletypes.ResolvedRetention) {
|
||||
delete(r.Active, fp)
|
||||
}
|
||||
if a.State != model.StateInactive {
|
||||
if a.State != ruletypes.StateInactive {
|
||||
r.logger.DebugContext(ctx, "converting firing alert to inActive", "name", r.Name())
|
||||
a.State = model.StateInactive
|
||||
a.State = ruletypes.StateInactive
|
||||
a.ResolvedAt = ts
|
||||
itemsToAdd = append(itemsToAdd, model.RuleStateHistory{
|
||||
itemsToAdd = append(itemsToAdd, rulestatehistorytypes.RuleStateHistory{
|
||||
RuleID: r.ID(),
|
||||
RuleName: r.Name(),
|
||||
State: model.StateInactive,
|
||||
State: ruletypes.StateInactive,
|
||||
StateChanged: true,
|
||||
UnixMilli: ts.UnixMilli(),
|
||||
Labels: model.LabelsString(labelsJSON),
|
||||
Fingerprint: a.QueryResultLables.Hash(),
|
||||
Labels: rulestatehistorytypes.LabelsString(labelsJSON),
|
||||
Fingerprint: a.QueryResultLabels.Hash(),
|
||||
Value: a.Value,
|
||||
})
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if a.State == model.StatePending && ts.Sub(a.ActiveAt) >= r.holdDuration.Duration() {
|
||||
if a.State == ruletypes.StatePending && ts.Sub(a.ActiveAt) >= r.holdDuration.Duration() {
|
||||
r.logger.DebugContext(ctx, "converting pending alert to firing", "name", r.Name())
|
||||
a.State = model.StateFiring
|
||||
a.State = ruletypes.StateFiring
|
||||
a.FiredAt = ts
|
||||
state := model.StateFiring
|
||||
state := ruletypes.StateFiring
|
||||
if a.Missing {
|
||||
state = model.StateNoData
|
||||
state = ruletypes.StateNoData
|
||||
}
|
||||
itemsToAdd = append(itemsToAdd, model.RuleStateHistory{
|
||||
itemsToAdd = append(itemsToAdd, rulestatehistorytypes.RuleStateHistory{
|
||||
RuleID: r.ID(),
|
||||
RuleName: r.Name(),
|
||||
State: state,
|
||||
StateChanged: true,
|
||||
UnixMilli: ts.UnixMilli(),
|
||||
Labels: model.LabelsString(labelsJSON),
|
||||
Fingerprint: a.QueryResultLables.Hash(),
|
||||
Labels: rulestatehistorytypes.LabelsString(labelsJSON),
|
||||
Fingerprint: a.QueryResultLabels.Hash(),
|
||||
Value: a.Value,
|
||||
})
|
||||
}
|
||||
|
||||
// We need to change firing alert to recovering if the returned sample meets recovery threshold
|
||||
changeAlertingToRecovering := a.State == model.StateFiring && a.IsRecovering
|
||||
changeAlertingToRecovering := a.State == ruletypes.StateFiring && a.IsRecovering
|
||||
// We need to change recovering alerts to firing if the returned sample meets target threshold
|
||||
changeRecoveringToFiring := a.State == model.StateRecovering && !a.IsRecovering && !a.Missing
|
||||
changeRecoveringToFiring := a.State == ruletypes.StateRecovering && !a.IsRecovering && !a.Missing
|
||||
// in any of the above case we need to update the status of alert
|
||||
if changeAlertingToRecovering || changeRecoveringToFiring {
|
||||
state := model.StateRecovering
|
||||
state := ruletypes.StateRecovering
|
||||
if changeRecoveringToFiring {
|
||||
state = model.StateFiring
|
||||
state = ruletypes.StateFiring
|
||||
}
|
||||
a.State = state
|
||||
r.logger.DebugContext(ctx, "converting alert state", "name", r.Name(), "state", state)
|
||||
itemsToAdd = append(itemsToAdd, model.RuleStateHistory{
|
||||
itemsToAdd = append(itemsToAdd, rulestatehistorytypes.RuleStateHistory{
|
||||
RuleID: r.ID(),
|
||||
RuleName: r.Name(),
|
||||
State: state,
|
||||
StateChanged: true,
|
||||
UnixMilli: ts.UnixMilli(),
|
||||
Labels: model.LabelsString(labelsJSON),
|
||||
Fingerprint: a.QueryResultLables.Hash(),
|
||||
Labels: rulestatehistorytypes.LabelsString(labelsJSON),
|
||||
Fingerprint: a.QueryResultLabels.Hash(),
|
||||
Value: a.Value,
|
||||
})
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,35 +0,0 @@
|
||||
package times
|
||||
|
||||
import (
|
||||
"math"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Time is a timestamp counted in minimumTick units (milliseconds) relative to
// the Unix epoch.
type Time int64

const (
	// minimumTick is the resolution of a Time value. One second is divided
	// by it below, so it must not be coarser than time.Second.
	minimumTick = time.Millisecond
	// second is the number of ticks in one second.
	second = int64(time.Second / minimumTick)
	// nanosPerTick is the number of nanoseconds in one tick.
	nanosPerTick = int64(minimumTick / time.Nanosecond)
)

const (
	// Earliest is the smallest representable Time. Handy for initializing
	// a high watermark.
	Earliest = Time(math.MinInt64)
	// Latest is the largest representable Time. Handy for initializing a
	// low watermark.
	Latest = Time(math.MaxInt64)
)

// TimeFromUnixNano converts t, a Unix time in nanoseconds, to a Time. The
// sub-tick remainder is discarded by integer division.
func TimeFromUnixNano(t int64) Time {
	ticks := t / nanosPerTick
	return Time(ticks)
}

// Time converts t to the equivalent time.Time.
func (t Time) Time() time.Time {
	ticks := int64(t)
	wholeSeconds := ticks / second
	leftoverTicks := ticks % second
	return time.Unix(wholeSeconds, leftoverTicks*nanosPerTick)
}
|
||||
@@ -1,13 +0,0 @@
|
||||
package timestamp
|
||||
|
||||
import "time"
|
||||
|
||||
// FromTime returns t as a Unix timestamp in milliseconds. Any sub-millisecond
// part of t is discarded.
func FromTime(t time.Time) int64 {
	// time.Time.UnixMilli performs the same seconds*1000 + nanos/1e6
	// computation that used to be spelled out by hand here.
	return t.UnixMilli()
}
|
||||
|
||||
// Time returns the time.Time corresponding to ts, a Unix timestamp in
// milliseconds.
func Time(ts int64) time.Time {
	// time.UnixMilli is the standard-library form of
	// time.Unix(ts/1000, (ts%1000)*1e6).
	return time.UnixMilli(ts)
}
|
||||
@@ -139,8 +139,7 @@ func (r *rule) GetStoredRulesByMetricName(ctx context.Context, orgID string, met
|
||||
|
||||
// Check conditions: must be metric-based alert with valid composite query
|
||||
if ruleData.AlertType != ruletypes.AlertTypeMetric ||
|
||||
ruleData.RuleCondition == nil ||
|
||||
ruleData.RuleCondition.CompositeQuery == nil {
|
||||
ruleData.RuleCondition == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
|
||||
@@ -96,6 +96,21 @@ type TimeSeries struct {
|
||||
Values []*TimeSeriesValue `json:"values"`
|
||||
}
|
||||
|
||||
// EvaluableValues returns only the values where Partial is false and value is not NaN or +/- Inf.
|
||||
// TODO(srikanthccv): should we skip them in the consume.go?
|
||||
func (ts *TimeSeries) EvaluableValues() []*TimeSeriesValue {
|
||||
if ts == nil {
|
||||
return nil
|
||||
}
|
||||
result := make([]*TimeSeriesValue, 0, len(ts.Values))
|
||||
for _, v := range ts.Values {
|
||||
if !v.Partial && !math.IsNaN(v.Value) && !math.IsInf(v.Value, 0) {
|
||||
result = append(result, v)
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
type Label struct {
|
||||
Key telemetrytypes.TelemetryFieldKey `json:"key"`
|
||||
Value any `json:"value"`
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
package rulestatehistorytypes
|
||||
|
||||
import qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
|
||||
import (
|
||||
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
|
||||
"github.com/SigNoz/signoz/pkg/types/ruletypes"
|
||||
)
|
||||
|
||||
// PostableRuleStateHistoryBaseQuery defines URL query params common across v2 rule history APIs.
|
||||
type PostableRuleStateHistoryBaseQuery struct {
|
||||
@@ -12,7 +15,7 @@ type PostableRuleStateHistoryBaseQuery struct {
|
||||
type PostableRuleStateHistoryTimelineQuery struct {
|
||||
Start int64 `query:"start" required:"true"`
|
||||
End int64 `query:"end" required:"true"`
|
||||
State AlertState `query:"state"`
|
||||
State ruletypes.AlertState `query:"state"`
|
||||
FilterExpression string `query:"filterExpression"`
|
||||
Limit int64 `query:"limit"`
|
||||
Order qbtypes.OrderDirection `query:"order"`
|
||||
|
||||
@@ -3,12 +3,13 @@ package rulestatehistorytypes
|
||||
import (
|
||||
"github.com/SigNoz/signoz/pkg/errors"
|
||||
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
|
||||
"github.com/SigNoz/signoz/pkg/types/ruletypes"
|
||||
)
|
||||
|
||||
type Query struct {
|
||||
Start int64
|
||||
End int64
|
||||
State AlertState
|
||||
State ruletypes.AlertState
|
||||
FilterExpression qbtypes.Filter
|
||||
Limit int64
|
||||
Offset int64
|
||||
|
||||
@@ -2,6 +2,7 @@ package rulestatehistorytypes
|
||||
|
||||
import (
|
||||
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
|
||||
"github.com/SigNoz/signoz/pkg/types/ruletypes"
|
||||
)
|
||||
|
||||
type GettableRuleStateTimeline struct {
|
||||
@@ -11,16 +12,16 @@ type GettableRuleStateTimeline struct {
|
||||
}
|
||||
|
||||
type GettableRuleStateHistory struct {
|
||||
RuleID string `json:"ruleID" required:"true"`
|
||||
RuleName string `json:"ruleName" required:"true"`
|
||||
OverallState AlertState `json:"overallState" required:"true"`
|
||||
OverallStateChanged bool `json:"overallStateChanged" required:"true"`
|
||||
State AlertState `json:"state" required:"true"`
|
||||
StateChanged bool `json:"stateChanged" required:"true"`
|
||||
UnixMilli int64 `json:"unixMilli" required:"true"`
|
||||
Labels []*qbtypes.Label `json:"labels" required:"true"`
|
||||
Fingerprint uint64 `json:"fingerprint" required:"true"`
|
||||
Value float64 `json:"value" required:"true"`
|
||||
RuleID string `json:"ruleId" required:"true"`
|
||||
RuleName string `json:"ruleName" required:"true"`
|
||||
OverallState ruletypes.AlertState `json:"overallState" required:"true"`
|
||||
OverallStateChanged bool `json:"overallStateChanged" required:"true"`
|
||||
State ruletypes.AlertState `json:"state" required:"true"`
|
||||
StateChanged bool `json:"stateChanged" required:"true"`
|
||||
UnixMilli int64 `json:"unixMilli" required:"true"`
|
||||
Labels []*qbtypes.Label `json:"labels" required:"true"`
|
||||
Fingerprint uint64 `json:"fingerprint" required:"true"`
|
||||
Value float64 `json:"value" required:"true"`
|
||||
}
|
||||
|
||||
type GettableRuleStateHistoryContributor struct {
|
||||
@@ -32,9 +33,9 @@ type GettableRuleStateHistoryContributor struct {
|
||||
}
|
||||
|
||||
type GettableRuleStateWindow struct {
|
||||
State AlertState `json:"state" ch:"state" required:"true"`
|
||||
Start int64 `json:"start" ch:"start" required:"true"`
|
||||
End int64 `json:"end" ch:"end" required:"true"`
|
||||
State ruletypes.AlertState `json:"state" ch:"state" required:"true"`
|
||||
Start int64 `json:"start" ch:"start" required:"true"`
|
||||
End int64 `json:"end" ch:"end" required:"true"`
|
||||
}
|
||||
|
||||
type GettableRuleStateHistoryStats struct {
|
||||
|
||||
@@ -9,36 +9,12 @@ import (
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/errors"
|
||||
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
|
||||
"github.com/SigNoz/signoz/pkg/types/ruletypes"
|
||||
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
|
||||
"github.com/SigNoz/signoz/pkg/valuer"
|
||||
)
|
||||
|
||||
type AlertState struct {
|
||||
valuer.String
|
||||
}
|
||||
|
||||
var (
|
||||
StateInactive = AlertState{valuer.NewString("inactive")}
|
||||
StatePending = AlertState{valuer.NewString("pending")}
|
||||
StateRecovering = AlertState{valuer.NewString("recovering")}
|
||||
StateFiring = AlertState{valuer.NewString("firing")}
|
||||
StateNoData = AlertState{valuer.NewString("nodata")}
|
||||
StateDisabled = AlertState{valuer.NewString("disabled")}
|
||||
)
|
||||
|
||||
type LabelsString string
|
||||
|
||||
func (AlertState) Enum() []any {
|
||||
return []any{
|
||||
StateInactive,
|
||||
StatePending,
|
||||
StateRecovering,
|
||||
StateFiring,
|
||||
StateNoData,
|
||||
StateDisabled,
|
||||
}
|
||||
}
|
||||
|
||||
func (l *LabelsString) Scan(src any) error {
|
||||
switch data := src.(type) {
|
||||
case nil:
|
||||
@@ -90,15 +66,15 @@ type RuleStateHistory struct {
|
||||
RuleID string `ch:"rule_id"`
|
||||
RuleName string `ch:"rule_name"`
|
||||
|
||||
OverallState AlertState `ch:"overall_state"`
|
||||
OverallStateChanged bool `ch:"overall_state_changed"`
|
||||
OverallState ruletypes.AlertState `ch:"overall_state"`
|
||||
OverallStateChanged bool `ch:"overall_state_changed"`
|
||||
|
||||
State AlertState `ch:"state"`
|
||||
StateChanged bool `ch:"state_changed"`
|
||||
UnixMilli int64 `ch:"unix_milli"`
|
||||
Labels LabelsString `ch:"labels"`
|
||||
Fingerprint uint64 `ch:"fingerprint"`
|
||||
Value float64 `ch:"value"`
|
||||
State ruletypes.AlertState `ch:"state"`
|
||||
StateChanged bool `ch:"state_changed"`
|
||||
UnixMilli int64 `ch:"unix_milli"`
|
||||
Labels LabelsString `ch:"labels"`
|
||||
Fingerprint uint64 `ch:"fingerprint"`
|
||||
Value float64 `ch:"value"`
|
||||
}
|
||||
|
||||
type RuleStateHistoryContributor struct {
|
||||
|
||||
40
pkg/types/ruletypes/alert_state.go
Normal file
40
pkg/types/ruletypes/alert_state.go
Normal file
@@ -0,0 +1,40 @@
|
||||
package ruletypes
|
||||
|
||||
import "github.com/SigNoz/signoz/pkg/valuer"
|
||||
|
||||
type AlertState struct {
|
||||
valuer.String
|
||||
}
|
||||
|
||||
var (
|
||||
StateInactive = AlertState{valuer.NewString("inactive")}
|
||||
StatePending = AlertState{valuer.NewString("pending")}
|
||||
StateRecovering = AlertState{valuer.NewString("recovering")}
|
||||
StateFiring = AlertState{valuer.NewString("firing")}
|
||||
StateNoData = AlertState{valuer.NewString("nodata")}
|
||||
StateDisabled = AlertState{valuer.NewString("disabled")}
|
||||
)
|
||||
|
||||
func (AlertState) Enum() []any {
|
||||
return []any{
|
||||
StateInactive,
|
||||
StatePending,
|
||||
StateRecovering,
|
||||
StateFiring,
|
||||
StateNoData,
|
||||
StateDisabled,
|
||||
}
|
||||
}
|
||||
|
||||
var alertStateSeverity = map[AlertState]int{
|
||||
StateInactive: 0,
|
||||
StatePending: 1,
|
||||
StateRecovering: 2,
|
||||
StateFiring: 3,
|
||||
StateNoData: 4,
|
||||
StateDisabled: 5,
|
||||
}
|
||||
|
||||
func (a AlertState) Severity() int {
|
||||
return alertStateSeverity[a]
|
||||
}
|
||||
@@ -8,44 +8,24 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/query-service/model"
|
||||
v3 "github.com/SigNoz/signoz/pkg/query-service/model/v3"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/utils/labels"
|
||||
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
|
||||
"github.com/SigNoz/signoz/pkg/valuer"
|
||||
)
|
||||
|
||||
// this file contains common structs and methods used by
|
||||
// rule engine
|
||||
|
||||
const (
|
||||
// how long before re-sending the alert.
|
||||
ResolvedRetention = 15 * time.Minute
|
||||
TestAlertPostFix = "_TEST_ALERT"
|
||||
)
|
||||
|
||||
type RuleType string
|
||||
|
||||
const (
|
||||
RuleTypeThreshold = "threshold_rule"
|
||||
RuleTypeProm = "promql_rule"
|
||||
RuleTypeAnomaly = "anomaly_rule"
|
||||
)
|
||||
|
||||
type RuleHealth string
|
||||
|
||||
const (
|
||||
HealthUnknown RuleHealth = "unknown"
|
||||
HealthGood RuleHealth = "ok"
|
||||
HealthBad RuleHealth = "err"
|
||||
AlertTimeFormat = "2006-01-02 15:04:05"
|
||||
)
|
||||
|
||||
type Alert struct {
|
||||
State model.AlertState
|
||||
State AlertState
|
||||
|
||||
Labels labels.BaseLabels
|
||||
Annotations labels.BaseLabels
|
||||
Labels Labels
|
||||
Annotations Labels
|
||||
|
||||
QueryResultLables labels.BaseLabels
|
||||
QueryResultLabels Labels
|
||||
|
||||
GeneratorURL string
|
||||
|
||||
@@ -63,8 +43,13 @@ type Alert struct {
|
||||
IsRecovering bool
|
||||
}
|
||||
|
||||
type NamedAlert struct {
|
||||
Name string
|
||||
*Alert
|
||||
}
|
||||
|
||||
func (a *Alert) NeedsSending(ts time.Time, resendDelay time.Duration) bool {
|
||||
if a.State == model.StatePending {
|
||||
if a.State == StatePending {
|
||||
return false
|
||||
}
|
||||
|
||||
@@ -76,153 +61,118 @@ func (a *Alert) NeedsSending(ts time.Time, resendDelay time.Duration) bool {
|
||||
return a.LastSentAt.Add(resendDelay).Before(ts)
|
||||
}
|
||||
|
||||
type NamedAlert struct {
|
||||
Name string
|
||||
*Alert
|
||||
type PanelType struct {
|
||||
valuer.String
|
||||
}
|
||||
|
||||
type CompareOp string
|
||||
|
||||
const (
|
||||
CompareOpNone CompareOp = "0"
|
||||
ValueIsAbove CompareOp = "1"
|
||||
ValueIsBelow CompareOp = "2"
|
||||
ValueIsEq CompareOp = "3"
|
||||
ValueIsNotEq CompareOp = "4"
|
||||
ValueAboveOrEq CompareOp = "5"
|
||||
ValueBelowOrEq CompareOp = "6"
|
||||
ValueOutsideBounds CompareOp = "7"
|
||||
var (
|
||||
PanelTypeValue = PanelType{valuer.NewString("value")}
|
||||
PanelTypeTable = PanelType{valuer.NewString("table")}
|
||||
PanelTypeGraph = PanelType{valuer.NewString("graph")}
|
||||
)
|
||||
|
||||
type MatchType string
|
||||
// Note: this is used to represent the state of the alert query
|
||||
// i.e the active tab which should be used to represent the selection
|
||||
|
||||
const (
|
||||
MatchTypeNone MatchType = "0"
|
||||
AtleastOnce MatchType = "1"
|
||||
AllTheTimes MatchType = "2"
|
||||
OnAverage MatchType = "3"
|
||||
InTotal MatchType = "4"
|
||||
Last MatchType = "5"
|
||||
type QueryType struct {
|
||||
valuer.String
|
||||
}
|
||||
|
||||
var (
|
||||
QueryTypeBuilder = QueryType{String: valuer.NewString("builder")}
|
||||
QueryTypeClickHouseSQL = QueryType{valuer.NewString("clickhouse_sql")}
|
||||
QueryTypePromQL = QueryType{valuer.NewString("promql")}
|
||||
)
|
||||
|
||||
type AlertCompositeQuery struct {
|
||||
Queries []qbtypes.QueryEnvelope `json:"queries"`
|
||||
|
||||
PanelType PanelType `json:"panelType"`
|
||||
QueryType QueryType `json:"queryType"`
|
||||
// Unit for the time series data shown in the graph
|
||||
// This is used to format the value and threshold
|
||||
Unit string `json:"unit,omitempty"`
|
||||
}
|
||||
|
||||
type RuleCondition struct {
|
||||
CompositeQuery *v3.CompositeQuery `json:"compositeQuery,omitempty"`
|
||||
CompareOp CompareOp `json:"op,omitempty"`
|
||||
Target *float64 `json:"target,omitempty"`
|
||||
AlertOnAbsent bool `json:"alertOnAbsent,omitempty"`
|
||||
AbsentFor uint64 `json:"absentFor,omitempty"`
|
||||
MatchType MatchType `json:"matchType,omitempty"`
|
||||
TargetUnit string `json:"targetUnit,omitempty"`
|
||||
Algorithm string `json:"algorithm,omitempty"`
|
||||
Seasonality string `json:"seasonality,omitempty"`
|
||||
SelectedQuery string `json:"selectedQueryName,omitempty"`
|
||||
RequireMinPoints bool `json:"requireMinPoints,omitempty"`
|
||||
RequiredNumPoints int `json:"requiredNumPoints,omitempty"`
|
||||
Thresholds *RuleThresholdData `json:"thresholds,omitempty"`
|
||||
CompositeQuery *AlertCompositeQuery `json:"compositeQuery"`
|
||||
CompareOperator CompareOperator `json:"op"`
|
||||
Target *float64 `json:"target,omitempty"`
|
||||
AlertOnAbsent bool `json:"alertOnAbsent,omitempty"`
|
||||
AbsentFor uint64 `json:"absentFor,omitempty"`
|
||||
MatchType MatchType `json:"matchType"`
|
||||
TargetUnit string `json:"targetUnit,omitempty"`
|
||||
Algorithm string `json:"algorithm,omitempty"`
|
||||
Seasonality string `json:"seasonality,omitempty"`
|
||||
SelectedQuery string `json:"selectedQueryName,omitempty"`
|
||||
RequireMinPoints bool `json:"requireMinPoints,omitempty"`
|
||||
RequiredNumPoints int `json:"requiredNumPoints,omitempty"`
|
||||
Thresholds *RuleThresholdData `json:"thresholds,omitempty"`
|
||||
}
|
||||
|
||||
func (rc *RuleCondition) GetSelectedQueryName() string {
|
||||
if rc != nil {
|
||||
if rc.SelectedQuery != "" {
|
||||
return rc.SelectedQuery
|
||||
}
|
||||
func (rc *RuleCondition) SelectedQueryName() string {
|
||||
|
||||
queryNames := map[string]struct{}{}
|
||||
queryNames := map[string]struct{}{}
|
||||
|
||||
if rc.CompositeQuery != nil {
|
||||
if rc.QueryType() == v3.QueryTypeBuilder {
|
||||
for name := range rc.CompositeQuery.BuilderQueries {
|
||||
queryNames[name] = struct{}{}
|
||||
}
|
||||
|
||||
for _, query := range rc.CompositeQuery.Queries {
|
||||
switch spec := query.Spec.(type) {
|
||||
case qbtypes.QueryBuilderQuery[qbtypes.TraceAggregation]:
|
||||
queryNames[spec.Name] = struct{}{}
|
||||
case qbtypes.QueryBuilderQuery[qbtypes.LogAggregation]:
|
||||
queryNames[spec.Name] = struct{}{}
|
||||
case qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation]:
|
||||
queryNames[spec.Name] = struct{}{}
|
||||
case qbtypes.QueryBuilderFormula:
|
||||
queryNames[spec.Name] = struct{}{}
|
||||
}
|
||||
}
|
||||
} else if rc.QueryType() == v3.QueryTypeClickHouseSQL {
|
||||
for name := range rc.CompositeQuery.ClickHouseQueries {
|
||||
queryNames[name] = struct{}{}
|
||||
}
|
||||
|
||||
for _, query := range rc.CompositeQuery.Queries {
|
||||
switch spec := query.Spec.(type) {
|
||||
case qbtypes.ClickHouseQuery:
|
||||
queryNames[spec.Name] = struct{}{}
|
||||
}
|
||||
}
|
||||
for _, query := range rc.CompositeQuery.Queries {
|
||||
switch spec := query.Spec.(type) {
|
||||
case qbtypes.QueryBuilderQuery[qbtypes.TraceAggregation]:
|
||||
if !spec.Disabled {
|
||||
queryNames[spec.Name] = struct{}{}
|
||||
}
|
||||
case qbtypes.QueryBuilderQuery[qbtypes.LogAggregation]:
|
||||
if !spec.Disabled {
|
||||
queryNames[spec.Name] = struct{}{}
|
||||
}
|
||||
case qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation]:
|
||||
if !spec.Disabled {
|
||||
queryNames[spec.Name] = struct{}{}
|
||||
}
|
||||
case qbtypes.QueryBuilderFormula:
|
||||
if !spec.Disabled {
|
||||
queryNames[spec.Name] = struct{}{}
|
||||
}
|
||||
case qbtypes.ClickHouseQuery:
|
||||
if !spec.Disabled {
|
||||
queryNames[spec.Name] = struct{}{}
|
||||
}
|
||||
case qbtypes.PromQuery:
|
||||
if !spec.Disabled {
|
||||
queryNames[spec.Name] = struct{}{}
|
||||
}
|
||||
}
|
||||
|
||||
// The following logic exists for backward compatibility
|
||||
// If there is no selected query, then
|
||||
// - check if F1 is present, if yes, return F1
|
||||
// - else return the query with max ascii value
|
||||
// this logic is not really correct. we should be considering
|
||||
// whether the query is enabled or not. but this is a temporary
|
||||
// fix to support backward compatibility
|
||||
if _, ok := queryNames["F1"]; ok {
|
||||
return "F1"
|
||||
}
|
||||
keys := make([]string, 0, len(queryNames))
|
||||
for k := range queryNames {
|
||||
keys = append(keys, k)
|
||||
}
|
||||
sort.Strings(keys)
|
||||
return keys[len(keys)-1]
|
||||
}
|
||||
// This should never happen
|
||||
return ""
|
||||
|
||||
// The following logic exists for backward compatibility
|
||||
// If there is no selected query, then
|
||||
// - check if F1 is present, if yes, return F1
|
||||
// - else return the query with max ascii value
|
||||
if _, ok := queryNames["F1"]; ok {
|
||||
return "F1"
|
||||
}
|
||||
keys := make([]string, 0, len(queryNames))
|
||||
for k := range queryNames {
|
||||
keys = append(keys, k)
|
||||
}
|
||||
sort.Strings(keys)
|
||||
return keys[len(keys)-1]
|
||||
}
|
||||
|
||||
func (rc *RuleCondition) IsValid() bool {
|
||||
|
||||
if rc.CompositeQuery == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
if rc.QueryType() == v3.QueryTypeBuilder {
|
||||
if rc.Thresholds == nil {
|
||||
return false
|
||||
}
|
||||
}
|
||||
if rc.QueryType() == v3.QueryTypePromQL {
|
||||
|
||||
if len(rc.CompositeQuery.PromQueries) == 0 && len(rc.CompositeQuery.Queries) == 0 {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// ShouldEval checks if the further series should be evaluated at all for alerts.
|
||||
func (rc *RuleCondition) ShouldEval(series *v3.Series) bool {
|
||||
if rc == nil {
|
||||
return true
|
||||
}
|
||||
return !rc.RequireMinPoints || len(series.Points) >= rc.RequiredNumPoints
|
||||
func (rc *RuleCondition) ShouldEval(series *qbtypes.TimeSeries) bool {
|
||||
return !rc.RequireMinPoints || len(series.Values) >= rc.RequiredNumPoints
|
||||
}
|
||||
|
||||
// QueryType is a shorthand method to get query type.
|
||||
func (rc *RuleCondition) QueryType() v3.QueryType {
|
||||
if rc.CompositeQuery != nil {
|
||||
return rc.CompositeQuery.QueryType
|
||||
}
|
||||
return v3.QueryTypeUnknown
|
||||
func (rc *RuleCondition) QueryType() QueryType {
|
||||
return rc.CompositeQuery.QueryType
|
||||
}
|
||||
|
||||
// String is useful in printing rule condition in logs.
|
||||
func (rc *RuleCondition) String() string {
|
||||
if rc == nil {
|
||||
return ""
|
||||
}
|
||||
data, _ := json.Marshal(*rc)
|
||||
return string(data)
|
||||
}
|
||||
@@ -230,7 +180,7 @@ func (rc *RuleCondition) String() string {
|
||||
// PrepareRuleGeneratorURL creates an appropriate url for the rule. The URL is
|
||||
// sent in Slack messages as well as to other systems and allows backtracking
|
||||
// to the rule definition from the third party systems.
|
||||
func PrepareRuleGeneratorURL(ruleId string, source string) string {
|
||||
func PrepareRuleGeneratorURL(ruleID string, source string) string {
|
||||
if source == "" {
|
||||
return source
|
||||
}
|
||||
@@ -246,7 +196,7 @@ func PrepareRuleGeneratorURL(ruleId string, source string) string {
|
||||
|
||||
hasNew := strings.LastIndex(source, "new")
|
||||
if hasNew > -1 {
|
||||
ruleURL := fmt.Sprintf("%sedit?ruleId=%s", source[0:hasNew], ruleId)
|
||||
ruleURL := fmt.Sprintf("%sedit?ruleId=%s", source[0:hasNew], ruleID)
|
||||
return ruleURL
|
||||
}
|
||||
|
||||
@@ -255,7 +205,7 @@ func PrepareRuleGeneratorURL(ruleId string, source string) string {
|
||||
// mainly to keep the URL short and lower the alert body contents
|
||||
// The generator URL with /alerts/edit?ruleId= is enough
|
||||
if parsedSource.Port() != "" {
|
||||
return fmt.Sprintf("%s://%s:%s/alerts/edit?ruleId=%s", parsedSource.Scheme, parsedSource.Hostname(), parsedSource.Port(), ruleId)
|
||||
return fmt.Sprintf("%s://%s:%s/alerts/edit?ruleId=%s", parsedSource.Scheme, parsedSource.Hostname(), parsedSource.Port(), ruleID)
|
||||
}
|
||||
return fmt.Sprintf("%s://%s/alerts/edit?ruleId=%s", parsedSource.Scheme, parsedSource.Hostname(), ruleId)
|
||||
return fmt.Sprintf("%s://%s/alerts/edit?ruleId=%s", parsedSource.Scheme, parsedSource.Hostname(), ruleID)
|
||||
}
|
||||
|
||||
@@ -10,11 +10,7 @@ import (
|
||||
|
||||
"github.com/prometheus/alertmanager/config"
|
||||
|
||||
signozError "github.com/SigNoz/signoz/pkg/errors"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/model"
|
||||
v3 "github.com/SigNoz/signoz/pkg/query-service/model/v3"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/utils/times"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/utils/timestamp"
|
||||
"github.com/SigNoz/signoz/pkg/errors"
|
||||
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
|
||||
"github.com/SigNoz/signoz/pkg/valuer"
|
||||
)
|
||||
@@ -40,7 +36,7 @@ const (
|
||||
|
||||
// PostableRule is used to create alerting rule from HTTP api.
|
||||
type PostableRule struct {
|
||||
AlertName string `json:"alert,omitempty"`
|
||||
AlertName string `json:"alert"`
|
||||
AlertType AlertType `json:"alertType,omitempty"`
|
||||
Description string `json:"description,omitempty"`
|
||||
RuleType RuleType `json:"ruleType,omitempty"`
|
||||
@@ -77,17 +73,17 @@ type NotificationSettings struct {
|
||||
type Renotify struct {
|
||||
Enabled bool `json:"enabled"`
|
||||
ReNotifyInterval valuer.TextDuration `json:"interval,omitzero"`
|
||||
AlertStates []model.AlertState `json:"alertStates,omitempty"`
|
||||
AlertStates []AlertState `json:"alertStates,omitempty"`
|
||||
}
|
||||
|
||||
func (ns *NotificationSettings) GetAlertManagerNotificationConfig() alertmanagertypes.NotificationConfig {
|
||||
var renotifyInterval time.Duration
|
||||
var noDataRenotifyInterval time.Duration
|
||||
if ns.Renotify.Enabled {
|
||||
if slices.Contains(ns.Renotify.AlertStates, model.StateNoData) {
|
||||
if slices.Contains(ns.Renotify.AlertStates, StateNoData) {
|
||||
noDataRenotifyInterval = ns.Renotify.ReNotifyInterval.Duration()
|
||||
}
|
||||
if slices.Contains(ns.Renotify.AlertStates, model.StateFiring) {
|
||||
if slices.Contains(ns.Renotify.AlertStates, StateFiring) {
|
||||
renotifyInterval = ns.Renotify.ReNotifyInterval.Duration()
|
||||
}
|
||||
} else {
|
||||
@@ -97,7 +93,7 @@ func (ns *NotificationSettings) GetAlertManagerNotificationConfig() alertmanager
|
||||
return alertmanagertypes.NewNotificationConfig(ns.GroupBy, renotifyInterval, noDataRenotifyInterval, ns.UsePolicy)
|
||||
}
|
||||
|
||||
func (r *PostableRule) GetRuleRouteRequest(ruleId string) ([]*alertmanagertypes.PostableRoutePolicy, error) {
|
||||
func (r *PostableRule) GetRuleRouteRequest(ruleID string) ([]*alertmanagertypes.PostableRoutePolicy, error) {
|
||||
threshold, err := r.RuleCondition.Thresholds.GetRuleThreshold()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -105,20 +101,20 @@ func (r *PostableRule) GetRuleRouteRequest(ruleId string) ([]*alertmanagertypes.
|
||||
receivers := threshold.GetRuleReceivers()
|
||||
routeRequests := make([]*alertmanagertypes.PostableRoutePolicy, 0)
|
||||
for _, receiver := range receivers {
|
||||
expression := fmt.Sprintf(`%s == "%s" && %s == "%s"`, LabelThresholdName, receiver.Name, LabelRuleId, ruleId)
|
||||
expression := fmt.Sprintf(`%s == "%s" && %s == "%s"`, LabelThresholdName, receiver.Name, LabelRuleID, ruleID)
|
||||
routeRequests = append(routeRequests, &alertmanagertypes.PostableRoutePolicy{
|
||||
Expression: expression,
|
||||
ExpressionKind: alertmanagertypes.RuleBasedExpression,
|
||||
Channels: receiver.Channels,
|
||||
Name: ruleId,
|
||||
Description: fmt.Sprintf("Auto-generated route for rule %s", ruleId),
|
||||
Name: ruleID,
|
||||
Description: fmt.Sprintf("Auto-generated route for rule %s", ruleID),
|
||||
Tags: []string{"auto-generated", "rule-based"},
|
||||
})
|
||||
}
|
||||
return routeRequests, nil
|
||||
}
|
||||
|
||||
func (r *PostableRule) GetInhibitRules(ruleId string) ([]config.InhibitRule, error) {
|
||||
func (r *PostableRule) GetInhibitRules(ruleID string) ([]config.InhibitRule, error) {
|
||||
threshold, err := r.RuleCondition.Thresholds.GetRuleThreshold()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -139,8 +135,8 @@ func (r *PostableRule) GetInhibitRules(ruleId string) ([]config.InhibitRule, err
|
||||
Value: receivers[i].Name,
|
||||
},
|
||||
{
|
||||
Name: LabelRuleId,
|
||||
Value: ruleId,
|
||||
Name: LabelRuleID,
|
||||
Value: ruleID,
|
||||
},
|
||||
},
|
||||
TargetMatchers: config.Matchers{
|
||||
@@ -149,8 +145,8 @@ func (r *PostableRule) GetInhibitRules(ruleId string) ([]config.InhibitRule, err
|
||||
Value: receivers[i+1].Name,
|
||||
},
|
||||
{
|
||||
Name: LabelRuleId,
|
||||
Value: ruleId,
|
||||
Name: LabelRuleID,
|
||||
Value: ruleID,
|
||||
},
|
||||
},
|
||||
Equal: groups,
|
||||
@@ -174,8 +170,8 @@ func (ns *NotificationSettings) UnmarshalJSON(data []byte) error {
|
||||
|
||||
// Validate states after unmarshaling
|
||||
for _, state := range ns.Renotify.AlertStates {
|
||||
if state != model.StateFiring && state != model.StateNoData {
|
||||
return signozError.NewInvalidInputf(signozError.CodeInvalidInput, "invalid alert state: %s", state)
|
||||
if state != StateFiring && state != StateNoData {
|
||||
return errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid alert state: %s", state)
|
||||
|
||||
}
|
||||
}
|
||||
@@ -185,7 +181,6 @@ func (ns *NotificationSettings) UnmarshalJSON(data []byte) error {
|
||||
// processRuleDefaults applies the default values
|
||||
// for the rule options that are blank or unset.
|
||||
func (r *PostableRule) processRuleDefaults() {
|
||||
|
||||
if r.SchemaVersion == "" {
|
||||
r.SchemaVersion = DefaultSchemaVersion
|
||||
}
|
||||
@@ -200,21 +195,14 @@ func (r *PostableRule) processRuleDefaults() {
|
||||
|
||||
if r.RuleCondition != nil {
|
||||
switch r.RuleCondition.CompositeQuery.QueryType {
|
||||
case v3.QueryTypeBuilder:
|
||||
if r.RuleType == "" {
|
||||
case QueryTypeBuilder:
|
||||
if r.RuleType.IsZero() {
|
||||
r.RuleType = RuleTypeThreshold
|
||||
}
|
||||
case v3.QueryTypePromQL:
|
||||
case QueryTypePromQL:
|
||||
r.RuleType = RuleTypeProm
|
||||
}
|
||||
|
||||
for qLabel, q := range r.RuleCondition.CompositeQuery.BuilderQueries {
|
||||
if q.AggregateAttribute.Key != "" && q.Expression == "" {
|
||||
q.Expression = qLabel
|
||||
}
|
||||
}
|
||||
|
||||
//added alerts v2 fields
|
||||
if r.SchemaVersion == DefaultSchemaVersion {
|
||||
thresholdName := CriticalThresholdName
|
||||
if r.Labels != nil {
|
||||
@@ -225,7 +213,7 @@ func (r *PostableRule) processRuleDefaults() {
|
||||
|
||||
// For anomaly detection with ValueIsBelow, negate the target
|
||||
targetValue := r.RuleCondition.Target
|
||||
if r.RuleType == RuleTypeAnomaly && r.RuleCondition.CompareOp == ValueIsBelow && targetValue != nil {
|
||||
if r.RuleType == RuleTypeAnomaly && r.RuleCondition.CompareOperator == ValueIsBelow && targetValue != nil {
|
||||
negated := -1 * *targetValue
|
||||
targetValue = &negated
|
||||
}
|
||||
@@ -233,12 +221,12 @@ func (r *PostableRule) processRuleDefaults() {
|
||||
thresholdData := RuleThresholdData{
|
||||
Kind: BasicThresholdKind,
|
||||
Spec: BasicRuleThresholds{{
|
||||
Name: thresholdName,
|
||||
TargetUnit: r.RuleCondition.TargetUnit,
|
||||
TargetValue: targetValue,
|
||||
MatchType: r.RuleCondition.MatchType,
|
||||
CompareOp: r.RuleCondition.CompareOp,
|
||||
Channels: r.PreferredChannels,
|
||||
Name: thresholdName,
|
||||
TargetUnit: r.RuleCondition.TargetUnit,
|
||||
TargetValue: targetValue,
|
||||
MatchType: r.RuleCondition.MatchType,
|
||||
CompareOperator: r.RuleCondition.CompareOperator,
|
||||
Channels: r.PreferredChannels,
|
||||
}},
|
||||
}
|
||||
r.RuleCondition.Thresholds = &thresholdData
|
||||
@@ -247,11 +235,11 @@ func (r *PostableRule) processRuleDefaults() {
|
||||
Renotify: Renotify{
|
||||
Enabled: true,
|
||||
ReNotifyInterval: valuer.MustParseTextDuration("4h"),
|
||||
AlertStates: []model.AlertState{model.StateFiring},
|
||||
AlertStates: []AlertState{StateFiring},
|
||||
},
|
||||
}
|
||||
if r.RuleCondition.AlertOnAbsent {
|
||||
r.NotificationSettings.Renotify.AlertStates = append(r.NotificationSettings.Renotify.AlertStates, model.StateNoData)
|
||||
r.NotificationSettings.Renotify.AlertStates = append(r.NotificationSettings.Renotify.AlertStates, StateNoData)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -282,7 +270,7 @@ func (r *PostableRule) UnmarshalJSON(bytes []byte) error {
|
||||
type Alias PostableRule
|
||||
aux := (*Alias)(r)
|
||||
if err := json.Unmarshal(bytes, aux); err != nil {
|
||||
return signozError.NewInvalidInputf(signozError.CodeInvalidInput, "failed to parse json: %v", err)
|
||||
return errors.NewInvalidInputf(errors.CodeInvalidInput, "failed to parse json: %v", err)
|
||||
}
|
||||
r.processRuleDefaults()
|
||||
return r.validate()
|
||||
@@ -304,83 +292,36 @@ func isValidLabelValue(v string) bool {
|
||||
return utf8.ValidString(v)
|
||||
}
|
||||
|
||||
func isAllQueriesDisabled(compositeQuery *v3.CompositeQuery) bool {
|
||||
if compositeQuery == nil {
|
||||
return false
|
||||
}
|
||||
if compositeQuery.BuilderQueries == nil && compositeQuery.PromQueries == nil && compositeQuery.ClickHouseQueries == nil {
|
||||
return false
|
||||
}
|
||||
switch compositeQuery.QueryType {
|
||||
case v3.QueryTypeBuilder:
|
||||
if len(compositeQuery.BuilderQueries) == 0 {
|
||||
return false
|
||||
}
|
||||
for _, query := range compositeQuery.BuilderQueries {
|
||||
if !query.Disabled {
|
||||
return false
|
||||
}
|
||||
}
|
||||
case v3.QueryTypePromQL:
|
||||
if len(compositeQuery.PromQueries) == 0 {
|
||||
return false
|
||||
}
|
||||
for _, query := range compositeQuery.PromQueries {
|
||||
if !query.Disabled {
|
||||
return false
|
||||
}
|
||||
}
|
||||
case v3.QueryTypeClickHouseSQL:
|
||||
if len(compositeQuery.ClickHouseQueries) == 0 {
|
||||
return false
|
||||
}
|
||||
for _, query := range compositeQuery.ClickHouseQueries {
|
||||
if !query.Disabled {
|
||||
return false
|
||||
}
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func (r *PostableRule) validate() error {
|
||||
|
||||
var errs []error
|
||||
|
||||
if r.RuleCondition == nil {
|
||||
// will get panic if we try to access CompositeQuery, so return here
|
||||
return signozError.NewInvalidInputf(signozError.CodeInvalidInput, "rule condition is required")
|
||||
}
|
||||
if r.RuleCondition.CompositeQuery == nil {
|
||||
errs = append(errs, signozError.NewInvalidInputf(signozError.CodeInvalidInput, "composite query is required"))
|
||||
return errors.NewInvalidInputf(errors.CodeInvalidInput, "rule condition is required")
|
||||
}
|
||||
|
||||
if r.Version != "v5" {
|
||||
errs = append(errs, signozError.NewInvalidInputf(signozError.CodeInvalidInput, "only version v5 is supported, got %q", r.Version))
|
||||
}
|
||||
|
||||
if isAllQueriesDisabled(r.RuleCondition.CompositeQuery) {
|
||||
errs = append(errs, signozError.NewInvalidInputf(signozError.CodeInvalidInput, "all queries are disabled in rule condition"))
|
||||
errs = append(errs, errors.NewInvalidInputf(errors.CodeInvalidInput, "only version v5 is supported, got %q", r.Version))
|
||||
}
|
||||
|
||||
for k, v := range r.Labels {
|
||||
if !isValidLabelName(k) {
|
||||
errs = append(errs, signozError.NewInvalidInputf(signozError.CodeInvalidInput, "invalid label name: %s", k))
|
||||
errs = append(errs, errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid label name: %s", k))
|
||||
}
|
||||
|
||||
if !isValidLabelValue(v) {
|
||||
errs = append(errs, signozError.NewInvalidInputf(signozError.CodeInvalidInput, "invalid label value: %s", v))
|
||||
errs = append(errs, errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid label value: %s", v))
|
||||
}
|
||||
}
|
||||
|
||||
for k := range r.Annotations {
|
||||
if !isValidLabelName(k) {
|
||||
errs = append(errs, signozError.NewInvalidInputf(signozError.CodeInvalidInput, "invalid annotation name: %s", k))
|
||||
errs = append(errs, errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid annotation name: %s", k))
|
||||
}
|
||||
}
|
||||
|
||||
errs = append(errs, testTemplateParsing(r)...)
|
||||
return signozError.Join(errs...)
|
||||
return errors.Join(errs...)
|
||||
}
|
||||
|
||||
func testTemplateParsing(rl *PostableRule) (errs []error) {
|
||||
@@ -398,7 +339,6 @@ func testTemplateParsing(rl *PostableRule) (errs []error) {
|
||||
defs+text,
|
||||
"__alert_"+rl.AlertName,
|
||||
tmplData,
|
||||
times.Time(timestamp.FromTime(time.Now())),
|
||||
nil,
|
||||
)
|
||||
return tmpl.ParseTest()
|
||||
@@ -408,7 +348,7 @@ func testTemplateParsing(rl *PostableRule) (errs []error) {
|
||||
for _, val := range rl.Labels {
|
||||
err := parseTest(val)
|
||||
if err != nil {
|
||||
errs = append(errs, signozError.NewInvalidInputf(signozError.CodeInvalidInput, "template parsing error: %s", err.Error()))
|
||||
errs = append(errs, errors.NewInvalidInputf(errors.CodeInvalidInput, "template parsing error: %s", err.Error()))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -416,7 +356,7 @@ func testTemplateParsing(rl *PostableRule) (errs []error) {
|
||||
for _, val := range rl.Annotations {
|
||||
err := parseTest(val)
|
||||
if err != nil {
|
||||
errs = append(errs, signozError.NewInvalidInputf(signozError.CodeInvalidInput, "template parsing error: %s", err.Error()))
|
||||
errs = append(errs, errors.NewInvalidInputf(errors.CodeInvalidInput, "template parsing error: %s", err.Error()))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -430,8 +370,8 @@ type GettableRules struct {
|
||||
|
||||
// GettableRule has info for an alerting rules.
|
||||
type GettableRule struct {
|
||||
Id string `json:"id"`
|
||||
State model.AlertState `json:"state"`
|
||||
Id string `json:"id"`
|
||||
State AlertState `json:"state"`
|
||||
PostableRule
|
||||
CreatedAt *time.Time `json:"createAt"`
|
||||
CreatedBy *string `json:"createBy"`
|
||||
|
||||
@@ -7,88 +7,10 @@ import (
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
|
||||
v3 "github.com/SigNoz/signoz/pkg/query-service/model/v3"
|
||||
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
|
||||
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
|
||||
)
|
||||
|
||||
func TestIsAllQueriesDisabled(t *testing.T) {
|
||||
testCases := []*v3.CompositeQuery{
|
||||
{
|
||||
BuilderQueries: map[string]*v3.BuilderQuery{
|
||||
"query1": {
|
||||
Disabled: true,
|
||||
},
|
||||
"query2": {
|
||||
Disabled: true,
|
||||
},
|
||||
},
|
||||
QueryType: v3.QueryTypeBuilder,
|
||||
},
|
||||
nil,
|
||||
{
|
||||
QueryType: v3.QueryTypeBuilder,
|
||||
},
|
||||
{
|
||||
QueryType: v3.QueryTypeBuilder,
|
||||
BuilderQueries: map[string]*v3.BuilderQuery{
|
||||
"query1": {
|
||||
Disabled: true,
|
||||
},
|
||||
"query2": {
|
||||
Disabled: false,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
QueryType: v3.QueryTypePromQL,
|
||||
},
|
||||
{
|
||||
QueryType: v3.QueryTypePromQL,
|
||||
PromQueries: map[string]*v3.PromQuery{
|
||||
"query3": {
|
||||
Disabled: false,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
QueryType: v3.QueryTypePromQL,
|
||||
PromQueries: map[string]*v3.PromQuery{
|
||||
"query3": {
|
||||
Disabled: true,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
QueryType: v3.QueryTypeClickHouseSQL,
|
||||
},
|
||||
{
|
||||
QueryType: v3.QueryTypeClickHouseSQL,
|
||||
ClickHouseQueries: map[string]*v3.ClickHouseQuery{
|
||||
"query4": {
|
||||
Disabled: false,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
QueryType: v3.QueryTypeClickHouseSQL,
|
||||
ClickHouseQueries: map[string]*v3.ClickHouseQuery{
|
||||
"query4": {
|
||||
Disabled: true,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
expectedResult := []bool{true, false, false, false, false, false, true, false, false, true}
|
||||
|
||||
for index, compositeQuery := range testCases {
|
||||
expected := expectedResult[index]
|
||||
actual := isAllQueriesDisabled(compositeQuery)
|
||||
if actual != expected {
|
||||
t.Errorf("Expected %v, but got %v", expected, actual)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseIntoRule(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
@@ -179,9 +101,6 @@ func TestParseIntoRule(t *testing.T) {
|
||||
if rule.Frequency.Duration() != time.Minute {
|
||||
t.Errorf("Expected default frequency '1m', got '%v'", rule.Frequency)
|
||||
}
|
||||
if rule.RuleCondition.CompositeQuery.BuilderQueries["A"].Expression != "A" {
|
||||
t.Errorf("Expected expression 'A', got '%s'", rule.RuleCondition.CompositeQuery.BuilderQueries["A"].Expression)
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -317,7 +236,7 @@ func TestParseIntoRuleSchemaVersioning(t *testing.T) {
|
||||
if spec.MatchType != rule.RuleCondition.MatchType {
|
||||
t.Error("Expected MatchType to be copied from RuleCondition")
|
||||
}
|
||||
if spec.CompareOp != rule.RuleCondition.CompareOp {
|
||||
if spec.CompareOperator != rule.RuleCondition.CompareOperator {
|
||||
t.Error("Expected CompareOp to be copied from RuleCondition")
|
||||
}
|
||||
|
||||
@@ -630,9 +549,16 @@ func TestParseIntoRuleThresholdGeneration(t *testing.T) {
|
||||
}
|
||||
|
||||
// Test that threshold can evaluate properly
|
||||
vector, err := threshold.Eval(v3.Series{
|
||||
Points: []v3.Point{{Value: 0.15, Timestamp: 1000}}, // 150ms in seconds
|
||||
Labels: map[string]string{"test": "label"},
|
||||
vector, err := threshold.Eval(&qbtypes.TimeSeries{
|
||||
Values: []*qbtypes.TimeSeriesValue{{Value: 0.15, Timestamp: 1000}}, // 150ms in seconds
|
||||
Labels: []*qbtypes.Label{
|
||||
{
|
||||
Key: telemetrytypes.TelemetryFieldKey{
|
||||
Name: "test",
|
||||
},
|
||||
Value: "label",
|
||||
},
|
||||
},
|
||||
}, "", EvalData{})
|
||||
if err != nil {
|
||||
t.Fatalf("Unexpected error in shouldAlert: %v", err)
|
||||
@@ -708,9 +634,16 @@ func TestParseIntoRuleMultipleThresholds(t *testing.T) {
|
||||
}
|
||||
|
||||
// Test with a value that should trigger both WARNING and CRITICAL thresholds
|
||||
vector, err := threshold.Eval(v3.Series{
|
||||
Points: []v3.Point{{Value: 95.0, Timestamp: 1000}}, // 95% CPU usage
|
||||
Labels: map[string]string{"service": "test"},
|
||||
vector, err := threshold.Eval(&qbtypes.TimeSeries{
|
||||
Values: []*qbtypes.TimeSeriesValue{{Value: 95.0, Timestamp: 1000}}, // 95% CPU usage
|
||||
Labels: []*qbtypes.Label{
|
||||
{
|
||||
Key: telemetrytypes.TelemetryFieldKey{
|
||||
Name: "service",
|
||||
},
|
||||
Value: "test",
|
||||
},
|
||||
},
|
||||
}, "", EvalData{})
|
||||
if err != nil {
|
||||
t.Fatalf("Unexpected error in shouldAlert: %v", err)
|
||||
@@ -718,9 +651,16 @@ func TestParseIntoRuleMultipleThresholds(t *testing.T) {
|
||||
|
||||
assert.Equal(t, 2, len(vector))
|
||||
|
||||
vector, err = threshold.Eval(v3.Series{
|
||||
Points: []v3.Point{{Value: 75.0, Timestamp: 1000}}, // 75% CPU usage
|
||||
Labels: map[string]string{"service": "test"},
|
||||
vector, err = threshold.Eval(&qbtypes.TimeSeries{
|
||||
Values: []*qbtypes.TimeSeriesValue{{Value: 75.0, Timestamp: 1000}}, // 75% CPU usage
|
||||
Labels: []*qbtypes.Label{
|
||||
{
|
||||
Key: telemetrytypes.TelemetryFieldKey{
|
||||
Name: "service",
|
||||
},
|
||||
Value: "test",
|
||||
},
|
||||
},
|
||||
}, "", EvalData{})
|
||||
if err != nil {
|
||||
t.Fatalf("Unexpected error in shouldAlert: %v", err)
|
||||
@@ -733,7 +673,7 @@ func TestAnomalyNegationEval(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
ruleJSON []byte
|
||||
series v3.Series
|
||||
series *qbtypes.TimeSeries
|
||||
shouldAlert bool
|
||||
expectedValue float64
|
||||
}{
|
||||
@@ -762,9 +702,16 @@ func TestAnomalyNegationEval(t *testing.T) {
|
||||
"selectedQuery": "A"
|
||||
}
|
||||
}`),
|
||||
series: v3.Series{
|
||||
Labels: map[string]string{"host": "server1"},
|
||||
Points: []v3.Point{
|
||||
series: &qbtypes.TimeSeries{
|
||||
Labels: []*qbtypes.Label{
|
||||
{
|
||||
Key: telemetrytypes.TelemetryFieldKey{
|
||||
Name: "host",
|
||||
},
|
||||
Value: "server1",
|
||||
},
|
||||
},
|
||||
Values: []*qbtypes.TimeSeriesValue{
|
||||
{Timestamp: 1000, Value: -2.1}, // below & at least once, should alert
|
||||
{Timestamp: 2000, Value: -2.3},
|
||||
},
|
||||
@@ -797,9 +744,16 @@ func TestAnomalyNegationEval(t *testing.T) {
|
||||
"selectedQuery": "A"
|
||||
}
|
||||
}`), // below & at least once, no value below -2.0
|
||||
series: v3.Series{
|
||||
Labels: map[string]string{"host": "server1"},
|
||||
Points: []v3.Point{
|
||||
series: &qbtypes.TimeSeries{
|
||||
Labels: []*qbtypes.Label{
|
||||
{
|
||||
Key: telemetrytypes.TelemetryFieldKey{
|
||||
Name: "host",
|
||||
},
|
||||
Value: "server1",
|
||||
},
|
||||
},
|
||||
Values: []*qbtypes.TimeSeriesValue{
|
||||
{Timestamp: 1000, Value: -1.9},
|
||||
{Timestamp: 2000, Value: -1.8},
|
||||
},
|
||||
@@ -831,9 +785,16 @@ func TestAnomalyNegationEval(t *testing.T) {
|
||||
"selectedQuery": "A"
|
||||
}
|
||||
}`), // above & at least once, should alert
|
||||
series: v3.Series{
|
||||
Labels: map[string]string{"host": "server1"},
|
||||
Points: []v3.Point{
|
||||
series: &qbtypes.TimeSeries{
|
||||
Labels: []*qbtypes.Label{
|
||||
{
|
||||
Key: telemetrytypes.TelemetryFieldKey{
|
||||
Name: "host",
|
||||
},
|
||||
Value: "server1",
|
||||
},
|
||||
},
|
||||
Values: []*qbtypes.TimeSeriesValue{
|
||||
{Timestamp: 1000, Value: 2.1}, // above 2.0, should alert
|
||||
{Timestamp: 2000, Value: 2.2},
|
||||
},
|
||||
@@ -866,9 +827,16 @@ func TestAnomalyNegationEval(t *testing.T) {
|
||||
"selectedQuery": "A"
|
||||
}
|
||||
}`),
|
||||
series: v3.Series{
|
||||
Labels: map[string]string{"host": "server1"},
|
||||
Points: []v3.Point{
|
||||
series: &qbtypes.TimeSeries{
|
||||
Labels: []*qbtypes.Label{
|
||||
{
|
||||
Key: telemetrytypes.TelemetryFieldKey{
|
||||
Name: "host",
|
||||
},
|
||||
Value: "server1",
|
||||
},
|
||||
},
|
||||
Values: []*qbtypes.TimeSeriesValue{
|
||||
{Timestamp: 1000, Value: 1.1},
|
||||
{Timestamp: 2000, Value: 1.2},
|
||||
},
|
||||
@@ -900,9 +868,16 @@ func TestAnomalyNegationEval(t *testing.T) {
|
||||
"selectedQuery": "A"
|
||||
}
|
||||
}`), // below and all the times
|
||||
series: v3.Series{
|
||||
Labels: map[string]string{"host": "server1"},
|
||||
Points: []v3.Point{
|
||||
series: &qbtypes.TimeSeries{
|
||||
Labels: []*qbtypes.Label{
|
||||
{
|
||||
Key: telemetrytypes.TelemetryFieldKey{
|
||||
Name: "host",
|
||||
},
|
||||
Value: "server1",
|
||||
},
|
||||
},
|
||||
Values: []*qbtypes.TimeSeriesValue{
|
||||
{Timestamp: 1000, Value: -2.1}, // all below -2
|
||||
{Timestamp: 2000, Value: -2.2},
|
||||
{Timestamp: 3000, Value: -2.5},
|
||||
@@ -936,9 +911,16 @@ func TestAnomalyNegationEval(t *testing.T) {
|
||||
"selectedQuery": "A"
|
||||
}
|
||||
}`),
|
||||
series: v3.Series{
|
||||
Labels: map[string]string{"host": "server1"},
|
||||
Points: []v3.Point{
|
||||
series: &qbtypes.TimeSeries{
|
||||
Labels: []*qbtypes.Label{
|
||||
{
|
||||
Key: telemetrytypes.TelemetryFieldKey{
|
||||
Name: "host",
|
||||
},
|
||||
Value: "server1",
|
||||
},
|
||||
},
|
||||
Values: []*qbtypes.TimeSeriesValue{
|
||||
{Timestamp: 1000, Value: -3.0},
|
||||
{Timestamp: 2000, Value: -1.0}, // above -2, breaks condition
|
||||
{Timestamp: 3000, Value: -2.5},
|
||||
@@ -971,9 +953,16 @@ func TestAnomalyNegationEval(t *testing.T) {
|
||||
"selectedQuery": "A"
|
||||
}
|
||||
}`),
|
||||
series: v3.Series{
|
||||
Labels: map[string]string{"host": "server1"},
|
||||
Points: []v3.Point{
|
||||
series: &qbtypes.TimeSeries{
|
||||
Labels: []*qbtypes.Label{
|
||||
{
|
||||
Key: telemetrytypes.TelemetryFieldKey{
|
||||
Name: "host",
|
||||
},
|
||||
Value: "server1",
|
||||
},
|
||||
},
|
||||
Values: []*qbtypes.TimeSeriesValue{
|
||||
{Timestamp: 1000, Value: -8.0}, // abs(−8) >= 7, alert
|
||||
{Timestamp: 2000, Value: 5.0},
|
||||
},
|
||||
@@ -1006,9 +995,16 @@ func TestAnomalyNegationEval(t *testing.T) {
|
||||
"selectedQuery": "A"
|
||||
}
|
||||
}`),
|
||||
series: v3.Series{
|
||||
Labels: map[string]string{"host": "server1"},
|
||||
Points: []v3.Point{
|
||||
series: &qbtypes.TimeSeries{
|
||||
Labels: []*qbtypes.Label{
|
||||
{
|
||||
Key: telemetrytypes.TelemetryFieldKey{
|
||||
Name: "host",
|
||||
},
|
||||
Value: "server1",
|
||||
},
|
||||
},
|
||||
Values: []*qbtypes.TimeSeriesValue{
|
||||
{Timestamp: 1000, Value: 80.0}, // below 90, should alert
|
||||
{Timestamp: 2000, Value: 85.0},
|
||||
},
|
||||
@@ -1041,9 +1037,16 @@ func TestAnomalyNegationEval(t *testing.T) {
|
||||
"selectedQuery": "A"
|
||||
}
|
||||
}`),
|
||||
series: v3.Series{
|
||||
Labels: map[string]string{"host": "server1"},
|
||||
Points: []v3.Point{
|
||||
series: &qbtypes.TimeSeries{
|
||||
Labels: []*qbtypes.Label{
|
||||
{
|
||||
Key: telemetrytypes.TelemetryFieldKey{
|
||||
Name: "host",
|
||||
},
|
||||
Value: "server1",
|
||||
},
|
||||
},
|
||||
Values: []*qbtypes.TimeSeriesValue{
|
||||
{Timestamp: 1000, Value: 60.0}, // below, should alert
|
||||
{Timestamp: 2000, Value: 90.0},
|
||||
},
|
||||
|
||||
79
pkg/types/ruletypes/compare.go
Normal file
79
pkg/types/ruletypes/compare.go
Normal file
@@ -0,0 +1,79 @@
|
||||
package ruletypes
|
||||
|
||||
import (
|
||||
"github.com/SigNoz/signoz/pkg/errors"
|
||||
"github.com/SigNoz/signoz/pkg/valuer"
|
||||
)
|
||||
|
||||
type CompareOperator struct {
|
||||
valuer.String
|
||||
}
|
||||
|
||||
var (
|
||||
ValueIsAbove = CompareOperator{valuer.NewString("1")}
|
||||
ValueIsAboveLiteral = CompareOperator{valuer.NewString("above")}
|
||||
ValueIsAboveSymbol = CompareOperator{valuer.NewString(">")}
|
||||
|
||||
ValueIsBelow = CompareOperator{valuer.NewString("2")}
|
||||
ValueIsBelowLiteral = CompareOperator{valuer.NewString("below")}
|
||||
ValueIsBelowSymbol = CompareOperator{valuer.NewString("<")}
|
||||
|
||||
ValueIsEq = CompareOperator{valuer.NewString("3")}
|
||||
ValueIsEqLiteral = CompareOperator{valuer.NewString("equal")}
|
||||
ValueIsEqLiteralShort = CompareOperator{valuer.NewString("eq")}
|
||||
ValueIsEqSymbol = CompareOperator{valuer.NewString("=")}
|
||||
|
||||
ValueIsNotEq = CompareOperator{valuer.NewString("4")}
|
||||
ValueIsNotEqLiteral = CompareOperator{valuer.NewString("not_equal")}
|
||||
ValueIsNotEqLiteralShort = CompareOperator{valuer.NewString("not_eq")}
|
||||
ValueIsNotEqSymbol = CompareOperator{valuer.NewString("!=")}
|
||||
|
||||
ValueAboveOrEq = CompareOperator{valuer.NewString("5")}
|
||||
ValueAboveOrEqLiteral = CompareOperator{valuer.NewString("above_or_equal")}
|
||||
ValueAboveOrEqLiteralShort = CompareOperator{valuer.NewString("above_or_eq")}
|
||||
ValueAboveOrEqSymbol = CompareOperator{valuer.NewString(">=")}
|
||||
|
||||
ValueBelowOrEq = CompareOperator{valuer.NewString("6")}
|
||||
ValueBelowOrEqLiteral = CompareOperator{valuer.NewString("below_or_equal")}
|
||||
ValueBelowOrEqLiteralShort = CompareOperator{valuer.NewString("below_or_eq")}
|
||||
ValueBelowOrEqSymbol = CompareOperator{valuer.NewString("<=")}
|
||||
|
||||
ValueOutsideBounds = CompareOperator{valuer.NewString("7")}
|
||||
ValueOutsideBoundsLiteral = CompareOperator{valuer.NewString("outside_bounds")}
|
||||
)
|
||||
|
||||
func (CompareOperator) Enum() []any {
|
||||
return []any{
|
||||
ValueIsAboveLiteral,
|
||||
ValueIsBelowLiteral,
|
||||
ValueIsEqLiteral,
|
||||
ValueIsNotEqLiteral,
|
||||
// ValueAboveOrEqLiteral,
|
||||
// ValueBelowOrEqLiteral,
|
||||
ValueOutsideBoundsLiteral,
|
||||
}
|
||||
}
|
||||
|
||||
func (c CompareOperator) Validate() error {
|
||||
switch c {
|
||||
case ValueIsAbove,
|
||||
ValueIsAboveLiteral,
|
||||
ValueIsAboveSymbol,
|
||||
ValueIsBelow,
|
||||
ValueIsBelowLiteral,
|
||||
ValueIsBelowSymbol,
|
||||
ValueIsEq,
|
||||
ValueIsEqLiteral,
|
||||
ValueIsEqLiteralShort,
|
||||
ValueIsEqSymbol,
|
||||
ValueIsNotEq,
|
||||
ValueIsNotEqLiteral,
|
||||
ValueIsNotEqLiteralShort,
|
||||
ValueIsNotEqSymbol,
|
||||
ValueOutsideBounds,
|
||||
ValueOutsideBoundsLiteral:
|
||||
return nil
|
||||
default:
|
||||
return errors.NewInvalidInputf(errors.CodeInvalidInput, "unknown comparison operator, known values are: ")
|
||||
}
|
||||
}
|
||||
@@ -8,5 +8,5 @@ const (
|
||||
LabelThresholdName = "threshold.name"
|
||||
LabelSeverityName = "severity"
|
||||
LabelLastSeen = "lastSeen"
|
||||
LabelRuleId = "ruleId"
|
||||
LabelRuleID = "ruleId"
|
||||
)
|
||||
|
||||
316
pkg/types/ruletypes/labels.go
Normal file
316
pkg/types/ruletypes/labels.go
Normal file
@@ -0,0 +1,316 @@
|
||||
package ruletypes
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/cespare/xxhash/v2"
|
||||
)
|
||||
|
||||
// sep is the byte placed after every label name and value when hashing.
const sep = '\xff'

// Well-known label names used by Prometheus components and by the rule
// engine when it annotates alerts.
const (
	// MetricNameLabel is the label holding the metric name.
	MetricNameLabel = "__name__"
	// TemporalityLabel is the label holding the metric temporality.
	TemporalityLabel = "__temporality__"
	// AlertNameLabel is the label holding the alert name.
	AlertNameLabel = "alertname"
	// TestAlertLabel is the label name "testalert".
	TestAlertLabel = "testalert"
	// NoDataLabel is the label name "nodata".
	NoDataLabel = "nodata"

	// AlertStateLabel is the label name indicating the state of an alert.
	AlertStateLabel = "alertstate"

	// AlertRuleIDLabel is the label name "ruleId".
	AlertRuleIDLabel = "ruleId"
	// RuleSourceLabel is the label name "ruleSource".
	RuleSourceLabel = "ruleSource"

	// RuleThresholdLabel is the label name "threshold".
	RuleThresholdLabel = "threshold"
	// AlertSummaryLabel is the label name "summary".
	AlertSummaryLabel = "summary"
	// AlertDescriptionLabel is the label name "description".
	AlertDescriptionLabel = "description"
)
|
||||
|
||||
// Label is a single name/value pair of strings.
// TODO(srikanthccv): https://github.com/SigNoz/signoz/issues/9232?
type Label struct {
	Name  string
	Value string
}

// Labels is a set of labels kept sorted by name. Code that builds a Labels
// value is responsible for establishing that order.
type Labels []Label

// Len reports the number of labels (sort.Interface).
func (ls Labels) Len() int {
	return len(ls)
}

// Swap exchanges the labels at positions i and j (sort.Interface).
func (ls Labels) Swap(i, j int) {
	tmp := ls[i]
	ls[i] = ls[j]
	ls[j] = tmp
}

// Less orders labels by name only (sort.Interface).
func (ls Labels) Less(i, j int) bool {
	return ls[j].Name > ls[i].Name
}
|
||||
|
||||
func (ls Labels) String() string {
|
||||
var b bytes.Buffer
|
||||
|
||||
b.WriteByte('{')
|
||||
for i, l := range ls {
|
||||
if i > 0 {
|
||||
b.WriteByte(',')
|
||||
b.WriteByte(' ')
|
||||
}
|
||||
b.WriteString(l.Name)
|
||||
b.WriteByte('=')
|
||||
b.WriteString(strconv.Quote(l.Value))
|
||||
}
|
||||
b.WriteByte('}')
|
||||
|
||||
return b.String()
|
||||
}
|
||||
|
||||
// MarshalJSON implements json.Marshaler.
|
||||
func (ls Labels) MarshalJSON() ([]byte, error) {
|
||||
return json.Marshal(ls.Map())
|
||||
}
|
||||
|
||||
// UnmarshalJSON implements json.Unmarshaler.
|
||||
func (ls *Labels) UnmarshalJSON(b []byte) error {
|
||||
var m map[string]string
|
||||
|
||||
if err := json.Unmarshal(b, &m); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
*ls = FromMap(m)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Hash returns a hash value for the label set.
|
||||
func (ls Labels) Hash() uint64 {
|
||||
b := make([]byte, 0, 1024)
|
||||
|
||||
for _, v := range ls {
|
||||
b = append(b, v.Name...)
|
||||
b = append(b, sep)
|
||||
b = append(b, v.Value...)
|
||||
b = append(b, sep)
|
||||
}
|
||||
return xxhash.Sum64(b)
|
||||
}
|
||||
|
||||
// HashForLabels returns a hash value for the labels matching the provided names.
|
||||
func (ls Labels) HashForLabels(b []byte, names ...string) (uint64, []byte) {
|
||||
var seps = []byte{'\xff'}
|
||||
b = b[:0]
|
||||
i, j := 0, 0
|
||||
for i < len(ls) && j < len(names) {
|
||||
if names[j] < ls[i].Name {
|
||||
j++
|
||||
} else if ls[i].Name < names[j] {
|
||||
i++
|
||||
} else {
|
||||
b = append(b, ls[i].Name...)
|
||||
b = append(b, seps[0])
|
||||
b = append(b, ls[i].Value...)
|
||||
b = append(b, seps[0])
|
||||
i++
|
||||
j++
|
||||
}
|
||||
}
|
||||
return xxhash.Sum64(b), b
|
||||
}
|
||||
|
||||
// HashWithoutLabels returns a hash value for all labels except those matching
|
||||
// the provided names.
|
||||
func (ls Labels) HashWithoutLabels(names ...string) uint64 {
|
||||
b := make([]byte, 0, 1024)
|
||||
|
||||
Outer:
|
||||
for _, v := range ls {
|
||||
if v.Name == MetricNameLabel {
|
||||
continue
|
||||
}
|
||||
for _, n := range names {
|
||||
if v.Name == n {
|
||||
continue Outer
|
||||
}
|
||||
}
|
||||
b = append(b, v.Name...)
|
||||
b = append(b, sep)
|
||||
b = append(b, v.Value...)
|
||||
b = append(b, sep)
|
||||
}
|
||||
return xxhash.Sum64(b)
|
||||
}
|
||||
|
||||
// Copy returns a copy of the labels.
|
||||
func (ls Labels) Copy() Labels {
|
||||
res := make(Labels, len(ls))
|
||||
copy(res, ls)
|
||||
return res
|
||||
}
|
||||
|
||||
// Get returns the value for the label with the given name.
|
||||
// Returns an empty string if the label doesn't exist.
|
||||
func (ls Labels) Get(name string) string {
|
||||
for _, l := range ls {
|
||||
if l.Name == name {
|
||||
return l.Value
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// Has returns true if the label with the given name is present.
|
||||
func (ls Labels) Has(name string) bool {
|
||||
for _, l := range ls {
|
||||
if l.Name == name {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// Equal returns whether the two label sets are equal.
|
||||
func Equal(ls, o Labels) bool {
|
||||
if len(ls) != len(o) {
|
||||
return false
|
||||
}
|
||||
for i, l := range ls {
|
||||
if l.Name != o[i].Name || l.Value != o[i].Value {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// Map returns a string map of the labels.
|
||||
func (ls Labels) Map() map[string]string {
|
||||
m := make(map[string]string, len(ls))
|
||||
for _, l := range ls {
|
||||
m[l.Name] = l.Value
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
||||
// New returns a sorted Labels from the given labels.
|
||||
// The caller has to guarantee that all label names are unique.
|
||||
func New(ls ...Label) Labels {
|
||||
set := make(Labels, 0, len(ls))
|
||||
for _, l := range ls {
|
||||
set = append(set, l)
|
||||
}
|
||||
sort.Sort(set)
|
||||
|
||||
return set
|
||||
}
|
||||
|
||||
// FromMap returns new sorted Labels from the given map.
|
||||
func FromMap(m map[string]string) Labels {
|
||||
l := make([]Label, 0, len(m))
|
||||
for k, v := range m {
|
||||
l = append(l, Label{Name: k, Value: v})
|
||||
}
|
||||
return New(l...)
|
||||
}
|
||||
|
||||
// FromStrings creates new labels from pairs of strings.
|
||||
func FromStrings(ss ...string) Labels {
|
||||
if len(ss)%2 != 0 {
|
||||
panic("invalid number of strings")
|
||||
}
|
||||
var res Labels
|
||||
for i := 0; i < len(ss); i += 2 {
|
||||
res = append(res, Label{Name: ss[i], Value: ss[i+1]})
|
||||
}
|
||||
|
||||
sort.Sort(res)
|
||||
return res
|
||||
}
|
||||
|
||||
// Compare compares the two label sets.
|
||||
// The result will be 0 if a==b, <0 if a < b, and >0 if a > b.
|
||||
func Compare(a, b Labels) int {
|
||||
l := len(a)
|
||||
if len(b) < l {
|
||||
l = len(b)
|
||||
}
|
||||
|
||||
for i := 0; i < l; i++ {
|
||||
if d := strings.Compare(a[i].Name, b[i].Name); d != 0 {
|
||||
return d
|
||||
}
|
||||
if d := strings.Compare(a[i].Value, b[i].Value); d != 0 {
|
||||
return d
|
||||
}
|
||||
}
|
||||
// If all labels so far were in common, the set with fewer labels comes first.
|
||||
return len(a) - len(b)
|
||||
}
|
||||
|
||||
// Builder allows modifiying Labels.
|
||||
type Builder struct {
|
||||
base Labels
|
||||
del []string
|
||||
add []Label
|
||||
}
|
||||
|
||||
// NewBuilder returns a new LabelsBuilder
|
||||
func NewBuilder(base ...Label) *Builder {
|
||||
return &Builder{
|
||||
base: base,
|
||||
del: make([]string, 0, 5),
|
||||
add: make([]Label, 0, 5),
|
||||
}
|
||||
}
|
||||
|
||||
// Del deletes the label of the given name.
|
||||
func (b *Builder) Del(ns ...string) *Builder {
|
||||
for _, n := range ns {
|
||||
for i, a := range b.add {
|
||||
if a.Name == n {
|
||||
b.add = append(b.add[:i], b.add[i+1:]...)
|
||||
}
|
||||
}
|
||||
b.del = append(b.del, n)
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
// Set the name/value pair as a label.
|
||||
func (b *Builder) Set(n, v string) *Builder {
|
||||
for i, a := range b.add {
|
||||
if a.Name == n {
|
||||
b.add[i].Value = v
|
||||
return b
|
||||
}
|
||||
}
|
||||
b.add = append(b.add, Label{Name: n, Value: v})
|
||||
|
||||
return b
|
||||
}
|
||||
|
||||
// Labels returns the labels from the builder. If no modifications
|
||||
// were made, the original labels are returned.
|
||||
func (b *Builder) Labels() Labels {
|
||||
if len(b.del) == 0 && len(b.add) == 0 {
|
||||
return b.base
|
||||
}
|
||||
|
||||
// In the general case, labels are removed, modified or moved
|
||||
// rather than added.
|
||||
res := make(Labels, 0, len(b.base))
|
||||
Outer:
|
||||
for _, l := range b.base {
|
||||
for _, n := range b.del {
|
||||
if l.Name == n {
|
||||
continue Outer
|
||||
}
|
||||
}
|
||||
for _, la := range b.add {
|
||||
if l.Name == la.Name {
|
||||
continue Outer
|
||||
}
|
||||
}
|
||||
res = append(res, l)
|
||||
}
|
||||
res = append(res, b.add...)
|
||||
sort.Sort(res)
|
||||
|
||||
return res
|
||||
}
|
||||
60
pkg/types/ruletypes/match.go
Normal file
60
pkg/types/ruletypes/match.go
Normal file
@@ -0,0 +1,60 @@
|
||||
package ruletypes
|
||||
|
||||
import (
|
||||
"github.com/SigNoz/signoz/pkg/errors"
|
||||
"github.com/SigNoz/signoz/pkg/valuer"
|
||||
)
|
||||
|
||||
type MatchType struct {
|
||||
valuer.String
|
||||
}
|
||||
|
||||
var (
|
||||
AtleastOnce = MatchType{valuer.NewString("1")}
|
||||
AtleastOnceLiteral = MatchType{valuer.NewString("atleast_once")}
|
||||
|
||||
AllTheTimes = MatchType{valuer.NewString("2")}
|
||||
AllTheTimesLiteral = MatchType{valuer.NewString("all_the_times")}
|
||||
|
||||
OnAverage = MatchType{valuer.NewString("3")}
|
||||
OnAverageLiteral = MatchType{valuer.NewString("on_average")}
|
||||
OnAverageShort = MatchType{valuer.NewString("avg")}
|
||||
|
||||
InTotal = MatchType{valuer.NewString("4")}
|
||||
InTotalLiteral = MatchType{valuer.NewString("in_total")}
|
||||
InTotalShort = MatchType{valuer.NewString("sum")}
|
||||
|
||||
Last = MatchType{valuer.NewString("5")}
|
||||
LastLiteral = MatchType{valuer.NewString("last")}
|
||||
)
|
||||
|
||||
func (MatchType) Enum() []any {
|
||||
return []any{
|
||||
AtleastOnceLiteral,
|
||||
AllTheTimesLiteral,
|
||||
OnAverageLiteral,
|
||||
InTotalLiteral,
|
||||
LastLiteral,
|
||||
}
|
||||
}
|
||||
|
||||
func (m MatchType) Validate() error {
|
||||
switch m {
|
||||
case
|
||||
AtleastOnce,
|
||||
AtleastOnceLiteral,
|
||||
AllTheTimes,
|
||||
AllTheTimesLiteral,
|
||||
OnAverage,
|
||||
OnAverageLiteral,
|
||||
OnAverageShort,
|
||||
InTotal,
|
||||
InTotalLiteral,
|
||||
InTotalShort,
|
||||
Last,
|
||||
LastLiteral:
|
||||
return nil
|
||||
default:
|
||||
return errors.NewInvalidInputf(errors.CodeInvalidInput, "unknown match type operator, known values are")
|
||||
}
|
||||
}
|
||||
@@ -4,8 +4,6 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strconv"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/query-service/utils/labels"
|
||||
)
|
||||
|
||||
// common result format of query
|
||||
@@ -15,7 +13,7 @@ type Vector []Sample
|
||||
type Sample struct {
|
||||
Point
|
||||
|
||||
Metric labels.Labels
|
||||
Metric Labels
|
||||
|
||||
IsMissing bool
|
||||
|
||||
@@ -34,8 +32,8 @@ func (s Sample) String() string {
|
||||
|
||||
func (s Sample) MarshalJSON() ([]byte, error) {
|
||||
v := struct {
|
||||
M labels.Labels `json:"metric"`
|
||||
V Point `json:"value"`
|
||||
M Labels `json:"metric"`
|
||||
V Point `json:"value"`
|
||||
}{
|
||||
M: s.Metric,
|
||||
V: s.Point,
|
||||
@@ -57,5 +55,5 @@ func (p Point) String() string {
|
||||
// MarshalJSON implements json.Marshaler.
|
||||
func (p Point) MarshalJSON() ([]byte, error) {
|
||||
v := strconv.FormatFloat(p.V, 'f', -1, 64)
|
||||
return json.Marshal([...]interface{}{float64(p.T) / 1000, v})
|
||||
return json.Marshal([...]any{float64(p.T) / 1000, v})
|
||||
}
|
||||
|
||||
@@ -28,7 +28,7 @@ func NewStatsFromRules(rules []*Rule) map[string]any {
|
||||
continue
|
||||
}
|
||||
|
||||
key := "rule.type." + strings.TrimSuffix(strings.ToLower(string(gettableRule.RuleType)), "_rule") + ".count"
|
||||
key := "rule.type." + strings.TrimSuffix(strings.ToLower(gettableRule.RuleType.StringValue()), "_rule") + ".count"
|
||||
if _, ok := stats[key]; !ok {
|
||||
stats[key] = int64(1)
|
||||
} else {
|
||||
|
||||
13
pkg/types/ruletypes/rule_health.go
Normal file
13
pkg/types/ruletypes/rule_health.go
Normal file
@@ -0,0 +1,13 @@
|
||||
package ruletypes
|
||||
|
||||
import "github.com/SigNoz/signoz/pkg/valuer"
|
||||
|
||||
type RuleHealth struct {
|
||||
valuer.String
|
||||
}
|
||||
|
||||
var (
|
||||
HealthUnknown = RuleHealth{valuer.NewString("unknown")}
|
||||
HealthGood = RuleHealth{valuer.NewString("ok")}
|
||||
HealthBad = RuleHealth{valuer.NewString("err")}
|
||||
)
|
||||
36
pkg/types/ruletypes/rule_type.go
Normal file
36
pkg/types/ruletypes/rule_type.go
Normal file
@@ -0,0 +1,36 @@
|
||||
package ruletypes
|
||||
|
||||
import (
|
||||
"github.com/SigNoz/signoz/pkg/errors"
|
||||
"github.com/SigNoz/signoz/pkg/valuer"
|
||||
)
|
||||
|
||||
type RuleType struct {
|
||||
valuer.String
|
||||
}
|
||||
|
||||
var (
|
||||
RuleTypeThreshold = RuleType{valuer.NewString("threshold_rule")}
|
||||
RuleTypeProm = RuleType{valuer.NewString("promql_rule")}
|
||||
RuleTypeAnomaly = RuleType{valuer.NewString("anomaly_rule")}
|
||||
)
|
||||
|
||||
func (RuleType) Enum() []any {
|
||||
return []any{
|
||||
RuleTypeThreshold,
|
||||
RuleTypeProm,
|
||||
RuleTypeAnomaly,
|
||||
}
|
||||
}
|
||||
|
||||
func (r RuleType) Validate() error {
|
||||
switch r {
|
||||
case
|
||||
RuleTypeThreshold,
|
||||
RuleTypeProm,
|
||||
RuleTypeAnomaly:
|
||||
return nil
|
||||
default:
|
||||
return errors.NewInvalidInputf(errors.CodeInvalidInput, "unknown rule type, known values are")
|
||||
}
|
||||
}
|
||||
@@ -9,16 +9,15 @@ import (
|
||||
"regexp"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
"unicode"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/errors"
|
||||
|
||||
html_template "html/template"
|
||||
text_template "text/template"
|
||||
htmltpl "html/template"
|
||||
texttpl "text/template"
|
||||
|
||||
"golang.org/x/text/cases"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/query-service/common"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/utils/times"
|
||||
)
|
||||
|
||||
// this file contains all the methods and structs
|
||||
@@ -52,8 +51,25 @@ func (q tmplQueryResultsByLabelSorter) Swap(i, j int) {
|
||||
type TemplateExpander struct {
|
||||
text string
|
||||
name string
|
||||
data interface{}
|
||||
funcMap text_template.FuncMap
|
||||
data any
|
||||
funcMap texttpl.FuncMap
|
||||
}
|
||||
|
||||
// labelNameInvalidChars matches every character that is not an ASCII
// letter, an ASCII digit or an underscore. It is compiled once at package
// scope so NormalizeLabelName does not recompile it on every call.
var labelNameInvalidChars = regexp.MustCompile(`[^a-zA-Z0-9_]`)

// NormalizeLabelName rewrites name so that it only contains ASCII letters,
// digits and underscores and does not start with a digit.
// See https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels
//
// Every other character is replaced by an underscore, and an underscore is
// prepended when the result would otherwise start with a digit. The empty
// string is returned unchanged.
func NormalizeLabelName(name string) string {
	normalized := labelNameInvalidChars.ReplaceAllString(name, "_")

	// If the first character is not a letter or an underscore, prepend an underscore.
	if len(normalized) > 0 && !unicode.IsLetter(rune(normalized[0])) && normalized[0] != '_' {
		normalized = "_" + normalized
	}

	return normalized
}
|
||||
|
||||
// NewTemplateExpander returns a template expander ready to use.
|
||||
@@ -61,15 +77,14 @@ func NewTemplateExpander(
|
||||
ctx context.Context,
|
||||
text string,
|
||||
name string,
|
||||
data interface{},
|
||||
timestamp times.Time,
|
||||
data any,
|
||||
externalURL *url.URL,
|
||||
) *TemplateExpander {
|
||||
return &TemplateExpander{
|
||||
text: text,
|
||||
name: name,
|
||||
data: data,
|
||||
funcMap: text_template.FuncMap{
|
||||
funcMap: texttpl.FuncMap{
|
||||
"first": func(v tmplQueryResults) (*tmplQueryRecord, error) {
|
||||
if len(v) > 0 {
|
||||
return v[0], nil
|
||||
@@ -85,8 +100,8 @@ func NewTemplateExpander(
|
||||
"strvalue": func(s *tmplQueryRecord) string {
|
||||
return s.Labels["__value__"]
|
||||
},
|
||||
"args": func(args ...interface{}) map[string]interface{} {
|
||||
result := make(map[string]interface{})
|
||||
"args": func(args ...any) map[string]any {
|
||||
result := make(map[string]any)
|
||||
for i, a := range args {
|
||||
result[fmt.Sprintf("arg%d", i)] = a
|
||||
}
|
||||
@@ -96,8 +111,8 @@ func NewTemplateExpander(
|
||||
re := regexp.MustCompile(pattern)
|
||||
return re.ReplaceAllString(text, repl)
|
||||
},
|
||||
"safeHtml": func(text string) html_template.HTML {
|
||||
return html_template.HTML(text)
|
||||
"safeHtml": func(text string) htmltpl.HTML {
|
||||
return htmltpl.HTML(text)
|
||||
},
|
||||
"match": regexp.MatchString,
|
||||
"title": cases.Title,
|
||||
@@ -191,7 +206,7 @@ func NewTemplateExpander(
|
||||
if math.IsNaN(v) || math.IsInf(v, 0) {
|
||||
return fmt.Sprintf("%.4g", v)
|
||||
}
|
||||
t := times.TimeFromUnixNano(int64(v * 1e9)).Time().UTC()
|
||||
t := time.Unix(0, int64(v*1e9)).UTC()
|
||||
return fmt.Sprint(t)
|
||||
},
|
||||
"pathPrefix": func() string {
|
||||
@@ -205,7 +220,7 @@ func NewTemplateExpander(
|
||||
}
|
||||
|
||||
// AlertTemplateData returns the interface to be used in expanding the template.
|
||||
func AlertTemplateData(labels map[string]string, value string, threshold string) interface{} {
|
||||
func AlertTemplateData(labels map[string]string, value string, threshold string) any {
|
||||
// This exists here for backwards compatibility.
|
||||
// The labels map passed in no longer contains the normalized labels.
|
||||
// To continue supporting the old way of referencing labels, we need to
|
||||
@@ -214,7 +229,7 @@ func AlertTemplateData(labels map[string]string, value string, threshold string)
|
||||
newLabels := make(map[string]string)
|
||||
for k, v := range labels {
|
||||
newLabels[k] = v
|
||||
newLabels[common.NormalizeLabelName(k)] = v
|
||||
newLabels[NormalizeLabelName(k)] = v
|
||||
}
|
||||
|
||||
return struct {
|
||||
@@ -275,7 +290,7 @@ func (te *TemplateExpander) preprocessTemplate() {
|
||||
|
||||
// Funcs adds the functions in fm to the Expander's function map.
|
||||
// Existing functions will be overwritten in case of conflict.
|
||||
func (te TemplateExpander) Funcs(fm text_template.FuncMap) {
|
||||
func (te TemplateExpander) Funcs(fm texttpl.FuncMap) {
|
||||
for k, v := range fm {
|
||||
te.funcMap[k] = v
|
||||
}
|
||||
@@ -297,7 +312,7 @@ func (te TemplateExpander) Expand() (result string, resultErr error) {
|
||||
|
||||
te.preprocessTemplate()
|
||||
|
||||
tmpl, err := text_template.New(te.name).Funcs(te.funcMap).Option("missingkey=zero").Parse(te.text)
|
||||
tmpl, err := texttpl.New(te.name).Funcs(te.funcMap).Option("missingkey=zero").Parse(te.text)
|
||||
if err != nil {
|
||||
return "", errors.WrapInvalidInputf(err, errors.CodeInvalidInput, "error parsing template %v", te.name)
|
||||
}
|
||||
@@ -321,13 +336,13 @@ func (te TemplateExpander) ExpandHTML(templateFiles []string) (result string, re
|
||||
}
|
||||
}()
|
||||
|
||||
tmpl := html_template.New(te.name).Funcs(html_template.FuncMap(te.funcMap))
|
||||
tmpl := htmltpl.New(te.name).Funcs(htmltpl.FuncMap(te.funcMap))
|
||||
tmpl.Option("missingkey=zero")
|
||||
tmpl.Funcs(html_template.FuncMap{
|
||||
"tmpl": func(name string, data interface{}) (html_template.HTML, error) {
|
||||
tmpl.Funcs(htmltpl.FuncMap{
|
||||
"tmpl": func(name string, data any) (htmltpl.HTML, error) {
|
||||
var buffer bytes.Buffer
|
||||
err := tmpl.ExecuteTemplate(&buffer, name, data)
|
||||
return html_template.HTML(buffer.String()), err
|
||||
return htmltpl.HTML(buffer.String()), err
|
||||
},
|
||||
})
|
||||
tmpl, err := tmpl.Parse(te.text)
|
||||
@@ -351,7 +366,7 @@ func (te TemplateExpander) ExpandHTML(templateFiles []string) (result string, re
|
||||
// ParseTest parses the templates and returns the error if any.
|
||||
func (te TemplateExpander) ParseTest() error {
|
||||
te.preprocessTemplate()
|
||||
_, err := text_template.New(te.name).Funcs(te.funcMap).Option("missingkey=zero").Parse(te.text)
|
||||
_, err := texttpl.New(te.name).Funcs(te.funcMap).Option("missingkey=zero").Parse(te.text)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -3,16 +3,14 @@ package ruletypes
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/query-service/utils/times"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestTemplateExpander(t *testing.T) {
|
||||
defs := "{{$labels := .Labels}}{{$value := .Value}}{{$threshold := .Threshold}}"
|
||||
data := AlertTemplateData(map[string]string{"service.name": "my-service"}, "100", "200")
|
||||
expander := NewTemplateExpander(context.Background(), defs+"test $service.name", "test", data, times.Time(time.Now().Unix()), nil)
|
||||
expander := NewTemplateExpander(context.Background(), defs+"test $service.name", "test", data, nil)
|
||||
result, err := expander.Expand()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
@@ -23,7 +21,7 @@ func TestTemplateExpander(t *testing.T) {
|
||||
func TestTemplateExpander_WithThreshold(t *testing.T) {
|
||||
defs := "{{$labels := .Labels}}{{$value := .Value}}{{$threshold := .Threshold}}"
|
||||
data := AlertTemplateData(map[string]string{"service.name": "my-service"}, "200", "100")
|
||||
expander := NewTemplateExpander(context.Background(), defs+"test $service.name exceeds {{$threshold}} and observed at {{$value}}", "test", data, times.Time(time.Now().Unix()), nil)
|
||||
expander := NewTemplateExpander(context.Background(), defs+"test $service.name exceeds {{$threshold}} and observed at {{$value}}", "test", data, nil)
|
||||
result, err := expander.Expand()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
@@ -34,7 +32,7 @@ func TestTemplateExpander_WithThreshold(t *testing.T) {
|
||||
func TestTemplateExpanderOldVariableSyntax(t *testing.T) {
|
||||
defs := "{{$labels := .Labels}}{{$value := .Value}}{{$threshold := .Threshold}}"
|
||||
data := AlertTemplateData(map[string]string{"service.name": "my-service"}, "200", "100")
|
||||
expander := NewTemplateExpander(context.Background(), defs+"test {{.Labels.service_name}} exceeds {{$threshold}} and observed at {{$value}}", "test", data, times.Time(time.Now().Unix()), nil)
|
||||
expander := NewTemplateExpander(context.Background(), defs+"test {{.Labels.service_name}} exceeds {{$threshold}} and observed at {{$value}}", "test", data, nil)
|
||||
result, err := expander.Expand()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
@@ -45,7 +43,7 @@ func TestTemplateExpanderOldVariableSyntax(t *testing.T) {
|
||||
func TestTemplateExpander_WithAlreadyNormalizedKey(t *testing.T) {
|
||||
defs := "{{$labels := .Labels}}{{$value := .Value}}{{$threshold := .Threshold}}"
|
||||
data := AlertTemplateData(map[string]string{"service_name": "my-service"}, "200", "100")
|
||||
expander := NewTemplateExpander(context.Background(), defs+"test {{.Labels.service_name}} exceeds {{$threshold}} and observed at {{$value}}", "test", data, times.Time(time.Now().Unix()), nil)
|
||||
expander := NewTemplateExpander(context.Background(), defs+"test {{.Labels.service_name}} exceeds {{$threshold}} and observed at {{$value}}", "test", data, nil)
|
||||
result, err := expander.Expand()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
@@ -56,7 +54,7 @@ func TestTemplateExpander_WithAlreadyNormalizedKey(t *testing.T) {
|
||||
func TestTemplateExpander_WithMissingKey(t *testing.T) {
|
||||
defs := "{{$labels := .Labels}}{{$value := .Value}}{{$threshold := .Threshold}}"
|
||||
data := AlertTemplateData(map[string]string{"service_name": "my-service"}, "200", "100")
|
||||
expander := NewTemplateExpander(context.Background(), defs+"test {{.Labels.missing_key}} exceeds {{$threshold}} and observed at {{$value}}", "test", data, times.Time(time.Now().Unix()), nil)
|
||||
expander := NewTemplateExpander(context.Background(), defs+"test {{.Labels.missing_key}} exceeds {{$threshold}} and observed at {{$value}}", "test", data, nil)
|
||||
result, err := expander.Expand()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
@@ -67,7 +65,7 @@ func TestTemplateExpander_WithMissingKey(t *testing.T) {
|
||||
func TestTemplateExpander_WithLablesDotSyntax(t *testing.T) {
|
||||
defs := "{{$labels := .Labels}}{{$value := .Value}}{{$threshold := .Threshold}}"
|
||||
data := AlertTemplateData(map[string]string{"service.name": "my-service"}, "200", "100")
|
||||
expander := NewTemplateExpander(context.Background(), defs+"test {{.Labels.service.name}} exceeds {{$threshold}} and observed at {{$value}}", "test", data, times.Time(time.Now().Unix()), nil)
|
||||
expander := NewTemplateExpander(context.Background(), defs+"test {{.Labels.service.name}} exceeds {{$threshold}} and observed at {{$value}}", "test", data, nil)
|
||||
result, err := expander.Expand()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
@@ -78,7 +76,7 @@ func TestTemplateExpander_WithLablesDotSyntax(t *testing.T) {
|
||||
func TestTemplateExpander_WithVariableSyntax(t *testing.T) {
|
||||
defs := "{{$labels := .Labels}}{{$value := .Value}}{{$threshold := .Threshold}}"
|
||||
data := AlertTemplateData(map[string]string{"service.name": "my-service"}, "200", "100")
|
||||
expander := NewTemplateExpander(context.Background(), defs+"test {{$service.name}} exceeds {{$threshold}} and observed at {{$value}}", "test", data, times.Time(time.Now().Unix()), nil)
|
||||
expander := NewTemplateExpander(context.Background(), defs+"test {{$service.name}} exceeds {{$threshold}} and observed at {{$value}}", "test", data, nil)
|
||||
result, err := expander.Expand()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
|
||||
@@ -2,13 +2,13 @@ package ruletypes
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"math"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/errors"
|
||||
v3 "github.com/SigNoz/signoz/pkg/query-service/model/v3"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/utils/labels"
|
||||
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
|
||||
"github.com/SigNoz/signoz/pkg/units"
|
||||
"github.com/SigNoz/signoz/pkg/valuer"
|
||||
)
|
||||
@@ -38,7 +38,7 @@ func (r *RuleThresholdData) UnmarshalJSON(data []byte) error {
|
||||
case BasicThresholdKind:
|
||||
var basicThresholds BasicRuleThresholds
|
||||
if err := json.Unmarshal(raw["spec"], &basicThresholds); err != nil {
|
||||
return errors.NewInvalidInputf(errors.CodeInvalidInput, "failed to unmarshal rule threhsold spec: %v", err)
|
||||
return errors.NewInvalidInputf(errors.CodeInvalidInput, "failed to unmarshal rule threshold spec: %v", err)
|
||||
}
|
||||
if err := basicThresholds.Validate(); err != nil {
|
||||
return errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid rule threshold spec: %v", err)
|
||||
@@ -70,7 +70,7 @@ type EvalData struct {
|
||||
SendUnmatched bool
|
||||
}
|
||||
|
||||
// HasActiveAlert checks if the given sample figerprint is active
|
||||
// HasActiveAlert checks if the given sample fingerprint is active
|
||||
// as an alert.
|
||||
func (eval EvalData) HasActiveAlert(sampleLabelFp uint64) bool {
|
||||
if len(eval.ActiveAlerts) == 0 {
|
||||
@@ -83,18 +83,18 @@ func (eval EvalData) HasActiveAlert(sampleLabelFp uint64) bool {
|
||||
type RuleThreshold interface {
|
||||
// Eval runs the given series through the threshold rules
|
||||
// using the given EvalData and returns the matching series
|
||||
Eval(series v3.Series, unit string, evalData EvalData) (Vector, error)
|
||||
Eval(series *qbtypes.TimeSeries, unit string, evalData EvalData) (Vector, error)
|
||||
GetRuleReceivers() []RuleReceivers
|
||||
}
|
||||
|
||||
type BasicRuleThreshold struct {
|
||||
Name string `json:"name"`
|
||||
TargetValue *float64 `json:"target"`
|
||||
TargetUnit string `json:"targetUnit"`
|
||||
RecoveryTarget *float64 `json:"recoveryTarget"`
|
||||
MatchType MatchType `json:"matchType"`
|
||||
CompareOp CompareOp `json:"op"`
|
||||
Channels []string `json:"channels"`
|
||||
Name string `json:"name"`
|
||||
TargetValue *float64 `json:"target"`
|
||||
TargetUnit string `json:"targetUnit"`
|
||||
RecoveryTarget *float64 `json:"recoveryTarget"`
|
||||
MatchType MatchType `json:"matchType"`
|
||||
CompareOperator CompareOperator `json:"op"`
|
||||
Channels []string `json:"channels"`
|
||||
}
|
||||
|
||||
type BasicRuleThresholds []BasicRuleThreshold
|
||||
@@ -122,7 +122,13 @@ func (r BasicRuleThresholds) Validate() error {
|
||||
return errors.Join(errs...)
|
||||
}
|
||||
|
||||
func (r BasicRuleThresholds) Eval(series v3.Series, unit string, evalData EvalData) (Vector, error) {
|
||||
func (r BasicRuleThresholds) Eval(s *qbtypes.TimeSeries, unit string, evalData EvalData) (Vector, error) {
|
||||
|
||||
series := &qbtypes.TimeSeries{
|
||||
Labels: s.Labels,
|
||||
Values: s.EvaluableValues(),
|
||||
}
|
||||
|
||||
var resultVector Vector
|
||||
thresholds := []BasicRuleThreshold(r)
|
||||
sortThresholds(thresholds)
|
||||
@@ -138,13 +144,12 @@ func (r BasicRuleThresholds) Eval(series v3.Series, unit string, evalData EvalDa
|
||||
continue
|
||||
} else if evalData.SendUnmatched {
|
||||
// Sanitise the series points to remove any NaN or Inf values
|
||||
series.Points = removeGroupinSetPoints(series)
|
||||
if len(series.Points) == 0 {
|
||||
if len(series.Values) == 0 {
|
||||
continue
|
||||
}
|
||||
// prepare the sample with the first point of the series
|
||||
smpl := Sample{
|
||||
Point: Point{T: series.Points[0].Timestamp, V: series.Points[0].Value},
|
||||
Point: Point{T: series.Values[0].Timestamp, V: series.Values[0].Value},
|
||||
Metric: PrepareSampleLabelsForRule(series.Labels, threshold.Name),
|
||||
Target: *threshold.TargetValue,
|
||||
TargetUnit: threshold.TargetUnit,
|
||||
@@ -181,11 +186,10 @@ func (r BasicRuleThresholds) Eval(series v3.Series, unit string, evalData EvalDa
|
||||
func sortThresholds(thresholds []BasicRuleThreshold) {
|
||||
sort.Slice(thresholds, func(i, j int) bool {
|
||||
|
||||
compareOp := thresholds[i].getCompareOp()
|
||||
targetI := thresholds[i].target(thresholds[i].TargetUnit) //for sorting we dont need rule unit
|
||||
targetJ := thresholds[j].target(thresholds[j].TargetUnit)
|
||||
|
||||
switch compareOp {
|
||||
switch thresholds[i].CompareOperator {
|
||||
case ValueIsAbove, ValueAboveOrEq, ValueOutsideBounds:
|
||||
// For "above" operations, sort descending (higher values first)
|
||||
return targetI > targetJ
|
||||
@@ -220,10 +224,6 @@ func (b BasicRuleThreshold) recoveryTarget(ruleUnit string) float64 {
|
||||
return b.convertToRuleUnit(*b.RecoveryTarget, ruleUnit)
|
||||
}
|
||||
|
||||
func (b BasicRuleThreshold) getCompareOp() CompareOp {
|
||||
return b.CompareOp
|
||||
}
|
||||
|
||||
func (b BasicRuleThreshold) Validate() error {
|
||||
var errs []error
|
||||
if b.Name == "" {
|
||||
@@ -234,105 +234,87 @@ func (b BasicRuleThreshold) Validate() error {
|
||||
errs = append(errs, errors.NewInvalidInputf(errors.CodeInvalidInput, "target value cannot be nil"))
|
||||
}
|
||||
|
||||
switch b.CompareOp {
|
||||
switch b.CompareOperator {
|
||||
case ValueIsAbove, ValueIsBelow, ValueIsEq, ValueIsNotEq, ValueAboveOrEq, ValueBelowOrEq, ValueOutsideBounds:
|
||||
// valid compare operations
|
||||
case CompareOpNone:
|
||||
errs = append(errs, errors.NewInvalidInputf(errors.CodeInvalidInput, "compare operation cannot be none"))
|
||||
default:
|
||||
errs = append(errs, errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid compare operation: %s", string(b.CompareOp)))
|
||||
errs = append(errs, errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid compare operation: %s", b.CompareOperator.StringValue()))
|
||||
}
|
||||
|
||||
switch b.MatchType {
|
||||
case AtleastOnce, AllTheTimes, OnAverage, InTotal, Last:
|
||||
// valid match types
|
||||
case MatchTypeNone:
|
||||
errs = append(errs, errors.NewInvalidInputf(errors.CodeInvalidInput, "match type cannot be none"))
|
||||
default:
|
||||
errs = append(errs, errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid match type: %s", string(b.MatchType)))
|
||||
errs = append(errs, errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid match type: %s", b.MatchType.StringValue()))
|
||||
}
|
||||
|
||||
return errors.Join(errs...)
|
||||
}
|
||||
|
||||
func (b BasicRuleThreshold) matchesRecoveryThreshold(series v3.Series, ruleUnit string) (Sample, bool) {
|
||||
func (b BasicRuleThreshold) matchesRecoveryThreshold(series *qbtypes.TimeSeries, ruleUnit string) (Sample, bool) {
|
||||
return b.shouldAlertWithTarget(series, b.recoveryTarget(ruleUnit))
|
||||
}
|
||||
func (b BasicRuleThreshold) shouldAlert(series v3.Series, ruleUnit string) (Sample, bool) {
|
||||
func (b BasicRuleThreshold) shouldAlert(series *qbtypes.TimeSeries, ruleUnit string) (Sample, bool) {
|
||||
return b.shouldAlertWithTarget(series, b.target(ruleUnit))
|
||||
}
|
||||
|
||||
func removeGroupinSetPoints(series v3.Series) []v3.Point {
|
||||
var result []v3.Point
|
||||
for _, s := range series.Points {
|
||||
if s.Timestamp >= 0 && !math.IsNaN(s.Value) && !math.IsInf(s.Value, 0) {
|
||||
result = append(result, s)
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// PrepareSampleLabelsForRule prepares the labels for the sample to be used in the alerting.
|
||||
// It accepts seriesLabels and thresholdName as input and returns the labels with the threshold name label added.
|
||||
func PrepareSampleLabelsForRule(seriesLabels map[string]string, thresholdName string) (lbls labels.Labels) {
|
||||
lb := labels.NewBuilder(labels.Labels{})
|
||||
for name, value := range seriesLabels {
|
||||
lb.Set(name, value)
|
||||
func PrepareSampleLabelsForRule(seriesLabels []*qbtypes.Label, thresholdName string) Labels {
|
||||
lb := NewBuilder()
|
||||
for _, label := range seriesLabels {
|
||||
lb.Set(label.Key.Name, fmt.Sprint(label.Value))
|
||||
}
|
||||
lb.Set(LabelThresholdName, thresholdName)
|
||||
lb.Set(LabelSeverityName, strings.ToLower(thresholdName))
|
||||
return lb.Labels()
|
||||
}
|
||||
|
||||
func (b BasicRuleThreshold) shouldAlertWithTarget(series v3.Series, target float64) (Sample, bool) {
|
||||
func (b BasicRuleThreshold) shouldAlertWithTarget(series *qbtypes.TimeSeries, target float64) (Sample, bool) {
|
||||
var shouldAlert bool
|
||||
var alertSmpl Sample
|
||||
lbls := PrepareSampleLabelsForRule(series.Labels, b.Name)
|
||||
|
||||
series.Points = removeGroupinSetPoints(series)
|
||||
|
||||
// nothing to evaluate
|
||||
if len(series.Points) == 0 {
|
||||
if len(series.Values) == 0 {
|
||||
return alertSmpl, false
|
||||
}
|
||||
|
||||
switch b.MatchType {
|
||||
case AtleastOnce:
|
||||
// If any sample matches the condition, the rule is firing.
|
||||
if b.CompareOp == ValueIsAbove {
|
||||
for _, smpl := range series.Points {
|
||||
if b.CompareOperator == ValueIsAbove {
|
||||
for _, smpl := range series.Values {
|
||||
if smpl.Value > target {
|
||||
alertSmpl = Sample{Point: Point{V: smpl.Value}, Metric: lbls}
|
||||
shouldAlert = true
|
||||
break
|
||||
}
|
||||
}
|
||||
} else if b.CompareOp == ValueIsBelow {
|
||||
for _, smpl := range series.Points {
|
||||
} else if b.CompareOperator == ValueIsBelow {
|
||||
for _, smpl := range series.Values {
|
||||
if smpl.Value < target {
|
||||
alertSmpl = Sample{Point: Point{V: smpl.Value}, Metric: lbls}
|
||||
shouldAlert = true
|
||||
break
|
||||
}
|
||||
}
|
||||
} else if b.CompareOp == ValueIsEq {
|
||||
for _, smpl := range series.Points {
|
||||
} else if b.CompareOperator == ValueIsEq {
|
||||
for _, smpl := range series.Values {
|
||||
if smpl.Value == target {
|
||||
alertSmpl = Sample{Point: Point{V: smpl.Value}, Metric: lbls}
|
||||
shouldAlert = true
|
||||
break
|
||||
}
|
||||
}
|
||||
} else if b.CompareOp == ValueIsNotEq {
|
||||
for _, smpl := range series.Points {
|
||||
} else if b.CompareOperator == ValueIsNotEq {
|
||||
for _, smpl := range series.Values {
|
||||
if smpl.Value != target {
|
||||
alertSmpl = Sample{Point: Point{V: smpl.Value}, Metric: lbls}
|
||||
shouldAlert = true
|
||||
break
|
||||
}
|
||||
}
|
||||
} else if b.CompareOp == ValueOutsideBounds {
|
||||
for _, smpl := range series.Points {
|
||||
} else if b.CompareOperator == ValueOutsideBounds {
|
||||
for _, smpl := range series.Values {
|
||||
if math.Abs(smpl.Value) >= target {
|
||||
alertSmpl = Sample{Point: Point{V: smpl.Value}, Metric: lbls}
|
||||
shouldAlert = true
|
||||
@@ -344,8 +326,8 @@ func (b BasicRuleThreshold) shouldAlertWithTarget(series v3.Series, target float
|
||||
// If all samples match the condition, the rule is firing.
|
||||
shouldAlert = true
|
||||
alertSmpl = Sample{Point: Point{V: target}, Metric: lbls}
|
||||
if b.CompareOp == ValueIsAbove {
|
||||
for _, smpl := range series.Points {
|
||||
if b.CompareOperator == ValueIsAbove {
|
||||
for _, smpl := range series.Values {
|
||||
if smpl.Value <= target {
|
||||
shouldAlert = false
|
||||
break
|
||||
@@ -354,15 +336,15 @@ func (b BasicRuleThreshold) shouldAlertWithTarget(series v3.Series, target float
|
||||
// use min value from the series
|
||||
if shouldAlert {
|
||||
var minValue = math.Inf(1)
|
||||
for _, smpl := range series.Points {
|
||||
for _, smpl := range series.Values {
|
||||
if smpl.Value < minValue {
|
||||
minValue = smpl.Value
|
||||
}
|
||||
}
|
||||
alertSmpl = Sample{Point: Point{V: minValue}, Metric: lbls}
|
||||
}
|
||||
} else if b.CompareOp == ValueIsBelow {
|
||||
for _, smpl := range series.Points {
|
||||
} else if b.CompareOperator == ValueIsBelow {
|
||||
for _, smpl := range series.Values {
|
||||
if smpl.Value >= target {
|
||||
shouldAlert = false
|
||||
break
|
||||
@@ -370,22 +352,22 @@ func (b BasicRuleThreshold) shouldAlertWithTarget(series v3.Series, target float
|
||||
}
|
||||
if shouldAlert {
|
||||
var maxValue = math.Inf(-1)
|
||||
for _, smpl := range series.Points {
|
||||
for _, smpl := range series.Values {
|
||||
if smpl.Value > maxValue {
|
||||
maxValue = smpl.Value
|
||||
}
|
||||
}
|
||||
alertSmpl = Sample{Point: Point{V: maxValue}, Metric: lbls}
|
||||
}
|
||||
} else if b.CompareOp == ValueIsEq {
|
||||
for _, smpl := range series.Points {
|
||||
} else if b.CompareOperator == ValueIsEq {
|
||||
for _, smpl := range series.Values {
|
||||
if smpl.Value != target {
|
||||
shouldAlert = false
|
||||
break
|
||||
}
|
||||
}
|
||||
} else if b.CompareOp == ValueIsNotEq {
|
||||
for _, smpl := range series.Points {
|
||||
} else if b.CompareOperator == ValueIsNotEq {
|
||||
for _, smpl := range series.Values {
|
||||
if smpl.Value == target {
|
||||
shouldAlert = false
|
||||
break
|
||||
@@ -393,15 +375,15 @@ func (b BasicRuleThreshold) shouldAlertWithTarget(series v3.Series, target float
|
||||
}
|
||||
// use any non-inf or nan value from the series
|
||||
if shouldAlert {
|
||||
for _, smpl := range series.Points {
|
||||
for _, smpl := range series.Values {
|
||||
if !math.IsInf(smpl.Value, 0) && !math.IsNaN(smpl.Value) {
|
||||
alertSmpl = Sample{Point: Point{V: smpl.Value}, Metric: lbls}
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if b.CompareOp == ValueOutsideBounds {
|
||||
for _, smpl := range series.Points {
|
||||
} else if b.CompareOperator == ValueOutsideBounds {
|
||||
for _, smpl := range series.Values {
|
||||
if math.Abs(smpl.Value) < target {
|
||||
alertSmpl = Sample{Point: Point{V: smpl.Value}, Metric: lbls}
|
||||
shouldAlert = false
|
||||
@@ -412,7 +394,7 @@ func (b BasicRuleThreshold) shouldAlertWithTarget(series v3.Series, target float
|
||||
case OnAverage:
|
||||
// If the average of all samples matches the condition, the rule is firing.
|
||||
var sum, count float64
|
||||
for _, smpl := range series.Points {
|
||||
for _, smpl := range series.Values {
|
||||
if math.IsNaN(smpl.Value) || math.IsInf(smpl.Value, 0) {
|
||||
continue
|
||||
}
|
||||
@@ -421,7 +403,7 @@ func (b BasicRuleThreshold) shouldAlertWithTarget(series v3.Series, target float
|
||||
}
|
||||
avg := sum / count
|
||||
alertSmpl = Sample{Point: Point{V: avg}, Metric: lbls}
|
||||
switch b.CompareOp {
|
||||
switch b.CompareOperator {
|
||||
case ValueIsAbove:
|
||||
if avg > target {
|
||||
shouldAlert = true
|
||||
@@ -447,14 +429,14 @@ func (b BasicRuleThreshold) shouldAlertWithTarget(series v3.Series, target float
|
||||
// If the sum of all samples matches the condition, the rule is firing.
|
||||
var sum float64
|
||||
|
||||
for _, smpl := range series.Points {
|
||||
for _, smpl := range series.Values {
|
||||
if math.IsNaN(smpl.Value) || math.IsInf(smpl.Value, 0) {
|
||||
continue
|
||||
}
|
||||
sum += smpl.Value
|
||||
}
|
||||
alertSmpl = Sample{Point: Point{V: sum}, Metric: lbls}
|
||||
switch b.CompareOp {
|
||||
switch b.CompareOperator {
|
||||
case ValueIsAbove:
|
||||
if sum > target {
|
||||
shouldAlert = true
|
||||
@@ -479,22 +461,22 @@ func (b BasicRuleThreshold) shouldAlertWithTarget(series v3.Series, target float
|
||||
case Last:
|
||||
// If the last sample matches the condition, the rule is firing.
|
||||
shouldAlert = false
|
||||
alertSmpl = Sample{Point: Point{V: series.Points[len(series.Points)-1].Value}, Metric: lbls}
|
||||
switch b.CompareOp {
|
||||
alertSmpl = Sample{Point: Point{V: series.Values[len(series.Values)-1].Value}, Metric: lbls}
|
||||
switch b.CompareOperator {
|
||||
case ValueIsAbove:
|
||||
if series.Points[len(series.Points)-1].Value > target {
|
||||
if series.Values[len(series.Values)-1].Value > target {
|
||||
shouldAlert = true
|
||||
}
|
||||
case ValueIsBelow:
|
||||
if series.Points[len(series.Points)-1].Value < target {
|
||||
if series.Values[len(series.Values)-1].Value < target {
|
||||
shouldAlert = true
|
||||
}
|
||||
case ValueIsEq:
|
||||
if series.Points[len(series.Points)-1].Value == target {
|
||||
if series.Values[len(series.Values)-1].Value == target {
|
||||
shouldAlert = true
|
||||
}
|
||||
case ValueIsNotEq:
|
||||
if series.Points[len(series.Points)-1].Value != target {
|
||||
if series.Values[len(series.Values)-1].Value != target {
|
||||
shouldAlert = true
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,7 +6,8 @@ import (
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
|
||||
v3 "github.com/SigNoz/signoz/pkg/query-service/model/v3"
|
||||
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
|
||||
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
|
||||
)
|
||||
|
||||
func TestBasicRuleThresholdEval_UnitConversion(t *testing.T) {
|
||||
@@ -15,22 +16,29 @@ func TestBasicRuleThresholdEval_UnitConversion(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
threshold BasicRuleThreshold
|
||||
series v3.Series
|
||||
series *qbtypes.TimeSeries
|
||||
ruleUnit string
|
||||
shouldAlert bool
|
||||
}{
|
||||
{
|
||||
name: "milliseconds to seconds conversion - should alert",
|
||||
threshold: BasicRuleThreshold{
|
||||
Name: CriticalThresholdName,
|
||||
TargetValue: &target, // 100ms
|
||||
TargetUnit: "ms",
|
||||
MatchType: AtleastOnce,
|
||||
CompareOp: ValueIsAbove,
|
||||
Name: CriticalThresholdName,
|
||||
TargetValue: &target, // 100ms
|
||||
TargetUnit: "ms",
|
||||
MatchType: AtleastOnce,
|
||||
CompareOperator: ValueIsAbove,
|
||||
},
|
||||
series: v3.Series{
|
||||
Labels: map[string]string{"service": "test"},
|
||||
Points: []v3.Point{
|
||||
series: &qbtypes.TimeSeries{
|
||||
Labels: []*qbtypes.Label{
|
||||
{
|
||||
Key: telemetrytypes.TelemetryFieldKey{
|
||||
Name: "service",
|
||||
},
|
||||
Value: "test",
|
||||
},
|
||||
},
|
||||
Values: []*qbtypes.TimeSeriesValue{
|
||||
{Value: 0.15, Timestamp: 1000}, // 150ms in seconds
|
||||
},
|
||||
},
|
||||
@@ -40,15 +48,22 @@ func TestBasicRuleThresholdEval_UnitConversion(t *testing.T) {
|
||||
{
|
||||
name: "milliseconds to seconds conversion - should not alert",
|
||||
threshold: BasicRuleThreshold{
|
||||
Name: WarningThresholdName,
|
||||
TargetValue: &target, // 100ms
|
||||
TargetUnit: "ms",
|
||||
MatchType: AtleastOnce,
|
||||
CompareOp: ValueIsAbove,
|
||||
Name: WarningThresholdName,
|
||||
TargetValue: &target, // 100ms
|
||||
TargetUnit: "ms",
|
||||
MatchType: AtleastOnce,
|
||||
CompareOperator: ValueIsAbove,
|
||||
},
|
||||
series: v3.Series{
|
||||
Labels: map[string]string{"service": "test"},
|
||||
Points: []v3.Point{
|
||||
series: &qbtypes.TimeSeries{
|
||||
Labels: []*qbtypes.Label{
|
||||
{
|
||||
Key: telemetrytypes.TelemetryFieldKey{
|
||||
Name: "service",
|
||||
},
|
||||
Value: "test",
|
||||
},
|
||||
},
|
||||
Values: []*qbtypes.TimeSeriesValue{
|
||||
{Value: 0.05, Timestamp: 1000}, // 50ms in seconds
|
||||
},
|
||||
},
|
||||
@@ -58,15 +73,22 @@ func TestBasicRuleThresholdEval_UnitConversion(t *testing.T) {
|
||||
{
|
||||
name: "seconds to milliseconds conversion - should alert",
|
||||
threshold: BasicRuleThreshold{
|
||||
Name: CriticalThresholdName,
|
||||
TargetValue: &target, // 100s
|
||||
TargetUnit: "s",
|
||||
MatchType: AtleastOnce,
|
||||
CompareOp: ValueIsAbove,
|
||||
Name: CriticalThresholdName,
|
||||
TargetValue: &target, // 100s
|
||||
TargetUnit: "s",
|
||||
MatchType: AtleastOnce,
|
||||
CompareOperator: ValueIsAbove,
|
||||
},
|
||||
series: v3.Series{
|
||||
Labels: map[string]string{"service": "test"},
|
||||
Points: []v3.Point{
|
||||
series: &qbtypes.TimeSeries{
|
||||
Labels: []*qbtypes.Label{
|
||||
{
|
||||
Key: telemetrytypes.TelemetryFieldKey{
|
||||
Name: "service",
|
||||
},
|
||||
Value: "test",
|
||||
},
|
||||
},
|
||||
Values: []*qbtypes.TimeSeriesValue{
|
||||
{Value: 150000, Timestamp: 1000}, // 150000ms = 150s
|
||||
},
|
||||
},
|
||||
@@ -77,15 +99,22 @@ func TestBasicRuleThresholdEval_UnitConversion(t *testing.T) {
|
||||
{
|
||||
name: "bytes to kibibytes conversion - should alert",
|
||||
threshold: BasicRuleThreshold{
|
||||
Name: InfoThresholdName,
|
||||
TargetValue: &target, // 100 bytes
|
||||
TargetUnit: "bytes",
|
||||
MatchType: AtleastOnce,
|
||||
CompareOp: ValueIsAbove,
|
||||
Name: InfoThresholdName,
|
||||
TargetValue: &target, // 100 bytes
|
||||
TargetUnit: "bytes",
|
||||
MatchType: AtleastOnce,
|
||||
CompareOperator: ValueIsAbove,
|
||||
},
|
||||
series: v3.Series{
|
||||
Labels: map[string]string{"service": "test"},
|
||||
Points: []v3.Point{
|
||||
series: &qbtypes.TimeSeries{
|
||||
Labels: []*qbtypes.Label{
|
||||
{
|
||||
Key: telemetrytypes.TelemetryFieldKey{
|
||||
Name: "service",
|
||||
},
|
||||
Value: "test",
|
||||
},
|
||||
},
|
||||
Values: []*qbtypes.TimeSeriesValue{
|
||||
{Value: 0.15, Timestamp: 1000}, // 0.15KiB ≈ 153.6 bytes
|
||||
},
|
||||
},
|
||||
@@ -95,15 +124,22 @@ func TestBasicRuleThresholdEval_UnitConversion(t *testing.T) {
|
||||
{
|
||||
name: "kibibytes to mebibytes conversion - should alert",
|
||||
threshold: BasicRuleThreshold{
|
||||
Name: ErrorThresholdName,
|
||||
TargetValue: &target, // 100KiB
|
||||
TargetUnit: "kbytes",
|
||||
MatchType: AtleastOnce,
|
||||
CompareOp: ValueIsAbove,
|
||||
Name: ErrorThresholdName,
|
||||
TargetValue: &target, // 100KiB
|
||||
TargetUnit: "kbytes",
|
||||
MatchType: AtleastOnce,
|
||||
CompareOperator: ValueIsAbove,
|
||||
},
|
||||
series: v3.Series{
|
||||
Labels: map[string]string{"service": "test"},
|
||||
Points: []v3.Point{
|
||||
series: &qbtypes.TimeSeries{
|
||||
Labels: []*qbtypes.Label{
|
||||
{
|
||||
Key: telemetrytypes.TelemetryFieldKey{
|
||||
Name: "service",
|
||||
},
|
||||
Value: "test",
|
||||
},
|
||||
},
|
||||
Values: []*qbtypes.TimeSeriesValue{
|
||||
{Value: 0.15, Timestamp: 1000},
|
||||
},
|
||||
},
|
||||
@@ -114,15 +150,22 @@ func TestBasicRuleThresholdEval_UnitConversion(t *testing.T) {
|
||||
{
|
||||
name: "milliseconds to seconds with ValueIsBelow - should alert",
|
||||
threshold: BasicRuleThreshold{
|
||||
Name: WarningThresholdName,
|
||||
TargetValue: &target, // 100ms
|
||||
TargetUnit: "ms",
|
||||
MatchType: AtleastOnce,
|
||||
CompareOp: ValueIsBelow,
|
||||
Name: WarningThresholdName,
|
||||
TargetValue: &target, // 100ms
|
||||
TargetUnit: "ms",
|
||||
MatchType: AtleastOnce,
|
||||
CompareOperator: ValueIsBelow,
|
||||
},
|
||||
series: v3.Series{
|
||||
Labels: map[string]string{"service": "test"},
|
||||
Points: []v3.Point{
|
||||
series: &qbtypes.TimeSeries{
|
||||
Labels: []*qbtypes.Label{
|
||||
{
|
||||
Key: telemetrytypes.TelemetryFieldKey{
|
||||
Name: "service",
|
||||
},
|
||||
Value: "test",
|
||||
},
|
||||
},
|
||||
Values: []*qbtypes.TimeSeriesValue{
|
||||
{Value: 0.05, Timestamp: 1000}, // 50ms in seconds
|
||||
},
|
||||
},
|
||||
@@ -132,15 +175,22 @@ func TestBasicRuleThresholdEval_UnitConversion(t *testing.T) {
|
||||
{
|
||||
name: "milliseconds to seconds with OnAverage - should alert",
|
||||
threshold: BasicRuleThreshold{
|
||||
Name: CriticalThresholdName,
|
||||
TargetValue: &target, // 100ms
|
||||
TargetUnit: "ms",
|
||||
MatchType: OnAverage,
|
||||
CompareOp: ValueIsAbove,
|
||||
Name: CriticalThresholdName,
|
||||
TargetValue: &target, // 100ms
|
||||
TargetUnit: "ms",
|
||||
MatchType: OnAverage,
|
||||
CompareOperator: ValueIsAbove,
|
||||
},
|
||||
series: v3.Series{
|
||||
Labels: map[string]string{"service": "test"},
|
||||
Points: []v3.Point{
|
||||
series: &qbtypes.TimeSeries{
|
||||
Labels: []*qbtypes.Label{
|
||||
{
|
||||
Key: telemetrytypes.TelemetryFieldKey{
|
||||
Name: "service",
|
||||
},
|
||||
Value: "test",
|
||||
},
|
||||
},
|
||||
Values: []*qbtypes.TimeSeriesValue{
|
||||
{Value: 0.08, Timestamp: 1000}, // 80ms
|
||||
{Value: 0.12, Timestamp: 2000}, // 120ms
|
||||
{Value: 0.15, Timestamp: 3000}, // 150ms
|
||||
@@ -152,15 +202,22 @@ func TestBasicRuleThresholdEval_UnitConversion(t *testing.T) {
|
||||
{
|
||||
name: "decimal megabytes to gigabytes with InTotal - should alert",
|
||||
threshold: BasicRuleThreshold{
|
||||
Name: WarningThresholdName,
|
||||
TargetValue: &target, // 100MB
|
||||
TargetUnit: "decmbytes",
|
||||
MatchType: InTotal,
|
||||
CompareOp: ValueIsAbove,
|
||||
Name: WarningThresholdName,
|
||||
TargetValue: &target, // 100MB
|
||||
TargetUnit: "decmbytes",
|
||||
MatchType: InTotal,
|
||||
CompareOperator: ValueIsAbove,
|
||||
},
|
||||
series: v3.Series{
|
||||
Labels: map[string]string{"service": "test"},
|
||||
Points: []v3.Point{
|
||||
series: &qbtypes.TimeSeries{
|
||||
Labels: []*qbtypes.Label{
|
||||
{
|
||||
Key: telemetrytypes.TelemetryFieldKey{
|
||||
Name: "service",
|
||||
},
|
||||
Value: "test",
|
||||
},
|
||||
},
|
||||
Values: []*qbtypes.TimeSeriesValue{
|
||||
{Value: 0.04, Timestamp: 1000}, // 40MB
|
||||
{Value: 0.05, Timestamp: 2000}, // 50MB
|
||||
{Value: 0.03, Timestamp: 3000}, // 30MB
|
||||
@@ -172,15 +229,22 @@ func TestBasicRuleThresholdEval_UnitConversion(t *testing.T) {
|
||||
{
|
||||
name: "milliseconds to seconds with AllTheTimes - should alert",
|
||||
threshold: BasicRuleThreshold{
|
||||
Name: InfoThresholdName,
|
||||
TargetValue: &target, // 100ms
|
||||
TargetUnit: "ms",
|
||||
MatchType: AllTheTimes,
|
||||
CompareOp: ValueIsAbove,
|
||||
Name: InfoThresholdName,
|
||||
TargetValue: &target, // 100ms
|
||||
TargetUnit: "ms",
|
||||
MatchType: AllTheTimes,
|
||||
CompareOperator: ValueIsAbove,
|
||||
},
|
||||
series: v3.Series{
|
||||
Labels: map[string]string{"service": "test"},
|
||||
Points: []v3.Point{
|
||||
series: &qbtypes.TimeSeries{
|
||||
Labels: []*qbtypes.Label{
|
||||
{
|
||||
Key: telemetrytypes.TelemetryFieldKey{
|
||||
Name: "service",
|
||||
},
|
||||
Value: "test",
|
||||
},
|
||||
},
|
||||
Values: []*qbtypes.TimeSeriesValue{
|
||||
{Value: 0.11, Timestamp: 1000}, // 110ms
|
||||
{Value: 0.12, Timestamp: 2000}, // 120ms
|
||||
{Value: 0.15, Timestamp: 3000}, // 150ms
|
||||
@@ -192,15 +256,22 @@ func TestBasicRuleThresholdEval_UnitConversion(t *testing.T) {
|
||||
{
|
||||
name: "kilobytes to megabytes with Last - should not alert",
|
||||
threshold: BasicRuleThreshold{
|
||||
Name: ErrorThresholdName,
|
||||
TargetValue: &target, // 100kB
|
||||
TargetUnit: "deckbytes",
|
||||
MatchType: Last,
|
||||
CompareOp: ValueIsAbove,
|
||||
Name: ErrorThresholdName,
|
||||
TargetValue: &target, // 100kB
|
||||
TargetUnit: "deckbytes",
|
||||
MatchType: Last,
|
||||
CompareOperator: ValueIsAbove,
|
||||
},
|
||||
series: v3.Series{
|
||||
Labels: map[string]string{"service": "test"},
|
||||
Points: []v3.Point{
|
||||
series: &qbtypes.TimeSeries{
|
||||
Labels: []*qbtypes.Label{
|
||||
{
|
||||
Key: telemetrytypes.TelemetryFieldKey{
|
||||
Name: "service",
|
||||
},
|
||||
Value: "test",
|
||||
},
|
||||
},
|
||||
Values: []*qbtypes.TimeSeriesValue{
|
||||
{Value: 0.15, Timestamp: 1000}, // 150kB
|
||||
{Value: 0.05, Timestamp: 2000}, // 50kB (last value)
|
||||
},
|
||||
@@ -212,15 +283,22 @@ func TestBasicRuleThresholdEval_UnitConversion(t *testing.T) {
|
||||
{
|
||||
name: "bytes per second to kilobytes per second - should alert",
|
||||
threshold: BasicRuleThreshold{
|
||||
Name: CriticalThresholdName,
|
||||
TargetValue: &target, // 100 bytes/s
|
||||
TargetUnit: "Bps",
|
||||
MatchType: AtleastOnce,
|
||||
CompareOp: ValueIsAbove,
|
||||
Name: CriticalThresholdName,
|
||||
TargetValue: &target, // 100 bytes/s
|
||||
TargetUnit: "Bps",
|
||||
MatchType: AtleastOnce,
|
||||
CompareOperator: ValueIsAbove,
|
||||
},
|
||||
series: v3.Series{
|
||||
Labels: map[string]string{"service": "test"},
|
||||
Points: []v3.Point{
|
||||
series: &qbtypes.TimeSeries{
|
||||
Labels: []*qbtypes.Label{
|
||||
{
|
||||
Key: telemetrytypes.TelemetryFieldKey{
|
||||
Name: "service",
|
||||
},
|
||||
Value: "test",
|
||||
},
|
||||
},
|
||||
Values: []*qbtypes.TimeSeriesValue{
|
||||
{Value: 0.15, Timestamp: 1000},
|
||||
},
|
||||
},
|
||||
@@ -231,15 +309,22 @@ func TestBasicRuleThresholdEval_UnitConversion(t *testing.T) {
|
||||
{
|
||||
name: "same unit - no conversion needed - should alert",
|
||||
threshold: BasicRuleThreshold{
|
||||
Name: InfoThresholdName,
|
||||
TargetValue: &target, // 100ms
|
||||
TargetUnit: "ms",
|
||||
MatchType: AtleastOnce,
|
||||
CompareOp: ValueIsAbove,
|
||||
Name: InfoThresholdName,
|
||||
TargetValue: &target, // 100ms
|
||||
TargetUnit: "ms",
|
||||
MatchType: AtleastOnce,
|
||||
CompareOperator: ValueIsAbove,
|
||||
},
|
||||
series: v3.Series{
|
||||
Labels: map[string]string{"service": "test"},
|
||||
Points: []v3.Point{
|
||||
series: &qbtypes.TimeSeries{
|
||||
Labels: []*qbtypes.Label{
|
||||
{
|
||||
Key: telemetrytypes.TelemetryFieldKey{
|
||||
Name: "service",
|
||||
},
|
||||
Value: "test",
|
||||
},
|
||||
},
|
||||
Values: []*qbtypes.TimeSeriesValue{
|
||||
{Value: 150, Timestamp: 1000}, // 150ms
|
||||
},
|
||||
},
|
||||
@@ -250,15 +335,22 @@ func TestBasicRuleThresholdEval_UnitConversion(t *testing.T) {
|
||||
{
|
||||
name: "empty unit - no conversion - should alert",
|
||||
threshold: BasicRuleThreshold{
|
||||
Name: ErrorThresholdName,
|
||||
TargetValue: &target, // 100 (unitless)
|
||||
TargetUnit: "",
|
||||
MatchType: AtleastOnce,
|
||||
CompareOp: ValueIsAbove,
|
||||
Name: ErrorThresholdName,
|
||||
TargetValue: &target, // 100 (unitless)
|
||||
TargetUnit: "",
|
||||
MatchType: AtleastOnce,
|
||||
CompareOperator: ValueIsAbove,
|
||||
},
|
||||
series: v3.Series{
|
||||
Labels: map[string]string{"service": "test"},
|
||||
Points: []v3.Point{
|
||||
series: &qbtypes.TimeSeries{
|
||||
Labels: []*qbtypes.Label{
|
||||
{
|
||||
Key: telemetrytypes.TelemetryFieldKey{
|
||||
Name: "service",
|
||||
},
|
||||
Value: "test",
|
||||
},
|
||||
},
|
||||
Values: []*qbtypes.TimeSeriesValue{
|
||||
{Value: 150, Timestamp: 1000}, // 150 (unitless)
|
||||
},
|
||||
},
|
||||
@@ -270,15 +362,22 @@ func TestBasicRuleThresholdEval_UnitConversion(t *testing.T) {
|
||||
{
|
||||
name: "bytes to Gibibytes - should alert",
|
||||
threshold: BasicRuleThreshold{
|
||||
Name: CriticalThresholdName,
|
||||
TargetValue: &target, // 100 Gibibytes
|
||||
TargetUnit: "GiBy",
|
||||
MatchType: AtleastOnce,
|
||||
CompareOp: ValueIsBelow,
|
||||
Name: CriticalThresholdName,
|
||||
TargetValue: &target, // 100 Gibibytes
|
||||
TargetUnit: "GiBy",
|
||||
MatchType: AtleastOnce,
|
||||
CompareOperator: ValueIsBelow,
|
||||
},
|
||||
series: v3.Series{
|
||||
Labels: map[string]string{"service": "test"},
|
||||
Points: []v3.Point{
|
||||
series: &qbtypes.TimeSeries{
|
||||
Labels: []*qbtypes.Label{
|
||||
{
|
||||
Key: telemetrytypes.TelemetryFieldKey{
|
||||
Name: "service",
|
||||
},
|
||||
Value: "test",
|
||||
},
|
||||
},
|
||||
Values: []*qbtypes.TimeSeriesValue{
|
||||
{Value: 70 * 1024 * 1024 * 1024, Timestamp: 1000}, // 70 Gibibytes
|
||||
},
|
||||
},
|
||||
@@ -290,15 +389,22 @@ func TestBasicRuleThresholdEval_UnitConversion(t *testing.T) {
|
||||
{
|
||||
name: "bytes per second to MiB per second - should alert",
|
||||
threshold: BasicRuleThreshold{
|
||||
Name: CriticalThresholdName,
|
||||
TargetValue: &target, // 100 MiB/s
|
||||
TargetUnit: "MiBy/s",
|
||||
MatchType: AtleastOnce,
|
||||
CompareOp: ValueIsBelow,
|
||||
Name: CriticalThresholdName,
|
||||
TargetValue: &target, // 100 MiB/s
|
||||
TargetUnit: "MiBy/s",
|
||||
MatchType: AtleastOnce,
|
||||
CompareOperator: ValueIsBelow,
|
||||
},
|
||||
series: v3.Series{
|
||||
Labels: map[string]string{"service": "test"},
|
||||
Points: []v3.Point{
|
||||
series: &qbtypes.TimeSeries{
|
||||
Labels: []*qbtypes.Label{
|
||||
{
|
||||
Key: telemetrytypes.TelemetryFieldKey{
|
||||
Name: "service",
|
||||
},
|
||||
Value: "test",
|
||||
},
|
||||
},
|
||||
Values: []*qbtypes.TimeSeriesValue{
|
||||
{Value: 30 * 1024 * 1024, Timestamp: 1000}, // 30 MiB/s
|
||||
},
|
||||
},
|
||||
@@ -346,20 +452,20 @@ func TestPrepareSampleLabelsForRule(t *testing.T) {
|
||||
alertAllHashes := make(map[uint64]struct{})
|
||||
thresholdName := "test"
|
||||
for range 50_000 {
|
||||
sampleLabels := map[string]string{
|
||||
"service": "test",
|
||||
"env": "prod",
|
||||
"tier": "backend",
|
||||
"namespace": "default",
|
||||
"pod": "test-pod",
|
||||
"container": "test-container",
|
||||
"node": "test-node",
|
||||
"cluster": "test-cluster",
|
||||
"region": "test-region",
|
||||
"az": "test-az",
|
||||
"hostname": "test-hostname",
|
||||
"ip": "192.168.1.1",
|
||||
"port": "8080",
|
||||
sampleLabels := []*qbtypes.Label{
|
||||
{Key: telemetrytypes.TelemetryFieldKey{Name: "service"}, Value: "test"},
|
||||
{Key: telemetrytypes.TelemetryFieldKey{Name: "env"}, Value: "prod"},
|
||||
{Key: telemetrytypes.TelemetryFieldKey{Name: "tier"}, Value: "backend"},
|
||||
{Key: telemetrytypes.TelemetryFieldKey{Name: "namespace"}, Value: "default"},
|
||||
{Key: telemetrytypes.TelemetryFieldKey{Name: "pod"}, Value: "test-pod"},
|
||||
{Key: telemetrytypes.TelemetryFieldKey{Name: "container"}, Value: "test-container"},
|
||||
{Key: telemetrytypes.TelemetryFieldKey{Name: "node"}, Value: "test-node"},
|
||||
{Key: telemetrytypes.TelemetryFieldKey{Name: "cluster"}, Value: "test-cluster"},
|
||||
{Key: telemetrytypes.TelemetryFieldKey{Name: "region"}, Value: "test-region"},
|
||||
{Key: telemetrytypes.TelemetryFieldKey{Name: "az"}, Value: "test-az"},
|
||||
{Key: telemetrytypes.TelemetryFieldKey{Name: "hostname"}, Value: "test-hostname"},
|
||||
{Key: telemetrytypes.TelemetryFieldKey{Name: "ip"}, Value: "192.168.1.1"},
|
||||
{Key: telemetrytypes.TelemetryFieldKey{Name: "port"}, Value: "8080"},
|
||||
}
|
||||
lbls := PrepareSampleLabelsForRule(sampleLabels, thresholdName)
|
||||
assert.True(t, lbls.Has(LabelThresholdName), "LabelThresholdName not found in labels")
|
||||
|
||||
Reference in New Issue
Block a user