mirror of
https://github.com/SigNoz/signoz.git
synced 2026-05-05 18:10:31 +01:00
Compare commits
22 Commits
fix/recurr
...
feat/maint
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5aa05df256 | ||
|
|
a5bd9a6533 | ||
|
|
25225e6228 | ||
|
|
a9a33c5374 | ||
|
|
60930f6980 | ||
|
|
e849656d9c | ||
|
|
517bc8c01a | ||
|
|
1728e3a887 | ||
|
|
814f8b020f | ||
|
|
67b8b469d0 | ||
|
|
fe29eefbee | ||
|
|
eb247ff67d | ||
|
|
54d1470093 | ||
|
|
68464a7752 | ||
|
|
ba2d5f1842 | ||
|
|
f0d47acecb | ||
|
|
ecda68abd9 | ||
|
|
130e40fddc | ||
|
|
0f942ee8f0 | ||
|
|
227a934342 | ||
|
|
e258239c14 | ||
|
|
81a0791967 |
@@ -4450,6 +4450,8 @@ components:
|
||||
type: string
|
||||
kind:
|
||||
$ref: '#/components/schemas/RuletypesMaintenanceKind'
|
||||
labelExpression:
|
||||
type: string
|
||||
name:
|
||||
type: string
|
||||
schedule:
|
||||
@@ -4477,6 +4479,8 @@ components:
|
||||
type: array
|
||||
description:
|
||||
type: string
|
||||
labelExpression:
|
||||
type: string
|
||||
name:
|
||||
type: string
|
||||
schedule:
|
||||
|
||||
@@ -13,7 +13,6 @@ import (
|
||||
"github.com/SigNoz/signoz/pkg/errors"
|
||||
baserules "github.com/SigNoz/signoz/pkg/query-service/rules"
|
||||
"github.com/SigNoz/signoz/pkg/types/ruletypes"
|
||||
"github.com/SigNoz/signoz/pkg/valuer"
|
||||
)
|
||||
|
||||
func PrepareTaskFunc(opts baserules.PrepareTaskOptions) (baserules.Task, error) {
|
||||
@@ -49,7 +48,7 @@ func PrepareTaskFunc(opts baserules.PrepareTaskOptions) (baserules.Task, error)
|
||||
rules = append(rules, tr)
|
||||
|
||||
// create ch rule task for evaluation
|
||||
task = newTask(baserules.TaskTypeCh, opts.TaskName, evaluation.GetFrequency().Duration(), rules, opts.ManagerOpts, opts.NotifyFunc, opts.MaintenanceStore, opts.OrgID)
|
||||
task = newTask(baserules.TaskTypeCh, opts.TaskName, evaluation.GetFrequency().Duration(), rules, opts.ManagerOpts, opts.NotifyFunc)
|
||||
|
||||
} else if opts.Rule.RuleType == ruletypes.RuleTypeProm {
|
||||
|
||||
@@ -73,7 +72,7 @@ func PrepareTaskFunc(opts baserules.PrepareTaskOptions) (baserules.Task, error)
|
||||
rules = append(rules, pr)
|
||||
|
||||
// create promql rule task for evaluation
|
||||
task = newTask(baserules.TaskTypeProm, opts.TaskName, evaluation.GetFrequency().Duration(), rules, opts.ManagerOpts, opts.NotifyFunc, opts.MaintenanceStore, opts.OrgID)
|
||||
task = newTask(baserules.TaskTypeProm, opts.TaskName, evaluation.GetFrequency().Duration(), rules, opts.ManagerOpts, opts.NotifyFunc)
|
||||
|
||||
} else if opts.Rule.RuleType == ruletypes.RuleTypeAnomaly {
|
||||
// create anomaly rule
|
||||
@@ -96,7 +95,7 @@ func PrepareTaskFunc(opts baserules.PrepareTaskOptions) (baserules.Task, error)
|
||||
rules = append(rules, ar)
|
||||
|
||||
// create anomaly rule task for evaluation
|
||||
task = newTask(baserules.TaskTypeCh, opts.TaskName, evaluation.GetFrequency().Duration(), rules, opts.ManagerOpts, opts.NotifyFunc, opts.MaintenanceStore, opts.OrgID)
|
||||
task = newTask(baserules.TaskTypeCh, opts.TaskName, evaluation.GetFrequency().Duration(), rules, opts.ManagerOpts, opts.NotifyFunc)
|
||||
|
||||
} else {
|
||||
return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "unsupported rule type %s. Supported types: %s, %s", opts.Rule.RuleType, ruletypes.RuleTypeProm, ruletypes.RuleTypeThreshold)
|
||||
@@ -210,9 +209,9 @@ func TestNotification(opts baserules.PrepareTestRuleOptions) (int, error) {
|
||||
}
|
||||
|
||||
// newTask returns an appropriate group for the rule type
|
||||
func newTask(taskType baserules.TaskType, name string, frequency time.Duration, rules []baserules.Rule, opts *baserules.ManagerOptions, notify baserules.NotifyFunc, maintenanceStore ruletypes.MaintenanceStore, orgID valuer.UUID) baserules.Task {
|
||||
func newTask(taskType baserules.TaskType, name string, frequency time.Duration, rules []baserules.Rule, opts *baserules.ManagerOptions, notify baserules.NotifyFunc) baserules.Task {
|
||||
if taskType == baserules.TaskTypeCh {
|
||||
return baserules.NewRuleTask(name, "", frequency, rules, opts, notify, maintenanceStore, orgID)
|
||||
return baserules.NewRuleTask(name, "", frequency, rules, opts, notify)
|
||||
}
|
||||
return baserules.NewPromRuleTask(name, "", frequency, rules, opts, notify, maintenanceStore, orgID)
|
||||
return baserules.NewPromRuleTask(name, "", frequency, rules, opts, notify)
|
||||
}
|
||||
|
||||
@@ -6751,6 +6751,10 @@ export interface RuletypesPlannedMaintenanceDTO {
|
||||
*/
|
||||
id: string;
|
||||
kind: RuletypesMaintenanceKindDTO;
|
||||
/**
|
||||
* @type string
|
||||
*/
|
||||
labelExpression?: string;
|
||||
/**
|
||||
* @type string
|
||||
*/
|
||||
@@ -6778,6 +6782,10 @@ export interface RuletypesPostablePlannedMaintenanceDTO {
|
||||
* @type string
|
||||
*/
|
||||
description?: string;
|
||||
/**
|
||||
* @type string
|
||||
*/
|
||||
labelExpression?: string;
|
||||
/**
|
||||
* @type string
|
||||
*/
|
||||
|
||||
@@ -5,7 +5,7 @@ import React, {
|
||||
useMemo,
|
||||
useState,
|
||||
} from 'react';
|
||||
import { CheckOutlined } from '@ant-design/icons';
|
||||
import { CheckOutlined, InfoCircleOutlined } from '@ant-design/icons';
|
||||
import {
|
||||
Button,
|
||||
DatePicker,
|
||||
@@ -16,6 +16,7 @@ import {
|
||||
Select,
|
||||
SelectProps,
|
||||
Spin,
|
||||
Tooltip,
|
||||
Typography,
|
||||
} from 'antd';
|
||||
import type { DefaultOptionType } from 'antd/es/select';
|
||||
@@ -78,6 +79,7 @@ interface PlannedDowntimeFormData {
|
||||
recurrence?: RuletypesRecurrenceDTO | null;
|
||||
alertRules: DefaultOptionType[];
|
||||
timezone?: string;
|
||||
labelExpression?: string;
|
||||
}
|
||||
|
||||
const customFormat = DATE_TIME_FORMATS.ORDINAL_DATETIME;
|
||||
@@ -151,6 +153,7 @@ export function PlannedDowntimeForm(
|
||||
.map((alert) => alert.value)
|
||||
.filter((alert) => alert !== undefined) as string[],
|
||||
name: values.name,
|
||||
labelExpression: values.labelExpression,
|
||||
schedule: {
|
||||
startTime: new Date(
|
||||
handleTimeConversion(
|
||||
@@ -286,6 +289,7 @@ export function PlannedDowntimeForm(
|
||||
),
|
||||
} as RuletypesRecurrenceDTO,
|
||||
timezone: initialValues.schedule?.timezone as string,
|
||||
labelExpression: initialValues.labelExpression || '',
|
||||
};
|
||||
return formData;
|
||||
}, [initialValues, alertOptions]);
|
||||
@@ -566,6 +570,22 @@ export function PlannedDowntimeForm(
|
||||
</Select>
|
||||
</Form.Item>
|
||||
</div>
|
||||
<Form.Item
|
||||
label={
|
||||
<span>
|
||||
Label Expression
|
||||
<Tooltip title='Filter by alert labels. Examples: env == "prod", region == "us-east-1" && severity == "critical"'>
|
||||
<InfoCircleOutlined />
|
||||
</Tooltip>
|
||||
</span>
|
||||
}
|
||||
name="labelExpression"
|
||||
>
|
||||
<Input.TextArea
|
||||
placeholder='e.g. env == "prod" && region == "us-east-1"'
|
||||
autoSize={{ minRows: 2, maxRows: 4 }}
|
||||
/>
|
||||
</Form.Item>
|
||||
<Form.Item style={{ marginBottom: 0 }}>
|
||||
<ModalButtonWrapper>
|
||||
<Button
|
||||
|
||||
@@ -24,6 +24,7 @@ import (
|
||||
"github.com/prometheus/alertmanager/dispatch"
|
||||
"github.com/prometheus/alertmanager/notify"
|
||||
"github.com/prometheus/alertmanager/provider/mem"
|
||||
promTypes "github.com/prometheus/alertmanager/types"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/common/model"
|
||||
"github.com/prometheus/common/promslog"
|
||||
@@ -364,7 +365,7 @@ route:
|
||||
providerSettings := createTestProviderSettings()
|
||||
logger := providerSettings.Logger
|
||||
route := dispatch.NewRoute(conf.Route, nil)
|
||||
marker := alertmanagertypes.NewMarker(prometheus.NewRegistry())
|
||||
marker := promTypes.NewMarker(prometheus.NewRegistry())
|
||||
alerts, err := mem.NewAlerts(context.Background(), marker, time.Hour, 0, nil, logger, prometheus.NewRegistry(), nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
@@ -637,7 +638,7 @@ route:
|
||||
providerSettings := createTestProviderSettings()
|
||||
logger := providerSettings.Logger
|
||||
route := dispatch.NewRoute(conf.Route, nil)
|
||||
marker := alertmanagertypes.NewMarker(prometheus.NewRegistry())
|
||||
marker := promTypes.NewMarker(prometheus.NewRegistry())
|
||||
alerts, err := mem.NewAlerts(context.Background(), marker, time.Hour, 0, nil, logger, prometheus.NewRegistry(), nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
@@ -896,7 +897,7 @@ route:
|
||||
providerSettings := createTestProviderSettings()
|
||||
logger := providerSettings.Logger
|
||||
route := dispatch.NewRoute(conf.Route, nil)
|
||||
marker := alertmanagertypes.NewMarker(prometheus.NewRegistry())
|
||||
marker := promTypes.NewMarker(prometheus.NewRegistry())
|
||||
alerts, err := mem.NewAlerts(context.Background(), marker, time.Hour, 0, nil, logger, prometheus.NewRegistry(), nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
@@ -1158,7 +1159,7 @@ func newAlert(labels model.LabelSet) *alertmanagertypes.Alert {
|
||||
|
||||
func TestDispatcherRace(t *testing.T) {
|
||||
logger := promslog.NewNopLogger()
|
||||
marker := alertmanagertypes.NewMarker(prometheus.NewRegistry())
|
||||
marker := promTypes.NewMarker(prometheus.NewRegistry())
|
||||
alerts, err := mem.NewAlerts(context.Background(), marker, time.Hour, 0, nil, logger, prometheus.NewRegistry(), nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
@@ -1194,7 +1195,7 @@ route:
|
||||
route := dispatch.NewRoute(conf.Route, nil)
|
||||
providerSettings := createTestProviderSettings()
|
||||
logger := providerSettings.Logger
|
||||
marker := alertmanagertypes.NewMarker(prometheus.NewRegistry())
|
||||
marker := promTypes.NewMarker(prometheus.NewRegistry())
|
||||
alerts, err := mem.NewAlerts(context.Background(), marker, time.Hour, 0, nil, logger, prometheus.NewRegistry(), nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
@@ -1264,7 +1265,7 @@ route:
|
||||
|
||||
func TestDispatcher_DoMaintenance(t *testing.T) {
|
||||
r := prometheus.NewRegistry()
|
||||
marker := alertmanagertypes.NewMarker(r)
|
||||
marker := promTypes.NewMarker(r)
|
||||
|
||||
alerts, err := mem.NewAlerts(context.Background(), marker, time.Minute, 0, nil, promslog.NewNopLogger(), prometheus.NewRegistry(), nil)
|
||||
if err != nil {
|
||||
@@ -1370,7 +1371,7 @@ route:
|
||||
providerSettings := createTestProviderSettings()
|
||||
logger := providerSettings.Logger
|
||||
route := dispatch.NewRoute(conf.Route, nil)
|
||||
marker := alertmanagertypes.NewMarker(prometheus.NewRegistry())
|
||||
marker := promTypes.NewMarker(prometheus.NewRegistry())
|
||||
alerts, err := mem.NewAlerts(context.Background(), marker, time.Hour, 0, nil, logger, prometheus.NewRegistry(), nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
|
||||
95
pkg/alertmanager/alertmanagerserver/maintenance_muter.go
Normal file
95
pkg/alertmanager/alertmanagerserver/maintenance_muter.go
Normal file
@@ -0,0 +1,95 @@
|
||||
package alertmanagerserver
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log/slog"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/prometheus/common/model"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/types/ruletypes"
|
||||
)
|
||||
|
||||
// MaintenanceMuter implements types.Muter for maintenance windows.
|
||||
// It suppresses alerts whose ruleId label matches an active maintenance schedule.
|
||||
// Results are cached for cacheTTL to avoid a DB query on every per-alert check.
|
||||
type MaintenanceMuter struct {
|
||||
maintenanceStore ruletypes.MaintenanceStore
|
||||
orgID string
|
||||
logger *slog.Logger
|
||||
|
||||
mu sync.RWMutex
|
||||
cached []*ruletypes.PlannedMaintenance
|
||||
cacheExpiry time.Time
|
||||
}
|
||||
|
||||
const maintenanceCacheTTL = 30 * time.Second
|
||||
|
||||
func NewMaintenanceMuter(store ruletypes.MaintenanceStore, orgID string, logger *slog.Logger) *MaintenanceMuter {
|
||||
return &MaintenanceMuter{
|
||||
maintenanceStore: store,
|
||||
orgID: orgID,
|
||||
logger: logger,
|
||||
}
|
||||
}
|
||||
|
||||
func (m *MaintenanceMuter) Mutes(ctx context.Context, lset model.LabelSet) bool {
|
||||
ruleID := string(lset[ruletypes.AlertRuleIDLabel])
|
||||
if ruleID == "" {
|
||||
return false
|
||||
}
|
||||
now := time.Now()
|
||||
for _, mw := range m.getMaintenances(ctx) {
|
||||
if mw.ShouldSkip(ruleID, now, lset) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// MutedBy returns the IDs of all active maintenance windows currently
|
||||
// suppressing the alert identified by lset. It is used to populate the
|
||||
// `mutedBy` field on the v2 API alert response so that maintenance-suppressed
|
||||
// alerts surface as `state=suppressed` in GetAlerts responses.
|
||||
func (m *MaintenanceMuter) MutedBy(ctx context.Context, lset model.LabelSet) []string {
|
||||
ruleID := string(lset[ruletypes.AlertRuleIDLabel])
|
||||
if ruleID == "" {
|
||||
return nil
|
||||
}
|
||||
var ids []string
|
||||
now := time.Now()
|
||||
for _, mw := range m.getMaintenances(ctx) {
|
||||
if mw.ShouldSkip(ruleID, now, lset) {
|
||||
ids = append(ids, mw.ID.String())
|
||||
}
|
||||
}
|
||||
return ids
|
||||
}
|
||||
|
||||
func (m *MaintenanceMuter) getMaintenances(ctx context.Context) []*ruletypes.PlannedMaintenance {
|
||||
m.mu.RLock()
|
||||
if time.Now().Before(m.cacheExpiry) {
|
||||
cached := m.cached
|
||||
m.mu.RUnlock()
|
||||
return cached
|
||||
}
|
||||
m.mu.RUnlock()
|
||||
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
|
||||
// Double-check after acquiring write lock.
|
||||
if time.Now().Before(m.cacheExpiry) {
|
||||
return m.cached
|
||||
}
|
||||
|
||||
mws, err := m.maintenanceStore.ListPlannedMaintenance(ctx, m.orgID)
|
||||
if err != nil {
|
||||
m.logger.ErrorContext(ctx, "failed to list planned maintenance windows; alerts will not be suppressed", slog.String("org_id", m.orgID))
|
||||
return m.cached // return stale (potentially empty) cache on error
|
||||
}
|
||||
m.cached = mws
|
||||
m.cacheExpiry = time.Now().Add(maintenanceCacheTTL)
|
||||
return m.cached
|
||||
}
|
||||
109
pkg/alertmanager/alertmanagerserver/pipeline_builder.go
Normal file
109
pkg/alertmanager/alertmanagerserver/pipeline_builder.go
Normal file
@@ -0,0 +1,109 @@
|
||||
// Copyright (c) 2026 SigNoz, Inc.
|
||||
// Copyright 2015 Prometheus Team
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package alertmanagerserver
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/prometheus/alertmanager/featurecontrol"
|
||||
"github.com/prometheus/alertmanager/inhibit"
|
||||
"github.com/prometheus/alertmanager/nflog/nflogpb"
|
||||
"github.com/prometheus/alertmanager/notify"
|
||||
"github.com/prometheus/alertmanager/silence"
|
||||
"github.com/prometheus/alertmanager/timeinterval"
|
||||
"github.com/prometheus/alertmanager/types"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
// pipelineBuilder is a local copy of notify.PipelineBuilder that injects
|
||||
// the maintenance mute stage immediately before the receiver stage.
|
||||
//
|
||||
// We maintain our own copy so we can control exactly where in the pipeline
|
||||
// the maintenance stage runs (between the silence stage and the receiver),
|
||||
// which is not possible by wrapping the output of the upstream builder.
|
||||
//
|
||||
// Upstream pipeline order:
|
||||
// GossipSettle → Inhibit → TimeActive → TimeMute → Silence → [mms] → Receiver
|
||||
type pipelineBuilder struct {
|
||||
metrics *notify.Metrics
|
||||
ff featurecontrol.Flagger
|
||||
}
|
||||
|
||||
func newPipelineBuilder(
|
||||
r prometheus.Registerer,
|
||||
ff featurecontrol.Flagger,
|
||||
) *pipelineBuilder {
|
||||
return &pipelineBuilder{
|
||||
metrics: notify.NewMetrics(r, ff),
|
||||
ff: ff,
|
||||
}
|
||||
}
|
||||
|
||||
// New returns a map of receivers to Stages, mirroring notify.PipelineBuilder.New
|
||||
// but inserting a maintenanceMuteStage between the silence stage and the receiver.
|
||||
func (pb *pipelineBuilder) New(
|
||||
receivers map[string][]notify.Integration,
|
||||
wait func() time.Duration,
|
||||
inhibitor *inhibit.Inhibitor,
|
||||
silencer *silence.Silencer,
|
||||
intervener *timeinterval.Intervener,
|
||||
marker types.GroupMarker,
|
||||
muter *MaintenanceMuter,
|
||||
notificationLog notify.NotificationLog,
|
||||
peer notify.Peer,
|
||||
) notify.RoutingStage {
|
||||
rs := make(notify.RoutingStage, len(receivers))
|
||||
|
||||
ms := notify.NewGossipSettleStage(peer)
|
||||
is := notify.NewMuteStage(inhibitor, pb.metrics)
|
||||
tas := notify.NewTimeActiveStage(intervener, marker, pb.metrics)
|
||||
tms := notify.NewTimeMuteStage(intervener, marker, pb.metrics)
|
||||
ss := notify.NewMuteStage(silencer, pb.metrics)
|
||||
mms := notify.NewMuteStage(muter, pb.metrics)
|
||||
|
||||
for name := range receivers {
|
||||
stages := notify.MultiStage{ms, is, tas, tms, ss, mms}
|
||||
stages = append(stages, createReceiverStage(name, receivers[name], wait, notificationLog, pb.metrics))
|
||||
rs[name] = stages
|
||||
}
|
||||
|
||||
pb.metrics.InitializeFor(receivers)
|
||||
return rs
|
||||
}
|
||||
|
||||
// createReceiverStage is a copy of notify.createReceiverStage (unexported upstream).
|
||||
func createReceiverStage(
|
||||
name string,
|
||||
integrations []notify.Integration,
|
||||
wait func() time.Duration,
|
||||
notificationLog notify.NotificationLog,
|
||||
metrics *notify.Metrics,
|
||||
) notify.Stage {
|
||||
var fs notify.FanoutStage
|
||||
for i := range integrations {
|
||||
recv := &nflogpb.Receiver{
|
||||
GroupName: name,
|
||||
Integration: integrations[i].Name(),
|
||||
Idx: uint32(integrations[i].Index()),
|
||||
}
|
||||
var s notify.MultiStage
|
||||
s = append(s, notify.NewWaitStage(wait))
|
||||
s = append(s, notify.NewDedupStage(&integrations[i], notificationLog, recv))
|
||||
s = append(s, notify.NewRetryStage(integrations[i], name, metrics))
|
||||
s = append(s, notify.NewSetNotifiesStage(notificationLog, recv))
|
||||
fs = append(fs, s)
|
||||
}
|
||||
return fs
|
||||
}
|
||||
@@ -26,14 +26,13 @@ import (
|
||||
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager"
|
||||
"github.com/SigNoz/signoz/pkg/errors"
|
||||
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
|
||||
"github.com/SigNoz/signoz/pkg/types/ruletypes"
|
||||
)
|
||||
|
||||
var (
|
||||
// This is not a real file and will never be used. We need this placeholder to ensure maintenance runs on shutdown. See
|
||||
// https://github.com/prometheus/server/blob/3ee2cd0f1271e277295c02b6160507b4d193dde2/silence/silence.go#L435-L438
|
||||
// and https://github.com/prometheus/server/blob/3b06b97af4d146e141af92885a185891eb79a5b0/nflog/nflog.go#L362.
|
||||
snapfnoop string = "snapfnoop"
|
||||
)
|
||||
// This is not a real snapshot file and will never be used. We need this placeholder to ensure maintenance runs on shutdown.
|
||||
// See https://github.com/prometheus/alertmanager/blob/3ee2cd0f1271e277295c02b6160507b4d193dde2/silence/silence.go#L435-L438
|
||||
// and https://github.com/prometheus/alertmanager/blob/3b06b97af4d146e141af92885a185891eb79a5b0/nflog/nflog.go#L362.
|
||||
var snapfnoop string = "snapfnoop"
|
||||
|
||||
type Server struct {
|
||||
// logger is the logger for the alertmanager
|
||||
@@ -63,15 +62,25 @@ type Server struct {
|
||||
silencer *silence.Silencer
|
||||
silences *silence.Silences
|
||||
timeIntervals map[string][]timeinterval.TimeInterval
|
||||
pipelineBuilder *notify.PipelineBuilder
|
||||
marker *alertmanagertypes.MemMarker
|
||||
pipelineBuilder *pipelineBuilder
|
||||
muter *MaintenanceMuter
|
||||
marker *types.MemMarker
|
||||
tmpl *template.Template
|
||||
wg sync.WaitGroup
|
||||
stopc chan struct{}
|
||||
notificationManager nfmanager.NotificationManager
|
||||
}
|
||||
|
||||
func New(ctx context.Context, logger *slog.Logger, registry prometheus.Registerer, srvConfig Config, orgID string, stateStore alertmanagertypes.StateStore, nfManager nfmanager.NotificationManager) (*Server, error) {
|
||||
func New(
|
||||
ctx context.Context,
|
||||
logger *slog.Logger,
|
||||
registry prometheus.Registerer,
|
||||
srvConfig Config,
|
||||
orgID string,
|
||||
stateStore alertmanagertypes.StateStore,
|
||||
nfManager nfmanager.NotificationManager,
|
||||
maintenanceStore ruletypes.MaintenanceStore,
|
||||
) (*Server, error) {
|
||||
server := &Server{
|
||||
logger: logger.With(slog.String("pkg", "go.signoz.io/pkg/alertmanager/alertmanagerserver")),
|
||||
registry: registry,
|
||||
@@ -84,7 +93,7 @@ func New(ctx context.Context, logger *slog.Logger, registry prometheus.Registere
|
||||
signozRegisterer := prometheus.WrapRegistererWithPrefix("signoz_", registry)
|
||||
signozRegisterer = prometheus.WrapRegistererWith(prometheus.Labels{"org_id": server.orgID}, signozRegisterer)
|
||||
// initialize marker
|
||||
server.marker = alertmanagertypes.NewMarker(signozRegisterer)
|
||||
server.marker = types.NewMarker(signozRegisterer)
|
||||
|
||||
// get silences for initial state
|
||||
state, err := server.stateStore.Get(ctx, server.orgID)
|
||||
@@ -160,7 +169,6 @@ func New(ctx context.Context, logger *slog.Logger, registry prometheus.Registere
|
||||
|
||||
return c, server.stateStore.Set(ctx, storableSilences)
|
||||
})
|
||||
|
||||
}()
|
||||
|
||||
// Start maintenance for notification logs
|
||||
@@ -196,17 +204,25 @@ func New(ctx context.Context, logger *slog.Logger, registry prometheus.Registere
|
||||
return nil, err
|
||||
}
|
||||
|
||||
server.pipelineBuilder = notify.NewPipelineBuilder(signozRegisterer, featurecontrol.NoopFlags{})
|
||||
server.muter = NewMaintenanceMuter(maintenanceStore, orgID, server.logger)
|
||||
server.pipelineBuilder = newPipelineBuilder(signozRegisterer, featurecontrol.NoopFlags{})
|
||||
server.dispatcherMetrics = NewDispatcherMetrics(false, signozRegisterer)
|
||||
|
||||
return server, nil
|
||||
}
|
||||
|
||||
func (server *Server) GetAlerts(ctx context.Context, params alertmanagertypes.GettableAlertsParams) (alertmanagertypes.GettableAlerts, error) {
|
||||
return alertmanagertypes.NewGettableAlertsFromAlertProvider(server.alerts, server.alertmanagerConfig, server.marker.Status, func(labels model.LabelSet) {
|
||||
server.inhibitor.Mutes(ctx, labels)
|
||||
server.silencer.Mutes(ctx, labels)
|
||||
}, params)
|
||||
return alertmanagertypes.NewGettableAlertsFromAlertProvider(
|
||||
server.alerts, server.alertmanagerConfig, server.marker.Status,
|
||||
func(labels model.LabelSet) {
|
||||
server.inhibitor.Mutes(ctx, labels)
|
||||
server.silencer.Mutes(ctx, labels)
|
||||
},
|
||||
func(labels model.LabelSet) []string {
|
||||
return server.muter.MutedBy(ctx, labels)
|
||||
},
|
||||
params,
|
||||
)
|
||||
}
|
||||
|
||||
func (server *Server) PutAlerts(ctx context.Context, postableAlerts alertmanagertypes.PostableAlerts) error {
|
||||
@@ -290,6 +306,7 @@ func (server *Server) SetConfig(ctx context.Context, alertmanagerConfig *alertma
|
||||
server.silencer,
|
||||
intervener,
|
||||
server.marker,
|
||||
server.muter,
|
||||
server.nflog,
|
||||
pipelinePeer,
|
||||
)
|
||||
|
||||
@@ -7,6 +7,10 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
sqlmock "github.com/DATA-DOG/go-sqlmock"
|
||||
"github.com/SigNoz/signoz/pkg/ruler/rulestore/sqlrulestore"
|
||||
"github.com/SigNoz/signoz/pkg/sqlstore"
|
||||
"github.com/SigNoz/signoz/pkg/sqlstore/sqlstoretest"
|
||||
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes/alertmanagertypestest"
|
||||
"github.com/prometheus/alertmanager/dispatch"
|
||||
|
||||
@@ -90,7 +94,8 @@ func TestEndToEndAlertManagerFlow(t *testing.T) {
|
||||
stateStore := alertmanagertypestest.NewStateStore()
|
||||
registry := prometheus.NewRegistry()
|
||||
logger := slog.New(slog.DiscardHandler)
|
||||
server, err := New(context.Background(), logger, registry, srvCfg, orgID, stateStore, notificationManager)
|
||||
maintenanceStore := sqlrulestore.NewMaintenanceStore(sqlstoretest.New(sqlstore.Config{Provider: "sqlite"}, sqlmock.QueryMatcherEqual))
|
||||
server, err := New(context.Background(), logger, registry, srvCfg, orgID, stateStore, notificationManager, maintenanceStore)
|
||||
require.NoError(t, err)
|
||||
amConfig, err := alertmanagertypes.NewDefaultConfig(srvCfg.Global, srvCfg.Route, orgID)
|
||||
require.NoError(t, err)
|
||||
|
||||
@@ -10,9 +10,14 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/DATA-DOG/go-sqlmock"
|
||||
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager/nfmanagertest"
|
||||
"github.com/SigNoz/signoz/pkg/ruler/rulestore/sqlrulestore"
|
||||
"github.com/SigNoz/signoz/pkg/sqlstore"
|
||||
"github.com/SigNoz/signoz/pkg/sqlstore/sqlstoretest"
|
||||
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
|
||||
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes/alertmanagertypestest"
|
||||
"github.com/SigNoz/signoz/pkg/types/ruletypes"
|
||||
"github.com/go-openapi/strfmt"
|
||||
"github.com/prometheus/alertmanager/api/v2/models"
|
||||
"github.com/prometheus/alertmanager/config"
|
||||
@@ -23,9 +28,14 @@ import (
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func newTestMaintenanceStore() ruletypes.MaintenanceStore {
|
||||
ss := sqlstoretest.New(sqlstore.Config{Provider: "sqlite"}, sqlmock.QueryMatcherEqual)
|
||||
return sqlrulestore.NewMaintenanceStore(ss)
|
||||
}
|
||||
|
||||
func TestServerSetConfigAndStop(t *testing.T) {
|
||||
notificationManager := nfmanagertest.NewMock()
|
||||
server, err := New(context.Background(), slog.New(slog.DiscardHandler), prometheus.NewRegistry(), NewConfig(), "1", alertmanagertypestest.NewStateStore(), notificationManager)
|
||||
server, err := New(context.Background(), slog.New(slog.DiscardHandler), prometheus.NewRegistry(), NewConfig(), "1", alertmanagertypestest.NewStateStore(), notificationManager, newTestMaintenanceStore())
|
||||
require.NoError(t, err)
|
||||
|
||||
amConfig, err := alertmanagertypes.NewDefaultConfig(alertmanagertypes.GlobalConfig{}, alertmanagertypes.RouteConfig{GroupInterval: 1 * time.Minute, RepeatInterval: 1 * time.Minute, GroupWait: 1 * time.Minute}, "1")
|
||||
@@ -37,7 +47,7 @@ func TestServerSetConfigAndStop(t *testing.T) {
|
||||
|
||||
func TestServerTestReceiverTypeWebhook(t *testing.T) {
|
||||
notificationManager := nfmanagertest.NewMock()
|
||||
server, err := New(context.Background(), slog.New(slog.DiscardHandler), prometheus.NewRegistry(), NewConfig(), "1", alertmanagertypestest.NewStateStore(), notificationManager)
|
||||
server, err := New(context.Background(), slog.New(slog.DiscardHandler), prometheus.NewRegistry(), NewConfig(), "1", alertmanagertypestest.NewStateStore(), notificationManager, newTestMaintenanceStore())
|
||||
require.NoError(t, err)
|
||||
|
||||
amConfig, err := alertmanagertypes.NewDefaultConfig(alertmanagertypes.GlobalConfig{}, alertmanagertypes.RouteConfig{GroupInterval: 1 * time.Minute, RepeatInterval: 1 * time.Minute, GroupWait: 1 * time.Minute}, "1")
|
||||
@@ -85,7 +95,7 @@ func TestServerPutAlerts(t *testing.T) {
|
||||
srvCfg := NewConfig()
|
||||
srvCfg.Route.GroupInterval = 1 * time.Second
|
||||
notificationManager := nfmanagertest.NewMock()
|
||||
server, err := New(context.Background(), slog.New(slog.DiscardHandler), prometheus.NewRegistry(), srvCfg, "1", stateStore, notificationManager)
|
||||
server, err := New(context.Background(), slog.New(slog.DiscardHandler), prometheus.NewRegistry(), srvCfg, "1", stateStore, notificationManager, newTestMaintenanceStore())
|
||||
require.NoError(t, err)
|
||||
|
||||
amConfig, err := alertmanagertypes.NewDefaultConfig(srvCfg.Global, srvCfg.Route, "1")
|
||||
@@ -133,7 +143,7 @@ func TestServerTestAlert(t *testing.T) {
|
||||
srvCfg := NewConfig()
|
||||
srvCfg.Route.GroupInterval = 1 * time.Second
|
||||
notificationManager := nfmanagertest.NewMock()
|
||||
server, err := New(context.Background(), slog.New(slog.DiscardHandler), prometheus.NewRegistry(), srvCfg, "1", stateStore, notificationManager)
|
||||
server, err := New(context.Background(), slog.New(slog.DiscardHandler), prometheus.NewRegistry(), srvCfg, "1", stateStore, notificationManager, newTestMaintenanceStore())
|
||||
require.NoError(t, err)
|
||||
|
||||
amConfig, err := alertmanagertypes.NewDefaultConfig(srvCfg.Global, srvCfg.Route, "1")
|
||||
@@ -238,7 +248,7 @@ func TestServerTestAlertContinuesOnFailure(t *testing.T) {
|
||||
srvCfg := NewConfig()
|
||||
srvCfg.Route.GroupInterval = 1 * time.Second
|
||||
notificationManager := nfmanagertest.NewMock()
|
||||
server, err := New(context.Background(), slog.New(slog.DiscardHandler), prometheus.NewRegistry(), srvCfg, "1", stateStore, notificationManager)
|
||||
server, err := New(context.Background(), slog.New(slog.DiscardHandler), prometheus.NewRegistry(), srvCfg, "1", stateStore, notificationManager, newTestMaintenanceStore())
|
||||
require.NoError(t, err)
|
||||
|
||||
amConfig, err := alertmanagertypes.NewDefaultConfig(srvCfg.Global, srvCfg.Route, "1")
|
||||
|
||||
@@ -14,6 +14,7 @@ import (
|
||||
"github.com/SigNoz/signoz/pkg/factory"
|
||||
"github.com/SigNoz/signoz/pkg/modules/organization"
|
||||
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
|
||||
"github.com/SigNoz/signoz/pkg/types/ruletypes"
|
||||
)
|
||||
|
||||
type Service struct {
|
||||
@@ -39,16 +40,18 @@ type Service struct {
|
||||
serversMtx sync.RWMutex
|
||||
|
||||
notificationManager nfmanager.NotificationManager
|
||||
|
||||
maintenanceStore ruletypes.MaintenanceStore
|
||||
}
|
||||
|
||||
func New(
|
||||
ctx context.Context,
|
||||
settings factory.ScopedProviderSettings,
|
||||
config alertmanagerserver.Config,
|
||||
stateStore alertmanagertypes.StateStore,
|
||||
configStore alertmanagertypes.ConfigStore,
|
||||
orgGetter organization.Getter,
|
||||
nfManager nfmanager.NotificationManager,
|
||||
maintenanceStore ruletypes.MaintenanceStore,
|
||||
) *Service {
|
||||
service := &Service{
|
||||
config: config,
|
||||
@@ -59,6 +62,7 @@ func New(
|
||||
servers: make(map[string]*alertmanagerserver.Server),
|
||||
serversMtx: sync.RWMutex{},
|
||||
notificationManager: nfManager,
|
||||
maintenanceStore: maintenanceStore,
|
||||
}
|
||||
|
||||
return service
|
||||
@@ -177,7 +181,10 @@ func (service *Service) newServer(ctx context.Context, orgID string) (*alertmana
|
||||
return nil, err
|
||||
}
|
||||
|
||||
server, err := alertmanagerserver.New(ctx, service.settings.Logger(), service.settings.PrometheusRegisterer(), service.config, orgID, service.stateStore, service.notificationManager)
|
||||
server, err := alertmanagerserver.New(
|
||||
ctx, service.settings.Logger(), service.settings.PrometheusRegisterer(), service.config, orgID,
|
||||
service.stateStore, service.notificationManager, service.maintenanceStore,
|
||||
)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -4,11 +4,8 @@ import (
|
||||
"context"
|
||||
"time"
|
||||
|
||||
"github.com/prometheus/common/model"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/query-service/utils/labels"
|
||||
|
||||
amConfig "github.com/prometheus/alertmanager/config"
|
||||
"github.com/prometheus/common/model"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/alertmanager"
|
||||
"github.com/SigNoz/signoz/pkg/alertmanager/alertmanagerstore/sqlalertmanagerstore"
|
||||
@@ -16,10 +13,12 @@ import (
|
||||
"github.com/SigNoz/signoz/pkg/errors"
|
||||
"github.com/SigNoz/signoz/pkg/factory"
|
||||
"github.com/SigNoz/signoz/pkg/modules/organization"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/utils/labels"
|
||||
"github.com/SigNoz/signoz/pkg/sqlstore"
|
||||
"github.com/SigNoz/signoz/pkg/types"
|
||||
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
|
||||
"github.com/SigNoz/signoz/pkg/types/authtypes"
|
||||
"github.com/SigNoz/signoz/pkg/types/ruletypes"
|
||||
"github.com/SigNoz/signoz/pkg/valuer"
|
||||
)
|
||||
|
||||
@@ -30,35 +29,49 @@ type provider struct {
|
||||
configStore alertmanagertypes.ConfigStore
|
||||
stateStore alertmanagertypes.StateStore
|
||||
notificationManager nfmanager.NotificationManager
|
||||
maintenanceStore ruletypes.MaintenanceStore
|
||||
stopC chan struct{}
|
||||
}
|
||||
|
||||
func NewFactory(sqlstore sqlstore.SQLStore, orgGetter organization.Getter, notificationManager nfmanager.NotificationManager) factory.ProviderFactory[alertmanager.Alertmanager, alertmanager.Config] {
|
||||
func NewFactory(
|
||||
sqlstore sqlstore.SQLStore,
|
||||
orgGetter organization.Getter,
|
||||
notificationManager nfmanager.NotificationManager,
|
||||
maintenanceStore ruletypes.MaintenanceStore,
|
||||
) factory.ProviderFactory[alertmanager.Alertmanager, alertmanager.Config] {
|
||||
return factory.NewProviderFactory(factory.MustNewName("signoz"), func(ctx context.Context, settings factory.ProviderSettings, config alertmanager.Config) (alertmanager.Alertmanager, error) {
|
||||
return New(ctx, settings, config, sqlstore, orgGetter, notificationManager)
|
||||
return New(settings, config, sqlstore, orgGetter, notificationManager, maintenanceStore)
|
||||
})
|
||||
}
|
||||
|
||||
func New(ctx context.Context, providerSettings factory.ProviderSettings, config alertmanager.Config, sqlstore sqlstore.SQLStore, orgGetter organization.Getter, notificationManager nfmanager.NotificationManager) (*provider, error) {
|
||||
func New(
|
||||
providerSettings factory.ProviderSettings,
|
||||
config alertmanager.Config,
|
||||
sqlstore sqlstore.SQLStore,
|
||||
orgGetter organization.Getter,
|
||||
notificationManager nfmanager.NotificationManager,
|
||||
maintenanceStore ruletypes.MaintenanceStore,
|
||||
) (*provider, error) {
|
||||
settings := factory.NewScopedProviderSettings(providerSettings, "github.com/SigNoz/signoz/pkg/alertmanager/signozalertmanager")
|
||||
configStore := sqlalertmanagerstore.NewConfigStore(sqlstore)
|
||||
stateStore := sqlalertmanagerstore.NewStateStore(sqlstore)
|
||||
|
||||
p := &provider{
|
||||
service: alertmanager.New(
|
||||
ctx,
|
||||
settings,
|
||||
config.Signoz.Config,
|
||||
stateStore,
|
||||
configStore,
|
||||
orgGetter,
|
||||
notificationManager,
|
||||
maintenanceStore,
|
||||
),
|
||||
settings: settings,
|
||||
config: config,
|
||||
configStore: configStore,
|
||||
stateStore: stateStore,
|
||||
notificationManager: notificationManager,
|
||||
maintenanceStore: maintenanceStore,
|
||||
stopC: make(chan struct{}),
|
||||
}
|
||||
|
||||
|
||||
@@ -2,6 +2,8 @@ package envprovider
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/config"
|
||||
@@ -9,7 +11,21 @@ import (
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
// clearSignozEnv unsets all existing SIGNOZ_* env vars for the duration of the test.
|
||||
func clearSignozEnv(t *testing.T) {
|
||||
t.Helper()
|
||||
for _, kv := range os.Environ() {
|
||||
if strings.HasPrefix(kv, prefix) {
|
||||
key := strings.SplitN(kv, "=", 2)[0]
|
||||
orig, _ := os.LookupEnv(key)
|
||||
os.Unsetenv(key)
|
||||
t.Cleanup(func() { os.Setenv(key, orig) })
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetWithStrings(t *testing.T) {
|
||||
clearSignozEnv(t)
|
||||
t.Setenv("SIGNOZ_K1_K2", "string")
|
||||
t.Setenv("SIGNOZ_K3__K4", "string")
|
||||
t.Setenv("SIGNOZ_K5__K6_K7__K8", "string")
|
||||
@@ -31,6 +47,7 @@ func TestGetWithStrings(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestGetWithNoPrefix(t *testing.T) {
|
||||
clearSignozEnv(t)
|
||||
t.Setenv("K1_K2", "string")
|
||||
t.Setenv("K3_K4", "string")
|
||||
expected := map[string]any{}
|
||||
@@ -43,6 +60,7 @@ func TestGetWithNoPrefix(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestGetWithGoTypes(t *testing.T) {
|
||||
clearSignozEnv(t)
|
||||
t.Setenv("SIGNOZ_BOOL", "true")
|
||||
t.Setenv("SIGNOZ_STRING", "string")
|
||||
t.Setenv("SIGNOZ_INT", "1")
|
||||
|
||||
@@ -32,30 +32,28 @@ import (
|
||||
)
|
||||
|
||||
type PrepareTaskOptions struct {
|
||||
Rule *ruletypes.PostableRule
|
||||
TaskName string
|
||||
RuleStore ruletypes.RuleStore
|
||||
MaintenanceStore ruletypes.MaintenanceStore
|
||||
Querier querier.Querier
|
||||
Logger *slog.Logger
|
||||
Cache cache.Cache
|
||||
ManagerOpts *ManagerOptions
|
||||
NotifyFunc NotifyFunc
|
||||
SQLStore sqlstore.SQLStore
|
||||
OrgID valuer.UUID
|
||||
Rule *ruletypes.PostableRule
|
||||
TaskName string
|
||||
RuleStore ruletypes.RuleStore
|
||||
Querier querier.Querier
|
||||
Logger *slog.Logger
|
||||
Cache cache.Cache
|
||||
ManagerOpts *ManagerOptions
|
||||
NotifyFunc NotifyFunc
|
||||
SQLStore sqlstore.SQLStore
|
||||
OrgID valuer.UUID
|
||||
}
|
||||
|
||||
type PrepareTestRuleOptions struct {
|
||||
Rule *ruletypes.PostableRule
|
||||
RuleStore ruletypes.RuleStore
|
||||
MaintenanceStore ruletypes.MaintenanceStore
|
||||
Querier querier.Querier
|
||||
Logger *slog.Logger
|
||||
Cache cache.Cache
|
||||
ManagerOpts *ManagerOptions
|
||||
NotifyFunc NotifyFunc
|
||||
SQLStore sqlstore.SQLStore
|
||||
OrgID valuer.UUID
|
||||
Rule *ruletypes.PostableRule
|
||||
RuleStore ruletypes.RuleStore
|
||||
Querier querier.Querier
|
||||
Logger *slog.Logger
|
||||
Cache cache.Cache
|
||||
ManagerOpts *ManagerOptions
|
||||
NotifyFunc NotifyFunc
|
||||
SQLStore sqlstore.SQLStore
|
||||
OrgID valuer.UUID
|
||||
}
|
||||
|
||||
const taskNameSuffix = "webAppEditor"
|
||||
@@ -134,7 +132,6 @@ func defaultOptions(o *ManagerOptions) *ManagerOptions {
|
||||
}
|
||||
|
||||
func defaultPrepareTaskFunc(opts PrepareTaskOptions) (Task, error) {
|
||||
|
||||
rules := make([]Rule, 0)
|
||||
var task Task
|
||||
|
||||
@@ -159,7 +156,6 @@ func defaultPrepareTaskFunc(opts PrepareTaskOptions) (Task, error) {
|
||||
WithMetadataStore(opts.ManagerOpts.MetadataStore),
|
||||
WithRuleStateHistoryModule(opts.ManagerOpts.RuleStateHistoryModule),
|
||||
)
|
||||
|
||||
if err != nil {
|
||||
return task, err
|
||||
}
|
||||
@@ -167,7 +163,7 @@ func defaultPrepareTaskFunc(opts PrepareTaskOptions) (Task, error) {
|
||||
rules = append(rules, tr)
|
||||
|
||||
// create ch rule task for evaluation
|
||||
task = newTask(TaskTypeCh, opts.TaskName, taskNameSuffix, evaluation.GetFrequency().Duration(), rules, opts.ManagerOpts, opts.NotifyFunc, opts.MaintenanceStore, opts.OrgID)
|
||||
task = newTask(TaskTypeCh, opts.TaskName, taskNameSuffix, evaluation.GetFrequency().Duration(), rules, opts.ManagerOpts, opts.NotifyFunc)
|
||||
|
||||
} else if opts.Rule.RuleType == ruletypes.RuleTypeProm {
|
||||
|
||||
@@ -183,7 +179,6 @@ func defaultPrepareTaskFunc(opts PrepareTaskOptions) (Task, error) {
|
||||
WithMetadataStore(opts.ManagerOpts.MetadataStore),
|
||||
WithRuleStateHistoryModule(opts.ManagerOpts.RuleStateHistoryModule),
|
||||
)
|
||||
|
||||
if err != nil {
|
||||
return task, err
|
||||
}
|
||||
@@ -191,7 +186,7 @@ func defaultPrepareTaskFunc(opts PrepareTaskOptions) (Task, error) {
|
||||
rules = append(rules, pr)
|
||||
|
||||
// create promql rule task for evaluation
|
||||
task = newTask(TaskTypeProm, opts.TaskName, taskNameSuffix, evaluation.GetFrequency().Duration(), rules, opts.ManagerOpts, opts.NotifyFunc, opts.MaintenanceStore, opts.OrgID)
|
||||
task = newTask(TaskTypeProm, opts.TaskName, taskNameSuffix, evaluation.GetFrequency().Duration(), rules, opts.ManagerOpts, opts.NotifyFunc)
|
||||
|
||||
} else {
|
||||
return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "unsupported rule type %s. Supported types: %s, %s", opts.Rule.RuleType, ruletypes.RuleTypeProm, ruletypes.RuleTypeThreshold)
|
||||
@@ -241,6 +236,7 @@ func (m *Manager) MaintenanceStore() ruletypes.MaintenanceStore {
|
||||
return m.maintenanceStore
|
||||
}
|
||||
|
||||
// TODO(jatinderjit): remove (unused)?
|
||||
func (m *Manager) Pause(b bool) {
|
||||
m.mtx.Lock()
|
||||
defer m.mtx.Unlock()
|
||||
@@ -430,19 +426,17 @@ func (m *Manager) editTask(_ context.Context, orgID valuer.UUID, rule *ruletypes
|
||||
m.logger.Debug("editing a rule task", "name", taskName)
|
||||
|
||||
newTask, err := m.prepareTaskFunc(PrepareTaskOptions{
|
||||
Rule: rule,
|
||||
TaskName: taskName,
|
||||
RuleStore: m.ruleStore,
|
||||
MaintenanceStore: m.maintenanceStore,
|
||||
Querier: m.opts.Querier,
|
||||
Logger: m.opts.Logger,
|
||||
Cache: m.cache,
|
||||
ManagerOpts: m.opts,
|
||||
NotifyFunc: m.notifyFunc,
|
||||
SQLStore: m.sqlstore,
|
||||
OrgID: orgID,
|
||||
Rule: rule,
|
||||
TaskName: taskName,
|
||||
RuleStore: m.ruleStore,
|
||||
Querier: m.opts.Querier,
|
||||
Logger: m.opts.Logger,
|
||||
Cache: m.cache,
|
||||
ManagerOpts: m.opts,
|
||||
NotifyFunc: m.notifyFunc,
|
||||
SQLStore: m.sqlstore,
|
||||
OrgID: orgID,
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
m.logger.Error("loading tasks failed", errors.Attr(err))
|
||||
return errors.NewInvalidInputf(errors.CodeInvalidInput, "error preparing rule with given parameters, previous rule set restored")
|
||||
@@ -643,19 +637,17 @@ func (m *Manager) addTask(_ context.Context, orgID valuer.UUID, rule *ruletypes.
|
||||
|
||||
m.logger.Debug("adding a new rule task", "name", taskName)
|
||||
newTask, err := m.prepareTaskFunc(PrepareTaskOptions{
|
||||
Rule: rule,
|
||||
TaskName: taskName,
|
||||
RuleStore: m.ruleStore,
|
||||
MaintenanceStore: m.maintenanceStore,
|
||||
Querier: m.opts.Querier,
|
||||
Logger: m.opts.Logger,
|
||||
Cache: m.cache,
|
||||
ManagerOpts: m.opts,
|
||||
NotifyFunc: m.notifyFunc,
|
||||
SQLStore: m.sqlstore,
|
||||
OrgID: orgID,
|
||||
Rule: rule,
|
||||
TaskName: taskName,
|
||||
RuleStore: m.ruleStore,
|
||||
Querier: m.opts.Querier,
|
||||
Logger: m.opts.Logger,
|
||||
Cache: m.cache,
|
||||
ManagerOpts: m.opts,
|
||||
NotifyFunc: m.notifyFunc,
|
||||
SQLStore: m.sqlstore,
|
||||
OrgID: orgID,
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
m.logger.Error("creating rule task failed", "name", taskName, errors.Attr(err))
|
||||
return errors.NewInvalidInputf(errors.CodeInvalidInput, "error loading rules, previous rule set restored")
|
||||
@@ -703,7 +695,6 @@ func (m *Manager) RuleTasks() []Task {
|
||||
// RuleTasksWithoutLock returns the list of manager's rule tasks without
|
||||
// acquiring a lock on the manager.
|
||||
func (m *Manager) RuleTasksWithoutLock() []Task {
|
||||
|
||||
rgs := make([]Task, 0, len(m.tasks))
|
||||
for _, g := range m.tasks {
|
||||
rgs = append(rgs, g)
|
||||
@@ -897,7 +888,6 @@ func (m *Manager) GetRule(ctx context.Context, id valuer.UUID) (*ruletypes.Getta
|
||||
// the task state. For example - if a stored rule is disabled, then
|
||||
// there is no task running against it.
|
||||
func (m *Manager) syncRuleStateWithTask(ctx context.Context, orgID valuer.UUID, taskName string, rule *ruletypes.PostableRule) error {
|
||||
|
||||
if rule.Disabled {
|
||||
// check if rule has any task running
|
||||
if _, ok := m.tasks[taskName]; ok {
|
||||
@@ -1029,16 +1019,15 @@ func (m *Manager) TestNotification(ctx context.Context, orgID valuer.UUID, ruleS
|
||||
}
|
||||
|
||||
alertCount, err := m.prepareTestRuleFunc(PrepareTestRuleOptions{
|
||||
Rule: &parsedRule,
|
||||
RuleStore: m.ruleStore,
|
||||
MaintenanceStore: m.maintenanceStore,
|
||||
Querier: m.opts.Querier,
|
||||
Logger: m.opts.Logger,
|
||||
Cache: m.cache,
|
||||
ManagerOpts: m.opts,
|
||||
NotifyFunc: m.testNotifyFunc,
|
||||
SQLStore: m.sqlstore,
|
||||
OrgID: orgID,
|
||||
Rule: &parsedRule,
|
||||
RuleStore: m.ruleStore,
|
||||
Querier: m.opts.Querier,
|
||||
Logger: m.opts.Logger,
|
||||
Cache: m.cache,
|
||||
ManagerOpts: m.opts,
|
||||
NotifyFunc: m.testNotifyFunc,
|
||||
SQLStore: m.sqlstore,
|
||||
OrgID: orgID,
|
||||
})
|
||||
|
||||
return alertCount, err
|
||||
|
||||
@@ -15,7 +15,6 @@ import (
|
||||
"github.com/SigNoz/signoz/pkg/types/authtypes"
|
||||
"github.com/SigNoz/signoz/pkg/types/ctxtypes"
|
||||
ruletypes "github.com/SigNoz/signoz/pkg/types/ruletypes"
|
||||
"github.com/SigNoz/signoz/pkg/valuer"
|
||||
)
|
||||
|
||||
// PromRuleTask is a promql rule executor
|
||||
@@ -39,14 +38,11 @@ type PromRuleTask struct {
|
||||
pause bool
|
||||
logger *slog.Logger
|
||||
notify NotifyFunc
|
||||
|
||||
maintenanceStore ruletypes.MaintenanceStore
|
||||
orgID valuer.UUID
|
||||
}
|
||||
|
||||
// NewPromRuleTask holds rules that have promql condition
|
||||
// and evaluates the rule at a given frequency
|
||||
func NewPromRuleTask(name, file string, frequency time.Duration, rules []Rule, opts *ManagerOptions, notify NotifyFunc, maintenanceStore ruletypes.MaintenanceStore, orgID valuer.UUID) *PromRuleTask {
|
||||
func NewPromRuleTask(name, file string, frequency time.Duration, rules []Rule, opts *ManagerOptions, notify NotifyFunc) *PromRuleTask {
|
||||
opts.Logger.Info("initiating a new rule group", "name", name, "frequency", frequency)
|
||||
|
||||
if frequency == 0 {
|
||||
@@ -63,10 +59,8 @@ func NewPromRuleTask(name, file string, frequency time.Duration, rules []Rule, o
|
||||
seriesInPreviousEval: make([]map[string]plabels.Labels, len(rules)),
|
||||
done: make(chan struct{}),
|
||||
terminated: make(chan struct{}),
|
||||
notify: notify,
|
||||
maintenanceStore: maintenanceStore,
|
||||
logger: opts.Logger,
|
||||
orgID: orgID,
|
||||
notify: notify,
|
||||
logger: opts.Logger,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -330,30 +324,12 @@ func (g *PromRuleTask) Eval(ctx context.Context, ts time.Time) {
|
||||
}()
|
||||
|
||||
g.logger.InfoContext(ctx, "promql rule task", "name", g.name, "eval_started_at", ts)
|
||||
maintenance, err := g.maintenanceStore.ListPlannedMaintenance(ctx, g.orgID.StringValue())
|
||||
if err != nil {
|
||||
g.logger.ErrorContext(ctx, "error in processing sql query", errors.Attr(err))
|
||||
}
|
||||
|
||||
for i, rule := range g.rules {
|
||||
if rule == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
shouldSkip := false
|
||||
for _, m := range maintenance {
|
||||
g.logger.InfoContext(ctx, "checking if rule should be skipped", slog.String("rule.id", rule.ID()), slog.Any("maintenance", m))
|
||||
if m.ShouldSkip(rule.ID(), ts) {
|
||||
shouldSkip = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if shouldSkip {
|
||||
g.logger.InfoContext(ctx, "rule should be skipped", slog.String("rule.id", rule.ID()))
|
||||
continue
|
||||
}
|
||||
|
||||
select {
|
||||
case <-g.done:
|
||||
return
|
||||
|
||||
@@ -2,20 +2,18 @@ package rules
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log/slog"
|
||||
"runtime/debug"
|
||||
"sort"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"log/slog"
|
||||
|
||||
opentracing "github.com/opentracing/opentracing-go"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/errors"
|
||||
"github.com/SigNoz/signoz/pkg/types/authtypes"
|
||||
"github.com/SigNoz/signoz/pkg/types/ctxtypes"
|
||||
"github.com/SigNoz/signoz/pkg/types/ruletypes"
|
||||
"github.com/SigNoz/signoz/pkg/valuer"
|
||||
)
|
||||
|
||||
// RuleTask holds a rule (with composite queries)
|
||||
@@ -37,34 +35,28 @@ type RuleTask struct {
|
||||
|
||||
pause bool
|
||||
notify NotifyFunc
|
||||
|
||||
maintenanceStore ruletypes.MaintenanceStore
|
||||
orgID valuer.UUID
|
||||
}
|
||||
|
||||
const DefaultFrequency = 1 * time.Minute
|
||||
|
||||
// NewRuleTask makes a new RuleTask with the given name, options, and rules.
|
||||
func NewRuleTask(name, file string, frequency time.Duration, rules []Rule, opts *ManagerOptions, notify NotifyFunc, maintenanceStore ruletypes.MaintenanceStore, orgID valuer.UUID) *RuleTask {
|
||||
|
||||
func NewRuleTask(name, file string, frequency time.Duration, rules []Rule, opts *ManagerOptions, notify NotifyFunc) *RuleTask {
|
||||
if frequency == 0 {
|
||||
frequency = DefaultFrequency
|
||||
}
|
||||
opts.Logger.Info("initiating a new rule task", "name", name, "frequency", frequency)
|
||||
|
||||
return &RuleTask{
|
||||
name: name,
|
||||
file: file,
|
||||
pause: false,
|
||||
frequency: frequency,
|
||||
rules: rules,
|
||||
opts: opts,
|
||||
logger: opts.Logger,
|
||||
done: make(chan struct{}),
|
||||
terminated: make(chan struct{}),
|
||||
notify: notify,
|
||||
maintenanceStore: maintenanceStore,
|
||||
orgID: orgID,
|
||||
name: name,
|
||||
file: file,
|
||||
pause: false,
|
||||
frequency: frequency,
|
||||
rules: rules,
|
||||
opts: opts,
|
||||
logger: opts.Logger,
|
||||
done: make(chan struct{}),
|
||||
terminated: make(chan struct{}),
|
||||
notify: notify,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -72,6 +64,7 @@ func NewRuleTask(name, file string, frequency time.Duration, rules []Rule, opts
|
||||
func (g *RuleTask) Name() string { return g.name }
|
||||
|
||||
// Key returns the group key
|
||||
// TODO(jatinderjit): remove (unused)?
|
||||
func (g *RuleTask) Key() string {
|
||||
return g.name + ";" + g.file
|
||||
}
|
||||
@@ -83,7 +76,7 @@ func (g *RuleTask) Type() TaskType { return TaskTypeCh }
|
||||
func (g *RuleTask) Rules() []Rule { return g.rules }
|
||||
|
||||
// Interval returns the group's interval.
|
||||
// TODO: remove (unused)?
|
||||
// TODO(jatinderjit): remove (unused)?
|
||||
func (g *RuleTask) Interval() time.Duration { return g.frequency }
|
||||
|
||||
func (g *RuleTask) Pause(b bool) {
|
||||
@@ -261,7 +254,6 @@ func nameAndLabels(rule Rule) string {
|
||||
// Rules are matched based on their name and labels. If there are duplicates, the
|
||||
// first is matched with the first, second with the second etc.
|
||||
func (g *RuleTask) CopyState(fromTask Task) error {
|
||||
|
||||
from, ok := fromTask.(*RuleTask)
|
||||
if !ok {
|
||||
return errors.NewInternalf(errors.CodeInternal, "invalid from task for copy")
|
||||
@@ -306,7 +298,6 @@ func (g *RuleTask) CopyState(fromTask Task) error {
|
||||
|
||||
// Eval runs a single evaluation cycle in which all rules are evaluated sequentially.
|
||||
func (g *RuleTask) Eval(ctx context.Context, ts time.Time) {
|
||||
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
g.logger.ErrorContext(
|
||||
@@ -318,31 +309,11 @@ func (g *RuleTask) Eval(ctx context.Context, ts time.Time) {
|
||||
|
||||
g.logger.DebugContext(ctx, "rule task eval started", "name", g.name, "start_time", ts)
|
||||
|
||||
maintenance, err := g.maintenanceStore.ListPlannedMaintenance(ctx, g.orgID.StringValue())
|
||||
|
||||
if err != nil {
|
||||
g.logger.ErrorContext(ctx, "error in processing sql query", errors.Attr(err))
|
||||
}
|
||||
|
||||
for i, rule := range g.rules {
|
||||
if rule == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
shouldSkip := false
|
||||
for _, m := range maintenance {
|
||||
g.logger.InfoContext(ctx, "checking if rule should be skipped", slog.String("rule.id", rule.ID()), slog.Any("maintenance", m))
|
||||
if m.ShouldSkip(rule.ID(), ts) {
|
||||
shouldSkip = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if shouldSkip {
|
||||
g.logger.InfoContext(ctx, "rule should be skipped", slog.String("rule.id", rule.ID()))
|
||||
continue
|
||||
}
|
||||
|
||||
select {
|
||||
case <-g.done:
|
||||
return
|
||||
@@ -382,7 +353,6 @@ func (g *RuleTask) Eval(ctx context.Context, ts time.Time) {
|
||||
}
|
||||
|
||||
rule.SendAlerts(ctx, ts, g.opts.ResendDelay, g.frequency, g.notify)
|
||||
|
||||
}(i, rule)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,9 +3,6 @@ package rules
|
||||
import (
|
||||
"context"
|
||||
"time"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/types/ruletypes"
|
||||
"github.com/SigNoz/signoz/pkg/valuer"
|
||||
)
|
||||
|
||||
type TaskType string
|
||||
@@ -32,9 +29,9 @@ type Task interface {
|
||||
|
||||
// newTask returns an appropriate group for
|
||||
// rule type
|
||||
func newTask(taskType TaskType, name, file string, frequency time.Duration, rules []Rule, opts *ManagerOptions, notify NotifyFunc, maintenanceStore ruletypes.MaintenanceStore, orgID valuer.UUID) Task {
|
||||
func newTask(taskType TaskType, name, file string, frequency time.Duration, rules []Rule, opts *ManagerOptions, notify NotifyFunc) Task {
|
||||
if taskType == TaskTypeCh {
|
||||
return NewRuleTask(name, file, frequency, rules, opts, notify, maintenanceStore, orgID)
|
||||
return NewRuleTask(name, file, frequency, rules, opts, notify)
|
||||
}
|
||||
return NewPromRuleTask(name, file, frequency, rules, opts, notify, maintenanceStore, orgID)
|
||||
return NewPromRuleTask(name, file, frequency, rules, opts, notify)
|
||||
}
|
||||
|
||||
@@ -75,10 +75,11 @@ func (r *maintenance) CreatePlannedMaintenance(ctx context.Context, maintenance
|
||||
CreatedBy: claims.Email,
|
||||
UpdatedBy: claims.Email,
|
||||
},
|
||||
Name: maintenance.Name,
|
||||
Description: maintenance.Description,
|
||||
Schedule: maintenance.Schedule,
|
||||
OrgID: claims.OrgID,
|
||||
Name: maintenance.Name,
|
||||
Description: maintenance.Description,
|
||||
Schedule: maintenance.Schedule,
|
||||
OrgID: claims.OrgID,
|
||||
LabelExpression: maintenance.LabelExpression,
|
||||
}
|
||||
|
||||
maintenanceRules := make([]*ruletypes.StorablePlannedMaintenanceRule, 0)
|
||||
@@ -126,15 +127,16 @@ func (r *maintenance) CreatePlannedMaintenance(ctx context.Context, maintenance
|
||||
}
|
||||
|
||||
return &ruletypes.PlannedMaintenance{
|
||||
ID: storablePlannedMaintenance.ID,
|
||||
Name: storablePlannedMaintenance.Name,
|
||||
Description: storablePlannedMaintenance.Description,
|
||||
Schedule: storablePlannedMaintenance.Schedule,
|
||||
RuleIDs: maintenance.AlertIds,
|
||||
CreatedAt: storablePlannedMaintenance.CreatedAt,
|
||||
CreatedBy: storablePlannedMaintenance.CreatedBy,
|
||||
UpdatedAt: storablePlannedMaintenance.UpdatedAt,
|
||||
UpdatedBy: storablePlannedMaintenance.UpdatedBy,
|
||||
ID: storablePlannedMaintenance.ID,
|
||||
Name: storablePlannedMaintenance.Name,
|
||||
Description: storablePlannedMaintenance.Description,
|
||||
Schedule: storablePlannedMaintenance.Schedule,
|
||||
RuleIDs: maintenance.AlertIds,
|
||||
LabelExpression: maintenance.LabelExpression,
|
||||
CreatedAt: storablePlannedMaintenance.CreatedAt,
|
||||
CreatedBy: storablePlannedMaintenance.CreatedBy,
|
||||
UpdatedAt: storablePlannedMaintenance.UpdatedAt,
|
||||
UpdatedBy: storablePlannedMaintenance.UpdatedBy,
|
||||
}, nil
|
||||
}
|
||||
|
||||
@@ -175,10 +177,11 @@ func (r *maintenance) UpdatePlannedMaintenance(ctx context.Context, maintenance
|
||||
CreatedBy: existing.CreatedBy,
|
||||
UpdatedBy: claims.Email,
|
||||
},
|
||||
Name: maintenance.Name,
|
||||
Description: maintenance.Description,
|
||||
Schedule: maintenance.Schedule,
|
||||
OrgID: claims.OrgID,
|
||||
Name: maintenance.Name,
|
||||
Description: maintenance.Description,
|
||||
Schedule: maintenance.Schedule,
|
||||
OrgID: claims.OrgID,
|
||||
LabelExpression: maintenance.LabelExpression,
|
||||
}
|
||||
|
||||
storablePlannedMaintenanceRules := make([]*ruletypes.StorablePlannedMaintenanceRule, 0)
|
||||
|
||||
@@ -19,6 +19,7 @@ import (
|
||||
"github.com/SigNoz/signoz/pkg/modules/user/impluser"
|
||||
"github.com/SigNoz/signoz/pkg/querier"
|
||||
"github.com/SigNoz/signoz/pkg/queryparser"
|
||||
"github.com/SigNoz/signoz/pkg/ruler/rulestore/sqlrulestore"
|
||||
"github.com/SigNoz/signoz/pkg/sharder"
|
||||
"github.com/SigNoz/signoz/pkg/sharder/noopsharder"
|
||||
"github.com/SigNoz/signoz/pkg/sqlstore"
|
||||
@@ -38,7 +39,8 @@ func TestNewHandlers(t *testing.T) {
|
||||
orgGetter := implorganization.NewGetter(implorganization.NewStore(sqlstore), sharder)
|
||||
notificationManager := nfmanagertest.NewMock()
|
||||
require.NoError(t, err)
|
||||
alertmanager, err := signozalertmanager.New(context.TODO(), providerSettings, alertmanager.Config{}, sqlstore, orgGetter, notificationManager)
|
||||
maintenanceStore := sqlrulestore.NewMaintenanceStore(sqlstore)
|
||||
alertmanager, err := signozalertmanager.New(providerSettings, alertmanager.Config{}, sqlstore, orgGetter, notificationManager, maintenanceStore)
|
||||
require.NoError(t, err)
|
||||
tokenizer := tokenizertest.NewMockTokenizer(t)
|
||||
emailing := emailingtest.New()
|
||||
|
||||
@@ -20,6 +20,7 @@ import (
|
||||
"github.com/SigNoz/signoz/pkg/modules/serviceaccount/implserviceaccount"
|
||||
"github.com/SigNoz/signoz/pkg/modules/user/impluser"
|
||||
"github.com/SigNoz/signoz/pkg/queryparser"
|
||||
"github.com/SigNoz/signoz/pkg/ruler/rulestore/sqlrulestore"
|
||||
"github.com/SigNoz/signoz/pkg/sharder"
|
||||
"github.com/SigNoz/signoz/pkg/sharder/noopsharder"
|
||||
"github.com/SigNoz/signoz/pkg/sqlstore"
|
||||
@@ -39,7 +40,8 @@ func TestNewModules(t *testing.T) {
|
||||
orgGetter := implorganization.NewGetter(implorganization.NewStore(sqlstore), sharder)
|
||||
notificationManager := nfmanagertest.NewMock()
|
||||
require.NoError(t, err)
|
||||
alertmanager, err := signozalertmanager.New(context.TODO(), providerSettings, alertmanager.Config{}, sqlstore, orgGetter, notificationManager)
|
||||
maintenanceStore := sqlrulestore.NewMaintenanceStore(sqlstore)
|
||||
alertmanager, err := signozalertmanager.New(providerSettings, alertmanager.Config{}, sqlstore, orgGetter, notificationManager, maintenanceStore)
|
||||
require.NoError(t, err)
|
||||
tokenizer := tokenizertest.NewMockTokenizer(t)
|
||||
emailing := emailingtest.New()
|
||||
|
||||
@@ -65,6 +65,7 @@ import (
|
||||
"github.com/SigNoz/signoz/pkg/tokenizer/tokenizerstore/sqltokenizerstore"
|
||||
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
|
||||
"github.com/SigNoz/signoz/pkg/types/featuretypes"
|
||||
ruletypes "github.com/SigNoz/signoz/pkg/types/ruletypes"
|
||||
"github.com/SigNoz/signoz/pkg/version"
|
||||
"github.com/SigNoz/signoz/pkg/web"
|
||||
"github.com/SigNoz/signoz/pkg/web/noopweb"
|
||||
@@ -195,6 +196,7 @@ func NewSQLMigrationProviderFactories(
|
||||
sqlmigration.NewServiceAccountAuthzactory(sqlstore),
|
||||
sqlmigration.NewDropUserDeletedAtFactory(sqlstore, sqlschema),
|
||||
sqlmigration.NewMigrateAWSAllRegionsFactory(sqlstore),
|
||||
sqlmigration.NewAddLabelExpressionToPlannedMaintenanceFactory(sqlstore, sqlschema),
|
||||
)
|
||||
}
|
||||
|
||||
@@ -221,9 +223,14 @@ func NewNotificationManagerProviderFactories(routeStore alertmanagertypes.RouteS
|
||||
)
|
||||
}
|
||||
|
||||
func NewAlertmanagerProviderFactories(sqlstore sqlstore.SQLStore, orgGetter organization.Getter, nfManager nfmanager.NotificationManager) factory.NamedMap[factory.ProviderFactory[alertmanager.Alertmanager, alertmanager.Config]] {
|
||||
func NewAlertmanagerProviderFactories(
|
||||
sqlstore sqlstore.SQLStore,
|
||||
orgGetter organization.Getter,
|
||||
nfManager nfmanager.NotificationManager,
|
||||
maintenanceStore ruletypes.MaintenanceStore,
|
||||
) factory.NamedMap[factory.ProviderFactory[alertmanager.Alertmanager, alertmanager.Config]] {
|
||||
return factory.MustNewNamedMap(
|
||||
signozalertmanager.NewFactory(sqlstore, orgGetter, nfManager),
|
||||
signozalertmanager.NewFactory(sqlstore, orgGetter, nfManager, maintenanceStore),
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
@@ -14,6 +14,7 @@ import (
|
||||
"github.com/SigNoz/signoz/pkg/modules/user/impluser"
|
||||
"github.com/SigNoz/signoz/pkg/sqlschema"
|
||||
"github.com/SigNoz/signoz/pkg/sqlschema/sqlschematest"
|
||||
"github.com/SigNoz/signoz/pkg/ruler/rulestore/sqlrulestore"
|
||||
"github.com/SigNoz/signoz/pkg/sqlstore"
|
||||
"github.com/SigNoz/signoz/pkg/sqlstore/sqlstoretest"
|
||||
"github.com/SigNoz/signoz/pkg/statsreporter"
|
||||
@@ -59,9 +60,11 @@ func TestNewProviderFactories(t *testing.T) {
|
||||
})
|
||||
|
||||
assert.NotPanics(t, func() {
|
||||
orgGetter := implorganization.NewGetter(implorganization.NewStore(sqlstoretest.New(sqlstore.Config{Provider: "sqlite"}, sqlmock.QueryMatcherEqual)), nil)
|
||||
store := sqlstoretest.New(sqlstore.Config{Provider: "sqlite"}, sqlmock.QueryMatcherEqual)
|
||||
orgGetter := implorganization.NewGetter(implorganization.NewStore(store), nil)
|
||||
notificationManager := nfmanagertest.NewMock()
|
||||
NewAlertmanagerProviderFactories(sqlstoretest.New(sqlstore.Config{Provider: "sqlite"}, sqlmock.QueryMatcherEqual), orgGetter, notificationManager)
|
||||
maintenanceStore := sqlrulestore.NewMaintenanceStore(store)
|
||||
NewAlertmanagerProviderFactories(store, orgGetter, notificationManager, maintenanceStore)
|
||||
})
|
||||
|
||||
assert.NotPanics(t, func() {
|
||||
|
||||
@@ -34,6 +34,7 @@ import (
|
||||
"github.com/SigNoz/signoz/pkg/querier"
|
||||
"github.com/SigNoz/signoz/pkg/queryparser"
|
||||
"github.com/SigNoz/signoz/pkg/ruler"
|
||||
"github.com/SigNoz/signoz/pkg/ruler/rulestore/sqlrulestore"
|
||||
"github.com/SigNoz/signoz/pkg/sharder"
|
||||
"github.com/SigNoz/signoz/pkg/sqlmigration"
|
||||
"github.com/SigNoz/signoz/pkg/sqlmigrator"
|
||||
@@ -362,12 +363,14 @@ func New(
|
||||
return nil, err
|
||||
}
|
||||
|
||||
maintenanceStore := sqlrulestore.NewMaintenanceStore(sqlstore)
|
||||
|
||||
// Initialize alertmanager from the available alertmanager provider factories
|
||||
alertmanager, err := factory.NewProviderFromNamedMap(
|
||||
ctx,
|
||||
providerSettings,
|
||||
config.Alertmanager,
|
||||
NewAlertmanagerProviderFactories(sqlstore, orgGetter, nfManager),
|
||||
NewAlertmanagerProviderFactories(sqlstore, orgGetter, nfManager, maintenanceStore),
|
||||
config.Alertmanager.Provider,
|
||||
)
|
||||
if err != nil {
|
||||
|
||||
@@ -0,0 +1,97 @@
|
||||
package sqlmigration
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/factory"
|
||||
"github.com/SigNoz/signoz/pkg/sqlschema"
|
||||
"github.com/SigNoz/signoz/pkg/sqlstore"
|
||||
"github.com/uptrace/bun"
|
||||
"github.com/uptrace/bun/migrate"
|
||||
)
|
||||
|
||||
type addLabelExpressionToPlannedMaintenance struct {
|
||||
sqlstore sqlstore.SQLStore
|
||||
sqlschema sqlschema.SQLSchema
|
||||
}
|
||||
|
||||
func NewAddLabelExpressionToPlannedMaintenanceFactory(sqlstore sqlstore.SQLStore, sqlschema sqlschema.SQLSchema) factory.ProviderFactory[SQLMigration, Config] {
|
||||
return factory.NewProviderFactory(
|
||||
factory.MustNewName("add_label_expr_to_planned"),
|
||||
func(ctx context.Context, ps factory.ProviderSettings, c Config) (SQLMigration, error) {
|
||||
return &addLabelExpressionToPlannedMaintenance{
|
||||
sqlstore: sqlstore,
|
||||
sqlschema: sqlschema,
|
||||
}, nil
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
func (migration *addLabelExpressionToPlannedMaintenance) Register(migrations *migrate.Migrations) error {
|
||||
if err := migrations.Register(migration.Up, migration.Down); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (migration *addLabelExpressionToPlannedMaintenance) Up(ctx context.Context, db *bun.DB) error {
|
||||
tx, err := db.BeginTx(ctx, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
defer func() {
|
||||
_ = tx.Rollback()
|
||||
}()
|
||||
|
||||
table, _, err := migration.sqlschema.GetTable(ctx, sqlschema.TableName("planned_maintenance"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
column := &sqlschema.Column{
|
||||
Name: sqlschema.ColumnName("label_expression"),
|
||||
DataType: sqlschema.DataTypeText,
|
||||
Nullable: true,
|
||||
}
|
||||
|
||||
sqls := migration.sqlschema.Operator().AddColumn(table, nil, column, nil)
|
||||
for _, sql := range sqls {
|
||||
if _, err := tx.ExecContext(ctx, string(sql)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return tx.Commit()
|
||||
}
|
||||
|
||||
func (migration *addLabelExpressionToPlannedMaintenance) Down(ctx context.Context, db *bun.DB) error {
|
||||
tx, err := db.BeginTx(ctx, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
defer func() {
|
||||
_ = tx.Rollback()
|
||||
}()
|
||||
|
||||
table, _, err := migration.sqlschema.GetTable(ctx, sqlschema.TableName("planned_maintenance"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
column := &sqlschema.Column{
|
||||
Name: sqlschema.ColumnName("label_expression"),
|
||||
DataType: sqlschema.DataTypeText,
|
||||
Nullable: true,
|
||||
}
|
||||
|
||||
sqls := migration.sqlschema.Operator().DropColumn(table, column)
|
||||
for _, sql := range sqls {
|
||||
if _, err := tx.ExecContext(ctx, string(sql)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return tx.Commit()
|
||||
}
|
||||
@@ -170,6 +170,7 @@ func NewGettableAlertsFromAlertProvider(
|
||||
cfg *Config,
|
||||
getAlertStatusFunc func(model.Fingerprint) types.AlertStatus,
|
||||
setAlertStatusFunc func(model.LabelSet),
|
||||
mutedByFunc func(model.LabelSet) []string,
|
||||
params GettableAlertsParams,
|
||||
) (GettableAlerts, error) {
|
||||
res := GettableAlerts{}
|
||||
@@ -219,7 +220,7 @@ func NewGettableAlertsFromAlertProvider(
|
||||
continue
|
||||
}
|
||||
|
||||
alert := v2.AlertToOpenAPIAlert(alertData, getAlertStatusFunc(alertData.Fingerprint()), receivers, nil)
|
||||
alert := v2.AlertToOpenAPIAlert(alertData, getAlertStatusFunc(alertData.Fingerprint()), receivers, mutedByFunc(alertData.Labels))
|
||||
|
||||
res = append(res, alert)
|
||||
}
|
||||
|
||||
@@ -1,12 +0,0 @@
|
||||
package alertmanagertypes
|
||||
|
||||
import (
|
||||
"github.com/prometheus/alertmanager/types"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
type MemMarker = types.MemMarker
|
||||
|
||||
func NewMarker(r prometheus.Registerer) *MemMarker {
|
||||
return types.NewMarker(r)
|
||||
}
|
||||
@@ -8,6 +8,8 @@ import (
|
||||
"github.com/SigNoz/signoz/pkg/errors"
|
||||
"github.com/SigNoz/signoz/pkg/types"
|
||||
"github.com/SigNoz/signoz/pkg/valuer"
|
||||
"github.com/expr-lang/expr"
|
||||
"github.com/prometheus/common/model"
|
||||
"github.com/uptrace/bun"
|
||||
)
|
||||
|
||||
@@ -54,34 +56,37 @@ type StorablePlannedMaintenance struct {
|
||||
types.Identifiable
|
||||
types.TimeAuditable
|
||||
types.UserAuditable
|
||||
Name string `bun:"name,type:text,notnull"`
|
||||
Description string `bun:"description,type:text"`
|
||||
Schedule *Schedule `bun:"schedule,type:text,notnull"`
|
||||
OrgID string `bun:"org_id,type:text"`
|
||||
Name string `bun:"name,type:text,notnull"`
|
||||
Description string `bun:"description,type:text"`
|
||||
Schedule *Schedule `bun:"schedule,type:text,notnull"`
|
||||
OrgID string `bun:"org_id,type:text"`
|
||||
LabelExpression string `bun:"label_expression,type:text"`
|
||||
}
|
||||
|
||||
type PlannedMaintenance struct {
|
||||
ID valuer.UUID `json:"id" required:"true"`
|
||||
Name string `json:"name" required:"true"`
|
||||
Description string `json:"description"`
|
||||
Schedule *Schedule `json:"schedule" required:"true"`
|
||||
RuleIDs []string `json:"alertIds"`
|
||||
CreatedAt time.Time `json:"createdAt"`
|
||||
CreatedBy string `json:"createdBy"`
|
||||
UpdatedAt time.Time `json:"updatedAt"`
|
||||
UpdatedBy string `json:"updatedBy"`
|
||||
Status MaintenanceStatus `json:"status" required:"true"`
|
||||
Kind MaintenanceKind `json:"kind" required:"true"`
|
||||
ID valuer.UUID `json:"id" required:"true"`
|
||||
Name string `json:"name" required:"true"`
|
||||
Description string `json:"description"`
|
||||
Schedule *Schedule `json:"schedule" required:"true"`
|
||||
RuleIDs []string `json:"alertIds"`
|
||||
LabelExpression string `json:"labelExpression,omitempty"`
|
||||
CreatedAt time.Time `json:"createdAt"`
|
||||
CreatedBy string `json:"createdBy"`
|
||||
UpdatedAt time.Time `json:"updatedAt"`
|
||||
UpdatedBy string `json:"updatedBy"`
|
||||
Status MaintenanceStatus `json:"status" required:"true"`
|
||||
Kind MaintenanceKind `json:"kind" required:"true"`
|
||||
}
|
||||
|
||||
// PostablePlannedMaintenance is the input payload for creating or updating a
|
||||
// planned maintenance. Server-owned fields (id, timestamps, audit users,
|
||||
// derived status / kind) are deliberately not accepted from the client.
|
||||
type PostablePlannedMaintenance struct {
|
||||
Name string `json:"name" required:"true"`
|
||||
Description string `json:"description"`
|
||||
Schedule *Schedule `json:"schedule" required:"true"`
|
||||
AlertIds []string `json:"alertIds"`
|
||||
Name string `json:"name" required:"true"`
|
||||
Description string `json:"description"`
|
||||
Schedule *Schedule `json:"schedule" required:"true"`
|
||||
AlertIds []string `json:"alertIds"`
|
||||
LabelExpression string `json:"labelExpression"`
|
||||
}
|
||||
|
||||
func (p *PostablePlannedMaintenance) Validate() error {
|
||||
@@ -113,6 +118,11 @@ func (p *PostablePlannedMaintenance) Validate() error {
|
||||
return errors.Newf(errors.TypeInvalidInput, ErrCodeInvalidPlannedMaintenancePayload, "missing duration in the payload")
|
||||
}
|
||||
}
|
||||
if p.LabelExpression != "" {
|
||||
if _, err := expr.Compile(p.LabelExpression, expr.AllowUndefinedVariables(), expr.AsBool()); err != nil {
|
||||
return errors.Newf(errors.TypeInvalidInput, ErrCodeInvalidPlannedMaintenancePayload, "invalid label expression: %v", err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -128,7 +138,7 @@ type PlannedMaintenanceWithRules struct {
|
||||
Rules []*StorablePlannedMaintenanceRule `bun:"rel:has-many,join:id=planned_maintenance_id"`
|
||||
}
|
||||
|
||||
func (m *PlannedMaintenance) ShouldSkip(ruleID string, now time.Time) bool {
|
||||
func (m *PlannedMaintenance) ShouldSkip(ruleID string, now time.Time, lset model.LabelSet) bool {
|
||||
// Check if the alert ID is in the maintenance window
|
||||
found := false
|
||||
if len(m.RuleIDs) > 0 {
|
||||
@@ -148,7 +158,23 @@ func (m *PlannedMaintenance) ShouldSkip(ruleID string, now time.Time) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// If alert is found, we check if it should be skipped based on the schedule
|
||||
if !m.isScheduleActive(now) {
|
||||
return false
|
||||
}
|
||||
|
||||
// lset is empty when called from IsActive (no instance labels available);
|
||||
// skip expression filtering in that case.
|
||||
if m.LabelExpression != "" && len(lset) != 0 {
|
||||
if !evalLabelExpression(m.LabelExpression, lset) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
// isScheduleActive reports whether now falls inside the maintenance window's schedule.
|
||||
func (m *PlannedMaintenance) isScheduleActive(now time.Time) bool {
|
||||
loc, err := time.LoadLocation(m.Schedule.Timezone)
|
||||
if err != nil {
|
||||
return false
|
||||
@@ -187,6 +213,25 @@ func (m *PlannedMaintenance) ShouldSkip(ruleID string, now time.Time) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// evalLabelExpression compiles and runs the expression against the provided labels.
|
||||
// Returns false on any error (safety-first: don't suppress on a bad expression).
|
||||
func evalLabelExpression(expression string, lset model.LabelSet) bool {
|
||||
env := make(map[string]interface{}, len(lset))
|
||||
for k, v := range lset {
|
||||
env[string(k)] = string(v)
|
||||
}
|
||||
program, err := expr.Compile(expression, expr.Env(env), expr.AllowUndefinedVariables())
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
output, err := expr.Run(program, env)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
result, ok := output.(bool)
|
||||
return ok && result
|
||||
}
|
||||
|
||||
// checkDaily rebases the recurrence start to today (or yesterday if needed)
|
||||
// and returns true if currentTime is within [candidate, candidate+Duration].
|
||||
func (m *PlannedMaintenance) checkDaily(currentTime time.Time, rec *Recurrence, loc *time.Location) bool {
|
||||
@@ -274,7 +319,7 @@ func (m *PlannedMaintenance) IsActive(now time.Time) bool {
|
||||
if len(m.RuleIDs) > 0 {
|
||||
ruleID = (m.RuleIDs)[0]
|
||||
}
|
||||
return m.ShouldSkip(ruleID, now)
|
||||
return m.ShouldSkip(ruleID, now, nil)
|
||||
}
|
||||
|
||||
func (m *PlannedMaintenance) IsUpcoming() bool {
|
||||
@@ -342,29 +387,31 @@ func (m PlannedMaintenance) MarshalJSON() ([]byte, error) {
|
||||
}
|
||||
|
||||
return json.Marshal(struct {
|
||||
ID valuer.UUID `json:"id" db:"id"`
|
||||
Name string `json:"name" db:"name"`
|
||||
Description string `json:"description" db:"description"`
|
||||
Schedule *Schedule `json:"schedule" db:"schedule"`
|
||||
AlertIds []string `json:"alertIds" db:"alert_ids"`
|
||||
CreatedAt time.Time `json:"createdAt" db:"created_at"`
|
||||
CreatedBy string `json:"createdBy" db:"created_by"`
|
||||
UpdatedAt time.Time `json:"updatedAt" db:"updated_at"`
|
||||
UpdatedBy string `json:"updatedBy" db:"updated_by"`
|
||||
Status MaintenanceStatus `json:"status"`
|
||||
Kind MaintenanceKind `json:"kind"`
|
||||
ID valuer.UUID `json:"id" db:"id"`
|
||||
Name string `json:"name" db:"name"`
|
||||
Description string `json:"description" db:"description"`
|
||||
Schedule *Schedule `json:"schedule" db:"schedule"`
|
||||
AlertIds []string `json:"alertIds" db:"alert_ids"`
|
||||
LabelExpression string `json:"labelExpression,omitempty"`
|
||||
CreatedAt time.Time `json:"createdAt" db:"created_at"`
|
||||
CreatedBy string `json:"createdBy" db:"created_by"`
|
||||
UpdatedAt time.Time `json:"updatedAt" db:"updated_at"`
|
||||
UpdatedBy string `json:"updatedBy" db:"updated_by"`
|
||||
Status MaintenanceStatus `json:"status"`
|
||||
Kind MaintenanceKind `json:"kind"`
|
||||
}{
|
||||
ID: m.ID,
|
||||
Name: m.Name,
|
||||
Description: m.Description,
|
||||
Schedule: m.Schedule,
|
||||
AlertIds: m.RuleIDs,
|
||||
CreatedAt: m.CreatedAt,
|
||||
CreatedBy: m.CreatedBy,
|
||||
UpdatedAt: m.UpdatedAt,
|
||||
UpdatedBy: m.UpdatedBy,
|
||||
Status: status,
|
||||
Kind: kind,
|
||||
ID: m.ID,
|
||||
Name: m.Name,
|
||||
Description: m.Description,
|
||||
Schedule: m.Schedule,
|
||||
AlertIds: m.RuleIDs,
|
||||
LabelExpression: m.LabelExpression,
|
||||
CreatedAt: m.CreatedAt,
|
||||
CreatedBy: m.CreatedBy,
|
||||
UpdatedAt: m.UpdatedAt,
|
||||
UpdatedBy: m.UpdatedBy,
|
||||
Status: status,
|
||||
Kind: kind,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -377,15 +424,16 @@ func (m *PlannedMaintenanceWithRules) ToPlannedMaintenance() *PlannedMaintenance
|
||||
}
|
||||
|
||||
return &PlannedMaintenance{
|
||||
ID: m.ID,
|
||||
Name: m.Name,
|
||||
Description: m.Description,
|
||||
Schedule: m.Schedule,
|
||||
RuleIDs: ruleIDs,
|
||||
CreatedAt: m.CreatedAt,
|
||||
UpdatedAt: m.UpdatedAt,
|
||||
CreatedBy: m.CreatedBy,
|
||||
UpdatedBy: m.UpdatedBy,
|
||||
ID: m.ID,
|
||||
Name: m.Name,
|
||||
Description: m.Description,
|
||||
Schedule: m.Schedule,
|
||||
RuleIDs: ruleIDs,
|
||||
LabelExpression: m.LabelExpression,
|
||||
CreatedAt: m.CreatedAt,
|
||||
UpdatedAt: m.UpdatedAt,
|
||||
CreatedBy: m.CreatedBy,
|
||||
UpdatedBy: m.UpdatedBy,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/valuer"
|
||||
"github.com/prometheus/common/model"
|
||||
)
|
||||
|
||||
func TestShouldSkipMaintenance(t *testing.T) {
|
||||
@@ -660,7 +661,7 @@ func TestShouldSkipMaintenance(t *testing.T) {
|
||||
}
|
||||
|
||||
for idx, c := range cases {
|
||||
result := c.maintenance.ShouldSkip(c.name, c.ts)
|
||||
result := c.maintenance.ShouldSkip(c.name, c.ts, model.LabelSet{})
|
||||
if result != c.skip {
|
||||
t.Errorf("skip %v, got %v, case:%d - %s", c.skip, result, idx, c.name)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user