Compare commits

...

9 Commits

Author SHA1 Message Date
Jatinderjit Singh
7e50c82d89 refactor: pass MaintenanceMuter directly to pipelineBuilder
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-28 01:43:03 +05:30
Jatinderjit Singh
254c792a37 chore: replace SPDX tag with full Apache 2.0 license boilerplate
The full license text is unambiguously compliant with Apache 2.0 Section 4(a),
which requires giving recipients "a copy of this License".

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-28 01:43:03 +05:30
Jatinderjit Singh
23051468f6 chore: add license header to pipeline_builder.go
Copied code originates from Apache-2.0 licensed Prometheus Alertmanager;
add dual copyright + SPDX identifier following the repo's convention.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-28 01:43:03 +05:30
Jatinderjit Singh
707bc4d740 refactor: move maintenance mute stage into custom pipelineBuilder
Copy notify.PipelineBuilder locally so we can inject mms between the
silence stage and the receiver stage (GossipSettle → Inhibit →
TimeActive → TimeMute → Silence → mms → Receiver), matching the
correct suppression order the team requires.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-28 01:43:03 +05:30
Jatinderjit Singh
c26f9b9900 refactor: wrap routing pipeline once instead of per-route injection
Replace the per-route-entry loop with a single MultiStage wrap so
maintenance suppression runs once per dispatch group before routing.
2026-04-28 01:43:03 +05:30
Jatinderjit Singh
ddf09e5846 add maintenanceMuteStage to move planned maintenance to alertmanager
Rules previously skipped rule.Eval() entirely during maintenance windows.
This change moves suppression to MaintenanceMuter, injected as a Stage
in the alertmanager notification pipeline. Now rules always evaluate and
everys suppression is handled by alertmanager.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-28 01:43:03 +05:30
Jatinderjit Singh
61cd4e38dc fix: maintenance ignores recurrence when fixed times also set 2026-04-28 01:43:03 +05:30
Prakhar Dewan
d1c9864f52 chore: remove unused files (#11033)
Some checks failed
build-staging / prepare (push) Has been cancelled
build-staging / js-build (push) Has been cancelled
build-staging / go-build (push) Has been cancelled
build-staging / staging (push) Has been cancelled
Release Drafter / update_release_draft (push) Has been cancelled
* chore: remove unused files

* chore: add knip.json file
2026-04-27 19:23:12 +00:00
Manika Malhotra
f80b650390 chore(onboarding): add open-source tooling to interest in signoz option (#11083) 2026-04-27 14:55:09 +00:00
36 changed files with 402 additions and 443 deletions

View File

@@ -49,7 +49,7 @@ func PrepareTaskFunc(opts baserules.PrepareTaskOptions) (baserules.Task, error)
rules = append(rules, tr)
// create ch rule task for evaluation
task = newTask(baserules.TaskTypeCh, opts.TaskName, evaluation.GetFrequency().Duration(), rules, opts.ManagerOpts, opts.NotifyFunc, opts.MaintenanceStore, opts.OrgID)
task = newTask(baserules.TaskTypeCh, opts.TaskName, evaluation.GetFrequency().Duration(), rules, opts.ManagerOpts, opts.NotifyFunc, opts.OrgID)
} else if opts.Rule.RuleType == ruletypes.RuleTypeProm {
@@ -73,7 +73,7 @@ func PrepareTaskFunc(opts baserules.PrepareTaskOptions) (baserules.Task, error)
rules = append(rules, pr)
// create promql rule task for evaluation
task = newTask(baserules.TaskTypeProm, opts.TaskName, evaluation.GetFrequency().Duration(), rules, opts.ManagerOpts, opts.NotifyFunc, opts.MaintenanceStore, opts.OrgID)
task = newTask(baserules.TaskTypeProm, opts.TaskName, evaluation.GetFrequency().Duration(), rules, opts.ManagerOpts, opts.NotifyFunc, opts.OrgID)
} else if opts.Rule.RuleType == ruletypes.RuleTypeAnomaly {
// create anomaly rule
@@ -96,7 +96,7 @@ func PrepareTaskFunc(opts baserules.PrepareTaskOptions) (baserules.Task, error)
rules = append(rules, ar)
// create anomaly rule task for evaluation
task = newTask(baserules.TaskTypeCh, opts.TaskName, evaluation.GetFrequency().Duration(), rules, opts.ManagerOpts, opts.NotifyFunc, opts.MaintenanceStore, opts.OrgID)
task = newTask(baserules.TaskTypeCh, opts.TaskName, evaluation.GetFrequency().Duration(), rules, opts.ManagerOpts, opts.NotifyFunc, opts.OrgID)
} else {
return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "unsupported rule type %s. Supported types: %s, %s", opts.Rule.RuleType, ruletypes.RuleTypeProm, ruletypes.RuleTypeThreshold)
@@ -210,9 +210,9 @@ func TestNotification(opts baserules.PrepareTestRuleOptions) (int, error) {
}
// newTask returns an appropriate group for the rule type
func newTask(taskType baserules.TaskType, name string, frequency time.Duration, rules []baserules.Rule, opts *baserules.ManagerOptions, notify baserules.NotifyFunc, maintenanceStore ruletypes.MaintenanceStore, orgID valuer.UUID) baserules.Task {
func newTask(taskType baserules.TaskType, name string, frequency time.Duration, rules []baserules.Rule, opts *baserules.ManagerOptions, notify baserules.NotifyFunc, orgID valuer.UUID) baserules.Task {
if taskType == baserules.TaskTypeCh {
return baserules.NewRuleTask(name, "", frequency, rules, opts, notify, maintenanceStore, orgID)
return baserules.NewRuleTask(name, "", frequency, rules, opts, notify, orgID)
}
return baserules.NewPromRuleTask(name, "", frequency, rules, opts, notify, maintenanceStore, orgID)
return baserules.NewPromRuleTask(name, "", frequency, rules, opts, notify, orgID)
}

5
frontend/knip.json Normal file
View File

@@ -0,0 +1,5 @@
{
"$schema": "https://unpkg.com/knip@5/schema.json",
"project": ["src/**/*.ts", "src/**/*.tsx"],
"ignore": ["src/api/generated/**/*.ts"]
}

View File

@@ -27,6 +27,7 @@ const interestedInOptions: Record<string, string> = {
singleTool:
'Single Tool (logs, metrics & traces) to reduce operational overhead',
correlateSignals: 'Correlate signals for faster troubleshooting',
openSourceTooling: 'Prefer open-source tooling',
};
export function AboutSigNozQuestions({

View File

@@ -1,22 +0,0 @@
import ROUTES from 'constants/routes';
import CreateAlertChannels from 'container/CreateAlertChannels';
import { ChannelType } from 'container/CreateAlertChannels/config';
import GeneralSettings from 'container/GeneralSettings';
import { t } from 'i18next';
export const alertsRoutesConfig = [
{
Component: GeneralSettings,
name: t('routes.general'),
route: ROUTES.SETTINGS,
key: ROUTES.SETTINGS,
},
{
Component: (): JSX.Element => (
<CreateAlertChannels preType={ChannelType.Slack} />
),
name: t('routes.alert_channels'),
route: ROUTES.CHANNELS_NEW,
key: ROUTES.CHANNELS_NEW,
},
];

View File

@@ -1,19 +0,0 @@
import { useLocation } from 'react-router-dom';
import RouteTab from 'components/RouteTab';
import history from 'lib/history';
import { alertsRoutesConfig } from './config';
function SettingsPage(): JSX.Element {
const { pathname } = useLocation();
return (
<RouteTab
history={history}
routes={alertsRoutesConfig}
activeKey={pathname}
/>
);
}
export default SettingsPage;

View File

@@ -1,54 +0,0 @@
import { useMemo } from 'react';
import { Color } from '@signozhq/design-tokens';
import { CircleCheck, Siren } from 'lucide-react';
import { getDurationFromNow } from 'utils/timeUtils';
import { AlertStatusProps, StatusConfig } from './types';
import './AlertStatus.styles.scss';
export default function AlertStatus({
status,
timestamp,
}: AlertStatusProps): JSX.Element {
const statusConfig: StatusConfig = useMemo(
() => ({
firing: {
icon: <Siren size={14} color={Color.TEXT_VANILLA_400} />,
text: 'Firing since',
extraInfo: timestamp ? (
<>
<div></div>
<div className="time">{getDurationFromNow(timestamp)}</div>
</>
) : null,
className: 'alert-status-info--firing',
},
resolved: {
icon: (
<CircleCheck
size={14}
fill={Color.BG_VANILLA_400}
color={Color.BG_INK_400}
/>
),
text: 'Resolved',
extraInfo: null,
className: 'alert-status-info--resolved',
},
}),
[timestamp],
);
const currentStatus = statusConfig[status];
return (
<div className={`alert-status-info ${currentStatus.className}`}>
<div className="alert-status-info__icon">{currentStatus.icon}</div>
<div className="alert-status-info__details">
<div className="text">{currentStatus.text}</div>
{currentStatus.extraInfo}
</div>
</div>
);
}

View File

@@ -1,18 +0,0 @@
export type AlertStatusProps =
| { status: 'firing'; timestamp: number }
| { status: 'resolved'; timestamp?: number };
export type StatusConfig = {
firing: {
icon: JSX.Element;
text: string;
extraInfo: JSX.Element | null;
className: string;
};
resolved: {
icon: JSX.Element;
text: string;
extraInfo: JSX.Element | null;
className: string;
};
};

View File

@@ -1,3 +0,0 @@
import AlertHistory from 'container/AlertHistory';
export default AlertHistory;

View File

@@ -1,86 +0,0 @@
import {
FiltersType,
IQuickFiltersConfig,
} from 'components/QuickFilters/types';
import { DataTypes } from 'types/api/queryBuilder/queryAutocompleteResponse';
import { DataSource } from 'types/common/queryBuilder';
export const ExceptionsQuickFiltersConfig: IQuickFiltersConfig[] = [
{
type: FiltersType.CHECKBOX,
title: 'Environment',
dataSource: DataSource.TRACES,
attributeKey: {
key: 'deployment.environment',
dataType: DataTypes.String,
type: 'resource',
},
defaultOpen: true,
},
{
type: FiltersType.CHECKBOX,
title: 'Service Name',
dataSource: DataSource.TRACES,
attributeKey: {
key: 'service.name',
dataType: DataTypes.String,
type: 'resource',
},
defaultOpen: false,
},
{
type: FiltersType.CHECKBOX,
title: 'Hostname',
dataSource: DataSource.TRACES,
attributeKey: {
key: 'host.name',
dataType: DataTypes.String,
type: 'resource',
},
defaultOpen: false,
},
{
type: FiltersType.CHECKBOX,
title: 'K8s Cluster Name',
dataSource: DataSource.TRACES,
attributeKey: {
key: 'k8s.cluster.name',
dataType: DataTypes.String,
type: 'resource',
},
defaultOpen: false,
},
{
type: FiltersType.CHECKBOX,
title: 'K8s Deployment Name',
dataSource: DataSource.TRACES,
attributeKey: {
key: 'k8s.deployment.name',
dataType: DataTypes.String,
type: 'resource',
},
defaultOpen: false,
},
{
type: FiltersType.CHECKBOX,
title: 'K8s Namespace Name',
dataSource: DataSource.TRACES,
attributeKey: {
key: 'k8s.namespace.name',
dataType: DataTypes.String,
type: 'resource',
},
defaultOpen: false,
},
{
type: FiltersType.CHECKBOX,
title: 'K8s Pod Name',
dataSource: DataSource.TRACES,
attributeKey: {
key: 'k8s.pod.name',
dataType: DataTypes.String,
type: 'resource',
},
defaultOpen: false,
},
];

View File

@@ -1,13 +0,0 @@
import BillingContainer from 'container/BillingContainer/BillingContainer';
import './BillingPage.styles.scss';
function BillingPage(): JSX.Element {
return (
<div className="billingPageContainer">
<BillingContainer />
</div>
);
}
export default BillingPage;

View File

@@ -1,3 +0,0 @@
import BillingPage from './BillingPage';
export default BillingPage;

View File

@@ -1,18 +0,0 @@
import { Typography } from 'antd';
import styled from 'styled-components';
export const Title = styled(Typography)`
&&& {
margin-top: 1rem;
margin-bottom: 1rem;
}
`;
export const ButtonContainer = styled.div`
&&& {
display: flex;
justify-content: flex-end;
align-items: center;
margin-top: 1rem;
}
`;

View File

@@ -1,3 +0,0 @@
import HomePage from './HomePage';
export default HomePage;

View File

@@ -1,14 +0,0 @@
import { Col } from 'antd';
import { themeColors } from 'constants/theme';
import styled from 'styled-components';
export const WrapperStyled = styled.div`
display: flex;
flex-direction: column;
flex: 1;
color: ${themeColors.lightWhite};
`;
export const ButtonWrapperStyled = styled(Col)`
margin-left: auto;
`;

View File

@@ -1,7 +0,0 @@
export const removeSourcePageFromPath = (path: string): string => {
const lastSlashIndex = path.lastIndexOf('/');
if (lastSlashIndex !== -1) {
return path.substring(0, lastSlashIndex);
}
return path;
};

View File

@@ -1,6 +0,0 @@
import MySettingsContainer from 'container/MySettings';
function MySettings(): JSX.Element {
return <MySettingsContainer />;
}
export default MySettings;

View File

@@ -1,25 +0,0 @@
import { Button, Typography } from 'antd';
import SomethingWentWrongAsset from 'assets/SomethingWentWrong';
import { Container } from 'components/NotFound/styles';
import ROUTES from 'constants/routes';
import history from 'lib/history';
function SomethingWentWrong(): JSX.Element {
return (
<Container>
<SomethingWentWrongAsset />
<Typography.Title level={3}>Oops! Something went wrong</Typography.Title>
<Button
type="primary"
onClick={(): void => {
history.push(ROUTES.HOME);
}}
className="periscope-btn primary"
>
Return to Home
</Button>
</Container>
);
}
export default SomethingWentWrong;

View File

@@ -1,10 +0,0 @@
import styled from 'styled-components';
export const Container = styled.div`
margin: 1rem 0;
`;
export const ActionsWrapper = styled.div`
display: flex;
justify-content: flex-end;
`;

View File

@@ -0,0 +1,94 @@
package alertmanagerserver
import (
"context"
"log/slog"
"sync"
"time"
"github.com/prometheus/alertmanager/types"
"github.com/prometheus/common/model"
"github.com/SigNoz/signoz/pkg/types/ruletypes"
)
// MaintenanceMuter implements types.Muter for maintenance windows.
// It suppresses alerts whose ruleId label matches an active maintenance schedule.
// Results are cached for cacheTTL to avoid a DB query on every per-alert check.
type MaintenanceMuter struct {
maintenanceStore ruletypes.MaintenanceStore
orgID string
logger *slog.Logger
mu sync.RWMutex
cached []*ruletypes.PlannedMaintenance
cacheExpiry time.Time
}
const maintenanceCacheTTL = 30 * time.Second
func NewMaintenanceMuter(store ruletypes.MaintenanceStore, orgID string, logger *slog.Logger) *MaintenanceMuter {
return &MaintenanceMuter{
maintenanceStore: store,
orgID: orgID,
logger: logger,
}
}
func (m *MaintenanceMuter) Mutes(ctx context.Context, lset model.LabelSet) bool {
ruleID := string(lset[ruletypes.AlertRuleIDLabel])
if ruleID == "" {
return false
}
now := time.Now()
for _, mw := range m.getMaintenances(ctx) {
if mw.ShouldSkip(ruleID, now) {
return true
}
}
return false
}
func (m *MaintenanceMuter) getMaintenances(ctx context.Context) []*ruletypes.PlannedMaintenance {
m.mu.RLock()
if time.Now().Before(m.cacheExpiry) {
cached := m.cached
m.mu.RUnlock()
return cached
}
m.mu.RUnlock()
m.mu.Lock()
defer m.mu.Unlock()
// Double-check after acquiring write lock.
if time.Now().Before(m.cacheExpiry) {
return m.cached
}
mws, err := m.maintenanceStore.ListPlannedMaintenance(ctx, m.orgID)
if err != nil {
m.logger.ErrorContext(ctx, "failed to list planned maintenance windows; alerts will not be suppressed", slog.String("org_id", m.orgID))
return m.cached // return stale (potentially empty) cache on error
}
m.cached = mws
m.cacheExpiry = time.Now().Add(maintenanceCacheTTL)
return m.cached
}
// maintenanceMuteStage wraps MaintenanceMuter as a notify.Stage.
// We implement the stage directly rather than using notify.NewMuteStage to avoid
// a dependency on the unexported *notify.Metrics field of PipelineBuilder.
type maintenanceMuteStage struct {
muter *MaintenanceMuter
}
func (s *maintenanceMuteStage) Exec(ctx context.Context, _ *slog.Logger, alerts ...*types.Alert) (context.Context, []*types.Alert, error) {
filtered := make([]*types.Alert, 0, len(alerts))
for _, a := range alerts {
if !s.muter.Mutes(ctx, a.Labels) {
filtered = append(filtered, a)
}
}
return ctx, filtered, nil
}

View File

@@ -0,0 +1,120 @@
// Copyright (c) 2026 SigNoz, Inc.
// Copyright 2015 Prometheus Team
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package alertmanagerserver
// pipelineBuilder is a local copy of notify.PipelineBuilder that injects
// the maintenance mute stage immediately before the receiver stage.
//
// We maintain our own copy so we can control exactly where in the pipeline
// the maintenance stage runs (between the silence stage and the receiver),
// which is not possible by wrapping the output of the upstream builder.
//
// Upstream pipeline order (notify.PipelineBuilder.New, notify.go:444):
//
// GossipSettle → Inhibit → TimeActive → TimeMute → Silence → [mms] → Receiver
import (
"time"
"github.com/prometheus/alertmanager/featurecontrol"
"github.com/prometheus/alertmanager/inhibit"
"github.com/prometheus/alertmanager/nflog/nflogpb"
"github.com/prometheus/alertmanager/notify"
"github.com/prometheus/alertmanager/silence"
"github.com/prometheus/alertmanager/timeinterval"
"github.com/prometheus/alertmanager/types"
"github.com/prometheus/client_golang/prometheus"
)
type pipelineBuilder struct {
metrics *notify.Metrics
ff featurecontrol.Flagger
muter *MaintenanceMuter
}
func newPipelineBuilder(
r prometheus.Registerer,
ff featurecontrol.Flagger,
muter *MaintenanceMuter,
) *pipelineBuilder {
return &pipelineBuilder{
metrics: notify.NewMetrics(r, ff),
ff: ff,
muter: muter,
}
}
// New returns a map of receivers to Stages, mirroring notify.PipelineBuilder.New
// but inserting a maintenanceMuteStage between the silence stage and the receiver.
func (pb *pipelineBuilder) New(
receivers map[string][]notify.Integration,
wait func() time.Duration,
inhibitor *inhibit.Inhibitor,
silencer *silence.Silencer,
intervener *timeinterval.Intervener,
marker types.GroupMarker,
notificationLog notify.NotificationLog,
peer notify.Peer,
) notify.RoutingStage {
rs := make(notify.RoutingStage, len(receivers))
ms := notify.NewGossipSettleStage(peer)
is := notify.NewMuteStage(inhibitor, pb.metrics)
tas := notify.NewTimeActiveStage(intervener, marker, pb.metrics)
tms := notify.NewTimeMuteStage(intervener, marker, pb.metrics)
ss := notify.NewMuteStage(silencer, pb.metrics)
var mms *maintenanceMuteStage
if pb.muter != nil {
mms = &maintenanceMuteStage{muter: pb.muter}
}
for name := range receivers {
stages := notify.MultiStage{ms, is, tas, tms, ss}
if mms != nil {
stages = append(stages, mms)
}
stages = append(stages, buildReceiverStage(name, receivers[name], wait, notificationLog, pb.metrics))
rs[name] = stages
}
pb.metrics.InitializeFor(receivers)
return rs
}
// buildReceiverStage is a copy of notify.createReceiverStage (unexported upstream).
func buildReceiverStage(
name string,
integrations []notify.Integration,
wait func() time.Duration,
notificationLog notify.NotificationLog,
metrics *notify.Metrics,
) notify.Stage {
var fs notify.FanoutStage
for i := range integrations {
recv := &nflogpb.Receiver{
GroupName: name,
Integration: integrations[i].Name(),
Idx: uint32(integrations[i].Index()),
}
var s notify.MultiStage
s = append(s, notify.NewWaitStage(wait))
s = append(s, notify.NewDedupStage(&integrations[i], notificationLog, recv))
s = append(s, notify.NewRetryStage(integrations[i], name, metrics))
s = append(s, notify.NewSetNotifiesStage(notificationLog, recv))
fs = append(fs, s)
}
return fs
}

View File

@@ -26,14 +26,13 @@ import (
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager"
"github.com/SigNoz/signoz/pkg/errors"
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
ruletypes "github.com/SigNoz/signoz/pkg/types/ruletypes"
)
var (
// This is not a real file and will never be used. We need this placeholder to ensure maintenance runs on shutdown. See
// https://github.com/prometheus/server/blob/3ee2cd0f1271e277295c02b6160507b4d193dde2/silence/silence.go#L435-L438
// and https://github.com/prometheus/server/blob/3b06b97af4d146e141af92885a185891eb79a5b0/nflog/nflog.go#L362.
snapfnoop string = "snapfnoop"
)
// This is not a real snapshot file and will never be used. We need this placeholder to ensure maintenance runs on shutdown.
// See https://github.com/prometheus/alertmanager/blob/3ee2cd0f1271e277295c02b6160507b4d193dde2/silence/silence.go#L435-L438
// and https://github.com/prometheus/alertmanager/blob/3b06b97af4d146e141af92885a185891eb79a5b0/nflog/nflog.go#L362.
var snapfnoop string = "snapfnoop"
type Server struct {
// logger is the logger for the alertmanager
@@ -63,7 +62,7 @@ type Server struct {
silencer *silence.Silencer
silences *silence.Silences
timeIntervals map[string][]timeinterval.TimeInterval
pipelineBuilder *notify.PipelineBuilder
pipelineBuilder *pipelineBuilder
marker *alertmanagertypes.MemMarker
tmpl *template.Template
wg sync.WaitGroup
@@ -71,7 +70,16 @@ type Server struct {
notificationManager nfmanager.NotificationManager
}
func New(ctx context.Context, logger *slog.Logger, registry prometheus.Registerer, srvConfig Config, orgID string, stateStore alertmanagertypes.StateStore, nfManager nfmanager.NotificationManager) (*Server, error) {
func New(
ctx context.Context,
logger *slog.Logger,
registry prometheus.Registerer,
srvConfig Config,
orgID string,
stateStore alertmanagertypes.StateStore,
nfManager nfmanager.NotificationManager,
maintenanceStore ruletypes.MaintenanceStore,
) (*Server, error) {
server := &Server{
logger: logger.With(slog.String("pkg", "go.signoz.io/pkg/alertmanager/alertmanagerserver")),
registry: registry,
@@ -160,7 +168,6 @@ func New(ctx context.Context, logger *slog.Logger, registry prometheus.Registere
return c, server.stateStore.Set(ctx, storableSilences)
})
}()
// Start maintenance for notification logs
@@ -196,7 +203,11 @@ func New(ctx context.Context, logger *slog.Logger, registry prometheus.Registere
return nil, err
}
server.pipelineBuilder = notify.NewPipelineBuilder(signozRegisterer, featurecontrol.NoopFlags{})
var muter *MaintenanceMuter
if maintenanceStore != nil {
muter = NewMaintenanceMuter(maintenanceStore, orgID, server.logger)
}
server.pipelineBuilder = newPipelineBuilder(signozRegisterer, featurecontrol.NoopFlags{}, muter)
server.dispatcherMetrics = NewDispatcherMetrics(false, signozRegisterer)
return server, nil

View File

@@ -90,7 +90,7 @@ func TestEndToEndAlertManagerFlow(t *testing.T) {
stateStore := alertmanagertypestest.NewStateStore()
registry := prometheus.NewRegistry()
logger := slog.New(slog.DiscardHandler)
server, err := New(context.Background(), logger, registry, srvCfg, orgID, stateStore, notificationManager)
server, err := New(context.Background(), logger, registry, srvCfg, orgID, stateStore, notificationManager, nil)
require.NoError(t, err)
amConfig, err := alertmanagertypes.NewDefaultConfig(srvCfg.Global, srvCfg.Route, orgID)
require.NoError(t, err)

View File

@@ -25,7 +25,7 @@ import (
func TestServerSetConfigAndStop(t *testing.T) {
notificationManager := nfmanagertest.NewMock()
server, err := New(context.Background(), slog.New(slog.DiscardHandler), prometheus.NewRegistry(), NewConfig(), "1", alertmanagertypestest.NewStateStore(), notificationManager)
server, err := New(context.Background(), slog.New(slog.DiscardHandler), prometheus.NewRegistry(), NewConfig(), "1", alertmanagertypestest.NewStateStore(), notificationManager, nil)
require.NoError(t, err)
amConfig, err := alertmanagertypes.NewDefaultConfig(alertmanagertypes.GlobalConfig{}, alertmanagertypes.RouteConfig{GroupInterval: 1 * time.Minute, RepeatInterval: 1 * time.Minute, GroupWait: 1 * time.Minute}, "1")
@@ -37,7 +37,7 @@ func TestServerSetConfigAndStop(t *testing.T) {
func TestServerTestReceiverTypeWebhook(t *testing.T) {
notificationManager := nfmanagertest.NewMock()
server, err := New(context.Background(), slog.New(slog.DiscardHandler), prometheus.NewRegistry(), NewConfig(), "1", alertmanagertypestest.NewStateStore(), notificationManager)
server, err := New(context.Background(), slog.New(slog.DiscardHandler), prometheus.NewRegistry(), NewConfig(), "1", alertmanagertypestest.NewStateStore(), notificationManager, nil)
require.NoError(t, err)
amConfig, err := alertmanagertypes.NewDefaultConfig(alertmanagertypes.GlobalConfig{}, alertmanagertypes.RouteConfig{GroupInterval: 1 * time.Minute, RepeatInterval: 1 * time.Minute, GroupWait: 1 * time.Minute}, "1")
@@ -85,7 +85,7 @@ func TestServerPutAlerts(t *testing.T) {
srvCfg := NewConfig()
srvCfg.Route.GroupInterval = 1 * time.Second
notificationManager := nfmanagertest.NewMock()
server, err := New(context.Background(), slog.New(slog.DiscardHandler), prometheus.NewRegistry(), srvCfg, "1", stateStore, notificationManager)
server, err := New(context.Background(), slog.New(slog.DiscardHandler), prometheus.NewRegistry(), srvCfg, "1", stateStore, notificationManager, nil)
require.NoError(t, err)
amConfig, err := alertmanagertypes.NewDefaultConfig(srvCfg.Global, srvCfg.Route, "1")
@@ -133,7 +133,7 @@ func TestServerTestAlert(t *testing.T) {
srvCfg := NewConfig()
srvCfg.Route.GroupInterval = 1 * time.Second
notificationManager := nfmanagertest.NewMock()
server, err := New(context.Background(), slog.New(slog.DiscardHandler), prometheus.NewRegistry(), srvCfg, "1", stateStore, notificationManager)
server, err := New(context.Background(), slog.New(slog.DiscardHandler), prometheus.NewRegistry(), srvCfg, "1", stateStore, notificationManager, nil)
require.NoError(t, err)
amConfig, err := alertmanagertypes.NewDefaultConfig(srvCfg.Global, srvCfg.Route, "1")
@@ -238,7 +238,7 @@ func TestServerTestAlertContinuesOnFailure(t *testing.T) {
srvCfg := NewConfig()
srvCfg.Route.GroupInterval = 1 * time.Second
notificationManager := nfmanagertest.NewMock()
server, err := New(context.Background(), slog.New(slog.DiscardHandler), prometheus.NewRegistry(), srvCfg, "1", stateStore, notificationManager)
server, err := New(context.Background(), slog.New(slog.DiscardHandler), prometheus.NewRegistry(), srvCfg, "1", stateStore, notificationManager, nil)
require.NoError(t, err)
amConfig, err := alertmanagertypes.NewDefaultConfig(srvCfg.Global, srvCfg.Route, "1")

View File

@@ -14,6 +14,7 @@ import (
"github.com/SigNoz/signoz/pkg/factory"
"github.com/SigNoz/signoz/pkg/modules/organization"
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
ruletypes "github.com/SigNoz/signoz/pkg/types/ruletypes"
)
type Service struct {
@@ -39,16 +40,18 @@ type Service struct {
serversMtx sync.RWMutex
notificationManager nfmanager.NotificationManager
maintenanceStore ruletypes.MaintenanceStore
}
func New(
ctx context.Context,
settings factory.ScopedProviderSettings,
config alertmanagerserver.Config,
stateStore alertmanagertypes.StateStore,
configStore alertmanagertypes.ConfigStore,
orgGetter organization.Getter,
nfManager nfmanager.NotificationManager,
maintenanceStore ruletypes.MaintenanceStore,
) *Service {
service := &Service{
config: config,
@@ -59,6 +62,7 @@ func New(
servers: make(map[string]*alertmanagerserver.Server),
serversMtx: sync.RWMutex{},
notificationManager: nfManager,
maintenanceStore: maintenanceStore,
}
return service
@@ -177,7 +181,10 @@ func (service *Service) newServer(ctx context.Context, orgID string) (*alertmana
return nil, err
}
server, err := alertmanagerserver.New(ctx, service.settings.Logger(), service.settings.PrometheusRegisterer(), service.config, orgID, service.stateStore, service.notificationManager)
server, err := alertmanagerserver.New(
ctx, service.settings.Logger(), service.settings.PrometheusRegisterer(), service.config, orgID,
service.stateStore, service.notificationManager, service.maintenanceStore,
)
if err != nil {
return nil, err
}

View File

@@ -20,6 +20,7 @@ import (
"github.com/SigNoz/signoz/pkg/types"
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
"github.com/SigNoz/signoz/pkg/types/authtypes"
ruletypes "github.com/SigNoz/signoz/pkg/types/ruletypes"
"github.com/SigNoz/signoz/pkg/valuer"
)
@@ -30,35 +31,49 @@ type provider struct {
configStore alertmanagertypes.ConfigStore
stateStore alertmanagertypes.StateStore
notificationManager nfmanager.NotificationManager
maintenanceStore ruletypes.MaintenanceStore
stopC chan struct{}
}
func NewFactory(sqlstore sqlstore.SQLStore, orgGetter organization.Getter, notificationManager nfmanager.NotificationManager) factory.ProviderFactory[alertmanager.Alertmanager, alertmanager.Config] {
func NewFactory(
sqlstore sqlstore.SQLStore,
orgGetter organization.Getter,
notificationManager nfmanager.NotificationManager,
maintenanceStore ruletypes.MaintenanceStore,
) factory.ProviderFactory[alertmanager.Alertmanager, alertmanager.Config] {
return factory.NewProviderFactory(factory.MustNewName("signoz"), func(ctx context.Context, settings factory.ProviderSettings, config alertmanager.Config) (alertmanager.Alertmanager, error) {
return New(ctx, settings, config, sqlstore, orgGetter, notificationManager)
return New(settings, config, sqlstore, orgGetter, notificationManager, maintenanceStore)
})
}
func New(ctx context.Context, providerSettings factory.ProviderSettings, config alertmanager.Config, sqlstore sqlstore.SQLStore, orgGetter organization.Getter, notificationManager nfmanager.NotificationManager) (*provider, error) {
func New(
providerSettings factory.ProviderSettings,
config alertmanager.Config,
sqlstore sqlstore.SQLStore,
orgGetter organization.Getter,
notificationManager nfmanager.NotificationManager,
maintenanceStore ruletypes.MaintenanceStore,
) (*provider, error) {
settings := factory.NewScopedProviderSettings(providerSettings, "github.com/SigNoz/signoz/pkg/alertmanager/signozalertmanager")
configStore := sqlalertmanagerstore.NewConfigStore(sqlstore)
stateStore := sqlalertmanagerstore.NewStateStore(sqlstore)
p := &provider{
service: alertmanager.New(
ctx,
settings,
config.Signoz.Config,
stateStore,
configStore,
orgGetter,
notificationManager,
maintenanceStore,
),
settings: settings,
config: config,
configStore: configStore,
stateStore: stateStore,
notificationManager: notificationManager,
maintenanceStore: maintenanceStore,
stopC: make(chan struct{}),
}

View File

@@ -32,11 +32,10 @@ import (
)
type PrepareTaskOptions struct {
Rule *ruletypes.PostableRule
TaskName string
RuleStore ruletypes.RuleStore
MaintenanceStore ruletypes.MaintenanceStore
Querier querier.Querier
Rule *ruletypes.PostableRule
TaskName string
RuleStore ruletypes.RuleStore
Querier querier.Querier
Logger *slog.Logger
Cache cache.Cache
ManagerOpts *ManagerOptions
@@ -46,10 +45,9 @@ type PrepareTaskOptions struct {
}
type PrepareTestRuleOptions struct {
Rule *ruletypes.PostableRule
RuleStore ruletypes.RuleStore
MaintenanceStore ruletypes.MaintenanceStore
Querier querier.Querier
Rule *ruletypes.PostableRule
RuleStore ruletypes.RuleStore
Querier querier.Querier
Logger *slog.Logger
Cache cache.Cache
ManagerOpts *ManagerOptions
@@ -167,7 +165,7 @@ func defaultPrepareTaskFunc(opts PrepareTaskOptions) (Task, error) {
rules = append(rules, tr)
// create ch rule task for evaluation
task = newTask(TaskTypeCh, opts.TaskName, taskNameSuffix, evaluation.GetFrequency().Duration(), rules, opts.ManagerOpts, opts.NotifyFunc, opts.MaintenanceStore, opts.OrgID)
task = newTask(TaskTypeCh, opts.TaskName, taskNameSuffix, evaluation.GetFrequency().Duration(), rules, opts.ManagerOpts, opts.NotifyFunc, opts.OrgID)
} else if opts.Rule.RuleType == ruletypes.RuleTypeProm {
@@ -191,7 +189,7 @@ func defaultPrepareTaskFunc(opts PrepareTaskOptions) (Task, error) {
rules = append(rules, pr)
// create promql rule task for evaluation
task = newTask(TaskTypeProm, opts.TaskName, taskNameSuffix, evaluation.GetFrequency().Duration(), rules, opts.ManagerOpts, opts.NotifyFunc, opts.MaintenanceStore, opts.OrgID)
task = newTask(TaskTypeProm, opts.TaskName, taskNameSuffix, evaluation.GetFrequency().Duration(), rules, opts.ManagerOpts, opts.NotifyFunc, opts.OrgID)
} else {
return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "unsupported rule type %s. Supported types: %s, %s", opts.Rule.RuleType, ruletypes.RuleTypeProm, ruletypes.RuleTypeThreshold)
@@ -432,9 +430,8 @@ func (m *Manager) editTask(_ context.Context, orgID valuer.UUID, rule *ruletypes
newTask, err := m.prepareTaskFunc(PrepareTaskOptions{
Rule: rule,
TaskName: taskName,
RuleStore: m.ruleStore,
MaintenanceStore: m.maintenanceStore,
Querier: m.opts.Querier,
RuleStore: m.ruleStore,
Querier: m.opts.Querier,
Logger: m.opts.Logger,
Cache: m.cache,
ManagerOpts: m.opts,
@@ -645,9 +642,8 @@ func (m *Manager) addTask(_ context.Context, orgID valuer.UUID, rule *ruletypes.
newTask, err := m.prepareTaskFunc(PrepareTaskOptions{
Rule: rule,
TaskName: taskName,
RuleStore: m.ruleStore,
MaintenanceStore: m.maintenanceStore,
Querier: m.opts.Querier,
RuleStore: m.ruleStore,
Querier: m.opts.Querier,
Logger: m.opts.Logger,
Cache: m.cache,
ManagerOpts: m.opts,
@@ -1030,9 +1026,8 @@ func (m *Manager) TestNotification(ctx context.Context, orgID valuer.UUID, ruleS
alertCount, err := m.prepareTestRuleFunc(PrepareTestRuleOptions{
Rule: &parsedRule,
RuleStore: m.ruleStore,
MaintenanceStore: m.maintenanceStore,
Querier: m.opts.Querier,
RuleStore: m.ruleStore,
Querier: m.opts.Querier,
Logger: m.opts.Logger,
Cache: m.cache,
ManagerOpts: m.opts,

View File

@@ -40,13 +40,12 @@ type PromRuleTask struct {
logger *slog.Logger
notify NotifyFunc
maintenanceStore ruletypes.MaintenanceStore
orgID valuer.UUID
orgID valuer.UUID
}
// NewPromRuleTask holds rules that have promql condition
// and evaluates the rule at a given frequency
func NewPromRuleTask(name, file string, frequency time.Duration, rules []Rule, opts *ManagerOptions, notify NotifyFunc, maintenanceStore ruletypes.MaintenanceStore, orgID valuer.UUID) *PromRuleTask {
func NewPromRuleTask(name, file string, frequency time.Duration, rules []Rule, opts *ManagerOptions, notify NotifyFunc, orgID valuer.UUID) *PromRuleTask {
opts.Logger.Info("initiating a new rule group", "name", name, "frequency", frequency)
if frequency == 0 {
@@ -63,10 +62,9 @@ func NewPromRuleTask(name, file string, frequency time.Duration, rules []Rule, o
seriesInPreviousEval: make([]map[string]plabels.Labels, len(rules)),
done: make(chan struct{}),
terminated: make(chan struct{}),
notify: notify,
maintenanceStore: maintenanceStore,
logger: opts.Logger,
orgID: orgID,
notify: notify,
logger: opts.Logger,
orgID: orgID,
}
}
@@ -330,30 +328,12 @@ func (g *PromRuleTask) Eval(ctx context.Context, ts time.Time) {
}()
g.logger.InfoContext(ctx, "promql rule task", "name", g.name, "eval_started_at", ts)
maintenance, err := g.maintenanceStore.ListPlannedMaintenance(ctx, g.orgID.StringValue())
if err != nil {
g.logger.ErrorContext(ctx, "error in processing sql query", errors.Attr(err))
}
for i, rule := range g.rules {
if rule == nil {
continue
}
shouldSkip := false
for _, m := range maintenance {
g.logger.InfoContext(ctx, "checking if rule should be skipped", slog.String("rule.id", rule.ID()), slog.Any("maintenance", m))
if m.ShouldSkip(rule.ID(), ts) {
shouldSkip = true
break
}
}
if shouldSkip {
g.logger.InfoContext(ctx, "rule should be skipped", slog.String("rule.id", rule.ID()))
continue
}
select {
case <-g.done:
return

View File

@@ -38,14 +38,13 @@ type RuleTask struct {
pause bool
notify NotifyFunc
maintenanceStore ruletypes.MaintenanceStore
orgID valuer.UUID
orgID valuer.UUID
}
const DefaultFrequency = 1 * time.Minute
// NewRuleTask makes a new RuleTask with the given name, options, and rules.
func NewRuleTask(name, file string, frequency time.Duration, rules []Rule, opts *ManagerOptions, notify NotifyFunc, maintenanceStore ruletypes.MaintenanceStore, orgID valuer.UUID) *RuleTask {
func NewRuleTask(name, file string, frequency time.Duration, rules []Rule, opts *ManagerOptions, notify NotifyFunc, orgID valuer.UUID) *RuleTask {
if frequency == 0 {
frequency = DefaultFrequency
@@ -62,9 +61,8 @@ func NewRuleTask(name, file string, frequency time.Duration, rules []Rule, opts
logger: opts.Logger,
done: make(chan struct{}),
terminated: make(chan struct{}),
notify: notify,
maintenanceStore: maintenanceStore,
orgID: orgID,
notify: notify,
orgID: orgID,
}
}
@@ -318,31 +316,11 @@ func (g *RuleTask) Eval(ctx context.Context, ts time.Time) {
g.logger.DebugContext(ctx, "rule task eval started", "name", g.name, "start_time", ts)
maintenance, err := g.maintenanceStore.ListPlannedMaintenance(ctx, g.orgID.StringValue())
if err != nil {
g.logger.ErrorContext(ctx, "error in processing sql query", errors.Attr(err))
}
for i, rule := range g.rules {
if rule == nil {
continue
}
shouldSkip := false
for _, m := range maintenance {
g.logger.InfoContext(ctx, "checking if rule should be skipped", slog.String("rule.id", rule.ID()), slog.Any("maintenance", m))
if m.ShouldSkip(rule.ID(), ts) {
shouldSkip = true
break
}
}
if shouldSkip {
g.logger.InfoContext(ctx, "rule should be skipped", slog.String("rule.id", rule.ID()))
continue
}
select {
case <-g.done:
return

View File

@@ -4,7 +4,6 @@ import (
"context"
"time"
"github.com/SigNoz/signoz/pkg/types/ruletypes"
"github.com/SigNoz/signoz/pkg/valuer"
)
@@ -32,9 +31,9 @@ type Task interface {
// newTask returns an appropriate group for
// rule type
func newTask(taskType TaskType, name, file string, frequency time.Duration, rules []Rule, opts *ManagerOptions, notify NotifyFunc, maintenanceStore ruletypes.MaintenanceStore, orgID valuer.UUID) Task {
func newTask(taskType TaskType, name, file string, frequency time.Duration, rules []Rule, opts *ManagerOptions, notify NotifyFunc, orgID valuer.UUID) Task {
if taskType == TaskTypeCh {
return NewRuleTask(name, file, frequency, rules, opts, notify, maintenanceStore, orgID)
return NewRuleTask(name, file, frequency, rules, opts, notify, orgID)
}
return NewPromRuleTask(name, file, frequency, rules, opts, notify, maintenanceStore, orgID)
return NewPromRuleTask(name, file, frequency, rules, opts, notify, orgID)
}

View File

@@ -19,6 +19,7 @@ import (
"github.com/SigNoz/signoz/pkg/modules/user/impluser"
"github.com/SigNoz/signoz/pkg/querier"
"github.com/SigNoz/signoz/pkg/queryparser"
"github.com/SigNoz/signoz/pkg/ruler/rulestore/sqlrulestore"
"github.com/SigNoz/signoz/pkg/sharder"
"github.com/SigNoz/signoz/pkg/sharder/noopsharder"
"github.com/SigNoz/signoz/pkg/sqlstore"
@@ -38,7 +39,8 @@ func TestNewHandlers(t *testing.T) {
orgGetter := implorganization.NewGetter(implorganization.NewStore(sqlstore), sharder)
notificationManager := nfmanagertest.NewMock()
require.NoError(t, err)
alertmanager, err := signozalertmanager.New(context.TODO(), providerSettings, alertmanager.Config{}, sqlstore, orgGetter, notificationManager)
maintenanceStore := sqlrulestore.NewMaintenanceStore(sqlstore)
alertmanager, err := signozalertmanager.New(providerSettings, alertmanager.Config{}, sqlstore, orgGetter, notificationManager, maintenanceStore)
require.NoError(t, err)
tokenizer := tokenizertest.NewMockTokenizer(t)
emailing := emailingtest.New()

View File

@@ -20,6 +20,7 @@ import (
"github.com/SigNoz/signoz/pkg/modules/serviceaccount/implserviceaccount"
"github.com/SigNoz/signoz/pkg/modules/user/impluser"
"github.com/SigNoz/signoz/pkg/queryparser"
"github.com/SigNoz/signoz/pkg/ruler/rulestore/sqlrulestore"
"github.com/SigNoz/signoz/pkg/sharder"
"github.com/SigNoz/signoz/pkg/sharder/noopsharder"
"github.com/SigNoz/signoz/pkg/sqlstore"
@@ -39,7 +40,8 @@ func TestNewModules(t *testing.T) {
orgGetter := implorganization.NewGetter(implorganization.NewStore(sqlstore), sharder)
notificationManager := nfmanagertest.NewMock()
require.NoError(t, err)
alertmanager, err := signozalertmanager.New(context.TODO(), providerSettings, alertmanager.Config{}, sqlstore, orgGetter, notificationManager)
maintenanceStore := sqlrulestore.NewMaintenanceStore(sqlstore)
alertmanager, err := signozalertmanager.New(providerSettings, alertmanager.Config{}, sqlstore, orgGetter, notificationManager, maintenanceStore)
require.NoError(t, err)
tokenizer := tokenizertest.NewMockTokenizer(t)
emailing := emailingtest.New()

View File

@@ -65,6 +65,7 @@ import (
"github.com/SigNoz/signoz/pkg/tokenizer/tokenizerstore/sqltokenizerstore"
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
"github.com/SigNoz/signoz/pkg/types/featuretypes"
ruletypes "github.com/SigNoz/signoz/pkg/types/ruletypes"
"github.com/SigNoz/signoz/pkg/version"
"github.com/SigNoz/signoz/pkg/web"
"github.com/SigNoz/signoz/pkg/web/noopweb"
@@ -221,9 +222,14 @@ func NewNotificationManagerProviderFactories(routeStore alertmanagertypes.RouteS
)
}
func NewAlertmanagerProviderFactories(sqlstore sqlstore.SQLStore, orgGetter organization.Getter, nfManager nfmanager.NotificationManager) factory.NamedMap[factory.ProviderFactory[alertmanager.Alertmanager, alertmanager.Config]] {
func NewAlertmanagerProviderFactories(
sqlstore sqlstore.SQLStore,
orgGetter organization.Getter,
nfManager nfmanager.NotificationManager,
maintenanceStore ruletypes.MaintenanceStore,
) factory.NamedMap[factory.ProviderFactory[alertmanager.Alertmanager, alertmanager.Config]] {
return factory.MustNewNamedMap(
signozalertmanager.NewFactory(sqlstore, orgGetter, nfManager),
signozalertmanager.NewFactory(sqlstore, orgGetter, nfManager, maintenanceStore),
)
}

View File

@@ -14,6 +14,7 @@ import (
"github.com/SigNoz/signoz/pkg/modules/user/impluser"
"github.com/SigNoz/signoz/pkg/sqlschema"
"github.com/SigNoz/signoz/pkg/sqlschema/sqlschematest"
"github.com/SigNoz/signoz/pkg/ruler/rulestore/sqlrulestore"
"github.com/SigNoz/signoz/pkg/sqlstore"
"github.com/SigNoz/signoz/pkg/sqlstore/sqlstoretest"
"github.com/SigNoz/signoz/pkg/statsreporter"
@@ -59,9 +60,11 @@ func TestNewProviderFactories(t *testing.T) {
})
assert.NotPanics(t, func() {
orgGetter := implorganization.NewGetter(implorganization.NewStore(sqlstoretest.New(sqlstore.Config{Provider: "sqlite"}, sqlmock.QueryMatcherEqual)), nil)
store := sqlstoretest.New(sqlstore.Config{Provider: "sqlite"}, sqlmock.QueryMatcherEqual)
orgGetter := implorganization.NewGetter(implorganization.NewStore(store), nil)
notificationManager := nfmanagertest.NewMock()
NewAlertmanagerProviderFactories(sqlstoretest.New(sqlstore.Config{Provider: "sqlite"}, sqlmock.QueryMatcherEqual), orgGetter, notificationManager)
maintenanceStore := sqlrulestore.NewMaintenanceStore(store)
NewAlertmanagerProviderFactories(store, orgGetter, notificationManager, maintenanceStore)
})
assert.NotPanics(t, func() {

View File

@@ -34,6 +34,7 @@ import (
"github.com/SigNoz/signoz/pkg/querier"
"github.com/SigNoz/signoz/pkg/queryparser"
"github.com/SigNoz/signoz/pkg/ruler"
sqlrulestore "github.com/SigNoz/signoz/pkg/ruler/rulestore/sqlrulestore"
"github.com/SigNoz/signoz/pkg/sharder"
"github.com/SigNoz/signoz/pkg/sqlmigration"
"github.com/SigNoz/signoz/pkg/sqlmigrator"
@@ -362,12 +363,14 @@ func New(
return nil, err
}
maintenanceStore := sqlrulestore.NewMaintenanceStore(sqlstore)
// Initialize alertmanager from the available alertmanager provider factories
alertmanager, err := factory.NewProviderFromNamedMap(
ctx,
providerSettings,
config.Alertmanager,
NewAlertmanagerProviderFactories(sqlstore, orgGetter, nfManager),
NewAlertmanagerProviderFactories(sqlstore, orgGetter, nfManager, maintenanceStore),
config.Alertmanager.Provider,
)
if err != nil {

View File

@@ -11,9 +11,7 @@ import (
"github.com/uptrace/bun"
)
var (
ErrCodeInvalidPlannedMaintenancePayload = errors.MustNewCode("invalid_planned_maintenance_payload")
)
var ErrCodeInvalidPlannedMaintenancePayload = errors.MustNewCode("invalid_planned_maintenance_payload")
type MaintenanceStatus struct {
valuer.String
@@ -63,15 +61,15 @@ type StorablePlannedMaintenance struct {
}
type PlannedMaintenance struct {
ID valuer.UUID `json:"id" required:"true"`
Name string `json:"name" required:"true"`
Description string `json:"description"`
Schedule *Schedule `json:"schedule" required:"true"`
RuleIDs []string `json:"alertIds"`
CreatedAt time.Time `json:"createdAt"`
CreatedBy string `json:"createdBy"`
UpdatedAt time.Time `json:"updatedAt"`
UpdatedBy string `json:"updatedBy"`
ID valuer.UUID `json:"id" required:"true"`
Name string `json:"name" required:"true"`
Description string `json:"description"`
Schedule *Schedule `json:"schedule" required:"true"`
RuleIDs []string `json:"alertIds"`
CreatedAt time.Time `json:"createdAt"`
CreatedBy string `json:"createdBy"`
UpdatedAt time.Time `json:"updatedAt"`
UpdatedBy string `json:"updatedBy"`
Status MaintenanceStatus `json:"status" required:"true"`
Kind MaintenanceKind `json:"kind" required:"true"`
}
@@ -161,8 +159,11 @@ func (m *PlannedMaintenance) ShouldSkip(ruleID string, now time.Time) bool {
currentTime := now.In(loc)
// fixed schedule
if !m.Schedule.StartTime.IsZero() && !m.Schedule.EndTime.IsZero() {
// fixed schedule — only when no recurrence is configured.
// When recurrence is set, the recurring check below handles everything;
// falling through here would cause the window to match the absolute
// StartTimeEndTime range instead of the daily/weekly/monthly pattern.
if m.Schedule.Recurrence == nil && !m.Schedule.StartTime.IsZero() && !m.Schedule.EndTime.IsZero() {
startTime := m.Schedule.StartTime.In(loc)
endTime := m.Schedule.EndTime.In(loc)
if currentTime.Equal(startTime) || currentTime.Equal(endTime) ||

View File

@@ -13,7 +13,6 @@ func timePtr(t time.Time) *time.Time {
}
func TestShouldSkipMaintenance(t *testing.T) {
cases := []struct {
name string
maintenance *PlannedMaintenance
@@ -499,7 +498,7 @@ func TestShouldSkipMaintenance(t *testing.T) {
},
},
},
ts: time.Date(2024, 04, 1, 12, 10, 0, 0, time.UTC),
ts: time.Date(2024, 4, 1, 12, 10, 0, 0, time.UTC),
skip: true,
},
{
@@ -508,14 +507,14 @@ func TestShouldSkipMaintenance(t *testing.T) {
Schedule: &Schedule{
Timezone: "UTC",
Recurrence: &Recurrence{
StartTime: time.Date(2024, 04, 01, 12, 0, 0, 0, time.UTC),
StartTime: time.Date(2024, 4, 1, 12, 0, 0, 0, time.UTC),
Duration: valuer.MustParseTextDuration("2h"),
RepeatType: RepeatTypeWeekly,
RepeatOn: []RepeatOn{RepeatOnMonday},
},
},
},
ts: time.Date(2024, 04, 15, 12, 10, 0, 0, time.UTC),
ts: time.Date(2024, 4, 15, 12, 10, 0, 0, time.UTC),
skip: true,
},
{
@@ -524,14 +523,14 @@ func TestShouldSkipMaintenance(t *testing.T) {
Schedule: &Schedule{
Timezone: "UTC",
Recurrence: &Recurrence{
StartTime: time.Date(2024, 04, 01, 12, 0, 0, 0, time.UTC),
StartTime: time.Date(2024, 4, 1, 12, 0, 0, 0, time.UTC),
Duration: valuer.MustParseTextDuration("2h"),
RepeatType: RepeatTypeWeekly,
RepeatOn: []RepeatOn{RepeatOnMonday},
},
},
},
ts: time.Date(2024, 04, 14, 12, 10, 0, 0, time.UTC), // 14th 04 is sunday
ts: time.Date(2024, 4, 14, 12, 10, 0, 0, time.UTC), // 14th 04 is sunday
skip: false,
},
{
@@ -540,14 +539,14 @@ func TestShouldSkipMaintenance(t *testing.T) {
Schedule: &Schedule{
Timezone: "UTC",
Recurrence: &Recurrence{
StartTime: time.Date(2024, 04, 01, 12, 0, 0, 0, time.UTC),
StartTime: time.Date(2024, 4, 1, 12, 0, 0, 0, time.UTC),
Duration: valuer.MustParseTextDuration("2h"),
RepeatType: RepeatTypeWeekly,
RepeatOn: []RepeatOn{RepeatOnMonday},
},
},
},
ts: time.Date(2024, 04, 16, 12, 10, 0, 0, time.UTC), // 16th 04 is tuesday
ts: time.Date(2024, 4, 16, 12, 10, 0, 0, time.UTC), // 16th 04 is tuesday
skip: false,
},
{
@@ -556,14 +555,14 @@ func TestShouldSkipMaintenance(t *testing.T) {
Schedule: &Schedule{
Timezone: "UTC",
Recurrence: &Recurrence{
StartTime: time.Date(2024, 04, 01, 12, 0, 0, 0, time.UTC),
StartTime: time.Date(2024, 4, 1, 12, 0, 0, 0, time.UTC),
Duration: valuer.MustParseTextDuration("2h"),
RepeatType: RepeatTypeWeekly,
RepeatOn: []RepeatOn{RepeatOnMonday},
},
},
},
ts: time.Date(2024, 05, 06, 12, 10, 0, 0, time.UTC),
ts: time.Date(2024, 5, 6, 12, 10, 0, 0, time.UTC),
skip: true,
},
{
@@ -572,14 +571,14 @@ func TestShouldSkipMaintenance(t *testing.T) {
Schedule: &Schedule{
Timezone: "UTC",
Recurrence: &Recurrence{
StartTime: time.Date(2024, 04, 01, 12, 0, 0, 0, time.UTC),
StartTime: time.Date(2024, 4, 1, 12, 0, 0, 0, time.UTC),
Duration: valuer.MustParseTextDuration("2h"),
RepeatType: RepeatTypeWeekly,
RepeatOn: []RepeatOn{RepeatOnMonday},
},
},
},
ts: time.Date(2024, 05, 06, 14, 00, 0, 0, time.UTC),
ts: time.Date(2024, 5, 6, 14, 0, 0, 0, time.UTC),
skip: true,
},
{
@@ -588,13 +587,13 @@ func TestShouldSkipMaintenance(t *testing.T) {
Schedule: &Schedule{
Timezone: "UTC",
Recurrence: &Recurrence{
StartTime: time.Date(2024, 04, 04, 12, 0, 0, 0, time.UTC),
StartTime: time.Date(2024, 4, 4, 12, 0, 0, 0, time.UTC),
Duration: valuer.MustParseTextDuration("2h"),
RepeatType: RepeatTypeMonthly,
},
},
},
ts: time.Date(2024, 04, 04, 12, 10, 0, 0, time.UTC),
ts: time.Date(2024, 4, 4, 12, 10, 0, 0, time.UTC),
skip: true,
},
{
@@ -603,13 +602,13 @@ func TestShouldSkipMaintenance(t *testing.T) {
Schedule: &Schedule{
Timezone: "UTC",
Recurrence: &Recurrence{
StartTime: time.Date(2024, 04, 04, 12, 0, 0, 0, time.UTC),
StartTime: time.Date(2024, 4, 4, 12, 0, 0, 0, time.UTC),
Duration: valuer.MustParseTextDuration("2h"),
RepeatType: RepeatTypeMonthly,
},
},
},
ts: time.Date(2024, 04, 04, 14, 10, 0, 0, time.UTC),
ts: time.Date(2024, 4, 4, 14, 10, 0, 0, time.UTC),
skip: false,
},
{
@@ -618,13 +617,52 @@ func TestShouldSkipMaintenance(t *testing.T) {
Schedule: &Schedule{
Timezone: "UTC",
Recurrence: &Recurrence{
StartTime: time.Date(2024, 04, 04, 12, 0, 0, 0, time.UTC),
StartTime: time.Date(2024, 4, 4, 12, 0, 0, 0, time.UTC),
Duration: valuer.MustParseTextDuration("2h"),
RepeatType: RepeatTypeMonthly,
},
},
},
ts: time.Date(2024, 05, 04, 12, 10, 0, 0, time.UTC),
ts: time.Date(2024, 5, 4, 12, 10, 0, 0, time.UTC),
skip: true,
},
// The recurrence should govern, when set. Not the fixed range.
{
name: "recurring-daily-with-fixed-times-outside-daily-window",
maintenance: &PlannedMaintenance{
Schedule: &Schedule{
Timezone: "UTC",
// These fixed fields should be ignored when Recurrence is set.
StartTime: time.Date(2026, 4, 1, 10, 0, 0, 0, time.UTC),
EndTime: time.Date(2026, 4, 30, 18, 0, 0, 0, time.UTC),
Recurrence: &Recurrence{
StartTime: time.Date(2026, 4, 1, 14, 0, 0, 0, time.UTC), // daily at 14:00
Duration: valuer.MustParseTextDuration("2h"), // until 16:00
RepeatType: RepeatTypeDaily,
},
},
},
// 11:00 is inside the fixed range but outside the daily 14:00-16:00 window.
// Before the fix this returned true (bug); after fix it returns false.
ts: time.Date(2026, 4, 15, 11, 0, 0, 0, time.UTC),
skip: false,
},
{
name: "recurring-daily-with-fixed-times-inside-daily-window",
maintenance: &PlannedMaintenance{
Schedule: &Schedule{
Timezone: "UTC",
StartTime: time.Date(2026, 4, 1, 10, 0, 0, 0, time.UTC),
EndTime: time.Date(2026, 4, 30, 18, 0, 0, 0, time.UTC),
Recurrence: &Recurrence{
StartTime: time.Date(2026, 4, 1, 14, 0, 0, 0, time.UTC),
Duration: valuer.MustParseTextDuration("2h"),
RepeatType: RepeatTypeDaily,
},
},
},
// 15:00 is inside the daily 14:00-16:00 window — should skip.
ts: time.Date(2026, 4, 15, 15, 0, 0, 0, time.UTC),
skip: true,
},
}