Compare commits

..

8 Commits

Author SHA1 Message Date
Vinícius Lourenço
acf20e3208 fix(alert-rules-history): crash due to relativeTime empty 2026-03-27 09:52:40 -03:00
Nikhil Soni
58dabf9a6c feat(waterfall): return all span for small traces and nested level for large (#10399)
* feat: add support for configurable pre expanded levels

When a span is expanded, now it will return nested children
up to a max depth level

* feat: show spinner on expanding span and fix autoscroll

Two improvemnts are included in this commit:
- Show a loading spiner in UI when a span is expanded
  to signal api call.
- Don't auto scroll the waterfall to bring the selected
  span to centre in case it's expanded or scrolled up
  because it looks very flickering behaviour to user

* feat: add support for returning all the spans

If total spans are less than a max value.
This allows UI to not have to call API
for smaller spans on ever collapse/uncollapse

* Revert "feat: show spinner on expanding span and fix autoscroll"

This reverts commit 6bd194dfb4.

* chore: add tests

* chore: make the code changes more easy to understand

* chore: cleanup traversal in trace waterfall

* chore: rename variables to be self descriptive

* chore: add rests for auto expand

* chore: add tests for getAllSpans

---------

Co-authored-by: nityanandagohain <nityanandagohain@gmail.com>
2026-03-27 12:13:53 +00:00
Vinicius Lourenço
91edc2a895 fix(infra-monitoring): not fetching correct group by keys (#10651) 2026-03-27 11:42:25 +00:00
SagarRajput-7
2e70857554 fix: remove custom domain from self hosted deployments (#10731)
* feat: remove custom domain from self hosted deployments

* fix: updated errors to surface from BE in invite member modal

* fix: updated test cases
2026-03-27 09:24:20 +00:00
Nityananda Gohain
f3e6892d5b fix: remove flakyness for trace waterfall tests (#10734) 2026-03-27 06:50:42 +00:00
Nityananda Gohain
23a4960e74 chore: don't run functions if the series is empty (#10725)
Some checks failed
build-staging / js-build (push) Has been cancelled
build-staging / prepare (push) Has been cancelled
build-staging / go-build (push) Has been cancelled
build-staging / staging (push) Has been cancelled
Release Drafter / update_release_draft (push) Has been cancelled
* chore: funcRunningDiff don't panic when series is empty

* chore: don't run functions if the series is empty
2026-03-27 04:41:00 +00:00
Vinicius Lourenço
5d0c55d682 fix(alerts-history): formatTime expecting number but receiving string (#10719)
Co-authored-by: Srikanth Chekuri <srikanth.chekuri92@gmail.com>
2026-03-27 00:52:40 +00:00
Nityananda Gohain
15704e0433 chore: cleanup traversal in trace waterfall (#10706)
Some checks failed
build-staging / prepare (push) Has been cancelled
build-staging / js-build (push) Has been cancelled
build-staging / go-build (push) Has been cancelled
build-staging / staging (push) Has been cancelled
Release Drafter / update_release_draft (push) Has been cancelled
2026-03-26 11:46:45 +00:00
22 changed files with 1295 additions and 429 deletions

View File

@@ -7,21 +7,29 @@ import (
"log/slog"
"math"
"strings"
"sync"
"time"
"github.com/SigNoz/signoz/ee/query-service/anomaly"
"github.com/SigNoz/signoz/pkg/cache"
"github.com/SigNoz/signoz/pkg/errors"
"github.com/SigNoz/signoz/pkg/query-service/common"
"github.com/SigNoz/signoz/pkg/query-service/model"
"github.com/SigNoz/signoz/pkg/transition"
"github.com/SigNoz/signoz/pkg/types/ruletypes"
"github.com/SigNoz/signoz/pkg/valuer"
querierV2 "github.com/SigNoz/signoz/pkg/query-service/app/querier/v2"
"github.com/SigNoz/signoz/pkg/query-service/app/queryBuilder"
"github.com/SigNoz/signoz/pkg/query-service/interfaces"
v3 "github.com/SigNoz/signoz/pkg/query-service/model/v3"
baserules "github.com/SigNoz/signoz/pkg/query-service/rules"
"github.com/SigNoz/signoz/pkg/query-service/utils/labels"
"github.com/SigNoz/signoz/pkg/query-service/utils/times"
"github.com/SigNoz/signoz/pkg/query-service/utils/timestamp"
"github.com/SigNoz/signoz/pkg/units"
baserules "github.com/SigNoz/signoz/pkg/query-service/rules"
querierV5 "github.com/SigNoz/signoz/pkg/querier"
@@ -37,6 +45,16 @@ const (
type AnomalyRule struct {
*baserules.BaseRule
mtx sync.Mutex
reader interfaces.Reader
// querierV2 is used for alerts created after the introduction of new metrics query builder
querierV2 interfaces.Querier
// querierV5 is used for alerts migrated after the introduction of new query builder
querierV5 querierV5.Querier
provider anomaly.Provider
providerV2 anomalyV2.Provider
@@ -85,6 +103,14 @@ func NewAnomalyRule(
logger.Info("using seasonality", "seasonality", t.seasonality.String())
querierOptsV2 := querierV2.QuerierOptions{
Reader: reader,
Cache: cache,
KeyGenerator: queryBuilder.NewKeyGenerator(),
}
t.querierV2 = querierV2.NewQuerier(querierOptsV2)
t.reader = reader
if t.seasonality == anomaly.SeasonalityHourly {
t.provider = anomaly.NewHourlyProvider(
anomaly.WithCache[*anomaly.HourlyProvider](cache),
@@ -122,6 +148,7 @@ func NewAnomalyRule(
)
}
t.querierV5 = querierV5
t.version = p.Version
t.logger = logger
return &t, nil
@@ -306,8 +333,14 @@ func (r *AnomalyRule) buildAndRunQueryV5(ctx context.Context, orgID valuer.UUID,
}
func (r *AnomalyRule) Eval(ctx context.Context, ts time.Time) (int, error) {
prevState := r.State()
valueFormatter := units.FormatterFromUnit(r.Unit())
var res ruletypes.Vector
var err error
if r.version == "v5" {
r.logger.InfoContext(ctx, "running v5 query")
res, err = r.buildAndRunQueryV5(ctx, r.OrgID(), ts)
@@ -319,8 +352,220 @@ func (r *AnomalyRule) Eval(ctx context.Context, ts time.Time) (int, error) {
return 0, err
}
opts := baserules.EvalVectorOptions{
DeleteLabels: []string{labels.MetricNameLabel, labels.TemporalityLabel},
r.mtx.Lock()
defer r.mtx.Unlock()
resultFPs := map[uint64]struct{}{}
var alerts = make(map[uint64]*ruletypes.Alert, len(res))
ruleReceivers := r.Threshold.GetRuleReceivers()
ruleReceiverMap := make(map[string][]string)
for _, value := range ruleReceivers {
ruleReceiverMap[value.Name] = value.Channels
}
return r.EvalVector(ctx, ts, res, opts)
for _, smpl := range res {
l := make(map[string]string, len(smpl.Metric))
for _, lbl := range smpl.Metric {
l[lbl.Name] = lbl.Value
}
value := valueFormatter.Format(smpl.V, r.Unit())
threshold := valueFormatter.Format(smpl.Target, smpl.TargetUnit)
r.logger.DebugContext(ctx, "Alert template data for rule", "rule_name", r.Name(), "formatter", valueFormatter.Name(), "value", value, "threshold", threshold)
tmplData := ruletypes.AlertTemplateData(l, value, threshold)
// Inject some convenience variables that are easier to remember for users
// who are not used to Go's templating system.
defs := "{{$labels := .Labels}}{{$value := .Value}}{{$threshold := .Threshold}}"
// utility function to apply go template on labels and annotations
expand := func(text string) string {
tmpl := ruletypes.NewTemplateExpander(
ctx,
defs+text,
"__alert_"+r.Name(),
tmplData,
times.Time(timestamp.FromTime(ts)),
nil,
)
result, err := tmpl.Expand()
if err != nil {
result = fmt.Sprintf("<error expanding template: %s>", err)
r.logger.ErrorContext(ctx, "Expanding alert template failed", errors.Attr(err), "data", tmplData, "rule_name", r.Name())
}
return result
}
lb := labels.NewBuilder(smpl.Metric).Del(labels.MetricNameLabel).Del(labels.TemporalityLabel)
resultLabels := labels.NewBuilder(smpl.Metric).Del(labels.MetricNameLabel).Del(labels.TemporalityLabel).Labels()
for name, value := range r.Labels().Map() {
lb.Set(name, expand(value))
}
lb.Set(labels.AlertNameLabel, r.Name())
lb.Set(labels.AlertRuleIdLabel, r.ID())
lb.Set(labels.RuleSourceLabel, r.GeneratorURL())
annotations := make(labels.Labels, 0, len(r.Annotations().Map()))
for name, value := range r.Annotations().Map() {
annotations = append(annotations, labels.Label{Name: name, Value: expand(value)})
}
if smpl.IsMissing {
lb.Set(labels.AlertNameLabel, "[No data] "+r.Name())
lb.Set(labels.NoDataLabel, "true")
}
lbs := lb.Labels()
h := lbs.Hash()
resultFPs[h] = struct{}{}
if _, ok := alerts[h]; ok {
r.logger.ErrorContext(ctx, "the alert query returns duplicate records", "rule_id", r.ID(), "alert", alerts[h])
err = fmt.Errorf("duplicate alert found, vector contains metrics with the same labelset after applying alert labels")
return 0, err
}
alerts[h] = &ruletypes.Alert{
Labels: lbs,
QueryResultLables: resultLabels,
Annotations: annotations,
ActiveAt: ts,
State: model.StatePending,
Value: smpl.V,
GeneratorURL: r.GeneratorURL(),
Receivers: ruleReceiverMap[lbs.Map()[ruletypes.LabelThresholdName]],
Missing: smpl.IsMissing,
IsRecovering: smpl.IsRecovering,
}
}
r.logger.InfoContext(ctx, "number of alerts found", "rule_name", r.Name(), "alerts_count", len(alerts))
// alerts[h] is ready, add or update active list now
for h, a := range alerts {
// Check whether we already have alerting state for the identifying label set.
// Update the last value and annotations if so, create a new alert entry otherwise.
if alert, ok := r.Active[h]; ok && alert.State != model.StateInactive {
alert.Value = a.Value
alert.Annotations = a.Annotations
// Update the recovering and missing state of existing alert
alert.IsRecovering = a.IsRecovering
alert.Missing = a.Missing
if v, ok := alert.Labels.Map()[ruletypes.LabelThresholdName]; ok {
alert.Receivers = ruleReceiverMap[v]
}
continue
}
r.Active[h] = a
}
itemsToAdd := []model.RuleStateHistory{}
// Check if any pending alerts should be removed or fire now. Write out alert timeseries.
for fp, a := range r.Active {
labelsJSON, err := json.Marshal(a.QueryResultLables)
if err != nil {
r.logger.ErrorContext(ctx, "error marshaling labels", errors.Attr(err), "labels", a.Labels)
}
if _, ok := resultFPs[fp]; !ok {
// If the alert was previously firing, keep it around for a given
// retention time so it is reported as resolved to the AlertManager.
if a.State == model.StatePending || (!a.ResolvedAt.IsZero() && ts.Sub(a.ResolvedAt) > ruletypes.ResolvedRetention) {
delete(r.Active, fp)
}
if a.State != model.StateInactive {
a.State = model.StateInactive
a.ResolvedAt = ts
itemsToAdd = append(itemsToAdd, model.RuleStateHistory{
RuleID: r.ID(),
RuleName: r.Name(),
State: model.StateInactive,
StateChanged: true,
UnixMilli: ts.UnixMilli(),
Labels: model.LabelsString(labelsJSON),
Fingerprint: a.QueryResultLables.Hash(),
Value: a.Value,
})
}
continue
}
if a.State == model.StatePending && ts.Sub(a.ActiveAt) >= r.HoldDuration().Duration() {
a.State = model.StateFiring
a.FiredAt = ts
state := model.StateFiring
if a.Missing {
state = model.StateNoData
}
itemsToAdd = append(itemsToAdd, model.RuleStateHistory{
RuleID: r.ID(),
RuleName: r.Name(),
State: state,
StateChanged: true,
UnixMilli: ts.UnixMilli(),
Labels: model.LabelsString(labelsJSON),
Fingerprint: a.QueryResultLables.Hash(),
Value: a.Value,
})
}
// We need to change firing alert to recovering if the returned sample meets recovery threshold
changeFiringToRecovering := a.State == model.StateFiring && a.IsRecovering
// We need to change recovering alerts to firing if the returned sample meets target threshold
changeRecoveringToFiring := a.State == model.StateRecovering && !a.IsRecovering && !a.Missing
// in any of the above case we need to update the status of alert
if changeFiringToRecovering || changeRecoveringToFiring {
state := model.StateRecovering
if changeRecoveringToFiring {
state = model.StateFiring
}
a.State = state
r.logger.DebugContext(ctx, "converting alert state", "name", r.Name(), "state", state)
itemsToAdd = append(itemsToAdd, model.RuleStateHistory{
RuleID: r.ID(),
RuleName: r.Name(),
State: state,
StateChanged: true,
UnixMilli: ts.UnixMilli(),
Labels: model.LabelsString(labelsJSON),
Fingerprint: a.QueryResultLables.Hash(),
Value: a.Value,
})
}
}
currentState := r.State()
overallStateChanged := currentState != prevState
for idx, item := range itemsToAdd {
item.OverallStateChanged = overallStateChanged
item.OverallState = currentState
itemsToAdd[idx] = item
}
r.RecordRuleStateHistory(ctx, prevState, currentState, itemsToAdd)
return len(r.Active), nil
}
func (r *AnomalyRule) String() string {
ar := ruletypes.PostableRule{
AlertName: r.Name(),
RuleCondition: r.Condition(),
EvalWindow: r.EvalWindow(),
Labels: r.Labels().Map(),
Annotations: r.Annotations().Map(),
PreferredChannels: r.PreferredChannels(),
}
byt, err := json.Marshal(ar)
if err != nil {
return fmt.Sprintf("error marshaling alerting rule: %s", err.Error())
}
return string(byt)
}

View File

@@ -202,19 +202,8 @@ function InviteMembersModal({
onComplete?.();
} catch (err) {
const apiErr = err as APIError;
if (apiErr?.getHttpStatusCode() === 409) {
toast.error(
touchedRows.length === 1
? `${touchedRows[0].email} is already a member`
: 'Invite for one or more users already exists',
{ richColors: true },
);
} else {
const errorMessage = apiErr?.getErrorMessage?.() ?? 'An error occurred';
toast.error(`Failed to send invites: ${errorMessage}`, {
richColors: true,
});
}
const errorMessage = apiErr?.getErrorMessage?.() ?? 'An error occurred';
toast.error(errorMessage, { richColors: true });
} finally {
setIsSubmitting(false);
}

View File

@@ -1,9 +1,18 @@
import { toast } from '@signozhq/sonner';
import inviteUsers from 'api/v1/invite/bulk/create';
import sendInvite from 'api/v1/invite/create';
import { StatusCodes } from 'http-status-codes';
import { render, screen, userEvent, waitFor } from 'tests/test-utils';
import APIError from 'types/api/error';
import InviteMembersModal from '../InviteMembersModal';
const makeApiError = (message: string, code = StatusCodes.CONFLICT): APIError =>
new APIError({
httpStatusCode: code,
error: { code: 'already_exists', message, url: '', errors: [] },
});
jest.mock('api/v1/invite/create');
jest.mock('api/v1/invite/bulk/create');
jest.mock('@signozhq/sonner', () => ({
@@ -142,6 +151,90 @@ describe('InviteMembersModal', () => {
});
});
describe('error handling', () => {
it('shows BE message on single invite 409', async () => {
const user = userEvent.setup({ pointerEventsCheck: 0 });
mockSendInvite.mockRejectedValue(
makeApiError('An invite already exists for this email: single@signoz.io'),
);
render(<InviteMembersModal {...defaultProps} />);
await user.type(
screen.getAllByPlaceholderText('john@signoz.io')[0],
'single@signoz.io',
);
await user.click(screen.getAllByText('Select roles')[0]);
await user.click(await screen.findByText('Viewer'));
await user.click(
screen.getByRole('button', { name: /invite team members/i }),
);
await waitFor(() => {
expect(toast.error).toHaveBeenCalledWith(
'An invite already exists for this email: single@signoz.io',
expect.anything(),
);
});
});
it('shows BE message on bulk invite 409', async () => {
const user = userEvent.setup({ pointerEventsCheck: 0 });
mockInviteUsers.mockRejectedValue(
makeApiError('An invite already exists for this email: alice@signoz.io'),
);
render(<InviteMembersModal {...defaultProps} />);
const emailInputs = screen.getAllByPlaceholderText('john@signoz.io');
await user.type(emailInputs[0], 'alice@signoz.io');
await user.click(screen.getAllByText('Select roles')[0]);
await user.click(await screen.findByText('Viewer'));
await user.type(emailInputs[1], 'bob@signoz.io');
await user.click(screen.getAllByText('Select roles')[0]);
const editorOptions = await screen.findAllByText('Editor');
await user.click(editorOptions[editorOptions.length - 1]);
await user.click(
screen.getByRole('button', { name: /invite team members/i }),
);
await waitFor(() => {
expect(toast.error).toHaveBeenCalledWith(
'An invite already exists for this email: alice@signoz.io',
expect.anything(),
);
});
});
it('shows BE message on generic error', async () => {
const user = userEvent.setup({ pointerEventsCheck: 0 });
mockSendInvite.mockRejectedValue(
makeApiError('Internal server error', StatusCodes.INTERNAL_SERVER_ERROR),
);
render(<InviteMembersModal {...defaultProps} />);
await user.type(
screen.getAllByPlaceholderText('john@signoz.io')[0],
'single@signoz.io',
);
await user.click(screen.getAllByText('Select roles')[0]);
await user.click(await screen.findByText('Viewer'));
await user.click(
screen.getByRole('button', { name: /invite team members/i }),
);
await waitFor(() => {
expect(toast.error).toHaveBeenCalledWith(
'Internal server error',
expect.anything(),
);
});
});
});
it('uses inviteUsers (bulk) when multiple rows are filled', async () => {
const user = userEvent.setup({ pointerEventsCheck: 0 });
const onComplete = jest.fn();

View File

@@ -16,9 +16,9 @@ function AverageResolutionCard({
}: TotalTriggeredCardProps): JSX.Element {
return (
<StatsCard
displayValue={formatTime(currentAvgResolutionTime)}
totalCurrentCount={currentAvgResolutionTime}
totalPastCount={pastAvgResolutionTime}
displayValue={formatTime(+currentAvgResolutionTime)}
totalCurrentCount={+currentAvgResolutionTime}
totalPastCount={+pastAvgResolutionTime}
title="Avg. Resolution Time"
timeSeries={timeSeries}
/>

View File

@@ -464,14 +464,10 @@ function GeneralSettings({
onModalToggleHandler(type);
};
const {
isCloudUser: isCloudUserVal,
isEnterpriseSelfHostedUser,
} = useGetTenantLicense();
const { isCloudUser: isCloudUserVal } = useGetTenantLicense();
const isAdmin = user.role === USER_ROLES.ADMIN;
const showCustomDomainSettings =
(isCloudUserVal || isEnterpriseSelfHostedUser) && isAdmin;
const showCustomDomainSettings = isCloudUserVal && isAdmin;
const renderConfig = [
{

View File

@@ -38,7 +38,6 @@ jest.mock('hooks/useComponentPermission', () => ({
jest.mock('hooks/useGetTenantLicense', () => ({
useGetTenantLicense: jest.fn(() => ({
isCloudUser: false,
isEnterpriseSelfHostedUser: false,
})),
}));
@@ -389,7 +388,6 @@ describe('GeneralSettings - S3 Logs Retention', () => {
beforeEach(() => {
(useGetTenantLicense as jest.Mock).mockReturnValue({
isCloudUser: true,
isEnterpriseSelfHostedUser: false,
});
});
@@ -411,15 +409,14 @@ describe('GeneralSettings - S3 Logs Retention', () => {
});
});
describe('Enterprise Self-Hosted User Rendering', () => {
describe('Non-cloud user rendering', () => {
beforeEach(() => {
(useGetTenantLicense as jest.Mock).mockReturnValue({
isCloudUser: false,
isEnterpriseSelfHostedUser: true,
});
});
it('should render CustomDomainSettings but not GeneralSettingsCloud', () => {
it('should not render CustomDomainSettings or GeneralSettingsCloud', () => {
render(
<GeneralSettings
metricsTtlValuesPayload={mockMetricsRetention}
@@ -432,12 +429,14 @@ describe('GeneralSettings - S3 Logs Retention', () => {
/>,
);
expect(screen.getByTestId('custom-domain-settings')).toBeInTheDocument();
expect(
screen.queryByTestId('custom-domain-settings'),
).not.toBeInTheDocument();
expect(
screen.queryByTestId('general-settings-cloud'),
).not.toBeInTheDocument();
// Save buttons should be visible for self-hosted
// Save buttons should be visible for non-cloud users (these are from retentions)
const saveButtons = screen.getAllByRole('button', { name: /save/i });
expect(saveButtons.length).toBeGreaterThan(0);
});

View File

@@ -217,7 +217,7 @@ function K8sVolumesList({
{
dataSource: currentQuery.builder.queryData[0].dataSource,
aggregateAttribute: GetK8sEntityToAggregateAttribute(
K8sCategory.NODES,
K8sCategory.VOLUMES,
dotMetricsEnabled,
),
aggregateOperator: 'noop',
@@ -228,7 +228,7 @@ function K8sVolumesList({
queryKey: [currentQuery.builder.queryData[0].dataSource, 'noop'],
},
true,
K8sCategory.NODES,
K8sCategory.VOLUMES,
);
const query = useMemo(() => {
@@ -597,7 +597,7 @@ function K8sVolumesList({
isLoadingGroupByFilters={isLoadingGroupByFilters}
handleGroupByChange={handleGroupByChange}
selectedGroupBy={groupBy}
entity={K8sCategory.NODES}
entity={K8sCategory.VOLUMES}
showAutoRefresh={!selectedVolumeData}
/>
{isError && <Typography>{data?.error || 'Something went wrong'}</Typography>}

View File

@@ -0,0 +1,162 @@
import setupCommonMocks from '../commonMocks';
setupCommonMocks();
import { QueryClient, QueryClientProvider } from 'react-query';
// eslint-disable-next-line no-restricted-imports
import { Provider } from 'react-redux';
import { MemoryRouter } from 'react-router-dom';
import { render, waitFor } from '@testing-library/react';
import { FeatureKeys } from 'constants/features';
import K8sVolumesList from 'container/InfraMonitoringK8s/Volumes/K8sVolumesList';
import { rest, server } from 'mocks-server/server';
import { IAppContext, IUser } from 'providers/App/types';
import store from 'store';
import { LicenseResModel } from 'types/api/licensesV3/getActive';
const queryClient = new QueryClient({
defaultOptions: {
queries: {
retry: false,
cacheTime: 0,
},
},
});
const SERVER_URL = 'http://localhost/api';
describe('K8sVolumesList - useGetAggregateKeys Category Regression', () => {
let requestsMade: Array<{
url: string;
params: URLSearchParams;
body?: any;
}> = [];
beforeEach(() => {
requestsMade = [];
queryClient.clear();
server.use(
rest.get(`${SERVER_URL}/v3/autocomplete/attribute_keys`, (req, res, ctx) => {
const url = req.url.toString();
const params = req.url.searchParams;
requestsMade.push({
url,
params,
});
return res(
ctx.status(200),
ctx.json({
status: 'success',
data: {
attributeKeys: [],
},
}),
);
}),
rest.post(`${SERVER_URL}/v1/pvcs/list`, (req, res, ctx) =>
res(
ctx.status(200),
ctx.json({
status: 'success',
data: {
type: 'list',
records: [],
groups: null,
total: 0,
sentAnyHostMetricsData: false,
isSendingK8SAgentMetrics: false,
},
}),
),
),
);
});
afterEach(() => {
server.resetHandlers();
});
it('should call aggregate keys API with k8s_volume_capacity', async () => {
render(
<QueryClientProvider client={queryClient}>
<Provider store={store}>
<MemoryRouter>
<K8sVolumesList
isFiltersVisible={false}
handleFilterVisibilityChange={jest.fn()}
quickFiltersLastUpdated={-1}
/>
</MemoryRouter>
</Provider>
</QueryClientProvider>,
);
await waitFor(() => {
expect(requestsMade.length).toBeGreaterThan(0);
});
// Find the attribute_keys request
const attributeKeysRequest = requestsMade.find((req) =>
req.url.includes('/autocomplete/attribute_keys'),
);
expect(attributeKeysRequest).toBeDefined();
const aggregateAttribute = attributeKeysRequest?.params.get(
'aggregateAttribute',
);
expect(aggregateAttribute).toBe('k8s_volume_capacity');
});
it('should call aggregate keys API with k8s.volume.capacity when dotMetrics enabled', async () => {
jest
.spyOn(await import('providers/App/App'), 'useAppContext')
.mockReturnValue({
featureFlags: [
{
name: FeatureKeys.DOT_METRICS_ENABLED,
active: true,
usage: 0,
usage_limit: 0,
route: '',
},
],
user: { role: 'ADMIN' } as IUser,
activeLicense: (null as unknown) as LicenseResModel,
} as IAppContext);
render(
<QueryClientProvider client={queryClient}>
<Provider store={store}>
<MemoryRouter>
<K8sVolumesList
isFiltersVisible={false}
handleFilterVisibilityChange={jest.fn()}
quickFiltersLastUpdated={-1}
/>
</MemoryRouter>
</Provider>
</QueryClientProvider>,
);
await waitFor(() => {
expect(requestsMade.length).toBeGreaterThan(0);
});
const attributeKeysRequest = requestsMade.find((req) =>
req.url.includes('/autocomplete/attribute_keys'),
);
expect(attributeKeysRequest).toBeDefined();
const aggregateAttribute = attributeKeysRequest?.params.get(
'aggregateAttribute',
);
expect(aggregateAttribute).toBe('k8s.volume.capacity');
});
});

View File

@@ -20,7 +20,6 @@ import AlertHistory from 'container/AlertHistory';
import { TIMELINE_TABLE_PAGE_SIZE } from 'container/AlertHistory/constants';
import { AlertDetailsTab, TimelineFilter } from 'container/AlertHistory/types';
import { urlKey } from 'container/AllError/utils';
import { RelativeTimeMap } from 'container/TopNav/DateTimeSelectionV2/constants';
import useAxiosError from 'hooks/useAxiosError';
import { useNotifications } from 'hooks/useNotifications';
import { useSafeNavigate } from 'hooks/useSafeNavigate';
@@ -47,6 +46,8 @@ import { PayloadProps } from 'types/api/alerts/get';
import { TagFilter } from 'types/api/queryBuilder/queryBuilderData';
import { nanoToMilli } from 'utils/timeUtils';
const DEFAULT_TIME_RANGE = '30m';
export const useAlertHistoryQueryParams = (): {
ruleId: string | null;
startTime: number;
@@ -61,8 +62,8 @@ export const useAlertHistoryQueryParams = (): {
const relativeTimeParam = params.get(QueryParams.relativeTime);
const relativeTime =
(relativeTimeParam === 'null' ? null : relativeTimeParam) ??
RelativeTimeMap['6hr'];
(relativeTimeParam === 'null' ? null : relativeTimeParam) ||
DEFAULT_TIME_RANGE;
const intStartTime = parseInt(startTime || '0', 10);
const intEndTime = parseInt(endTime || '0', 10);

View File

@@ -56,8 +56,8 @@ export interface AlertRuleStats {
totalPastTriggers: number;
currentTriggersSeries: CurrentTriggersSeries;
pastTriggersSeries: CurrentTriggersSeries | null;
currentAvgResolutionTime: number;
pastAvgResolutionTime: number;
currentAvgResolutionTime: string;
pastAvgResolutionTime: string;
currentAvgResolutionTimeSeries: CurrentTriggersSeries;
pastAvgResolutionTimeSeries: any | null;
}

View File

@@ -112,6 +112,12 @@ export function formatEpochTimestamp(epoch: number): string {
*/
export function formatTime(seconds: number): string {
seconds = +seconds;
if (Number.isNaN(seconds)) {
return '-';
}
const days = seconds / 86400;
if (days >= 1) {

View File

@@ -1090,7 +1090,21 @@ func (r *ClickHouseReader) GetWaterfallSpansForTraceWithMetadata(ctx context.Con
}
processingPostCache := time.Now()
selectedSpans, uncollapsedSpans, rootServiceName, rootServiceEntryPoint := tracedetail.GetSelectedSpans(req.UncollapsedSpans, req.SelectedSpanID, traceRoots, spanIdToSpanNodeMap, req.IsSelectedSpanIDUnCollapsed)
// When req.Limit is 0 (not set by the client), selectAllSpans is set to false
// preserving the old paged behaviour for backward compatibility
limit := min(req.Limit, tracedetail.MaxLimitToSelectAllSpans)
selectAllSpans := totalSpans <= uint64(limit)
var (
selectedSpans []*model.Span
uncollapsedSpans []string
rootServiceName, rootServiceEntryPoint string
)
if selectAllSpans {
selectedSpans, rootServiceName, rootServiceEntryPoint = tracedetail.GetAllSpans(traceRoots)
} else {
selectedSpans, uncollapsedSpans, rootServiceName, rootServiceEntryPoint = tracedetail.GetSelectedSpans(req.UncollapsedSpans, req.SelectedSpanID, traceRoots, spanIdToSpanNodeMap, req.IsSelectedSpanIDUnCollapsed)
}
r.logger.Info("getWaterfallSpansForTraceWithMetadata: processing post cache", "duration", time.Since(processingPostCache), "traceID", traceID)
// convert start timestamp to millis because right now frontend is expecting it in millis
@@ -1103,7 +1117,7 @@ func (r *ClickHouseReader) GetWaterfallSpansForTraceWithMetadata(ctx context.Con
}
response.Spans = selectedSpans
response.UncollapsedSpans = uncollapsedSpans
response.UncollapsedSpans = uncollapsedSpans // ignoring if all spans are returning
response.StartTimestampMillis = startTime / 1000000
response.EndTimestampMillis = endTime / 1000000
response.TotalSpansCount = totalSpans
@@ -1112,6 +1126,7 @@ func (r *ClickHouseReader) GetWaterfallSpansForTraceWithMetadata(ctx context.Con
response.RootServiceEntryPoint = rootServiceEntryPoint
response.ServiceNameToTotalDurationMap = serviceNameToTotalDurationMap
response.HasMissingSpans = hasMissingSpans
response.HasMore = !selectAllSpans
return response, nil
}
@@ -3371,8 +3386,8 @@ func (r *ClickHouseReader) GetMetricAggregateAttributes(ctx context.Context, org
// Query all relevant metric names from time_series_v4, but leave metadata retrieval to cache/db
query := fmt.Sprintf(
`SELECT DISTINCT metric_name
FROM %s.%s
`SELECT DISTINCT metric_name
FROM %s.%s
WHERE metric_name ILIKE $1 AND __normalized = $2`,
signozMetricDBName, signozTSTableNameV41Day)
@@ -3449,8 +3464,8 @@ func (r *ClickHouseReader) GetMeterAggregateAttributes(ctx context.Context, orgI
var response v3.AggregateAttributeResponse
// Query all relevant metric names from time_series_v4, but leave metadata retrieval to cache/db
query := fmt.Sprintf(
`SELECT metric_name,type,temporality,is_monotonic
FROM %s.%s
`SELECT metric_name,type,temporality,is_monotonic
FROM %s.%s
WHERE metric_name ILIKE $1
GROUP BY metric_name,type,temporality,is_monotonic`,
signozMeterDBName, signozMeterSamplesName)
@@ -5146,7 +5161,7 @@ func (r *ClickHouseReader) GetOverallStateTransitions(ctx context.Context, ruleI
state,
unix_milli AS firing_time
FROM %s.%s
WHERE overall_state = '` + model.StateFiring.String() + `'
WHERE overall_state = '` + model.StateFiring.String() + `'
AND overall_state_changed = true
AND rule_id IN ('%s')
AND unix_milli >= %d AND unix_milli <= %d
@@ -5157,7 +5172,7 @@ resolution_events AS (
state,
unix_milli AS resolution_time
FROM %s.%s
WHERE overall_state = '` + model.StateInactive.String() + `'
WHERE overall_state = '` + model.StateInactive.String() + `'
AND overall_state_changed = true
AND rule_id IN ('%s')
AND unix_milli >= %d AND unix_milli <= %d
@@ -5278,7 +5293,7 @@ WITH firing_events AS (
state,
unix_milli AS firing_time
FROM %s.%s
WHERE overall_state = '` + model.StateFiring.String() + `'
WHERE overall_state = '` + model.StateFiring.String() + `'
AND overall_state_changed = true
AND rule_id IN ('%s')
AND unix_milli >= %d AND unix_milli <= %d
@@ -5289,7 +5304,7 @@ resolution_events AS (
state,
unix_milli AS resolution_time
FROM %s.%s
WHERE overall_state = '` + model.StateInactive.String() + `'
WHERE overall_state = '` + model.StateInactive.String() + `'
AND overall_state_changed = true
AND rule_id IN ('%s')
AND unix_milli >= %d AND unix_milli <= %d
@@ -5335,7 +5350,7 @@ WITH firing_events AS (
state,
unix_milli AS firing_time
FROM %s.%s
WHERE overall_state = '` + model.StateFiring.String() + `'
WHERE overall_state = '` + model.StateFiring.String() + `'
AND overall_state_changed = true
AND rule_id IN ('%s')
AND unix_milli >= %d AND unix_milli <= %d
@@ -5346,7 +5361,7 @@ resolution_events AS (
state,
unix_milli AS resolution_time
FROM %s.%s
WHERE overall_state = '` + model.StateInactive.String() + `'
WHERE overall_state = '` + model.StateInactive.String() + `'
AND overall_state_changed = true
AND rule_id IN ('%s')
AND unix_milli >= %d AND unix_milli <= %d
@@ -5608,7 +5623,7 @@ func (r *ClickHouseReader) GetMetricsDataPoints(ctx context.Context, metricName
instrumentationtypes.CodeNamespace: "clickhouse-reader",
instrumentationtypes.CodeFunctionName: "GetMetricsDataPoints",
})
query := fmt.Sprintf(`SELECT
query := fmt.Sprintf(`SELECT
sum(count) as data_points
FROM %s.%s
WHERE metric_name = ?
@@ -5628,7 +5643,7 @@ func (r *ClickHouseReader) GetMetricsLastReceived(ctx context.Context, metricNam
instrumentationtypes.CodeNamespace: "clickhouse-reader",
instrumentationtypes.CodeFunctionName: "GetMetricsLastReceived",
})
query := fmt.Sprintf(`SELECT
query := fmt.Sprintf(`SELECT
MAX(unix_milli) AS last_received_time
FROM %s.%s
WHERE metric_name = ?
@@ -5639,7 +5654,7 @@ WHERE metric_name = ?
if err != nil {
return 0, &model.ApiError{Typ: "ClickHouseError", Err: err}
}
query = fmt.Sprintf(`SELECT
query = fmt.Sprintf(`SELECT
MAX(unix_milli) AS last_received_time
FROM %s.%s
WHERE metric_name = ? and unix_milli > ?
@@ -5658,7 +5673,7 @@ func (r *ClickHouseReader) GetTotalTimeSeriesForMetricName(ctx context.Context,
instrumentationtypes.CodeNamespace: "clickhouse-reader",
instrumentationtypes.CodeFunctionName: "GetTotalTimeSeriesForMetricName",
})
query := fmt.Sprintf(`SELECT
query := fmt.Sprintf(`SELECT
uniq(fingerprint) AS timeSeriesCount
FROM %s.%s
WHERE metric_name = ?;`, signozMetricDBName, signozTSTableNameV41Week)
@@ -5690,7 +5705,7 @@ func (r *ClickHouseReader) GetAttributesForMetricName(ctx context.Context, metri
}
const baseQueryTemplate = `
SELECT
SELECT
kv.1 AS key,
arrayMap(x -> trim(BOTH '"' FROM x), groupUniqArray(1000)(kv.2)) AS values,
length(groupUniqArray(10000)(kv.2)) AS valueCount
@@ -5799,7 +5814,7 @@ func (r *ClickHouseReader) ListSummaryMetrics(ctx context.Context, orgID valuer.
sampleTable, countExp := utils.WhichSampleTableToUse(req.Start, req.End)
metricsQuery := fmt.Sprintf(
`SELECT
`SELECT
t.metric_name AS metric_name,
ANY_VALUE(t.description) AS description,
ANY_VALUE(t.type) AS metric_type,
@@ -5864,11 +5879,11 @@ func (r *ClickHouseReader) ListSummaryMetrics(ctx context.Context, orgID valuer.
if whereClause != "" {
sb.WriteString(fmt.Sprintf(
`SELECT
`SELECT
s.samples,
s.metric_name
FROM (
SELECT
SELECT
dm.metric_name,
%s AS samples
FROM %s.%s AS dm
@@ -5898,11 +5913,11 @@ func (r *ClickHouseReader) ListSummaryMetrics(ctx context.Context, orgID valuer.
} else {
// If no filters, it is a simpler query.
sb.WriteString(fmt.Sprintf(
`SELECT
`SELECT
s.samples,
s.metric_name
FROM (
SELECT
SELECT
metric_name,
%s AS samples
FROM %s.%s
@@ -6007,16 +6022,16 @@ func (r *ClickHouseReader) GetMetricsTimeSeriesPercentage(ctx context.Context, r
// Construct the query without backticks
query := fmt.Sprintf(`
SELECT
SELECT
metric_name,
total_value,
(total_value * 100.0 / total_time_series) AS percentage
FROM (
SELECT
SELECT
metric_name,
uniq(fingerprint) AS total_value,
(SELECT uniq(fingerprint)
FROM %s.%s
(SELECT uniq(fingerprint)
FROM %s.%s
WHERE unix_milli BETWEEN ? AND ? AND __normalized = ?) AS total_time_series
FROM %s.%s
WHERE unix_milli BETWEEN ? AND ? AND NOT startsWith(metric_name, 'signoz') AND __normalized = ? %s
@@ -6092,7 +6107,7 @@ func (r *ClickHouseReader) GetMetricsSamplesPercentage(ctx context.Context, req
queryLimit := 50 + req.Limit
metricsQuery := fmt.Sprintf(
`SELECT
`SELECT
ts.metric_name AS metric_name,
uniq(ts.fingerprint) AS timeSeries
FROM %s.%s AS ts
@@ -6149,13 +6164,13 @@ func (r *ClickHouseReader) GetMetricsSamplesPercentage(ctx context.Context, req
FROM %s.%s
WHERE unix_milli BETWEEN ? AND ?
)
SELECT
SELECT
s.samples,
s.metric_name,
COALESCE((s.samples * 100.0 / t.total_samples), 0) AS percentage
FROM
FROM
(
SELECT
SELECT
dm.metric_name,
%s AS samples
FROM %s.%s AS dm`,
@@ -6176,7 +6191,7 @@ func (r *ClickHouseReader) GetMetricsSamplesPercentage(ctx context.Context, req
if whereClause != "" {
sb.WriteString(fmt.Sprintf(
` AND dm.fingerprint IN (
SELECT ts.fingerprint
SELECT ts.fingerprint
FROM %s.%s AS ts
WHERE ts.metric_name IN (%s)
AND unix_milli BETWEEN ? AND ?
@@ -6247,7 +6262,7 @@ func (r *ClickHouseReader) GetNameSimilarity(ctx context.Context, req *metrics_e
}
query := fmt.Sprintf(`
SELECT
SELECT
metric_name,
any(type) as type,
any(temporality) as temporality,
@@ -6306,7 +6321,7 @@ func (r *ClickHouseReader) GetAttributeSimilarity(ctx context.Context, req *metr
// Get target labels
extractedLabelsQuery := fmt.Sprintf(`
SELECT
SELECT
kv.1 AS label_key,
topK(10)(JSONExtractString(kv.2)) AS label_values
FROM %s.%s
@@ -6350,12 +6365,12 @@ func (r *ClickHouseReader) GetAttributeSimilarity(ctx context.Context, req *metr
priorityListString := strings.Join(priorityList, ", ")
candidateLabelsQuery := fmt.Sprintf(`
WITH
arrayDistinct([%s]) AS filter_keys,
WITH
arrayDistinct([%s]) AS filter_keys,
arrayDistinct([%s]) AS filter_values,
[%s] AS priority_pairs_input,
%d AS priority_multiplier
SELECT
SELECT
metric_name,
any(type) as type,
any(temporality) as temporality,
@@ -6461,17 +6476,17 @@ func (r *ClickHouseReader) GetMetricsAllResourceAttributes(ctx context.Context,
instrumentationtypes.CodeFunctionName: "GetMetricsAllResourceAttributes",
})
start, end, attTable, _ := utils.WhichAttributesTableToUse(start, end)
query := fmt.Sprintf(`SELECT
key,
query := fmt.Sprintf(`SELECT
key,
count(distinct value) AS distinct_value_count
FROM (
SELECT key, value
FROM %s.%s
ARRAY JOIN
ARRAY JOIN
arrayConcat(mapKeys(resource_attributes)) AS key,
arrayConcat(mapValues(resource_attributes)) AS value
WHERE unix_milli between ? and ?
)
)
GROUP BY key
ORDER BY distinct_value_count DESC;`, signozMetadataDbName, attTable)
valueCtx := context.WithValue(ctx, "clickhouse_max_threads", constants.MetricsExplorerClickhouseThreads)
@@ -6618,11 +6633,11 @@ func (r *ClickHouseReader) GetInspectMetricsFingerprints(ctx context.Context, at
start, end, tsTable, _ := utils.WhichTSTableToUse(req.Start, req.End)
query := fmt.Sprintf(`
SELECT
SELECT
arrayDistinct(groupArray(toString(fingerprint))) AS fingerprints
FROM
(
SELECT
SELECT
metric_name, labels, fingerprint,
%s
FROM %s.%s
@@ -6793,14 +6808,14 @@ func (r *ClickHouseReader) GetUpdatedMetricsMetadata(ctx context.Context, orgID
var stillMissing []string
if len(missingMetrics) > 0 {
metricList := "'" + strings.Join(missingMetrics, "', '") + "'"
query := fmt.Sprintf(`SELECT
query := fmt.Sprintf(`SELECT
metric_name,
argMax(type, created_at) AS type,
argMax(description, created_at) AS description,
argMax(temporality, created_at) AS temporality,
argMax(is_monotonic, created_at) AS is_monotonic,
argMax(unit, created_at) AS unit
FROM %s.%s
FROM %s.%s
WHERE metric_name IN (%s)
GROUP BY metric_name;`,
signozMetricDBName,
@@ -6848,7 +6863,7 @@ func (r *ClickHouseReader) GetUpdatedMetricsMetadata(ctx context.Context, orgID
if len(stillMissing) > 0 {
metricList := "'" + strings.Join(stillMissing, "', '") + "'"
query := fmt.Sprintf(`SELECT DISTINCT metric_name, type, description, temporality, is_monotonic, unit
FROM %s.%s
FROM %s.%s
WHERE metric_name IN (%s)`, signozMetricDBName, signozTSTableNameV4, metricList)
valueCtx := context.WithValue(ctx, "clickhouse_max_threads", constants.MetricsExplorerClickhouseThreads)
rows, err := r.db.Query(valueCtx, query)

View File

@@ -1,6 +1,7 @@
package tracedetail
import (
"maps"
"slices"
"sort"
@@ -9,6 +10,9 @@ import (
var (
SPAN_LIMIT_PER_REQUEST_FOR_WATERFALL float64 = 500
maxDepthForSelectedSpanChildren int = 5
MaxLimitToSelectAllSpans uint = 10_000
)
type Interval struct {
@@ -63,26 +67,22 @@ func findIndexForSelectedSpanFromPreOrder(spans []*model.Span, selectedSpanId st
return selectedSpanIndex
}
func getPathFromRootToSelectedSpanId(node *model.Span, selectedSpanId string, uncollapsedSpans []string, isSelectedSpanIDUnCollapsed bool) (bool, []string) {
func getPathFromRootToSelectedSpanId(node *model.Span, selectedSpanId string) (bool, []string) {
spansFromRootToNode := []string{}
spansFromRootToNode = append(spansFromRootToNode, node.SpanID)
if node.SpanID == selectedSpanId {
if isSelectedSpanIDUnCollapsed && !slices.Contains(uncollapsedSpans, node.SpanID) {
spansFromRootToNode = append(spansFromRootToNode, node.SpanID)
}
return true, spansFromRootToNode
}
isPresentInSubtreeForTheNode := false
for _, child := range node.Children {
isPresentInThisSubtree, _spansFromRootToNode := getPathFromRootToSelectedSpanId(child, selectedSpanId, uncollapsedSpans, isSelectedSpanIDUnCollapsed)
isPresentInThisSubtree, _spansFromRootToNode := getPathFromRootToSelectedSpanId(child, selectedSpanId)
// if the interested node is present in the given subtree then add the span node to uncollapsed node list
if isPresentInThisSubtree {
if !slices.Contains(uncollapsedSpans, node.SpanID) {
spansFromRootToNode = append(spansFromRootToNode, node.SpanID)
}
isPresentInSubtreeForTheNode = true
spansFromRootToNode = append(spansFromRootToNode, _spansFromRootToNode...)
break
}
}
return isPresentInSubtreeForTheNode, spansFromRootToNode
@@ -92,12 +92,23 @@ func getPathFromRootToSelectedSpanId(node *model.Span, selectedSpanId string, un
// throughout the recursion. Per-call state (level, isPartOfPreOrder, etc.)
// is passed as direct arguments.
type traverseOpts struct {
uncollapsedSpans []string
selectedSpanID string
uncollapsedSpans map[string]struct{}
selectedSpanID string
isSelectedSpanUncollapsed bool
selectAll bool
}
func traverseTrace(span *model.Span, opts traverseOpts, level uint64, isPartOfPreOrder bool, hasSibling bool) []*model.Span {
func traverseTrace(
span *model.Span,
opts traverseOpts,
level uint64,
isPartOfPreOrder bool,
hasSibling bool,
autoExpandDepth int,
) ([]*model.Span, []string) {
preOrderTraversal := []*model.Span{}
autoExpandedSpans := []string{}
// sort the children to maintain the order across requests
sort.Slice(span.Children, func(i, j int) bool {
@@ -134,16 +145,36 @@ func traverseTrace(span *model.Span, opts traverseOpts, level uint64, isPartOfPr
preOrderTraversal = append(preOrderTraversal, &nodeWithoutChildren)
}
isAlreadyUncollapsed := slices.Contains(opts.uncollapsedSpans, span.SpanID)
remainingAutoExpandDepth := 0
if span.SpanID == opts.selectedSpanID && opts.isSelectedSpanUncollapsed {
remainingAutoExpandDepth = maxDepthForSelectedSpanChildren
} else if autoExpandDepth > 0 {
remainingAutoExpandDepth = autoExpandDepth - 1
}
_, isAlreadyUncollapsed := opts.uncollapsedSpans[span.SpanID]
for index, child := range span.Children {
_childTraversal := traverseTrace(child, opts, level+1, isPartOfPreOrder && isAlreadyUncollapsed, index != (len(span.Children)-1))
// A child is included in the pre-order output if its parent is uncollapsed
// OR if the child falls within MAX_DEPTH_FOR_SELECTED_SPAN_CHILDREN levels
// below the selected span.
isChildWithinMaxDepth := remainingAutoExpandDepth > 0
childIsPartOfPreOrder := opts.selectAll || (isPartOfPreOrder && (isAlreadyUncollapsed || isChildWithinMaxDepth))
if isPartOfPreOrder && isChildWithinMaxDepth && !isAlreadyUncollapsed {
if !slices.Contains(autoExpandedSpans, span.SpanID) {
autoExpandedSpans = append(autoExpandedSpans, span.SpanID)
}
}
_childTraversal, _autoExpanded := traverseTrace(child, opts, level+1, childIsPartOfPreOrder, index != (len(span.Children)-1), remainingAutoExpandDepth)
preOrderTraversal = append(preOrderTraversal, _childTraversal...)
autoExpandedSpans = append(autoExpandedSpans, _autoExpanded...)
nodeWithoutChildren.SubTreeNodeCount += child.SubTreeNodeCount + 1
span.SubTreeNodeCount += child.SubTreeNodeCount + 1
}
nodeWithoutChildren.SubTreeNodeCount += 1
return preOrderTraversal
return preOrderTraversal, autoExpandedSpans
}
@@ -169,19 +200,36 @@ func GetSelectedSpans(uncollapsedSpans []string, selectedSpanID string, traceRoo
var preOrderTraversal = make([]*model.Span, 0)
var rootServiceName, rootServiceEntryPoint string
updatedUncollapsedSpans := uncollapsedSpans
// create a map of uncollapsed spans for quick lookup
uncollapsedSpanMap := make(map[string]struct{})
for _, spanID := range uncollapsedSpans {
uncollapsedSpanMap[spanID] = struct{}{}
}
selectedSpanIndex := -1
for _, rootSpanID := range traceRoots {
if rootNode, exists := spanIdToSpanNodeMap[rootSpanID.SpanID]; exists {
_, spansFromRootToNode := getPathFromRootToSelectedSpanId(rootNode, selectedSpanID, updatedUncollapsedSpans, isSelectedSpanIDUnCollapsed)
updatedUncollapsedSpans = append(updatedUncollapsedSpans, spansFromRootToNode...)
present, spansFromRootToNode := getPathFromRootToSelectedSpanId(rootNode, selectedSpanID)
if present {
for _, spanID := range spansFromRootToNode {
if selectedSpanID == spanID && !isSelectedSpanIDUnCollapsed {
continue
}
uncollapsedSpanMap[spanID] = struct{}{}
}
}
opts := traverseOpts{
uncollapsedSpans: updatedUncollapsedSpans,
selectedSpanID: selectedSpanID,
uncollapsedSpans: uncollapsedSpanMap,
selectedSpanID: selectedSpanID,
isSelectedSpanUncollapsed: isSelectedSpanIDUnCollapsed,
}
_preOrderTraversal, _autoExpanded := traverseTrace(rootNode, opts, 0, true, false, 0)
// Merge auto-expanded spans into updatedUncollapsedSpans for returning in response
for _, spanID := range _autoExpanded {
uncollapsedSpanMap[spanID] = struct{}{}
}
_preOrderTraversal := traverseTrace(rootNode, opts, 0, true, false)
_selectedSpanIndex := findIndexForSelectedSpanFromPreOrder(_preOrderTraversal, selectedSpanID)
if _selectedSpanIndex != -1 {
@@ -223,5 +271,17 @@ func GetSelectedSpans(uncollapsedSpans []string, selectedSpanID string, traceRoo
startIndex = 0
}
return preOrderTraversal[startIndex:endIndex], updatedUncollapsedSpans, rootServiceName, rootServiceEntryPoint
return preOrderTraversal[startIndex:endIndex], slices.Collect(maps.Keys(uncollapsedSpanMap)), rootServiceName, rootServiceEntryPoint
}
func GetAllSpans(traceRoots []*model.Span) (spans []*model.Span, rootServiceName, rootEntryPoint string) {
if len(traceRoots) > 0 {
rootServiceName = traceRoots[0].ServiceName
rootEntryPoint = traceRoots[0].Name
}
for _, root := range traceRoots {
childSpans, _ := traverseTrace(root, traverseOpts{selectAll: true}, 0, true, false, 0)
spans = append(spans, childSpans...)
}
return
}

View File

@@ -81,28 +81,6 @@ func TestGetSelectedSpans_MultipleRoots(t *testing.T) {
assert.Equal(t, "root1-op", entryPoint, "metadata comes from first root")
}
// isSelectedSpanIDUnCollapsed=true opens only the selected span's direct children,
// not deeper descendants.
//
// root → selected (expanded)
// ├─ child1 ✓
// │ └─ grandchild ✗ (only one level opened)
// └─ child2 ✓
func TestGetSelectedSpans_ExpandedSelectedSpan(t *testing.T) {
root := mkSpan("root", "svc",
mkSpan("selected", "svc",
mkSpan("child1", "svc", mkSpan("grandchild", "svc")),
mkSpan("child2", "svc"),
),
)
spanMap := buildSpanMap(root)
spans, _, _, _ := GetSelectedSpans([]string{}, "selected", []*model.Span{root}, spanMap, true)
// root and selected are on the auto-uncollapsed path; child1/child2 are direct
// children of the expanded selected span; grandchild stays hidden.
assert.Equal(t, []string{"root", "selected", "child1", "child2"}, spanIDs(spans))
}
// Multiple spans uncollapsed simultaneously: children of all uncollapsed spans
// are visible at once.
//
@@ -183,7 +161,7 @@ func TestGetSelectedSpans_PathReturnedInUncollapsed(t *testing.T) {
spanMap := buildSpanMap(root)
spans, uncollapsed, _, _ := GetSelectedSpans([]string{}, "selected", []*model.Span{root}, spanMap, false)
assert.Equal(t, []string{"root", "parent"}, uncollapsed)
assert.ElementsMatch(t, []string{"root", "parent"}, uncollapsed)
assert.Equal(t, []string{"root", "parent", "selected"}, spanIDs(spans))
}
@@ -206,7 +184,7 @@ func TestGetSelectedSpans_SiblingsNotExpanded(t *testing.T) {
// children of root sort alphabetically: parent < unrelated; unrelated-child stays hidden
assert.Equal(t, []string{"root", "parent", "selected", "unrelated"}, spanIDs(spans))
// only the path nodes are tracked as uncollapsed — unrelated is not
assert.Equal(t, []string{"root", "parent"}, uncollapsed)
assert.ElementsMatch(t, []string{"root", "parent"}, uncollapsed)
}
// An unknown selectedSpanID must not panic; returns a window from index 0.
@@ -320,6 +298,119 @@ func TestGetSelectedSpans_WindowShiftsAtStart(t *testing.T) {
assert.Equal(t, "span10", spans[10].SpanID, "selected span still in window")
}
// Auto-expanded span IDs from ALL branches are returned in
// updatedUncollapsedSpans. Only internal nodes (spans with children) are
// tracked — leaf spans are never added.
//
// root (selected)
// ├─ childA (internal ✓)
// │ └─ grandchildA (internal ✓)
// │ └─ leafA (leaf ✗)
// └─ childB (internal ✓)
// └─ grandchildB (internal ✓)
// └─ leafB (leaf ✗)
func TestGetSelectedSpans_AutoExpandedSpansReturnedInUncollapsed(t *testing.T) {
root := mkSpan("root", "svc",
mkSpan("childA", "svc",
mkSpan("grandchildA", "svc",
mkSpan("leafA", "svc"),
),
),
mkSpan("childB", "svc",
mkSpan("grandchildB", "svc",
mkSpan("leafB", "svc"),
),
),
)
spanMap := buildSpanMap(root)
_, uncollapsed, _, _ := GetSelectedSpans([]string{}, "root", []*model.Span{root}, spanMap, true)
// all internal nodes across both branches must be tracked
assert.Contains(t, uncollapsed, "root")
assert.Contains(t, uncollapsed, "childA", "internal node depth 1, branch A")
assert.Contains(t, uncollapsed, "childB", "internal node depth 1, branch B")
assert.Contains(t, uncollapsed, "grandchildA", "internal node depth 2, branch A")
assert.Contains(t, uncollapsed, "grandchildB", "internal node depth 2, branch B")
// leaves have no children to show — never added to uncollapsedSpans
assert.NotContains(t, uncollapsed, "leafA", "leaf spans are never added to uncollapsedSpans")
assert.NotContains(t, uncollapsed, "leafB", "leaf spans are never added to uncollapsedSpans")
}
// ─────────────────────────────────────────────────────────────────────────────
// maxDepthForSelectedSpanChildren boundary tests
// ─────────────────────────────────────────────────────────────────────────────
// Depth is measured from the selected span, not the trace root.
// Ancestors appear via the path-to-root logic, not the depth limit.
// Each depth level has two children to confirm the limit is enforced on all
// branches, not just the first.
//
// root
// └─ A ancestor ✓ (path-to-root)
// └─ selected
// ├─ d1a depth 1 ✓
// │ ├─ d2a depth 2 ✓
// │ │ ├─ d3a depth 3 ✓
// │ │ │ ├─ d4a depth 4 ✓
// │ │ │ │ ├─ d5a depth 5 ✓
// │ │ │ │ │ └─ d6a depth 6 ✗
// │ │ │ │ └─ d5b depth 5 ✓
// │ │ │ └─ d4b depth 4 ✓
// │ │ └─ d3b depth 3 ✓
// │ └─ d2b depth 2 ✓
// └─ d1b depth 1 ✓
func TestGetSelectedSpans_DepthCountedFromSelectedSpan(t *testing.T) {
selected := mkSpan("selected", "svc",
mkSpan("d1a", "svc",
mkSpan("d2a", "svc",
mkSpan("d3a", "svc",
mkSpan("d4a", "svc",
mkSpan("d5a", "svc",
mkSpan("d6a", "svc"), // depth 6 — excluded
),
mkSpan("d5b", "svc"), // depth 5 — included
),
mkSpan("d4b", "svc"), // depth 4 — included
),
mkSpan("d3b", "svc"), // depth 3 — included
),
mkSpan("d2b", "svc"), // depth 2 — included
),
mkSpan("d1b", "svc"), // depth 1 — included
)
root := mkSpan("root", "svc", mkSpan("A", "svc", selected))
spanMap := buildSpanMap(root)
spans, _, _, _ := GetSelectedSpans([]string{}, "selected", []*model.Span{root}, spanMap, true)
ids := spanIDs(spans)
assert.Contains(t, ids, "root", "ancestor shown via path-to-root")
assert.Contains(t, ids, "A", "ancestor shown via path-to-root")
for _, id := range []string{"d1a", "d1b", "d2a", "d2b", "d3a", "d3b", "d4a", "d4b", "d5a", "d5b"} {
assert.Contains(t, ids, id, "depth ≤ 5 — must be included")
}
assert.NotContains(t, ids, "d6a", "depth 6 > limit — excluded")
}
func TestGetAllSpans(t *testing.T) {
root := mkSpan("root", "svc",
mkSpan("childA", "svc",
mkSpan("grandchildA", "svc",
mkSpan("leafA", "svc2"),
),
),
mkSpan("childB", "svc3",
mkSpan("grandchildB", "svc",
mkSpan("leafB", "svc2"),
),
),
)
spans, rootServiceName, rootEntryPoint := GetAllSpans([]*model.Span{root})
assert.ElementsMatch(t, spanIDs(spans), []string{"root", "childA", "grandchildA", "leafA", "childB", "grandchildB", "leafB"})
assert.Equal(t, rootServiceName, "svc")
assert.Equal(t, rootEntryPoint, "root-op")
}
func mkSpan(id, service string, children ...*model.Span) *model.Span {
return &model.Span{
SpanID: id,

View File

@@ -333,6 +333,7 @@ type GetWaterfallSpansForTraceWithMetadataParams struct {
SelectedSpanID string `json:"selectedSpanId"`
IsSelectedSpanIDUnCollapsed bool `json:"isSelectedSpanIDUnCollapsed"`
UncollapsedSpans []string `json:"uncollapsedSpans"`
Limit uint `json:"limit"`
}
type GetFlamegraphSpansForTraceParams struct {

View File

@@ -329,6 +329,7 @@ type GetWaterfallSpansForTraceWithMetadataResponse struct {
HasMissingSpans bool `json:"hasMissingSpans"`
// this is needed for frontend and query service sync
UncollapsedSpans []string `json:"uncollapsedSpans"`
HasMore bool `json:"hasMore"`
}
type GetFlamegraphSpansForTraceResponse struct {

View File

@@ -2,7 +2,6 @@ package rules
import (
"context"
"encoding/json"
"fmt"
"log/slog"
"sync"
@@ -191,24 +190,6 @@ func NewBaseRule(id string, orgID valuer.UUID, p *ruletypes.PostableRule, reader
return baseRule, nil
}
func (r *BaseRule) String() string {
ar := ruletypes.PostableRule{
AlertName: r.name,
RuleCondition: r.ruleCondition,
EvalWindow: r.evalWindow,
Labels: r.labels.Map(),
Annotations: r.annotations.Map(),
PreferredChannels: r.preferredChannels,
}
byt, err := json.Marshal(ar)
if err != nil {
return fmt.Sprintf("error marshaling alerting rule: %s", err.Error())
}
return string(byt)
}
func (r *BaseRule) matchType() ruletypes.MatchType {
if r.ruleCondition == nil {
return ruletypes.AtleastOnce

View File

@@ -1,239 +0,0 @@
package rules
import (
"context"
"encoding/json"
"fmt"
"time"
"github.com/SigNoz/signoz/pkg/errors"
"github.com/SigNoz/signoz/pkg/query-service/model"
"github.com/SigNoz/signoz/pkg/query-service/utils/labels"
"github.com/SigNoz/signoz/pkg/query-service/utils/times"
"github.com/SigNoz/signoz/pkg/query-service/utils/timestamp"
"github.com/SigNoz/signoz/pkg/types/ruletypes"
"github.com/SigNoz/signoz/pkg/units"
)
type EvalVectorOptions struct {
DeleteLabels []string
ExtraAnnotations func(ctx context.Context, ts time.Time, metric labels.Labels) []labels.Label
}
func (r *BaseRule) EvalVector(ctx context.Context, ts time.Time, res ruletypes.Vector, opts EvalVectorOptions) (int, error) {
prevState := r.State()
valueFormatter := units.FormatterFromUnit(r.Unit())
r.mtx.Lock()
defer r.mtx.Unlock()
resultFPs := map[uint64]struct{}{}
alerts := make(map[uint64]*ruletypes.Alert, len(res))
ruleReceivers := r.Threshold.GetRuleReceivers()
ruleReceiverMap := make(map[string][]string)
for _, value := range ruleReceivers {
ruleReceiverMap[value.Name] = value.Channels
}
for _, smpl := range res {
l := make(map[string]string, len(smpl.Metric))
for _, lbl := range smpl.Metric {
l[lbl.Name] = lbl.Value
}
r.logger.DebugContext(ctx, "alerting for series", "rule_name", r.Name(), "series", smpl)
value := valueFormatter.Format(smpl.V, r.Unit())
threshold := valueFormatter.Format(smpl.Target, smpl.TargetUnit)
r.logger.DebugContext(ctx, "Alert template data for rule", "rule_name", r.Name(), "formatter", valueFormatter.Name(), "value", value, "threshold", threshold)
tmplData := ruletypes.AlertTemplateData(l, value, threshold)
// Inject some convenience variables that are easier to remember for users
// who are not used to Go's templating system.
defs := "{{$labels := .Labels}}{{$value := .Value}}{{$threshold := .Threshold}}"
// utility function to apply go template on labels and annotations
expand := func(text string) string {
tmpl := ruletypes.NewTemplateExpander(
ctx,
defs+text,
"__alert_"+r.Name(),
tmplData,
times.Time(timestamp.FromTime(ts)),
nil,
)
result, err := tmpl.Expand()
if err != nil {
result = fmt.Sprintf("<error expanding template: %s>", err)
r.logger.ErrorContext(ctx, "Expanding alert template failed", "rule_name", r.Name(), errors.Attr(err), "data", tmplData)
}
return result
}
lb := labels.NewBuilder(smpl.Metric).Del(opts.DeleteLabels...).Del()
resultLabels := labels.NewBuilder(smpl.Metric).Del(opts.DeleteLabels...).Labels()
for name, value := range r.labels.Map() {
lb.Set(name, expand(value))
}
lb.Set(labels.AlertNameLabel, r.Name())
lb.Set(labels.AlertRuleIdLabel, r.ID())
lb.Set(labels.RuleSourceLabel, r.GeneratorURL())
annotations := make(labels.Labels, 0, len(r.annotations.Map()))
for name, value := range r.annotations.Map() {
annotations = append(annotations, labels.Label{Name: name, Value: expand(value)})
}
if smpl.IsMissing {
lb.Set(labels.AlertNameLabel, "[No data] "+r.Name())
lb.Set(labels.NoDataLabel, "true")
}
if opts.ExtraAnnotations != nil {
extra := opts.ExtraAnnotations(ctx, ts, smpl.Metric)
annotations = append(annotations, extra...)
}
lbs := lb.Labels()
h := lbs.Hash()
resultFPs[h] = struct{}{}
if _, ok := alerts[h]; ok {
r.logger.ErrorContext(ctx, "the alert query returns duplicate records", "rule_id", r.ID(), "alert", alerts[h])
err := fmt.Errorf("duplicate alert found, vector contains metrics with the same labelset after applying alert labels")
// We have already acquired the lock above hence using SetHealth and SetLastError will deadlock.
r.health = ruletypes.HealthBad
r.lastError = err
return 0, err
}
alerts[h] = &ruletypes.Alert{
Labels: lbs,
QueryResultLables: resultLabels,
Annotations: annotations,
ActiveAt: ts,
State: model.StatePending,
Value: smpl.V,
GeneratorURL: r.GeneratorURL(),
Receivers: ruleReceiverMap[lbs.Map()[ruletypes.LabelThresholdName]],
Missing: smpl.IsMissing,
IsRecovering: smpl.IsRecovering,
}
}
r.logger.InfoContext(ctx, "number of alerts found", "rule_name", r.Name(), "alerts_count", len(alerts))
// alerts[h] is ready, add or update active list now
for h, a := range alerts {
// Check whether we already have alerting state for the identifying label set.
// Update the last value and annotations if so, create a new alert entry otherwise.
if alert, ok := r.Active[h]; ok && alert.State != model.StateInactive {
alert.Value = a.Value
alert.Annotations = a.Annotations
// Update the recovering and missing state of existing alert
alert.IsRecovering = a.IsRecovering
alert.Missing = a.Missing
if v, ok := alert.Labels.Map()[ruletypes.LabelThresholdName]; ok {
alert.Receivers = ruleReceiverMap[v]
}
continue
}
r.Active[h] = a
}
itemsToAdd := []model.RuleStateHistory{}
// Check if any pending alerts should be removed or fire now. Write out alert timeseries.
for fp, a := range r.Active {
labelsJSON, err := json.Marshal(a.QueryResultLables)
if err != nil {
r.logger.ErrorContext(ctx, "error marshaling labels", errors.Attr(err), "rule_name", r.Name(), "labels", a.Labels)
}
if _, ok := resultFPs[fp]; !ok {
// If the alert was previously firing, keep it around for a given
// retention time so it is reported as resolved to the AlertManager.
if a.State == model.StatePending || (!a.ResolvedAt.IsZero() && ts.Sub(a.ResolvedAt) > ruletypes.ResolvedRetention) {
delete(r.Active, fp)
}
if a.State != model.StateInactive {
r.logger.DebugContext(ctx, "converting firing alert to inActive", "name", r.Name())
a.State = model.StateInactive
a.ResolvedAt = ts
itemsToAdd = append(itemsToAdd, model.RuleStateHistory{
RuleID: r.ID(),
RuleName: r.Name(),
State: model.StateInactive,
StateChanged: true,
UnixMilli: ts.UnixMilli(),
Labels: model.LabelsString(labelsJSON),
Fingerprint: a.QueryResultLables.Hash(),
Value: a.Value,
})
}
continue
}
if a.State == model.StatePending && ts.Sub(a.ActiveAt) >= r.holdDuration.Duration() {
r.logger.DebugContext(ctx, "converting pending alert to firing", "name", r.Name())
a.State = model.StateFiring
a.FiredAt = ts
state := model.StateFiring
if a.Missing {
state = model.StateNoData
}
itemsToAdd = append(itemsToAdd, model.RuleStateHistory{
RuleID: r.ID(),
RuleName: r.Name(),
State: state,
StateChanged: true,
UnixMilli: ts.UnixMilli(),
Labels: model.LabelsString(labelsJSON),
Fingerprint: a.QueryResultLables.Hash(),
Value: a.Value,
})
}
// We need to change firing alert to recovering if the returned sample meets recovery threshold
changeFiringToRecovering := a.State == model.StateFiring && a.IsRecovering
// We need to change recovering alerts to firing if the returned sample meets target threshold
changeRecoveringToFiring := a.State == model.StateRecovering && !a.IsRecovering && !a.Missing
// in any of the above case we need to update the status of alert
if changeFiringToRecovering || changeRecoveringToFiring {
state := model.StateRecovering
if changeRecoveringToFiring {
state = model.StateFiring
}
a.State = state
r.logger.DebugContext(ctx, "converting alert state", "name", r.Name(), "state", state)
itemsToAdd = append(itemsToAdd, model.RuleStateHistory{
RuleID: r.ID(),
RuleName: r.Name(),
State: state,
StateChanged: true,
UnixMilli: ts.UnixMilli(),
Labels: model.LabelsString(labelsJSON),
Fingerprint: a.QueryResultLables.Hash(),
Value: a.Value,
})
}
}
currentState := r.State()
overallStateChanged := currentState != prevState
for idx, item := range itemsToAdd {
item.OverallStateChanged = overallStateChanged
item.OverallState = currentState
itemsToAdd[idx] = item
}
r.RecordRuleStateHistory(ctx, prevState, currentState, itemsToAdd)
r.health = ruletypes.HealthGood
r.lastError = nil
return len(r.Active), nil
}

View File

@@ -2,20 +2,25 @@ package rules
import (
"context"
"encoding/json"
"fmt"
"log/slog"
"time"
plabels "github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/promql"
"github.com/SigNoz/signoz/pkg/errors"
"github.com/SigNoz/signoz/pkg/prometheus"
"github.com/SigNoz/signoz/pkg/query-service/interfaces"
"github.com/SigNoz/signoz/pkg/query-service/model"
v3 "github.com/SigNoz/signoz/pkg/query-service/model/v3"
"github.com/SigNoz/signoz/pkg/query-service/utils/labels"
qslabels "github.com/SigNoz/signoz/pkg/query-service/utils/labels"
"github.com/SigNoz/signoz/pkg/query-service/utils/times"
"github.com/SigNoz/signoz/pkg/query-service/utils/timestamp"
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
"github.com/SigNoz/signoz/pkg/types/ruletypes"
"github.com/SigNoz/signoz/pkg/units"
"github.com/SigNoz/signoz/pkg/valuer"
)
@@ -181,16 +186,232 @@ func (r *PromRule) buildAndRunQuery(ctx context.Context, ts time.Time) (ruletype
}
func (r *PromRule) Eval(ctx context.Context, ts time.Time) (int, error) {
prevState := r.State()
valueFormatter := units.FormatterFromUnit(r.Unit())
// prepare query, run query get data and filter the data based on the threshold
res, err := r.buildAndRunQuery(ctx, ts)
results, err := r.buildAndRunQuery(ctx, ts)
if err != nil {
return 0, err
}
opts := EvalVectorOptions{
DeleteLabels: []string{labels.MetricNameLabel},
r.mtx.Lock()
defer r.mtx.Unlock()
resultFPs := map[uint64]struct{}{}
alerts := make(map[uint64]*ruletypes.Alert, len(results))
ruleReceivers := r.Threshold.GetRuleReceivers()
ruleReceiverMap := make(map[string][]string)
for _, value := range ruleReceivers {
ruleReceiverMap[value.Name] = value.Channels
}
return r.EvalVector(ctx, ts, res, opts)
for _, result := range results {
l := make(map[string]string, len(result.Metric))
for _, lbl := range result.Metric {
l[lbl.Name] = lbl.Value
}
r.logger.DebugContext(ctx, "alerting for series", "rule_name", r.Name(), "series", result)
threshold := valueFormatter.Format(result.Target, result.TargetUnit)
tmplData := ruletypes.AlertTemplateData(l, valueFormatter.Format(result.V, r.Unit()), threshold)
// Inject some convenience variables that are easier to remember for users
// who are not used to Go's templating system.
defs := "{{$labels := .Labels}}{{$value := .Value}}{{$threshold := .Threshold}}"
expand := func(text string) string {
tmpl := ruletypes.NewTemplateExpander(
ctx,
defs+text,
"__alert_"+r.Name(),
tmplData,
times.Time(timestamp.FromTime(ts)),
nil,
)
result, err := tmpl.Expand()
if err != nil {
result = fmt.Sprintf("<error expanding template: %s>", err)
r.logger.WarnContext(ctx, "Expanding alert template failed", "rule_name", r.Name(), errors.Attr(err), "data", tmplData)
}
return result
}
lb := qslabels.NewBuilder(result.Metric).Del(qslabels.MetricNameLabel)
resultLabels := qslabels.NewBuilder(result.Metric).Del(qslabels.MetricNameLabel).Labels()
for name, value := range r.labels.Map() {
lb.Set(name, expand(value))
}
lb.Set(qslabels.AlertNameLabel, r.Name())
lb.Set(qslabels.AlertRuleIdLabel, r.ID())
lb.Set(qslabels.RuleSourceLabel, r.GeneratorURL())
annotations := make(qslabels.Labels, 0, len(r.annotations.Map()))
for name, value := range r.annotations.Map() {
annotations = append(annotations, qslabels.Label{Name: name, Value: expand(value)})
}
if result.IsMissing {
lb.Set(qslabels.AlertNameLabel, "[No data] "+r.Name())
lb.Set(qslabels.NoDataLabel, "true")
}
lbs := lb.Labels()
h := lbs.Hash()
resultFPs[h] = struct{}{}
if _, ok := alerts[h]; ok {
err = fmt.Errorf("vector contains metrics with the same labelset after applying alert labels")
// We have already acquired the lock above hence using SetHealth and
// SetLastError will deadlock.
r.health = ruletypes.HealthBad
r.lastError = err
return 0, err
}
alerts[h] = &ruletypes.Alert{
Labels: lbs,
QueryResultLables: resultLabels,
Annotations: annotations,
ActiveAt: ts,
State: model.StatePending,
Value: result.V,
GeneratorURL: r.GeneratorURL(),
Receivers: ruleReceiverMap[lbs.Map()[ruletypes.LabelThresholdName]],
Missing: result.IsMissing,
IsRecovering: result.IsRecovering,
}
}
r.logger.InfoContext(ctx, "number of alerts found", "rule_name", r.Name(), "alerts_count", len(alerts))
// alerts[h] is ready, add or update active list now
for h, a := range alerts {
// Check whether we already have alerting state for the identifying label set.
// Update the last value and annotations if so, create a new alert entry otherwise.
if alert, ok := r.Active[h]; ok && alert.State != model.StateInactive {
alert.Value = a.Value
alert.Annotations = a.Annotations
// Update the recovering and missing state of existing alert
alert.IsRecovering = a.IsRecovering
alert.Missing = a.Missing
if v, ok := alert.Labels.Map()[ruletypes.LabelThresholdName]; ok {
alert.Receivers = ruleReceiverMap[v]
}
continue
}
r.Active[h] = a
}
itemsToAdd := []model.RuleStateHistory{}
// Check if any pending alerts should be removed or fire now. Write out alert timeseries.
for fp, a := range r.Active {
labelsJSON, err := json.Marshal(a.QueryResultLables)
if err != nil {
r.logger.ErrorContext(ctx, "error marshaling labels", errors.Attr(err), "rule_name", r.Name())
}
if _, ok := resultFPs[fp]; !ok {
// If the alert was previously firing, keep it around for a given
// retention time so it is reported as resolved to the AlertManager.
if a.State == model.StatePending || (!a.ResolvedAt.IsZero() && ts.Sub(a.ResolvedAt) > ruletypes.ResolvedRetention) {
delete(r.Active, fp)
}
if a.State != model.StateInactive {
a.State = model.StateInactive
a.ResolvedAt = ts
itemsToAdd = append(itemsToAdd, model.RuleStateHistory{
RuleID: r.ID(),
RuleName: r.Name(),
State: model.StateInactive,
StateChanged: true,
UnixMilli: ts.UnixMilli(),
Labels: model.LabelsString(labelsJSON),
Fingerprint: a.QueryResultLables.Hash(),
})
}
continue
}
if a.State == model.StatePending && ts.Sub(a.ActiveAt) >= r.holdDuration.Duration() {
a.State = model.StateFiring
a.FiredAt = ts
state := model.StateFiring
if a.Missing {
state = model.StateNoData
}
itemsToAdd = append(itemsToAdd, model.RuleStateHistory{
RuleID: r.ID(),
RuleName: r.Name(),
State: state,
StateChanged: true,
UnixMilli: ts.UnixMilli(),
Labels: model.LabelsString(labelsJSON),
Fingerprint: a.QueryResultLables.Hash(),
Value: a.Value,
})
}
// We need to change firing alert to recovering if the returned sample meets recovery threshold
changeAlertingToRecovering := a.State == model.StateFiring && a.IsRecovering
// We need to change recovering alerts to firing if the returned sample meets target threshold
changeRecoveringToFiring := a.State == model.StateRecovering && !a.IsRecovering && !a.Missing
// in any of the above case we need to update the status of alert
if changeAlertingToRecovering || changeRecoveringToFiring {
state := model.StateRecovering
if changeRecoveringToFiring {
state = model.StateFiring
}
a.State = state
r.logger.DebugContext(ctx, "converting alert state", "name", r.Name(), "state", state)
itemsToAdd = append(itemsToAdd, model.RuleStateHistory{
RuleID: r.ID(),
RuleName: r.Name(),
State: state,
StateChanged: true,
UnixMilli: ts.UnixMilli(),
Labels: model.LabelsString(labelsJSON),
Fingerprint: a.QueryResultLables.Hash(),
Value: a.Value,
})
}
}
r.health = ruletypes.HealthGood
r.lastError = err
currentState := r.State()
overallStateChanged := currentState != prevState
for idx, item := range itemsToAdd {
item.OverallStateChanged = overallStateChanged
item.OverallState = currentState
itemsToAdd[idx] = item
}
r.RecordRuleStateHistory(ctx, prevState, currentState, itemsToAdd)
return len(r.Active), nil
}
func (r *PromRule) String() string {
ar := ruletypes.PostableRule{
AlertName: r.name,
RuleCondition: r.ruleCondition,
EvalWindow: r.evalWindow,
Labels: r.labels.Map(),
Annotations: r.annotations.Map(),
PreferredChannels: r.preferredChannels,
}
byt, err := json.Marshal(ar)
if err != nil {
return fmt.Sprintf("error marshaling alerting rule: %s", err.Error())
}
return string(byt)
}
func (r *PromRule) RunAlertQuery(ctx context.Context, qs string, start, end time.Time, interval time.Duration) (promql.Matrix, error) {
@@ -242,7 +463,7 @@ func toCommonSeries(series promql.Series) v3.Series {
Points: make([]v3.Point, 0),
}
series.Metric.Range(func(lbl plabels.Label) {
series.Metric.Range(func(lbl labels.Label) {
commonSeries.Labels[lbl.Name] = lbl.Value
commonSeries.LabelsArray = append(commonSeries.LabelsArray, map[string]string{
lbl.Name: lbl.Value,

View File

@@ -3,6 +3,7 @@ package rules
import (
"bytes"
"context"
"encoding/json"
"fmt"
"log/slog"
"math"
@@ -23,16 +24,21 @@ import (
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
"github.com/SigNoz/signoz/pkg/valuer"
querierV5 "github.com/SigNoz/signoz/pkg/querier"
logsv3 "github.com/SigNoz/signoz/pkg/query-service/app/logs/v3"
"github.com/SigNoz/signoz/pkg/query-service/app/querier"
querierV2 "github.com/SigNoz/signoz/pkg/query-service/app/querier/v2"
"github.com/SigNoz/signoz/pkg/query-service/app/queryBuilder"
tracesV4 "github.com/SigNoz/signoz/pkg/query-service/app/traces/v4"
"github.com/SigNoz/signoz/pkg/query-service/interfaces"
v3 "github.com/SigNoz/signoz/pkg/query-service/model/v3"
"github.com/SigNoz/signoz/pkg/query-service/utils/labels"
querytemplate "github.com/SigNoz/signoz/pkg/query-service/utils/queryTemplate"
"github.com/SigNoz/signoz/pkg/query-service/utils/times"
"github.com/SigNoz/signoz/pkg/query-service/utils/timestamp"
logsv3 "github.com/SigNoz/signoz/pkg/query-service/app/logs/v3"
tracesV4 "github.com/SigNoz/signoz/pkg/query-service/app/traces/v4"
"github.com/SigNoz/signoz/pkg/units"
querierV5 "github.com/SigNoz/signoz/pkg/querier"
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
)
@@ -277,7 +283,7 @@ func (r *ThresholdRule) prepareLinksToTraces(ctx context.Context, ts time.Time,
return contextlinks.PrepareLinksToTraces(start, end, filterItems)
}
func (r *ThresholdRule) prepareQueryRangeV5(ctx context.Context, ts time.Time) *qbtypes.QueryRangeRequest {
func (r *ThresholdRule) prepareQueryRangeV5(ctx context.Context, ts time.Time) (*qbtypes.QueryRangeRequest, error) {
r.logger.InfoContext(
ctx, "prepare query range request v5", "ts", ts.UnixMilli(), "eval_window", r.evalWindow.Milliseconds(), "eval_delay", r.evalDelay.Milliseconds(),
)
@@ -296,13 +302,16 @@ func (r *ThresholdRule) prepareQueryRangeV5(ctx context.Context, ts time.Time) *
}
req.CompositeQuery.Queries = make([]qbtypes.QueryEnvelope, len(r.Condition().CompositeQuery.Queries))
copy(req.CompositeQuery.Queries, r.Condition().CompositeQuery.Queries)
return req
return req, nil
}
func (r *ThresholdRule) prepareLinksToLogsV5(ctx context.Context, ts time.Time, lbls labels.Labels) string {
selectedQuery := r.GetSelectedQuery()
qr := r.prepareQueryRangeV5(ctx, ts)
qr, err := r.prepareQueryRangeV5(ctx, ts)
if err != nil {
return ""
}
start := time.UnixMilli(int64(qr.Start))
end := time.UnixMilli(int64(qr.End))
@@ -339,7 +348,10 @@ func (r *ThresholdRule) prepareLinksToLogsV5(ctx context.Context, ts time.Time,
func (r *ThresholdRule) prepareLinksToTracesV5(ctx context.Context, ts time.Time, lbls labels.Labels) string {
selectedQuery := r.GetSelectedQuery()
qr := r.prepareQueryRangeV5(ctx, ts)
qr, err := r.prepareQueryRangeV5(ctx, ts)
if err != nil {
return ""
}
start := time.UnixMilli(int64(qr.Start))
end := time.UnixMilli(int64(qr.End))
@@ -488,7 +500,10 @@ func (r *ThresholdRule) buildAndRunQuery(ctx context.Context, orgID valuer.UUID,
}
func (r *ThresholdRule) buildAndRunQueryV5(ctx context.Context, orgID valuer.UUID, ts time.Time) (ruletypes.Vector, error) {
params := r.prepareQueryRangeV5(ctx, ts)
params, err := r.prepareQueryRangeV5(ctx, ts)
if err != nil {
return nil, err
}
var results []*v3.Result
@@ -565,8 +580,13 @@ func (r *ThresholdRule) buildAndRunQueryV5(ctx context.Context, orgID valuer.UUI
}
func (r *ThresholdRule) Eval(ctx context.Context, ts time.Time) (int, error) {
prevState := r.State()
valueFormatter := units.FormatterFromUnit(r.Unit())
var res ruletypes.Vector
var err error
if r.version == "v5" {
r.logger.InfoContext(ctx, "running v5 query")
res, err = r.buildAndRunQueryV5(ctx, r.orgID, ts)
@@ -574,35 +594,247 @@ func (r *ThresholdRule) Eval(ctx context.Context, ts time.Time) (int, error) {
r.logger.InfoContext(ctx, "running v4 query")
res, err = r.buildAndRunQuery(ctx, r.orgID, ts)
}
if err != nil {
return 0, err
}
opts := EvalVectorOptions{
DeleteLabels: []string{labels.MetricNameLabel, labels.TemporalityLabel},
ExtraAnnotations: func(ctx context.Context, ts time.Time, smpl labels.Labels) []labels.Label {
host := r.hostFromSource()
if host == "" {
return nil
}
r.mtx.Lock()
defer r.mtx.Unlock()
//// Links with timestamps should go in annotations since labels
//// is used alert grouping, and we want to group alerts with the same
//// label set, but different timestamps, together.
switch r.typ { // DIFF
case ruletypes.AlertTypeTraces:
if link := r.prepareLinksToTraces(ctx, ts, smpl); link != "" {
r.logger.InfoContext(ctx, "adding traces link to annotations", "link", fmt.Sprintf("%s/traces-explorer?%s", host, link))
return []labels.Label{{Name: "related_traces", Value: fmt.Sprintf("%s/traces-explorer?%s", host, link)}}
}
case ruletypes.AlertTypeLogs:
if link := r.prepareLinksToLogs(ctx, ts, smpl); link != "" {
r.logger.InfoContext(ctx, "adding logs link to annotations", "link", fmt.Sprintf("%s/logs/logs-explorer?%s", host, link))
return []labels.Label{{Name: "related_logs", Value: fmt.Sprintf("%s/logs/logs-explorer?%s", host, link)}}
}
}
return nil
},
resultFPs := map[uint64]struct{}{}
alerts := make(map[uint64]*ruletypes.Alert, len(res))
ruleReceivers := r.Threshold.GetRuleReceivers()
ruleReceiverMap := make(map[string][]string)
for _, value := range ruleReceivers {
ruleReceiverMap[value.Name] = value.Channels
}
return r.EvalVector(ctx, ts, res, opts)
for _, smpl := range res {
l := make(map[string]string, len(smpl.Metric))
for _, lbl := range smpl.Metric {
l[lbl.Name] = lbl.Value
}
value := valueFormatter.Format(smpl.V, r.Unit())
// todo(aniket): handle different threshold
threshold := valueFormatter.Format(smpl.Target, smpl.TargetUnit)
r.logger.DebugContext(ctx, "Alert template data for rule", "rule_name", r.Name(), "formatter", valueFormatter.Name(), "value", value, "threshold", threshold)
tmplData := ruletypes.AlertTemplateData(l, value, threshold)
// Inject some convenience variables that are easier to remember for users
// who are not used to Go's templating system.
defs := "{{$labels := .Labels}}{{$value := .Value}}{{$threshold := .Threshold}}"
// utility function to apply go template on labels and annotations
expand := func(text string) string {
tmpl := ruletypes.NewTemplateExpander(
ctx,
defs+text,
"__alert_"+r.Name(),
tmplData,
times.Time(timestamp.FromTime(ts)),
nil,
)
result, err := tmpl.Expand()
if err != nil {
result = fmt.Sprintf("<error expanding template: %s>", err)
r.logger.ErrorContext(ctx, "Expanding alert template failed", errors.Attr(err), "data", tmplData)
}
return result
}
lb := labels.NewBuilder(smpl.Metric).Del(labels.MetricNameLabel).Del(labels.TemporalityLabel)
resultLabels := labels.NewBuilder(smpl.Metric).Del(labels.MetricNameLabel).Del(labels.TemporalityLabel).Labels()
for name, value := range r.labels.Map() {
lb.Set(name, expand(value))
}
lb.Set(labels.AlertNameLabel, r.Name())
lb.Set(labels.AlertRuleIdLabel, r.ID())
lb.Set(labels.RuleSourceLabel, r.GeneratorURL())
annotations := make(labels.Labels, 0, len(r.annotations.Map()))
for name, value := range r.annotations.Map() {
annotations = append(annotations, labels.Label{Name: name, Value: expand(value)})
}
if smpl.IsMissing {
lb.Set(labels.AlertNameLabel, "[No data] "+r.Name())
lb.Set(labels.NoDataLabel, "true")
}
// Links with timestamps should go in annotations since labels
// is used alert grouping, and we want to group alerts with the same
// label set, but different timestamps, together.
switch r.typ {
case ruletypes.AlertTypeTraces:
link := r.prepareLinksToTraces(ctx, ts, smpl.Metric)
if link != "" && r.hostFromSource() != "" {
r.logger.InfoContext(ctx, "adding traces link to annotations", "link", fmt.Sprintf("%s/traces-explorer?%s", r.hostFromSource(), link))
annotations = append(annotations, labels.Label{Name: "related_traces", Value: fmt.Sprintf("%s/traces-explorer?%s", r.hostFromSource(), link)})
}
case ruletypes.AlertTypeLogs:
link := r.prepareLinksToLogs(ctx, ts, smpl.Metric)
if link != "" && r.hostFromSource() != "" {
r.logger.InfoContext(ctx, "adding logs link to annotations", "link", fmt.Sprintf("%s/logs/logs-explorer?%s", r.hostFromSource(), link))
annotations = append(annotations, labels.Label{Name: "related_logs", Value: fmt.Sprintf("%s/logs/logs-explorer?%s", r.hostFromSource(), link)})
}
}
lbs := lb.Labels()
h := lbs.Hash()
resultFPs[h] = struct{}{}
if _, ok := alerts[h]; ok {
return 0, fmt.Errorf("duplicate alert found, vector contains metrics with the same labelset after applying alert labels")
}
alerts[h] = &ruletypes.Alert{
Labels: lbs,
QueryResultLables: resultLabels,
Annotations: annotations,
ActiveAt: ts,
State: model.StatePending,
Value: smpl.V,
GeneratorURL: r.GeneratorURL(),
Receivers: ruleReceiverMap[lbs.Map()[ruletypes.LabelThresholdName]],
Missing: smpl.IsMissing,
IsRecovering: smpl.IsRecovering,
}
}
r.logger.InfoContext(ctx, "number of alerts found", "rule_name", r.Name(), "alerts_count", len(alerts))
// alerts[h] is ready, add or update active list now
for h, a := range alerts {
// Check whether we already have alerting state for the identifying label set.
// Update the last value and annotations if so, create a new alert entry otherwise.
if alert, ok := r.Active[h]; ok && alert.State != model.StateInactive {
alert.Value = a.Value
alert.Annotations = a.Annotations
// Update the recovering and missing state of existing alert
alert.IsRecovering = a.IsRecovering
alert.Missing = a.Missing
if v, ok := alert.Labels.Map()[ruletypes.LabelThresholdName]; ok {
alert.Receivers = ruleReceiverMap[v]
}
continue
}
r.Active[h] = a
}
itemsToAdd := []model.RuleStateHistory{}
// Check if any pending alerts should be removed or fire now. Write out alert timeseries.
for fp, a := range r.Active {
labelsJSON, err := json.Marshal(a.QueryResultLables)
if err != nil {
r.logger.ErrorContext(ctx, "error marshaling labels", errors.Attr(err), "labels", a.Labels)
}
if _, ok := resultFPs[fp]; !ok {
// If the alert was previously firing, keep it around for a given
// retention time so it is reported as resolved to the AlertManager.
if a.State == model.StatePending || (!a.ResolvedAt.IsZero() && ts.Sub(a.ResolvedAt) > ruletypes.ResolvedRetention) {
delete(r.Active, fp)
}
if a.State != model.StateInactive {
r.logger.DebugContext(ctx, "converting firing alert to inActive", "name", r.Name())
a.State = model.StateInactive
a.ResolvedAt = ts
itemsToAdd = append(itemsToAdd, model.RuleStateHistory{
RuleID: r.ID(),
RuleName: r.Name(),
State: model.StateInactive,
StateChanged: true,
UnixMilli: ts.UnixMilli(),
Labels: model.LabelsString(labelsJSON),
Fingerprint: a.QueryResultLables.Hash(),
Value: a.Value,
})
}
continue
}
if a.State == model.StatePending && ts.Sub(a.ActiveAt) >= r.holdDuration.Duration() {
r.logger.DebugContext(ctx, "converting pending alert to firing", "name", r.Name())
a.State = model.StateFiring
a.FiredAt = ts
state := model.StateFiring
if a.Missing {
state = model.StateNoData
}
itemsToAdd = append(itemsToAdd, model.RuleStateHistory{
RuleID: r.ID(),
RuleName: r.Name(),
State: state,
StateChanged: true,
UnixMilli: ts.UnixMilli(),
Labels: model.LabelsString(labelsJSON),
Fingerprint: a.QueryResultLables.Hash(),
Value: a.Value,
})
}
// We need to change firing alert to recovering if the returned sample meets recovery threshold
changeAlertingToRecovering := a.State == model.StateFiring && a.IsRecovering
// We need to change recovering alerts to firing if the returned sample meets target threshold
changeRecoveringToFiring := a.State == model.StateRecovering && !a.IsRecovering && !a.Missing
// in any of the above case we need to update the status of alert
if changeAlertingToRecovering || changeRecoveringToFiring {
state := model.StateRecovering
if changeRecoveringToFiring {
state = model.StateFiring
}
a.State = state
r.logger.DebugContext(ctx, "converting alert state", "name", r.Name(), "state", state)
itemsToAdd = append(itemsToAdd, model.RuleStateHistory{
RuleID: r.ID(),
RuleName: r.Name(),
State: state,
StateChanged: true,
UnixMilli: ts.UnixMilli(),
Labels: model.LabelsString(labelsJSON),
Fingerprint: a.QueryResultLables.Hash(),
Value: a.Value,
})
}
}
currentState := r.State()
overallStateChanged := currentState != prevState
for idx, item := range itemsToAdd {
item.OverallStateChanged = overallStateChanged
item.OverallState = currentState
itemsToAdd[idx] = item
}
r.RecordRuleStateHistory(ctx, prevState, currentState, itemsToAdd)
r.health = ruletypes.HealthGood
r.lastError = err
return len(r.Active), nil
}
func (r *ThresholdRule) String() string {
ar := ruletypes.PostableRule{
AlertName: r.name,
RuleCondition: r.ruleCondition,
EvalWindow: r.evalWindow,
Labels: r.labels.Map(),
Annotations: r.annotations.Map(),
PreferredChannels: r.preferredChannels,
}
byt, err := json.Marshal(ar)
if err != nil {
return fmt.Sprintf("error marshaling alerting rule: %s", err.Error())
}
return string(byt)
}

View File

@@ -102,6 +102,10 @@ func (fn FunctionName) Validate() error {
// ApplyFunction applies the given function to the result data
func ApplyFunction(fn Function, result *TimeSeries) *TimeSeries {
if len(result.Values) == 0 {
return result
}
// Extract the function name and arguments
name := fn.Name
args := fn.Args

View File

@@ -599,6 +599,14 @@ func TestApplyFunction(t *testing.T) {
values []float64
want []float64
}{
{
name: "test with empty series",
function: Function{
Name: FunctionNameRunningDiff,
},
values: []float64{},
want: []float64{},
},
{
name: "cutOffMin function",
function: Function{