mirror of
https://github.com/SigNoz/signoz.git
synced 2026-03-27 14:40:25 +00:00
Compare commits
8 Commits
chore/refa
...
fix/alert-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
acf20e3208 | ||
|
|
58dabf9a6c | ||
|
|
91edc2a895 | ||
|
|
2e70857554 | ||
|
|
f3e6892d5b | ||
|
|
23a4960e74 | ||
|
|
5d0c55d682 | ||
|
|
15704e0433 |
@@ -7,21 +7,29 @@ import (
|
||||
"log/slog"
|
||||
"math"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/SigNoz/signoz/ee/query-service/anomaly"
|
||||
"github.com/SigNoz/signoz/pkg/cache"
|
||||
"github.com/SigNoz/signoz/pkg/errors"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/common"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/model"
|
||||
"github.com/SigNoz/signoz/pkg/transition"
|
||||
"github.com/SigNoz/signoz/pkg/types/ruletypes"
|
||||
"github.com/SigNoz/signoz/pkg/valuer"
|
||||
|
||||
querierV2 "github.com/SigNoz/signoz/pkg/query-service/app/querier/v2"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/app/queryBuilder"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/interfaces"
|
||||
v3 "github.com/SigNoz/signoz/pkg/query-service/model/v3"
|
||||
baserules "github.com/SigNoz/signoz/pkg/query-service/rules"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/utils/labels"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/utils/times"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/utils/timestamp"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/units"
|
||||
|
||||
baserules "github.com/SigNoz/signoz/pkg/query-service/rules"
|
||||
|
||||
querierV5 "github.com/SigNoz/signoz/pkg/querier"
|
||||
|
||||
@@ -37,6 +45,16 @@ const (
|
||||
type AnomalyRule struct {
|
||||
*baserules.BaseRule
|
||||
|
||||
mtx sync.Mutex
|
||||
|
||||
reader interfaces.Reader
|
||||
|
||||
// querierV2 is used for alerts created after the introduction of new metrics query builder
|
||||
querierV2 interfaces.Querier
|
||||
|
||||
// querierV5 is used for alerts migrated after the introduction of new query builder
|
||||
querierV5 querierV5.Querier
|
||||
|
||||
provider anomaly.Provider
|
||||
providerV2 anomalyV2.Provider
|
||||
|
||||
@@ -85,6 +103,14 @@ func NewAnomalyRule(
|
||||
|
||||
logger.Info("using seasonality", "seasonality", t.seasonality.String())
|
||||
|
||||
querierOptsV2 := querierV2.QuerierOptions{
|
||||
Reader: reader,
|
||||
Cache: cache,
|
||||
KeyGenerator: queryBuilder.NewKeyGenerator(),
|
||||
}
|
||||
|
||||
t.querierV2 = querierV2.NewQuerier(querierOptsV2)
|
||||
t.reader = reader
|
||||
if t.seasonality == anomaly.SeasonalityHourly {
|
||||
t.provider = anomaly.NewHourlyProvider(
|
||||
anomaly.WithCache[*anomaly.HourlyProvider](cache),
|
||||
@@ -122,6 +148,7 @@ func NewAnomalyRule(
|
||||
)
|
||||
}
|
||||
|
||||
t.querierV5 = querierV5
|
||||
t.version = p.Version
|
||||
t.logger = logger
|
||||
return &t, nil
|
||||
@@ -306,8 +333,14 @@ func (r *AnomalyRule) buildAndRunQueryV5(ctx context.Context, orgID valuer.UUID,
|
||||
}
|
||||
|
||||
func (r *AnomalyRule) Eval(ctx context.Context, ts time.Time) (int, error) {
|
||||
|
||||
prevState := r.State()
|
||||
|
||||
valueFormatter := units.FormatterFromUnit(r.Unit())
|
||||
|
||||
var res ruletypes.Vector
|
||||
var err error
|
||||
|
||||
if r.version == "v5" {
|
||||
r.logger.InfoContext(ctx, "running v5 query")
|
||||
res, err = r.buildAndRunQueryV5(ctx, r.OrgID(), ts)
|
||||
@@ -319,8 +352,220 @@ func (r *AnomalyRule) Eval(ctx context.Context, ts time.Time) (int, error) {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
opts := baserules.EvalVectorOptions{
|
||||
DeleteLabels: []string{labels.MetricNameLabel, labels.TemporalityLabel},
|
||||
r.mtx.Lock()
|
||||
defer r.mtx.Unlock()
|
||||
|
||||
resultFPs := map[uint64]struct{}{}
|
||||
var alerts = make(map[uint64]*ruletypes.Alert, len(res))
|
||||
|
||||
ruleReceivers := r.Threshold.GetRuleReceivers()
|
||||
ruleReceiverMap := make(map[string][]string)
|
||||
for _, value := range ruleReceivers {
|
||||
ruleReceiverMap[value.Name] = value.Channels
|
||||
}
|
||||
return r.EvalVector(ctx, ts, res, opts)
|
||||
|
||||
for _, smpl := range res {
|
||||
l := make(map[string]string, len(smpl.Metric))
|
||||
for _, lbl := range smpl.Metric {
|
||||
l[lbl.Name] = lbl.Value
|
||||
}
|
||||
value := valueFormatter.Format(smpl.V, r.Unit())
|
||||
threshold := valueFormatter.Format(smpl.Target, smpl.TargetUnit)
|
||||
r.logger.DebugContext(ctx, "Alert template data for rule", "rule_name", r.Name(), "formatter", valueFormatter.Name(), "value", value, "threshold", threshold)
|
||||
|
||||
tmplData := ruletypes.AlertTemplateData(l, value, threshold)
|
||||
// Inject some convenience variables that are easier to remember for users
|
||||
// who are not used to Go's templating system.
|
||||
defs := "{{$labels := .Labels}}{{$value := .Value}}{{$threshold := .Threshold}}"
|
||||
|
||||
// utility function to apply go template on labels and annotations
|
||||
expand := func(text string) string {
|
||||
|
||||
tmpl := ruletypes.NewTemplateExpander(
|
||||
ctx,
|
||||
defs+text,
|
||||
"__alert_"+r.Name(),
|
||||
tmplData,
|
||||
times.Time(timestamp.FromTime(ts)),
|
||||
nil,
|
||||
)
|
||||
result, err := tmpl.Expand()
|
||||
if err != nil {
|
||||
result = fmt.Sprintf("<error expanding template: %s>", err)
|
||||
r.logger.ErrorContext(ctx, "Expanding alert template failed", errors.Attr(err), "data", tmplData, "rule_name", r.Name())
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
lb := labels.NewBuilder(smpl.Metric).Del(labels.MetricNameLabel).Del(labels.TemporalityLabel)
|
||||
resultLabels := labels.NewBuilder(smpl.Metric).Del(labels.MetricNameLabel).Del(labels.TemporalityLabel).Labels()
|
||||
|
||||
for name, value := range r.Labels().Map() {
|
||||
lb.Set(name, expand(value))
|
||||
}
|
||||
|
||||
lb.Set(labels.AlertNameLabel, r.Name())
|
||||
lb.Set(labels.AlertRuleIdLabel, r.ID())
|
||||
lb.Set(labels.RuleSourceLabel, r.GeneratorURL())
|
||||
|
||||
annotations := make(labels.Labels, 0, len(r.Annotations().Map()))
|
||||
for name, value := range r.Annotations().Map() {
|
||||
annotations = append(annotations, labels.Label{Name: name, Value: expand(value)})
|
||||
}
|
||||
if smpl.IsMissing {
|
||||
lb.Set(labels.AlertNameLabel, "[No data] "+r.Name())
|
||||
lb.Set(labels.NoDataLabel, "true")
|
||||
}
|
||||
|
||||
lbs := lb.Labels()
|
||||
h := lbs.Hash()
|
||||
resultFPs[h] = struct{}{}
|
||||
|
||||
if _, ok := alerts[h]; ok {
|
||||
r.logger.ErrorContext(ctx, "the alert query returns duplicate records", "rule_id", r.ID(), "alert", alerts[h])
|
||||
err = fmt.Errorf("duplicate alert found, vector contains metrics with the same labelset after applying alert labels")
|
||||
return 0, err
|
||||
}
|
||||
|
||||
alerts[h] = &ruletypes.Alert{
|
||||
Labels: lbs,
|
||||
QueryResultLables: resultLabels,
|
||||
Annotations: annotations,
|
||||
ActiveAt: ts,
|
||||
State: model.StatePending,
|
||||
Value: smpl.V,
|
||||
GeneratorURL: r.GeneratorURL(),
|
||||
Receivers: ruleReceiverMap[lbs.Map()[ruletypes.LabelThresholdName]],
|
||||
Missing: smpl.IsMissing,
|
||||
IsRecovering: smpl.IsRecovering,
|
||||
}
|
||||
}
|
||||
|
||||
r.logger.InfoContext(ctx, "number of alerts found", "rule_name", r.Name(), "alerts_count", len(alerts))
|
||||
// alerts[h] is ready, add or update active list now
|
||||
for h, a := range alerts {
|
||||
// Check whether we already have alerting state for the identifying label set.
|
||||
// Update the last value and annotations if so, create a new alert entry otherwise.
|
||||
if alert, ok := r.Active[h]; ok && alert.State != model.StateInactive {
|
||||
|
||||
alert.Value = a.Value
|
||||
alert.Annotations = a.Annotations
|
||||
// Update the recovering and missing state of existing alert
|
||||
alert.IsRecovering = a.IsRecovering
|
||||
alert.Missing = a.Missing
|
||||
if v, ok := alert.Labels.Map()[ruletypes.LabelThresholdName]; ok {
|
||||
alert.Receivers = ruleReceiverMap[v]
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
r.Active[h] = a
|
||||
}
|
||||
|
||||
itemsToAdd := []model.RuleStateHistory{}
|
||||
|
||||
// Check if any pending alerts should be removed or fire now. Write out alert timeseries.
|
||||
for fp, a := range r.Active {
|
||||
labelsJSON, err := json.Marshal(a.QueryResultLables)
|
||||
if err != nil {
|
||||
r.logger.ErrorContext(ctx, "error marshaling labels", errors.Attr(err), "labels", a.Labels)
|
||||
}
|
||||
if _, ok := resultFPs[fp]; !ok {
|
||||
// If the alert was previously firing, keep it around for a given
|
||||
// retention time so it is reported as resolved to the AlertManager.
|
||||
if a.State == model.StatePending || (!a.ResolvedAt.IsZero() && ts.Sub(a.ResolvedAt) > ruletypes.ResolvedRetention) {
|
||||
delete(r.Active, fp)
|
||||
}
|
||||
if a.State != model.StateInactive {
|
||||
a.State = model.StateInactive
|
||||
a.ResolvedAt = ts
|
||||
itemsToAdd = append(itemsToAdd, model.RuleStateHistory{
|
||||
RuleID: r.ID(),
|
||||
RuleName: r.Name(),
|
||||
State: model.StateInactive,
|
||||
StateChanged: true,
|
||||
UnixMilli: ts.UnixMilli(),
|
||||
Labels: model.LabelsString(labelsJSON),
|
||||
Fingerprint: a.QueryResultLables.Hash(),
|
||||
Value: a.Value,
|
||||
})
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if a.State == model.StatePending && ts.Sub(a.ActiveAt) >= r.HoldDuration().Duration() {
|
||||
a.State = model.StateFiring
|
||||
a.FiredAt = ts
|
||||
state := model.StateFiring
|
||||
if a.Missing {
|
||||
state = model.StateNoData
|
||||
}
|
||||
itemsToAdd = append(itemsToAdd, model.RuleStateHistory{
|
||||
RuleID: r.ID(),
|
||||
RuleName: r.Name(),
|
||||
State: state,
|
||||
StateChanged: true,
|
||||
UnixMilli: ts.UnixMilli(),
|
||||
Labels: model.LabelsString(labelsJSON),
|
||||
Fingerprint: a.QueryResultLables.Hash(),
|
||||
Value: a.Value,
|
||||
})
|
||||
}
|
||||
|
||||
// We need to change firing alert to recovering if the returned sample meets recovery threshold
|
||||
changeFiringToRecovering := a.State == model.StateFiring && a.IsRecovering
|
||||
// We need to change recovering alerts to firing if the returned sample meets target threshold
|
||||
changeRecoveringToFiring := a.State == model.StateRecovering && !a.IsRecovering && !a.Missing
|
||||
// in any of the above case we need to update the status of alert
|
||||
if changeFiringToRecovering || changeRecoveringToFiring {
|
||||
state := model.StateRecovering
|
||||
if changeRecoveringToFiring {
|
||||
state = model.StateFiring
|
||||
}
|
||||
a.State = state
|
||||
r.logger.DebugContext(ctx, "converting alert state", "name", r.Name(), "state", state)
|
||||
itemsToAdd = append(itemsToAdd, model.RuleStateHistory{
|
||||
RuleID: r.ID(),
|
||||
RuleName: r.Name(),
|
||||
State: state,
|
||||
StateChanged: true,
|
||||
UnixMilli: ts.UnixMilli(),
|
||||
Labels: model.LabelsString(labelsJSON),
|
||||
Fingerprint: a.QueryResultLables.Hash(),
|
||||
Value: a.Value,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
currentState := r.State()
|
||||
|
||||
overallStateChanged := currentState != prevState
|
||||
for idx, item := range itemsToAdd {
|
||||
item.OverallStateChanged = overallStateChanged
|
||||
item.OverallState = currentState
|
||||
itemsToAdd[idx] = item
|
||||
}
|
||||
|
||||
r.RecordRuleStateHistory(ctx, prevState, currentState, itemsToAdd)
|
||||
|
||||
return len(r.Active), nil
|
||||
}
|
||||
|
||||
func (r *AnomalyRule) String() string {
|
||||
|
||||
ar := ruletypes.PostableRule{
|
||||
AlertName: r.Name(),
|
||||
RuleCondition: r.Condition(),
|
||||
EvalWindow: r.EvalWindow(),
|
||||
Labels: r.Labels().Map(),
|
||||
Annotations: r.Annotations().Map(),
|
||||
PreferredChannels: r.PreferredChannels(),
|
||||
}
|
||||
|
||||
byt, err := json.Marshal(ar)
|
||||
if err != nil {
|
||||
return fmt.Sprintf("error marshaling alerting rule: %s", err.Error())
|
||||
}
|
||||
|
||||
return string(byt)
|
||||
}
|
||||
|
||||
@@ -202,19 +202,8 @@ function InviteMembersModal({
|
||||
onComplete?.();
|
||||
} catch (err) {
|
||||
const apiErr = err as APIError;
|
||||
if (apiErr?.getHttpStatusCode() === 409) {
|
||||
toast.error(
|
||||
touchedRows.length === 1
|
||||
? `${touchedRows[0].email} is already a member`
|
||||
: 'Invite for one or more users already exists',
|
||||
{ richColors: true },
|
||||
);
|
||||
} else {
|
||||
const errorMessage = apiErr?.getErrorMessage?.() ?? 'An error occurred';
|
||||
toast.error(`Failed to send invites: ${errorMessage}`, {
|
||||
richColors: true,
|
||||
});
|
||||
}
|
||||
const errorMessage = apiErr?.getErrorMessage?.() ?? 'An error occurred';
|
||||
toast.error(errorMessage, { richColors: true });
|
||||
} finally {
|
||||
setIsSubmitting(false);
|
||||
}
|
||||
|
||||
@@ -1,9 +1,18 @@
|
||||
import { toast } from '@signozhq/sonner';
|
||||
import inviteUsers from 'api/v1/invite/bulk/create';
|
||||
import sendInvite from 'api/v1/invite/create';
|
||||
import { StatusCodes } from 'http-status-codes';
|
||||
import { render, screen, userEvent, waitFor } from 'tests/test-utils';
|
||||
import APIError from 'types/api/error';
|
||||
|
||||
import InviteMembersModal from '../InviteMembersModal';
|
||||
|
||||
const makeApiError = (message: string, code = StatusCodes.CONFLICT): APIError =>
|
||||
new APIError({
|
||||
httpStatusCode: code,
|
||||
error: { code: 'already_exists', message, url: '', errors: [] },
|
||||
});
|
||||
|
||||
jest.mock('api/v1/invite/create');
|
||||
jest.mock('api/v1/invite/bulk/create');
|
||||
jest.mock('@signozhq/sonner', () => ({
|
||||
@@ -142,6 +151,90 @@ describe('InviteMembersModal', () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe('error handling', () => {
|
||||
it('shows BE message on single invite 409', async () => {
|
||||
const user = userEvent.setup({ pointerEventsCheck: 0 });
|
||||
mockSendInvite.mockRejectedValue(
|
||||
makeApiError('An invite already exists for this email: single@signoz.io'),
|
||||
);
|
||||
|
||||
render(<InviteMembersModal {...defaultProps} />);
|
||||
|
||||
await user.type(
|
||||
screen.getAllByPlaceholderText('john@signoz.io')[0],
|
||||
'single@signoz.io',
|
||||
);
|
||||
await user.click(screen.getAllByText('Select roles')[0]);
|
||||
await user.click(await screen.findByText('Viewer'));
|
||||
await user.click(
|
||||
screen.getByRole('button', { name: /invite team members/i }),
|
||||
);
|
||||
|
||||
await waitFor(() => {
|
||||
expect(toast.error).toHaveBeenCalledWith(
|
||||
'An invite already exists for this email: single@signoz.io',
|
||||
expect.anything(),
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
it('shows BE message on bulk invite 409', async () => {
|
||||
const user = userEvent.setup({ pointerEventsCheck: 0 });
|
||||
mockInviteUsers.mockRejectedValue(
|
||||
makeApiError('An invite already exists for this email: alice@signoz.io'),
|
||||
);
|
||||
|
||||
render(<InviteMembersModal {...defaultProps} />);
|
||||
|
||||
const emailInputs = screen.getAllByPlaceholderText('john@signoz.io');
|
||||
await user.type(emailInputs[0], 'alice@signoz.io');
|
||||
await user.click(screen.getAllByText('Select roles')[0]);
|
||||
await user.click(await screen.findByText('Viewer'));
|
||||
|
||||
await user.type(emailInputs[1], 'bob@signoz.io');
|
||||
await user.click(screen.getAllByText('Select roles')[0]);
|
||||
const editorOptions = await screen.findAllByText('Editor');
|
||||
await user.click(editorOptions[editorOptions.length - 1]);
|
||||
|
||||
await user.click(
|
||||
screen.getByRole('button', { name: /invite team members/i }),
|
||||
);
|
||||
|
||||
await waitFor(() => {
|
||||
expect(toast.error).toHaveBeenCalledWith(
|
||||
'An invite already exists for this email: alice@signoz.io',
|
||||
expect.anything(),
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
it('shows BE message on generic error', async () => {
|
||||
const user = userEvent.setup({ pointerEventsCheck: 0 });
|
||||
mockSendInvite.mockRejectedValue(
|
||||
makeApiError('Internal server error', StatusCodes.INTERNAL_SERVER_ERROR),
|
||||
);
|
||||
|
||||
render(<InviteMembersModal {...defaultProps} />);
|
||||
|
||||
await user.type(
|
||||
screen.getAllByPlaceholderText('john@signoz.io')[0],
|
||||
'single@signoz.io',
|
||||
);
|
||||
await user.click(screen.getAllByText('Select roles')[0]);
|
||||
await user.click(await screen.findByText('Viewer'));
|
||||
await user.click(
|
||||
screen.getByRole('button', { name: /invite team members/i }),
|
||||
);
|
||||
|
||||
await waitFor(() => {
|
||||
expect(toast.error).toHaveBeenCalledWith(
|
||||
'Internal server error',
|
||||
expect.anything(),
|
||||
);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
it('uses inviteUsers (bulk) when multiple rows are filled', async () => {
|
||||
const user = userEvent.setup({ pointerEventsCheck: 0 });
|
||||
const onComplete = jest.fn();
|
||||
|
||||
@@ -16,9 +16,9 @@ function AverageResolutionCard({
|
||||
}: TotalTriggeredCardProps): JSX.Element {
|
||||
return (
|
||||
<StatsCard
|
||||
displayValue={formatTime(currentAvgResolutionTime)}
|
||||
totalCurrentCount={currentAvgResolutionTime}
|
||||
totalPastCount={pastAvgResolutionTime}
|
||||
displayValue={formatTime(+currentAvgResolutionTime)}
|
||||
totalCurrentCount={+currentAvgResolutionTime}
|
||||
totalPastCount={+pastAvgResolutionTime}
|
||||
title="Avg. Resolution Time"
|
||||
timeSeries={timeSeries}
|
||||
/>
|
||||
|
||||
@@ -464,14 +464,10 @@ function GeneralSettings({
|
||||
onModalToggleHandler(type);
|
||||
};
|
||||
|
||||
const {
|
||||
isCloudUser: isCloudUserVal,
|
||||
isEnterpriseSelfHostedUser,
|
||||
} = useGetTenantLicense();
|
||||
const { isCloudUser: isCloudUserVal } = useGetTenantLicense();
|
||||
|
||||
const isAdmin = user.role === USER_ROLES.ADMIN;
|
||||
const showCustomDomainSettings =
|
||||
(isCloudUserVal || isEnterpriseSelfHostedUser) && isAdmin;
|
||||
const showCustomDomainSettings = isCloudUserVal && isAdmin;
|
||||
|
||||
const renderConfig = [
|
||||
{
|
||||
|
||||
@@ -38,7 +38,6 @@ jest.mock('hooks/useComponentPermission', () => ({
|
||||
jest.mock('hooks/useGetTenantLicense', () => ({
|
||||
useGetTenantLicense: jest.fn(() => ({
|
||||
isCloudUser: false,
|
||||
isEnterpriseSelfHostedUser: false,
|
||||
})),
|
||||
}));
|
||||
|
||||
@@ -389,7 +388,6 @@ describe('GeneralSettings - S3 Logs Retention', () => {
|
||||
beforeEach(() => {
|
||||
(useGetTenantLicense as jest.Mock).mockReturnValue({
|
||||
isCloudUser: true,
|
||||
isEnterpriseSelfHostedUser: false,
|
||||
});
|
||||
});
|
||||
|
||||
@@ -411,15 +409,14 @@ describe('GeneralSettings - S3 Logs Retention', () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe('Enterprise Self-Hosted User Rendering', () => {
|
||||
describe('Non-cloud user rendering', () => {
|
||||
beforeEach(() => {
|
||||
(useGetTenantLicense as jest.Mock).mockReturnValue({
|
||||
isCloudUser: false,
|
||||
isEnterpriseSelfHostedUser: true,
|
||||
});
|
||||
});
|
||||
|
||||
it('should render CustomDomainSettings but not GeneralSettingsCloud', () => {
|
||||
it('should not render CustomDomainSettings or GeneralSettingsCloud', () => {
|
||||
render(
|
||||
<GeneralSettings
|
||||
metricsTtlValuesPayload={mockMetricsRetention}
|
||||
@@ -432,12 +429,14 @@ describe('GeneralSettings - S3 Logs Retention', () => {
|
||||
/>,
|
||||
);
|
||||
|
||||
expect(screen.getByTestId('custom-domain-settings')).toBeInTheDocument();
|
||||
expect(
|
||||
screen.queryByTestId('custom-domain-settings'),
|
||||
).not.toBeInTheDocument();
|
||||
expect(
|
||||
screen.queryByTestId('general-settings-cloud'),
|
||||
).not.toBeInTheDocument();
|
||||
|
||||
// Save buttons should be visible for self-hosted
|
||||
// Save buttons should be visible for non-cloud users (these are from retentions)
|
||||
const saveButtons = screen.getAllByRole('button', { name: /save/i });
|
||||
expect(saveButtons.length).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
@@ -217,7 +217,7 @@ function K8sVolumesList({
|
||||
{
|
||||
dataSource: currentQuery.builder.queryData[0].dataSource,
|
||||
aggregateAttribute: GetK8sEntityToAggregateAttribute(
|
||||
K8sCategory.NODES,
|
||||
K8sCategory.VOLUMES,
|
||||
dotMetricsEnabled,
|
||||
),
|
||||
aggregateOperator: 'noop',
|
||||
@@ -228,7 +228,7 @@ function K8sVolumesList({
|
||||
queryKey: [currentQuery.builder.queryData[0].dataSource, 'noop'],
|
||||
},
|
||||
true,
|
||||
K8sCategory.NODES,
|
||||
K8sCategory.VOLUMES,
|
||||
);
|
||||
|
||||
const query = useMemo(() => {
|
||||
@@ -597,7 +597,7 @@ function K8sVolumesList({
|
||||
isLoadingGroupByFilters={isLoadingGroupByFilters}
|
||||
handleGroupByChange={handleGroupByChange}
|
||||
selectedGroupBy={groupBy}
|
||||
entity={K8sCategory.NODES}
|
||||
entity={K8sCategory.VOLUMES}
|
||||
showAutoRefresh={!selectedVolumeData}
|
||||
/>
|
||||
{isError && <Typography>{data?.error || 'Something went wrong'}</Typography>}
|
||||
|
||||
@@ -0,0 +1,162 @@
|
||||
import setupCommonMocks from '../commonMocks';
|
||||
|
||||
setupCommonMocks();
|
||||
|
||||
import { QueryClient, QueryClientProvider } from 'react-query';
|
||||
// eslint-disable-next-line no-restricted-imports
|
||||
import { Provider } from 'react-redux';
|
||||
import { MemoryRouter } from 'react-router-dom';
|
||||
import { render, waitFor } from '@testing-library/react';
|
||||
import { FeatureKeys } from 'constants/features';
|
||||
import K8sVolumesList from 'container/InfraMonitoringK8s/Volumes/K8sVolumesList';
|
||||
import { rest, server } from 'mocks-server/server';
|
||||
import { IAppContext, IUser } from 'providers/App/types';
|
||||
import store from 'store';
|
||||
import { LicenseResModel } from 'types/api/licensesV3/getActive';
|
||||
|
||||
const queryClient = new QueryClient({
|
||||
defaultOptions: {
|
||||
queries: {
|
||||
retry: false,
|
||||
cacheTime: 0,
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
const SERVER_URL = 'http://localhost/api';
|
||||
|
||||
describe('K8sVolumesList - useGetAggregateKeys Category Regression', () => {
|
||||
let requestsMade: Array<{
|
||||
url: string;
|
||||
params: URLSearchParams;
|
||||
body?: any;
|
||||
}> = [];
|
||||
|
||||
beforeEach(() => {
|
||||
requestsMade = [];
|
||||
queryClient.clear();
|
||||
|
||||
server.use(
|
||||
rest.get(`${SERVER_URL}/v3/autocomplete/attribute_keys`, (req, res, ctx) => {
|
||||
const url = req.url.toString();
|
||||
const params = req.url.searchParams;
|
||||
|
||||
requestsMade.push({
|
||||
url,
|
||||
params,
|
||||
});
|
||||
|
||||
return res(
|
||||
ctx.status(200),
|
||||
ctx.json({
|
||||
status: 'success',
|
||||
data: {
|
||||
attributeKeys: [],
|
||||
},
|
||||
}),
|
||||
);
|
||||
}),
|
||||
rest.post(`${SERVER_URL}/v1/pvcs/list`, (req, res, ctx) =>
|
||||
res(
|
||||
ctx.status(200),
|
||||
ctx.json({
|
||||
status: 'success',
|
||||
data: {
|
||||
type: 'list',
|
||||
records: [],
|
||||
groups: null,
|
||||
total: 0,
|
||||
sentAnyHostMetricsData: false,
|
||||
isSendingK8SAgentMetrics: false,
|
||||
},
|
||||
}),
|
||||
),
|
||||
),
|
||||
);
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
server.resetHandlers();
|
||||
});
|
||||
|
||||
it('should call aggregate keys API with k8s_volume_capacity', async () => {
|
||||
render(
|
||||
<QueryClientProvider client={queryClient}>
|
||||
<Provider store={store}>
|
||||
<MemoryRouter>
|
||||
<K8sVolumesList
|
||||
isFiltersVisible={false}
|
||||
handleFilterVisibilityChange={jest.fn()}
|
||||
quickFiltersLastUpdated={-1}
|
||||
/>
|
||||
</MemoryRouter>
|
||||
</Provider>
|
||||
</QueryClientProvider>,
|
||||
);
|
||||
|
||||
await waitFor(() => {
|
||||
expect(requestsMade.length).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
// Find the attribute_keys request
|
||||
const attributeKeysRequest = requestsMade.find((req) =>
|
||||
req.url.includes('/autocomplete/attribute_keys'),
|
||||
);
|
||||
|
||||
expect(attributeKeysRequest).toBeDefined();
|
||||
|
||||
const aggregateAttribute = attributeKeysRequest?.params.get(
|
||||
'aggregateAttribute',
|
||||
);
|
||||
|
||||
expect(aggregateAttribute).toBe('k8s_volume_capacity');
|
||||
});
|
||||
|
||||
it('should call aggregate keys API with k8s.volume.capacity when dotMetrics enabled', async () => {
|
||||
jest
|
||||
.spyOn(await import('providers/App/App'), 'useAppContext')
|
||||
.mockReturnValue({
|
||||
featureFlags: [
|
||||
{
|
||||
name: FeatureKeys.DOT_METRICS_ENABLED,
|
||||
active: true,
|
||||
usage: 0,
|
||||
usage_limit: 0,
|
||||
route: '',
|
||||
},
|
||||
],
|
||||
user: { role: 'ADMIN' } as IUser,
|
||||
activeLicense: (null as unknown) as LicenseResModel,
|
||||
} as IAppContext);
|
||||
|
||||
render(
|
||||
<QueryClientProvider client={queryClient}>
|
||||
<Provider store={store}>
|
||||
<MemoryRouter>
|
||||
<K8sVolumesList
|
||||
isFiltersVisible={false}
|
||||
handleFilterVisibilityChange={jest.fn()}
|
||||
quickFiltersLastUpdated={-1}
|
||||
/>
|
||||
</MemoryRouter>
|
||||
</Provider>
|
||||
</QueryClientProvider>,
|
||||
);
|
||||
|
||||
await waitFor(() => {
|
||||
expect(requestsMade.length).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
const attributeKeysRequest = requestsMade.find((req) =>
|
||||
req.url.includes('/autocomplete/attribute_keys'),
|
||||
);
|
||||
|
||||
expect(attributeKeysRequest).toBeDefined();
|
||||
|
||||
const aggregateAttribute = attributeKeysRequest?.params.get(
|
||||
'aggregateAttribute',
|
||||
);
|
||||
|
||||
expect(aggregateAttribute).toBe('k8s.volume.capacity');
|
||||
});
|
||||
});
|
||||
@@ -20,7 +20,6 @@ import AlertHistory from 'container/AlertHistory';
|
||||
import { TIMELINE_TABLE_PAGE_SIZE } from 'container/AlertHistory/constants';
|
||||
import { AlertDetailsTab, TimelineFilter } from 'container/AlertHistory/types';
|
||||
import { urlKey } from 'container/AllError/utils';
|
||||
import { RelativeTimeMap } from 'container/TopNav/DateTimeSelectionV2/constants';
|
||||
import useAxiosError from 'hooks/useAxiosError';
|
||||
import { useNotifications } from 'hooks/useNotifications';
|
||||
import { useSafeNavigate } from 'hooks/useSafeNavigate';
|
||||
@@ -47,6 +46,8 @@ import { PayloadProps } from 'types/api/alerts/get';
|
||||
import { TagFilter } from 'types/api/queryBuilder/queryBuilderData';
|
||||
import { nanoToMilli } from 'utils/timeUtils';
|
||||
|
||||
const DEFAULT_TIME_RANGE = '30m';
|
||||
|
||||
export const useAlertHistoryQueryParams = (): {
|
||||
ruleId: string | null;
|
||||
startTime: number;
|
||||
@@ -61,8 +62,8 @@ export const useAlertHistoryQueryParams = (): {
|
||||
const relativeTimeParam = params.get(QueryParams.relativeTime);
|
||||
|
||||
const relativeTime =
|
||||
(relativeTimeParam === 'null' ? null : relativeTimeParam) ??
|
||||
RelativeTimeMap['6hr'];
|
||||
(relativeTimeParam === 'null' ? null : relativeTimeParam) ||
|
||||
DEFAULT_TIME_RANGE;
|
||||
|
||||
const intStartTime = parseInt(startTime || '0', 10);
|
||||
const intEndTime = parseInt(endTime || '0', 10);
|
||||
|
||||
@@ -56,8 +56,8 @@ export interface AlertRuleStats {
|
||||
totalPastTriggers: number;
|
||||
currentTriggersSeries: CurrentTriggersSeries;
|
||||
pastTriggersSeries: CurrentTriggersSeries | null;
|
||||
currentAvgResolutionTime: number;
|
||||
pastAvgResolutionTime: number;
|
||||
currentAvgResolutionTime: string;
|
||||
pastAvgResolutionTime: string;
|
||||
currentAvgResolutionTimeSeries: CurrentTriggersSeries;
|
||||
pastAvgResolutionTimeSeries: any | null;
|
||||
}
|
||||
|
||||
@@ -112,6 +112,12 @@ export function formatEpochTimestamp(epoch: number): string {
|
||||
*/
|
||||
|
||||
export function formatTime(seconds: number): string {
|
||||
seconds = +seconds;
|
||||
|
||||
if (Number.isNaN(seconds)) {
|
||||
return '-';
|
||||
}
|
||||
|
||||
const days = seconds / 86400;
|
||||
|
||||
if (days >= 1) {
|
||||
|
||||
@@ -1090,7 +1090,21 @@ func (r *ClickHouseReader) GetWaterfallSpansForTraceWithMetadata(ctx context.Con
|
||||
}
|
||||
|
||||
processingPostCache := time.Now()
|
||||
selectedSpans, uncollapsedSpans, rootServiceName, rootServiceEntryPoint := tracedetail.GetSelectedSpans(req.UncollapsedSpans, req.SelectedSpanID, traceRoots, spanIdToSpanNodeMap, req.IsSelectedSpanIDUnCollapsed)
|
||||
// When req.Limit is 0 (not set by the client), selectAllSpans is set to false
|
||||
// preserving the old paged behaviour for backward compatibility
|
||||
limit := min(req.Limit, tracedetail.MaxLimitToSelectAllSpans)
|
||||
selectAllSpans := totalSpans <= uint64(limit)
|
||||
|
||||
var (
|
||||
selectedSpans []*model.Span
|
||||
uncollapsedSpans []string
|
||||
rootServiceName, rootServiceEntryPoint string
|
||||
)
|
||||
if selectAllSpans {
|
||||
selectedSpans, rootServiceName, rootServiceEntryPoint = tracedetail.GetAllSpans(traceRoots)
|
||||
} else {
|
||||
selectedSpans, uncollapsedSpans, rootServiceName, rootServiceEntryPoint = tracedetail.GetSelectedSpans(req.UncollapsedSpans, req.SelectedSpanID, traceRoots, spanIdToSpanNodeMap, req.IsSelectedSpanIDUnCollapsed)
|
||||
}
|
||||
r.logger.Info("getWaterfallSpansForTraceWithMetadata: processing post cache", "duration", time.Since(processingPostCache), "traceID", traceID)
|
||||
|
||||
// convert start timestamp to millis because right now frontend is expecting it in millis
|
||||
@@ -1103,7 +1117,7 @@ func (r *ClickHouseReader) GetWaterfallSpansForTraceWithMetadata(ctx context.Con
|
||||
}
|
||||
|
||||
response.Spans = selectedSpans
|
||||
response.UncollapsedSpans = uncollapsedSpans
|
||||
response.UncollapsedSpans = uncollapsedSpans // ignoring if all spans are returning
|
||||
response.StartTimestampMillis = startTime / 1000000
|
||||
response.EndTimestampMillis = endTime / 1000000
|
||||
response.TotalSpansCount = totalSpans
|
||||
@@ -1112,6 +1126,7 @@ func (r *ClickHouseReader) GetWaterfallSpansForTraceWithMetadata(ctx context.Con
|
||||
response.RootServiceEntryPoint = rootServiceEntryPoint
|
||||
response.ServiceNameToTotalDurationMap = serviceNameToTotalDurationMap
|
||||
response.HasMissingSpans = hasMissingSpans
|
||||
response.HasMore = !selectAllSpans
|
||||
return response, nil
|
||||
}
|
||||
|
||||
@@ -3371,8 +3386,8 @@ func (r *ClickHouseReader) GetMetricAggregateAttributes(ctx context.Context, org
|
||||
|
||||
// Query all relevant metric names from time_series_v4, but leave metadata retrieval to cache/db
|
||||
query := fmt.Sprintf(
|
||||
`SELECT DISTINCT metric_name
|
||||
FROM %s.%s
|
||||
`SELECT DISTINCT metric_name
|
||||
FROM %s.%s
|
||||
WHERE metric_name ILIKE $1 AND __normalized = $2`,
|
||||
signozMetricDBName, signozTSTableNameV41Day)
|
||||
|
||||
@@ -3449,8 +3464,8 @@ func (r *ClickHouseReader) GetMeterAggregateAttributes(ctx context.Context, orgI
|
||||
var response v3.AggregateAttributeResponse
|
||||
// Query all relevant metric names from time_series_v4, but leave metadata retrieval to cache/db
|
||||
query := fmt.Sprintf(
|
||||
`SELECT metric_name,type,temporality,is_monotonic
|
||||
FROM %s.%s
|
||||
`SELECT metric_name,type,temporality,is_monotonic
|
||||
FROM %s.%s
|
||||
WHERE metric_name ILIKE $1
|
||||
GROUP BY metric_name,type,temporality,is_monotonic`,
|
||||
signozMeterDBName, signozMeterSamplesName)
|
||||
@@ -5146,7 +5161,7 @@ func (r *ClickHouseReader) GetOverallStateTransitions(ctx context.Context, ruleI
|
||||
state,
|
||||
unix_milli AS firing_time
|
||||
FROM %s.%s
|
||||
WHERE overall_state = '` + model.StateFiring.String() + `'
|
||||
WHERE overall_state = '` + model.StateFiring.String() + `'
|
||||
AND overall_state_changed = true
|
||||
AND rule_id IN ('%s')
|
||||
AND unix_milli >= %d AND unix_milli <= %d
|
||||
@@ -5157,7 +5172,7 @@ resolution_events AS (
|
||||
state,
|
||||
unix_milli AS resolution_time
|
||||
FROM %s.%s
|
||||
WHERE overall_state = '` + model.StateInactive.String() + `'
|
||||
WHERE overall_state = '` + model.StateInactive.String() + `'
|
||||
AND overall_state_changed = true
|
||||
AND rule_id IN ('%s')
|
||||
AND unix_milli >= %d AND unix_milli <= %d
|
||||
@@ -5278,7 +5293,7 @@ WITH firing_events AS (
|
||||
state,
|
||||
unix_milli AS firing_time
|
||||
FROM %s.%s
|
||||
WHERE overall_state = '` + model.StateFiring.String() + `'
|
||||
WHERE overall_state = '` + model.StateFiring.String() + `'
|
||||
AND overall_state_changed = true
|
||||
AND rule_id IN ('%s')
|
||||
AND unix_milli >= %d AND unix_milli <= %d
|
||||
@@ -5289,7 +5304,7 @@ resolution_events AS (
|
||||
state,
|
||||
unix_milli AS resolution_time
|
||||
FROM %s.%s
|
||||
WHERE overall_state = '` + model.StateInactive.String() + `'
|
||||
WHERE overall_state = '` + model.StateInactive.String() + `'
|
||||
AND overall_state_changed = true
|
||||
AND rule_id IN ('%s')
|
||||
AND unix_milli >= %d AND unix_milli <= %d
|
||||
@@ -5335,7 +5350,7 @@ WITH firing_events AS (
|
||||
state,
|
||||
unix_milli AS firing_time
|
||||
FROM %s.%s
|
||||
WHERE overall_state = '` + model.StateFiring.String() + `'
|
||||
WHERE overall_state = '` + model.StateFiring.String() + `'
|
||||
AND overall_state_changed = true
|
||||
AND rule_id IN ('%s')
|
||||
AND unix_milli >= %d AND unix_milli <= %d
|
||||
@@ -5346,7 +5361,7 @@ resolution_events AS (
|
||||
state,
|
||||
unix_milli AS resolution_time
|
||||
FROM %s.%s
|
||||
WHERE overall_state = '` + model.StateInactive.String() + `'
|
||||
WHERE overall_state = '` + model.StateInactive.String() + `'
|
||||
AND overall_state_changed = true
|
||||
AND rule_id IN ('%s')
|
||||
AND unix_milli >= %d AND unix_milli <= %d
|
||||
@@ -5608,7 +5623,7 @@ func (r *ClickHouseReader) GetMetricsDataPoints(ctx context.Context, metricName
|
||||
instrumentationtypes.CodeNamespace: "clickhouse-reader",
|
||||
instrumentationtypes.CodeFunctionName: "GetMetricsDataPoints",
|
||||
})
|
||||
query := fmt.Sprintf(`SELECT
|
||||
query := fmt.Sprintf(`SELECT
|
||||
sum(count) as data_points
|
||||
FROM %s.%s
|
||||
WHERE metric_name = ?
|
||||
@@ -5628,7 +5643,7 @@ func (r *ClickHouseReader) GetMetricsLastReceived(ctx context.Context, metricNam
|
||||
instrumentationtypes.CodeNamespace: "clickhouse-reader",
|
||||
instrumentationtypes.CodeFunctionName: "GetMetricsLastReceived",
|
||||
})
|
||||
query := fmt.Sprintf(`SELECT
|
||||
query := fmt.Sprintf(`SELECT
|
||||
MAX(unix_milli) AS last_received_time
|
||||
FROM %s.%s
|
||||
WHERE metric_name = ?
|
||||
@@ -5639,7 +5654,7 @@ WHERE metric_name = ?
|
||||
if err != nil {
|
||||
return 0, &model.ApiError{Typ: "ClickHouseError", Err: err}
|
||||
}
|
||||
query = fmt.Sprintf(`SELECT
|
||||
query = fmt.Sprintf(`SELECT
|
||||
MAX(unix_milli) AS last_received_time
|
||||
FROM %s.%s
|
||||
WHERE metric_name = ? and unix_milli > ?
|
||||
@@ -5658,7 +5673,7 @@ func (r *ClickHouseReader) GetTotalTimeSeriesForMetricName(ctx context.Context,
|
||||
instrumentationtypes.CodeNamespace: "clickhouse-reader",
|
||||
instrumentationtypes.CodeFunctionName: "GetTotalTimeSeriesForMetricName",
|
||||
})
|
||||
query := fmt.Sprintf(`SELECT
|
||||
query := fmt.Sprintf(`SELECT
|
||||
uniq(fingerprint) AS timeSeriesCount
|
||||
FROM %s.%s
|
||||
WHERE metric_name = ?;`, signozMetricDBName, signozTSTableNameV41Week)
|
||||
@@ -5690,7 +5705,7 @@ func (r *ClickHouseReader) GetAttributesForMetricName(ctx context.Context, metri
|
||||
}
|
||||
|
||||
const baseQueryTemplate = `
|
||||
SELECT
|
||||
SELECT
|
||||
kv.1 AS key,
|
||||
arrayMap(x -> trim(BOTH '"' FROM x), groupUniqArray(1000)(kv.2)) AS values,
|
||||
length(groupUniqArray(10000)(kv.2)) AS valueCount
|
||||
@@ -5799,7 +5814,7 @@ func (r *ClickHouseReader) ListSummaryMetrics(ctx context.Context, orgID valuer.
|
||||
sampleTable, countExp := utils.WhichSampleTableToUse(req.Start, req.End)
|
||||
|
||||
metricsQuery := fmt.Sprintf(
|
||||
`SELECT
|
||||
`SELECT
|
||||
t.metric_name AS metric_name,
|
||||
ANY_VALUE(t.description) AS description,
|
||||
ANY_VALUE(t.type) AS metric_type,
|
||||
@@ -5864,11 +5879,11 @@ func (r *ClickHouseReader) ListSummaryMetrics(ctx context.Context, orgID valuer.
|
||||
|
||||
if whereClause != "" {
|
||||
sb.WriteString(fmt.Sprintf(
|
||||
`SELECT
|
||||
`SELECT
|
||||
s.samples,
|
||||
s.metric_name
|
||||
FROM (
|
||||
SELECT
|
||||
SELECT
|
||||
dm.metric_name,
|
||||
%s AS samples
|
||||
FROM %s.%s AS dm
|
||||
@@ -5898,11 +5913,11 @@ func (r *ClickHouseReader) ListSummaryMetrics(ctx context.Context, orgID valuer.
|
||||
} else {
|
||||
// If no filters, it is a simpler query.
|
||||
sb.WriteString(fmt.Sprintf(
|
||||
`SELECT
|
||||
`SELECT
|
||||
s.samples,
|
||||
s.metric_name
|
||||
FROM (
|
||||
SELECT
|
||||
SELECT
|
||||
metric_name,
|
||||
%s AS samples
|
||||
FROM %s.%s
|
||||
@@ -6007,16 +6022,16 @@ func (r *ClickHouseReader) GetMetricsTimeSeriesPercentage(ctx context.Context, r
|
||||
|
||||
// Construct the query without backticks
|
||||
query := fmt.Sprintf(`
|
||||
SELECT
|
||||
SELECT
|
||||
metric_name,
|
||||
total_value,
|
||||
(total_value * 100.0 / total_time_series) AS percentage
|
||||
FROM (
|
||||
SELECT
|
||||
SELECT
|
||||
metric_name,
|
||||
uniq(fingerprint) AS total_value,
|
||||
(SELECT uniq(fingerprint)
|
||||
FROM %s.%s
|
||||
(SELECT uniq(fingerprint)
|
||||
FROM %s.%s
|
||||
WHERE unix_milli BETWEEN ? AND ? AND __normalized = ?) AS total_time_series
|
||||
FROM %s.%s
|
||||
WHERE unix_milli BETWEEN ? AND ? AND NOT startsWith(metric_name, 'signoz') AND __normalized = ? %s
|
||||
@@ -6092,7 +6107,7 @@ func (r *ClickHouseReader) GetMetricsSamplesPercentage(ctx context.Context, req
|
||||
|
||||
queryLimit := 50 + req.Limit
|
||||
metricsQuery := fmt.Sprintf(
|
||||
`SELECT
|
||||
`SELECT
|
||||
ts.metric_name AS metric_name,
|
||||
uniq(ts.fingerprint) AS timeSeries
|
||||
FROM %s.%s AS ts
|
||||
@@ -6149,13 +6164,13 @@ func (r *ClickHouseReader) GetMetricsSamplesPercentage(ctx context.Context, req
|
||||
FROM %s.%s
|
||||
WHERE unix_milli BETWEEN ? AND ?
|
||||
)
|
||||
SELECT
|
||||
SELECT
|
||||
s.samples,
|
||||
s.metric_name,
|
||||
COALESCE((s.samples * 100.0 / t.total_samples), 0) AS percentage
|
||||
FROM
|
||||
FROM
|
||||
(
|
||||
SELECT
|
||||
SELECT
|
||||
dm.metric_name,
|
||||
%s AS samples
|
||||
FROM %s.%s AS dm`,
|
||||
@@ -6176,7 +6191,7 @@ func (r *ClickHouseReader) GetMetricsSamplesPercentage(ctx context.Context, req
|
||||
if whereClause != "" {
|
||||
sb.WriteString(fmt.Sprintf(
|
||||
` AND dm.fingerprint IN (
|
||||
SELECT ts.fingerprint
|
||||
SELECT ts.fingerprint
|
||||
FROM %s.%s AS ts
|
||||
WHERE ts.metric_name IN (%s)
|
||||
AND unix_milli BETWEEN ? AND ?
|
||||
@@ -6247,7 +6262,7 @@ func (r *ClickHouseReader) GetNameSimilarity(ctx context.Context, req *metrics_e
|
||||
}
|
||||
|
||||
query := fmt.Sprintf(`
|
||||
SELECT
|
||||
SELECT
|
||||
metric_name,
|
||||
any(type) as type,
|
||||
any(temporality) as temporality,
|
||||
@@ -6306,7 +6321,7 @@ func (r *ClickHouseReader) GetAttributeSimilarity(ctx context.Context, req *metr
|
||||
|
||||
// Get target labels
|
||||
extractedLabelsQuery := fmt.Sprintf(`
|
||||
SELECT
|
||||
SELECT
|
||||
kv.1 AS label_key,
|
||||
topK(10)(JSONExtractString(kv.2)) AS label_values
|
||||
FROM %s.%s
|
||||
@@ -6350,12 +6365,12 @@ func (r *ClickHouseReader) GetAttributeSimilarity(ctx context.Context, req *metr
|
||||
priorityListString := strings.Join(priorityList, ", ")
|
||||
|
||||
candidateLabelsQuery := fmt.Sprintf(`
|
||||
WITH
|
||||
arrayDistinct([%s]) AS filter_keys,
|
||||
WITH
|
||||
arrayDistinct([%s]) AS filter_keys,
|
||||
arrayDistinct([%s]) AS filter_values,
|
||||
[%s] AS priority_pairs_input,
|
||||
%d AS priority_multiplier
|
||||
SELECT
|
||||
SELECT
|
||||
metric_name,
|
||||
any(type) as type,
|
||||
any(temporality) as temporality,
|
||||
@@ -6461,17 +6476,17 @@ func (r *ClickHouseReader) GetMetricsAllResourceAttributes(ctx context.Context,
|
||||
instrumentationtypes.CodeFunctionName: "GetMetricsAllResourceAttributes",
|
||||
})
|
||||
start, end, attTable, _ := utils.WhichAttributesTableToUse(start, end)
|
||||
query := fmt.Sprintf(`SELECT
|
||||
key,
|
||||
query := fmt.Sprintf(`SELECT
|
||||
key,
|
||||
count(distinct value) AS distinct_value_count
|
||||
FROM (
|
||||
SELECT key, value
|
||||
FROM %s.%s
|
||||
ARRAY JOIN
|
||||
ARRAY JOIN
|
||||
arrayConcat(mapKeys(resource_attributes)) AS key,
|
||||
arrayConcat(mapValues(resource_attributes)) AS value
|
||||
WHERE unix_milli between ? and ?
|
||||
)
|
||||
)
|
||||
GROUP BY key
|
||||
ORDER BY distinct_value_count DESC;`, signozMetadataDbName, attTable)
|
||||
valueCtx := context.WithValue(ctx, "clickhouse_max_threads", constants.MetricsExplorerClickhouseThreads)
|
||||
@@ -6618,11 +6633,11 @@ func (r *ClickHouseReader) GetInspectMetricsFingerprints(ctx context.Context, at
|
||||
|
||||
start, end, tsTable, _ := utils.WhichTSTableToUse(req.Start, req.End)
|
||||
query := fmt.Sprintf(`
|
||||
SELECT
|
||||
SELECT
|
||||
arrayDistinct(groupArray(toString(fingerprint))) AS fingerprints
|
||||
FROM
|
||||
(
|
||||
SELECT
|
||||
SELECT
|
||||
metric_name, labels, fingerprint,
|
||||
%s
|
||||
FROM %s.%s
|
||||
@@ -6793,14 +6808,14 @@ func (r *ClickHouseReader) GetUpdatedMetricsMetadata(ctx context.Context, orgID
|
||||
var stillMissing []string
|
||||
if len(missingMetrics) > 0 {
|
||||
metricList := "'" + strings.Join(missingMetrics, "', '") + "'"
|
||||
query := fmt.Sprintf(`SELECT
|
||||
query := fmt.Sprintf(`SELECT
|
||||
metric_name,
|
||||
argMax(type, created_at) AS type,
|
||||
argMax(description, created_at) AS description,
|
||||
argMax(temporality, created_at) AS temporality,
|
||||
argMax(is_monotonic, created_at) AS is_monotonic,
|
||||
argMax(unit, created_at) AS unit
|
||||
FROM %s.%s
|
||||
FROM %s.%s
|
||||
WHERE metric_name IN (%s)
|
||||
GROUP BY metric_name;`,
|
||||
signozMetricDBName,
|
||||
@@ -6848,7 +6863,7 @@ func (r *ClickHouseReader) GetUpdatedMetricsMetadata(ctx context.Context, orgID
|
||||
if len(stillMissing) > 0 {
|
||||
metricList := "'" + strings.Join(stillMissing, "', '") + "'"
|
||||
query := fmt.Sprintf(`SELECT DISTINCT metric_name, type, description, temporality, is_monotonic, unit
|
||||
FROM %s.%s
|
||||
FROM %s.%s
|
||||
WHERE metric_name IN (%s)`, signozMetricDBName, signozTSTableNameV4, metricList)
|
||||
valueCtx := context.WithValue(ctx, "clickhouse_max_threads", constants.MetricsExplorerClickhouseThreads)
|
||||
rows, err := r.db.Query(valueCtx, query)
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package tracedetail
|
||||
|
||||
import (
|
||||
"maps"
|
||||
"slices"
|
||||
"sort"
|
||||
|
||||
@@ -9,6 +10,9 @@ import (
|
||||
|
||||
var (
|
||||
SPAN_LIMIT_PER_REQUEST_FOR_WATERFALL float64 = 500
|
||||
|
||||
maxDepthForSelectedSpanChildren int = 5
|
||||
MaxLimitToSelectAllSpans uint = 10_000
|
||||
)
|
||||
|
||||
type Interval struct {
|
||||
@@ -63,26 +67,22 @@ func findIndexForSelectedSpanFromPreOrder(spans []*model.Span, selectedSpanId st
|
||||
return selectedSpanIndex
|
||||
}
|
||||
|
||||
func getPathFromRootToSelectedSpanId(node *model.Span, selectedSpanId string, uncollapsedSpans []string, isSelectedSpanIDUnCollapsed bool) (bool, []string) {
|
||||
func getPathFromRootToSelectedSpanId(node *model.Span, selectedSpanId string) (bool, []string) {
|
||||
spansFromRootToNode := []string{}
|
||||
|
||||
spansFromRootToNode = append(spansFromRootToNode, node.SpanID)
|
||||
if node.SpanID == selectedSpanId {
|
||||
if isSelectedSpanIDUnCollapsed && !slices.Contains(uncollapsedSpans, node.SpanID) {
|
||||
spansFromRootToNode = append(spansFromRootToNode, node.SpanID)
|
||||
}
|
||||
return true, spansFromRootToNode
|
||||
}
|
||||
|
||||
isPresentInSubtreeForTheNode := false
|
||||
for _, child := range node.Children {
|
||||
isPresentInThisSubtree, _spansFromRootToNode := getPathFromRootToSelectedSpanId(child, selectedSpanId, uncollapsedSpans, isSelectedSpanIDUnCollapsed)
|
||||
isPresentInThisSubtree, _spansFromRootToNode := getPathFromRootToSelectedSpanId(child, selectedSpanId)
|
||||
// if the interested node is present in the given subtree then add the span node to uncollapsed node list
|
||||
if isPresentInThisSubtree {
|
||||
if !slices.Contains(uncollapsedSpans, node.SpanID) {
|
||||
spansFromRootToNode = append(spansFromRootToNode, node.SpanID)
|
||||
}
|
||||
isPresentInSubtreeForTheNode = true
|
||||
spansFromRootToNode = append(spansFromRootToNode, _spansFromRootToNode...)
|
||||
break
|
||||
}
|
||||
}
|
||||
return isPresentInSubtreeForTheNode, spansFromRootToNode
|
||||
@@ -92,12 +92,23 @@ func getPathFromRootToSelectedSpanId(node *model.Span, selectedSpanId string, un
|
||||
// throughout the recursion. Per-call state (level, isPartOfPreOrder, etc.)
|
||||
// is passed as direct arguments.
|
||||
type traverseOpts struct {
|
||||
uncollapsedSpans []string
|
||||
selectedSpanID string
|
||||
uncollapsedSpans map[string]struct{}
|
||||
selectedSpanID string
|
||||
isSelectedSpanUncollapsed bool
|
||||
selectAll bool
|
||||
}
|
||||
|
||||
func traverseTrace(span *model.Span, opts traverseOpts, level uint64, isPartOfPreOrder bool, hasSibling bool) []*model.Span {
|
||||
func traverseTrace(
|
||||
span *model.Span,
|
||||
opts traverseOpts,
|
||||
level uint64,
|
||||
isPartOfPreOrder bool,
|
||||
hasSibling bool,
|
||||
autoExpandDepth int,
|
||||
) ([]*model.Span, []string) {
|
||||
|
||||
preOrderTraversal := []*model.Span{}
|
||||
autoExpandedSpans := []string{}
|
||||
|
||||
// sort the children to maintain the order across requests
|
||||
sort.Slice(span.Children, func(i, j int) bool {
|
||||
@@ -134,16 +145,36 @@ func traverseTrace(span *model.Span, opts traverseOpts, level uint64, isPartOfPr
|
||||
preOrderTraversal = append(preOrderTraversal, &nodeWithoutChildren)
|
||||
}
|
||||
|
||||
isAlreadyUncollapsed := slices.Contains(opts.uncollapsedSpans, span.SpanID)
|
||||
remainingAutoExpandDepth := 0
|
||||
if span.SpanID == opts.selectedSpanID && opts.isSelectedSpanUncollapsed {
|
||||
remainingAutoExpandDepth = maxDepthForSelectedSpanChildren
|
||||
} else if autoExpandDepth > 0 {
|
||||
remainingAutoExpandDepth = autoExpandDepth - 1
|
||||
}
|
||||
|
||||
_, isAlreadyUncollapsed := opts.uncollapsedSpans[span.SpanID]
|
||||
for index, child := range span.Children {
|
||||
_childTraversal := traverseTrace(child, opts, level+1, isPartOfPreOrder && isAlreadyUncollapsed, index != (len(span.Children)-1))
|
||||
// A child is included in the pre-order output if its parent is uncollapsed
|
||||
// OR if the child falls within MAX_DEPTH_FOR_SELECTED_SPAN_CHILDREN levels
|
||||
// below the selected span.
|
||||
isChildWithinMaxDepth := remainingAutoExpandDepth > 0
|
||||
childIsPartOfPreOrder := opts.selectAll || (isPartOfPreOrder && (isAlreadyUncollapsed || isChildWithinMaxDepth))
|
||||
|
||||
if isPartOfPreOrder && isChildWithinMaxDepth && !isAlreadyUncollapsed {
|
||||
if !slices.Contains(autoExpandedSpans, span.SpanID) {
|
||||
autoExpandedSpans = append(autoExpandedSpans, span.SpanID)
|
||||
}
|
||||
}
|
||||
|
||||
_childTraversal, _autoExpanded := traverseTrace(child, opts, level+1, childIsPartOfPreOrder, index != (len(span.Children)-1), remainingAutoExpandDepth)
|
||||
preOrderTraversal = append(preOrderTraversal, _childTraversal...)
|
||||
autoExpandedSpans = append(autoExpandedSpans, _autoExpanded...)
|
||||
nodeWithoutChildren.SubTreeNodeCount += child.SubTreeNodeCount + 1
|
||||
span.SubTreeNodeCount += child.SubTreeNodeCount + 1
|
||||
}
|
||||
|
||||
nodeWithoutChildren.SubTreeNodeCount += 1
|
||||
return preOrderTraversal
|
||||
return preOrderTraversal, autoExpandedSpans
|
||||
|
||||
}
|
||||
|
||||
@@ -169,19 +200,36 @@ func GetSelectedSpans(uncollapsedSpans []string, selectedSpanID string, traceRoo
|
||||
|
||||
var preOrderTraversal = make([]*model.Span, 0)
|
||||
var rootServiceName, rootServiceEntryPoint string
|
||||
updatedUncollapsedSpans := uncollapsedSpans
|
||||
|
||||
// create a map of uncollapsed spans for quick lookup
|
||||
uncollapsedSpanMap := make(map[string]struct{})
|
||||
for _, spanID := range uncollapsedSpans {
|
||||
uncollapsedSpanMap[spanID] = struct{}{}
|
||||
}
|
||||
|
||||
selectedSpanIndex := -1
|
||||
for _, rootSpanID := range traceRoots {
|
||||
if rootNode, exists := spanIdToSpanNodeMap[rootSpanID.SpanID]; exists {
|
||||
_, spansFromRootToNode := getPathFromRootToSelectedSpanId(rootNode, selectedSpanID, updatedUncollapsedSpans, isSelectedSpanIDUnCollapsed)
|
||||
updatedUncollapsedSpans = append(updatedUncollapsedSpans, spansFromRootToNode...)
|
||||
present, spansFromRootToNode := getPathFromRootToSelectedSpanId(rootNode, selectedSpanID)
|
||||
if present {
|
||||
for _, spanID := range spansFromRootToNode {
|
||||
if selectedSpanID == spanID && !isSelectedSpanIDUnCollapsed {
|
||||
continue
|
||||
}
|
||||
uncollapsedSpanMap[spanID] = struct{}{}
|
||||
}
|
||||
}
|
||||
|
||||
opts := traverseOpts{
|
||||
uncollapsedSpans: updatedUncollapsedSpans,
|
||||
selectedSpanID: selectedSpanID,
|
||||
uncollapsedSpans: uncollapsedSpanMap,
|
||||
selectedSpanID: selectedSpanID,
|
||||
isSelectedSpanUncollapsed: isSelectedSpanIDUnCollapsed,
|
||||
}
|
||||
_preOrderTraversal, _autoExpanded := traverseTrace(rootNode, opts, 0, true, false, 0)
|
||||
// Merge auto-expanded spans into updatedUncollapsedSpans for returning in response
|
||||
for _, spanID := range _autoExpanded {
|
||||
uncollapsedSpanMap[spanID] = struct{}{}
|
||||
}
|
||||
_preOrderTraversal := traverseTrace(rootNode, opts, 0, true, false)
|
||||
_selectedSpanIndex := findIndexForSelectedSpanFromPreOrder(_preOrderTraversal, selectedSpanID)
|
||||
|
||||
if _selectedSpanIndex != -1 {
|
||||
@@ -223,5 +271,17 @@ func GetSelectedSpans(uncollapsedSpans []string, selectedSpanID string, traceRoo
|
||||
startIndex = 0
|
||||
}
|
||||
|
||||
return preOrderTraversal[startIndex:endIndex], updatedUncollapsedSpans, rootServiceName, rootServiceEntryPoint
|
||||
return preOrderTraversal[startIndex:endIndex], slices.Collect(maps.Keys(uncollapsedSpanMap)), rootServiceName, rootServiceEntryPoint
|
||||
}
|
||||
|
||||
func GetAllSpans(traceRoots []*model.Span) (spans []*model.Span, rootServiceName, rootEntryPoint string) {
|
||||
if len(traceRoots) > 0 {
|
||||
rootServiceName = traceRoots[0].ServiceName
|
||||
rootEntryPoint = traceRoots[0].Name
|
||||
}
|
||||
for _, root := range traceRoots {
|
||||
childSpans, _ := traverseTrace(root, traverseOpts{selectAll: true}, 0, true, false, 0)
|
||||
spans = append(spans, childSpans...)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
@@ -81,28 +81,6 @@ func TestGetSelectedSpans_MultipleRoots(t *testing.T) {
|
||||
assert.Equal(t, "root1-op", entryPoint, "metadata comes from first root")
|
||||
}
|
||||
|
||||
// isSelectedSpanIDUnCollapsed=true opens only the selected span's direct children,
|
||||
// not deeper descendants.
|
||||
//
|
||||
// root → selected (expanded)
|
||||
// ├─ child1 ✓
|
||||
// │ └─ grandchild ✗ (only one level opened)
|
||||
// └─ child2 ✓
|
||||
func TestGetSelectedSpans_ExpandedSelectedSpan(t *testing.T) {
|
||||
root := mkSpan("root", "svc",
|
||||
mkSpan("selected", "svc",
|
||||
mkSpan("child1", "svc", mkSpan("grandchild", "svc")),
|
||||
mkSpan("child2", "svc"),
|
||||
),
|
||||
)
|
||||
spanMap := buildSpanMap(root)
|
||||
spans, _, _, _ := GetSelectedSpans([]string{}, "selected", []*model.Span{root}, spanMap, true)
|
||||
|
||||
// root and selected are on the auto-uncollapsed path; child1/child2 are direct
|
||||
// children of the expanded selected span; grandchild stays hidden.
|
||||
assert.Equal(t, []string{"root", "selected", "child1", "child2"}, spanIDs(spans))
|
||||
}
|
||||
|
||||
// Multiple spans uncollapsed simultaneously: children of all uncollapsed spans
|
||||
// are visible at once.
|
||||
//
|
||||
@@ -183,7 +161,7 @@ func TestGetSelectedSpans_PathReturnedInUncollapsed(t *testing.T) {
|
||||
spanMap := buildSpanMap(root)
|
||||
spans, uncollapsed, _, _ := GetSelectedSpans([]string{}, "selected", []*model.Span{root}, spanMap, false)
|
||||
|
||||
assert.Equal(t, []string{"root", "parent"}, uncollapsed)
|
||||
assert.ElementsMatch(t, []string{"root", "parent"}, uncollapsed)
|
||||
assert.Equal(t, []string{"root", "parent", "selected"}, spanIDs(spans))
|
||||
}
|
||||
|
||||
@@ -206,7 +184,7 @@ func TestGetSelectedSpans_SiblingsNotExpanded(t *testing.T) {
|
||||
// children of root sort alphabetically: parent < unrelated; unrelated-child stays hidden
|
||||
assert.Equal(t, []string{"root", "parent", "selected", "unrelated"}, spanIDs(spans))
|
||||
// only the path nodes are tracked as uncollapsed — unrelated is not
|
||||
assert.Equal(t, []string{"root", "parent"}, uncollapsed)
|
||||
assert.ElementsMatch(t, []string{"root", "parent"}, uncollapsed)
|
||||
}
|
||||
|
||||
// An unknown selectedSpanID must not panic; returns a window from index 0.
|
||||
@@ -320,6 +298,119 @@ func TestGetSelectedSpans_WindowShiftsAtStart(t *testing.T) {
|
||||
assert.Equal(t, "span10", spans[10].SpanID, "selected span still in window")
|
||||
}
|
||||
|
||||
// Auto-expanded span IDs from ALL branches are returned in
|
||||
// updatedUncollapsedSpans. Only internal nodes (spans with children) are
|
||||
// tracked — leaf spans are never added.
|
||||
//
|
||||
// root (selected)
|
||||
// ├─ childA (internal ✓)
|
||||
// │ └─ grandchildA (internal ✓)
|
||||
// │ └─ leafA (leaf ✗)
|
||||
// └─ childB (internal ✓)
|
||||
// └─ grandchildB (internal ✓)
|
||||
// └─ leafB (leaf ✗)
|
||||
func TestGetSelectedSpans_AutoExpandedSpansReturnedInUncollapsed(t *testing.T) {
|
||||
root := mkSpan("root", "svc",
|
||||
mkSpan("childA", "svc",
|
||||
mkSpan("grandchildA", "svc",
|
||||
mkSpan("leafA", "svc"),
|
||||
),
|
||||
),
|
||||
mkSpan("childB", "svc",
|
||||
mkSpan("grandchildB", "svc",
|
||||
mkSpan("leafB", "svc"),
|
||||
),
|
||||
),
|
||||
)
|
||||
spanMap := buildSpanMap(root)
|
||||
_, uncollapsed, _, _ := GetSelectedSpans([]string{}, "root", []*model.Span{root}, spanMap, true)
|
||||
|
||||
// all internal nodes across both branches must be tracked
|
||||
assert.Contains(t, uncollapsed, "root")
|
||||
assert.Contains(t, uncollapsed, "childA", "internal node depth 1, branch A")
|
||||
assert.Contains(t, uncollapsed, "childB", "internal node depth 1, branch B")
|
||||
assert.Contains(t, uncollapsed, "grandchildA", "internal node depth 2, branch A")
|
||||
assert.Contains(t, uncollapsed, "grandchildB", "internal node depth 2, branch B")
|
||||
// leaves have no children to show — never added to uncollapsedSpans
|
||||
assert.NotContains(t, uncollapsed, "leafA", "leaf spans are never added to uncollapsedSpans")
|
||||
assert.NotContains(t, uncollapsed, "leafB", "leaf spans are never added to uncollapsedSpans")
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// maxDepthForSelectedSpanChildren boundary tests
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
// Depth is measured from the selected span, not the trace root.
|
||||
// Ancestors appear via the path-to-root logic, not the depth limit.
|
||||
// Each depth level has two children to confirm the limit is enforced on all
|
||||
// branches, not just the first.
|
||||
//
|
||||
// root
|
||||
// └─ A ancestor ✓ (path-to-root)
|
||||
// └─ selected
|
||||
// ├─ d1a depth 1 ✓
|
||||
// │ ├─ d2a depth 2 ✓
|
||||
// │ │ ├─ d3a depth 3 ✓
|
||||
// │ │ │ ├─ d4a depth 4 ✓
|
||||
// │ │ │ │ ├─ d5a depth 5 ✓
|
||||
// │ │ │ │ │ └─ d6a depth 6 ✗
|
||||
// │ │ │ │ └─ d5b depth 5 ✓
|
||||
// │ │ │ └─ d4b depth 4 ✓
|
||||
// │ │ └─ d3b depth 3 ✓
|
||||
// │ └─ d2b depth 2 ✓
|
||||
// └─ d1b depth 1 ✓
|
||||
func TestGetSelectedSpans_DepthCountedFromSelectedSpan(t *testing.T) {
|
||||
selected := mkSpan("selected", "svc",
|
||||
mkSpan("d1a", "svc",
|
||||
mkSpan("d2a", "svc",
|
||||
mkSpan("d3a", "svc",
|
||||
mkSpan("d4a", "svc",
|
||||
mkSpan("d5a", "svc",
|
||||
mkSpan("d6a", "svc"), // depth 6 — excluded
|
||||
),
|
||||
mkSpan("d5b", "svc"), // depth 5 — included
|
||||
),
|
||||
mkSpan("d4b", "svc"), // depth 4 — included
|
||||
),
|
||||
mkSpan("d3b", "svc"), // depth 3 — included
|
||||
),
|
||||
mkSpan("d2b", "svc"), // depth 2 — included
|
||||
),
|
||||
mkSpan("d1b", "svc"), // depth 1 — included
|
||||
)
|
||||
root := mkSpan("root", "svc", mkSpan("A", "svc", selected))
|
||||
|
||||
spanMap := buildSpanMap(root)
|
||||
spans, _, _, _ := GetSelectedSpans([]string{}, "selected", []*model.Span{root}, spanMap, true)
|
||||
ids := spanIDs(spans)
|
||||
|
||||
assert.Contains(t, ids, "root", "ancestor shown via path-to-root")
|
||||
assert.Contains(t, ids, "A", "ancestor shown via path-to-root")
|
||||
for _, id := range []string{"d1a", "d1b", "d2a", "d2b", "d3a", "d3b", "d4a", "d4b", "d5a", "d5b"} {
|
||||
assert.Contains(t, ids, id, "depth ≤ 5 — must be included")
|
||||
}
|
||||
assert.NotContains(t, ids, "d6a", "depth 6 > limit — excluded")
|
||||
}
|
||||
|
||||
func TestGetAllSpans(t *testing.T) {
|
||||
root := mkSpan("root", "svc",
|
||||
mkSpan("childA", "svc",
|
||||
mkSpan("grandchildA", "svc",
|
||||
mkSpan("leafA", "svc2"),
|
||||
),
|
||||
),
|
||||
mkSpan("childB", "svc3",
|
||||
mkSpan("grandchildB", "svc",
|
||||
mkSpan("leafB", "svc2"),
|
||||
),
|
||||
),
|
||||
)
|
||||
spans, rootServiceName, rootEntryPoint := GetAllSpans([]*model.Span{root})
|
||||
assert.ElementsMatch(t, spanIDs(spans), []string{"root", "childA", "grandchildA", "leafA", "childB", "grandchildB", "leafB"})
|
||||
assert.Equal(t, rootServiceName, "svc")
|
||||
assert.Equal(t, rootEntryPoint, "root-op")
|
||||
}
|
||||
|
||||
func mkSpan(id, service string, children ...*model.Span) *model.Span {
|
||||
return &model.Span{
|
||||
SpanID: id,
|
||||
|
||||
@@ -333,6 +333,7 @@ type GetWaterfallSpansForTraceWithMetadataParams struct {
|
||||
SelectedSpanID string `json:"selectedSpanId"`
|
||||
IsSelectedSpanIDUnCollapsed bool `json:"isSelectedSpanIDUnCollapsed"`
|
||||
UncollapsedSpans []string `json:"uncollapsedSpans"`
|
||||
Limit uint `json:"limit"`
|
||||
}
|
||||
|
||||
type GetFlamegraphSpansForTraceParams struct {
|
||||
|
||||
@@ -329,6 +329,7 @@ type GetWaterfallSpansForTraceWithMetadataResponse struct {
|
||||
HasMissingSpans bool `json:"hasMissingSpans"`
|
||||
// this is needed for frontend and query service sync
|
||||
UncollapsedSpans []string `json:"uncollapsedSpans"`
|
||||
HasMore bool `json:"hasMore"`
|
||||
}
|
||||
|
||||
type GetFlamegraphSpansForTraceResponse struct {
|
||||
|
||||
@@ -2,7 +2,6 @@ package rules
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"sync"
|
||||
@@ -191,24 +190,6 @@ func NewBaseRule(id string, orgID valuer.UUID, p *ruletypes.PostableRule, reader
|
||||
return baseRule, nil
|
||||
}
|
||||
|
||||
func (r *BaseRule) String() string {
|
||||
ar := ruletypes.PostableRule{
|
||||
AlertName: r.name,
|
||||
RuleCondition: r.ruleCondition,
|
||||
EvalWindow: r.evalWindow,
|
||||
Labels: r.labels.Map(),
|
||||
Annotations: r.annotations.Map(),
|
||||
PreferredChannels: r.preferredChannels,
|
||||
}
|
||||
|
||||
byt, err := json.Marshal(ar)
|
||||
if err != nil {
|
||||
return fmt.Sprintf("error marshaling alerting rule: %s", err.Error())
|
||||
}
|
||||
|
||||
return string(byt)
|
||||
}
|
||||
|
||||
func (r *BaseRule) matchType() ruletypes.MatchType {
|
||||
if r.ruleCondition == nil {
|
||||
return ruletypes.AtleastOnce
|
||||
|
||||
@@ -1,239 +0,0 @@
|
||||
package rules
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/errors"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/model"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/utils/labels"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/utils/times"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/utils/timestamp"
|
||||
"github.com/SigNoz/signoz/pkg/types/ruletypes"
|
||||
"github.com/SigNoz/signoz/pkg/units"
|
||||
)
|
||||
|
||||
type EvalVectorOptions struct {
|
||||
DeleteLabels []string
|
||||
ExtraAnnotations func(ctx context.Context, ts time.Time, metric labels.Labels) []labels.Label
|
||||
}
|
||||
|
||||
func (r *BaseRule) EvalVector(ctx context.Context, ts time.Time, res ruletypes.Vector, opts EvalVectorOptions) (int, error) {
|
||||
prevState := r.State()
|
||||
valueFormatter := units.FormatterFromUnit(r.Unit())
|
||||
|
||||
r.mtx.Lock()
|
||||
defer r.mtx.Unlock()
|
||||
|
||||
resultFPs := map[uint64]struct{}{}
|
||||
alerts := make(map[uint64]*ruletypes.Alert, len(res))
|
||||
|
||||
ruleReceivers := r.Threshold.GetRuleReceivers()
|
||||
ruleReceiverMap := make(map[string][]string)
|
||||
for _, value := range ruleReceivers {
|
||||
ruleReceiverMap[value.Name] = value.Channels
|
||||
}
|
||||
|
||||
for _, smpl := range res {
|
||||
l := make(map[string]string, len(smpl.Metric))
|
||||
for _, lbl := range smpl.Metric {
|
||||
l[lbl.Name] = lbl.Value
|
||||
}
|
||||
|
||||
r.logger.DebugContext(ctx, "alerting for series", "rule_name", r.Name(), "series", smpl)
|
||||
|
||||
value := valueFormatter.Format(smpl.V, r.Unit())
|
||||
threshold := valueFormatter.Format(smpl.Target, smpl.TargetUnit)
|
||||
r.logger.DebugContext(ctx, "Alert template data for rule", "rule_name", r.Name(), "formatter", valueFormatter.Name(), "value", value, "threshold", threshold)
|
||||
|
||||
tmplData := ruletypes.AlertTemplateData(l, value, threshold)
|
||||
// Inject some convenience variables that are easier to remember for users
|
||||
// who are not used to Go's templating system.
|
||||
defs := "{{$labels := .Labels}}{{$value := .Value}}{{$threshold := .Threshold}}"
|
||||
|
||||
// utility function to apply go template on labels and annotations
|
||||
expand := func(text string) string {
|
||||
tmpl := ruletypes.NewTemplateExpander(
|
||||
ctx,
|
||||
defs+text,
|
||||
"__alert_"+r.Name(),
|
||||
tmplData,
|
||||
times.Time(timestamp.FromTime(ts)),
|
||||
nil,
|
||||
)
|
||||
result, err := tmpl.Expand()
|
||||
if err != nil {
|
||||
result = fmt.Sprintf("<error expanding template: %s>", err)
|
||||
r.logger.ErrorContext(ctx, "Expanding alert template failed", "rule_name", r.Name(), errors.Attr(err), "data", tmplData)
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
lb := labels.NewBuilder(smpl.Metric).Del(opts.DeleteLabels...).Del()
|
||||
resultLabels := labels.NewBuilder(smpl.Metric).Del(opts.DeleteLabels...).Labels()
|
||||
|
||||
for name, value := range r.labels.Map() {
|
||||
lb.Set(name, expand(value))
|
||||
}
|
||||
|
||||
lb.Set(labels.AlertNameLabel, r.Name())
|
||||
lb.Set(labels.AlertRuleIdLabel, r.ID())
|
||||
lb.Set(labels.RuleSourceLabel, r.GeneratorURL())
|
||||
|
||||
annotations := make(labels.Labels, 0, len(r.annotations.Map()))
|
||||
for name, value := range r.annotations.Map() {
|
||||
annotations = append(annotations, labels.Label{Name: name, Value: expand(value)})
|
||||
}
|
||||
if smpl.IsMissing {
|
||||
lb.Set(labels.AlertNameLabel, "[No data] "+r.Name())
|
||||
lb.Set(labels.NoDataLabel, "true")
|
||||
}
|
||||
|
||||
if opts.ExtraAnnotations != nil {
|
||||
extra := opts.ExtraAnnotations(ctx, ts, smpl.Metric)
|
||||
annotations = append(annotations, extra...)
|
||||
}
|
||||
|
||||
lbs := lb.Labels()
|
||||
h := lbs.Hash()
|
||||
resultFPs[h] = struct{}{}
|
||||
|
||||
if _, ok := alerts[h]; ok {
|
||||
r.logger.ErrorContext(ctx, "the alert query returns duplicate records", "rule_id", r.ID(), "alert", alerts[h])
|
||||
err := fmt.Errorf("duplicate alert found, vector contains metrics with the same labelset after applying alert labels")
|
||||
// We have already acquired the lock above hence using SetHealth and SetLastError will deadlock.
|
||||
r.health = ruletypes.HealthBad
|
||||
r.lastError = err
|
||||
return 0, err
|
||||
}
|
||||
|
||||
alerts[h] = &ruletypes.Alert{
|
||||
Labels: lbs,
|
||||
QueryResultLables: resultLabels,
|
||||
Annotations: annotations,
|
||||
ActiveAt: ts,
|
||||
State: model.StatePending,
|
||||
Value: smpl.V,
|
||||
GeneratorURL: r.GeneratorURL(),
|
||||
Receivers: ruleReceiverMap[lbs.Map()[ruletypes.LabelThresholdName]],
|
||||
Missing: smpl.IsMissing,
|
||||
IsRecovering: smpl.IsRecovering,
|
||||
}
|
||||
}
|
||||
|
||||
r.logger.InfoContext(ctx, "number of alerts found", "rule_name", r.Name(), "alerts_count", len(alerts))
|
||||
|
||||
// alerts[h] is ready, add or update active list now
|
||||
for h, a := range alerts {
|
||||
// Check whether we already have alerting state for the identifying label set.
|
||||
// Update the last value and annotations if so, create a new alert entry otherwise.
|
||||
if alert, ok := r.Active[h]; ok && alert.State != model.StateInactive {
|
||||
alert.Value = a.Value
|
||||
alert.Annotations = a.Annotations
|
||||
// Update the recovering and missing state of existing alert
|
||||
alert.IsRecovering = a.IsRecovering
|
||||
alert.Missing = a.Missing
|
||||
if v, ok := alert.Labels.Map()[ruletypes.LabelThresholdName]; ok {
|
||||
alert.Receivers = ruleReceiverMap[v]
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
r.Active[h] = a
|
||||
}
|
||||
|
||||
itemsToAdd := []model.RuleStateHistory{}
|
||||
|
||||
// Check if any pending alerts should be removed or fire now. Write out alert timeseries.
|
||||
for fp, a := range r.Active {
|
||||
labelsJSON, err := json.Marshal(a.QueryResultLables)
|
||||
if err != nil {
|
||||
r.logger.ErrorContext(ctx, "error marshaling labels", errors.Attr(err), "rule_name", r.Name(), "labels", a.Labels)
|
||||
}
|
||||
if _, ok := resultFPs[fp]; !ok {
|
||||
// If the alert was previously firing, keep it around for a given
|
||||
// retention time so it is reported as resolved to the AlertManager.
|
||||
if a.State == model.StatePending || (!a.ResolvedAt.IsZero() && ts.Sub(a.ResolvedAt) > ruletypes.ResolvedRetention) {
|
||||
delete(r.Active, fp)
|
||||
}
|
||||
if a.State != model.StateInactive {
|
||||
r.logger.DebugContext(ctx, "converting firing alert to inActive", "name", r.Name())
|
||||
a.State = model.StateInactive
|
||||
a.ResolvedAt = ts
|
||||
itemsToAdd = append(itemsToAdd, model.RuleStateHistory{
|
||||
RuleID: r.ID(),
|
||||
RuleName: r.Name(),
|
||||
State: model.StateInactive,
|
||||
StateChanged: true,
|
||||
UnixMilli: ts.UnixMilli(),
|
||||
Labels: model.LabelsString(labelsJSON),
|
||||
Fingerprint: a.QueryResultLables.Hash(),
|
||||
Value: a.Value,
|
||||
})
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if a.State == model.StatePending && ts.Sub(a.ActiveAt) >= r.holdDuration.Duration() {
|
||||
r.logger.DebugContext(ctx, "converting pending alert to firing", "name", r.Name())
|
||||
a.State = model.StateFiring
|
||||
a.FiredAt = ts
|
||||
state := model.StateFiring
|
||||
if a.Missing {
|
||||
state = model.StateNoData
|
||||
}
|
||||
itemsToAdd = append(itemsToAdd, model.RuleStateHistory{
|
||||
RuleID: r.ID(),
|
||||
RuleName: r.Name(),
|
||||
State: state,
|
||||
StateChanged: true,
|
||||
UnixMilli: ts.UnixMilli(),
|
||||
Labels: model.LabelsString(labelsJSON),
|
||||
Fingerprint: a.QueryResultLables.Hash(),
|
||||
Value: a.Value,
|
||||
})
|
||||
}
|
||||
|
||||
// We need to change firing alert to recovering if the returned sample meets recovery threshold
|
||||
changeFiringToRecovering := a.State == model.StateFiring && a.IsRecovering
|
||||
// We need to change recovering alerts to firing if the returned sample meets target threshold
|
||||
changeRecoveringToFiring := a.State == model.StateRecovering && !a.IsRecovering && !a.Missing
|
||||
// in any of the above case we need to update the status of alert
|
||||
if changeFiringToRecovering || changeRecoveringToFiring {
|
||||
state := model.StateRecovering
|
||||
if changeRecoveringToFiring {
|
||||
state = model.StateFiring
|
||||
}
|
||||
a.State = state
|
||||
r.logger.DebugContext(ctx, "converting alert state", "name", r.Name(), "state", state)
|
||||
itemsToAdd = append(itemsToAdd, model.RuleStateHistory{
|
||||
RuleID: r.ID(),
|
||||
RuleName: r.Name(),
|
||||
State: state,
|
||||
StateChanged: true,
|
||||
UnixMilli: ts.UnixMilli(),
|
||||
Labels: model.LabelsString(labelsJSON),
|
||||
Fingerprint: a.QueryResultLables.Hash(),
|
||||
Value: a.Value,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
currentState := r.State()
|
||||
|
||||
overallStateChanged := currentState != prevState
|
||||
for idx, item := range itemsToAdd {
|
||||
item.OverallStateChanged = overallStateChanged
|
||||
item.OverallState = currentState
|
||||
itemsToAdd[idx] = item
|
||||
}
|
||||
|
||||
r.RecordRuleStateHistory(ctx, prevState, currentState, itemsToAdd)
|
||||
|
||||
r.health = ruletypes.HealthGood
|
||||
r.lastError = nil
|
||||
|
||||
return len(r.Active), nil
|
||||
}
|
||||
@@ -2,20 +2,25 @@ package rules
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"time"
|
||||
|
||||
plabels "github.com/prometheus/prometheus/model/labels"
|
||||
"github.com/prometheus/prometheus/model/labels"
|
||||
"github.com/prometheus/prometheus/promql"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/errors"
|
||||
"github.com/SigNoz/signoz/pkg/prometheus"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/interfaces"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/model"
|
||||
v3 "github.com/SigNoz/signoz/pkg/query-service/model/v3"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/utils/labels"
|
||||
qslabels "github.com/SigNoz/signoz/pkg/query-service/utils/labels"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/utils/times"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/utils/timestamp"
|
||||
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
|
||||
"github.com/SigNoz/signoz/pkg/types/ruletypes"
|
||||
"github.com/SigNoz/signoz/pkg/units"
|
||||
"github.com/SigNoz/signoz/pkg/valuer"
|
||||
)
|
||||
|
||||
@@ -181,16 +186,232 @@ func (r *PromRule) buildAndRunQuery(ctx context.Context, ts time.Time) (ruletype
|
||||
}
|
||||
|
||||
func (r *PromRule) Eval(ctx context.Context, ts time.Time) (int, error) {
|
||||
prevState := r.State()
|
||||
valueFormatter := units.FormatterFromUnit(r.Unit())
|
||||
|
||||
// prepare query, run query get data and filter the data based on the threshold
|
||||
res, err := r.buildAndRunQuery(ctx, ts)
|
||||
results, err := r.buildAndRunQuery(ctx, ts)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
opts := EvalVectorOptions{
|
||||
DeleteLabels: []string{labels.MetricNameLabel},
|
||||
r.mtx.Lock()
|
||||
defer r.mtx.Unlock()
|
||||
|
||||
resultFPs := map[uint64]struct{}{}
|
||||
|
||||
alerts := make(map[uint64]*ruletypes.Alert, len(results))
|
||||
|
||||
ruleReceivers := r.Threshold.GetRuleReceivers()
|
||||
ruleReceiverMap := make(map[string][]string)
|
||||
for _, value := range ruleReceivers {
|
||||
ruleReceiverMap[value.Name] = value.Channels
|
||||
}
|
||||
return r.EvalVector(ctx, ts, res, opts)
|
||||
|
||||
for _, result := range results {
|
||||
l := make(map[string]string, len(result.Metric))
|
||||
for _, lbl := range result.Metric {
|
||||
l[lbl.Name] = lbl.Value
|
||||
}
|
||||
r.logger.DebugContext(ctx, "alerting for series", "rule_name", r.Name(), "series", result)
|
||||
|
||||
threshold := valueFormatter.Format(result.Target, result.TargetUnit)
|
||||
|
||||
tmplData := ruletypes.AlertTemplateData(l, valueFormatter.Format(result.V, r.Unit()), threshold)
|
||||
// Inject some convenience variables that are easier to remember for users
|
||||
// who are not used to Go's templating system.
|
||||
defs := "{{$labels := .Labels}}{{$value := .Value}}{{$threshold := .Threshold}}"
|
||||
|
||||
expand := func(text string) string {
|
||||
tmpl := ruletypes.NewTemplateExpander(
|
||||
ctx,
|
||||
defs+text,
|
||||
"__alert_"+r.Name(),
|
||||
tmplData,
|
||||
times.Time(timestamp.FromTime(ts)),
|
||||
nil,
|
||||
)
|
||||
result, err := tmpl.Expand()
|
||||
if err != nil {
|
||||
result = fmt.Sprintf("<error expanding template: %s>", err)
|
||||
r.logger.WarnContext(ctx, "Expanding alert template failed", "rule_name", r.Name(), errors.Attr(err), "data", tmplData)
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
lb := qslabels.NewBuilder(result.Metric).Del(qslabels.MetricNameLabel)
|
||||
resultLabels := qslabels.NewBuilder(result.Metric).Del(qslabels.MetricNameLabel).Labels()
|
||||
|
||||
for name, value := range r.labels.Map() {
|
||||
lb.Set(name, expand(value))
|
||||
}
|
||||
|
||||
lb.Set(qslabels.AlertNameLabel, r.Name())
|
||||
lb.Set(qslabels.AlertRuleIdLabel, r.ID())
|
||||
lb.Set(qslabels.RuleSourceLabel, r.GeneratorURL())
|
||||
|
||||
annotations := make(qslabels.Labels, 0, len(r.annotations.Map()))
|
||||
for name, value := range r.annotations.Map() {
|
||||
annotations = append(annotations, qslabels.Label{Name: name, Value: expand(value)})
|
||||
}
|
||||
if result.IsMissing {
|
||||
lb.Set(qslabels.AlertNameLabel, "[No data] "+r.Name())
|
||||
lb.Set(qslabels.NoDataLabel, "true")
|
||||
}
|
||||
|
||||
lbs := lb.Labels()
|
||||
h := lbs.Hash()
|
||||
resultFPs[h] = struct{}{}
|
||||
|
||||
if _, ok := alerts[h]; ok {
|
||||
err = fmt.Errorf("vector contains metrics with the same labelset after applying alert labels")
|
||||
// We have already acquired the lock above hence using SetHealth and
|
||||
// SetLastError will deadlock.
|
||||
r.health = ruletypes.HealthBad
|
||||
r.lastError = err
|
||||
return 0, err
|
||||
}
|
||||
alerts[h] = &ruletypes.Alert{
|
||||
Labels: lbs,
|
||||
QueryResultLables: resultLabels,
|
||||
Annotations: annotations,
|
||||
ActiveAt: ts,
|
||||
State: model.StatePending,
|
||||
Value: result.V,
|
||||
GeneratorURL: r.GeneratorURL(),
|
||||
Receivers: ruleReceiverMap[lbs.Map()[ruletypes.LabelThresholdName]],
|
||||
Missing: result.IsMissing,
|
||||
IsRecovering: result.IsRecovering,
|
||||
}
|
||||
}
|
||||
|
||||
r.logger.InfoContext(ctx, "number of alerts found", "rule_name", r.Name(), "alerts_count", len(alerts))
|
||||
// alerts[h] is ready, add or update active list now
|
||||
for h, a := range alerts {
|
||||
// Check whether we already have alerting state for the identifying label set.
|
||||
// Update the last value and annotations if so, create a new alert entry otherwise.
|
||||
if alert, ok := r.Active[h]; ok && alert.State != model.StateInactive {
|
||||
alert.Value = a.Value
|
||||
alert.Annotations = a.Annotations
|
||||
// Update the recovering and missing state of existing alert
|
||||
alert.IsRecovering = a.IsRecovering
|
||||
alert.Missing = a.Missing
|
||||
if v, ok := alert.Labels.Map()[ruletypes.LabelThresholdName]; ok {
|
||||
alert.Receivers = ruleReceiverMap[v]
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
r.Active[h] = a
|
||||
|
||||
}
|
||||
|
||||
itemsToAdd := []model.RuleStateHistory{}
|
||||
|
||||
// Check if any pending alerts should be removed or fire now. Write out alert timeseries.
|
||||
for fp, a := range r.Active {
|
||||
labelsJSON, err := json.Marshal(a.QueryResultLables)
|
||||
if err != nil {
|
||||
r.logger.ErrorContext(ctx, "error marshaling labels", errors.Attr(err), "rule_name", r.Name())
|
||||
}
|
||||
if _, ok := resultFPs[fp]; !ok {
|
||||
// If the alert was previously firing, keep it around for a given
|
||||
// retention time so it is reported as resolved to the AlertManager.
|
||||
if a.State == model.StatePending || (!a.ResolvedAt.IsZero() && ts.Sub(a.ResolvedAt) > ruletypes.ResolvedRetention) {
|
||||
delete(r.Active, fp)
|
||||
}
|
||||
if a.State != model.StateInactive {
|
||||
a.State = model.StateInactive
|
||||
a.ResolvedAt = ts
|
||||
itemsToAdd = append(itemsToAdd, model.RuleStateHistory{
|
||||
RuleID: r.ID(),
|
||||
RuleName: r.Name(),
|
||||
State: model.StateInactive,
|
||||
StateChanged: true,
|
||||
UnixMilli: ts.UnixMilli(),
|
||||
Labels: model.LabelsString(labelsJSON),
|
||||
Fingerprint: a.QueryResultLables.Hash(),
|
||||
})
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if a.State == model.StatePending && ts.Sub(a.ActiveAt) >= r.holdDuration.Duration() {
|
||||
a.State = model.StateFiring
|
||||
a.FiredAt = ts
|
||||
state := model.StateFiring
|
||||
if a.Missing {
|
||||
state = model.StateNoData
|
||||
}
|
||||
itemsToAdd = append(itemsToAdd, model.RuleStateHistory{
|
||||
RuleID: r.ID(),
|
||||
RuleName: r.Name(),
|
||||
State: state,
|
||||
StateChanged: true,
|
||||
UnixMilli: ts.UnixMilli(),
|
||||
Labels: model.LabelsString(labelsJSON),
|
||||
Fingerprint: a.QueryResultLables.Hash(),
|
||||
Value: a.Value,
|
||||
})
|
||||
}
|
||||
|
||||
// We need to change firing alert to recovering if the returned sample meets recovery threshold
|
||||
changeAlertingToRecovering := a.State == model.StateFiring && a.IsRecovering
|
||||
// We need to change recovering alerts to firing if the returned sample meets target threshold
|
||||
changeRecoveringToFiring := a.State == model.StateRecovering && !a.IsRecovering && !a.Missing
|
||||
// in any of the above case we need to update the status of alert
|
||||
if changeAlertingToRecovering || changeRecoveringToFiring {
|
||||
state := model.StateRecovering
|
||||
if changeRecoveringToFiring {
|
||||
state = model.StateFiring
|
||||
}
|
||||
a.State = state
|
||||
r.logger.DebugContext(ctx, "converting alert state", "name", r.Name(), "state", state)
|
||||
itemsToAdd = append(itemsToAdd, model.RuleStateHistory{
|
||||
RuleID: r.ID(),
|
||||
RuleName: r.Name(),
|
||||
State: state,
|
||||
StateChanged: true,
|
||||
UnixMilli: ts.UnixMilli(),
|
||||
Labels: model.LabelsString(labelsJSON),
|
||||
Fingerprint: a.QueryResultLables.Hash(),
|
||||
Value: a.Value,
|
||||
})
|
||||
}
|
||||
}
|
||||
r.health = ruletypes.HealthGood
|
||||
r.lastError = err
|
||||
|
||||
currentState := r.State()
|
||||
|
||||
overallStateChanged := currentState != prevState
|
||||
for idx, item := range itemsToAdd {
|
||||
item.OverallStateChanged = overallStateChanged
|
||||
item.OverallState = currentState
|
||||
itemsToAdd[idx] = item
|
||||
}
|
||||
|
||||
r.RecordRuleStateHistory(ctx, prevState, currentState, itemsToAdd)
|
||||
|
||||
return len(r.Active), nil
|
||||
}
|
||||
|
||||
func (r *PromRule) String() string {
|
||||
ar := ruletypes.PostableRule{
|
||||
AlertName: r.name,
|
||||
RuleCondition: r.ruleCondition,
|
||||
EvalWindow: r.evalWindow,
|
||||
Labels: r.labels.Map(),
|
||||
Annotations: r.annotations.Map(),
|
||||
PreferredChannels: r.preferredChannels,
|
||||
}
|
||||
|
||||
byt, err := json.Marshal(ar)
|
||||
if err != nil {
|
||||
return fmt.Sprintf("error marshaling alerting rule: %s", err.Error())
|
||||
}
|
||||
|
||||
return string(byt)
|
||||
}
|
||||
|
||||
func (r *PromRule) RunAlertQuery(ctx context.Context, qs string, start, end time.Time, interval time.Duration) (promql.Matrix, error) {
|
||||
@@ -242,7 +463,7 @@ func toCommonSeries(series promql.Series) v3.Series {
|
||||
Points: make([]v3.Point, 0),
|
||||
}
|
||||
|
||||
series.Metric.Range(func(lbl plabels.Label) {
|
||||
series.Metric.Range(func(lbl labels.Label) {
|
||||
commonSeries.Labels[lbl.Name] = lbl.Value
|
||||
commonSeries.LabelsArray = append(commonSeries.LabelsArray, map[string]string{
|
||||
lbl.Name: lbl.Value,
|
||||
|
||||
@@ -3,6 +3,7 @@ package rules
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"math"
|
||||
@@ -23,16 +24,21 @@ import (
|
||||
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
|
||||
"github.com/SigNoz/signoz/pkg/valuer"
|
||||
|
||||
querierV5 "github.com/SigNoz/signoz/pkg/querier"
|
||||
logsv3 "github.com/SigNoz/signoz/pkg/query-service/app/logs/v3"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/app/querier"
|
||||
querierV2 "github.com/SigNoz/signoz/pkg/query-service/app/querier/v2"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/app/queryBuilder"
|
||||
tracesV4 "github.com/SigNoz/signoz/pkg/query-service/app/traces/v4"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/interfaces"
|
||||
v3 "github.com/SigNoz/signoz/pkg/query-service/model/v3"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/utils/labels"
|
||||
querytemplate "github.com/SigNoz/signoz/pkg/query-service/utils/queryTemplate"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/utils/times"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/utils/timestamp"
|
||||
|
||||
logsv3 "github.com/SigNoz/signoz/pkg/query-service/app/logs/v3"
|
||||
tracesV4 "github.com/SigNoz/signoz/pkg/query-service/app/traces/v4"
|
||||
"github.com/SigNoz/signoz/pkg/units"
|
||||
|
||||
querierV5 "github.com/SigNoz/signoz/pkg/querier"
|
||||
|
||||
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
|
||||
)
|
||||
@@ -277,7 +283,7 @@ func (r *ThresholdRule) prepareLinksToTraces(ctx context.Context, ts time.Time,
|
||||
return contextlinks.PrepareLinksToTraces(start, end, filterItems)
|
||||
}
|
||||
|
||||
func (r *ThresholdRule) prepareQueryRangeV5(ctx context.Context, ts time.Time) *qbtypes.QueryRangeRequest {
|
||||
func (r *ThresholdRule) prepareQueryRangeV5(ctx context.Context, ts time.Time) (*qbtypes.QueryRangeRequest, error) {
|
||||
r.logger.InfoContext(
|
||||
ctx, "prepare query range request v5", "ts", ts.UnixMilli(), "eval_window", r.evalWindow.Milliseconds(), "eval_delay", r.evalDelay.Milliseconds(),
|
||||
)
|
||||
@@ -296,13 +302,16 @@ func (r *ThresholdRule) prepareQueryRangeV5(ctx context.Context, ts time.Time) *
|
||||
}
|
||||
req.CompositeQuery.Queries = make([]qbtypes.QueryEnvelope, len(r.Condition().CompositeQuery.Queries))
|
||||
copy(req.CompositeQuery.Queries, r.Condition().CompositeQuery.Queries)
|
||||
return req
|
||||
return req, nil
|
||||
}
|
||||
|
||||
func (r *ThresholdRule) prepareLinksToLogsV5(ctx context.Context, ts time.Time, lbls labels.Labels) string {
|
||||
selectedQuery := r.GetSelectedQuery()
|
||||
|
||||
qr := r.prepareQueryRangeV5(ctx, ts)
|
||||
qr, err := r.prepareQueryRangeV5(ctx, ts)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
start := time.UnixMilli(int64(qr.Start))
|
||||
end := time.UnixMilli(int64(qr.End))
|
||||
|
||||
@@ -339,7 +348,10 @@ func (r *ThresholdRule) prepareLinksToLogsV5(ctx context.Context, ts time.Time,
|
||||
func (r *ThresholdRule) prepareLinksToTracesV5(ctx context.Context, ts time.Time, lbls labels.Labels) string {
|
||||
selectedQuery := r.GetSelectedQuery()
|
||||
|
||||
qr := r.prepareQueryRangeV5(ctx, ts)
|
||||
qr, err := r.prepareQueryRangeV5(ctx, ts)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
start := time.UnixMilli(int64(qr.Start))
|
||||
end := time.UnixMilli(int64(qr.End))
|
||||
|
||||
@@ -488,7 +500,10 @@ func (r *ThresholdRule) buildAndRunQuery(ctx context.Context, orgID valuer.UUID,
|
||||
}
|
||||
|
||||
func (r *ThresholdRule) buildAndRunQueryV5(ctx context.Context, orgID valuer.UUID, ts time.Time) (ruletypes.Vector, error) {
|
||||
params := r.prepareQueryRangeV5(ctx, ts)
|
||||
params, err := r.prepareQueryRangeV5(ctx, ts)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var results []*v3.Result
|
||||
|
||||
@@ -565,8 +580,13 @@ func (r *ThresholdRule) buildAndRunQueryV5(ctx context.Context, orgID valuer.UUI
|
||||
}
|
||||
|
||||
func (r *ThresholdRule) Eval(ctx context.Context, ts time.Time) (int, error) {
|
||||
prevState := r.State()
|
||||
|
||||
valueFormatter := units.FormatterFromUnit(r.Unit())
|
||||
|
||||
var res ruletypes.Vector
|
||||
var err error
|
||||
|
||||
if r.version == "v5" {
|
||||
r.logger.InfoContext(ctx, "running v5 query")
|
||||
res, err = r.buildAndRunQueryV5(ctx, r.orgID, ts)
|
||||
@@ -574,35 +594,247 @@ func (r *ThresholdRule) Eval(ctx context.Context, ts time.Time) (int, error) {
|
||||
r.logger.InfoContext(ctx, "running v4 query")
|
||||
res, err = r.buildAndRunQuery(ctx, r.orgID, ts)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
opts := EvalVectorOptions{
|
||||
DeleteLabels: []string{labels.MetricNameLabel, labels.TemporalityLabel},
|
||||
ExtraAnnotations: func(ctx context.Context, ts time.Time, smpl labels.Labels) []labels.Label {
|
||||
host := r.hostFromSource()
|
||||
if host == "" {
|
||||
return nil
|
||||
}
|
||||
r.mtx.Lock()
|
||||
defer r.mtx.Unlock()
|
||||
|
||||
//// Links with timestamps should go in annotations since labels
|
||||
//// is used alert grouping, and we want to group alerts with the same
|
||||
//// label set, but different timestamps, together.
|
||||
switch r.typ { // DIFF
|
||||
case ruletypes.AlertTypeTraces:
|
||||
if link := r.prepareLinksToTraces(ctx, ts, smpl); link != "" {
|
||||
r.logger.InfoContext(ctx, "adding traces link to annotations", "link", fmt.Sprintf("%s/traces-explorer?%s", host, link))
|
||||
return []labels.Label{{Name: "related_traces", Value: fmt.Sprintf("%s/traces-explorer?%s", host, link)}}
|
||||
}
|
||||
case ruletypes.AlertTypeLogs:
|
||||
if link := r.prepareLinksToLogs(ctx, ts, smpl); link != "" {
|
||||
r.logger.InfoContext(ctx, "adding logs link to annotations", "link", fmt.Sprintf("%s/logs/logs-explorer?%s", host, link))
|
||||
return []labels.Label{{Name: "related_logs", Value: fmt.Sprintf("%s/logs/logs-explorer?%s", host, link)}}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
},
|
||||
resultFPs := map[uint64]struct{}{}
|
||||
alerts := make(map[uint64]*ruletypes.Alert, len(res))
|
||||
|
||||
ruleReceivers := r.Threshold.GetRuleReceivers()
|
||||
ruleReceiverMap := make(map[string][]string)
|
||||
for _, value := range ruleReceivers {
|
||||
ruleReceiverMap[value.Name] = value.Channels
|
||||
}
|
||||
return r.EvalVector(ctx, ts, res, opts)
|
||||
|
||||
for _, smpl := range res {
|
||||
l := make(map[string]string, len(smpl.Metric))
|
||||
for _, lbl := range smpl.Metric {
|
||||
l[lbl.Name] = lbl.Value
|
||||
}
|
||||
|
||||
value := valueFormatter.Format(smpl.V, r.Unit())
|
||||
// todo(aniket): handle different threshold
|
||||
threshold := valueFormatter.Format(smpl.Target, smpl.TargetUnit)
|
||||
r.logger.DebugContext(ctx, "Alert template data for rule", "rule_name", r.Name(), "formatter", valueFormatter.Name(), "value", value, "threshold", threshold)
|
||||
|
||||
tmplData := ruletypes.AlertTemplateData(l, value, threshold)
|
||||
// Inject some convenience variables that are easier to remember for users
|
||||
// who are not used to Go's templating system.
|
||||
defs := "{{$labels := .Labels}}{{$value := .Value}}{{$threshold := .Threshold}}"
|
||||
|
||||
// utility function to apply go template on labels and annotations
|
||||
expand := func(text string) string {
|
||||
tmpl := ruletypes.NewTemplateExpander(
|
||||
ctx,
|
||||
defs+text,
|
||||
"__alert_"+r.Name(),
|
||||
tmplData,
|
||||
times.Time(timestamp.FromTime(ts)),
|
||||
nil,
|
||||
)
|
||||
result, err := tmpl.Expand()
|
||||
if err != nil {
|
||||
result = fmt.Sprintf("<error expanding template: %s>", err)
|
||||
r.logger.ErrorContext(ctx, "Expanding alert template failed", errors.Attr(err), "data", tmplData)
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
lb := labels.NewBuilder(smpl.Metric).Del(labels.MetricNameLabel).Del(labels.TemporalityLabel)
|
||||
resultLabels := labels.NewBuilder(smpl.Metric).Del(labels.MetricNameLabel).Del(labels.TemporalityLabel).Labels()
|
||||
|
||||
for name, value := range r.labels.Map() {
|
||||
lb.Set(name, expand(value))
|
||||
}
|
||||
|
||||
lb.Set(labels.AlertNameLabel, r.Name())
|
||||
lb.Set(labels.AlertRuleIdLabel, r.ID())
|
||||
lb.Set(labels.RuleSourceLabel, r.GeneratorURL())
|
||||
|
||||
annotations := make(labels.Labels, 0, len(r.annotations.Map()))
|
||||
for name, value := range r.annotations.Map() {
|
||||
annotations = append(annotations, labels.Label{Name: name, Value: expand(value)})
|
||||
}
|
||||
if smpl.IsMissing {
|
||||
lb.Set(labels.AlertNameLabel, "[No data] "+r.Name())
|
||||
lb.Set(labels.NoDataLabel, "true")
|
||||
}
|
||||
|
||||
// Links with timestamps should go in annotations since labels
|
||||
// is used alert grouping, and we want to group alerts with the same
|
||||
// label set, but different timestamps, together.
|
||||
switch r.typ {
|
||||
case ruletypes.AlertTypeTraces:
|
||||
link := r.prepareLinksToTraces(ctx, ts, smpl.Metric)
|
||||
if link != "" && r.hostFromSource() != "" {
|
||||
r.logger.InfoContext(ctx, "adding traces link to annotations", "link", fmt.Sprintf("%s/traces-explorer?%s", r.hostFromSource(), link))
|
||||
annotations = append(annotations, labels.Label{Name: "related_traces", Value: fmt.Sprintf("%s/traces-explorer?%s", r.hostFromSource(), link)})
|
||||
}
|
||||
case ruletypes.AlertTypeLogs:
|
||||
link := r.prepareLinksToLogs(ctx, ts, smpl.Metric)
|
||||
if link != "" && r.hostFromSource() != "" {
|
||||
r.logger.InfoContext(ctx, "adding logs link to annotations", "link", fmt.Sprintf("%s/logs/logs-explorer?%s", r.hostFromSource(), link))
|
||||
annotations = append(annotations, labels.Label{Name: "related_logs", Value: fmt.Sprintf("%s/logs/logs-explorer?%s", r.hostFromSource(), link)})
|
||||
}
|
||||
}
|
||||
|
||||
lbs := lb.Labels()
|
||||
h := lbs.Hash()
|
||||
resultFPs[h] = struct{}{}
|
||||
|
||||
if _, ok := alerts[h]; ok {
|
||||
return 0, fmt.Errorf("duplicate alert found, vector contains metrics with the same labelset after applying alert labels")
|
||||
}
|
||||
|
||||
alerts[h] = &ruletypes.Alert{
|
||||
Labels: lbs,
|
||||
QueryResultLables: resultLabels,
|
||||
Annotations: annotations,
|
||||
ActiveAt: ts,
|
||||
State: model.StatePending,
|
||||
Value: smpl.V,
|
||||
GeneratorURL: r.GeneratorURL(),
|
||||
Receivers: ruleReceiverMap[lbs.Map()[ruletypes.LabelThresholdName]],
|
||||
Missing: smpl.IsMissing,
|
||||
IsRecovering: smpl.IsRecovering,
|
||||
}
|
||||
}
|
||||
|
||||
r.logger.InfoContext(ctx, "number of alerts found", "rule_name", r.Name(), "alerts_count", len(alerts))
|
||||
|
||||
// alerts[h] is ready, add or update active list now
|
||||
for h, a := range alerts {
|
||||
// Check whether we already have alerting state for the identifying label set.
|
||||
// Update the last value and annotations if so, create a new alert entry otherwise.
|
||||
if alert, ok := r.Active[h]; ok && alert.State != model.StateInactive {
|
||||
|
||||
alert.Value = a.Value
|
||||
alert.Annotations = a.Annotations
|
||||
// Update the recovering and missing state of existing alert
|
||||
alert.IsRecovering = a.IsRecovering
|
||||
alert.Missing = a.Missing
|
||||
if v, ok := alert.Labels.Map()[ruletypes.LabelThresholdName]; ok {
|
||||
alert.Receivers = ruleReceiverMap[v]
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
r.Active[h] = a
|
||||
}
|
||||
|
||||
itemsToAdd := []model.RuleStateHistory{}
|
||||
|
||||
// Check if any pending alerts should be removed or fire now. Write out alert timeseries.
|
||||
for fp, a := range r.Active {
|
||||
labelsJSON, err := json.Marshal(a.QueryResultLables)
|
||||
if err != nil {
|
||||
r.logger.ErrorContext(ctx, "error marshaling labels", errors.Attr(err), "labels", a.Labels)
|
||||
}
|
||||
if _, ok := resultFPs[fp]; !ok {
|
||||
// If the alert was previously firing, keep it around for a given
|
||||
// retention time so it is reported as resolved to the AlertManager.
|
||||
if a.State == model.StatePending || (!a.ResolvedAt.IsZero() && ts.Sub(a.ResolvedAt) > ruletypes.ResolvedRetention) {
|
||||
delete(r.Active, fp)
|
||||
}
|
||||
if a.State != model.StateInactive {
|
||||
r.logger.DebugContext(ctx, "converting firing alert to inActive", "name", r.Name())
|
||||
a.State = model.StateInactive
|
||||
a.ResolvedAt = ts
|
||||
itemsToAdd = append(itemsToAdd, model.RuleStateHistory{
|
||||
RuleID: r.ID(),
|
||||
RuleName: r.Name(),
|
||||
State: model.StateInactive,
|
||||
StateChanged: true,
|
||||
UnixMilli: ts.UnixMilli(),
|
||||
Labels: model.LabelsString(labelsJSON),
|
||||
Fingerprint: a.QueryResultLables.Hash(),
|
||||
Value: a.Value,
|
||||
})
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if a.State == model.StatePending && ts.Sub(a.ActiveAt) >= r.holdDuration.Duration() {
|
||||
r.logger.DebugContext(ctx, "converting pending alert to firing", "name", r.Name())
|
||||
a.State = model.StateFiring
|
||||
a.FiredAt = ts
|
||||
state := model.StateFiring
|
||||
if a.Missing {
|
||||
state = model.StateNoData
|
||||
}
|
||||
itemsToAdd = append(itemsToAdd, model.RuleStateHistory{
|
||||
RuleID: r.ID(),
|
||||
RuleName: r.Name(),
|
||||
State: state,
|
||||
StateChanged: true,
|
||||
UnixMilli: ts.UnixMilli(),
|
||||
Labels: model.LabelsString(labelsJSON),
|
||||
Fingerprint: a.QueryResultLables.Hash(),
|
||||
Value: a.Value,
|
||||
})
|
||||
}
|
||||
|
||||
// We need to change firing alert to recovering if the returned sample meets recovery threshold
|
||||
changeAlertingToRecovering := a.State == model.StateFiring && a.IsRecovering
|
||||
// We need to change recovering alerts to firing if the returned sample meets target threshold
|
||||
changeRecoveringToFiring := a.State == model.StateRecovering && !a.IsRecovering && !a.Missing
|
||||
// in any of the above case we need to update the status of alert
|
||||
if changeAlertingToRecovering || changeRecoveringToFiring {
|
||||
state := model.StateRecovering
|
||||
if changeRecoveringToFiring {
|
||||
state = model.StateFiring
|
||||
}
|
||||
a.State = state
|
||||
r.logger.DebugContext(ctx, "converting alert state", "name", r.Name(), "state", state)
|
||||
itemsToAdd = append(itemsToAdd, model.RuleStateHistory{
|
||||
RuleID: r.ID(),
|
||||
RuleName: r.Name(),
|
||||
State: state,
|
||||
StateChanged: true,
|
||||
UnixMilli: ts.UnixMilli(),
|
||||
Labels: model.LabelsString(labelsJSON),
|
||||
Fingerprint: a.QueryResultLables.Hash(),
|
||||
Value: a.Value,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
currentState := r.State()
|
||||
|
||||
overallStateChanged := currentState != prevState
|
||||
for idx, item := range itemsToAdd {
|
||||
item.OverallStateChanged = overallStateChanged
|
||||
item.OverallState = currentState
|
||||
itemsToAdd[idx] = item
|
||||
}
|
||||
|
||||
r.RecordRuleStateHistory(ctx, prevState, currentState, itemsToAdd)
|
||||
|
||||
r.health = ruletypes.HealthGood
|
||||
r.lastError = err
|
||||
|
||||
return len(r.Active), nil
|
||||
}
|
||||
|
||||
func (r *ThresholdRule) String() string {
|
||||
ar := ruletypes.PostableRule{
|
||||
AlertName: r.name,
|
||||
RuleCondition: r.ruleCondition,
|
||||
EvalWindow: r.evalWindow,
|
||||
Labels: r.labels.Map(),
|
||||
Annotations: r.annotations.Map(),
|
||||
PreferredChannels: r.preferredChannels,
|
||||
}
|
||||
|
||||
byt, err := json.Marshal(ar)
|
||||
if err != nil {
|
||||
return fmt.Sprintf("error marshaling alerting rule: %s", err.Error())
|
||||
}
|
||||
|
||||
return string(byt)
|
||||
}
|
||||
|
||||
@@ -102,6 +102,10 @@ func (fn FunctionName) Validate() error {
|
||||
|
||||
// ApplyFunction applies the given function to the result data
|
||||
func ApplyFunction(fn Function, result *TimeSeries) *TimeSeries {
|
||||
if len(result.Values) == 0 {
|
||||
return result
|
||||
}
|
||||
|
||||
// Extract the function name and arguments
|
||||
name := fn.Name
|
||||
args := fn.Args
|
||||
|
||||
@@ -599,6 +599,14 @@ func TestApplyFunction(t *testing.T) {
|
||||
values []float64
|
||||
want []float64
|
||||
}{
|
||||
{
|
||||
name: "test with empty series",
|
||||
function: Function{
|
||||
Name: FunctionNameRunningDiff,
|
||||
},
|
||||
values: []float64{},
|
||||
want: []float64{},
|
||||
},
|
||||
{
|
||||
name: "cutOffMin function",
|
||||
function: Function{
|
||||
|
||||
Reference in New Issue
Block a user