Compare commits

..

2 Commits

Author SHA1 Message Date
srikanthccv
b91e664e14 chore: address lint 2026-06-17 21:47:17 +05:30
srikanthccv
04ec681eda fix(alerts): surface individual validation errors in API response 2026-06-17 16:41:28 +05:30
10 changed files with 120 additions and 136 deletions

View File

@@ -1,5 +1,6 @@
import { useMemo } from 'react';
import { useParams } from 'react-router-dom';
import { Skeleton } from 'antd';
import { ENTITY_VERSION_V4 } from 'constants/app';
import { FeatureKeys } from 'constants/features';
import { PANEL_TYPES } from 'constants/queryBuilder';
@@ -123,14 +124,24 @@ function ServiceOverview({
/>
<Card data-testid="service_latency">
<GraphContainer>
<Graph
onDragSelect={onDragSelect}
widget={latencyWidget}
onClickHandler={handleGraphClick('Service')}
isQueryEnabled={isQueryEnabled}
version={ENTITY_VERSION_V4}
enableDrillDown={SERVICE_DETAIL_DRILLDOWN_ENABLED}
/>
{topLevelOperationsIsLoading && (
<Skeleton
style={{
height: '100%',
padding: '16px',
}}
/>
)}
{!topLevelOperationsIsLoading && (
<Graph
onDragSelect={onDragSelect}
widget={latencyWidget}
onClickHandler={handleGraphClick('Service')}
isQueryEnabled={isQueryEnabled}
version={ENTITY_VERSION_V4}
enableDrillDown={SERVICE_DETAIL_DRILLDOWN_ENABLED}
/>
)}
</GraphContainer>
</Card>
</>

View File

@@ -1,3 +1,4 @@
import { Skeleton } from 'antd';
import { Typography } from '@signozhq/ui/typography';
import axios from 'axios';
import { SOMETHING_WENT_WRONG } from 'constants/api';
@@ -28,14 +29,24 @@ function TopLevelOperation({
</Typography>
) : (
<GraphContainer>
<Graph
widget={widget}
onClickHandler={handleGraphClick(opName)}
onDragSelect={onDragSelect}
isQueryEnabled={!topLevelOperationsIsLoading}
version={ENTITY_VERSION_V4}
enableDrillDown={SERVICE_DETAIL_DRILLDOWN_ENABLED}
/>
{topLevelOperationsIsLoading && (
<Skeleton
style={{
height: '100%',
padding: '16px',
}}
/>
)}
{!topLevelOperationsIsLoading && (
<Graph
widget={widget}
onClickHandler={handleGraphClick(opName)}
onDragSelect={onDragSelect}
isQueryEnabled={!topLevelOperationsIsLoading}
version={ENTITY_VERSION_V4}
enableDrillDown={SERVICE_DETAIL_DRILLDOWN_ENABLED}
/>
)}
</GraphContainer>
)}
</Card>

View File

@@ -22,7 +22,7 @@ func newConfig() factory.Config {
Agent: AgentConfig{
// we will maintain the latest version of cloud integration agent from here,
// till we automate it externally or figure out a way to validate it.
Version: "v0.0.13",
Version: "v0.0.12",
},
}
}

View File

@@ -119,7 +119,11 @@ func (q *querier) QueryRange(ctx context.Context, orgID valuer.UUID, req *qbtype
queries := make(map[string]qbtypes.Query)
steps := make(map[string]qbtypes.Step)
missingMetricQueries, metricWarnings, err := q.resolveMetricMetadata(ctx, req.CompositeQuery.Queries, req.Start, req.End)
// Resolve metric metadata once per request: patches each metric-aggregation
// query's spec in place, returns the queries whose every aggregation was
// missing (used for preseeded empty results), and any dormant-metric
// warning string. NotFound errors for never-seen metrics are propagated.
missingMetricQueries, dormantMetricsWarningMsg, err := q.resolveMetricMetadata(ctx, req.CompositeQuery.Queries, req.Start, req.End)
if err != nil {
return nil, err
}
@@ -236,15 +240,13 @@ func (q *querier) QueryRange(ctx context.Context, orgID valuer.UUID, req *qbtype
}
}
}
if len(metricWarnings) > 0 {
if dormantMetricsWarningMsg != "" {
if qbResp.Warning == nil {
qbResp.Warning = &qbtypes.QueryWarnData{}
}
for _, w := range metricWarnings {
qbResp.Warning.Warnings = append(qbResp.Warning.Warnings, qbtypes.QueryWarnDataAdditional{
Message: w,
})
}
qbResp.Warning.Warnings = append(qbResp.Warning.Warnings, qbtypes.QueryWarnDataAdditional{
Message: dormantMetricsWarningMsg,
})
}
}
return qbResp, qbErr
@@ -300,11 +302,12 @@ func (q *querier) populateQBEvent(event *qbtypes.QBEvent, queries []qbtypes.Quer
// - missingMetricQueries: names of queries whose every aggregation was
// missing. Used downstream to preseed empty result placeholders so the
// response still has an entry per requested query name.
// - metricWarnings: human-readable warnings for metrics that could not be
// resolved: never-seen metrics and dormant metrics (seen but no data in
// the query window).
// - err: Internal when a metadata fetch fails.
func (q *querier) resolveMetricMetadata(ctx context.Context, queries []qbtypes.QueryEnvelope, start, end uint64) (missingMetricQueries []string, metricWarnings []string, err error) {
// - dormantWarning: a human-readable warning describing metrics that exist in
// the store but produced no data within the query window. Empty when no
// such metrics are present.
// - err: NotFound when one or more referenced metrics have never been seen,
// or Internal when a metadata fetch fails.
func (q *querier) resolveMetricMetadata(ctx context.Context, queries []qbtypes.QueryEnvelope, start, end uint64) (missingMetricQueries []string, dormantWarning string, err error) {
metricNames := make([]string, 0)
for idx := range queries {
if queries[idx].Type != qbtypes.QueryTypeBuilder {
@@ -322,13 +325,13 @@ func (q *querier) resolveMetricMetadata(ctx context.Context, queries []qbtypes.Q
}
if len(metricNames) == 0 {
return nil, nil, nil
return nil, "", nil
}
metricTemporality, metricTypes, err := q.metadataStore.FetchTemporalityAndTypeMulti(ctx, start, end, metricNames...)
if err != nil {
q.logger.WarnContext(ctx, "failed to fetch metric temporality", errors.Attr(err), slog.Any("metrics", metricNames))
return nil, nil, errors.NewInternalf(errors.CodeInternal, "failed to fetch metrics temporality")
return nil, "", errors.NewInternalf(errors.CodeInternal, "failed to fetch metrics temporality")
}
q.logger.DebugContext(ctx, "fetched metric temporalities and types", slog.Any("metric_temporality", metricTemporality), slog.Any("metric_types", metricTypes))
@@ -360,7 +363,7 @@ func (q *querier) resolveMetricMetadata(ctx context.Context, queries []qbtypes.Q
}
// Type is resolved now; validate aggregation compatibility against it.
if err := spec.Aggregations[i].ValidateForType(); err != nil {
return nil, nil, err
return nil, "", err
}
presentAggregations = append(presentAggregations, spec.Aggregations[i])
}
@@ -373,7 +376,7 @@ func (q *querier) resolveMetricMetadata(ctx context.Context, queries []qbtypes.Q
}
if len(missingMetrics) == 0 {
return missingMetricQueries, nil, nil
return missingMetricQueries, "", nil
}
isInternalMetric := func(n string) bool { return strings.HasPrefix(n, "signoz.") || strings.HasPrefix(n, "signoz_") }
@@ -384,33 +387,29 @@ func (q *querier) resolveMetricMetadata(ctx context.Context, queries []qbtypes.Q
}
}
if len(externalMissingMetrics) == 0 {
return missingMetricQueries, nil, nil
// this means all missing metrics are internal, and since internal metrics
// aren't user-controlled, skip errors/warnings for them since users can't act on them
return missingMetricQueries, "", nil
}
// Classify each missing metric: never-seen -> warning with empty result;
// seen-but-no-data-in-window -> dormant warning.
// Classify each missing metric: never-seen → NotFound error; seen-but-no-
// data-in-window dormant warning.
lastSeenInfo, _ := q.metadataStore.FetchLastSeenInfoMulti(ctx, externalMissingMetrics...)
var nonExistentMetrics []string
var dormantMetrics []string
nonExistentMetrics := []string{}
for _, name := range externalMissingMetrics {
if ts, ok := lastSeenInfo[name]; ok && ts > 0 {
dormantMetrics = append(dormantMetrics, name)
continue
}
nonExistentMetrics = append(nonExistentMetrics, name)
}
var warnings []string
// Never-seen metrics: the query already gets a preseeded empty result
// via the aggregation-dropping path above; we just attach a warning.
if len(nonExistentMetrics) == 1 {
warnings = append(warnings, fmt.Sprintf("metric %s has never been received. Check the metric name and instrumentation", nonExistentMetrics[0]))
} else if len(nonExistentMetrics) > 1 {
warnings = append(warnings, fmt.Sprintf("the following metrics have never been received. Check the metric names and instrumentation: %s", strings.Join(nonExistentMetrics, ", ")))
return nil, "", errors.NewNotFoundf(errors.CodeNotFound, "could not find the metric %s", nonExistentMetrics[0])
}
if len(nonExistentMetrics) > 1 {
return nil, "", errors.NewNotFoundf(errors.CodeNotFound, "the following metrics were not found: %s", strings.Join(nonExistentMetrics, ", "))
}
// Dormant metrics: seen before but no data in the query window.
// All missing metrics are dormant — assemble the warning string.
lastSeenStr := func(name string) string {
if ts, ok := lastSeenInfo[name]; ok && ts > 0 {
ago := humanize.RelTime(time.UnixMilli(ts), time.Now(), "ago", "from now")
@@ -418,16 +417,16 @@ func (q *querier) resolveMetricMetadata(ctx context.Context, queries []qbtypes.Q
}
return name
}
if len(dormantMetrics) == 1 {
warnings = append(warnings, fmt.Sprintf("no data found for the metric %s in the query time range", lastSeenStr(dormantMetrics[0])))
} else if len(dormantMetrics) > 1 {
parts := make([]string, len(dormantMetrics))
for i, m := range dormantMetrics {
if len(externalMissingMetrics) == 1 {
dormantWarning = fmt.Sprintf("no data found for the metric %s in the query time range", lastSeenStr(missingMetrics[0]))
} else {
parts := make([]string, len(externalMissingMetrics))
for i, m := range externalMissingMetrics {
parts[i] = lastSeenStr(m)
}
warnings = append(warnings, fmt.Sprintf("no data found for the following metrics in the query time range: %s", strings.Join(parts, ", ")))
dormantWarning = fmt.Sprintf("no data found for the following metrics in the query time range: %s", strings.Join(parts, ", "))
}
return missingMetricQueries, warnings, nil
return missingMetricQueries, dormantWarning, nil
}
func (q *querier) QueryRawStream(ctx context.Context, orgID valuer.UUID, req *qbtypes.QueryRangeRequest, client *qbtypes.RawStream) {

View File

@@ -37,7 +37,7 @@ func (m *mockMetricStmtBuilder) Build(_ context.Context, _, _ uint64, _ qbtypes.
func TestQueryRange_MetricTypeMissing(t *testing.T) {
// When a metric has UnspecifiedType and is not found in the metadata store,
// the querier should return an empty result with a warning instead of an error.
// the querier should return a not-found error, even if the request provides a temporality
providerSettings := instrumentationtest.New().ToProviderSettings()
metadataStore := telemetrytypestest.NewMockMetadataStore()
@@ -80,14 +80,9 @@ func TestQueryRange_MetricTypeMissing(t *testing.T) {
},
}
resp, err := q.QueryRange(context.Background(), valuer.GenerateUUID(), req)
require.NoError(t, err)
require.NotNil(t, resp)
require.NotNil(t, resp.Warning)
require.Len(t, resp.Warning.Warnings, 1)
assert.Contains(t, resp.Warning.Warnings[0].Message, "unknown_metric")
assert.Contains(t, resp.Warning.Warnings[0].Message, "has never been received")
_, err := q.QueryRange(context.Background(), valuer.GenerateUUID(), req)
require.Error(t, err)
assert.Contains(t, err.Error(), "could not find the metric unknown_metric")
}
func TestQueryRange_MetricTypeFromStore(t *testing.T) {

View File

@@ -101,29 +101,9 @@ func (b *MetricQueryStatementBuilder) Build(
return nil, err
}
var pairFallbackWarnings []string
for _, sel := range keySelectors {
if _, ok := keys[sel.Name]; !ok {
keys[sel.Name] = []*telemetrytypes.TelemetryFieldKey{{
Name: sel.Name,
FieldContext: telemetrytypes.FieldContextAttribute,
FieldDataType: telemetrytypes.FieldDataTypeString,
Signal: telemetrytypes.SignalMetrics,
}}
pairFallbackWarnings = append(pairFallbackWarnings,
fmt.Sprintf("key `%s` not found on metric %s", sel.Name, query.Aggregations[0].MetricName),
)
}
}
start, end = querybuilder.AdjustedMetricTimeRange(start, end, uint64(query.StepInterval.Seconds()), query)
stmt, err := b.buildPipelineStatement(ctx, start, end, query, keys, variables)
if err != nil {
return nil, err
}
stmt.Warnings = append(stmt.Warnings, pairFallbackWarnings...)
return stmt, nil
return b.buildPipelineStatement(ctx, start, end, query, keys, variables)
}
func (b *MetricQueryStatementBuilder) buildPipelineStatement(

View File

@@ -217,39 +217,6 @@ func TestStatementBuilder(t *testing.T) {
},
expectedErr: nil,
},
{
name: "test_missing_key_falls_back_to_labels",
requestType: qbtypes.RequestTypeTimeSeries,
query: qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation]{
Signal: telemetrytypes.SignalMetrics,
StepInterval: qbtypes.Step{Duration: 30 * time.Second},
Aggregations: []qbtypes.MetricAggregation{
{
MetricName: "signoz_calls_total",
Type: metrictypes.SumType,
Temporality: metrictypes.Cumulative,
TimeAggregation: metrictypes.TimeAggregationRate,
SpaceAggregation: metrictypes.SpaceAggregationSum,
},
},
Filter: &qbtypes.Filter{
Expression: "k8s.statefulset.name = 'my-statefulset'",
},
GroupBy: []qbtypes.GroupByKey{
{
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
Name: "k8s.statefulset.name",
},
},
},
},
expected: qbtypes.Statement{
Query: "WITH __temporal_aggregation_cte AS (SELECT ts, `k8s.statefulset.name`, multiIf(row_number() OVER rate_window = 1, nan, (per_series_value - lagInFrame(per_series_value, 1) OVER rate_window) < 0, per_series_value / (ts - lagInFrame(ts, 1) OVER rate_window), (per_series_value - lagInFrame(per_series_value, 1) OVER rate_window) / (ts - lagInFrame(ts, 1) OVER rate_window)) AS per_series_value FROM (SELECT fingerprint, toStartOfInterval(toDateTime(intDiv(unix_milli, 1000)), toIntervalSecond(30)) AS ts, `k8s.statefulset.name`, max(value) AS per_series_value FROM signoz_metrics.distributed_samples_v4 AS points INNER JOIN (SELECT fingerprint, JSONExtractString(labels, 'k8s.statefulset.name') AS `k8s.statefulset.name` FROM signoz_metrics.time_series_v4_6hrs WHERE metric_name IN (?) AND unix_milli >= ? AND unix_milli <= ? AND LOWER(temporality) LIKE LOWER(?) AND __normalized = ? AND JSONExtractString(labels, 'k8s.statefulset.name') = ? GROUP BY fingerprint, `k8s.statefulset.name`) AS filtered_time_series ON points.fingerprint = filtered_time_series.fingerprint WHERE metric_name IN (?) AND unix_milli >= ? AND unix_milli < ? GROUP BY fingerprint, ts, `k8s.statefulset.name` ORDER BY fingerprint, ts) WINDOW rate_window AS (PARTITION BY fingerprint ORDER BY fingerprint, ts)), __spatial_aggregation_cte AS (SELECT ts, `k8s.statefulset.name`, sum(per_series_value) AS value FROM __temporal_aggregation_cte WHERE isNaN(per_series_value) = ? GROUP BY ts, `k8s.statefulset.name`) SELECT * FROM __spatial_aggregation_cte ORDER BY `k8s.statefulset.name`, ts",
Args: []any{"signoz_calls_total", uint64(1747936800000), uint64(1747983420000), "cumulative", false, "my-statefulset", "signoz_calls_total", uint64(1747947360000), uint64(1747983420000), 0},
Warnings: []string{"key `k8s.statefulset.name` not found on metric signoz_calls_total"},
},
expectedErr: nil,
},
}
fm := NewFieldMapper()

View File

@@ -338,6 +338,7 @@ func isValidLabelValue(v string) bool {
// validate runs during UnmarshalJSON (read + write path).
// Preserves the original pre-existing checks only so that stored rules
// continue to load without errors.
// TODO(srikanthccv): remove this once v1 is deprecated and removed.
func (r *PostableRule) validate() error {
var errs []error
@@ -366,9 +367,13 @@ func (r *PostableRule) validate() error {
errs = append(errs, testTemplateParsing(r)...)
joined := errors.Join(errs...)
if joined != nil {
return errors.WrapInvalidInputf(joined, errors.CodeInvalidInput, "validation failed")
if len(errs) > 0 {
messages := make([]string, len(errs))
for i, e := range errs {
messages[i] = e.Error()
}
return errors.NewInvalidInputf(errors.CodeInvalidInput, "alert rule definition is not valid").
WithAdditional(messages...)
}
return nil
}
@@ -466,9 +471,13 @@ func (r *PostableRule) Validate() error {
errs = append(errs, testTemplateParsing(r)...)
joined := errors.Join(errs...)
if joined != nil {
return errors.WrapInvalidInputf(joined, errors.CodeInvalidInput, "validation failed")
if len(errs) > 0 {
messages := make([]string, len(errs))
for i, e := range errs {
messages[i] = e.Error()
}
return errors.NewInvalidInputf(errors.CodeInvalidInput, "alert rule is not valid").
WithAdditional(messages...)
}
return nil
}

View File

@@ -4,8 +4,23 @@ import (
"encoding/json"
"strings"
"testing"
"github.com/SigNoz/signoz/pkg/errors"
)
func errorContains(err error, substr string) bool {
j := errors.AsJSON(err)
if strings.Contains(j.Message, substr) {
return true
}
for _, e := range j.Errors {
if strings.Contains(e.Message, substr) {
return true
}
}
return false
}
// validV1Builder returns a minimal valid v1 builder rule JSON.
func validV1Builder() string {
return `{
@@ -494,7 +509,7 @@ func TestValidate_PostableRule_Common(t *testing.T) {
if tt.wantErr {
if err == nil {
t.Errorf("expected error containing %q, got nil", tt.errSubstr)
} else if tt.errSubstr != "" && !strings.Contains(err.Error(), tt.errSubstr) {
} else if tt.errSubstr != "" && !errorContains(err, tt.errSubstr) {
t.Errorf("expected error containing %q, got: %v", tt.errSubstr, err)
}
} else {
@@ -687,7 +702,7 @@ func TestValidate_V1_ConditionFields(t *testing.T) {
if tt.wantErr {
if validateErr == nil {
t.Errorf("expected Validate() error containing %q, got nil", tt.errSubstr)
} else if tt.errSubstr != "" && !strings.Contains(validateErr.Error(), tt.errSubstr) {
} else if tt.errSubstr != "" && !errorContains(validateErr, tt.errSubstr) {
t.Errorf("expected error containing %q, got: %v", tt.errSubstr, validateErr)
}
} else {
@@ -1029,7 +1044,7 @@ func TestValidate_V2Alpha1(t *testing.T) {
if tt.wantErr {
if err == nil {
t.Errorf("expected error containing %q, got nil", tt.errSubstr)
} else if tt.errSubstr != "" && !strings.Contains(err.Error(), tt.errSubstr) {
} else if tt.errSubstr != "" && !errorContains(err, tt.errSubstr) {
t.Errorf("expected error containing %q, got: %v", tt.errSubstr, err)
}
} else {
@@ -1337,7 +1352,7 @@ func TestValidate_MultipleErrors(t *testing.T) {
t.Fatal("expected unmarshal error for wrong version")
}
// The error should mention version
if !strings.Contains(err.Error(), "version") {
if !errorContains(err, "version") {
t.Errorf("expected error to mention version, got: %v", err)
}
})
@@ -1355,10 +1370,9 @@ func TestValidate_MultipleErrors(t *testing.T) {
if validateErr == nil {
t.Fatal("expected Validate() error")
}
errStr := validateErr.Error()
// Should contain errors for thresholds, evaluation, notificationSettings
for _, substr := range []string{"evaluation", "notificationSettings"} {
if !strings.Contains(errStr, substr) {
if !errorContains(validateErr, substr) {
t.Errorf("expected error to mention %q, got: %v", substr, validateErr)
}
}
@@ -1469,7 +1483,7 @@ func TestValidate_V2Alpha1_CumulativeEvaluation(t *testing.T) {
if tt.wantErr {
if err == nil {
t.Errorf("expected error containing %q, got nil", tt.errSubstr)
} else if !strings.Contains(err.Error(), tt.errSubstr) {
} else if !errorContains(err, tt.errSubstr) {
t.Errorf("expected error containing %q, got: %v", tt.errSubstr, err)
}
} else if err != nil {

View File

@@ -614,7 +614,7 @@ def test_histogram_p90_returns_warning_outside_data_window(
assert warnings[0]["message"].startswith(f"no data found for the metric {metric_name}")
def test_non_existent_metrics_returns_warning(
def test_non_existent_metrics_returns_404(
signoz: types.SigNoz,
create_user_admin: None, # pylint: disable=unused-argument
get_token: Callable[[str, str], str],
@@ -635,11 +635,9 @@ def test_non_existent_metrics_returns_warning(
start_2h = int((now - timedelta(hours=2)).timestamp() * 1000)
response = make_query_request(signoz, token, start_2h, end_ms, [query])
assert response.status_code == HTTPStatus.OK
assert response.status_code == HTTPStatus.NOT_FOUND
data = response.json()
warnings = get_all_warnings(data)
assert any("whatevergoennnsgoeshere" in w["message"] and "has never been received" in w["message"] for w in warnings), f"expected never-seen metric warning, got: {warnings}"
assert get_error_message(response.json()) == "could not find the metric whatevergoennnsgoeshere"
def test_non_existent_internal_metrics_returns_no_warning(