Compare commits

..

3 Commits

Author SHA1 Message Date
Ashwin Bhatkal
446cfd07f9 test: add tests for decodeURIComponent fallback in useVariablesFromUrl 2026-03-24 21:27:03 +05:30
Ashwin Bhatkal
fc5a9e3d51 fix: fallback to raw param if decodeURIComponent fails for dashboard variables 2026-03-24 20:05:39 +05:30
Pandey
234716df53 fix(querier): return proper HTTP status for PromQL timeout errors (#10689)
Some checks are pending
build-staging / prepare (push) Waiting to run
build-staging / js-build (push) Blocked by required conditions
build-staging / go-build (push) Blocked by required conditions
build-staging / staging (push) Blocked by required conditions
Release Drafter / update_release_draft (push) Waiting to run
* fix(querier): return proper HTTP status for PromQL timeout errors

PromQL queries hitting the context deadline were incorrectly returning
400 Bad Request with "invalid_input" because enhancePromQLError
unconditionally wrapped all errors as TypeInvalidInput. Extract
tryEnhancePromQLExecError to properly classify timeout, cancellation,
and storage errors before falling through to parse error handling.

Also make the PromQL engine timeout configurable via prometheus.timeout
config (default 2m) instead of hardcoding it.

* chore: refactor files

* fix(prometheus): validate timeout config and fix test setups

Add validation in prometheus.Config to reject zero timeout. Update all
test files to explicitly set Timeout: 2 * time.Minute in prometheus.Config
literals to avoid immediate query timeouts.
2026-03-24 13:32:45 +00:00
16 changed files with 110 additions and 45 deletions

View File

@@ -144,6 +144,8 @@ telemetrystore:
##################### Prometheus #####################
prometheus:
# The maximum time a PromQL query is allowed to run before being aborted.
timeout: 2m
active_query_tracker:
# Whether to enable the active query tracker.
enabled: true

View File

@@ -257,7 +257,7 @@ func TestManager_TestNotification_SendUnmatched_PromRule(t *testing.T) {
WillReturnRows(samplesRows)
// Create Prometheus provider for this test
promProvider = prometheustest.New(context.Background(), instrumentationtest.New().ToProviderSettings(), prometheus.Config{}, store)
promProvider = prometheustest.New(context.Background(), instrumentationtest.New().ToProviderSettings(), prometheus.Config{Timeout: 2 * time.Minute}, store)
},
ManagerOptionsHook: func(opts *rules.ManagerOptions) {
// Set Prometheus provider for PromQL queries

View File

@@ -226,6 +226,41 @@ describe('useVariablesFromUrl', () => {
expect(urlVariables.undefinedVar).toBeUndefined();
});
it('should return empty object when URL param contains a bare % that makes decodeURIComponent throw', () => {
// Simulate a URL where the variables param is a raw JSON string containing a literal %
// (e.g. a metric value like "cpu_usage_50%"). URLSearchParams.get() returns the value
// as-is when it was set directly; if it contains a bare %, decodeURIComponent throws a URIError.
const rawJson = JSON.stringify({ threshold: '50%' });
const history = createMemoryHistory({
initialEntries: [`/?${QueryParams.variables}=${rawJson}`],
});
const { result } = renderHook(() => useVariablesFromUrl(), {
wrapper: ({ children }: { children: React.ReactNode }) => (
<Router history={history}>{children}</Router>
),
});
// Should parse successfully via the raw fallback, not throw or return {}
const vars = result.current.getUrlVariables();
expect(vars).toEqual({ threshold: '50%' });
});
it('should return empty object when URL param is completely unparseable', () => {
// A value that fails both decodeURIComponent and JSON.parse
const history = createMemoryHistory({
initialEntries: [`/?${QueryParams.variables}=not-json-%ZZ`],
});
const { result } = renderHook(() => useVariablesFromUrl(), {
wrapper: ({ children }: { children: React.ReactNode }) => (
<Router history={history}>{children}</Router>
),
});
expect(result.current.getUrlVariables()).toEqual({});
});
it('should update variables with array values correctly', () => {
const history = createMemoryHistory({
initialEntries: ['/'],

View File

@@ -32,7 +32,14 @@ const useVariablesFromUrl = (): UseVariablesFromUrlReturn => {
}
try {
return JSON.parse(decodeURIComponent(variablesParam));
const decoded = ((): string => {
try {
return decodeURIComponent(variablesParam);
} catch {
return variablesParam;
}
})();
return JSON.parse(decoded);
} catch (error) {
Sentry.captureEvent({
message: `Failed to parse dashboard variables from URL: ${error}`,

View File

@@ -3,6 +3,7 @@ package prometheus
import (
"time"
"github.com/SigNoz/signoz/pkg/errors"
"github.com/SigNoz/signoz/pkg/factory"
)
@@ -20,6 +21,9 @@ type Config struct {
//
// If not set, the prometheus default is used (currently 5m).
LookbackDelta time.Duration `mapstructure:"lookback_delta"`
// Timeout is the maximum time a query is allowed to run before being aborted.
Timeout time.Duration `mapstructure:"timeout"`
}
func NewConfigFactory() factory.ConfigFactory {
@@ -33,10 +37,14 @@ func newConfig() factory.Config {
Path: "",
MaxConcurrent: 20,
},
Timeout: 2 * time.Minute,
}
}
func (c Config) Validate() error {
if c.Timeout <= 0 {
return errors.Newf(errors.TypeInvalidInput, errors.CodeInvalidInput, "prometheus::timeout must be greater than 0")
}
return nil
}

View File

@@ -1,3 +0,0 @@
package prometheus
const FingerprintAsPromLabelName = "fingerprint"

View File

@@ -2,7 +2,6 @@ package prometheus
import (
"log/slog"
"time"
"github.com/prometheus/prometheus/promql"
)
@@ -21,7 +20,7 @@ func NewEngine(logger *slog.Logger, cfg Config) *Engine {
Logger: logger,
Reg: nil,
MaxSamples: 5_0000_000,
Timeout: 2 * time.Minute,
Timeout: cfg.Timeout,
ActiveQueryTracker: activeQueryTracker,
LookbackDelta: cfg.LookbackDelta,
})

View File

@@ -6,6 +6,8 @@ import (
"github.com/prometheus/prometheus/promql"
)
const FingerprintAsPromLabelName string = "fingerprint"
func RemoveExtraLabels(res *promql.Result, labelsToRemove ...string) error {
if len(labelsToRemove) == 0 || res == nil {
return nil

View File

@@ -36,6 +36,28 @@ var unquotedDottedNamePattern = regexp.MustCompile(`(?:^|[{,(\s])([a-zA-Z_][a-zA
// This is a common mistake when migrating to UTF-8 syntax.
var quotedMetricOutsideBracesPattern = regexp.MustCompile(`"([^"]+)"\s*\{`)
// tryEnhancePromQLExecError attempts to convert a PromQL execution error into
// a properly typed error. Returns nil if the error is not a recognized execution error.
func tryEnhancePromQLExecError(execErr error) error {
var eqc promql.ErrQueryCanceled
var eqt promql.ErrQueryTimeout
var es promql.ErrStorage
switch {
case errors.As(execErr, &eqc):
return errors.Newf(errors.TypeCanceled, errors.CodeCanceled, "query canceled").WithAdditional(eqc.Error())
case errors.As(execErr, &eqt):
return errors.Newf(errors.TypeTimeout, errors.CodeTimeout, "query timed out").WithAdditional(eqt.Error())
case errors.Is(execErr, context.DeadlineExceeded):
return errors.Newf(errors.TypeTimeout, errors.CodeTimeout, "query timed out")
case errors.Is(execErr, context.Canceled):
return errors.Newf(errors.TypeCanceled, errors.CodeCanceled, "query canceled")
case errors.As(execErr, &es):
return errors.Newf(errors.TypeInternal, errors.CodeInternal, "query execution error: %v", execErr)
default:
return nil
}
}
// enhancePromQLError adds helpful context to PromQL parse errors,
// particularly for UTF-8 syntax migration issues where metric and label
// names containing dots need to be quoted.
@@ -213,27 +235,20 @@ func (q *promqlQuery) Execute(ctx context.Context) (*qbv5.Result, error) {
time.Unix(0, end),
q.query.Step.Duration,
)
if err != nil {
// NewRangeQuery can fail with execution errors (e.g. context deadline exceeded)
// during the query queue/scheduling stage, not just parse errors.
if err := tryEnhancePromQLExecError(err); err != nil {
return nil, err
}
return nil, enhancePromQLError(query, err)
}
res := qry.Exec(ctx)
if res.Err != nil {
var eqc promql.ErrQueryCanceled
var eqt promql.ErrQueryTimeout
var es promql.ErrStorage
switch {
case errors.As(res.Err, &eqc):
return nil, errors.Newf(errors.TypeCanceled, errors.CodeCanceled, "query canceled")
case errors.As(res.Err, &eqt):
return nil, errors.Newf(errors.TypeTimeout, errors.CodeTimeout, "query timeout")
case errors.As(res.Err, &es):
return nil, errors.Newf(errors.TypeInternal, errors.CodeInternal, "query execution error: %v", res.Err)
}
if errors.Is(res.Err, context.Canceled) {
return nil, errors.Newf(errors.TypeCanceled, errors.CodeCanceled, "query canceled")
if err := tryEnhancePromQLExecError(res.Err); err != nil {
return nil, err
}
return nil, errors.Newf(errors.TypeInternal, errors.CodeInternal, "query execution error: %v", res.Err)

View File

@@ -1407,7 +1407,7 @@ func Test_querier_Traces_runWindowBasedListQueryDesc(t *testing.T) {
slog.Default(),
nil,
telemetryStore,
prometheustest.New(context.Background(), instrumentationtest.New().ToProviderSettings(), prometheus.Config{}, telemetryStore),
prometheustest.New(context.Background(), instrumentationtest.New().ToProviderSettings(), prometheus.Config{Timeout: 2 * time.Minute}, telemetryStore),
"",
time.Duration(time.Second),
nil,
@@ -1633,7 +1633,7 @@ func Test_querier_Traces_runWindowBasedListQueryAsc(t *testing.T) {
slog.Default(),
nil,
telemetryStore,
prometheustest.New(context.Background(), instrumentationtest.New().ToProviderSettings(), prometheus.Config{}, telemetryStore),
prometheustest.New(context.Background(), instrumentationtest.New().ToProviderSettings(), prometheus.Config{Timeout: 2 * time.Minute}, telemetryStore),
"",
time.Duration(time.Second),
nil,
@@ -1934,7 +1934,7 @@ func Test_querier_Logs_runWindowBasedListQueryDesc(t *testing.T) {
slog.Default(),
nil,
telemetryStore,
prometheustest.New(context.Background(), instrumentationtest.New().ToProviderSettings(), prometheus.Config{}, telemetryStore),
prometheustest.New(context.Background(), instrumentationtest.New().ToProviderSettings(), prometheus.Config{Timeout: 2 * time.Minute}, telemetryStore),
"",
time.Duration(time.Second),
nil,
@@ -2162,7 +2162,7 @@ func Test_querier_Logs_runWindowBasedListQueryAsc(t *testing.T) {
slog.Default(),
nil,
telemetryStore,
prometheustest.New(context.Background(), instrumentationtest.New().ToProviderSettings(), prometheus.Config{}, telemetryStore),
prometheustest.New(context.Background(), instrumentationtest.New().ToProviderSettings(), prometheus.Config{Timeout: 2 * time.Minute}, telemetryStore),
"",
time.Duration(time.Second),
nil,

View File

@@ -1459,7 +1459,7 @@ func Test_querier_Traces_runWindowBasedListQueryDesc(t *testing.T) {
slog.Default(),
nil,
telemetryStore,
prometheustest.New(context.Background(), instrumentationtest.New().ToProviderSettings(), prometheus.Config{}, telemetryStore),
prometheustest.New(context.Background(), instrumentationtest.New().ToProviderSettings(), prometheus.Config{Timeout: 2 * time.Minute}, telemetryStore),
"",
time.Duration(time.Second),
nil,
@@ -1685,7 +1685,7 @@ func Test_querier_Traces_runWindowBasedListQueryAsc(t *testing.T) {
slog.Default(),
nil,
telemetryStore,
prometheustest.New(context.Background(), instrumentationtest.New().ToProviderSettings(), prometheus.Config{}, telemetryStore),
prometheustest.New(context.Background(), instrumentationtest.New().ToProviderSettings(), prometheus.Config{Timeout: 2 * time.Minute}, telemetryStore),
"",
time.Duration(time.Second),
nil,
@@ -1985,7 +1985,7 @@ func Test_querier_Logs_runWindowBasedListQueryDesc(t *testing.T) {
slog.Default(),
nil,
telemetryStore,
prometheustest.New(context.Background(), instrumentationtest.New().ToProviderSettings(), prometheus.Config{}, telemetryStore),
prometheustest.New(context.Background(), instrumentationtest.New().ToProviderSettings(), prometheus.Config{Timeout: 2 * time.Minute}, telemetryStore),
"",
time.Duration(time.Second),
nil,
@@ -2213,7 +2213,7 @@ func Test_querier_Logs_runWindowBasedListQueryAsc(t *testing.T) {
slog.Default(),
nil,
telemetryStore,
prometheustest.New(context.Background(), instrumentationtest.New().ToProviderSettings(), prometheus.Config{}, telemetryStore),
prometheustest.New(context.Background(), instrumentationtest.New().ToProviderSettings(), prometheus.Config{Timeout: 2 * time.Minute}, telemetryStore),
"",
time.Duration(time.Second),
nil,

View File

@@ -698,7 +698,7 @@ func TestBaseRule_FilterNewSeries(t *testing.T) {
slog.Default(),
nil,
telemetryStore,
prometheustest.New(context.Background(), settings, prometheus.Config{}, telemetryStore),
prometheustest.New(context.Background(), settings, prometheus.Config{Timeout: 2 * time.Minute}, telemetryStore),
"",
time.Second,
nil,

View File

@@ -253,7 +253,7 @@ func TestManager_TestNotification_SendUnmatched_PromRule(t *testing.T) {
WillReturnRows(samplesRows)
// Create Prometheus provider for this test
promProvider = prometheustest.New(context.Background(), instrumentationtest.New().ToProviderSettings(), prometheus.Config{}, store)
promProvider = prometheustest.New(context.Background(), instrumentationtest.New().ToProviderSettings(), prometheus.Config{Timeout: 2 * time.Minute}, store)
},
ManagerOptionsHook: func(opts *ManagerOptions) {
// Set Prometheus provider for PromQL queries

View File

@@ -99,7 +99,7 @@ func NewTestManager(t *testing.T, testOpts *TestManagerOptions) *Manager {
options := clickhouseReader.NewOptions("", "", "archiveNamespace")
providerSettings := instrumentationtest.New().ToProviderSettings()
prometheus := prometheustest.New(context.Background(), providerSettings, prometheus.Config{}, telemetryStore)
prometheus := prometheustest.New(context.Background(), providerSettings, prometheus.Config{Timeout: 2 * time.Minute}, telemetryStore)
reader := clickhouseReader.NewReader(
instrumentationtest.New().Logger(),
nil,

View File

@@ -940,7 +940,7 @@ func TestPromRuleUnitCombinations(t *testing.T) {
).
WillReturnRows(samplesRows)
promProvider := prometheustest.New(context.Background(), instrumentationtest.New().ToProviderSettings(), prometheus.Config{}, telemetryStore)
promProvider := prometheustest.New(context.Background(), instrumentationtest.New().ToProviderSettings(), prometheus.Config{Timeout: 2 * time.Minute}, telemetryStore)
postableRule.RuleCondition.CompareOp = ruletypes.CompareOp(c.compareOp)
postableRule.RuleCondition.MatchType = ruletypes.MatchType(c.matchType)
@@ -1061,7 +1061,7 @@ func _Enable_this_after_9146_issue_fix_is_merged_TestPromRuleNoData(t *testing.T
WithArgs("test_metric", "__name__", "test_metric").
WillReturnRows(fingerprintRows)
promProvider := prometheustest.New(context.Background(), instrumentationtest.New().ToProviderSettings(), prometheus.Config{}, telemetryStore)
promProvider := prometheustest.New(context.Background(), instrumentationtest.New().ToProviderSettings(), prometheus.Config{Timeout: 2 * time.Minute}, telemetryStore)
var target float64 = 0
postableRule.RuleCondition.Thresholds = &ruletypes.RuleThresholdData{
@@ -1281,7 +1281,7 @@ func TestMultipleThresholdPromRule(t *testing.T) {
).
WillReturnRows(samplesRows)
promProvider := prometheustest.New(context.Background(), instrumentationtest.New().ToProviderSettings(), prometheus.Config{}, telemetryStore)
promProvider := prometheustest.New(context.Background(), instrumentationtest.New().ToProviderSettings(), prometheus.Config{Timeout: 2 * time.Minute}, telemetryStore)
postableRule.RuleCondition.CompareOp = ruletypes.CompareOp(c.compareOp)
postableRule.RuleCondition.MatchType = ruletypes.MatchType(c.matchType)
@@ -1441,7 +1441,7 @@ func TestPromRule_NoData(t *testing.T) {
promProvider := prometheustest.New(
context.Background(),
instrumentationtest.New().ToProviderSettings(),
prometheus.Config{},
prometheus.Config{Timeout: 2 * time.Minute},
telemetryStore,
)
defer func() {
@@ -1590,7 +1590,7 @@ func TestPromRule_NoData_AbsentFor(t *testing.T) {
promProvider := prometheustest.New(
context.Background(),
instrumentationtest.New().ToProviderSettings(),
prometheus.Config{},
prometheus.Config{Timeout: 2 * time.Minute},
telemetryStore,
)
defer func() {
@@ -1748,7 +1748,7 @@ func TestPromRuleEval_RequireMinPoints(t *testing.T) {
promProvider := prometheustest.New(
context.Background(),
instrumentationtest.New().ToProviderSettings(),
prometheus.Config{LookbackDelta: lookBackDelta},
prometheus.Config{Timeout: 2 * time.Minute, LookbackDelta: lookBackDelta},
telemetryStore,
)
defer func() {

View File

@@ -779,7 +779,7 @@ func TestThresholdRuleUnitCombinations(t *testing.T) {
},
)
require.NoError(t, err)
reader := clickhouseReader.NewReader(slog.Default(), nil, telemetryStore, prometheustest.New(context.Background(), instrumentationtest.New().ToProviderSettings(), prometheus.Config{}, telemetryStore), "", time.Duration(time.Second), nil, readerCache, options)
reader := clickhouseReader.NewReader(slog.Default(), nil, telemetryStore, prometheustest.New(context.Background(), instrumentationtest.New().ToProviderSettings(), prometheus.Config{Timeout: 2 * time.Minute}, telemetryStore), "", time.Duration(time.Second), nil, readerCache, options)
rule, err := NewThresholdRule("69", valuer.GenerateUUID(), &postableRule, reader, nil, logger)
rule.TemporalityMap = map[string]map[v3.Temporality]bool{
"signoz_calls_total": {
@@ -894,7 +894,7 @@ func TestThresholdRuleNoData(t *testing.T) {
)
assert.NoError(t, err)
options := clickhouseReader.NewOptions("", "", "archiveNamespace")
reader := clickhouseReader.NewReader(slog.Default(), nil, telemetryStore, prometheustest.New(context.Background(), instrumentationtest.New().ToProviderSettings(), prometheus.Config{}, telemetryStore), "", time.Duration(time.Second), nil, readerCache, options)
reader := clickhouseReader.NewReader(slog.Default(), nil, telemetryStore, prometheustest.New(context.Background(), instrumentationtest.New().ToProviderSettings(), prometheus.Config{Timeout: 2 * time.Minute}, telemetryStore), "", time.Duration(time.Second), nil, readerCache, options)
rule, err := NewThresholdRule("69", valuer.GenerateUUID(), &postableRule, reader, nil, logger)
rule.TemporalityMap = map[string]map[v3.Temporality]bool{
@@ -1014,7 +1014,7 @@ func TestThresholdRuleTracesLink(t *testing.T) {
}
options := clickhouseReader.NewOptions("", "", "archiveNamespace")
reader := clickhouseReader.NewReader(slog.Default(), nil, telemetryStore, prometheustest.New(context.Background(), instrumentationtest.New().ToProviderSettings(), prometheus.Config{}, telemetryStore), "", time.Duration(time.Second), nil, nil, options)
reader := clickhouseReader.NewReader(slog.Default(), nil, telemetryStore, prometheustest.New(context.Background(), instrumentationtest.New().ToProviderSettings(), prometheus.Config{Timeout: 2 * time.Minute}, telemetryStore), "", time.Duration(time.Second), nil, nil, options)
rule, err := NewThresholdRule("69", valuer.GenerateUUID(), &postableRule, reader, nil, logger)
rule.TemporalityMap = map[string]map[v3.Temporality]bool{
@@ -1151,7 +1151,7 @@ func TestThresholdRuleLogsLink(t *testing.T) {
}
options := clickhouseReader.NewOptions("", "", "archiveNamespace")
reader := clickhouseReader.NewReader(slog.Default(), nil, telemetryStore, prometheustest.New(context.Background(), instrumentationtest.New().ToProviderSettings(), prometheus.Config{}, telemetryStore), "", time.Duration(time.Second), nil, nil, options)
reader := clickhouseReader.NewReader(slog.Default(), nil, telemetryStore, prometheustest.New(context.Background(), instrumentationtest.New().ToProviderSettings(), prometheus.Config{Timeout: 2 * time.Minute}, telemetryStore), "", time.Duration(time.Second), nil, nil, options)
rule, err := NewThresholdRule("69", valuer.GenerateUUID(), &postableRule, reader, nil, logger)
rule.TemporalityMap = map[string]map[v3.Temporality]bool{
@@ -1418,7 +1418,7 @@ func TestMultipleThresholdRule(t *testing.T) {
},
)
require.NoError(t, err)
reader := clickhouseReader.NewReader(slog.Default(), nil, telemetryStore, prometheustest.New(context.Background(), instrumentationtest.New().ToProviderSettings(), prometheus.Config{}, telemetryStore), "", time.Second, nil, readerCache, options)
reader := clickhouseReader.NewReader(slog.Default(), nil, telemetryStore, prometheustest.New(context.Background(), instrumentationtest.New().ToProviderSettings(), prometheus.Config{Timeout: 2 * time.Minute}, telemetryStore), "", time.Second, nil, readerCache, options)
rule, err := NewThresholdRule("69", valuer.GenerateUUID(), &postableRule, reader, nil, logger)
rule.TemporalityMap = map[string]map[v3.Temporality]bool{
"signoz_calls_total": {
@@ -2220,7 +2220,7 @@ func TestThresholdEval_RequireMinPoints(t *testing.T) {
)
require.NoError(t, err)
prometheusProvider := prometheustest.New(context.Background(), instrumentationtest.New().ToProviderSettings(), prometheus.Config{}, telemetryStore)
prometheusProvider := prometheustest.New(context.Background(), instrumentationtest.New().ToProviderSettings(), prometheus.Config{Timeout: 2 * time.Minute}, telemetryStore)
reader := clickhouseReader.NewReader(slog.Default(), nil, telemetryStore, prometheusProvider, "", time.Second, nil, readerCache, options)
rule, err := NewThresholdRule("some-id", valuer.GenerateUUID(), &postableRule, reader, nil, logger)