Compare commits

...

1 Commits

Author SHA1 Message Date
Tushar Vats
c8c050053c fix: draft 2026-06-11 01:21:25 +05:30
18 changed files with 1211 additions and 5 deletions

View File

@@ -101,6 +101,10 @@ func (h *handler) QueryRange(rw http.ResponseWriter, req *http.Request) {
h.community.QueryRange(rw, req)
}
func (h *handler) QueryRangePreview(rw http.ResponseWriter, req *http.Request) {
h.community.QueryRangePreview(rw, req)
}
func (h *handler) QueryRawStream(rw http.ResponseWriter, req *http.Request) {
h.community.QueryRawStream(rw, req)
}

2
go.mod
View File

@@ -180,7 +180,7 @@ require (
github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.13.1 // indirect
github.com/Azure/azure-sdk-for-go/sdk/internal v1.11.2 // indirect
github.com/AzureAD/microsoft-authentication-library-for-go v1.6.0 // indirect
github.com/ClickHouse/ch-go v0.71.0 // indirect
github.com/ClickHouse/ch-go v0.71.0
github.com/Masterminds/squirrel v1.5.4 // indirect
github.com/Yiling-J/theine-go v0.6.2 // indirect
github.com/alecthomas/units v0.0.0-20240927000941-0f3dac36c52b

View File

@@ -451,6 +451,23 @@ func (provider *provider) addQuerierRoutes(router *mux.Router) error {
return err
}
if err := router.Handle("/api/v5/query_range/dry_run", handler.New(provider.authzMiddleware.ViewAccess(provider.querierHandler.QueryRangePreview), handler.OpenAPIDef{
ID: "QueryRangeDryRunV5",
Tags: []string{"querier"},
Summary: "Query range dry run",
Description: "Validate a composite query without executing it. Accepts the same payload as the query range endpoint. By default returns a lightweight per-query verdict (valid/error/warnings) plus a top-level score (0-100 granule-skip selectivity; higher is better). Pass ?verbose=true to also include the rendered underlying ClickHouse statement(s) for each query (each carrying its own granuleSkipScore). Pass ?explain=plan|estimate to attach the corresponding ClickHouse EXPLAIN output to each statement (implies verbose). Pass ?score=false to skip the score for the cheapest validation-only preview. Intended for agentic/dry-run consumption: per-query errors are reported in the response rather than failing the whole request.",
Request: new(qbtypes.QueryRangeRequest),
RequestQuery: new(qbtypes.QueryRangePreviewParams),
RequestContentType: "application/json",
Response: new(qbtypes.QueryRangePreviewResponse),
ResponseContentType: "application/json",
SuccessStatusCode: http.StatusOK,
ErrorStatusCodes: []int{http.StatusBadRequest},
SecuritySchemes: newSecuritySchemes(types.RoleViewer),
})).Methods(http.MethodPost).GetError(); err != nil {
return err
}
if err := router.Handle("/api/v5/substitute_vars", handler.New(provider.authzMiddleware.ViewAccess(provider.querierHandler.ReplaceVariables), handler.OpenAPIDef{
ID: "ReplaceVariables",
Tags: []string{"querier"},

View File

@@ -0,0 +1,94 @@
package clickhouseprometheus
import (
"context"
"sync"
"github.com/SigNoz/signoz/pkg/prometheus"
"github.com/prometheus/prometheus/prompb"
"github.com/prometheus/prometheus/storage"
)
// statementRecorder collects the ClickHouse statements a PromQL evaluation would
// run. It is safe for concurrent use because the Prometheus engine may evaluate
// (and therefore Select) multiple selectors concurrently.
type statementRecorder struct {
mu sync.Mutex
statements []prometheus.CapturedStatement
}
func (r *statementRecorder) record(query string, args []any) {
r.mu.Lock()
defer r.mu.Unlock()
r.statements = append(r.statements, prometheus.CapturedStatement{Query: query, Args: args})
}
func (r *statementRecorder) Statements() []prometheus.CapturedStatement {
r.mu.Lock()
defer r.mu.Unlock()
out := make([]prometheus.CapturedStatement, len(r.statements))
copy(out, r.statements)
return out
}
// captureClient is a remote.ReadClient that builds the same ClickHouse SQL as
// the real client but records it instead of executing, returning an empty
// result so the engine completes without touching ClickHouse. It records the
// self-contained samples query per selector (which embeds the series-selection
// subquery), so the recorded statement reflects the actual data read.
type captureClient struct {
*client
recorder *statementRecorder
}
func (c *captureClient) Read(ctx context.Context, query *prompb.Query, _ bool) (storage.SeriesSet, error) {
// Raw-SQL passthrough ({job="rawsql", query="..."}): record the raw query.
if len(query.Matchers) == 2 {
var hasJob bool
var queryString string
for _, m := range query.Matchers {
if m.Type == prompb.LabelMatcher_EQ && m.Name == "job" && m.Value == "rawsql" {
hasJob = true
}
if m.Type == prompb.LabelMatcher_EQ && m.Name == "query" {
queryString = m.Value
}
}
if hasJob && queryString != "" {
c.recorder.record(queryString, nil)
return storage.EmptySeriesSet(), nil
}
}
var metricName string
for _, matcher := range query.Matchers {
if matcher.Name == "__name__" {
metricName = matcher.Value
}
}
// Build the series-selection subquery and the self-contained samples query
// exactly as the executing path would, but only record them.
subQuery, args, err := c.client.queryToClickhouseQuery(ctx, query, metricName, true)
if err != nil {
return nil, err
}
samplesQuery, samplesArgs := buildSamplesQuery(int64(query.StartTimestampMs), int64(query.EndTimestampMs), metricName, subQuery, args)
c.recorder.record(samplesQuery, samplesArgs)
return storage.EmptySeriesSet(), nil
}
// captureQueryable adapts the capturing read client to storage.Queryable,
// mirroring how the real provider wraps its querier.
type captureQueryable struct {
inner storage.SampleAndChunkQueryable
}
func (c captureQueryable) Querier(mint, maxt int64) (storage.Querier, error) {
querier, err := c.inner.Querier(mint, maxt)
if err != nil {
return nil, err
}
return storage.NewMergeQuerier(nil, []storage.Querier{querier}, storage.ChainedSeriesMerge), nil
}

View File

@@ -204,8 +204,11 @@ func (client *client) getFingerprintsFromClickhouseQuery(ctx context.Context, qu
return fingerprints, nil
}
func (client *client) querySamples(ctx context.Context, start int64, end int64, fingerprints map[uint64][]prompb.Label, metricName string, subQuery string, args []any) ([]*prompb.TimeSeries, error) {
ctx = client.withClickhousePrometheusContext(ctx, "querySamples")
// buildSamplesQuery renders the samples SQL (and its args) that fetches the
// data points for the series selected by subQuery. It embeds the series
// selection as a subquery, so the returned statement is self-contained — the
// dry-run/preview path renders it without executing.
func buildSamplesQuery(start int64, end int64, metricName string, subQuery string, args []any) (string, []any) {
argCount := len(args)
query := fmt.Sprintf(`
@@ -217,6 +220,13 @@ func (client *client) querySamples(ctx context.Context, start int64, end int64,
allArgs := append([]any{metricName}, args...)
allArgs = append(allArgs, start, end)
return query, allArgs
}
func (client *client) querySamples(ctx context.Context, start int64, end int64, fingerprints map[uint64][]prompb.Label, metricName string, subQuery string, args []any) ([]*prompb.TimeSeries, error) {
ctx = client.withClickhousePrometheusContext(ctx, "querySamples")
query, allArgs := buildSamplesQuery(start, end, metricName, subQuery, args)
rows, err := client.telemetryStore.ClickhouseDB().Query(ctx, query, allArgs...)
if err != nil {

View File

@@ -5,8 +5,8 @@ import (
"sort"
"testing"
"github.com/SigNoz/signoz/pkg/telemetrystore/telemetrystoretest"
cmock "github.com/SigNoz/clickhouse-go-mock"
"github.com/SigNoz/signoz/pkg/telemetrystore/telemetrystoretest"
"github.com/stretchr/testify/require"
"github.com/DATA-DOG/go-sqlmock"

View File

@@ -64,3 +64,17 @@ func (provider *provider) Querier(mint, maxt int64) (storage.Querier, error) {
return storage.NewMergeQuerier(nil, []storage.Querier{querier}, storage.ChainedSeriesMerge), nil
}
// CapturingStorage implements prometheus.StatementCapturer: it returns a Storage
// that records the ClickHouse SQL each selector would run (without executing
// it) and a recorder to read the captured statements back. A fresh recorder is
// created per call so concurrent dry-runs don't share state.
func (provider *provider) CapturingStorage() (storage.Queryable, prometheus.StatementRecorder) {
recorder := &statementRecorder{}
capture := &captureClient{
client: &client{settings: provider.settings, telemetryStore: provider.telemetryStore},
recorder: recorder,
}
queryable := remote.NewSampleAndChunkQueryableClient(capture, labels.EmptyLabels(), []*labels.Matcher{}, false, stCallback)
return captureQueryable{inner: queryable}, recorder
}

View File

@@ -15,3 +15,25 @@ type Prometheus interface {
Storage() storage.Queryable
Parser() Parser
}
// CapturedStatement is one underlying datastore statement that a PromQL query would
// run, captured without executing it.
type CapturedStatement struct {
Query string
Args []any
}
// StatementRecorder collects the Statements captured while a PromQL query is
// evaluated against a capturing Storage (see StatementCapturer).
type StatementRecorder interface {
Statements() []CapturedStatement
}
// StatementCapturer is an optional capability of a Prometheus provider: it
// returns a Storage that records the datastore statement(s) each Select would
// run — without executing them — together with a recorder to read them back.
// The query dry-run path discovers it via a type assertion, so providers that
// do not implement it simply expose no underlying SQL.
type StatementCapturer interface {
CapturingStorage() (storage.Queryable, StatementRecorder)
}

View File

@@ -72,6 +72,70 @@ func (handler *handler) QueryRange(rw http.ResponseWriter, req *http.Request) {
render.Success(rw, http.StatusOK, queryRangeResponse)
}
// QueryRangePreview is the dry-run counterpart of QueryRange. It accepts the
// same payload, validates and renders the underlying SQL/PromQL for each query
// without executing it, and returns the per-query statements. When the
// ?explain= query parameter selects a ClickHouse EXPLAIN variant, the rendered
// SQL is EXPLAIN-ed against the telemetry store and the output is attached to
// each statement.
func (handler *handler) QueryRangePreview(rw http.ResponseWriter, req *http.Request) {
ctx := req.Context()
ctx = ctxtypes.NewContextWithCommentVals(ctx, map[string]string{
instrumentationtypes.CodeNamespace: "querier",
instrumentationtypes.CodeFunctionName: "QueryRangePreview",
})
claims, err := authtypes.ClaimsFromContext(ctx)
if err != nil {
render.Error(rw, err)
return
}
var queryRangeRequest qbtypes.QueryRangeRequest
if err := json.NewDecoder(req.Body).Decode(&queryRangeRequest); err != nil {
render.Error(rw, err)
return
}
// NB: validation is intentionally NOT done here. QueryRangePreview checks
// request-level invariants (aborting on failure) and validates each query's
// spec individually, reporting per-query structural errors in the response
// instead of failing fast on the first one — the point of the dry-run.
orgID, err := valuer.NewUUID(claims.OrgID)
if err != nil {
render.Error(rw, err)
return
}
explain, err := ParseExplainVariant(req.URL.Query().Get("explain"))
if err != nil {
render.Error(rw, err)
return
}
verbose, err := ParseVerbose(req.URL.Query().Get("verbose"))
if err != nil {
render.Error(rw, err)
return
}
score, err := ParseScore(req.URL.Query().Get("score"))
if err != nil {
render.Error(rw, err)
return
}
preview, err := handler.querier.QueryRangePreview(ctx, orgID, &queryRangeRequest, qbtypes.QueryRangePreviewOptions{Explain: explain, Verbose: verbose, IncludeGranuleSkipScore: score})
if err != nil {
render.Error(rw, err)
return
}
render.Success(rw, http.StatusOK, preview)
}
func (handler *handler) QueryRawStream(rw http.ResponseWriter, req *http.Request) {
ctx := req.Context()

View File

@@ -194,6 +194,12 @@ func (q *builderQuery[T]) isWindowList() bool {
return true
}
// Statement renders the SQL statement for the builder query without executing
// it. It is used by the dry-run/preview path.
func (q *builderQuery[T]) Statement(ctx context.Context) (*qbtypes.Statement, error) {
return q.stmtBuilder.Build(ctx, q.fromMS, q.toMS, q.kind, q.spec, q.variables)
}
func (q *builderQuery[T]) Execute(ctx context.Context) (*qbtypes.Result, error) {
// can we do window based pagination?

View File

@@ -99,6 +99,16 @@ func (q *chSQLQuery) renderVars(query string, vars map[string]qbtypes.VariableIt
return newQuery.String(), nil
}
// Statement renders the SQL statement for the ClickHouse SQL query without
// executing it. It is used by the dry-run/preview path.
func (q *chSQLQuery) Statement(_ context.Context) (*qbtypes.Statement, error) {
rendered, err := q.renderVars(q.query.Query, q.vars, q.fromMS, q.toMS)
if err != nil {
return nil, err
}
return &qbtypes.Statement{Query: rendered, Args: q.args}, nil
}
func (q *chSQLQuery) Execute(ctx context.Context) (*qbtypes.Result, error) {
ctx = ctxtypes.NewContextWithCommentVals(ctx, map[string]string{
instrumentationtypes.QueryDuration: instrumentationtypes.DurationBucket(q.fromMS, q.toMS),

View File

@@ -12,6 +12,11 @@ import (
type Querier interface {
QueryRange(ctx context.Context, orgID valuer.UUID, req *qbtypes.QueryRangeRequest) (*qbtypes.QueryRangeResponse, error)
QueryRawStream(ctx context.Context, orgID valuer.UUID, req *qbtypes.QueryRangeRequest, client *qbtypes.RawStream)
// QueryRangePreview validates and renders the queries in req without
// executing them. opts controls dry-run behavior such as which
// EXPLAIN variant to attach to the response; the zero value performs
// a validation-only preview with no EXPLAIN.
QueryRangePreview(ctx context.Context, orgID valuer.UUID, req *qbtypes.QueryRangeRequest, opts qbtypes.QueryRangePreviewOptions) (*qbtypes.QueryRangePreviewResponse, error)
}
// BucketCache is the interface for bucket-based caching.
@@ -24,6 +29,10 @@ type BucketCache interface {
type Handler interface {
QueryRange(rw http.ResponseWriter, req *http.Request)
// QueryRangePreview is the dry-run endpoint: it validates and renders the
// queries without executing them, optionally attaching ClickHouse EXPLAIN
// output selected by the ?explain= query parameter.
QueryRangePreview(rw http.ResponseWriter, req *http.Request)
QueryRawStream(rw http.ResponseWriter, req *http.Request)
ReplaceVariables(rw http.ResponseWriter, req *http.Request)
}

694
pkg/querier/preview.go Normal file
View File

@@ -0,0 +1,694 @@
package querier
import (
"context"
"encoding/json"
"fmt"
"math"
"reflect"
"slices"
"strings"
"sync"
chproto "github.com/ClickHouse/ch-go/proto"
"github.com/ClickHouse/clickhouse-go/v2"
"github.com/SigNoz/signoz/pkg/errors"
"github.com/SigNoz/signoz/pkg/querybuilder"
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
"github.com/SigNoz/signoz/pkg/valuer"
)
// statementProvider is implemented by query types that can render the
// underlying SQL/PromQL statement without executing it.
type statementProvider interface {
Statement(ctx context.Context) (*qbtypes.Statement, error)
}
// missingMetricNames returns the distinct metric names referenced by a metric
// builder query, in order of first appearance. It is used to name the metric(s)
// in the warning attached to a fully-missing-metric query. Returns nil for any
// non-metric query.
func missingMetricNames(env qbtypes.QueryEnvelope) []string {
spec, ok := env.Spec.(qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation])
if !ok {
return nil
}
names := make([]string, 0, len(spec.Aggregations))
for _, agg := range spec.Aggregations {
if agg.MetricName != "" && !slices.Contains(names, agg.MetricName) {
names = append(names, agg.MetricName)
}
}
return names
}
// clickhouseExplainClause maps a variant to the EXPLAIN clause understood
// by ClickHouse (i.e. what comes between EXPLAIN and the SELECT).
func clickhouseExplainClause(v qbtypes.ExplainVariant) (string, bool) {
switch v {
case qbtypes.ExplainVariantPlan:
return "PLAN", true
case qbtypes.ExplainVariantEstimate:
return "ESTIMATE", true
default:
return "", false
}
}
// ParseExplainVariant parses the ?explain= query parameter. An empty value
// (or "false") returns ExplainVariantNone. The literal "true" maps to PLAN
// for back-compat with simple ?explain=true. Otherwise the value must
// match one of the named variants.
func ParseExplainVariant(value string) (qbtypes.ExplainVariant, error) {
token := strings.ToLower(strings.TrimSpace(value))
switch token {
case "", "false":
return qbtypes.ExplainVariantNone, nil
case "true":
return qbtypes.ExplainVariantPlan, nil
}
v := qbtypes.ExplainVariant(token)
if _, ok := clickhouseExplainClause(v); !ok {
return qbtypes.ExplainVariantNone, errors.NewInvalidInputf(errors.CodeInvalidInput, "unsupported explain variant %q (allowed: plan, estimate)", token)
}
return v, nil
}
// parseBoolQueryParam parses a true/false query parameter. An empty value (or
// "false"/"0") is false; "true"/"1" is true. name is used only in the error
// message so each caller reports the parameter the user actually sent.
func parseBoolQueryParam(value, name string) (bool, error) {
switch strings.ToLower(strings.TrimSpace(value)) {
case "", "false", "0":
return false, nil
case "true", "1":
return true, nil
}
return false, errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid %s value %q (allowed: true, false)", name, value)
}
// ParseVerbose parses the ?verbose= query parameter. When true the preview
// includes the rendered ClickHouse statement(s); the default is a lightweight
// verdict-only preview (valid/error/warnings per query).
func ParseVerbose(value string) (bool, error) {
return parseBoolQueryParam(value, "verbose")
}
// ParseScore parses the ?score= query parameter. It defaults to TRUE: the
// top-level granuleSkipScore is computed unless explicitly disabled with
// score=false (which skips the granule-skip EXPLAIN round trips).
func ParseScore(value string) (bool, error) {
switch strings.ToLower(strings.TrimSpace(value)) {
case "", "true", "1":
return true, nil
case "false", "0":
return false, nil
}
return false, errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid score value %q (allowed: true, false)", value)
}
// QueryRangePreview validates each query in the composite query without
// executing it. By default it returns a lightweight per-query verdict
// (valid/error/warnings) plus the headline GranuleSkipScore (unless disabled via
// opts.IncludeGranuleSkipScore=false). When opts.Verbose (or Explain, which
// implies it) is set, it also renders the underlying ClickHouse statement(s)
// each query would run, with opts.Explain attaching EXPLAIN output and the
// per-statement GranuleSkipScore alongside each.
func (q *querier) QueryRangePreview(
ctx context.Context,
_ valuer.UUID,
req *qbtypes.QueryRangeRequest,
opts qbtypes.QueryRangePreviewOptions,
) (*qbtypes.QueryRangePreviewResponse, error) {
// The preview must transform the payload exactly as QueryRange does so the
// rendered SQL matches what the same payload will actually execute. Coerce
// the window to epoch milliseconds up front, just like QueryRange.
req.Start = querybuilder.ToMilliSecs(req.Start)
req.End = querybuilder.ToMilliSecs(req.End)
tmplVars := req.Variables
if tmplVars == nil {
tmplVars = make(map[string]qbtypes.VariableItem)
}
// Validate request-level invariants (time range, request type, unique
// names, …) up front — these are request-wide, so there is nothing per
// query to preview if they fail. Per-query spec validation is deliberately
// NOT done here: it runs per query below so each query's structural error is
// reported in its own QueryPreview instead of aborting the whole
// preview on the first one. validationOpts carries the request-type-specific
// options into that per-query validation.
validationOpts, err := req.ValidateRequestScope()
if err != nil {
return nil, err
}
// A query that only exists as a dependency of a trace operator (e.g. A and
// B in C := A => B) is not executed standalone, so it gets no statement of
// its own — matching QueryRange.
dependencyQueries, err := q.constructTraceOperatorDependencyMap(req.CompositeQuery.Queries)
if err != nil {
return nil, err
}
results := make(map[string]qbtypes.QueryPreview, len(req.CompositeQuery.Queries))
// Phase 1: normalize every query's spec (step interval + metric metadata)
// and capture the per-query warnings/errors. This runs for ALL queries —
// including trace-operator dependencies — before any statement is rendered,
// because a trace-operator query reads its siblings' specs at render time
// and they must already be normalized. adjustStepInterval and
// resolveMetricMetadata both patch the spec in place, so feed each a
// single-element slice and write the patched envelope back into the
// composite query. Doing it per-query (rather than once over all queries
// like QueryRange) lets us attribute each warning/error to the query that
// produced it, which is the whole point of a per-query preview report; the
// extra metadata lookups are acceptable on this low-volume dry-run path.
prepared := make(map[string]qbtypes.QueryPreview, len(req.CompositeQuery.Queries))
missingMetricQuerySet := make(map[string]bool)
for idx := range req.CompositeQuery.Queries {
name := req.CompositeQuery.Queries[idx].GetQueryName()
ps := qbtypes.QueryPreview{}
// Validate this query's spec on its own and attribute any structural
// error to it, instead of aborting the whole preview on the first bad
// query (the request-level invariants were already checked above). An
// invalid spec gets no step/metadata normalization or rendering.
if vErr := req.CompositeQuery.Queries[idx].Validate(validationOpts...); vErr != nil {
ps.Error = vErr
prepared[name] = ps
continue
}
env := []qbtypes.QueryEnvelope{req.CompositeQuery.Queries[idx]}
ps.Warnings = q.adjustStepInterval(env, req.Start, req.End)
missingMetricQueries, dormantMetricsWarningMsg, mErr := q.resolveMetricMetadata(ctx, env, req.Start, req.End)
if mErr != nil {
// Don't abort the whole preview: report this query's error and keep
// going so the agent sees every problem in one round trip.
ps.Error = mErr
} else {
if dormantMetricsWarningMsg != "" {
ps.Warnings = append(ps.Warnings, dormantMetricsWarningMsg)
}
if len(missingMetricQueries) > 0 {
missingMetricQuerySet[name] = true
// A fully-missing-metric query renders no SQL and returns an empty
// result, so flag it explicitly. resolveMetricMetadata only emits a
// (dormant) warning for external metrics it has seen before; when it
// stays silent — e.g. internal signoz.* metrics — the empty result
// would otherwise be unexplained, so attach a clear note naming the
// metric(s) the agent referenced.
if dormantMetricsWarningMsg == "" {
if metricNames := missingMetricNames(env[0]); len(metricNames) > 0 {
ps.Warnings = append(ps.Warnings, fmt.Sprintf(
"query %q references metric(s) %s with no data available; it will return an empty result",
name, strings.Join(metricNames, ", ")))
}
}
}
}
req.CompositeQuery.Queries[idx] = env[0]
prepared[name] = ps
}
// Phase 2: render the statement for each query that actually executes, and
// collect the ClickHouse-bound work (granuleSkipScore/EXPLAIN) to run concurrently.
var previewTasks []previewTask
for _, query := range req.CompositeQuery.Queries {
name := query.GetQueryName()
if query.GetType() != qbtypes.QueryTypeTraceOperator && dependencyQueries[name] {
continue
}
ps := prepared[name]
// Surface a phase-1 error (e.g. a not-found metric) without rendering.
if ps.Error != nil {
results[name] = ps
continue
}
// Every aggregation resolved to a missing metric: QueryRange returns an
// empty result for this query and renders no SQL. Mirror that.
if missingMetricQuerySet[name] {
results[name] = ps
continue
}
var provider qbtypes.Query
switch query.Type {
case qbtypes.QueryTypePromQL:
promQuery, ok := query.Spec.(qbtypes.PromQuery)
if !ok {
ps.Error = errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid promql query spec %T", query.Spec)
results[name] = ps
continue
}
provider = newPromqlQuery(q.logger, q.promEngine, promQuery, qbtypes.TimeRange{From: req.Start, To: req.End}, req.RequestType, tmplVars)
case qbtypes.QueryTypeClickHouseSQL:
chQuery, ok := query.Spec.(qbtypes.ClickHouseQuery)
if !ok {
ps.Error = errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid clickhouse query spec %T", query.Spec)
results[name] = ps
continue
}
provider = newchSQLQuery(q.logger, q.telemetryStore, chQuery, nil, qbtypes.TimeRange{From: req.Start, To: req.End}, req.RequestType, tmplVars)
case qbtypes.QueryTypeTraceOperator:
traceOpQuery, ok := query.Spec.(qbtypes.QueryBuilderTraceOperator)
if !ok {
ps.Error = errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid trace operator query spec %T", query.Spec)
results[name] = ps
continue
}
provider = &traceOperatorQuery{
telemetryStore: q.telemetryStore,
stmtBuilder: q.traceOperatorStmtBuilder,
spec: traceOpQuery,
compositeQuery: &req.CompositeQuery,
fromMS: uint64(req.Start),
toMS: uint64(req.End),
kind: req.RequestType,
}
case qbtypes.QueryTypeBuilder:
switch spec := query.Spec.(type) {
case qbtypes.QueryBuilderQuery[qbtypes.TraceAggregation]:
spec.ShiftBy = extractShiftFromBuilderQuery(spec)
timeRange := adjustTimeRangeForShift(spec, qbtypes.TimeRange{From: req.Start, To: req.End}, req.RequestType)
provider = newBuilderQuery(q.logger, q.telemetryStore, q.traceStmtBuilder, spec, timeRange, req.RequestType, tmplVars)
case qbtypes.QueryBuilderQuery[qbtypes.LogAggregation]:
spec.ShiftBy = extractShiftFromBuilderQuery(spec)
timeRange := adjustTimeRangeForShift(spec, qbtypes.TimeRange{From: req.Start, To: req.End}, req.RequestType)
stmtBuilder := q.logStmtBuilder
if spec.Source == telemetrytypes.SourceAudit {
stmtBuilder = q.auditStmtBuilder
}
provider = newBuilderQuery(q.logger, q.telemetryStore, stmtBuilder, spec, timeRange, req.RequestType, tmplVars)
case qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation]:
spec.ShiftBy = extractShiftFromBuilderQuery(spec)
timeRange := adjustTimeRangeForShift(spec, qbtypes.TimeRange{From: req.Start, To: req.End}, req.RequestType)
if spec.Source == telemetrytypes.SourceMeter {
provider = newBuilderQuery(q.logger, q.telemetryStore, q.meterStmtBuilder, spec, timeRange, req.RequestType, tmplVars)
} else {
provider = newBuilderQuery(q.logger, q.telemetryStore, q.metricStmtBuilder, spec, timeRange, req.RequestType, tmplVars)
}
default:
ps.Error = errors.NewInvalidInputf(errors.CodeInvalidInput, "unsupported builder spec type %T", query.Spec)
results[name] = ps
continue
}
default:
ps.Error = errors.NewInvalidInputf(errors.CodeInvalidInput, "unsupported query type %q", query.Type)
results[name] = ps
continue
}
stmtProvider, ok := provider.(statementProvider)
if !ok {
ps.Error = errors.NewInternalf(errors.CodeInternal, "query does not support preview")
results[name] = ps
continue
}
// Build the statement even in validate-only mode: a successful build is
// the strongest validation we can do (it parses the filter/group-by and
// resolves fields against the schema), and a build error is exactly the
// per-query verdict a validation caller wants.
stmt, sErr := stmtProvider.Statement(ctx)
if sErr != nil {
ps.Error = sErr
results[name] = ps
continue
}
ps.Warnings = append(ps.Warnings, stmt.Warnings...)
// clickhouse_sql is user-authored raw SQL; rendering only substitutes
// variables, so by itself it doesn't prove the SQL is valid. Verify it
// parses and binds (tables/columns/types resolve) via EXPLAIN PLAN —
// without executing. Builder/PromQL/trace-operator SQL is engine-generated
// and well-formed by construction, so this is scoped to clickhouse_sql.
if query.Type == qbtypes.QueryTypeClickHouseSQL {
if invalidErr, infraErr := q.explainBindCheck(ctx, stmt.Query, stmt.Args); invalidErr != nil {
ps.Error = invalidErr
results[name] = ps
continue
} else if infraErr != nil {
ps.Warnings = append(ps.Warnings, "could not validate ClickHouse SQL: "+infraErr.Error())
}
}
// The query is fully validated by this point (statement built, plus the
// clickhouse_sql bind check). Render the underlying statement(s) when the
// caller wants them (verbose/explain) or when we need them to compute the
// top-level granuleSkipScore (on by default). If none of those apply
// (score disabled and not verbose), return just the verdict.
needScore := opts.IncludeGranuleSkipScore
needExplain := opts.Explain != qbtypes.ExplainVariantNone
if !opts.Verbose && !needExplain && !needScore {
results[name] = ps
continue
}
// Every query exposes its underlying ClickHouse statement(s) uniformly in
// Statements. Builder/ClickHouse/trace-operator render exactly one; PromQL
// is not SQL — the Prometheus engine issues one statement per metric
// selector, captured (without executing) via PreviewStatements.
if query.Type == qbtypes.QueryTypePromQL {
if pq, ok := provider.(*promqlQuery); ok {
sqlStmts, pErr := pq.PreviewStatements(ctx)
if pErr != nil {
ps.Warnings = append(ps.Warnings, "could not render underlying ClickHouse SQL: "+pErr.Error())
} else {
for _, s := range sqlStmts {
ps.Statements = append(ps.Statements, qbtypes.PreviewStatement{Query: s.Query, Args: s.Args})
}
}
}
} else {
ps.Statements = []qbtypes.PreviewStatement{{Query: stmt.Query, Args: stmt.Args}}
}
results[name] = ps
// granuleSkipScore and EXPLAIN both hit ClickHouse. Queue one task per
// statement; runPreviewTasks executes them concurrently across queries
// after rendering, rather than serializing one query's round trips behind
// the next.
if needScore || needExplain {
for j := range ps.Statements {
previewTasks = append(previewTasks, previewTask{name: name, stmtIdx: j, query: ps.Statements[j].Query, args: ps.Statements[j].Args})
}
}
}
q.runPreviewTasks(ctx, previewTasks, opts, results)
// granuleSkipScore is on by default, but the rendered statements are only
// returned when the caller asked (verbose/explain). So derive the headline
// per-query score from the statements (the minimum — the least-selective,
// worst-skipping statement, which dominates cost), then drop the statements
// from the response unless they were requested.
includeStatements := opts.Verbose || opts.Explain != qbtypes.ExplainVariantNone
for name, ps := range results {
var minScore *float64
for i := range ps.Statements {
s := ps.Statements[i].GranuleSkipScore
if s != nil && (minScore == nil || *s < *minScore) {
minScore = s
}
}
if minScore != nil {
v := *minScore // copy so the top-level field doesn't alias a statement entry
ps.Score = &v
}
if !includeStatements {
ps.Statements = nil
}
results[name] = ps
}
return &qbtypes.QueryRangePreviewResponse{
Queries: results,
}, nil
}
// previewTask is one rendered ClickHouse statement queued for ClickHouse-bound
// preview work (granuleSkipScore and/or EXPLAIN). stmtIdx is the index into the
// query's Statements list that this task's results merge back into.
type previewTask struct {
name string
stmtIdx int
query string
args []any
}
// runPreviewTasks computes the granuleSkipScore and/or EXPLAIN output for each task
// concurrently — every query's ClickHouse round trips are in flight at once
// instead of serialized — and merges the outcomes back into previews. A
// composite query holds only a handful of queries, so a goroutine per task is
// fine without an explicit concurrency bound. Each goroutine writes to its own
// slot; the merge into the previews map happens after the wait, single-
// threaded, so there are no map races.
func (q *querier) runPreviewTasks(ctx context.Context, tasks []previewTask, opts qbtypes.QueryRangePreviewOptions, previews map[string]qbtypes.QueryPreview) {
if len(tasks) == 0 {
return
}
type outcome struct {
score *float64
explain string
warnings []string
}
outcomes := make([]outcome, len(tasks))
var wg sync.WaitGroup
for i := range tasks {
wg.Add(1)
go func(i int) {
defer wg.Done()
t := tasks[i]
var out outcome
if opts.IncludeGranuleSkipScore {
if score, scErr := q.computeGranuleSkipScore(ctx, t.query, t.args); scErr != nil {
// Surface the failure instead of silently dropping the score.
out.warnings = append(out.warnings, "could not compute query score: "+scErr.Error())
} else if score != nil {
out.score = score
}
}
if opts.Explain != qbtypes.ExplainVariantNone {
if explained, eErr := q.runExplain(ctx, opts.Explain, t.query, t.args); eErr != nil {
// Surface the failure instead of silently dropping the output.
out.warnings = append(out.warnings, "could not run EXPLAIN: "+eErr.Error())
} else {
out.explain = explained
}
}
outcomes[i] = out
}(i)
}
wg.Wait()
for i := range tasks {
ps := previews[tasks[i].name]
if idx := tasks[i].stmtIdx; idx >= 0 && idx < len(ps.Statements) {
if outcomes[i].score != nil {
ps.Statements[idx].GranuleSkipScore = outcomes[i].score
}
if outcomes[i].explain != "" {
ps.Statements[idx].Explain = outcomes[i].explain
}
}
ps.Warnings = append(ps.Warnings, outcomes[i].warnings...)
previews[tasks[i].name] = ps
}
}
// runExplain runs `EXPLAIN <variant> <stmt>` against the telemetry store and
// returns the formatted output as a single string with one row per line.
//
// The column shape differs by variant: most variants (plan, ast, syntax,
// pipeline, query_tree) return a single `explain` String column, but ESTIMATE
// returns five columns (database, table, parts, rows, marks). So the scan is
// driven by the result's column types — each row's columns are scanned into
// destinations of the driver-reported types and tab-joined — rather than
// assuming a single string (which silently dropped ESTIMATE output before).
func (q *querier) runExplain(ctx context.Context, variant qbtypes.ExplainVariant, stmt string, args []any) (string, error) {
clause, ok := clickhouseExplainClause(variant)
if !ok {
return "", errors.NewInvalidInputf(errors.CodeInvalidInput, "unsupported explain variant %q", string(variant))
}
explainQuery := "EXPLAIN " + clause + " " + stmt
rows, err := q.telemetryStore.ClickhouseDB().Query(ctx, explainQuery, args...)
if err != nil {
return "", errors.WrapInternalf(err, errors.CodeInternal, "failed to run EXPLAIN %s", clause)
}
defer rows.Close()
colTypes := rows.ColumnTypes()
multiColumn := len(colTypes) > 1
var lines []string
// For a multi-column variant (ESTIMATE), lead with a header row so the
// tab-separated values are readable; single-column variants stay verbatim.
if multiColumn {
header := make([]string, len(colTypes))
for i, ct := range colTypes {
header[i] = ct.Name()
}
lines = append(lines, strings.Join(header, "\t"))
}
for rows.Next() {
dest := make([]any, len(colTypes))
for i, ct := range colTypes {
dest[i] = reflect.New(ct.ScanType()).Interface()
}
if err := rows.Scan(dest...); err != nil {
return "", errors.WrapInternalf(err, errors.CodeInternal, "failed to scan EXPLAIN row")
}
fields := make([]string, len(dest))
for i := range dest {
fields[i] = fmt.Sprintf("%v", reflect.ValueOf(dest[i]).Elem().Interface())
}
lines = append(lines, strings.Join(fields, "\t"))
}
if err := rows.Err(); err != nil {
return "", errors.WrapInternalf(err, errors.CodeInternal, "EXPLAIN row iteration failed")
}
return strings.Join(lines, "\n"), nil
}
// userFacingClickHouseErrorCodes mirrors PR #10679's userFacingCHCodes: the
// ClickHouse error codes that indicate a problem with the query itself (bad SQL,
// unknown table/column, …) rather than a server-side/infra failure — i.e. the
// ones that should map to invalid input (400) instead of internal (500).
//
// TODO(#10679): once that PR lands, delete this and have explainBindCheck call
// the shared querier.mapClickHouseError so there's a single source of truth.
var userFacingClickHouseErrorCodes = map[chproto.Error]bool{
chproto.ErrSyntaxError: true,
chproto.ErrUnknownTable: true,
chproto.ErrUnknownDatabase: true,
chproto.ErrUnknownIdentifier: true,
chproto.ErrUnknownFunction: true,
chproto.ErrUnknownAggregateFunction: true,
chproto.ErrUnknownType: true,
chproto.ErrUnknownStorage: true,
chproto.ErrUnknownElementInAst: true,
chproto.ErrUnknownTypeOfQuery: true,
chproto.ErrIllegalTypeOfArgument: true,
chproto.ErrIllegalColumn: true,
chproto.ErrNumberOfArgumentsDoesntMatch: true,
chproto.ErrTooManyArgumentsForFunction: true,
chproto.ErrTooLessArgumentsForFunction: true,
}
// explainBindCheck validates that a rendered ClickHouse statement parses and
// binds (its tables, columns, and types resolve) by running EXPLAIN PLAN
// against it without executing it. It distinguishes two failure modes:
//
// - invalidErr (non-nil): ClickHouse rejected the statement with a user-facing
// error code — it's genuinely invalid input (syntax, unknown table/column,
// type mismatch). The caller marks the query invalid.
// - infraErr (non-nil): the check couldn't run, or ClickHouse failed with a
// non-user-facing code (e.g. unreachable, timeout, server-side). The caller
// warns rather than falsely marking the query invalid, since validity is
// unknown.
//
// Both nil means the statement is valid.
func (q *querier) explainBindCheck(ctx context.Context, stmt string, args []any) (invalidErr error, infraErr error) {
rows, err := q.telemetryStore.ClickhouseDB().Query(ctx, "EXPLAIN PLAN "+stmt, args...)
if err != nil {
var ex *clickhouse.Exception
if errors.As(err, &ex) && userFacingClickHouseErrorCodes[chproto.Error(ex.Code)] {
return errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid ClickHouse SQL: %s", ex.Message), nil
}
return nil, err
}
rows.Close()
return nil, nil
}
// explainPlanNode is the subset of a ClickHouse `EXPLAIN json = 1, indexes = 1`
// plan node that granuleSkipScore needs: the node type, its per-index granule funnel,
// and its children.
type explainPlanNode struct {
NodeType string `json:"Node Type"`
Indexes []explainPlanIndex `json:"Indexes"`
Plans []explainPlanNode `json:"Plans"`
}
// explainPlanIndex is one index step under a ReadFromMergeTree node. The index
// steps run in sequence, so the first step's Initial Granules is the candidate
// total and the last step's Selected Granules is what survives all pruning.
type explainPlanIndex struct {
InitialGranules *int64 `json:"Initial Granules"`
SelectedGranules *int64 `json:"Selected Granules"`
}
// computeGranuleSkipScore runs `EXPLAIN json = 1, indexes = 1` against the telemetry
// store and returns a 0-100 score: the percentage of candidate granules
// eliminated by partition, primary-key, and skip-index pruning before any data
// is read (higher = more selective, reads less). Granules are summed across
// every ReadFromMergeTree node so multi-read queries (e.g. a resource-filter
// subquery plus the main read) are scored as a whole. Returns nil — not an
// error — when the plan exposes no MergeTree index analysis, so the caller
// simply omits the score.
func (q *querier) computeGranuleSkipScore(ctx context.Context, stmt string, args []any) (*float64, error) {
rows, err := q.telemetryStore.ClickhouseDB().Query(ctx, "EXPLAIN json = 1, indexes = 1 "+stmt, args...)
if err != nil {
return nil, errors.WrapInternalf(err, errors.CodeInternal, "failed to run EXPLAIN for query score")
}
defer rows.Close()
// json=1 emits the plan as a single JSON document; read every row and join
// so we are robust to the driver splitting it across rows.
var sb strings.Builder
for rows.Next() {
var line string
if err := rows.Scan(&line); err != nil {
return nil, errors.WrapInternalf(err, errors.CodeInternal, "failed to scan EXPLAIN json row")
}
sb.WriteString(line)
sb.WriteByte('\n')
}
if err := rows.Err(); err != nil {
return nil, errors.WrapInternalf(err, errors.CodeInternal, "EXPLAIN json row iteration failed")
}
var plans []struct {
Plan explainPlanNode `json:"Plan"`
}
if err := json.Unmarshal([]byte(sb.String()), &plans); err != nil {
return nil, errors.WrapInternalf(err, errors.CodeInternal, "failed to parse EXPLAIN json")
}
var totalInitial, totalSelected int64
for i := range plans {
accumulateGranules(&plans[i].Plan, &totalInitial, &totalSelected)
}
if totalInitial <= 0 {
// No MergeTree index analysis in the plan — nothing to score.
return nil, nil
}
if totalSelected < 0 {
totalSelected = 0
}
skipped := float64(totalInitial-totalSelected) / float64(totalInitial)
if skipped < 0 {
skipped = 0
}
score := math.Round(skipped*100*100) / 100 // percentage, 2 decimal places
return &score, nil
}
// accumulateGranules walks the plan tree and, for every ReadFromMergeTree node,
// adds its candidate-granule total (first index step's Initial Granules) and
// surviving granules (last index step's Selected Granules) to the running sums.
func accumulateGranules(node *explainPlanNode, totalInitial, totalSelected *int64) {
if node.NodeType == "ReadFromMergeTree" && len(node.Indexes) > 0 {
var initial, selected *int64
for i := range node.Indexes {
if node.Indexes[i].InitialGranules != nil && initial == nil {
initial = node.Indexes[i].InitialGranules
}
if node.Indexes[i].SelectedGranules != nil {
selected = node.Indexes[i].SelectedGranules
}
}
if initial != nil && selected != nil {
*totalInitial += *initial
*totalSelected += *selected
}
}
for i := range node.Plans {
accumulateGranules(&node.Plans[i], totalInitial, totalSelected)
}
}

View File

@@ -220,6 +220,68 @@ func (q *promqlQuery) renderVars(query string, vars map[string]qbv5.VariableItem
return newQuery.String(), nil
}
// Statement renders the PromQL query string after variable substitution. It
// is used by the dry-run/preview path; PromQL queries do not have a
// SQL-style argument list.
func (q *promqlQuery) Statement(_ context.Context) (*qbv5.Statement, error) {
rendered, err := q.renderVars(q.query.Query, q.vars, q.tr.From, q.tr.To)
if err != nil {
return nil, err
}
return &qbv5.Statement{Query: rendered}, nil
}
// PreviewStatements returns the underlying ClickHouse statement(s) this PromQL
// query would run, captured without executing them. PromQL is evaluated by the
// Prometheus engine rather than compiled to one SQL statement: the engine calls
// the storage adapter's Select per metric selector, which builds ClickHouse
// SQL. We drive the engine with a capturing Storage that records that SQL and
// returns empty results, so nothing is read from ClickHouse. Returns nil when
// the provider does not support capture (e.g. test doubles).
func (q *promqlQuery) PreviewStatements(ctx context.Context) ([]prometheus.CapturedStatement, error) {
storer, ok := q.promEngine.(prometheus.StatementCapturer)
if !ok {
return nil, nil
}
rendered, err := q.renderVars(q.query.Query, q.vars, q.tr.From, q.tr.To)
if err != nil {
return nil, err
}
start := int64(querybuilder.ToNanoSecs(q.tr.From))
end := int64(querybuilder.ToNanoSecs(q.tr.To))
capStorage, recorder := storer.CapturingStorage()
qry, err := q.promEngine.Engine().NewRangeQuery(
ctx,
capStorage,
nil,
rendered,
time.Unix(0, start),
time.Unix(0, end),
q.query.Step.Duration,
)
if err != nil {
if e := tryEnhancePromQLExecError(err); e != nil {
return nil, e
}
return nil, enhancePromQLError(rendered, err)
}
defer qry.Close()
// Evaluate against the capturing storage: this drives a Select per selector
// (recording the SQL) but reads no data, so the result is discarded.
if res := qry.Exec(ctx); res.Err != nil {
if e := tryEnhancePromQLExecError(res.Err); e != nil {
return nil, e
}
return nil, errors.Newf(errors.TypeInternal, errors.CodeInternal, "query execution error: %v", res.Err)
}
return recorder.Statements(), nil
}
func (q *promqlQuery) Execute(ctx context.Context) (*qbv5.Result, error) {
ctx = ctxtypes.NewContextWithCommentVals(ctx, map[string]string{

View File

@@ -32,6 +32,12 @@ func (q *traceOperatorQuery) Window() (uint64, uint64) {
return q.fromMS, q.toMS
}
// Statement renders the SQL statement for the trace operator query without
// executing it. It is used by the dry-run/preview path.
func (q *traceOperatorQuery) Statement(ctx context.Context) (*qbtypes.Statement, error) {
return q.stmtBuilder.Build(ctx, q.fromMS, q.toMS, q.kind, q.spec, q.compositeQuery)
}
func (q *traceOperatorQuery) Execute(ctx context.Context) (*qbtypes.Result, error) {
stmt, err := q.stmtBuilder.Build(
ctx,

View File

@@ -145,7 +145,7 @@ func PrepareWhereClause(query string, opts FilterExprVisitorOpts) (PreparedWhere
"Found %d syntax errors while parsing the search expression.",
len(parserErrorListener.SyntaxErrors),
)
additionals := make([]string, len(parserErrorListener.SyntaxErrors))
additionals := make([]string, 0, len(parserErrorListener.SyntaxErrors))
for _, err := range parserErrorListener.SyntaxErrors {
if err.Error() != "" {
additionals = append(additionals, err.Error())

View File

@@ -10,6 +10,7 @@ import (
"strings"
"time"
"github.com/SigNoz/signoz/pkg/errors"
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
"github.com/SigNoz/signoz/pkg/valuer"
"github.com/swaggest/jsonschema-go"
@@ -64,6 +65,137 @@ type QueryRangeResponse struct {
QBEvent *QBEvent `json:"-"`
}
// QueryRangePreviewResponse describes the dry-run output of a query range
// request. Each entry corresponds to a single query in the composite query.
type QueryRangePreviewResponse struct {
Queries map[string]QueryPreview `json:"queries"`
}
// ExplainVariant identifies one of the ClickHouse EXPLAIN modes that the
// preview endpoint can run against a rendered SQL statement.
type ExplainVariant string
const (
ExplainVariantNone ExplainVariant = ""
// ExplainVariantPlan returns the query execution plan tree (what gets read
// and how it's aggregated).
ExplainVariantPlan ExplainVariant = "plan"
// ExplainVariantEstimate returns ClickHouse's per-table estimate of the
// parts/rows/marks the query will read — an absolute cost estimate that
// complements the (ratio-based) granuleSkipScore.
ExplainVariantEstimate ExplainVariant = "estimate"
)
// QueryRangePreviewOptions carries per-call options for the query range
// preview (dry-run) endpoint. The zero value produces a lightweight,
// verdict-only preview (valid/error/warnings per query, no rendered SQL).
type QueryRangePreviewOptions struct {
// Explain selects which ClickHouse EXPLAIN variant to run for each rendered
// SQL statement. Leave empty to skip EXPLAIN. Implies Verbose (the EXPLAIN
// output attaches to each statement).
Explain ExplainVariant
// Verbose includes the rendered ClickHouse statement(s) (Statements) in the
// response. The default (false) returns only the per-query verdict
// (valid/error/warnings) plus the headline GranuleSkipScore — every query is
// still fully validated, just not rendered into the response. Requesting
// Explain implies Verbose, since EXPLAIN output attaches to each statement.
Verbose bool
// IncludeGranuleSkipScore computes the GranuleSkipScore. The HTTP endpoint
// sets it true by default (the headline top-level score is returned even in
// the lightweight, non-verbose response), and only false on ?score=false.
// When the response includes statements (Verbose/Explain), each statement
// also carries its own GranuleSkipScore and the top-level one is their
// minimum. Computing it costs one ClickHouse EXPLAIN per statement.
IncludeGranuleSkipScore bool
}
// QueryRangePreviewParams documents the query-string parameters accepted by the
// query range preview (dry-run) endpoint.
type QueryRangePreviewParams struct {
// Explain selects which ClickHouse EXPLAIN variant to run against each
// rendered SQL statement. Empty or "false" skips EXPLAIN; "true" maps to
// "plan". Allowed: plan (execution plan tree), estimate (parts/rows/marks
// to read). Implies verbose.
Explain string `query:"explain"`
// Verbose, when "true", includes the rendered ClickHouse statement(s) in the
// response. The default response is lightweight: the per-query verdict
// (valid/error/warnings) plus the top-level granuleSkipScore. Requesting
// explain implies verbose.
Verbose string `query:"verbose"`
// Score controls the granuleSkipScore (granule-skip selectivity, 0-100;
// higher is better). It defaults to "true" — the top-level score is returned
// even in the lightweight response. Set score=false to skip it (and its
// ClickHouse EXPLAIN round trips) for the cheapest validation-only preview.
Score string `query:"score"`
}
// PrepareJSONSchema adds description to the QueryRangePreviewResponse schema.
func (q *QueryRangePreviewResponse) PrepareJSONSchema(schema *jsonschema.Schema) error {
schema.WithDescription("Response from the v5 query range preview (dry-run) endpoint. For each query in the composite query, returns the underlying ClickHouse statement(s) it renders to without executing them (one per PromQL metric selector; exactly one for builder/ClickHouse/trace-operator queries), with optional EXPLAIN output and granule-skip score when requested.")
return nil
}
// QueryPreview is the dry-run result for a single query, keyed by query name
// in QueryRangePreviewResponse.Queries.
type QueryPreview struct {
// Valid is the headline verdict for this query: true when it previewed
// without error, false when Error is set. It is always present (derived from
// Error at marshal time) so an agent can branch on a single boolean instead
// of testing for the presence of the error object.
Valid bool `json:"valid"`
// Error describes why this query is invalid or could not be previewed; nil
// when the query previewed successfully. It is the structured form
// (code, message, and — when available — suggestions and invalidReferences)
// so an agent can act on it programmatically instead of parsing a string.
Error error `json:"error,omitempty"`
Warnings []string `json:"warnings,omitempty"`
// Score is the headline selectivity for this query: the percentage (0-100) of
// candidate granules eliminated by partition, primary-key, and skip-index
// pruning before any data is read (higher = less data read). It is the
// minimum of the per-statement Statements[].GranuleSkipScore values — the
// least-selective (worst) underlying statement, which dominates cost.
// Returned by default; omitted when ?score=false or no statement reads a
// MergeTree table.
Score *float64 `json:"score,omitempty"`
// Statements are the underlying ClickHouse statement(s) this query renders to,
// in execution order. Builder, ClickHouse SQL, and trace-operator queries
// render exactly one; a PromQL query renders one per metric selector (the
// Prometheus engine issues a statement per selector). Empty for a
// validation-only preview, a query that failed to render (see Error), or one
// that resolves to no data (a fully-missing metric, see Warnings).
Statements []PreviewStatement `json:"statements,omitempty"`
}
// PreviewStatement is one rendered ClickHouse statement the query will execute,
// with its bound args and — when requested — its EXPLAIN output and
// GranuleSkipScore.
type PreviewStatement struct {
Query string `json:"query"`
Args []any `json:"args,omitempty"`
Explain string `json:"explain,omitempty"`
GranuleSkipScore *float64 `json:"granuleSkipScore,omitempty"`
}
// MarshalJSON renders Error as the structured error form (code, message and,
// when present, suggestions/invalidReferences) instead of the default {} that a
// bare error interface produces, so an agent consuming the dry-run can act on it
// programmatically.
func (p QueryPreview) MarshalJSON() ([]byte, error) {
type alias QueryPreview
out := struct {
alias
Error *errors.JSON `json:"error,omitempty"`
}{alias: alias(p)}
out.alias.Error = nil
// Derive the verdict from the error so callers can't desync the two.
out.alias.Valid = p.Error == nil
if p.Error != nil {
out.Error = errors.AsJSON(p.Error)
}
return json.Marshal(out)
}
var _ jsonschema.Preparer = &QueryRangeResponse{}
// PrepareJSONSchema adds description to the QueryRangeResponse schema.

View File

@@ -535,6 +535,68 @@ func (r *QueryRangeRequest) Validate(opts ...ValidationOption) error {
return nil
}
// ValidateRequestScope validates request-level invariants — time range,
// request type, the raw/trace metric-query restriction, non-empty composite
// query, unique builder query names, and not-all-disabled — WITHOUT validating
// individual query specs, and returns the ValidationOptions for the request
// type. The dry-run/preview path uses this so that per-query spec errors can be
// attributed to each query (via QueryEnvelope.Validate) instead of aborting the
// whole request on the first one, the way Validate does. The normal execution
// path keeps using the fail-fast Validate.
func (r *QueryRangeRequest) ValidateRequestScope() ([]ValidationOption, error) {
if r.RequestType != RequestTypeRawStream && r.Start >= r.End {
return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "start time must be before end time")
}
var opts []ValidationOption
switch r.RequestType {
case RequestTypeRaw, RequestTypeRawStream, RequestTypeTrace, RequestTypeTimeSeries, RequestTypeScalar:
opts = GetValidationOptions(r.RequestType)
default:
return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid request type: %s", r.RequestType).
WithAdditional("Valid request types are: raw, timeseries, scalar")
}
if r.RequestType == RequestTypeRaw || r.RequestType == RequestTypeRawStream || r.RequestType == RequestTypeTrace {
for _, envelope := range r.CompositeQuery.Queries {
if envelope.GetSignal() == telemetrytypes.SignalMetrics {
return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "raw request type is not supported for metric queries")
}
}
}
if len(r.CompositeQuery.Queries) == 0 {
return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "at least one query is required")
}
// Builder query names must be unique across the composite query.
queryNames := make(map[string]bool)
for _, envelope := range r.CompositeQuery.Queries {
if envelope.Type == QueryTypeBuilder || envelope.Type == QueryTypeSubQuery {
name := envelope.GetQueryName()
if name != "" {
if queryNames[name] {
return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "duplicate query name '%s'", name)
}
queryNames[name] = true
}
}
}
if err := r.validateAllQueriesNotDisabled(); err != nil {
return nil, err
}
return opts, nil
}
// Validate validates a single query envelope's spec. It is the per-query
// counterpart to QueryRangeRequest.ValidateRequestScope, used by the dry-run to
// report each query's structural error independently.
func (e QueryEnvelope) Validate(opts ...ValidationOption) error {
return validateQueryEnvelope(e, opts...)
}
// validateAllQueriesNotDisabled validates that at least one query in the composite query is enabled.
func (r *QueryRangeRequest) validateAllQueriesNotDisabled() error {
for _, envelope := range r.CompositeQuery.Queries {