mirror of
https://github.com/SigNoz/signoz.git
synced 2026-06-11 11:20:32 +01:00
Compare commits
1 Commits
feat/flame
...
tvats-dry-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c8c050053c |
@@ -101,6 +101,10 @@ func (h *handler) QueryRange(rw http.ResponseWriter, req *http.Request) {
|
||||
h.community.QueryRange(rw, req)
|
||||
}
|
||||
|
||||
func (h *handler) QueryRangePreview(rw http.ResponseWriter, req *http.Request) {
|
||||
h.community.QueryRangePreview(rw, req)
|
||||
}
|
||||
|
||||
func (h *handler) QueryRawStream(rw http.ResponseWriter, req *http.Request) {
|
||||
h.community.QueryRawStream(rw, req)
|
||||
}
|
||||
|
||||
2
go.mod
2
go.mod
@@ -180,7 +180,7 @@ require (
|
||||
github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.13.1 // indirect
|
||||
github.com/Azure/azure-sdk-for-go/sdk/internal v1.11.2 // indirect
|
||||
github.com/AzureAD/microsoft-authentication-library-for-go v1.6.0 // indirect
|
||||
github.com/ClickHouse/ch-go v0.71.0 // indirect
|
||||
github.com/ClickHouse/ch-go v0.71.0
|
||||
github.com/Masterminds/squirrel v1.5.4 // indirect
|
||||
github.com/Yiling-J/theine-go v0.6.2 // indirect
|
||||
github.com/alecthomas/units v0.0.0-20240927000941-0f3dac36c52b
|
||||
|
||||
@@ -451,6 +451,23 @@ func (provider *provider) addQuerierRoutes(router *mux.Router) error {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := router.Handle("/api/v5/query_range/dry_run", handler.New(provider.authzMiddleware.ViewAccess(provider.querierHandler.QueryRangePreview), handler.OpenAPIDef{
|
||||
ID: "QueryRangeDryRunV5",
|
||||
Tags: []string{"querier"},
|
||||
Summary: "Query range dry run",
|
||||
Description: "Validate a composite query without executing it. Accepts the same payload as the query range endpoint. By default returns a lightweight per-query verdict (valid/error/warnings) plus a top-level score (0-100 granule-skip selectivity; higher is better). Pass ?verbose=true to also include the rendered underlying ClickHouse statement(s) for each query (each carrying its own granuleSkipScore). Pass ?explain=plan|estimate to attach the corresponding ClickHouse EXPLAIN output to each statement (implies verbose). Pass ?score=false to skip the score for the cheapest validation-only preview. Intended for agentic/dry-run consumption: per-query errors are reported in the response rather than failing the whole request.",
|
||||
Request: new(qbtypes.QueryRangeRequest),
|
||||
RequestQuery: new(qbtypes.QueryRangePreviewParams),
|
||||
RequestContentType: "application/json",
|
||||
Response: new(qbtypes.QueryRangePreviewResponse),
|
||||
ResponseContentType: "application/json",
|
||||
SuccessStatusCode: http.StatusOK,
|
||||
ErrorStatusCodes: []int{http.StatusBadRequest},
|
||||
SecuritySchemes: newSecuritySchemes(types.RoleViewer),
|
||||
})).Methods(http.MethodPost).GetError(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := router.Handle("/api/v5/substitute_vars", handler.New(provider.authzMiddleware.ViewAccess(provider.querierHandler.ReplaceVariables), handler.OpenAPIDef{
|
||||
ID: "ReplaceVariables",
|
||||
Tags: []string{"querier"},
|
||||
|
||||
94
pkg/prometheus/clickhouseprometheus/capture.go
Normal file
94
pkg/prometheus/clickhouseprometheus/capture.go
Normal file
@@ -0,0 +1,94 @@
|
||||
package clickhouseprometheus
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sync"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/prometheus"
|
||||
"github.com/prometheus/prometheus/prompb"
|
||||
"github.com/prometheus/prometheus/storage"
|
||||
)
|
||||
|
||||
// statementRecorder collects the ClickHouse statements a PromQL evaluation would
|
||||
// run. It is safe for concurrent use because the Prometheus engine may evaluate
|
||||
// (and therefore Select) multiple selectors concurrently.
|
||||
type statementRecorder struct {
|
||||
mu sync.Mutex
|
||||
statements []prometheus.CapturedStatement
|
||||
}
|
||||
|
||||
func (r *statementRecorder) record(query string, args []any) {
|
||||
r.mu.Lock()
|
||||
defer r.mu.Unlock()
|
||||
r.statements = append(r.statements, prometheus.CapturedStatement{Query: query, Args: args})
|
||||
}
|
||||
|
||||
func (r *statementRecorder) Statements() []prometheus.CapturedStatement {
|
||||
r.mu.Lock()
|
||||
defer r.mu.Unlock()
|
||||
out := make([]prometheus.CapturedStatement, len(r.statements))
|
||||
copy(out, r.statements)
|
||||
return out
|
||||
}
|
||||
|
||||
// captureClient is a remote.ReadClient that builds the same ClickHouse SQL as
|
||||
// the real client but records it instead of executing, returning an empty
|
||||
// result so the engine completes without touching ClickHouse. It records the
|
||||
// self-contained samples query per selector (which embeds the series-selection
|
||||
// subquery), so the recorded statement reflects the actual data read.
|
||||
type captureClient struct {
|
||||
*client
|
||||
recorder *statementRecorder
|
||||
}
|
||||
|
||||
func (c *captureClient) Read(ctx context.Context, query *prompb.Query, _ bool) (storage.SeriesSet, error) {
|
||||
// Raw-SQL passthrough ({job="rawsql", query="..."}): record the raw query.
|
||||
if len(query.Matchers) == 2 {
|
||||
var hasJob bool
|
||||
var queryString string
|
||||
for _, m := range query.Matchers {
|
||||
if m.Type == prompb.LabelMatcher_EQ && m.Name == "job" && m.Value == "rawsql" {
|
||||
hasJob = true
|
||||
}
|
||||
if m.Type == prompb.LabelMatcher_EQ && m.Name == "query" {
|
||||
queryString = m.Value
|
||||
}
|
||||
}
|
||||
if hasJob && queryString != "" {
|
||||
c.recorder.record(queryString, nil)
|
||||
return storage.EmptySeriesSet(), nil
|
||||
}
|
||||
}
|
||||
|
||||
var metricName string
|
||||
for _, matcher := range query.Matchers {
|
||||
if matcher.Name == "__name__" {
|
||||
metricName = matcher.Value
|
||||
}
|
||||
}
|
||||
|
||||
// Build the series-selection subquery and the self-contained samples query
|
||||
// exactly as the executing path would, but only record them.
|
||||
subQuery, args, err := c.client.queryToClickhouseQuery(ctx, query, metricName, true)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
samplesQuery, samplesArgs := buildSamplesQuery(int64(query.StartTimestampMs), int64(query.EndTimestampMs), metricName, subQuery, args)
|
||||
c.recorder.record(samplesQuery, samplesArgs)
|
||||
|
||||
return storage.EmptySeriesSet(), nil
|
||||
}
|
||||
|
||||
// captureQueryable adapts the capturing read client to storage.Queryable,
|
||||
// mirroring how the real provider wraps its querier.
|
||||
type captureQueryable struct {
|
||||
inner storage.SampleAndChunkQueryable
|
||||
}
|
||||
|
||||
func (c captureQueryable) Querier(mint, maxt int64) (storage.Querier, error) {
|
||||
querier, err := c.inner.Querier(mint, maxt)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return storage.NewMergeQuerier(nil, []storage.Querier{querier}, storage.ChainedSeriesMerge), nil
|
||||
}
|
||||
@@ -204,8 +204,11 @@ func (client *client) getFingerprintsFromClickhouseQuery(ctx context.Context, qu
|
||||
return fingerprints, nil
|
||||
}
|
||||
|
||||
func (client *client) querySamples(ctx context.Context, start int64, end int64, fingerprints map[uint64][]prompb.Label, metricName string, subQuery string, args []any) ([]*prompb.TimeSeries, error) {
|
||||
ctx = client.withClickhousePrometheusContext(ctx, "querySamples")
|
||||
// buildSamplesQuery renders the samples SQL (and its args) that fetches the
|
||||
// data points for the series selected by subQuery. It embeds the series
|
||||
// selection as a subquery, so the returned statement is self-contained — the
|
||||
// dry-run/preview path renders it without executing.
|
||||
func buildSamplesQuery(start int64, end int64, metricName string, subQuery string, args []any) (string, []any) {
|
||||
argCount := len(args)
|
||||
|
||||
query := fmt.Sprintf(`
|
||||
@@ -217,6 +220,13 @@ func (client *client) querySamples(ctx context.Context, start int64, end int64,
|
||||
|
||||
allArgs := append([]any{metricName}, args...)
|
||||
allArgs = append(allArgs, start, end)
|
||||
return query, allArgs
|
||||
}
|
||||
|
||||
func (client *client) querySamples(ctx context.Context, start int64, end int64, fingerprints map[uint64][]prompb.Label, metricName string, subQuery string, args []any) ([]*prompb.TimeSeries, error) {
|
||||
ctx = client.withClickhousePrometheusContext(ctx, "querySamples")
|
||||
|
||||
query, allArgs := buildSamplesQuery(start, end, metricName, subQuery, args)
|
||||
|
||||
rows, err := client.telemetryStore.ClickhouseDB().Query(ctx, query, allArgs...)
|
||||
if err != nil {
|
||||
|
||||
@@ -5,8 +5,8 @@ import (
|
||||
"sort"
|
||||
"testing"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/telemetrystore/telemetrystoretest"
|
||||
cmock "github.com/SigNoz/clickhouse-go-mock"
|
||||
"github.com/SigNoz/signoz/pkg/telemetrystore/telemetrystoretest"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/DATA-DOG/go-sqlmock"
|
||||
|
||||
@@ -64,3 +64,17 @@ func (provider *provider) Querier(mint, maxt int64) (storage.Querier, error) {
|
||||
|
||||
return storage.NewMergeQuerier(nil, []storage.Querier{querier}, storage.ChainedSeriesMerge), nil
|
||||
}
|
||||
|
||||
// CapturingStorage implements prometheus.StatementCapturer: it returns a Storage
|
||||
// that records the ClickHouse SQL each selector would run (without executing
|
||||
// it) and a recorder to read the captured statements back. A fresh recorder is
|
||||
// created per call so concurrent dry-runs don't share state.
|
||||
func (provider *provider) CapturingStorage() (storage.Queryable, prometheus.StatementRecorder) {
|
||||
recorder := &statementRecorder{}
|
||||
capture := &captureClient{
|
||||
client: &client{settings: provider.settings, telemetryStore: provider.telemetryStore},
|
||||
recorder: recorder,
|
||||
}
|
||||
queryable := remote.NewSampleAndChunkQueryableClient(capture, labels.EmptyLabels(), []*labels.Matcher{}, false, stCallback)
|
||||
return captureQueryable{inner: queryable}, recorder
|
||||
}
|
||||
|
||||
@@ -15,3 +15,25 @@ type Prometheus interface {
|
||||
Storage() storage.Queryable
|
||||
Parser() Parser
|
||||
}
|
||||
|
||||
// CapturedStatement is one underlying datastore statement that a PromQL query would
|
||||
// run, captured without executing it.
|
||||
type CapturedStatement struct {
|
||||
Query string
|
||||
Args []any
|
||||
}
|
||||
|
||||
// StatementRecorder collects the Statements captured while a PromQL query is
|
||||
// evaluated against a capturing Storage (see StatementCapturer).
|
||||
type StatementRecorder interface {
|
||||
Statements() []CapturedStatement
|
||||
}
|
||||
|
||||
// StatementCapturer is an optional capability of a Prometheus provider: it
|
||||
// returns a Storage that records the datastore statement(s) each Select would
|
||||
// run — without executing them — together with a recorder to read them back.
|
||||
// The query dry-run path discovers it via a type assertion, so providers that
|
||||
// do not implement it simply expose no underlying SQL.
|
||||
type StatementCapturer interface {
|
||||
CapturingStorage() (storage.Queryable, StatementRecorder)
|
||||
}
|
||||
|
||||
@@ -72,6 +72,70 @@ func (handler *handler) QueryRange(rw http.ResponseWriter, req *http.Request) {
|
||||
|
||||
render.Success(rw, http.StatusOK, queryRangeResponse)
|
||||
}
|
||||
|
||||
// QueryRangePreview is the dry-run counterpart of QueryRange. It accepts the
|
||||
// same payload, validates and renders the underlying SQL/PromQL for each query
|
||||
// without executing it, and returns the per-query statements. When the
|
||||
// ?explain= query parameter selects a ClickHouse EXPLAIN variant, the rendered
|
||||
// SQL is EXPLAIN-ed against the telemetry store and the output is attached to
|
||||
// each statement.
|
||||
func (handler *handler) QueryRangePreview(rw http.ResponseWriter, req *http.Request) {
|
||||
ctx := req.Context()
|
||||
ctx = ctxtypes.NewContextWithCommentVals(ctx, map[string]string{
|
||||
instrumentationtypes.CodeNamespace: "querier",
|
||||
instrumentationtypes.CodeFunctionName: "QueryRangePreview",
|
||||
})
|
||||
|
||||
claims, err := authtypes.ClaimsFromContext(ctx)
|
||||
if err != nil {
|
||||
render.Error(rw, err)
|
||||
return
|
||||
}
|
||||
|
||||
var queryRangeRequest qbtypes.QueryRangeRequest
|
||||
if err := json.NewDecoder(req.Body).Decode(&queryRangeRequest); err != nil {
|
||||
render.Error(rw, err)
|
||||
return
|
||||
}
|
||||
|
||||
// NB: validation is intentionally NOT done here. QueryRangePreview checks
|
||||
// request-level invariants (aborting on failure) and validates each query's
|
||||
// spec individually, reporting per-query structural errors in the response
|
||||
// instead of failing fast on the first one — the point of the dry-run.
|
||||
|
||||
orgID, err := valuer.NewUUID(claims.OrgID)
|
||||
if err != nil {
|
||||
render.Error(rw, err)
|
||||
return
|
||||
}
|
||||
|
||||
explain, err := ParseExplainVariant(req.URL.Query().Get("explain"))
|
||||
if err != nil {
|
||||
render.Error(rw, err)
|
||||
return
|
||||
}
|
||||
|
||||
verbose, err := ParseVerbose(req.URL.Query().Get("verbose"))
|
||||
if err != nil {
|
||||
render.Error(rw, err)
|
||||
return
|
||||
}
|
||||
|
||||
score, err := ParseScore(req.URL.Query().Get("score"))
|
||||
if err != nil {
|
||||
render.Error(rw, err)
|
||||
return
|
||||
}
|
||||
|
||||
preview, err := handler.querier.QueryRangePreview(ctx, orgID, &queryRangeRequest, qbtypes.QueryRangePreviewOptions{Explain: explain, Verbose: verbose, IncludeGranuleSkipScore: score})
|
||||
if err != nil {
|
||||
render.Error(rw, err)
|
||||
return
|
||||
}
|
||||
|
||||
render.Success(rw, http.StatusOK, preview)
|
||||
}
|
||||
|
||||
func (handler *handler) QueryRawStream(rw http.ResponseWriter, req *http.Request) {
|
||||
ctx := req.Context()
|
||||
|
||||
|
||||
@@ -194,6 +194,12 @@ func (q *builderQuery[T]) isWindowList() bool {
|
||||
return true
|
||||
}
|
||||
|
||||
// Statement renders the SQL statement for the builder query without executing
|
||||
// it. It is used by the dry-run/preview path.
|
||||
func (q *builderQuery[T]) Statement(ctx context.Context) (*qbtypes.Statement, error) {
|
||||
return q.stmtBuilder.Build(ctx, q.fromMS, q.toMS, q.kind, q.spec, q.variables)
|
||||
}
|
||||
|
||||
func (q *builderQuery[T]) Execute(ctx context.Context) (*qbtypes.Result, error) {
|
||||
|
||||
// can we do window based pagination?
|
||||
|
||||
@@ -99,6 +99,16 @@ func (q *chSQLQuery) renderVars(query string, vars map[string]qbtypes.VariableIt
|
||||
return newQuery.String(), nil
|
||||
}
|
||||
|
||||
// Statement renders the SQL statement for the ClickHouse SQL query without
|
||||
// executing it. It is used by the dry-run/preview path.
|
||||
func (q *chSQLQuery) Statement(_ context.Context) (*qbtypes.Statement, error) {
|
||||
rendered, err := q.renderVars(q.query.Query, q.vars, q.fromMS, q.toMS)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &qbtypes.Statement{Query: rendered, Args: q.args}, nil
|
||||
}
|
||||
|
||||
func (q *chSQLQuery) Execute(ctx context.Context) (*qbtypes.Result, error) {
|
||||
ctx = ctxtypes.NewContextWithCommentVals(ctx, map[string]string{
|
||||
instrumentationtypes.QueryDuration: instrumentationtypes.DurationBucket(q.fromMS, q.toMS),
|
||||
|
||||
@@ -12,6 +12,11 @@ import (
|
||||
type Querier interface {
|
||||
QueryRange(ctx context.Context, orgID valuer.UUID, req *qbtypes.QueryRangeRequest) (*qbtypes.QueryRangeResponse, error)
|
||||
QueryRawStream(ctx context.Context, orgID valuer.UUID, req *qbtypes.QueryRangeRequest, client *qbtypes.RawStream)
|
||||
// QueryRangePreview validates and renders the queries in req without
|
||||
// executing them. opts controls dry-run behavior such as which
|
||||
// EXPLAIN variant to attach to the response; the zero value performs
|
||||
// a validation-only preview with no EXPLAIN.
|
||||
QueryRangePreview(ctx context.Context, orgID valuer.UUID, req *qbtypes.QueryRangeRequest, opts qbtypes.QueryRangePreviewOptions) (*qbtypes.QueryRangePreviewResponse, error)
|
||||
}
|
||||
|
||||
// BucketCache is the interface for bucket-based caching.
|
||||
@@ -24,6 +29,10 @@ type BucketCache interface {
|
||||
|
||||
type Handler interface {
|
||||
QueryRange(rw http.ResponseWriter, req *http.Request)
|
||||
// QueryRangePreview is the dry-run endpoint: it validates and renders the
|
||||
// queries without executing them, optionally attaching ClickHouse EXPLAIN
|
||||
// output selected by the ?explain= query parameter.
|
||||
QueryRangePreview(rw http.ResponseWriter, req *http.Request)
|
||||
QueryRawStream(rw http.ResponseWriter, req *http.Request)
|
||||
ReplaceVariables(rw http.ResponseWriter, req *http.Request)
|
||||
}
|
||||
|
||||
694
pkg/querier/preview.go
Normal file
694
pkg/querier/preview.go
Normal file
@@ -0,0 +1,694 @@
|
||||
package querier
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"math"
|
||||
"reflect"
|
||||
"slices"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
chproto "github.com/ClickHouse/ch-go/proto"
|
||||
"github.com/ClickHouse/clickhouse-go/v2"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/errors"
|
||||
"github.com/SigNoz/signoz/pkg/querybuilder"
|
||||
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
|
||||
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
|
||||
"github.com/SigNoz/signoz/pkg/valuer"
|
||||
)
|
||||
|
||||
// statementProvider is implemented by query types that can render the
|
||||
// underlying SQL/PromQL statement without executing it.
|
||||
type statementProvider interface {
|
||||
Statement(ctx context.Context) (*qbtypes.Statement, error)
|
||||
}
|
||||
|
||||
// missingMetricNames returns the distinct metric names referenced by a metric
|
||||
// builder query, in order of first appearance. It is used to name the metric(s)
|
||||
// in the warning attached to a fully-missing-metric query. Returns nil for any
|
||||
// non-metric query.
|
||||
func missingMetricNames(env qbtypes.QueryEnvelope) []string {
|
||||
spec, ok := env.Spec.(qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation])
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
names := make([]string, 0, len(spec.Aggregations))
|
||||
for _, agg := range spec.Aggregations {
|
||||
if agg.MetricName != "" && !slices.Contains(names, agg.MetricName) {
|
||||
names = append(names, agg.MetricName)
|
||||
}
|
||||
}
|
||||
return names
|
||||
}
|
||||
|
||||
// clickhouseExplainClause maps a variant to the EXPLAIN clause understood
|
||||
// by ClickHouse (i.e. what comes between EXPLAIN and the SELECT).
|
||||
func clickhouseExplainClause(v qbtypes.ExplainVariant) (string, bool) {
|
||||
switch v {
|
||||
case qbtypes.ExplainVariantPlan:
|
||||
return "PLAN", true
|
||||
case qbtypes.ExplainVariantEstimate:
|
||||
return "ESTIMATE", true
|
||||
default:
|
||||
return "", false
|
||||
}
|
||||
}
|
||||
|
||||
// ParseExplainVariant parses the ?explain= query parameter. An empty value
|
||||
// (or "false") returns ExplainVariantNone. The literal "true" maps to PLAN
|
||||
// for back-compat with simple ?explain=true. Otherwise the value must
|
||||
// match one of the named variants.
|
||||
func ParseExplainVariant(value string) (qbtypes.ExplainVariant, error) {
|
||||
token := strings.ToLower(strings.TrimSpace(value))
|
||||
switch token {
|
||||
case "", "false":
|
||||
return qbtypes.ExplainVariantNone, nil
|
||||
case "true":
|
||||
return qbtypes.ExplainVariantPlan, nil
|
||||
}
|
||||
v := qbtypes.ExplainVariant(token)
|
||||
if _, ok := clickhouseExplainClause(v); !ok {
|
||||
return qbtypes.ExplainVariantNone, errors.NewInvalidInputf(errors.CodeInvalidInput, "unsupported explain variant %q (allowed: plan, estimate)", token)
|
||||
}
|
||||
return v, nil
|
||||
}
|
||||
|
||||
// parseBoolQueryParam parses a true/false query parameter. An empty value (or
|
||||
// "false"/"0") is false; "true"/"1" is true. name is used only in the error
|
||||
// message so each caller reports the parameter the user actually sent.
|
||||
func parseBoolQueryParam(value, name string) (bool, error) {
|
||||
switch strings.ToLower(strings.TrimSpace(value)) {
|
||||
case "", "false", "0":
|
||||
return false, nil
|
||||
case "true", "1":
|
||||
return true, nil
|
||||
}
|
||||
return false, errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid %s value %q (allowed: true, false)", name, value)
|
||||
}
|
||||
|
||||
// ParseVerbose parses the ?verbose= query parameter. When true the preview
|
||||
// includes the rendered ClickHouse statement(s); the default is a lightweight
|
||||
// verdict-only preview (valid/error/warnings per query).
|
||||
func ParseVerbose(value string) (bool, error) {
|
||||
return parseBoolQueryParam(value, "verbose")
|
||||
}
|
||||
|
||||
// ParseScore parses the ?score= query parameter. It defaults to TRUE: the
|
||||
// top-level granuleSkipScore is computed unless explicitly disabled with
|
||||
// score=false (which skips the granule-skip EXPLAIN round trips).
|
||||
func ParseScore(value string) (bool, error) {
|
||||
switch strings.ToLower(strings.TrimSpace(value)) {
|
||||
case "", "true", "1":
|
||||
return true, nil
|
||||
case "false", "0":
|
||||
return false, nil
|
||||
}
|
||||
return false, errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid score value %q (allowed: true, false)", value)
|
||||
}
|
||||
|
||||
// QueryRangePreview validates each query in the composite query without
|
||||
// executing it. By default it returns a lightweight per-query verdict
|
||||
// (valid/error/warnings) plus the headline GranuleSkipScore (unless disabled via
|
||||
// opts.IncludeGranuleSkipScore=false). When opts.Verbose (or Explain, which
|
||||
// implies it) is set, it also renders the underlying ClickHouse statement(s)
|
||||
// each query would run, with opts.Explain attaching EXPLAIN output and the
|
||||
// per-statement GranuleSkipScore alongside each.
|
||||
func (q *querier) QueryRangePreview(
|
||||
ctx context.Context,
|
||||
_ valuer.UUID,
|
||||
req *qbtypes.QueryRangeRequest,
|
||||
opts qbtypes.QueryRangePreviewOptions,
|
||||
) (*qbtypes.QueryRangePreviewResponse, error) {
|
||||
|
||||
// The preview must transform the payload exactly as QueryRange does so the
|
||||
// rendered SQL matches what the same payload will actually execute. Coerce
|
||||
// the window to epoch milliseconds up front, just like QueryRange.
|
||||
req.Start = querybuilder.ToMilliSecs(req.Start)
|
||||
req.End = querybuilder.ToMilliSecs(req.End)
|
||||
|
||||
tmplVars := req.Variables
|
||||
if tmplVars == nil {
|
||||
tmplVars = make(map[string]qbtypes.VariableItem)
|
||||
}
|
||||
|
||||
// Validate request-level invariants (time range, request type, unique
|
||||
// names, …) up front — these are request-wide, so there is nothing per
|
||||
// query to preview if they fail. Per-query spec validation is deliberately
|
||||
// NOT done here: it runs per query below so each query's structural error is
|
||||
// reported in its own QueryPreview instead of aborting the whole
|
||||
// preview on the first one. validationOpts carries the request-type-specific
|
||||
// options into that per-query validation.
|
||||
validationOpts, err := req.ValidateRequestScope()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// A query that only exists as a dependency of a trace operator (e.g. A and
|
||||
// B in C := A => B) is not executed standalone, so it gets no statement of
|
||||
// its own — matching QueryRange.
|
||||
dependencyQueries, err := q.constructTraceOperatorDependencyMap(req.CompositeQuery.Queries)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
results := make(map[string]qbtypes.QueryPreview, len(req.CompositeQuery.Queries))
|
||||
|
||||
// Phase 1: normalize every query's spec (step interval + metric metadata)
|
||||
// and capture the per-query warnings/errors. This runs for ALL queries —
|
||||
// including trace-operator dependencies — before any statement is rendered,
|
||||
// because a trace-operator query reads its siblings' specs at render time
|
||||
// and they must already be normalized. adjustStepInterval and
|
||||
// resolveMetricMetadata both patch the spec in place, so feed each a
|
||||
// single-element slice and write the patched envelope back into the
|
||||
// composite query. Doing it per-query (rather than once over all queries
|
||||
// like QueryRange) lets us attribute each warning/error to the query that
|
||||
// produced it, which is the whole point of a per-query preview report; the
|
||||
// extra metadata lookups are acceptable on this low-volume dry-run path.
|
||||
prepared := make(map[string]qbtypes.QueryPreview, len(req.CompositeQuery.Queries))
|
||||
missingMetricQuerySet := make(map[string]bool)
|
||||
for idx := range req.CompositeQuery.Queries {
|
||||
name := req.CompositeQuery.Queries[idx].GetQueryName()
|
||||
ps := qbtypes.QueryPreview{}
|
||||
|
||||
// Validate this query's spec on its own and attribute any structural
|
||||
// error to it, instead of aborting the whole preview on the first bad
|
||||
// query (the request-level invariants were already checked above). An
|
||||
// invalid spec gets no step/metadata normalization or rendering.
|
||||
if vErr := req.CompositeQuery.Queries[idx].Validate(validationOpts...); vErr != nil {
|
||||
ps.Error = vErr
|
||||
prepared[name] = ps
|
||||
continue
|
||||
}
|
||||
|
||||
env := []qbtypes.QueryEnvelope{req.CompositeQuery.Queries[idx]}
|
||||
ps.Warnings = q.adjustStepInterval(env, req.Start, req.End)
|
||||
|
||||
missingMetricQueries, dormantMetricsWarningMsg, mErr := q.resolveMetricMetadata(ctx, env, req.Start, req.End)
|
||||
if mErr != nil {
|
||||
// Don't abort the whole preview: report this query's error and keep
|
||||
// going so the agent sees every problem in one round trip.
|
||||
ps.Error = mErr
|
||||
} else {
|
||||
if dormantMetricsWarningMsg != "" {
|
||||
ps.Warnings = append(ps.Warnings, dormantMetricsWarningMsg)
|
||||
}
|
||||
if len(missingMetricQueries) > 0 {
|
||||
missingMetricQuerySet[name] = true
|
||||
// A fully-missing-metric query renders no SQL and returns an empty
|
||||
// result, so flag it explicitly. resolveMetricMetadata only emits a
|
||||
// (dormant) warning for external metrics it has seen before; when it
|
||||
// stays silent — e.g. internal signoz.* metrics — the empty result
|
||||
// would otherwise be unexplained, so attach a clear note naming the
|
||||
// metric(s) the agent referenced.
|
||||
if dormantMetricsWarningMsg == "" {
|
||||
if metricNames := missingMetricNames(env[0]); len(metricNames) > 0 {
|
||||
ps.Warnings = append(ps.Warnings, fmt.Sprintf(
|
||||
"query %q references metric(s) %s with no data available; it will return an empty result",
|
||||
name, strings.Join(metricNames, ", ")))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
req.CompositeQuery.Queries[idx] = env[0]
|
||||
prepared[name] = ps
|
||||
}
|
||||
|
||||
// Phase 2: render the statement for each query that actually executes, and
|
||||
// collect the ClickHouse-bound work (granuleSkipScore/EXPLAIN) to run concurrently.
|
||||
var previewTasks []previewTask
|
||||
for _, query := range req.CompositeQuery.Queries {
|
||||
name := query.GetQueryName()
|
||||
|
||||
if query.GetType() != qbtypes.QueryTypeTraceOperator && dependencyQueries[name] {
|
||||
continue
|
||||
}
|
||||
|
||||
ps := prepared[name]
|
||||
|
||||
// Surface a phase-1 error (e.g. a not-found metric) without rendering.
|
||||
if ps.Error != nil {
|
||||
results[name] = ps
|
||||
continue
|
||||
}
|
||||
// Every aggregation resolved to a missing metric: QueryRange returns an
|
||||
// empty result for this query and renders no SQL. Mirror that.
|
||||
if missingMetricQuerySet[name] {
|
||||
results[name] = ps
|
||||
continue
|
||||
}
|
||||
|
||||
var provider qbtypes.Query
|
||||
switch query.Type {
|
||||
case qbtypes.QueryTypePromQL:
|
||||
promQuery, ok := query.Spec.(qbtypes.PromQuery)
|
||||
if !ok {
|
||||
ps.Error = errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid promql query spec %T", query.Spec)
|
||||
results[name] = ps
|
||||
continue
|
||||
}
|
||||
provider = newPromqlQuery(q.logger, q.promEngine, promQuery, qbtypes.TimeRange{From: req.Start, To: req.End}, req.RequestType, tmplVars)
|
||||
case qbtypes.QueryTypeClickHouseSQL:
|
||||
chQuery, ok := query.Spec.(qbtypes.ClickHouseQuery)
|
||||
if !ok {
|
||||
ps.Error = errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid clickhouse query spec %T", query.Spec)
|
||||
results[name] = ps
|
||||
continue
|
||||
}
|
||||
provider = newchSQLQuery(q.logger, q.telemetryStore, chQuery, nil, qbtypes.TimeRange{From: req.Start, To: req.End}, req.RequestType, tmplVars)
|
||||
case qbtypes.QueryTypeTraceOperator:
|
||||
traceOpQuery, ok := query.Spec.(qbtypes.QueryBuilderTraceOperator)
|
||||
if !ok {
|
||||
ps.Error = errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid trace operator query spec %T", query.Spec)
|
||||
results[name] = ps
|
||||
continue
|
||||
}
|
||||
provider = &traceOperatorQuery{
|
||||
telemetryStore: q.telemetryStore,
|
||||
stmtBuilder: q.traceOperatorStmtBuilder,
|
||||
spec: traceOpQuery,
|
||||
compositeQuery: &req.CompositeQuery,
|
||||
fromMS: uint64(req.Start),
|
||||
toMS: uint64(req.End),
|
||||
kind: req.RequestType,
|
||||
}
|
||||
case qbtypes.QueryTypeBuilder:
|
||||
switch spec := query.Spec.(type) {
|
||||
case qbtypes.QueryBuilderQuery[qbtypes.TraceAggregation]:
|
||||
spec.ShiftBy = extractShiftFromBuilderQuery(spec)
|
||||
timeRange := adjustTimeRangeForShift(spec, qbtypes.TimeRange{From: req.Start, To: req.End}, req.RequestType)
|
||||
provider = newBuilderQuery(q.logger, q.telemetryStore, q.traceStmtBuilder, spec, timeRange, req.RequestType, tmplVars)
|
||||
case qbtypes.QueryBuilderQuery[qbtypes.LogAggregation]:
|
||||
spec.ShiftBy = extractShiftFromBuilderQuery(spec)
|
||||
timeRange := adjustTimeRangeForShift(spec, qbtypes.TimeRange{From: req.Start, To: req.End}, req.RequestType)
|
||||
stmtBuilder := q.logStmtBuilder
|
||||
if spec.Source == telemetrytypes.SourceAudit {
|
||||
stmtBuilder = q.auditStmtBuilder
|
||||
}
|
||||
provider = newBuilderQuery(q.logger, q.telemetryStore, stmtBuilder, spec, timeRange, req.RequestType, tmplVars)
|
||||
case qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation]:
|
||||
spec.ShiftBy = extractShiftFromBuilderQuery(spec)
|
||||
timeRange := adjustTimeRangeForShift(spec, qbtypes.TimeRange{From: req.Start, To: req.End}, req.RequestType)
|
||||
if spec.Source == telemetrytypes.SourceMeter {
|
||||
provider = newBuilderQuery(q.logger, q.telemetryStore, q.meterStmtBuilder, spec, timeRange, req.RequestType, tmplVars)
|
||||
} else {
|
||||
provider = newBuilderQuery(q.logger, q.telemetryStore, q.metricStmtBuilder, spec, timeRange, req.RequestType, tmplVars)
|
||||
}
|
||||
default:
|
||||
ps.Error = errors.NewInvalidInputf(errors.CodeInvalidInput, "unsupported builder spec type %T", query.Spec)
|
||||
results[name] = ps
|
||||
continue
|
||||
}
|
||||
default:
|
||||
ps.Error = errors.NewInvalidInputf(errors.CodeInvalidInput, "unsupported query type %q", query.Type)
|
||||
results[name] = ps
|
||||
continue
|
||||
}
|
||||
|
||||
stmtProvider, ok := provider.(statementProvider)
|
||||
if !ok {
|
||||
ps.Error = errors.NewInternalf(errors.CodeInternal, "query does not support preview")
|
||||
results[name] = ps
|
||||
continue
|
||||
}
|
||||
|
||||
// Build the statement even in validate-only mode: a successful build is
|
||||
// the strongest validation we can do (it parses the filter/group-by and
|
||||
// resolves fields against the schema), and a build error is exactly the
|
||||
// per-query verdict a validation caller wants.
|
||||
stmt, sErr := stmtProvider.Statement(ctx)
|
||||
if sErr != nil {
|
||||
ps.Error = sErr
|
||||
results[name] = ps
|
||||
continue
|
||||
}
|
||||
|
||||
ps.Warnings = append(ps.Warnings, stmt.Warnings...)
|
||||
|
||||
// clickhouse_sql is user-authored raw SQL; rendering only substitutes
|
||||
// variables, so by itself it doesn't prove the SQL is valid. Verify it
|
||||
// parses and binds (tables/columns/types resolve) via EXPLAIN PLAN —
|
||||
// without executing. Builder/PromQL/trace-operator SQL is engine-generated
|
||||
// and well-formed by construction, so this is scoped to clickhouse_sql.
|
||||
if query.Type == qbtypes.QueryTypeClickHouseSQL {
|
||||
if invalidErr, infraErr := q.explainBindCheck(ctx, stmt.Query, stmt.Args); invalidErr != nil {
|
||||
ps.Error = invalidErr
|
||||
results[name] = ps
|
||||
continue
|
||||
} else if infraErr != nil {
|
||||
ps.Warnings = append(ps.Warnings, "could not validate ClickHouse SQL: "+infraErr.Error())
|
||||
}
|
||||
}
|
||||
|
||||
// The query is fully validated by this point (statement built, plus the
|
||||
// clickhouse_sql bind check). Render the underlying statement(s) when the
|
||||
// caller wants them (verbose/explain) or when we need them to compute the
|
||||
// top-level granuleSkipScore (on by default). If none of those apply
|
||||
// (score disabled and not verbose), return just the verdict.
|
||||
needScore := opts.IncludeGranuleSkipScore
|
||||
needExplain := opts.Explain != qbtypes.ExplainVariantNone
|
||||
if !opts.Verbose && !needExplain && !needScore {
|
||||
results[name] = ps
|
||||
continue
|
||||
}
|
||||
|
||||
// Every query exposes its underlying ClickHouse statement(s) uniformly in
|
||||
// Statements. Builder/ClickHouse/trace-operator render exactly one; PromQL
|
||||
// is not SQL — the Prometheus engine issues one statement per metric
|
||||
// selector, captured (without executing) via PreviewStatements.
|
||||
if query.Type == qbtypes.QueryTypePromQL {
|
||||
if pq, ok := provider.(*promqlQuery); ok {
|
||||
sqlStmts, pErr := pq.PreviewStatements(ctx)
|
||||
if pErr != nil {
|
||||
ps.Warnings = append(ps.Warnings, "could not render underlying ClickHouse SQL: "+pErr.Error())
|
||||
} else {
|
||||
for _, s := range sqlStmts {
|
||||
ps.Statements = append(ps.Statements, qbtypes.PreviewStatement{Query: s.Query, Args: s.Args})
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
ps.Statements = []qbtypes.PreviewStatement{{Query: stmt.Query, Args: stmt.Args}}
|
||||
}
|
||||
|
||||
results[name] = ps
|
||||
|
||||
// granuleSkipScore and EXPLAIN both hit ClickHouse. Queue one task per
|
||||
// statement; runPreviewTasks executes them concurrently across queries
|
||||
// after rendering, rather than serializing one query's round trips behind
|
||||
// the next.
|
||||
if needScore || needExplain {
|
||||
for j := range ps.Statements {
|
||||
previewTasks = append(previewTasks, previewTask{name: name, stmtIdx: j, query: ps.Statements[j].Query, args: ps.Statements[j].Args})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
q.runPreviewTasks(ctx, previewTasks, opts, results)
|
||||
|
||||
// granuleSkipScore is on by default, but the rendered statements are only
|
||||
// returned when the caller asked (verbose/explain). So derive the headline
|
||||
// per-query score from the statements (the minimum — the least-selective,
|
||||
// worst-skipping statement, which dominates cost), then drop the statements
|
||||
// from the response unless they were requested.
|
||||
includeStatements := opts.Verbose || opts.Explain != qbtypes.ExplainVariantNone
|
||||
for name, ps := range results {
|
||||
var minScore *float64
|
||||
for i := range ps.Statements {
|
||||
s := ps.Statements[i].GranuleSkipScore
|
||||
if s != nil && (minScore == nil || *s < *minScore) {
|
||||
minScore = s
|
||||
}
|
||||
}
|
||||
if minScore != nil {
|
||||
v := *minScore // copy so the top-level field doesn't alias a statement entry
|
||||
ps.Score = &v
|
||||
}
|
||||
if !includeStatements {
|
||||
ps.Statements = nil
|
||||
}
|
||||
results[name] = ps
|
||||
}
|
||||
|
||||
return &qbtypes.QueryRangePreviewResponse{
|
||||
Queries: results,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// previewTask is one rendered ClickHouse statement queued for ClickHouse-bound
|
||||
// preview work (granuleSkipScore and/or EXPLAIN). stmtIdx is the index into the
|
||||
// query's Statements list that this task's results merge back into.
|
||||
type previewTask struct {
|
||||
name string
|
||||
stmtIdx int
|
||||
query string
|
||||
args []any
|
||||
}
|
||||
|
||||
// runPreviewTasks computes the granuleSkipScore and/or EXPLAIN output for each task
|
||||
// concurrently — every query's ClickHouse round trips are in flight at once
|
||||
// instead of serialized — and merges the outcomes back into previews. A
|
||||
// composite query holds only a handful of queries, so a goroutine per task is
|
||||
// fine without an explicit concurrency bound. Each goroutine writes to its own
|
||||
// slot; the merge into the previews map happens after the wait, single-
|
||||
// threaded, so there are no map races.
|
||||
func (q *querier) runPreviewTasks(ctx context.Context, tasks []previewTask, opts qbtypes.QueryRangePreviewOptions, previews map[string]qbtypes.QueryPreview) {
|
||||
if len(tasks) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
type outcome struct {
|
||||
score *float64
|
||||
explain string
|
||||
warnings []string
|
||||
}
|
||||
outcomes := make([]outcome, len(tasks))
|
||||
|
||||
var wg sync.WaitGroup
|
||||
for i := range tasks {
|
||||
wg.Add(1)
|
||||
go func(i int) {
|
||||
defer wg.Done()
|
||||
t := tasks[i]
|
||||
var out outcome
|
||||
if opts.IncludeGranuleSkipScore {
|
||||
if score, scErr := q.computeGranuleSkipScore(ctx, t.query, t.args); scErr != nil {
|
||||
// Surface the failure instead of silently dropping the score.
|
||||
out.warnings = append(out.warnings, "could not compute query score: "+scErr.Error())
|
||||
} else if score != nil {
|
||||
out.score = score
|
||||
}
|
||||
}
|
||||
if opts.Explain != qbtypes.ExplainVariantNone {
|
||||
if explained, eErr := q.runExplain(ctx, opts.Explain, t.query, t.args); eErr != nil {
|
||||
// Surface the failure instead of silently dropping the output.
|
||||
out.warnings = append(out.warnings, "could not run EXPLAIN: "+eErr.Error())
|
||||
} else {
|
||||
out.explain = explained
|
||||
}
|
||||
}
|
||||
outcomes[i] = out
|
||||
}(i)
|
||||
}
|
||||
wg.Wait()
|
||||
|
||||
for i := range tasks {
|
||||
ps := previews[tasks[i].name]
|
||||
if idx := tasks[i].stmtIdx; idx >= 0 && idx < len(ps.Statements) {
|
||||
if outcomes[i].score != nil {
|
||||
ps.Statements[idx].GranuleSkipScore = outcomes[i].score
|
||||
}
|
||||
if outcomes[i].explain != "" {
|
||||
ps.Statements[idx].Explain = outcomes[i].explain
|
||||
}
|
||||
}
|
||||
ps.Warnings = append(ps.Warnings, outcomes[i].warnings...)
|
||||
previews[tasks[i].name] = ps
|
||||
}
|
||||
}
|
||||
|
||||
// runExplain runs `EXPLAIN <variant> <stmt>` against the telemetry store and
|
||||
// returns the formatted output as a single string with one row per line.
|
||||
//
|
||||
// The column shape differs by variant: most variants (plan, ast, syntax,
|
||||
// pipeline, query_tree) return a single `explain` String column, but ESTIMATE
|
||||
// returns five columns (database, table, parts, rows, marks). So the scan is
|
||||
// driven by the result's column types — each row's columns are scanned into
|
||||
// destinations of the driver-reported types and tab-joined — rather than
|
||||
// assuming a single string (which silently dropped ESTIMATE output before).
|
||||
func (q *querier) runExplain(ctx context.Context, variant qbtypes.ExplainVariant, stmt string, args []any) (string, error) {
|
||||
clause, ok := clickhouseExplainClause(variant)
|
||||
if !ok {
|
||||
return "", errors.NewInvalidInputf(errors.CodeInvalidInput, "unsupported explain variant %q", string(variant))
|
||||
}
|
||||
explainQuery := "EXPLAIN " + clause + " " + stmt
|
||||
rows, err := q.telemetryStore.ClickhouseDB().Query(ctx, explainQuery, args...)
|
||||
if err != nil {
|
||||
return "", errors.WrapInternalf(err, errors.CodeInternal, "failed to run EXPLAIN %s", clause)
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
colTypes := rows.ColumnTypes()
|
||||
multiColumn := len(colTypes) > 1
|
||||
|
||||
var lines []string
|
||||
// For a multi-column variant (ESTIMATE), lead with a header row so the
|
||||
// tab-separated values are readable; single-column variants stay verbatim.
|
||||
if multiColumn {
|
||||
header := make([]string, len(colTypes))
|
||||
for i, ct := range colTypes {
|
||||
header[i] = ct.Name()
|
||||
}
|
||||
lines = append(lines, strings.Join(header, "\t"))
|
||||
}
|
||||
|
||||
for rows.Next() {
|
||||
dest := make([]any, len(colTypes))
|
||||
for i, ct := range colTypes {
|
||||
dest[i] = reflect.New(ct.ScanType()).Interface()
|
||||
}
|
||||
if err := rows.Scan(dest...); err != nil {
|
||||
return "", errors.WrapInternalf(err, errors.CodeInternal, "failed to scan EXPLAIN row")
|
||||
}
|
||||
fields := make([]string, len(dest))
|
||||
for i := range dest {
|
||||
fields[i] = fmt.Sprintf("%v", reflect.ValueOf(dest[i]).Elem().Interface())
|
||||
}
|
||||
lines = append(lines, strings.Join(fields, "\t"))
|
||||
}
|
||||
if err := rows.Err(); err != nil {
|
||||
return "", errors.WrapInternalf(err, errors.CodeInternal, "EXPLAIN row iteration failed")
|
||||
}
|
||||
return strings.Join(lines, "\n"), nil
|
||||
}
|
||||
|
||||
// userFacingClickHouseErrorCodes mirrors PR #10679's userFacingCHCodes: the
|
||||
// ClickHouse error codes that indicate a problem with the query itself (bad SQL,
|
||||
// unknown table/column, …) rather than a server-side/infra failure — i.e. the
|
||||
// ones that should map to invalid input (400) instead of internal (500).
|
||||
//
|
||||
// TODO(#10679): once that PR lands, delete this and have explainBindCheck call
|
||||
// the shared querier.mapClickHouseError so there's a single source of truth.
|
||||
var userFacingClickHouseErrorCodes = map[chproto.Error]bool{
|
||||
chproto.ErrSyntaxError: true,
|
||||
chproto.ErrUnknownTable: true,
|
||||
chproto.ErrUnknownDatabase: true,
|
||||
chproto.ErrUnknownIdentifier: true,
|
||||
chproto.ErrUnknownFunction: true,
|
||||
chproto.ErrUnknownAggregateFunction: true,
|
||||
chproto.ErrUnknownType: true,
|
||||
chproto.ErrUnknownStorage: true,
|
||||
chproto.ErrUnknownElementInAst: true,
|
||||
chproto.ErrUnknownTypeOfQuery: true,
|
||||
chproto.ErrIllegalTypeOfArgument: true,
|
||||
chproto.ErrIllegalColumn: true,
|
||||
chproto.ErrNumberOfArgumentsDoesntMatch: true,
|
||||
chproto.ErrTooManyArgumentsForFunction: true,
|
||||
chproto.ErrTooLessArgumentsForFunction: true,
|
||||
}
|
||||
|
||||
// explainBindCheck validates that a rendered ClickHouse statement parses and
|
||||
// binds (its tables, columns, and types resolve) by running EXPLAIN PLAN
|
||||
// against it without executing it. It distinguishes two failure modes:
|
||||
//
|
||||
// - invalidErr (non-nil): ClickHouse rejected the statement with a user-facing
|
||||
// error code — it's genuinely invalid input (syntax, unknown table/column,
|
||||
// type mismatch). The caller marks the query invalid.
|
||||
// - infraErr (non-nil): the check couldn't run, or ClickHouse failed with a
|
||||
// non-user-facing code (e.g. unreachable, timeout, server-side). The caller
|
||||
// warns rather than falsely marking the query invalid, since validity is
|
||||
// unknown.
|
||||
//
|
||||
// Both nil means the statement is valid.
|
||||
func (q *querier) explainBindCheck(ctx context.Context, stmt string, args []any) (invalidErr error, infraErr error) {
|
||||
rows, err := q.telemetryStore.ClickhouseDB().Query(ctx, "EXPLAIN PLAN "+stmt, args...)
|
||||
if err != nil {
|
||||
var ex *clickhouse.Exception
|
||||
if errors.As(err, &ex) && userFacingClickHouseErrorCodes[chproto.Error(ex.Code)] {
|
||||
return errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid ClickHouse SQL: %s", ex.Message), nil
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
rows.Close()
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// explainPlanNode is the subset of a ClickHouse `EXPLAIN json = 1, indexes = 1`
|
||||
// plan node that granuleSkipScore needs: the node type, its per-index granule funnel,
|
||||
// and its children.
|
||||
type explainPlanNode struct {
|
||||
NodeType string `json:"Node Type"`
|
||||
Indexes []explainPlanIndex `json:"Indexes"`
|
||||
Plans []explainPlanNode `json:"Plans"`
|
||||
}
|
||||
|
||||
// explainPlanIndex is one index step under a ReadFromMergeTree node. The index
|
||||
// steps run in sequence, so the first step's Initial Granules is the candidate
|
||||
// total and the last step's Selected Granules is what survives all pruning.
|
||||
type explainPlanIndex struct {
|
||||
InitialGranules *int64 `json:"Initial Granules"`
|
||||
SelectedGranules *int64 `json:"Selected Granules"`
|
||||
}
|
||||
|
||||
// computeGranuleSkipScore runs `EXPLAIN json = 1, indexes = 1` against the telemetry
|
||||
// store and returns a 0-100 score: the percentage of candidate granules
|
||||
// eliminated by partition, primary-key, and skip-index pruning before any data
|
||||
// is read (higher = more selective, reads less). Granules are summed across
|
||||
// every ReadFromMergeTree node so multi-read queries (e.g. a resource-filter
|
||||
// subquery plus the main read) are scored as a whole. Returns nil — not an
|
||||
// error — when the plan exposes no MergeTree index analysis, so the caller
|
||||
// simply omits the score.
|
||||
func (q *querier) computeGranuleSkipScore(ctx context.Context, stmt string, args []any) (*float64, error) {
|
||||
rows, err := q.telemetryStore.ClickhouseDB().Query(ctx, "EXPLAIN json = 1, indexes = 1 "+stmt, args...)
|
||||
if err != nil {
|
||||
return nil, errors.WrapInternalf(err, errors.CodeInternal, "failed to run EXPLAIN for query score")
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
// json=1 emits the plan as a single JSON document; read every row and join
|
||||
// so we are robust to the driver splitting it across rows.
|
||||
var sb strings.Builder
|
||||
for rows.Next() {
|
||||
var line string
|
||||
if err := rows.Scan(&line); err != nil {
|
||||
return nil, errors.WrapInternalf(err, errors.CodeInternal, "failed to scan EXPLAIN json row")
|
||||
}
|
||||
sb.WriteString(line)
|
||||
sb.WriteByte('\n')
|
||||
}
|
||||
if err := rows.Err(); err != nil {
|
||||
return nil, errors.WrapInternalf(err, errors.CodeInternal, "EXPLAIN json row iteration failed")
|
||||
}
|
||||
|
||||
var plans []struct {
|
||||
Plan explainPlanNode `json:"Plan"`
|
||||
}
|
||||
if err := json.Unmarshal([]byte(sb.String()), &plans); err != nil {
|
||||
return nil, errors.WrapInternalf(err, errors.CodeInternal, "failed to parse EXPLAIN json")
|
||||
}
|
||||
|
||||
var totalInitial, totalSelected int64
|
||||
for i := range plans {
|
||||
accumulateGranules(&plans[i].Plan, &totalInitial, &totalSelected)
|
||||
}
|
||||
if totalInitial <= 0 {
|
||||
// No MergeTree index analysis in the plan — nothing to score.
|
||||
return nil, nil
|
||||
}
|
||||
if totalSelected < 0 {
|
||||
totalSelected = 0
|
||||
}
|
||||
skipped := float64(totalInitial-totalSelected) / float64(totalInitial)
|
||||
if skipped < 0 {
|
||||
skipped = 0
|
||||
}
|
||||
score := math.Round(skipped*100*100) / 100 // percentage, 2 decimal places
|
||||
return &score, nil
|
||||
}
|
||||
|
||||
// accumulateGranules walks the plan tree and, for every ReadFromMergeTree node,
|
||||
// adds its candidate-granule total (first index step's Initial Granules) and
|
||||
// surviving granules (last index step's Selected Granules) to the running sums.
|
||||
func accumulateGranules(node *explainPlanNode, totalInitial, totalSelected *int64) {
|
||||
if node.NodeType == "ReadFromMergeTree" && len(node.Indexes) > 0 {
|
||||
var initial, selected *int64
|
||||
for i := range node.Indexes {
|
||||
if node.Indexes[i].InitialGranules != nil && initial == nil {
|
||||
initial = node.Indexes[i].InitialGranules
|
||||
}
|
||||
if node.Indexes[i].SelectedGranules != nil {
|
||||
selected = node.Indexes[i].SelectedGranules
|
||||
}
|
||||
}
|
||||
if initial != nil && selected != nil {
|
||||
*totalInitial += *initial
|
||||
*totalSelected += *selected
|
||||
}
|
||||
}
|
||||
for i := range node.Plans {
|
||||
accumulateGranules(&node.Plans[i], totalInitial, totalSelected)
|
||||
}
|
||||
}
|
||||
@@ -220,6 +220,68 @@ func (q *promqlQuery) renderVars(query string, vars map[string]qbv5.VariableItem
|
||||
return newQuery.String(), nil
|
||||
}
|
||||
|
||||
// Statement renders the PromQL query string after variable substitution. It
|
||||
// is used by the dry-run/preview path; PromQL queries do not have a
|
||||
// SQL-style argument list.
|
||||
func (q *promqlQuery) Statement(_ context.Context) (*qbv5.Statement, error) {
|
||||
rendered, err := q.renderVars(q.query.Query, q.vars, q.tr.From, q.tr.To)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &qbv5.Statement{Query: rendered}, nil
|
||||
}
|
||||
|
||||
// PreviewStatements returns the underlying ClickHouse statement(s) this PromQL
|
||||
// query would run, captured without executing them. PromQL is evaluated by the
|
||||
// Prometheus engine rather than compiled to one SQL statement: the engine calls
|
||||
// the storage adapter's Select per metric selector, which builds ClickHouse
|
||||
// SQL. We drive the engine with a capturing Storage that records that SQL and
|
||||
// returns empty results, so nothing is read from ClickHouse. Returns nil when
|
||||
// the provider does not support capture (e.g. test doubles).
|
||||
func (q *promqlQuery) PreviewStatements(ctx context.Context) ([]prometheus.CapturedStatement, error) {
|
||||
storer, ok := q.promEngine.(prometheus.StatementCapturer)
|
||||
if !ok {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
rendered, err := q.renderVars(q.query.Query, q.vars, q.tr.From, q.tr.To)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
start := int64(querybuilder.ToNanoSecs(q.tr.From))
|
||||
end := int64(querybuilder.ToNanoSecs(q.tr.To))
|
||||
|
||||
capStorage, recorder := storer.CapturingStorage()
|
||||
qry, err := q.promEngine.Engine().NewRangeQuery(
|
||||
ctx,
|
||||
capStorage,
|
||||
nil,
|
||||
rendered,
|
||||
time.Unix(0, start),
|
||||
time.Unix(0, end),
|
||||
q.query.Step.Duration,
|
||||
)
|
||||
if err != nil {
|
||||
if e := tryEnhancePromQLExecError(err); e != nil {
|
||||
return nil, e
|
||||
}
|
||||
return nil, enhancePromQLError(rendered, err)
|
||||
}
|
||||
defer qry.Close()
|
||||
|
||||
// Evaluate against the capturing storage: this drives a Select per selector
|
||||
// (recording the SQL) but reads no data, so the result is discarded.
|
||||
if res := qry.Exec(ctx); res.Err != nil {
|
||||
if e := tryEnhancePromQLExecError(res.Err); e != nil {
|
||||
return nil, e
|
||||
}
|
||||
return nil, errors.Newf(errors.TypeInternal, errors.CodeInternal, "query execution error: %v", res.Err)
|
||||
}
|
||||
|
||||
return recorder.Statements(), nil
|
||||
}
|
||||
|
||||
func (q *promqlQuery) Execute(ctx context.Context) (*qbv5.Result, error) {
|
||||
|
||||
ctx = ctxtypes.NewContextWithCommentVals(ctx, map[string]string{
|
||||
|
||||
@@ -32,6 +32,12 @@ func (q *traceOperatorQuery) Window() (uint64, uint64) {
|
||||
return q.fromMS, q.toMS
|
||||
}
|
||||
|
||||
// Statement renders the SQL statement for the trace operator query without
|
||||
// executing it. It is used by the dry-run/preview path.
|
||||
func (q *traceOperatorQuery) Statement(ctx context.Context) (*qbtypes.Statement, error) {
|
||||
return q.stmtBuilder.Build(ctx, q.fromMS, q.toMS, q.kind, q.spec, q.compositeQuery)
|
||||
}
|
||||
|
||||
func (q *traceOperatorQuery) Execute(ctx context.Context) (*qbtypes.Result, error) {
|
||||
stmt, err := q.stmtBuilder.Build(
|
||||
ctx,
|
||||
|
||||
@@ -145,7 +145,7 @@ func PrepareWhereClause(query string, opts FilterExprVisitorOpts) (PreparedWhere
|
||||
"Found %d syntax errors while parsing the search expression.",
|
||||
len(parserErrorListener.SyntaxErrors),
|
||||
)
|
||||
additionals := make([]string, len(parserErrorListener.SyntaxErrors))
|
||||
additionals := make([]string, 0, len(parserErrorListener.SyntaxErrors))
|
||||
for _, err := range parserErrorListener.SyntaxErrors {
|
||||
if err.Error() != "" {
|
||||
additionals = append(additionals, err.Error())
|
||||
|
||||
@@ -10,6 +10,7 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/errors"
|
||||
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
|
||||
"github.com/SigNoz/signoz/pkg/valuer"
|
||||
"github.com/swaggest/jsonschema-go"
|
||||
@@ -64,6 +65,137 @@ type QueryRangeResponse struct {
|
||||
QBEvent *QBEvent `json:"-"`
|
||||
}
|
||||
|
||||
// QueryRangePreviewResponse describes the dry-run output of a query range
|
||||
// request. Each entry corresponds to a single query in the composite query.
|
||||
|
||||
type QueryRangePreviewResponse struct {
|
||||
Queries map[string]QueryPreview `json:"queries"`
|
||||
}
|
||||
|
||||
// ExplainVariant identifies one of the ClickHouse EXPLAIN modes that the
|
||||
// preview endpoint can run against a rendered SQL statement.
|
||||
type ExplainVariant string
|
||||
|
||||
const (
|
||||
ExplainVariantNone ExplainVariant = ""
|
||||
// ExplainVariantPlan returns the query execution plan tree (what gets read
|
||||
// and how it's aggregated).
|
||||
ExplainVariantPlan ExplainVariant = "plan"
|
||||
// ExplainVariantEstimate returns ClickHouse's per-table estimate of the
|
||||
// parts/rows/marks the query will read — an absolute cost estimate that
|
||||
// complements the (ratio-based) granuleSkipScore.
|
||||
ExplainVariantEstimate ExplainVariant = "estimate"
|
||||
)
|
||||
|
||||
// QueryRangePreviewOptions carries per-call options for the query range
|
||||
// preview (dry-run) endpoint. The zero value produces a lightweight,
|
||||
// verdict-only preview (valid/error/warnings per query, no rendered SQL).
|
||||
type QueryRangePreviewOptions struct {
|
||||
// Explain selects which ClickHouse EXPLAIN variant to run for each rendered
|
||||
// SQL statement. Leave empty to skip EXPLAIN. Implies Verbose (the EXPLAIN
|
||||
// output attaches to each statement).
|
||||
Explain ExplainVariant
|
||||
// Verbose includes the rendered ClickHouse statement(s) (Statements) in the
|
||||
// response. The default (false) returns only the per-query verdict
|
||||
// (valid/error/warnings) plus the headline GranuleSkipScore — every query is
|
||||
// still fully validated, just not rendered into the response. Requesting
|
||||
// Explain implies Verbose, since EXPLAIN output attaches to each statement.
|
||||
Verbose bool
|
||||
// IncludeGranuleSkipScore computes the GranuleSkipScore. The HTTP endpoint
|
||||
// sets it true by default (the headline top-level score is returned even in
|
||||
// the lightweight, non-verbose response), and only false on ?score=false.
|
||||
// When the response includes statements (Verbose/Explain), each statement
|
||||
// also carries its own GranuleSkipScore and the top-level one is their
|
||||
// minimum. Computing it costs one ClickHouse EXPLAIN per statement.
|
||||
IncludeGranuleSkipScore bool
|
||||
}
|
||||
|
||||
// QueryRangePreviewParams documents the query-string parameters accepted by the
|
||||
// query range preview (dry-run) endpoint.
|
||||
type QueryRangePreviewParams struct {
|
||||
// Explain selects which ClickHouse EXPLAIN variant to run against each
|
||||
// rendered SQL statement. Empty or "false" skips EXPLAIN; "true" maps to
|
||||
// "plan". Allowed: plan (execution plan tree), estimate (parts/rows/marks
|
||||
// to read). Implies verbose.
|
||||
Explain string `query:"explain"`
|
||||
// Verbose, when "true", includes the rendered ClickHouse statement(s) in the
|
||||
// response. The default response is lightweight: the per-query verdict
|
||||
// (valid/error/warnings) plus the top-level granuleSkipScore. Requesting
|
||||
// explain implies verbose.
|
||||
Verbose string `query:"verbose"`
|
||||
// Score controls the granuleSkipScore (granule-skip selectivity, 0-100;
|
||||
// higher is better). It defaults to "true" — the top-level score is returned
|
||||
// even in the lightweight response. Set score=false to skip it (and its
|
||||
// ClickHouse EXPLAIN round trips) for the cheapest validation-only preview.
|
||||
Score string `query:"score"`
|
||||
}
|
||||
|
||||
// PrepareJSONSchema adds description to the QueryRangePreviewResponse schema.
|
||||
func (q *QueryRangePreviewResponse) PrepareJSONSchema(schema *jsonschema.Schema) error {
|
||||
schema.WithDescription("Response from the v5 query range preview (dry-run) endpoint. For each query in the composite query, returns the underlying ClickHouse statement(s) it renders to without executing them (one per PromQL metric selector; exactly one for builder/ClickHouse/trace-operator queries), with optional EXPLAIN output and granule-skip score when requested.")
|
||||
return nil
|
||||
}
|
||||
|
||||
// QueryPreview is the dry-run result for a single query, keyed by query name
|
||||
// in QueryRangePreviewResponse.Queries.
|
||||
type QueryPreview struct {
|
||||
// Valid is the headline verdict for this query: true when it previewed
|
||||
// without error, false when Error is set. It is always present (derived from
|
||||
// Error at marshal time) so an agent can branch on a single boolean instead
|
||||
// of testing for the presence of the error object.
|
||||
Valid bool `json:"valid"`
|
||||
// Error describes why this query is invalid or could not be previewed; nil
|
||||
// when the query previewed successfully. It is the structured form
|
||||
// (code, message, and — when available — suggestions and invalidReferences)
|
||||
// so an agent can act on it programmatically instead of parsing a string.
|
||||
Error error `json:"error,omitempty"`
|
||||
Warnings []string `json:"warnings,omitempty"`
|
||||
// Score is the headline selectivity for this query: the percentage (0-100) of
|
||||
// candidate granules eliminated by partition, primary-key, and skip-index
|
||||
// pruning before any data is read (higher = less data read). It is the
|
||||
// minimum of the per-statement Statements[].GranuleSkipScore values — the
|
||||
// least-selective (worst) underlying statement, which dominates cost.
|
||||
// Returned by default; omitted when ?score=false or no statement reads a
|
||||
// MergeTree table.
|
||||
Score *float64 `json:"score,omitempty"`
|
||||
// Statements are the underlying ClickHouse statement(s) this query renders to,
|
||||
// in execution order. Builder, ClickHouse SQL, and trace-operator queries
|
||||
// render exactly one; a PromQL query renders one per metric selector (the
|
||||
// Prometheus engine issues a statement per selector). Empty for a
|
||||
// validation-only preview, a query that failed to render (see Error), or one
|
||||
// that resolves to no data (a fully-missing metric, see Warnings).
|
||||
Statements []PreviewStatement `json:"statements,omitempty"`
|
||||
}
|
||||
|
||||
// PreviewStatement is one rendered ClickHouse statement the query will execute,
|
||||
// with its bound args and — when requested — its EXPLAIN output and
|
||||
// GranuleSkipScore.
|
||||
type PreviewStatement struct {
|
||||
Query string `json:"query"`
|
||||
Args []any `json:"args,omitempty"`
|
||||
Explain string `json:"explain,omitempty"`
|
||||
GranuleSkipScore *float64 `json:"granuleSkipScore,omitempty"`
|
||||
}
|
||||
|
||||
// MarshalJSON renders Error as the structured error form (code, message and,
|
||||
// when present, suggestions/invalidReferences) instead of the default {} that a
|
||||
// bare error interface produces, so an agent consuming the dry-run can act on it
|
||||
// programmatically.
|
||||
func (p QueryPreview) MarshalJSON() ([]byte, error) {
|
||||
type alias QueryPreview
|
||||
out := struct {
|
||||
alias
|
||||
Error *errors.JSON `json:"error,omitempty"`
|
||||
}{alias: alias(p)}
|
||||
out.alias.Error = nil
|
||||
// Derive the verdict from the error so callers can't desync the two.
|
||||
out.alias.Valid = p.Error == nil
|
||||
if p.Error != nil {
|
||||
out.Error = errors.AsJSON(p.Error)
|
||||
}
|
||||
return json.Marshal(out)
|
||||
}
|
||||
|
||||
var _ jsonschema.Preparer = &QueryRangeResponse{}
|
||||
|
||||
// PrepareJSONSchema adds description to the QueryRangeResponse schema.
|
||||
|
||||
@@ -535,6 +535,68 @@ func (r *QueryRangeRequest) Validate(opts ...ValidationOption) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// ValidateRequestScope validates request-level invariants — time range,
|
||||
// request type, the raw/trace metric-query restriction, non-empty composite
|
||||
// query, unique builder query names, and not-all-disabled — WITHOUT validating
|
||||
// individual query specs, and returns the ValidationOptions for the request
|
||||
// type. The dry-run/preview path uses this so that per-query spec errors can be
|
||||
// attributed to each query (via QueryEnvelope.Validate) instead of aborting the
|
||||
// whole request on the first one, the way Validate does. The normal execution
|
||||
// path keeps using the fail-fast Validate.
|
||||
func (r *QueryRangeRequest) ValidateRequestScope() ([]ValidationOption, error) {
|
||||
if r.RequestType != RequestTypeRawStream && r.Start >= r.End {
|
||||
return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "start time must be before end time")
|
||||
}
|
||||
|
||||
var opts []ValidationOption
|
||||
switch r.RequestType {
|
||||
case RequestTypeRaw, RequestTypeRawStream, RequestTypeTrace, RequestTypeTimeSeries, RequestTypeScalar:
|
||||
opts = GetValidationOptions(r.RequestType)
|
||||
default:
|
||||
return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid request type: %s", r.RequestType).
|
||||
WithAdditional("Valid request types are: raw, timeseries, scalar")
|
||||
}
|
||||
|
||||
if r.RequestType == RequestTypeRaw || r.RequestType == RequestTypeRawStream || r.RequestType == RequestTypeTrace {
|
||||
for _, envelope := range r.CompositeQuery.Queries {
|
||||
if envelope.GetSignal() == telemetrytypes.SignalMetrics {
|
||||
return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "raw request type is not supported for metric queries")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(r.CompositeQuery.Queries) == 0 {
|
||||
return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "at least one query is required")
|
||||
}
|
||||
|
||||
// Builder query names must be unique across the composite query.
|
||||
queryNames := make(map[string]bool)
|
||||
for _, envelope := range r.CompositeQuery.Queries {
|
||||
if envelope.Type == QueryTypeBuilder || envelope.Type == QueryTypeSubQuery {
|
||||
name := envelope.GetQueryName()
|
||||
if name != "" {
|
||||
if queryNames[name] {
|
||||
return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "duplicate query name '%s'", name)
|
||||
}
|
||||
queryNames[name] = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if err := r.validateAllQueriesNotDisabled(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return opts, nil
|
||||
}
|
||||
|
||||
// Validate validates a single query envelope's spec. It is the per-query
|
||||
// counterpart to QueryRangeRequest.ValidateRequestScope, used by the dry-run to
|
||||
// report each query's structural error independently.
|
||||
func (e QueryEnvelope) Validate(opts ...ValidationOption) error {
|
||||
return validateQueryEnvelope(e, opts...)
|
||||
}
|
||||
|
||||
// validateAllQueriesNotDisabled validates that at least one query in the composite query is enabled.
|
||||
func (r *QueryRangeRequest) validateAllQueriesNotDisabled() error {
|
||||
for _, envelope := range r.CompositeQuery.Queries {
|
||||
|
||||
Reference in New Issue
Block a user