Compare commits


1 Commit

Author        SHA1         Message           Date
Tushar Vats   caeca731e6   fix: first draft  2026-05-05 11:11:43 +05:30
48 changed files with 2350 additions and 409 deletions

View File

@@ -9,6 +9,7 @@ var (
FeatureGetMetersFromZeus = featuretypes.MustNewName("get_meters_from_zeus")
FeaturePutMetersInZeus = featuretypes.MustNewName("put_meters_in_zeus")
FeatureUseJSONBody = featuretypes.MustNewName("use_json_body")
FeatureUseScalarCache = featuretypes.MustNewName("use_scalar_cache")
)
func MustNewRegistry() featuretypes.Registry {
@@ -61,6 +62,14 @@ func MustNewRegistry() featuretypes.Registry {
DefaultVariant: featuretypes.MustNewName("disabled"),
Variants: featuretypes.NewBooleanVariants(),
},
&featuretypes.Feature{
Name: FeatureUseScalarCache,
Kind: featuretypes.KindBoolean,
Stage: featuretypes.StageExperimental,
Description: "Controls whether caching for scalar requests is enabled",
DefaultVariant: featuretypes.MustNewName("disabled"),
Variants: featuretypes.NewBooleanVariants(),
},
)
if err != nil {
panic(err)

View File

@@ -15,6 +15,13 @@ import (
"github.com/SigNoz/signoz/pkg/valuer"
)
// cacheKeyVersion is prefixed onto every bucket-cache key. Bump when
// anything that affects the cached payload changes — Fingerprint inputs,
// ScalarStateData layout, AggregateFunction state encoding, AggNames
// semantics, etc. Old entries under the previous prefix are orphaned
// and age out via TTL.
const cacheKeyVersion = "v5.3"
// bucketCache implements the BucketCache interface.
type bucketCache struct {
cache cache.Cache
@@ -192,10 +199,10 @@ func (bc *bucketCache) Put(ctx context.Context, orgID valuer.UUID, q qbtypes.Que
}
// generateCacheKey creates a unique cache key based on query fingerprint.
// Format: <cacheKeyVersion>:query:<fingerprint>. See cacheKeyVersion for
// when to bump.
func (bc *bucketCache) generateCacheKey(q qbtypes.Query) string {
fingerprint := q.Fingerprint()
return fmt.Sprintf("v5:query:%s", fingerprint)
return fmt.Sprintf("%s:query:%s", cacheKeyVersion, q.Fingerprint())
}
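To make the versioned keying concrete, here is a minimal runnable sketch; the fingerprint value is hypothetical (real ones come from Query.Fingerprint()):

package main

import "fmt"

const cacheKeyVersion = "v5.3"

// cacheKey mirrors generateCacheKey above: version prefix, fixed
// "query" segment, then the query fingerprint.
func cacheKey(fingerprint string) string {
	return fmt.Sprintf("%s:query:%s", cacheKeyVersion, fingerprint)
}

func main() {
	fmt.Println(cacheKey("builder&kind=scalar&signal=traces"))
	// Output: v5.3:query:builder&kind=scalar&signal=traces
	// Entries written under the previous "v5:query:..." prefix are
	// never read again and age out via TTL.
}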
// findMissingRangesWithStep identifies time ranges not covered by cached buckets with step alignment.
@@ -445,7 +452,8 @@ func (bc *bucketCache) mergeBuckets(ctx context.Context, buckets []*qbtypes.Cach
switch resultType {
case qbtypes.RequestTypeTimeSeries:
mergedValue = bc.mergeTimeSeriesValues(ctx, buckets)
// Raw and Scalar types are not cached, so no merge needed
case qbtypes.RequestTypeScalar:
mergedValue = bc.mergeScalarStateValues(ctx, buckets)
}
return &qbtypes.Result{
@@ -557,6 +565,24 @@ func (bc *bucketCache) mergeTimeSeriesValues(ctx context.Context, buckets []*qbt
return result
}
// mergeScalarStateValues unmarshals each cached bucket payload into a
// ScalarStateData and concatenates the per-(chunk × group × agg) state
// rows. Metadata adoption is delegated to ScalarStateData.Adopt so this
// and mergeScalarStateRows can't drift. The aggregate registry merge
// runs later, on read, in merge_scalar.go (TRD: scalar caching, Option 2).
func (bc *bucketCache) mergeScalarStateValues(ctx context.Context, buckets []*qbtypes.CachedBucket) *qbtypes.ScalarStateData {
out := &qbtypes.ScalarStateData{}
for _, bucket := range buckets {
var ssd qbtypes.ScalarStateData
if err := json.Unmarshal(bucket.Value, &ssd); err != nil {
bc.logger.ErrorContext(ctx, "failed to unmarshal scalar state data", errors.Attr(err))
continue
}
out.Adopt(&ssd)
}
return out
}
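Adopt's body is not part of this diff. A minimal sketch consistent with how both mergers call it, assuming it adopts metadata on first use and appends rows (the exact set of fields copied is an assumption):

// Hypothetical sketch of ScalarStateData.Adopt; the real method may
// copy more metadata (AggNames, RateMask, Order, Limit, ...).
func (d *ScalarStateData) Adopt(src *ScalarStateData) {
	if src == nil {
		return
	}
	if d.QueryName == "" { // assumption: first non-empty source wins
		d.QueryName = src.QueryName
		d.GroupCols = src.GroupCols
		d.AggCols = src.AggCols
	}
	// Chunk ranges are disjoint by construction, so appending rows is
	// the whole merge at this stage.
	d.Rows = append(d.Rows, src.Rows...)
}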
// isEmptyResult checks if a result is truly empty (no data exists) vs filtered empty (data was filtered out).
func (bc *bucketCache) isEmptyResult(result *qbtypes.Result) (isEmpty bool, isFiltered bool) {
if result.Value == nil {
@@ -599,8 +625,16 @@ func (bc *bucketCache) isEmptyResult(result *qbtypes.Result) (isEmpty bool, isFi
return !hasValues, !hasValues && totalSeries > 0
}
case qbtypes.RequestTypeRaw, qbtypes.RequestTypeScalar, qbtypes.RequestTypeTrace:
// Raw and scalar data are not cached
case qbtypes.RequestTypeScalar:
if ssd, ok := result.Value.(*qbtypes.ScalarStateData); ok {
return len(ssd.Rows) == 0, false
}
// Anything else under RequestTypeScalar (e.g. a *ScalarData
// arriving via the fallback path) is not cacheable.
return true, false
case qbtypes.RequestTypeRaw, qbtypes.RequestTypeTrace:
// Raw and trace data are not cached
return true, false
}
@@ -743,28 +777,24 @@ func (bc *bucketCache) trimResultToFluxBoundary(result *qbtypes.Result, fluxBoun
trimmedResult.Value = trimmedData
}
case qbtypes.RequestTypeRaw, qbtypes.RequestTypeScalar, qbtypes.RequestTypeTrace:
// Don't cache raw or scalar data
case qbtypes.RequestTypeScalar:
// Scalar caching: state blobs have no per-bucket time
// dimension to trim. The chunk-range itself is what gates
// cacheability; pass the value through unchanged.
if _, ok := result.Value.(*qbtypes.ScalarStateData); ok {
trimmedResult.Value = result.Value
return trimmedResult
}
return nil
case qbtypes.RequestTypeRaw, qbtypes.RequestTypeTrace:
// Don't cache raw or trace data
return nil
}
return trimmedResult
}
func min(a, b uint64) uint64 {
if a < b {
return a
}
return b
}
func max(a, b uint64) uint64 {
if a > b {
return a
}
return b
}
// filterResultToTimeRange filters the result to only include values within the requested time range.
func (bc *bucketCache) filterResultToTimeRange(result *qbtypes.Result, startMs, endMs uint64) *qbtypes.Result {
if result == nil || result.Value == nil {

View File

@@ -47,6 +47,10 @@ func (m *mockQuery) Fingerprint() string {
return m.fingerprint
}
func (m *mockQuery) IsCacheable() bool {
return m.fingerprint != ""
}
func (m *mockQuery) Window() (uint64, uint64) {
return m.startMs, m.endMs
}

View File

@@ -3,6 +3,7 @@ package querier
import (
"context"
"encoding/base64"
"encoding/hex"
"fmt"
"log/slog"
"strconv"
@@ -11,6 +12,7 @@ import (
"github.com/ClickHouse/clickhouse-go/v2"
"github.com/SigNoz/signoz/pkg/errors"
"github.com/SigNoz/signoz/pkg/querybuilder"
"github.com/SigNoz/signoz/pkg/telemetrystore"
"github.com/SigNoz/signoz/pkg/telemetrytraces"
"github.com/SigNoz/signoz/pkg/types/ctxtypes"
@@ -29,6 +31,8 @@ type builderQuery[T any] struct {
fromMS uint64
toMS uint64
kind qbtypes.RequestType
opts qbtypes.BuilderQueryOptions
}
var _ qbtypes.Query = (*builderQuery[any])(nil)
@@ -41,6 +45,7 @@ func newBuilderQuery[T any](
tr qbtypes.TimeRange,
kind qbtypes.RequestType,
variables map[string]qbtypes.VariableItem,
opts qbtypes.BuilderQueryOptions,
) *builderQuery[T] {
return &builderQuery[T]{
logger: logger,
@@ -51,21 +56,48 @@ func newBuilderQuery[T any](
fromMS: tr.From,
toMS: tr.To,
kind: kind,
opts: opts,
}
}
func (q *builderQuery[T]) Fingerprint() string {
if (q.spec.Signal == telemetrytypes.SignalTraces ||
q.spec.Signal == telemetrytypes.SignalLogs) && q.kind != qbtypes.RequestTypeTimeSeries {
// No caching for non-timeseries queries
return ""
// IsCacheable reports whether this query should be routed through the
// bucket cache. For traces/logs it gates on request-type and aggregate
// cacheability; metrics are always cacheable.
func (q *builderQuery[T]) IsCacheable() bool {
if q.spec.Signal == telemetrytypes.SignalTraces || q.spec.Signal == telemetrytypes.SignalLogs {
switch q.kind {
case qbtypes.RequestTypeTimeSeries:
return true
case qbtypes.RequestTypeScalar:
if !q.opts.UseScalarState {
return false
}
// HAVING'd scalar queries skip the cache: per-chunk
// HAVING drops groups whose merged aggregate would pass,
// and we don't (yet) re-apply HAVING post-merge in Go.
if q.spec.Having != nil && q.spec.Having.Expression != "" {
return false
}
return allAggsCacheable(q.spec.Aggregations)
default:
return false
}
}
return true
}
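Illustrative outcomes of the gate above, with hypothetical specs:

// signal=traces, kind=time_series                         => cacheable
// signal=traces, kind=scalar, UseScalarState, avg(x)      => cacheable
// signal=traces, kind=scalar, UseScalarState, HAVING ...  => not cacheable (per-chunk HAVING)
// signal=logs,   kind=scalar, UseScalarState, p99(x)      => not cacheable (no state form)
// signal=logs,   kind=raw                                 => not cacheable
// signal=metrics, any kind                                => cacheable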
func (q *builderQuery[T]) Fingerprint() string {
// Create a deterministic fingerprint for builder queries
// This needs to include all fields that affect the query results
parts := []string{"builder"}
// Request kind partitions the cache: scalar requests store
// *ScalarStateData blobs while time-series store *TimeSeriesData.
// Sharing a key would cross-feed the wrong shape into the
// materializer (e.g. scalar-state cache served to a time-series
// request → response collapsed to a single ScalarData row).
parts = append(parts, fmt.Sprintf("kind=%s", q.kind.StringValue()))
// Add signal type
parts = append(parts, fmt.Sprintf("signal=%s", q.spec.Signal.StringValue()))
@@ -150,6 +182,31 @@ func (q *builderQuery[T]) Fingerprint() string {
return strings.Join(parts, "&")
}
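For illustration, two otherwise identical queries now hash to distinct cache keys; the StringValue spellings below are assumed, the differing kind= segment is what matters:

// time-series request: "builder&kind=time_series&signal=traces&..."
// scalar request:      "builder&kind=scalar&signal=traces&..."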
// allAggsCacheable reports whether every aggregation expression resolves
// to an AggrFunc that has a registered state form. Used by IsCacheable
// to gate scalar caching admission.
func allAggsCacheable[T any](aggs []T) bool {
if len(aggs) == 0 {
return false
}
for _, a := range aggs {
var expr string
switch v := any(a).(type) {
case qbtypes.TraceAggregation:
expr = v.Expression
case qbtypes.LogAggregation:
expr = v.Expression
default:
return false
}
af, ok := querybuilder.ExtractOuterAggName(expr)
if !ok || !af.Cacheable {
return false
}
}
return true
}
func fingerprintGroupByKey(gb qbtypes.GroupByKey) string {
return fingerprintFieldKey(gb.TelemetryFieldKey)
}
@@ -199,7 +256,17 @@ func (q *builderQuery[T]) Execute(ctx context.Context) (*qbtypes.Result, error)
return q.executeWindowList(ctx)
}
stmt, err := q.stmtBuilder.Build(ctx, q.fromMS, q.toMS, q.kind, q.spec, q.variables)
// State-mode SQL is only emitted when the bucket cache will
// consume it — i.e. the query opted in via UseScalarState AND
// every aggregate has a registered StateName. For everything
// else (UseScalarState off, or aggregates like p99/countDistinct
// that have no state form), fall back to plain aggregates so
// RewriteWithState doesn't fail with ErrAggregateNotStateCacheable.
stmtOpts := qbtypes.NewStatementBuilderOptions()
if !(q.opts.UseScalarState && q.IsCacheable()) {
stmtOpts = stmtOpts.WithSkipScalarState()
}
stmt, err := q.stmtBuilder.Build(ctx, q.fromMS, q.toMS, q.kind, q.spec, q.variables, stmtOpts)
if err != nil {
return nil, err
}
@@ -260,9 +327,44 @@ func (q *builderQuery[T]) executeWithContext(ctx context.Context, query string,
kind = qbtypes.RequestTypeTimeSeries
}
payload, err := consume(rows, kind, queryWindow, q.spec.StepInterval, q.spec.Name)
if err != nil {
return nil, err
var payload any
// Match the SQL-emit gate in Execute: state SQL is only emitted
// when UseScalarState && IsCacheable (and signal is traces/logs).
// Metrics' IsCacheable is true unconditionally, so we can't gate
// on IsCacheable alone — the metrics builders don't emit state
// columns and reading them as bytes would fail.
if q.kind == qbtypes.RequestTypeScalar && q.opts.UseScalarState && q.IsCacheable() {
// State-mode SQL emits __result_<idx> AggregateFunction blob
// columns; scan them as raw bytes and populate the QB
// aggregate names so the materializer can find the matching
// scalarstate.Aggregate at merge time.
ssd, perr := readAsScalarState(rows, q.spec.Name)
if perr != nil {
return nil, perr
}
ssd.AggNames = aggNamesFromSpec(q.spec.Aggregations)
ssd.RateMask = rateMaskFromSpec(q.spec.Aggregations)
ssd.Order = q.spec.Order
ssd.Limit = q.spec.Limit
if len(ssd.Rows) > 0 {
r0 := ssd.Rows[0]
aggName := ""
if r0.AggIdx < len(ssd.AggNames) {
aggName = ssd.AggNames[r0.AggIdx]
}
q.logger.InfoContext(ctx, "scalar state sample",
slog.String("agg", aggName),
slog.Int("aggIdx", r0.AggIdx),
slog.Int("len", len(r0.State)),
slog.String("hex", hex.EncodeToString(r0.State)),
)
}
payload = ssd
} else {
payload, err = consume(rows, kind, queryWindow, q.spec.StepInterval, q.spec.Name)
if err != nil {
return nil, err
}
}
return &qbtypes.Result{
@@ -276,6 +378,52 @@ func (q *builderQuery[T]) executeWithContext(ctx context.Context, query string,
}, nil
}
// aggNamesFromSpec extracts the underlying ClickHouse aggregate name
// (e.g. "avg", "count", "sum") per aggregation expression in spec order.
// We use AggrFunc.FuncName rather than the QB-facing Name because rate-style
// aggregates (rate_avg, rate_sum, …) share the on-wire state of their
// non-rate counterpart — rate_avg's state is avgState, rate's is countState
// — and the scalarstate registry is keyed by the underlying CH aggregate.
// The rate division is applied separately, post-merge, via RateMask.
// Returns "" for any aggregation the parser cannot resolve — the
// materializer rejects "" entries with a clear error.
func aggNamesFromSpec[T any](aggs []T) []string {
out := make([]string, len(aggs))
for i, a := range aggs {
var expr string
switch v := any(a).(type) {
case qbtypes.TraceAggregation:
expr = v.Expression
case qbtypes.LogAggregation:
expr = v.Expression
}
if af, ok := querybuilder.ExtractOuterAggName(expr); ok {
out[i] = strings.ToLower(af.FuncName)
}
}
return out
}
// rateMaskFromSpec returns a bool per aggregation: true when the outer
// aggregate is rate-style (Rate flag set on AggrFunc). Drives the
// post-merge `Final / windowSeconds` step in materializeScalarData.
func rateMaskFromSpec[T any](aggs []T) []bool {
out := make([]bool, len(aggs))
for i, a := range aggs {
var expr string
switch v := any(a).(type) {
case qbtypes.TraceAggregation:
expr = v.Expression
case qbtypes.LogAggregation:
expr = v.Expression
}
if af, ok := querybuilder.ExtractOuterAggName(expr); ok {
out[i] = af.Rate
}
}
return out
}
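A worked example of the two extractors for a hypothetical spec with aggregations [count(), rate_avg(duration_nano)] (per the AggrFunc table in this PR, rate_avg has FuncName "avg" and Rate=true):

// aggNamesFromSpec => ["count", "avg"]  (rate_avg shares avg's state)
// rateMaskFromSpec => [false, true]     (division applied post-merge)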
func (q *builderQuery[T]) executeWindowList(ctx context.Context) (*qbtypes.Result, error) {
isAsc := len(q.spec.Order) > 0 &&
strings.ToLower(string(q.spec.Order[0].Direction.StringValue())) == "asc"
@@ -365,7 +513,7 @@ func (q *builderQuery[T]) executeWindowList(ctx context.Context) (*qbtypes.Resul
q.spec.Offset = 0
q.spec.Limit = need
stmt, err := q.stmtBuilder.Build(ctx, r.fromNS/1e6, r.toNS/1e6, q.kind, q.spec, q.variables)
stmt, err := q.stmtBuilder.Build(ctx, r.fromNS/1e6, r.toNS/1e6, q.kind, q.spec, q.variables, qbtypes.NewStatementBuilderOptions())
if err != nil {
return nil, err
}

View File

@@ -59,6 +59,8 @@ func (q *chSQLQuery) Fingerprint() string {
return ""
}
func (q *chSQLQuery) IsCacheable() bool { return false }
func (q *chSQLQuery) Window() (uint64, uint64) { return q.fromMS, q.toMS }
// TODO(srikanthccv): cleanup the templating logic.

View File

@@ -1,6 +1,7 @@
package querier
import (
"encoding/hex"
"fmt"
"math"
"reflect"
@@ -331,6 +332,104 @@ func readAsScalar(rows driver.Rows, queryName string) (*qbtypes.ScalarData, erro
}, nil
}
// readAsScalarState scans rows produced by a per-chunk scalar-state SQL.
// Group-by columns are scanned as their natural Go type; aggregation
// columns named __result_<idx> hold hex-encoded AggregateFunction blobs
// (the SQL emitter wraps the state in hex(...) because clickhouse-go
// cannot decode AggregateFunction columns directly). We scan the hex
// string and decode it back to raw state bytes for pkg/scalarstate.
func readAsScalarState(rows driver.Rows, queryName string) (*qbtypes.ScalarStateData, error) {
colNames := rows.Columns()
colTypes := rows.ColumnTypes()
type colKind int
const (
colGroup colKind = iota
colState
)
kinds := make([]colKind, len(colNames))
stateAggIdx := make([]int, len(colNames))
groupCols := make([]*qbtypes.ColumnDescriptor, 0, len(colNames))
aggCols := make([]*qbtypes.ColumnDescriptor, 0, len(colNames))
groupColIdx := make([]int, 0, len(colNames))
for i, name := range colNames {
if m := aggRe.FindStringSubmatch(name); m != nil {
kinds[i] = colState
id, _ := strconv.Atoi(m[1])
stateAggIdx[i] = id
aggCols = append(aggCols, &qbtypes.ColumnDescriptor{
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{Name: name},
QueryName: queryName,
AggregationIndex: int64(id),
Type: qbtypes.ColumnTypeAggregation,
})
continue
}
kinds[i] = colGroup
groupCols = append(groupCols, &qbtypes.ColumnDescriptor{
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{Name: name},
QueryName: queryName,
Type: qbtypes.ColumnTypeGroup,
})
groupColIdx = append(groupColIdx, i)
}
scan := make([]any, len(colTypes))
for i := range scan {
if kinds[i] == colState {
var s string
scan[i] = &s
} else {
scan[i] = reflect.New(colTypes[i].ScanType()).Interface()
}
}
out := &qbtypes.ScalarStateData{
QueryName: queryName,
GroupCols: groupCols,
AggCols: aggCols,
// AggNames is populated by the caller (which knows the QB
// aggregate name from the spec) — readAsScalarState only sees
// the SQL column aliases.
}
for rows.Next() {
if err := rows.Scan(scan...); err != nil {
return nil, err
}
groupKey := make([]any, 0, len(groupColIdx))
for _, ci := range groupColIdx {
groupKey = append(groupKey, derefValue(scan[ci]))
}
for i, k := range kinds {
if k != colState {
continue
}
sp, ok := scan[i].(*string)
if !ok || sp == nil {
continue
}
b, err := hex.DecodeString(*sp)
if err != nil {
return nil, fmt.Errorf("scalar state: hex-decode __result_%d: %w", stateAggIdx[i], err)
}
out.Rows = append(out.Rows, qbtypes.ScalarStateRow{
GroupKey: groupKey,
AggIdx: stateAggIdx[i],
State: b,
})
}
}
if err := rows.Err(); err != nil {
return nil, err
}
return out, nil
}
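For reference, the row shape readAsScalarState consumes, sketched as a Go constant; table and column names are illustrative, only the hex(...State...) AS __result_<idx> pattern is load-bearing:

// Hypothetical per-chunk state SQL; the real emitter lives in the
// statement builders and may differ in table/column naming.
const exampleStateSQL = `
SELECT
    service_name,                                          -- group col, natural type
    hex(avgState(toFloat64(duration_nano))) AS __result_0  -- state blob, hex-encoded
FROM traces
WHERE timestamp >= ? AND timestamp < ?
GROUP BY service_name`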
func derefValue(v any) any {
if v == nil {
return nil

pkg/querier/merge_scalar.go (new file, +297 lines)
View File

@@ -0,0 +1,297 @@
package querier
import (
"fmt"
"math"
"sort"
"strings"
"github.com/SigNoz/signoz/pkg/errors"
"github.com/SigNoz/signoz/pkg/scalarstate"
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
)
// mergeScalarStateRows concatenates state rows from cached and fresh
// scalar-state results. Chunk ranges are disjoint by construction so a
// plain append is correct — the per-aggregate Go merge runs later, in
// materializeScalarResult. Metadata adoption is delegated to
// ScalarStateData.Adopt so this and the cache-side merger can't drift.
func (q *querier) mergeScalarStateRows(cachedValue any, fresh []*qbtypes.Result) *qbtypes.ScalarStateData {
out := &qbtypes.ScalarStateData{}
if ssd, ok := cachedValue.(*qbtypes.ScalarStateData); ok {
out.Adopt(ssd)
}
for _, r := range fresh {
if r == nil {
continue
}
if ssd, ok := r.Value.(*qbtypes.ScalarStateData); ok {
out.Adopt(ssd)
}
}
return out
}
// materializeIfScalarState converts a Result carrying a *ScalarStateData
// (the cache-side shape) into one carrying *ScalarData (the API shape)
// via the Go-side decode + merge + final pipeline. For any other shape
// the result is returned unchanged. windowSec is the full user-facing
// query window in seconds, used by rate aggregates at finalize time.
func (q *querier) materializeIfScalarState(r *qbtypes.Result, windowSec uint64) (*qbtypes.Result, error) {
if r == nil {
return nil, nil
}
if _, ok := r.Value.(*qbtypes.ScalarStateData); !ok {
return r, nil
}
return q.materializeScalarResult(r, windowSec)
}
// materializeScalarResult turns a Result whose Value is a *ScalarStateData
// (the cache shape) into a Result whose Value is a *ScalarData (the API
// shape). It runs the Go-side decode + merge + final per group + agg via
// the scalarstate registry. If any aggregate lacks a registered Go
// merger, returns an error so the caller can fall back to direct
// execution.
func (q *querier) materializeScalarResult(r *qbtypes.Result, windowSec uint64) (*qbtypes.Result, error) {
if r == nil {
return nil, nil
}
ssd, ok := r.Value.(*qbtypes.ScalarStateData)
if !ok || ssd == nil {
// Already materialized or wrong shape — pass through.
return r, nil
}
scalar, err := materializeScalarData(ssd, windowSec)
if err != nil {
return nil, err
}
return &qbtypes.Result{
Type: qbtypes.RequestTypeScalar,
Value: scalar,
Stats: r.Stats,
Warnings: r.Warnings,
WarningsDocURL: r.WarningsDocURL,
}, nil
}
// materializeScalarData groups state rows by GroupKey, decodes each
// per-aggregate state, runs the registered Go merger, and assembles the
// flat tabular ScalarData the API consumers expect. One row per unique
// group key with one column per group_by + one column per aggregation.
func materializeScalarData(ssd *qbtypes.ScalarStateData, windowSec uint64) (*qbtypes.ScalarData, error) {
// Resolve aggregate handlers up front so we fail loudly before
// touching any blob bytes.
aggs := make([]scalarstate.Aggregate, len(ssd.AggNames))
for i, name := range ssd.AggNames {
a, ok := scalarstate.Lookup(name)
if !ok {
return nil, errors.NewInternalf(errors.CodeInternal, "scalar state: no registered aggregate for %q", name)
}
aggs[i] = a
}
// Index: groupKeyString -> aggIdx -> []State
type groupBucket struct {
key []any
states map[int][]scalarstate.State
}
groups := map[string]*groupBucket{}
order := make([]string, 0)
for _, row := range ssd.Rows {
if row.AggIdx < 0 || row.AggIdx >= len(aggs) {
return nil, errors.NewInternalf(errors.CodeInternal, "scalar state: aggIdx %d out of range (have %d aggs)", row.AggIdx, len(aggs))
}
st, err := aggs[row.AggIdx].Decode(row.State)
if err != nil {
return nil, fmt.Errorf("scalar state: decode (agg=%s): %w", ssd.AggNames[row.AggIdx], err)
}
k := groupKeyString(row.GroupKey)
gb, ok := groups[k]
if !ok {
gb = &groupBucket{
key: cloneAnySlice(row.GroupKey),
states: map[int][]scalarstate.State{},
}
groups[k] = gb
order = append(order, k)
}
gb.states[row.AggIdx] = append(gb.states[row.AggIdx], st)
}
out := &qbtypes.ScalarData{
QueryName: ssd.QueryName,
}
out.Columns = append(out.Columns, ssd.GroupCols...)
out.Columns = append(out.Columns, ssd.AggCols...)
for _, k := range order {
gb := groups[k]
row := make([]any, 0, len(ssd.GroupCols)+len(ssd.AggCols))
row = append(row, gb.key...)
for i, agg := range aggs {
states := gb.states[i]
if len(states) == 0 {
row = append(row, nil)
continue
}
merged, err := agg.Merge(states)
if err != nil {
return nil, fmt.Errorf("scalar state: merge (agg=%s): %w", ssd.AggNames[i], err)
}
final, err := agg.Final(merged)
if err != nil {
return nil, fmt.Errorf("scalar state: final (agg=%s): %w", ssd.AggNames[i], err)
}
if i < len(ssd.RateMask) && ssd.RateMask[i] {
final = applyRate(final, windowSec)
}
// JSON can't encode NaN/±Inf — coerce to nil so the
// response marshals cleanly. Mirrors the time-series
// consume path's drop in readAsTimeSeries.
if f, ok := final.(float64); ok && (math.IsNaN(f) || math.IsInf(f, 0)) {
final = nil
}
row = append(row, final)
}
out.Data = append(out.Data, row)
}
applyOrderAndLimit(out, ssd.Order, ssd.Limit)
return out, nil
}
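A worked pass through the pipeline above, assuming AggNames = ["avg"], a single group key ["api"], and two cached chunks (values hypothetical):

// chunk A state: avgState{Num: 30, Den: 3}
// chunk B state: avgState{Num: 10, Den: 1}
// Merge => avgState{Num: 40, Den: 4}
// Final => 40 / 4 = 10.0
// Were RateMask[0] true with windowSec = 20, applyRate would then
// yield 10.0 / 20 = 0.5.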
// applyOrderAndLimit sorts data rows by the requested order keys and
// truncates to limit. Skipping ORDER BY/LIMIT in the per-chunk SQL is
// safe only because full per-group state is available at this step
// (TRD: scalar caching, Option 2).
// Default ordering (when no Order is supplied) is descending by the
// first aggregation, matching the existing non-cached path.
func applyOrderAndLimit(d *qbtypes.ScalarData, order []qbtypes.OrderBy, limit int) {
if len(d.Data) == 0 {
return
}
// Resolve each Order key to a column index in d.Columns. An
// unresolved key is silently skipped — same forgiving behavior as
// the SQL ORDER BY path.
type sortKey struct {
colIdx int
desc bool
}
keys := make([]sortKey, 0, len(order))
for _, o := range order {
idx := lookupColumnIdx(d.Columns, o.Key.Name)
if idx < 0 {
continue
}
keys = append(keys, sortKey{colIdx: idx, desc: strings.EqualFold(o.Direction.StringValue(), "desc")})
}
// Default: descending by the first aggregation column (matches
// the SQL fallback `ORDER BY __result_0 DESC`).
if len(keys) == 0 {
for i, c := range d.Columns {
if c.Type == qbtypes.ColumnTypeAggregation {
keys = append(keys, sortKey{colIdx: i, desc: true})
break
}
}
}
if len(keys) > 0 {
sort.SliceStable(d.Data, func(i, j int) bool {
for _, k := range keys {
cmp := compareAny(d.Data[i][k.colIdx], d.Data[j][k.colIdx])
if cmp == 0 {
continue
}
if k.desc {
return cmp > 0
}
return cmp < 0
}
return false
})
}
if limit > 0 && len(d.Data) > limit {
d.Data = d.Data[:limit]
}
}
// lookupColumnIdx returns the index of the column whose Name matches
// name (the __result_<idx> alias matches directly as well). Returns
// -1 if not found.
func lookupColumnIdx(cols []*qbtypes.ColumnDescriptor, name string) int {
for i, c := range cols {
if c.Name == name {
return i
}
}
return -1
}
// compareAny returns -1, 0, +1 for v1 < v2, ==, > using numeric
// comparison when both are numeric; otherwise falls back to string
// comparison.
func compareAny(a, b any) int {
af, aOk := toFloat64(a)
bf, bOk := toFloat64(b)
if aOk && bOk {
switch {
case af < bf:
return -1
case af > bf:
return 1
default:
return 0
}
}
as := fmt.Sprint(a)
bs := fmt.Sprint(b)
switch {
case as < bs:
return -1
case as > bs:
return 1
default:
return 0
}
}
// applyRate divides a finalized aggregate by the full query window in
// seconds. Used for rate-style aggregates (rate, rate_sum, rate_avg,
// rate_min, rate_max). Returns NaN when the window is zero. Always
// returns float64 — rate inherently has time-1 units regardless of the
// underlying aggregate's type.
func applyRate(v any, windowSec uint64) any {
if windowSec == 0 {
return math.NaN()
}
if f, ok := toFloat64(v); ok {
return f / float64(windowSec)
}
return v
}
// toFloat64 lives in postprocess.go and is reused here.
func groupKeyString(vals []any) string {
if len(vals) == 0 {
return ""
}
var sb strings.Builder
for i, v := range vals {
if i > 0 {
sb.WriteByte(0x1f) // unit separator — won't collide with string values
}
fmt.Fprintf(&sb, "%v", v)
}
return sb.String()
}
func cloneAnySlice(in []any) []any {
out := make([]any, len(in))
copy(out, in)
return out
}

View File

@@ -127,6 +127,8 @@ func (q *promqlQuery) Fingerprint() string {
return strings.Join(parts, "&")
}
func (q *promqlQuery) IsCacheable() bool { return true }
func (q *promqlQuery) Window() (uint64, uint64) {
return q.tr.From, q.tr.To
}

View File

@@ -16,11 +16,13 @@ import (
"github.com/SigNoz/signoz/pkg/errors"
"github.com/SigNoz/signoz/pkg/factory"
"github.com/SigNoz/signoz/pkg/flagger"
"github.com/SigNoz/signoz/pkg/prometheus"
"github.com/SigNoz/signoz/pkg/query-service/utils"
"github.com/SigNoz/signoz/pkg/querybuilder"
"github.com/SigNoz/signoz/pkg/telemetrystore"
"github.com/SigNoz/signoz/pkg/types/ctxtypes"
"github.com/SigNoz/signoz/pkg/types/featuretypes"
"github.com/SigNoz/signoz/pkg/types/instrumentationtypes"
"github.com/SigNoz/signoz/pkg/types/metrictypes"
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
@@ -46,6 +48,7 @@ type querier struct {
traceOperatorStmtBuilder qbtypes.TraceOperatorStatementBuilder
bucketCache BucketCache
liveDataRefresh time.Duration
flagger flagger.Flagger
}
var _ Querier = (*querier)(nil)
@@ -62,6 +65,7 @@ func New(
meterStmtBuilder qbtypes.StatementBuilder[qbtypes.MetricAggregation],
traceOperatorStmtBuilder qbtypes.TraceOperatorStatementBuilder,
bucketCache BucketCache,
flagger flagger.Flagger,
) *querier {
querierSettings := factory.NewScopedProviderSettings(settings, "github.com/SigNoz/signoz/pkg/querier")
return &querier{
@@ -77,6 +81,7 @@ func New(
traceOperatorStmtBuilder: traceOperatorStmtBuilder,
bucketCache: bucketCache,
liveDataRefresh: 5 * time.Second,
flagger: flagger,
}
}
@@ -358,7 +363,11 @@ func (q *querier) QueryRange(ctx context.Context, orgID valuer.UUID, req *qbtype
case qbtypes.QueryBuilderQuery[qbtypes.TraceAggregation]:
spec.ShiftBy = extractShiftFromBuilderQuery(spec)
timeRange := adjustTimeRangeForShift(spec, qbtypes.TimeRange{From: req.Start, To: req.End}, req.RequestType)
bq := newBuilderQuery(q.logger, q.telemetryStore, q.traceStmtBuilder, spec, timeRange, req.RequestType, tmplVars)
options := qbtypes.NewBuilderQueryOptions()
if q.flagger.BooleanOrEmpty(ctx, flagger.FeatureUseScalarCache, featuretypes.NewFlaggerEvaluationContext(orgID)) {
options = options.WithUseScalarState()
}
bq := newBuilderQuery(q.logger, q.telemetryStore, q.traceStmtBuilder, spec, timeRange, req.RequestType, tmplVars, options)
queries[spec.Name] = bq
steps[spec.Name] = spec.StepInterval
case qbtypes.QueryBuilderQuery[qbtypes.LogAggregation]:
@@ -368,7 +377,11 @@ func (q *querier) QueryRange(ctx context.Context, orgID valuer.UUID, req *qbtype
if spec.Source == telemetrytypes.SourceAudit {
stmtBuilder = q.auditStmtBuilder
}
bq := newBuilderQuery(q.logger, q.telemetryStore, stmtBuilder, spec, timeRange, req.RequestType, tmplVars)
options := qbtypes.NewBuilderQueryOptions()
if q.flagger.BooleanOrEmpty(ctx, flagger.FeatureUseScalarCache, featuretypes.NewFlaggerEvaluationContext(orgID)) {
options = options.WithUseScalarState()
}
bq := newBuilderQuery(q.logger, q.telemetryStore, stmtBuilder, spec, timeRange, req.RequestType, tmplVars, options)
queries[spec.Name] = bq
steps[spec.Name] = spec.StepInterval
case qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation]:
@@ -412,9 +425,9 @@ func (q *querier) QueryRange(ctx context.Context, orgID valuer.UUID, req *qbtype
if spec.Source == telemetrytypes.SourceMeter {
event.Source = telemetrytypes.SourceMeter.StringValue()
bq = newBuilderQuery(q.logger, q.telemetryStore, q.meterStmtBuilder, spec, timeRange, req.RequestType, tmplVars)
bq = newBuilderQuery(q.logger, q.telemetryStore, q.meterStmtBuilder, spec, timeRange, req.RequestType, tmplVars, qbtypes.NewBuilderQueryOptions())
} else {
bq = newBuilderQuery(q.logger, q.telemetryStore, q.metricStmtBuilder, spec, timeRange, req.RequestType, tmplVars)
bq = newBuilderQuery(q.logger, q.telemetryStore, q.metricStmtBuilder, spec, timeRange, req.RequestType, tmplVars, qbtypes.NewBuilderQueryOptions())
}
queries[spec.Name] = bq
@@ -565,7 +578,7 @@ func (q *querier) QueryRawStream(ctx context.Context, orgID valuer.UUID, req *qb
"id": {
Value: updatedLogID,
},
})
}, qbtypes.NewBuilderQueryOptions())
queries[spec.Name] = bq
qbResp, qbErr := q.run(ctx, orgID, queries, req, nil, event, nil)
@@ -642,11 +655,11 @@ func (q *querier) run(
for name, query := range qs {
// Skip cache if NoCache is set, or if cache is not available
if req.NoCache || q.bucketCache == nil || query.Fingerprint() == "" {
if req.NoCache || q.bucketCache == nil || !query.IsCacheable() {
if req.NoCache {
q.logger.DebugContext(ctx, "NoCache flag set, bypassing cache", slog.String("query", name))
} else {
q.logger.InfoContext(ctx, "no bucket cache or fingerprint, executing query", slog.String("fingerprint", query.Fingerprint()))
q.logger.InfoContext(ctx, "query not cacheable, executing directly", slog.String("query", name))
}
result, err := query.Execute(ctx)
qbEvent.HasData = qbEvent.HasData || hasData(result)
@@ -739,22 +752,31 @@ func (q *querier) executeWithCache(ctx context.Context, orgID valuer.UUID, query
// Get cached data and missing ranges
cachedResult, missingRanges := q.bucketCache.GetMissRanges(ctx, orgID, query, step)
startMs, endMs := query.Window()
windowSec := (endMs - startMs) / 1000
// If no missing ranges, return cached result
if len(missingRanges) == 0 && cachedResult != nil {
return cachedResult, nil
return q.materializeIfScalarState(cachedResult, windowSec)
}
// If entire range is missing, execute normally
// If entire range is missing, execute through createRangedQuery so
// scalar-state mode is applied uniformly when applicable. For
// non-scalar queries the clone produces identical SQL to the
// original.
if cachedResult == nil && len(missingRanges) == 1 {
startMs, endMs := query.Window()
if missingRanges[0].From == startMs && missingRanges[0].To == endMs {
result, err := query.Execute(ctx)
execQuery := q.createRangedQuery(ctx, orgID, query, *missingRanges[0])
if execQuery == nil {
execQuery = query
}
result, err := execQuery.Execute(ctx)
if err != nil {
return nil, err
}
// Store in cache for future use
q.bucketCache.Put(ctx, orgID, query, step, result)
return result, nil
if !result.IsNotCacheable {
q.bucketCache.Put(ctx, orgID, query, step, result)
}
return q.materializeIfScalarState(result, windowSec)
}
}
@@ -779,7 +801,7 @@ func (q *querier) executeWithCache(ctx context.Context, orgID valuer.UUID, query
defer func() { <-sem }()
// Create a new query with the missing time range
rangedQuery := q.createRangedQuery(query, *tr)
rangedQuery := q.createRangedQuery(ctx, orgID, query, *tr)
if rangedQuery == nil {
errs[idx] = errors.NewInternalf(errors.CodeInternal, "failed to create ranged query for range %d-%d", tr.From, tr.To)
return
@@ -832,13 +854,15 @@ func (q *querier) executeWithCache(ctx context.Context, orgID valuer.UUID, query
mergedResult.Stats.DurationMS += totalStats.DurationMS
// Store merged result in cache
q.bucketCache.Put(ctx, orgID, query, step, mergedResult)
if !mergedResult.IsNotCacheable {
q.bucketCache.Put(ctx, orgID, query, step, mergedResult)
}
return mergedResult, nil
return q.materializeIfScalarState(mergedResult, windowSec)
}
// createRangedQuery creates a copy of the query with a different time range.
func (q *querier) createRangedQuery(originalQuery qbtypes.Query, timeRange qbtypes.TimeRange) qbtypes.Query {
func (q *querier) createRangedQuery(ctx context.Context, orgID valuer.UUID, originalQuery qbtypes.Query, timeRange qbtypes.TimeRange) qbtypes.Query {
// this is called in a goroutine, so we create a copy of the query to avoid race conditions
switch qt := originalQuery.(type) {
case *promqlQuery:
@@ -855,7 +879,11 @@ func (q *querier) createRangedQuery(originalQuery qbtypes.Query, timeRange qbtyp
specCopy := qt.spec.Copy()
specCopy.ShiftBy = extractShiftFromBuilderQuery(specCopy)
adjustedTimeRange := adjustTimeRangeForShift(specCopy, timeRange, qt.kind)
return newBuilderQuery(q.logger, q.telemetryStore, q.traceStmtBuilder, specCopy, adjustedTimeRange, qt.kind, qt.variables)
opts := qbtypes.NewBuilderQueryOptions()
if q.flagger.BooleanOrEmpty(ctx, flagger.FeatureUseScalarCache, featuretypes.NewFlaggerEvaluationContext(orgID)) {
opts = opts.WithUseScalarState()
}
return newBuilderQuery(q.logger, q.telemetryStore, q.traceStmtBuilder, specCopy, adjustedTimeRange, qt.kind, qt.variables, opts)
case *builderQuery[qbtypes.LogAggregation]:
specCopy := qt.spec.Copy()
@@ -865,16 +893,20 @@ func (q *querier) createRangedQuery(originalQuery qbtypes.Query, timeRange qbtyp
if qt.spec.Source == telemetrytypes.SourceAudit {
shiftStmtBuilder = q.auditStmtBuilder
}
return newBuilderQuery(q.logger, q.telemetryStore, shiftStmtBuilder, specCopy, adjustedTimeRange, qt.kind, qt.variables)
opts := qbtypes.NewBuilderQueryOptions()
if q.flagger.BooleanOrEmpty(ctx, flagger.FeatureUseScalarCache, featuretypes.NewFlaggerEvaluationContext(orgID)) {
opts = opts.WithUseScalarState()
}
return newBuilderQuery(q.logger, q.telemetryStore, shiftStmtBuilder, specCopy, adjustedTimeRange, qt.kind, qt.variables, opts)
case *builderQuery[qbtypes.MetricAggregation]:
specCopy := qt.spec.Copy()
specCopy.ShiftBy = extractShiftFromBuilderQuery(specCopy)
adjustedTimeRange := adjustTimeRangeForShift(specCopy, timeRange, qt.kind)
if qt.spec.Source == telemetrytypes.SourceMeter {
return newBuilderQuery(q.logger, q.telemetryStore, q.meterStmtBuilder, specCopy, adjustedTimeRange, qt.kind, qt.variables)
return newBuilderQuery(q.logger, q.telemetryStore, q.meterStmtBuilder, specCopy, adjustedTimeRange, qt.kind, qt.variables, qbtypes.NewBuilderQueryOptions())
}
return newBuilderQuery(q.logger, q.telemetryStore, q.metricStmtBuilder, specCopy, adjustedTimeRange, qt.kind, qt.variables)
return newBuilderQuery(q.logger, q.telemetryStore, q.metricStmtBuilder, specCopy, adjustedTimeRange, qt.kind, qt.variables, qbtypes.NewBuilderQueryOptions())
case *traceOperatorQuery:
specCopy := qt.spec.Copy()
return &traceOperatorQuery{
@@ -914,6 +946,8 @@ func (q *querier) mergeResults(cached *qbtypes.Result, fresh []*qbtypes.Result)
case qbtypes.RequestTypeTimeSeries:
// Pass nil as cached value to ensure proper merging of all fresh results
merged.Value = q.mergeTimeSeriesResults(nil, fresh)
case qbtypes.RequestTypeScalar:
merged.Value = q.mergeScalarStateRows(nil, fresh)
}
return merged
@@ -936,6 +970,8 @@ func (q *querier) mergeResults(cached *qbtypes.Result, fresh []*qbtypes.Result)
switch merged.Type {
case qbtypes.RequestTypeTimeSeries:
merged.Value = q.mergeTimeSeriesResults(cached.Value.(*qbtypes.TimeSeriesData), fresh)
case qbtypes.RequestTypeScalar:
merged.Value = q.mergeScalarStateRows(cached.Value, fresh)
}
if len(fresh) > 0 {

View File

@@ -7,6 +7,7 @@ import (
cmock "github.com/srikanthccv/ClickHouse-go-mock"
"github.com/SigNoz/signoz/pkg/flagger/flaggertest"
"github.com/SigNoz/signoz/pkg/instrumentation/instrumentationtest"
"github.com/SigNoz/signoz/pkg/telemetrystore"
"github.com/SigNoz/signoz/pkg/telemetrystore/telemetrystoretest"
@@ -27,7 +28,7 @@ func (m *queryMatcherAny) Match(string, string) error { return nil }
// and returns a fixed query string so the mock ClickHouse can match it.
type mockMetricStmtBuilder struct{}
func (m *mockMetricStmtBuilder) Build(_ context.Context, _, _ uint64, _ qbtypes.RequestType, _ qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation], _ map[string]qbtypes.VariableItem) (*qbtypes.Statement, error) {
func (m *mockMetricStmtBuilder) Build(_ context.Context, _, _ uint64, _ qbtypes.RequestType, _ qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation], _ map[string]qbtypes.VariableItem, _ qbtypes.StatementBuilderOptions) (*qbtypes.Statement, error) {
return &qbtypes.Statement{
Query: "SELECT ts, value FROM signoz_metrics",
Args: nil,
@@ -52,6 +53,7 @@ func TestQueryRange_MetricTypeMissing(t *testing.T) {
nil, // meterStmtBuilder
nil, // traceOperatorStmtBuilder
nil, // bucketCache
flaggertest.New(t),
)
req := &qbtypes.QueryRangeRequest{
@@ -116,6 +118,7 @@ func TestQueryRange_MetricTypeFromStore(t *testing.T) {
nil, // meterStmtBuilder
nil, // traceOperatorStmtBuilder
nil, // bucketCache
flaggertest.New(t),
)
req := &qbtypes.QueryRangeRequest{

View File

@@ -186,5 +186,6 @@ func newProvider(
meterStmtBuilder,
traceOperatorStmtBuilder,
bucketCache,
flagger,
), nil
}

View File

@@ -28,6 +28,8 @@ func (q *traceOperatorQuery) Fingerprint() string {
return ""
}
func (q *traceOperatorQuery) IsCacheable() bool { return false }
func (q *traceOperatorQuery) Window() (uint64, uint64) {
return q.fromMS, q.toMS
}

View File

@@ -53,6 +53,7 @@ func prepareQuerierForMetrics(t *testing.T, telemetryStore telemetrystore.Teleme
nil, // meterStmtBuilder
nil, // traceOperatorStmtBuilder
nil, // bucketCache
flaggertest.New(t),
), metadataStore
}
@@ -102,6 +103,7 @@ func prepareQuerierForLogs(t *testing.T, telemetryStore telemetrystore.Telemetry
nil, // meterStmtBuilder
nil, // traceOperatorStmtBuilder
nil, // bucketCache
flaggertest.New(t),
)
}
@@ -146,5 +148,6 @@ func prepareQuerierForTraces(t *testing.T, telemetryStore telemetrystore.Telemet
nil, // meterStmtBuilder
nil, // traceOperatorStmtBuilder
nil, // bucketCache
flaggertest.New(t),
)
}

View File

@@ -1,11 +1,19 @@
package querybuilder
import (
"strings"
chparser "github.com/AfterShip/clickhouse-sql-parser/parser"
"github.com/SigNoz/signoz/pkg/errors"
"github.com/SigNoz/signoz/pkg/valuer"
)
var (
AggreFuncMap = map[valuer.String]AggrFunc{}
// ErrAggregateNotStateCacheable signals the outer aggregate has no
// registered ClickHouse "-State" form.
ErrAggregateNotStateCacheable = errors.NewInternalf(errors.CodeInternal, "aggregate is not state-cacheable")
)
type AggrFunc struct {
@@ -18,12 +26,40 @@ type AggrFunc struct {
Rate bool
MinArgs int
MaxArgs int
// StateName is the ClickHouse "-State" combinator name
// (e.g. "avg" -> "avgState").
StateName string
// Cacheable enables/disables scalar-state caching for this
// aggregate. It can be turned off without losing the state-form mapping.
Cacheable bool
}
// ExtractOuterAggName returns the AggrFunc for the outermost aggregate
// in expr (e.g. "avg" for "avg(duration_nano)").
func ExtractOuterAggName(expr string) (AggrFunc, bool) {
wrapped := "SELECT " + expr
stmts, err := chparser.NewParser(wrapped).ParseStmts()
if err != nil || len(stmts) == 0 {
return AggrFunc{}, false
}
sel, ok := stmts[0].(*chparser.SelectQuery)
if !ok || len(sel.SelectItems) == 0 {
return AggrFunc{}, false
}
fn, ok := sel.SelectItems[0].Expr.(*chparser.FunctionExpr)
if !ok {
return AggrFunc{}, false
}
a, ok := AggreFuncMap[valuer.NewString(strings.ToLower(fn.Name.Name))]
return a, ok
}
var (
AggrFuncCount = AggrFunc{
Name: valuer.NewString("count"),
FuncName: "count",
StateName: "countState",
Cacheable: true,
RequireArgs: false, MinArgs: 0, MaxArgs: 1,
}
AggrFuncCountIf = AggrFunc{
@@ -47,6 +83,8 @@ var (
AggrFuncSum = AggrFunc{
Name: valuer.NewString("sum"),
FuncName: "sum",
StateName: "sumState",
Cacheable: true,
RequireArgs: true, Numeric: true, MinArgs: 1, MaxArgs: 1,
}
AggrFuncSumIf = AggrFunc{
@@ -58,6 +96,8 @@ var (
AggrFuncAvg = AggrFunc{
Name: valuer.NewString("avg"),
FuncName: "avg",
StateName: "avgState",
Cacheable: true,
RequireArgs: true, Numeric: true, MinArgs: 1, MaxArgs: 1,
}
AggrFuncAvgIf = AggrFunc{
@@ -69,6 +109,8 @@ var (
AggrFuncMin = AggrFunc{
Name: valuer.NewString("min"),
FuncName: "min",
StateName: "minState",
Cacheable: true,
RequireArgs: true, Numeric: true, MinArgs: 1, MaxArgs: 1,
}
AggrFuncMinIf = AggrFunc{
@@ -80,6 +122,8 @@ var (
AggrFuncMax = AggrFunc{
Name: valuer.NewString("max"),
FuncName: "max",
StateName: "maxState",
Cacheable: true,
RequireArgs: true, Numeric: true, MinArgs: 1, MaxArgs: 1,
}
AggrFuncMaxIf = AggrFunc{
@@ -201,6 +245,8 @@ var (
AggrFuncRate = AggrFunc{
Name: valuer.NewString("rate"),
FuncName: "count",
StateName: "countState",
Cacheable: true,
RequireArgs: true, Rate: true, MinArgs: 0, MaxArgs: 1,
}
AggrFuncRateIf = AggrFunc{
@@ -212,21 +258,29 @@ var (
AggrFuncRateSum = AggrFunc{
Name: valuer.NewString("rate_sum"),
FuncName: "sum",
StateName: "sumState",
Cacheable: true,
RequireArgs: true, Numeric: true, Rate: true, MinArgs: 1, MaxArgs: 1,
}
AggrFuncRateAvg = AggrFunc{
Name: valuer.NewString("rate_avg"),
FuncName: "avg",
StateName: "avgState",
Cacheable: true,
RequireArgs: true, Numeric: true, Rate: true, MinArgs: 1, MaxArgs: 1,
}
AggrFuncRateMin = AggrFunc{
Name: valuer.NewString("rate_min"),
FuncName: "min",
StateName: "minState",
Cacheable: true,
RequireArgs: true, Numeric: true, Rate: true, MinArgs: 1, MaxArgs: 1,
}
AggrFuncRateMax = AggrFunc{
Name: valuer.NewString("rate_max"),
FuncName: "max",
StateName: "maxState",
Cacheable: true,
RequireArgs: true, Numeric: true, Rate: true, MinArgs: 1, MaxArgs: 1,
}
)

View File

@@ -48,6 +48,43 @@ func NewAggExprRewriter(
}
}
// rewrite parses expr, runs the visitor over the outermost SelectItem,
// and returns the (mutated) item along with accumulated chArgs and the
// isRate flag. The returned item still references the in-place AST so
// callers can further mutate before serializing.
func (r *aggExprRewriter) rewrite(
ctx context.Context,
startNs uint64,
endNs uint64,
expr string,
keys map[string][]*telemetrytypes.TelemetryFieldKey,
) (*chparser.SelectItem, []any, bool, error) {
wrapped := fmt.Sprintf("SELECT %s", expr)
stmts, err := chparser.NewParser(wrapped).ParseStmts()
if err != nil {
return nil, nil, false, errors.WrapInternalf(err, errors.CodeInternal, "failed to parse aggregation expression %q", expr)
}
if len(stmts) == 0 {
return nil, nil, false, errors.NewInternalf(errors.CodeInternal, "no statements found for %q", expr)
}
sel, ok := stmts[0].(*chparser.SelectQuery)
if !ok {
return nil, nil, false, errors.NewInternalf(errors.CodeInternal, "expected SelectQuery, got %T", stmts[0])
}
if len(sel.SelectItems) == 0 {
return nil, nil, false, errors.NewInternalf(errors.CodeInternal, "no SELECT items for %q", expr)
}
visitor := newExprVisitor(
ctx, startNs, endNs, r.logger, keys,
r.fullTextColumn, r.fieldMapper, r.conditionBuilder, r.jsonKeyToKey, r.flagger,
)
if err := sel.SelectItems[0].Accept(visitor); err != nil {
return nil, nil, false, err
}
return sel.SelectItems[0], visitor.chArgs, visitor.isRate, nil
}
// Rewrite parses the given aggregation expression, maps the column, and condition to
// valid data source column and condition expression, and returns the rewritten expression
// and the args if the parametric aggregation function is used.
@@ -59,49 +96,58 @@ func (r *aggExprRewriter) Rewrite(
rateInterval uint64,
keys map[string][]*telemetrytypes.TelemetryFieldKey,
) (string, []any, error) {
wrapped := fmt.Sprintf("SELECT %s", expr)
p := chparser.NewParser(wrapped)
stmts, err := p.ParseStmts()
item, chArgs, isRate, err := r.rewrite(ctx, startNs, endNs, expr, keys)
if err != nil {
return "", nil, errors.WrapInternalf(err, errors.CodeInternal, "failed to parse aggregation expression %q", expr)
}
if len(stmts) == 0 {
return "", nil, errors.NewInternalf(errors.CodeInternal, "no statements found for %q", expr)
}
sel, ok := stmts[0].(*chparser.SelectQuery)
if !ok {
return "", nil, errors.NewInternalf(errors.CodeInternal, "expected SelectQuery, got %T", stmts[0])
}
if len(sel.SelectItems) == 0 {
return "", nil, errors.NewInternalf(errors.CodeInternal, "no SELECT items for %q", expr)
}
visitor := newExprVisitor(
ctx,
startNs,
endNs,
r.logger,
keys,
r.fullTextColumn,
r.fieldMapper,
r.conditionBuilder,
r.jsonKeyToKey,
r.flagger,
)
// Rewrite the first select item (our expression)
if err := sel.SelectItems[0].Accept(visitor); err != nil {
return "", nil, err
}
if visitor.isRate {
return fmt.Sprintf("%s/%d", sel.SelectItems[0].String(), rateInterval), visitor.chArgs, nil
if isRate {
return fmt.Sprintf("%s/%d", item.String(), rateInterval), chArgs, nil
}
return sel.SelectItems[0].String(), visitor.chArgs, nil
return item.String(), chArgs, nil
}
// RewriteWithState rewrites the aggregation expression and swaps the
// outermost aggregate to its ClickHouse "-State" combinator. Returns
// ErrAggregateNotStateCacheable if the outer aggregate has no StateName.
//
// For numeric state aggregates (sum/avg/min/max and their rate variants)
// the argument is wrapped with toFloat64(...) so the on-wire state always
// uses a Float64 numerator/value, regardless of the input column type.
// Without the cast, an integer input column (e.g. UInt64 duration_nano)
// would yield AggregateFunction(avg, UInt64) whose serialize() writes a
// UInt64 numerator — same byte count as Float64 but different bits, and
// the Go-side decoder in pkg/scalarstate hardcodes Float64.
func (r *aggExprRewriter) RewriteWithState(
ctx context.Context,
startNs uint64,
endNs uint64,
expr string,
keys map[string][]*telemetrytypes.TelemetryFieldKey,
) (string, []any, error) {
item, chArgs, _, err := r.rewrite(ctx, startNs, endNs, expr, keys)
if err != nil {
return "", nil, err
}
outer, ok := item.Expr.(*chparser.FunctionExpr)
if !ok {
return "", nil, ErrAggregateNotStateCacheable
}
aggFunc, ok := AggreFuncMap[valuer.NewString(strings.ToLower(outer.Name.Name))]
if !ok || aggFunc.StateName == "" {
return "", nil, ErrAggregateNotStateCacheable
}
outer.Name.Name = aggFunc.StateName
if aggFunc.Numeric && outer.Params != nil && outer.Params.Items != nil {
for i, arg := range outer.Params.Items.Items {
wrapped, perr := parseFragment(fmt.Sprintf("toFloat64(%s)", arg.String()))
if perr != nil {
return "", nil, perr
}
outer.Params.Items.Items[i] = wrapped
}
}
return item.String(), chArgs, nil
}
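For example, with the registry entries above (the column name is hypothetical):

// Rewrite:          avg(duration_nano) -> avg(duration_nano)
// RewriteWithState: avg(duration_nano) -> avgState(toFloat64(duration_nano))
// RewriteWithState: p99(duration_nano) -> ErrAggregateNotStateCacheable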
// RewriteMulti rewrites a slice of expressions.

View File

@@ -0,0 +1,201 @@
package querybuilder
import (
"context"
"strings"
"testing"
schema "github.com/SigNoz/signoz-otel-collector/cmd/signozschemamigrator/schema_migrator"
"github.com/SigNoz/signoz/pkg/errors"
"github.com/SigNoz/signoz/pkg/flagger/flaggertest"
"github.com/SigNoz/signoz/pkg/instrumentation/instrumentationtest"
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
"github.com/huandu/go-sqlbuilder"
"github.com/stretchr/testify/require"
)
// fakeFieldMapper passes every key through unchanged as its column
// expression so the rewriter has something to substitute. Tests only
// inspect the head function name and the presence of the column, not
// the exact mapping.
type fakeFieldMapper struct{}
func (fakeFieldMapper) FieldFor(_ context.Context, _, _ uint64, key *telemetrytypes.TelemetryFieldKey) (string, error) {
return key.Name, nil
}
func (fakeFieldMapper) ColumnFor(_ context.Context, _, _ uint64, _ *telemetrytypes.TelemetryFieldKey) ([]*schema.Column, error) {
return nil, nil
}
func (fakeFieldMapper) ColumnExpressionFor(_ context.Context, _, _ uint64, key *telemetrytypes.TelemetryFieldKey, _ map[string][]*telemetrytypes.TelemetryFieldKey) (string, error) {
return key.Name, nil
}
type fakeConditionBuilder struct{}
func (fakeConditionBuilder) ConditionFor(_ context.Context, _, _ uint64, key *telemetrytypes.TelemetryFieldKey, _ qbtypes.FilterOperator, _ any, _ *sqlbuilder.SelectBuilder) (string, error) {
return key.Name + " = ?", nil
}
func newTestRewriter(t *testing.T) *aggExprRewriter {
t.Helper()
return NewAggExprRewriter(
instrumentationtest.New().ToProviderSettings(),
nil,
fakeFieldMapper{},
fakeConditionBuilder{},
nil,
flaggertest.New(t),
)
}
func TestRewrite_SimpleAggregates(t *testing.T) {
r := newTestRewriter(t)
ctx := context.Background()
keys := map[string][]*telemetrytypes.TelemetryFieldKey{}
cases := []struct {
name string
expr string
wantHead string
}{
{"count_no_args", "count()", "count("},
{"count_with_arg", "count(latency)", "count("},
{"sum", "sum(latency)", "sum("},
{"avg", "avg(latency)", "avg("},
{"min", "min(latency)", "min("},
{"max", "max(latency)", "max("},
{"p99", "p99(latency)", "quantile(0.99)("},
}
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
got, _, err := r.Rewrite(ctx, 0, 1, c.expr, 1, keys)
require.NoError(t, err)
require.True(t, strings.HasPrefix(got, c.wantHead),
"want prefix %q, got %q", c.wantHead, got)
})
}
}
func TestRewrite_RateAppliesDivision(t *testing.T) {
r := newTestRewriter(t)
got, _, err := r.Rewrite(context.Background(), 0, 1, "rate(latency)", 60, nil)
require.NoError(t, err)
require.Contains(t, got, "/60", "rate output must apply rate-interval division: %s", got)
}
func TestRewrite_UnknownFunction(t *testing.T) {
r := newTestRewriter(t)
_, _, err := r.Rewrite(context.Background(), 0, 1, "nosuchfn(latency)", 0, nil)
require.Error(t, err)
require.Contains(t, err.Error(), "unrecognized function")
}
func TestRewrite_BadExpression(t *testing.T) {
r := newTestRewriter(t)
_, _, err := r.Rewrite(context.Background(), 0, 1, "this is not sql ((", 0, nil)
require.Error(t, err)
}
func TestRewriteWithState_SwapsToStateName(t *testing.T) {
r := newTestRewriter(t)
cases := []struct {
expr string
wantHead string
}{
{"count(latency)", "countState("},
{"count()", "countState("},
{"sum(latency)", "sumState("},
{"avg(latency)", "avgState("},
{"min(latency)", "minState("},
{"max(latency)", "maxState("},
}
for _, c := range cases {
t.Run(c.expr, func(t *testing.T) {
got, _, err := r.RewriteWithState(context.Background(), 0, 1, c.expr, nil)
require.NoError(t, err)
require.True(t, strings.HasPrefix(got, c.wantHead),
"want prefix %q, got %q", c.wantHead, got)
})
}
}
func TestRewriteWithState_RejectsAggregatesWithoutState(t *testing.T) {
r := newTestRewriter(t)
cases := []string{
"p99(latency)", // quantile — no StateName registered
"count_distinct(latency)", // not state-cacheable in v1
}
for _, expr := range cases {
t.Run(expr, func(t *testing.T) {
_, _, err := r.RewriteWithState(context.Background(), 0, 1, expr, nil)
require.Error(t, err)
require.True(t, errors.Is(err, ErrAggregateNotStateCacheable),
"want ErrAggregateNotStateCacheable, got %v", err)
})
}
}
func TestRewriteWithState_RateAggregatesEmitBaseStateNoDivision(t *testing.T) {
r := newTestRewriter(t)
// Rate aggregates emit only the underlying state (no /<window>
// suffix) — the rate division happens post-merge in Go using the
// full query window.
cases := []struct {
expr string
wantHead string
}{
{"rate(latency)", "countState("},
{"rate_sum(latency)", "sumState("},
{"rate_avg(latency)", "avgState("},
{"rate_min(latency)", "minState("},
{"rate_max(latency)", "maxState("},
}
for _, c := range cases {
t.Run(c.expr, func(t *testing.T) {
got, _, err := r.RewriteWithState(context.Background(), 0, 1, c.expr, nil)
require.NoError(t, err)
require.True(t, strings.HasPrefix(got, c.wantHead),
"want prefix %q, got %q", c.wantHead, got)
require.NotContains(t, got, "/", "RewriteWithState must not apply rate division: %s", got)
})
}
}
func TestRewriteWithState_RejectsNonFunctionExpr(t *testing.T) {
r := newTestRewriter(t)
// Bare column expression, not a function call — should be rejected.
_, _, err := r.RewriteWithState(context.Background(), 0, 1, "latency", nil)
require.Error(t, err)
require.True(t, errors.Is(err, ErrAggregateNotStateCacheable),
"want ErrAggregateNotStateCacheable, got %v", err)
}
func TestRewriteWithState_PropagatesParseErrors(t *testing.T) {
r := newTestRewriter(t)
_, _, err := r.RewriteWithState(context.Background(), 0, 1, "this is not sql ((", nil)
require.Error(t, err)
}
func TestExtractOuterAggName(t *testing.T) {
cases := []struct {
expr string
wantName string
wantFound bool
}{
{"avg(latency)", "avg", true},
{"COUNT(latency)", "count", true},
{"p99(latency)", "p99", true},
{"latency", "", false}, // not a function
{"unknownfn(x)", "", false}, // not in AggreFuncMap
}
for _, c := range cases {
t.Run(c.expr, func(t *testing.T) {
af, ok := ExtractOuterAggName(c.expr)
require.Equal(t, c.wantFound, ok)
if ok {
require.Equal(t, c.wantName, af.Name.StringValue())
}
})
}
}

pkg/scalarstate/avg.go (new file, +76 lines)
View File

@@ -0,0 +1,76 @@
package scalarstate
import (
"encoding/binary"
"encoding/hex"
"math"
"github.com/SigNoz/signoz/pkg/errors"
)
// avgState mirrors AvgFraction in CH's AggregateFunctionAvg.h for Float64 input.
// CH's serialize writes (numerator: Float64 LE, denominator: VarUInt) — i.e.
// 8 fixed bytes + 1-9 varint bytes, NOT a fixed 16-byte block.
//
// SigNoz's expression rewriter feeds avgState a Nullable(Float64) (multiIf
// returns NULL for non-matching rows). CH wraps that in
// AggregateFunctionNullUnary<serialize_flag=true>, which prefixes the
// nested state with a 1-byte flag: 0 = no non-null value ever seen
// (state ends here), 1 = nested avg state follows.
type avgState struct {
Num float64
Den uint64
}
type avgAgg struct{}
func (avgAgg) Name() string { return "avg" }
func (avgAgg) StateFunc(inner string) string { return "avgState(toFloat64(" + inner + "))" }
func (avgAgg) StateColumnType() string { return "AggregateFunction(avg, Float64)" }
func (avgAgg) Decode(b []byte) (State, error) {
body, ok := stripNullableFlag(b)
if !ok {
return &avgState{}, nil
}
if len(body) < avgMinNestedBytes {
return nil, errors.NewInternalf(errors.CodeInternal, "scalarstate.avg: need >=%d nested bytes (8 num + 1+ varint den), got %d (hex=%s)", avgMinNestedBytes, len(body), hex.EncodeToString(b))
}
num := math.Float64frombits(binary.LittleEndian.Uint64(body[0:8]))
den, read := binary.Uvarint(body[8:])
if read <= 0 {
return nil, errors.NewInternalf(errors.CodeInternal, "scalarstate.avg: bad VarUInt denominator (hex=%s)", hex.EncodeToString(b))
}
if 8+read != len(body) {
return nil, errors.NewInternalf(errors.CodeInternal, "scalarstate.avg: unexpected trailing bytes (len=%d, consumed=%d, hex=%s)", len(b), 8+read, hex.EncodeToString(b))
}
return &avgState{Num: num, Den: den}, nil
}
// avgMinNestedBytes is the smallest valid nested-state size: 8 bytes of
// Float64 numerator + 1-byte VarUInt denominator (denominator < 128).
const avgMinNestedBytes = 9
func (avgAgg) Merge(states []State) (State, error) {
out := &avgState{}
for _, s := range states {
c, ok := s.(*avgState)
if !ok {
return nil, errors.NewInternalf(errors.CodeInternal, "scalarstate.avg.merge: bad state type %T", s)
}
out.Num += c.Num
out.Den += c.Den
}
return out, nil
}
func (avgAgg) Final(s State) (any, error) {
c, ok := s.(*avgState)
if !ok {
return nil, errors.NewInternalf(errors.CodeInternal, "scalarstate.avg.final: bad state type %T", s)
}
if c.Den == 0 {
return math.NaN(), nil
}
return c.Num / float64(c.Den), nil
}
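A worked decode using the exact layout the decoder above expects (bytes chosen to be consistent with it):

// blob hex: 01 00 00 00 00 00 00 24 40 03
//   01                        Nullable flag: nested state follows
//   00 00 00 00 00 00 24 40   Float64 LE numerator = 10.0
//   03                        VarUInt denominator = 3
// Decode => &avgState{Num: 10, Den: 3}
// Final  => 10.0 / 3 ≈ 3.3333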

pkg/scalarstate/count.go (new file, +49 lines)
View File

@@ -0,0 +1,49 @@
package scalarstate
import (
"encoding/binary"
"encoding/hex"
"github.com/SigNoz/signoz/pkg/errors"
)
type countState struct{ Count uint64 }
type countAgg struct{}
func (countAgg) Name() string { return "count" }
func (countAgg) StateFunc(inner string) string { return "countState(" + inner + ")" }
func (countAgg) StateColumnType() string { return "AggregateFunction(count)" }
// AggregateFunctionCount serializes via writeVarUInt — LEB128, 1-9 bytes,
// not a fixed UInt64. Matches Go's binary.Uvarint encoding.
func (countAgg) Decode(b []byte) (State, error) {
n, read := binary.Uvarint(b)
if read <= 0 {
return nil, errors.NewInternalf(errors.CodeInternal, "scalarstate.count: bad VarUInt (hex=%s)", hex.EncodeToString(b))
}
if read != len(b) {
return nil, errors.NewInternalf(errors.CodeInternal, "scalarstate.count: unexpected trailing bytes (len=%d, consumed=%d, hex=%s)", len(b), read, hex.EncodeToString(b))
}
return &countState{Count: n}, nil
}
func (countAgg) Merge(states []State) (State, error) {
out := &countState{}
for _, s := range states {
c, ok := s.(*countState)
if !ok {
return nil, errors.NewInternalf(errors.CodeInternal, "scalarstate.count.merge: bad state type %T", s)
}
out.Count += c.Count
}
return out, nil
}
func (countAgg) Final(s State) (any, error) {
c, ok := s.(*countState)
if !ok {
return nil, errors.NewInternalf(errors.CodeInternal, "scalarstate.count.final: bad state type %T", s)
}
return c.Count, nil
}

13 pkg/scalarstate/init.go Normal file
View File

@@ -0,0 +1,13 @@
package scalarstate
func init() {
Register(countAgg{})
Register(sumAgg{})
Register(avgAgg{})
Register(minAgg{})
Register(maxAgg{})
Register(varPopAgg{})
Register(varSampAgg{})
Register(stddevPopAgg{})
Register(stddevSampAgg{})
}

127 pkg/scalarstate/minmax.go Normal file
View File

@@ -0,0 +1,127 @@
package scalarstate
import (
"encoding/binary"
"encoding/hex"
"math"
"github.com/SigNoz/signoz/pkg/errors"
)
// singleValueState mirrors CH's SingleValueDataFixed<Float64>::serialize:
//
// writeBinary(has_value, buf) // 1 byte (UInt8)
// if (has_value) writeBinary(value, buf) // 8 bytes Float64 LE
//
// For min/max over Float64 expressions (which is what aggExprRewriter
// produces because Numeric=true is rewritten to FieldDataTypeFloat64),
// the unwrapped blob is either singleValueAbsentBytes (no value) or
// singleValuePresentBytes (has value + payload). When the input is
// Nullable (SigNoz's multiIf path), CH further wraps the state with a
// 1-byte Null-flag — see stripNullableFlag in registry.go.
type singleValueState struct {
Has bool
Value float64
}
const (
singleValueAbsentBytes = 1 // [has=0]
singleValuePresentBytes = 9 // [has=1][8-byte Float64]
)
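// Wire examples with the Null wrapper (see stripNullableFlag):
//   []byte{0}                         -> no non-null value ever seen
//   []byte{1, 1, <8-byte Float64 LE>} -> has=true, value follows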
func decodeSingleValue(b []byte) (*singleValueState, error) {
body, ok := stripNullableFlag(b)
if !ok {
return &singleValueState{}, nil
}
if len(body) == 0 {
return nil, errors.NewInternalf(errors.CodeInternal, "scalarstate.singleValue: empty nested blob (hex=%s)", hex.EncodeToString(b))
}
has := body[0] != 0
out := &singleValueState{Has: has}
if !has {
if len(body) != singleValueAbsentBytes {
return nil, errors.NewInternalf(errors.CodeInternal, "scalarstate.singleValue: expected %d nested byte for has=false, got %d (hex=%s)", singleValueAbsentBytes, len(body), hex.EncodeToString(b))
}
return out, nil
}
if len(body) != singleValuePresentBytes {
return nil, errors.NewInternalf(errors.CodeInternal, "scalarstate.singleValue: expected %d nested bytes for has=true, got %d (hex=%s)", singleValuePresentBytes, len(body), hex.EncodeToString(b))
}
out.Value = math.Float64frombits(binary.LittleEndian.Uint64(body[1:singleValuePresentBytes]))
return out, nil
}
type minAgg struct{}
func (minAgg) Name() string { return "min" }
func (minAgg) StateFunc(inner string) string { return "minState(toFloat64(" + inner + "))" }
func (minAgg) StateColumnType() string { return "AggregateFunction(min, Float64)" }
func (minAgg) Decode(b []byte) (State, error) { return decodeSingleValue(b) }
func (minAgg) Merge(states []State) (State, error) {
out := &singleValueState{}
for _, s := range states {
c, ok := s.(*singleValueState)
if !ok {
return nil, errors.NewInternalf(errors.CodeInternal, "scalarstate.min.merge: bad state type %T", s)
}
if !c.Has {
continue
}
if !out.Has || c.Value < out.Value {
out.Has = true
out.Value = c.Value
}
}
return out, nil
}
func (minAgg) Final(s State) (any, error) {
c, ok := s.(*singleValueState)
if !ok {
return nil, errors.NewInternalf(errors.CodeInternal, "scalarstate.min.final: bad state type %T", s)
}
if !c.Has {
return math.NaN(), nil
}
return c.Value, nil
}
type maxAgg struct{}
func (maxAgg) Name() string { return "max" }
func (maxAgg) StateFunc(inner string) string { return "maxState(toFloat64(" + inner + "))" }
func (maxAgg) StateColumnType() string { return "AggregateFunction(max, Float64)" }
func (maxAgg) Decode(b []byte) (State, error) { return decodeSingleValue(b) }
func (maxAgg) Merge(states []State) (State, error) {
out := &singleValueState{}
for _, s := range states {
c, ok := s.(*singleValueState)
if !ok {
return nil, errors.NewInternalf(errors.CodeInternal, "scalarstate.max.merge: bad state type %T", s)
}
if !c.Has {
continue
}
if !out.Has || c.Value > out.Value {
out.Has = true
out.Value = c.Value
}
}
return out, nil
}
func (maxAgg) Final(s State) (any, error) {
c, ok := s.(*singleValueState)
if !ok {
return nil, errors.NewInternalf(errors.CodeInternal, "scalarstate.max.final: bad state type %T", s)
}
if !c.Has {
return math.NaN(), nil
}
return c.Value, nil
}

116 pkg/scalarstate/registry.go Normal file
View File

@@ -0,0 +1,116 @@
// Package scalarstate provides Go decoders, mergers, and finalizers for
// ClickHouse AggregateFunction state blobs. It is the application-side merge
// path described as Option 2 in the "Caching for Scalar Queries" TRD.
//
// Each registered Aggregate maps a query-builder aggregate name (e.g. "avg")
// to:
// - a ClickHouse "-State" SQL emitter (StateFunc)
// - a byte-level Decode that mirrors CH's serialize() for that aggregate
// - a Merge over decoded states (matching CH's *Merge semantics)
// - a Final that produces the user-facing scalar value
//
// The registry intentionally only carries simple/exact aggregates in v1.
// Sketch-class aggregates (quantileTDigest, uniqCombined) are out of scope
// because their on-wire layouts are CH-internal and shift between versions
// — see the TRD's "Frequency of disruption" analysis.
package scalarstate
import (
"strings"
"sync"
)
// State is an opaque marker for a per-aggregate decoded state. Concrete
// types live alongside each aggregate's Decode/Merge implementation.
type State interface{}
// Aggregate is the per-aggregate behavior contract.
type Aggregate interface {
// Name is the lowercase query-builder aggregate name (e.g. "avg",
// "p99"). Used as the registry key.
Name() string
// StateFunc returns the ClickHouse function expression to emit per
// chunk. innerExpr is the rewritten column/expression argument as
// produced by aggExprRewriter (e.g. `duration_nano`). For parametric
// aggregates (quantiles), the parameter is baked into the returned
// string.
StateFunc(innerExpr string) string
// StateColumnType returns the AggregateFunction(...) DDL form used
// for the per-chunk state column. Currently unused at runtime
// (clickhouse-go scans AggregateFunction columns as []byte regardless),
// but kept in the interface so a future temp-table path stays trivial.
StateColumnType() string
// Decode parses the raw AggregateFunction blob bytes into a
// per-aggregate State value.
Decode(b []byte) (State, error)
// Merge combines per-chunk states into a single state. Matches the
// semantics of ClickHouse's *Merge combinator for this aggregate.
Merge(states []State) (State, error)
// Final produces the user-facing scalar value (typically float64,
// uint64, or int64) from a merged state.
Final(s State) (any, error)
}
var (
mu sync.RWMutex
registry = map[string]Aggregate{}
)
// Register adds an Aggregate to the registry. Intended to be called from
// package init() functions.
func Register(a Aggregate) {
mu.Lock()
defer mu.Unlock()
registry[strings.ToLower(a.Name())] = a
}
// Lookup returns the Aggregate for the given query-builder aggregate name
// (case-insensitive).
func Lookup(name string) (Aggregate, bool) {
mu.RLock()
defer mu.RUnlock()
a, ok := registry[strings.ToLower(name)]
return a, ok
}
// IsCacheable is true when an aggregate has a registered Go merge path.
func IsCacheable(name string) bool {
_, ok := Lookup(name)
return ok
}
// stripNullableFlag peels the 1-byte "had a non-null value" flag that CH's
// AggregateFunctionNullUnary<serialize_flag=true> wrapper writes around the
// nested aggregate state when the input column is Nullable. SigNoz's
// expression rewriter emits multiIf(..., NULL) for unmatched rows, so the
// flag is always present for avg/sum/min/max (count is special-cased
// inside CH and ships unwrapped).
//
// Returns:
//
// (nestedBytes, true) when the wrapper says a nested state follows
// (nil, false) when the wrapper says no non-null value was ever seen
//
// If the buffer doesn't fit either wrapped shape (e.g. b[0] not in {0,1},
// or flag=0 with extra trailing bytes), returns (b, true) so callers fall
// through to the unwrapped decode path. The caller's own length / varint
// checks will then surface a precise error including the hex.
func stripNullableFlag(b []byte) ([]byte, bool) {
if len(b) == 0 {
return b, true
}
switch b[0] {
case 0:
if len(b) == 1 {
return nil, false
}
case 1:
return b[1:], true
}
return b, true
}
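
Because stripNullableFlag is unexported, its contract is easiest to pin down with an in-package test. A sketch of the three shapes it distinguishes (assumed test file, not part of this diff):

package scalarstate

import "testing"

func TestStripNullableFlagShapes(t *testing.T) {
	// Flag byte 0 and nothing else: no non-null value was ever seen.
	if _, ok := stripNullableFlag([]byte{0}); ok {
		t.Error("flag=0 alone should report no nested state")
	}
	// Flag byte 1: strip the wrapper, hand back the nested state bytes.
	if body, ok := stripNullableFlag([]byte{1, 0xAA}); !ok || len(body) != 1 || body[0] != 0xAA {
		t.Error("flag=1 should strip exactly the wrapper byte")
	}
	// Anything else passes through unchanged so the caller's own
	// length / varint checks can reject it with a precise error.
	if body, ok := stripNullableFlag([]byte{7, 7}); !ok || len(body) != 2 {
		t.Error("unrecognized shape should fall through unchanged")
	}
}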

View File

@@ -0,0 +1,206 @@
package scalarstate
import (
"encoding/binary"
"math"
"testing"
)
// le64f writes a Float64 as little-endian into the given slice at offset.
func le64f(b []byte, off int, v float64) {
binary.LittleEndian.PutUint64(b[off:off+8], math.Float64bits(v))
}
func TestRegistryLookup(t *testing.T) {
cases := []struct {
name string
expect bool
}{
{"count", true},
{"sum", true},
{"avg", true},
{"min", true},
{"max", true},
{"varpop", true},
{"stddevpop", true},
// Sketch aggregates intentionally not registered for v1.
{"p99", false},
{"count_distinct", false},
}
for _, c := range cases {
_, ok := Lookup(c.name)
if ok != c.expect {
t.Errorf("Lookup(%q): got ok=%v want=%v", c.name, ok, c.expect)
}
}
}
func TestCountDecodeMergeFinal(t *testing.T) {
a, _ := Lookup("count")
// CH AggregateFunctionCount serializes the count as a VarUInt (LEB128).
mk := func(n uint64) []byte {
b := make([]byte, binary.MaxVarintLen64)
written := binary.PutUvarint(b, n)
return b[:written]
}
s1, err := a.Decode(mk(7))
if err != nil {
t.Fatal(err)
}
s2, err := a.Decode(mk(13))
if err != nil {
t.Fatal(err)
}
// Also exercise a value that needs >1 VarUInt byte.
s3, err := a.Decode(mk(300))
if err != nil {
t.Fatal(err)
}
merged, err := a.Merge([]State{s1, s2, s3})
if err != nil {
t.Fatal(err)
}
v, err := a.Final(merged)
if err != nil {
t.Fatal(err)
}
if got, want := v.(uint64), uint64(320); got != want {
t.Errorf("count final: got %d want %d", got, want)
}
}
func TestSumDecodeMergeFinal(t *testing.T) {
a, _ := Lookup("sum")
// CH wraps sumState(Nullable(Float64)) with a 1-byte "had non-null
// value" flag in front of the Float64 sum.
mk := func(f float64) []byte {
b := make([]byte, 9)
b[0] = 1
le64f(b, 1, f)
return b
}
s1, _ := a.Decode(mk(2.5))
s2, _ := a.Decode(mk(7.5))
merged, _ := a.Merge([]State{s1, s2})
v, _ := a.Final(merged)
if got := v.(float64); got != 10.0 {
t.Errorf("sum final: got %v want 10", got)
}
// Empty Null-wrapped state (no rows ever contributed): single 0x00 byte.
empty, err := a.Decode([]byte{0})
if err != nil {
t.Fatalf("decode empty: %v", err)
}
if got := empty.(*sumState).Sum; got != 0 {
t.Errorf("empty sum: got %v want 0", got)
}
}
func TestAvgDecodeMergeFinal(t *testing.T) {
a, _ := Lookup("avg")
// Wire shape: [null_flag=1][Float64 num][VarUInt den].
mk := func(num float64, den uint64) []byte {
b := make([]byte, 1+8+binary.MaxVarintLen64)
b[0] = 1
le64f(b, 1, num)
w := binary.PutUvarint(b[9:], den)
return b[:9+w]
}
// chunk1: sum=10 over 4 samples, chunk2: sum=20 over 6 samples → avg=3
s1, _ := a.Decode(mk(10, 4))
s2, _ := a.Decode(mk(20, 6))
merged, _ := a.Merge([]State{s1, s2})
v, _ := a.Final(merged)
if got := v.(float64); got != 3.0 {
t.Errorf("avg final: got %v want 3", got)
}
}
func TestAvgFinalEmptyDenominator(t *testing.T) {
a, _ := Lookup("avg")
// Single 0x00: Null-wrapper says no non-null value was ever seen.
s, err := a.Decode([]byte{0})
if err != nil {
t.Fatal(err)
}
v, _ := a.Final(s)
if got := v.(float64); !math.IsNaN(got) {
t.Errorf("avg final on empty: got %v want NaN", got)
}
}
func TestMinMaxDecodeMergeFinal(t *testing.T) {
mn, _ := Lookup("min")
mx, _ := Lookup("max")
// Wire shape with Null-wrapper:
// has=false: 1 byte [null_flag=0] — never saw a non-null value
// has=true: 10 bytes [null_flag=1][has=1][Float64 value LE]
mk := func(has bool, v float64) []byte {
if !has {
return []byte{0}
}
b := make([]byte, 10)
b[0] = 1
b[1] = 1
le64f(b, 2, v)
return b
}
// Three chunks: 5.0, missing, -2.0 → min = -2, max = 5
s1, _ := mn.Decode(mk(true, 5.0))
s2, _ := mn.Decode(mk(false, 0))
s3, _ := mn.Decode(mk(true, -2.0))
merged, _ := mn.Merge([]State{s1, s2, s3})
v, _ := mn.Final(merged)
if got := v.(float64); got != -2.0 {
t.Errorf("min final: got %v want -2", got)
}
s1m, _ := mx.Decode(mk(true, 5.0))
s2m, _ := mx.Decode(mk(false, 0))
s3m, _ := mx.Decode(mk(true, -2.0))
mergedM, _ := mx.Merge([]State{s1m, s2m, s3m})
vm, _ := mx.Final(mergedM)
if got := vm.(float64); got != 5.0 {
t.Errorf("max final: got %v want 5", got)
}
// All-missing: NaN
s, _ := mn.Decode(mk(false, 0))
merged2, _ := mn.Merge([]State{s})
vNaN, _ := mn.Final(merged2)
if !math.IsNaN(vNaN.(float64)) {
t.Errorf("min on all-missing: got %v want NaN", vNaN)
}
}
func TestVarPopAndStddevPop(t *testing.T) {
vp, _ := Lookup("varpop")
sp, _ := Lookup("stddevpop")
mk := func(count, sum, sumsq float64) []byte {
b := make([]byte, 24)
le64f(b, 0, count)
le64f(b, 8, sum)
le64f(b, 16, sumsq)
return b
}
// Chunk1: values {1,2} -> count=2, sum=3, sumsq=5
// Chunk2: values {3,4,5} -> count=3, sum=12, sumsq=50
// Combined: 5 values {1,2,3,4,5}, varPop = mean(x²) - mean(x)²
// mean = 3, mean(x²) = (1+4+9+16+25)/5 = 11, varPop = 11 - 9 = 2.
s1, _ := vp.Decode(mk(2, 3, 5))
s2, _ := vp.Decode(mk(3, 12, 50))
merged, _ := vp.Merge([]State{s1, s2})
v, _ := vp.Final(merged)
if got := v.(float64); math.Abs(got-2.0) > 1e-9 {
t.Errorf("varPop final: got %v want 2", got)
}
v2, _ := sp.Final(merged)
if got := v2.(float64); math.Abs(got-math.Sqrt(2.0)) > 1e-9 {
t.Errorf("stddevPop final: got %v want sqrt(2)", got)
}
}

54 pkg/scalarstate/sum.go Normal file
View File

@@ -0,0 +1,54 @@
package scalarstate
import (
"encoding/binary"
"encoding/hex"
"math"
"github.com/SigNoz/signoz/pkg/errors"
)
type sumState struct{ Sum float64 }
type sumAgg struct{}
func (sumAgg) Name() string { return "sum" }
func (sumAgg) StateFunc(inner string) string { return "sumState(toFloat64(" + inner + "))" }
func (sumAgg) StateColumnType() string { return "AggregateFunction(sum, Float64)" }
// sumNestedBytes is the size of the nested AggregateFunctionSumData<Float64>
// state: a single Float64 sum, written via writeBinaryLittleEndian.
const sumNestedBytes = 8
// CH wraps sumState(Nullable(Float64)) with a 1-byte "has non-null value"
// flag — see stripNullableFlag in registry.go.
func (sumAgg) Decode(b []byte) (State, error) {
body, ok := stripNullableFlag(b)
if !ok {
return &sumState{}, nil
}
if len(body) != sumNestedBytes {
return nil, errors.NewInternalf(errors.CodeInternal, "scalarstate.sum: expected %d nested bytes, got %d (hex=%s)", sumNestedBytes, len(body), hex.EncodeToString(b))
}
return &sumState{Sum: math.Float64frombits(binary.LittleEndian.Uint64(body))}, nil
}
func (sumAgg) Merge(states []State) (State, error) {
out := &sumState{}
for _, s := range states {
c, ok := s.(*sumState)
if !ok {
return nil, errors.NewInternalf(errors.CodeInternal, "scalarstate.sum.merge: bad state type %T", s)
}
out.Sum += c.Sum
}
return out, nil
}
func (sumAgg) Final(s State) (any, error) {
c, ok := s.(*sumState)
if !ok {
return nil, errors.NewInternalf(errors.CodeInternal, "scalarstate.sum.final: bad state type %T", s)
}
return c.Sum, nil
}

View File

@@ -0,0 +1,138 @@
package scalarstate
import (
"encoding/binary"
"fmt"
"math"
)
// varMomentsState mirrors VarMoments<Float64, 2> in CH's Moments.h:
// three Float64 values written via writeBinaryLittleEndian — count
// (as Float64 — yes, CH stores it as the same T as the moments),
// sum, and sum-of-squares, in that order. 24 bytes total.
//
// We keep the same naive-parallel-variance form CH uses on the
// non-cached path so cached and uncached results stay numerically
// identical (TRD: stddev/var entry).
type varMomentsState struct {
M0 float64 // count
M1 float64 // sum
M2 float64 // sum of squares
}
func decodeVarMoments(b []byte) (*varMomentsState, error) {
if len(b) != 24 {
return nil, fmt.Errorf("scalarstate.varMoments: expected 24 bytes, got %d", len(b))
}
return &varMomentsState{
M0: math.Float64frombits(binary.LittleEndian.Uint64(b[0:8])),
M1: math.Float64frombits(binary.LittleEndian.Uint64(b[8:16])),
M2: math.Float64frombits(binary.LittleEndian.Uint64(b[16:24])),
}, nil
}
func mergeVarMoments(states []State) (*varMomentsState, error) {
out := &varMomentsState{}
for _, s := range states {
c, ok := s.(*varMomentsState)
if !ok {
return nil, fmt.Errorf("scalarstate.varMoments.merge: bad state type %T", s)
}
out.M0 += c.M0
out.M1 += c.M1
out.M2 += c.M2
}
return out, nil
}
// population variance: (m2 - m1^2/m0) / m0
func varPop(s *varMomentsState) float64 {
if s.M0 == 0 {
return math.NaN()
}
return (s.M2 - s.M1*s.M1/s.M0) / s.M0
}
// sample variance: (m2 - m1^2/m0) / (m0 - 1)
func varSamp(s *varMomentsState) float64 {
if s.M0 < 2 {
return math.NaN()
}
return (s.M2 - s.M1*s.M1/s.M0) / (s.M0 - 1)
}
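// Worked example: values {1, 2, 3} give m0=3, m1=6, m2=14, so
// varPop = (14 - 6*6/3) / 3 = 2/3 and varSamp = (14 - 6*6/3) / 2 = 1,
// matching ClickHouse's varPop/varSamp over the same rows.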
type varPopAgg struct{}
func (varPopAgg) Name() string { return "varpop" }
func (varPopAgg) StateFunc(inner string) string { return "varPopState(toFloat64(" + inner + "))" }
func (varPopAgg) StateColumnType() string { return "AggregateFunction(varPop, Float64)" }
func (varPopAgg) Decode(b []byte) (State, error) {
return decodeVarMoments(b)
}
func (varPopAgg) Merge(states []State) (State, error) {
return mergeVarMoments(states)
}
func (varPopAgg) Final(s State) (any, error) {
v, ok := s.(*varMomentsState)
if !ok {
return nil, fmt.Errorf("scalarstate.varPop.final: bad state type %T", s)
}
return varPop(v), nil
}
type varSampAgg struct{}
func (varSampAgg) Name() string { return "varsamp" }
func (varSampAgg) StateFunc(inner string) string { return "varSampState(toFloat64(" + inner + "))" }
func (varSampAgg) StateColumnType() string { return "AggregateFunction(varSamp, Float64)" }
func (varSampAgg) Decode(b []byte) (State, error) {
return decodeVarMoments(b)
}
func (varSampAgg) Merge(states []State) (State, error) {
return mergeVarMoments(states)
}
func (varSampAgg) Final(s State) (any, error) {
v, ok := s.(*varMomentsState)
if !ok {
return nil, fmt.Errorf("scalarstate.varSamp.final: bad state type %T", s)
}
return varSamp(v), nil
}
type stddevPopAgg struct{}
func (stddevPopAgg) Name() string { return "stddevpop" }
func (stddevPopAgg) StateFunc(inner string) string { return "stddevPopState(toFloat64(" + inner + "))" }
func (stddevPopAgg) StateColumnType() string { return "AggregateFunction(stddevPop, Float64)" }
func (stddevPopAgg) Decode(b []byte) (State, error) {
return decodeVarMoments(b)
}
func (stddevPopAgg) Merge(states []State) (State, error) {
return mergeVarMoments(states)
}
func (stddevPopAgg) Final(s State) (any, error) {
v, ok := s.(*varMomentsState)
if !ok {
return nil, fmt.Errorf("scalarstate.stddevPop.final: bad state type %T", s)
}
return math.Sqrt(varPop(v)), nil
}
type stddevSampAgg struct{}
func (stddevSampAgg) Name() string { return "stddevsamp" }
func (stddevSampAgg) StateFunc(inner string) string { return "stddevSampState(toFloat64(" + inner + "))" }
func (stddevSampAgg) StateColumnType() string { return "AggregateFunction(stddevSamp, Float64)" }
func (stddevSampAgg) Decode(b []byte) (State, error) {
return decodeVarMoments(b)
}
func (stddevSampAgg) Merge(states []State) (State, error) {
return mergeVarMoments(states)
}
func (stddevSampAgg) Final(s State) (any, error) {
v, ok := s.(*varMomentsState)
if !ok {
return nil, fmt.Errorf("scalarstate.stddevSamp.final: bad state type %T", s)
}
return math.Sqrt(varSamp(v)), nil
}

View File

@@ -72,6 +72,7 @@ func (b *auditQueryStatementBuilder) Build(
requestType qbtypes.RequestType,
query qbtypes.QueryBuilderQuery[qbtypes.LogAggregation],
variables map[string]qbtypes.VariableItem,
opts qbtypes.StatementBuilderOptions,
) (*qbtypes.Statement, error) {
start = querybuilder.ToNanoSecs(start)
end = querybuilder.ToNanoSecs(end)
@@ -93,7 +94,7 @@ func (b *auditQueryStatementBuilder) Build(
case qbtypes.RequestTypeTimeSeries:
stmt, err = b.buildTimeSeriesQuery(ctx, q, query, start, end, keys, variables)
case qbtypes.RequestTypeScalar:
stmt, err = b.buildScalarQuery(ctx, q, query, start, end, keys, false, variables)
stmt, err = b.buildScalarQuery(ctx, q, query, start, end, keys, variables, opts)
default:
return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "unsupported request type: %s", requestType)
}
@@ -355,7 +356,11 @@ func (b *auditQueryStatementBuilder) buildTimeSeriesQuery(
if query.Limit > 0 && len(query.GroupBy) > 0 {
cteSB := sqlbuilder.NewSelectBuilder()
cteStmt, err := b.buildScalarQuery(ctx, cteSB, query, start, end, keys, true, variables)
// Limit CTE selects top-N groups by aggregate value, so it
// needs plain aggregates that ORDER BY can sort. State-mode
// SQL emits hex(*State()) blobs that have no numeric
// ordering — skip the state path here.
cteStmt, err := b.buildScalarQuery(ctx, cteSB, query, start, end, keys, variables, qbtypes.NewStatementBuilderOptions().WithSkipResourceCTE().WithSkipScalarState())
if err != nil {
return nil, err
}
@@ -439,8 +444,8 @@ func (b *auditQueryStatementBuilder) buildScalarQuery(
query qbtypes.QueryBuilderQuery[qbtypes.LogAggregation],
start, end uint64,
keys map[string][]*telemetrytypes.TelemetryFieldKey,
skipResourceCTE bool,
variables map[string]qbtypes.VariableItem,
opts qbtypes.StatementBuilderOptions,
) (*qbtypes.Statement, error) {
var (
cteFragments []string
@@ -449,13 +454,12 @@ func (b *auditQueryStatementBuilder) buildScalarQuery(
if frag, args, err := b.maybeAttachResourceFilter(ctx, sb, query, start, end, variables); err != nil {
return nil, err
} else if frag != "" && !skipResourceCTE {
} else if frag != "" && !opts.SkipResourceCTE {
cteFragments = append(cteFragments, frag)
cteArgs = append(cteArgs, args)
}
allAggChArgs := []any{}
var allGroupByArgs []any
for _, gb := range query.GroupBy {
@@ -463,7 +467,6 @@ func (b *auditQueryStatementBuilder) buildScalarQuery(
if err != nil {
return nil, err
}
colExpr := fmt.Sprintf("toString(%s) AS `%s`", expr, gb.Name)
allGroupByArgs = append(allGroupByArgs, args...)
sb.SelectMore(colExpr)
@@ -471,15 +474,29 @@ func (b *auditQueryStatementBuilder) buildScalarQuery(
rateInterval := (end - start) / querybuilder.NsToSeconds
if len(query.Aggregations) > 0 {
for idx := range query.Aggregations {
aggExpr := query.Aggregations[idx]
rewritten, chArgs, err := b.aggExprRewriter.Rewrite(ctx, start, end, aggExpr.Expression, rateInterval, keys)
if err != nil {
return nil, err
}
allAggChArgs = append(allAggChArgs, chArgs...)
for idx := range query.Aggregations {
aggExpr := query.Aggregations[idx]
var (
rewritten string
chArgs []any
err error
)
if opts.SkipScalarState {
rewritten, chArgs, err = b.aggExprRewriter.Rewrite(ctx, start, end, aggExpr.Expression, rateInterval, keys)
} else {
rewritten, chArgs, err = b.aggExprRewriter.RewriteWithState(ctx, start, end, aggExpr.Expression, keys)
}
if err != nil {
return nil, err
}
allAggChArgs = append(allAggChArgs, chArgs...)
// clickhouse-go can't decode AggregateFunction(...) columns; wrap
// state-mode aggregates in hex(...) so they come back as String.
// readAsScalarState hex-decodes back to the raw state bytes.
if opts.SkipScalarState {
sb.SelectMore(fmt.Sprintf("%s AS __result_%d", rewritten, idx))
} else {
sb.SelectMore(fmt.Sprintf("hex(%s) AS __result_%d", rewritten, idx))
}
}
@@ -492,7 +509,7 @@ func (b *auditQueryStatementBuilder) buildScalarQuery(
sb.GroupBy(querybuilder.GroupByKeys(query.GroupBy)...)
if query.Having != nil && query.Having.Expression != "" {
if query.Having != nil && query.Having.Expression != "" && !opts.SkipHaving {
rewriter := querybuilder.NewHavingExpressionRewriter()
rewrittenExpr, err := rewriter.RewriteForLogs(query.Having.Expression, query.Aggregations)
if err != nil {
@@ -501,25 +518,24 @@ func (b *auditQueryStatementBuilder) buildScalarQuery(
sb.Having(rewrittenExpr)
}
for _, orderBy := range query.Order {
idx, ok := aggOrderBy(orderBy, query)
if ok {
sb.OrderBy(fmt.Sprintf("__result_%d %s", idx, orderBy.Direction.StringValue()))
} else {
sb.OrderBy(fmt.Sprintf("`%s` %s", orderBy.Key.Name, orderBy.Direction.StringValue()))
if opts.SkipScalarState {
for _, orderBy := range query.Order {
idx, ok := aggOrderBy(orderBy, query)
if ok {
sb.OrderBy(fmt.Sprintf("__result_%d %s", idx, orderBy.Direction.StringValue()))
} else {
sb.OrderBy(fmt.Sprintf("`%s` %s", orderBy.Key.Name, orderBy.Direction.StringValue()))
}
}
if len(query.Order) == 0 {
sb.OrderBy("__result_0 DESC")
}
if query.Limit > 0 {
sb.Limit(query.Limit)
}
}
if len(query.Order) == 0 {
sb.OrderBy("__result_0 DESC")
}
if query.Limit > 0 {
sb.Limit(query.Limit)
}
combinedArgs := append(allGroupByArgs, allAggChArgs...)
mainSQL, mainArgs := sb.BuildWithFlavor(sqlbuilder.ClickHouse, combinedArgs...)
finalSQL := querybuilder.CombineCTEs(cteFragments) + mainSQL
@@ -604,7 +620,7 @@ func (b *auditQueryStatementBuilder) maybeAttachResourceFilter(
start, end uint64,
variables map[string]qbtypes.VariableItem,
) (cteSQL string, cteArgs []any, err error) {
stmt, err := b.resourceFilterStmtBuilder.Build(ctx, start, end, qbtypes.RequestTypeRaw, query, variables)
stmt, err := b.resourceFilterStmtBuilder.Build(ctx, start, end, qbtypes.RequestTypeRaw, query, variables, qbtypes.NewStatementBuilderOptions())
if err != nil {
return "", nil, err
}
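
The opts value threaded through every Build call above is a small flags struct with a fluent constructor. Its definition is not in this excerpt; the sketch below is an assumed shape reconstructed from the call sites (NewStatementBuilderOptions, the With* chain, and the opts.Skip* reads in the hunks), not copied from the real type:

package querybuildertypesv5

// StatementBuilderOptions: assumed shape, reconstructed from call sites.
type StatementBuilderOptions struct {
	SkipResourceCTE bool // drop the __resource_filter_* CTE
	SkipHaving      bool // drop HAVING (the top-N CTE path re-applies it)
	SkipScalarState bool // plain aggregates instead of hex(*State())
}

func NewStatementBuilderOptions() StatementBuilderOptions {
	return StatementBuilderOptions{}
}

// Value receivers so calls chain without mutating the caller's copy.
func (o StatementBuilderOptions) WithSkipResourceCTE() StatementBuilderOptions {
	o.SkipResourceCTE = true
	return o
}

func (o StatementBuilderOptions) WithSkipHaving() StatementBuilderOptions {
	o.SkipHaving = true
	return o
}

func (o StatementBuilderOptions) WithSkipScalarState() StatementBuilderOptions {
	o.SkipScalarState = true
	return o
}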

View File

@@ -5,12 +5,12 @@ import (
"testing"
"time"
"github.com/SigNoz/signoz/pkg/flagger/flaggertest"
"github.com/SigNoz/signoz/pkg/instrumentation/instrumentationtest"
"github.com/SigNoz/signoz/pkg/querybuilder"
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
"github.com/SigNoz/signoz/pkg/types/telemetrytypes/telemetrytypestest"
"github.com/SigNoz/signoz/pkg/flagger/flaggertest"
"github.com/stretchr/testify/require"
)
@@ -213,7 +213,7 @@ func TestStatementBuilder(t *testing.T) {
for _, testCase := range testCases {
t.Run(testCase.name, func(t *testing.T) {
q, err := statementBuilder.Build(ctx, 1747947419000, 1747983448000, testCase.requestType, testCase.query, nil)
q, err := statementBuilder.Build(ctx, 1747947419000, 1747983448000, testCase.requestType, testCase.query, nil, qbtypes.NewStatementBuilderOptions().WithSkipScalarState())
if testCase.expectedErr != nil {
require.Error(t, err)
require.Contains(t, err.Error(), testCase.expectedErr.Error())

View File

@@ -94,7 +94,7 @@ func TestJSONStmtBuilder_TimeSeries(t *testing.T) {
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
q, err := statementBuilder.Build(context.Background(), 1747947419000, 1747983448000, c.requestType, c.query, nil)
q, err := statementBuilder.Build(context.Background(), 1747947419000, 1747983448000, c.requestType, c.query, nil, qbtypes.NewStatementBuilderOptions())
if c.expectedErrContains != "" {
require.Error(t, err)
@@ -155,7 +155,7 @@ func TestStmtBuilderTimeSeriesBodyGroupByPromoted(t *testing.T) {
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
q, err := statementBuilder.Build(context.Background(), 1747947419000, 1747983448000, c.requestType, c.query, nil)
q, err := statementBuilder.Build(context.Background(), 1747947419000, 1747983448000, c.requestType, c.query, nil, qbtypes.NewStatementBuilderOptions())
if c.expectedErrContains != "" {
require.Error(t, err)
require.Contains(t, err.Error(), c.expectedErrContains)
@@ -313,7 +313,7 @@ func TestJSONStmtBuilder_PrimitivePaths(t *testing.T) {
Signal: telemetrytypes.SignalLogs,
Filter: &qbtypes.Filter{Expression: c.filter},
Limit: 10,
}, nil)
}, nil, qbtypes.NewStatementBuilderOptions())
if c.expectedErr != nil {
require.Error(t, err)
require.Contains(t, err.Error(), c.expectedErr.Error())
@@ -477,7 +477,7 @@ func TestStatementBuilderListQueryBodyPromoted(t *testing.T) {
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
q, err := statementBuilder.Build(context.Background(), 1747947419000, 1747983448000, c.requestType, c.query, nil)
q, err := statementBuilder.Build(context.Background(), 1747947419000, 1747983448000, c.requestType, c.query, nil, qbtypes.NewStatementBuilderOptions())
if c.expectedErr != nil {
require.Error(t, err)
@@ -784,7 +784,7 @@ func TestJSONStmtBuilder_ArrayPaths(t *testing.T) {
Signal: telemetrytypes.SignalLogs,
Filter: &qbtypes.Filter{Expression: c.filter},
Limit: 10,
}, nil)
}, nil, qbtypes.NewStatementBuilderOptions())
if c.expectedErr != nil {
require.Error(t, err)
require.Contains(t, err.Error(), c.expectedErr.Error())
@@ -904,7 +904,7 @@ func TestJSONStmtBuilder_IndexedPaths(t *testing.T) {
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
q, err := statementBuilder.Build(context.Background(), 1747947419000, 1747983448000, qbtypes.RequestTypeRaw, c.query, nil)
q, err := statementBuilder.Build(context.Background(), 1747947419000, 1747983448000, qbtypes.RequestTypeRaw, c.query, nil, qbtypes.NewStatementBuilderOptions())
if c.expectedErr != nil {
require.Error(t, err)
require.Contains(t, err.Error(), c.expectedErr.Error())
@@ -991,7 +991,7 @@ func TestJSONStmtBuilder_SelectField(t *testing.T) {
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
q, err := statementBuilder.Build(context.Background(), 1747947419000, 1747983448000, c.requestType, c.query, nil)
q, err := statementBuilder.Build(context.Background(), 1747947419000, 1747983448000, c.requestType, c.query, nil, qbtypes.NewStatementBuilderOptions())
if c.expectedErrContains != "" {
require.Error(t, err)
require.Contains(t, err.Error(), c.expectedErrContains)
@@ -1068,7 +1068,7 @@ func TestJSONStmtBuilder_OrderBy(t *testing.T) {
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
q, err := statementBuilder.Build(context.Background(), 1747947419000, 1747983448000, c.requestType, c.query, nil)
q, err := statementBuilder.Build(context.Background(), 1747947419000, 1747983448000, c.requestType, c.query, nil, qbtypes.NewStatementBuilderOptions())
if c.expectedErrContains != "" {
require.Error(t, err)
require.Contains(t, err.Error(), c.expectedErrContains)

View File

@@ -78,6 +78,7 @@ func (b *logQueryStatementBuilder) Build(
requestType qbtypes.RequestType,
query qbtypes.QueryBuilderQuery[qbtypes.LogAggregation],
variables map[string]qbtypes.VariableItem,
opts qbtypes.StatementBuilderOptions,
) (*qbtypes.Statement, error) {
start = querybuilder.ToNanoSecs(start)
@@ -103,7 +104,7 @@ func (b *logQueryStatementBuilder) Build(
case qbtypes.RequestTypeTimeSeries:
stmt, err = b.buildTimeSeriesQuery(ctx, q, query, start, end, keys, variables)
case qbtypes.RequestTypeScalar:
stmt, err = b.buildScalarQuery(ctx, q, query, start, end, keys, false, variables)
stmt, err = b.buildScalarQuery(ctx, q, query, start, end, keys, variables, opts)
default:
return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "unsupported request type: %s", requestType)
}
@@ -433,7 +434,11 @@ func (b *logQueryStatementBuilder) buildTimeSeriesQuery(
if query.Limit > 0 && len(query.GroupBy) > 0 {
// build the scalar “top/bottom-N” query in its own builder.
cteSB := sqlbuilder.NewSelectBuilder()
cteStmt, err := b.buildScalarQuery(ctx, cteSB, query, start, end, keys, true, variables)
// Limit CTE selects top-N groups by aggregate value, so it
// needs plain aggregates that ORDER BY can sort. State-mode
// SQL emits hex(*State()) blobs that have no numeric
// ordering — skip the state path here.
cteStmt, err := b.buildScalarQuery(ctx, cteSB, query, start, end, keys, variables, qbtypes.NewStatementBuilderOptions().WithSkipResourceCTE().WithSkipScalarState())
if err != nil {
return nil, err
}
@@ -524,8 +529,8 @@ func (b *logQueryStatementBuilder) buildScalarQuery(
query qbtypes.QueryBuilderQuery[qbtypes.LogAggregation],
start, end uint64,
keys map[string][]*telemetrytypes.TelemetryFieldKey,
skipResourceCTE bool,
variables map[string]qbtypes.VariableItem,
opts qbtypes.StatementBuilderOptions,
) (*qbtypes.Statement, error) {
var (
@@ -537,7 +542,7 @@ func (b *logQueryStatementBuilder) buildScalarQuery(
if frag, args, err := b.maybeAttachResourceFilter(ctx, sb, query, start, end, variables); err != nil {
return nil, err
} else if frag != "" && !skipResourceCTE {
} else if frag != "" && !opts.SkipResourceCTE {
cteFragments = append(cteFragments, frag)
cteArgs = append(cteArgs, args)
}
@@ -560,20 +565,29 @@ func (b *logQueryStatementBuilder) buildScalarQuery(
// for scalar queries, the rate would be end-start
rateInterval := (end - start) / querybuilder.NsToSeconds
// Add aggregation
if len(query.Aggregations) > 0 {
for idx := range query.Aggregations {
aggExpr := query.Aggregations[idx]
rewritten, chArgs, err := b.aggExprRewriter.Rewrite(
ctx, start, end, aggExpr.Expression,
rateInterval,
keys,
)
if err != nil {
return nil, err
}
allAggChArgs = append(allAggChArgs, chArgs...)
for idx := range query.Aggregations {
aggExpr := query.Aggregations[idx]
var (
rewritten string
chArgs []any
err error
)
if opts.SkipScalarState {
rewritten, chArgs, err = b.aggExprRewriter.Rewrite(ctx, start, end, aggExpr.Expression, rateInterval, keys)
} else {
rewritten, chArgs, err = b.aggExprRewriter.RewriteWithState(ctx, start, end, aggExpr.Expression, keys)
}
if err != nil {
return nil, err
}
allAggChArgs = append(allAggChArgs, chArgs...)
// clickhouse-go can't decode AggregateFunction(...) columns; wrap
// state-mode aggregates in hex(...) so they come back as String.
// readAsScalarState hex-decodes back to the raw state bytes.
if opts.SkipScalarState {
sb.SelectMore(fmt.Sprintf("%s AS __result_%d", rewritten, idx))
} else {
sb.SelectMore(fmt.Sprintf("hex(%s) AS __result_%d", rewritten, idx))
}
}
@@ -581,7 +595,6 @@ func (b *logQueryStatementBuilder) buildScalarQuery(
// Add filter conditions
preparedWhereClause, err := b.addFilterCondition(ctx, sb, start, end, query, keys, variables)
if err != nil {
return nil, err
}
@@ -589,8 +602,7 @@ func (b *logQueryStatementBuilder) buildScalarQuery(
// Group by dimensions
sb.GroupBy(querybuilder.GroupByKeys(query.GroupBy)...)
// Add having clause if needed
if query.Having != nil && query.Having.Expression != "" {
if query.Having != nil && query.Having.Expression != "" && !opts.SkipHaving {
rewriter := querybuilder.NewHavingExpressionRewriter()
rewrittenExpr, err := rewriter.RewriteForLogs(query.Having.Expression, query.Aggregations)
if err != nil {
@@ -599,24 +611,21 @@ func (b *logQueryStatementBuilder) buildScalarQuery(
sb.Having(rewrittenExpr)
}
// Add order by
for _, orderBy := range query.Order {
idx, ok := aggOrderBy(orderBy, query)
if ok {
sb.OrderBy(fmt.Sprintf("__result_%d %s", idx, orderBy.Direction.StringValue()))
} else {
sb.OrderBy(fmt.Sprintf("`%s` %s", orderBy.Key.Name, orderBy.Direction.StringValue()))
if opts.SkipScalarState {
for _, orderBy := range query.Order {
idx, ok := aggOrderBy(orderBy, query)
if ok {
sb.OrderBy(fmt.Sprintf("__result_%d %s", idx, orderBy.Direction.StringValue()))
} else {
sb.OrderBy(fmt.Sprintf("`%s` %s", orderBy.Key.Name, orderBy.Direction.StringValue()))
}
}
if len(query.Order) == 0 {
sb.OrderBy("__result_0 DESC")
}
if query.Limit > 0 {
sb.Limit(query.Limit)
}
}
// if there is no order by, then use the __result_0 as the order by
if len(query.Order) == 0 {
sb.OrderBy("__result_0 DESC")
}
// Add limit and offset
if query.Limit > 0 {
sb.Limit(query.Limit)
}
combinedArgs := append(allGroupByArgs, allAggChArgs...)
@@ -741,5 +750,6 @@ func (b *logQueryStatementBuilder) buildResourceFilterCTE(
qbtypes.RequestTypeRaw,
query,
variables,
qbtypes.NewStatementBuilderOptions(),
)
}
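
Side by side, the two scalar modes the builders switch on emit roughly the shapes below. This is a simplified sketch: table and column names are placeholders, real statements carry the CTEs, filter predicates, and bound args shown in the hunks, and the exact aggregate expression comes from aggExprRewriter / RewriteWithState:

// Plain mode (opts.SkipScalarState): numeric results that sort,
// so ORDER BY / LIMIT apply.
const plainShape = "SELECT toString(...) AS `service.name`, " +
	"count() AS __result_0 FROM logs GROUP BY `service.name` " +
	"ORDER BY __result_0 DESC LIMIT 10"

// State mode: hex-wrapped AggregateFunction blobs with no numeric
// ordering, so ORDER BY / LIMIT are skipped; merging happens later
// in Go via the scalarstate registry.
const stateShape = "SELECT toString(...) AS `service.name`, " +
	"hex(countState()) AS __result_0 FROM logs GROUP BY `service.name`"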

View File

@@ -217,7 +217,7 @@ func TestStatementBuilderTimeSeries(t *testing.T) {
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
q, err := statementBuilder.Build(ctx, c.startTs, c.endTs, c.requestType, c.query, nil)
q, err := statementBuilder.Build(ctx, c.startTs, c.endTs, c.requestType, c.query, nil, qbtypes.NewStatementBuilderOptions())
if c.expectedErr != nil {
require.Error(t, err)
@@ -340,7 +340,7 @@ func TestStatementBuilderListQuery(t *testing.T) {
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
q, err := statementBuilder.Build(ctx, 1747947419000, 1747983448000, c.requestType, c.query, nil)
q, err := statementBuilder.Build(ctx, 1747947419000, 1747983448000, c.requestType, c.query, nil, qbtypes.NewStatementBuilderOptions())
if c.expectedErr != nil {
require.Error(t, err)
@@ -482,7 +482,7 @@ func TestStatementBuilderListQueryResourceTests(t *testing.T) {
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
q, err := statementBuilder.Build(ctx, 1747947419000, 1747983448000, c.requestType, c.query, nil)
q, err := statementBuilder.Build(ctx, 1747947419000, 1747983448000, c.requestType, c.query, nil, qbtypes.NewStatementBuilderOptions())
if c.expectedErr != nil {
require.Error(t, err)
@@ -558,7 +558,7 @@ func TestStatementBuilderTimeSeriesBodyGroupBy(t *testing.T) {
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
q, err := statementBuilder.Build(ctx, 1747947419000, 1747983448000, c.requestType, c.query, nil)
q, err := statementBuilder.Build(ctx, 1747947419000, 1747983448000, c.requestType, c.query, nil, qbtypes.NewStatementBuilderOptions())
if c.expectedErrContains != "" {
require.Error(t, err)
@@ -653,7 +653,7 @@ func TestStatementBuilderListQueryServiceCollision(t *testing.T) {
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
q, err := statementBuilder.Build(ctx, 1747947419000, 1747983448000, c.requestType, c.query, nil)
q, err := statementBuilder.Build(ctx, 1747947419000, 1747983448000, c.requestType, c.query, nil, qbtypes.NewStatementBuilderOptions())
if c.expectedErr != nil {
require.Error(t, err)
@@ -1019,7 +1019,7 @@ func TestStmtBuilderBodyField(t *testing.T) {
fl,
)
q, err := statementBuilder.Build(context.Background(), 1747947419000, 1747983448000, c.requestType, c.query, nil)
q, err := statementBuilder.Build(context.Background(), 1747947419000, 1747983448000, c.requestType, c.query, nil, qbtypes.NewStatementBuilderOptions())
if c.expectedErr != nil {
require.Error(t, err)
require.Contains(t, err.Error(), c.expectedErr.Error())
@@ -1118,7 +1118,7 @@ func TestStmtBuilderBodyFullTextSearch(t *testing.T) {
fl,
)
q, err := statementBuilder.Build(context.Background(), 1747947419000, 1747983448000, c.requestType, c.query, nil)
q, err := statementBuilder.Build(context.Background(), 1747947419000, 1747983448000, c.requestType, c.query, nil, qbtypes.NewStatementBuilderOptions())
if c.expectedErr != nil {
require.Error(t, err)
require.Contains(t, err.Error(), c.expectedErr.Error())

View File

@@ -50,6 +50,7 @@ func (b *meterQueryStatementBuilder) Build(
_ qbtypes.RequestType,
query qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation],
variables map[string]qbtypes.VariableItem,
_ qbtypes.StatementBuilderOptions,
) (*qbtypes.Statement, error) {
keySelectors := telemetrymetrics.GetKeySelectors(query)
keys, _, err := b.metadataStore.GetKeysMulti(ctx, keySelectors)

View File

@@ -181,7 +181,7 @@ func TestStatementBuilder(t *testing.T) {
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
q, err := statementBuilder.Build(context.Background(), 1747947419000, 1747983448000, c.requestType, c.query, nil)
q, err := statementBuilder.Build(context.Background(), 1747947419000, 1747983448000, c.requestType, c.query, nil, qbtypes.NewStatementBuilderOptions())
if c.expectedErr != nil {
require.Error(t, err)

View File

@@ -94,6 +94,7 @@ func (b *MetricQueryStatementBuilder) Build(
_ qbtypes.RequestType,
query qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation],
variables map[string]qbtypes.VariableItem,
_ qbtypes.StatementBuilderOptions,
) (*qbtypes.Statement, error) {
keySelectors := GetKeySelectors(query)
keys, _, err := b.metadataStore.GetKeysMulti(ctx, keySelectors)

View File

@@ -251,7 +251,7 @@ func TestStatementBuilder(t *testing.T) {
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
q, err := statementBuilder.Build(context.Background(), 1747947419000, 1747983448000, c.requestType, c.query, nil)
q, err := statementBuilder.Build(context.Background(), 1747947419000, 1747983448000, c.requestType, c.query, nil, qbtypes.NewStatementBuilderOptions())
if c.expectedErr != nil {
require.Error(t, err)

View File

@@ -97,6 +97,7 @@ func (b *resourceFilterStatementBuilder[T]) Build(
requestType qbtypes.RequestType,
query qbtypes.QueryBuilderQuery[T],
variables map[string]qbtypes.VariableItem,
_ qbtypes.StatementBuilderOptions,
) (*qbtypes.Statement, error) {
q := sqlbuilder.NewSelectBuilder()
q.Select("fingerprint")

View File

@@ -367,7 +367,7 @@ func TestResourceFilterStatementBuilder_Traces(t *testing.T) {
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
stmt, err := builder.Build(context.Background(), c.start, c.end, qbtypes.RequestTypeTimeSeries, c.query, nil)
stmt, err := builder.Build(context.Background(), c.start, c.end, qbtypes.RequestTypeTimeSeries, c.query, nil, qbtypes.NewStatementBuilderOptions())
if c.expectedErr != nil {
require.Error(t, err)
@@ -561,7 +561,7 @@ func TestResourceFilterStatementBuilder_Logs(t *testing.T) {
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
stmt, err := builder.Build(context.Background(), c.start, c.end, qbtypes.RequestTypeTimeSeries, c.query, nil)
stmt, err := builder.Build(context.Background(), c.start, c.end, qbtypes.RequestTypeTimeSeries, c.query, nil, qbtypes.NewStatementBuilderOptions())
if c.expectedErr != nil {
require.Error(t, err)
@@ -629,7 +629,7 @@ func TestResourceFilterStatementBuilder_Variables(t *testing.T) {
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
stmt, err := builder.Build(context.Background(), c.start, c.end, qbtypes.RequestTypeTimeSeries, c.query, c.variables)
stmt, err := builder.Build(context.Background(), c.start, c.end, qbtypes.RequestTypeTimeSeries, c.query, c.variables, qbtypes.NewStatementBuilderOptions())
if c.expectedErr != nil {
require.Error(t, err)

View File

@@ -77,6 +77,7 @@ func (b *traceQueryStatementBuilder) Build(
requestType qbtypes.RequestType,
query qbtypes.QueryBuilderQuery[qbtypes.TraceAggregation],
variables map[string]qbtypes.VariableItem,
opts qbtypes.StatementBuilderOptions,
) (*qbtypes.Statement, error) {
start = querybuilder.ToNanoSecs(start)
@@ -135,7 +136,7 @@ func (b *traceQueryStatementBuilder) Build(
case qbtypes.RequestTypeTimeSeries:
return b.buildTimeSeriesQuery(ctx, q, query, start, end, keys, variables)
case qbtypes.RequestTypeScalar:
return b.buildScalarQuery(ctx, q, query, start, end, keys, variables, false, false)
return b.buildScalarQuery(ctx, q, query, start, end, keys, variables, opts)
case qbtypes.RequestTypeTrace:
return b.buildTraceQuery(ctx, q, query, start, end, keys, variables)
}
@@ -550,7 +551,11 @@ func (b *traceQueryStatementBuilder) buildTimeSeriesQuery(
if query.Limit > 0 && len(query.GroupBy) > 0 {
// build the scalar “top/bottom-N” query in its own builder.
cteSB := sqlbuilder.NewSelectBuilder()
cteStmt, err := b.buildScalarQuery(ctx, cteSB, query, start, end, keys, variables, true, true)
// Limit CTE selects top-N groups by aggregate value, so it
// needs plain aggregates that ORDER BY can sort. State-mode
// SQL emits hex(*State()) blobs that have no numeric
// ordering — skip the state path here.
cteStmt, err := b.buildScalarQuery(ctx, cteSB, query, start, end, keys, variables, qbtypes.NewStatementBuilderOptions().WithSkipResourceCTE().WithSkipHaving().WithSkipScalarState())
if err != nil {
return nil, err
}
@@ -641,8 +646,7 @@ func (b *traceQueryStatementBuilder) buildScalarQuery(
start, end uint64,
keys map[string][]*telemetrytypes.TelemetryFieldKey,
variables map[string]qbtypes.VariableItem,
skipResourceCTE bool,
skipHaving bool,
opts qbtypes.StatementBuilderOptions,
) (*qbtypes.Statement, error) {
var (
@@ -652,7 +656,7 @@ func (b *traceQueryStatementBuilder) buildScalarQuery(
if frag, args, err := b.maybeAttachResourceFilter(ctx, sb, query, start, end, variables); err != nil {
return nil, err
} else if frag != "" && !skipResourceCTE {
} else if frag != "" && !opts.SkipResourceCTE {
cteFragments = append(cteFragments, frag)
cteArgs = append(cteArgs, args)
}
@@ -674,19 +678,29 @@ func (b *traceQueryStatementBuilder) buildScalarQuery(
rateInterval := (end - start) / querybuilder.NsToSeconds
// Add aggregation
if len(query.Aggregations) > 0 {
for idx := range query.Aggregations {
aggExpr := query.Aggregations[idx]
rewritten, chArgs, err := b.aggExprRewriter.Rewrite(
ctx, start, end, aggExpr.Expression,
rateInterval,
keys,
)
if err != nil {
return nil, err
}
allAggChArgs = append(allAggChArgs, chArgs...)
for idx := range query.Aggregations {
aggExpr := query.Aggregations[idx]
var (
rewritten string
chArgs []any
err error
)
if opts.SkipScalarState {
rewritten, chArgs, err = b.aggExprRewriter.Rewrite(ctx, start, end, aggExpr.Expression, rateInterval, keys)
} else {
rewritten, chArgs, err = b.aggExprRewriter.RewriteWithState(ctx, start, end, aggExpr.Expression, keys)
}
if err != nil {
return nil, err
}
allAggChArgs = append(allAggChArgs, chArgs...)
// clickhouse-go can't decode AggregateFunction(...) columns; wrap
// state-mode aggregates in hex(...) so they come back as String.
// readAsScalarState hex-decodes back to the raw state bytes.
if opts.SkipScalarState {
sb.SelectMore(fmt.Sprintf("%s AS __result_%d", rewritten, idx))
} else {
sb.SelectMore(fmt.Sprintf("hex(%s) AS __result_%d", rewritten, idx))
}
}
@@ -703,7 +717,7 @@ func (b *traceQueryStatementBuilder) buildScalarQuery(
sb.GroupBy(querybuilder.GroupByKeys(query.GroupBy)...)
// Add having clause if needed
if query.Having != nil && query.Having.Expression != "" && !skipHaving {
if query.Having != nil && query.Having.Expression != "" && !opts.SkipHaving {
rewriter := querybuilder.NewHavingExpressionRewriter()
rewrittenExpr, err := rewriter.RewriteForTraces(query.Having.Expression, query.Aggregations)
if err != nil {
@@ -712,24 +726,23 @@ func (b *traceQueryStatementBuilder) buildScalarQuery(
sb.Having(rewrittenExpr)
}
// Add order by
for _, orderBy := range query.Order {
idx, ok := aggOrderBy(orderBy, query)
if ok {
sb.OrderBy(fmt.Sprintf("__result_%d %s", idx, orderBy.Direction.StringValue()))
} else {
sb.OrderBy(fmt.Sprintf("`%s` %s", orderBy.Key.Name, orderBy.Direction.StringValue()))
// State-mode aggregates produce AggregateFunction blobs that have
// no meaningful numeric ordering; skip ORDER BY / LIMIT entirely.
if opts.SkipScalarState {
for _, orderBy := range query.Order {
idx, ok := aggOrderBy(orderBy, query)
if ok {
sb.OrderBy(fmt.Sprintf("__result_%d %s", idx, orderBy.Direction.StringValue()))
} else {
sb.OrderBy(fmt.Sprintf("`%s` %s", orderBy.Key.Name, orderBy.Direction.StringValue()))
}
}
if len(query.Order) == 0 {
sb.OrderBy("__result_0 DESC")
}
if query.Limit > 0 {
sb.Limit(query.Limit)
}
}
// if there is no order by, then use the __result_0 as the order by
if len(query.Order) == 0 {
sb.OrderBy("__result_0 DESC")
}
// Add limit and offset
if query.Limit > 0 {
sb.Limit(query.Limit)
}
combinedArgs := append(allGroupByArgs, allAggChArgs...)
@@ -842,5 +855,6 @@ func (b *traceQueryStatementBuilder) buildResourceFilterCTE(
qbtypes.RequestTypeRaw,
query,
variables,
qbtypes.NewStatementBuilderOptions(),
)
}

View File

@@ -378,7 +378,7 @@ func TestStatementBuilder(t *testing.T) {
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
q, err := statementBuilder.Build(context.Background(), 1747947419000, 1747983448000, c.requestType, c.query, vars)
q, err := statementBuilder.Build(context.Background(), 1747947419000, 1747983448000, c.requestType, c.query, vars, qbtypes.NewStatementBuilderOptions())
if c.expectedErr != nil {
require.Error(t, err)
@@ -667,7 +667,7 @@ func TestStatementBuilderListQuery(t *testing.T) {
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
q, err := statementBuilder.Build(context.Background(), 1747947419000, 1747983448000, c.requestType, c.query, nil)
q, err := statementBuilder.Build(context.Background(), 1747947419000, 1747983448000, c.requestType, c.query, nil, qbtypes.NewStatementBuilderOptions())
if c.expectedErr != nil {
require.Error(t, err)
@@ -773,7 +773,7 @@ func TestStatementBuilderListQueryWithCorruptData(t *testing.T) {
fl,
)
q, err := statementBuilder.Build(context.Background(), 1747947419000, 1747983448000, c.requestType, c.query, nil)
q, err := statementBuilder.Build(context.Background(), 1747947419000, 1747983448000, c.requestType, c.query, nil, qbtypes.NewStatementBuilderOptions())
if c.expectedErr != nil {
require.Error(t, err)
@@ -928,7 +928,7 @@ func TestStatementBuilderTraceQuery(t *testing.T) {
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
q, err := statementBuilder.Build(context.Background(), 1747947419000, 1747983448000, c.requestType, c.query, nil)
q, err := statementBuilder.Build(context.Background(), 1747947419000, 1747983448000, c.requestType, c.query, nil, qbtypes.NewStatementBuilderOptions())
if c.expectedErr != nil {
require.Error(t, err)

View File

@@ -144,6 +144,7 @@ func (b *traceOperatorCTEBuilder) buildResourceFilterCTE(ctx context.Context, qu
qbtypes.RequestTypeRaw,
query,
nil,
qbtypes.NewStatementBuilderOptions(),
)
}
@@ -413,21 +414,18 @@ func (b *traceOperatorCTEBuilder) buildFinalQuery(ctx context.Context, selectFro
}
func (b *traceOperatorCTEBuilder) buildListQuery(ctx context.Context, selectFromCTE string) (*qbtypes.Statement, error) {
keySelectors := b.getKeySelectors()
for _, field := range b.operator.SelectFields {
keySelectors = append(keySelectors, &telemetrytypes.FieldKeySelector{
Name: field.Name,
Signal: telemetrytypes.SignalTraces,
FieldContext: field.FieldContext,
FieldDataType: field.FieldDataType,
})
}
keys, _, err := b.stmtBuilder.metadataStore.GetKeysMulti(ctx, keySelectors)
if err != nil {
return nil, err
}
sb := sqlbuilder.NewSelectBuilder()
// Select core fields
sb.Select(
"timestamp",
"trace_id",
"span_id",
"name",
"duration_nano",
"parent_span_id",
)
coreFields := []string{"trace_id", "span_id", "name", "duration_nano", "parent_span_id"}
selectedFields := map[string]bool{
"timestamp": true,
"trace_id": true,
@@ -437,15 +435,23 @@ func (b *traceOperatorCTEBuilder) buildListQuery(ctx context.Context, selectFrom
"parent_span_id": true,
}
// Inner SELECT reads from the CTE and renames timestamp→ts.
// This breaks the `ORDER BY col AS `col`` pattern that triggers a
// CH 25.12.5 distributed-analyzer regression (NOT_FOUND_COLUMN_IN_BLOCK /
// timestamp renamed to timestamp_0). See ClickHouse/ClickHouse#103508.
innerSB := sqlbuilder.NewSelectBuilder()
innerSB.Select("timestamp AS ts")
innerSB.SelectMore(coreFields...)
// Get keys for selectFields
keySelectors := b.getKeySelectors()
for _, field := range b.operator.SelectFields {
keySelectors = append(keySelectors, &telemetrytypes.FieldKeySelector{
Name: field.Name,
Signal: telemetrytypes.SignalTraces,
FieldContext: field.FieldContext,
FieldDataType: field.FieldDataType,
})
}
var additionalSelectedFields []string
keys, _, err := b.stmtBuilder.metadataStore.GetKeysMulti(ctx, keySelectors)
if err != nil {
return nil, err
}
// Add selectFields using ColumnExpressionFor since we now have all base table columns
for _, field := range b.operator.SelectFields {
if selectedFields[field.Name] {
continue
@@ -456,60 +462,41 @@ func (b *traceOperatorCTEBuilder) buildListQuery(ctx context.Context, selectFrom
slog.String("field", field.Name), errors.Attr(err))
continue
}
innerSB.SelectMore(colExpr)
sb.SelectMore(colExpr)
selectedFields[field.Name] = true
additionalSelectedFields = append(additionalSelectedFields, field.Name)
}
// Also expose any explicit ORDER BY fields that aren't already selected,
// so the outer query can reference them by alias name.
sb.From(selectFromCTE)
// Add order by support using ColumnExpressionFor
orderApplied := false
for _, orderBy := range b.operator.Order {
if selectedFields[orderBy.Key.Name] {
continue
}
colExpr, err := b.stmtBuilder.fm.ColumnExpressionFor(ctx, b.start, b.end, &orderBy.Key.TelemetryFieldKey, keys)
if err != nil {
return nil, err
}
innerSB.SelectMore(colExpr)
selectedFields[orderBy.Key.Name] = true
sb.OrderBy(fmt.Sprintf("%s %s", colExpr, orderBy.Direction.StringValue()))
orderApplied = true
}
innerSB.From(selectFromCTE)
innerSQL, innerArgs := innerSB.BuildWithFlavor(sqlbuilder.ClickHouse)
// Outer SELECT reads from the inner subquery and re-exposes timestamp via
// the ts alias. ORDER BY uses the alias name directly — no AS-alias in the
// ORDER BY position — which is the pattern that avoids the CH regression.
outerSB := sqlbuilder.NewSelectBuilder()
outerSB.Select("ts AS timestamp")
outerSB.SelectMore(coreFields...)
for _, name := range additionalSelectedFields {
outerSB.SelectMore(fmt.Sprintf("`%s`", name))
}
outerSB.From(fmt.Sprintf("(%s) AS t", innerSQL))
if len(b.operator.Order) > 0 {
for _, orderBy := range b.operator.Order {
outerSB.OrderBy(fmt.Sprintf("`%s` %s", orderBy.Key.Name, orderBy.Direction.StringValue()))
}
} else {
outerSB.OrderBy("timestamp DESC")
if !orderApplied {
sb.OrderBy("timestamp DESC")
}
if b.operator.Limit > 0 {
outerSB.Limit(b.operator.Limit)
sb.Limit(b.operator.Limit)
} else {
outerSB.Limit(100)
}
if b.operator.Offset > 0 {
outerSB.Offset(b.operator.Offset)
sb.Limit(100)
}
outerSQL, outerArgs := outerSB.BuildWithFlavor(sqlbuilder.ClickHouse)
if b.operator.Offset > 0 {
sb.Offset(b.operator.Offset)
}
sql, args := sb.BuildWithFlavor(sqlbuilder.ClickHouse)
return &qbtypes.Statement{
Query: outerSQL,
Args: append(innerArgs, outerArgs...),
Query: sql,
Args: args,
}, nil
}
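
The rewritten buildListQuery drops the inner/outer split in favor of a single builder. In isolation the emitted shape reduces to roughly the following (a simplified sketch with a trimmed column list, using the same go-sqlbuilder calls already present in this diff):

package main

import (
	"fmt"

	"github.com/huandu/go-sqlbuilder"
)

func main() {
	// One builder: select straight from the operator CTE, order by the
	// raw timestamp column, and cap rows. No alias round-trip through
	// an inner subquery.
	sb := sqlbuilder.NewSelectBuilder()
	sb.Select("timestamp", "trace_id", "span_id", "name")
	sb.From("A_DIR_DESC_B")
	sb.OrderBy("timestamp DESC")
	sb.Limit(100)
	sql, args := sb.BuildWithFlavor(sqlbuilder.ClickHouse)
	fmt.Println(sql)  // SELECT timestamp, trace_id, span_id, name FROM A_DIR_DESC_B ORDER BY timestamp DESC LIMIT ?
	fmt.Println(args) // [100]
}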

View File

@@ -67,7 +67,7 @@ func TestTraceOperatorStatementBuilder(t *testing.T) {
},
},
expected: qbtypes.Statement{
Query: "WITH toDateTime64(1747947419000000000, 9) AS t_from, toDateTime64(1747983448000000000, 9) AS t_to, 1747945619 AS bucket_from, 1747983448 AS bucket_to, all_spans AS (SELECT *, resource_string_service$$name AS `service.name` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_A AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), A AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_A) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_B AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), B AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_B) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), A_DIR_DESC_B AS (SELECT p.* FROM A AS p INNER JOIN B AS c ON p.trace_id = c.trace_id AND p.span_id = c.parent_span_id) SELECT ts AS timestamp, trace_id, span_id, name, duration_nano, parent_span_id, `service.name` FROM (SELECT timestamp AS ts, trace_id, span_id, name, duration_nano, parent_span_id, multiIf(resource.`service.name` IS NOT NULL, resource.`service.name`::String, mapContains(resources_string, 'service.name'), resources_string['service.name'], NULL) AS `service.name` FROM A_DIR_DESC_B) AS t ORDER BY timestamp DESC LIMIT ? SETTINGS distributed_product_mode='allow', max_memory_usage=10000000000",
Query: "WITH toDateTime64(1747947419000000000, 9) AS t_from, toDateTime64(1747983448000000000, 9) AS t_to, 1747945619 AS bucket_from, 1747983448 AS bucket_to, all_spans AS (SELECT *, resource_string_service$$name AS `service.name` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_A AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), A AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_A) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_B AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), B AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_B) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), A_DIR_DESC_B AS (SELECT p.* FROM A AS p INNER JOIN B AS c ON p.trace_id = c.trace_id AND p.span_id = c.parent_span_id) SELECT timestamp, trace_id, span_id, name, duration_nano, parent_span_id, multiIf(resource.`service.name` IS NOT NULL, resource.`service.name`::String, mapContains(resources_string, 'service.name'), resources_string['service.name'], NULL) AS `service.name` FROM A_DIR_DESC_B ORDER BY timestamp DESC LIMIT ? SETTINGS distributed_product_mode='allow', max_memory_usage=10000000000",
Args: []any{"1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "frontend", "%service.name%", "%service.name\":\"frontend%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "backend", "%service.name%", "%service.name\":\"backend%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), 10},
},
expectedErr: nil,
@@ -104,7 +104,7 @@ func TestTraceOperatorStatementBuilder(t *testing.T) {
},
},
expected: qbtypes.Statement{
Query: "WITH toDateTime64(1747947419000000000, 9) AS t_from, toDateTime64(1747983448000000000, 9) AS t_to, 1747945619 AS bucket_from, 1747983448 AS bucket_to, all_spans AS (SELECT *, resource_string_service$$name AS `service.name` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_A AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), A AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_A) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_B AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), B AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_B) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), A_INDIR_DESC_B AS (WITH RECURSIVE up AS (SELECT d.trace_id, d.span_id, d.parent_span_id, 0 AS depth FROM B AS d UNION ALL SELECT p.trace_id, p.span_id, p.parent_span_id, up.depth + 1 FROM all_spans AS p JOIN up ON p.trace_id = up.trace_id AND p.span_id = up.parent_span_id WHERE up.depth < 100) SELECT DISTINCT a.* FROM A AS a GLOBAL INNER JOIN (SELECT DISTINCT trace_id, span_id FROM up WHERE depth > 0 ) AS ancestors ON ancestors.trace_id = a.trace_id AND ancestors.span_id = a.span_id) SELECT ts AS timestamp, trace_id, span_id, name, duration_nano, parent_span_id FROM (SELECT timestamp AS ts, trace_id, span_id, name, duration_nano, parent_span_id FROM A_INDIR_DESC_B) AS t ORDER BY timestamp DESC LIMIT ? SETTINGS distributed_product_mode='allow', max_memory_usage=10000000000",
Query: "WITH toDateTime64(1747947419000000000, 9) AS t_from, toDateTime64(1747983448000000000, 9) AS t_to, 1747945619 AS bucket_from, 1747983448 AS bucket_to, all_spans AS (SELECT *, resource_string_service$$name AS `service.name` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_A AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), A AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_A) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_B AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), B AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_B) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), A_INDIR_DESC_B AS (WITH RECURSIVE up AS (SELECT d.trace_id, d.span_id, d.parent_span_id, 0 AS depth FROM B AS d UNION ALL SELECT p.trace_id, p.span_id, p.parent_span_id, up.depth + 1 FROM all_spans AS p JOIN up ON p.trace_id = up.trace_id AND p.span_id = up.parent_span_id WHERE up.depth < 100) SELECT DISTINCT a.* FROM A AS a GLOBAL INNER JOIN (SELECT DISTINCT trace_id, span_id FROM up WHERE depth > 0 ) AS ancestors ON ancestors.trace_id = a.trace_id AND ancestors.span_id = a.span_id) SELECT timestamp, trace_id, span_id, name, duration_nano, parent_span_id FROM A_INDIR_DESC_B ORDER BY timestamp DESC LIMIT ? SETTINGS distributed_product_mode='allow', max_memory_usage=10000000000",
Args: []any{"1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "gateway", "%service.name%", "%service.name\":\"gateway%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "database", "%service.name%", "%service.name\":\"database%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), 5},
},
expectedErr: nil,
@@ -141,7 +141,7 @@ func TestTraceOperatorStatementBuilder(t *testing.T) {
},
},
expected: qbtypes.Statement{
Query: "WITH toDateTime64(1747947419000000000, 9) AS t_from, toDateTime64(1747983448000000000, 9) AS t_to, 1747945619 AS bucket_from, 1747983448 AS bucket_to, all_spans AS (SELECT *, resource_string_service$$name AS `service.name` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_A AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), A AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_A) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_B AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), B AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_B) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), A_AND_B AS (SELECT l.* FROM A AS l INNER JOIN B AS r ON l.trace_id = r.trace_id) SELECT ts AS timestamp, trace_id, span_id, name, duration_nano, parent_span_id FROM (SELECT timestamp AS ts, trace_id, span_id, name, duration_nano, parent_span_id FROM A_AND_B) AS t ORDER BY timestamp DESC LIMIT ? SETTINGS distributed_product_mode='allow', max_memory_usage=10000000000",
Query: "WITH toDateTime64(1747947419000000000, 9) AS t_from, toDateTime64(1747983448000000000, 9) AS t_to, 1747945619 AS bucket_from, 1747983448 AS bucket_to, all_spans AS (SELECT *, resource_string_service$$name AS `service.name` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_A AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), A AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_A) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_B AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), B AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_B) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), A_AND_B AS (SELECT l.* FROM A AS l INNER JOIN B AS r ON l.trace_id = r.trace_id) SELECT timestamp, trace_id, span_id, name, duration_nano, parent_span_id FROM A_AND_B ORDER BY timestamp DESC LIMIT ? SETTINGS distributed_product_mode='allow', max_memory_usage=10000000000",
Args: []any{"1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "frontend", "%service.name%", "%service.name\":\"frontend%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "backend", "%service.name%", "%service.name\":\"backend%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), 15},
},
expectedErr: nil,
@@ -178,7 +178,7 @@ func TestTraceOperatorStatementBuilder(t *testing.T) {
},
},
expected: qbtypes.Statement{
Query: "WITH toDateTime64(1747947419000000000, 9) AS t_from, toDateTime64(1747983448000000000, 9) AS t_to, 1747945619 AS bucket_from, 1747983448 AS bucket_to, all_spans AS (SELECT *, resource_string_service$$name AS `service.name` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_A AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), A AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_A) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_B AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), B AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_B) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), A_OR_B AS (SELECT * FROM A UNION DISTINCT SELECT * FROM B) SELECT ts AS timestamp, trace_id, span_id, name, duration_nano, parent_span_id FROM (SELECT timestamp AS ts, trace_id, span_id, name, duration_nano, parent_span_id FROM A_OR_B) AS t ORDER BY timestamp DESC LIMIT ? SETTINGS distributed_product_mode='allow', max_memory_usage=10000000000",
Query: "WITH toDateTime64(1747947419000000000, 9) AS t_from, toDateTime64(1747983448000000000, 9) AS t_to, 1747945619 AS bucket_from, 1747983448 AS bucket_to, all_spans AS (SELECT *, resource_string_service$$name AS `service.name` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_A AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), A AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_A) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_B AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), B AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_B) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), A_OR_B AS (SELECT * FROM A UNION DISTINCT SELECT * FROM B) SELECT timestamp, trace_id, span_id, name, duration_nano, parent_span_id FROM A_OR_B ORDER BY timestamp DESC LIMIT ? SETTINGS distributed_product_mode='allow', max_memory_usage=10000000000",
Args: []any{"1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "frontend", "%service.name%", "%service.name\":\"frontend%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "backend", "%service.name%", "%service.name\":\"backend%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), 20},
},
expectedErr: nil,
@@ -215,7 +215,7 @@ func TestTraceOperatorStatementBuilder(t *testing.T) {
},
},
expected: qbtypes.Statement{
Query: "WITH toDateTime64(1747947419000000000, 9) AS t_from, toDateTime64(1747983448000000000, 9) AS t_to, 1747945619 AS bucket_from, 1747983448 AS bucket_to, all_spans AS (SELECT *, resource_string_service$$name AS `service.name` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_A AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), A AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_A) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_B AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), B AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_B) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), A_not_B AS (SELECT l.* FROM A AS l WHERE l.trace_id GLOBAL NOT IN (SELECT DISTINCT trace_id FROM B)) SELECT ts AS timestamp, trace_id, span_id, name, duration_nano, parent_span_id FROM (SELECT timestamp AS ts, trace_id, span_id, name, duration_nano, parent_span_id FROM A_not_B) AS t ORDER BY timestamp DESC LIMIT ? SETTINGS distributed_product_mode='allow', max_memory_usage=10000000000",
Query: "WITH toDateTime64(1747947419000000000, 9) AS t_from, toDateTime64(1747983448000000000, 9) AS t_to, 1747945619 AS bucket_from, 1747983448 AS bucket_to, all_spans AS (SELECT *, resource_string_service$$name AS `service.name` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_A AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), A AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_A) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_B AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), B AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_B) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), A_not_B AS (SELECT l.* FROM A AS l WHERE l.trace_id GLOBAL NOT IN (SELECT DISTINCT trace_id FROM B)) SELECT timestamp, trace_id, span_id, name, duration_nano, parent_span_id FROM A_not_B ORDER BY timestamp DESC LIMIT ? SETTINGS distributed_product_mode='allow', max_memory_usage=10000000000",
Args: []any{"1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "frontend", "%service.name%", "%service.name\":\"frontend%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "backend", "%service.name%", "%service.name\":\"backend%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), 10},
},
expectedErr: nil,
@@ -380,72 +380,11 @@ func TestTraceOperatorStatementBuilder(t *testing.T) {
},
},
expected: qbtypes.Statement{
Query: "WITH toDateTime64(1747947419000000000, 9) AS t_from, toDateTime64(1747983448000000000, 9) AS t_to, 1747945619 AS bucket_from, 1747983448 AS bucket_to, all_spans AS (SELECT *, resource_string_service$$name AS `service.name` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_A AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), A AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_A) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_B AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), B AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_B) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), A_DIR_DESC_B AS (SELECT p.* FROM A AS p INNER JOIN B AS c ON p.trace_id = c.trace_id AND p.span_id = c.parent_span_id), __resource_filter_C AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), C AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_C) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_D AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), D AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_D) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), C_DIR_DESC_D AS (SELECT p.* FROM C AS p INNER JOIN D AS c ON p.trace_id = c.trace_id AND p.span_id = c.parent_span_id), A_DIR_DESC_B_AND_C_DIR_DESC_D AS (SELECT l.* FROM A_DIR_DESC_B AS l INNER JOIN C_DIR_DESC_D AS r ON l.trace_id = r.trace_id) SELECT ts AS timestamp, trace_id, span_id, name, duration_nano, parent_span_id FROM (SELECT timestamp AS ts, trace_id, span_id, name, duration_nano, parent_span_id FROM A_DIR_DESC_B_AND_C_DIR_DESC_D) AS t ORDER BY timestamp DESC LIMIT ? SETTINGS distributed_product_mode='allow', max_memory_usage=10000000000",
Query: "WITH toDateTime64(1747947419000000000, 9) AS t_from, toDateTime64(1747983448000000000, 9) AS t_to, 1747945619 AS bucket_from, 1747983448 AS bucket_to, all_spans AS (SELECT *, resource_string_service$$name AS `service.name` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_A AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), A AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_A) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_B AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), B AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_B) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), A_DIR_DESC_B AS (SELECT p.* FROM A AS p INNER JOIN B AS c ON p.trace_id = c.trace_id AND p.span_id = c.parent_span_id), __resource_filter_C AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), C AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_C) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_D AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), D AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_D) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), C_DIR_DESC_D AS (SELECT p.* FROM C AS p INNER JOIN D AS c ON p.trace_id = c.trace_id AND p.span_id = c.parent_span_id), A_DIR_DESC_B_AND_C_DIR_DESC_D AS (SELECT l.* FROM A_DIR_DESC_B AS l INNER JOIN C_DIR_DESC_D AS r ON l.trace_id = r.trace_id) SELECT timestamp, trace_id, span_id, name, duration_nano, parent_span_id FROM A_DIR_DESC_B_AND_C_DIR_DESC_D ORDER BY timestamp DESC LIMIT ? SETTINGS distributed_product_mode='allow', max_memory_usage=10000000000",
Args: []any{"1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "frontend", "%service.name%", "%service.name\":\"frontend%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "backend", "%service.name%", "%service.name\":\"backend%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "auth", "%service.name%", "%service.name\":\"auth%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "database", "%service.name%", "%service.name\":\"database%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), 5},
},
expectedErr: nil,
},
{
// order-by field (http.request.method) is not present in SelectFields;
// it must be included in the inner SELECT so the outer ORDER BY can
// reference it by alias, but must NOT appear in the outer SELECT list.
name: "order by field not in select fields",
requestType: qbtypes.RequestTypeRaw,
operator: qbtypes.QueryBuilderTraceOperator{
Expression: "A => B",
SelectFields: []telemetrytypes.TelemetryFieldKey{
{
Name: "service.name",
FieldContext: telemetrytypes.FieldContextResource,
FieldDataType: telemetrytypes.FieldDataTypeString,
},
},
Order: []qbtypes.OrderBy{
{
Key: qbtypes.OrderByKey{
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
Name: "http.request.method",
FieldContext: telemetrytypes.FieldContextAttribute,
FieldDataType: telemetrytypes.FieldDataTypeString,
},
},
Direction: qbtypes.OrderDirectionDesc,
},
},
Limit: 10,
},
compositeQuery: &qbtypes.CompositeQuery{
Queries: []qbtypes.QueryEnvelope{
{
Type: qbtypes.QueryTypeBuilder,
Spec: qbtypes.QueryBuilderQuery[qbtypes.TraceAggregation]{
Name: "A",
Signal: telemetrytypes.SignalTraces,
Filter: &qbtypes.Filter{
Expression: "service.name = 'frontend'",
},
},
},
{
Type: qbtypes.QueryTypeBuilder,
Spec: qbtypes.QueryBuilderQuery[qbtypes.TraceAggregation]{
Name: "B",
Signal: telemetrytypes.SignalTraces,
Filter: &qbtypes.Filter{
Expression: "service.name = 'backend'",
},
},
},
},
},
expected: qbtypes.Statement{
// http.request.method is in the inner SELECT (so ORDER BY can reach it)
// but is absent from the outer SELECT column list — only the ORDER BY clause references it.
Query: "WITH toDateTime64(1747947419000000000, 9) AS t_from, toDateTime64(1747983448000000000, 9) AS t_to, 1747945619 AS bucket_from, 1747983448 AS bucket_to, all_spans AS (SELECT *, resource_string_service$$name AS `service.name` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_A AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), A AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_A) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_B AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), B AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_B) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), A_DIR_DESC_B AS (SELECT p.* FROM A AS p INNER JOIN B AS c ON p.trace_id = c.trace_id AND p.span_id = c.parent_span_id) SELECT ts AS timestamp, trace_id, span_id, name, duration_nano, parent_span_id, `service.name` FROM (SELECT timestamp AS ts, trace_id, span_id, name, duration_nano, parent_span_id, multiIf(resource.`service.name` IS NOT NULL, resource.`service.name`::String, mapContains(resources_string, 'service.name'), resources_string['service.name'], NULL) AS `service.name`, attributes_string['http.request.method'] AS `http.request.method` FROM A_DIR_DESC_B) AS t ORDER BY `http.request.method` desc LIMIT ? SETTINGS distributed_product_mode='allow', max_memory_usage=10000000000",
Args: []any{"1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "frontend", "%service.name%", "%service.name\":\"frontend%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "backend", "%service.name%", "%service.name\":\"backend%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), 10},
},
expectedErr: nil,
},
}
fm := NewFieldMapper()

View File

@@ -5,13 +5,13 @@ import (
"strings"
"testing"
"github.com/SigNoz/signoz/pkg/flagger/flaggertest"
"github.com/SigNoz/signoz/pkg/instrumentation/instrumentationtest"
"github.com/SigNoz/signoz/pkg/querybuilder"
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
"github.com/SigNoz/signoz/pkg/types/telemetrytypes/telemetrytypestest"
"github.com/stretchr/testify/assert"
"github.com/SigNoz/signoz/pkg/flagger/flaggertest"
"github.com/stretchr/testify/require"
)
@@ -115,6 +115,7 @@ func TestTraceTimeRangeOptimization(t *testing.T) {
qbtypes.RequestTypeRaw,
tt.query,
nil,
qbtypes.NewStatementBuilderOptions(),
)
require.NoError(t, err)

View File

@@ -176,6 +176,44 @@ func (q *QueryBuilderQuery[T]) Normalize() {
}
type BuilderQueryOptions struct {
UseScalarState bool
}
func (o BuilderQueryOptions) WithUseScalarState() BuilderQueryOptions {
o.UseScalarState = true
return o
}
func NewBuilderQueryOptions() BuilderQueryOptions {
return BuilderQueryOptions{}
}
type StatementBuilderOptions struct {
SkipResourceCTE bool
SkipHaving bool
SkipScalarState bool
}
func NewStatementBuilderOptions() StatementBuilderOptions {
return StatementBuilderOptions{}
}
func (o StatementBuilderOptions) WithSkipResourceCTE() StatementBuilderOptions {
o.SkipResourceCTE = true
return o
}
func (o StatementBuilderOptions) WithSkipHaving() StatementBuilderOptions {
o.SkipHaving = true
return o
}
func (o StatementBuilderOptions) WithSkipScalarState() StatementBuilderOptions {
o.SkipScalarState = true
return o
}
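A minimal usage sketch of the two option builders above (it assumes placement in the same package; demoOptions and the variable names are illustrative):
func demoOptions() {
// Per-query toggle: ask the builder to emit -State aggregates for
// chunked scalar caching.
qOpts := NewBuilderQueryOptions().WithUseScalarState()
// Statement-level toggles chain the same way; each With* has a value
// receiver, so every call returns an updated copy and the zero value
// stays usable as the default.
stmtOpts := NewStatementBuilderOptions().WithSkipHaving().WithSkipScalarState()
_, _ = qOpts, stmtOpts
}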
// Fast path (no fingerprint grouping).
// canShortCircuitDelta reports whether the optimized delta query can be
// used for the given query.

View File

@@ -39,6 +39,7 @@ type AggExprRewriter interface {
// Rewrite rewrites the aggregation expression to be used in the query.
Rewrite(ctx context.Context, startNs, endNs uint64, expr string, rateInterval uint64, keys map[string][]*telemetrytypes.TelemetryFieldKey) (string, []any, error)
RewriteMulti(ctx context.Context, startNs, endNs uint64, exprs []string, rateInterval uint64, keys map[string][]*telemetrytypes.TelemetryFieldKey) ([]string, [][]any, error)
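// RewriteWithState is like Rewrite but emits the -State form of the
// aggregate so per-chunk partial states can be cached and merged before
// finalization; it takes no rateInterval because rate division is
// deferred to materialize time (see ScalarStateData.RateMask).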
RewriteWithState(ctx context.Context, startNs, endNs uint64, expr string, keys map[string][]*telemetrytypes.TelemetryFieldKey) (string, []any, error)
}
type Statement struct {
@@ -51,7 +52,7 @@ type Statement struct {
// StatementBuilder builds the query.
type StatementBuilder[T any] interface {
// Build builds the query.
Build(ctx context.Context, start, end uint64, requestType RequestType, query QueryBuilderQuery[T], variables map[string]VariableItem) (*Statement, error)
Build(ctx context.Context, start, end uint64, requestType RequestType, query QueryBuilderQuery[T], variables map[string]VariableItem, opts StatementBuilderOptions) (*Statement, error)
}
type TraceOperatorStatementBuilder interface {

View File

@@ -10,6 +10,10 @@ type Query interface {
// Fingerprint must return a deterministic key that uniquely identifies
// (query text, params, step, etc.) but *not* the time range.
Fingerprint() string
// IsCacheable reports whether this query should be routed through the
// bucket cache. Independent of Fingerprint so the fingerprint
// remains a pure identity, not a cacheability decision.
IsCacheable() bool
// Window returns [from, to) in epoch milliseconds so the cache can slice/merge.
Window() (startMS, endMS uint64)
// Execute runs the query; implementors must be side-effect-free.
@@ -22,6 +26,7 @@ type Result struct {
Stats ExecStats
Warnings []string
WarningsDocURL string
IsNotCacheable bool
}
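Taken together, the two fields give a query-level and a result-level veto. A hypothetical helper (shouldWriteToBucketCache is not part of this change) sketches the intended gating:
// shouldWriteToBucketCache shows how a querier would consult both flags
// before handing a result to the bucket cache: IsCacheable vetoes whole
// query shapes up front, while IsNotCacheable lets a single execution
// opt out after the fact.
func shouldWriteToBucketCache(q Query, result *Result) bool {
return q.IsCacheable() && !result.IsNotCacheable
}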
type ExecStats struct {

View File

@@ -321,6 +321,20 @@ func sanitizeValue(v any) any {
return sanitizeValue(rv.Elem().Interface())
case reflect.Struct:
return v
case reflect.Float32, reflect.Float64:
// Catches named float types (e.g. `type Duration float64`) that
// the type-assertion fast-paths above don't match. Without
// this, a NaN of a named type leaks through and crashes
// json.Marshal at the top level.
f := rv.Float()
if math.IsNaN(f) {
return "NaN"
} else if math.IsInf(f, 1) {
return "Inf"
} else if math.IsInf(f, -1) {
return "-Inf"
}
return roundToNonZeroDecimals(f, 3)
default:
return v
}
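A small illustration of the failure mode that branch guards against; Duration and demoNamedNaN are stand-ins, and the snippet assumes the encoding/json and math imports:
func demoNamedNaN() {
type Duration float64 // named float: `case float64:` in a type switch won't match it
var v any = Duration(math.NaN())
if _, ok := v.(float64); !ok {
// Skipped by the type-assertion fast paths, so only the
// reflect.Float64 branch above can sanitize it.
}
_, err := json.Marshal(v) // fails: "json: unsupported value: NaN"
_ = err
}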

View File

@@ -0,0 +1,122 @@
package querybuildertypesv5
import (
"math"
"slices"
"strings"
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
)
// ApplyScalarLimit applies ordering and limit to scalar (tabular) data.
// It sorts the rows in place by the provided order criteria and truncates
// them to the limit.
func ApplyScalarLimit(scalar *ScalarData, orderBy []OrderBy, limit int) {
if len(scalar.Data) == 0 {
return
}
effectiveOrderBy := orderBy
if len(effectiveOrderBy) == 0 {
effectiveOrderBy = []OrderBy{
{
Key: OrderByKey{
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
Name: DefaultOrderByKey,
FieldDataType: telemetrytypes.FieldDataTypeFloat64,
},
},
Direction: OrderDirectionDesc,
},
}
}
// Build column name -> row index map
colIndex := make(map[string]int, len(scalar.Columns))
for i, col := range scalar.Columns {
colIndex[col.Name] = i
}
// Find the first aggregation column for __result ordering
resultColIdx := -1
for i, col := range scalar.Columns {
if col.Type == ColumnTypeAggregation {
resultColIdx = i
break
}
}
slices.SortStableFunc(scalar.Data, func(rowI, rowJ []any) int {
for _, order := range effectiveOrderBy {
columnName := order.Key.Name
direction := order.Direction
if columnName == DefaultOrderByKey {
if resultColIdx < 0 {
continue
}
valueI := rowCellFloat(rowI, resultColIdx)
valueJ := rowCellFloat(rowJ, resultColIdx)
if valueI != valueJ {
if direction == OrderDirectionAsc {
if valueI < valueJ {
return -1
}
return 1
}
if valueI > valueJ {
return -1
}
return 1
}
} else {
idx, exists := colIndex[columnName]
if !exists {
continue
}
strI := convertValueToString(rowCellValue(rowI, idx))
strJ := convertValueToString(rowCellValue(rowJ, idx))
cmp := strings.Compare(strI, strJ)
if cmp != 0 {
if direction == OrderDirectionAsc {
return cmp
}
return -cmp
}
}
}
return 0
})
if limit > 0 && len(scalar.Data) > limit {
scalar.Data = scalar.Data[:limit]
}
}
// rowCellFloat extracts a float64 from a row cell, returning 0 for
// missing, NaN, Inf, or non-numeric values.
func rowCellFloat(row []any, idx int) float64 {
if idx < 0 || idx >= len(row) {
return 0
}
switch v := row[idx].(type) {
case float64:
if math.IsNaN(v) || math.IsInf(v, 0) {
return 0
}
return v
case int:
return float64(v)
case int64:
return float64(v)
default:
return 0
}
}
// rowCellValue safely returns the value at idx, or nil if out of bounds.
func rowCellValue(row []any, idx int) any {
if idx < 0 || idx >= len(row) {
return nil
}
return row[idx]
}
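A usage sketch for ApplyScalarLimit. makeScalarData is a hypothetical fixture because ColumnDescriptor's full field layout is not shown in this diff; assume it returns a ScalarData with a group column at index 0 and an aggregation column (Type == ColumnTypeAggregation) at index 1:
func demoApplyScalarLimit() {
sd := makeScalarData() // hypothetical fixture, see note above
sd.Data = [][]any{
{"frontend", 10.0},
{"backend", 42.0},
{"auth", 7.0},
}
// No explicit order: falls back to __result (the first aggregation
// column) descending, then truncates to the limit.
ApplyScalarLimit(sd, nil, 2)
// sd.Data is now [["backend", 42.0], ["frontend", 10.0]].
}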

View File

@@ -0,0 +1,67 @@
package querybuildertypesv5
// ScalarStateRow is a single per-(chunk × group_key × aggregation) entry
// holding the raw ClickHouse AggregateFunction(state, ...) blob bytes.
type ScalarStateRow struct {
GroupKey []any `json:"groupKey"`
AggIdx int `json:"aggIdx"`
State []byte `json:"state"`
}
// ScalarStateData is the cache-side payload for a chunked scalar query.
// It is the value carried in Result.Value when the internal request type
// is RequestTypeScalarState. After merging, it is materialized into the
// user-facing ScalarData via the scalarstate registry.
type ScalarStateData struct {
QueryName string `json:"queryName"`
GroupCols []*ColumnDescriptor `json:"groupCols"`
AggCols []*ColumnDescriptor `json:"aggCols"`
// AggNames is the registry lookup key per AggCols index (e.g., "avg",
// "sum", "p99"). Lets the merger find the matching Go decoder/merger.
AggNames []string `json:"aggNames"`
// RateMask[i] is true when AggNames[i] is a rate-style aggregate
// (rate, rate_sum, rate_avg, rate_min, rate_max). Per-chunk SQL
// emits the underlying state (count/sum/avg/min/max), and the
// rate-window division is applied after Final() at materialize
// time using the full query window.
RateMask []bool `json:"rateMask,omitempty"`
// Order and Limit are applied post-merge in materializeScalarData
// (chunk SQL skips them to avoid losing groups that are globally
// top-N but never per-chunk top-N).
Order []OrderBy `json:"order,omitempty"`
Limit int `json:"limit,omitempty"`
Rows []ScalarStateRow `json:"rows"`
}
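To make the RateMask note concrete, a toy version of the deferred division (all numbers illustrative):
func demoRateWindowDivision() {
// Two 30s chunks each cache a count state; merging the states and
// calling Final() yields the total over the full 60s window.
const chunk1, chunk2 = 70.0, 50.0
total := chunk1 + chunk2
const windowSeconds = 60.0
rate := total / windowSeconds // 2 req/s: divide once, by the full window
// Dividing per chunk and summing (70/30 + 50/30 = 4 req/s) would
// overstate the rate, which is why per-chunk SQL emits the raw state
// and the division waits until materialize time.
_ = rate
}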
// Adopt copies metadata fields from src onto s when s's matching field
// is empty, then appends src.Rows. This is the "first non-empty payload
// wins" policy used by both the cross-chunk merge in the querier and
// the cache-side bucket merge — keep them in sync via this method so
// RateMask/Order/Limit can't silently drift between the two callers.
func (s *ScalarStateData) Adopt(src *ScalarStateData) {
if src == nil {
return
}
if s.QueryName == "" {
s.QueryName = src.QueryName
}
if len(s.GroupCols) == 0 {
s.GroupCols = src.GroupCols
}
if len(s.AggCols) == 0 {
s.AggCols = src.AggCols
}
if len(s.AggNames) == 0 {
s.AggNames = src.AggNames
}
if len(s.RateMask) == 0 {
s.RateMask = src.RateMask
}
if len(s.Order) == 0 {
s.Order = src.Order
}
if s.Limit == 0 {
s.Limit = src.Limit
}
s.Rows = append(s.Rows, src.Rows...)
}
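A small sketch of Adopt's first-non-empty-wins merge (demoAdopt and the state bytes are illustrative):
func demoAdopt() {
merged := &ScalarStateData{}
chunk1 := &ScalarStateData{
QueryName: "A",
AggNames:  []string{"sum"},
Limit:     10,
Rows:      []ScalarStateRow{{AggIdx: 0, State: []byte{0x01}}},
}
chunk2 := &ScalarStateData{
QueryName: "A",
AggNames:  []string{"sum"},
Limit:     10,
Rows:      []ScalarStateRow{{AggIdx: 0, State: []byte{0x02}}},
}
merged.Adopt(chunk1) // adopts QueryName/AggNames/Limit, appends one row
merged.Adopt(chunk2) // metadata already set, so only the rows are appended
// merged.Rows now holds two state blobs awaiting the registry merge.
}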

View File

@@ -72,7 +72,6 @@ class TraceOperatorQuery:
return_spans_from: str
limit: int | None = None
order: list[OrderBy] | None = None
select_fields: list[TelemetryFieldKey] | None = None
def to_dict(self) -> dict:
spec: dict[str, Any] = {
@@ -84,8 +83,6 @@ class TraceOperatorQuery:
spec["limit"] = self.limit
if self.order:
spec["order"] = [o.to_dict() if hasattr(o, "to_dict") else o for o in self.order]
if self.select_fields:
spec["selectFields"] = [f.to_dict() for f in self.select_fields]
return {"type": "builder_trace_operator", "spec": spec}

View File

@@ -530,13 +530,11 @@ def test_export_traces_with_composite_query_trace_operator(
) -> None:
"""
Setup:
Insert a parent span and two child spans, all with an http.method attribute.
Insert a parent span and two child spans that share a single trace.
Tests:
1. Basic trace operator (A => B) returning parent spans, ordered by timestamp.
2. Same operator with selectFields=[service.name] and order by http.method, which is
NOT in selectFields — verifies the inner/outer subquery fix for the CH 25.12.5
NOT_FOUND_COLUMN_IN_BLOCK regression (ORDER BY col AS `col` in a CTE shape).
1. Export raw span data through a trace operator (A => B) in a composite query (POST).
2. Verify all exported spans belong to the matched trace and include the parent span.
"""
parent_trace_id = TraceIdGenerator.trace_id()
parent_span_id = TraceIdGenerator.span_id()
@@ -557,8 +555,12 @@ def test_export_traces_with_composite_query_trace_operator(
kind=TracesKind.SPAN_KIND_SERVER,
status_code=TracesStatusCode.STATUS_CODE_OK,
status_message="",
resources={"service.name": "parent-service"},
attributes={"operation.type": "parent", "http.method": "GET"},
resources={
"service.name": "parent-service",
},
attributes={
"operation.type": "parent",
},
),
Traces(
timestamp=now - timedelta(seconds=9),
@@ -570,8 +572,12 @@ def test_export_traces_with_composite_query_trace_operator(
kind=TracesKind.SPAN_KIND_INTERNAL,
status_code=TracesStatusCode.STATUS_CODE_OK,
status_message="",
resources={"service.name": "parent-service"},
attributes={"operation.type": "child", "http.method": "POST"},
resources={
"service.name": "parent-service",
},
attributes={
"operation.type": "child",
},
),
Traces(
timestamp=now - timedelta(seconds=7),
@@ -583,23 +589,31 @@ def test_export_traces_with_composite_query_trace_operator(
kind=TracesKind.SPAN_KIND_INTERNAL,
status_code=TracesStatusCode.STATUS_CODE_OK,
status_message="",
resources={"service.name": "parent-service"},
attributes={"operation.type": "child", "http.method": "POST"},
resources={
"service.name": "parent-service",
},
attributes={
"operation.type": "child",
},
),
]
)
token = get_token(USER_ADMIN_EMAIL, USER_ADMIN_PASSWORD)
# Calculate timestamps in nanoseconds
start_ns = int((now - timedelta(minutes=5)).timestamp() * 1e9)
end_ns = int(now.timestamp() * 1e9)
url = signoz.self.host_configs["8080"].get("/api/v1/export_raw_data?format=jsonl")
# A: spans with operation.type = 'parent'
query_a = BuilderQuery(
signal="traces",
name="A",
limit=1000,
filter_expression="operation.type = 'parent'",
)
# B: spans with operation.type = 'child'
query_b = BuilderQuery(
signal="traces",
name="B",
@@ -607,50 +621,47 @@ def test_export_traces_with_composite_query_trace_operator(
filter_expression="operation.type = 'child'",
)
def export(operator: TraceOperatorQuery) -> list[dict]:
body = QueryRangeRequest(
start=start_ns,
end=end_ns,
queries=[query_a, query_b, operator],
).to_dict()
resp = requests.post(
url,
json=body,
timeout=10,
headers={"authorization": f"Bearer {token}", "Content-Type": "application/json"},
)
assert resp.status_code == HTTPStatus.OK, resp.text
assert resp.headers["Content-Type"] == "application/x-ndjson"
return [json.loads(line) for line in resp.text.strip().split("\n") if line]
# Test 1: basic trace operator ordered by timestamp
spans = export(
TraceOperatorQuery(
name="C",
expression="A => B",
return_spans_from="A",
limit=1000,
order=[OrderBy(TelemetryFieldKey("timestamp", "string", "span"), "desc")],
)
# Trace operator: find traces where A has a direct descendant B
query_c = TraceOperatorQuery(
name="C",
expression="A => B",
return_spans_from="A",
limit=1000,
order=[OrderBy(TelemetryFieldKey("timestamp", "string", "span"), "desc")],
)
assert len(spans) == 1
assert all(s.get("trace_id") == parent_trace_id for s in spans)
assert any(s.get("name") == "parent-operation" for s in spans)
# Test 2: order-by field (http.method) absent from selectFields
spans = export(
TraceOperatorQuery(
name="C",
expression="A => B",
return_spans_from="A",
limit=1000,
select_fields=[TelemetryFieldKey("service.name", "string", "resource")],
order=[OrderBy(TelemetryFieldKey("http.method", "string", "tag"), "desc")],
)
body = QueryRangeRequest(
start=start_ns,
end=end_ns,
queries=[query_a, query_b, query_c],
).to_dict()
url = signoz.self.host_configs["8080"].get("/api/v1/export_raw_data?format=jsonl")
response = requests.post(
url,
json=body,
timeout=10,
headers={
"authorization": f"Bearer {token}",
"Content-Type": "application/json",
},
)
assert len(spans) >= 1
assert all(s.get("trace_id") == parent_trace_id for s in spans)
assert any(s.get("name") == "parent-operation" for s in spans)
assert response.status_code == HTTPStatus.OK
assert response.headers["Content-Type"] == "application/x-ndjson"
# Parse JSONL content
jsonl_lines = response.text.strip().split("\n")
assert len(jsonl_lines) == 1, f"Expected exactly 1 line, got {len(jsonl_lines)}"
# Verify all returned spans belong to the matched trace
json_objects = [json.loads(line) for line in jsonl_lines]
trace_ids = [obj.get("trace_id") for obj in json_objects]
assert all(tid == parent_trace_id for tid in trace_ids)
# Verify the parent span (returnSpansFrom = "A") is present
span_names = [obj.get("name") for obj in json_objects]
assert "parent-operation" in span_names
def test_export_traces_with_select_fields(