Compare commits

..

6 Commits

Author SHA1 Message Date
Piyush Singariya
a969c38224 chore: fmtlint 2026-05-07 13:53:12 +05:30
Piyush Singariya
b892a0f0a5 chore: file rename 2026-05-07 13:51:22 +05:30
Piyush Singariya
4d47762eba chore: separate e2e test file 2026-05-07 13:50:11 +05:30
Piyush Singariya
77396a0bb3 Merge branch 'main' into traceop 2026-05-07 12:56:59 +05:30
Piyush Singariya
28c05e1bab Merge branch 'main' into traceop 2026-05-04 14:27:19 +05:30
Piyush Singariya
2b9e383994 fix: trace raw export e2e 2026-04-30 15:25:43 +05:30
12 changed files with 426 additions and 947 deletions

View File

@@ -1,99 +0,0 @@
package v2
import (
"regexp"
"sync"
"time"
)
// Code is a dotted, hierarchical identifier registered at process start. It
// encodes domain (subsystem), op (verb), optional sub (qualifier), and a
// terminal reason. Codes are values; two Codes with the same string are equal
// by value and safe to compare with ==.
type Code struct{ s string }
// String returns the dotted code as it appears on the wire. Empty for the
// zero value.
func (c Code) String() string { return c.s }
// codePattern allows 2-4 dotted segments, each starting with a lowercase
// letter and continuing with [a-z0-9_]. One segment is too broad (use a
// domain prefix); five or more means the domain should be split.
var codePattern = regexp.MustCompile(`^[a-z][a-z0-9_]*(\.[a-z][a-z0-9_]*){1,3}$`)
// Meta is the per-code default envelope applied by constructors before
// per-call options. Every field has a natural per-code default — an auth
// code always wants Reauthenticate, every documented code wants its docs
// URL — so the registry is the right place to declare them once.
type Meta struct {
Category Category
Fault Fault
Retry Retry
Remediation Remediation
Refs map[RefKind]string
}
// Retry tells the caller how and when to retry. After is meaningful only
// when Policy == RetryAfter.
type Retry struct {
Policy RetryPolicy
After time.Duration
}
var (
registryMu sync.RWMutex
registry = map[string]Meta{}
)
// Register installs a code with its default Meta and returns the Code value.
// It panics on a malformed code string or a duplicate registration — both
// indicate a programming error that must be caught at boot, not at first
// failure.
//
// Call from the owning domain's package init or top-level var block:
//
// var CodeUnknownFunction = errors.Register("query.parse.unknown_function", errors.Meta{
// Category: errors.CategoryInvalidInput,
// Fault: errors.FaultCaller,
// Retry: errors.Retry{Policy: errors.RetryAfterFix},
// })
func Register(s string, meta Meta) Code {
if !codePattern.MatchString(s) {
panic("errors/v2: malformed code: " + s)
}
registryMu.Lock()
defer registryMu.Unlock()
if _, ok := registry[s]; ok {
panic("errors/v2: duplicate code: " + s)
}
registry[s] = meta
return Code{s: s}
}
// MetaOf returns the Meta a code was registered with. Returns the zero Meta
// and false for unregistered or zero codes.
func MetaOf(c Code) (Meta, bool) {
if c.s == "" {
return Meta{}, false
}
registryMu.RLock()
defer registryMu.RUnlock()
m, ok := registry[c.s]
return m, ok
}
// registerOrGet is the internal idempotent register used by adapters that
// may see the same code (e.g. legacy.<v1>) more than once across the process
// lifetime. It panics on malformed codes — duplicate codes silently keep the
// existing Meta.
func registerOrGet(s string, meta Meta) Code {
if !codePattern.MatchString(s) {
panic("errors/v2: malformed code: " + s)
}
registryMu.Lock()
defer registryMu.Unlock()
if _, ok := registry[s]; !ok {
registry[s] = meta
}
return Code{s: s}
}

View File

@@ -1,93 +0,0 @@
package v2
// The enums in this file are closed sets. Each value is a package-level var of
// an unexported-field struct, so external code cannot synthesize new values —
// it must reference one of the defined ones. String() returns the stable
// snake_case wire name; once shipped, those names are append-only.
// Category groups errors by what kind of failure occurred. It is the coarsest
// branch-worthy axis and is intended to be a superset of gRPC status codes
// extended with cases SigNoz cares about (e.g. license issues land under
// FailedDependency or ResourceExhausted depending on context).
type Category struct{ s string }
func (c Category) String() string { return c.s }
var (
CategoryInvalidInput = Category{"invalid_input"} // request was malformed or violated a documented constraint.
CategoryNotFound = Category{"not_found"} // referenced resource does not exist.
CategoryAlreadyExists = Category{"already_exists"} // resource creation conflicts with an existing one.
CategoryConflict = Category{"conflict"} // concurrent modification or state mismatch (e.g. stale revision).
CategoryPrecondition = Category{"precondition"} // a required precondition (system or caller-asserted) was not met.
CategoryUnauthenticated = Category{"unauthenticated"} // credentials are missing or invalid.
CategoryForbidden = Category{"forbidden"} // authenticated but not authorized for this action.
CategoryResourceExhausted = Category{"resource_exhausted"} // quota, rate limit, or other budget exceeded.
CategoryFailedDependency = Category{"failed_dependency"} // an upstream service we depend on failed (db, license, etc.).
CategoryUnavailable = Category{"unavailable"} // service is temporarily down; retry with backoff.
CategoryTimeout = Category{"timeout"} // deadline exceeded before the operation completed.
CategoryCanceled = Category{"canceled"} // caller or context canceled the operation.
CategoryUnimplemented = Category{"unimplemented"} // operation is not supported (or not yet) by this server.
CategoryDataLoss = Category{"data_loss"} // unrecoverable data corruption or loss detected.
CategoryInternal = Category{"internal"} // bug — invariant broken; should not occur in normal operation.
)
// Fault attributes responsibility. An agent uses this to decide whether to
// fix the request (Caller), retry/escalate (Server, Upstream), or page a
// human (Operator).
type Fault struct{ s string }
func (f Fault) String() string { return f.s }
var (
FaultCaller = Fault{"caller"}
FaultServer = Fault{"server"}
FaultUpstream = Fault{"upstream"}
FaultOperator = Fault{"operator"}
)
// RetryPolicy tells the caller how to behave on retry. Backoff implies the
// caller should use its own backoff schedule; After means honor Retry.After
// exactly; AfterFix and AfterAuth signal that retry is pointless until the
// caller fixes the request or re-authenticates.
type RetryPolicy struct{ s string }
func (r RetryPolicy) String() string { return r.s }
var (
RetryNever = RetryPolicy{"never"}
RetryImmediate = RetryPolicy{"immediate"}
RetryBackoff = RetryPolicy{"backoff"}
RetryAfter = RetryPolicy{"after"}
RetryAfterFix = RetryPolicy{"after_fix"}
RetryAfterAuth = RetryPolicy{"after_auth"}
)
// Remediation names the single recommended next action. It does not execute.
type Remediation struct{ s string }
func (r Remediation) String() string { return r.s }
var (
RemediationNone = Remediation{"none"}
RemediationFixInput = Remediation{"fix_input"}
RemediationReauthenticate = Remediation{"reauthenticate"}
RemediationWaitAndRetry = Remediation{"wait_and_retry"}
RemediationFailover = Remediation{"failover"}
RemediationContactOperator = Remediation{"contact_operator"}
RemediationFileBug = Remediation{"file_bug"}
RemediationUpgradeLicense = Remediation{"upgrade_license"}
)
// RefKind classifies a reference URL attached to the error.
type RefKind struct{ s string }
func (r RefKind) String() string { return r.s }
var (
RefDocs = RefKind{"docs"}
RefRunbook = RefKind{"runbook"}
RefDashboard = RefKind{"dashboard"}
RefTrace = RefKind{"trace"}
RefSource = RefKind{"source"}
RefIssue = RefKind{"issue"}
)

View File

@@ -1,248 +0,0 @@
// Package v2 is the redesigned pkg/errors.
//
// Every branch-worthy field on the Error struct is a closed enum and every
// variable part is a typed key/value. The intent is to make errors first-class
// data for programmatic consumers — SDK clients, UI surfaces, alerting, and
// LLM agents — without sacrificing human readability.
//
// Domain and op are encoded into Code (e.g. "query.parse.unknown_function")
// rather than carried as separate struct fields. Frames[0] is the
// authoritative call-site location, captured at construction time.
package v2
import (
stderrors "errors"
"fmt"
"io"
"sort"
"strconv"
"strings"
)
// Error is the redesigned error value. *Error is the canonical form passed
// around — the zero value is unused, construct via New / Newf / Wrap / Wrapf.
//
// Frames are intentionally not a struct field: resolving captured PCs into
// func/file/line is the dominant construction cost, so we capture PCs eagerly
// at construction time (so the snapshot is faithful to the call site) and
// resolve them lazily via Frames() only when something actually inspects them.
type Error struct {
// WHAT
Category Category
Code Code
Title string
Detail string
// WHY / WHO
Cause error
Fault Fault
// WHAT NEXT
Retry Retry
Remediation Remediation
Refs map[RefKind]string
// CONTEXT
Attrs map[string]any
TraceID string
SpanID string
// stack is the captured PCs plus a memoized []Frame; never read directly,
// always go through Frames().
stack *frameStack
}
// Frames returns the captured stack, resolved to func/file/line on first
// access. Frames[0] is the constructor's caller. Safe for concurrent use.
func (e *Error) Frames() []Frame {
if e == nil {
return nil
}
return e.stack.frames()
}
// New creates an Error for a registered Code. Defaults from the registered
// Meta are applied first; opts override per call site.
func New(code Code, title string, opts ...Option) *Error {
e := &Error{Code: code, Title: title, stack: captureStack(3)}
applyMeta(e)
for _, opt := range opts {
opt(e)
}
return e
}
// Newf is New with fmt.Sprintf-style formatting for the title.
func Newf(code Code, format string, args ...any) *Error {
e := &Error{Code: code, Title: fmt.Sprintf(format, args...), stack: captureStack(3)}
applyMeta(e)
return e
}
// Wrap creates an Error that wraps cause. The new error's Title is the
// caller-supplied title (not the cause's message), so Error() reports what
// went wrong at this layer — the cause is reachable via Unwrap.
func Wrap(cause error, code Code, title string, opts ...Option) *Error {
e := &Error{Code: code, Title: title, Cause: cause, stack: captureStack(3)}
applyMeta(e)
for _, opt := range opts {
opt(e)
}
return e
}
// Wrapf is Wrap with fmt.Sprintf-style formatting for the title.
func Wrapf(cause error, code Code, format string, args ...any) *Error {
e := &Error{Code: code, Title: fmt.Sprintf(format, args...), Cause: cause, stack: captureStack(3)}
applyMeta(e)
return e
}
// applyMeta copies default values from the registered Meta into a fresh
// Error. It runs before per-call options so options win.
func applyMeta(e *Error) {
meta, ok := MetaOf(e.Code)
if !ok {
return
}
if (e.Category == Category{}) {
e.Category = meta.Category
}
if (e.Fault == Fault{}) {
e.Fault = meta.Fault
}
if (e.Retry == Retry{}) {
e.Retry = meta.Retry
}
if (e.Remediation == Remediation{}) {
e.Remediation = meta.Remediation
}
if len(meta.Refs) > 0 {
if e.Refs == nil {
e.Refs = make(map[RefKind]string, len(meta.Refs))
}
for k, v := range meta.Refs {
if _, exists := e.Refs[k]; !exists {
e.Refs[k] = v
}
}
}
}
// Error returns the Title (the message specifically attached at this wrap
// site), not the cause's message. This fixes the v1 surprise where Error()
// returned the wrapped cause's text.
func (e *Error) Error() string {
if e == nil {
return "<nil>"
}
return e.Title
}
// Unwrap returns the wrapped cause, enabling errors.Is / errors.As.
func (e *Error) Unwrap() error {
if e == nil {
return nil
}
return e.Cause
}
// Format implements fmt.Formatter.
//
// %s, %v → Title only
// %+v → full chain: code, title, frames, attrs, recursive cause
func (e *Error) Format(f fmt.State, verb rune) {
switch verb {
case 's':
_, _ = io.WriteString(f, e.Title)
case 'v':
if f.Flag('+') {
_, _ = io.WriteString(f, e.fullString())
return
}
_, _ = io.WriteString(f, e.Title)
case 'q':
fmt.Fprintf(f, "%q", e.Title)
default:
fmt.Fprintf(f, "%%!%c(*errors/v2.Error)", verb)
}
}
// fullString produces the %+v rendering. Format is intentionally
// human-readable rather than machine-parseable; consumers that want structure
// should marshal to JSON.
func (e *Error) fullString() string {
var b strings.Builder
e.appendFull(&b, 0)
return b.String()
}
func (e *Error) appendFull(b *strings.Builder, depth int) {
indent := strings.Repeat(" ", depth)
fmt.Fprintf(b, "%s[%s] %s\n", indent, e.Code.s, e.Title)
if e.Detail != "" {
fmt.Fprintf(b, "%s detail: %s\n", indent, e.Detail)
}
if len(e.Attrs) > 0 {
// Stable key order for deterministic output.
keys := make([]string, 0, len(e.Attrs))
for k := range e.Attrs {
keys = append(keys, k)
}
sort.Strings(keys)
fmt.Fprintf(b, "%s attrs:\n", indent)
for _, k := range keys {
fmt.Fprintf(b, "%s %s=%v\n", indent, k, e.Attrs[k])
}
}
if frames := e.Frames(); len(frames) > 0 {
fmt.Fprintf(b, "%s frames:\n", indent)
for _, fr := range frames {
fmt.Fprintf(b, "%s %s\n%s %s:%s\n", indent, fr.Func, indent, fr.File, strconv.Itoa(fr.Line))
}
}
if e.Cause != nil {
fmt.Fprintf(b, "%scaused by:\n", indent)
var ce *Error
if stderrors.As(e.Cause, &ce) && ce != nil {
ce.appendFull(b, depth+1)
} else {
fmt.Fprintf(b, "%s %s\n", indent, e.Cause.Error())
}
}
}
// AsError extracts a *Error from anywhere in err's wrap chain. It is the
// common shortcut around errors.As for code that always wants this package's
// type.
func AsError(err error) (*Error, bool) {
if err == nil {
return nil, false
}
var e *Error
if stderrors.As(err, &e) {
return e, true
}
return nil, false
}
// Is reports whether err or any error in its chain has the given Code.
// Convenience wrapper that's friendlier than errors.As at call sites that
// only care about code identity.
func Is(err error, code Code) bool {
e, ok := AsError(err)
if !ok {
return false
}
for e != nil {
if e.Code == code {
return true
}
next, ok := AsError(e.Cause)
if !ok {
return false
}
e = next
}
return false
}

View File

@@ -1,90 +0,0 @@
package v2
// This file is a self-contained walkthrough of how a domain integrates with
// pkg/errors/v2. It mirrors what a real pkg/<domain>/errors.go looks like in
// practice — registering codes, constructing typed errors at failure sites,
// and consuming them at API boundaries. The "example.*" namespace is reserved
// for these demo codes so they never collide with a real domain's
// registrations.
// 1. Register codes at package init time. Each Register call panics on
// malformed code or duplicate registration, so misconfiguration is caught
// at process boot, not at first failure.
var (
// A caller-fault, fix-the-input error: rejected before any work happens.
exampleCodeInvalidQuery = Register("example.query.invalid_filter", Meta{
Category: CategoryInvalidInput,
Fault: FaultCaller,
Remediation: RemediationFixInput,
Retry: Retry{Policy: RetryAfterFix},
Refs: map[RefKind]string{
RefDocs: "https://signoz.io/docs/query/filters",
},
})
// A quota error: the caller's request was well-formed but their plan
// doesn't allow it. The recommended remediation is structural (upgrade),
// not "try again later."
exampleCodeQuotaExceeded = Register("example.billing.quota_exceeded", Meta{
Category: CategoryResourceExhausted,
Fault: FaultCaller,
Remediation: RemediationUpgradeLicense,
Retry: Retry{Policy: RetryNever},
})
)
// 2. Construct errors at the failure site. Notice that variable parts of
// the message (the offending field, the limits) live in typed Attrs, not in
// the title prose — a downstream agent can read them without parsing English.
func exampleRejectInvalidFilter(field string) *Error {
return New(exampleCodeInvalidQuery, "filter is not supported",
WithAttr("field", field),
)
}
// 3. Consume errors at the API boundary. Branching on Category gives the
// HTTP status; Retry tells an SDK how to behave; Fault drives logging
// classification (caller errors are warnings, server/upstream errors page).
func exampleClassifyForHTTP(err error) (status int, retry RetryPolicy) {
e, ok := AsError(err)
if !ok {
return 500, RetryNever
}
switch e.Category {
case CategoryInvalidInput, CategoryPrecondition:
status = 400
case CategoryUnauthenticated:
status = 401
case CategoryForbidden:
status = 403
case CategoryNotFound:
status = 404
case CategoryConflict, CategoryAlreadyExists:
status = 409
case CategoryResourceExhausted:
status = 429
case CategoryUnavailable, CategoryTimeout:
status = 503
case CategoryUnimplemented:
status = 501
default:
status = 500
}
return status, e.Retry.Policy
}
// 4. Identify a specific failure mode by Code. Is walks the cause chain so
// a wrapper at the HTTP layer still matches when the root cause was raised
// deep in the call graph.
func exampleIsQuotaExceeded(err error) bool {
return Is(err, exampleCodeQuotaExceeded)
}
// The example helpers are reference-only: they exist to document call-site
// patterns, not to be called from anywhere in the binary. This anchor keeps
// them visible to readers (and the linter) without exporting demo code.
var _ = []any{
exampleRejectInvalidFilter,
exampleClassifyForHTTP,
exampleIsQuotaExceeded,
}

View File

@@ -1,65 +0,0 @@
package v2
import (
"runtime"
"sync"
)
// Frame is a single line in the call stack. Frames[0] is the constructor's
// caller — the authoritative "where this error came from" — and downstream
// consumers can filter (e.g. "frames inside our code") without regex
// reparsing of a pre-formatted stack string.
type Frame struct {
Func string `json:"func,omitempty"`
File string `json:"file,omitempty"`
Line int `json:"line,omitempty"`
}
// frameStack carries the PCs captured at construction plus the resolved
// []Frame slice, behind a sync.Once. Resolving frames into func/file/line is
// expensive (runtime.CallersFrames walks the symbol table); the vast majority
// of errors are constructed and never inspected, so we only pay that cost
// when a consumer actually asks for frames (Frames()/MarshalJSON/%+v).
//
// The PC capture itself is cheap and happens at construction so that
// Frames[0] is a faithful "where" record of the original call site.
type frameStack struct {
pcs []uintptr
once sync.Once
resolved []Frame
}
// captureStack is called by every constructor. skip drops runtime.Callers,
// captureStack itself, and the constructor frame so that the first PC is the
// user code that invoked the constructor.
func captureStack(skip int) *frameStack {
const depth = 32
pcs := make([]uintptr, depth)
n := runtime.Callers(skip, pcs)
if n == 0 {
return nil
}
return &frameStack{pcs: pcs[:n:n]}
}
// frames resolves the captured PCs into []Frame. The resolution is memoized
// — concurrent calls are safe and only one of them does the work.
func (s *frameStack) frames() []Frame {
if s == nil {
return nil
}
s.once.Do(func() {
cf := runtime.CallersFrames(s.pcs)
out := make([]Frame, 0, len(s.pcs))
for {
f, more := cf.Next()
out = append(out, Frame{Func: f.Function, File: f.File, Line: f.Line})
if !more {
break
}
}
s.resolved = out
})
return s.resolved
}

View File

@@ -1,175 +0,0 @@
package v2
import (
"encoding/json"
"net/url"
)
// CodeUnknown is the sentinel returned when AsJSON / AsURLValues are called
// on a non-*Error. A consumer that sees this on the wire should read it as
// "the producer did not raise a v2 Error and we projected it through the
// fallback path" — i.e. somewhere upstream is still using std errors or v1.
var CodeUnknown = Register("unknown.unset", Meta{
Category: CategoryInternal,
Fault: FaultServer,
Retry: Retry{Policy: RetryNever},
})
// JSON is the wire envelope for an Error. It is intentionally a superset of
// v1's pkg/errors.JSON: SDK clients that only read v1's {code, message, url,
// errors[]} keep working, while v2 consumers can branch on the new typed
// fields (category, fault, retry, remediation, attrs, refs, cause).
type JSON struct {
Code string `json:"code" required:"true"`
Title string `json:"title" required:"true"`
Detail string `json:"detail,omitempty"`
Category string `json:"category,omitempty"`
Fault string `json:"fault,omitempty"`
Retry *RetryJSON `json:"retry,omitempty"`
Remediation string `json:"remediation,omitempty"`
Attrs map[string]any `json:"attrs,omitempty"`
Refs map[string]string `json:"refs,omitempty"`
Frames []Frame `json:"frames,omitempty"`
TraceID string `json:"trace_id,omitempty"`
SpanID string `json:"span_id,omitempty"`
Cause *CauseJSON `json:"cause,omitempty"`
}
// RetryJSON renders Retry as an object so consumers can branch on policy
// before consulting AfterMS. AfterMS is omitted unless policy is "after".
type RetryJSON struct {
Policy string `json:"policy"`
AfterMS int64 `json:"after_ms,omitempty"`
}
// CauseJSON is the thin recursive shape for a cause chain. Only code, title,
// and a nested cause are guaranteed — producers may add more, consumers must
// not rely on it.
type CauseJSON struct {
Code string `json:"code,omitempty"`
Title string `json:"title"`
Cause *CauseJSON `json:"cause,omitempty"`
}
// AsJSON projects any error onto the v2 wire envelope. If cause is a
// *Error (anywhere in its wrap chain) every field is filled from it;
// otherwise the result is a CodeUnknown envelope with Title=cause.Error()
// so the wire shape is always valid and never panics.
func AsJSON(cause error) *JSON {
if cause == nil {
return nil
}
e, ok := AsError(cause)
if !ok {
return &JSON{
Code: CodeUnknown.s,
Title: cause.Error(),
Category: CategoryInternal.s,
Fault: FaultServer.s,
}
}
return errorToJSON(e)
}
func errorToJSON(e *Error) *JSON {
out := &JSON{
Code: e.Code.s,
Title: e.Title,
Detail: e.Detail,
Category: e.Category.s,
Fault: e.Fault.s,
Remediation: e.Remediation.s,
Attrs: e.Attrs,
TraceID: e.TraceID,
SpanID: e.SpanID,
}
if (e.Retry.Policy != RetryPolicy{}) {
out.Retry = &RetryJSON{Policy: e.Retry.Policy.s}
if e.Retry.Policy == RetryAfter && e.Retry.After > 0 {
out.Retry.AfterMS = e.Retry.After.Milliseconds()
}
}
if len(e.Refs) > 0 {
out.Refs = make(map[string]string, len(e.Refs))
for k, v := range e.Refs {
out.Refs[k.s] = v
}
}
if frames := e.Frames(); len(frames) > 0 {
out.Frames = frames
}
if e.Cause != nil {
out.Cause = causeToJSON(e.Cause)
}
return out
}
func causeToJSON(err error) *CauseJSON {
if err == nil {
return nil
}
if e, ok := err.(*Error); ok {
c := &CauseJSON{Code: e.Code.s, Title: e.Title}
if e.Cause != nil {
c.Cause = causeToJSON(e.Cause)
}
return c
}
// Non-*Error leaf: only Title is set, no Code.
return &CauseJSON{Title: err.Error()}
}
// AsURLValues projects an error onto a flat url.Values, matching v1's shape
// for callers (e.g. OAuth/SSO redirects) that smuggle errors back through a
// query string. Complex fields (attrs, refs, retry, frames, cause) are
// JSON-marshaled into a single value rather than spread across multiple
// keys, since query strings have no good representation for nested data.
func AsURLValues(cause error) url.Values {
j := AsJSON(cause)
if j == nil {
return url.Values{}
}
v := url.Values{
"code": {j.Code},
"title": {j.Title},
}
if j.Detail != "" {
v.Set("detail", j.Detail)
}
if j.Category != "" {
v.Set("category", j.Category)
}
if j.Fault != "" {
v.Set("fault", j.Fault)
}
if j.Remediation != "" {
v.Set("remediation", j.Remediation)
}
if j.TraceID != "" {
v.Set("trace_id", j.TraceID)
}
if j.SpanID != "" {
v.Set("span_id", j.SpanID)
}
if j.Retry != nil {
if b, err := json.Marshal(j.Retry); err == nil {
v.Set("retry", string(b))
}
}
if len(j.Refs) > 0 {
if b, err := json.Marshal(j.Refs); err == nil {
v.Set("refs", string(b))
}
}
if len(j.Attrs) > 0 {
if b, err := json.Marshal(j.Attrs); err == nil {
v.Set("attrs", string(b))
}
}
if j.Cause != nil {
if b, err := json.Marshal(j.Cause); err == nil {
v.Set("cause", string(b))
}
}
return v
}

View File

@@ -1,85 +0,0 @@
package v2
import "time"
// Option mutates an Error during construction. Options are applied after the
// registered Meta defaults so a per-call WithFault wins over the code's
// default Fault.
type Option func(*Error)
// WithTitle overrides the title (used when Newf's formatted string is not
// what you want, or after a Wrap that took its title from the cause).
func WithTitle(s string) Option { return func(e *Error) { e.Title = s } }
// WithDetail adds a long, user-safe explanation. Detail must never include
// raw cause text; the cause is already in the chain.
func WithDetail(s string) Option { return func(e *Error) { e.Detail = s } }
// WithCategory overrides the registered Category.
func WithCategory(c Category) Option { return func(e *Error) { e.Category = c } }
// WithFault overrides the registered Fault.
func WithFault(f Fault) Option { return func(e *Error) { e.Fault = f } }
// WithRetry overrides the registered Retry.
func WithRetry(r Retry) Option { return func(e *Error) { e.Retry = r } }
// WithRetryAfter is a convenience for the common RetryAfter case.
func WithRetryAfter(d time.Duration) Option {
return func(e *Error) { e.Retry = Retry{Policy: RetryAfter, After: d} }
}
// WithRemediation overrides the registered Remediation.
//
// Convention for "did you mean" hints: stash a []string under
// Attrs["suggestions"], ranked best-first. Each element should be a complete,
// copy-pasteable replacement — not an explanation of what went wrong (use
// WithDetail for that). Once 3-4 domains adopt the convention identically,
// promote to a first-class field.
func WithRemediation(r Remediation) Option { return func(e *Error) { e.Remediation = r } }
// WithRef adds (or replaces) a single reference URL keyed by kind.
func WithRef(kind RefKind, url string) Option {
return func(e *Error) {
if e.Refs == nil {
e.Refs = make(map[RefKind]string, 1)
}
e.Refs[kind] = url
}
}
// WithAttr sets a single typed attribute. Prefer typed per-domain helpers
// (e.g. WithQueryAttrs(q Query)) over raw WithAttr at call sites — they keep
// the attr keys consistent and let the compiler reject typos.
func WithAttr(key string, value any) Option {
return func(e *Error) {
if e.Attrs == nil {
e.Attrs = make(map[string]any, 1)
}
e.Attrs[key] = value
}
}
// WithAttrs merges a map of attributes; later keys win.
func WithAttrs(attrs map[string]any) Option {
return func(e *Error) {
if len(attrs) == 0 {
return
}
if e.Attrs == nil {
e.Attrs = make(map[string]any, len(attrs))
}
for k, v := range attrs {
e.Attrs[k] = v
}
}
}
// WithTrace stamps the error with OTel trace and span IDs so the JSON
// response can link back to the originating span.
func WithTrace(traceID, spanID string) Option {
return func(e *Error) {
e.TraceID = traceID
e.SpanID = spanID
}
}

View File

@@ -1,40 +1,15 @@
package querybuilder
import (
"fmt"
"sort"
"strings"
errors "github.com/SigNoz/signoz/pkg/errors/v2"
"github.com/SigNoz/signoz/pkg/errors"
grammar "github.com/SigNoz/signoz/pkg/parser/havingexpression/grammar"
"github.com/antlr4-go/antlr/v4"
"github.com/huandu/go-sqlbuilder"
)
// HAVING-expression validator codes. All three are caller-fault, fix-the-input
// errors — the user wrote an expression we cannot turn into SQL — so retry
// is pointless until the expression itself changes.
var (
codeHavingStringLiteral = errors.Register("querybuilder.having.string_literal", errors.Meta{
Category: errors.CategoryInvalidInput,
Fault: errors.FaultCaller,
Retry: errors.Retry{Policy: errors.RetryAfterFix},
Remediation: errors.RemediationFixInput,
})
codeHavingInvalidReference = errors.Register("querybuilder.having.invalid_reference", errors.Meta{
Category: errors.CategoryInvalidInput,
Fault: errors.FaultCaller,
Retry: errors.Retry{Policy: errors.RetryAfterFix},
Remediation: errors.RemediationFixInput,
})
codeHavingSyntaxError = errors.Register("querybuilder.having.syntax_error", errors.Meta{
Category: errors.CategoryInvalidInput,
Fault: errors.FaultCaller,
Retry: errors.Retry{Policy: errors.RetryAfterFix},
Remediation: errors.RemediationFixInput,
})
)
// havingExpressionRewriteVisitor walks the parse tree of a HavingExpression in a single
// pass, simultaneously rewriting user-facing references to their SQL column names and
// collecting any references that could not be resolved.
@@ -306,10 +281,10 @@ func (r *HavingExpressionRewriter) rewriteAndValidate(expression string) (string
// This is checked before invalid references so that "contains string literals" takes
// priority when a bare string literal is also an unresolvable operand.
if v.hasStringLiteral {
return "", errors.New(codeHavingStringLiteral,
return "", errors.NewInvalidInputf(
errors.CodeInvalidInput,
"`Having` expression contains string literals",
errors.WithDetail("Aggregator results are numeric"),
)
).WithAdditional("Aggregator results are numeric")
}
if len(v.invalid) > 0 {
@@ -319,10 +294,7 @@ func (r *HavingExpressionRewriter) rewriteAndValidate(expression string) (string
validKeys = append(validKeys, k)
}
sort.Strings(validKeys)
opts := []errors.Option{
errors.WithAttr("invalid_refs", v.invalid),
errors.WithAttr("valid_refs", validKeys),
}
additional := []string{"Valid references are: [" + strings.Join(validKeys, ", ") + "]"}
if len(v.invalid) == 1 {
inv := v.invalid[0]
// Only suggest for plain identifier typos, not for unresolved function
@@ -331,13 +303,15 @@ func (r *HavingExpressionRewriter) rewriteAndValidate(expression string) (string
// a simple string substitution produce a corrupt expression.
isFuncCall := strings.Contains(original, inv+"(")
if match, dist := closestMatch(inv, validKeys); !isFuncCall && !strings.Contains(match, "(") && dist <= 3 {
opts = append(opts, errors.WithAttr("suggestions", []string{strings.ReplaceAll(original, inv, match)}))
corrected := strings.ReplaceAll(original, inv, match)
additional = append(additional, "Suggestion: `"+corrected+"`")
}
}
return "", errors.New(codeHavingInvalidReference,
fmt.Sprintf("Invalid references in `Having` expression: [%s]", strings.Join(v.invalid, ", ")),
opts...,
)
return "", errors.NewInvalidInputf(
errors.CodeInvalidInput,
"Invalid references in `Having` expression: [%s]",
strings.Join(v.invalid, ", "),
).WithAdditional(additional...)
}
// Layer 3 ANTLR syntax errors. We parse the original expression, so error messages
@@ -354,20 +328,17 @@ func (r *HavingExpressionRewriter) rewriteAndValidate(expression string) (string
if detail == "" {
detail = "check the expression syntax"
}
opts := []errors.Option{
errors.WithDetail(detail),
errors.WithAttr("syntax_errors", msgs),
}
additional := []string{detail}
// For single-error expressions, try to produce an actionable suggestion.
if len(allSyntaxErrors) == 1 {
if s := havingSuggestion(allSyntaxErrors[0], original); s != "" {
opts = append(opts, errors.WithAttr("suggestions", []string{s}))
additional = append(additional, "Suggestion: `"+s+"`")
}
}
return "", errors.New(codeHavingSyntaxError,
return "", errors.NewInvalidInputf(
errors.CodeInvalidInput,
"Syntax error in `Having` expression",
opts...,
)
).WithAdditional(additional...)
}
return result, nil

View File

@@ -413,28 +413,6 @@ func (b *traceOperatorCTEBuilder) buildFinalQuery(ctx context.Context, selectFro
}
func (b *traceOperatorCTEBuilder) buildListQuery(ctx context.Context, selectFromCTE string) (*qbtypes.Statement, error) {
sb := sqlbuilder.NewSelectBuilder()
// Select core fields
sb.Select(
"timestamp",
"trace_id",
"span_id",
"name",
"duration_nano",
"parent_span_id",
)
selectedFields := map[string]bool{
"timestamp": true,
"trace_id": true,
"span_id": true,
"name": true,
"duration_nano": true,
"parent_span_id": true,
}
// Get keys for selectFields
keySelectors := b.getKeySelectors()
for _, field := range b.operator.SelectFields {
keySelectors = append(keySelectors, &telemetrytypes.FieldKeySelector{
@@ -444,13 +422,30 @@ func (b *traceOperatorCTEBuilder) buildListQuery(ctx context.Context, selectFrom
FieldDataType: field.FieldDataType,
})
}
keys, _, err := b.stmtBuilder.metadataStore.GetKeysMulti(ctx, keySelectors)
if err != nil {
return nil, err
}
// Add selectFields using ColumnExpressionFor since we now have all base table columns
coreFields := []string{"trace_id", "span_id", "name", "duration_nano", "parent_span_id"}
selectedFields := map[string]bool{
"timestamp": true,
"trace_id": true,
"span_id": true,
"name": true,
"duration_nano": true,
"parent_span_id": true,
}
// Inner SELECT reads from the CTE and renames timestamp→ts.
// This breaks the `ORDER BY col AS `col`` pattern that triggers a
// CH 25.12.5 distributed-analyzer regression (NOT_FOUND_COLUMN_IN_BLOCK /
// timestamp renamed to timestamp_0). See ClickHouse/ClickHouse#103508.
innerSB := sqlbuilder.NewSelectBuilder()
innerSB.Select("timestamp AS ts")
innerSB.SelectMore(coreFields...)
var additionalSelectedFields []string
for _, field := range b.operator.SelectFields {
if selectedFields[field.Name] {
continue
@@ -461,41 +456,60 @@ func (b *traceOperatorCTEBuilder) buildListQuery(ctx context.Context, selectFrom
slog.String("field", field.Name), errors.Attr(err))
continue
}
sb.SelectMore(colExpr)
innerSB.SelectMore(colExpr)
selectedFields[field.Name] = true
additionalSelectedFields = append(additionalSelectedFields, field.Name)
}
sb.From(selectFromCTE)
// Add order by support using ColumnExpressionFor
orderApplied := false
// Also expose any explicit ORDER BY fields that aren't already selected,
// so the outer query can reference them by alias name.
for _, orderBy := range b.operator.Order {
if selectedFields[orderBy.Key.Name] {
continue
}
colExpr, err := b.stmtBuilder.fm.ColumnExpressionFor(ctx, b.start, b.end, &orderBy.Key.TelemetryFieldKey, keys)
if err != nil {
return nil, err
}
sb.OrderBy(fmt.Sprintf("%s %s", colExpr, orderBy.Direction.StringValue()))
orderApplied = true
innerSB.SelectMore(colExpr)
selectedFields[orderBy.Key.Name] = true
}
if !orderApplied {
sb.OrderBy("timestamp DESC")
innerSB.From(selectFromCTE)
innerSQL, innerArgs := innerSB.BuildWithFlavor(sqlbuilder.ClickHouse)
// Outer SELECT reads from the inner subquery and re-exposes timestamp via
// the ts alias. ORDER BY uses the alias name directly — no AS-alias in the
// ORDER BY position — which is the pattern that avoids the CH regression.
outerSB := sqlbuilder.NewSelectBuilder()
outerSB.Select("ts AS timestamp")
outerSB.SelectMore(coreFields...)
for _, name := range additionalSelectedFields {
outerSB.SelectMore(fmt.Sprintf("`%s`", name))
}
outerSB.From(fmt.Sprintf("(%s) AS t", innerSQL))
if len(b.operator.Order) > 0 {
for _, orderBy := range b.operator.Order {
outerSB.OrderBy(fmt.Sprintf("`%s` %s", orderBy.Key.Name, orderBy.Direction.StringValue()))
}
} else {
outerSB.OrderBy("timestamp DESC")
}
if b.operator.Limit > 0 {
sb.Limit(b.operator.Limit)
outerSB.Limit(b.operator.Limit)
} else {
sb.Limit(100)
outerSB.Limit(100)
}
if b.operator.Offset > 0 {
sb.Offset(b.operator.Offset)
outerSB.Offset(b.operator.Offset)
}
sql, args := sb.BuildWithFlavor(sqlbuilder.ClickHouse)
outerSQL, outerArgs := outerSB.BuildWithFlavor(sqlbuilder.ClickHouse)
return &qbtypes.Statement{
Query: sql,
Args: args,
Query: outerSQL,
Args: append(innerArgs, outerArgs...),
}, nil
}

View File

@@ -67,7 +67,7 @@ func TestTraceOperatorStatementBuilder(t *testing.T) {
},
},
expected: qbtypes.Statement{
Query: "WITH toDateTime64(1747947419000000000, 9) AS t_from, toDateTime64(1747983448000000000, 9) AS t_to, 1747945619 AS bucket_from, 1747983448 AS bucket_to, all_spans AS (SELECT *, resource_string_service$$name AS `service.name` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_A AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), A AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_A) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_B AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), B AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_B) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), A_DIR_DESC_B AS (SELECT p.* FROM A AS p INNER JOIN B AS c ON p.trace_id = c.trace_id AND p.span_id = c.parent_span_id) SELECT timestamp, trace_id, span_id, name, duration_nano, parent_span_id, multiIf(resource.`service.name` IS NOT NULL, resource.`service.name`::String, mapContains(resources_string, 'service.name'), resources_string['service.name'], NULL) AS `service.name` FROM A_DIR_DESC_B ORDER BY timestamp DESC LIMIT ? SETTINGS distributed_product_mode='allow', max_memory_usage=10000000000",
Query: "WITH toDateTime64(1747947419000000000, 9) AS t_from, toDateTime64(1747983448000000000, 9) AS t_to, 1747945619 AS bucket_from, 1747983448 AS bucket_to, all_spans AS (SELECT *, resource_string_service$$name AS `service.name` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_A AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), A AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_A) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_B AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), B AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_B) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), A_DIR_DESC_B AS (SELECT p.* FROM A AS p INNER JOIN B AS c ON p.trace_id = c.trace_id AND p.span_id = c.parent_span_id) SELECT ts AS timestamp, trace_id, span_id, name, duration_nano, parent_span_id, `service.name` FROM (SELECT timestamp AS ts, trace_id, span_id, name, duration_nano, parent_span_id, multiIf(resource.`service.name` IS NOT NULL, resource.`service.name`::String, mapContains(resources_string, 'service.name'), resources_string['service.name'], NULL) AS `service.name` FROM A_DIR_DESC_B) AS t ORDER BY timestamp DESC LIMIT ? SETTINGS distributed_product_mode='allow', max_memory_usage=10000000000",
Args: []any{"1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "frontend", "%service.name%", "%service.name\":\"frontend%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "backend", "%service.name%", "%service.name\":\"backend%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), 10},
},
expectedErr: nil,
@@ -104,7 +104,7 @@ func TestTraceOperatorStatementBuilder(t *testing.T) {
},
},
expected: qbtypes.Statement{
Query: "WITH toDateTime64(1747947419000000000, 9) AS t_from, toDateTime64(1747983448000000000, 9) AS t_to, 1747945619 AS bucket_from, 1747983448 AS bucket_to, all_spans AS (SELECT *, resource_string_service$$name AS `service.name` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_A AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), A AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_A) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_B AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), B AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_B) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), A_INDIR_DESC_B AS (WITH RECURSIVE up AS (SELECT d.trace_id, d.span_id, d.parent_span_id, 0 AS depth FROM B AS d UNION ALL SELECT p.trace_id, p.span_id, p.parent_span_id, up.depth + 1 FROM all_spans AS p JOIN up ON p.trace_id = up.trace_id AND p.span_id = up.parent_span_id WHERE up.depth < 100) SELECT DISTINCT a.* FROM A AS a GLOBAL INNER JOIN (SELECT DISTINCT trace_id, span_id FROM up WHERE depth > 0 ) AS ancestors ON ancestors.trace_id = a.trace_id AND ancestors.span_id = a.span_id) SELECT timestamp, trace_id, span_id, name, duration_nano, parent_span_id FROM A_INDIR_DESC_B ORDER BY timestamp DESC LIMIT ? SETTINGS distributed_product_mode='allow', max_memory_usage=10000000000",
Query: "WITH toDateTime64(1747947419000000000, 9) AS t_from, toDateTime64(1747983448000000000, 9) AS t_to, 1747945619 AS bucket_from, 1747983448 AS bucket_to, all_spans AS (SELECT *, resource_string_service$$name AS `service.name` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_A AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), A AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_A) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_B AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), B AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_B) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), A_INDIR_DESC_B AS (WITH RECURSIVE up AS (SELECT d.trace_id, d.span_id, d.parent_span_id, 0 AS depth FROM B AS d UNION ALL SELECT p.trace_id, p.span_id, p.parent_span_id, up.depth + 1 FROM all_spans AS p JOIN up ON p.trace_id = up.trace_id AND p.span_id = up.parent_span_id WHERE up.depth < 100) SELECT DISTINCT a.* FROM A AS a GLOBAL INNER JOIN (SELECT DISTINCT trace_id, span_id FROM up WHERE depth > 0 ) AS ancestors ON ancestors.trace_id = a.trace_id AND ancestors.span_id = a.span_id) SELECT ts AS timestamp, trace_id, span_id, name, duration_nano, parent_span_id FROM (SELECT timestamp AS ts, trace_id, span_id, name, duration_nano, parent_span_id FROM A_INDIR_DESC_B) AS t ORDER BY timestamp DESC LIMIT ? SETTINGS distributed_product_mode='allow', max_memory_usage=10000000000",
Args: []any{"1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "gateway", "%service.name%", "%service.name\":\"gateway%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "database", "%service.name%", "%service.name\":\"database%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), 5},
},
expectedErr: nil,
@@ -141,7 +141,7 @@ func TestTraceOperatorStatementBuilder(t *testing.T) {
},
},
expected: qbtypes.Statement{
Query: "WITH toDateTime64(1747947419000000000, 9) AS t_from, toDateTime64(1747983448000000000, 9) AS t_to, 1747945619 AS bucket_from, 1747983448 AS bucket_to, all_spans AS (SELECT *, resource_string_service$$name AS `service.name` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_A AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), A AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_A) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_B AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), B AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_B) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), A_AND_B AS (SELECT l.* FROM A AS l INNER JOIN B AS r ON l.trace_id = r.trace_id) SELECT timestamp, trace_id, span_id, name, duration_nano, parent_span_id FROM A_AND_B ORDER BY timestamp DESC LIMIT ? SETTINGS distributed_product_mode='allow', max_memory_usage=10000000000",
Query: "WITH toDateTime64(1747947419000000000, 9) AS t_from, toDateTime64(1747983448000000000, 9) AS t_to, 1747945619 AS bucket_from, 1747983448 AS bucket_to, all_spans AS (SELECT *, resource_string_service$$name AS `service.name` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_A AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), A AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_A) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_B AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), B AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_B) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), A_AND_B AS (SELECT l.* FROM A AS l INNER JOIN B AS r ON l.trace_id = r.trace_id) SELECT ts AS timestamp, trace_id, span_id, name, duration_nano, parent_span_id FROM (SELECT timestamp AS ts, trace_id, span_id, name, duration_nano, parent_span_id FROM A_AND_B) AS t ORDER BY timestamp DESC LIMIT ? SETTINGS distributed_product_mode='allow', max_memory_usage=10000000000",
Args: []any{"1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "frontend", "%service.name%", "%service.name\":\"frontend%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "backend", "%service.name%", "%service.name\":\"backend%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), 15},
},
expectedErr: nil,
@@ -178,7 +178,7 @@ func TestTraceOperatorStatementBuilder(t *testing.T) {
},
},
expected: qbtypes.Statement{
Query: "WITH toDateTime64(1747947419000000000, 9) AS t_from, toDateTime64(1747983448000000000, 9) AS t_to, 1747945619 AS bucket_from, 1747983448 AS bucket_to, all_spans AS (SELECT *, resource_string_service$$name AS `service.name` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_A AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), A AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_A) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_B AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), B AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_B) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), A_OR_B AS (SELECT * FROM A UNION DISTINCT SELECT * FROM B) SELECT timestamp, trace_id, span_id, name, duration_nano, parent_span_id FROM A_OR_B ORDER BY timestamp DESC LIMIT ? SETTINGS distributed_product_mode='allow', max_memory_usage=10000000000",
Query: "WITH toDateTime64(1747947419000000000, 9) AS t_from, toDateTime64(1747983448000000000, 9) AS t_to, 1747945619 AS bucket_from, 1747983448 AS bucket_to, all_spans AS (SELECT *, resource_string_service$$name AS `service.name` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_A AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), A AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_A) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_B AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), B AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_B) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), A_OR_B AS (SELECT * FROM A UNION DISTINCT SELECT * FROM B) SELECT ts AS timestamp, trace_id, span_id, name, duration_nano, parent_span_id FROM (SELECT timestamp AS ts, trace_id, span_id, name, duration_nano, parent_span_id FROM A_OR_B) AS t ORDER BY timestamp DESC LIMIT ? SETTINGS distributed_product_mode='allow', max_memory_usage=10000000000",
Args: []any{"1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "frontend", "%service.name%", "%service.name\":\"frontend%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "backend", "%service.name%", "%service.name\":\"backend%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), 20},
},
expectedErr: nil,
@@ -215,7 +215,7 @@ func TestTraceOperatorStatementBuilder(t *testing.T) {
},
},
expected: qbtypes.Statement{
Query: "WITH toDateTime64(1747947419000000000, 9) AS t_from, toDateTime64(1747983448000000000, 9) AS t_to, 1747945619 AS bucket_from, 1747983448 AS bucket_to, all_spans AS (SELECT *, resource_string_service$$name AS `service.name` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_A AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), A AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_A) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_B AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), B AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_B) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), A_not_B AS (SELECT l.* FROM A AS l WHERE l.trace_id GLOBAL NOT IN (SELECT DISTINCT trace_id FROM B)) SELECT timestamp, trace_id, span_id, name, duration_nano, parent_span_id FROM A_not_B ORDER BY timestamp DESC LIMIT ? SETTINGS distributed_product_mode='allow', max_memory_usage=10000000000",
Query: "WITH toDateTime64(1747947419000000000, 9) AS t_from, toDateTime64(1747983448000000000, 9) AS t_to, 1747945619 AS bucket_from, 1747983448 AS bucket_to, all_spans AS (SELECT *, resource_string_service$$name AS `service.name` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_A AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), A AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_A) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_B AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), B AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_B) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), A_not_B AS (SELECT l.* FROM A AS l WHERE l.trace_id GLOBAL NOT IN (SELECT DISTINCT trace_id FROM B)) SELECT ts AS timestamp, trace_id, span_id, name, duration_nano, parent_span_id FROM (SELECT timestamp AS ts, trace_id, span_id, name, duration_nano, parent_span_id FROM A_not_B) AS t ORDER BY timestamp DESC LIMIT ? SETTINGS distributed_product_mode='allow', max_memory_usage=10000000000",
Args: []any{"1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "frontend", "%service.name%", "%service.name\":\"frontend%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "backend", "%service.name%", "%service.name\":\"backend%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), 10},
},
expectedErr: nil,
@@ -380,11 +380,72 @@ func TestTraceOperatorStatementBuilder(t *testing.T) {
},
},
expected: qbtypes.Statement{
Query: "WITH toDateTime64(1747947419000000000, 9) AS t_from, toDateTime64(1747983448000000000, 9) AS t_to, 1747945619 AS bucket_from, 1747983448 AS bucket_to, all_spans AS (SELECT *, resource_string_service$$name AS `service.name` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_A AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), A AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_A) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_B AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), B AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_B) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), A_DIR_DESC_B AS (SELECT p.* FROM A AS p INNER JOIN B AS c ON p.trace_id = c.trace_id AND p.span_id = c.parent_span_id), __resource_filter_C AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), C AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_C) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_D AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), D AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_D) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), C_DIR_DESC_D AS (SELECT p.* FROM C AS p INNER JOIN D AS c ON p.trace_id = c.trace_id AND p.span_id = c.parent_span_id), A_DIR_DESC_B_AND_C_DIR_DESC_D AS (SELECT l.* FROM A_DIR_DESC_B AS l INNER JOIN C_DIR_DESC_D AS r ON l.trace_id = r.trace_id) SELECT timestamp, trace_id, span_id, name, duration_nano, parent_span_id FROM A_DIR_DESC_B_AND_C_DIR_DESC_D ORDER BY timestamp DESC LIMIT ? SETTINGS distributed_product_mode='allow', max_memory_usage=10000000000",
Query: "WITH toDateTime64(1747947419000000000, 9) AS t_from, toDateTime64(1747983448000000000, 9) AS t_to, 1747945619 AS bucket_from, 1747983448 AS bucket_to, all_spans AS (SELECT *, resource_string_service$$name AS `service.name` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_A AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), A AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_A) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_B AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), B AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_B) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), A_DIR_DESC_B AS (SELECT p.* FROM A AS p INNER JOIN B AS c ON p.trace_id = c.trace_id AND p.span_id = c.parent_span_id), __resource_filter_C AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), C AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_C) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_D AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), D AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_D) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), C_DIR_DESC_D AS (SELECT p.* FROM C AS p INNER JOIN D AS c ON p.trace_id = c.trace_id AND p.span_id = c.parent_span_id), A_DIR_DESC_B_AND_C_DIR_DESC_D AS (SELECT l.* FROM A_DIR_DESC_B AS l INNER JOIN C_DIR_DESC_D AS r ON l.trace_id = r.trace_id) SELECT ts AS timestamp, trace_id, span_id, name, duration_nano, parent_span_id FROM (SELECT timestamp AS ts, trace_id, span_id, name, duration_nano, parent_span_id FROM A_DIR_DESC_B_AND_C_DIR_DESC_D) AS t ORDER BY timestamp DESC LIMIT ? SETTINGS distributed_product_mode='allow', max_memory_usage=10000000000",
Args: []any{"1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "frontend", "%service.name%", "%service.name\":\"frontend%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "backend", "%service.name%", "%service.name\":\"backend%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "auth", "%service.name%", "%service.name\":\"auth%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "database", "%service.name%", "%service.name\":\"database%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), 5},
},
expectedErr: nil,
},
{
// order-by field (http.request.method) is not present in SelectFields;
// it must be included in the inner SELECT so the outer ORDER BY can
// reference it by alias, but must NOT appear in the outer SELECT list.
name: "order by field not in select fields",
requestType: qbtypes.RequestTypeRaw,
operator: qbtypes.QueryBuilderTraceOperator{
Expression: "A => B",
SelectFields: []telemetrytypes.TelemetryFieldKey{
{
Name: "service.name",
FieldContext: telemetrytypes.FieldContextResource,
FieldDataType: telemetrytypes.FieldDataTypeString,
},
},
Order: []qbtypes.OrderBy{
{
Key: qbtypes.OrderByKey{
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
Name: "http.request.method",
FieldContext: telemetrytypes.FieldContextAttribute,
FieldDataType: telemetrytypes.FieldDataTypeString,
},
},
Direction: qbtypes.OrderDirectionDesc,
},
},
Limit: 10,
},
compositeQuery: &qbtypes.CompositeQuery{
Queries: []qbtypes.QueryEnvelope{
{
Type: qbtypes.QueryTypeBuilder,
Spec: qbtypes.QueryBuilderQuery[qbtypes.TraceAggregation]{
Name: "A",
Signal: telemetrytypes.SignalTraces,
Filter: &qbtypes.Filter{
Expression: "service.name = 'frontend'",
},
},
},
{
Type: qbtypes.QueryTypeBuilder,
Spec: qbtypes.QueryBuilderQuery[qbtypes.TraceAggregation]{
Name: "B",
Signal: telemetrytypes.SignalTraces,
Filter: &qbtypes.Filter{
Expression: "service.name = 'backend'",
},
},
},
},
},
expected: qbtypes.Statement{
// http.request.method is in the inner SELECT (so ORDER BY can reach it)
// but is absent from the outer SELECT column list — only the ORDER BY clause references it.
Query: "WITH toDateTime64(1747947419000000000, 9) AS t_from, toDateTime64(1747983448000000000, 9) AS t_to, 1747945619 AS bucket_from, 1747983448 AS bucket_to, all_spans AS (SELECT *, resource_string_service$$name AS `service.name` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_A AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), A AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_A) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_B AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), B AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_B) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), A_DIR_DESC_B AS (SELECT p.* FROM A AS p INNER JOIN B AS c ON p.trace_id = c.trace_id AND p.span_id = c.parent_span_id) SELECT ts AS timestamp, trace_id, span_id, name, duration_nano, parent_span_id, `service.name` FROM (SELECT timestamp AS ts, trace_id, span_id, name, duration_nano, parent_span_id, multiIf(resource.`service.name` IS NOT NULL, resource.`service.name`::String, mapContains(resources_string, 'service.name'), resources_string['service.name'], NULL) AS `service.name`, attributes_string['http.request.method'] AS `http.request.method` FROM A_DIR_DESC_B) AS t ORDER BY `http.request.method` desc LIMIT ? SETTINGS distributed_product_mode='allow', max_memory_usage=10000000000",
Args: []any{"1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "frontend", "%service.name%", "%service.name\":\"frontend%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "backend", "%service.name%", "%service.name\":\"backend%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), 10},
},
expectedErr: nil,
},
}
fm := NewFieldMapper()

View File

@@ -72,6 +72,7 @@ class TraceOperatorQuery:
return_spans_from: str
limit: int | None = None
order: list[OrderBy] | None = None
select_fields: list[TelemetryFieldKey] | None = None
def to_dict(self) -> dict:
spec: dict[str, Any] = {
@@ -83,6 +84,8 @@ class TraceOperatorQuery:
spec["limit"] = self.limit
if self.order:
spec["order"] = [o.to_dict() if hasattr(o, "to_dict") else o for o in self.order]
if self.select_fields:
spec["selectFields"] = [f.to_dict() for f in self.select_fields]
return {"type": "builder_trace_operator", "spec": spec}

View File

@@ -0,0 +1,285 @@
"""
Integration tests for TraceOperatorQuery (builder_trace_operator) through the
/api/v5/query_range endpoint.
Covers:
1. Basic trace operator (A => B) — returns matched spans from the correct trace.
2. Order by a field absent from selectFields — must not return a server error.
Guards against the ClickHouse NOT_FOUND_COLUMN_IN_BLOCK regression where
ordering by a column absent from an outer SELECT caused a query failure.
"""
from collections.abc import Callable
from datetime import UTC, datetime, timedelta
from http import HTTPStatus
from fixtures import types
from fixtures.auth import USER_ADMIN_EMAIL, USER_ADMIN_PASSWORD
from fixtures.querier import (
OrderBy,
TelemetryFieldKey,
TraceOperatorQuery,
make_query_request,
)
from fixtures.traces import TraceIdGenerator, Traces, TracesKind, TracesStatusCode
def _builder_query(name: str, filter_expr: str, limit: int = 100) -> dict:
return {
"type": "builder_query",
"spec": {
"name": name,
"signal": "traces",
"filter": {"expression": filter_expr},
"limit": limit,
},
}
def test_trace_operator_query_basic(
signoz: types.SigNoz,
create_user_admin: None, # pylint: disable=unused-argument
get_token: Callable[[str, str], str],
insert_traces: Callable[[list[Traces]], None],
) -> None:
"""
Setup:
Insert one parent span and one child span in the same trace.
Tests:
A => B (parent has a direct child) returns the parent span (returnSpansFrom=A)
from the correct trace.
"""
parent_trace_id = TraceIdGenerator.trace_id()
parent_span_id = TraceIdGenerator.span_id()
child_span_id = TraceIdGenerator.span_id()
now = datetime.now(tz=UTC).replace(second=0, microsecond=0)
insert_traces(
[
Traces(
timestamp=now - timedelta(seconds=10),
duration=timedelta(seconds=5),
trace_id=parent_trace_id,
span_id=parent_span_id,
parent_span_id="",
name="parent-op",
kind=TracesKind.SPAN_KIND_SERVER,
status_code=TracesStatusCode.STATUS_CODE_OK,
status_message="",
resources={"service.name": "svc-a"},
attributes={"operation.type": "parent"},
),
Traces(
timestamp=now - timedelta(seconds=9),
duration=timedelta(seconds=2),
trace_id=parent_trace_id,
span_id=child_span_id,
parent_span_id=parent_span_id,
name="child-op",
kind=TracesKind.SPAN_KIND_INTERNAL,
status_code=TracesStatusCode.STATUS_CODE_OK,
status_message="",
resources={"service.name": "svc-a"},
attributes={"operation.type": "child"},
),
]
)
token = get_token(USER_ADMIN_EMAIL, USER_ADMIN_PASSWORD)
start_ms = int((now - timedelta(minutes=5)).timestamp() * 1000)
end_ms = int(now.timestamp() * 1000)
response = make_query_request(
signoz,
token,
start_ms=start_ms,
end_ms=end_ms,
request_type="raw",
queries=[
_builder_query("A", "operation.type = 'parent'"),
_builder_query("B", "operation.type = 'child'"),
TraceOperatorQuery(
name="C",
expression="A => B",
return_spans_from="A",
limit=100,
).to_dict(),
],
)
assert response.status_code == HTTPStatus.OK, response.text
assert response.json()["status"] == "success"
results = response.json()["data"]["data"]["results"]
assert len(results) == 1
rows = results[0].get("rows") or []
assert len(rows) == 1
assert rows[0]["data"]["trace_id"] == parent_trace_id
assert rows[0]["data"]["name"] == "parent-op"
def test_trace_operator_query_order_by_field_not_in_select_fields(
signoz: types.SigNoz,
create_user_admin: None, # pylint: disable=unused-argument
get_token: Callable[[str, str], str],
insert_traces: Callable[[list[Traces]], None],
) -> None:
"""
Setup:
Two traces, each with a grandparent → middle → grandchild chain:
Trace 1: grandparent (svc-a, http.method=POST) → middle → grandchild
Trace 2: grandparent (svc-b, http.method=GET) → middle → grandchild
Tests:
A -> B (indirect descendant) with selectFields=[service.name] and
order=[http.method DESC], where http.method is NOT in selectFields.
1. Query succeeds (no NOT_FOUND_COLUMN_IN_BLOCK error from ClickHouse).
2. Results are actually ordered: POST sorts before GET descending, so
svc-a must come before svc-b.
"""
trace_id_1 = TraceIdGenerator.trace_id()
trace_id_2 = TraceIdGenerator.trace_id()
gp_span_id_1 = TraceIdGenerator.span_id()
mid_span_id_1 = TraceIdGenerator.span_id()
gc_span_id_1 = TraceIdGenerator.span_id()
gp_span_id_2 = TraceIdGenerator.span_id()
mid_span_id_2 = TraceIdGenerator.span_id()
gc_span_id_2 = TraceIdGenerator.span_id()
now = datetime.now(tz=UTC).replace(second=0, microsecond=0)
insert_traces(
[
# Trace 1 — grandparent has http.method=POST (sorts first in DESC)
Traces(
timestamp=now - timedelta(seconds=10),
duration=timedelta(seconds=5),
trace_id=trace_id_1,
span_id=gp_span_id_1,
parent_span_id="",
name="gp-op",
kind=TracesKind.SPAN_KIND_SERVER,
status_code=TracesStatusCode.STATUS_CODE_OK,
status_message="",
resources={"service.name": "svc-a"},
attributes={"operation.type": "grandparent", "http.method": "POST"},
),
Traces(
timestamp=now - timedelta(seconds=9),
duration=timedelta(seconds=3),
trace_id=trace_id_1,
span_id=mid_span_id_1,
parent_span_id=gp_span_id_1,
name="mid-op",
kind=TracesKind.SPAN_KIND_INTERNAL,
status_code=TracesStatusCode.STATUS_CODE_OK,
status_message="",
resources={"service.name": "svc-a"},
attributes={"operation.type": "middle"},
),
Traces(
timestamp=now - timedelta(seconds=8),
duration=timedelta(seconds=1),
trace_id=trace_id_1,
span_id=gc_span_id_1,
parent_span_id=mid_span_id_1,
name="gc-op",
kind=TracesKind.SPAN_KIND_INTERNAL,
status_code=TracesStatusCode.STATUS_CODE_OK,
status_message="",
resources={"service.name": "svc-a"},
attributes={"operation.type": "grandchild"},
),
# Trace 2 — grandparent has http.method=GET (sorts second in DESC)
Traces(
timestamp=now - timedelta(seconds=7),
duration=timedelta(seconds=5),
trace_id=trace_id_2,
span_id=gp_span_id_2,
parent_span_id="",
name="gp-op",
kind=TracesKind.SPAN_KIND_SERVER,
status_code=TracesStatusCode.STATUS_CODE_OK,
status_message="",
resources={"service.name": "svc-b"},
attributes={"operation.type": "grandparent", "http.method": "GET"},
),
Traces(
timestamp=now - timedelta(seconds=6),
duration=timedelta(seconds=3),
trace_id=trace_id_2,
span_id=mid_span_id_2,
parent_span_id=gp_span_id_2,
name="mid-op",
kind=TracesKind.SPAN_KIND_INTERNAL,
status_code=TracesStatusCode.STATUS_CODE_OK,
status_message="",
resources={"service.name": "svc-b"},
attributes={"operation.type": "middle"},
),
Traces(
timestamp=now - timedelta(seconds=5),
duration=timedelta(seconds=1),
trace_id=trace_id_2,
span_id=gc_span_id_2,
parent_span_id=mid_span_id_2,
name="gc-op",
kind=TracesKind.SPAN_KIND_INTERNAL,
status_code=TracesStatusCode.STATUS_CODE_OK,
status_message="",
resources={"service.name": "svc-b"},
attributes={"operation.type": "grandchild"},
),
]
)
token = get_token(USER_ADMIN_EMAIL, USER_ADMIN_PASSWORD)
start_ms = int((now - timedelta(minutes=5)).timestamp() * 1000)
end_ms = int(now.timestamp() * 1000)
response = make_query_request(
signoz,
token,
start_ms=start_ms,
end_ms=end_ms,
request_type="raw",
queries=[
_builder_query("A", "operation.type = 'grandparent'"),
_builder_query("B", "operation.type = 'grandchild'"),
TraceOperatorQuery(
name="C",
expression="A -> B", # indirect descendant
return_spans_from="A",
limit=100,
select_fields=[
TelemetryFieldKey(name="service.name", field_data_type="string", field_context="resource"),
],
order=[
# http.method is intentionally absent from select_fields
OrderBy(
key=TelemetryFieldKey(name="http.method", field_data_type="string", field_context="tag"),
direction="desc",
),
],
).to_dict(),
],
)
assert response.status_code == HTTPStatus.OK, response.text
assert response.json()["status"] == "success"
results = response.json()["data"]["data"]["results"]
assert len(results) == 1
rows = results[0].get("rows") or []
# Both grandparent spans must be returned
assert len(rows) == 2
# Ordering: POST > GET in DESC — svc-a (POST) must come before svc-b (GET)
assert rows[0]["data"]["service.name"] == "svc-a", f"Expected svc-a (POST) first in http.method DESC order, got {rows[0]['data']['service.name']}"
assert rows[1]["data"]["service.name"] == "svc-b", f"Expected svc-b (GET) second in http.method DESC order, got {rows[1]['data']['service.name']}"