Compare commits

...

13 Commits

Author SHA1 Message Date
Piyush Singariya
f6ef56cf22 chore: tests updated 2026-05-19 11:12:22 +05:30
Piyush Singariya
357444c94e Merge branch 'main' into traceop 2026-05-11 20:53:51 +05:30
Piyush Singariya
a8598f3bfa fix: alias all core columns 2026-05-11 20:53:09 +05:30
Piyush Singariya
bca71f9a33 chore: remove comments 2026-05-11 16:04:32 +05:30
Piyush Singariya
c93660357d chore: fmt python 2026-05-11 16:02:18 +05:30
Piyush Singariya
5651e3b7a8 Merge branch 'main' into traceop 2026-05-11 14:28:58 +05:30
Piyush Singariya
cf2cfbc7d4 fix: remove specific of timestamp 2026-05-11 14:27:01 +05:30
Piyush Singariya
a969c38224 chore: fmtlint 2026-05-07 13:53:12 +05:30
Piyush Singariya
b892a0f0a5 chore: file rename 2026-05-07 13:51:22 +05:30
Piyush Singariya
4d47762eba chore: separate e2e test file 2026-05-07 13:50:11 +05:30
Piyush Singariya
77396a0bb3 Merge branch 'main' into traceop 2026-05-07 12:56:59 +05:30
Piyush Singariya
28c05e1bab Merge branch 'main' into traceop 2026-05-04 14:27:19 +05:30
Piyush Singariya
2b9e383994 fix: trace raw export e2e 2026-04-30 15:25:43 +05:30
4 changed files with 615 additions and 46 deletions

View File

@@ -413,28 +413,6 @@ func (b *traceOperatorCTEBuilder) buildFinalQuery(ctx context.Context, selectFro
}
func (b *traceOperatorCTEBuilder) buildListQuery(ctx context.Context, selectFromCTE string) (*qbtypes.Statement, error) {
sb := sqlbuilder.NewSelectBuilder()
// Select core fields
sb.Select(
"timestamp",
"trace_id",
"span_id",
"name",
"duration_nano",
"parent_span_id",
)
selectedFields := map[string]bool{
"timestamp": true,
"trace_id": true,
"span_id": true,
"name": true,
"duration_nano": true,
"parent_span_id": true,
}
// Get keys for selectFields
keySelectors := b.getKeySelectors()
for _, field := range b.operator.SelectFields {
keySelectors = append(keySelectors, &telemetrytypes.FieldKeySelector{
@@ -444,13 +422,38 @@ func (b *traceOperatorCTEBuilder) buildListQuery(ctx context.Context, selectFrom
FieldDataType: field.FieldDataType,
})
}
keys, _, err := b.stmtBuilder.metadataStore.GetKeysMulti(ctx, keySelectors)
if err != nil {
return nil, err
}
// Add selectFields using ColumnExpressionFor since we now have all base table columns
coreFields := []string{"timestamp", "trace_id", "span_id", "name", "duration_nano", "parent_span_id"}
selectedFields := map[string]bool{
"timestamp": true,
"trace_id": true,
"span_id": true,
"name": true,
"duration_nano": true,
"parent_span_id": true,
}
// CH 25.12.5 distributed-analyzer regression (ClickHouse/ClickHouse#103508):
// native columns from a Distributed-table-backed CTE get renamed col → col_0
// in the shard block, making them invisible to the outer SELECT/ORDER BY.
// Fix: rename every core field to a safe alias (_s_<col>) in the inner SELECT
// so the analyzer never sees the original name as an ORDER BY target. The
// outer SELECT re-exposes them under their original names.
innerCoreExprs := make([]string, len(coreFields))
outerCoreExprs := make([]string, len(coreFields))
for i, f := range coreFields {
innerCoreExprs[i] = fmt.Sprintf("%s AS _s_%s", f, f)
outerCoreExprs[i] = fmt.Sprintf("_s_%s AS %s", f, f)
}
innerSB := sqlbuilder.NewSelectBuilder()
innerSB.Select(innerCoreExprs...)
var additionalSelectedFields []string
for _, field := range b.operator.SelectFields {
if selectedFields[field.Name] {
continue
@@ -461,41 +464,56 @@ func (b *traceOperatorCTEBuilder) buildListQuery(ctx context.Context, selectFrom
slog.String("field", field.Name), errors.Attr(err))
continue
}
sb.SelectMore(colExpr)
innerSB.SelectMore(colExpr)
selectedFields[field.Name] = true
additionalSelectedFields = append(additionalSelectedFields, field.Name)
}
sb.From(selectFromCTE)
// Add order by support using ColumnExpressionFor
orderApplied := false
for _, orderBy := range b.operator.Order {
if selectedFields[orderBy.Key.Name] {
continue
}
colExpr, err := b.stmtBuilder.fm.ColumnExpressionFor(ctx, b.start, b.end, &orderBy.Key.TelemetryFieldKey, keys)
if err != nil {
return nil, err
}
sb.OrderBy(fmt.Sprintf("%s %s", colExpr, orderBy.Direction.StringValue()))
orderApplied = true
innerSB.SelectMore(colExpr)
selectedFields[orderBy.Key.Name] = true
}
if !orderApplied {
sb.OrderBy("timestamp DESC")
innerSB.From(selectFromCTE)
innerSQL, innerArgs := innerSB.BuildWithFlavor(sqlbuilder.ClickHouse)
// Outer SELECT: re-exposes core fields under their original names and acts as
// a projection barrier so ORDER BY-only fields do not leak into the result.
outerSB := sqlbuilder.NewSelectBuilder()
outerSB.Select(outerCoreExprs...)
for _, name := range additionalSelectedFields {
outerSB.SelectMore(fmt.Sprintf("`%s`", name))
}
outerSB.From(fmt.Sprintf("(%s) AS t", innerSQL))
if len(b.operator.Order) > 0 {
for _, orderBy := range b.operator.Order {
outerSB.OrderBy(fmt.Sprintf("`%s` %s", orderBy.Key.Name, orderBy.Direction.StringValue()))
}
} else {
outerSB.OrderBy("timestamp DESC")
}
if b.operator.Limit > 0 {
sb.Limit(b.operator.Limit)
outerSB.Limit(b.operator.Limit)
} else {
sb.Limit(100)
outerSB.Limit(100)
}
if b.operator.Offset > 0 {
sb.Offset(b.operator.Offset)
outerSB.Offset(b.operator.Offset)
}
sql, args := sb.BuildWithFlavor(sqlbuilder.ClickHouse)
outerSQL, outerArgs := outerSB.BuildWithFlavor(sqlbuilder.ClickHouse)
return &qbtypes.Statement{
Query: sql,
Args: args,
Query: outerSQL,
Args: append(innerArgs, outerArgs...),
}, nil
}

View File

@@ -67,7 +67,7 @@ func TestTraceOperatorStatementBuilder(t *testing.T) {
},
},
expected: qbtypes.Statement{
Query: "WITH toDateTime64(1747947419000000000, 9) AS t_from, toDateTime64(1747983448000000000, 9) AS t_to, 1747945619 AS bucket_from, 1747983448 AS bucket_to, all_spans AS (SELECT *, resource_string_service$$name AS `service.name` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_A AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), A AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_A) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_B AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), B AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_B) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), A_DIR_DESC_B AS (SELECT p.* FROM A AS p INNER JOIN B AS c ON p.trace_id = c.trace_id AND p.span_id = c.parent_span_id) SELECT timestamp, trace_id, span_id, name, duration_nano, parent_span_id, multiIf(resource.`service.name` IS NOT NULL, resource.`service.name`::String, mapContains(resources_string, 'service.name'), resources_string['service.name'], NULL) AS `service.name` FROM A_DIR_DESC_B ORDER BY timestamp DESC LIMIT ? SETTINGS distributed_product_mode='allow', max_memory_usage=10000000000",
Query: "WITH toDateTime64(1747947419000000000, 9) AS t_from, toDateTime64(1747983448000000000, 9) AS t_to, 1747945619 AS bucket_from, 1747983448 AS bucket_to, all_spans AS (SELECT *, resource_string_service$$name AS `service.name` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_A AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), A AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_A) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_B AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), B AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_B) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), A_DIR_DESC_B AS (SELECT p.* FROM A AS p INNER JOIN B AS c ON p.trace_id = c.trace_id AND p.span_id = c.parent_span_id) SELECT _s_timestamp AS timestamp, _s_trace_id AS trace_id, _s_span_id AS span_id, _s_name AS name, _s_duration_nano AS duration_nano, _s_parent_span_id AS parent_span_id, `service.name` FROM (SELECT timestamp AS _s_timestamp, trace_id AS _s_trace_id, span_id AS _s_span_id, name AS _s_name, duration_nano AS _s_duration_nano, parent_span_id AS _s_parent_span_id, multiIf(resource.`service.name` IS NOT NULL, resource.`service.name`::String, mapContains(resources_string, 'service.name'), resources_string['service.name'], NULL) AS `service.name` FROM A_DIR_DESC_B) AS t ORDER BY timestamp DESC LIMIT ? SETTINGS distributed_product_mode='allow', max_memory_usage=10000000000",
Args: []any{"1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "frontend", "%service.name%", "%service.name\":\"frontend%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "backend", "%service.name%", "%service.name\":\"backend%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), 10},
},
expectedErr: nil,
@@ -104,7 +104,7 @@ func TestTraceOperatorStatementBuilder(t *testing.T) {
},
},
expected: qbtypes.Statement{
Query: "WITH toDateTime64(1747947419000000000, 9) AS t_from, toDateTime64(1747983448000000000, 9) AS t_to, 1747945619 AS bucket_from, 1747983448 AS bucket_to, all_spans AS (SELECT *, resource_string_service$$name AS `service.name` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_A AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), A AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_A) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_B AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), B AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_B) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), A_INDIR_DESC_B AS (WITH RECURSIVE up AS (SELECT d.trace_id, d.span_id, d.parent_span_id, 0 AS depth FROM B AS d UNION ALL SELECT p.trace_id, p.span_id, p.parent_span_id, up.depth + 1 FROM all_spans AS p JOIN up ON p.trace_id = up.trace_id AND p.span_id = up.parent_span_id WHERE up.depth < 100) SELECT DISTINCT a.* FROM A AS a GLOBAL INNER JOIN (SELECT DISTINCT trace_id, span_id FROM up WHERE depth > 0 ) AS ancestors ON ancestors.trace_id = a.trace_id AND ancestors.span_id = a.span_id) SELECT timestamp, trace_id, span_id, name, duration_nano, parent_span_id FROM A_INDIR_DESC_B ORDER BY timestamp DESC LIMIT ? SETTINGS distributed_product_mode='allow', max_memory_usage=10000000000",
Query: "WITH toDateTime64(1747947419000000000, 9) AS t_from, toDateTime64(1747983448000000000, 9) AS t_to, 1747945619 AS bucket_from, 1747983448 AS bucket_to, all_spans AS (SELECT *, resource_string_service$$name AS `service.name` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_A AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), A AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_A) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_B AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), B AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_B) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), A_INDIR_DESC_B AS (WITH RECURSIVE up AS (SELECT d.trace_id, d.span_id, d.parent_span_id, 0 AS depth FROM B AS d UNION ALL SELECT p.trace_id, p.span_id, p.parent_span_id, up.depth + 1 FROM all_spans AS p JOIN up ON p.trace_id = up.trace_id AND p.span_id = up.parent_span_id WHERE up.depth < 100) SELECT DISTINCT a.* FROM A AS a GLOBAL INNER JOIN (SELECT DISTINCT trace_id, span_id FROM up WHERE depth > 0 ) AS ancestors ON ancestors.trace_id = a.trace_id AND ancestors.span_id = a.span_id) SELECT _s_timestamp AS timestamp, _s_trace_id AS trace_id, _s_span_id AS span_id, _s_name AS name, _s_duration_nano AS duration_nano, _s_parent_span_id AS parent_span_id FROM (SELECT timestamp AS _s_timestamp, trace_id AS _s_trace_id, span_id AS _s_span_id, name AS _s_name, duration_nano AS _s_duration_nano, parent_span_id AS _s_parent_span_id FROM A_INDIR_DESC_B) AS t ORDER BY timestamp DESC LIMIT ? SETTINGS distributed_product_mode='allow', max_memory_usage=10000000000",
Args: []any{"1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "gateway", "%service.name%", "%service.name\":\"gateway%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "database", "%service.name%", "%service.name\":\"database%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), 5},
},
expectedErr: nil,
@@ -141,7 +141,7 @@ func TestTraceOperatorStatementBuilder(t *testing.T) {
},
},
expected: qbtypes.Statement{
Query: "WITH toDateTime64(1747947419000000000, 9) AS t_from, toDateTime64(1747983448000000000, 9) AS t_to, 1747945619 AS bucket_from, 1747983448 AS bucket_to, all_spans AS (SELECT *, resource_string_service$$name AS `service.name` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_A AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), A AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_A) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_B AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), B AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_B) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), A_AND_B AS (SELECT l.* FROM A AS l INNER JOIN B AS r ON l.trace_id = r.trace_id) SELECT timestamp, trace_id, span_id, name, duration_nano, parent_span_id FROM A_AND_B ORDER BY timestamp DESC LIMIT ? SETTINGS distributed_product_mode='allow', max_memory_usage=10000000000",
Query: "WITH toDateTime64(1747947419000000000, 9) AS t_from, toDateTime64(1747983448000000000, 9) AS t_to, 1747945619 AS bucket_from, 1747983448 AS bucket_to, all_spans AS (SELECT *, resource_string_service$$name AS `service.name` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_A AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), A AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_A) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_B AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), B AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_B) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), A_AND_B AS (SELECT l.* FROM A AS l INNER JOIN B AS r ON l.trace_id = r.trace_id) SELECT _s_timestamp AS timestamp, _s_trace_id AS trace_id, _s_span_id AS span_id, _s_name AS name, _s_duration_nano AS duration_nano, _s_parent_span_id AS parent_span_id FROM (SELECT timestamp AS _s_timestamp, trace_id AS _s_trace_id, span_id AS _s_span_id, name AS _s_name, duration_nano AS _s_duration_nano, parent_span_id AS _s_parent_span_id FROM A_AND_B) AS t ORDER BY timestamp DESC LIMIT ? SETTINGS distributed_product_mode='allow', max_memory_usage=10000000000",
Args: []any{"1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "frontend", "%service.name%", "%service.name\":\"frontend%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "backend", "%service.name%", "%service.name\":\"backend%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), 15},
},
expectedErr: nil,
@@ -178,7 +178,7 @@ func TestTraceOperatorStatementBuilder(t *testing.T) {
},
},
expected: qbtypes.Statement{
Query: "WITH toDateTime64(1747947419000000000, 9) AS t_from, toDateTime64(1747983448000000000, 9) AS t_to, 1747945619 AS bucket_from, 1747983448 AS bucket_to, all_spans AS (SELECT *, resource_string_service$$name AS `service.name` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_A AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), A AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_A) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_B AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), B AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_B) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), A_OR_B AS (SELECT * FROM A UNION DISTINCT SELECT * FROM B) SELECT timestamp, trace_id, span_id, name, duration_nano, parent_span_id FROM A_OR_B ORDER BY timestamp DESC LIMIT ? SETTINGS distributed_product_mode='allow', max_memory_usage=10000000000",
Query: "WITH toDateTime64(1747947419000000000, 9) AS t_from, toDateTime64(1747983448000000000, 9) AS t_to, 1747945619 AS bucket_from, 1747983448 AS bucket_to, all_spans AS (SELECT *, resource_string_service$$name AS `service.name` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_A AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), A AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_A) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_B AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), B AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_B) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), A_OR_B AS (SELECT * FROM A UNION DISTINCT SELECT * FROM B) SELECT _s_timestamp AS timestamp, _s_trace_id AS trace_id, _s_span_id AS span_id, _s_name AS name, _s_duration_nano AS duration_nano, _s_parent_span_id AS parent_span_id FROM (SELECT timestamp AS _s_timestamp, trace_id AS _s_trace_id, span_id AS _s_span_id, name AS _s_name, duration_nano AS _s_duration_nano, parent_span_id AS _s_parent_span_id FROM A_OR_B) AS t ORDER BY timestamp DESC LIMIT ? SETTINGS distributed_product_mode='allow', max_memory_usage=10000000000",
Args: []any{"1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "frontend", "%service.name%", "%service.name\":\"frontend%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "backend", "%service.name%", "%service.name\":\"backend%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), 20},
},
expectedErr: nil,
@@ -215,7 +215,7 @@ func TestTraceOperatorStatementBuilder(t *testing.T) {
},
},
expected: qbtypes.Statement{
Query: "WITH toDateTime64(1747947419000000000, 9) AS t_from, toDateTime64(1747983448000000000, 9) AS t_to, 1747945619 AS bucket_from, 1747983448 AS bucket_to, all_spans AS (SELECT *, resource_string_service$$name AS `service.name` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_A AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), A AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_A) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_B AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), B AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_B) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), A_not_B AS (SELECT l.* FROM A AS l WHERE l.trace_id GLOBAL NOT IN (SELECT DISTINCT trace_id FROM B)) SELECT timestamp, trace_id, span_id, name, duration_nano, parent_span_id FROM A_not_B ORDER BY timestamp DESC LIMIT ? SETTINGS distributed_product_mode='allow', max_memory_usage=10000000000",
Query: "WITH toDateTime64(1747947419000000000, 9) AS t_from, toDateTime64(1747983448000000000, 9) AS t_to, 1747945619 AS bucket_from, 1747983448 AS bucket_to, all_spans AS (SELECT *, resource_string_service$$name AS `service.name` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_A AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), A AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_A) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_B AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), B AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_B) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), A_not_B AS (SELECT l.* FROM A AS l WHERE l.trace_id GLOBAL NOT IN (SELECT DISTINCT trace_id FROM B)) SELECT _s_timestamp AS timestamp, _s_trace_id AS trace_id, _s_span_id AS span_id, _s_name AS name, _s_duration_nano AS duration_nano, _s_parent_span_id AS parent_span_id FROM (SELECT timestamp AS _s_timestamp, trace_id AS _s_trace_id, span_id AS _s_span_id, name AS _s_name, duration_nano AS _s_duration_nano, parent_span_id AS _s_parent_span_id FROM A_not_B) AS t ORDER BY timestamp DESC LIMIT ? SETTINGS distributed_product_mode='allow', max_memory_usage=10000000000",
Args: []any{"1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "frontend", "%service.name%", "%service.name\":\"frontend%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "backend", "%service.name%", "%service.name\":\"backend%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), 10},
},
expectedErr: nil,
@@ -380,11 +380,72 @@ func TestTraceOperatorStatementBuilder(t *testing.T) {
},
},
expected: qbtypes.Statement{
Query: "WITH toDateTime64(1747947419000000000, 9) AS t_from, toDateTime64(1747983448000000000, 9) AS t_to, 1747945619 AS bucket_from, 1747983448 AS bucket_to, all_spans AS (SELECT *, resource_string_service$$name AS `service.name` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_A AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), A AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_A) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_B AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), B AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_B) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), A_DIR_DESC_B AS (SELECT p.* FROM A AS p INNER JOIN B AS c ON p.trace_id = c.trace_id AND p.span_id = c.parent_span_id), __resource_filter_C AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), C AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_C) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_D AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), D AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_D) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), C_DIR_DESC_D AS (SELECT p.* FROM C AS p INNER JOIN D AS c ON p.trace_id = c.trace_id AND p.span_id = c.parent_span_id), A_DIR_DESC_B_AND_C_DIR_DESC_D AS (SELECT l.* FROM A_DIR_DESC_B AS l INNER JOIN C_DIR_DESC_D AS r ON l.trace_id = r.trace_id) SELECT timestamp, trace_id, span_id, name, duration_nano, parent_span_id FROM A_DIR_DESC_B_AND_C_DIR_DESC_D ORDER BY timestamp DESC LIMIT ? SETTINGS distributed_product_mode='allow', max_memory_usage=10000000000",
Query: "WITH toDateTime64(1747947419000000000, 9) AS t_from, toDateTime64(1747983448000000000, 9) AS t_to, 1747945619 AS bucket_from, 1747983448 AS bucket_to, all_spans AS (SELECT *, resource_string_service$$name AS `service.name` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_A AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), A AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_A) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_B AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), B AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_B) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), A_DIR_DESC_B AS (SELECT p.* FROM A AS p INNER JOIN B AS c ON p.trace_id = c.trace_id AND p.span_id = c.parent_span_id), __resource_filter_C AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), C AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_C) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_D AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), D AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_D) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), C_DIR_DESC_D AS (SELECT p.* FROM C AS p INNER JOIN D AS c ON p.trace_id = c.trace_id AND p.span_id = c.parent_span_id), A_DIR_DESC_B_AND_C_DIR_DESC_D AS (SELECT l.* FROM A_DIR_DESC_B AS l INNER JOIN C_DIR_DESC_D AS r ON l.trace_id = r.trace_id) SELECT _s_timestamp AS timestamp, _s_trace_id AS trace_id, _s_span_id AS span_id, _s_name AS name, _s_duration_nano AS duration_nano, _s_parent_span_id AS parent_span_id FROM (SELECT timestamp AS _s_timestamp, trace_id AS _s_trace_id, span_id AS _s_span_id, name AS _s_name, duration_nano AS _s_duration_nano, parent_span_id AS _s_parent_span_id FROM A_DIR_DESC_B_AND_C_DIR_DESC_D) AS t ORDER BY timestamp DESC LIMIT ? SETTINGS distributed_product_mode='allow', max_memory_usage=10000000000",
Args: []any{"1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "frontend", "%service.name%", "%service.name\":\"frontend%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "backend", "%service.name%", "%service.name\":\"backend%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "auth", "%service.name%", "%service.name\":\"auth%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "database", "%service.name%", "%service.name\":\"database%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), 5},
},
expectedErr: nil,
},
{
// order-by field (http.request.method) is not present in SelectFields;
// it must be included in the inner SELECT so the outer ORDER BY can
// reference it by alias, but must NOT appear in the outer SELECT list.
name: "order by field not in select fields",
requestType: qbtypes.RequestTypeRaw,
operator: qbtypes.QueryBuilderTraceOperator{
Expression: "A => B",
SelectFields: []telemetrytypes.TelemetryFieldKey{
{
Name: "service.name",
FieldContext: telemetrytypes.FieldContextResource,
FieldDataType: telemetrytypes.FieldDataTypeString,
},
},
Order: []qbtypes.OrderBy{
{
Key: qbtypes.OrderByKey{
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
Name: "http.request.method",
FieldContext: telemetrytypes.FieldContextAttribute,
FieldDataType: telemetrytypes.FieldDataTypeString,
},
},
Direction: qbtypes.OrderDirectionDesc,
},
},
Limit: 10,
},
compositeQuery: &qbtypes.CompositeQuery{
Queries: []qbtypes.QueryEnvelope{
{
Type: qbtypes.QueryTypeBuilder,
Spec: qbtypes.QueryBuilderQuery[qbtypes.TraceAggregation]{
Name: "A",
Signal: telemetrytypes.SignalTraces,
Filter: &qbtypes.Filter{
Expression: "service.name = 'frontend'",
},
},
},
{
Type: qbtypes.QueryTypeBuilder,
Spec: qbtypes.QueryBuilderQuery[qbtypes.TraceAggregation]{
Name: "B",
Signal: telemetrytypes.SignalTraces,
Filter: &qbtypes.Filter{
Expression: "service.name = 'backend'",
},
},
},
},
},
expected: qbtypes.Statement{
// http.request.method is in the inner SELECT (so ORDER BY can reach it)
// but is absent from the outer SELECT column list — only the ORDER BY clause references it.
Query: "WITH toDateTime64(1747947419000000000, 9) AS t_from, toDateTime64(1747983448000000000, 9) AS t_to, 1747945619 AS bucket_from, 1747983448 AS bucket_to, all_spans AS (SELECT *, resource_string_service$$name AS `service.name` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_A AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), A AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_A) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_B AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), B AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_B) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), A_DIR_DESC_B AS (SELECT p.* FROM A AS p INNER JOIN B AS c ON p.trace_id = c.trace_id AND p.span_id = c.parent_span_id) SELECT _s_timestamp AS timestamp, _s_trace_id AS trace_id, _s_span_id AS span_id, _s_name AS name, _s_duration_nano AS duration_nano, _s_parent_span_id AS parent_span_id, `service.name` FROM (SELECT timestamp AS _s_timestamp, trace_id AS _s_trace_id, span_id AS _s_span_id, name AS _s_name, duration_nano AS _s_duration_nano, parent_span_id AS _s_parent_span_id, multiIf(resource.`service.name` IS NOT NULL, resource.`service.name`::String, mapContains(resources_string, 'service.name'), resources_string['service.name'], NULL) AS `service.name`, attributes_string['http.request.method'] AS `http.request.method` FROM A_DIR_DESC_B) AS t ORDER BY `http.request.method` desc LIMIT ? SETTINGS distributed_product_mode='allow', max_memory_usage=10000000000",
Args: []any{"1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "frontend", "%service.name%", "%service.name\":\"frontend%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "backend", "%service.name%", "%service.name\":\"backend%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), 10},
},
expectedErr: nil,
},
}
fm := NewFieldMapper()

View File

@@ -72,6 +72,7 @@ class TraceOperatorQuery:
return_spans_from: str
limit: int | None = None
order: list[OrderBy] | None = None
select_fields: list[TelemetryFieldKey] | None = None
def to_dict(self) -> dict:
spec: dict[str, Any] = {
@@ -83,6 +84,8 @@ class TraceOperatorQuery:
spec["limit"] = self.limit
if self.order:
spec["order"] = [o.to_dict() if hasattr(o, "to_dict") else o for o in self.order]
if self.select_fields:
spec["selectFields"] = [f.to_dict() for f in self.select_fields]
return {"type": "builder_trace_operator", "spec": spec}

View File

@@ -0,0 +1,487 @@
"""
Integration tests for TraceOperatorQuery (builder_trace_operator) through the
/api/v5/query_range endpoint.
Covers:
1. Basic trace operator (A => B) — returns matched spans from the correct trace.
2. Order by a field absent from selectFields — must not return a server error.
Guards against the ClickHouse NOT_FOUND_COLUMN_IN_BLOCK regression where
ordering by a column absent from an outer SELECT caused a query failure.
3. Expression operators (=>, ->, &&, ||, A NOT B) with and without returnSpansFrom.
"""
import pytest
from collections.abc import Callable
from dataclasses import dataclass, field
from datetime import UTC, datetime, timedelta
from http import HTTPStatus
from fixtures import types
from fixtures.auth import USER_ADMIN_EMAIL, USER_ADMIN_PASSWORD
from fixtures.querier import (
OrderBy,
TelemetryFieldKey,
TraceOperatorQuery,
make_query_request,
)
from fixtures.traces import TraceIdGenerator, Traces, TracesKind, TracesStatusCode
# ---------------------------------------------------------------------------
# Shared helpers
# ---------------------------------------------------------------------------
@dataclass
class _SpanDef:
"""Span spec relative to 'now'. parent_idx=-1 for root spans."""
name: str
service: str
op_type: str
duration_s: int
time_offset_s: int
parent_idx: int = -1
extra_attrs: dict = field(default_factory=dict)
def _build_trace(now: datetime, trace_id: str, spans: list[_SpanDef]) -> list[Traces]:
span_ids = [TraceIdGenerator.span_id() for _ in spans]
result = []
for i, (defn, span_id) in enumerate(zip(spans, span_ids)):
parent_id = "" if defn.parent_idx < 0 else span_ids[defn.parent_idx]
kind = TracesKind.SPAN_KIND_SERVER if defn.parent_idx < 0 else TracesKind.SPAN_KIND_INTERNAL
result.append(
Traces(
timestamp=now - timedelta(seconds=defn.time_offset_s),
duration=timedelta(seconds=defn.duration_s),
trace_id=trace_id,
span_id=span_id,
parent_span_id=parent_id,
name=defn.name,
kind=kind,
status_code=TracesStatusCode.STATUS_CODE_OK,
status_message="",
resources={"service.name": defn.service},
attributes={"operation.type": defn.op_type, **defn.extra_attrs},
)
)
return result
def _builder_query(name: str, filter_expr: str, limit: int = 100) -> dict:
return {
"type": "builder_query",
"spec": {
"name": name,
"signal": "traces",
"filter": {"expression": filter_expr},
"limit": limit,
},
}
# ---------------------------------------------------------------------------
# Order-by variants
# ---------------------------------------------------------------------------
# Each case uses a unique op_type prefix so spans inserted by earlier
# parametrize runs (shared DB session) are never picked up by later ones.
@dataclass
class _OrderByCase:
id: str
trace1_spans: list[_SpanDef]
trace2_spans: list[_SpanDef]
filter_a: str
filter_b: str
expression: str
select_fields: list[TelemetryFieldKey] | None
order: list[OrderBy]
expected_rows: list[dict] # ordered; each dict is a partial match against row data
_ORDER_BY_CASES: list[_OrderByCase] = [
# Order by attribute absent from selectFields — NOT_FOUND_COLUMN_IN_BLOCK regression guard.
_OrderByCase(
id="field_not_in_select",
trace1_spans=[
_SpanDef("fnis-gp", "svc-a", "fnis-grandparent", 5, 10, extra_attrs={"http.method": "POST"}),
_SpanDef("fnis-mid", "svc-a", "fnis-middle", 3, 9, parent_idx=0),
_SpanDef("fnis-gc", "svc-a", "fnis-grandchild", 1, 8, parent_idx=1),
],
trace2_spans=[
_SpanDef("fnis-gp", "svc-b", "fnis-grandparent", 5, 7, extra_attrs={"http.method": "GET"}),
_SpanDef("fnis-mid", "svc-b", "fnis-middle", 3, 6, parent_idx=0),
_SpanDef("fnis-gc", "svc-b", "fnis-grandchild", 1, 5, parent_idx=1),
],
filter_a="operation.type = 'fnis-grandparent'",
filter_b="operation.type = 'fnis-grandchild'",
expression="A -> B",
select_fields=[TelemetryFieldKey(name="service.name", field_data_type="string", field_context="resource")],
order=[OrderBy(
key=TelemetryFieldKey(name="http.method", field_data_type="string", field_context="attribute"),
direction="desc",
)],
# POST > GET in DESC → svc-a first
expected_rows=[{"service.name": "svc-a"}, {"service.name": "svc-b"}],
),
# Order by a core span field (duration_nano) with no explicit selectFields.
_OrderByCase(
id="core_span_field",
trace1_spans=[
_SpanDef("csf-parent-long", "svc-long", "csf-parent", 5, 10),
_SpanDef("csf-child-long", "svc-long", "csf-child", 1, 9, parent_idx=0),
],
trace2_spans=[
_SpanDef("csf-parent-short", "svc-short", "csf-parent", 1, 8),
_SpanDef("csf-child-short", "svc-short", "csf-child", 1, 7, parent_idx=0),
],
filter_a="operation.type = 'csf-parent'",
filter_b="operation.type = 'csf-child'",
expression="A => B",
select_fields=None,
order=[OrderBy(key=TelemetryFieldKey(name="duration_nano", field_context="span"), direction="desc")],
# 5 s parent first, 1 s parent second
expected_rows=[{"name": "csf-parent-long"}, {"name": "csf-parent-short"}],
),
# Order by a non-core attribute that IS in selectFields — checks ordering and field presence.
_OrderByCase(
id="non_core_field_in_select",
trace1_spans=[
_SpanDef("ncis-parent-post", "svc-post", "ncis-parent", 3, 10, extra_attrs={"http.method": "POST"}),
_SpanDef("ncis-child-post", "svc-post", "ncis-child", 1, 9, parent_idx=0),
],
trace2_spans=[
_SpanDef("ncis-parent-get", "svc-get", "ncis-parent", 3, 8, extra_attrs={"http.method": "GET"}),
_SpanDef("ncis-child-get", "svc-get", "ncis-child", 1, 7, parent_idx=0),
],
filter_a="operation.type = 'ncis-parent'",
filter_b="operation.type = 'ncis-child'",
expression="A => B",
select_fields=[TelemetryFieldKey(name="http.method", field_data_type="string", field_context="attribute")],
order=[OrderBy(
key=TelemetryFieldKey(name="http.method", field_data_type="string", field_context="attribute"),
direction="desc",
)],
# POST > GET in DESC; http.method must appear in both rows (it is in selectFields)
expected_rows=[{"http.method": "POST"}, {"http.method": "GET"}],
),
]
@pytest.mark.parametrize("case", [pytest.param(c, id=c.id) for c in _ORDER_BY_CASES])
def test_trace_operator_query_order_by(
signoz: types.SigNoz,
create_user_admin: None, # pylint: disable=unused-argument
get_token: Callable[[str, str], str],
insert_traces: Callable[[list[Traces]], None],
case: _OrderByCase,
) -> None:
"""
Verifies that trace operator queries honour the order-by clause.
Cases:
- field_not_in_select: order by attribute absent from selectFields
(NOT_FOUND_COLUMN_IN_BLOCK regression guard).
- core_span_field: order by duration_nano with no explicit selectFields.
- non_core_field_in_select: order by attribute present in selectFields.
"""
now = datetime.now(tz=UTC).replace(second=0, microsecond=0)
trace_id_1 = TraceIdGenerator.trace_id()
trace_id_2 = TraceIdGenerator.trace_id()
insert_traces(
_build_trace(now, trace_id_1, case.trace1_spans)
+ _build_trace(now, trace_id_2, case.trace2_spans)
+ _build_trace(now, TraceIdGenerator.trace_id(), [_SpanDef("noise-span", "svc-noise", "noise-op", 1, 2)])
)
token = get_token(USER_ADMIN_EMAIL, USER_ADMIN_PASSWORD)
start_ms = int((now - timedelta(minutes=5)).timestamp() * 1000)
end_ms = int(now.timestamp() * 1000)
response = make_query_request(
signoz,
token,
start_ms=start_ms,
end_ms=end_ms,
request_type="raw",
queries=[
_builder_query("A", case.filter_a),
_builder_query("B", case.filter_b),
TraceOperatorQuery(
name="C",
expression=case.expression,
return_spans_from="A",
limit=100,
select_fields=case.select_fields,
order=case.order,
).to_dict(),
],
)
assert response.status_code == HTTPStatus.OK, response.text
assert response.json()["status"] == "success"
results = response.json()["data"]["data"]["results"]
assert len(results) == 1
rows = results[0].get("rows") or []
assert len(rows) == len(case.expected_rows)
for i, (row, expected) in enumerate(zip(rows, case.expected_rows)):
for key, value in expected.items():
assert row["data"].get(key) == value, (
f"[{case.id}] row {i}: expected {key}={value!r}, got {row['data'].get(key)!r}"
)
# ---------------------------------------------------------------------------
# Operator × returnSpansFrom matrix
# ---------------------------------------------------------------------------
# Each case uses a unique op_type prefix so DB rows from earlier parametrize
# runs never contaminate later ones (the session-level ClickHouse is shared).
#
# Operators tested: => -> && || (A NOT B)
# For each operator two cases:
# "default" — returnSpansFrom="" → result comes from the expression's root CTE
# "return_A" — returnSpansFrom="A" → result comes from the A sub-query CTE
#
# Root-CTE semantics:
# => A spans that have a DIRECT child matching B
# -> A spans that are ancestors of any B span
# && A spans from traces that also contain a B span
# || UNION of A spans and B spans
# A NOT B A spans from traces that contain no B span
@dataclass
class _ExprCase:
id: str
traces: list[list[_SpanDef]] # one inner list per trace
filter_a: str
filter_b: str
expression: str
return_spans_from: str # "" → use expression root CTE
expected_names: set[str] # span.name values that must appear (exact set)
_EXPR_CASES: list[_ExprCase] = [
# ── A => B (direct child) ────────────────────────────────────────────────
# "default": root CTE = A spans that have a direct B child
_ExprCase(
id="direct_child_default",
traces=[
[
_SpanDef("dcd-root", "svc-dcd-a", "dcd-root", 5, 10),
_SpanDef("dcd-leaf", "svc-dcd-a", "dcd-leaf", 2, 9, parent_idx=0),
],
[_SpanDef("dcd-root-only", "svc-dcd-b", "dcd-root", 2, 7)], # A but no B child
],
filter_a="operation.type = 'dcd-root'",
filter_b="operation.type = 'dcd-leaf'",
expression="A => B",
return_spans_from="",
expected_names={"dcd-root"}, # only the root that HAS a direct child
),
# "return_A": returns ALL A spans, bypassing the expression filter
_ExprCase(
id="direct_child_return_A",
traces=[
[
_SpanDef("dca-root", "svc-dca-a", "dca-root", 5, 10),
_SpanDef("dca-leaf", "svc-dca-a", "dca-leaf", 2, 9, parent_idx=0),
],
[_SpanDef("dca-root-only", "svc-dca-b", "dca-root", 2, 7)],
],
filter_a="operation.type = 'dca-root'",
filter_b="operation.type = 'dca-leaf'",
expression="A => B",
return_spans_from="A",
expected_names={"dca-root", "dca-root-only"},
),
# ── A -> B (indirect descendant) ─────────────────────────────────────────
_ExprCase(
id="indirect_descendant_default",
traces=[
[
_SpanDef("idd-gp", "svc-idd-a", "idd-gp", 5, 10),
_SpanDef("idd-mid", "svc-idd-a", "idd-mid", 3, 9, parent_idx=0),
_SpanDef("idd-gc", "svc-idd-a", "idd-gc", 1, 8, parent_idx=1),
],
[_SpanDef("idd-gp-only", "svc-idd-b", "idd-gp", 2, 7)], # A but no B descendant
],
filter_a="operation.type = 'idd-gp'",
filter_b="operation.type = 'idd-gc'",
expression="A -> B",
return_spans_from="",
expected_names={"idd-gp"},
),
_ExprCase(
id="indirect_descendant_return_A",
traces=[
[
_SpanDef("ida-gp", "svc-ida-a", "ida-gp", 5, 10),
_SpanDef("ida-mid", "svc-ida-a", "ida-mid", 3, 9, parent_idx=0),
_SpanDef("ida-gc", "svc-ida-a", "ida-gc", 1, 8, parent_idx=1),
],
[_SpanDef("ida-gp-only", "svc-ida-b", "ida-gp", 2, 7)],
],
filter_a="operation.type = 'ida-gp'",
filter_b="operation.type = 'ida-gc'",
expression="A -> B",
return_spans_from="A",
expected_names={"ida-gp", "ida-gp-only"},
),
# ── A && B ────────────────────────────────────────────────────────────────
_ExprCase(
id="and_default",
traces=[
[
_SpanDef("and-root", "svc-and-a", "and-root", 5, 10),
_SpanDef("and-leaf", "svc-and-a", "and-leaf", 2, 9, parent_idx=0),
],
[_SpanDef("and-root-only", "svc-and-b", "and-root", 2, 7)], # A but no B in trace
],
filter_a="operation.type = 'and-root'",
filter_b="operation.type = 'and-leaf'",
expression="A && B",
return_spans_from="",
expected_names={"and-root"}, # A from traces that also contain B
),
_ExprCase(
id="and_return_A",
traces=[
[
_SpanDef("ana-root", "svc-ana-a", "ana-root", 5, 10),
_SpanDef("ana-leaf", "svc-ana-a", "ana-leaf", 2, 9, parent_idx=0),
],
[_SpanDef("ana-root-only", "svc-ana-b", "ana-root", 2, 7)],
],
filter_a="operation.type = 'ana-root'",
filter_b="operation.type = 'ana-leaf'",
expression="A && B",
return_spans_from="A",
expected_names={"ana-root", "ana-root-only"},
),
# ── A || B ────────────────────────────────────────────────────────────────
_ExprCase(
id="or_default",
traces=[
[_SpanDef("ord-a-span", "svc-ord-a", "ord-a", 5, 10)],
[_SpanDef("ord-b-span", "svc-ord-b", "ord-b", 2, 7)],
],
filter_a="operation.type = 'ord-a'",
filter_b="operation.type = 'ord-b'",
expression="A || B",
return_spans_from="",
expected_names={"ord-a-span", "ord-b-span"}, # UNION of both A and B
),
_ExprCase(
id="or_return_A",
traces=[
[_SpanDef("ora-a-span", "svc-ora-a", "ora-a", 5, 10)],
[_SpanDef("ora-b-span", "svc-ora-b", "ora-b", 2, 7)],
],
filter_a="operation.type = 'ora-a'",
filter_b="operation.type = 'ora-b'",
expression="A || B",
return_spans_from="A",
expected_names={"ora-a-span"},
),
# ── A NOT B (binary not) ──────────────────────────────────────────────────
# Unary NOT A is skipped: its root CTE reads from all_spans (unbounded by
# filter), making row counts non-deterministic across a shared test session.
_ExprCase(
id="not_binary_default",
traces=[
[
_SpanDef("nbd-root-with-child", "svc-nbd-a", "nbd-root", 5, 10),
_SpanDef("nbd-child", "svc-nbd-a", "nbd-child", 2, 9, parent_idx=0),
],
[_SpanDef("nbd-root-no-child", "svc-nbd-b", "nbd-root", 2, 7)], # A, no B
],
filter_a="operation.type = 'nbd-root'",
filter_b="operation.type = 'nbd-child'",
expression="A NOT B",
return_spans_from="",
expected_names={"nbd-root-no-child"}, # A from traces that have no B
),
_ExprCase(
id="not_binary_return_A",
traces=[
[
_SpanDef("nba-root-with-child", "svc-nba-a", "nba-root", 5, 10),
_SpanDef("nba-child", "svc-nba-a", "nba-child", 2, 9, parent_idx=0),
],
[_SpanDef("nba-root-no-child", "svc-nba-b", "nba-root", 2, 7)],
],
filter_a="operation.type = 'nba-root'",
filter_b="operation.type = 'nba-child'",
expression="A NOT B",
return_spans_from="A",
expected_names={"nba-root-with-child", "nba-root-no-child"}, # ALL A spans
),
]
@pytest.mark.parametrize("case", [pytest.param(c, id=c.id) for c in _EXPR_CASES])
def test_trace_operator_expressions(
signoz: types.SigNoz,
create_user_admin: None, # pylint: disable=unused-argument
get_token: Callable[[str, str], str],
insert_traces: Callable[[list[Traces]], None],
case: _ExprCase,
) -> None:
"""
Matrix of expression operators × returnSpansFrom settings.
For each operator (=>, ->, &&, ||, A NOT B) two cases verify:
- default (returnSpansFrom=""): result comes from the expression's root CTE,
so only spans satisfying the full structural predicate are returned.
- return_A (returnSpansFrom="A"): result comes from the raw A sub-query CTE,
bypassing the structural filter, returning ALL spans matching filter A.
"""
now = datetime.now(tz=UTC).replace(second=0, microsecond=0)
all_spans: list[Traces] = []
for span_defs in case.traces:
all_spans.extend(_build_trace(now, TraceIdGenerator.trace_id(), span_defs))
# Noise: op_type "noise-op" matches no filter in any case; surfacing it would
# mean a filter regression, which the set-equality assertion below would catch.
all_spans.extend(_build_trace(now, TraceIdGenerator.trace_id(), [_SpanDef("noise-span", "svc-noise", "noise-op", 1, 2)]))
insert_traces(all_spans)
token = get_token(USER_ADMIN_EMAIL, USER_ADMIN_PASSWORD)
start_ms = int((now - timedelta(minutes=5)).timestamp() * 1000)
end_ms = int(now.timestamp() * 1000)
response = make_query_request(
signoz,
token,
start_ms=start_ms,
end_ms=end_ms,
request_type="raw",
queries=[
_builder_query("A", case.filter_a),
_builder_query("B", case.filter_b),
TraceOperatorQuery(
name="C",
expression=case.expression,
return_spans_from=case.return_spans_from,
limit=100,
).to_dict(),
],
)
assert response.status_code == HTTPStatus.OK, response.text
assert response.json()["status"] == "success"
results = response.json()["data"]["data"]["results"]
assert len(results) == 1
rows = results[0].get("rows") or []
actual_names = {row["data"]["name"] for row in rows}
assert actual_names == case.expected_names, (
f"[{case.id}] expected spans {case.expected_names}, got {actual_names}"
)