Compare commits

..

3 Commits

Author SHA1 Message Date
Nikhil Soni
1d8a7429c5 feat: support promoted path in trace attributes queries 2026-06-23 23:36:22 +05:30
Nikhil Soni
9bb3f86a8a chore: fix formating 2026-06-23 23:33:27 +05:30
Nikhil Soni
d3a3b61716 feat: use json trace attribute for select/group by clause 2026-06-23 18:48:04 +05:30
31 changed files with 364 additions and 1444 deletions

View File

@@ -3,17 +3,16 @@ package flagger
import "github.com/SigNoz/signoz/pkg/types/featuretypes"
var (
FeatureUseSpanMetrics = featuretypes.MustNewName("use_span_metrics")
FeatureKafkaSpanEval = featuretypes.MustNewName("kafka_span_eval")
FeatureHideRootUser = featuretypes.MustNewName("hide_root_user")
FeatureGetMetersFromZeus = featuretypes.MustNewName("get_meters_from_zeus")
FeaturePutMetersInZeus = featuretypes.MustNewName("put_meters_in_zeus")
FeatureUseMeterReporter = featuretypes.MustNewName("use_meter_reporter")
FeatureUseJSONBody = featuretypes.MustNewName("use_json_body")
FeatureUseFineGrainedAuthz = featuretypes.MustNewName("use_fine_grained_authz")
FeatureUseDashboardV2 = featuretypes.MustNewName("use_dashboard_v2")
FeatureEnableAIObservability = featuretypes.MustNewName("enable_ai_observability")
FeatureEnableMetricsReduction = featuretypes.MustNewName("enable_metrics_reduction")
FeatureUseSpanMetrics = featuretypes.MustNewName("use_span_metrics")
FeatureKafkaSpanEval = featuretypes.MustNewName("kafka_span_eval")
FeatureHideRootUser = featuretypes.MustNewName("hide_root_user")
FeatureGetMetersFromZeus = featuretypes.MustNewName("get_meters_from_zeus")
FeaturePutMetersInZeus = featuretypes.MustNewName("put_meters_in_zeus")
FeatureUseMeterReporter = featuretypes.MustNewName("use_meter_reporter")
FeatureUseJSONBody = featuretypes.MustNewName("use_json_body")
FeatureUseFineGrainedAuthz = featuretypes.MustNewName("use_fine_grained_authz")
FeatureUseDashboardV2 = featuretypes.MustNewName("use_dashboard_v2")
FeatureEnableAIObservability = featuretypes.MustNewName("enable_ai_observability")
)
func MustNewRegistry() featuretypes.Registry {
@@ -98,14 +97,6 @@ func MustNewRegistry() featuretypes.Registry {
DefaultVariant: featuretypes.MustNewName("disabled"),
Variants: featuretypes.NewBooleanVariants(),
},
&featuretypes.Feature{
Name: FeatureEnableMetricsReduction,
Kind: featuretypes.KindBoolean,
Stage: featuretypes.StageExperimental,
Description: "Controls whether metrics cardinality reduction (buffer/reduced tables) is read by the querier",
DefaultVariant: featuretypes.MustNewName("disabled"),
Variants: featuretypes.NewBooleanVariants(),
},
)
if err != nil {
panic(err)

View File

@@ -341,12 +341,12 @@ func alignedMetricWindow(startMs, endMs int64) (
}
tsAdjustedStartMs, _, distributedTSTable, localTSTable := telemetrymetrics.WhichTSTableToUse(
samplesAdjustedStartMs, flooredEndMs, false, nil,
samplesAdjustedStartMs, flooredEndMs, nil,
)
distributedSamplesTable, localSamplesTable := telemetrymetrics.WhichSamplesTableToUse(
samplesAdjustedStartMs, flooredEndMs,
metrictypes.UnspecifiedType, metrictypes.TimeAggregationUnspecified, false, nil,
metrictypes.UnspecifiedType, metrictypes.TimeAggregationUnspecified, nil,
)
return samplesAdjustedStartMs, flooredEndMs, tsAdjustedStartMs, distributedTSTable, localTSTable, distributedSamplesTable, localSamplesTable

View File

@@ -141,7 +141,7 @@ func (m *module) listMetrics(ctx context.Context, orgID valuer.UUID, params *met
sb.Select("DISTINCT metric_name")
if params.Start != nil && params.End != nil {
start, end, distributedTsTable, _ := telemetrymetrics.WhichTSTableToUse(uint64(*params.Start), uint64(*params.End), false, nil)
start, end, distributedTsTable, _ := telemetrymetrics.WhichTSTableToUse(uint64(*params.Start), uint64(*params.End), nil)
sb.From(fmt.Sprintf("%s.%s", telemetrymetrics.DBName, distributedTsTable))
sb.Where(sb.Between("unix_milli", start, end))
} else {
@@ -527,7 +527,7 @@ func (m *module) InspectMetrics(
return nil, err
}
tsStart, _, tsTable, _ := telemetrymetrics.WhichTSTableToUse(start, end, false, nil)
tsStart, _, tsTable, _ := telemetrymetrics.WhichTSTableToUse(start, end, nil)
tsSb := sqlbuilder.NewSelectBuilder()
tsSb.Select("fingerprint", "labels")
tsSb.From(fmt.Sprintf("%s.%s", telemetrymetrics.DBName, tsTable))
@@ -971,8 +971,8 @@ func (m *module) fetchMetricsStatsWithSamples(
}
}
start, end, distributedTsTable, localTsTable := telemetrymetrics.WhichTSTableToUse(uint64(req.Start), uint64(req.End), false, nil)
distributedSamplesTable, _ := telemetrymetrics.WhichSamplesTableToUse(uint64(req.Start), uint64(req.End), metrictypes.UnspecifiedType, metrictypes.TimeAggregationUnspecified, false, nil)
start, end, distributedTsTable, localTsTable := telemetrymetrics.WhichTSTableToUse(uint64(req.Start), uint64(req.End), nil)
distributedSamplesTable, _ := telemetrymetrics.WhichSamplesTableToUse(uint64(req.Start), uint64(req.End), metrictypes.UnspecifiedType, metrictypes.TimeAggregationUnspecified, nil)
countExp := telemetrymetrics.CountExpressionForSamplesTable(distributedSamplesTable)
// Timeseries counts per metric
@@ -1100,7 +1100,7 @@ func (m *module) computeTimeseriesTreemap(ctx context.Context, req *metricsexplo
}
}
start, end, distributedTsTable, _ := telemetrymetrics.WhichTSTableToUse(uint64(req.Start), uint64(req.End), false, nil)
start, end, distributedTsTable, _ := telemetrymetrics.WhichTSTableToUse(uint64(req.Start), uint64(req.End), nil)
totalTSBuilder := sqlbuilder.NewSelectBuilder()
totalTSBuilder.Select("uniq(fingerprint) AS total_time_series")
@@ -1176,8 +1176,8 @@ func (m *module) computeSamplesTreemap(ctx context.Context, req *metricsexplorer
}
}
start, end, distributedTsTable, localTsTable := telemetrymetrics.WhichTSTableToUse(uint64(req.Start), uint64(req.End), false, nil)
distributedSamplesTable, _ := telemetrymetrics.WhichSamplesTableToUse(uint64(req.Start), uint64(req.End), metrictypes.UnspecifiedType, metrictypes.TimeAggregationUnspecified, false, nil)
start, end, distributedTsTable, localTsTable := telemetrymetrics.WhichTSTableToUse(uint64(req.Start), uint64(req.End), nil)
distributedSamplesTable, _ := telemetrymetrics.WhichSamplesTableToUse(uint64(req.Start), uint64(req.End), metrictypes.UnspecifiedType, metrictypes.TimeAggregationUnspecified, nil)
countExp := telemetrymetrics.CountExpressionForSamplesTable(distributedSamplesTable)
candidateLimit := req.Limit + 50

View File

@@ -91,22 +91,13 @@ func (q *builderQuery[T]) Fingerprint() string {
if a.ComparisonSpaceAggregationParam != nil {
spaceAggParamStr = a.ComparisonSpaceAggregationParam.StringValue()
}
part := fmt.Sprintf("%s:%s:%s:%s:%s",
aggParts = append(aggParts, fmt.Sprintf("%s:%s:%s:%s:%s",
a.MetricName,
a.Temporality.StringValue(),
a.TimeAggregation.StringValue(),
a.SpaceAggregation.StringValue(),
spaceAggParamStr,
)
if a.Reduced {
oneDay := uint64(24 * time.Hour.Milliseconds())
route := "reduced"
if q.toMS-q.fromMS < oneDay && q.fromMS >= uint64(time.Now().UnixMilli())-oneDay {
route = "buffer"
}
part += ":" + route
}
aggParts = append(aggParts, part)
))
}
}
parts = append(parts, fmt.Sprintf("aggs=[%s]", strings.Join(aggParts, ",")))
@@ -372,15 +363,6 @@ func (q *builderQuery[T]) executeWithContext(ctx context.Context, query string,
return nil, err
}
// TODO: This should move to readAsRaw function in consume.go but for now we are keeping it here since it's only relevant for traces
if q.spec.Signal == telemetrytypes.SignalTraces {
if raw, ok := payload.(*qbtypes.RawData); ok {
for _, rr := range raw.Rows {
mergeSpanAttributeColumns(rr.Data)
}
}
}
return &qbtypes.Result{
Type: q.kind,
Value: payload,

View File

@@ -14,7 +14,6 @@ import (
"github.com/ClickHouse/clickhouse-go/v2/lib/driver"
"github.com/SigNoz/signoz/pkg/errors"
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
"github.com/SigNoz/signoz/pkg/types/spantypes"
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
"github.com/bytedance/sonic"
)
@@ -453,53 +452,6 @@ func readAsRaw(rows driver.Rows, queryName string) (*qbtypes.RawData, error) {
}, nil
}
// mergeSpanAttributeColumns merges (attributes_string, attributes_number, attributes_bool, resources_string) into
// unified "attributes" and "resource" keys, and parses the stringified `events`
// and `links` columns into structured slices. Raw DB columns are removed.
func mergeSpanAttributeColumns(data map[string]any) {
attrStr, hasStr := data["attributes_string"]
attrNum, hasNum := data["attributes_number"]
attrBool, hasBool := data["attributes_bool"]
// todo(nitya): move to resource json
resStr, hasRes := data["resources_string"]
if hasStr || hasNum || hasBool || hasRes {
attributes := make(map[string]any)
if m, ok := attrStr.(map[string]string); ok {
for k, v := range m {
attributes[k] = v
}
}
if m, ok := attrNum.(map[string]float64); ok {
for k, v := range m {
attributes[k] = v
}
}
if m, ok := attrBool.(map[string]bool); ok {
for k, v := range m {
attributes[k] = v
}
}
delete(data, "attributes_string")
delete(data, "attributes_number")
delete(data, "attributes_bool")
data["attributes"] = attributes
resource := map[string]string{}
if m, ok := resStr.(map[string]string); ok {
resource = m
}
data["resource"] = resource
delete(data, "resources_string")
}
if raw, ok := data["events"]; ok {
data["events"] = spantypes.ParseEvents(raw)
}
if raw, ok := data["links"]; ok {
data["links"] = spantypes.ParseLinks(raw)
}
}
// numericAsFloat converts numeric types to float64 efficiently.
func numericAsFloat(v any) float64 {
switch x := v.(type) {

View File

@@ -1,92 +0,0 @@
package querier
import (
"reflect"
"testing"
"github.com/SigNoz/signoz/pkg/types/spantypes"
)
func TestMergeSpanAttributeColumns_ParsesEventsAndLinks(t *testing.T) {
data := map[string]any{
"attributes_string": map[string]string{"http.method": "GET"},
"attributes_number": map[string]float64{"http.status_code": 200},
"attributes_bool": map[string]bool{"is_root": true},
"resources_string": map[string]string{"service.name": "api"},
"events": []string{
`{"name":"request_received","timeUnixNano":1778489782759245000,"attributeMap":{"http.method":"GET","http.route":"/api/chat"}}`,
`{"name":"cache_lookup","timeUnixNano":1778489782811697000,"attributeMap":{"cache.hit":"true","cache.key":"user:123:prompt"}}`,
},
"links": `[{"traceId":"abc","spanId":"123","refType":"CHILD_OF"},{"traceId":"def","spanId":"456","refType":"FOLLOWS_FROM"}]`,
}
mergeSpanAttributeColumns(data)
attrs, ok := data["attributes"].(map[string]any)
if !ok {
t.Fatalf("expected attributes to be map[string]any, got %T", data["attributes"])
}
if attrs["http.method"] != "GET" || attrs["http.status_code"] != float64(200) || attrs["is_root"] != true {
t.Fatalf("attributes not merged correctly: %#v", attrs)
}
res, ok := data["resource"].(map[string]string)
if !ok || res["service.name"] != "api" {
t.Fatalf("resource not set correctly: %#v", data["resource"])
}
for _, removed := range []string{"attributes_string", "attributes_number", "attributes_bool", "resources_string"} {
if _, present := data[removed]; present {
t.Fatalf("expected %s to be removed", removed)
}
}
events, ok := data["events"].([]spantypes.EventV2)
if !ok {
t.Fatalf("expected events to be []spantypes.EventV2, got %T", data["events"])
}
wantEvents := []spantypes.EventV2{
{
Name: "request_received",
TimeUnixNano: 1778489782759245000,
Attributes: map[string]any{"http.method": "GET", "http.route": "/api/chat"},
IsError: false,
},
{
Name: "cache_lookup",
TimeUnixNano: 1778489782811697000,
Attributes: map[string]any{"cache.hit": "true", "cache.key": "user:123:prompt"},
},
}
if !reflect.DeepEqual(events, wantEvents) {
t.Fatalf("events parsed incorrectly:\n got: %#v\nwant: %#v", events, wantEvents)
}
links, ok := data["links"].([]spantypes.Link)
if !ok {
t.Fatalf("expected links to be []spantypes.Link, got %T", data["links"])
}
wantLinks := []spantypes.Link{
{TraceID: "abc", SpanID: "123"},
{TraceID: "def", SpanID: "456"},
}
if !reflect.DeepEqual(links, wantLinks) {
t.Fatalf("links parsed incorrectly:\n got: %#v\nwant: %#v", links, wantLinks)
}
}
func TestMergeSpanAttributeColumns_EmptyEventsAndLinks(t *testing.T) {
data := map[string]any{
"events": []string{},
"links": "[]",
}
mergeSpanAttributeColumns(data)
if events, ok := data["events"].([]spantypes.EventV2); !ok || len(events) != 0 {
t.Fatalf("expected empty []spantypes.EventV2, got %#v", data["events"])
}
if links, ok := data["links"].([]spantypes.Link); !ok || len(links) != 0 {
t.Fatalf("expected empty []spantypes.Link, got %#v", data["links"])
}
}

View File

@@ -111,7 +111,7 @@ func (q *querier) QueryRange(ctx context.Context, orgID valuer.UUID, req *qbtype
// We need to set if it is unspecified or adjust it if value is not within recommended range
intervalWarnings := q.adjustStepInterval(req.CompositeQuery.Queries, req.Start, req.End)
missingMetricQueries, metricWarnings, err := q.resolveMetricMetadata(ctx, orgID, req.CompositeQuery.Queries, req.Start, req.End)
missingMetricQueries, metricWarnings, err := q.resolveMetricMetadata(ctx, req.CompositeQuery.Queries, req.Start, req.End)
if err != nil {
return nil, err
}
@@ -320,7 +320,7 @@ func (q *querier) populateQBEvent(event *qbtypes.QBEvent, queries []qbtypes.Quer
// resolved: never-seen metrics and dormant metrics (seen but no data in
// the query window).
// - err: Internal when a metadata fetch fails.
func (q *querier) resolveMetricMetadata(ctx context.Context, orgID valuer.UUID, queries []qbtypes.QueryEnvelope, start, end uint64) (missingMetricQueries []string, metricWarnings []string, err error) {
func (q *querier) resolveMetricMetadata(ctx context.Context, queries []qbtypes.QueryEnvelope, start, end uint64) (missingMetricQueries []string, metricWarnings []string, err error) {
metricNames := make([]string, 0)
for idx := range queries {
if queries[idx].Type != qbtypes.QueryTypeBuilder {
@@ -341,7 +341,7 @@ func (q *querier) resolveMetricMetadata(ctx context.Context, orgID valuer.UUID,
return nil, nil, nil
}
metricTemporality, metricTypes, reducedMetricsSet, err := q.metadataStore.FetchTemporalityAndTypeMulti(ctx, orgID, start, end, metricNames...)
metricTemporality, metricTypes, err := q.metadataStore.FetchTemporalityAndTypeMulti(ctx, start, end, metricNames...)
if err != nil {
q.logger.WarnContext(ctx, "failed to fetch metric temporality", errors.Attr(err), slog.Any("metrics", metricNames))
return nil, nil, errors.NewInternalf(errors.CodeInternal, "failed to fetch metrics temporality")
@@ -378,9 +378,6 @@ func (q *querier) resolveMetricMetadata(ctx context.Context, orgID valuer.UUID,
if err := spec.Aggregations[i].ValidateForType(); err != nil {
return nil, nil, err
}
if reducedMetricsSet[spec.Aggregations[i].MetricName] {
spec.Aggregations[i].Reduced = true
}
presentAggregations = append(presentAggregations, spec.Aggregations[i])
}
if len(presentAggregations) == 0 {

View File

@@ -85,13 +85,6 @@ func (q *traceOperatorQuery) executeWithContext(ctx context.Context, query strin
return nil, err
}
// TODO: This should move to readAsRaw function in consume.go but for now we can keep it here since it's only relevant for traces
if raw, ok := payload.(*qbtypes.RawData); ok {
for _, rr := range raw.Rows {
mergeSpanAttributeColumns(rr.Data)
}
}
return &qbtypes.Result{
Type: q.kind,
Value: payload,

View File

@@ -2136,12 +2136,12 @@ func (t *telemetryMetaStore) GetAllValues(ctx context.Context, fieldValueSelecto
return values, complete, nil
}
func (t *telemetryMetaStore) FetchTemporality(ctx context.Context, orgID valuer.UUID, queryTimeRangeStartTs, queryTimeRangeEndTs uint64, metricName string) (metrictypes.Temporality, error) {
func (t *telemetryMetaStore) FetchTemporality(ctx context.Context, queryTimeRangeStartTs, queryTimeRangeEndTs uint64, metricName string) (metrictypes.Temporality, error) {
if metricName == "" {
return metrictypes.Unknown, errors.Newf(errors.TypeInternal, errors.CodeInternal, "metric name cannot be empty")
}
temporalityMap, err := t.FetchTemporalityMulti(ctx, orgID, queryTimeRangeStartTs, queryTimeRangeEndTs, metricName)
temporalityMap, err := t.FetchTemporalityMulti(ctx, queryTimeRangeStartTs, queryTimeRangeEndTs, metricName)
if err != nil {
return metrictypes.Unknown, err
}
@@ -2154,27 +2154,25 @@ func (t *telemetryMetaStore) FetchTemporality(ctx context.Context, orgID valuer.
return temporality, nil
}
func (t *telemetryMetaStore) FetchTemporalityMulti(ctx context.Context, orgID valuer.UUID, queryTimeRangeStartTs, queryTimeRangeEndTs uint64, metricNames ...string) (map[string]metrictypes.Temporality, error) {
temporalities, _, _, err := t.FetchTemporalityAndTypeMulti(ctx, orgID, queryTimeRangeStartTs, queryTimeRangeEndTs, metricNames...)
func (t *telemetryMetaStore) FetchTemporalityMulti(ctx context.Context, queryTimeRangeStartTs, queryTimeRangeEndTs uint64, metricNames ...string) (map[string]metrictypes.Temporality, error) {
temporalities, _, err := t.FetchTemporalityAndTypeMulti(ctx, queryTimeRangeStartTs, queryTimeRangeEndTs, metricNames...)
return temporalities, err
}
func (t *telemetryMetaStore) FetchTemporalityAndTypeMulti(ctx context.Context, orgID valuer.UUID, queryTimeRangeStartTs, queryTimeRangeEndTs uint64, metricNames ...string) (map[string]metrictypes.Temporality, map[string]metrictypes.Type, map[string]bool, error) {
func (t *telemetryMetaStore) FetchTemporalityAndTypeMulti(ctx context.Context, queryTimeRangeStartTs, queryTimeRangeEndTs uint64, metricNames ...string) (map[string]metrictypes.Temporality, map[string]metrictypes.Type, error) {
if len(metricNames) == 0 {
return make(map[string]metrictypes.Temporality), make(map[string]metrictypes.Type), make(map[string]bool), nil
return make(map[string]metrictypes.Temporality), make(map[string]metrictypes.Type), nil
}
reductionEnabled := t.fl.BooleanOrEmpty(ctx, flagger.FeatureEnableMetricsReduction, featuretypes.NewFlaggerEvaluationContext(orgID))
temporalities := make(map[string]metrictypes.Temporality)
types := make(map[string]metrictypes.Type)
metricsTemporality, metricTypes, reduced, err := t.fetchMetricsTemporalityAndType(ctx, queryTimeRangeStartTs, queryTimeRangeEndTs, reductionEnabled, metricNames...)
metricsTemporality, metricTypes, err := t.fetchMetricsTemporalityAndType(ctx, queryTimeRangeStartTs, queryTimeRangeEndTs, metricNames...)
if err != nil {
return nil, nil, nil, err
return nil, nil, err
}
meterMetricsTemporality, meterMetricsTypes, err := t.fetchMeterSourceMetricsTemporalityAndType(ctx, metricNames...)
if err != nil {
return nil, nil, nil, err
return nil, nil, err
}
// For metrics not found in the database, set to Unknown
@@ -2199,10 +2197,10 @@ func (t *telemetryMetaStore) FetchTemporalityAndTypeMulti(ctx context.Context, o
}
}
return temporalities, types, reduced, nil
return temporalities, types, nil
}
func (t *telemetryMetaStore) fetchMetricsTemporalityAndType(ctx context.Context, queryTimeRangeStartTs, queryTimeRangeEndTs uint64, reductionEnabled bool, metricNames ...string) (map[string][]metrictypes.Temporality, map[string]metrictypes.Type, map[string]bool, error) {
func (t *telemetryMetaStore) fetchMetricsTemporalityAndType(ctx context.Context, queryTimeRangeStartTs, queryTimeRangeEndTs uint64, metricNames ...string) (map[string][]metrictypes.Temporality, map[string]metrictypes.Type, error) {
ctx = ctxtypes.NewContextWithCommentVals(ctx, map[string]string{
instrumentationtypes.TelemetrySignal: telemetrytypes.SignalMetrics.StringValue(),
instrumentationtypes.CodeNamespace: "metadata",
@@ -2210,58 +2208,48 @@ func (t *telemetryMetaStore) fetchMetricsTemporalityAndType(ctx context.Context,
})
temporalities := make(map[string][]metrictypes.Temporality)
types := make(map[string]metrictypes.Type)
reduced := make(map[string]bool)
adjustedStartTs, adjustedEndTs, tsTableName, _ := telemetrymetrics.WhichTSTableToUse(queryTimeRangeStartTs, queryTimeRangeEndTs, false, nil)
adjustedStartTs, adjustedEndTs, tsTableName, _ := telemetrymetrics.WhichTSTableToUse(queryTimeRangeStartTs, queryTimeRangeEndTs, nil)
cols := []string{"metric_name", "temporality", "any(type) AS type", "any(is_monotonic) as is_monotonic"}
// Build query to fetch temporality for all metrics
// We use attr_string_value where attr_name = '__temporality__'
// Note: The columns are mixed in the current data - temporality column contains metric_name
// and metric_name column contains temporality value, so we use the correct mapping
sb := sqlbuilder.Select(
"metric_name",
"temporality",
"any(type) AS type",
"any(is_monotonic) as is_monotonic",
).
From(t.metricsDBName + "." + tsTableName)
// When reduction is enabled, fold the reduced-catalog presence check into the
// same query so a metric's reduced status comes back in one round trip.
var reducedArgs []any
if reductionEnabled {
rs := sqlbuilder.NewSelectBuilder()
rs.Select("metric_name")
rs.From(t.metricsDBName + "." + telemetrymetrics.TimeseriesV4ReducedTableName)
rs.Where(rs.In("metric_name", metricNames), rs.GTE("unix_milli", adjustedStartTs), rs.LT("unix_milli", adjustedEndTs))
rs.GroupBy("metric_name")
rsQuery, rsArgs := rs.BuildWithFlavor(sqlbuilder.ClickHouse)
cols = append(cols, fmt.Sprintf("metric_name GLOBAL IN (%s) AS reduced", rsQuery))
reducedArgs = rsArgs
}
sb := sqlbuilder.NewSelectBuilder()
sb.Select(cols...)
sb.From(t.metricsDBName + "." + tsTableName)
// Filter by metric names (in the temporality column due to data mix-up)
sb.Where(
sb.In("metric_name", metricNames),
sb.GTE("unix_milli", adjustedStartTs),
sb.LT("unix_milli", adjustedEndTs),
)
sb.GroupBy("metric_name", "temporality")
query, args := sb.BuildWithFlavor(sqlbuilder.ClickHouse, reducedArgs...)
query, args := sb.BuildWithFlavor(sqlbuilder.ClickHouse)
t.logger.DebugContext(ctx, "fetching metric temporality", slog.String("query", query), slog.Any("args", args))
rows, err := t.telemetrystore.ClickhouseDB().Query(ctx, query, args...)
if err != nil {
return nil, nil, nil, errors.Wrapf(err, errors.TypeInternal, errors.CodeInternal, "failed to fetch metric temporality")
return nil, nil, errors.Wrapf(err, errors.TypeInternal, errors.CodeInternal, "failed to fetch metric temporality")
}
defer rows.Close()
// Process results
for rows.Next() {
var metricName string
var temporality metrictypes.Temporality
var metricType metrictypes.Type
var isMonotonic bool
var isReduced uint8
dest := []any{&metricName, &temporality, &metricType, &isMonotonic}
if reductionEnabled {
dest = append(dest, &isReduced)
}
if err := rows.Scan(dest...); err != nil {
return nil, nil, nil, errors.Wrapf(err, errors.TypeInternal, errors.CodeInternal, "failed to scan temporality result")
if err := rows.Scan(&metricName, &temporality, &metricType, &isMonotonic); err != nil {
return nil, nil, errors.Wrapf(err, errors.TypeInternal, errors.CodeInternal, "failed to scan temporality result")
}
if temporality != metrictypes.Unknown {
temporalities[metricName] = append(temporalities[metricName], temporality)
@@ -2270,15 +2258,12 @@ func (t *telemetryMetaStore) fetchMetricsTemporalityAndType(ctx context.Context,
metricType = metrictypes.GaugeType
}
types[metricName] = metricType
if isReduced != 0 {
reduced[metricName] = true
}
}
if err := rows.Err(); err != nil {
return nil, nil, nil, errors.Wrapf(err, errors.TypeInternal, errors.CodeInternal, "error iterating over metrics temporality rows")
return nil, nil, errors.Wrapf(err, errors.TypeInternal, errors.CodeInternal, "error iterating over metrics temporality rows")
}
return temporalities, types, reduced, nil
return temporalities, types, nil
}
func (t *telemetryMetaStore) fetchMeterSourceMetricsTemporalityAndType(ctx context.Context, metricNames ...string) (map[string]metrictypes.Temporality, map[string]metrictypes.Type, error) {
@@ -2433,6 +2418,14 @@ func (k *telemetryMetaStore) updateColumnEvolutionMetadataForKeys(ctx context.Co
selector.FieldName = key.Name
if keyEvolutions, ok := evolutionsByUniqueKey[selector.QualifiedName()]; ok {
keysToUpdate[i].Evolutions = keyEvolutions
for _, evolution := range keyEvolutions {
if evolution.ColumnName == telemetrytraces.ColumnAttributesPromoted {
// field has been promoted into the dedicated JSON sub-column.
keysToUpdate[i].Promoted = true
break
}
}
}
}
}

View File

@@ -1,157 +0,0 @@
package telemetrymetrics
import (
"context"
"testing"
"time"
"github.com/SigNoz/signoz/pkg/flagger"
"github.com/SigNoz/signoz/pkg/instrumentation/instrumentationtest"
"github.com/SigNoz/signoz/pkg/types/metrictypes"
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
"github.com/SigNoz/signoz/pkg/types/telemetrytypes/telemetrytypestest"
"github.com/stretchr/testify/require"
)
func reducedQuery(metric string, ty metrictypes.Type, temp metrictypes.Temporality, ta metrictypes.TimeAggregation, sa metrictypes.SpaceAggregation) qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation] {
return qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation]{
Signal: telemetrytypes.SignalMetrics,
StepInterval: qbtypes.Step{Duration: 5 * time.Minute},
Aggregations: []qbtypes.MetricAggregation{{
MetricName: metric,
Type: ty,
Temporality: temp,
TimeAggregation: ta,
SpaceAggregation: sa,
Reduced: true,
}},
}
}
func TestReducedStatementBuilder(t *testing.T) {
cases := []struct {
name string
query qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation]
expected qbtypes.Statement
}{
{
name: "gauge_sum_latest",
query: reducedQuery("test.metric", metrictypes.GaugeType, metrictypes.Unspecified, metrictypes.TimeAggregationLatest, metrictypes.SpaceAggregationSum),
expected: qbtypes.Statement{
Query: "SELECT * FROM (WITH __temporal_aggregation_cte AS (SELECT fingerprint, toStartOfInterval(toDateTime(intDiv(unix_milli, 1000)), toIntervalSecond(300)) AS ts, anyLast(last) AS per_series_value FROM signoz_metrics.distributed_samples_v4_agg_5m AS points INNER JOIN (SELECT fingerprint FROM signoz_metrics.time_series_v4_1day WHERE metric_name IN (?) AND unix_milli >= ? AND unix_milli <= ? AND LOWER(temporality) LIKE LOWER(?) AND __normalized = ? GROUP BY fingerprint) AS filtered_time_series ON points.fingerprint = filtered_time_series.fingerprint WHERE metric_name IN (?) AND unix_milli >= ? AND unix_milli < ? GROUP BY fingerprint, ts ORDER BY fingerprint, ts), __spatial_aggregation_cte AS (SELECT ts, sum(per_series_value) AS value FROM __temporal_aggregation_cte WHERE isNaN(per_series_value) = ? GROUP BY ts) SELECT * FROM __spatial_aggregation_cte ORDER BY ts) UNION ALL SELECT * FROM (WITH __temporal_aggregation_cte AS (SELECT fingerprint, toStartOfInterval(toDateTime(intDiv(unix_milli, 1000)), toIntervalSecond(300)) AS ts, argMax(value, unix_milli) AS per_series_value FROM (SELECT reduced_fingerprint AS fingerprint, unix_milli, argMax(`sum_last`, computed_at) AS value FROM signoz_metrics.distributed_samples_v4_reduced_last_60s WHERE metric_name IN (?) AND unix_milli >= ? AND unix_milli < ? GROUP BY reduced_fingerprint, unix_milli) AS points INNER JOIN (SELECT fingerprint FROM signoz_metrics.distributed_time_series_v4_reduced WHERE metric_name IN (?) AND unix_milli >= ? AND unix_milli <= ? AND __normalized = ? GROUP BY fingerprint) AS filtered_time_series ON points.fingerprint = filtered_time_series.fingerprint GROUP BY fingerprint, ts), __spatial_aggregation_cte AS (SELECT ts, sum(per_series_value) AS value FROM __temporal_aggregation_cte GROUP BY ts) SELECT * FROM __spatial_aggregation_cte ORDER BY ts) ORDER BY ts",
Args: []any{"test.metric", uint64(1746921600000), uint64(1747172760000), "unspecified", false, "test.metric", uint64(1746999900000), uint64(1747172760000), 0, "test.metric", uint64(1746999900000), uint64(1747172760000), "test.metric", uint64(1746999900000), uint64(1747172760000), false},
},
},
{
name: "gauge_avg_avg",
query: reducedQuery("test.metric", metrictypes.GaugeType, metrictypes.Unspecified, metrictypes.TimeAggregationAvg, metrictypes.SpaceAggregationAvg),
expected: qbtypes.Statement{
Query: "SELECT * FROM (WITH __temporal_aggregation_cte AS (SELECT fingerprint, toStartOfInterval(toDateTime(intDiv(unix_milli, 1000)), toIntervalSecond(300)) AS ts, sum(sum) / sum(count) AS per_series_value FROM signoz_metrics.distributed_samples_v4_agg_5m AS points INNER JOIN (SELECT fingerprint FROM signoz_metrics.time_series_v4_1day WHERE metric_name IN (?) AND unix_milli >= ? AND unix_milli <= ? AND LOWER(temporality) LIKE LOWER(?) AND __normalized = ? GROUP BY fingerprint) AS filtered_time_series ON points.fingerprint = filtered_time_series.fingerprint WHERE metric_name IN (?) AND unix_milli >= ? AND unix_milli < ? GROUP BY fingerprint, ts ORDER BY fingerprint, ts), __spatial_aggregation_cte AS (SELECT ts, avg(per_series_value) AS value FROM __temporal_aggregation_cte WHERE isNaN(per_series_value) = ? GROUP BY ts) SELECT * FROM __spatial_aggregation_cte ORDER BY ts) UNION ALL SELECT * FROM (WITH __temporal_aggregation_cte AS (SELECT fingerprint, toStartOfInterval(toDateTime(intDiv(unix_milli, 1000)), toIntervalSecond(300)) AS ts, avg(value) AS per_series_value, avg(weight) AS per_series_weight FROM (SELECT reduced_fingerprint AS fingerprint, unix_milli, argMax(`sum_last`, computed_at) AS value, argMax(`count_series`, computed_at) AS weight FROM signoz_metrics.distributed_samples_v4_reduced_last_60s WHERE metric_name IN (?) AND unix_milli >= ? AND unix_milli < ? GROUP BY reduced_fingerprint, unix_milli) AS points INNER JOIN (SELECT fingerprint FROM signoz_metrics.distributed_time_series_v4_reduced WHERE metric_name IN (?) AND unix_milli >= ? AND unix_milli <= ? AND __normalized = ? GROUP BY fingerprint) AS filtered_time_series ON points.fingerprint = filtered_time_series.fingerprint GROUP BY fingerprint, ts), __spatial_aggregation_cte AS (SELECT ts, sum(per_series_value) / sum(per_series_weight) AS value FROM __temporal_aggregation_cte GROUP BY ts) SELECT * FROM __spatial_aggregation_cte ORDER BY ts) ORDER BY ts",
Args: []any{"test.metric", uint64(1746921600000), uint64(1747172760000), "unspecified", false, "test.metric", uint64(1746999900000), uint64(1747172760000), 0, "test.metric", uint64(1746999900000), uint64(1747172760000), "test.metric", uint64(1746999900000), uint64(1747172760000), false},
},
},
{
name: "gauge_min_min",
query: reducedQuery("test.metric", metrictypes.GaugeType, metrictypes.Unspecified, metrictypes.TimeAggregationMin, metrictypes.SpaceAggregationMin),
expected: qbtypes.Statement{
Query: "SELECT * FROM (WITH __temporal_aggregation_cte AS (SELECT fingerprint, toStartOfInterval(toDateTime(intDiv(unix_milli, 1000)), toIntervalSecond(300)) AS ts, min(min) AS per_series_value FROM signoz_metrics.distributed_samples_v4_agg_5m AS points INNER JOIN (SELECT fingerprint FROM signoz_metrics.time_series_v4_1day WHERE metric_name IN (?) AND unix_milli >= ? AND unix_milli <= ? AND LOWER(temporality) LIKE LOWER(?) AND __normalized = ? GROUP BY fingerprint) AS filtered_time_series ON points.fingerprint = filtered_time_series.fingerprint WHERE metric_name IN (?) AND unix_milli >= ? AND unix_milli < ? GROUP BY fingerprint, ts ORDER BY fingerprint, ts), __spatial_aggregation_cte AS (SELECT ts, min(per_series_value) AS value FROM __temporal_aggregation_cte WHERE isNaN(per_series_value) = ? GROUP BY ts) SELECT * FROM __spatial_aggregation_cte ORDER BY ts) UNION ALL SELECT * FROM (WITH __temporal_aggregation_cte AS (SELECT fingerprint, toStartOfInterval(toDateTime(intDiv(unix_milli, 1000)), toIntervalSecond(300)) AS ts, min(value) AS per_series_value FROM (SELECT reduced_fingerprint AS fingerprint, unix_milli, argMax(`min`, computed_at) AS value FROM signoz_metrics.distributed_samples_v4_reduced_last_60s WHERE metric_name IN (?) AND unix_milli >= ? AND unix_milli < ? GROUP BY reduced_fingerprint, unix_milli) AS points INNER JOIN (SELECT fingerprint FROM signoz_metrics.distributed_time_series_v4_reduced WHERE metric_name IN (?) AND unix_milli >= ? AND unix_milli <= ? AND __normalized = ? GROUP BY fingerprint) AS filtered_time_series ON points.fingerprint = filtered_time_series.fingerprint GROUP BY fingerprint, ts), __spatial_aggregation_cte AS (SELECT ts, min(per_series_value) AS value FROM __temporal_aggregation_cte GROUP BY ts) SELECT * FROM __spatial_aggregation_cte ORDER BY ts) ORDER BY ts",
Args: []any{"test.metric", uint64(1746921600000), uint64(1747172760000), "unspecified", false, "test.metric", uint64(1746999900000), uint64(1747172760000), 0, "test.metric", uint64(1746999900000), uint64(1747172760000), "test.metric", uint64(1746999900000), uint64(1747172760000), false},
},
},
{
name: "gauge_max_max",
query: reducedQuery("test.metric", metrictypes.GaugeType, metrictypes.Unspecified, metrictypes.TimeAggregationMax, metrictypes.SpaceAggregationMax),
expected: qbtypes.Statement{
Query: "SELECT * FROM (WITH __temporal_aggregation_cte AS (SELECT fingerprint, toStartOfInterval(toDateTime(intDiv(unix_milli, 1000)), toIntervalSecond(300)) AS ts, max(max) AS per_series_value FROM signoz_metrics.distributed_samples_v4_agg_5m AS points INNER JOIN (SELECT fingerprint FROM signoz_metrics.time_series_v4_1day WHERE metric_name IN (?) AND unix_milli >= ? AND unix_milli <= ? AND LOWER(temporality) LIKE LOWER(?) AND __normalized = ? GROUP BY fingerprint) AS filtered_time_series ON points.fingerprint = filtered_time_series.fingerprint WHERE metric_name IN (?) AND unix_milli >= ? AND unix_milli < ? GROUP BY fingerprint, ts ORDER BY fingerprint, ts), __spatial_aggregation_cte AS (SELECT ts, max(per_series_value) AS value FROM __temporal_aggregation_cte WHERE isNaN(per_series_value) = ? GROUP BY ts) SELECT * FROM __spatial_aggregation_cte ORDER BY ts) UNION ALL SELECT * FROM (WITH __temporal_aggregation_cte AS (SELECT fingerprint, toStartOfInterval(toDateTime(intDiv(unix_milli, 1000)), toIntervalSecond(300)) AS ts, max(value) AS per_series_value FROM (SELECT reduced_fingerprint AS fingerprint, unix_milli, argMax(`max`, computed_at) AS value FROM signoz_metrics.distributed_samples_v4_reduced_last_60s WHERE metric_name IN (?) AND unix_milli >= ? AND unix_milli < ? GROUP BY reduced_fingerprint, unix_milli) AS points INNER JOIN (SELECT fingerprint FROM signoz_metrics.distributed_time_series_v4_reduced WHERE metric_name IN (?) AND unix_milli >= ? AND unix_milli <= ? AND __normalized = ? GROUP BY fingerprint) AS filtered_time_series ON points.fingerprint = filtered_time_series.fingerprint GROUP BY fingerprint, ts), __spatial_aggregation_cte AS (SELECT ts, max(per_series_value) AS value FROM __temporal_aggregation_cte GROUP BY ts) SELECT * FROM __spatial_aggregation_cte ORDER BY ts) ORDER BY ts",
Args: []any{"test.metric", uint64(1746921600000), uint64(1747172760000), "unspecified", false, "test.metric", uint64(1746999900000), uint64(1747172760000), 0, "test.metric", uint64(1746999900000), uint64(1747172760000), "test.metric", uint64(1746999900000), uint64(1747172760000), false},
},
},
{
name: "counter_sum_rate",
query: reducedQuery("test.metric.sum", metrictypes.SumType, metrictypes.Cumulative, metrictypes.TimeAggregationRate, metrictypes.SpaceAggregationSum),
expected: qbtypes.Statement{
Query: "SELECT * FROM (WITH __temporal_aggregation_cte AS (SELECT ts, multiIf(row_number() OVER rate_window = 1, nan, (per_series_value - lagInFrame(per_series_value, 1) OVER rate_window) < 0, per_series_value / (ts - lagInFrame(ts, 1) OVER rate_window), (per_series_value - lagInFrame(per_series_value, 1) OVER rate_window) / (ts - lagInFrame(ts, 1) OVER rate_window)) AS per_series_value FROM (SELECT fingerprint, toStartOfInterval(toDateTime(intDiv(unix_milli, 1000)), toIntervalSecond(300)) AS ts, max(max) AS per_series_value FROM signoz_metrics.distributed_samples_v4_agg_5m AS points INNER JOIN (SELECT fingerprint FROM signoz_metrics.time_series_v4_1day WHERE metric_name IN (?) AND unix_milli >= ? AND unix_milli <= ? AND LOWER(temporality) LIKE LOWER(?) AND __normalized = ? GROUP BY fingerprint) AS filtered_time_series ON points.fingerprint = filtered_time_series.fingerprint WHERE metric_name IN (?) AND unix_milli >= ? AND unix_milli < ? GROUP BY fingerprint, ts ORDER BY fingerprint, ts) WINDOW rate_window AS (PARTITION BY fingerprint ORDER BY fingerprint, ts)), __spatial_aggregation_cte AS (SELECT ts, sum(per_series_value) AS value FROM __temporal_aggregation_cte WHERE isNaN(per_series_value) = ? GROUP BY ts) SELECT * FROM __spatial_aggregation_cte ORDER BY ts) UNION ALL SELECT * FROM (WITH __temporal_aggregation_cte AS (SELECT fingerprint, toStartOfInterval(toDateTime(intDiv(unix_milli, 1000)), toIntervalSecond(300)) AS ts, sum(value) / 300 AS per_series_value FROM (SELECT reduced_fingerprint AS fingerprint, unix_milli, argMax(`sum`, computed_at) AS value FROM signoz_metrics.distributed_samples_v4_reduced_sum_60s WHERE metric_name IN (?) AND unix_milli >= ? AND unix_milli < ? GROUP BY reduced_fingerprint, unix_milli) AS points INNER JOIN (SELECT fingerprint FROM signoz_metrics.distributed_time_series_v4_reduced WHERE metric_name IN (?) AND unix_milli >= ? AND unix_milli <= ? AND __normalized = ? GROUP BY fingerprint) AS filtered_time_series ON points.fingerprint = filtered_time_series.fingerprint GROUP BY fingerprint, ts), __spatial_aggregation_cte AS (SELECT ts, sum(per_series_value) AS value FROM __temporal_aggregation_cte GROUP BY ts) SELECT * FROM __spatial_aggregation_cte ORDER BY ts) ORDER BY ts",
Args: []any{"test.metric.sum", uint64(1746921600000), uint64(1747172760000), "cumulative", false, "test.metric.sum", uint64(1746999600000), uint64(1747172760000), 0, "test.metric.sum", uint64(1746999600000), uint64(1747172760000), "test.metric.sum", uint64(1746999600000), uint64(1747172760000), false},
},
},
{
name: "counter_avg_increase",
query: reducedQuery("test.metric", metrictypes.SumType, metrictypes.Cumulative, metrictypes.TimeAggregationIncrease, metrictypes.SpaceAggregationAvg),
expected: qbtypes.Statement{
Query: "SELECT * FROM (WITH __temporal_aggregation_cte AS (SELECT ts, multiIf(row_number() OVER rate_window = 1, nan, (per_series_value - lagInFrame(per_series_value, 1) OVER rate_window) < 0, per_series_value, per_series_value - lagInFrame(per_series_value, 1) OVER rate_window) AS per_series_value FROM (SELECT fingerprint, toStartOfInterval(toDateTime(intDiv(unix_milli, 1000)), toIntervalSecond(300)) AS ts, max(max) AS per_series_value FROM signoz_metrics.distributed_samples_v4_agg_5m AS points INNER JOIN (SELECT fingerprint FROM signoz_metrics.time_series_v4_1day WHERE metric_name IN (?) AND unix_milli >= ? AND unix_milli <= ? AND LOWER(temporality) LIKE LOWER(?) AND __normalized = ? GROUP BY fingerprint) AS filtered_time_series ON points.fingerprint = filtered_time_series.fingerprint WHERE metric_name IN (?) AND unix_milli >= ? AND unix_milli < ? GROUP BY fingerprint, ts ORDER BY fingerprint, ts) WINDOW rate_window AS (PARTITION BY fingerprint ORDER BY fingerprint, ts)), __spatial_aggregation_cte AS (SELECT ts, avg(per_series_value) AS value FROM __temporal_aggregation_cte WHERE isNaN(per_series_value) = ? GROUP BY ts) SELECT * FROM __spatial_aggregation_cte ORDER BY ts) UNION ALL SELECT * FROM (WITH __temporal_aggregation_cte AS (SELECT fingerprint, toStartOfInterval(toDateTime(intDiv(unix_milli, 1000)), toIntervalSecond(300)) AS ts, sum(value) AS per_series_value, avg(weight) AS per_series_weight FROM (SELECT reduced_fingerprint AS fingerprint, unix_milli, argMax(`sum`, computed_at) AS value, argMax(`count_series`, computed_at) AS weight FROM signoz_metrics.distributed_samples_v4_reduced_sum_60s WHERE metric_name IN (?) AND unix_milli >= ? AND unix_milli < ? GROUP BY reduced_fingerprint, unix_milli) AS points INNER JOIN (SELECT fingerprint FROM signoz_metrics.distributed_time_series_v4_reduced WHERE metric_name IN (?) AND unix_milli >= ? AND unix_milli <= ? AND __normalized = ? GROUP BY fingerprint) AS filtered_time_series ON points.fingerprint = filtered_time_series.fingerprint GROUP BY fingerprint, ts), __spatial_aggregation_cte AS (SELECT ts, sum(per_series_value) / sum(per_series_weight) AS value FROM __temporal_aggregation_cte GROUP BY ts) SELECT * FROM __spatial_aggregation_cte ORDER BY ts) ORDER BY ts",
Args: []any{"test.metric", uint64(1746921600000), uint64(1747172760000), "cumulative", false, "test.metric", uint64(1746999600000), uint64(1747172760000), 0, "test.metric", uint64(1746999600000), uint64(1747172760000), "test.metric", uint64(1746999600000), uint64(1747172760000), false},
},
},
{
name: "counter_min_omitted",
query: reducedQuery("test.metric", metrictypes.SumType, metrictypes.Cumulative, metrictypes.TimeAggregationRate, metrictypes.SpaceAggregationMin),
expected: qbtypes.Statement{
Query: "WITH __temporal_aggregation_cte AS (SELECT ts, multiIf(row_number() OVER rate_window = 1, nan, (per_series_value - lagInFrame(per_series_value, 1) OVER rate_window) < 0, per_series_value / (ts - lagInFrame(ts, 1) OVER rate_window), (per_series_value - lagInFrame(per_series_value, 1) OVER rate_window) / (ts - lagInFrame(ts, 1) OVER rate_window)) AS per_series_value FROM (SELECT fingerprint, toStartOfInterval(toDateTime(intDiv(unix_milli, 1000)), toIntervalSecond(300)) AS ts, max(max) AS per_series_value FROM signoz_metrics.distributed_samples_v4_agg_5m AS points INNER JOIN (SELECT fingerprint FROM signoz_metrics.time_series_v4_1day WHERE metric_name IN (?) AND unix_milli >= ? AND unix_milli <= ? AND LOWER(temporality) LIKE LOWER(?) AND __normalized = ? GROUP BY fingerprint) AS filtered_time_series ON points.fingerprint = filtered_time_series.fingerprint WHERE metric_name IN (?) AND unix_milli >= ? AND unix_milli < ? GROUP BY fingerprint, ts ORDER BY fingerprint, ts) WINDOW rate_window AS (PARTITION BY fingerprint ORDER BY fingerprint, ts)), __spatial_aggregation_cte AS (SELECT ts, min(per_series_value) AS value FROM __temporal_aggregation_cte WHERE isNaN(per_series_value) = ? GROUP BY ts) SELECT * FROM __spatial_aggregation_cte ORDER BY ts",
Args: []any{"test.metric", uint64(1746921600000), uint64(1747172760000), "cumulative", false, "test.metric", uint64(1746999600000), uint64(1747172760000), 0},
},
},
{
name: "counter_max_omitted",
query: reducedQuery("test.metric", metrictypes.SumType, metrictypes.Cumulative, metrictypes.TimeAggregationRate, metrictypes.SpaceAggregationMax),
expected: qbtypes.Statement{
Query: "WITH __temporal_aggregation_cte AS (SELECT ts, multiIf(row_number() OVER rate_window = 1, nan, (per_series_value - lagInFrame(per_series_value, 1) OVER rate_window) < 0, per_series_value / (ts - lagInFrame(ts, 1) OVER rate_window), (per_series_value - lagInFrame(per_series_value, 1) OVER rate_window) / (ts - lagInFrame(ts, 1) OVER rate_window)) AS per_series_value FROM (SELECT fingerprint, toStartOfInterval(toDateTime(intDiv(unix_milli, 1000)), toIntervalSecond(300)) AS ts, max(max) AS per_series_value FROM signoz_metrics.distributed_samples_v4_agg_5m AS points INNER JOIN (SELECT fingerprint FROM signoz_metrics.time_series_v4_1day WHERE metric_name IN (?) AND unix_milli >= ? AND unix_milli <= ? AND LOWER(temporality) LIKE LOWER(?) AND __normalized = ? GROUP BY fingerprint) AS filtered_time_series ON points.fingerprint = filtered_time_series.fingerprint WHERE metric_name IN (?) AND unix_milli >= ? AND unix_milli < ? GROUP BY fingerprint, ts ORDER BY fingerprint, ts) WINDOW rate_window AS (PARTITION BY fingerprint ORDER BY fingerprint, ts)), __spatial_aggregation_cte AS (SELECT ts, max(per_series_value) AS value FROM __temporal_aggregation_cte WHERE isNaN(per_series_value) = ? GROUP BY ts) SELECT * FROM __spatial_aggregation_cte ORDER BY ts",
Args: []any{"test.metric", uint64(1746921600000), uint64(1747172760000), "cumulative", false, "test.metric", uint64(1746999600000), uint64(1747172760000), 0},
},
},
{
name: "histogram_p99",
query: reducedQuery("test.metric.bucket", metrictypes.HistogramType, metrictypes.Cumulative, metrictypes.TimeAggregationUnspecified, metrictypes.SpaceAggregationPercentile99),
expected: qbtypes.Statement{
Query: "SELECT * FROM (WITH __temporal_aggregation_cte AS (SELECT ts, `le`, multiIf(row_number() OVER rate_window = 1, nan, (per_series_value - lagInFrame(per_series_value, 1) OVER rate_window) < 0, per_series_value / (ts - lagInFrame(ts, 1) OVER rate_window), (per_series_value - lagInFrame(per_series_value, 1) OVER rate_window) / (ts - lagInFrame(ts, 1) OVER rate_window)) AS per_series_value FROM (SELECT fingerprint, toStartOfInterval(toDateTime(intDiv(unix_milli, 1000)), toIntervalSecond(300)) AS ts, `le`, max(max) AS per_series_value FROM signoz_metrics.distributed_samples_v4_agg_5m AS points INNER JOIN (SELECT fingerprint, JSONExtractString(labels, 'le') AS `le` FROM signoz_metrics.time_series_v4_1day WHERE metric_name IN (?) AND unix_milli >= ? AND unix_milli <= ? AND LOWER(temporality) LIKE LOWER(?) AND __normalized = ? GROUP BY fingerprint, `le`) AS filtered_time_series ON points.fingerprint = filtered_time_series.fingerprint WHERE metric_name IN (?) AND unix_milli >= ? AND unix_milli < ? GROUP BY fingerprint, ts, `le` ORDER BY fingerprint, ts) WINDOW rate_window AS (PARTITION BY fingerprint ORDER BY fingerprint, ts)), __spatial_aggregation_cte AS (SELECT ts, `le`, sum(per_series_value) AS value FROM __temporal_aggregation_cte WHERE isNaN(per_series_value) = ? GROUP BY ts, `le`) SELECT ts, histogramQuantile(arrayMap(x -> toFloat64(x), groupArray(le)), groupArray(value), 0.990) AS value FROM __spatial_aggregation_cte GROUP BY ts ORDER BY ts) UNION ALL SELECT * FROM (WITH __temporal_aggregation_cte AS (SELECT fingerprint, toStartOfInterval(toDateTime(intDiv(unix_milli, 1000)), toIntervalSecond(300)) AS ts, `le`, sum(value) / 300 AS per_series_value FROM (SELECT reduced_fingerprint AS fingerprint, unix_milli, argMax(`sum`, computed_at) AS value FROM signoz_metrics.distributed_samples_v4_reduced_sum_60s WHERE metric_name IN (?) AND unix_milli >= ? AND unix_milli < ? GROUP BY reduced_fingerprint, unix_milli) AS points INNER JOIN (SELECT fingerprint, JSONExtractString(labels, 'le') AS `le` FROM signoz_metrics.distributed_time_series_v4_reduced WHERE metric_name IN (?) AND unix_milli >= ? AND unix_milli <= ? AND __normalized = ? GROUP BY fingerprint, `le`) AS filtered_time_series ON points.fingerprint = filtered_time_series.fingerprint GROUP BY fingerprint, ts, `le`), __spatial_aggregation_cte AS (SELECT ts, `le`, sum(per_series_value) AS value FROM __temporal_aggregation_cte GROUP BY ts, `le`) SELECT ts, histogramQuantile(arrayMap(x -> toFloat64(x), groupArray(le)), groupArray(value), 0.990) AS value FROM __spatial_aggregation_cte GROUP BY ts ORDER BY ts) ORDER BY ts",
Args: []any{"test.metric.bucket", uint64(1746921600000), uint64(1747172760000), "cumulative", false, "test.metric.bucket", uint64(1746999900000), uint64(1747172760000), 0, "test.metric.bucket", uint64(1746999900000), uint64(1747172760000), "test.metric.bucket", uint64(1746999900000), uint64(1747172760000), false},
},
},
{
name: "summary_avg",
query: reducedQuery("test.metric", metrictypes.SummaryType, metrictypes.Unspecified, metrictypes.TimeAggregationAvg, metrictypes.SpaceAggregationAvg),
expected: qbtypes.Statement{
Query: "SELECT * FROM (WITH __temporal_aggregation_cte AS (SELECT fingerprint, toStartOfInterval(toDateTime(intDiv(unix_milli, 1000)), toIntervalSecond(300)) AS ts, sum(sum) / sum(count) AS per_series_value FROM signoz_metrics.distributed_samples_v4_agg_5m AS points INNER JOIN (SELECT fingerprint FROM signoz_metrics.time_series_v4_1day WHERE metric_name IN (?) AND unix_milli >= ? AND unix_milli <= ? AND LOWER(temporality) LIKE LOWER(?) AND __normalized = ? GROUP BY fingerprint) AS filtered_time_series ON points.fingerprint = filtered_time_series.fingerprint WHERE metric_name IN (?) AND unix_milli >= ? AND unix_milli < ? GROUP BY fingerprint, ts ORDER BY fingerprint, ts), __spatial_aggregation_cte AS (SELECT ts, avg(per_series_value) AS value FROM __temporal_aggregation_cte WHERE isNaN(per_series_value) = ? GROUP BY ts) SELECT * FROM __spatial_aggregation_cte ORDER BY ts) UNION ALL SELECT * FROM (WITH __temporal_aggregation_cte AS (SELECT fingerprint, toStartOfInterval(toDateTime(intDiv(unix_milli, 1000)), toIntervalSecond(300)) AS ts, avg(value) AS per_series_value, avg(weight) AS per_series_weight FROM (SELECT reduced_fingerprint AS fingerprint, unix_milli, argMax(`sum_last`, computed_at) AS value, argMax(`count_series`, computed_at) AS weight FROM signoz_metrics.distributed_samples_v4_reduced_last_60s WHERE metric_name IN (?) AND unix_milli >= ? AND unix_milli < ? GROUP BY reduced_fingerprint, unix_milli) AS points INNER JOIN (SELECT fingerprint FROM signoz_metrics.distributed_time_series_v4_reduced WHERE metric_name IN (?) AND unix_milli >= ? AND unix_milli <= ? AND __normalized = ? GROUP BY fingerprint) AS filtered_time_series ON points.fingerprint = filtered_time_series.fingerprint GROUP BY fingerprint, ts), __spatial_aggregation_cte AS (SELECT ts, sum(per_series_value) / sum(per_series_weight) AS value FROM __temporal_aggregation_cte GROUP BY ts) SELECT * FROM __spatial_aggregation_cte ORDER BY ts) ORDER BY ts",
Args: []any{"test.metric", uint64(1746921600000), uint64(1747172760000), "unspecified", false, "test.metric", uint64(1746999900000), uint64(1747172760000), 0, "test.metric", uint64(1746999900000), uint64(1747172760000), "test.metric", uint64(1746999900000), uint64(1747172760000), false},
},
},
}
fm := NewFieldMapper()
cb := NewConditionBuilder(fm)
fl, err := flagger.New(context.Background(), instrumentationtest.New().ToProviderSettings(), flagger.Config{}, flagger.MustNewRegistry())
require.NoError(t, err)
sb := NewMetricQueryStatementBuilder(instrumentationtest.New().ToProviderSettings(), telemetrytypestest.NewMockMetadataStore(), fm, cb, fl)
const start, end = uint64(1747000000000), uint64(1747172800000)
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
got, err := sb.Build(context.Background(), start, end, qbtypes.RequestTypeTimeSeries, c.query, nil)
require.NoError(t, err)
require.Equal(t, c.expected.Query, got.Query)
require.Equal(t, c.expected.Args, got.Args)
})
}
t.Run("buffer_recent_window", func(t *testing.T) {
now := time.Now().UnixMilli()
q := reducedQuery("test.metric", metrictypes.GaugeType, metrictypes.Unspecified, metrictypes.TimeAggregationLatest, metrictypes.SpaceAggregationSum)
got, err := sb.Build(context.Background(), uint64(now-2*time.Hour.Milliseconds()), uint64(now), qbtypes.RequestTypeTimeSeries, q, nil)
require.NoError(t, err)
require.Contains(t, got.Query, "signoz_metrics.distributed_samples_v4_buffer")
require.Contains(t, got.Query, "signoz_metrics.time_series_v4_buffer")
require.Contains(t, got.Query, "is_reduced")
require.NotContains(t, got.Query, "UNION ALL")
})
t.Run("not_reduced", func(t *testing.T) {
q := reducedQuery("test.metric", metrictypes.GaugeType, metrictypes.Unspecified, metrictypes.TimeAggregationLatest, metrictypes.SpaceAggregationSum)
q.Aggregations[0].Reduced = false
got, err := sb.Build(context.Background(), start, end, qbtypes.RequestTypeTimeSeries, q, nil)
require.NoError(t, err)
require.NotContains(t, got.Query, "UNION ALL")
require.NotContains(t, got.Query, "reduced")
require.NotContains(t, got.Query, "buffer")
})
}

View File

@@ -4,7 +4,6 @@ import (
"context"
"fmt"
"log/slog"
"time"
"github.com/SigNoz/signoz/pkg/factory"
"github.com/SigNoz/signoz/pkg/flagger"
@@ -181,30 +180,19 @@ func (b *MetricQueryStatementBuilder) buildPipelineStatement(
query.Aggregations[0].SpaceAggregation = metrictypes.SpaceAggregationSum
}
agg := query.Aggregations[0]
// A reduced metric reads the raw buffer for recent short windows, and
// samples_v4/agg (unioned with the reduced tables) otherwise. The buffer is
// shaped exactly like samples_v4 / time_series_v4, so once the table names are
// chosen the rest of the pipeline is unchanged.
useBuffer := agg.Reduced &&
end-start < oneDayInMilliseconds &&
start >= uint64(time.Now().UnixMilli())-oneDayInMilliseconds
samplesTable, _ := WhichSamplesTableToUse(start, end, agg.Type, agg.TimeAggregation, useBuffer, agg.TableHints)
tsStart, tsEnd, _, tsTable := WhichTSTableToUse(start, end, useBuffer, agg.TableHints)
var timeSeriesCTE string
var timeSeriesCTEArgs []any
var err error
if timeSeriesCTE, timeSeriesCTEArgs, err = b.buildTimeSeriesCTE(ctx, tsStart, tsEnd, query, keys, variables, tsTable); err != nil {
// time_series_cte
// this is applicable for all the queries
if timeSeriesCTE, timeSeriesCTEArgs, err = b.buildTimeSeriesCTE(ctx, start, end, query, keys, variables); err != nil {
return nil, err
}
if qbtypes.CanShortCircuitDelta(query.Aggregations[0]) {
// spatial_aggregation_cte directly for certain delta queries
if frag, args, err := b.buildTemporalAggDeltaFastPath(start, end, query, samplesTable, timeSeriesCTE, timeSeriesCTEArgs); err != nil {
if frag, args, err := b.buildTemporalAggDeltaFastPath(start, end, query, timeSeriesCTE, timeSeriesCTEArgs); err != nil {
return nil, err
} else if frag != "" {
cteFragments = append(cteFragments, frag)
@@ -212,7 +200,7 @@ func (b *MetricQueryStatementBuilder) buildPipelineStatement(
}
} else {
// temporal_aggregation_cte
if frag, args, err := b.buildTemporalAggregationCTE(ctx, start, end, query, keys, samplesTable, timeSeriesCTE, timeSeriesCTEArgs); err != nil {
if frag, args, err := b.buildTemporalAggregationCTE(ctx, start, end, query, keys, timeSeriesCTE, timeSeriesCTEArgs); err != nil {
return nil, err
} else if frag != "" {
cteFragments = append(cteFragments, frag)
@@ -226,188 +214,18 @@ func (b *MetricQueryStatementBuilder) buildPipelineStatement(
}
}
var reducedFragments []string
var reducedArgs [][]any
if agg.Reduced && !useBuffer {
var tsCTE string
var tsArgs []any
if tsCTE, tsArgs, err = b.buildReducedTimeSeriesCTE(ctx, start, end, query, keys, variables); err != nil {
return nil, err
}
if temporalFrag, temporalArgs, ok := b.buildReducedTemporalAggregationCTE(start, end, query, tsCTE, tsArgs); ok {
spatialFrag, spatialArgs := b.buildReducedSpatialAggregationCTE(query)
reducedFragments = []string{temporalFrag, spatialFrag}
reducedArgs = [][]any{temporalArgs, spatialArgs}
}
}
// reset the query to the original state
query.Aggregations[0].SpaceAggregation = origSpaceAgg
query.Aggregations[0].TimeAggregation = origTimeAgg
query.GroupBy = origGroupBy
mainStmt, err := b.BuildFinalSelect(cteFragments, cteArgs, query)
if err != nil {
return nil, err
}
if reducedFragments == nil {
return mainStmt, nil
}
reducedStmt, err := b.BuildFinalSelect(reducedFragments, reducedArgs, query)
if err != nil {
return nil, err
}
return unionStatements(mainStmt, reducedStmt, query)
}
func unionStatements(main, reduced *qbtypes.Statement, query qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation]) (*qbtypes.Statement, error) {
orderBy := "ts"
for _, g := range query.GroupBy {
orderBy = fmt.Sprintf("`%s`, ", g.Name) + orderBy
}
q := fmt.Sprintf("SELECT * FROM (%s) UNION ALL SELECT * FROM (%s) ORDER BY %s", main.Query, reduced.Query, orderBy)
args := append(append([]any{}, main.Args...), reduced.Args...)
warnings := append(append([]string{}, main.Warnings...), reduced.Warnings...)
return &qbtypes.Statement{Query: q, Args: args, Warnings: warnings}, nil
}
func (b *MetricQueryStatementBuilder) buildReducedTimeSeriesCTE(
ctx context.Context,
start, end uint64,
query qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation],
keys map[string][]*telemetrytypes.TelemetryFieldKey,
variables map[string]qbtypes.VariableItem,
) (string, []any, error) {
sb := sqlbuilder.NewSelectBuilder()
var preparedWhereClause querybuilder.PreparedWhereClause
var err error
if query.Filter != nil && query.Filter.Expression != "" {
preparedWhereClause, err = querybuilder.PrepareWhereClause(query.Filter.Expression, querybuilder.FilterExprVisitorOpts{
Context: ctx,
Logger: b.logger,
FieldMapper: b.fm,
ConditionBuilder: b.cb,
FieldKeys: keys,
FullTextColumn: &telemetrytypes.TelemetryFieldKey{Name: "labels"},
Variables: variables,
StartNs: start,
EndNs: end,
})
if err != nil {
return "", nil, err
}
}
sb.From(fmt.Sprintf("%s.%s", DBName, TimeseriesV4ReducedTableName))
sb.Select("fingerprint")
for _, g := range query.GroupBy {
col, err := b.fm.ColumnExpressionFor(ctx, start, end, &g.TelemetryFieldKey, keys)
if err != nil {
return "", nil, err
}
sb.SelectMore(col)
}
sb.Where(
sb.In("metric_name", query.Aggregations[0].MetricName),
sb.GTE("unix_milli", start),
sb.LTE("unix_milli", end),
sb.EQ("__normalized", false),
)
if !preparedWhereClause.IsEmpty() {
sb.AddWhereClause(preparedWhereClause.WhereClause)
}
sb.GroupBy("fingerprint")
sb.GroupBy(querybuilder.GroupByKeys(query.GroupBy)...)
q, args := sb.BuildWithFlavor(sqlbuilder.ClickHouse)
return fmt.Sprintf("(%s) AS filtered_time_series", q), args, nil
}
func (b *MetricQueryStatementBuilder) buildReducedTemporalAggregationCTE(
start, end uint64,
query qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation],
timeSeriesCTE string,
timeSeriesCTEArgs []any,
) (string, []any, bool) {
agg := query.Aggregations[0]
stepSec := int64(query.StepInterval.Seconds())
value, weight, ok := ReducedValueColumn(agg.Type, agg.SpaceAggregation)
if !ok {
return "", nil, false
}
// dedup recomputed buckets: latest computed_at wins per (series, 60s bucket)
dedup := sqlbuilder.NewSelectBuilder()
dedup.Select("reduced_fingerprint AS fingerprint", "unix_milli")
dedup.SelectMore(fmt.Sprintf("argMax(%s, computed_at) AS value", value))
if weight != "" {
dedup.SelectMore(fmt.Sprintf("argMax(%s, computed_at) AS weight", weight))
}
dedup.From(fmt.Sprintf("%s.%s", DBName, WhichReducedSamplesTableToUse(agg.Type)))
dedup.Where(
dedup.In("metric_name", agg.MetricName),
dedup.GTE("unix_milli", start),
dedup.LT("unix_milli", end),
)
dedup.GroupBy("reduced_fingerprint", "unix_milli")
dedupQuery, dedupArgs := dedup.BuildWithFlavor(sqlbuilder.ClickHouse)
sb := sqlbuilder.NewSelectBuilder()
sb.Select("fingerprint")
sb.SelectMore(fmt.Sprintf("toStartOfInterval(toDateTime(intDiv(unix_milli, 1000)), toIntervalSecond(%d)) AS ts", stepSec))
for _, g := range query.GroupBy {
sb.SelectMore(fmt.Sprintf("`%s`", g.Name))
}
sb.SelectMore(fmt.Sprintf("%s AS per_series_value", ReducedTimeAggregationColumn(agg.TimeAggregation, stepSec)))
if weight != "" {
// count_series is a series count, not additive over time, so the avg
// denominator is reduced with avg
sb.SelectMore("avg(weight) AS per_series_weight")
}
sb.From(fmt.Sprintf("(%s) AS points", dedupQuery))
sb.JoinWithOption(sqlbuilder.InnerJoin, timeSeriesCTE, "points.fingerprint = filtered_time_series.fingerprint")
sb.GroupBy("fingerprint", "ts")
sb.GroupBy(querybuilder.GroupByKeys(query.GroupBy)...)
initArgs := append(append([]any{}, dedupArgs...), timeSeriesCTEArgs...)
q, args := sb.BuildWithFlavor(sqlbuilder.ClickHouse, initArgs...)
return fmt.Sprintf("__temporal_aggregation_cte AS (%s)", q), args, true
}
func (b *MetricQueryStatementBuilder) buildReducedSpatialAggregationCTE(
query qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation],
) (string, []any) {
spatial := "sum(per_series_value)"
switch query.Aggregations[0].SpaceAggregation {
case metrictypes.SpaceAggregationAvg:
spatial = "sum(per_series_value) / sum(per_series_weight)"
case metrictypes.SpaceAggregationMin:
spatial = "min(per_series_value)"
case metrictypes.SpaceAggregationMax:
spatial = "max(per_series_value)"
}
sb := sqlbuilder.NewSelectBuilder()
sb.Select("ts")
for _, g := range query.GroupBy {
sb.SelectMore(fmt.Sprintf("`%s`", g.Name))
}
sb.SelectMore(spatial + " AS value")
sb.From("__temporal_aggregation_cte")
sb.GroupBy("ts")
sb.GroupBy(querybuilder.GroupByKeys(query.GroupBy)...)
q, args := sb.BuildWithFlavor(sqlbuilder.ClickHouse)
return fmt.Sprintf("__spatial_aggregation_cte AS (%s)", q), args
// final SELECT
return b.BuildFinalSelect(cteFragments, cteArgs, query)
}
func (b *MetricQueryStatementBuilder) buildTemporalAggDeltaFastPath(
start, end uint64,
query qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation],
samplesTable string,
timeSeriesCTE string,
timeSeriesCTEArgs []any,
) (string, []any, error) {
@@ -424,7 +242,8 @@ func (b *MetricQueryStatementBuilder) buildTemporalAggDeltaFastPath(
}
aggCol, err := AggregationColumnForSamplesTable(
samplesTable, query.Aggregations[0].Temporality, query.Aggregations[0].TimeAggregation,
start, end, query.Aggregations[0].Type, query.Aggregations[0].Temporality,
query.Aggregations[0].TimeAggregation, query.Aggregations[0].TableHints,
)
if err != nil {
return "", nil, err
@@ -441,7 +260,8 @@ func (b *MetricQueryStatementBuilder) buildTemporalAggDeltaFastPath(
sb.SelectMore(fmt.Sprintf("%s AS value", aggCol))
sb.From(fmt.Sprintf("%s.%s AS points", DBName, samplesTable))
tbl, _ := WhichSamplesTableToUse(start, end, query.Aggregations[0].Type, query.Aggregations[0].TimeAggregation, query.Aggregations[0].TableHints)
sb.From(fmt.Sprintf("%s.%s AS points", DBName, tbl))
sb.JoinWithOption(sqlbuilder.InnerJoin, timeSeriesCTE, "points.fingerprint = filtered_time_series.fingerprint")
sb.Where(
sb.In("metric_name", query.Aggregations[0].MetricName),
@@ -461,7 +281,6 @@ func (b *MetricQueryStatementBuilder) buildTimeSeriesCTE(
query qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation],
keys map[string][]*telemetrytypes.TelemetryFieldKey,
variables map[string]qbtypes.VariableItem,
tsTable string,
) (string, []any, error) {
sb := sqlbuilder.NewSelectBuilder()
@@ -485,7 +304,8 @@ func (b *MetricQueryStatementBuilder) buildTimeSeriesCTE(
}
}
sb.From(fmt.Sprintf("%s.%s", DBName, tsTable))
start, end, _, tbl := WhichTSTableToUse(start, end, query.Aggregations[0].TableHints)
sb.From(fmt.Sprintf("%s.%s", DBName, tbl))
sb.Select("fingerprint")
for _, g := range query.GroupBy {
@@ -511,12 +331,6 @@ func (b *MetricQueryStatementBuilder) buildTimeSeriesCTE(
sb.EQ("__normalized", false),
)
// the buffer holds both raw rows and the reduced catalog rows; the raw read
// only wants the original series
if tsTable == TimeseriesV4BufferLocalTableName {
sb.Where(sb.EQ("is_reduced", false))
}
if !preparedWhereClause.IsEmpty() {
sb.AddWhereClause(preparedWhereClause.WhereClause)
}
@@ -533,23 +347,21 @@ func (b *MetricQueryStatementBuilder) buildTemporalAggregationCTE(
start, end uint64,
query qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation],
_ map[string][]*telemetrytypes.TelemetryFieldKey,
samplesTable string,
timeSeriesCTE string,
timeSeriesCTEArgs []any,
) (string, []any, error) {
if query.Aggregations[0].Temporality == metrictypes.Delta {
return b.buildTemporalAggDelta(ctx, start, end, query, samplesTable, timeSeriesCTE, timeSeriesCTEArgs)
return b.buildTemporalAggDelta(ctx, start, end, query, timeSeriesCTE, timeSeriesCTEArgs)
} else if query.Aggregations[0].Temporality != metrictypes.Multiple {
return b.buildTemporalAggCumulativeOrUnspecified(ctx, start, end, query, samplesTable, timeSeriesCTE, timeSeriesCTEArgs)
return b.buildTemporalAggCumulativeOrUnspecified(ctx, start, end, query, timeSeriesCTE, timeSeriesCTEArgs)
}
return b.buildTemporalAggForMultipleTemporalities(ctx, start, end, query, samplesTable, timeSeriesCTE, timeSeriesCTEArgs)
return b.buildTemporalAggForMultipleTemporalities(ctx, start, end, query, timeSeriesCTE, timeSeriesCTEArgs)
}
func (b *MetricQueryStatementBuilder) buildTemporalAggDelta(
_ context.Context,
start, end uint64,
query qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation],
samplesTable string,
timeSeriesCTE string,
timeSeriesCTEArgs []any,
) (string, []any, error) {
@@ -566,7 +378,7 @@ func (b *MetricQueryStatementBuilder) buildTemporalAggDelta(
sb.SelectMore(fmt.Sprintf("`%s`", g.Name))
}
aggCol, err := AggregationColumnForSamplesTable(samplesTable, query.Aggregations[0].Temporality, query.Aggregations[0].TimeAggregation)
aggCol, err := AggregationColumnForSamplesTable(start, end, query.Aggregations[0].Type, query.Aggregations[0].Temporality, query.Aggregations[0].TimeAggregation, query.Aggregations[0].TableHints)
if err != nil {
return "", nil, err
}
@@ -577,7 +389,8 @@ func (b *MetricQueryStatementBuilder) buildTemporalAggDelta(
sb.SelectMore(fmt.Sprintf("%s AS per_series_value", aggCol))
sb.From(fmt.Sprintf("%s.%s AS points", DBName, samplesTable))
tbl, _ := WhichSamplesTableToUse(start, end, query.Aggregations[0].Type, query.Aggregations[0].TimeAggregation, query.Aggregations[0].TableHints)
sb.From(fmt.Sprintf("%s.%s AS points", DBName, tbl))
sb.JoinWithOption(sqlbuilder.InnerJoin, timeSeriesCTE, "points.fingerprint = filtered_time_series.fingerprint")
sb.Where(
sb.In("metric_name", query.Aggregations[0].MetricName),
@@ -596,7 +409,6 @@ func (b *MetricQueryStatementBuilder) buildTemporalAggCumulativeOrUnspecified(
_ context.Context,
start, end uint64,
query qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation],
samplesTable string,
timeSeriesCTE string,
timeSeriesCTEArgs []any,
) (string, []any, error) {
@@ -612,13 +424,14 @@ func (b *MetricQueryStatementBuilder) buildTemporalAggCumulativeOrUnspecified(
baseSb.SelectMore(fmt.Sprintf("`%s`", g.Name))
}
aggCol, err := AggregationColumnForSamplesTable(samplesTable, query.Aggregations[0].Temporality, query.Aggregations[0].TimeAggregation)
aggCol, err := AggregationColumnForSamplesTable(start, end, query.Aggregations[0].Type, query.Aggregations[0].Temporality, query.Aggregations[0].TimeAggregation, query.Aggregations[0].TableHints)
if err != nil {
return "", nil, err
}
baseSb.SelectMore(fmt.Sprintf("%s AS per_series_value", aggCol))
baseSb.From(fmt.Sprintf("%s.%s AS points", DBName, samplesTable))
tbl, _ := WhichSamplesTableToUse(start, end, query.Aggregations[0].Type, query.Aggregations[0].TimeAggregation, query.Aggregations[0].TableHints)
baseSb.From(fmt.Sprintf("%s.%s AS points", DBName, tbl))
baseSb.JoinWithOption(sqlbuilder.InnerJoin, timeSeriesCTE, "points.fingerprint = filtered_time_series.fingerprint")
baseSb.Where(
baseSb.In("metric_name", query.Aggregations[0].MetricName),
@@ -662,7 +475,6 @@ func (b *MetricQueryStatementBuilder) buildTemporalAggForMultipleTemporalities(
_ context.Context,
start, end uint64,
query qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation],
samplesTable string,
timeSeriesCTE string,
timeSeriesCTEArgs []any,
) (string, []any, error) {
@@ -677,11 +489,11 @@ func (b *MetricQueryStatementBuilder) buildTemporalAggForMultipleTemporalities(
sb.SelectMore(fmt.Sprintf("`%s`", g.Name))
}
aggForDeltaTemporality, err := AggregationColumnForSamplesTable(samplesTable, metrictypes.Delta, query.Aggregations[0].TimeAggregation)
aggForDeltaTemporality, err := AggregationColumnForSamplesTable(start, end, query.Aggregations[0].Type, metrictypes.Delta, query.Aggregations[0].TimeAggregation, query.Aggregations[0].TableHints)
if err != nil {
return "", nil, err
}
aggForCumulativeTemporality, err := AggregationColumnForSamplesTable(samplesTable, metrictypes.Cumulative, query.Aggregations[0].TimeAggregation)
aggForCumulativeTemporality, err := AggregationColumnForSamplesTable(start, end, query.Aggregations[0].Type, metrictypes.Cumulative, query.Aggregations[0].TimeAggregation, query.Aggregations[0].TableHints)
if err != nil {
return "", nil, err
}
@@ -709,7 +521,8 @@ func (b *MetricQueryStatementBuilder) buildTemporalAggForMultipleTemporalities(
sb.SelectMore(expr)
}
sb.From(fmt.Sprintf("%s.%s AS points", DBName, samplesTable))
tbl, _ := WhichSamplesTableToUse(start, end, query.Aggregations[0].Type, query.Aggregations[0].TimeAggregation, query.Aggregations[0].TableHints)
sb.From(fmt.Sprintf("%s.%s AS points", DBName, tbl))
sb.JoinWithOption(sqlbuilder.InnerJoin, timeSeriesCTE, "points.fingerprint = filtered_time_series.fingerprint")
sb.Where(
sb.In("metric_name", query.Aggregations[0].MetricName),

View File

@@ -30,17 +30,6 @@ const (
TimeseriesV41weekLocalTableName = "time_series_v4_1week"
AttributesMetadataTableName = "distributed_metadata"
AttributesMetadataLocalTableName = "metadata"
// The buffer holds raw points for ~24h; the reduced tables hold 60s
// aggregates of dropped-label series.
SamplesV4BufferTableName = "distributed_samples_v4_buffer"
SamplesV4BufferLocalTableName = "samples_v4_buffer"
TimeseriesV4BufferTableName = "distributed_time_series_v4_buffer"
TimeseriesV4BufferLocalTableName = "time_series_v4_buffer"
SamplesV4ReducedLastTableName = "distributed_samples_v4_reduced_last_60s"
SamplesV4ReducedSumTableName = "distributed_samples_v4_reduced_sum_60s"
TimeseriesV4ReducedTableName = "distributed_time_series_v4_reduced"
TimeseriesV4ReducedLocalTableName = "time_series_v4_reduced"
)
var (
@@ -60,16 +49,8 @@ var (
// in that order.
func WhichTSTableToUse(
start, end uint64,
useBuffer bool,
tableHints *metrictypes.MetricTableHints,
) (uint64, uint64, string, string) {
// the buffer holds the recent raw window for reduced metrics and has the same
// shape as time_series_v4; round the start to the hour like the v4 table.
if useBuffer {
start = start - (start % (oneHourInMilliseconds))
return start, end, TimeseriesV4BufferTableName, TimeseriesV4BufferLocalTableName
}
// if we have a hint for the table, we need to use it
// the hint will be used to override the default table selection logic
if tableHints != nil {
@@ -168,20 +149,14 @@ func WhichSamplesTableToUse(
start, end uint64,
metricType metrictypes.Type,
timeAggregation metrictypes.TimeAggregation,
useBuffer bool,
tableHints *metrictypes.MetricTableHints,
) (string, string) {
// the buffer holds the recent raw window for reduced metrics; same shape as samples_v4
if useBuffer {
return SamplesV4BufferTableName, SamplesV4BufferLocalTableName
}
// if we have a hint for the table, we need to use it
// the hint will be used to override the default table selection logic.
// SamplesTableName is the distributed name; derive the local via switch.
if tableHints != nil && tableHints.SamplesTableName != "" {
switch tableHints.SamplesTableName {
case SamplesV4TableName, SamplesV4BufferTableName:
case SamplesV4TableName:
return SamplesV4TableName, SamplesV4LocalTableName
case SamplesV4Agg5mTableName:
return SamplesV4Agg5mTableName, SamplesV4Agg5mLocalTableName
@@ -213,10 +188,13 @@ func WhichSamplesTableToUse(
}
func AggregationColumnForSamplesTable(
tableName string,
start, end uint64,
metricType metrictypes.Type,
temporality metrictypes.Temporality,
timeAggregation metrictypes.TimeAggregation,
tableHints *metrictypes.MetricTableHints,
) (string, error) {
tableName, _ := WhichSamplesTableToUse(start, end, metricType, timeAggregation, tableHints)
var aggregationColumn string
switch temporality {
case metrictypes.Delta:
@@ -224,7 +202,7 @@ func AggregationColumnForSamplesTable(
// although it doesn't make sense to use anyLast, avg, min, max, count on delta metrics,
// we are keeping it here to make sure that query will not be invalid
switch tableName {
case SamplesV4TableName, SamplesV4BufferTableName:
case SamplesV4TableName:
switch timeAggregation {
case metrictypes.TimeAggregationLatest:
aggregationColumn = "anyLast(value)"
@@ -266,7 +244,7 @@ func AggregationColumnForSamplesTable(
// for cumulative metrics, we only support `RATE`/`INCREASE`. The max value in window is
// used to calculate the sum which is then divided by the window size to get the rate
switch tableName {
case SamplesV4TableName, SamplesV4BufferTableName:
case SamplesV4TableName:
switch timeAggregation {
case metrictypes.TimeAggregationLatest:
aggregationColumn = "anyLast(value)"
@@ -306,7 +284,7 @@ func AggregationColumnForSamplesTable(
}
case metrictypes.Unspecified:
switch tableName {
case SamplesV4TableName, SamplesV4BufferTableName:
case SamplesV4TableName:
switch timeAggregation {
case metrictypes.TimeAggregationLatest:
aggregationColumn = "anyLast(value)"
@@ -354,65 +332,6 @@ func AggregationColumnForSamplesTable(
return aggregationColumn, nil
}
// WhichReducedSamplesTableToUse returns the 60s reduced samples table for a metric
// type: the last_60s table for gauge-like series, the sum_60s table for counters
// and histograms.
func WhichReducedSamplesTableToUse(metricType metrictypes.Type) string {
if metricType == metrictypes.SumType || metricType == metrictypes.HistogramType {
return SamplesV4ReducedSumTableName
}
return SamplesV4ReducedLastTableName
}
// ReducedValueColumn returns the reduced value column (and the avg-denominator
// weight) for a space aggregation. The reduced columns are pre-aggregated across
// the original series, so the space aggregation picks the underlying value; the
// sum table only has `sum`, so min/max across series have no column (ok=false).
func ReducedValueColumn(metricType metrictypes.Type, space metrictypes.SpaceAggregation) (value, weight string, ok bool) {
if metricType == metrictypes.SumType || metricType == metrictypes.HistogramType {
switch space {
case metrictypes.SpaceAggregationSum:
return "`sum`", "", true
case metrictypes.SpaceAggregationAvg:
return "`sum`", "`count_series`", true
}
return "", "", false
}
switch space {
case metrictypes.SpaceAggregationSum:
return "`sum_last`", "", true
case metrictypes.SpaceAggregationAvg:
return "`sum_last`", "`count_series`", true
case metrictypes.SpaceAggregationMin:
return "`min`", "", true
case metrictypes.SpaceAggregationMax:
return "`max`", "", true
}
return "", "", false
}
// ReducedTimeAggregationColumn applies the time aggregation to the reduced `value`
// column over the step's 60s buckets. latest uses argMax over the bucket timestamp
// (the buckets have no read order); rate divides the per-step sum by the step.
func ReducedTimeAggregationColumn(timeAggregation metrictypes.TimeAggregation, stepSec int64) string {
switch timeAggregation {
case metrictypes.TimeAggregationLatest:
return "argMax(value, unix_milli)"
case metrictypes.TimeAggregationAvg:
return "avg(value)"
case metrictypes.TimeAggregationMin:
return "min(value)"
case metrictypes.TimeAggregationMax:
return "max(value)"
case metrictypes.TimeAggregationCount:
return "count(value)"
case metrictypes.TimeAggregationRate:
return fmt.Sprintf("sum(value) / %d", stepSec)
default: // sum, increase
return "sum(value)"
}
}
func AggregationQueryForHistogramCountWithParams(param *metrictypes.ComparisonSpaceAggregationParam) (string, error) {
if param == nil {
return "", errors.NewInvalidInputf(errors.CodeInvalidInput, "no aggregation param provided for histogram count")

View File

@@ -41,6 +41,9 @@ func (c *conditionBuilder) conditionFor(
value = querybuilder.FormatValueForContains(value)
}
// For filters, field mapper keeps WHERE on the map columns instead of JSON column.
ctx = qbtypes.WithFilterIntent(ctx)
// first, locate the raw column type (so we can choose the right EXISTS logic)
columns, err := c.fm.ColumnFor(ctx, startNs, endNs, key)
if err != nil {

View File

@@ -5,45 +5,8 @@ import (
)
const (
// Internal Columns.
SpanTimestampBucketStartColumn = "ts_bucket_start"
SpanResourceFingerPrintColumn = "resource_fingerprint"
// Intrinsic Columns.
SpanTimestampColumn = "timestamp"
SpanTraceIDColumn = "trace_id"
SpanSpanIDColumn = "span_id"
SpanTraceStateColumn = "trace_state"
SpanParentSpanIDColumn = "parent_span_id"
SpanFlagsColumn = "flags"
SpanNameColumn = "name"
SpanKindColumn = "kind"
SpanKindStringColumn = "kind_string"
SpanDurationNanoColumn = "duration_nano"
SpanStatusCodeColumn = "status_code"
SpanStatusMessageColumn = "status_message"
SpanStatusCodeStringColumn = "status_code_string"
SpanEventsColumn = "events"
SpanLinksColumn = "links"
// Calculated Columns.
SpanResponseStatusCodeColumn = "response_status_code"
SpanExternalHTTPURLColumn = "external_http_url"
SpanHTTPURLColumn = "http_url"
SpanExternalHTTPMethodColumn = "external_http_method"
SpanHTTPMethodColumn = "http_method"
SpanHTTPHostColumn = "http_host"
SpanDBNameColumn = "db_name"
SpanDBOperationColumn = "db_operation"
SpanHasErrorColumn = "has_error"
SpanIsRemoteColumn = "is_remote"
// Contextual Columns.
SpanAttributesStringColumn = "attributes_string"
SpanAttributesNumberColumn = "attributes_number"
SpanAttributesBoolColumn = "attributes_bool"
SpanResourcesStringColumn = "resources_string"
ColumnAttributes = "attributes"
ColumnAttributesPromoted = "attributes_promoted"
)
var (
@@ -378,51 +341,6 @@ var (
SpanSearchScopeRoot = "isroot"
SpanSearchScopeEntryPoint = "isentrypoint"
// IntrinsicSpanFields lists the intrinsic span columns, in the order they
// should appear when a raw query expands its SelectFields.
IntrinsicSpanFields = []telemetrytypes.TelemetryFieldKey{
{Name: SpanTimestampColumn, FieldContext: telemetrytypes.FieldContextSpan},
{Name: SpanTraceIDColumn, FieldContext: telemetrytypes.FieldContextSpan},
{Name: SpanSpanIDColumn, FieldContext: telemetrytypes.FieldContextSpan},
{Name: SpanTraceStateColumn, FieldContext: telemetrytypes.FieldContextSpan},
{Name: SpanParentSpanIDColumn, FieldContext: telemetrytypes.FieldContextSpan},
{Name: SpanFlagsColumn, FieldContext: telemetrytypes.FieldContextSpan},
{Name: SpanNameColumn, FieldContext: telemetrytypes.FieldContextSpan},
{Name: SpanKindColumn, FieldContext: telemetrytypes.FieldContextSpan},
{Name: SpanKindStringColumn, FieldContext: telemetrytypes.FieldContextSpan},
{Name: SpanDurationNanoColumn, FieldContext: telemetrytypes.FieldContextSpan},
{Name: SpanStatusCodeColumn, FieldContext: telemetrytypes.FieldContextSpan},
{Name: SpanStatusMessageColumn, FieldContext: telemetrytypes.FieldContextSpan},
{Name: SpanStatusCodeStringColumn, FieldContext: telemetrytypes.FieldContextSpan},
{Name: SpanEventsColumn, FieldContext: telemetrytypes.FieldContextSpan},
{Name: SpanLinksColumn, FieldContext: telemetrytypes.FieldContextSpan},
}
// CalculatedSpanFields lists the calculated/derived span columns, in the
// order they should appear when a raw query expands its SelectFields.
CalculatedSpanFields = []telemetrytypes.TelemetryFieldKey{
{Name: SpanResponseStatusCodeColumn, FieldContext: telemetrytypes.FieldContextSpan},
{Name: SpanExternalHTTPURLColumn, FieldContext: telemetrytypes.FieldContextSpan},
{Name: SpanHTTPURLColumn, FieldContext: telemetrytypes.FieldContextSpan},
{Name: SpanExternalHTTPMethodColumn, FieldContext: telemetrytypes.FieldContextSpan},
{Name: SpanHTTPMethodColumn, FieldContext: telemetrytypes.FieldContextSpan},
{Name: SpanHTTPHostColumn, FieldContext: telemetrytypes.FieldContextSpan},
{Name: SpanDBNameColumn, FieldContext: telemetrytypes.FieldContextSpan},
{Name: SpanDBOperationColumn, FieldContext: telemetrytypes.FieldContextSpan},
{Name: SpanHasErrorColumn, FieldContext: telemetrytypes.FieldContextSpan},
{Name: SpanIsRemoteColumn, FieldContext: telemetrytypes.FieldContextSpan},
}
// ContextualSpanColumns lists the typed attribute and resource columns
// selected raw (rather than via ColumnExpressionFor) so that consume.go
// can merge them into unified "attributes" and "resource" maps.
ContextualSpanColumns = []string{
SpanAttributesStringColumn,
SpanAttributesNumberColumn,
SpanAttributesBoolColumn,
SpanResourcesStringColumn,
}
DefaultFields = map[string]telemetrytypes.TelemetryFieldKey{
"timestamp": {
Name: "timestamp",

View File

@@ -46,6 +46,10 @@ var (
KeyType: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString},
ValueType: schema.ColumnTypeBool,
}},
ColumnAttributes: {Name: ColumnAttributes, Type: schema.JSONColumnType{}},
ColumnAttributesPromoted: {Name: ColumnAttributesPromoted, Type: schema.JSONColumnType{}},
"resources_string": {Name: "resources_string", Type: schema.MapColumnType{
KeyType: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString},
ValueType: schema.ColumnTypeString,
@@ -167,6 +171,54 @@ func NewFieldMapper() *defaultFieldMapper {
return &defaultFieldMapper{}
}
// chJSONTypeAndMapColumn returns the ClickHouse dynamic type used to read an
// attribute value out of a JSON column and the corresponding map column used as fallback
func chJSONTypeAndMapColumn(dt telemetrytypes.FieldDataType) (chType string, mapColumn string, ok bool) {
switch dt {
case telemetrytypes.FieldDataTypeString:
return "String", "attributes_string", true
case telemetrytypes.FieldDataTypeInt64,
telemetrytypes.FieldDataTypeFloat64,
telemetrytypes.FieldDataTypeNumber:
return "Float64", "attributes_number", true
case telemetrytypes.FieldDataTypeBool:
return "Bool", "attributes_bool", true
}
return "", "", false
}
// buildAttributeJSONExpr builds a value expression for an attribute key that
// prefers the JSON columns and falls back to the map column. When promoted is
// true the attributes_promoted sub-column (dedicated, fastest) is tried first:
//
// promoted: multiIf(attributes_promoted.`k` IS NOT NULL, attributes_promoted.`k`::T,
// attributes.`k` IS NOT NULL, attributes.`k`::T, attributes_<t>['k'])
// non-promoted: multiIf(attributes.`k` IS NOT NULL, attributes.`k`::T, attributes_<t>['k'])
func buildAttributeJSONExpr(key *telemetrytypes.TelemetryFieldKey, promoted bool) (string, bool) {
chType, mapColumn, ok := chJSONTypeAndMapColumn(key.FieldDataType)
if !ok {
return "", false
}
branches := []string{}
if promoted {
promotedPath := fmt.Sprintf("%s.`%s`", ColumnAttributesPromoted, key.Name)
branches = append(branches,
fmt.Sprintf("%s IS NOT NULL", promotedPath),
fmt.Sprintf("%s::%s", promotedPath, chType),
)
}
jsonPath := fmt.Sprintf("%s.`%s`", ColumnAttributes, key.Name)
branches = append(branches,
fmt.Sprintf("%s IS NOT NULL", jsonPath),
fmt.Sprintf("%s::%s", jsonPath, chType),
)
mapAccess := fmt.Sprintf("%s['%s']", mapColumn, key.Name)
expr := fmt.Sprintf("multiIf(%s, %s)", strings.Join(branches, ", "), mapAccess)
return expr, true
}
func (m *defaultFieldMapper) getColumn(
_ context.Context,
_, _ uint64,
@@ -250,6 +302,12 @@ func (m *defaultFieldMapper) FieldFor(
return key.Name, nil
}
if key.FieldContext == telemetrytypes.FieldContextAttribute && !key.Materialized && !qbtypes.IsFilterIntent(ctx) {
if expr, ok := buildAttributeJSONExpr(key, key.Promoted); ok {
return expr, nil
}
}
columns, err := m.getColumn(ctx, startNs, endNs, key)
if err != nil {
return "", err

View File

@@ -31,33 +31,33 @@ func TestGetFieldKeyName(t *testing.T) {
expectedError: nil,
},
{
name: "Map column type - string attribute",
name: "Attribute - string attribute uses JSON with map fallback",
key: telemetrytypes.TelemetryFieldKey{
Name: "user.id",
FieldContext: telemetrytypes.FieldContextAttribute,
FieldDataType: telemetrytypes.FieldDataTypeString,
},
expectedResult: "attributes_string['user.id']",
expectedResult: "multiIf(attributes.`user.id` IS NOT NULL, attributes.`user.id`::String, attributes_string['user.id'])",
expectedError: nil,
},
{
name: "Map column type - number attribute",
name: "Attribute - number attribute uses JSON with map fallback",
key: telemetrytypes.TelemetryFieldKey{
Name: "request.size",
FieldContext: telemetrytypes.FieldContextAttribute,
FieldDataType: telemetrytypes.FieldDataTypeNumber,
},
expectedResult: "attributes_number['request.size']",
expectedResult: "multiIf(attributes.`request.size` IS NOT NULL, attributes.`request.size`::Float64, attributes_number['request.size'])",
expectedError: nil,
},
{
name: "Map column type - bool attribute",
name: "Attribute - bool attribute uses JSON with map fallback",
key: telemetrytypes.TelemetryFieldKey{
Name: "request.success",
FieldContext: telemetrytypes.FieldContextAttribute,
FieldDataType: telemetrytypes.FieldDataTypeBool,
},
expectedResult: "attributes_bool['request.success']",
expectedResult: "multiIf(attributes.`request.success` IS NOT NULL, attributes.`request.success`::Bool, attributes_bool['request.success'])",
expectedError: nil,
},
{
@@ -82,17 +82,6 @@ func TestGetFieldKeyName(t *testing.T) {
expectedResult: "multiIf(resource.`deployment.environment` IS NOT NULL, resource.`deployment.environment`::String, `resource_string_deployment$$environment_exists`==true, `resource_string_deployment$$environment`, NULL)",
expectedError: nil,
},
{
// Query like `attribute.attribute_string:string` should resolve to `attributes_string['attribute_string']`.
name: "Attribute key whose name collides with contextual map column resolves as a map lookup",
key: telemetrytypes.TelemetryFieldKey{
Name: SpanAttributesStringColumn,
FieldContext: telemetrytypes.FieldContextAttribute,
FieldDataType: telemetrytypes.FieldDataTypeString,
},
expectedResult: "attributes_string['attributes_string']",
expectedError: nil,
},
{
name: "Non-existent column",
key: telemetrytypes.TelemetryFieldKey{
@@ -201,3 +190,101 @@ func TestFieldForResourceWithEvolution(t *testing.T) {
})
}
}
func TestFieldForAttributeJSON(t *testing.T) {
start := uint64(time.Date(2024, 6, 1, 0, 0, 0, 0, time.UTC).UnixNano())
end := uint64(time.Date(2024, 6, 5, 0, 0, 0, 0, time.UTC).UnixNano())
stringKey := telemetrytypes.TelemetryFieldKey{
Name: "http.method",
FieldContext: telemetrytypes.FieldContextAttribute,
FieldDataType: telemetrytypes.FieldDataTypeString,
}
numberKey := telemetrytypes.TelemetryFieldKey{
Name: "http.status_code",
FieldContext: telemetrytypes.FieldContextAttribute,
FieldDataType: telemetrytypes.FieldDataTypeNumber,
}
boolKey := telemetrytypes.TelemetryFieldKey{
Name: "request.success",
FieldContext: telemetrytypes.FieldContextAttribute,
FieldDataType: telemetrytypes.FieldDataTypeBool,
}
// http.route has a real materialized ($$) physical column.
legacyMaterializedKey := telemetrytypes.TelemetryFieldKey{
Name: "http.route",
FieldContext: telemetrytypes.FieldContextAttribute,
FieldDataType: telemetrytypes.FieldDataTypeString,
Materialized: true,
}
promotedStringKey := telemetrytypes.TelemetryFieldKey{
Name: "http.method",
FieldContext: telemetrytypes.FieldContextAttribute,
FieldDataType: telemetrytypes.FieldDataTypeString,
Promoted: true,
}
promotedNumberKey := telemetrytypes.TelemetryFieldKey{
Name: "http.status_code",
FieldContext: telemetrytypes.FieldContextAttribute,
FieldDataType: telemetrytypes.FieldDataTypeNumber,
Promoted: true,
}
testCases := []struct {
name string
key telemetrytypes.TelemetryFieldKey
filterIntent bool
expectedResult string
}{
{
name: "select/group by uses attributes JSON with map fallback - string",
key: stringKey,
expectedResult: "multiIf(attributes.`http.method` IS NOT NULL, attributes.`http.method`::String, attributes_string['http.method'])",
},
{
name: "select/group by uses attributes JSON with map fallback - number",
key: numberKey,
expectedResult: "multiIf(attributes.`http.status_code` IS NOT NULL, attributes.`http.status_code`::Float64, attributes_number['http.status_code'])",
},
{
name: "select/group by uses attributes JSON with map fallback - bool",
key: boolKey,
expectedResult: "multiIf(attributes.`request.success` IS NOT NULL, attributes.`request.success`::Bool, attributes_bool['request.success'])",
},
{
name: "promoted string key tries attributes_promoted first",
key: promotedStringKey,
expectedResult: "multiIf(attributes_promoted.`http.method` IS NOT NULL, attributes_promoted.`http.method`::String, attributes.`http.method` IS NOT NULL, attributes.`http.method`::String, attributes_string['http.method'])",
},
{
name: "promoted number key tries attributes_promoted first",
key: promotedNumberKey,
expectedResult: "multiIf(attributes_promoted.`http.status_code` IS NOT NULL, attributes_promoted.`http.status_code`::Float64, attributes.`http.status_code` IS NOT NULL, attributes.`http.status_code`::Float64, attributes_number['http.status_code'])",
},
{
name: "filter intent stays on map column",
key: stringKey,
filterIntent: true,
expectedResult: "attributes_string['http.method']",
},
{
name: "legacy materialized key keeps physical column",
key: legacyMaterializedKey,
expectedResult: "`attribute_string_http$$route`",
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
fm := NewFieldMapper()
ctx := context.Background()
if tc.filterIntent {
ctx = qbtypes.WithFilterIntent(ctx)
}
key := tc.key
result, err := fm.FieldFor(ctx, start, end, &key)
require.NoError(t, err)
assert.Equal(t, tc.expectedResult, result)
})
}
}

View File

@@ -4,6 +4,7 @@ import (
"context"
"fmt"
"log/slog"
"slices"
"strings"
"github.com/SigNoz/signoz/pkg/errors"
@@ -15,6 +16,7 @@ import (
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
"github.com/huandu/go-sqlbuilder"
"golang.org/x/exp/maps"
)
var (
@@ -84,13 +86,40 @@ func (b *traceQueryStatementBuilder) Build(
start = querybuilder.ToNanoSecs(start)
end = querybuilder.ToNanoSecs(end)
isSelectFieldsEmpty := false
/*
Adding a tech debt note here:
This piece of code is a hot fix and should be removed once we close issue: engineering-pod/issues/3622
*/
/*
-------------------------------- Start of tech debt ----------------------------
*/
if requestType == qbtypes.RequestTypeRaw {
isSelectFieldsEmpty = len(query.SelectFields) == 0
// we are expanding here to ensure that all the conflicts are taken care in adjustKeys
// i.e if there is a conflict we strip away context of the key in adjustKeys
query = b.expandRawSelectFields(query)
selectedFields := query.SelectFields
if len(selectedFields) == 0 {
sortedKeys := maps.Keys(DefaultFields)
slices.Sort(sortedKeys)
for _, key := range sortedKeys {
selectedFields = append(selectedFields, DefaultFields[key])
}
query.SelectFields = selectedFields
}
selectFieldKeys := []string{}
for _, field := range selectedFields {
selectFieldKeys = append(selectFieldKeys, field.Name)
}
for _, x := range []string{"timestamp", "span_id", "trace_id"} {
if !slices.Contains(selectFieldKeys, x) {
query.SelectFields = append(query.SelectFields, DefaultFields[x])
}
}
}
/*
-------------------------------- End of tech debt ----------------------------
*/
// We modify SelectFields above (injecting default fields), and those default
// fields can carry keys that need evolutions, so fetch keys after that.
@@ -110,7 +139,7 @@ func (b *traceQueryStatementBuilder) Build(
switch requestType {
case qbtypes.RequestTypeRaw:
return b.buildListQuery(ctx, q, query, start, end, keys, variables, isSelectFieldsEmpty)
return b.buildListQuery(ctx, q, query, start, end, keys, variables)
case qbtypes.RequestTypeTimeSeries:
return b.buildTimeSeriesQuery(ctx, q, query, start, end, keys, variables)
case qbtypes.RequestTypeScalar:
@@ -276,7 +305,6 @@ func (b *traceQueryStatementBuilder) buildListQuery(
start, end uint64,
keys map[string][]*telemetrytypes.TelemetryFieldKey,
variables map[string]qbtypes.VariableItem,
isSelectFieldsEmpty bool,
) (*qbtypes.Statement, error) {
var (
@@ -293,6 +321,7 @@ func (b *traceQueryStatementBuilder) buildListQuery(
cteArgs = append(cteArgs, args)
}
// TODO: should we deprecate `SelectFields` and return everything from a span like we do for logs?
for _, field := range query.SelectFields {
colExpr, err := b.fm.ColumnExpressionFor(ctx, start, end, &field, keys)
if err != nil {
@@ -301,12 +330,6 @@ func (b *traceQueryStatementBuilder) buildListQuery(
sb.SelectMore(colExpr)
}
if isSelectFieldsEmpty {
for _, col := range ContextualSpanColumns {
sb.SelectMore(col)
}
}
// From table
sb.From(fmt.Sprintf("%s.%s", DBName, SpanIndexV3TableName))
@@ -828,30 +851,3 @@ func (b *traceQueryStatementBuilder) maybeAttachResourceFilter(
sb.Where("resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter)")
return fmt.Sprintf("__resource_filter AS (%s)", stmt.Query), stmt.Args, true, nil
}
// expandRawSelectFields populates SelectFields for raw (list view) queries.
// It must be called before adjustKeys so that normalization runs over the full set.
func (b *traceQueryStatementBuilder) expandRawSelectFields(query qbtypes.QueryBuilderQuery[qbtypes.TraceAggregation]) qbtypes.QueryBuilderQuery[qbtypes.TraceAggregation] {
if len(query.SelectFields) == 0 {
selectFields := make([]telemetrytypes.TelemetryFieldKey, 0, len(IntrinsicSpanFields)+len(CalculatedSpanFields))
selectFields = append(selectFields, IntrinsicSpanFields...)
selectFields = append(selectFields, CalculatedSpanFields...)
query.SelectFields = selectFields
return query
}
selectFields := []telemetrytypes.TelemetryFieldKey{
{Name: SpanTimestampColumn, FieldContext: telemetrytypes.FieldContextSpan},
{Name: SpanTraceIDColumn, FieldContext: telemetrytypes.FieldContextSpan},
{Name: SpanSpanIDColumn, FieldContext: telemetrytypes.FieldContextSpan},
}
for _, field := range query.SelectFields {
// TODO(tvats): If a user specifies attribute.timestamp in the select fields, this loop will basically ignore it, as we already added a field by default. This can be fixed once we close https://github.com/SigNoz/engineering-pod/issues/3693
if field.Name == SpanTimestampColumn || field.Name == SpanTraceIDColumn || field.Name == SpanSpanIDColumn {
continue
}
selectFields = append(selectFields, field)
}
query.SelectFields = selectFields
return query
}

View File

@@ -235,7 +235,7 @@ func TestStatementBuilder(t *testing.T) {
},
},
expected: qbtypes.Statement{
Query: "WITH __resource_filter AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ? GROUP BY fingerprint), __limit_cte AS (SELECT toString(multiIf(multiIf(resource.`service.name` IS NOT NULL, resource.`service.name`::String, mapContains(resources_string, 'service.name'), resources_string['service.name'], NULL) IS NOT NULL, multiIf(resource.`service.name` IS NOT NULL, resource.`service.name`::String, mapContains(resources_string, 'service.name'), resources_string['service.name'], NULL), NULL)) AS `service.name`, sum(multiIf(mapContains(attributes_number, 'metric.max_count') = ?, toFloat64(attributes_number['metric.max_count']), NULL)) AS __result_0 FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? GROUP BY `service.name` ORDER BY __result_0 desc LIMIT ?) SELECT toStartOfInterval(timestamp, INTERVAL 30 SECOND) AS ts, toString(multiIf(multiIf(resource.`service.name` IS NOT NULL, resource.`service.name`::String, mapContains(resources_string, 'service.name'), resources_string['service.name'], NULL) IS NOT NULL, multiIf(resource.`service.name` IS NOT NULL, resource.`service.name`::String, mapContains(resources_string, 'service.name'), resources_string['service.name'], NULL), NULL)) AS `service.name`, sum(multiIf(mapContains(attributes_number, 'metric.max_count') = ?, toFloat64(attributes_number['metric.max_count']), NULL)) AS __result_0 FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? AND (`service.name`) GLOBAL IN (SELECT `service.name` FROM __limit_cte) GROUP BY ts, `service.name` ORDER BY ts desc",
Query: "WITH __resource_filter AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ? GROUP BY fingerprint), __limit_cte AS (SELECT toString(multiIf(multiIf(resource.`service.name` IS NOT NULL, resource.`service.name`::String, mapContains(resources_string, 'service.name'), resources_string['service.name'], NULL) IS NOT NULL, multiIf(resource.`service.name` IS NOT NULL, resource.`service.name`::String, mapContains(resources_string, 'service.name'), resources_string['service.name'], NULL), NULL)) AS `service.name`, sum(multiIf(mapContains(attributes_number, 'metric.max_count') = ?, toFloat64(multiIf(attributes.`metric.max_count` IS NOT NULL, attributes.`metric.max_count`::Float64, attributes_number['metric.max_count'])), NULL)) AS __result_0 FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? GROUP BY `service.name` ORDER BY __result_0 desc LIMIT ?) SELECT toStartOfInterval(timestamp, INTERVAL 30 SECOND) AS ts, toString(multiIf(multiIf(resource.`service.name` IS NOT NULL, resource.`service.name`::String, mapContains(resources_string, 'service.name'), resources_string['service.name'], NULL) IS NOT NULL, multiIf(resource.`service.name` IS NOT NULL, resource.`service.name`::String, mapContains(resources_string, 'service.name'), resources_string['service.name'], NULL), NULL)) AS `service.name`, sum(multiIf(mapContains(attributes_number, 'metric.max_count') = ?, toFloat64(multiIf(attributes.`metric.max_count` IS NOT NULL, attributes.`metric.max_count`::Float64, attributes_number['metric.max_count'])), NULL)) AS __result_0 FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? AND (`service.name`) GLOBAL IN (SELECT `service.name` FROM __limit_cte) GROUP BY ts, `service.name` ORDER BY ts desc",
Args: []any{"redis-manual", "%service.name%", "%service.name\":\"redis-manual%", uint64(1747945619), uint64(1747983448), true, "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), 10, true, "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448)},
},
expectedErr: nil,
@@ -462,7 +462,7 @@ func TestStatementBuilderListQuery(t *testing.T) {
},
},
expected: qbtypes.Statement{
Query: "WITH __resource_filter AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ? GROUP BY fingerprint) SELECT timestamp AS `timestamp`, trace_id AS `trace_id`, span_id AS `span_id`, name AS `name`, multiIf(resource.`service.name` IS NOT NULL, resource.`service.name`::String, mapContains(resources_string, 'service.name'), resources_string['service.name'], NULL) AS `service.name`, duration_nano AS `duration_nano`, `attribute_number_cart$$items_count` AS `cart.items_count` FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? LIMIT ?",
Query: "WITH __resource_filter AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ? GROUP BY fingerprint) SELECT name AS `name`, multiIf(resource.`service.name` IS NOT NULL, resource.`service.name`::String, mapContains(resources_string, 'service.name'), resources_string['service.name'], NULL) AS `service.name`, duration_nano AS `duration_nano`, `attribute_number_cart$$items_count` AS `cart.items_count`, timestamp AS `timestamp`, span_id AS `span_id`, trace_id AS `trace_id` FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? LIMIT ?",
Args: []any{"redis-manual", "%service.name%", "%service.name\":\"redis-manual%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), 10},
},
expectedErr: nil,
@@ -491,7 +491,7 @@ func TestStatementBuilderListQuery(t *testing.T) {
Limit: 10,
},
expected: qbtypes.Statement{
Query: "WITH __resource_filter AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ? GROUP BY fingerprint) SELECT timestamp AS `timestamp`, trace_id AS `trace_id`, span_id AS `span_id`, trace_state AS `trace_state`, parent_span_id AS `parent_span_id`, flags AS `flags`, name AS `name`, kind AS `kind`, kind_string AS `kind_string`, duration_nano AS `duration_nano`, status_code AS `status_code`, status_message AS `status_message`, status_code_string AS `status_code_string`, events AS `events`, links AS `links`, response_status_code AS `response_status_code`, external_http_url AS `external_http_url`, http_url AS `http_url`, external_http_method AS `external_http_method`, http_method AS `http_method`, http_host AS `http_host`, db_name AS `db_name`, db_operation AS `db_operation`, has_error AS `has_error`, is_remote AS `is_remote`, attributes_string, attributes_number, attributes_bool, resources_string FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? ORDER BY attributes_string['user.id'] AS `user.id` desc LIMIT ?",
Query: "WITH __resource_filter AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ? GROUP BY fingerprint) SELECT duration_nano AS `duration_nano`, name AS `name`, response_status_code AS `response_status_code`, multiIf(resource.`service.name` IS NOT NULL, resource.`service.name`::String, mapContains(resources_string, 'service.name'), resources_string['service.name'], NULL) AS `service.name`, span_id AS `span_id`, timestamp AS `timestamp`, trace_id AS `trace_id` FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? ORDER BY multiIf(attributes.`user.id` IS NOT NULL, attributes.`user.id`::String, attributes_string['user.id']) AS `user.id` desc LIMIT ?",
Args: []any{"redis-manual", "%service.name%", "%service.name\":\"redis-manual%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), 10},
},
expectedErr: nil,
@@ -535,7 +535,7 @@ func TestStatementBuilderListQuery(t *testing.T) {
Limit: 10,
},
expected: qbtypes.Statement{
Query: "WITH __resource_filter AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ? GROUP BY fingerprint) SELECT timestamp AS `timestamp`, trace_id AS `trace_id`, span_id AS `span_id`, name AS `name`, resource_string_service$$name AS `serviceName`, duration_nano AS `durationNano`, http_method AS `httpMethod`, response_status_code AS `responseStatusCode` FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? LIMIT ?",
Query: "WITH __resource_filter AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ? GROUP BY fingerprint) SELECT name AS `name`, resource_string_service$$name AS `serviceName`, duration_nano AS `durationNano`, http_method AS `httpMethod`, response_status_code AS `responseStatusCode`, timestamp AS `timestamp`, span_id AS `span_id`, trace_id AS `trace_id` FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? LIMIT ?",
Args: []any{"redis-manual", "%service.name%", "%service.name\":\"redis-manual%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), 10},
},
expectedErr: nil,
@@ -579,7 +579,7 @@ func TestStatementBuilderListQuery(t *testing.T) {
Limit: 10,
},
expected: qbtypes.Statement{
Query: "WITH __resource_filter AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ? GROUP BY fingerprint) SELECT timestamp AS `timestamp`, trace_id AS `trace_id`, span_id AS `span_id`, name AS `name`, resource_string_service$$name AS `serviceName`, duration_nano AS `durationNano`, http_method AS `httpMethod`, multiIf(toString(`attribute_string_mixed$$materialization$$key`) != '', toString(`attribute_string_mixed$$materialization$$key`), toString(multiIf(resource.`mixed.materialization.key` IS NOT NULL, resource.`mixed.materialization.key`::String, mapContains(resources_string, 'mixed.materialization.key'), resources_string['mixed.materialization.key'], NULL)) != '', toString(multiIf(resource.`mixed.materialization.key` IS NOT NULL, resource.`mixed.materialization.key`::String, mapContains(resources_string, 'mixed.materialization.key'), resources_string['mixed.materialization.key'], NULL)), NULL) AS `mixed.materialization.key` FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? LIMIT ?",
Query: "WITH __resource_filter AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ? GROUP BY fingerprint) SELECT name AS `name`, resource_string_service$$name AS `serviceName`, duration_nano AS `durationNano`, http_method AS `httpMethod`, multiIf(toString(`attribute_string_mixed$$materialization$$key`) != '', toString(`attribute_string_mixed$$materialization$$key`), toString(multiIf(resource.`mixed.materialization.key` IS NOT NULL, resource.`mixed.materialization.key`::String, mapContains(resources_string, 'mixed.materialization.key'), resources_string['mixed.materialization.key'], NULL)) != '', toString(multiIf(resource.`mixed.materialization.key` IS NOT NULL, resource.`mixed.materialization.key`::String, mapContains(resources_string, 'mixed.materialization.key'), resources_string['mixed.materialization.key'], NULL)), NULL) AS `mixed.materialization.key`, timestamp AS `timestamp`, span_id AS `span_id`, trace_id AS `trace_id` FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? LIMIT ?",
Args: []any{"redis-manual", "%service.name%", "%service.name\":\"redis-manual%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), 10},
},
expectedErr: nil,
@@ -624,7 +624,7 @@ func TestStatementBuilderListQuery(t *testing.T) {
Limit: 10,
},
expected: qbtypes.Statement{
Query: "WITH __resource_filter AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ? GROUP BY fingerprint) SELECT timestamp AS `timestamp`, trace_id AS `trace_id`, span_id AS `span_id`, name AS `name`, resource_string_service$$name AS `serviceName`, duration_nano AS `durationNano`, http_method AS `httpMethod`, `attribute_string_mixed$$materialization$$key` AS `mixed.materialization.key` FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? LIMIT ?",
Query: "WITH __resource_filter AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ? GROUP BY fingerprint) SELECT name AS `name`, resource_string_service$$name AS `serviceName`, duration_nano AS `durationNano`, http_method AS `httpMethod`, `attribute_string_mixed$$materialization$$key` AS `mixed.materialization.key`, timestamp AS `timestamp`, span_id AS `span_id`, trace_id AS `trace_id` FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? LIMIT ?",
Args: []any{"redis-manual", "%service.name%", "%service.name\":\"redis-manual%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), 10},
},
expectedErr: nil,
@@ -746,7 +746,7 @@ func TestStatementBuilderListQueryWithCorruptData(t *testing.T) {
Limit: 10,
},
expected: qbtypes.Statement{
Query: "SELECT timestamp AS `timestamp`, trace_id AS `trace_id`, span_id AS `span_id`, trace_state AS `trace_state`, parent_span_id AS `parent_span_id`, flags AS `flags`, name AS `name`, kind AS `kind`, kind_string AS `kind_string`, duration_nano AS `duration_nano`, status_code AS `status_code`, status_message AS `status_message`, status_code_string AS `status_code_string`, events AS `events`, links AS `links`, response_status_code AS `response_status_code`, external_http_url AS `external_http_url`, http_url AS `http_url`, external_http_method AS `external_http_method`, http_method AS `http_method`, http_host AS `http_host`, db_name AS `db_name`, db_operation AS `db_operation`, has_error AS `has_error`, is_remote AS `is_remote`, attributes_string, attributes_number, attributes_bool, resources_string FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? LIMIT ?",
Query: "SELECT duration_nano AS `duration_nano`, name AS `name`, response_status_code AS `response_status_code`, multiIf(resource.`service.name` IS NOT NULL, resource.`service.name`::String, mapContains(resources_string, 'service.name'), resources_string['service.name'], NULL) AS `service.name`, span_id AS `span_id`, timestamp AS `timestamp`, trace_id AS `trace_id` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? LIMIT ?",
Args: []any{"1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), 10},
},
expectedErr: nil,
@@ -788,7 +788,7 @@ func TestStatementBuilderListQueryWithCorruptData(t *testing.T) {
}},
},
expected: qbtypes.Statement{
Query: "SELECT timestamp AS `timestamp`, trace_id AS `trace_id`, span_id AS `span_id`, trace_state AS `trace_state`, parent_span_id AS `parent_span_id`, flags AS `flags`, name AS `name`, kind AS `kind`, kind_string AS `kind_string`, duration_nano AS `duration_nano`, status_code AS `status_code`, status_message AS `status_message`, status_code_string AS `status_code_string`, events AS `events`, links AS `links`, response_status_code AS `response_status_code`, external_http_url AS `external_http_url`, http_url AS `http_url`, external_http_method AS `external_http_method`, http_method AS `http_method`, http_host AS `http_host`, db_name AS `db_name`, db_operation AS `db_operation`, has_error AS `has_error`, is_remote AS `is_remote`, attributes_string, attributes_number, attributes_bool, resources_string FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? ORDER BY timestamp AS `timestamp` asc LIMIT ?",
Query: "SELECT duration_nano AS `duration_nano`, name AS `name`, response_status_code AS `response_status_code`, multiIf(resource.`service.name` IS NOT NULL, resource.`service.name`::String, mapContains(resources_string, 'service.name'), resources_string['service.name'], NULL) AS `service.name`, span_id AS `span_id`, timestamp AS `timestamp`, trace_id AS `trace_id` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? ORDER BY timestamp AS `timestamp` asc LIMIT ?",
Args: []any{"1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), 10},
},
expectedErr: nil,

View File

@@ -424,20 +424,6 @@ func (b *traceOperatorCTEBuilder) buildNotCTE(leftCTE, rightCTE string) (string,
}
func (b *traceOperatorCTEBuilder) buildFinalQuery(ctx context.Context, selectFromCTE string, requestType qbtypes.RequestType) (*qbtypes.Statement, error) {
// Mirror statement_builder.go::Build: for raw queries, empty selectFields
// expands to the full intrinsic + calculated set, and the list query also
// pulls in the contextual columns so the consume layer can merge them
// into unified attributes/resource (and parse events/links).
isSelectFieldsEmpty := false
if requestType == qbtypes.RequestTypeRaw {
isSelectFieldsEmpty = len(b.operator.SelectFields) == 0
if isSelectFieldsEmpty {
b.operator.SelectFields = make([]telemetrytypes.TelemetryFieldKey, 0, len(IntrinsicSpanFields)+len(CalculatedSpanFields))
b.operator.SelectFields = append(b.operator.SelectFields, IntrinsicSpanFields...)
b.operator.SelectFields = append(b.operator.SelectFields, CalculatedSpanFields...)
}
}
keySelectors := b.getKeySelectors()
keys, _, err := b.stmtBuilder.metadataStore.GetKeysMulti(ctx, keySelectors)
if err != nil {
@@ -447,7 +433,7 @@ func (b *traceOperatorCTEBuilder) buildFinalQuery(ctx context.Context, selectFro
switch requestType {
case qbtypes.RequestTypeRaw:
return b.buildListQuery(ctx, selectFromCTE, keys, isSelectFieldsEmpty)
return b.buildListQuery(ctx, selectFromCTE, keys)
case qbtypes.RequestTypeTimeSeries:
return b.buildTimeSeriesQuery(ctx, selectFromCTE, keys)
case qbtypes.RequestTypeTrace:
@@ -459,11 +445,10 @@ func (b *traceOperatorCTEBuilder) buildFinalQuery(ctx context.Context, selectFro
}
}
func (b *traceOperatorCTEBuilder) buildListQuery(ctx context.Context, selectFromCTE string, keys map[string][]*telemetrytypes.TelemetryFieldKey, isSelectFieldsEmpty bool) (*qbtypes.Statement, error) {
func (b *traceOperatorCTEBuilder) buildListQuery(ctx context.Context, selectFromCTE string, keys map[string][]*telemetrytypes.TelemetryFieldKey) (*qbtypes.Statement, error) {
sb := sqlbuilder.NewSelectBuilder()
// Select core fields. These are always present so the trace operator
// response shape is stable regardless of user-supplied selectFields.
// Select core fields
sb.Select(
"timestamp",
"trace_id",
@@ -497,12 +482,6 @@ func (b *traceOperatorCTEBuilder) buildListQuery(ctx context.Context, selectFrom
selectedFields[field.Name] = true
}
if isSelectFieldsEmpty {
for _, col := range ContextualSpanColumns {
sb.SelectMore(col)
}
}
sb.From(selectFromCTE)
// Add order by support using ColumnExpressionFor

View File

@@ -123,7 +123,7 @@ func TestTraceOperatorStatementBuilder(t *testing.T) {
},
},
expected: qbtypes.Statement{
Query: "WITH toDateTime64(1747947419000000000, 9) AS t_from, toDateTime64(1747983448000000000, 9) AS t_to, 1747945619 AS bucket_from, 1747983448 AS bucket_to, all_spans AS (SELECT *, resource_string_service$$name AS `service.name` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_A AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ? GROUP BY fingerprint), A AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_A) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_B AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ? GROUP BY fingerprint), B AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_B) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), A_INDIR_DESC_B AS (WITH RECURSIVE up AS (SELECT d.trace_id, d.span_id, d.parent_span_id, 0 AS depth FROM B AS d UNION ALL SELECT p.trace_id, p.span_id, p.parent_span_id, up.depth + 1 FROM all_spans AS p JOIN up ON p.trace_id = up.trace_id AND p.span_id = up.parent_span_id WHERE up.depth < 100) SELECT DISTINCT a.* FROM A AS a GLOBAL INNER JOIN (SELECT DISTINCT trace_id, span_id FROM up WHERE depth > 0 ) AS ancestors ON ancestors.trace_id = a.trace_id AND ancestors.span_id = a.span_id) SELECT timestamp, trace_id, span_id, name, duration_nano, parent_span_id, trace_state AS `trace_state`, flags AS `flags`, kind AS `kind`, kind_string AS `kind_string`, status_code AS `status_code`, status_message AS `status_message`, status_code_string AS `status_code_string`, events AS `events`, links AS `links`, response_status_code AS `response_status_code`, external_http_url AS `external_http_url`, http_url AS `http_url`, external_http_method AS `external_http_method`, http_method AS `http_method`, http_host AS `http_host`, db_name AS `db_name`, db_operation AS `db_operation`, has_error AS `has_error`, is_remote AS `is_remote`, attributes_string, attributes_number, attributes_bool, resources_string FROM A_INDIR_DESC_B ORDER BY timestamp DESC LIMIT ? SETTINGS distributed_product_mode='allow', max_memory_usage=10000000000",
Query: "WITH toDateTime64(1747947419000000000, 9) AS t_from, toDateTime64(1747983448000000000, 9) AS t_to, 1747945619 AS bucket_from, 1747983448 AS bucket_to, all_spans AS (SELECT *, resource_string_service$$name AS `service.name` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_A AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ? GROUP BY fingerprint), A AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_A) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_B AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ? GROUP BY fingerprint), B AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_B) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), A_INDIR_DESC_B AS (WITH RECURSIVE up AS (SELECT d.trace_id, d.span_id, d.parent_span_id, 0 AS depth FROM B AS d UNION ALL SELECT p.trace_id, p.span_id, p.parent_span_id, up.depth + 1 FROM all_spans AS p JOIN up ON p.trace_id = up.trace_id AND p.span_id = up.parent_span_id WHERE up.depth < 100) SELECT DISTINCT a.* FROM A AS a GLOBAL INNER JOIN (SELECT DISTINCT trace_id, span_id FROM up WHERE depth > 0 ) AS ancestors ON ancestors.trace_id = a.trace_id AND ancestors.span_id = a.span_id) SELECT timestamp, trace_id, span_id, name, duration_nano, parent_span_id FROM A_INDIR_DESC_B ORDER BY timestamp DESC LIMIT ? SETTINGS distributed_product_mode='allow', max_memory_usage=10000000000",
Args: []any{"1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "gateway", "%service.name%", "%service.name\":\"gateway%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "database", "%service.name%", "%service.name\":\"database%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), 5},
},
expectedErr: nil,
@@ -160,7 +160,7 @@ func TestTraceOperatorStatementBuilder(t *testing.T) {
},
},
expected: qbtypes.Statement{
Query: "WITH toDateTime64(1747947419000000000, 9) AS t_from, toDateTime64(1747983448000000000, 9) AS t_to, 1747945619 AS bucket_from, 1747983448 AS bucket_to, all_spans AS (SELECT *, resource_string_service$$name AS `service.name` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_A AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ? GROUP BY fingerprint), A AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_A) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_B AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ? GROUP BY fingerprint), B AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_B) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), A_AND_B AS (SELECT l.* FROM A AS l INNER JOIN B AS r ON l.trace_id = r.trace_id) SELECT timestamp, trace_id, span_id, name, duration_nano, parent_span_id, trace_state AS `trace_state`, flags AS `flags`, kind AS `kind`, kind_string AS `kind_string`, status_code AS `status_code`, status_message AS `status_message`, status_code_string AS `status_code_string`, events AS `events`, links AS `links`, response_status_code AS `response_status_code`, external_http_url AS `external_http_url`, http_url AS `http_url`, external_http_method AS `external_http_method`, http_method AS `http_method`, http_host AS `http_host`, db_name AS `db_name`, db_operation AS `db_operation`, has_error AS `has_error`, is_remote AS `is_remote`, attributes_string, attributes_number, attributes_bool, resources_string FROM A_AND_B ORDER BY timestamp DESC LIMIT ? SETTINGS distributed_product_mode='allow', max_memory_usage=10000000000",
Query: "WITH toDateTime64(1747947419000000000, 9) AS t_from, toDateTime64(1747983448000000000, 9) AS t_to, 1747945619 AS bucket_from, 1747983448 AS bucket_to, all_spans AS (SELECT *, resource_string_service$$name AS `service.name` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_A AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ? GROUP BY fingerprint), A AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_A) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_B AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ? GROUP BY fingerprint), B AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_B) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), A_AND_B AS (SELECT l.* FROM A AS l INNER JOIN B AS r ON l.trace_id = r.trace_id) SELECT timestamp, trace_id, span_id, name, duration_nano, parent_span_id FROM A_AND_B ORDER BY timestamp DESC LIMIT ? SETTINGS distributed_product_mode='allow', max_memory_usage=10000000000",
Args: []any{"1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "frontend", "%service.name%", "%service.name\":\"frontend%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "backend", "%service.name%", "%service.name\":\"backend%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), 15},
},
expectedErr: nil,
@@ -197,7 +197,7 @@ func TestTraceOperatorStatementBuilder(t *testing.T) {
},
},
expected: qbtypes.Statement{
Query: "WITH toDateTime64(1747947419000000000, 9) AS t_from, toDateTime64(1747983448000000000, 9) AS t_to, 1747945619 AS bucket_from, 1747983448 AS bucket_to, all_spans AS (SELECT *, resource_string_service$$name AS `service.name` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_A AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ? GROUP BY fingerprint), A AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_A) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_B AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ? GROUP BY fingerprint), B AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_B) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), A_OR_B AS (SELECT * FROM A UNION DISTINCT SELECT * FROM B) SELECT timestamp, trace_id, span_id, name, duration_nano, parent_span_id, trace_state AS `trace_state`, flags AS `flags`, kind AS `kind`, kind_string AS `kind_string`, status_code AS `status_code`, status_message AS `status_message`, status_code_string AS `status_code_string`, events AS `events`, links AS `links`, response_status_code AS `response_status_code`, external_http_url AS `external_http_url`, http_url AS `http_url`, external_http_method AS `external_http_method`, http_method AS `http_method`, http_host AS `http_host`, db_name AS `db_name`, db_operation AS `db_operation`, has_error AS `has_error`, is_remote AS `is_remote`, attributes_string, attributes_number, attributes_bool, resources_string FROM A_OR_B ORDER BY timestamp DESC LIMIT ? SETTINGS distributed_product_mode='allow', max_memory_usage=10000000000",
Query: "WITH toDateTime64(1747947419000000000, 9) AS t_from, toDateTime64(1747983448000000000, 9) AS t_to, 1747945619 AS bucket_from, 1747983448 AS bucket_to, all_spans AS (SELECT *, resource_string_service$$name AS `service.name` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_A AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ? GROUP BY fingerprint), A AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_A) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_B AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ? GROUP BY fingerprint), B AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_B) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), A_OR_B AS (SELECT * FROM A UNION DISTINCT SELECT * FROM B) SELECT timestamp, trace_id, span_id, name, duration_nano, parent_span_id FROM A_OR_B ORDER BY timestamp DESC LIMIT ? SETTINGS distributed_product_mode='allow', max_memory_usage=10000000000",
Args: []any{"1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "frontend", "%service.name%", "%service.name\":\"frontend%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "backend", "%service.name%", "%service.name\":\"backend%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), 20},
},
expectedErr: nil,
@@ -234,7 +234,7 @@ func TestTraceOperatorStatementBuilder(t *testing.T) {
},
},
expected: qbtypes.Statement{
Query: "WITH toDateTime64(1747947419000000000, 9) AS t_from, toDateTime64(1747983448000000000, 9) AS t_to, 1747945619 AS bucket_from, 1747983448 AS bucket_to, all_spans AS (SELECT *, resource_string_service$$name AS `service.name` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_A AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ? GROUP BY fingerprint), A AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_A) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_B AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ? GROUP BY fingerprint), B AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_B) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), A_not_B AS (SELECT l.* FROM A AS l WHERE l.trace_id GLOBAL NOT IN (SELECT DISTINCT trace_id FROM B)) SELECT timestamp, trace_id, span_id, name, duration_nano, parent_span_id, trace_state AS `trace_state`, flags AS `flags`, kind AS `kind`, kind_string AS `kind_string`, status_code AS `status_code`, status_message AS `status_message`, status_code_string AS `status_code_string`, events AS `events`, links AS `links`, response_status_code AS `response_status_code`, external_http_url AS `external_http_url`, http_url AS `http_url`, external_http_method AS `external_http_method`, http_method AS `http_method`, http_host AS `http_host`, db_name AS `db_name`, db_operation AS `db_operation`, has_error AS `has_error`, is_remote AS `is_remote`, attributes_string, attributes_number, attributes_bool, resources_string FROM A_not_B ORDER BY timestamp DESC LIMIT ? SETTINGS distributed_product_mode='allow', max_memory_usage=10000000000",
Query: "WITH toDateTime64(1747947419000000000, 9) AS t_from, toDateTime64(1747983448000000000, 9) AS t_to, 1747945619 AS bucket_from, 1747983448 AS bucket_to, all_spans AS (SELECT *, resource_string_service$$name AS `service.name` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_A AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ? GROUP BY fingerprint), A AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_A) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_B AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ? GROUP BY fingerprint), B AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_B) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), A_not_B AS (SELECT l.* FROM A AS l WHERE l.trace_id GLOBAL NOT IN (SELECT DISTINCT trace_id FROM B)) SELECT timestamp, trace_id, span_id, name, duration_nano, parent_span_id FROM A_not_B ORDER BY timestamp DESC LIMIT ? SETTINGS distributed_product_mode='allow', max_memory_usage=10000000000",
Args: []any{"1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "frontend", "%service.name%", "%service.name\":\"frontend%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "backend", "%service.name%", "%service.name\":\"backend%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), 10},
},
expectedErr: nil,
@@ -399,7 +399,7 @@ func TestTraceOperatorStatementBuilder(t *testing.T) {
},
},
expected: qbtypes.Statement{
Query: "WITH toDateTime64(1747947419000000000, 9) AS t_from, toDateTime64(1747983448000000000, 9) AS t_to, 1747945619 AS bucket_from, 1747983448 AS bucket_to, all_spans AS (SELECT *, resource_string_service$$name AS `service.name` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_A AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ? GROUP BY fingerprint), A AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_A) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_B AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ? GROUP BY fingerprint), B AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_B) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), A_DIR_DESC_B AS (SELECT p.* FROM A AS p INNER JOIN B AS c ON p.trace_id = c.trace_id AND p.span_id = c.parent_span_id), __resource_filter_C AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ? GROUP BY fingerprint), C AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_C) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_D AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ? GROUP BY fingerprint), D AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_D) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), C_DIR_DESC_D AS (SELECT p.* FROM C AS p INNER JOIN D AS c ON p.trace_id = c.trace_id AND p.span_id = c.parent_span_id), A_DIR_DESC_B_AND_C_DIR_DESC_D AS (SELECT l.* FROM A_DIR_DESC_B AS l INNER JOIN C_DIR_DESC_D AS r ON l.trace_id = r.trace_id) SELECT timestamp, trace_id, span_id, name, duration_nano, parent_span_id, trace_state AS `trace_state`, flags AS `flags`, kind AS `kind`, kind_string AS `kind_string`, status_code AS `status_code`, status_message AS `status_message`, status_code_string AS `status_code_string`, events AS `events`, links AS `links`, response_status_code AS `response_status_code`, external_http_url AS `external_http_url`, http_url AS `http_url`, external_http_method AS `external_http_method`, http_method AS `http_method`, http_host AS `http_host`, db_name AS `db_name`, db_operation AS `db_operation`, has_error AS `has_error`, is_remote AS `is_remote`, attributes_string, attributes_number, attributes_bool, resources_string FROM A_DIR_DESC_B_AND_C_DIR_DESC_D ORDER BY timestamp DESC LIMIT ? SETTINGS distributed_product_mode='allow', max_memory_usage=10000000000",
Query: "WITH toDateTime64(1747947419000000000, 9) AS t_from, toDateTime64(1747983448000000000, 9) AS t_to, 1747945619 AS bucket_from, 1747983448 AS bucket_to, all_spans AS (SELECT *, resource_string_service$$name AS `service.name` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_A AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ? GROUP BY fingerprint), A AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_A) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_B AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ? GROUP BY fingerprint), B AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_B) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), A_DIR_DESC_B AS (SELECT p.* FROM A AS p INNER JOIN B AS c ON p.trace_id = c.trace_id AND p.span_id = c.parent_span_id), __resource_filter_C AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ? GROUP BY fingerprint), C AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_C) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_D AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ? GROUP BY fingerprint), D AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_D) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), C_DIR_DESC_D AS (SELECT p.* FROM C AS p INNER JOIN D AS c ON p.trace_id = c.trace_id AND p.span_id = c.parent_span_id), A_DIR_DESC_B_AND_C_DIR_DESC_D AS (SELECT l.* FROM A_DIR_DESC_B AS l INNER JOIN C_DIR_DESC_D AS r ON l.trace_id = r.trace_id) SELECT timestamp, trace_id, span_id, name, duration_nano, parent_span_id FROM A_DIR_DESC_B_AND_C_DIR_DESC_D ORDER BY timestamp DESC LIMIT ? SETTINGS distributed_product_mode='allow', max_memory_usage=10000000000",
Args: []any{"1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "frontend", "%service.name%", "%service.name\":\"frontend%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "backend", "%service.name%", "%service.name\":\"backend%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "auth", "%service.name%", "%service.name\":\"auth%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "database", "%service.name%", "%service.name\":\"database%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), 5},
},
expectedErr: nil,
@@ -433,7 +433,7 @@ func TestTraceOperatorStatementBuilder(t *testing.T) {
},
},
expected: qbtypes.Statement{
Query: "WITH toDateTime64(1747947419000000000, 9) AS t_from, toDateTime64(1747983448000000000, 9) AS t_to, 1747945619 AS bucket_from, 1747983448 AS bucket_to, all_spans AS (SELECT *, resource_string_service$$name AS `service.name` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_A AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ? GROUP BY fingerprint), A AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_A) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_B AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ? GROUP BY fingerprint), B AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_B) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), A_INDIR_DESC_B AS (WITH RECURSIVE up AS (SELECT d.trace_id, d.span_id, d.parent_span_id, 0 AS depth FROM B AS d UNION ALL SELECT p.trace_id, p.span_id, p.parent_span_id, up.depth + 1 FROM all_spans AS p JOIN up ON p.trace_id = up.trace_id AND p.span_id = up.parent_span_id WHERE up.depth < 100) SELECT DISTINCT a.* FROM A AS a GLOBAL INNER JOIN (SELECT DISTINCT trace_id, span_id FROM up WHERE depth > 0 ) AS ancestors ON ancestors.trace_id = a.trace_id AND ancestors.span_id = a.span_id), __return_from_B AS (SELECT * FROM B WHERE trace_id IN (SELECT DISTINCT trace_id FROM A_INDIR_DESC_B)) SELECT timestamp, trace_id, span_id, name, duration_nano, parent_span_id, trace_state AS `trace_state`, flags AS `flags`, kind AS `kind`, kind_string AS `kind_string`, status_code AS `status_code`, status_message AS `status_message`, status_code_string AS `status_code_string`, events AS `events`, links AS `links`, response_status_code AS `response_status_code`, external_http_url AS `external_http_url`, http_url AS `http_url`, external_http_method AS `external_http_method`, http_method AS `http_method`, http_host AS `http_host`, db_name AS `db_name`, db_operation AS `db_operation`, has_error AS `has_error`, is_remote AS `is_remote`, attributes_string, attributes_number, attributes_bool, resources_string FROM __return_from_B ORDER BY timestamp DESC LIMIT ? SETTINGS distributed_product_mode='allow', max_memory_usage=10000000000",
Query: "WITH toDateTime64(1747947419000000000, 9) AS t_from, toDateTime64(1747983448000000000, 9) AS t_to, 1747945619 AS bucket_from, 1747983448 AS bucket_to, all_spans AS (SELECT *, resource_string_service$$name AS `service.name` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_A AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ? GROUP BY fingerprint), A AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_A) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_B AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ? GROUP BY fingerprint), B AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_B) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), A_INDIR_DESC_B AS (WITH RECURSIVE up AS (SELECT d.trace_id, d.span_id, d.parent_span_id, 0 AS depth FROM B AS d UNION ALL SELECT p.trace_id, p.span_id, p.parent_span_id, up.depth + 1 FROM all_spans AS p JOIN up ON p.trace_id = up.trace_id AND p.span_id = up.parent_span_id WHERE up.depth < 100) SELECT DISTINCT a.* FROM A AS a GLOBAL INNER JOIN (SELECT DISTINCT trace_id, span_id FROM up WHERE depth > 0 ) AS ancestors ON ancestors.trace_id = a.trace_id AND ancestors.span_id = a.span_id), __return_from_B AS (SELECT * FROM B WHERE trace_id IN (SELECT DISTINCT trace_id FROM A_INDIR_DESC_B)) SELECT timestamp, trace_id, span_id, name, duration_nano, parent_span_id FROM __return_from_B ORDER BY timestamp DESC LIMIT ? SETTINGS distributed_product_mode='allow', max_memory_usage=10000000000",
Args: []any{"1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "gateway", "%service.name%", "%service.name\":\"gateway%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "database", "%service.name%", "%service.name\":\"database%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), 10},
},
expectedErr: nil,
@@ -475,7 +475,7 @@ func TestTraceOperatorStatementBuilder(t *testing.T) {
},
},
expected: qbtypes.Statement{
Query: "WITH toDateTime64(1747947419000000000, 9) AS t_from, toDateTime64(1747983448000000000, 9) AS t_to, 1747945619 AS bucket_from, 1747983448 AS bucket_to, all_spans AS (SELECT *, resource_string_service$$name AS `service.name` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_A AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ? GROUP BY fingerprint), A AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_A) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_B AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ? GROUP BY fingerprint), B AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_B) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), A_INDIR_DESC_B AS (WITH RECURSIVE up AS (SELECT d.trace_id, d.span_id, d.parent_span_id, 0 AS depth FROM B AS d UNION ALL SELECT p.trace_id, p.span_id, p.parent_span_id, up.depth + 1 FROM all_spans AS p JOIN up ON p.trace_id = up.trace_id AND p.span_id = up.parent_span_id WHERE up.depth < 100) SELECT DISTINCT a.* FROM A AS a GLOBAL INNER JOIN (SELECT DISTINCT trace_id, span_id FROM up WHERE depth > 0 ) AS ancestors ON ancestors.trace_id = a.trace_id AND ancestors.span_id = a.span_id), __resource_filter_C AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ? GROUP BY fingerprint), C AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_C) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), A_INDIR_DESC_B_AND_C AS (SELECT l.* FROM A_INDIR_DESC_B AS l INNER JOIN C AS r ON l.trace_id = r.trace_id), __return_from_C AS (SELECT * FROM C WHERE trace_id IN (SELECT DISTINCT trace_id FROM A_INDIR_DESC_B_AND_C)) SELECT timestamp, trace_id, span_id, name, duration_nano, parent_span_id, trace_state AS `trace_state`, flags AS `flags`, kind AS `kind`, kind_string AS `kind_string`, status_code AS `status_code`, status_message AS `status_message`, status_code_string AS `status_code_string`, events AS `events`, links AS `links`, response_status_code AS `response_status_code`, external_http_url AS `external_http_url`, http_url AS `http_url`, external_http_method AS `external_http_method`, http_method AS `http_method`, http_host AS `http_host`, db_name AS `db_name`, db_operation AS `db_operation`, has_error AS `has_error`, is_remote AS `is_remote`, attributes_string, attributes_number, attributes_bool, resources_string FROM __return_from_C ORDER BY timestamp DESC LIMIT ? SETTINGS distributed_product_mode='allow', max_memory_usage=10000000000",
Query: "WITH toDateTime64(1747947419000000000, 9) AS t_from, toDateTime64(1747983448000000000, 9) AS t_to, 1747945619 AS bucket_from, 1747983448 AS bucket_to, all_spans AS (SELECT *, resource_string_service$$name AS `service.name` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_A AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ? GROUP BY fingerprint), A AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_A) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), __resource_filter_B AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ? GROUP BY fingerprint), B AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_B) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), A_INDIR_DESC_B AS (WITH RECURSIVE up AS (SELECT d.trace_id, d.span_id, d.parent_span_id, 0 AS depth FROM B AS d UNION ALL SELECT p.trace_id, p.span_id, p.parent_span_id, up.depth + 1 FROM all_spans AS p JOIN up ON p.trace_id = up.trace_id AND p.span_id = up.parent_span_id WHERE up.depth < 100) SELECT DISTINCT a.* FROM A AS a GLOBAL INNER JOIN (SELECT DISTINCT trace_id, span_id FROM up WHERE depth > 0 ) AS ancestors ON ancestors.trace_id = a.trace_id AND ancestors.span_id = a.span_id), __resource_filter_C AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ? GROUP BY fingerprint), C AS (SELECT * FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter_C) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ?), A_INDIR_DESC_B_AND_C AS (SELECT l.* FROM A_INDIR_DESC_B AS l INNER JOIN C AS r ON l.trace_id = r.trace_id), __return_from_C AS (SELECT * FROM C WHERE trace_id IN (SELECT DISTINCT trace_id FROM A_INDIR_DESC_B_AND_C)) SELECT timestamp, trace_id, span_id, name, duration_nano, parent_span_id FROM __return_from_C ORDER BY timestamp DESC LIMIT ? SETTINGS distributed_product_mode='allow', max_memory_usage=10000000000",
Args: []any{"1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "gateway", "%service.name%", "%service.name\":\"gateway%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "database", "%service.name%", "%service.name\":\"database%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), "auth", "%service.name%", "%service.name\":\"auth%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), 10},
},
expectedErr: nil,
@@ -732,5 +732,5 @@ func TestTraceOperatorStatementBuilderDeduplicatesKeys(t *testing.T) {
require.NoError(t, err)
require.Contains(t, q.Query,
"SELECT toString(multiIf(mapContains(attributes_string, 'http.method') = ?, attributes_string['http.method'], NULL)) AS `http.method`, count() AS __result_0 FROM A GROUP BY `http.method` ORDER BY __result_0 DESC")
"SELECT toString(multiIf(mapContains(attributes_string, 'http.method') = ?, multiIf(attributes.`http.method` IS NOT NULL, attributes.`http.method`::String, attributes_string['http.method']), NULL)) AS `http.method`, count() AS __result_0 FROM A GROUP BY `http.method` ORDER BY __result_0 DESC")
}

View File

@@ -480,9 +480,7 @@ type MetricAggregation struct {
// value filter to apply to the query
ValueFilter *metrictypes.MetricValueFilter `json:"-"`
// reduce to operator for metric scalar requests
ReduceTo ReduceTo `json:"reduceTo,omitzero"`
Reduced bool `json:"-"`
ReduceTo ReduceTo `json:"reduceTo,omitempty"`
}
// Copy creates a deep copy of MetricAggregation.

View File

@@ -0,0 +1,15 @@
package querybuildertypesv5
import "context"
// filterIntentKey is the context key used to mark that usage is for a filter (WHERE) clause
type filterIntentKey struct{}
func WithFilterIntent(ctx context.Context) context.Context {
return context.WithValue(ctx, filterIntentKey{}, true)
}
func IsFilterIntent(ctx context.Context) bool {
v, ok := ctx.Value(filterIntentKey{}).(bool)
return ok && v
}

View File

@@ -236,7 +236,6 @@ type RawStream struct {
Error chan error
}
func roundToNonZeroDecimals(val float64, n int) float64 {
if val == 0 || math.IsNaN(val) || math.IsInf(val, 0) {
return val

View File

@@ -1,54 +0,0 @@
package spantypes
import "encoding/json"
// The Event struct has the data exactly store in the db, while EventV2 is more of what we want to send to client.
type EventV2 struct {
Name string `json:"name"`
TimeUnixNano uint64 `json:"timeUnixNano"`
Attributes map[string]any `json:"attributes,omitempty"`
IsError bool `json:"isError,omitempty"`
}
// Link is the response shape for a span link.
// The refType field is intentionally not decoded; it's a Jaeger-era
// concept that OTel doesn't model, so we drop it on the way out.
type Link struct {
TraceID string `json:"traceId,omitempty"`
SpanID string `json:"spanId,omitempty"`
}
// ParseEvents column (Array(String) of JSON-encoded events) into a slice of Event values.
// Malformed entries are skipped.
func ParseEvents(raw any) []EventV2 {
strs, ok := raw.([]string)
if !ok {
return []EventV2{}
}
events := make([]EventV2, 0, len(strs))
for _, s := range strs {
var e Event
if err := json.Unmarshal([]byte(s), &e); err != nil {
continue
}
events = append(events, EventV2{
Name: e.Name,
TimeUnixNano: e.TimeUnixNano,
Attributes: e.AttributeMap,
IsError: false,
})
}
return events
}
func ParseLinks(raw any) []Link {
s, ok := raw.(string)
if !ok || s == "" {
return []Link{}
}
var links []Link
if err := json.Unmarshal([]byte(s), &links); err != nil {
return []Link{}
}
return links
}

View File

@@ -32,7 +32,6 @@ func (p *PostableWaterfall) Validate() error {
}
// Event represents a span event.
// todo():depricate this and use EventV2 everywhere instead while sending to client.
type Event struct {
Name string `json:"name,omitempty"`
TimeUnixNano uint64 `json:"timeUnixNano,omitempty"`

View File

@@ -40,6 +40,7 @@ type TelemetryFieldKey struct {
JSONPlan JSONAccessPlan `json:"-"`
Indexes []TelemetryFieldKeySkipIndex `json:"-"`
Materialized bool `json:"-"` // refers to promoted in case of body.... fields
Promoted bool `json:"-"`
Evolutions []*EvolutionEntry `json:"-"`
}

View File

@@ -4,7 +4,6 @@ import (
"context"
"github.com/SigNoz/signoz/pkg/types/metrictypes"
"github.com/SigNoz/signoz/pkg/valuer"
)
// MetadataStore is the interface for the telemetry metadata store.
@@ -27,12 +26,12 @@ type MetadataStore interface {
GetAllValues(ctx context.Context, fieldValueSelector *FieldValueSelector) (*TelemetryFieldValues, bool, error)
// FetchTemporality fetches the temporality for metric
FetchTemporality(ctx context.Context, orgID valuer.UUID, queryTimeRangeStartTs, queryTimeRangeEndTs uint64, metricName string) (metrictypes.Temporality, error)
FetchTemporality(ctx context.Context, queryTimeRangeStartTs, queryTimeRangeEndTs uint64, metricName string) (metrictypes.Temporality, error)
// FetchTemporalityMulti fetches the temporality for multiple metrics
FetchTemporalityMulti(ctx context.Context, orgID valuer.UUID, queryTimeRangeStartTs, queryTimeRangeEndTs uint64, metricNames ...string) (map[string]metrictypes.Temporality, error)
FetchTemporalityMulti(ctx context.Context, queryTimeRangeStartTs, queryTimeRangeEndTs uint64, metricNames ...string) (map[string]metrictypes.Temporality, error)
FetchTemporalityAndTypeMulti(ctx context.Context, orgID valuer.UUID, queryTimeRangeStartTs, queryTimeRangeEndTs uint64, metricNames ...string) (map[string]metrictypes.Temporality, map[string]metrictypes.Type, map[string]bool, error)
FetchTemporalityAndTypeMulti(ctx context.Context, queryTimeRangeStartTs, queryTimeRangeEndTs uint64, metricNames ...string) (map[string]metrictypes.Temporality, map[string]metrictypes.Type, error)
// ListLogsJSONIndexes lists the JSON indexes for the logs table.
ListLogsJSONIndexes(ctx context.Context, filters ...string) ([]TelemetryFieldKeySkipIndex, error)

View File

@@ -6,7 +6,6 @@ import (
"github.com/SigNoz/signoz/pkg/types/metrictypes"
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
"github.com/SigNoz/signoz/pkg/valuer"
)
// MockMetadataStore implements the MetadataStore interface for testing purposes.
@@ -17,7 +16,6 @@ type MockMetadataStore struct {
AllValuesMap map[string]*telemetrytypes.TelemetryFieldValues
TemporalityMap map[string]metrictypes.Temporality
TypeMap map[string]metrictypes.Type
ReducedMap map[string]bool
PromotedPathsMap map[string]bool
LogsJSONIndexes []telemetrytypes.TelemetryFieldKeySkipIndex
ColumnEvolutionMetadataMap map[string][]*telemetrytypes.EvolutionEntry
@@ -308,7 +306,7 @@ func (m *MockMetadataStore) SetAllValues(lookupKey string, values *telemetrytype
}
// FetchTemporality fetches the temporality for a metric.
func (m *MockMetadataStore) FetchTemporality(ctx context.Context, orgID valuer.UUID, queryTimeRangeStartTs, queryTimeRangeEndTs uint64, metricName string) (metrictypes.Temporality, error) {
func (m *MockMetadataStore) FetchTemporality(ctx context.Context, queryTimeRangeStartTs, queryTimeRangeEndTs uint64, metricName string) (metrictypes.Temporality, error) {
if temporality, exists := m.TemporalityMap[metricName]; exists {
return temporality, nil
}
@@ -316,7 +314,7 @@ func (m *MockMetadataStore) FetchTemporality(ctx context.Context, orgID valuer.U
}
// FetchTemporalityMulti fetches the temporality for multiple metrics.
func (m *MockMetadataStore) FetchTemporalityMulti(ctx context.Context, orgID valuer.UUID, queryTimeRangeStartTs, queryTimeRangeEndTs uint64, metricNames ...string) (map[string]metrictypes.Temporality, error) {
func (m *MockMetadataStore) FetchTemporalityMulti(ctx context.Context, queryTimeRangeStartTs, queryTimeRangeEndTs uint64, metricNames ...string) (map[string]metrictypes.Temporality, error) {
result := make(map[string]metrictypes.Temporality)
for _, metricName := range metricNames {
@@ -331,10 +329,9 @@ func (m *MockMetadataStore) FetchTemporalityMulti(ctx context.Context, orgID val
}
// FetchTemporalityMulti fetches the temporality for multiple metrics.
func (m *MockMetadataStore) FetchTemporalityAndTypeMulti(ctx context.Context, orgID valuer.UUID, queryTimeRangeStartTs, queryTimeRangeEndTs uint64, metricNames ...string) (map[string]metrictypes.Temporality, map[string]metrictypes.Type, map[string]bool, error) {
func (m *MockMetadataStore) FetchTemporalityAndTypeMulti(ctx context.Context, queryTimeRangeStartTs, queryTimeRangeEndTs uint64, metricNames ...string) (map[string]metrictypes.Temporality, map[string]metrictypes.Type, error) {
temporalities := make(map[string]metrictypes.Temporality)
types := make(map[string]metrictypes.Type)
reduced := make(map[string]bool)
for _, metricName := range metricNames {
if temporality, exists := m.TemporalityMap[metricName]; exists {
@@ -347,12 +344,9 @@ func (m *MockMetadataStore) FetchTemporalityAndTypeMulti(ctx context.Context, or
} else {
types[metricName] = metrictypes.UnspecifiedType
}
if m.ReducedMap[metricName] {
reduced[metricName] = true
}
}
return temporalities, types, reduced, nil
return temporalities, types, nil
}
// SetTemporality sets the temporality for a metric in the mock store.

View File

@@ -16,42 +16,6 @@ from fixtures import types
from fixtures.fingerprint import LogsOrTracesFingerprint
from fixtures.time import parse_duration, parse_timestamp
# All keys returned by the trace list endpoint when selectFields is empty:
# every intrinsic and calculated column, plus the merged `attributes` and
# `resource` maps that wrap the contextual columns in the response layer.
ALL_SELECT_FIELDS = [
# all intrinsic columns
"timestamp",
"trace_id",
"span_id",
"trace_state",
"parent_span_id",
"flags",
"name",
"kind",
"kind_string",
"duration_nano",
"status_code",
"status_message",
"status_code_string",
"events",
"links",
# all calculated columns
"response_status_code",
"external_http_url",
"http_url",
"external_http_method",
"http_method",
"http_host",
"db_name",
"db_operation",
"has_error",
"is_remote",
# all contextual columns (merged in response layer)
"attributes",
"resource",
]
class TracesKind(Enum):
SPAN_KIND_UNSPECIFIED = 0
@@ -272,10 +236,9 @@ class Traces(ABC):
attributes_number: dict[str, np.float64]
attributes_bool: dict[str, bool]
resources_string: dict[str, str]
# Accepting parsed events and links, but will be stored as list[str], str in db
events: list[dict[str, Any]]
links: list[dict[str, Any]]
resource_json: dict[str, str]
events: list[str]
links: str
response_status_code: str
external_http_url: str
http_url: str
@@ -465,17 +428,10 @@ class Traces(ABC):
)
)
# Process events and derive error events. self.events holds the parsed
# response shape; np_arr() encodes back to the DB format on insert.
# Process events and derive error events
self.events = []
for event in events:
self.events.append(
{
"name": event.name,
"timeUnixNano": int(event.time_unix_nano),
"attributes": dict(event.attribute_map),
}
)
self.events.append(json.dumps([event.name, event.time_unix_nano, event.attribute_map]))
# Create error events for exception events (following Go exporter logic)
if event.name == "exception":
@@ -497,26 +453,7 @@ class Traces(ABC):
),
)
# self.links holds the parsed response shape (trace_id/span_id only;
# ref_type is dropped to match the API). np_arr() re-encodes for DB insert.
self.links = [{"traceId": link.trace_id, "spanId": link.span_id} for link in links_copy]
self._links_db = json.dumps(
[link.__dict__() for link in links_copy],
separators=(",", ":"),
)
# DB shape per event: {"name", "timeUnixNano", "attributeMap"}. Must match
# what the consume-layer parser in pkg/types/spantypes expects.
self._events_db = [
json.dumps(
{
"name": event.name,
"timeUnixNano": int(event.time_unix_nano),
"attributeMap": dict(event.attribute_map),
},
separators=(",", ":"),
)
for event in events
]
self.links = json.dumps([link.__dict__() for link in links_copy], separators=(",", ":"))
# Initialize resource
self.resource = []
@@ -631,8 +568,8 @@ class Traces(ABC):
self.attributes_number,
self.attributes_bool,
self.resources_string,
self._events_db,
self._links_db,
self.events,
self.links,
self.response_status_code,
self.external_http_url,
self.http_url,

View File

@@ -17,16 +17,7 @@ from fixtures.querier import (
index_series_by_label,
make_query_request,
)
from fixtures.traces import (
ALL_SELECT_FIELDS,
TraceIdGenerator,
Traces,
TracesEvent,
TracesKind,
TracesLink,
TracesRefType,
TracesStatusCode,
)
from fixtures.traces import TraceIdGenerator, Traces, TracesKind, TracesStatusCode
def test_traces_list(
@@ -482,9 +473,7 @@ def test_traces_list(
@pytest.mark.parametrize(
"payload,status_code,results",
[
# Case 1: order by timestamp; empty selectFields returns the full
# response shape (all intrinsic + calculated columns plus the merged
# `attributes` and `resource` maps). x[3] (topic-service) is latest.
# Case 1: order by timestamp field which there in attributes as well
pytest.param(
{
"type": "builder_query",
@@ -498,42 +487,19 @@ def test_traces_list(
},
HTTPStatus.OK,
lambda x: [
{
**x[3].attribute_string,
**x[3].attributes_number,
**x[3].attributes_bool,
}, # attributes
x[3].db_name,
x[3].db_operation,
int(x[3].duration_nano),
x[3].events,
x[3].external_http_method,
x[3].external_http_url,
int(x[3].flags),
x[3].has_error,
x[3].http_host,
x[3].http_method,
x[3].http_url,
x[3].is_remote,
int(x[3].kind),
x[3].kind_string,
x[3].links,
x[3].duration_nano,
x[3].name,
x[3].parent_span_id,
x[3].resources_string,
x[3].response_status_code,
x[3].service_name,
x[3].span_id,
int(x[3].status_code),
x[3].status_code_string,
x[3].status_message,
format_timestamp(x[3].timestamp),
x[3].trace_id,
x[3].trace_state,
], # type: Callable[[List[Traces]], List[Any]]
),
# Case 2: order by attribute.timestamp. The key resolves to the
# intrinsic span.timestamp column, so the latest span (x[3]) is
# returned with the same full response shape as Case 1.
# Case 2: order by attribute timestamp field which is there in attributes as well
# This should break but it doesn't because attribute.timestamp gets adjusted to timestamp
# because of default trace.timestamp gets added by default and bug in field mapper picks
# instrinsic field
pytest.param(
{
"type": "builder_query",
@@ -547,37 +513,13 @@ def test_traces_list(
},
HTTPStatus.OK,
lambda x: [
{
**x[3].attribute_string,
**x[3].attributes_number,
**x[3].attributes_bool,
}, # attributes
x[3].db_name,
x[3].db_operation,
int(x[3].duration_nano),
x[3].events,
x[3].external_http_method,
x[3].external_http_url,
int(x[3].flags),
x[3].has_error,
x[3].http_host,
x[3].http_method,
x[3].http_url,
x[3].is_remote,
int(x[3].kind),
x[3].kind_string,
x[3].links,
x[3].duration_nano,
x[3].name,
x[3].parent_span_id,
x[3].resources_string,
x[3].response_status_code,
x[3].service_name,
x[3].span_id,
int(x[3].status_code),
x[3].status_code_string,
x[3].status_message,
format_timestamp(x[3].timestamp),
x[3].trace_id,
x[3].trace_state,
], # type: Callable[[List[Traces]], List[Any]]
),
# Case 3: select timestamp with empty order by
@@ -600,7 +542,7 @@ def test_traces_list(
], # type: Callable[[List[Traces]], List[Any]]
),
# Case 4: select attribute.timestamp with empty order by
# This returns the one span which has attribute.timestamp
# This doesn't return any data because of where_clause using aliased timestamp
pytest.param(
{
"type": "builder_query",
@@ -614,11 +556,7 @@ def test_traces_list(
},
},
HTTPStatus.OK,
lambda x: [
x[0].span_id,
format_timestamp(x[0].timestamp),
x[0].trace_id,
], # type: Callable[[List[Traces]], List[Any]]
lambda x: [], # type: Callable[[List[Traces]], List[Any]]
),
# Case 5: select timestamp with timestamp order by
pytest.param(
@@ -755,159 +693,6 @@ def test_traces_list_with_corrupt_data(
assert data[key] == value
def _verify_events_links_full(rows: list[dict], traces: list[Traces]) -> None:
"""Empty-selectFields case: events/links arrive parsed into structured objects.
Every row's events/links should match the fixture's stored parsed shape
(the fixture's `.events`/`.links` mirror the API response shape directly).
"""
for row, trace in zip(rows, traces, strict=True):
assert row["data"]["events"] == trace.events
assert row["data"]["links"] == trace.links
# Jaeger-era `refType` is dropped at the consume layer.
for link in row["data"]["links"]:
assert "refType" not in link
def _verify_events_links_skip(rows: list[dict], traces: list[Traces]) -> None:
"""Projected-selectFields case: nothing to verify beyond the key set."""
@pytest.mark.parametrize(
"select_fields,status_code,expected_keys,verify_values",
[
pytest.param(
[],
HTTPStatus.OK,
ALL_SELECT_FIELDS,
_verify_events_links_full,
),
pytest.param(
[
{"name": "service.name"},
],
HTTPStatus.OK,
["timestamp", "trace_id", "span_id", "service.name"],
_verify_events_links_skip,
),
],
)
def test_traces_list_with_select_fields(
signoz: types.SigNoz,
create_user_admin: None, # pylint: disable=unused-argument
get_token: Callable[[str, str], str],
insert_traces: Callable[[list[Traces]], None],
select_fields: list[dict],
status_code: HTTPStatus,
expected_keys: list[str],
verify_values: Callable[[list[dict], list[Traces]], None],
) -> None:
"""
Setup:
Insert a root span with no events/links and a child span carrying two
events and one user-supplied link.
Tests:
1. Empty select fields should return all the fields, and the `events` /
`links` columns should arrive parsed into structured objects (events
carry `attributes`, links carry only `traceId`/`spanId` — refType is
dropped at the consume layer).
2. Non-empty select field should return the select field along with
timestamp, trace_id and span_id.
"""
now = datetime.now(tz=UTC).replace(second=0, microsecond=0)
parent_trace_id = TraceIdGenerator.trace_id()
parent_span_id = TraceIdGenerator.span_id()
child_span_id = TraceIdGenerator.span_id()
linked_trace_id = TraceIdGenerator.trace_id()
linked_span_id = TraceIdGenerator.span_id()
event_one = TracesEvent(
name="request_received",
timestamp=now - timedelta(seconds=3, microseconds=500_000),
attribute_map={"http.method": "GET", "http.route": "/api/chat"},
)
event_two = TracesEvent(
name="cache_lookup",
timestamp=now - timedelta(seconds=3, microseconds=400_000),
attribute_map={"cache.hit": "true", "cache.key": "user:123:prompt"},
)
user_link = TracesLink(
trace_id=linked_trace_id,
span_id=linked_span_id,
ref_type=TracesRefType.REF_TYPE_FOLLOWS_FROM,
)
traces = [
# Root span: no events, no links. Verifies the empty-case parsed shape.
Traces(
timestamp=now - timedelta(seconds=4),
duration=timedelta(seconds=3),
trace_id=parent_trace_id,
span_id=parent_span_id,
parent_span_id="",
name="root span",
kind=TracesKind.SPAN_KIND_SERVER,
status_code=TracesStatusCode.STATUS_CODE_OK,
resources={"service.name": "events-links-service"},
attributes={"http.request.method": "GET"},
),
# Child span: two events + one user-supplied link. The fixture
# auto-inserts a CHILD_OF link for the parent, so the parsed response
# contains two links total — the auto-inserted one first.
Traces(
timestamp=now - timedelta(seconds=3),
duration=timedelta(seconds=1),
trace_id=parent_trace_id,
span_id=child_span_id,
parent_span_id=parent_span_id,
name="child span",
kind=TracesKind.SPAN_KIND_INTERNAL,
status_code=TracesStatusCode.STATUS_CODE_OK,
resources={"service.name": "events-links-service"},
attributes={"http.request.method": "GET"},
events=[event_one, event_two],
links=[user_link],
),
]
insert_traces(traces)
token = get_token(USER_ADMIN_EMAIL, USER_ADMIN_PASSWORD)
payload = {
"type": "builder_query",
"spec": {
"name": "A",
"signal": "traces",
"filter": {"expression": "resource.service.name = 'events-links-service'"},
"selectFields": select_fields,
"order": [{"key": {"name": "timestamp"}, "direction": "asc"}],
"limit": 10,
},
}
response = make_query_request(
signoz,
token,
start_ms=int((datetime.now(tz=UTC) - timedelta(minutes=5)).timestamp() * 1000),
end_ms=int(datetime.now(tz=UTC).timestamp() * 1000),
request_type="raw",
queries=[payload],
)
assert response.status_code == status_code
if response.status_code != HTTPStatus.OK:
return
rows = response.json()["data"]["data"]["results"][0]["rows"]
assert len(rows) == 2
for row in rows:
assert set(row["data"].keys()) == set(expected_keys)
verify_values(rows, traces)
@pytest.mark.parametrize(
"order_by,aggregation_alias,expected_status",
[

View File

@@ -36,22 +36,12 @@ from fixtures.querier import (
assert_grouped_scalar,
assert_raw_row_subset,
assert_scalar_value,
find_named_result,
format_timestamp,
generate_traces_with_corrupt_metadata,
get_rows,
make_query_request,
)
from fixtures.traces import (
ALL_SELECT_FIELDS,
TraceIdGenerator,
Traces,
TracesEvent,
TracesKind,
TracesLink,
TracesRefType,
TracesStatusCode,
)
from fixtures.traces import TraceIdGenerator, Traces, TracesKind, TracesStatusCode
def _names(response: requests.Response) -> set:
@@ -623,178 +613,3 @@ def test_trace_operator_with_adjusted_keys(
assert response.status_code == HTTPStatus.OK, response.text
assert_result(response, traces)
# Hardcoded core columns the trace_operator buildListQuery always projects,
# in addition to any user-supplied selectFields.
TRACE_OPERATOR_CORE_FIELDS = [
"timestamp",
"trace_id",
"span_id",
"name",
"duration_nano",
"parent_span_id",
]
def _verify_full_expansion(rows: list[dict], parent_trace: Traces) -> None:
"""Empty-selectFields case: every column from the builder_query parity set
arrives, and events/links are parsed into structured form (refType is
dropped at the consume layer).
"""
assert len(rows) == 1
parent_row = rows[0]["data"]
assert set(parent_row.keys()) == set(ALL_SELECT_FIELDS)
assert parent_row["events"] == parent_trace.events
assert parent_row["links"] == parent_trace.links
for link in parent_row["links"]:
assert "refType" not in link
def _verify_explicit_projection(rows: list[dict], parent_trace: Traces) -> None: # pylint: disable=unused-argument
"""Explicit-selectFields case: only the 6 hardcoded core fields plus the
user-supplied resource.service.name come back. Contextual columns
(events/links/attributes/resource) and the rest of the intrinsics never
appear because the consume-layer merge isn't triggered.
"""
assert len(rows) == 1
parent_row = rows[0]["data"]
assert set(parent_row.keys()) == set(TRACE_OPERATOR_CORE_FIELDS + ["service.name"])
@pytest.mark.parametrize(
"select_fields,verify_values",
[
pytest.param([], _verify_full_expansion, id="empty-select-fields"),
pytest.param(
[{"name": "service.name", "fieldContext": "resource"}],
_verify_explicit_projection,
id="explicit-service-name",
),
],
)
def test_trace_operator_select_fields(
signoz: types.SigNoz,
create_user_admin: None, # pylint: disable=unused-argument
get_token: Callable[[str, str], str],
insert_traces: Callable[[list[Traces]], None],
select_fields: list[dict[str, Any]],
verify_values: Callable[[list[dict], Traces], None],
) -> None:
"""
Setup:
Insert a parent (operation.type = 'parent') with one event and one
user-supplied link, plus a child span (operation.type = 'child').
Tests:
1. With selectFields=[], the `A => B` trace_operator returns every column
in ALL_SELECT_FIELDS, mirroring the builder_query path. Events arrive
as {name, timeUnixNano, attributes} and links as {traceId, spanId}
with refType dropped at the consume layer.
2. With an explicit selectFields=[{"name": "service.name"}], only the 6
hardcoded core columns plus service.name come back — no auto-expansion
to the full set.
See:
- pkg/telemetrytraces/trace_operator_cte_builder.go::buildFinalQuery for
the expansion gate.
- pkg/telemetrytraces/trace_operator_cte_builder.go::buildListQuery for
the per-row SELECT.
"""
now = datetime.now(tz=UTC).replace(second=0, microsecond=0)
trace_id = TraceIdGenerator.trace_id()
parent_span_id = TraceIdGenerator.span_id()
child_span_id = TraceIdGenerator.span_id()
parent_event = TracesEvent(
name="request_received",
timestamp=now - timedelta(seconds=4, microseconds=500_000),
attribute_map={"http.method": "GET"},
)
linked_trace_id = TraceIdGenerator.trace_id()
linked_span_id = TraceIdGenerator.span_id()
user_link = TracesLink(
trace_id=linked_trace_id,
span_id=linked_span_id,
ref_type=TracesRefType.REF_TYPE_FOLLOWS_FROM,
)
parent_trace = Traces(
timestamp=now - timedelta(seconds=5),
duration=timedelta(seconds=4),
trace_id=trace_id,
span_id=parent_span_id,
parent_span_id="",
name="parent-operation",
kind=TracesKind.SPAN_KIND_SERVER,
status_code=TracesStatusCode.STATUS_CODE_OK,
resources={"service.name": "trace-operator-query"},
attributes={"operation.type": "parent"},
events=[parent_event],
links=[user_link],
)
child_trace = Traces(
timestamp=now - timedelta(seconds=4),
duration=timedelta(seconds=1),
trace_id=trace_id,
span_id=child_span_id,
parent_span_id=parent_span_id,
name="child-operation",
kind=TracesKind.SPAN_KIND_INTERNAL,
status_code=TracesStatusCode.STATUS_CODE_OK,
resources={"service.name": "trace-operator-query"},
attributes={"operation.type": "child"},
)
insert_traces([parent_trace, child_trace])
token = get_token(USER_ADMIN_EMAIL, USER_ADMIN_PASSWORD)
operator_spec: dict[str, Any] = {
"name": "C",
"expression": "A => B",
"limit": 10,
"order": [{"key": {"name": "timestamp"}, "direction": "asc"}],
}
if select_fields:
operator_spec["selectFields"] = select_fields
queries = [
{
"type": "builder_query",
"spec": {
"name": "A",
"signal": "traces",
"filter": {"expression": "operation.type = 'parent'"},
"limit": 100,
"disabled": True,
},
},
{
"type": "builder_query",
"spec": {
"name": "B",
"signal": "traces",
"filter": {"expression": "operation.type = 'child'"},
"limit": 100,
"disabled": True,
},
},
{"type": "builder_trace_operator", "spec": operator_spec},
]
response = make_query_request(
signoz,
token,
start_ms=int((datetime.now(tz=UTC) - timedelta(minutes=5)).timestamp() * 1000),
end_ms=int(datetime.now(tz=UTC).timestamp() * 1000),
request_type="raw",
queries=queries,
)
assert response.status_code == HTTPStatus.OK
results = response.json()["data"]["data"]["results"]
trace_operator_result = find_named_result(results, "C")
rows = trace_operator_result["rows"]
verify_values(rows, parent_trace)