Compare commits

...

17 Commits

Author SHA1 Message Date
nityanandagohain
e70836dd2f fix: tests 2026-06-02 16:59:57 +05:30
nityanandagohain
4e14b2d00d Merge remote-tracking branch 'origin/main' into issue_4522 2026-06-02 16:49:32 +05:30
Nityananda Gohain
a71ac2ada6 fix: add adjustkeys in trace operator cte builder (#11349)
* fix: add adjustkeys in trace operator cte builder

* fix: more fixes

* fix: cleanup

* fix: move tests to trace operator file

* fix: address comments

* fix: lint issues

---------

Co-authored-by: Srikanth Chekuri <srikanth.chekuri92@gmail.com>
2026-06-02 10:59:15 +00:00
Vikrant Gupta
0963ff08cd feat(web): disable all integrations by default (#11539) 2026-06-02 09:32:20 +00:00
nityanandagohain
e51c90142b Merge remote-tracking branch 'origin/issue_4522' into issue_4522 2026-06-02 14:49:56 +05:30
nityanandagohain
87f6089298 fix: tests 2026-06-02 14:49:32 +05:30
Nityananda Gohain
a6786c38b3 Merge branch 'main' into issue_4522 2026-06-02 12:47:35 +05:30
nityanandagohain
aec40e499a fix: address comments 2026-06-02 12:46:29 +05:30
Srikanth Chekuri
7c926d593b Merge branch 'main' into issue_4522 2026-06-01 19:53:18 +05:30
Srikanth Chekuri
8a6de08530 Merge branch 'main' into issue_4522 2026-05-16 22:38:16 +05:30
nityanandagohain
04824cf2f2 fix: lint 2026-05-11 22:06:29 +05:30
Nityananda Gohain
384c649ef8 Merge branch 'main' into issue_4522 2026-05-11 22:03:43 +05:30
nityanandagohain
68693f8ffd fix: more updated 2026-05-11 22:03:05 +05:30
nityanandagohain
ca1f92f474 fix: get keys after modifying the selectkeys 2026-05-11 21:19:33 +05:30
nityanandagohain
1ed3d8fc8c fix: minor changes 2026-05-11 20:25:05 +05:30
nityanandagohain
196aa301c4 Merge remote-tracking branch 'origin/main' into issue_4522 2026-05-11 15:06:05 +05:30
nityanandagohain
51fcc22d8a feat: [traces] time aware dynamic field mapper 2026-05-08 18:11:15 +05:30
26 changed files with 1781 additions and 1384 deletions

View File

@@ -64,16 +64,16 @@ web:
settings:
posthog:
# Whether to enable PostHog in web.
enabled: true
enabled: false
appcues:
# Whether to enable Appcues in web.
enabled: true
enabled: false
sentry:
# Whether to enable Sentry in web.
enabled: true
enabled: false
pylon:
# Whether to enable Pylon in web.
enabled: true
enabled: false
##################### Cache #####################
cache:

View File

@@ -186,7 +186,7 @@ func (c *conditionBuilder) conditionFor(
column := columns[0]
if len(key.Evolutions) > 0 {
// we will use the corresponding column and its evolution entry for the query
newColumns, _, err := selectEvolutionsForColumns(columns, key.Evolutions, startNs, endNs)
newColumns, _, err := qbtypes.SelectEvolutionsForColumns(columns, key.Evolutions, startNs, endNs)
if err != nil {
return "", err
}

View File

@@ -3,11 +3,7 @@ package telemetrylogs
import (
"context"
"fmt"
"slices"
"sort"
"strconv"
"strings"
"time"
schema "github.com/SigNoz/signoz-otel-collector/cmd/signozschemamigrator/schema_migrator"
"github.com/SigNoz/signoz-otel-collector/utils"
@@ -137,113 +133,6 @@ func (m *fieldMapper) getColumn(ctx context.Context, key *telemetrytypes.Telemet
return nil, qbtypes.ErrColumnNotFound
}
// selectEvolutionsForColumns selects the appropriate evolution entries for each column based on the time range.
// Logic:
// - Finds the latest base evolution (<= tsStartTime) across ALL columns
// - Rejects all evolutions before this latest base evolution
// - For duplicate evolutions it considers the oldest one (first in ReleaseTime)
// - For each column, includes its evolution if it's >= latest base evolution and <= tsEndTime
// - Results are sorted by ReleaseTime descending (newest first)
func selectEvolutionsForColumns(columns []*schema.Column, evolutions []*telemetrytypes.EvolutionEntry, tsStart, tsEnd uint64) ([]*schema.Column, []*telemetrytypes.EvolutionEntry, error) {
sortedEvolutions := make([]*telemetrytypes.EvolutionEntry, len(evolutions))
copy(sortedEvolutions, evolutions)
// sort the evolutions by ReleaseTime ascending
sort.Slice(sortedEvolutions, func(i, j int) bool {
return sortedEvolutions[i].ReleaseTime.Before(sortedEvolutions[j].ReleaseTime)
})
tsStartTime := time.Unix(0, int64(tsStart))
tsEndTime := time.Unix(0, int64(tsEnd))
// Build evolution map: column name -> evolution
evolutionMap := make(map[string]*telemetrytypes.EvolutionEntry)
for _, evolution := range sortedEvolutions {
if _, exists := evolutionMap[evolution.ColumnName+":"+evolution.FieldName+":"+strconv.Itoa(int(evolution.Version))]; exists {
// since if there is duplicate we would just use the oldest one.
continue
}
evolutionMap[evolution.ColumnName+":"+evolution.FieldName+":"+strconv.Itoa(int(evolution.Version))] = evolution
}
// Find the latest base evolution (<= tsStartTime) across ALL columns
// Evolutions are sorted, so we can break early
var latestBaseEvolutionAcrossAll *telemetrytypes.EvolutionEntry
for _, evolution := range sortedEvolutions {
if evolution.ReleaseTime.After(tsStartTime) {
break
}
latestBaseEvolutionAcrossAll = evolution
}
// We shouldn't reach this, it basically means there is something wrong with the evolutions data
if latestBaseEvolutionAcrossAll == nil {
return nil, nil, errors.Newf(errors.TypeInternal, errors.CodeInternal, "no base evolution found for columns %v", columns)
}
columnLookUpMap := make(map[string]*schema.Column)
for _, column := range columns {
columnLookUpMap[column.Name] = column
}
// Collect column-evolution pairs
type colEvoPair struct {
column *schema.Column
evolution *telemetrytypes.EvolutionEntry
}
pairs := []colEvoPair{}
for _, evolution := range evolutionMap {
// Reject evolutions before the latest base evolution
if evolution.ReleaseTime.Before(latestBaseEvolutionAcrossAll.ReleaseTime) {
continue
}
// skip evolutions after tsEndTime
if evolution.ReleaseTime.After(tsEndTime) || evolution.ReleaseTime.Equal(tsEndTime) {
continue
}
if _, exists := columnLookUpMap[evolution.ColumnName]; !exists {
return nil, nil, errors.Newf(errors.TypeInternal, errors.CodeInternal, "evolution column %s not found in columns %v", evolution.ColumnName, columns)
}
pairs = append(pairs, colEvoPair{columnLookUpMap[evolution.ColumnName], evolution})
}
// If no pairs found, fall back to latestBaseEvolutionAcrossAll for matching columns
if len(pairs) == 0 {
for _, column := range columns {
// Use latestBaseEvolutionAcrossAll if this column name matches its column name
if column.Name == latestBaseEvolutionAcrossAll.ColumnName {
pairs = append(pairs, colEvoPair{column, latestBaseEvolutionAcrossAll})
}
}
}
// Sort by ReleaseTime descending (newest first)
slices.SortFunc(pairs, func(a, b colEvoPair) int {
// Sort by ReleaseTime descending (newest first)
if a.evolution.ReleaseTime.After(b.evolution.ReleaseTime) {
return -1
}
if a.evolution.ReleaseTime.Before(b.evolution.ReleaseTime) {
return 1
}
return 0
})
// Extract results
newColumns := make([]*schema.Column, len(pairs))
evolutionsEntries := make([]*telemetrytypes.EvolutionEntry, len(pairs))
for i, pair := range pairs {
newColumns[i] = pair.column
evolutionsEntries[i] = pair.evolution
}
return newColumns, evolutionsEntries, nil
}
func (m *fieldMapper) FieldFor(ctx context.Context, tsStart, tsEnd uint64, key *telemetrytypes.TelemetryFieldKey) (string, error) {
columns, err := m.getColumn(ctx, key)
if err != nil {
@@ -254,7 +143,7 @@ func (m *fieldMapper) FieldFor(ctx context.Context, tsStart, tsEnd uint64, key *
var evolutionsEntries []*telemetrytypes.EvolutionEntry
if len(key.Evolutions) > 0 {
// we will use the corresponding column and its evolution entry for the query
newColumns, evolutionsEntries, err = selectEvolutionsForColumns(columns, key.Evolutions, tsStart, tsEnd)
newColumns, evolutionsEntries, err = qbtypes.SelectEvolutionsForColumns(columns, key.Evolutions, tsStart, tsEnd)
if err != nil {
return "", err
}

View File

@@ -536,390 +536,6 @@ func TestFieldForWithEvolutions(t *testing.T) {
}
}
func TestSelectEvolutionsForColumns(t *testing.T) {
testCases := []struct {
name string
columns []*schema.Column
evolutions []*telemetrytypes.EvolutionEntry
tsStart uint64
tsEnd uint64
expectedColumns []string // column names
expectedEvols []string // evolution column names
expectedError bool
errorStr string
}{
{
name: "New evolutions at tsStartTime - should include latest evolution",
columns: []*schema.Column{
logsV2Columns["resources_string"],
logsV2Columns["resource"],
},
evolutions: []*telemetrytypes.EvolutionEntry{
{
Signal: telemetrytypes.SignalLogs,
ColumnName: "resources_string",
ColumnType: "Map(LowCardinality(String), String)",
FieldContext: telemetrytypes.FieldContextResource,
FieldName: "__all__",
Version: 0,
ReleaseTime: time.Date(0, 0, 0, 0, 0, 0, 0, time.UTC),
},
{
Signal: telemetrytypes.SignalLogs,
ColumnName: "resource",
ColumnType: "JSON()",
FieldContext: telemetrytypes.FieldContextResource,
FieldName: "__all__",
Version: 1,
ReleaseTime: time.Date(2024, 2, 25, 0, 0, 0, 0, time.UTC),
},
},
tsStart: uint64(time.Date(2024, 2, 25, 0, 0, 0, 0, time.UTC).UnixNano()),
tsEnd: uint64(time.Date(2024, 2, 30, 0, 0, 0, 0, time.UTC).UnixNano()),
expectedColumns: []string{"resource"},
expectedEvols: []string{"resource"},
},
{
name: "New evolutions after tsStartTime but less than tsEndTime - should include both",
columns: []*schema.Column{
logsV2Columns["resources_string"],
logsV2Columns["resource"],
},
evolutions: []*telemetrytypes.EvolutionEntry{
{
Signal: telemetrytypes.SignalLogs,
ColumnName: "resources_string",
ColumnType: "Map(LowCardinality(String), String)",
FieldContext: telemetrytypes.FieldContextResource,
FieldName: "__all__",
Version: 0,
ReleaseTime: time.Date(0, 0, 0, 0, 0, 0, 0, time.UTC),
},
{
Signal: telemetrytypes.SignalLogs,
ColumnName: "resource",
ColumnType: "JSON()",
FieldContext: telemetrytypes.FieldContextResource,
FieldName: "__all__",
Version: 1,
ReleaseTime: time.Date(2024, 2, 3, 0, 0, 0, 0, time.UTC),
},
},
tsStart: uint64(time.Date(2024, 2, 1, 0, 0, 0, 0, time.UTC).UnixNano()),
tsEnd: uint64(time.Date(2024, 2, 15, 0, 0, 0, 0, time.UTC).UnixNano()),
expectedColumns: []string{"resource", "resources_string"}, // sorted by ReleaseTime desc
expectedEvols: []string{"resource", "resources_string"},
},
{
name: "Columns without matching evolutions - should exclude them",
columns: []*schema.Column{
logsV2Columns["resources_string"],
logsV2Columns["resource"], // no evolution for this
logsV2Columns["attributes_string"], // no evolution for this
},
evolutions: []*telemetrytypes.EvolutionEntry{
{
Signal: telemetrytypes.SignalLogs,
ColumnName: "resources_string",
ColumnType: "Map(LowCardinality(String), String)",
FieldContext: telemetrytypes.FieldContextResource,
FieldName: "__all__",
Version: 0,
ReleaseTime: time.Date(2024, 1, 15, 0, 0, 0, 0, time.UTC),
},
},
tsStart: uint64(time.Date(2024, 2, 1, 0, 0, 0, 0, time.UTC).UnixNano()),
tsEnd: uint64(time.Date(2024, 2, 15, 0, 0, 0, 0, time.UTC).UnixNano()),
expectedColumns: []string{"resources_string"},
expectedEvols: []string{"resources_string"},
},
{
name: "New evolutions at tsEndTime - should not include new evolution",
columns: []*schema.Column{
logsV2Columns["resources_string"],
logsV2Columns["resource"],
},
evolutions: []*telemetrytypes.EvolutionEntry{
{
Signal: telemetrytypes.SignalLogs,
ColumnName: "resources_string",
ColumnType: "Map(LowCardinality(String), String)",
FieldContext: telemetrytypes.FieldContextResource,
FieldName: "__all__",
Version: 0,
ReleaseTime: time.Date(0, 0, 0, 0, 0, 0, 0, time.UTC),
},
{
Signal: telemetrytypes.SignalLogs,
ColumnName: "resource",
ColumnType: "JSON()",
FieldContext: telemetrytypes.FieldContextResource,
FieldName: "__all__",
Version: 1,
ReleaseTime: time.Date(2024, 2, 30, 0, 0, 0, 0, time.UTC),
},
},
tsStart: uint64(time.Date(2024, 2, 25, 0, 0, 0, 0, time.UTC).UnixNano()),
tsEnd: uint64(time.Date(2024, 2, 30, 0, 0, 0, 0, time.UTC).UnixNano()),
expectedColumns: []string{"resources_string"},
expectedEvols: []string{"resources_string"},
},
{
name: "New evolutions after tsEndTime - should exclude new",
columns: []*schema.Column{
logsV2Columns["resources_string"],
logsV2Columns["resource"],
},
evolutions: []*telemetrytypes.EvolutionEntry{
{
Signal: telemetrytypes.SignalLogs,
ColumnName: "resources_string",
ColumnType: "Map(LowCardinality(String), String)",
FieldContext: telemetrytypes.FieldContextResource,
FieldName: "__all__",
Version: 0,
ReleaseTime: time.Date(0, 0, 0, 0, 0, 0, 0, time.UTC),
},
{
Signal: telemetrytypes.SignalLogs,
ColumnName: "resource",
ColumnType: "JSON()",
FieldContext: telemetrytypes.FieldContextResource,
FieldName: "__all__",
Version: 1,
ReleaseTime: time.Date(2024, 2, 25, 0, 0, 0, 0, time.UTC),
},
},
tsStart: uint64(time.Date(2024, 2, 1, 0, 0, 0, 0, time.UTC).UnixNano()),
tsEnd: uint64(time.Date(2024, 2, 15, 0, 0, 0, 0, time.UTC).UnixNano()),
expectedColumns: []string{"resources_string"},
expectedEvols: []string{"resources_string"},
},
{
name: "Empty columns array",
columns: []*schema.Column{},
evolutions: []*telemetrytypes.EvolutionEntry{
{
Signal: telemetrytypes.SignalLogs,
ColumnName: "resources_string",
ColumnType: "Map(LowCardinality(String), String)",
FieldContext: telemetrytypes.FieldContextResource,
FieldName: "__all__",
Version: 0,
ReleaseTime: time.Date(2024, 1, 15, 0, 0, 0, 0, time.UTC),
},
},
tsStart: uint64(time.Date(2024, 2, 1, 0, 0, 0, 0, time.UTC).UnixNano()),
tsEnd: uint64(time.Date(2024, 2, 15, 0, 0, 0, 0, time.UTC).UnixNano()),
expectedColumns: []string{},
expectedEvols: []string{},
expectedError: true,
errorStr: "column resources_string not found",
},
{
name: "Duplicate evolutions - should use first encountered (oldest if sorted)",
columns: []*schema.Column{
logsV2Columns["resource"],
},
evolutions: []*telemetrytypes.EvolutionEntry{
{
Signal: telemetrytypes.SignalLogs,
ColumnName: "resources_string",
ColumnType: "Map(LowCardinality(String), String)",
FieldContext: telemetrytypes.FieldContextResource,
FieldName: "__all__",
Version: 0,
ReleaseTime: time.Date(0, 0, 0, 0, 0, 0, 0, time.UTC),
},
{
Signal: telemetrytypes.SignalLogs,
ColumnName: "resource",
ColumnType: "JSON()",
FieldContext: telemetrytypes.FieldContextResource,
FieldName: "__all__",
Version: 1,
ReleaseTime: time.Date(2024, 1, 15, 0, 0, 0, 0, time.UTC),
},
{
Signal: telemetrytypes.SignalLogs,
ColumnName: "resource",
ColumnType: "JSON()",
FieldContext: telemetrytypes.FieldContextResource,
FieldName: "__all__",
Version: 1,
ReleaseTime: time.Date(2024, 1, 20, 0, 0, 0, 0, time.UTC),
},
},
tsStart: uint64(time.Date(2024, 2, 1, 0, 0, 0, 0, time.UTC).UnixNano()),
tsEnd: uint64(time.Date(2024, 2, 15, 0, 0, 0, 0, time.UTC).UnixNano()),
expectedColumns: []string{"resource"},
expectedEvols: []string{"resource"}, // should use first one (older)
},
{
name: "Genuine Duplicate evolutions with new version- should consider both",
columns: []*schema.Column{
logsV2Columns["resources_string"],
logsV2Columns["resource"],
},
evolutions: []*telemetrytypes.EvolutionEntry{
{
Signal: telemetrytypes.SignalLogs,
ColumnName: "resources_string",
ColumnType: "Map(LowCardinality(String), String)",
FieldContext: telemetrytypes.FieldContextResource,
FieldName: "__all__",
Version: 0,
ReleaseTime: time.Date(0, 0, 0, 0, 0, 0, 0, time.UTC),
},
{
Signal: telemetrytypes.SignalLogs,
ColumnName: "resource",
ColumnType: "JSON()",
FieldContext: telemetrytypes.FieldContextResource,
FieldName: "__all__",
Version: 1,
ReleaseTime: time.Date(2024, 1, 15, 0, 0, 0, 0, time.UTC),
},
{
Signal: telemetrytypes.SignalLogs,
ColumnName: "resources_string",
ColumnType: "Map(LowCardinality(String), String)",
FieldContext: telemetrytypes.FieldContextResource,
FieldName: "__all__",
Version: 2,
ReleaseTime: time.Date(2024, 1, 20, 0, 0, 0, 0, time.UTC),
},
},
tsStart: uint64(time.Date(2024, 1, 16, 0, 0, 0, 0, time.UTC).UnixNano()),
tsEnd: uint64(time.Date(2024, 2, 15, 0, 0, 0, 0, time.UTC).UnixNano()),
expectedColumns: []string{"resources_string", "resource"},
expectedEvols: []string{"resources_string", "resource"}, // should use first one (older)
},
{
name: "Evolution exactly at tsEndTime",
columns: []*schema.Column{
logsV2Columns["resources_string"],
logsV2Columns["resource"],
},
evolutions: []*telemetrytypes.EvolutionEntry{
{
Signal: telemetrytypes.SignalLogs,
ColumnName: "resources_string",
ColumnType: "Map(LowCardinality(String), String)",
FieldContext: telemetrytypes.FieldContextResource,
FieldName: "__all__",
ReleaseTime: time.Date(2024, 1, 15, 0, 0, 0, 0, time.UTC),
},
{
Signal: telemetrytypes.SignalLogs,
ColumnName: "resource",
ColumnType: "JSON()",
FieldContext: telemetrytypes.FieldContextResource,
FieldName: "__all__",
ReleaseTime: time.Date(2024, 2, 15, 0, 0, 0, 0, time.UTC), // exactly at tsEnd
},
},
tsStart: uint64(time.Date(2024, 2, 1, 0, 0, 0, 0, time.UTC).UnixNano()),
tsEnd: uint64(time.Date(2024, 2, 15, 0, 0, 0, 0, time.UTC).UnixNano()),
expectedColumns: []string{"resources_string"}, // resource excluded because After(tsEnd) is true
expectedEvols: []string{"resources_string"},
},
{
name: "Single evolution after tsStartTime - JSON body",
columns: []*schema.Column{
logsV2Columns[LogsV2BodyV2Column],
logsV2Columns[LogsV2BodyPromotedColumn],
},
evolutions: []*telemetrytypes.EvolutionEntry{
{
Signal: telemetrytypes.SignalLogs,
ColumnName: LogsV2BodyV2Column,
ColumnType: "JSON()",
FieldContext: telemetrytypes.FieldContextBody,
FieldName: "__all__",
ReleaseTime: time.Unix(0, 0),
},
{
Signal: telemetrytypes.SignalLogs,
ColumnName: LogsV2BodyPromotedColumn,
ColumnType: "JSON()",
FieldContext: telemetrytypes.FieldContextBody,
FieldName: "user.name",
ReleaseTime: time.Date(2024, 2, 2, 0, 0, 0, 0, time.UTC),
},
},
tsStart: uint64(time.Date(2024, 2, 1, 0, 0, 0, 0, time.UTC).UnixNano()),
tsEnd: uint64(time.Date(2024, 2, 15, 0, 0, 0, 0, time.UTC).UnixNano()),
expectedColumns: []string{LogsV2BodyPromotedColumn, LogsV2BodyV2Column}, // sorted by ReleaseTime desc (newest first)
expectedEvols: []string{LogsV2BodyPromotedColumn, LogsV2BodyV2Column},
},
{
name: "No evolution after tsStartTime - JSON body",
columns: []*schema.Column{
logsV2Columns[LogsV2BodyV2Column],
logsV2Columns[LogsV2BodyPromotedColumn],
},
evolutions: []*telemetrytypes.EvolutionEntry{
{
Signal: telemetrytypes.SignalLogs,
ColumnName: LogsV2BodyV2Column,
ColumnType: "JSON()",
FieldContext: telemetrytypes.FieldContextBody,
FieldName: "__all__",
ReleaseTime: time.Unix(0, 0),
},
{
Signal: telemetrytypes.SignalLogs,
ColumnName: LogsV2BodyPromotedColumn,
ColumnType: "JSON()",
FieldContext: telemetrytypes.FieldContextBody,
FieldName: "user.name",
ReleaseTime: time.Date(2024, 2, 2, 0, 0, 0, 0, time.UTC),
},
},
tsStart: uint64(time.Date(2024, 2, 3, 0, 0, 0, 0, time.UTC).UnixNano()),
tsEnd: uint64(time.Date(2024, 2, 15, 0, 0, 0, 0, time.UTC).UnixNano()),
expectedColumns: []string{LogsV2BodyPromotedColumn},
expectedEvols: []string{LogsV2BodyPromotedColumn},
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
resultColumns, resultEvols, err := selectEvolutionsForColumns(tc.columns, tc.evolutions, tc.tsStart, tc.tsEnd)
if tc.expectedError {
assert.Contains(t, err.Error(), tc.errorStr)
} else {
require.NoError(t, err)
assert.Equal(t, len(tc.expectedColumns), len(resultColumns), "column count mismatch")
assert.Equal(t, len(tc.expectedEvols), len(resultEvols), "evolution count mismatch")
resultColumnNames := make([]string, len(resultColumns))
for i, col := range resultColumns {
resultColumnNames[i] = col.Name
}
resultEvolNames := make([]string, len(resultEvols))
for i, evol := range resultEvols {
resultEvolNames[i] = evol.ColumnName
}
for i := range tc.expectedColumns {
assert.Equal(t, resultColumnNames[i], tc.expectedColumns[i], "expected column missing: "+tc.expectedColumns[i])
}
for i := range tc.expectedEvols {
assert.Equal(t, resultEvolNames[i], tc.expectedEvols[i], "expected evolution missing: "+tc.expectedEvols[i])
}
// Verify sorting: should be descending by ReleaseTime
for i := 0; i < len(resultEvols)-1; i++ {
assert.True(t, !resultEvols[i].ReleaseTime.Before(resultEvols[i+1].ReleaseTime),
"evolutions should be sorted descending by ReleaseTime")
}
}
})
}
}
func TestFieldForWithMaterialized(t *testing.T) {
ctx := context.Background()

View File

@@ -344,6 +344,11 @@ func (t *telemetryMetaStore) getTracesKeys(ctx context.Context, fieldKeySelector
})
}
}
if err = t.updateColumnEvolutionMetadataForKeys(ctx, keys); err != nil {
return nil, false, err
}
return keys, complete, nil
}
@@ -689,7 +694,7 @@ func (t *telemetryMetaStore) getLogsKeys(ctx context.Context, fieldKeySelectors
}
}
if _, err := t.updateColumnEvolutionMetadataForKeys(ctx, keys); err != nil {
if err := t.updateColumnEvolutionMetadataForKeys(ctx, keys); err != nil {
return nil, false, err
}
@@ -2370,8 +2375,8 @@ func (k *telemetryMetaStore) fetchEvolutionEntryFromClickHouse(ctx context.Conte
return entries, nil
}
// Get retrieves all evolutions for the given selectors from DB.
func (k *telemetryMetaStore) updateColumnEvolutionMetadataForKeys(ctx context.Context, keysToUpdate []*telemetrytypes.TelemetryFieldKey) (map[string][]*telemetrytypes.EvolutionEntry, error) {
// updateColumnEvolutionMetadataForKeys updates the evolution field for keys.
func (k *telemetryMetaStore) updateColumnEvolutionMetadataForKeys(ctx context.Context, keysToUpdate []*telemetrytypes.TelemetryFieldKey) error {
var metadataKeySelectors []*telemetrytypes.EvolutionSelector
for _, keySelector := range keysToUpdate {
@@ -2385,7 +2390,7 @@ func (k *telemetryMetaStore) updateColumnEvolutionMetadataForKeys(ctx context.Co
evolutions, err := k.fetchEvolutionEntryFromClickHouse(ctx, metadataKeySelectors)
if err != nil {
return nil, errors.Newf(errors.TypeInternal, errors.CodeInternal, "failed to fetch evolution from clickhouse %s", err.Error())
return errors.Newf(errors.TypeInternal, errors.CodeInternal, "failed to fetch evolution from clickhouse %s", err.Error())
}
evolutionsByUniqueKey := make(map[string][]*telemetrytypes.EvolutionEntry)
@@ -2416,7 +2421,7 @@ func (k *telemetryMetaStore) updateColumnEvolutionMetadataForKeys(ctx context.Co
}
}
}
return evolutionsByUniqueKey, nil
return nil
}
// chunkSizeFirstSeenMetricMetadata limits the number of tuples per SQL query to avoid hitting the max_query_size limit.

View File

@@ -2,8 +2,12 @@ package telemetrymetadata
import (
"context"
"regexp"
"testing"
cmock "github.com/SigNoz/clickhouse-go-mock"
"github.com/SigNoz/signoz/pkg/errors"
"github.com/SigNoz/signoz/pkg/flagger/flaggertest"
"github.com/SigNoz/signoz/pkg/instrumentation/instrumentationtest"
"github.com/SigNoz/signoz/pkg/telemetryaudit"
"github.com/SigNoz/signoz/pkg/telemetrylogs"
@@ -13,12 +17,24 @@ import (
"github.com/SigNoz/signoz/pkg/telemetrystore/telemetrystoretest"
"github.com/SigNoz/signoz/pkg/telemetrytraces"
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
cmock "github.com/SigNoz/clickhouse-go-mock"
"github.com/stretchr/testify/assert"
"github.com/SigNoz/signoz/pkg/flagger/flaggertest"
"github.com/stretchr/testify/require"
)
type regexMatcher struct {
}
func (m *regexMatcher) Match(expectedSQL, actualSQL string) error {
re, err := regexp.Compile(expectedSQL)
if err != nil {
return err
}
if !re.MatchString(actualSQL) {
return errors.NewInvalidInputf(errors.CodeInvalidInput, "expected query to contain %s, got %s", expectedSQL, actualSQL)
}
return nil
}
func TestGetFirstSeenFromMetricMetadata(t *testing.T) {
mockTelemetryStore := telemetrystoretest.New(telemetrystore.Config{}, &regexMatcher{})
mock := mockTelemetryStore.Mock()

View File

@@ -1,582 +0,0 @@
package telemetrymetadata
import (
"context"
"regexp"
"testing"
"github.com/SigNoz/signoz/pkg/errors"
"github.com/SigNoz/signoz/pkg/instrumentation/instrumentationtest"
"github.com/SigNoz/signoz/pkg/telemetryaudit"
"github.com/SigNoz/signoz/pkg/telemetrylogs"
"github.com/SigNoz/signoz/pkg/telemetrymeter"
"github.com/SigNoz/signoz/pkg/telemetrymetrics"
"github.com/SigNoz/signoz/pkg/telemetrystore"
"github.com/SigNoz/signoz/pkg/telemetrystore/telemetrystoretest"
"github.com/SigNoz/signoz/pkg/telemetrytraces"
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
cmock "github.com/SigNoz/clickhouse-go-mock"
"github.com/stretchr/testify/assert"
"github.com/SigNoz/signoz/pkg/flagger/flaggertest"
"github.com/stretchr/testify/require"
)
func newTestTelemetryMetaStoreTestHelper(t *testing.T, store telemetrystore.TelemetryStore) telemetrytypes.MetadataStore {
t.Helper()
return NewTelemetryMetaStore(
instrumentationtest.New().ToProviderSettings(),
store,
telemetrytraces.DBName,
telemetrytraces.TagAttributesV2TableName,
telemetrytraces.SpanAttributesKeysTblName,
telemetrytraces.SpanIndexV3TableName,
telemetrymetrics.DBName,
telemetrymetrics.AttributesMetadataTableName,
telemetrymeter.DBName,
telemetrymeter.SamplesAgg1dTableName,
telemetrylogs.DBName,
telemetrylogs.LogsV2TableName,
telemetrylogs.TagAttributesV2TableName,
telemetrylogs.LogAttributeKeysTblName,
telemetrylogs.LogResourceKeysTblName,
telemetryaudit.DBName,
telemetryaudit.AuditLogsTableName,
telemetryaudit.TagAttributesTableName,
telemetryaudit.LogAttributeKeysTblName,
telemetryaudit.LogResourceKeysTblName,
DBName,
AttributesMetadataLocalTableName,
ColumnEvolutionMetadataTableName,
flaggertest.New(t),
)
}
type regexMatcher struct {
}
func (m *regexMatcher) Match(expectedSQL, actualSQL string) error {
re, err := regexp.Compile(expectedSQL)
if err != nil {
return err
}
if !re.MatchString(actualSQL) {
return errors.NewInvalidInputf(errors.CodeInvalidInput, "expected query to contain %s, got %s", expectedSQL, actualSQL)
}
return nil
}
func TestGetKeys(t *testing.T) {
mockTelemetryStore := telemetrystoretest.New(telemetrystore.Config{}, &regexMatcher{})
mock := mockTelemetryStore.Mock()
metadata := newTestTelemetryMetaStoreTestHelper(t, mockTelemetryStore)
rows := cmock.NewRows([]cmock.ColumnType{
{Name: "statement", Type: "String"},
}, [][]any{{"CREATE TABLE signoz_traces.signoz_index_v3"}})
mock.
ExpectSelect("SHOW CREATE TABLE signoz_traces.distributed_signoz_index_v3").
WillReturnRows(rows)
query := `SELECT.*`
mock.ExpectQuery(query).
WithArgs("%http.method%", telemetrytypes.FieldDataTypeString.TagDataType(), 11).
WillReturnRows(cmock.NewRows([]cmock.ColumnType{
{Name: "tag_key", Type: "String"},
{Name: "tag_type", Type: "String"},
{Name: "tag_data_type", Type: "String"},
{Name: "priority", Type: "UInt8"},
}, [][]any{{"http.method", "tag", "String", 1}, {"http.method", "tag", "String", 1}}))
keys, _, err := metadata.GetKeys(context.Background(), &telemetrytypes.FieldKeySelector{
Signal: telemetrytypes.SignalTraces,
FieldContext: telemetrytypes.FieldContextSpan,
FieldDataType: telemetrytypes.FieldDataTypeString,
Name: "http.method",
Limit: 10,
})
if err != nil {
t.Fatalf("Failed to get keys: %v", err)
}
t.Logf("Keys: %v", keys)
}
func TestApplyBackwardCompatibleKeys(t *testing.T) {
tests := []struct {
name string
inputKeys []*telemetrytypes.TelemetryFieldKey
expectedKeys []string
notExpectedKey string
}{
{
name: "bidirectional mapping: net.peer.name -> server.address",
inputKeys: []*telemetrytypes.TelemetryFieldKey{
{
Name: "net.peer.name",
Signal: telemetrytypes.SignalTraces,
FieldContext: telemetrytypes.FieldContextAttribute,
FieldDataType: telemetrytypes.FieldDataTypeString,
},
},
expectedKeys: []string{"net.peer.name", "server.address"},
},
{
name: "bidirectional mapping: server.address -> net.peer.name",
inputKeys: []*telemetrytypes.TelemetryFieldKey{
{
Name: "server.address",
Signal: telemetrytypes.SignalTraces,
FieldContext: telemetrytypes.FieldContextResource,
FieldDataType: telemetrytypes.FieldDataTypeString,
},
},
expectedKeys: []string{"server.address", "net.peer.name"},
},
{
name: "bidirectional mapping: http.url -> url.full",
inputKeys: []*telemetrytypes.TelemetryFieldKey{
{
Name: "http.url",
Signal: telemetrytypes.SignalTraces,
FieldContext: telemetrytypes.FieldContextAttribute,
FieldDataType: telemetrytypes.FieldDataTypeString,
},
},
expectedKeys: []string{"http.url", "url.full"},
},
{
name: "bidirectional mapping: url.full -> http.url",
inputKeys: []*telemetrytypes.TelemetryFieldKey{
{
Name: "url.full",
Signal: telemetrytypes.SignalTraces,
FieldContext: telemetrytypes.FieldContextAttribute,
FieldDataType: telemetrytypes.FieldDataTypeString,
},
},
expectedKeys: []string{"url.full", "http.url"},
},
{
name: "key without alias",
inputKeys: []*telemetrytypes.TelemetryFieldKey{
{
Name: "custom.attribute",
Signal: telemetrytypes.SignalTraces,
FieldContext: telemetrytypes.FieldContextAttribute,
FieldDataType: telemetrytypes.FieldDataTypeString,
},
},
expectedKeys: []string{"custom.attribute"},
notExpectedKey: "server.address",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
mockTelemetryStore := telemetrystoretest.New(telemetrystore.Config{}, &regexMatcher{})
mock := mockTelemetryStore.Mock()
metadata := newTestTelemetryMetaStoreTestHelper(t, mockTelemetryStore)
hasTraces := false
hasLogs := false
for _, key := range tt.inputKeys {
switch key.Signal {
case telemetrytypes.SignalTraces:
hasTraces = true
case telemetrytypes.SignalLogs:
hasLogs = true
}
}
if hasTraces {
mock.ExpectSelect("SHOW CREATE TABLE signoz_traces.distributed_signoz_index_v3").
WillReturnRows(cmock.NewRows([]cmock.ColumnType{
{Name: "statement", Type: "String"},
}, [][]any{{"CREATE TABLE signoz_traces.signoz_index_v3"}}))
var args []interface{}
var rows [][]any
for _, key := range tt.inputKeys {
if key.Signal == telemetrytypes.SignalTraces {
tagType := "tag"
if key.FieldContext == telemetrytypes.FieldContextResource {
tagType = "resource"
}
args = append(args, "%"+key.Name+"%", tagType, key.FieldDataType.TagDataType())
rows = append(rows, []any{key.Name, tagType, key.FieldDataType.TagDataType(), 1})
}
}
args = append(args, 11)
mock.ExpectQuery(`SELECT.*`).
WithArgs(args...).
WillReturnRows(cmock.NewRows([]cmock.ColumnType{
{Name: "tag_key", Type: "String"},
{Name: "tag_type", Type: "String"},
{Name: "tag_data_type", Type: "String"},
{Name: "priority", Type: "UInt8"},
}, rows))
}
if hasLogs {
var args []interface{}
var rows [][]any
for _, key := range tt.inputKeys {
if key.Signal == telemetrytypes.SignalLogs {
tagType := "tag"
if key.FieldContext == telemetrytypes.FieldContextResource {
tagType = "resource"
}
args = append(args, "%"+key.Name+"%", tagType, key.FieldDataType.TagDataType())
rows = append(rows, []any{key.Name, tagType, key.FieldDataType.TagDataType(), 1})
}
}
args = append(args, 11)
mock.ExpectQuery(`SELECT.*`).
WithArgs(args...).
WillReturnRows(cmock.NewRows([]cmock.ColumnType{
{Name: "tag_key", Type: "String"},
{Name: "tag_type", Type: "String"},
{Name: "tag_data_type", Type: "String"},
{Name: "priority", Type: "UInt8"},
}, rows))
}
selectors := []*telemetrytypes.FieldKeySelector{}
for _, key := range tt.inputKeys {
selectors = append(selectors, &telemetrytypes.FieldKeySelector{
Signal: key.Signal,
FieldContext: key.FieldContext,
FieldDataType: key.FieldDataType,
Name: key.Name,
Limit: 10,
})
}
resultMap, _, err := metadata.GetKeysMulti(context.Background(), selectors)
require.NoError(t, err, "GetKeysMulti should not return an error")
for _, expectedKey := range tt.expectedKeys {
assert.Contains(t, resultMap, expectedKey, "Expected key %q to exist in result map", expectedKey)
}
if tt.notExpectedKey != "" {
assert.NotContains(t, resultMap, tt.notExpectedKey, "Did not expect key %q to exist in result map", tt.notExpectedKey)
}
for _, srcKey := range tt.inputKeys {
backwardCompatKeys := GetBackwardCompatKeysForSignal(srcKey.Signal)
if aliasKey, ok := backwardCompatKeys[srcKey.Name]; ok {
aliasExistedInInput := false
for _, inputKey := range tt.inputKeys {
if inputKey.Name == aliasKey {
aliasExistedInInput = true
break
}
}
if !aliasExistedInInput {
if aliasEntries, exists := resultMap[aliasKey]; exists && len(aliasEntries) > 0 {
aliasEntry := aliasEntries[0]
assert.Equal(t, srcKey.Signal, aliasEntry.Signal, "Alias %q should have same signal", aliasKey)
assert.Equal(t, srcKey.FieldContext, aliasEntry.FieldContext, "Alias %q should have same field context", aliasKey)
assert.Equal(t, srcKey.FieldDataType, aliasEntry.FieldDataType, "Alias %q should have same field data type", aliasKey)
}
}
}
}
require.NoError(t, mock.ExpectationsWereMet(), "All SQL expectations should be met")
})
}
}
func TestEnrichWithIntrinsicMetricKeys(t *testing.T) {
result := enrichWithIntrinsicMetricKeys(
map[string][]*telemetrytypes.TelemetryFieldKey{},
[]*telemetrytypes.FieldKeySelector{
{
Signal: telemetrytypes.SignalMetrics,
Name: "metric",
SelectorMatchType: telemetrytypes.FieldSelectorMatchTypeFuzzy,
},
},
)
require.Contains(t, result, "metric_name")
assert.Equal(t, telemetrytypes.FieldContextMetric, result["metric_name"][0].FieldContext)
result = enrichWithIntrinsicMetricKeys(
map[string][]*telemetrytypes.TelemetryFieldKey{},
[]*telemetrytypes.FieldKeySelector{
{
Signal: telemetrytypes.SignalMetrics,
Name: "metric",
FieldContext: telemetrytypes.FieldContextAttribute,
SelectorMatchType: telemetrytypes.FieldSelectorMatchTypeFuzzy,
},
},
)
assert.NotContains(t, result, "metric_name")
result = enrichWithIntrinsicMetricKeys(
map[string][]*telemetrytypes.TelemetryFieldKey{},
[]*telemetrytypes.FieldKeySelector{
{
Signal: telemetrytypes.SignalMetrics,
MetricContext: &telemetrytypes.MetricContext{
MetricNamespace: "system.cpu",
},
SelectorMatchType: telemetrytypes.FieldSelectorMatchTypeFuzzy,
},
},
)
assert.NotContains(t, result, "metric_name")
}
func TestGetMetricFieldValuesIntrinsicMetricName(t *testing.T) {
mockTelemetryStore := telemetrystoretest.New(telemetrystore.Config{}, &regexMatcher{})
mock := mockTelemetryStore.Mock()
metadata := newTestTelemetryMetaStoreTestHelper(t, mockTelemetryStore)
valueRows := cmock.NewRows([]cmock.ColumnType{
{Name: "metric_name", Type: "String"},
}, [][]any{{"metric.a"}, {"metric.b"}})
query := `SELECT .*metric_name.*` + telemetrymetrics.TimeseriesV41weekTableName + `.*GROUP BY.*metric_name`
mock.ExpectQuery(query).
WithArgs(51).
WillReturnRows(valueRows)
metadataRows := cmock.NewRows([]cmock.ColumnType{
{Name: "attr_string_value", Type: "String"},
}, [][]any{})
mock.ExpectQuery(regexp.QuoteMeta("SELECT DISTINCT attr_string_value FROM signoz_metrics.distributed_metadata WHERE attr_name = ? LIMIT ?")).
WithArgs("metric_name", 49).
WillReturnRows(metadataRows)
values, complete, err := metadata.(*telemetryMetaStore).getMetricFieldValues(context.Background(), &telemetrytypes.FieldValueSelector{
FieldKeySelector: &telemetrytypes.FieldKeySelector{
Signal: telemetrytypes.SignalMetrics,
Name: "metric_name",
Limit: 50,
SelectorMatchType: telemetrytypes.FieldSelectorMatchTypeFuzzy,
},
Limit: 50,
})
require.NoError(t, err)
assert.True(t, complete)
assert.ElementsMatch(t, []string{"metric.a", "metric.b"}, values.StringValues)
require.NoError(t, mock.ExpectationsWereMet())
}
func TestGetMetricFieldValuesIntrinsicBoolReturnsEmpty(t *testing.T) {
mockTelemetryStore := telemetrystoretest.New(telemetrystore.Config{}, &regexMatcher{})
mock := mockTelemetryStore.Mock()
metadata := newTestTelemetryMetaStoreTestHelper(t, mockTelemetryStore)
metadataRows := cmock.NewRows([]cmock.ColumnType{
{Name: "attr_string_value", Type: "String"},
}, [][]any{})
mock.ExpectQuery(regexp.QuoteMeta("SELECT DISTINCT attr_string_value FROM signoz_metrics.distributed_metadata WHERE attr_name = ? AND attr_datatype = ? AND attr_string_value = ? LIMIT ?")).
WithArgs("is_monotonic", telemetrytypes.FieldDataTypeBool.TagDataType(), "true", 11).
WillReturnRows(metadataRows)
values, complete, err := metadata.(*telemetryMetaStore).getMetricFieldValues(context.Background(), &telemetrytypes.FieldValueSelector{
FieldKeySelector: &telemetrytypes.FieldKeySelector{
Signal: telemetrytypes.SignalMetrics,
Name: "is_monotonic",
FieldDataType: telemetrytypes.FieldDataTypeBool,
Limit: 10,
SelectorMatchType: telemetrytypes.FieldSelectorMatchTypeExact,
},
Value: "true",
Limit: 10,
})
require.NoError(t, err)
assert.True(t, complete)
assert.Empty(t, values.StringValues)
assert.Empty(t, values.BoolValues)
require.NoError(t, mock.ExpectationsWereMet())
}
func TestGetMetricFieldValuesAppliesMetricNamespace(t *testing.T) {
mockTelemetryStore := telemetrystoretest.New(telemetrystore.Config{}, &regexMatcher{})
mock := mockTelemetryStore.Mock()
metadata := newTestTelemetryMetaStoreTestHelper(t, mockTelemetryStore)
valueRows := cmock.NewRows([]cmock.ColumnType{
{Name: "attr_string_value", Type: "String"},
}, [][]any{{"value.a"}})
mock.ExpectQuery(regexp.QuoteMeta("SELECT DISTINCT attr_string_value FROM signoz_metrics.distributed_metadata WHERE attr_name = ? AND metric_name LIKE ? LIMIT ?")).
WithArgs("custom_key", "system.cpu%", 11).
WillReturnRows(valueRows)
values, complete, err := metadata.(*telemetryMetaStore).getMetricFieldValues(context.Background(), &telemetrytypes.FieldValueSelector{
FieldKeySelector: &telemetrytypes.FieldKeySelector{
Signal: telemetrytypes.SignalMetrics,
Name: "custom_key",
Limit: 10,
SelectorMatchType: telemetrytypes.FieldSelectorMatchTypeFuzzy,
MetricContext: &telemetrytypes.MetricContext{
MetricNamespace: "system.cpu",
},
},
Limit: 10,
})
require.NoError(t, err)
assert.True(t, complete)
assert.ElementsMatch(t, []string{"value.a"}, values.StringValues)
require.NoError(t, mock.ExpectationsWereMet())
}
func TestGetMetricFieldValuesIntrinsicMetricNameAppliesMetricNamespace(t *testing.T) {
mockTelemetryStore := telemetrystoretest.New(telemetrystore.Config{}, &regexMatcher{})
mock := mockTelemetryStore.Mock()
metadata := newTestTelemetryMetaStoreTestHelper(t, mockTelemetryStore)
valueRows := cmock.NewRows([]cmock.ColumnType{
{Name: "metric_name", Type: "String"},
}, [][]any{{"system.cpu.utilization"}})
mock.ExpectQuery(regexp.QuoteMeta("SELECT metric_name FROM signoz_metrics.distributed_time_series_v4_1week WHERE metric_name LIKE ? GROUP BY metric_name LIMIT ?")).
WithArgs("system.cpu%", 51).
WillReturnRows(valueRows)
metadataRows := cmock.NewRows([]cmock.ColumnType{
{Name: "attr_string_value", Type: "String"},
}, [][]any{})
mock.ExpectQuery(regexp.QuoteMeta("SELECT DISTINCT attr_string_value FROM signoz_metrics.distributed_metadata WHERE attr_name = ? AND metric_name LIKE ? LIMIT ?")).
WithArgs("metric_name", "system.cpu%", 50).
WillReturnRows(metadataRows)
values, complete, err := metadata.(*telemetryMetaStore).getMetricFieldValues(context.Background(), &telemetrytypes.FieldValueSelector{
FieldKeySelector: &telemetrytypes.FieldKeySelector{
Signal: telemetrytypes.SignalMetrics,
Name: "metric_name",
Limit: 50,
SelectorMatchType: telemetrytypes.FieldSelectorMatchTypeFuzzy,
MetricContext: &telemetrytypes.MetricContext{
MetricNamespace: "system.cpu",
},
},
Limit: 50,
})
require.NoError(t, err)
assert.True(t, complete)
assert.ElementsMatch(t, []string{"system.cpu.utilization"}, values.StringValues)
require.NoError(t, mock.ExpectationsWereMet())
}
func TestGetMeterSourceMetricFieldValuesAppliesMetricNamespace(t *testing.T) {
mockTelemetryStore := telemetrystoretest.New(telemetrystore.Config{}, &regexMatcher{})
mock := mockTelemetryStore.Mock()
metadata := newTestTelemetryMetaStoreTestHelper(t, mockTelemetryStore)
rows := cmock.NewRows([]cmock.ColumnType{
{Name: "attr", Type: "Array(String)"},
}, [][]any{{[]string{"service.name", "frontend"}}})
mock.ExpectQuery(`SELECT .*distributed_samples_agg_1d.*metric_name LIKE .*`).
WithArgs("service.name", "\\_\\_%", "system.cpu%", "", 11).
WillReturnRows(rows)
values, complete, err := metadata.(*telemetryMetaStore).getMeterSourceMetricFieldValues(context.Background(), &telemetrytypes.FieldValueSelector{
FieldKeySelector: &telemetrytypes.FieldKeySelector{
Signal: telemetrytypes.SignalMetrics,
Source: telemetrytypes.SourceMeter,
Name: "service.name",
MetricContext: &telemetrytypes.MetricContext{
MetricNamespace: "system.cpu",
},
},
Limit: 10,
})
require.NoError(t, err)
assert.True(t, complete)
assert.ElementsMatch(t, []string{"frontend"}, values.StringValues)
require.NoError(t, mock.ExpectationsWereMet())
}
func TestGetMetricsKeysAppliesMetricNamespace(t *testing.T) {
mockTelemetryStore := telemetrystoretest.New(telemetrystore.Config{}, &regexMatcher{})
mock := mockTelemetryStore.Mock()
metadata := newTestTelemetryMetaStoreTestHelper(t, mockTelemetryStore)
rows := cmock.NewRows([]cmock.ColumnType{
{Name: "name", Type: "String"},
{Name: "field_context", Type: "String"},
{Name: "field_data_type", Type: "String"},
{Name: "priority", Type: "UInt8"},
}, [][]any{{"service.name", "resource", "String", 1}})
mock.ExpectQuery(`(?s)SELECT.*distributed_metadata.*metric_name LIKE.*`).
WithArgs("%service%", "\\_\\_%", "system.cpu%", 11).
WillReturnRows(rows)
keys, complete, err := metadata.(*telemetryMetaStore).getMetricsKeys(context.Background(), []*telemetrytypes.FieldKeySelector{
{
Signal: telemetrytypes.SignalMetrics,
Name: "service",
Limit: 10,
SelectorMatchType: telemetrytypes.FieldSelectorMatchTypeFuzzy,
MetricContext: &telemetrytypes.MetricContext{
MetricNamespace: "system.cpu",
},
},
})
require.NoError(t, err)
assert.True(t, complete)
assert.Len(t, keys, 1)
assert.Equal(t, "service.name", keys[0].Name)
require.NoError(t, mock.ExpectationsWereMet())
}
func TestGetMeterSourceMetricKeysAppliesMetricNamespace(t *testing.T) {
mockTelemetryStore := telemetrystoretest.New(telemetrystore.Config{}, &regexMatcher{})
mock := mockTelemetryStore.Mock()
metadata := newTestTelemetryMetaStoreTestHelper(t, mockTelemetryStore)
rows := cmock.NewRows([]cmock.ColumnType{
{Name: "attr_name", Type: "String"},
}, [][]any{{"service.name"}})
mock.ExpectQuery(`SELECT.*distributed_samples_agg_1d.*metric_name LIKE.*`).
WithArgs("%service%", "\\_\\_%", "system.cpu%", 10).
WillReturnRows(rows)
keys, complete, err := metadata.(*telemetryMetaStore).getMeterSourceMetricKeys(context.Background(), []*telemetrytypes.FieldKeySelector{
{
Signal: telemetrytypes.SignalMetrics,
Source: telemetrytypes.SourceMeter,
Name: "service",
Limit: 10,
SelectorMatchType: telemetrytypes.FieldSelectorMatchTypeFuzzy,
MetricContext: &telemetrytypes.MetricContext{
MetricNamespace: "system.cpu",
},
},
})
require.NoError(t, err)
assert.True(t, complete)
assert.Len(t, keys, 1)
assert.Equal(t, "service.name", keys[0].Name)
require.NoError(t, mock.ExpectationsWereMet())
}

View File

@@ -161,7 +161,33 @@ func (c *conditionBuilder) conditionFor(
case qbtypes.FilterOperatorExists, qbtypes.FilterOperatorNotExists:
var value any
switch columns[0].Type.GetType() {
column := columns[0]
if len(key.Evolutions) > 0 {
// we will use the corresponding column and its evolution entry for the query
newColumns, _, err := qbtypes.SelectEvolutionsForColumns(columns, key.Evolutions, startNs, endNs)
if err != nil {
return "", err
}
if len(newColumns) == 0 {
return "", errors.NewInvalidInputf(errors.CodeInvalidInput, "no valid evolution found for field %s in the given time range", key.Name)
}
// Multiple columns means fieldExpression is a multiIf returning NULL when none match,
// so a simple null check is sufficient.
if len(newColumns) > 1 {
if operator == qbtypes.FilterOperatorExists {
return sb.IsNotNull(fieldExpression), nil
} else {
return sb.IsNull(fieldExpression), nil
}
}
// otherwise we have to find the correct exist operator based on the column type
column = newColumns[0]
}
switch column.Type.GetType() {
case schema.ColumnTypeEnumJSON:
if operator == qbtypes.FilterOperatorExists {
return sb.IsNotNull(fieldExpression), nil
@@ -178,7 +204,7 @@ func (c *conditionBuilder) conditionFor(
return sb.E(fieldExpression, value), nil
}
case schema.ColumnTypeEnumLowCardinality:
switch elementType := columns[0].Type.(schema.LowCardinalityColumnType).ElementType; elementType.GetType() {
switch elementType := column.Type.(schema.LowCardinalityColumnType).ElementType; elementType.GetType() {
case schema.ColumnTypeEnumString:
value = ""
if operator == qbtypes.FilterOperatorExists {
@@ -202,14 +228,14 @@ func (c *conditionBuilder) conditionFor(
return sb.E(fieldExpression, value), nil
}
case schema.ColumnTypeEnumMap:
keyType := columns[0].Type.(schema.MapColumnType).KeyType
keyType := column.Type.(schema.MapColumnType).KeyType
if _, ok := keyType.(schema.LowCardinalityColumnType); !ok {
return "", errors.NewInvalidInputf(errors.CodeInvalidInput, "key type %s is not supported for map column type %s", keyType, columns[0].Type)
return "", errors.NewInvalidInputf(errors.CodeInvalidInput, "key type %s is not supported for map column type %s", keyType, column.Type)
}
switch valueType := columns[0].Type.(schema.MapColumnType).ValueType; valueType.GetType() {
switch valueType := column.Type.(schema.MapColumnType).ValueType; valueType.GetType() {
case schema.ColumnTypeEnumString, schema.ColumnTypeEnumBool, schema.ColumnTypeEnumFloat64:
leftOperand := fmt.Sprintf("mapContains(%s, '%s')", columns[0].Name, key.Name)
leftOperand := fmt.Sprintf("mapContains(%s, '%s')", column.Name, key.Name)
if key.Materialized {
leftOperand = telemetrytypes.FieldKeyToMaterializedColumnNameForExists(key)
}
@@ -222,7 +248,7 @@ func (c *conditionBuilder) conditionFor(
return "", errors.NewInvalidInputf(errors.CodeInvalidInput, "exists operator is not supported for map column type %s", valueType)
}
default:
return "", errors.NewInvalidInputf(errors.CodeInvalidInput, "exists operator is not supported for column type %s", columns[0].Type)
return "", errors.NewInvalidInputf(errors.CodeInvalidInput, "exists operator is not supported for column type %s", column.Type)
}
}
return "", nil

View File

@@ -3,6 +3,7 @@ package telemetrytraces
import (
"context"
"testing"
"time"
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
@@ -14,6 +15,7 @@ import (
func TestConditionFor(t *testing.T) {
ctx := context.Background()
mockEvolution := mockEvolutionData(time.Date(2025, 10, 26, 0, 10, 0, 0, time.UTC))
testCases := []struct {
name string
key telemetrytypes.TelemetryFieldKey
@@ -213,6 +215,7 @@ func TestConditionFor(t *testing.T) {
Name: "service.name",
FieldContext: telemetrytypes.FieldContextResource,
FieldDataType: telemetrytypes.FieldDataTypeString,
Evolutions: mockEvolution,
},
operator: qbtypes.FilterOperatorExists,
value: nil,
@@ -225,6 +228,7 @@ func TestConditionFor(t *testing.T) {
Name: "service.name",
FieldContext: telemetrytypes.FieldContextResource,
FieldDataType: telemetrytypes.FieldDataTypeString,
Evolutions: mockEvolution,
},
operator: qbtypes.FilterOperatorNotExists,
value: nil,
@@ -302,3 +306,85 @@ func TestConditionFor(t *testing.T) {
})
}
}
func TestConditionForResourceWithEvolution(t *testing.T) {
ctx := context.Background()
releaseTime := time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC)
evolutions := mockEvolutionData(releaseTime)
testCases := []struct {
name string
key telemetrytypes.TelemetryFieldKey
operator qbtypes.FilterOperator
tsStart uint64
tsEnd uint64
expectedSQL string
}{
{
name: "Exists - window after release - JSON only",
key: telemetrytypes.TelemetryFieldKey{
Name: "service.name",
FieldContext: telemetrytypes.FieldContextResource,
FieldDataType: telemetrytypes.FieldDataTypeString,
Evolutions: evolutions,
},
operator: qbtypes.FilterOperatorExists,
tsStart: uint64(time.Date(2025, 6, 1, 0, 0, 0, 0, time.UTC).UnixNano()),
tsEnd: uint64(time.Date(2025, 7, 1, 0, 0, 0, 0, time.UTC).UnixNano()),
expectedSQL: "WHERE resource.`service.name`::String IS NOT NULL",
},
{
name: "NotExists - window after release - JSON only",
key: telemetrytypes.TelemetryFieldKey{
Name: "service.name",
FieldContext: telemetrytypes.FieldContextResource,
FieldDataType: telemetrytypes.FieldDataTypeString,
Evolutions: evolutions,
},
operator: qbtypes.FilterOperatorNotExists,
tsStart: uint64(time.Date(2025, 6, 1, 0, 0, 0, 0, time.UTC).UnixNano()),
tsEnd: uint64(time.Date(2025, 7, 1, 0, 0, 0, 0, time.UTC).UnixNano()),
expectedSQL: "WHERE resource.`service.name`::String IS NULL",
},
{
name: "Exists - window before release - map only",
key: telemetrytypes.TelemetryFieldKey{
Name: "service.name",
FieldContext: telemetrytypes.FieldContextResource,
FieldDataType: telemetrytypes.FieldDataTypeString,
Evolutions: evolutions,
},
operator: qbtypes.FilterOperatorExists,
tsStart: uint64(time.Date(2024, 1, 1, 0, 0, 0, 0, time.UTC).UnixNano()),
tsEnd: uint64(time.Date(2024, 6, 1, 0, 0, 0, 0, time.UTC).UnixNano()),
expectedSQL: "WHERE mapContains(resources_string, 'service.name') = ?",
},
{
name: "Exists - window straddles release - multiIf null check",
key: telemetrytypes.TelemetryFieldKey{
Name: "service.name",
FieldContext: telemetrytypes.FieldContextResource,
FieldDataType: telemetrytypes.FieldDataTypeString,
Evolutions: evolutions,
},
operator: qbtypes.FilterOperatorExists,
tsStart: uint64(time.Date(2024, 6, 1, 0, 0, 0, 0, time.UTC).UnixNano()),
tsEnd: uint64(time.Date(2025, 6, 1, 0, 0, 0, 0, time.UTC).UnixNano()),
expectedSQL: "WHERE multiIf(resource.`service.name` IS NOT NULL, resource.`service.name`::String, mapContains(resources_string, 'service.name'), resources_string['service.name'], NULL) IS NOT NULL",
},
}
fm := NewFieldMapper()
conditionBuilder := NewConditionBuilder(fm)
for _, tc := range testCases {
sb := sqlbuilder.NewSelectBuilder()
t.Run(tc.name, func(t *testing.T) {
cond, err := conditionBuilder.ConditionFor(ctx, tc.tsStart, tc.tsEnd, &tc.key, tc.operator, nil, sb)
require.NoError(t, err)
sb.Where(cond)
sql, _ := sb.BuildWithFlavor(sqlbuilder.ClickHouse)
assert.Contains(t, sql, tc.expectedSQL)
})
}
}

View File

@@ -1,6 +1,8 @@
package telemetrytraces
import "github.com/SigNoz/signoz/pkg/types/telemetrytypes"
import (
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
)
var (
IntrinsicFields = map[string]telemetrytypes.TelemetryFieldKey{

View File

@@ -174,7 +174,7 @@ func (m *defaultFieldMapper) getColumn(
) ([]*schema.Column, error) {
switch key.FieldContext {
case telemetrytypes.FieldContextResource:
return []*schema.Column{indexV3Columns["resource"]}, nil
return []*schema.Column{indexV3Columns["resources_string"], indexV3Columns["resource"]}, nil
case telemetrytypes.FieldContextScope:
return []*schema.Column{}, qbtypes.ErrColumnNotFound
case telemetrytypes.FieldContextAttribute:
@@ -254,63 +254,92 @@ func (m *defaultFieldMapper) FieldFor(
if err != nil {
return "", err
}
if len(columns) != 1 {
return "", errors.Newf(errors.TypeInternal, errors.CodeInternal, "expected exactly 1 column, got %d", len(columns))
var newColumns []*schema.Column
var evolutionsEntries []*telemetrytypes.EvolutionEntry
if len(key.Evolutions) > 0 {
// we will use the corresponding column and its evolution entry for the query
newColumns, evolutionsEntries, err = qbtypes.SelectEvolutionsForColumns(columns, key.Evolutions, startNs, endNs)
if err != nil {
return "", err
}
} else {
newColumns = columns
}
column := columns[0]
switch column.Type.GetType() {
case schema.ColumnTypeEnumJSON:
// json is only supported for resource context as of now
if key.FieldContext != telemetrytypes.FieldContextResource {
return "", errors.Newf(errors.TypeInvalidInput, errors.CodeInvalidInput, "only resource context fields are supported for json columns, got %s", key.FieldContext.String)
}
oldColumn := indexV3Columns["resources_string"]
oldKeyName := fmt.Sprintf("%s['%s']", oldColumn.Name, key.Name)
// have to add ::string as clickHouse throws an error :- data types Variant/Dynamic are not allowed in GROUP BY
// once clickHouse dependency is updated, we need to check if we can remove it.
if key.Materialized {
oldKeyName = telemetrytypes.FieldKeyToMaterializedColumnName(key)
oldKeyNameExists := telemetrytypes.FieldKeyToMaterializedColumnNameForExists(key)
return fmt.Sprintf("multiIf(%s.`%s` IS NOT NULL, %s.`%s`::String, %s==true, %s, NULL)", column.Name, key.Name, column.Name, key.Name, oldKeyNameExists, oldKeyName), nil
} else {
return fmt.Sprintf("multiIf(%s.`%s` IS NOT NULL, %s.`%s`::String, mapContains(%s, '%s'), %s, NULL)", column.Name, key.Name, column.Name, key.Name, oldColumn.Name, key.Name, oldKeyName), nil
}
case schema.ColumnTypeEnumString,
schema.ColumnTypeEnumUInt64,
schema.ColumnTypeEnumUInt32,
schema.ColumnTypeEnumInt8,
schema.ColumnTypeEnumInt16,
schema.ColumnTypeEnumBool,
schema.ColumnTypeEnumDateTime64,
schema.ColumnTypeEnumFixedString:
return column.Name, nil
case schema.ColumnTypeEnumLowCardinality:
switch elementType := column.Type.(schema.LowCardinalityColumnType).ElementType; elementType.GetType() {
case schema.ColumnTypeEnumString:
return column.Name, nil
default:
return "", errors.NewInvalidInputf(errors.CodeInvalidInput, "value type %s is not supported for low cardinality column type %s", elementType, column.Type)
}
case schema.ColumnTypeEnumMap:
keyType := column.Type.(schema.MapColumnType).KeyType
if _, ok := keyType.(schema.LowCardinalityColumnType); !ok {
return "", errors.NewInvalidInputf(errors.CodeInvalidInput, "key type %s is not supported for map column type %s", keyType, column.Type)
exprs := []string{}
existExpr := []string{}
for i, column := range newColumns {
// Use evolution column name if available, otherwise use the column name
columnName := column.Name
if evolutionsEntries != nil && evolutionsEntries[i] != nil {
columnName = evolutionsEntries[i].ColumnName
}
switch valueType := column.Type.(schema.MapColumnType).ValueType; valueType.GetType() {
case schema.ColumnTypeEnumString, schema.ColumnTypeEnumFloat64, schema.ColumnTypeEnumBool:
// a key could have been materialized, if so return the materialized column name
if key.Materialized {
return telemetrytypes.FieldKeyToMaterializedColumnName(key), nil
switch column.Type.GetType() {
case schema.ColumnTypeEnumJSON:
// json is only supported for resource context as of now
if key.FieldContext != telemetrytypes.FieldContextResource {
return "", errors.Newf(errors.TypeInvalidInput, errors.CodeInvalidInput, "only resource context fields are supported for json columns, got %s", key.FieldContext.String)
}
// have to add ::string as clickHouse throws an error :- data types Variant/Dynamic are not allowed in GROUP BY
// once clickHouse dependency is updated, we need to check if we can remove it.
exprs = append(exprs, fmt.Sprintf("%s.`%s`::String", columnName, key.Name))
existExpr = append(existExpr, fmt.Sprintf("%s.`%s` IS NOT NULL", columnName, key.Name))
case schema.ColumnTypeEnumString,
schema.ColumnTypeEnumUInt64,
schema.ColumnTypeEnumUInt32,
schema.ColumnTypeEnumInt8,
schema.ColumnTypeEnumInt16,
schema.ColumnTypeEnumBool,
schema.ColumnTypeEnumDateTime64,
schema.ColumnTypeEnumFixedString:
exprs = append(exprs, column.Name)
case schema.ColumnTypeEnumLowCardinality:
switch elementType := column.Type.(schema.LowCardinalityColumnType).ElementType; elementType.GetType() {
case schema.ColumnTypeEnumString:
exprs = append(exprs, column.Name)
default:
return "", errors.NewInvalidInputf(errors.CodeInvalidInput, "value type %s is not supported for low cardinality column type %s", elementType, column.Type)
}
case schema.ColumnTypeEnumMap:
keyType := column.Type.(schema.MapColumnType).KeyType
if _, ok := keyType.(schema.LowCardinalityColumnType); !ok {
return "", errors.NewInvalidInputf(errors.CodeInvalidInput, "key type %s is not supported for map column type %s", keyType, column.Type)
}
switch valueType := column.Type.(schema.MapColumnType).ValueType; valueType.GetType() {
case schema.ColumnTypeEnumString, schema.ColumnTypeEnumFloat64, schema.ColumnTypeEnumBool:
// a key could have been materialized, if so return the materialized column name
if key.Materialized {
exprs = append(exprs, telemetrytypes.FieldKeyToMaterializedColumnName(key))
existExpr = append(existExpr, fmt.Sprintf("%s==true", telemetrytypes.FieldKeyToMaterializedColumnNameForExists(key)))
} else {
exprs = append(exprs, fmt.Sprintf("%s['%s']", columnName, key.Name))
existExpr = append(existExpr, fmt.Sprintf("mapContains(%s, '%s')", columnName, key.Name))
}
default:
return "", errors.NewInvalidInputf(errors.CodeInvalidInput, "value type %s is not supported for map column type %s", valueType, column.Type)
}
return fmt.Sprintf("%s['%s']", column.Name, key.Name), nil
default:
return "", errors.NewInvalidInputf(errors.CodeInvalidInput, "value type %s is not supported for map column type %s", valueType, column.Type)
}
}
if len(exprs) == 1 {
return exprs[0], nil
} else if len(exprs) > 1 {
// Ensure existExpr has the same length as exprs
if len(existExpr) != len(exprs) {
return "", errors.New(errors.TypeInternal, errors.CodeInternal, "length of exist exprs doesn't match to that of exprs")
}
finalExprs := []string{}
for i, expr := range exprs {
finalExprs = append(finalExprs, fmt.Sprintf("%s, %s", existExpr[i], expr))
}
return "multiIf(" + strings.Join(finalExprs, ", ") + ", NULL)", nil
}
// should not reach here
return column.Name, nil
return columns[0].Name, nil
}
// ColumnExpressionFor returns the column expression for the given field

View File

@@ -3,6 +3,7 @@ package telemetrytraces
import (
"context"
"testing"
"time"
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
@@ -13,6 +14,7 @@ import (
func TestGetFieldKeyName(t *testing.T) {
ctx := context.Background()
mockEvolution := mockEvolutionData(time.Date(2024, 6, 2, 0, 0, 0, 0, time.UTC))
testCases := []struct {
name string
key telemetrytypes.TelemetryFieldKey
@@ -63,6 +65,7 @@ func TestGetFieldKeyName(t *testing.T) {
key: telemetrytypes.TelemetryFieldKey{
Name: "service.name",
FieldContext: telemetrytypes.FieldContextResource,
Evolutions: mockEvolution,
},
expectedResult: "multiIf(resource.`service.name` IS NOT NULL, resource.`service.name`::String, mapContains(resources_string, 'service.name'), resources_string['service.name'], NULL)",
expectedError: nil,
@@ -74,6 +77,7 @@ func TestGetFieldKeyName(t *testing.T) {
FieldContext: telemetrytypes.FieldContextResource,
FieldDataType: telemetrytypes.FieldDataTypeString,
Materialized: true,
Evolutions: mockEvolution,
},
expectedResult: "multiIf(resource.`deployment.environment` IS NOT NULL, resource.`deployment.environment`::String, `resource_string_deployment$$environment_exists`==true, `resource_string_deployment$$environment`, NULL)",
expectedError: nil,
@@ -92,7 +96,7 @@ func TestGetFieldKeyName(t *testing.T) {
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
fm := NewFieldMapper()
result, err := fm.FieldFor(ctx, 0, 0, &tc.key)
result, err := fm.FieldFor(ctx, uint64(time.Date(2024, 6, 1, 0, 0, 0, 0, time.UTC).UnixNano()), uint64(time.Date(2024, 6, 5, 0, 0, 0, 0, time.UTC).UnixNano()), &tc.key)
if tc.expectedError != nil {
assert.Equal(t, tc.expectedError, err)
@@ -103,3 +107,86 @@ func TestGetFieldKeyName(t *testing.T) {
})
}
}
func TestFieldForResourceWithEvolution(t *testing.T) {
ctx := context.Background()
releaseTime := time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC)
evolutions := mockEvolutionData(releaseTime)
testCases := []struct {
name string
key telemetrytypes.TelemetryFieldKey
tsStart uint64
tsEnd uint64
expectedResult string
}{
{
name: "Window straddles release - both columns",
key: telemetrytypes.TelemetryFieldKey{
Name: "service.name",
FieldContext: telemetrytypes.FieldContextResource,
Evolutions: evolutions,
},
tsStart: uint64(time.Date(2024, 6, 1, 0, 0, 0, 0, time.UTC).UnixNano()),
tsEnd: uint64(time.Date(2025, 6, 1, 0, 0, 0, 0, time.UTC).UnixNano()),
expectedResult: "multiIf(resource.`service.name` IS NOT NULL, resource.`service.name`::String, mapContains(resources_string, 'service.name'), resources_string['service.name'], NULL)",
},
{
name: "Window fully after release - JSON column only",
key: telemetrytypes.TelemetryFieldKey{
Name: "service.name",
FieldContext: telemetrytypes.FieldContextResource,
Evolutions: evolutions,
},
tsStart: uint64(time.Date(2025, 6, 1, 0, 0, 0, 0, time.UTC).UnixNano()),
tsEnd: uint64(time.Date(2025, 7, 1, 0, 0, 0, 0, time.UTC).UnixNano()),
expectedResult: "resource.`service.name`::String",
},
{
name: "Window fully before release - map column only",
key: telemetrytypes.TelemetryFieldKey{
Name: "service.name",
FieldContext: telemetrytypes.FieldContextResource,
Evolutions: evolutions,
},
tsStart: uint64(time.Date(2024, 1, 1, 0, 0, 0, 0, time.UTC).UnixNano()),
tsEnd: uint64(time.Date(2024, 6, 1, 0, 0, 0, 0, time.UTC).UnixNano()),
expectedResult: "resources_string['service.name']",
},
{
name: "Window fully after release - materialized resource",
key: telemetrytypes.TelemetryFieldKey{
Name: "deployment.environment",
FieldContext: telemetrytypes.FieldContextResource,
FieldDataType: telemetrytypes.FieldDataTypeString,
Materialized: true,
Evolutions: evolutions,
},
tsStart: uint64(time.Date(2025, 6, 1, 0, 0, 0, 0, time.UTC).UnixNano()),
tsEnd: uint64(time.Date(2025, 7, 1, 0, 0, 0, 0, time.UTC).UnixNano()),
expectedResult: "resource.`deployment.environment`::String",
},
{
name: "Window straddles release - materialized resource",
key: telemetrytypes.TelemetryFieldKey{
Name: "deployment.environment",
FieldContext: telemetrytypes.FieldContextResource,
FieldDataType: telemetrytypes.FieldDataTypeString,
Materialized: true,
Evolutions: evolutions,
},
tsStart: uint64(time.Date(2024, 6, 1, 0, 0, 0, 0, time.UTC).UnixNano()),
tsEnd: uint64(time.Date(2025, 6, 1, 0, 0, 0, 0, time.UTC).UnixNano()),
expectedResult: "multiIf(resource.`deployment.environment` IS NOT NULL, resource.`deployment.environment`::String, `resource_string_deployment$$environment_exists`==true, `resource_string_deployment$$environment`, NULL)",
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
fm := NewFieldMapper()
result, err := fm.FieldFor(ctx, tc.tsStart, tc.tsEnd, &tc.key)
require.NoError(t, err)
assert.Equal(t, tc.expectedResult, result)
})
}
}

View File

@@ -82,13 +82,6 @@ func (b *traceQueryStatementBuilder) Build(
start = querybuilder.ToNanoSecs(start)
end = querybuilder.ToNanoSecs(end)
keySelectors := getKeySelectors(query)
keys, _, err := b.metadataStore.GetKeysMulti(ctx, keySelectors)
if err != nil {
return nil, err
}
/*
Adding a tech debt note here:
This piece of code is a hot fix and should be removed once we close issue: engineering-pod/issues/3622
@@ -124,8 +117,19 @@ func (b *traceQueryStatementBuilder) Build(
-------------------------------- End of tech debt ----------------------------
*/
query = b.adjustKeys(ctx, keys, query, requestType)
// We modify SelectFields above (injecting default fields), and those default
// fields can carry keys that need evolutions, so fetch keys after that.
keySelectors := getKeySelectors(query)
keys, _, err := b.metadataStore.GetKeysMulti(ctx, keySelectors)
if err != nil {
return nil, err
}
for _, action := range adjustTraceKeys(keys, &query, requestType) {
// TODO: change to debug level once we are confident about the behavior
b.logger.InfoContext(ctx, "key adjustment action", slog.String("action", action))
}
// Create SQL builder
q := sqlbuilder.NewSelectBuilder()
@@ -193,24 +197,30 @@ func getKeySelectors(query qbtypes.QueryBuilderQuery[qbtypes.TraceAggregation])
return keySelectors
}
func (b *traceQueryStatementBuilder) adjustKeys(ctx context.Context, keys map[string][]*telemetrytypes.TelemetryFieldKey, query qbtypes.QueryBuilderQuery[qbtypes.TraceAggregation], requestType qbtypes.RequestType) qbtypes.QueryBuilderQuery[qbtypes.TraceAggregation] {
// add deprecated fields only during statement building
// why?
// 1. to not fail filter expression that use deprecated cols
// 2. this could have been moved to metadata fetching itself, however, that
// would mean, they also show up in suggestions we we don't want to do
// 3. reason for not doing a simple append is to keep intrinsic/calculated field first so that it gets
// priority in multi_if sql expression
// mergeDeprecatedTraceKeys prepends deprecated intrinsic/calculated trace field
// definitions to the keys map. We do this during statement building, not at
// metadata fetch time, because:
// 1. Filter expressions that reference deprecated columns must continue to
// resolve — otherwise they fail with "key not found".
// 2. Doing it at metadata fetch time would also surface deprecated keys in
// autocomplete suggestions, which we don't want.
// 3. We prepend (not append) so the intrinsic/calculated entry wins ordering
// in the multi_if SQL expression.
func mergeDeprecatedTraceKeys(keys map[string][]*telemetrytypes.TelemetryFieldKey) {
for fieldKeyName, fieldKey := range IntrinsicFieldsDeprecated {
keys[fieldKeyName] = append([]*telemetrytypes.TelemetryFieldKey{&fieldKey}, keys[fieldKeyName]...)
}
for fieldKeyName, fieldKey := range CalculatedFieldsDeprecated {
keys[fieldKeyName] = append([]*telemetrytypes.TelemetryFieldKey{&fieldKey}, keys[fieldKeyName]...)
}
}
func adjustTraceKeys(keys map[string][]*telemetrytypes.TelemetryFieldKey, query *qbtypes.QueryBuilderQuery[qbtypes.TraceAggregation], requestType qbtypes.RequestType) []string {
mergeDeprecatedTraceKeys(keys)
// Adjust keys for alias expressions in aggregations
actions := querybuilder.AdjustKeysForAliasExpressions(&query, requestType)
actions := querybuilder.AdjustKeysForAliasExpressions(query, requestType)
/*
Check if user is using multiple contexts or data types for same field name
@@ -228,7 +238,7 @@ func (b *traceQueryStatementBuilder) adjustKeys(ctx context.Context, keys map[st
and make it just http.status_code and remove the duplicate entry.
*/
actions = append(actions, querybuilder.AdjustDuplicateKeys(&query)...)
actions = append(actions, querybuilder.AdjustDuplicateKeys(query)...)
/*
Now adjust each key to have correct context and data type
@@ -236,24 +246,20 @@ func (b *traceQueryStatementBuilder) adjustKeys(ctx context.Context, keys map[st
Reason for doing this is to not create an unexpected behavior for users
*/
for idx := range query.SelectFields {
actions = append(actions, b.adjustKey(&query.SelectFields[idx], keys)...)
actions = append(actions, adjustTraceKey(&query.SelectFields[idx], keys)...)
}
for idx := range query.GroupBy {
actions = append(actions, b.adjustKey(&query.GroupBy[idx].TelemetryFieldKey, keys)...)
actions = append(actions, adjustTraceKey(&query.GroupBy[idx].TelemetryFieldKey, keys)...)
}
for idx := range query.Order {
actions = append(actions, b.adjustKey(&query.Order[idx].Key.TelemetryFieldKey, keys)...)
actions = append(actions, adjustTraceKey(&query.Order[idx].Key.TelemetryFieldKey, keys)...)
}
for _, action := range actions {
// TODO: change to debug level once we are confident about the behavior
b.logger.InfoContext(ctx, "key adjustment action", slog.String("action", action))
}
return query
return actions
}
func (b *traceQueryStatementBuilder) adjustKey(key *telemetrytypes.TelemetryFieldKey, keys map[string][]*telemetrytypes.TelemetryFieldKey) []string {
// adjustTraceKey resolves a single TelemetryFieldKey against the keys map.
func adjustTraceKey(key *telemetrytypes.TelemetryFieldKey, keys map[string][]*telemetrytypes.TelemetryFieldKey) []string {
// for recording actions taken
actions := []string{}

View File

@@ -16,6 +16,9 @@ import (
)
func TestStatementBuilder(t *testing.T) {
// releaseTime is chosen so it lands inside the standard [1747947419000, 1747983448000]ms
// test window, keeping the multiIf SQL form for resource fields.
releaseTime := time.Date(2025, 5, 22, 22, 0, 0, 0, time.UTC)
cases := []struct {
name string
requestType qbtypes.RequestType
@@ -355,7 +358,7 @@ func TestStatementBuilder(t *testing.T) {
fm := NewFieldMapper()
cb := NewConditionBuilder(fm)
mockMetadataStore := telemetrytypestest.NewMockMetadataStore()
mockMetadataStore.KeysMap = buildCompleteFieldKeyMap()
mockMetadataStore.KeysMap = buildCompleteFieldKeyMap(releaseTime)
fl := flaggertest.New(t)
aggExprRewriter := querybuilder.NewAggExprRewriter(instrumentationtest.New().ToProviderSettings(), nil, fm, cb, nil, fl)
@@ -394,6 +397,7 @@ func TestStatementBuilder(t *testing.T) {
}
func TestStatementBuilderListQuery(t *testing.T) {
releaseTime := time.Date(2025, 5, 22, 22, 0, 0, 0, time.UTC)
cases := []struct {
name string
requestType qbtypes.RequestType
@@ -650,7 +654,7 @@ func TestStatementBuilderListQuery(t *testing.T) {
fm := NewFieldMapper()
cb := NewConditionBuilder(fm)
mockMetadataStore := telemetrytypestest.NewMockMetadataStore()
mockMetadataStore.KeysMap = buildCompleteFieldKeyMap()
mockMetadataStore.KeysMap = buildCompleteFieldKeyMap(releaseTime)
fl := flaggertest.New(t)
aggExprRewriter := querybuilder.NewAggExprRewriter(instrumentationtest.New().ToProviderSettings(), nil, fm, cb, nil, fl)
@@ -683,6 +687,7 @@ func TestStatementBuilderListQuery(t *testing.T) {
}
func TestStatementBuilderListQueryWithCorruptData(t *testing.T) {
releaseTime := time.Date(2025, 5, 22, 22, 0, 0, 0, time.UTC)
cases := []struct {
name string
requestType qbtypes.RequestType
@@ -703,6 +708,15 @@ func TestStatementBuilderListQueryWithCorruptData(t *testing.T) {
FieldDataType: telemetrytypes.FieldDataTypeString,
},
},
"service.name": {
{
Name: "service.name",
Signal: telemetrytypes.SignalTraces,
FieldContext: telemetrytypes.FieldContextResource,
FieldDataType: telemetrytypes.FieldDataTypeString,
Evolutions: mockEvolutionData(time.Date(2025, 5, 22, 22, 0, 0, 0, time.UTC)),
},
},
},
query: qbtypes.QueryBuilderQuery[qbtypes.TraceAggregation]{
Signal: telemetrytypes.SignalTraces,
@@ -728,6 +742,15 @@ func TestStatementBuilderListQueryWithCorruptData(t *testing.T) {
FieldDataType: telemetrytypes.FieldDataTypeString,
},
},
"service.name": {
{
Name: "service.name",
Signal: telemetrytypes.SignalTraces,
FieldContext: telemetrytypes.FieldContextResource,
FieldDataType: telemetrytypes.FieldDataTypeString,
Evolutions: mockEvolutionData(time.Date(2025, 5, 22, 22, 0, 0, 0, time.UTC)),
},
},
},
query: qbtypes.QueryBuilderQuery[qbtypes.TraceAggregation]{
Signal: telemetrytypes.SignalTraces,
@@ -758,7 +781,7 @@ func TestStatementBuilderListQueryWithCorruptData(t *testing.T) {
mockMetadataStore := telemetrytypestest.NewMockMetadataStore()
mockMetadataStore.KeysMap = c.keysMap
if mockMetadataStore.KeysMap == nil {
mockMetadataStore.KeysMap = buildCompleteFieldKeyMap()
mockMetadataStore.KeysMap = buildCompleteFieldKeyMap(releaseTime)
}
fl := flaggertest.New(t)
aggExprRewriter := querybuilder.NewAggExprRewriter(instrumentationtest.New().ToProviderSettings(), nil, fm, cb, nil, fl)
@@ -788,7 +811,90 @@ func TestStatementBuilderListQueryWithCorruptData(t *testing.T) {
}
}
func TestStatementBuilderGroupByResourceEvolution(t *testing.T) {
releaseTime := time.Date(2025, 5, 22, 22, 0, 0, 0, time.UTC)
cases := []struct {
name string
startMs uint64
endMs uint64
expected qbtypes.Statement
}{
{
name: "window straddles release - both JSON and map branches",
startMs: 1747947419000, // 2025-05-22 21:56:59 UTC, ~3m before release
endMs: 1747983448000, // 2025-05-23 07:57:28 UTC, ~10h after release
expected: qbtypes.Statement{
Query: "WITH __limit_cte AS (SELECT toString(multiIf(multiIf(resource.`service.name` IS NOT NULL, resource.`service.name`::String, mapContains(resources_string, 'service.name'), resources_string['service.name'], NULL) IS NOT NULL, multiIf(resource.`service.name` IS NOT NULL, resource.`service.name`::String, mapContains(resources_string, 'service.name'), resources_string['service.name'], NULL), NULL)) AS `service.name`, count() AS __result_0 FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? GROUP BY `service.name` ORDER BY __result_0 DESC LIMIT ?) SELECT toStartOfInterval(timestamp, INTERVAL 30 SECOND) AS ts, toString(multiIf(multiIf(resource.`service.name` IS NOT NULL, resource.`service.name`::String, mapContains(resources_string, 'service.name'), resources_string['service.name'], NULL) IS NOT NULL, multiIf(resource.`service.name` IS NOT NULL, resource.`service.name`::String, mapContains(resources_string, 'service.name'), resources_string['service.name'], NULL), NULL)) AS `service.name`, count() AS __result_0 FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? AND (`service.name`) GLOBAL IN (SELECT `service.name` FROM __limit_cte) GROUP BY ts, `service.name`",
Args: []any{"1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), 10, "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448)},
},
},
{
name: "window after release - JSON column only",
startMs: 1747960000000, // 2025-05-23 00:26:40 UTC, ~2.5h after release
endMs: 1747983448000, // 2025-05-23 07:57:28 UTC
expected: qbtypes.Statement{
Query: "WITH __limit_cte AS (SELECT toString(multiIf(resource.`service.name`::String IS NOT NULL, resource.`service.name`::String, NULL)) AS `service.name`, count() AS __result_0 FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? GROUP BY `service.name` ORDER BY __result_0 DESC LIMIT ?) SELECT toStartOfInterval(timestamp, INTERVAL 30 SECOND) AS ts, toString(multiIf(resource.`service.name`::String IS NOT NULL, resource.`service.name`::String, NULL)) AS `service.name`, count() AS __result_0 FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? AND (`service.name`) GLOBAL IN (SELECT `service.name` FROM __limit_cte) GROUP BY ts, `service.name`",
Args: []any{"1747960000000000000", "1747983448000000000", uint64(1747958200), uint64(1747983448), 10, "1747960000000000000", "1747983448000000000", uint64(1747958200), uint64(1747983448)},
},
},
{
name: "window before release - map column only",
startMs: 1747900000000, // 2025-05-22 08:26:40 UTC, ~13.5h before release
endMs: 1747947000000, // 2025-05-22 21:50:00 UTC, ~10m before release
expected: qbtypes.Statement{
Query: "WITH __limit_cte AS (SELECT toString(multiIf(mapContains(resources_string, 'service.name') = ?, resources_string['service.name'], NULL)) AS `service.name`, count() AS __result_0 FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? GROUP BY `service.name` ORDER BY __result_0 DESC LIMIT ?) SELECT toStartOfInterval(timestamp, INTERVAL 30 SECOND) AS ts, toString(multiIf(mapContains(resources_string, 'service.name') = ?, resources_string['service.name'], NULL)) AS `service.name`, count() AS __result_0 FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? AND (`service.name`) GLOBAL IN (SELECT `service.name` FROM __limit_cte) GROUP BY ts, `service.name`",
Args: []any{true, "1747900000000000000", "1747947000000000000", uint64(1747898200), uint64(1747947000), 10, true, "1747900000000000000", "1747947000000000000", uint64(1747898200), uint64(1747947000)},
},
},
}
fm := NewFieldMapper()
cb := NewConditionBuilder(fm)
mockMetadataStore := telemetrytypestest.NewMockMetadataStore()
mockMetadataStore.KeysMap = buildCompleteFieldKeyMap(releaseTime)
fl := flaggertest.New(t)
aggExprRewriter := querybuilder.NewAggExprRewriter(instrumentationtest.New().ToProviderSettings(), nil, fm, cb, nil, fl)
statementBuilder := NewTraceQueryStatementBuilder(
instrumentationtest.New().ToProviderSettings(),
mockMetadataStore,
fm,
cb,
aggExprRewriter,
nil,
fl,
)
query := qbtypes.QueryBuilderQuery[qbtypes.TraceAggregation]{
Signal: telemetrytypes.SignalTraces,
StepInterval: qbtypes.Step{Duration: 30 * time.Second},
Aggregations: []qbtypes.TraceAggregation{
{Expression: "count()"},
},
Filter: &qbtypes.Filter{},
Limit: 10,
GroupBy: []qbtypes.GroupByKey{
{
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
Name: "service.name",
},
},
},
}
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
q, err := statementBuilder.Build(context.Background(), c.startMs, c.endMs, qbtypes.RequestTypeTimeSeries, query, nil)
require.NoError(t, err)
require.Equal(t, c.expected.Query, q.Query)
require.Equal(t, c.expected.Args, q.Args)
})
}
}
func TestStatementBuilderTraceQuery(t *testing.T) {
releaseTime := time.Date(2025, 5, 22, 22, 0, 0, 0, time.UTC)
cases := []struct {
name string
requestType qbtypes.RequestType
@@ -911,7 +1017,7 @@ func TestStatementBuilderTraceQuery(t *testing.T) {
fm := NewFieldMapper()
cb := NewConditionBuilder(fm)
mockMetadataStore := telemetrytypestest.NewMockMetadataStore()
mockMetadataStore.KeysMap = buildCompleteFieldKeyMap()
mockMetadataStore.KeysMap = buildCompleteFieldKeyMap(releaseTime)
fl := flaggertest.New(t)
aggExprRewriter := querybuilder.NewAggExprRewriter(instrumentationtest.New().ToProviderSettings(), nil, fm, cb, nil, fl)
@@ -944,6 +1050,7 @@ func TestStatementBuilderTraceQuery(t *testing.T) {
}
func TestAdjustKey(t *testing.T) {
releaseTime := time.Date(2025, 5, 22, 22, 0, 0, 0, time.UTC)
cases := []struct {
name string
inputKey telemetrytypes.TelemetryFieldKey
@@ -957,7 +1064,7 @@ func TestAdjustKey(t *testing.T) {
FieldContext: telemetrytypes.FieldContextUnspecified,
FieldDataType: telemetrytypes.FieldDataTypeUnspecified,
},
keysMap: buildCompleteFieldKeyMap(),
keysMap: buildCompleteFieldKeyMap(releaseTime),
expectedKey: IntrinsicFields["trace_id"],
},
{
@@ -967,7 +1074,7 @@ func TestAdjustKey(t *testing.T) {
FieldContext: telemetrytypes.FieldContextBody, // incorrect context
FieldDataType: telemetrytypes.FieldDataTypeInt64,
},
keysMap: buildCompleteFieldKeyMap(),
keysMap: buildCompleteFieldKeyMap(releaseTime),
expectedKey: telemetrytypes.TelemetryFieldKey{
Name: "duration_nano",
FieldContext: telemetrytypes.FieldContextSpan, // should be corrected
@@ -981,7 +1088,7 @@ func TestAdjustKey(t *testing.T) {
FieldContext: telemetrytypes.FieldContextSpan, // correct context
FieldDataType: telemetrytypes.FieldDataTypeInt64,
},
keysMap: buildCompleteFieldKeyMap(),
keysMap: buildCompleteFieldKeyMap(releaseTime),
expectedKey: telemetrytypes.TelemetryFieldKey{
Name: "duration_nano",
FieldContext: telemetrytypes.FieldContextSpan, // should be corrected
@@ -995,8 +1102,8 @@ func TestAdjustKey(t *testing.T) {
FieldContext: telemetrytypes.FieldContextUnspecified,
FieldDataType: telemetrytypes.FieldDataTypeUnspecified,
},
keysMap: buildCompleteFieldKeyMap(),
expectedKey: *buildCompleteFieldKeyMap()["service.name"][0],
keysMap: buildCompleteFieldKeyMap(releaseTime),
expectedKey: *buildCompleteFieldKeyMap(releaseTime)["service.name"][0],
},
{
name: "single matching key with context specified - override",
@@ -1005,8 +1112,8 @@ func TestAdjustKey(t *testing.T) {
FieldContext: telemetrytypes.FieldContextAttribute,
FieldDataType: telemetrytypes.FieldDataTypeUnspecified,
},
keysMap: buildCompleteFieldKeyMap(),
expectedKey: *buildCompleteFieldKeyMap()["cart.items_count"][0],
keysMap: buildCompleteFieldKeyMap(releaseTime),
expectedKey: *buildCompleteFieldKeyMap(releaseTime)["cart.items_count"][0],
},
{
name: "multiple matching keys - all materialized",
@@ -1043,7 +1150,7 @@ func TestAdjustKey(t *testing.T) {
FieldContext: telemetrytypes.FieldContextUnspecified,
FieldDataType: telemetrytypes.FieldDataTypeUnspecified,
},
keysMap: buildCompleteFieldKeyMap(),
keysMap: buildCompleteFieldKeyMap(releaseTime),
expectedKey: telemetrytypes.TelemetryFieldKey{
Name: "mixed.materialization.key",
FieldDataType: telemetrytypes.FieldDataTypeString,
@@ -1057,7 +1164,7 @@ func TestAdjustKey(t *testing.T) {
FieldContext: telemetrytypes.FieldContextAttribute,
FieldDataType: telemetrytypes.FieldDataTypeUnspecified,
},
keysMap: buildCompleteFieldKeyMap(),
keysMap: buildCompleteFieldKeyMap(releaseTime),
expectedKey: telemetrytypes.TelemetryFieldKey{
Name: "mixed.materialization.key",
FieldContext: telemetrytypes.FieldContextAttribute,
@@ -1072,7 +1179,7 @@ func TestAdjustKey(t *testing.T) {
FieldContext: telemetrytypes.FieldContextUnspecified,
FieldDataType: telemetrytypes.FieldDataTypeUnspecified,
},
keysMap: buildCompleteFieldKeyMap(),
keysMap: buildCompleteFieldKeyMap(releaseTime),
expectedKey: telemetrytypes.TelemetryFieldKey{
Name: "unknown.field",
Materialized: false,
@@ -1085,7 +1192,7 @@ func TestAdjustKey(t *testing.T) {
FieldContext: telemetrytypes.FieldContextAttribute,
FieldDataType: telemetrytypes.FieldDataTypeUnspecified,
},
keysMap: buildCompleteFieldKeyMap(),
keysMap: buildCompleteFieldKeyMap(releaseTime),
expectedKey: telemetrytypes.TelemetryFieldKey{
Name: "service.name",
FieldContext: telemetrytypes.FieldContextAttribute,
@@ -1100,7 +1207,7 @@ func TestAdjustKey(t *testing.T) {
FieldContext: telemetrytypes.FieldContextUnspecified,
FieldDataType: telemetrytypes.FieldDataTypeUnspecified,
},
keysMap: buildCompleteFieldKeyMap(),
keysMap: buildCompleteFieldKeyMap(releaseTime),
expectedKey: telemetrytypes.TelemetryFieldKey{
Name: "cart.items_count",
FieldContext: telemetrytypes.FieldContextAttribute,
@@ -1115,7 +1222,7 @@ func TestAdjustKey(t *testing.T) {
FieldContext: telemetrytypes.FieldContextUnspecified,
FieldDataType: telemetrytypes.FieldDataTypeUnspecified,
},
keysMap: buildCompleteFieldKeyMap(),
keysMap: buildCompleteFieldKeyMap(releaseTime),
expectedKey: telemetrytypes.TelemetryFieldKey{
Name: "user.id",
FieldContext: telemetrytypes.FieldContextAttribute,
@@ -1125,28 +1232,13 @@ func TestAdjustKey(t *testing.T) {
},
}
fm := NewFieldMapper()
cb := NewConditionBuilder(fm)
mockMetadataStore := telemetrytypestest.NewMockMetadataStore()
fl := flaggertest.New(t)
aggExprRewriter := querybuilder.NewAggExprRewriter(instrumentationtest.New().ToProviderSettings(), nil, fm, cb, nil, fl)
statementBuilder := NewTraceQueryStatementBuilder(
instrumentationtest.New().ToProviderSettings(),
mockMetadataStore,
fm,
cb,
aggExprRewriter,
nil,
fl,
)
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
// Create a copy of the input key to avoid modifying the original
key := c.inputKey
// Call adjustKey
statementBuilder.adjustKey(&key, c.keysMap)
adjustTraceKey(&key, c.keysMap)
// Verify the key was adjusted as expected
require.Equal(t, c.expectedKey.Name, key.Name, "key name should match")
@@ -1158,6 +1250,7 @@ func TestAdjustKey(t *testing.T) {
}
func TestAdjustKeys(t *testing.T) {
releaseTime := time.Date(2025, 5, 22, 22, 0, 0, 0, time.UTC)
cases := []struct {
name string
query qbtypes.QueryBuilderQuery[qbtypes.TraceAggregation]
@@ -1183,7 +1276,7 @@ func TestAdjustKeys(t *testing.T) {
},
},
},
keysMap: buildCompleteFieldKeyMap(),
keysMap: buildCompleteFieldKeyMap(releaseTime),
expectedSelectFields: []telemetrytypes.TelemetryFieldKey{
{
Name: "service.name",
@@ -1220,7 +1313,7 @@ func TestAdjustKeys(t *testing.T) {
},
},
},
keysMap: buildCompleteFieldKeyMap(),
keysMap: buildCompleteFieldKeyMap(releaseTime),
expectedGroupBy: []qbtypes.GroupByKey{
{
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
@@ -1267,7 +1360,7 @@ func TestAdjustKeys(t *testing.T) {
},
},
},
keysMap: buildCompleteFieldKeyMap(),
keysMap: buildCompleteFieldKeyMap(releaseTime),
expectedOrder: []qbtypes.OrderBy{
{
Key: qbtypes.OrderByKey{
@@ -1326,7 +1419,7 @@ func TestAdjustKeys(t *testing.T) {
},
},
},
keysMap: buildCompleteFieldKeyMap(),
keysMap: buildCompleteFieldKeyMap(releaseTime),
expectedSelectFields: []telemetrytypes.TelemetryFieldKey{
{
Name: "trace_id",
@@ -1381,7 +1474,7 @@ func TestAdjustKeys(t *testing.T) {
},
},
},
keysMap: buildCompleteFieldKeyMap(),
keysMap: buildCompleteFieldKeyMap(releaseTime),
// After alias adjustment, name becomes "span.duration" with FieldContextUnspecified
// "span.duration" is not in keysMap, so context stays unspecified
expectedOrder: []qbtypes.OrderBy{
@@ -1399,21 +1492,6 @@ func TestAdjustKeys(t *testing.T) {
},
}
fm := NewFieldMapper()
cb := NewConditionBuilder(fm)
mockMetadataStore := telemetrytypestest.NewMockMetadataStore()
fl := flaggertest.New(t)
aggExprRewriter := querybuilder.NewAggExprRewriter(instrumentationtest.New().ToProviderSettings(), nil, fm, cb, nil, fl)
statementBuilder := NewTraceQueryStatementBuilder(
instrumentationtest.New().ToProviderSettings(),
mockMetadataStore,
fm,
cb,
aggExprRewriter,
nil,
fl,
)
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
// Create a deep copy of the keys map to avoid modifying the original
@@ -1424,7 +1502,7 @@ func TestAdjustKeys(t *testing.T) {
}
// Call adjustKeys
c.query = statementBuilder.adjustKeys(context.Background(), keysMapCopy, c.query, qbtypes.RequestTypeScalar)
adjustTraceKeys(keysMapCopy, &c.query, qbtypes.RequestTypeScalar)
// Verify select fields were adjusted
if c.expectedSelectFields != nil {

View File

@@ -1,10 +1,12 @@
package telemetrytraces
import (
"time"
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
)
func buildCompleteFieldKeyMap() map[string][]*telemetrytypes.TelemetryFieldKey {
func buildCompleteFieldKeyMap(releaseTime time.Time) map[string][]*telemetrytypes.TelemetryFieldKey {
keysMap := map[string][]*telemetrytypes.TelemetryFieldKey{
"service.name": {
{
@@ -115,7 +117,33 @@ func buildCompleteFieldKeyMap() map[string][]*telemetrytypes.TelemetryFieldKey {
for _, keys := range keysMap {
for _, key := range keys {
key.Signal = telemetrytypes.SignalTraces
if key.FieldContext == telemetrytypes.FieldContextResource {
key.Evolutions = mockEvolutionData(releaseTime)
}
}
}
return keysMap
}
// mockEvolutionData returns the canonical resource-column evolution timeline used in tests:
// the legacy resources_string map at epoch 0 and the JSON resource column released at releaseTime.
func mockEvolutionData(releaseTime time.Time) []*telemetrytypes.EvolutionEntry {
return []*telemetrytypes.EvolutionEntry{
{
Signal: telemetrytypes.SignalTraces,
ColumnName: "resources_string",
FieldContext: telemetrytypes.FieldContextResource,
ColumnType: "Map(LowCardinality(String), String)",
FieldName: "__all__",
ReleaseTime: time.Unix(0, 0),
},
{
Signal: telemetrytypes.SignalTraces,
ColumnName: "resource",
ColumnType: "JSON()",
FieldContext: telemetrytypes.FieldContextResource,
FieldName: "__all__",
ReleaseTime: releaseTime,
},
}
}

View File

@@ -216,6 +216,13 @@ func (b *traceOperatorCTEBuilder) buildQueryCTE(ctx context.Context, queryName s
}
b.stmtBuilder.logger.DebugContext(ctx, "Retrieved keys for query", slog.String("query_name", queryName), slog.Int("keys_count", len(keys)))
// The CTE only selects spans matching the filter. Aggregations, group by
// and order by run later in buildFinalQuery, so RequestTypeRaw is fine here.
for _, action := range adjustTraceKeys(keys, query, qbtypes.RequestTypeRaw) {
// TODO: change to debug level once we are confident about the behavior
b.stmtBuilder.logger.InfoContext(ctx, "key adjustment action", slog.String("action", action))
}
// Build resource filter CTE for this specific query
resourceFilterCTEName := fmt.Sprintf("__resource_filter_%s", cteName)
resourceStmt, err := b.buildResourceFilterCTE(ctx, *query)
@@ -417,21 +424,28 @@ func (b *traceOperatorCTEBuilder) buildNotCTE(leftCTE, rightCTE string) (string,
}
func (b *traceOperatorCTEBuilder) buildFinalQuery(ctx context.Context, selectFromCTE string, requestType qbtypes.RequestType) (*qbtypes.Statement, error) {
keySelectors := b.getKeySelectors()
keys, _, err := b.stmtBuilder.metadataStore.GetKeysMulti(ctx, keySelectors)
if err != nil {
return nil, err
}
b.adjustOperatorKeys(ctx, keys, requestType)
switch requestType {
case qbtypes.RequestTypeRaw:
return b.buildListQuery(ctx, selectFromCTE)
return b.buildListQuery(ctx, selectFromCTE, keys)
case qbtypes.RequestTypeTimeSeries:
return b.buildTimeSeriesQuery(ctx, selectFromCTE)
return b.buildTimeSeriesQuery(ctx, selectFromCTE, keys)
case qbtypes.RequestTypeTrace:
return b.buildTraceQuery(ctx, selectFromCTE)
return b.buildTraceQuery(ctx, selectFromCTE, keys)
case qbtypes.RequestTypeScalar:
return b.buildScalarQuery(ctx, selectFromCTE)
return b.buildScalarQuery(ctx, selectFromCTE, keys)
default:
return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "unsupported request type: %s", requestType)
}
}
func (b *traceOperatorCTEBuilder) buildListQuery(ctx context.Context, selectFromCTE string) (*qbtypes.Statement, error) {
func (b *traceOperatorCTEBuilder) buildListQuery(ctx context.Context, selectFromCTE string, keys map[string][]*telemetrytypes.TelemetryFieldKey) (*qbtypes.Statement, error) {
sb := sqlbuilder.NewSelectBuilder()
// Select core fields
@@ -453,22 +467,6 @@ func (b *traceOperatorCTEBuilder) buildListQuery(ctx context.Context, selectFrom
"parent_span_id": true,
}
// Get keys for selectFields
keySelectors := b.getKeySelectors()
for _, field := range b.operator.SelectFields {
keySelectors = append(keySelectors, &telemetrytypes.FieldKeySelector{
Name: field.Name,
Signal: telemetrytypes.SignalTraces,
FieldContext: field.FieldContext,
FieldDataType: field.FieldDataType,
})
}
keys, _, err := b.stmtBuilder.metadataStore.GetKeysMulti(ctx, keySelectors)
if err != nil {
return nil, err
}
// Add selectFields using ColumnExpressionFor since we now have all base table columns
for _, field := range b.operator.SelectFields {
if selectedFields[field.Name] {
@@ -518,6 +516,44 @@ func (b *traceOperatorCTEBuilder) buildListQuery(ctx context.Context, selectFrom
}, nil
}
// adjustOperatorKeys runs the same key adjustments as adjustTraceKeys, but on
// the operator's own fields. The operator has a different struct shape than
// QueryBuilderQuery, so we copy the relevant fields into a temp query, run
// the shared helpers, and copy the results back.
func (b *traceOperatorCTEBuilder) adjustOperatorKeys(ctx context.Context, keys map[string][]*telemetrytypes.TelemetryFieldKey, requestType qbtypes.RequestType) {
mergeDeprecatedTraceKeys(keys)
tmp := qbtypes.QueryBuilderQuery[qbtypes.TraceAggregation]{
Aggregations: b.operator.Aggregations,
SelectFields: b.operator.SelectFields,
GroupBy: b.operator.GroupBy,
Order: b.operator.Order,
}
actions := querybuilder.AdjustKeysForAliasExpressions(&tmp, requestType)
actions = append(actions, querybuilder.AdjustDuplicateKeys(&tmp)...)
for idx := range tmp.SelectFields {
actions = append(actions, adjustTraceKey(&tmp.SelectFields[idx], keys)...)
}
for idx := range tmp.GroupBy {
actions = append(actions, adjustTraceKey(&tmp.GroupBy[idx].TelemetryFieldKey, keys)...)
}
for idx := range tmp.Order {
actions = append(actions, adjustTraceKey(&tmp.Order[idx].Key.TelemetryFieldKey, keys)...)
}
// Copy back the slices the helpers can rewrite.
b.operator.Aggregations = tmp.Aggregations
b.operator.SelectFields = tmp.SelectFields
b.operator.GroupBy = tmp.GroupBy
b.operator.Order = tmp.Order
for _, action := range actions {
b.stmtBuilder.logger.InfoContext(ctx, "key adjustment action", slog.String("action", action))
}
}
func (b *traceOperatorCTEBuilder) getKeySelectors() []*telemetrytypes.FieldKeySelector {
var keySelectors []*telemetrytypes.FieldKeySelector
@@ -545,6 +581,15 @@ func (b *traceOperatorCTEBuilder) getKeySelectors() []*telemetrytypes.FieldKeySe
})
}
for _, sf := range b.operator.SelectFields {
keySelectors = append(keySelectors, &telemetrytypes.FieldKeySelector{
Name: sf.Name,
Signal: telemetrytypes.SignalTraces,
FieldContext: sf.FieldContext,
FieldDataType: sf.FieldDataType,
})
}
for i := range keySelectors {
keySelectors[i].Signal = telemetrytypes.SignalTraces
}
@@ -552,7 +597,7 @@ func (b *traceOperatorCTEBuilder) getKeySelectors() []*telemetrytypes.FieldKeySe
return keySelectors
}
func (b *traceOperatorCTEBuilder) buildTimeSeriesQuery(ctx context.Context, selectFromCTE string) (*qbtypes.Statement, error) {
func (b *traceOperatorCTEBuilder) buildTimeSeriesQuery(ctx context.Context, selectFromCTE string, keys map[string][]*telemetrytypes.TelemetryFieldKey) (*qbtypes.Statement, error) {
sb := sqlbuilder.NewSelectBuilder()
sb.Select(fmt.Sprintf(
@@ -560,12 +605,6 @@ func (b *traceOperatorCTEBuilder) buildTimeSeriesQuery(ctx context.Context, sele
int64(b.operator.StepInterval.Seconds()),
))
keySelectors := b.getKeySelectors()
keys, _, err := b.stmtBuilder.metadataStore.GetKeysMulti(ctx, keySelectors)
if err != nil {
return nil, err
}
var allGroupByArgs []any
for _, gb := range b.operator.GroupBy {
@@ -644,8 +683,7 @@ func (b *traceOperatorCTEBuilder) buildTimeSeriesQuery(ctx context.Context, sele
combinedArgs := append(allGroupByArgs, allAggChArgs...)
// Add HAVING clause if specified
err = b.addHavingClause(sb)
if err != nil {
if err := b.addHavingClause(sb); err != nil {
return nil, err
}
@@ -672,17 +710,11 @@ func (b *traceOperatorCTEBuilder) buildTraceSummaryCTE(selectFromCTE string) {
b.addCTE("trace_summary", sql, args, []string{"all_spans", selectFromCTE})
}
func (b *traceOperatorCTEBuilder) buildTraceQuery(ctx context.Context, selectFromCTE string) (*qbtypes.Statement, error) {
func (b *traceOperatorCTEBuilder) buildTraceQuery(ctx context.Context, selectFromCTE string, keys map[string][]*telemetrytypes.TelemetryFieldKey) (*qbtypes.Statement, error) {
b.buildTraceSummaryCTE(selectFromCTE)
sb := sqlbuilder.NewSelectBuilder()
keySelectors := b.getKeySelectors()
keys, _, err := b.stmtBuilder.metadataStore.GetKeysMulti(ctx, keySelectors)
if err != nil {
return nil, err
}
var allGroupByArgs []any
for _, gb := range b.operator.GroupBy {
@@ -764,8 +796,7 @@ func (b *traceOperatorCTEBuilder) buildTraceQuery(ctx context.Context, selectFro
sb.GroupBy(groupByKeys...)
}
err = b.addHavingClause(sb)
if err != nil {
if err := b.addHavingClause(sb); err != nil {
return nil, err
}
@@ -821,15 +852,9 @@ func (b *traceOperatorCTEBuilder) buildTraceQuery(ctx context.Context, selectFro
}, nil
}
func (b *traceOperatorCTEBuilder) buildScalarQuery(ctx context.Context, selectFromCTE string) (*qbtypes.Statement, error) {
func (b *traceOperatorCTEBuilder) buildScalarQuery(ctx context.Context, selectFromCTE string, keys map[string][]*telemetrytypes.TelemetryFieldKey) (*qbtypes.Statement, error) {
sb := sqlbuilder.NewSelectBuilder()
keySelectors := b.getKeySelectors()
keys, _, err := b.stmtBuilder.metadataStore.GetKeysMulti(ctx, keySelectors)
if err != nil {
return nil, err
}
var allGroupByArgs []any
for _, gb := range b.operator.GroupBy {
@@ -911,8 +936,7 @@ func (b *traceOperatorCTEBuilder) buildScalarQuery(ctx context.Context, selectFr
combinedArgs := append(allGroupByArgs, allAggChArgs...)
// Add HAVING clause if specified
err = b.addHavingClause(sb)
if err != nil {
if err := b.addHavingClause(sb); err != nil {
return nil, err
}

View File

@@ -14,6 +14,25 @@ import (
"github.com/stretchr/testify/require"
)
func newTestTraceOperatorStatementBuilder(t *testing.T) *traceOperatorStatementBuilder {
t.Helper()
releaseTime := time.Date(2025, 5, 22, 22, 0, 0, 0, time.UTC)
fm := NewFieldMapper()
cb := NewConditionBuilder(fm)
mockMetadataStore := telemetrytypestest.NewMockMetadataStore()
mockMetadataStore.KeysMap = buildCompleteFieldKeyMap(releaseTime)
fl := flaggertest.New(t)
aggExprRewriter := querybuilder.NewAggExprRewriter(instrumentationtest.New().ToProviderSettings(), nil, fm, cb, nil, fl)
traceStmtBuilder := NewTraceQueryStatementBuilder(
instrumentationtest.New().ToProviderSettings(),
mockMetadataStore, fm, cb, aggExprRewriter, nil, fl,
)
return NewTraceOperatorStatementBuilder(
instrumentationtest.New().ToProviderSettings(),
mockMetadataStore, fm, cb, traceStmtBuilder, aggExprRewriter, fl,
)
}
func TestTraceOperatorStatementBuilder(t *testing.T) {
cases := []struct {
name string
@@ -463,32 +482,7 @@ func TestTraceOperatorStatementBuilder(t *testing.T) {
},
}
fm := NewFieldMapper()
cb := NewConditionBuilder(fm)
mockMetadataStore := telemetrytypestest.NewMockMetadataStore()
mockMetadataStore.KeysMap = buildCompleteFieldKeyMap()
fl := flaggertest.New(t)
aggExprRewriter := querybuilder.NewAggExprRewriter(instrumentationtest.New().ToProviderSettings(), nil, fm, cb, nil, fl)
traceStmtBuilder := NewTraceQueryStatementBuilder(
instrumentationtest.New().ToProviderSettings(),
mockMetadataStore,
fm,
cb,
aggExprRewriter,
nil,
fl,
)
statementBuilder := NewTraceOperatorStatementBuilder(
instrumentationtest.New().ToProviderSettings(),
mockMetadataStore,
fm,
cb,
traceStmtBuilder,
aggExprRewriter,
fl,
)
statementBuilder := newTestTraceOperatorStatementBuilder(t)
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
@@ -579,32 +573,7 @@ func TestTraceOperatorStatementBuilderErrors(t *testing.T) {
},
}
fm := NewFieldMapper()
cb := NewConditionBuilder(fm)
mockMetadataStore := telemetrytypestest.NewMockMetadataStore()
mockMetadataStore.KeysMap = buildCompleteFieldKeyMap()
fl := flaggertest.New(t)
aggExprRewriter := querybuilder.NewAggExprRewriter(instrumentationtest.New().ToProviderSettings(), nil, fm, cb, nil, fl)
traceStmtBuilder := NewTraceQueryStatementBuilder(
instrumentationtest.New().ToProviderSettings(),
mockMetadataStore,
fm,
cb,
aggExprRewriter,
nil,
fl,
)
statementBuilder := NewTraceOperatorStatementBuilder(
instrumentationtest.New().ToProviderSettings(),
mockMetadataStore,
fm,
cb,
traceStmtBuilder,
aggExprRewriter,
fl,
)
statementBuilder := newTestTraceOperatorStatementBuilder(t)
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
@@ -626,3 +595,142 @@ func TestTraceOperatorStatementBuilderErrors(t *testing.T) {
})
}
}
func TestTraceOperatorStatementBuilderAdjustsKeys(t *testing.T) {
cases := []struct {
name string
requestType qbtypes.RequestType
operator qbtypes.QueryBuilderTraceOperator
builderFilter string
wantSQL string
wantArgs []any
}{
{
name: "deprecated duration filter in referenced builder query",
requestType: qbtypes.RequestTypeRaw,
operator: qbtypes.QueryBuilderTraceOperator{
Expression: "A",
Limit: 10,
},
builderFilter: "durationNano = '3s'",
wantSQL: "duration_nano = ?",
wantArgs: []any{int64(3000000000)},
},
{
name: "context-prefixed aggregation alias in order by",
requestType: qbtypes.RequestTypeScalar,
operator: qbtypes.QueryBuilderTraceOperator{
Expression: "A",
Aggregations: []qbtypes.TraceAggregation{
{
Expression: "count()",
Alias: "span.count_",
},
},
Order: []qbtypes.OrderBy{
{
Key: qbtypes.OrderByKey{
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
Name: "count_",
FieldContext: telemetrytypes.FieldContextSpan,
},
},
Direction: qbtypes.OrderDirectionDesc,
},
},
},
wantSQL: "ORDER BY __result_0 desc",
},
}
statementBuilder := newTestTraceOperatorStatementBuilder(t)
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
err := c.operator.ParseExpression()
require.NoError(t, err)
filter := c.builderFilter
if filter == "" {
filter = "service.name = 'frontend'"
}
q, err := statementBuilder.Build(
context.Background(),
1747947419000,
1747983448000,
c.requestType,
c.operator,
&qbtypes.CompositeQuery{
Queries: []qbtypes.QueryEnvelope{
{
Type: qbtypes.QueryTypeBuilder,
Spec: qbtypes.QueryBuilderQuery[qbtypes.TraceAggregation]{
Name: "A",
Signal: telemetrytypes.SignalTraces,
Filter: &qbtypes.Filter{Expression: filter},
},
},
},
},
)
require.NoError(t, err)
require.Contains(t, q.Query, c.wantSQL)
for _, arg := range c.wantArgs {
require.Contains(t, q.Args, arg)
}
})
}
}
// TestTraceOperatorStatementBuilderDeduplicatesKeys checks that a trace
// operator with the same field name listed twice in GroupBy (once with a
// context, once without) ends up with a single column in the outer SELECT
// and a single entry in GROUP BY.
func TestTraceOperatorStatementBuilderDeduplicatesKeys(t *testing.T) {
statementBuilder := newTestTraceOperatorStatementBuilder(t)
operator := qbtypes.QueryBuilderTraceOperator{
Expression: "A",
Aggregations: []qbtypes.TraceAggregation{
{Expression: "count()"},
},
GroupBy: []qbtypes.GroupByKey{
{TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
Name: "http.method",
FieldContext: telemetrytypes.FieldContextAttribute,
}},
// Same name, no context — should be merged with the entry above.
{TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
Name: "http.method",
}},
},
}
require.NoError(t, operator.ParseExpression())
q, err := statementBuilder.Build(
context.Background(),
1747947419000,
1747983448000,
qbtypes.RequestTypeScalar,
operator,
&qbtypes.CompositeQuery{
Queries: []qbtypes.QueryEnvelope{
{
Type: qbtypes.QueryTypeBuilder,
Spec: qbtypes.QueryBuilderQuery[qbtypes.TraceAggregation]{
Name: "A",
Signal: telemetrytypes.SignalTraces,
Filter: &qbtypes.Filter{Expression: "service.name = 'frontend'"},
},
},
},
},
)
require.NoError(t, err)
require.Contains(t, q.Query,
"SELECT toString(multiIf(mapContains(attributes_string, 'http.method') = ?, attributes_string['http.method'], NULL)) AS `http.method`, count() AS __result_0 FROM A GROUP BY `http.method` ORDER BY __result_0 DESC")
}

View File

@@ -4,6 +4,7 @@ import (
"context"
"strings"
"testing"
"time"
"github.com/SigNoz/signoz/pkg/instrumentation/instrumentationtest"
"github.com/SigNoz/signoz/pkg/querybuilder"
@@ -16,12 +17,13 @@ import (
)
func TestTraceTimeRangeOptimization(t *testing.T) {
releaseTime := time.Date(2025, 5, 22, 22, 0, 0, 0, time.UTC)
fm := NewFieldMapper()
cb := NewConditionBuilder(fm)
mockMetadataStore := telemetrytypestest.NewMockMetadataStore()
mockMetadataStore.KeysMap = buildCompleteFieldKeyMap()
mockMetadataStore.KeysMap = buildCompleteFieldKeyMap(releaseTime)
mockMetadataStore.KeysMap["trace_id"] = []*telemetrytypes.TelemetryFieldKey{{
Name: "trace_id",
FieldContext: telemetrytypes.FieldContextSpan,

View File

@@ -0,0 +1,119 @@
package querybuildertypesv5
import (
"slices"
"sort"
"strconv"
"time"
schema "github.com/SigNoz/signoz-otel-collector/cmd/signozschemamigrator/schema_migrator"
"github.com/SigNoz/signoz/pkg/errors"
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
)
// SelectEvolutionsForColumns selects the appropriate evolution entries for each column based on the time range.
// Logic:
// - Finds the latest base evolution (<= tsStartTime) across ALL columns
// - Rejects all evolutions before this latest base evolution
// - For duplicate evolutions it considers the oldest one (first in ReleaseTime)
// - For each column, includes its evolution if it's >= latest base evolution and <= tsEndTime
// - Results are sorted by ReleaseTime descending (newest first)
func SelectEvolutionsForColumns(columns []*schema.Column, evolutions []*telemetrytypes.EvolutionEntry, tsStart, tsEnd uint64) ([]*schema.Column, []*telemetrytypes.EvolutionEntry, error) {
sortedEvolutions := make([]*telemetrytypes.EvolutionEntry, len(evolutions))
copy(sortedEvolutions, evolutions)
// sort the evolutions by ReleaseTime ascending
sort.Slice(sortedEvolutions, func(i, j int) bool {
return sortedEvolutions[i].ReleaseTime.Before(sortedEvolutions[j].ReleaseTime)
})
tsStartTime := time.Unix(0, int64(tsStart))
tsEndTime := time.Unix(0, int64(tsEnd))
// Build evolution map: column name -> evolution
evolutionMap := make(map[string]*telemetrytypes.EvolutionEntry)
for _, evolution := range sortedEvolutions {
if _, exists := evolutionMap[evolution.ColumnName+":"+evolution.FieldName+":"+strconv.Itoa(int(evolution.Version))]; exists {
// since if there is duplicate we would just use the oldest one.
continue
}
evolutionMap[evolution.ColumnName+":"+evolution.FieldName+":"+strconv.Itoa(int(evolution.Version))] = evolution
}
// Find the latest base evolution (<= tsStartTime) across ALL columns
// Evolutions are sorted, so we can break early
var latestBaseEvolutionAcrossAll *telemetrytypes.EvolutionEntry
for _, evolution := range sortedEvolutions {
if evolution.ReleaseTime.After(tsStartTime) {
break
}
latestBaseEvolutionAcrossAll = evolution
}
// We shouldn't reach this, it basically means there is something wrong with the evolutions data
if latestBaseEvolutionAcrossAll == nil {
return nil, nil, errors.Newf(errors.TypeInternal, errors.CodeInternal, "no base evolution found for columns %v", columns)
}
columnLookUpMap := make(map[string]*schema.Column)
for _, column := range columns {
columnLookUpMap[column.Name] = column
}
// Collect column-evolution pairs
type colEvoPair struct {
column *schema.Column
evolution *telemetrytypes.EvolutionEntry
}
pairs := []colEvoPair{}
for _, evolution := range evolutionMap {
// Reject evolutions before the latest base evolution
if evolution.ReleaseTime.Before(latestBaseEvolutionAcrossAll.ReleaseTime) {
continue
}
// skip evolutions after tsEndTime
if evolution.ReleaseTime.After(tsEndTime) || evolution.ReleaseTime.Equal(tsEndTime) {
continue
}
if _, exists := columnLookUpMap[evolution.ColumnName]; !exists {
return nil, nil, errors.Newf(errors.TypeInternal, errors.CodeInternal, "evolution column %s not found in columns %v", evolution.ColumnName, columns)
}
pairs = append(pairs, colEvoPair{columnLookUpMap[evolution.ColumnName], evolution})
}
// If no pairs found, fall back to latestBaseEvolutionAcrossAll for matching columns
if len(pairs) == 0 {
for _, column := range columns {
// Use latestBaseEvolutionAcrossAll if this column name matches its column name
if column.Name == latestBaseEvolutionAcrossAll.ColumnName {
pairs = append(pairs, colEvoPair{column, latestBaseEvolutionAcrossAll})
}
}
}
// Sort by ReleaseTime descending (newest first)
slices.SortFunc(pairs, func(a, b colEvoPair) int {
// Sort by ReleaseTime descending (newest first)
if a.evolution.ReleaseTime.After(b.evolution.ReleaseTime) {
return -1
}
if a.evolution.ReleaseTime.Before(b.evolution.ReleaseTime) {
return 1
}
return 0
})
// Extract results
newColumns := make([]*schema.Column, len(pairs))
evolutionsEntries := make([]*telemetrytypes.EvolutionEntry, len(pairs))
for i, pair := range pairs {
newColumns[i] = pair.column
evolutionsEntries[i] = pair.evolution
}
return newColumns, evolutionsEntries, nil
}

View File

@@ -0,0 +1,414 @@
package querybuildertypesv5
import (
"testing"
"time"
schema "github.com/SigNoz/signoz-otel-collector/cmd/signozschemamigrator/schema_migrator"
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
const (
LogsV2BodyV2Column = "body_v2"
LogsV2BodyPromotedColumn = "body_promoted"
)
var (
resources_string = &schema.Column{Name: "resources_string", Type: schema.MapColumnType{
KeyType: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString},
ValueType: schema.ColumnTypeString,
}}
resource = &schema.Column{Name: "resource", Type: schema.JSONColumnType{}}
attributes_string = &schema.Column{Name: "attributes_string", Type: schema.MapColumnType{
KeyType: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString},
ValueType: schema.ColumnTypeString,
}}
body_v2 = &schema.Column{Name: LogsV2BodyV2Column, Type: schema.JSONColumnType{}}
body_promoted = &schema.Column{Name: LogsV2BodyPromotedColumn, Type: schema.JSONColumnType{}}
)
func TestSelectEvolutionsForColumns(t *testing.T) {
testCases := []struct {
name string
columns []*schema.Column
evolutions []*telemetrytypes.EvolutionEntry
tsStart uint64
tsEnd uint64
expectedColumns []string // column names
expectedEvols []string // evolution column names
expectedError bool
errorStr string
}{
{
name: "New evolutions at tsStartTime - should include latest evolution",
columns: []*schema.Column{
resources_string,
resource,
},
evolutions: []*telemetrytypes.EvolutionEntry{
{
Signal: telemetrytypes.SignalLogs,
ColumnName: "resources_string",
ColumnType: "Map(LowCardinality(String), String)",
FieldContext: telemetrytypes.FieldContextResource,
FieldName: "__all__",
Version: 0,
ReleaseTime: time.Date(0, 0, 0, 0, 0, 0, 0, time.UTC),
},
{
Signal: telemetrytypes.SignalLogs,
ColumnName: "resource",
ColumnType: "JSON()",
FieldContext: telemetrytypes.FieldContextResource,
FieldName: "__all__",
Version: 1,
ReleaseTime: time.Date(2024, 2, 25, 0, 0, 0, 0, time.UTC),
},
},
tsStart: uint64(time.Date(2024, 2, 25, 0, 0, 0, 0, time.UTC).UnixNano()),
tsEnd: uint64(time.Date(2024, 2, 30, 0, 0, 0, 0, time.UTC).UnixNano()),
expectedColumns: []string{"resource"},
expectedEvols: []string{"resource"},
},
{
name: "New evolutions after tsStartTime but less than tsEndTime - should include both",
columns: []*schema.Column{
resources_string,
resource,
},
evolutions: []*telemetrytypes.EvolutionEntry{
{
Signal: telemetrytypes.SignalLogs,
ColumnName: "resources_string",
ColumnType: "Map(LowCardinality(String), String)",
FieldContext: telemetrytypes.FieldContextResource,
FieldName: "__all__",
Version: 0,
ReleaseTime: time.Date(0, 0, 0, 0, 0, 0, 0, time.UTC),
},
{
Signal: telemetrytypes.SignalLogs,
ColumnName: "resource",
ColumnType: "JSON()",
FieldContext: telemetrytypes.FieldContextResource,
FieldName: "__all__",
Version: 1,
ReleaseTime: time.Date(2024, 2, 3, 0, 0, 0, 0, time.UTC),
},
},
tsStart: uint64(time.Date(2024, 2, 1, 0, 0, 0, 0, time.UTC).UnixNano()),
tsEnd: uint64(time.Date(2024, 2, 15, 0, 0, 0, 0, time.UTC).UnixNano()),
expectedColumns: []string{"resource", "resources_string"}, // sorted by ReleaseTime desc
expectedEvols: []string{"resource", "resources_string"},
},
{
name: "Columns without matching evolutions - should exclude them",
columns: []*schema.Column{
resources_string,
resource, // no evolution for this
attributes_string, // no evolution for this
},
evolutions: []*telemetrytypes.EvolutionEntry{
{
Signal: telemetrytypes.SignalLogs,
ColumnName: "resources_string",
ColumnType: "Map(LowCardinality(String), String)",
FieldContext: telemetrytypes.FieldContextResource,
FieldName: "__all__",
Version: 0,
ReleaseTime: time.Date(2024, 1, 15, 0, 0, 0, 0, time.UTC),
},
},
tsStart: uint64(time.Date(2024, 2, 1, 0, 0, 0, 0, time.UTC).UnixNano()),
tsEnd: uint64(time.Date(2024, 2, 15, 0, 0, 0, 0, time.UTC).UnixNano()),
expectedColumns: []string{"resources_string"},
expectedEvols: []string{"resources_string"},
},
{
name: "New evolutions at tsEndTime - should not include new evolution",
columns: []*schema.Column{
resources_string,
resource,
},
evolutions: []*telemetrytypes.EvolutionEntry{
{
Signal: telemetrytypes.SignalLogs,
ColumnName: "resources_string",
ColumnType: "Map(LowCardinality(String), String)",
FieldContext: telemetrytypes.FieldContextResource,
FieldName: "__all__",
Version: 0,
ReleaseTime: time.Date(0, 0, 0, 0, 0, 0, 0, time.UTC),
},
{
Signal: telemetrytypes.SignalLogs,
ColumnName: "resource",
ColumnType: "JSON()",
FieldContext: telemetrytypes.FieldContextResource,
FieldName: "__all__",
Version: 1,
ReleaseTime: time.Date(2024, 2, 30, 0, 0, 0, 0, time.UTC),
},
},
tsStart: uint64(time.Date(2024, 2, 25, 0, 0, 0, 0, time.UTC).UnixNano()),
tsEnd: uint64(time.Date(2024, 2, 30, 0, 0, 0, 0, time.UTC).UnixNano()),
expectedColumns: []string{"resources_string"},
expectedEvols: []string{"resources_string"},
},
{
name: "New evolutions after tsEndTime - should exclude new",
columns: []*schema.Column{
resources_string,
resource,
},
evolutions: []*telemetrytypes.EvolutionEntry{
{
Signal: telemetrytypes.SignalLogs,
ColumnName: "resources_string",
ColumnType: "Map(LowCardinality(String), String)",
FieldContext: telemetrytypes.FieldContextResource,
FieldName: "__all__",
Version: 0,
ReleaseTime: time.Date(0, 0, 0, 0, 0, 0, 0, time.UTC),
},
{
Signal: telemetrytypes.SignalLogs,
ColumnName: "resource",
ColumnType: "JSON()",
FieldContext: telemetrytypes.FieldContextResource,
FieldName: "__all__",
Version: 1,
ReleaseTime: time.Date(2024, 2, 25, 0, 0, 0, 0, time.UTC),
},
},
tsStart: uint64(time.Date(2024, 2, 1, 0, 0, 0, 0, time.UTC).UnixNano()),
tsEnd: uint64(time.Date(2024, 2, 15, 0, 0, 0, 0, time.UTC).UnixNano()),
expectedColumns: []string{"resources_string"},
expectedEvols: []string{"resources_string"},
},
{
name: "Empty columns array",
columns: []*schema.Column{},
evolutions: []*telemetrytypes.EvolutionEntry{
{
Signal: telemetrytypes.SignalLogs,
ColumnName: "resources_string",
ColumnType: "Map(LowCardinality(String), String)",
FieldContext: telemetrytypes.FieldContextResource,
FieldName: "__all__",
Version: 0,
ReleaseTime: time.Date(2024, 1, 15, 0, 0, 0, 0, time.UTC),
},
},
tsStart: uint64(time.Date(2024, 2, 1, 0, 0, 0, 0, time.UTC).UnixNano()),
tsEnd: uint64(time.Date(2024, 2, 15, 0, 0, 0, 0, time.UTC).UnixNano()),
expectedColumns: []string{},
expectedEvols: []string{},
expectedError: true,
errorStr: "column resources_string not found",
},
{
name: "Duplicate evolutions - should use first encountered (oldest if sorted)",
columns: []*schema.Column{
resource,
},
evolutions: []*telemetrytypes.EvolutionEntry{
{
Signal: telemetrytypes.SignalLogs,
ColumnName: "resources_string",
ColumnType: "Map(LowCardinality(String), String)",
FieldContext: telemetrytypes.FieldContextResource,
FieldName: "__all__",
Version: 0,
ReleaseTime: time.Date(0, 0, 0, 0, 0, 0, 0, time.UTC),
},
{
Signal: telemetrytypes.SignalLogs,
ColumnName: "resource",
ColumnType: "JSON()",
FieldContext: telemetrytypes.FieldContextResource,
FieldName: "__all__",
Version: 1,
ReleaseTime: time.Date(2024, 1, 15, 0, 0, 0, 0, time.UTC),
},
{
Signal: telemetrytypes.SignalLogs,
ColumnName: "resource",
ColumnType: "JSON()",
FieldContext: telemetrytypes.FieldContextResource,
FieldName: "__all__",
Version: 1,
ReleaseTime: time.Date(2024, 1, 20, 0, 0, 0, 0, time.UTC),
},
},
tsStart: uint64(time.Date(2024, 2, 1, 0, 0, 0, 0, time.UTC).UnixNano()),
tsEnd: uint64(time.Date(2024, 2, 15, 0, 0, 0, 0, time.UTC).UnixNano()),
expectedColumns: []string{"resource"},
expectedEvols: []string{"resource"}, // should use first one (older)
},
{
name: "Genuine Duplicate evolutions with new version- should consider both",
columns: []*schema.Column{
resources_string,
resource,
},
evolutions: []*telemetrytypes.EvolutionEntry{
{
Signal: telemetrytypes.SignalLogs,
ColumnName: "resources_string",
ColumnType: "Map(LowCardinality(String), String)",
FieldContext: telemetrytypes.FieldContextResource,
FieldName: "__all__",
Version: 0,
ReleaseTime: time.Date(0, 0, 0, 0, 0, 0, 0, time.UTC),
},
{
Signal: telemetrytypes.SignalLogs,
ColumnName: "resource",
ColumnType: "JSON()",
FieldContext: telemetrytypes.FieldContextResource,
FieldName: "__all__",
Version: 1,
ReleaseTime: time.Date(2024, 1, 15, 0, 0, 0, 0, time.UTC),
},
{
Signal: telemetrytypes.SignalLogs,
ColumnName: "resources_string",
ColumnType: "Map(LowCardinality(String), String)",
FieldContext: telemetrytypes.FieldContextResource,
FieldName: "__all__",
Version: 2,
ReleaseTime: time.Date(2024, 1, 20, 0, 0, 0, 0, time.UTC),
},
},
tsStart: uint64(time.Date(2024, 1, 16, 0, 0, 0, 0, time.UTC).UnixNano()),
tsEnd: uint64(time.Date(2024, 2, 15, 0, 0, 0, 0, time.UTC).UnixNano()),
expectedColumns: []string{"resources_string", "resource"},
expectedEvols: []string{"resources_string", "resource"}, // should use first one (older)
},
{
name: "Evolution exactly at tsEndTime",
columns: []*schema.Column{
resources_string,
resource,
},
evolutions: []*telemetrytypes.EvolutionEntry{
{
Signal: telemetrytypes.SignalLogs,
ColumnName: "resources_string",
ColumnType: "Map(LowCardinality(String), String)",
FieldContext: telemetrytypes.FieldContextResource,
FieldName: "__all__",
ReleaseTime: time.Date(2024, 1, 15, 0, 0, 0, 0, time.UTC),
},
{
Signal: telemetrytypes.SignalLogs,
ColumnName: "resource",
ColumnType: "JSON()",
FieldContext: telemetrytypes.FieldContextResource,
FieldName: "__all__",
ReleaseTime: time.Date(2024, 2, 15, 0, 0, 0, 0, time.UTC), // exactly at tsEnd
},
},
tsStart: uint64(time.Date(2024, 2, 1, 0, 0, 0, 0, time.UTC).UnixNano()),
tsEnd: uint64(time.Date(2024, 2, 15, 0, 0, 0, 0, time.UTC).UnixNano()),
expectedColumns: []string{"resources_string"}, // resource excluded because After(tsEnd) is true
expectedEvols: []string{"resources_string"},
},
{
name: "Single evolution after tsStartTime - JSON body",
columns: []*schema.Column{
body_v2,
body_promoted,
},
evolutions: []*telemetrytypes.EvolutionEntry{
{
Signal: telemetrytypes.SignalLogs,
ColumnName: LogsV2BodyV2Column,
ColumnType: "JSON()",
FieldContext: telemetrytypes.FieldContextBody,
FieldName: "__all__",
ReleaseTime: time.Unix(0, 0),
},
{
Signal: telemetrytypes.SignalLogs,
ColumnName: LogsV2BodyPromotedColumn,
ColumnType: "JSON()",
FieldContext: telemetrytypes.FieldContextBody,
FieldName: "user.name",
ReleaseTime: time.Date(2024, 2, 2, 0, 0, 0, 0, time.UTC),
},
},
tsStart: uint64(time.Date(2024, 2, 1, 0, 0, 0, 0, time.UTC).UnixNano()),
tsEnd: uint64(time.Date(2024, 2, 15, 0, 0, 0, 0, time.UTC).UnixNano()),
expectedColumns: []string{LogsV2BodyPromotedColumn, LogsV2BodyV2Column}, // sorted by ReleaseTime desc (newest first)
expectedEvols: []string{LogsV2BodyPromotedColumn, LogsV2BodyV2Column},
},
{
name: "No evolution after tsStartTime - JSON body",
columns: []*schema.Column{
body_v2,
body_promoted,
},
evolutions: []*telemetrytypes.EvolutionEntry{
{
Signal: telemetrytypes.SignalLogs,
ColumnName: LogsV2BodyV2Column,
ColumnType: "JSON()",
FieldContext: telemetrytypes.FieldContextBody,
FieldName: "__all__",
ReleaseTime: time.Unix(0, 0),
},
{
Signal: telemetrytypes.SignalLogs,
ColumnName: LogsV2BodyPromotedColumn,
ColumnType: "JSON()",
FieldContext: telemetrytypes.FieldContextBody,
FieldName: "user.name",
ReleaseTime: time.Date(2024, 2, 2, 0, 0, 0, 0, time.UTC),
},
},
tsStart: uint64(time.Date(2024, 2, 3, 0, 0, 0, 0, time.UTC).UnixNano()),
tsEnd: uint64(time.Date(2024, 2, 15, 0, 0, 0, 0, time.UTC).UnixNano()),
expectedColumns: []string{LogsV2BodyPromotedColumn},
expectedEvols: []string{LogsV2BodyPromotedColumn},
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
resultColumns, resultEvols, err := SelectEvolutionsForColumns(tc.columns, tc.evolutions, tc.tsStart, tc.tsEnd)
if tc.expectedError {
assert.Contains(t, err.Error(), tc.errorStr)
} else {
require.NoError(t, err)
assert.Equal(t, len(tc.expectedColumns), len(resultColumns), "column count mismatch")
assert.Equal(t, len(tc.expectedEvols), len(resultEvols), "evolution count mismatch")
resultColumnNames := make([]string, len(resultColumns))
for i, col := range resultColumns {
resultColumnNames[i] = col.Name
}
resultEvolNames := make([]string, len(resultEvols))
for i, evol := range resultEvols {
resultEvolNames[i] = evol.ColumnName
}
for i := range tc.expectedColumns {
assert.Equal(t, resultColumnNames[i], tc.expectedColumns[i], "expected column missing: "+tc.expectedColumns[i])
}
for i := range tc.expectedEvols {
assert.Equal(t, resultEvolNames[i], tc.expectedEvols[i], "expected evolution missing: "+tc.expectedEvols[i])
}
// Verify sorting: should be descending by ReleaseTime
for i := 0; i < len(resultEvols)-1; i++ {
assert.True(t, !resultEvols[i].ReleaseTime.Before(resultEvols[i+1].ReleaseTime),
"evolutions should be sorted descending by ReleaseTime")
}
}
})
}
}

View File

@@ -54,16 +54,16 @@ func newConfig() factory.Config {
Directory: "/etc/signoz/web",
Settings: SettingsConfig{
Posthog: PosthogConfig{
Enabled: true,
Enabled: false,
},
Appcues: AppcuesConfig{
Enabled: true,
Enabled: false,
},
Sentry: SentryConfig{
Enabled: true,
Enabled: false,
},
Pylon: PylonConfig{
Enabled: true,
Enabled: false,
},
},
}

View File

@@ -449,6 +449,21 @@ def index_series_by_label(
return series_by_label
def assert_grouped_series(
series_by_group: dict[str, dict],
expected_values_by_group: dict[str, dict[int, int]],
) -> None:
assert set(series_by_group.keys()) == set(expected_values_by_group.keys())
for group_name, expected_by_ts in expected_values_by_group.items():
actual_values = sorted(
series_by_group[group_name]["values"],
key=lambda value: value["timestamp"],
)
expected_values = [{"timestamp": timestamp, "value": value} for timestamp, value in sorted(expected_by_ts.items())]
assert actual_values == expected_values
def find_named_result(
results: list[dict[str, Any]],
name: str,
@@ -459,6 +474,57 @@ def find_named_result(
)
def assert_scalar_value(
response: requests.Response,
name: str,
expected: Any,
*,
row: int = 0,
col: int = 0,
) -> None:
"""Assert that the named scalar result has `expected` at data[row][col]."""
result = find_named_result(response.json()["data"]["data"]["results"], name)
assert result is not None, f"no result for query {name}"
assert result["data"][row][col] == expected, f"expected {expected} at [{row}][{col}], got {result['data'][row][col]}"
def assert_grouped_scalar(
response: requests.Response,
name: str,
*,
expected_groups: int,
expected_columns: int,
last_col_value: Any | None = None,
) -> None:
"""Assert grouped scalar result has the expected column count and group count.
If `last_col_value` is set and there is exactly one group, also assert the
last column of that single row equals it (a common aggregation-value check)."""
result = find_named_result(response.json()["data"]["data"]["results"], name)
assert result is not None, f"no result for query {name}"
columns = result["columns"]
rows = result["data"]
assert len(columns) == expected_columns, f"expected {expected_columns} columns, got {len(columns)}: {columns}"
assert len(rows) == expected_groups, f"expected {expected_groups} groups, got {len(rows)}: {rows}"
if last_col_value is not None and expected_groups == 1:
assert rows[0][-1] == last_col_value, f"expected last col {last_col_value}, got row {rows[0]}"
def assert_raw_row_subset(
response: requests.Response,
name: str,
expected: dict[str, Any],
*,
row: int = 0,
) -> None:
"""Assert that the named raw result's rows[row]['data'] is a superset of `expected`."""
result = find_named_result(response.json()["data"]["data"]["results"], name)
assert result is not None, f"no result for query {name}"
rows = result["rows"]
assert rows is not None, f"no rows for query {name}"
data = rows[row]["data"]
assert expected.items() <= data.items(), f"expected subset {expected}, got data {data}"
def build_scalar_query(
name: str,
signal: str,
@@ -647,6 +713,28 @@ def assert_identical_query_response(response1: requests.Response, response2: req
assert response1.json()["data"]["data"]["results"] == response2.json()["data"]["data"]["results"], "Response data do not match"
# we already create the evolution for resource during schema migration
# since we have to create test data around it, we need to get the evolution time
def get_resource_evolution_time(signoz: types.SigNoz, signal: str) -> datetime:
result = signoz.telemetrystore.conn.query(
"""
SELECT release_time
FROM signoz_metadata.distributed_column_evolution_metadata
WHERE signal = %(signal)s
AND field_context = 'resource'
AND field_name = '__all__'
AND column_name = 'resource'
LIMIT 1
""",
parameters={"signal": signal},
).result_rows
assert result, f"Expected {signal} resource evolution metadata to exist"
release_time_ns = int(result[0][0])
return datetime.fromtimestamp(release_time_ns / 1e9, tz=UTC)
def generate_logs_with_corrupt_metadata() -> list[Logs]:
"""
Specifically, entries with 'id', 'timestamp', 'severity_text', 'severity_number' and 'body' fields in metadata

View File

@@ -6,7 +6,7 @@ import uuid
from abc import ABC
from collections.abc import Callable, Generator
from enum import Enum
from typing import Any
from typing import Any, Literal
from urllib.parse import urlparse
import numpy as np
@@ -236,6 +236,7 @@ class Traces(ABC):
attributes_number: dict[str, np.float64]
attributes_bool: dict[str, bool]
resources_string: dict[str, str]
resource_json: dict[str, str]
events: list[str]
links: str
response_status_code: str
@@ -273,6 +274,7 @@ class Traces(ABC):
links: list[TracesLink] = [],
trace_state: str = "",
flags: np.uint32 = 0,
resource_write_mode: Literal["legacy_only", "dual_write"] = "dual_write",
) -> None:
if timestamp is None:
timestamp = datetime.datetime.now()
@@ -322,8 +324,11 @@ class Traces(ABC):
self.db_name = ""
self.db_operation = ""
# Process resources and derive service_name
# Process resources and derive service_name. Spans written before the
# JSON-resource evolution time only populate resources_string (legacy_only);
# spans at or after the evolution time dual-write to both columns.
self.resources_string = {k: str(v) for k, v in resources.items()}
self.resource_json = {} if resource_write_mode == "legacy_only" else dict(self.resources_string)
self.service_name = self.resources_string.get("service.name", "default-service")
for k, v in self.resources_string.items():
@@ -575,7 +580,7 @@ class Traces(ABC):
self.db_operation,
self.has_error,
self.is_remote,
self.resources_string,
self.resource_json,
],
dtype=object,
)

View File

@@ -1,39 +1,20 @@
from collections.abc import Callable
from datetime import UTC, datetime, timedelta
from datetime import datetime, timedelta
from http import HTTPStatus
from fixtures import types
from fixtures.auth import USER_ADMIN_EMAIL, USER_ADMIN_PASSWORD
from fixtures.logs import Logs
from fixtures.querier import (
assert_grouped_series,
build_group_by_field,
build_logs_aggregation,
get_resource_evolution_time,
index_series_by_label,
make_query_request,
)
# we already create the evolution for resource during schema migration
# since we have to create test data around it, we need to get the evolution time
def _get_logs_resource_evolution_time_json(signoz: types.SigNoz) -> datetime:
result = signoz.telemetrystore.conn.query(
"""
SELECT release_time
FROM signoz_metadata.distributed_column_evolution_metadata
WHERE signal = 'logs'
AND field_context = 'resource'
AND field_name = '__all__'
AND column_name = 'resource'
LIMIT 1
"""
).result_rows
assert result, "Expected logs resource evolution metadata to exist"
release_time_ns = int(result[0][0])
return datetime.fromtimestamp(release_time_ns / 1e9, tz=UTC)
# Logs with timestamps before the evolution time will have resources written only to resources_string.
# Logs with timestamps at or after the evolution time will have resources written to both resources_string and resource_json.
def _build_evolved_log(
@@ -97,21 +78,6 @@ def _query_grouped_log_series(
return index_series_by_label(aggregations[0]["series"], group_by)
def _assert_grouped_series(
series_by_group: dict[str, dict],
expected_values_by_group: dict[str, dict[int, int]],
) -> None:
assert set(series_by_group.keys()) == set(expected_values_by_group.keys())
for group_name, expected_by_ts in expected_values_by_group.items():
actual_values = sorted(
series_by_group[group_name]["values"],
key=lambda value: value["timestamp"],
)
expected_values = [{"timestamp": timestamp, "value": value} for timestamp, value in sorted(expected_by_ts.items())]
assert actual_values == expected_values
def _test_logs_resource_evolution(
signoz: types.SigNoz,
token: str,
@@ -125,7 +91,7 @@ def _test_logs_resource_evolution(
# 5. Query the logs after the evolution time.
# Both aggregation and group by should be checked.
"""
evolution_time = _get_logs_resource_evolution_time_json(signoz)
evolution_time = get_resource_evolution_time(signoz, "logs")
evolution_time = evolution_time.replace(second=0, microsecond=0)
before_2 = evolution_time - timedelta(minutes=10)
@@ -163,7 +129,7 @@ def _test_logs_resource_evolution(
)
before_series = _query_grouped_log_series(signoz, token, before_2 - timedelta(minutes=1), before_1 + timedelta(minutes=1))
_assert_grouped_series(
assert_grouped_series(
before_series,
expected_values_by_group={
"svc-before-2": {
@@ -176,7 +142,7 @@ def _test_logs_resource_evolution(
)
after_series = _query_grouped_log_series(signoz, token, after_1 - timedelta(minutes=1), after_2 + timedelta(minutes=1))
_assert_grouped_series(
assert_grouped_series(
after_series,
expected_values_by_group={
"svc-after-1": {
@@ -189,7 +155,7 @@ def _test_logs_resource_evolution(
)
spanning_series = _query_grouped_log_series(signoz, token, before_2, after_2 + timedelta(minutes=1))
_assert_grouped_series(
assert_grouped_series(
spanning_series,
expected_values_by_group={
"svc-before-2": {
@@ -216,7 +182,7 @@ def _test_logs_resource_evolution(
group_by="deployment.environment",
aggregation="count_distinct(service.name)",
)
_assert_grouped_series(
assert_grouped_series(
aggregation_series,
expected_values_by_group={
"integration": {

View File

@@ -0,0 +1,206 @@
from collections.abc import Callable
from datetime import datetime, timedelta
from http import HTTPStatus
from fixtures import types
from fixtures.auth import USER_ADMIN_EMAIL, USER_ADMIN_PASSWORD
from fixtures.querier import (
assert_grouped_series,
build_group_by_field,
build_logs_aggregation,
get_resource_evolution_time,
index_series_by_label,
make_query_request,
)
from fixtures.traces import TraceIdGenerator, Traces
# Spans with timestamps before the evolution time will have resources written only to resources_string.
# Spans with timestamps at or after the evolution time will have resources written to both resources_string and resource (JSON).
def _build_evolved_span(
timestamp: datetime,
evolution_time: datetime,
service_name: str,
name: str,
) -> Traces:
resource_write_mode = "legacy_only" if timestamp < evolution_time else "dual_write"
return Traces(
timestamp=timestamp,
trace_id=TraceIdGenerator.trace_id(),
span_id=TraceIdGenerator.span_id(),
name=name,
resources={
"service.name": service_name,
"deployment.environment": "integration",
},
resource_write_mode=resource_write_mode,
)
def _query_grouped_trace_series(
signoz: types.SigNoz,
token: str,
start: datetime,
end: datetime,
group_by: str = "service.name",
aggregation: str = "count()",
) -> dict[str, list[dict]]:
response = make_query_request(
signoz,
token,
start_ms=int(start.timestamp() * 1000),
end_ms=int(end.timestamp() * 1000),
request_type="time_series",
queries=[
{
"type": "builder_query",
"spec": {
"name": "A",
"signal": "traces",
"stepInterval": 60,
"disabled": False,
"groupBy": [build_group_by_field(group_by)],
"having": {"expression": ""},
"aggregations": [build_logs_aggregation(aggregation)],
},
}
],
)
assert response.status_code == HTTPStatus.OK
assert response.json()["status"] == "success"
results = response.json()["data"]["data"]["results"]
assert len(results) == 1
aggregations = results[0]["aggregations"]
assert len(aggregations) == 1
return index_series_by_label(aggregations[0]["series"], group_by)
def _test_traces_resource_evolution(
signoz: types.SigNoz,
token: str,
insert_traces: Callable[[list[Traces]], None],
) -> None:
"""
# 1. Get the evolution time.
# 2. Ingest spans before the evolution time.
# 3. Ingest spans after the evolution time.
# 4. Query the spans before the evolution time.
# 5. Query the spans after the evolution time.
# Both aggregation and group by should be checked.
"""
evolution_time = get_resource_evolution_time(signoz, "traces")
evolution_time = evolution_time.replace(second=0, microsecond=0)
before_2 = evolution_time - timedelta(minutes=10)
before_1 = evolution_time - timedelta(minutes=5)
after_1 = evolution_time + timedelta(minutes=5)
after_2 = evolution_time + timedelta(minutes=10)
insert_traces(
[
_build_evolved_span(
timestamp=before_2,
evolution_time=evolution_time,
service_name="svc-before-2",
name="span before evolution 2",
),
_build_evolved_span(
timestamp=before_1,
evolution_time=evolution_time,
service_name="svc-before-1",
name="span before evolution 1",
),
_build_evolved_span(
timestamp=after_1,
evolution_time=evolution_time,
service_name="svc-after-1",
name="span after evolution 1",
),
_build_evolved_span(
timestamp=after_2,
evolution_time=evolution_time,
service_name="svc-after-2",
name="span after evolution 2",
),
]
)
before_series = _query_grouped_trace_series(signoz, token, before_2 - timedelta(minutes=1), before_1 + timedelta(minutes=1))
assert_grouped_series(
before_series,
expected_values_by_group={
"svc-before-2": {
int(before_2.timestamp() * 1000): 1,
},
"svc-before-1": {
int(before_1.timestamp() * 1000): 1,
},
},
)
after_series = _query_grouped_trace_series(signoz, token, after_1 - timedelta(minutes=1), after_2 + timedelta(minutes=1))
assert_grouped_series(
after_series,
expected_values_by_group={
"svc-after-1": {
int(after_1.timestamp() * 1000): 1,
},
"svc-after-2": {
int(after_2.timestamp() * 1000): 1,
},
},
)
spanning_series = _query_grouped_trace_series(signoz, token, before_2, after_2 + timedelta(minutes=1))
assert_grouped_series(
spanning_series,
expected_values_by_group={
"svc-before-2": {
int(before_2.timestamp() * 1000): 1,
},
"svc-before-1": {
int(before_1.timestamp() * 1000): 1,
},
"svc-after-1": {
int(after_1.timestamp() * 1000): 1,
},
"svc-after-2": {
int(after_2.timestamp() * 1000): 1,
},
},
)
# query to check aggregation on the resource field like count_distinct(service.name)
aggregation_series = _query_grouped_trace_series(
signoz,
token,
before_2,
after_2 + timedelta(minutes=1),
group_by="deployment.environment",
aggregation="count_distinct(service.name)",
)
assert_grouped_series(
aggregation_series,
expected_values_by_group={
"integration": {
int(before_2.timestamp() * 1000): 1,
int(before_1.timestamp() * 1000): 1,
int(after_1.timestamp() * 1000): 1,
int(after_2.timestamp() * 1000): 1,
},
},
)
def test_traces_resource_evolution(
signoz: types.SigNoz,
create_user_admin: None, # pylint: disable=unused-argument
get_token: Callable[[str, str], str],
insert_traces: Callable[[list[Traces]], None],
) -> None:
token = get_token(USER_ADMIN_EMAIL, USER_ADMIN_PASSWORD)
_test_traces_resource_evolution(signoz, token, insert_traces)

View File

@@ -25,13 +25,22 @@ returnSpansFrom="A"
from collections.abc import Callable
from datetime import UTC, datetime, timedelta
from http import HTTPStatus
from typing import Any
import pytest
import requests
from fixtures import types
from fixtures.auth import USER_ADMIN_EMAIL, USER_ADMIN_PASSWORD
from fixtures.querier import get_rows
from fixtures.querier import (
assert_grouped_scalar,
assert_raw_row_subset,
assert_scalar_value,
format_timestamp,
generate_traces_with_corrupt_metadata,
get_rows,
make_query_request,
)
from fixtures.traces import TraceIdGenerator, Traces, TracesKind, TracesStatusCode
@@ -434,3 +443,173 @@ def test_trace_operator(
)
assert response.status_code == HTTPStatus.OK, f"HTTP {response.status_code}: {response.text}"
assert case["validate"](response), f"validation failed: {response.json()}"
def _expected_trace_subset(trace: Traces) -> dict[str, Any]:
return {
"duration_nano": trace.duration_nano,
"name": trace.name,
"parent_span_id": trace.parent_span_id,
"span_id": trace.span_id,
"timestamp": format_timestamp(trace.timestamp),
"trace_id": trace.trace_id,
}
@pytest.mark.parametrize(
"payload_factory,request_type,assert_result",
[
# Case 1: CTE filter uses the deprecated intrinsic field `durationNano`.
pytest.param(
lambda traces: [
{
"type": "builder_query",
"spec": {
"name": "A",
"signal": "traces",
"filter": {"expression": 'durationNano = "3s"'},
},
},
{
"type": "builder_query",
"spec": {
"name": "B",
"signal": "traces",
"filter": {"expression": 'durationNano = "5s"'},
},
},
{
"type": "builder_trace_operator",
"spec": {
"name": "C",
"expression": "A => B",
"limit": 1,
},
},
],
"raw",
lambda response, traces: assert_raw_row_subset(response, "C", _expected_trace_subset(traces[0])),
id="deprecated-intrinsic-filter",
),
# Case 2: CTE filter uses the deprecated calculated field `responseStatusCode`.
pytest.param(
lambda traces: [
{
"type": "builder_query",
"spec": {
"name": "A",
"signal": "traces",
"filter": {"expression": 'responseStatusCode = "200"'},
},
},
{
"type": "builder_query",
"spec": {
"name": "B",
"signal": "traces",
"filter": {"expression": 'durationNano = "5s"'},
},
},
{
"type": "builder_trace_operator",
"spec": {
"name": "C",
"expression": "A => B",
"limit": 1,
},
},
],
"raw",
lambda response, traces: assert_raw_row_subset(response, "C", _expected_trace_subset(traces[0])),
id="deprecated-calculated-filter",
),
# Case 3: order by uses `count_` with fieldContext `span`, which has
# to be rewritten to the aggregation alias `span.count_`.
pytest.param(
lambda traces: [
{
"type": "builder_query",
"spec": {
"name": "A",
"signal": "traces",
"aggregations": [{"expression": "count()"}],
},
},
{
"type": "builder_trace_operator",
"spec": {
"name": "C",
"expression": "A",
"aggregations": [{"expression": "count()", "alias": "span.count_"}],
"order": [{"key": {"name": "count_", "fieldContext": "span"}, "direction": "desc"}],
},
},
],
"scalar",
lambda response, traces: assert_scalar_value(response, "C", len(traces)),
id="context-prefixed-aggregation-alias-order",
),
# Case 4: group by lists `cloud.provider` twice (once with a resource
# context, once without).
pytest.param(
lambda traces: [
{
"type": "builder_query",
"spec": {
"name": "A",
"signal": "traces",
"disabled": True,
"aggregations": [{"expression": "count()"}],
},
},
{
"type": "builder_trace_operator",
"spec": {
"name": "C",
"expression": "A",
"aggregations": [{"expression": "count()"}],
"groupBy": [
{"name": "cloud.provider", "fieldContext": "resource"},
{"name": "cloud.provider"},
],
},
},
],
"scalar",
lambda response, traces: assert_grouped_scalar(response, "C", expected_groups=1, expected_columns=2, last_col_value=len(traces)),
id="duplicate-group-by-deduplicated",
),
],
)
def test_trace_operator_with_adjusted_keys(
signoz: types.SigNoz,
create_user_admin: None, # pylint: disable=unused-argument
get_token: Callable[[str, str], str],
insert_traces: Callable[[list[Traces]], None],
payload_factory: Callable[[list[Traces]], list[dict[str, Any]]],
request_type: str,
assert_result: Callable[[requests.Response, list[Traces]], None],
) -> None:
"""
Trace operators build a CTE per referenced builder query and an outer
query on top. Both layers need the same key adjustment as regular trace
queries, otherwise deprecated keys and context-prefixed aliases don't
resolve.
"""
traces = generate_traces_with_corrupt_metadata()
insert_traces(traces)
payload = payload_factory(traces)
token = get_token(USER_ADMIN_EMAIL, USER_ADMIN_PASSWORD)
response = make_query_request(
signoz,
token,
start_ms=int((datetime.now(tz=UTC) - timedelta(minutes=5)).timestamp() * 1000),
end_ms=int(datetime.now(tz=UTC).timestamp() * 1000),
request_type=request_type,
queries=payload,
)
assert response.status_code == HTTPStatus.OK, response.text
assert_result(response, traces)