Compare commits

..

3 Commits

Author SHA1 Message Date
nityanandagohain
c71b16ff18 fix: minor changes 2026-05-18 14:38:03 +05:30
nityanandagohain
3c86f82e8f fix: update go.mod 2026-05-18 13:18:59 +05:30
nityanandagohain
96b6c6caba feat: opamp integration signozspanmapper 2026-05-18 13:12:24 +05:30
21 changed files with 602 additions and 666 deletions

2
go.mod
View File

@@ -23,6 +23,7 @@ require (
github.com/go-playground/validator/v10 v10.27.0
github.com/go-redis/redismock/v9 v9.2.0
github.com/go-viper/mapstructure/v2 v2.5.0
github.com/goccy/go-yaml v1.19.2
github.com/gojek/heimdall/v7 v7.0.3
github.com/golang-jwt/jwt/v5 v5.3.1
github.com/google/uuid v1.6.0
@@ -131,7 +132,6 @@ require (
github.com/go-openapi/swag/yamlutils v0.25.5 // indirect
github.com/go-playground/locales v0.14.1 // indirect
github.com/go-playground/universal-translator v0.18.1 // indirect
github.com/goccy/go-yaml v1.19.2 // indirect
github.com/google/go-cmp v0.7.0 // indirect
github.com/hashicorp/go-metrics v0.5.4 // indirect
github.com/huandu/go-clone v1.7.3 // indirect

View File

@@ -2,12 +2,17 @@ package implspanmapper
import (
"context"
"encoding/json"
"github.com/SigNoz/signoz/pkg/modules/spanmapper"
"github.com/SigNoz/signoz/pkg/query-service/agentConf"
"github.com/SigNoz/signoz/pkg/types/opamptypes"
"github.com/SigNoz/signoz/pkg/types/spantypes"
"github.com/SigNoz/signoz/pkg/valuer"
)
var _ spanmapper.Module = (*module)(nil)
type module struct {
store spantypes.SpanMapperStore
}
@@ -34,11 +39,22 @@ func (module *module) UpdateGroup(ctx context.Context, orgID, id valuer.UUID, na
return err
}
group.Update(name, condition, enabled, updatedBy)
return module.store.UpdateGroup(ctx, group)
err = module.store.UpdateGroup(ctx, group)
if err != nil {
return err
}
agentConf.NotifyConfigUpdate(ctx)
return nil
}
func (module *module) DeleteGroup(ctx context.Context, orgID, id valuer.UUID) error {
return module.store.DeleteGroup(ctx, orgID, id)
err := module.store.DeleteGroup(ctx, orgID, id)
if err != nil {
return err
}
agentConf.NotifyConfigUpdate(ctx)
return nil
}
func (module *module) ListMappers(ctx context.Context, orgID, groupID valuer.UUID) ([]*spantypes.SpanMapper, error) {
@@ -54,7 +70,12 @@ func (module *module) CreateMapper(ctx context.Context, orgID, groupID valuer.UU
if _, err := module.store.GetGroup(ctx, orgID, groupID); err != nil {
return err
}
return module.store.CreateMapper(ctx, mapper)
err := module.store.CreateMapper(ctx, mapper)
if err != nil {
return err
}
agentConf.NotifyConfigUpdate(ctx)
return nil
}
func (module *module) UpdateMapper(ctx context.Context, orgID, groupID, id valuer.UUID, fieldContext spantypes.FieldContext, config *spantypes.SpanMapperConfig, enabled *bool, updatedBy string) error {
@@ -66,9 +87,72 @@ func (module *module) UpdateMapper(ctx context.Context, orgID, groupID, id value
return err
}
mapper.Update(fieldContext, config, enabled, updatedBy)
return module.store.UpdateMapper(ctx, mapper)
err = module.store.UpdateMapper(ctx, mapper)
if err != nil {
return err
}
agentConf.NotifyConfigUpdate(ctx)
return nil
}
func (module *module) DeleteMapper(ctx context.Context, orgID, groupID, id valuer.UUID) error {
return module.store.DeleteMapper(ctx, orgID, groupID, id)
err := module.store.DeleteMapper(ctx, orgID, groupID, id)
if err != nil {
return err
}
agentConf.NotifyConfigUpdate(ctx)
return nil
}
func (module *module) AgentFeatureType() agentConf.AgentFeatureType {
return spantypes.SpanAttrMappingFeatureType
}
func (module *module) RecommendAgentConfig(orgID valuer.UUID, currentConfYaml []byte, configVersion *opamptypes.AgentConfigVersion) ([]byte, string, error) {
ctx := context.Background()
enabled, err := module.listEnabledGroupsWithMappers(ctx, orgID)
if err != nil {
return nil, "", err
}
updatedConf, err := spantypes.GenerateCollectorConfigWithSpanMapperProcessor(currentConfYaml, enabled)
if err != nil {
return nil, "", err
}
serialized, err := json.Marshal(enabled)
if err != nil {
return nil, "", err
}
return updatedConf, string(serialized), nil
}
// listEnabledGroupsWithMappers returns groups with their mappers.
func (module *module) listEnabledGroupsWithMappers(ctx context.Context, orgID valuer.UUID) ([]*spantypes.SpanMapperGroupWithMappers, error) {
enabled := true
groups, err := module.store.ListGroups(ctx, orgID, &spantypes.ListSpanMapperGroupsQuery{Enabled: &enabled})
if err != nil {
return nil, err
}
out := make([]*spantypes.SpanMapperGroupWithMappers, 0, len(groups))
for _, g := range groups {
mappers, err := module.store.ListMappers(ctx, orgID, g.ID)
if err != nil {
return nil, err
}
if len(mappers) == 0 {
continue
}
enabledMappers := make([]*spantypes.SpanMapper, 0, len(mappers))
for _, m := range mappers {
if m.Enabled {
enabledMappers = append(enabledMappers, m)
}
}
out = append(out, &spantypes.SpanMapperGroupWithMappers{Group: g, Mappers: enabledMappers})
}
return out, nil
}

View File

@@ -4,12 +4,16 @@ import (
"context"
"net/http"
"github.com/SigNoz/signoz/pkg/query-service/agentConf"
"github.com/SigNoz/signoz/pkg/types/spantypes"
"github.com/SigNoz/signoz/pkg/valuer"
)
// Module defines the business logic for span attribute mapping groups and mappers.
type Module interface {
// Since this module interacts with OpAMP, it must implement the AgentFeature interface.
agentConf.AgentFeature
// Group operations
ListGroups(ctx context.Context, orgID valuer.UUID, q *spantypes.ListSpanMapperGroupsQuery) ([]*spantypes.SpanMapperGroup, error)
GetGroup(ctx context.Context, orgID, id valuer.UUID) (*spantypes.SpanMapperGroup, error)

View File

@@ -265,15 +265,6 @@ func (q *builderQuery[T]) executeWithContext(ctx context.Context, query string,
return nil, err
}
// TODO: This should move to readAsRaw function in consume.go but for now we are keeping it here since it's only relevant for traces
if q.spec.Signal == telemetrytypes.SignalTraces {
if raw, ok := payload.(*qbtypes.RawData); ok {
for _, rr := range raw.Rows {
mergeSpanAttributeColumns(rr.Data)
}
}
}
return &qbtypes.Result{
Type: q.kind,
Value: payload,

View File

@@ -13,7 +13,6 @@ import (
"github.com/ClickHouse/clickhouse-go/v2/lib/driver"
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
"github.com/SigNoz/signoz/pkg/types/spantypes"
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
)
@@ -432,53 +431,6 @@ func readAsRaw(rows driver.Rows, queryName string) (*qbtypes.RawData, error) {
}, nil
}
// mergeSpanAttributeColumns merges (attributes_string, attributes_number, attributes_bool, resources_string) into
// unified "attributes" and "resource" keys, and parses the stringified `events`
// and `links` columns into structured slices. Raw DB columns are removed.
func mergeSpanAttributeColumns(data map[string]any) {
attrStr, hasStr := data["attributes_string"]
attrNum, hasNum := data["attributes_number"]
attrBool, hasBool := data["attributes_bool"]
// todo(nitya): move to resource json
resStr, hasRes := data["resources_string"]
if hasStr || hasNum || hasBool || hasRes {
attributes := make(map[string]any)
if m, ok := attrStr.(map[string]string); ok {
for k, v := range m {
attributes[k] = v
}
}
if m, ok := attrNum.(map[string]float64); ok {
for k, v := range m {
attributes[k] = v
}
}
if m, ok := attrBool.(map[string]bool); ok {
for k, v := range m {
attributes[k] = v
}
}
delete(data, "attributes_string")
delete(data, "attributes_number")
delete(data, "attributes_bool")
data["attributes"] = attributes
resource := map[string]string{}
if m, ok := resStr.(map[string]string); ok {
resource = m
}
data["resource"] = resource
delete(data, "resources_string")
}
if raw, ok := data["events"]; ok {
data["events"] = spantypes.ParseEvents(raw)
}
if raw, ok := data["links"]; ok {
data["links"] = spantypes.ParseLinks(raw)
}
}
// numericAsFloat converts numeric types to float64 efficiently.
func numericAsFloat(v any) float64 {
switch x := v.(type) {

View File

@@ -1,91 +0,0 @@
package querier
import (
"reflect"
"testing"
"github.com/SigNoz/signoz/pkg/types/spantypes"
)
func TestMergeSpanAttributeColumns_ParsesEventsAndLinks(t *testing.T) {
data := map[string]any{
"attributes_string": map[string]string{"http.method": "GET"},
"attributes_number": map[string]float64{"http.status_code": 200},
"attributes_bool": map[string]bool{"is_root": true},
"resources_string": map[string]string{"service.name": "api"},
"events": []string{
`{"name":"request_received","timeUnixNano":1778489782759245000,"attributeMap":{"http.method":"GET","http.route":"/api/chat"}}`,
`{"name":"cache_lookup","timeUnixNano":1778489782811697000,"attributeMap":{"cache.hit":"true","cache.key":"user:123:prompt"}}`,
},
"links": `[{"traceId":"abc","spanId":"123","refType":"CHILD_OF"},{"traceId":"def","spanId":"456","refType":"FOLLOWS_FROM"}]`,
}
mergeSpanAttributeColumns(data)
attrs, ok := data["attributes"].(map[string]any)
if !ok {
t.Fatalf("expected attributes to be map[string]any, got %T", data["attributes"])
}
if attrs["http.method"] != "GET" || attrs["http.status_code"] != float64(200) || attrs["is_root"] != true {
t.Fatalf("attributes not merged correctly: %#v", attrs)
}
res, ok := data["resource"].(map[string]string)
if !ok || res["service.name"] != "api" {
t.Fatalf("resource not set correctly: %#v", data["resource"])
}
for _, removed := range []string{"attributes_string", "attributes_number", "attributes_bool", "resources_string"} {
if _, present := data[removed]; present {
t.Fatalf("expected %s to be removed", removed)
}
}
events, ok := data["events"].([]spantypes.Event)
if !ok {
t.Fatalf("expected events to be []spantypes.Event, got %T", data["events"])
}
wantEvents := []spantypes.Event{
{
Name: "request_received",
TimeUnixNano: 1778489782759245000,
Attributes: map[string]any{"http.method": "GET", "http.route": "/api/chat"},
},
{
Name: "cache_lookup",
TimeUnixNano: 1778489782811697000,
Attributes: map[string]any{"cache.hit": "true", "cache.key": "user:123:prompt"},
},
}
if !reflect.DeepEqual(events, wantEvents) {
t.Fatalf("events parsed incorrectly:\n got: %#v\nwant: %#v", events, wantEvents)
}
links, ok := data["links"].([]spantypes.Link)
if !ok {
t.Fatalf("expected links to be []spantypes.Link, got %T", data["links"])
}
wantLinks := []spantypes.Link{
{TraceID: "abc", SpanID: "123"},
{TraceID: "def", SpanID: "456"},
}
if !reflect.DeepEqual(links, wantLinks) {
t.Fatalf("links parsed incorrectly:\n got: %#v\nwant: %#v", links, wantLinks)
}
}
func TestMergeSpanAttributeColumns_EmptyEventsAndLinks(t *testing.T) {
data := map[string]any{
"events": []string{},
"links": "[]",
}
mergeSpanAttributeColumns(data)
if events, ok := data["events"].([]spantypes.Event); !ok || len(events) != 0 {
t.Fatalf("expected empty []spantypes.Event, got %#v", data["events"])
}
if links, ok := data["links"].([]spantypes.Link); !ok || len(links) != 0 {
t.Fatalf("expected empty []spantypes.Link, got %#v", data["links"])
}
}

View File

@@ -85,13 +85,6 @@ func (q *traceOperatorQuery) executeWithContext(ctx context.Context, query strin
return nil, err
}
// TODO: This should move to readAsRaw function in consume.go but for now we can keep it here since it's only relevant for traces
if raw, ok := payload.(*qbtypes.RawData); ok {
for _, rr := range raw.Rows {
mergeSpanAttributeColumns(rr.Data)
}
}
return &qbtypes.Result{
Type: q.kind,
Value: payload,

View File

@@ -1,50 +1,6 @@
package telemetrytraces
import (
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
)
const (
// Internal Columns.
SpanTimestampBucketStartColumn = "ts_bucket_start"
SpanResourceFingerPrintColumn = "resource_fingerprint"
// Intrinsic Columns.
SpanTimestampColumn = "timestamp"
SpanTraceIDColumn = "trace_id"
SpanSpanIDColumn = "span_id"
SpanTraceStateColumn = "trace_state"
SpanParentSpanIDColumn = "parent_span_id"
SpanFlagsColumn = "flags"
SpanNameColumn = "name"
SpanKindColumn = "kind"
SpanKindStringColumn = "kind_string"
SpanDurationNanoColumn = "duration_nano"
SpanStatusCodeColumn = "status_code"
SpanStatusMessageColumn = "status_message"
SpanStatusCodeStringColumn = "status_code_string"
SpanEventsColumn = "events"
SpanLinksColumn = "links"
// Calculated Columns.
SpanResponseStatusCodeColumn = "response_status_code"
SpanExternalHTTPURLColumn = "external_http_url"
SpanHTTPURLColumn = "http_url"
SpanExternalHTTPMethodColumn = "external_http_method"
SpanHTTPMethodColumn = "http_method"
SpanHTTPHostColumn = "http_host"
SpanDBNameColumn = "db_name"
SpanDBOperationColumn = "db_operation"
SpanHasErrorColumn = "has_error"
SpanIsRemoteColumn = "is_remote"
// Contextual Columns.
SpanAttributesStringColumn = "attributes_string"
SpanAttributesNumberColumn = "attributes_number"
SpanAttributesBoolColumn = "attributes_bool"
SpanResourcesStringColumn = "resources_string"
)
import "github.com/SigNoz/signoz/pkg/types/telemetrytypes"
var (
IntrinsicFields = map[string]telemetrytypes.TelemetryFieldKey{
@@ -378,51 +334,6 @@ var (
SpanSearchScopeRoot = "isroot"
SpanSearchScopeEntryPoint = "isentrypoint"
// IntrinsicSpanFields lists the intrinsic span columns, in the order they
// should appear when a raw query expands its SelectFields.
IntrinsicSpanFields = []telemetrytypes.TelemetryFieldKey{
{Name: SpanTimestampColumn, FieldContext: telemetrytypes.FieldContextSpan},
{Name: SpanTraceIDColumn, FieldContext: telemetrytypes.FieldContextSpan},
{Name: SpanSpanIDColumn, FieldContext: telemetrytypes.FieldContextSpan},
{Name: SpanTraceStateColumn, FieldContext: telemetrytypes.FieldContextSpan},
{Name: SpanParentSpanIDColumn, FieldContext: telemetrytypes.FieldContextSpan},
{Name: SpanFlagsColumn, FieldContext: telemetrytypes.FieldContextSpan},
{Name: SpanNameColumn, FieldContext: telemetrytypes.FieldContextSpan},
{Name: SpanKindColumn, FieldContext: telemetrytypes.FieldContextSpan},
{Name: SpanKindStringColumn, FieldContext: telemetrytypes.FieldContextSpan},
{Name: SpanDurationNanoColumn, FieldContext: telemetrytypes.FieldContextSpan},
{Name: SpanStatusCodeColumn, FieldContext: telemetrytypes.FieldContextSpan},
{Name: SpanStatusMessageColumn, FieldContext: telemetrytypes.FieldContextSpan},
{Name: SpanStatusCodeStringColumn, FieldContext: telemetrytypes.FieldContextSpan},
{Name: SpanEventsColumn, FieldContext: telemetrytypes.FieldContextSpan},
{Name: SpanLinksColumn, FieldContext: telemetrytypes.FieldContextSpan},
}
// CalculatedSpanFields lists the calculated/derived span columns, in the
// order they should appear when a raw query expands its SelectFields.
CalculatedSpanFields = []telemetrytypes.TelemetryFieldKey{
{Name: SpanResponseStatusCodeColumn, FieldContext: telemetrytypes.FieldContextSpan},
{Name: SpanExternalHTTPURLColumn, FieldContext: telemetrytypes.FieldContextSpan},
{Name: SpanHTTPURLColumn, FieldContext: telemetrytypes.FieldContextSpan},
{Name: SpanExternalHTTPMethodColumn, FieldContext: telemetrytypes.FieldContextSpan},
{Name: SpanHTTPMethodColumn, FieldContext: telemetrytypes.FieldContextSpan},
{Name: SpanHTTPHostColumn, FieldContext: telemetrytypes.FieldContextSpan},
{Name: SpanDBNameColumn, FieldContext: telemetrytypes.FieldContextSpan},
{Name: SpanDBOperationColumn, FieldContext: telemetrytypes.FieldContextSpan},
{Name: SpanHasErrorColumn, FieldContext: telemetrytypes.FieldContextSpan},
{Name: SpanIsRemoteColumn, FieldContext: telemetrytypes.FieldContextSpan},
}
// ContextualSpanColumns lists the typed attribute and resource columns
// selected raw (rather than via ColumnExpressionFor) so that consume.go
// can merge them into unified "attributes" and "resource" maps.
ContextualSpanColumns = []string{
SpanAttributesStringColumn,
SpanAttributesNumberColumn,
SpanAttributesBoolColumn,
SpanResourcesStringColumn,
}
DefaultFields = map[string]telemetrytypes.TelemetryFieldKey{
"timestamp": {
Name: "timestamp",

View File

@@ -78,17 +78,6 @@ func TestGetFieldKeyName(t *testing.T) {
expectedResult: "multiIf(resource.`deployment.environment` IS NOT NULL, resource.`deployment.environment`::String, `resource_string_deployment$$environment_exists`==true, `resource_string_deployment$$environment`, NULL)",
expectedError: nil,
},
{
// Query like `attribute.attribute_string:string` should resolve to `attributes_string['attribute_string']`.
name: "Attribute key whose name collides with contextual map column resolves as a map lookup",
key: telemetrytypes.TelemetryFieldKey{
Name: SpanAttributesStringColumn,
FieldContext: telemetrytypes.FieldContextAttribute,
FieldDataType: telemetrytypes.FieldDataTypeString,
},
expectedResult: "attributes_string['attributes_string']",
expectedError: nil,
},
{
name: "Non-existent column",
key: telemetrytypes.TelemetryFieldKey{

View File

@@ -4,6 +4,7 @@ import (
"context"
"fmt"
"log/slog"
"slices"
"strings"
"github.com/SigNoz/signoz/pkg/errors"
@@ -15,6 +16,7 @@ import (
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
"github.com/huandu/go-sqlbuilder"
"golang.org/x/exp/maps"
)
var (
@@ -87,13 +89,40 @@ func (b *traceQueryStatementBuilder) Build(
return nil, err
}
isSelectFieldsEmpty := false
/*
Adding a tech debt note here:
This piece of code is a hot fix and should be removed once we close issue: engineering-pod/issues/3622
*/
/*
-------------------------------- Start of tech debt ----------------------------
*/
if requestType == qbtypes.RequestTypeRaw {
isSelectFieldsEmpty = len(query.SelectFields) == 0
// we are expanding here to ensure that all the conflicts are taken care in adjustKeys
// i.e if there is a conflict we strip away context of the key in adjustKeys
query = b.expandRawSelectFields(query)
selectedFields := query.SelectFields
if len(selectedFields) == 0 {
sortedKeys := maps.Keys(DefaultFields)
slices.Sort(sortedKeys)
for _, key := range sortedKeys {
selectedFields = append(selectedFields, DefaultFields[key])
}
query.SelectFields = selectedFields
}
selectFieldKeys := []string{}
for _, field := range selectedFields {
selectFieldKeys = append(selectFieldKeys, field.Name)
}
for _, x := range []string{"timestamp", "span_id", "trace_id"} {
if !slices.Contains(selectFieldKeys, x) {
query.SelectFields = append(query.SelectFields, DefaultFields[x])
}
}
}
/*
-------------------------------- End of tech debt ----------------------------
*/
query = b.adjustKeys(ctx, keys, query, requestType)
@@ -102,7 +131,7 @@ func (b *traceQueryStatementBuilder) Build(
switch requestType {
case qbtypes.RequestTypeRaw:
return b.buildListQuery(ctx, q, query, start, end, keys, variables, isSelectFieldsEmpty)
return b.buildListQuery(ctx, q, query, start, end, keys, variables)
case qbtypes.RequestTypeTimeSeries:
return b.buildTimeSeriesQuery(ctx, q, query, start, end, keys, variables)
case qbtypes.RequestTypeScalar:
@@ -266,7 +295,6 @@ func (b *traceQueryStatementBuilder) buildListQuery(
start, end uint64,
keys map[string][]*telemetrytypes.TelemetryFieldKey,
variables map[string]qbtypes.VariableItem,
isSelectFieldsEmpty bool,
) (*qbtypes.Statement, error) {
var (
@@ -281,6 +309,7 @@ func (b *traceQueryStatementBuilder) buildListQuery(
cteArgs = append(cteArgs, args)
}
// TODO: should we deprecate `SelectFields` and return everything from a span like we do for logs?
for _, field := range query.SelectFields {
colExpr, err := b.fm.ColumnExpressionFor(ctx, start, end, &field, keys)
if err != nil {
@@ -289,12 +318,6 @@ func (b *traceQueryStatementBuilder) buildListQuery(
sb.SelectMore(colExpr)
}
if isSelectFieldsEmpty {
for _, col := range ContextualSpanColumns {
sb.SelectMore(col)
}
}
// From table
sb.From(fmt.Sprintf("%s.%s", DBName, SpanIndexV3TableName))
@@ -821,30 +844,3 @@ func (b *traceQueryStatementBuilder) buildResourceFilterCTE(
variables,
)
}
// expandRawSelectFields populates SelectFields for raw (list view) queries.
// It must be called before adjustKeys so that normalization runs over the full set.
func (b *traceQueryStatementBuilder) expandRawSelectFields(query qbtypes.QueryBuilderQuery[qbtypes.TraceAggregation]) qbtypes.QueryBuilderQuery[qbtypes.TraceAggregation] {
if len(query.SelectFields) == 0 {
selectFields := make([]telemetrytypes.TelemetryFieldKey, 0, len(IntrinsicSpanFields)+len(CalculatedSpanFields))
selectFields = append(selectFields, IntrinsicSpanFields...)
selectFields = append(selectFields, CalculatedSpanFields...)
query.SelectFields = selectFields
return query
}
selectFields := []telemetrytypes.TelemetryFieldKey{
{Name: SpanTimestampColumn, FieldContext: telemetrytypes.FieldContextSpan},
{Name: SpanTraceIDColumn, FieldContext: telemetrytypes.FieldContextSpan},
{Name: SpanSpanIDColumn, FieldContext: telemetrytypes.FieldContextSpan},
}
for _, field := range query.SelectFields {
// TODO(tvats): If a user specifies attribute.timestamp in the select fields, this loop will basically ignore it, as we already added a field by default. This can be fixed once we close https://github.com/SigNoz/engineering-pod/issues/3693
if field.Name == SpanTimestampColumn || field.Name == SpanTraceIDColumn || field.Name == SpanSpanIDColumn {
continue
}
selectFields = append(selectFields, field)
}
query.SelectFields = selectFields
return query
}

View File

@@ -439,7 +439,7 @@ func TestStatementBuilderListQuery(t *testing.T) {
},
},
expected: qbtypes.Statement{
Query: "WITH __resource_filter AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?) SELECT timestamp AS `timestamp`, trace_id AS `trace_id`, span_id AS `span_id`, name AS `name`, multiIf(resource.`service.name` IS NOT NULL, resource.`service.name`::String, mapContains(resources_string, 'service.name'), resources_string['service.name'], NULL) AS `service.name`, duration_nano AS `duration_nano`, `attribute_number_cart$$items_count` AS `cart.items_count` FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? LIMIT ?",
Query: "WITH __resource_filter AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?) SELECT name AS `name`, multiIf(resource.`service.name` IS NOT NULL, resource.`service.name`::String, mapContains(resources_string, 'service.name'), resources_string['service.name'], NULL) AS `service.name`, duration_nano AS `duration_nano`, `attribute_number_cart$$items_count` AS `cart.items_count`, timestamp AS `timestamp`, span_id AS `span_id`, trace_id AS `trace_id` FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? LIMIT ?",
Args: []any{"redis-manual", "%service.name%", "%service.name\":\"redis-manual%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), 10},
},
expectedErr: nil,
@@ -468,7 +468,7 @@ func TestStatementBuilderListQuery(t *testing.T) {
Limit: 10,
},
expected: qbtypes.Statement{
Query: "WITH __resource_filter AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?) SELECT timestamp AS `timestamp`, trace_id AS `trace_id`, span_id AS `span_id`, trace_state AS `trace_state`, parent_span_id AS `parent_span_id`, flags AS `flags`, name AS `name`, kind AS `kind`, kind_string AS `kind_string`, duration_nano AS `duration_nano`, status_code AS `status_code`, status_message AS `status_message`, status_code_string AS `status_code_string`, events AS `events`, links AS `links`, response_status_code AS `response_status_code`, external_http_url AS `external_http_url`, http_url AS `http_url`, external_http_method AS `external_http_method`, http_method AS `http_method`, http_host AS `http_host`, db_name AS `db_name`, db_operation AS `db_operation`, has_error AS `has_error`, is_remote AS `is_remote`, attributes_string, attributes_number, attributes_bool, resources_string FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? ORDER BY attributes_string['user.id'] AS `user.id` desc LIMIT ?",
Query: "WITH __resource_filter AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?) SELECT duration_nano AS `duration_nano`, name AS `name`, response_status_code AS `response_status_code`, multiIf(resource.`service.name` IS NOT NULL, resource.`service.name`::String, mapContains(resources_string, 'service.name'), resources_string['service.name'], NULL) AS `service.name`, span_id AS `span_id`, timestamp AS `timestamp`, trace_id AS `trace_id` FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? ORDER BY attributes_string['user.id'] AS `user.id` desc LIMIT ?",
Args: []any{"redis-manual", "%service.name%", "%service.name\":\"redis-manual%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), 10},
},
expectedErr: nil,
@@ -512,7 +512,7 @@ func TestStatementBuilderListQuery(t *testing.T) {
Limit: 10,
},
expected: qbtypes.Statement{
Query: "WITH __resource_filter AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?) SELECT timestamp AS `timestamp`, trace_id AS `trace_id`, span_id AS `span_id`, name AS `name`, resource_string_service$$name AS `serviceName`, duration_nano AS `durationNano`, http_method AS `httpMethod`, response_status_code AS `responseStatusCode` FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? LIMIT ?",
Query: "WITH __resource_filter AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?) SELECT name AS `name`, resource_string_service$$name AS `serviceName`, duration_nano AS `durationNano`, http_method AS `httpMethod`, response_status_code AS `responseStatusCode`, timestamp AS `timestamp`, span_id AS `span_id`, trace_id AS `trace_id` FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? LIMIT ?",
Args: []any{"redis-manual", "%service.name%", "%service.name\":\"redis-manual%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), 10},
},
expectedErr: nil,
@@ -556,7 +556,7 @@ func TestStatementBuilderListQuery(t *testing.T) {
Limit: 10,
},
expected: qbtypes.Statement{
Query: "WITH __resource_filter AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?) SELECT timestamp AS `timestamp`, trace_id AS `trace_id`, span_id AS `span_id`, name AS `name`, resource_string_service$$name AS `serviceName`, duration_nano AS `durationNano`, http_method AS `httpMethod`, multiIf(toString(`attribute_string_mixed$$materialization$$key`) != '', toString(`attribute_string_mixed$$materialization$$key`), toString(multiIf(resource.`mixed.materialization.key` IS NOT NULL, resource.`mixed.materialization.key`::String, mapContains(resources_string, 'mixed.materialization.key'), resources_string['mixed.materialization.key'], NULL)) != '', toString(multiIf(resource.`mixed.materialization.key` IS NOT NULL, resource.`mixed.materialization.key`::String, mapContains(resources_string, 'mixed.materialization.key'), resources_string['mixed.materialization.key'], NULL)), NULL) AS `mixed.materialization.key` FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? LIMIT ?",
Query: "WITH __resource_filter AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?) SELECT name AS `name`, resource_string_service$$name AS `serviceName`, duration_nano AS `durationNano`, http_method AS `httpMethod`, multiIf(toString(`attribute_string_mixed$$materialization$$key`) != '', toString(`attribute_string_mixed$$materialization$$key`), toString(multiIf(resource.`mixed.materialization.key` IS NOT NULL, resource.`mixed.materialization.key`::String, mapContains(resources_string, 'mixed.materialization.key'), resources_string['mixed.materialization.key'], NULL)) != '', toString(multiIf(resource.`mixed.materialization.key` IS NOT NULL, resource.`mixed.materialization.key`::String, mapContains(resources_string, 'mixed.materialization.key'), resources_string['mixed.materialization.key'], NULL)), NULL) AS `mixed.materialization.key`, timestamp AS `timestamp`, span_id AS `span_id`, trace_id AS `trace_id` FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? LIMIT ?",
Args: []any{"redis-manual", "%service.name%", "%service.name\":\"redis-manual%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), 10},
},
expectedErr: nil,
@@ -601,7 +601,7 @@ func TestStatementBuilderListQuery(t *testing.T) {
Limit: 10,
},
expected: qbtypes.Statement{
Query: "WITH __resource_filter AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?) SELECT timestamp AS `timestamp`, trace_id AS `trace_id`, span_id AS `span_id`, name AS `name`, resource_string_service$$name AS `serviceName`, duration_nano AS `durationNano`, http_method AS `httpMethod`, `attribute_string_mixed$$materialization$$key` AS `mixed.materialization.key` FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? LIMIT ?",
Query: "WITH __resource_filter AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?) SELECT name AS `name`, resource_string_service$$name AS `serviceName`, duration_nano AS `durationNano`, http_method AS `httpMethod`, `attribute_string_mixed$$materialization$$key` AS `mixed.materialization.key`, timestamp AS `timestamp`, span_id AS `span_id`, trace_id AS `trace_id` FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? LIMIT ?",
Args: []any{"redis-manual", "%service.name%", "%service.name\":\"redis-manual%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), 10},
},
expectedErr: nil,
@@ -711,7 +711,7 @@ func TestStatementBuilderListQueryWithCorruptData(t *testing.T) {
Limit: 10,
},
expected: qbtypes.Statement{
Query: "SELECT timestamp AS `timestamp`, trace_id AS `trace_id`, span_id AS `span_id`, trace_state AS `trace_state`, parent_span_id AS `parent_span_id`, flags AS `flags`, name AS `name`, kind AS `kind`, kind_string AS `kind_string`, duration_nano AS `duration_nano`, status_code AS `status_code`, status_message AS `status_message`, status_code_string AS `status_code_string`, events AS `events`, links AS `links`, response_status_code AS `response_status_code`, external_http_url AS `external_http_url`, http_url AS `http_url`, external_http_method AS `external_http_method`, http_method AS `http_method`, http_host AS `http_host`, db_name AS `db_name`, db_operation AS `db_operation`, has_error AS `has_error`, is_remote AS `is_remote`, attributes_string, attributes_number, attributes_bool, resources_string FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? LIMIT ?",
Query: "SELECT duration_nano AS `duration_nano`, name AS `name`, response_status_code AS `response_status_code`, multiIf(resource.`service.name` IS NOT NULL, resource.`service.name`::String, mapContains(resources_string, 'service.name'), resources_string['service.name'], NULL) AS `service.name`, span_id AS `span_id`, timestamp AS `timestamp`, trace_id AS `trace_id` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? LIMIT ?",
Args: []any{"1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), 10},
},
expectedErr: nil,
@@ -744,7 +744,7 @@ func TestStatementBuilderListQueryWithCorruptData(t *testing.T) {
}},
},
expected: qbtypes.Statement{
Query: "SELECT timestamp AS `timestamp`, trace_id AS `trace_id`, span_id AS `span_id`, trace_state AS `trace_state`, parent_span_id AS `parent_span_id`, flags AS `flags`, name AS `name`, kind AS `kind`, kind_string AS `kind_string`, duration_nano AS `duration_nano`, status_code AS `status_code`, status_message AS `status_message`, status_code_string AS `status_code_string`, events AS `events`, links AS `links`, response_status_code AS `response_status_code`, external_http_url AS `external_http_url`, http_url AS `http_url`, external_http_method AS `external_http_method`, http_method AS `http_method`, http_host AS `http_host`, db_name AS `db_name`, db_operation AS `db_operation`, has_error AS `has_error`, is_remote AS `is_remote`, attributes_string, attributes_number, attributes_bool, resources_string FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? ORDER BY timestamp AS `timestamp` asc LIMIT ?",
Query: "SELECT duration_nano AS `duration_nano`, name AS `name`, response_status_code AS `response_status_code`, multiIf(resource.`service.name` IS NOT NULL, resource.`service.name`::String, mapContains(resources_string, 'service.name'), resources_string['service.name'], NULL) AS `service.name`, span_id AS `span_id`, timestamp AS `timestamp`, trace_id AS `trace_id` FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? ORDER BY timestamp AS `timestamp` asc LIMIT ?",
Args: []any{"1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), 10},
},
expectedErr: nil,

View File

@@ -236,7 +236,6 @@ type RawStream struct {
Error chan error
}
func roundToNonZeroDecimals(val float64, n int) float64 {
if val == 0 || math.IsNaN(val) || math.IsInf(val, 0) {
return val

View File

@@ -1,59 +0,0 @@
package spantypes
import "encoding/json"
type Event struct {
Name string `json:"name"`
TimeUnixNano uint64 `json:"timeUnixNano"`
Attributes map[string]any `json:"attributes,omitempty"`
}
// Link is the response shape for a span link.
// The refType field is intentionally not decoded; it's a Jaeger-era
// concept that OTel doesn't model, so we drop it on the way out.
type Link struct {
TraceID string `json:"traceId,omitempty"`
SpanID string `json:"spanId,omitempty"`
}
// dbEvent matches the JSON object stored in the ClickHouse `events`
// Array(String) column.
type dbEvent struct {
Name string `json:"name"`
TimeUnixNano uint64 `json:"timeUnixNano"`
AttributeMap map[string]any `json:"attributeMap"`
}
// ParseEvents column (Array(String) of JSON-encoded events) into a slice of Event values.
// Malformed entries are skipped.
func ParseEvents(raw any) []Event {
strs, ok := raw.([]string)
if !ok {
return []Event{}
}
events := make([]Event, 0, len(strs))
for _, s := range strs {
var e dbEvent
if err := json.Unmarshal([]byte(s), &e); err != nil {
continue
}
events = append(events, Event{
Name: e.Name,
TimeUnixNano: e.TimeUnixNano,
Attributes: e.AttributeMap,
})
}
return events
}
func ParseLinks(raw any) []Link {
s, ok := raw.(string)
if !ok || s == "" {
return []Link{}
}
var links []Link
if err := json.Unmarshal([]byte(s), &links); err != nil {
return []Link{}
}
return links
}

View File

@@ -13,7 +13,9 @@ var (
ErrCodeMappingGroupAlreadyExists = errors.MustNewCode("span_attribute_mapping_group_already_exists")
)
// A group runs when any of the listed attribute/resource key patterns match.
// SpanMapperGroupCondition gates whether a group's rules run for a given span.
// A group runs when any attribute or resource key on the span CONTAINS one of
// the listed substrings (plain substring match — no glob syntax).
type SpanMapperGroupCondition struct {
Attributes []string `json:"attributes" required:"true" nullable:"true"`
Resource []string `json:"resource" required:"true" nullable:"true"`

View File

@@ -0,0 +1,144 @@
package spantypes
import (
"sort"
"github.com/SigNoz/signoz/pkg/errors"
"github.com/SigNoz/signoz/pkg/query-service/agentConf"
"github.com/goccy/go-yaml"
)
const (
SpanAttrMappingFeatureType agentConf.AgentFeatureType = "span_attr_mapping"
ProcessorName = "signozspanmapper"
)
var (
ErrCodeInvalidCollectorConfig = errors.MustNewCode("invalid_collector_config")
ErrCodeBuildMappingProcessorConfig = errors.MustNewCode("build_mapping_processor_config")
)
type SpanMapperGroupWithMappers struct {
Group *SpanMapperGroup `json:"group"`
Mappers []*SpanMapper `json:"mappers"`
}
// spanMapperProcessorConfig is the collector config for signozspanmapper.
type spanMapperProcessorConfig struct {
Groups []spanMapperProcessorGroup `yaml:"groups" json:"groups"`
}
type spanMapperProcessorGroup struct {
ID string `yaml:"id" json:"id"`
ExistsAny spanMapperProcessorExistsAny `yaml:"exists_any" json:"exists_any"`
Attributes []spanMapperProcessorAttribute `yaml:"attributes" json:"attributes"`
}
type spanMapperProcessorExistsAny struct {
Attributes []string `yaml:"attributes,omitempty" json:"attributes,omitempty"`
Resource []string `yaml:"resource,omitempty" json:"resource,omitempty"`
}
type spanMapperProcessorAttribute struct {
Target string `yaml:"target" json:"target"`
Context string `yaml:"context,omitempty" json:"context,omitempty"`
Action string `yaml:"action,omitempty" json:"action,omitempty"`
Sources []string `yaml:"sources" json:"sources"`
}
func GenerateCollectorConfigWithSpanMapperProcessor(currentConfYaml []byte, groups []*SpanMapperGroupWithMappers) ([]byte, error) {
var collectorConf map[string]any
if err := yaml.Unmarshal(currentConfYaml, &collectorConf); err != nil {
return nil, errors.Wrapf(err, errors.TypeInvalidInput, ErrCodeInvalidCollectorConfig, "failed to unmarshal collector config")
}
// rare but don't do anything in this case, also means it's just comments.
if collectorConf == nil {
collectorConf = map[string]any{}
}
processors := map[string]any{}
if existing, ok := collectorConf["processors"]; ok && existing != nil {
p, ok := existing.(map[string]any)
if !ok {
return nil, errors.Newf(errors.TypeInvalidInput, ErrCodeInvalidCollectorConfig, "collector config 'processors' must be a mapping, got %T", existing)
}
processors = p
}
procConfig := buildProcessorConfig(groups)
configBytes, err := yaml.Marshal(procConfig)
if err != nil {
return nil, errors.Wrapf(err, errors.TypeInternal, ErrCodeBuildMappingProcessorConfig, "failed to marshal span mapper processor config")
}
var configMap any
if err := yaml.Unmarshal(configBytes, &configMap); err != nil {
return nil, errors.Wrapf(err, errors.TypeInternal, ErrCodeBuildMappingProcessorConfig, "failed to re-unmarshal span mapper processor config")
}
processors[ProcessorName] = configMap
collectorConf["processors"] = processors
out, err := yaml.Marshal(collectorConf)
if err != nil {
return nil, errors.Wrapf(err, errors.TypeInternal, ErrCodeBuildMappingProcessorConfig, "failed to marshal collector config")
}
return out, nil
}
func buildProcessorConfig(groups []*SpanMapperGroupWithMappers) *spanMapperProcessorConfig {
out := make([]spanMapperProcessorGroup, 0, len(groups))
for _, gm := range groups {
rules := make([]spanMapperProcessorAttribute, 0, len(gm.Mappers))
for _, m := range gm.Mappers {
rules = append(rules, buildAttributeRule(m))
}
out = append(out, spanMapperProcessorGroup{
ID: gm.Group.Name,
ExistsAny: spanMapperProcessorExistsAny{
Attributes: gm.Group.Condition.Attributes,
Resource: gm.Group.Condition.Resource,
},
Attributes: rules,
})
}
return &spanMapperProcessorConfig{Groups: out}
}
// buildAttributeRule maps a single SpanMapper to a collector attribute rule.
// Sources are sorted by Priority DESC (highest-priority first), and read-from-
// resource sources are encoded via the "resource." prefix.
func buildAttributeRule(m *SpanMapper) spanMapperProcessorAttribute {
sources := make([]SpanMapperSource, len(m.Config.Sources))
copy(sources, m.Config.Sources)
sort.SliceStable(sources, func(i, j int) bool { return sources[i].Priority > sources[j].Priority })
keys := make([]string, 0, len(sources))
for _, s := range sources {
if s.Context == FieldContextResource {
keys = append(keys, FieldContextResource.StringValue()+"."+s.Key)
} else {
keys = append(keys, s.Key)
}
}
action := SpanMapperOperationCopy
if len(sources) > 0 && sources[0].Operation == SpanMapperOperationMove {
action = SpanMapperOperationMove
}
ctx := FieldContextSpanAttribute
if m.FieldContext == FieldContextResource {
ctx = FieldContextResource
}
return spanMapperProcessorAttribute{
Target: m.Name,
Context: ctx.StringValue(),
Action: action.StringValue(),
Sources: keys,
}
}

View File

@@ -0,0 +1,193 @@
package spantypes
import (
"os"
"path/filepath"
"testing"
"github.com/SigNoz/signoz/pkg/errors"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"gopkg.in/yaml.v3"
)
func TestGenerateCollectorConfigWithSpanMapperProcessor(t *testing.T) {
t.Parallel()
baseline := loadFixture(t, "collector_baseline.yaml")
tests := []struct {
name string
groups []*SpanMapperGroupWithMappers
want string
}{
{
name: "no_groups",
want: "collector_no_groups.yaml",
},
{
name: "with_groups",
groups: []*SpanMapperGroupWithMappers{
{
Group: newGroup("llm", []string{"model"}, []string{"service.name"}),
Mappers: []*SpanMapper{
newMapper("gen_ai.request.model", FieldContextResource,
attrSrc("gen_ai.llm.model", SpanMapperOperationCopy, 3),
attrSrc("llm.model", SpanMapperOperationCopy, 2),
resSrc("service.name", SpanMapperOperationCopy, 1),
),
newMapper("gen_ai.request.tokens", FieldContextSpanAttribute,
attrSrc("gen_ai.request_tokens", SpanMapperOperationCopy, 2),
attrSrc("llm.tokens", SpanMapperOperationCopy, 1),
),
newMapper("gen_ai.request.input", FieldContextSpanAttribute,
attrSrc("gen_ai.input", SpanMapperOperationMove, 2),
attrSrc("llm.input", SpanMapperOperationMove, 1),
),
},
},
{
Group: newGroup("agent", []string{"agent."}, nil),
Mappers: []*SpanMapper{
newMapper("gen_ai.agent.name", FieldContextSpanAttribute,
attrSrc("agent.name", SpanMapperOperationCopy, 2),
attrSrc("llm.agent.name", SpanMapperOperationCopy, 1),
),
newMapper("gen_ai.agent.id", FieldContextSpanAttribute,
attrSrc("gen_ai.agent.id", SpanMapperOperationCopy, 2),
attrSrc("llm.agent.id", SpanMapperOperationCopy, 1),
),
},
},
{
Group: newGroup("tool", []string{"agent."}, nil),
Mappers: []*SpanMapper{
newMapper("gen_ai.tool.name", FieldContextSpanAttribute,
attrSrc("ai.tool.name", SpanMapperOperationCopy, 2),
attrSrc("llm.tool.name", SpanMapperOperationCopy, 1),
),
},
},
},
want: "collector_with_groups.yaml",
},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
got, err := GenerateCollectorConfigWithSpanMapperProcessor(baseline, tc.groups)
require.NoError(t, err)
assertYAMLEqual(t, loadFixture(t, tc.want), got)
})
}
}
func TestGenerateCollectorConfigWithSpanMapperProcessor_Errors(t *testing.T) {
t.Parallel()
tests := []struct {
name string
in []byte
}{
{"processors_not_a_map", []byte("processors: not-a-map\n")},
{"malformed_yaml", []byte(": :")},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
_, err := GenerateCollectorConfigWithSpanMapperProcessor(tc.in, nil)
require.Error(t, err)
assert.True(t, errors.Ast(err, errors.TypeInvalidInput), "want TypeInvalidInput, got %v", err)
assert.True(t, errors.Asc(err, ErrCodeInvalidCollectorConfig), "want ErrCodeInvalidCollectorConfig, got %v", err)
})
}
}
func TestBuildAttributeRule(t *testing.T) {
t.Parallel()
tests := []struct {
name string
mapper *SpanMapper
want spanMapperProcessorAttribute
}{
{
name: "priority_sort_and_resource_prefix",
mapper: newMapper("gen_ai.request.model", FieldContextResource,
attrSrc("llm.model", SpanMapperOperationCopy, 20),
resSrc("service.name", SpanMapperOperationCopy, 10),
attrSrc("gen_ai.llm.model", SpanMapperOperationCopy, 30),
),
want: spanMapperProcessorAttribute{
Target: "gen_ai.request.model",
Context: FieldContextResource.StringValue(),
Action: SpanMapperOperationCopy.StringValue(),
Sources: []string{"gen_ai.llm.model", "llm.model", "resource.service.name"},
},
},
{
name: "move_action_follows_highest_priority_source",
mapper: newMapper("gen_ai.request.input", FieldContextSpanAttribute,
attrSrc("gen_ai.input", SpanMapperOperationMove, 20),
attrSrc("llm.input", SpanMapperOperationMove, 10),
),
want: spanMapperProcessorAttribute{
Target: "gen_ai.request.input",
Context: FieldContextSpanAttribute.StringValue(),
Action: SpanMapperOperationMove.StringValue(),
Sources: []string{"gen_ai.input", "llm.input"},
},
},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
assert.Equal(t, tc.want, buildAttributeRule(tc.mapper))
})
}
}
func loadFixture(t *testing.T, name string) []byte {
t.Helper()
b, err := os.ReadFile(filepath.Join("testdata", name))
require.NoError(t, err)
return b
}
// assertYAMLEqual compares two YAML documents structurally so key order and
// slice formatting do not matter.
func assertYAMLEqual(t *testing.T, want, got []byte) {
t.Helper()
var w, g any
require.NoError(t, yaml.Unmarshal(want, &w))
require.NoError(t, yaml.Unmarshal(got, &g))
assert.Equal(t, w, g)
}
func newGroup(name string, attrs, res []string) *SpanMapperGroup {
return &SpanMapperGroup{
Name: name,
Condition: SpanMapperGroupCondition{Attributes: attrs, Resource: res},
Enabled: true,
}
}
func newMapper(name string, target FieldContext, sources ...SpanMapperSource) *SpanMapper {
return &SpanMapper{
Name: name,
FieldContext: target,
Config: SpanMapperConfig{Sources: sources},
Enabled: true,
}
}
func attrSrc(key string, op SpanMapperOperation, priority int) SpanMapperSource {
return SpanMapperSource{Key: key, Context: FieldContextSpanAttribute, Operation: op, Priority: priority}
}
func resSrc(key string, op SpanMapperOperation, priority int) SpanMapperSource {
return SpanMapperSource{Key: key, Context: FieldContextResource, Operation: op, Priority: priority}
}

View File

@@ -0,0 +1,17 @@
receivers:
otlp:
protocols:
grpc:
processors:
signozspanmapper:
groups: []
batch: {}
exporters:
otlp:
endpoint: localhost:4317
service:
pipelines:
traces:
receivers: [otlp]
processors: [signozspanmapper, batch]
exporters: [otlp]

View File

@@ -0,0 +1,17 @@
receivers:
otlp:
protocols:
grpc:
processors:
signozspanmapper:
groups: []
batch: {}
exporters:
otlp:
endpoint: localhost:4317
service:
pipelines:
traces:
receivers: [otlp]
processors: [signozspanmapper, batch]
exporters: [otlp]

View File

@@ -0,0 +1,71 @@
receivers:
otlp:
protocols:
grpc:
processors:
signozspanmapper:
groups:
- id: llm
exists_any:
attributes:
- model
resource:
- service.name
attributes:
- target: gen_ai.request.model
context: resource
action: copy
sources:
- gen_ai.llm.model
- llm.model
- resource.service.name
- target: gen_ai.request.tokens
context: attribute
action: copy
sources:
- gen_ai.request_tokens
- llm.tokens
- target: gen_ai.request.input
context: attribute
action: move
sources:
- gen_ai.input
- llm.input
- id: agent
exists_any:
attributes:
- agent.
attributes:
- target: gen_ai.agent.name
context: attribute
action: copy
sources:
- agent.name
- llm.agent.name
- target: gen_ai.agent.id
context: attribute
action: copy
sources:
- gen_ai.agent.id
- llm.agent.id
- id: tool
exists_any:
attributes:
- agent.
attributes:
- target: gen_ai.tool.name
context: attribute
action: copy
sources:
- ai.tool.name
- llm.tool.name
batch: {}
exporters:
otlp:
endpoint: localhost:4317
service:
pipelines:
traces:
receivers: [otlp]
processors: [signozspanmapper, batch]
exporters: [otlp]

View File

@@ -236,9 +236,8 @@ class Traces(ABC):
attributes_number: dict[str, np.float64]
attributes_bool: dict[str, bool]
resources_string: dict[str, str]
# Accepting parsed events and links, but will be stored as list[str], str in db
events: list[dict[str, Any]]
links: list[dict[str, Any]]
events: list[str]
links: str
response_status_code: str
external_http_url: str
http_url: str
@@ -424,17 +423,10 @@ class Traces(ABC):
)
)
# Process events and derive error events. self.events holds the parsed
# response shape; np_arr() encodes back to the DB format on insert.
# Process events and derive error events
self.events = []
for event in events:
self.events.append(
{
"name": event.name,
"timeUnixNano": int(event.time_unix_nano),
"attributes": dict(event.attribute_map),
}
)
self.events.append(json.dumps([event.name, event.time_unix_nano, event.attribute_map]))
# Create error events for exception events (following Go exporter logic)
if event.name == "exception":
@@ -456,26 +448,7 @@ class Traces(ABC):
),
)
# self.links holds the parsed response shape (trace_id/span_id only;
# ref_type is dropped to match the API). np_arr() re-encodes for DB insert.
self.links = [{"traceId": link.trace_id, "spanId": link.span_id} for link in links_copy]
self._links_db = json.dumps(
[link.__dict__() for link in links_copy],
separators=(",", ":"),
)
# DB shape per event: {"name", "timeUnixNano", "attributeMap"}. Must match
# what the consume-layer parser in pkg/types/spantypes expects.
self._events_db = [
json.dumps(
{
"name": event.name,
"timeUnixNano": int(event.time_unix_nano),
"attributeMap": dict(event.attribute_map),
},
separators=(",", ":"),
)
for event in events
]
self.links = json.dumps([link.__dict__() for link in links_copy], separators=(",", ":"))
# Initialize resource
self.resource = []
@@ -590,8 +563,8 @@ class Traces(ABC):
self.attributes_number,
self.attributes_bool,
self.resources_string,
self._events_db,
self._links_db,
self.events,
self.links,
self.response_status_code,
self.external_http_url,
self.http_url,

View File

@@ -17,51 +17,7 @@ from fixtures.querier import (
index_series_by_label,
make_query_request,
)
from fixtures.traces import (
TraceIdGenerator,
Traces,
TracesEvent,
TracesKind,
TracesLink,
TracesRefType,
TracesStatusCode,
)
# All keys returned by the trace list endpoint when selectFields is empty:
# every intrinsic and calculated column, plus the merged `attributes` and
# `resource` maps that wrap the contextual columns in the response layer.
ALL_SELECT_FIELDS = [
# all intrinsic columns
"timestamp",
"trace_id",
"span_id",
"trace_state",
"parent_span_id",
"flags",
"name",
"kind",
"kind_string",
"duration_nano",
"status_code",
"status_message",
"status_code_string",
"events",
"links",
# all calculated columns
"response_status_code",
"external_http_url",
"http_url",
"external_http_method",
"http_method",
"http_host",
"db_name",
"db_operation",
"has_error",
"is_remote",
# all contextual columns (merged in response layer)
"attributes",
"resource",
]
from fixtures.traces import TraceIdGenerator, Traces, TracesKind, TracesStatusCode
def test_traces_list(
@@ -517,9 +473,7 @@ def test_traces_list(
@pytest.mark.parametrize(
"payload,status_code,results",
[
# Case 1: order by timestamp; empty selectFields returns the full
# response shape (all intrinsic + calculated columns plus the merged
# `attributes` and `resource` maps). x[3] (topic-service) is latest.
# Case 1: order by timestamp field which there in attributes as well
pytest.param(
{
"type": "builder_query",
@@ -533,42 +487,19 @@ def test_traces_list(
},
HTTPStatus.OK,
lambda x: [
{
**x[3].attribute_string,
**x[3].attributes_number,
**x[3].attributes_bool,
}, # attributes
x[3].db_name,
x[3].db_operation,
int(x[3].duration_nano),
x[3].events,
x[3].external_http_method,
x[3].external_http_url,
int(x[3].flags),
x[3].has_error,
x[3].http_host,
x[3].http_method,
x[3].http_url,
x[3].is_remote,
int(x[3].kind),
x[3].kind_string,
x[3].links,
x[3].duration_nano,
x[3].name,
x[3].parent_span_id,
x[3].resources_string,
x[3].response_status_code,
x[3].service_name,
x[3].span_id,
int(x[3].status_code),
x[3].status_code_string,
x[3].status_message,
format_timestamp(x[3].timestamp),
x[3].trace_id,
x[3].trace_state,
], # type: Callable[[List[Traces]], List[Any]]
),
# Case 2: order by attribute.timestamp. The key resolves to the
# intrinsic span.timestamp column, so the latest span (x[3]) is
# returned with the same full response shape as Case 1.
# Case 2: order by attribute timestamp field which is there in attributes as well
# This should break but it doesn't because attribute.timestamp gets adjusted to timestamp
# because of default trace.timestamp gets added by default and bug in field mapper picks
# instrinsic field
pytest.param(
{
"type": "builder_query",
@@ -582,37 +513,13 @@ def test_traces_list(
},
HTTPStatus.OK,
lambda x: [
{
**x[3].attribute_string,
**x[3].attributes_number,
**x[3].attributes_bool,
}, # attributes
x[3].db_name,
x[3].db_operation,
int(x[3].duration_nano),
x[3].events,
x[3].external_http_method,
x[3].external_http_url,
int(x[3].flags),
x[3].has_error,
x[3].http_host,
x[3].http_method,
x[3].http_url,
x[3].is_remote,
int(x[3].kind),
x[3].kind_string,
x[3].links,
x[3].duration_nano,
x[3].name,
x[3].parent_span_id,
x[3].resources_string,
x[3].response_status_code,
x[3].service_name,
x[3].span_id,
int(x[3].status_code),
x[3].status_code_string,
x[3].status_message,
format_timestamp(x[3].timestamp),
x[3].trace_id,
x[3].trace_state,
], # type: Callable[[List[Traces]], List[Any]]
),
# Case 3: select timestamp with empty order by
@@ -635,7 +542,7 @@ def test_traces_list(
], # type: Callable[[List[Traces]], List[Any]]
),
# Case 4: select attribute.timestamp with empty order by
# This returns the one span which has attribute.timestamp
# This doesn't return any data because of where_clause using aliased timestamp
pytest.param(
{
"type": "builder_query",
@@ -649,11 +556,7 @@ def test_traces_list(
},
},
HTTPStatus.OK,
lambda x: [
x[0].span_id,
format_timestamp(x[0].timestamp),
x[0].trace_id,
], # type: Callable[[List[Traces]], List[Any]]
lambda x: [], # type: Callable[[List[Traces]], List[Any]]
),
# Case 5: select timestamp with timestamp order by
pytest.param(
@@ -790,159 +693,6 @@ def test_traces_list_with_corrupt_data(
assert data[key] == value
def _verify_events_links_full(rows: list[dict], traces: list[Traces]) -> None:
"""Empty-selectFields case: events/links arrive parsed into structured objects.
Every row's events/links should match the fixture's stored parsed shape
(the fixture's `.events`/`.links` mirror the API response shape directly).
"""
for row, trace in zip(rows, traces, strict=True):
assert row["data"]["events"] == trace.events
assert row["data"]["links"] == trace.links
# Jaeger-era `refType` is dropped at the consume layer.
for link in row["data"]["links"]:
assert "refType" not in link
def _verify_events_links_skip(rows: list[dict], traces: list[Traces]) -> None:
"""Projected-selectFields case: nothing to verify beyond the key set."""
@pytest.mark.parametrize(
"select_fields,status_code,expected_keys,verify_values",
[
pytest.param(
[],
HTTPStatus.OK,
ALL_SELECT_FIELDS,
_verify_events_links_full,
),
pytest.param(
[
{"name": "service.name"},
],
HTTPStatus.OK,
["timestamp", "trace_id", "span_id", "service.name"],
_verify_events_links_skip,
),
],
)
def test_traces_list_with_select_fields(
signoz: types.SigNoz,
create_user_admin: None, # pylint: disable=unused-argument
get_token: Callable[[str, str], str],
insert_traces: Callable[[list[Traces]], None],
select_fields: list[dict],
status_code: HTTPStatus,
expected_keys: list[str],
verify_values: Callable[[list[dict], list[Traces]], None],
) -> None:
"""
Setup:
Insert a root span with no events/links and a child span carrying two
events and one user-supplied link.
Tests:
1. Empty select fields should return all the fields, and the `events` /
`links` columns should arrive parsed into structured objects (events
carry `attributes`, links carry only `traceId`/`spanId` — refType is
dropped at the consume layer).
2. Non-empty select field should return the select field along with
timestamp, trace_id and span_id.
"""
now = datetime.now(tz=UTC).replace(second=0, microsecond=0)
parent_trace_id = TraceIdGenerator.trace_id()
parent_span_id = TraceIdGenerator.span_id()
child_span_id = TraceIdGenerator.span_id()
linked_trace_id = TraceIdGenerator.trace_id()
linked_span_id = TraceIdGenerator.span_id()
event_one = TracesEvent(
name="request_received",
timestamp=now - timedelta(seconds=3, microseconds=500_000),
attribute_map={"http.method": "GET", "http.route": "/api/chat"},
)
event_two = TracesEvent(
name="cache_lookup",
timestamp=now - timedelta(seconds=3, microseconds=400_000),
attribute_map={"cache.hit": "true", "cache.key": "user:123:prompt"},
)
user_link = TracesLink(
trace_id=linked_trace_id,
span_id=linked_span_id,
ref_type=TracesRefType.REF_TYPE_FOLLOWS_FROM,
)
traces = [
# Root span: no events, no links. Verifies the empty-case parsed shape.
Traces(
timestamp=now - timedelta(seconds=4),
duration=timedelta(seconds=3),
trace_id=parent_trace_id,
span_id=parent_span_id,
parent_span_id="",
name="root span",
kind=TracesKind.SPAN_KIND_SERVER,
status_code=TracesStatusCode.STATUS_CODE_OK,
resources={"service.name": "events-links-service"},
attributes={"http.request.method": "GET"},
),
# Child span: two events + one user-supplied link. The fixture
# auto-inserts a CHILD_OF link for the parent, so the parsed response
# contains two links total — the auto-inserted one first.
Traces(
timestamp=now - timedelta(seconds=3),
duration=timedelta(seconds=1),
trace_id=parent_trace_id,
span_id=child_span_id,
parent_span_id=parent_span_id,
name="child span",
kind=TracesKind.SPAN_KIND_INTERNAL,
status_code=TracesStatusCode.STATUS_CODE_OK,
resources={"service.name": "events-links-service"},
attributes={"http.request.method": "GET"},
events=[event_one, event_two],
links=[user_link],
),
]
insert_traces(traces)
token = get_token(USER_ADMIN_EMAIL, USER_ADMIN_PASSWORD)
payload = {
"type": "builder_query",
"spec": {
"name": "A",
"signal": "traces",
"filter": {"expression": "resource.service.name = 'events-links-service'"},
"selectFields": select_fields,
"order": [{"key": {"name": "timestamp"}, "direction": "asc"}],
"limit": 10,
},
}
response = make_query_request(
signoz,
token,
start_ms=int((datetime.now(tz=UTC) - timedelta(minutes=5)).timestamp() * 1000),
end_ms=int(datetime.now(tz=UTC).timestamp() * 1000),
request_type="raw",
queries=[payload],
)
assert response.status_code == status_code
if response.status_code != HTTPStatus.OK:
return
rows = response.json()["data"]["data"]["results"][0]["rows"]
assert len(rows) == 2
for row in rows:
assert set(row["data"].keys()) == set(expected_keys)
verify_values(rows, traces)
@pytest.mark.parametrize(
"order_by,aggregation_alias,expected_status",
[